Bug 1261841 part 1 - Vendor encoding_rs and encoding_c into m-c. rs=emk,SimonSapin.
author Henri Sivonen <hsivonen@hsivonen.fi>
Tue, 25 Apr 2017 11:42:02 +0300
changeset 412433 a799ece5b8ad74e7aa64e0e4f74a866958a73000
parent 412432 33ce52b7c7ea6bcc57d6a97c56e2e74817ec47b3
child 412434 e155fa765af299f0e8cfb42e0a1709e5b04928b9
push id 7566
push user mtabara@mozilla.com
push date Wed, 02 Aug 2017 08:25:16 +0000
treeherder mozilla-beta@86913f512c3c [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers emk, SimonSapin
bugs 1261841
milestone 56.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1261841 part 1 - Vendor encoding_rs and encoding_c into m-c. rs=emk,SimonSapin. MozReview-Commit-ID: Lphq69tSIXa
third_party/rust/encoding_c/.cargo-checksum.json
third_party/rust/encoding_c/.cargo-ok
third_party/rust/encoding_c/.gitignore
third_party/rust/encoding_c/CONTRIBUTING.md
third_party/rust/encoding_c/COPYRIGHT
third_party/rust/encoding_c/Cargo.toml
third_party/rust/encoding_c/LICENSE-APACHE
third_party/rust/encoding_c/LICENSE-MIT
third_party/rust/encoding_c/README.md
third_party/rust/encoding_c/build-disabled.rs
third_party/rust/encoding_c/include/encoding_rs.h
third_party/rust/encoding_c/include/encoding_rs_cpp.h
third_party/rust/encoding_c/include/encoding_rs_statics.h
third_party/rust/encoding_c/src/lib.rs
third_party/rust/encoding_rs/.cargo-checksum.json
third_party/rust/encoding_rs/.cargo-ok
third_party/rust/encoding_rs/.gitignore
third_party/rust/encoding_rs/.travis.yml
third_party/rust/encoding_rs/CONTRIBUTING.md
third_party/rust/encoding_rs/COPYRIGHT
third_party/rust/encoding_rs/Cargo.toml
third_party/rust/encoding_rs/Ideas.md
third_party/rust/encoding_rs/LICENSE-APACHE
third_party/rust/encoding_rs/LICENSE-MIT
third_party/rust/encoding_rs/README.md
third_party/rust/encoding_rs/generate-encoding-data.py
third_party/rust/encoding_rs/rustfmt.toml
third_party/rust/encoding_rs/src/ascii.rs
third_party/rust/encoding_rs/src/big5.rs
third_party/rust/encoding_rs/src/data.rs
third_party/rust/encoding_rs/src/euc_jp.rs
third_party/rust/encoding_rs/src/euc_kr.rs
third_party/rust/encoding_rs/src/gb18030.rs
third_party/rust/encoding_rs/src/handles.rs
third_party/rust/encoding_rs/src/iso_2022_jp.rs
third_party/rust/encoding_rs/src/lib.rs
third_party/rust/encoding_rs/src/macros.rs
third_party/rust/encoding_rs/src/replacement.rs
third_party/rust/encoding_rs/src/shift_jis.rs
third_party/rust/encoding_rs/src/simd_funcs.rs
third_party/rust/encoding_rs/src/single_byte.rs
third_party/rust/encoding_rs/src/test_data/big5_in.txt
third_party/rust/encoding_rs/src/test_data/big5_in_ref.txt
third_party/rust/encoding_rs/src/test_data/big5_out.txt
third_party/rust/encoding_rs/src/test_data/big5_out_ref.txt
third_party/rust/encoding_rs/src/test_data/euc_kr_in.txt
third_party/rust/encoding_rs/src/test_data/euc_kr_in_ref.txt
third_party/rust/encoding_rs/src/test_data/euc_kr_out.txt
third_party/rust/encoding_rs/src/test_data/euc_kr_out_ref.txt
third_party/rust/encoding_rs/src/test_data/gb18030_in.txt
third_party/rust/encoding_rs/src/test_data/gb18030_in_ref.txt
third_party/rust/encoding_rs/src/test_data/gb18030_out.txt
third_party/rust/encoding_rs/src/test_data/gb18030_out_ref.txt
third_party/rust/encoding_rs/src/test_data/iso_2022_jp_in.txt
third_party/rust/encoding_rs/src/test_data/iso_2022_jp_in_ref.txt
third_party/rust/encoding_rs/src/test_data/iso_2022_jp_out.txt
third_party/rust/encoding_rs/src/test_data/iso_2022_jp_out_ref.txt
third_party/rust/encoding_rs/src/test_data/jis0208_in.txt
third_party/rust/encoding_rs/src/test_data/jis0208_in_ref.txt
third_party/rust/encoding_rs/src/test_data/jis0208_out.txt
third_party/rust/encoding_rs/src/test_data/jis0208_out_ref.txt
third_party/rust/encoding_rs/src/test_data/jis0212_in.txt
third_party/rust/encoding_rs/src/test_data/jis0212_in_ref.txt
third_party/rust/encoding_rs/src/test_data/shift_jis_in.txt
third_party/rust/encoding_rs/src/test_data/shift_jis_in_ref.txt
third_party/rust/encoding_rs/src/test_data/shift_jis_out.txt
third_party/rust/encoding_rs/src/test_data/shift_jis_out_ref.txt
third_party/rust/encoding_rs/src/test_labels_names.rs
third_party/rust/encoding_rs/src/testing.rs
third_party/rust/encoding_rs/src/utf_16.rs
third_party/rust/encoding_rs/src/utf_8.rs
third_party/rust/encoding_rs/src/utf_8_core.rs
third_party/rust/encoding_rs/src/variant.rs
third_party/rust/encoding_rs/src/x_user_defined.rs
toolkit/library/gtest/rust/Cargo.lock
toolkit/library/rust/Cargo.lock
toolkit/library/rust/shared/Cargo.toml
toolkit/library/rust/shared/lib.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/.cargo-checksum.json
@@ -0,0 +1,1 @@
+{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"3effb8c580299a86c7c816e456406be2763f0e0954c66fed2ce5ce06e750f997","CONTRIBUTING.md":"8cd9262df951c4b42078aa55064ca3b8ef2676c06b8fc7c281c02ee3f1ae04a8","COPYRIGHT":"65fb11bb8d2aac1ea00620273e0595ff71f4a335d25b67acbccbaa1b9ad5a409","Cargo.toml":"e7532cd5f9aba02726720ec8707914e6f5a8ce24401415233def34ec778d31c8","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"90df74ddb43e7f5aa5068890eacc151ecca7c997c9515cf17aea30b8734075bf","README.md":"1d360a54ac30d2ed84cada251eeaae0ef43c6f2b771856d6c181b592e8f6a471","build-disabled.rs":"d65ed45d33ce834ab9f9f7c5f308e0a72605aa34ede6dca45a2077a2deee5cfa","include/encoding_rs.h":"dc015596eb8b4b0b2e79569a519e81c14301db8f5b96b4013989645a67a73422","include/encoding_rs_cpp.h":"f93c0e2b3e1ec4f1efb1fcee1f43e8d1424faf3e26d7084404c5ba5f2f6a2c4d","include/encoding_rs_statics.h":"800e6aa5aafe2fa3a3826ed0c0a0da34ca9495ff9c75c84845d44b14f5be1078","src/lib.rs":"69ac99046085286c00534b6d107df269cfdd67fc488190d690d2d3e8c01bf916"},"package":"45ef700aebe8c5fb44f081a54ab400f4f6b002a426bc5332381c108f49713432"}
\ No newline at end of file
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/.gitignore
@@ -0,0 +1,6 @@
+target
+Cargo.lock
+.project
+.settings
+*~
+*.bk
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/CONTRIBUTING.md
@@ -0,0 +1,38 @@
+If you send a pull request / patch, please observe the following.
+
+## Licensing
+
+Since this crate is dual-licensed,
+[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
+is considered to apply in the sense of Contributions being automatically
+under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
+That is, by the act of offering a Contribution, you place your Contribution
+under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
+file. Please do not contribute if you aren't willing or allowed to license your
+contributions in this manner.
+
+You are encouraged to dedicate test code that you contribute to the Public
+Domain using the CC0 dedication. If you contribute test code that is not
+dedicated to the Public Domain, please be sure not to put it in a part of
+source code that the comments designate as being dedicated to the Public
+Domain.
+
+## Copyright Notices
+
+If you require the addition of your copyright notice, it's up to you to edit in
+your notice as part of your Contribution. Not adding a copyright notice is
+taken as a waiver of copyright notice.
+
+## Compatibility with Stable Rust
+
+Please ensure that your Contribution compiles with the latest stable-channel
+rustc.
+
+## rustfmt
+
+Please install [`rustfmt`](https://github.com/rust-lang-nursery/rustfmt) 0.4.1
+(the latest version has
+[a bug](https://github.com/rust-lang-nursery/rustfmt/issues/1149) that renders
+it unsuited for encoding_rs) and run `cargo fmt` before creating a pull
+request. (It's OK for `cargo fmt` to exit with an error due to too long lines.)
+
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/COPYRIGHT
@@ -0,0 +1,9 @@
+encoding_c is copyright 2015-2017 Mozilla Foundation.
+
+Licensed under the Apache License, Version 2.0
+<LICENSE-APACHE or
+https://www.apache.org/licenses/LICENSE-2.0> or the MIT
+license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
+at your option. All files in the project carrying such
+notice may not be copied, modified, or distributed except
+according to those terms.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "encoding_c"
+description = "C API for encoding_rs"
+version = "0.7.4"
+authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
+license = "MIT/Apache-2.0"
+readme = "README.md"
+documentation = "https://docs.rs/encoding_c/"
+homepage = "https://docs.rs/encoding_c/"
+repository = "https://github.com/hsivonen/encoding_c"
+keywords = ["ffi", "capi", "encoding", "unicode", "charset"]
+# Uncomment the line below and rename build-disabled.rs to build.rs to re-run cheddar.
+# build = "build.rs"
+
+[features]
+simd-accel = ["encoding_rs/simd-accel"]
+no-static-ideograph-encoder-tables = ["encoding_rs/no-static-ideograph-encoder-tables"]
+parallel-utf8 = ["encoding_rs/parallel-utf8"]
+
+[dependencies]
+encoding_rs = "0.6.11"
+
+# Uncomment the lines below to re-run cheddar.
+# [build-dependencies]
+# rusty-cheddar = "0.3.3"
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/LICENSE-APACHE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2015-2016 Mozilla Foundation
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/README.md
@@ -0,0 +1,57 @@
+# encoding_c
+
+[![crates.io](https://meritbadge.herokuapp.com/encoding_c)](https://crates.io/crates/encoding_c)
+[![docs.rs](https://docs.rs/encoding_c/badge.svg)](https://docs.rs/encoding_c/)
+[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/encoding_c/blob/master/COPYRIGHT)
+
+encoding_c is an FFI wrapper for [encoding_rs](https://github.com/hsivonen/encoding_rs).
+
+## Licensing
+
+Please see the file named
+[COPYRIGHT](https://github.com/hsivonen/encoding_c/blob/master/COPYRIGHT).
+
+## C/C++ Headers
+
+`include/encoding_rs.h` and `include/encoding_rs_statics.h` are needed for C
+usage.
+
+`include/encoding_rs_cpp.h` is a sample C++ API built on top of the C API using
+GSL and the C++ standard library. Since C++ projects typically roll their own
+string classes, etc., it's probably necessary for C++ projects to manually
+adapt the header to their replacements of standard-library types.
+
+## Release Notes
+
+### 0.7.4
+
+* Wrap `has_pending_state()`.
+
+### 0.7.3
+
+* Use C preprocessor definitions for encoding constant declarations.
+
+### 0.7.2
+
+* Parametrize the struct type names behind C preprocessor definitions.
+* Leave it to the user to provide `char16_t`. Avoid including a header for it.
+
+### 0.7.1
+
+* Fix documentation for pointers that get used in
+  `std::slice::from_raw_parts()`.
+
+### 0.7.0
+
+* Map `None` to `SIZE_MAX` in the max length calculation functions.
+
+### 0.6.0
+
+* Check in the `cheddar`-generated header and comment out the `cheddar`-using
+  `build.rs`.
+
+### 0.5.0
+
+* Initial release of encoding_c. (I.e. first release with FFI in a distinct
+  crate.)
+
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/build-disabled.rs
@@ -0,0 +1,60 @@
+// Copyright 2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+extern crate cheddar;
+
+use std::io::prelude::*;
+use std::fs::File;
+
+fn replace(path: &str) -> std::io::Result<()> {
+    let mut f = try!(File::open(path));
+    let mut s = String::new();
+    try!(f.read_to_string(&mut s));
+    s = s.replace("#ifndef cheddar_generated_encoding_rs_h", "// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using encoding_c/build.rs.
+
+#ifndef cheddar_generated_encoding_rs_h");
+    s = s.replace("uint16_t", "char16_t");
+    s = s.replace("uintptr_t", "size_t");
+    s = s.replace("Encoding", "ENCODING_RS_ENCODING");
+    s = s.replace("Encoder", "ENCODING_RS_ENCODER");
+    s = s.replace("Decoder", "ENCODING_RS_DECODER");
+    s = s.replace("ENCODING_RS_ENCODING.html", "Encoding.html");
+    s = s.replace("ENCODING_RS_ENCODER.html", "Encoder.html");
+    s = s.replace("ENCODING_RS_DECODER.html", "Decoder.html");
+    s = s.replace("#include <stdbool.h>",
+                  "#include <stdbool.h>\n#include \"encoding_rs_statics.h\"");
+    let mut f = try!(File::create(path));
+    try!(f.write_all(s.as_bytes()));
+    Ok(())
+}
+
+fn main() {
+    println!("cargo:rerun-if-changed=src/lib.rs");
+
+    let path = "include/encoding_rs.h";
+
+    cheddar::Cheddar::new()
+        .expect("could not read manifest")
+        .run_build(path);
+
+    match replace(path) {
+        Ok(_) => {}
+        Err(e) => println!("Performing replacements failed {}.", e),
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/include/encoding_rs.h
@@ -0,0 +1,639 @@
+
+// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using encoding_c/build.rs.
+
+#ifndef cheddar_generated_encoding_rs_h
+#define cheddar_generated_encoding_rs_h
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "encoding_rs_statics.h"
+
+
+
+/// Implements the
+/// [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
+/// algorithm.
+///
+/// If, after ASCII-lowercasing and removing leading and trailing
+/// whitespace, the argument matches a label defined in the Encoding
+/// Standard, `const ENCODING_RS_ENCODING*` representing the corresponding
+/// encoding is returned. If there is no match, `NULL` is returned.
+///
+/// This is the right function to use if the action upon the method returning
+/// `NULL` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`) instead.
+/// When the action upon the method returning `NULL` is not to proceed with
+/// a fallback but to refuse processing, `encoding_for_label_no_replacement()` is
+/// more appropriate.
+///
+/// The argument buffer can be in any ASCII-compatible encoding. It is not
+/// required to be UTF-8.
+///
+/// `label` must be non-`NULL` even if `label_len` is zero. When `label_len`
+/// is zero, it is OK for `label` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `label` and `label_len` don't designate a valid memory block
+/// or if `label` is `NULL`.
+ENCODING_RS_ENCODING const* encoding_for_label(uint8_t const* label, size_t label_len);
+
+/// This function behaves the same as `encoding_for_label()`, except when
+/// `encoding_for_label()` would return `REPLACEMENT_ENCODING`, this method
+/// returns `NULL` instead.
+///
+/// This method is useful in scenarios where a fatal error is required
+/// upon invalid label, because in those cases the caller typically wishes
+/// to treat the labels that map to the replacement encoding as fatal
+/// errors, too.
+///
+/// It is not OK to use this function when the action upon the method returning
+/// `NULL` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`). In
+/// such a case, the `encoding_for_label()` function should be used instead
+/// in order to avoid unsafe fallback for labels that `encoding_for_label()`
+/// maps to `REPLACEMENT_ENCODING`.
+///
+/// The argument buffer can be in any ASCII-compatible encoding. It is not
+/// required to be UTF-8.
+///
+/// `label` must be non-`NULL` even if `label_len` is zero. When `label_len`
+/// is zero, it is OK for `label` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `label` and `label_len` don't designate a valid memory block
+/// or if `label` is `NULL`.
+ENCODING_RS_ENCODING const* encoding_for_label_no_replacement(uint8_t const* label, size_t label_len);
+
+/// Performs non-incremental BOM sniffing.
+///
+/// The argument must either be a buffer representing the entire input
+/// stream (non-streaming case) or a buffer representing at least the first
+/// three bytes of the input stream (streaming case).
+///
+/// Returns `UTF_8_ENCODING`, `UTF_16LE_ENCODING` or `UTF_16BE_ENCODING` if the
+/// argument starts with the UTF-8, UTF-16LE or UTF-16BE BOM or `NULL`
+/// otherwise. Upon return, `*buffer_len` is the length of the BOM (zero if
+/// there is no BOM).
+///
+/// `buffer` must be non-`NULL` even if `*buffer_len` is zero. When
+/// `*buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `*buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+ENCODING_RS_ENCODING const* encoding_for_bom(uint8_t const* buffer, size_t* buffer_len);
+
+/// If the argument matches exactly (case-sensitively; no whitespace
+/// removal performed) the name of an encoding, returns
+/// `const ENCODING_RS_ENCODING*` representing that encoding. Otherwise panics.
+///
+/// The motivating use case for this function is interoperability with
+/// legacy Gecko code that represents encodings as name string instead of
+/// type-safe `ENCODING_RS_ENCODING` objects. Using this function for other purposes is
+/// most likely the wrong thing to do.
+///
+/// `name` must be non-`NULL` even if `name_len` is zero. When `name_len`
+/// is zero, it is OK for `name` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `name` and `name_len` don't designate a valid memory block
+/// or if `name` is `NULL`.
+ENCODING_RS_ENCODING const* encoding_for_name(uint8_t const* name, size_t name_len);
+
+/// Writes the name of the given `ENCODING_RS_ENCODING` to a caller-supplied buffer as
+/// ASCII and returns the number of bytes / ASCII characters written.
+///
+/// The output is not null-terminated.
+///
+/// The caller _MUST_ ensure that `name_out` points to a buffer whose length
+/// is at least `ENCODING_NAME_MAX_LENGTH` bytes.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL` or if `name_out` doesn't point to
+/// a valid block of memory whose length is at least
+/// `ENCODING_NAME_MAX_LENGTH` bytes.
+size_t encoding_name(ENCODING_RS_ENCODING const* encoding, uint8_t* name_out);
+
+/// Checks whether the _output encoding_ of this encoding can encode every
+/// Unicode scalar. (Only true if the output encoding is UTF-8.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+bool encoding_can_encode_everything(ENCODING_RS_ENCODING const* encoding);
+
+/// Checks whether the bytes 0x00...0x7F map exclusively to the characters
+/// U+0000...U+007F and vice versa.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+bool encoding_is_ascii_compatible(ENCODING_RS_ENCODING const* encoding);
+
+/// Returns the _output encoding_ of this encoding. This is UTF-8 for
+/// UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODING const* encoding_output_encoding(ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING` on the heap with BOM
+/// sniffing enabled and returns a pointer to the newly-allocated `ENCODING_RS_DECODER`.
+///
+/// BOM sniffing may cause the returned decoder to morph into a decoder
+/// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_DECODER* encoding_new_decoder(ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING` on the heap with BOM
+/// removal and returns a pointer to the newly-allocated `ENCODING_RS_DECODER`.
+///
+/// If the input starts with bytes that are the BOM for this encoding,
+/// those bytes are removed. However, the decoder never morphs into a
+/// decoder for another encoding: A BOM for another encoding is treated as
+/// (potentially malformed) input to the decoding algorithm for this
+/// encoding.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_DECODER* encoding_new_decoder_with_bom_removal(ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING` on the heap with BOM
+/// handling disabled and returns a pointer to the newly-allocated `ENCODING_RS_DECODER`.
+///
+/// If the input starts with bytes that look like a BOM, those bytes are
+/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
+/// for another encoding.)
+///
+/// _Note:_ If the caller has performed BOM sniffing on its own but has not
+/// removed the BOM, the caller should use
+/// `encoding_new_decoder_with_bom_removal()` instead of this function to cause
+/// the BOM to be removed.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_DECODER* encoding_new_decoder_without_bom_handling(ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING` into memory provided by
+/// the caller with BOM sniffing enabled. (In practice, the target should
+/// likely be a pointer previously returned by `encoding_new_decoder()`.)
+///
+/// Note: If the caller has already performed BOM sniffing but has
+/// not removed the BOM, the caller should still use this function in
+/// order to cause the BOM to be ignored.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_decoder_into(ENCODING_RS_ENCODING const* encoding, ENCODING_RS_DECODER* decoder);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING` into memory provided by
+/// the caller with BOM removal.
+///
+/// If the input starts with bytes that are the BOM for this encoding,
+/// those bytes are removed. However, the decoder never morphs into a
+/// decoder for another encoding: A BOM for another encoding is treated as
+/// (potentially malformed) input to the decoding algorithm for this
+/// encoding.
+///
+/// Once the allocated `ENCODING_RS_DECODER` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_decoder_with_bom_removal_into(ENCODING_RS_ENCODING const* encoding, ENCODING_RS_DECODER* decoder);
+
+/// Allocates a new `ENCODING_RS_DECODER` for the given `ENCODING_RS_ENCODING` into memory provided by
+/// the caller with BOM handling disabled.
+///
+/// If the input starts with bytes that look like a BOM, those bytes are
+/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
+/// for another encoding.)
+///
+/// _Note:_ If the caller has performed BOM sniffing on its own but has not
+/// removed the BOM, the caller should use
+/// `encoding_new_decoder_with_bom_removal_into()` instead of this function to
+/// cause the BOM to be removed.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_decoder_without_bom_handling_into(ENCODING_RS_ENCODING const* encoding, ENCODING_RS_DECODER* decoder);
+
+/// Allocates a new `ENCODING_RS_ENCODER` for the given `ENCODING_RS_ENCODING` on the heap and returns a
+/// pointer to the newly-allocated `ENCODING_RS_ENCODER`. (Exception: if the `ENCODING_RS_ENCODING` is
+/// `replacement`, a new `ENCODING_RS_ENCODER` for UTF-8 is instantiated, and that
+/// `ENCODING_RS_ENCODER` reports `UTF_8` as its `ENCODING_RS_ENCODING`.)
+///
+/// Once the allocated `ENCODING_RS_ENCODER` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `encoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODER* encoding_new_encoder(ENCODING_RS_ENCODING const* encoding);
+
+/// Allocates a new `ENCODING_RS_ENCODER` for the given `ENCODING_RS_ENCODING` into memory provided by
+/// the caller. (In practice, the target should likely be a pointer previously
+/// returned by `encoding_new_encoder()`.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+void encoding_new_encoder_into(ENCODING_RS_ENCODING const* encoding, ENCODING_RS_ENCODER* encoder);
+
+/// Validates UTF-8.
+///
+/// Returns the index of the first byte that makes the input malformed as
+/// UTF-8 or `buffer_len` if `buffer` is entirely valid.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+
+/// Validates ASCII.
+///
+/// Returns the index of the first byte that makes the input malformed as
+/// ASCII or `buffer_len` if `buffer` is entirely valid.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+size_t encoding_ascii_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+
+/// Validates ISO-2022-JP ASCII-state data.
+///
+/// Returns the index of the first byte that makes the input not representable
+/// in the ASCII state of ISO-2022-JP or `buffer_len` if `buffer` is entirely
+/// representable in the ASCII state of ISO-2022-JP.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+size_t encoding_iso_2022_jp_ascii_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+
+/// Deallocates a `ENCODING_RS_DECODER` previously allocated by `encoding_new_decoder()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+void decoder_free(ENCODING_RS_DECODER* decoder);
+
+/// The `ENCODING_RS_ENCODING` this `ENCODING_RS_DECODER` is for.
+///
+/// BOM sniffing can change the return value of this method during the life
+/// of the decoder.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODING const* decoder_encoding(ENCODING_RS_DECODER const* decoder);
+
+/// Query the worst-case UTF-8 output size _with replacement_.
+///
+/// Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+/// that will not overflow given the current state of the decoder and
+/// `byte_length` number of additional input bytes when decoding with
+/// errors handled by outputting a REPLACEMENT CHARACTER for each malformed
+/// sequence or `SIZE_MAX` if `size_t` would overflow.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+size_t decoder_max_utf8_buffer_length(ENCODING_RS_DECODER const* decoder, size_t byte_length);
+
+/// Query the worst-case UTF-8 output size _without replacement_.
+///
+/// Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+/// that will not overflow given the current state of the decoder and
+/// `byte_length` number of additional input bytes when decoding without
+/// replacement error handling or `SIZE_MAX` if `size_t` would overflow.
+///
+/// Note that this value may be too small for the `_with_replacement` case.
+/// Use `decoder_max_utf8_buffer_length()` for that case.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+size_t decoder_max_utf8_buffer_length_without_replacement(ENCODING_RS_DECODER const* decoder, size_t byte_length);
+
+/// Incrementally decode a byte stream into UTF-8 with malformed sequences
+/// replaced with the REPLACEMENT CHARACTER.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf8(ENCODING_RS_DECODER* decoder, uint8_t const* src, size_t* src_len, uint8_t* dst, size_t* dst_len, bool last, bool* had_replacements);
+
+/// Incrementally decode a byte stream into UTF-8 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf8_without_replacement(ENCODING_RS_DECODER* decoder, uint8_t const* src, size_t* src_len, uint8_t* dst, size_t* dst_len, bool last);
+
+/// Query the worst-case UTF-16 output size (with or without replacement).
+///
+/// Returns the size of the output buffer in UTF-16 code units (`char16_t`)
+/// that will not overflow given the current state of the decoder and
+/// `u16_length` number of additional input bytes (the parameter counts bytes
+/// despite its name) or `SIZE_MAX` if `size_t` would overflow.
+///
+/// Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the
+/// return value of this method applies also in the
+/// `_without_replacement` case.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+size_t decoder_max_utf16_buffer_length(ENCODING_RS_DECODER const* decoder, size_t u16_length);
+
+/// Incrementally decode a byte stream into UTF-16 with malformed sequences
+/// replaced with the REPLACEMENT CHARACTER.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf16(ENCODING_RS_DECODER* decoder, uint8_t const* src, size_t* src_len, char16_t* dst, size_t* dst_len, bool last, bool* had_replacements);
+
+/// Incrementally decode a byte stream into UTF-16 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_DECODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+uint32_t decoder_decode_to_utf16_without_replacement(ENCODING_RS_DECODER* decoder, uint8_t const* src, size_t* src_len, char16_t* dst, size_t* dst_len, bool last);
+
+/// Deallocates an `ENCODING_RS_ENCODER` previously allocated by `encoding_new_encoder()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+void encoder_free(ENCODING_RS_ENCODER* encoder);
+
+/// The `ENCODING_RS_ENCODING` this `ENCODING_RS_ENCODER` is for.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+ENCODING_RS_ENCODING const* encoder_encoding(ENCODING_RS_ENCODER const* encoder);
+
+/// Returns `true` if this is an ISO-2022-JP encoder that's not in the
+/// ASCII state and `false` otherwise.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+bool encoder_has_pending_state(ENCODING_RS_ENCODER const* encoder);
+
+/// Query the worst-case output size when encoding from UTF-8 with
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `byte_length` number of
+/// additional input code units if there are no unmappable characters in
+/// the input or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf8_if_no_unmappables(ENCODING_RS_ENCODER const* encoder, size_t byte_length);
+
+/// Query the worst-case output size when encoding from UTF-8 without
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `byte_length` number of
+/// additional input code units or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf8_without_replacement(ENCODING_RS_ENCODER const* encoder, size_t byte_length);
+
+/// Incrementally encode into byte stream from UTF-8 with unmappable
+/// characters replaced with HTML (decimal) numeric character references.
+///
+/// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe!
+/// If in doubt, check the validity of input before using!
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf8(ENCODING_RS_ENCODER* encoder, uint8_t const* src, size_t* src_len, uint8_t* dst, size_t* dst_len, bool last, bool* had_replacements);
+
+/// Incrementally encode into byte stream from UTF-8 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe!
+/// If in doubt, check the validity of input before using!
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf8_without_replacement(ENCODING_RS_ENCODER* encoder, uint8_t const* src, size_t* src_len, uint8_t* dst, size_t* dst_len, bool last);
+
+/// Query the worst-case output size when encoding from UTF-16 with
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `u16_length` number of
+/// additional input code units if there are no unmappable characters in
+/// the input or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf16_if_no_unmappables(ENCODING_RS_ENCODER const* encoder, size_t u16_length);
+
+/// Query the worst-case output size when encoding from UTF-16 without
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `u16_length` number of
+/// additional input code units or `SIZE_MAX` if `size_t` would overflow.
+size_t encoder_max_buffer_length_from_utf16_without_replacement(ENCODING_RS_ENCODER const* encoder, size_t u16_length);
+
+/// Incrementally encode into byte stream from UTF-16 with unmappable
+/// characters replaced with HTML (decimal) numeric character references.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf16(ENCODING_RS_ENCODER* encoder, char16_t const* src, size_t* src_len, uint8_t* dst, size_t* dst_len, bool last, bool* had_replacements);
+
+/// Incrementally encode into byte stream from UTF-16 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `encoder_encode_*` functions are mapped from Rust and the documentation
+/// for the [`ENCODING_RS_ENCODER`][1] struct for the semantics.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+uint32_t encoder_encode_from_utf16_without_replacement(ENCODING_RS_ENCODER* encoder, char16_t const* src, size_t* src_len, uint8_t* dst, size_t* dst_len, bool last);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/include/encoding_rs_cpp.h
@@ -0,0 +1,1296 @@
+// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#pragma once
+
+#ifndef encoding_rs_cpp_h_
+#define encoding_rs_cpp_h_
+
+#include "gsl/gsl"
+#include <experimental/optional>
+#include <memory>
+#include <string>
+#include <tuple>
+
+#include "encoding_rs.h"
+
+class Encoding;
+
+/**
+ * A converter that decodes a byte stream into Unicode according to a
+ * character encoding in a streaming (incremental) manner.
+ *
+ * The various `decode_*` methods take an input buffer (`src`) and an output
+ * buffer `dst` both of which are caller-allocated. There are variants for
+ * both UTF-8 and UTF-16 output buffers.
+ *
+ * A `decode_*` method decodes bytes from `src` into Unicode characters stored
+ * into `dst` until one of the following three things happens:
+ *
+ * 1. A malformed byte sequence is encountered (`*_without_replacement`
+ *    variants only).
+ *
+ * 2. The output buffer has been filled so near capacity that the decoder
+ *    cannot be sure that processing an additional byte of input wouldn't
+ *    cause so much output that the output buffer would overflow.
+ *
+ * 3. All the input bytes have been processed.
+ *
+ * The `decode_*` method then returns a tuple of a status indicating which one
+ * of the three reasons to return happened, how many input bytes were read,
+ * how many output code units (`uint8_t` when decoding into UTF-8 and `char16_t`
+ * when decoding to UTF-16) were written, and in the case of the
+ * variants performing replacement, a boolean indicating whether an error was
+ * replaced with the REPLACEMENT CHARACTER during the call.
+ *
+ * The number of bytes "written" is what's logically written. Garbage may be
+ * written in the output buffer beyond the point logically written to.
+ *
+ * In the case of the `*_without_replacement` variants, the status is a
+ * `uint32_t` whose possible values are packed info about a malformed byte
+ * sequence, `OUTPUT_FULL` and `INPUT_EMPTY` (corresponding to the three cases
+ * listed above).
+ *
+ * Packed info about malformed sequences has the following format:
+ * The lowest 8 bits, which can have the decimal value 0, 1, 2 or 3,
+ * indicate the number of bytes that were consumed after the malformed
+ * sequence, and the next-lowest 8 bits, when shifted right by 8, indicate
+ * the length of the malformed byte sequence (possible decimal values 1, 2,
+ * 3 or 4). The maximum possible sum of the two is 6.
+ *
+ * In the case of methods whose name does not end with
+ * `*_without_replacement`, malformed sequences are automatically replaced
+ * with the REPLACEMENT CHARACTER and errors do not cause the methods to
+ * return early.
+ *
+ * When decoding to UTF-8, the output buffer must have at least 4 bytes of
+ * space. When decoding to UTF-16, the output buffer must have at least two
+ * UTF-16 code units (`char16_t`) of space.
+ *
+ * When decoding to UTF-8 without replacement, the methods are guaranteed
+ * not to return indicating that more output space is needed if the length
+ * of the output buffer is at least the length returned by
+ * `max_utf8_buffer_length_without_replacement()`. When decoding to UTF-8
+ * with replacement, the length of the output buffer that guarantees the
+ * methods not to return indicating that more output space is needed is given
+ * by `max_utf8_buffer_length()`. When decoding to UTF-16 with
+ * or without replacement, the length of the output buffer that guarantees
+ * the methods not to return indicating that more output space is needed is
+ * given by `max_utf16_buffer_length()`.
+ *
+ * The output written into `dst` is guaranteed to be valid UTF-8 or UTF-16,
+ * and the output after each `decode_*` call is guaranteed to consist of
+ * complete characters. (I.e. the code unit sequence for the last character is
+ * guaranteed not to be split across output buffers.)
+ *
+ * The boolean argument `last` indicates that the end of the stream is reached
+ * when all the bytes in `src` have been consumed.
+ *
+ * A `Decoder` object can be used to incrementally decode a byte stream.
+ *
+ * During the processing of a single stream, the caller must call `decode_*`
+ * zero or more times with `last` set to `false` and then call `decode_*` at
+ * least once with `last` set to `true`. If `decode_*` returns `INPUT_EMPTY`,
+ * the processing of the stream has ended. Otherwise, the caller must call
+ * `decode_*` again with `last` set to `true` (or treat a malformed result,
+ * i.e. neither `INPUT_EMPTY` nor `OUTPUT_FULL`, as a fatal error).
+ *
+ * Once the stream has ended, the `Decoder` object must not be used anymore.
+ * That is, you need to create another one to process another stream.
+ *
+ * When the decoder returns `OUTPUT_FULL` or the decoder returns a malformed
+ * result and the caller does not wish to treat it as a fatal error, the input
+ * buffer `src` may not have been completely consumed. In that case, the caller
+ * must pass the unconsumed contents of `src` to `decode_*` again upon the next
+ * call.
+ *
+ * # Infinite loops
+ *
+ * When converting with a fixed-size output buffer whose size is too small to
+ * accommodate one character of output, an infinite loop ensues. When
+ * converting with a fixed-size output buffer, it generally makes sense to
+ * make the buffer fairly large (e.g. a couple of kilobytes).
+ */
+class Decoder final
+{
+public:
+  ~Decoder() {}
+
+  /**
+   * Routes `delete` on a `Decoder*` to the C API's `decoder_free()` rather
+   * than to the global C++ deallocation function.
+   */
+  static void operator delete(void* decoder)
+  {
+    decoder_free(static_cast<Decoder*>(decoder));
+  }
+
+  /**
+   * The `Encoding` this `Decoder` is for.
+   *
+   * BOM sniffing can change the return value of this method during the life
+   * of the decoder.
+   */
+  inline gsl::not_null<const Encoding*> encoding() const
+  {
+    return decoder_encoding(this);
+  }
+
+  /**
+   * Query the worst-case UTF-8 output size _with replacement_.
+   *
+   * Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+   * that will not overflow given the current state of the decoder and
+   * `byte_length` number of additional input bytes when decoding with
+   * errors handled by outputting a REPLACEMENT CHARACTER for each malformed
+   * sequence or `SIZE_MAX` if `size_t` would overflow.
+   */
+  inline size_t max_utf8_buffer_length(size_t byte_length) const
+  {
+    return decoder_max_utf8_buffer_length(this, byte_length);
+  }
+
+  /**
+   * Query the worst-case UTF-8 output size _without replacement_.
+   *
+   * Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+   * that will not overflow given the current state of the decoder and
+   * `byte_length` number of additional input bytes when decoding without
+   * replacement error handling or `SIZE_MAX` if `size_t` would overflow.
+   *
+   * Note that this value may be too small when decoding with replacement.
+   * Use `max_utf8_buffer_length()` for that case.
+   */
+  inline size_t max_utf8_buffer_length_without_replacement(
+    size_t byte_length) const
+  {
+    return decoder_max_utf8_buffer_length_without_replacement(this,
+                                                              byte_length);
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-8 with malformed sequences
+   * replaced with the REPLACEMENT CHARACTER.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * bytes read from `src`, the number of code units written to `dst` and
+   * whether replacements were made.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> decode_to_utf8(
+    gsl::span<const uint8_t> src, gsl::span<uint8_t> dst, bool last)
+  {
+    // The lengths are passed by pointer: they carry the buffer sizes in and
+    // are returned to the caller as the amounts read and written.
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    // Initialized so that the tuple never carries an indeterminate value.
+    bool had_replacements = false;
+    uint32_t result =
+      decoder_decode_to_utf8(this, src.data(), &src_read, dst.data(),
+                             &dst_written, last, &had_replacements);
+    return std::make_tuple(result, src_read, dst_written, had_replacements);
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-8 _without replacement_.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * bytes read from `src` and the number of code units written to `dst`.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  decode_to_utf8_without_replacement(gsl::span<const uint8_t> src,
+                                     gsl::span<uint8_t> dst, bool last)
+  {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = decoder_decode_to_utf8_without_replacement(
+      this, src.data(), &src_read, dst.data(), &dst_written, last);
+    return std::make_tuple(result, src_read, dst_written);
+  }
+
+  /**
+   * Query the worst-case UTF-16 output size (with or without replacement).
+   *
+   * Returns the size of the output buffer in UTF-16 code units (`char16_t`)
+   * that will not overflow given the current state of the decoder and
+   * `byte_length` number of additional input bytes or `SIZE_MAX` if
+   * `size_t` would overflow.
+   *
+   * Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the
+   * return value of this method applies also in the
+   * `_without_replacement` case.
+   */
+  inline size_t max_utf16_buffer_length(size_t byte_length) const
+  {
+    return decoder_max_utf16_buffer_length(this, byte_length);
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-16 with malformed sequences
+   * replaced with the REPLACEMENT CHARACTER.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * bytes read from `src`, the number of code units written to `dst` and
+   * whether replacements were made.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> decode_to_utf16(
+    gsl::span<const uint8_t> src, gsl::span<char16_t> dst, bool last)
+  {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    // Initialized so that the tuple never carries an indeterminate value.
+    bool had_replacements = false;
+    uint32_t result =
+      decoder_decode_to_utf16(this, src.data(), &src_read, dst.data(),
+                              &dst_written, last, &had_replacements);
+    return std::make_tuple(result, src_read, dst_written, had_replacements);
+  }
+
+  /**
+   * Incrementally decode a byte stream into UTF-16 _without replacement_.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * bytes read from `src` and the number of code units written to `dst`.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  decode_to_utf16_without_replacement(gsl::span<const uint8_t> src,
+                                      gsl::span<char16_t> dst, bool last)
+  {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = decoder_decode_to_utf16_without_replacement(
+      this, src.data(), &src_read, dst.data(), &dst_written, last);
+    return std::make_tuple(result, src_read, dst_written);
+  }
+
+private:
+  // Every method forwards `this` to the C API and `operator delete` releases
+  // through `decoder_free()`, so an object default-constructed or copied on
+  // the C++ side would not be one the C API knows about. Forbid both.
+  Decoder() = delete;
+  Decoder(const Decoder&) = delete;
+  Decoder& operator=(const Decoder&) = delete;
+};
+
+/**
+ * A converter that encodes a Unicode stream into bytes according to a
+ * character encoding in a streaming (incremental) manner.
+ *
+ * The various `encode_*` methods take an input buffer (`src`) and an output
+ * buffer `dst` both of which are caller-allocated. There are variants for
+ * both UTF-8 and UTF-16 input buffers.
+ *
+ * An `encode_*` method encodes characters from `src` into bytes
+ * stored into `dst` until one of the following three things happens:
+ *
+ * 1. An unmappable character is encountered (`*_without_replacement` variants
+ *    only).
+ *
+ * 2. The output buffer has been filled so near capacity that the encoder
+ *    cannot be sure that processing an additional character of input wouldn't
+ *    cause so much output that the output buffer would overflow.
+ *
+ * 3. All the input characters have been processed.
+ *
+ * The `encode_*` method then returns a tuple of a status indicating which one
+ * of the three reasons to return happened, how many input code units (`uint8_t`
+ * when encoding from UTF-8 and `char16_t` when encoding from UTF-16) were read,
+ * how many output bytes were written, and in the case of the variants that
+ * perform replacement, a boolean indicating whether an unmappable
+ * character was replaced with a numeric character reference during the call.
+ *
+ * The number of bytes "written" is what's logically written. Garbage may be
+ * written in the output buffer beyond the point logically written to.
+ *
+ * In the case of the methods whose name ends with
+ * `*_without_replacement`, the status is a `uint32_t` whose possible values
+ * are an unmappable code point, `OUTPUT_FULL` and `INPUT_EMPTY` (corresponding
+ * to the three cases listed above).
+ *
+ * In the case of methods whose name does not end with
+ * `*_without_replacement`, unmappable characters are automatically replaced
+ * with the corresponding numeric character references and unmappable
+ * characters do not cause the methods to return early.
+ *
+ * When encoding from UTF-8 without replacement, the methods are guaranteed
+ * not to return indicating that more output space is needed if the length
+ * of the output buffer is at least the length returned by
+ * `max_buffer_length_from_utf8_without_replacement()`. When encoding from
+ * UTF-8 with replacement, the length of the output buffer that guarantees the
+ * methods not to return indicating that more output space is needed in the
+ * absence of unmappable characters is given by
+ * `max_buffer_length_from_utf8_if_no_unmappables()`. When encoding from
+ * UTF-16 without replacement, the methods are guaranteed not to return
+ * indicating that more output space is needed if the length of the output
+ * buffer is at least the length returned by
+ * `max_buffer_length_from_utf16_without_replacement()`. When encoding
+ * from UTF-16 with replacement, the length of the output buffer that
+ * guarantees the methods not to return indicating that more output space is
+ * needed in the absence of unmappable characters is given by
+ * `max_buffer_length_from_utf16_if_no_unmappables()`.
+ * When encoding with replacement, applications are not expected to size the
+ * buffer for the worst case ahead of time but to resize the buffer if there
+ * are unmappable characters. This is why max length queries are only available
+ * for the case where there are no unmappable characters.
+ *
+ * When encoding from UTF-8, each `src` buffer _must_ be valid UTF-8. When
+ * encoding from UTF-16, unpaired surrogates in the input are treated as U+FFFD
+ * REPLACEMENT CHARACTERS. Therefore, in order for astral characters not to
+ * turn into a pair of REPLACEMENT CHARACTERS, the caller must ensure that
+ * surrogate pairs are not split across input buffer boundaries.
+ *
+ * After an `encode_*` call returns, the output produced so far, taken as a
+ * whole from the start of the stream, is guaranteed to consist of a valid
+ * byte sequence in the target encoding. (I.e. the code unit sequence for a
+ * character is guaranteed not to be split across output buffers. However, due
+ * to the stateful nature of ISO-2022-JP, the stream needs to be considered
+ * from the start for it to be valid. For other encodings, the validity holds
+ * on a per-output buffer basis.)
+ *
+ * The boolean argument `last` indicates that the end of the stream is reached
+ * when all the characters in `src` have been consumed. This argument is needed
+ * for ISO-2022-JP and is ignored for other encodings.
+ *
+ * An `Encoder` object can be used to incrementally encode into a byte stream.
+ *
+ * During the processing of a single stream, the caller must call `encode_*`
+ * zero or more times with `last` set to `false` and then call `encode_*` at
+ * least once with `last` set to `true`. If `encode_*` returns `INPUT_EMPTY`,
+ * the processing of the stream has ended. Otherwise, the caller must call
+ * `encode_*` again with `last` set to `true` (or treat an unmappable result,
+ * i.e. neither `INPUT_EMPTY` nor `OUTPUT_FULL`, as a fatal error).
+ *
+ * Once the stream has ended, the `Encoder` object must not be used anymore.
+ * That is, you need to create another one to process another stream.
+ *
+ * When the encoder returns `OUTPUT_FULL` or the encoder returns an unmappable
+ * result and the caller does not wish to treat it as a fatal error, the input
+ * buffer `src` may not have been completely consumed. In that case, the caller
+ * must pass the unconsumed contents of `src` to `encode_*` again upon the next
+ * call.
+ *
+ * # Infinite loops
+ *
+ * When converting with a fixed-size output buffer whose size is too small to
+ * accommodate one character of output, an infinite loop ensues. When
+ * converting with a fixed-size output buffer, it generally makes sense to
+ * make the buffer fairly large (e.g. a couple of kilobytes).
+ */
+class Encoder final
+{
+public:
+  ~Encoder() {}
+
+  /**
+   * Routes `delete` on an `Encoder*` to the C API's `encoder_free()` rather
+   * than to the global C++ deallocation function.
+   */
+  static void operator delete(void* encoder)
+  {
+    encoder_free(static_cast<Encoder*>(encoder));
+  }
+
+  /**
+   * The `Encoding` this `Encoder` is for.
+   */
+  inline gsl::not_null<const Encoding*> encoding() const
+  {
+    return encoder_encoding(this);
+  }
+
+  /**
+   * Returns `true` if this is an ISO-2022-JP encoder that's not in the
+   * ASCII state and `false` otherwise.
+   */
+  inline bool has_pending_state() const
+  {
+    return encoder_has_pending_state(this);
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-8 with
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `byte_length` number of
+   * additional input code units if there are no unmappable characters in
+   * the input or `SIZE_MAX` if `size_t` would overflow.
+   */
+  inline size_t max_buffer_length_from_utf8_if_no_unmappables(
+    size_t byte_length) const
+  {
+    return encoder_max_buffer_length_from_utf8_if_no_unmappables(this,
+                                                                 byte_length);
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-8 without
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `byte_length` number of
+   * additional input code units or `SIZE_MAX` if `size_t` would overflow.
+   */
+  inline size_t max_buffer_length_from_utf8_without_replacement(
+    size_t byte_length) const
+  {
+    return encoder_max_buffer_length_from_utf8_without_replacement(this,
+                                                                   byte_length);
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-8 with unmappable
+   * characters replaced with HTML (decimal) numeric character references.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * code units read from `src`, the number of bytes written to `dst` and
+   * whether replacements were made.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> encode_from_utf8(
+    gsl::span<const uint8_t> src, gsl::span<uint8_t> dst, bool last)
+  {
+    // The lengths are passed by pointer: they carry the buffer sizes in and
+    // are returned to the caller as the amounts read and written.
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    // Initialized so that the tuple never carries an indeterminate value.
+    bool had_replacements = false;
+    uint32_t result =
+      encoder_encode_from_utf8(this, src.data(), &src_read, dst.data(),
+                               &dst_written, last, &had_replacements);
+    return std::make_tuple(result, src_read, dst_written, had_replacements);
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-8 _without replacement_.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * code units read from `src` and the number of bytes written to `dst`.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  encode_from_utf8_without_replacement(gsl::span<const uint8_t> src,
+                                       gsl::span<uint8_t> dst, bool last)
+  {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = encoder_encode_from_utf8_without_replacement(
+      this, src.data(), &src_read, dst.data(), &dst_written, last);
+    return std::make_tuple(result, src_read, dst_written);
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-16 with
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `u16_length` number of
+   * additional input code units if there are no unmappable characters in
+   * the input or `SIZE_MAX` if `size_t` would overflow.
+   */
+  inline size_t max_buffer_length_from_utf16_if_no_unmappables(
+    size_t u16_length) const
+  {
+    return encoder_max_buffer_length_from_utf16_if_no_unmappables(this,
+                                                                  u16_length);
+  }
+
+  /**
+   * Query the worst-case output size when encoding from UTF-16 without
+   * replacement.
+   *
+   * Returns the size of the output buffer in bytes that will not overflow
+   * given the current state of the encoder and `u16_length` number of
+   * additional input code units or `SIZE_MAX` if `size_t` would overflow.
+   */
+  inline size_t max_buffer_length_from_utf16_without_replacement(
+    size_t u16_length) const
+  {
+    return encoder_max_buffer_length_from_utf16_without_replacement(this,
+                                                                    u16_length);
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-16 with unmappable
+   * characters replaced with HTML (decimal) numeric character references.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * code units read from `src`, the number of bytes written to `dst` and
+   * whether replacements were made.
+   */
+  inline std::tuple<uint32_t, size_t, size_t, bool> encode_from_utf16(
+    gsl::span<const char16_t> src, gsl::span<uint8_t> dst, bool last)
+  {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    // Initialized so that the tuple never carries an indeterminate value.
+    bool had_replacements = false;
+    uint32_t result =
+      encoder_encode_from_utf16(this, src.data(), &src_read, dst.data(),
+                                &dst_written, last, &had_replacements);
+    return std::make_tuple(result, src_read, dst_written, had_replacements);
+  }
+
+  /**
+   * Incrementally encode into byte stream from UTF-16 _without replacement_.
+   *
+   * Returns a tuple of the status reported by the C API, the number of
+   * code units read from `src` and the number of bytes written to `dst`.
+   */
+  inline std::tuple<uint32_t, size_t, size_t>
+  encode_from_utf16_without_replacement(gsl::span<const char16_t> src,
+                                        gsl::span<uint8_t> dst, bool last)
+  {
+    size_t src_read = src.size();
+    size_t dst_written = dst.size();
+    uint32_t result = encoder_encode_from_utf16_without_replacement(
+      this, src.data(), &src_read, dst.data(), &dst_written, last);
+    return std::make_tuple(result, src_read, dst_written);
+  }
+
+private:
+  // Every method forwards `this` to the C API and `operator delete` releases
+  // through `encoder_free()`, so an object default-constructed or copied on
+  // the C++ side would not be one the C API knows about. Forbid both.
+  Encoder() = delete;
+  Encoder(const Encoder&) = delete;
+  Encoder& operator=(const Encoder&) = delete;
+};
+
+/**
+ * An encoding as defined in the Encoding Standard
+ * (https://encoding.spec.whatwg.org/).
+ *
+ * An _encoding_ defines a mapping from a byte sequence to a Unicode code point
+ * sequence and, in most cases, vice versa. Each encoding has a name, an output
+ * encoding, and one or more labels.
+ *
+ * _Labels_ are ASCII-case-insensitive strings that are used to identify an
+ * encoding in formats and protocols. The _name_ of the encoding is the
+ * preferred label in the case appropriate for returning from the
+ * `characterSet` property of the `Document` DOM interface, except for
+ * the replacement encoding whose name is not one of its labels.
+ *
+ * The _output encoding_ is the encoding used for form submission and URL
+ * parsing on Web pages in the encoding. This is UTF-8 for the replacement,
+ * UTF-16LE and UTF-16BE encodings and the encoding itself for other
+ * encodings.
+ *
+ * # Streaming vs. Non-Streaming
+ *
+ * When you have the entire input in a single buffer, you can use the
+ * methods `decode()`, `decode_with_bom_removal()`,
+ * `decode_without_bom_handling()`,
+ * `decode_without_bom_handling_and_without_replacement()` and
+ * `encode()`. Unlike the rest of the API, these methods perform heap
+ * allocations. You should use the `Decoder` and `Encoder` objects when your input
+ * is split into multiple buffers or when you want to control the allocation of
+ * the output buffers.
+ *
+ * # Instances
+ *
+ * All instances of `Encoding` are statically allocated and have the process's
+ * lifetime. There is precisely one unique `Encoding` instance for each
+ * encoding defined in the Encoding Standard.
+ *
+ * To obtain a reference to a particular encoding whose identity you know at
+ * compile time, use a `static` that refers to encoding. There is a `static`
+ * for each encoding. The `static`s are named in all caps with hyphens
+ * replaced with underscores and with `_ENCODING` appended to the
+ * name. For example, if you know at compile time that you will want to
+ * decode using the UTF-8 encoding, use the `UTF_8_ENCODING` `static`.
+ *
+ * If you don't know what encoding you need at compile time and need to
+ * dynamically get an encoding by label, use `Encoding::for_label()`.
+ *
+ * Instances of `Encoding` can be compared with `==`.
+ */
+class Encoding final
+{
+public:
+  /**
+   * Implements the _get an encoding_ algorithm
+   * (https://encoding.spec.whatwg.org/#concept-encoding-get).
+   *
+   * If, after ASCII-lowercasing and removing leading and trailing
+   * whitespace, the argument matches a label defined in the Encoding
+   * Standard, `const Encoding*` representing the corresponding
+   * encoding is returned. If there is no match, `nullptr` is returned.
+   *
+   * This is the right method to use if the action upon the method returning
+   * `nullptr` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`)
+   * instead. When the action upon the method returning `nullptr` is not to
+   * proceed with a fallback but to refuse processing,
+   * `for_label_no_replacement()` is more appropriate.
+   */
+  static inline const Encoding* for_label(gsl::cstring_span<> label)
+  {
+    // The C API takes the label as a byte pointer plus a length.
+    const uint8_t* label_bytes =
+      reinterpret_cast<const uint8_t*>(label.data());
+    return encoding_for_label(label_bytes, label.length());
+  }
+
+  /**
+   * This method behaves the same as `for_label()`, except when `for_label()`
+   * would return `REPLACEMENT_ENCODING`, this method returns `nullptr` instead.
+   *
+   * This method is useful in scenarios where a fatal error is required
+   * upon invalid label, because in those cases the caller typically wishes
+   * to treat the labels that map to the replacement encoding as fatal
+   * errors, too.
+   *
+   * It is not OK to use this method when the action upon the method returning
+   * `nullptr` is to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`). In
+   * such a case, the `for_label()` method should be used instead in order to avoid
+   * unsafe fallback for labels that `for_label()` maps to `REPLACEMENT_ENCODING`.
+   */
+  static inline const Encoding* for_label_no_replacement(
+    gsl::cstring_span<> label)
+  {
+    // The C API takes the label as a byte pointer plus a length.
+    const uint8_t* label_bytes =
+      reinterpret_cast<const uint8_t*>(label.data());
+    return encoding_for_label_no_replacement(label_bytes, label.length());
+  }
+
+  /**
+   * Performs non-incremental BOM sniffing.
+   *
+   * The argument must either be a buffer representing the entire input
+   * stream (non-streaming case) or a buffer representing at least the first
+   * three bytes of the input stream (streaming case).
+   *
+   * Returns `make_tuple(UTF_8_ENCODING, 3)`, `make_tuple(UTF_16LE_ENCODING, 2)`
+   * or `make_tuple(UTF_16BE_ENCODING, 2)` if the argument starts with the
+   * UTF-8, UTF-16LE or UTF-16BE BOM or `make_tuple(nullptr, 0)` otherwise.
+   */
+  static inline std::tuple<const Encoding*, size_t> for_bom(
+    gsl::span<const uint8_t> buffer)
+  {
+    // Passed by pointer: holds the buffer size on entry and is handed back
+    // to the caller as the second tuple item after the call.
+    size_t bom_length = buffer.size();
+    const Encoding* sniffed = encoding_for_bom(buffer.data(), &bom_length);
+    return std::tuple<const Encoding*, size_t>(sniffed, bom_length);
+  }
+
+  /**
+   * If the argument matches exactly (case-sensitively; no whitespace
+   * removal performed) the name of an encoding, returns
+   * `const Encoding*` representing that encoding. Otherwise panics.
+   *
+   * The motivating use case for this method is interoperability with
+   * legacy Gecko code that represents encodings as name string instead of
+   * type-safe `Encoding` objects. Using this method for other purposes is
+   * most likely the wrong thing to do.
+   *
+   * # Panics
+   *
+   * Panics if the argument is not the name of an encoding.
+   */
+  static inline gsl::not_null<const Encoding*> for_name(
+    gsl::cstring_span<> name)
+  {
+    // The C API takes the name as a byte pointer plus a length.
+    const uint8_t* name_bytes = reinterpret_cast<const uint8_t*>(name.data());
+    return encoding_for_name(name_bytes, name.length());
+  }
+
+  /**
+   * Returns the name of this encoding.
+   *
+   * This name is appropriate to return as-is from the DOM
+   * `document.characterSet` property.
+   */
+  inline std::string name() const
+  {
+    // Scratch buffer of `ENCODING_NAME_MAX_LENGTH` bytes that the C API
+    // writes into; it is trimmed to the reported length afterwards.
+    std::string buffer(ENCODING_NAME_MAX_LENGTH, '\0');
+    // Writing through `&buffer[0]` relies on contiguous string storage; see
+    // http://herbsutter.com/2008/04/07/cringe-not-vectors-are-guaranteed-to-be-contiguous/#comment-483
+    uint8_t* out = reinterpret_cast<uint8_t*>(&buffer[0]);
+    buffer.resize(encoding_name(this, out));
+    return buffer;
+  }
+
+  /**
+   * Checks whether the _output encoding_ of this encoding can encode every
+   * Unicode code point. (Only true if the output encoding is UTF-8.)
+   */
+  inline bool can_encode_everything() const
+  {
+    // Answered entirely by the C API for this encoding.
+    const bool encodes_everything = encoding_can_encode_everything(this);
+    return encodes_everything;
+  }
+
+  /**
+   * Checks whether the bytes 0x00...0x7F map exclusively to the characters
+   * U+0000...U+007F and vice versa.
+   */
+  inline bool is_ascii_compatible() const
+  {
+    // Answered entirely by the C API for this encoding.
+    const bool ascii_compatible = encoding_is_ascii_compatible(this);
+    return ascii_compatible;
+  }
+
+  /**
+   * Returns the _output encoding_ of this encoding. This is UTF-8 for
+   * UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
+   */
+  inline gsl::not_null<const Encoding*> output_encoding() const
+  {
+    // Wrap the pointer returned by the C API in `gsl::not_null` explicitly.
+    return gsl::not_null<const Encoding*>(encoding_output_encoding(this));
+  }
+
+  /**
+   * Decode complete input to `std::string` _with BOM sniffing_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * This method implements the (non-streaming version of) the
+   * _decode_ (https://encoding.spec.whatwg.org/#decode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to BOM sniffing).
+   *
+   * The third item in the returned tuple indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_decoder()`
+   * when decoding segmented input.
+   */
+  inline std::tuple<std::string, const Encoding*, bool> decode(
+    gsl::span<const uint8_t> bytes) const
+  {
+    // Sniff for a BOM first; a recognized BOM selects the encoding and is
+    // cut off the front of the input before decoding.
+    const Encoding* sniffed;
+    size_t bom_length;
+    std::tie(sniffed, bom_length) = Encoding::for_bom(bytes);
+    const Encoding* effective = sniffed ? sniffed : this;
+    if (sniffed) {
+      bytes = bytes.subspan(bom_length);
+    }
+    // Decode with whichever encoding won and report it to the caller
+    // together with the text and the error flag.
+    std::tuple<std::string, bool> decoded =
+      effective->decode_without_bom_handling(bytes);
+    return std::make_tuple(std::get<0>(decoded), effective,
+                           std::get<1>(decoded));
+  }
+
+  /**
+   * Decode complete input to `std::string` _with BOM removal_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode) spec concept.
+   *
+   * The second item in the returned pair indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_with_bom_removal()` when decoding segmented input.
+   */
+  inline std::tuple<std::string, bool> decode_with_bom_removal(
+    gsl::span<const uint8_t> bytes) const
+  {
+    // Strip the BOM that belongs to *this* encoding only (UTF-8, UTF-16LE or
+    // UTF-16BE); a BOM of any other encoding is left in place as input.
+    //
+    // The leading bytes are compared one by one. Comparing against
+    // gsl::make_span("\xEF\xBB\xBF") does not work: a span made from a
+    // string literal also covers the terminating NUL, so its length (4, or 3
+    // for the UTF-16 BOMs) never equals the 3- or 2-byte prefix and the
+    // length-aware span comparison is always false.
+    if (this == UTF_8_ENCODING && bytes.size() >= 3 && bytes[0] == 0xEF &&
+        bytes[1] == 0xBB && bytes[2] == 0xBF) {
+      bytes = bytes.subspan(3);
+    } else if (this == UTF_16LE_ENCODING && bytes.size() >= 2 &&
+               bytes[0] == 0xFF && bytes[1] == 0xFE) {
+      bytes = bytes.subspan(2);
+    } else if (this == UTF_16BE_ENCODING && bytes.size() >= 2 &&
+               bytes[0] == 0xFE && bytes[1] == 0xFF) {
+      bytes = bytes.subspan(2);
+    }
+    return decode_without_bom_handling(bytes);
+  }
+
+  /**
+   * Decode complete input to `std::string` _without BOM handling_ and
+   * with malformed sequences replaced with the REPLACEMENT CHARACTER when
+   * the entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom) spec concept.
+   *
+   * The second item in the returned pair indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::tuple<std::string, bool> decode_without_bom_handling(
+    gsl::span<const uint8_t> bytes) const
+  {
+    auto decoder = new_decoder_without_bom_handling();
+    // Worst-case output size for this input; SIZE_MAX is treated as an
+    // overflow in that computation.
+    const size_t capacity = decoder->max_utf8_buffer_length(bytes.size());
+    if (capacity == SIZE_MAX) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Allocate the worst case up front and shrink to the real size below.
+    std::string out(capacity, '\0');
+    auto dst =
+      gsl::make_span(reinterpret_cast<uint8_t*>(&out[0]), out.size());
+    uint32_t status;
+    size_t consumed;
+    size_t produced;
+    bool replaced;
+    // `true`: this buffer is the last (and only) chunk of the stream.
+    std::tie(status, consumed, produced, replaced) =
+      decoder->decode_to_utf8(bytes, dst, true);
+    assert(consumed == static_cast<size_t>(bytes.size()));
+    assert(produced <= static_cast<size_t>(out.size()));
+    assert(status == INPUT_EMPTY);
+    out.resize(produced);
+    return std::make_tuple(out, replaced);
+  }
+
+  /**
+   * Decode complete input to `std::string` _without BOM handling_ and
+   * _with malformed sequences treated as fatal_ when the entire input is
+   * available as a single buffer (i.e. the end of the buffer marks the end
+   * of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM or fail_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail)
+   * spec concept.
+   *
+   * Returns an empty optional (`std::experimental::nullopt`) if a malformed
+   * sequence was encountered and the decoded `std::string` otherwise.
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::experimental::optional<std::string>
+  decode_without_bom_handling_and_without_replacement(
+    gsl::span<const uint8_t> bytes) const
+  {
+    auto decoder = new_decoder_without_bom_handling();
+    size_t needed = decoder->max_utf8_buffer_length_without_replacement(bytes.size());
+    if (needed == SIZE_MAX) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Allocate the worst case up front and shrink to the real size below.
+    std::string string(needed, '\0');
+    uint32_t result;
+    size_t read;
+    size_t written;
+    std::tie(result, read, written) =
+      decoder->decode_to_utf8_without_replacement(
+        bytes,
+        gsl::make_span(reinterpret_cast<uint8_t*>(&string[0]), string.size()),
+        true);
+    // The buffer was sized for the worst case, so it cannot have filled up.
+    assert(result != OUTPUT_FULL);
+    if (result != INPUT_EMPTY) {
+      // Any other result reports a malformed sequence: signal failure with
+      // an empty optional. (Previously control fell off the end of this
+      // non-void function here, which is undefined behavior.)
+      return std::experimental::nullopt;
+    }
+    assert(read == static_cast<size_t>(bytes.size()));
+    assert(written <= static_cast<size_t>(string.size()));
+    string.resize(written);
+    return string;
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _with BOM sniffing_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * This method implements the (non-streaming version of) the
+   * _decode_ (https://encoding.spec.whatwg.org/#decode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to BOM sniffing).
+   *
+   * The third item in the returned tuple indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_decoder()`
+   * when decoding segmented input.
+   */
+  inline std::tuple<std::u16string, const Encoding*, bool> decode16(
+    gsl::span<const uint8_t> bytes) const
+  {
+    // Sniff for a BOM first; when one is found, the encoding it denotes
+    // takes precedence over this encoding and the BOM bytes are skipped.
+    const Encoding* sniffed;
+    size_t bom_size;
+    std::tie(sniffed, bom_size) = Encoding::for_bom(bytes);
+    const Encoding* actual = this;
+    if (sniffed) {
+      actual = sniffed;
+      bytes = bytes.subspan(bom_size);
+    }
+    // The BOM (if any) is already gone, so decode without BOM handling.
+    std::u16string text;
+    bool replaced;
+    std::tie(text, replaced) = actual->decode16_without_bom_handling(bytes);
+    return std::make_tuple(text, actual, replaced);
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _with BOM removal_ and with
+   * malformed sequences replaced with the REPLACEMENT CHARACTER when the
+   * entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode) spec concept.
+   *
+   * The second item in the returned pair indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_with_bom_removal()` when decoding segmented input.
+   */
+  inline std::tuple<std::u16string, bool> decode16_with_bom_removal(
+    gsl::span<const uint8_t> bytes) const
+  {
+    // Strip the BOM that belongs to *this* encoding only (UTF-8, UTF-16LE or
+    // UTF-16BE); a BOM of any other encoding is left in place as input.
+    //
+    // The leading bytes are compared one by one. Comparing against
+    // gsl::make_span("\xEF\xBB\xBF") does not work: a span made from a
+    // string literal also covers the terminating NUL, so its length (4, or 3
+    // for the UTF-16 BOMs) never equals the 3- or 2-byte prefix and the
+    // length-aware span comparison is always false.
+    if (this == UTF_8_ENCODING && bytes.size() >= 3 && bytes[0] == 0xEF &&
+        bytes[1] == 0xBB && bytes[2] == 0xBF) {
+      bytes = bytes.subspan(3);
+    } else if (this == UTF_16LE_ENCODING && bytes.size() >= 2 &&
+               bytes[0] == 0xFF && bytes[1] == 0xFE) {
+      bytes = bytes.subspan(2);
+    } else if (this == UTF_16BE_ENCODING && bytes.size() >= 2 &&
+               bytes[0] == 0xFE && bytes[1] == 0xFF) {
+      bytes = bytes.subspan(2);
+    }
+    return decode16_without_bom_handling(bytes);
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _without BOM handling_ and
+   * with malformed sequences replaced with the REPLACEMENT CHARACTER when
+   * the entire input is available as a single buffer (i.e. the end of the
+   * buffer marks the end of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom) spec concept.
+   *
+   * The second item in the returned pair indicates whether there were
+   * malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::tuple<std::u16string, bool> decode16_without_bom_handling(
+    gsl::span<const uint8_t> bytes) const
+  {
+    auto decoder = new_decoder_without_bom_handling();
+    // Worst-case output size for this input; SIZE_MAX is treated as an
+    // overflow in that computation.
+    const size_t capacity = decoder->max_utf16_buffer_length(bytes.size());
+    if (capacity == SIZE_MAX) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Allocate the worst case up front and shrink to the real size below.
+    std::u16string out(capacity, '\0');
+    auto dst = gsl::make_span(&out[0], out.size());
+    uint32_t status;
+    size_t consumed;
+    size_t produced;
+    bool replaced;
+    // `true`: this buffer is the last (and only) chunk of the stream.
+    std::tie(status, consumed, produced, replaced) =
+      decoder->decode_to_utf16(bytes, dst, true);
+    assert(consumed == static_cast<size_t>(bytes.size()));
+    assert(produced <= static_cast<size_t>(out.size()));
+    assert(status == INPUT_EMPTY);
+    out.resize(produced);
+    return std::make_tuple(out, replaced);
+  }
+
+  /**
+   * Decode complete input to `std::u16string` _without BOM handling_ and
+   * _with malformed sequences treated as fatal_ when the entire input is
+   * available as a single buffer (i.e. the end of the buffer marks the end
+   * of the stream).
+   *
+   * When invoked on `UTF_8`, this method implements the (non-streaming
+   * version of) the _UTF-8 decode without BOM or fail_
+   * (https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail)
+   * spec concept.
+   *
+   * Returns an empty optional (`std::experimental::nullopt`) if a malformed
+   * sequence was encountered and the decoded `std::u16string` otherwise.
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use
+   * `new_decoder_without_bom_handling()` when decoding segmented input.
+   */
+  inline std::experimental::optional<std::u16string>
+  decode16_without_bom_handling_and_without_replacement(
+    gsl::span<const uint8_t> bytes) const
+  {
+    auto decoder = new_decoder_without_bom_handling();
+    size_t needed = decoder->max_utf16_buffer_length(bytes.size());
+    if (needed == SIZE_MAX) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Allocate the worst case up front and shrink to the real size below.
+    std::u16string string(needed, '\0');
+    uint32_t result;
+    size_t read;
+    size_t written;
+    std::tie(result, read, written) =
+      decoder->decode_to_utf16_without_replacement(
+        bytes,
+        gsl::make_span(&string[0], string.size()),
+        true);
+    // The buffer was sized for the worst case, so it cannot have filled up.
+    assert(result != OUTPUT_FULL);
+    if (result != INPUT_EMPTY) {
+      // Any other result reports a malformed sequence: signal failure with
+      // an empty optional. (Previously control fell off the end of this
+      // non-void function here, which is undefined behavior.)
+      return std::experimental::nullopt;
+    }
+    assert(read == static_cast<size_t>(bytes.size()));
+    assert(written <= static_cast<size_t>(string.size()));
+    string.resize(written);
+    return string;
+  }
+
+  /**
+   * Encode complete input to `std::vector<uint8_t>` with unmappable characters
+   * replaced with decimal numeric character references when the entire input
+   * is available as a single buffer (i.e. the end of the buffer marks the
+   * end of the stream).
+   *
+   * This method implements the (non-streaming version of) the
+   * _encode_ (https://encoding.spec.whatwg.org/#encode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to some encodings
+   * having UTF-8 as their output encoding).
+   *
+   * The third item in the returned tuple indicates whether there were
+   * unmappable characters (that were replaced with HTML numeric character
+   * references).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_encoder()`
+   * when encoding segmented output.
+   */
+  inline std::tuple<std::vector<uint8_t>, const Encoding*, bool> encode(
+    gsl::span<const uint8_t> string) const
+  {
+    // NOTE(review): `string` is assumed to hold valid UTF-8 -- confirm
+    // against the contract of `Encoder::encode_from_utf8()`.
+    auto output_enc = output_encoding();
+    if (output_enc == UTF_8_ENCODING) {
+      // Fast path: the input already is the output, so just copy it.
+      // (Previously the copy was built and then discarded because the
+      // `return` was missing, and `&vec[0]` was taken on a possibly empty
+      // vector.) Constructing from a pointer range is fine for empty input.
+      std::vector<uint8_t> copy(string.data(), string.data() + string.size());
+      return std::make_tuple(copy, output_enc, false);
+    }
+    auto encoder = output_enc->new_encoder();
+    size_t needed = encoder->max_buffer_length_from_utf8_if_no_unmappables(string.size());
+    if (needed == SIZE_MAX) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Initial size covers the case without unmappable characters; the loop
+    // below grows the buffer whenever the encoder still has input left.
+    std::vector<uint8_t> vec(needed);
+    bool total_had_errors = false;
+    size_t total_read = 0;
+    size_t total_written = 0;
+    uint32_t result;
+    size_t read;
+    size_t written;
+    bool had_errors;
+    for (;;) {
+      // Continue from where the previous iteration stopped, on both the
+      // input and the output side.
+      std::tie(result, read, written, had_errors) = encoder->encode_from_utf8(
+        gsl::make_span(string).subspan(total_read), gsl::make_span(vec).subspan(total_written), true);
+      total_read += read;
+      total_written += written;
+      total_had_errors |= had_errors;
+      if (result == INPUT_EMPTY) {
+        assert(total_read == static_cast<size_t>(string.size()));
+        assert(total_written <= static_cast<size_t>(vec.size()));
+        vec.resize(total_written);
+        return std::make_tuple(vec, output_enc, total_had_errors);
+      }
+      // Not done yet: make room for the remaining input.
+      size_t more = encoder->max_buffer_length_from_utf8_if_no_unmappables(
+        string.size() - total_read);
+      if (more == SIZE_MAX) {
+        throw std::overflow_error("Overflow in buffer size computation.");
+      }
+      vec.resize(total_written + more);
+    }
+  }
+
+  /**
+   * Encode complete input to `std::vector<uint8_t>` with unmappable characters
+   * replaced with decimal numeric character references when the entire input
+   * is available as a single buffer (i.e. the end of the buffer marks the
+   * end of the stream).
+   *
+   * This method implements the (non-streaming version of) the
+   * _encode_ (https://encoding.spec.whatwg.org/#encode) spec concept.
+   *
+   * The second item in the returned tuple is the encoding that was actually
+   * used (which may differ from this encoding thanks to some encodings
+   * having UTF-8 as their output encoding).
+   *
+   * The third item in the returned tuple indicates whether there were
+   * unmappable characters (that were replaced with HTML numeric character
+   * references).
+   *
+   * _Note:_ It is wrong to use this when the input buffer represents only
+   * a segment of the input instead of the whole input. Use `new_encoder()`
+   * when encoding segmented output.
+   */
+  inline std::tuple<std::vector<uint8_t>, const Encoding*, bool> encode(
+    gsl::span<const char16_t> string) const
+  {
+    auto output_enc = output_encoding();
+    auto encoder = output_enc->new_encoder();
+    const size_t initial =
+      encoder->max_buffer_length_from_utf16_if_no_unmappables(string.size());
+    if (initial == SIZE_MAX) {
+      throw std::overflow_error("Overflow in buffer size computation.");
+    }
+    // Initial size covers the case without unmappable characters; the loop
+    // below grows the buffer whenever the encoder still has input left.
+    std::vector<uint8_t> out(initial);
+    size_t consumed_total = 0;
+    size_t produced_total = 0;
+    bool any_unmappable = false;
+    for (;;) {
+      uint32_t status;
+      size_t consumed;
+      size_t produced;
+      bool unmappable;
+      // Continue from where the previous iteration stopped, on both the
+      // input and the output side.
+      auto src = gsl::make_span(string).subspan(consumed_total);
+      auto dst = gsl::make_span(out).subspan(produced_total);
+      std::tie(status, consumed, produced, unmappable) =
+        encoder->encode_from_utf16(src, dst, true);
+      consumed_total += consumed;
+      produced_total += produced;
+      any_unmappable |= unmappable;
+      if (status == INPUT_EMPTY) {
+        assert(consumed_total == static_cast<size_t>(string.size()));
+        assert(produced_total <= static_cast<size_t>(out.size()));
+        out.resize(produced_total);
+        return std::make_tuple(out, output_enc, any_unmappable);
+      }
+      // Not done yet: make room for the remaining input.
+      auto more = encoder->max_buffer_length_from_utf16_if_no_unmappables(
+        string.size() - consumed_total);
+      if (more == SIZE_MAX) {
+        throw std::overflow_error("Overflow in buffer size computation.");
+      }
+      out.resize(produced_total + more);
+    }
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM sniffing enabled.
+   *
+   * BOM sniffing may cause the returned decoder to morph into a decoder
+   * for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+   */
+  inline std::unique_ptr<Decoder> new_decoder() const
+  {
+    // Wrap the raw pointer returned by encoding_new_decoder() right away so
+    // the caller receives it as a std::unique_ptr.
+    return std::unique_ptr<Decoder>(encoding_new_decoder(this));
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM sniffing enabled
+   * into memory occupied by a previously-instantiated decoder.
+   *
+   * BOM sniffing may cause the returned decoder to morph into a decoder
+   * for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+   */
+  inline void new_decoder_into(Decoder& decoder) const
+  {
+    // Forwards to encoding_new_decoder_into(), passing this encoding and
+    // the address of the caller-provided `decoder`.
+    encoding_new_decoder_into(this, &decoder);
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM removal.
+   *
+   * If the input starts with bytes that are the BOM for this encoding,
+   * those bytes are removed. However, the decoder never morphs into a
+   * decoder for another encoding: A BOM for another encoding is treated as
+   * (potentially malformed) input to the decoding algorithm for this
+   * encoding.
+   */
+  inline std::unique_ptr<Decoder> new_decoder_with_bom_removal() const
+  {
+    // Wrap the raw pointer returned by
+    // encoding_new_decoder_with_bom_removal() right away so the caller
+    // receives it as a std::unique_ptr.
+    return std::unique_ptr<Decoder>(
+      encoding_new_decoder_with_bom_removal(this));
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM removal
+   * into memory occupied by a previously-instantiated decoder.
+   *
+   * If the input starts with bytes that are the BOM for this encoding,
+   * those bytes are removed. However, the decoder never morphs into a
+   * decoder for another encoding: A BOM for another encoding is treated as
+   * (potentially malformed) input to the decoding algorithm for this
+   * encoding.
+   */
+  inline void new_decoder_with_bom_removal_into(Decoder& decoder) const
+  {
+    // Forwards to encoding_new_decoder_with_bom_removal_into(), passing
+    // this encoding and the address of the caller-provided `decoder`.
+    encoding_new_decoder_with_bom_removal_into(this, &decoder);
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM handling disabled.
+   *
+   * If the input starts with bytes that look like a BOM, those bytes are
+   * not treated as a BOM. (Hence, the decoder never morphs into a decoder
+   * for another encoding.)
+   *
+   * _Note:_ If the caller has performed BOM sniffing on its own but has not
+   * removed the BOM, the caller should use `new_decoder_with_bom_removal()`
+   * instead of this method to cause the BOM to be removed.
+   */
+  inline std::unique_ptr<Decoder> new_decoder_without_bom_handling() const
+  {
+    // Wrap the raw pointer returned by
+    // encoding_new_decoder_without_bom_handling() right away so the caller
+    // receives it as a std::unique_ptr.
+    return std::unique_ptr<Decoder>(
+      encoding_new_decoder_without_bom_handling(this));
+  }
+
+  /**
+   * Instantiates a new decoder for this encoding with BOM handling disabled
+   * into memory occupied by a previously-instantiated decoder.
+   *
+   * If the input starts with bytes that look like a BOM, those bytes are
+   * not treated as a BOM. (Hence, the decoder never morphs into a decoder
+   * for another encoding.)
+   *
+   * _Note:_ If the caller has performed BOM sniffing on its own but has not
+   * removed the BOM, the caller should use `new_decoder_with_bom_removal_into()`
+   * instead of this method to cause the BOM to be removed.
+   */
+  inline void new_decoder_without_bom_handling_into(Decoder& decoder) const
+  {
+    // Forwards to encoding_new_decoder_without_bom_handling_into(), passing
+    // this encoding and the address of the caller-provided `decoder`.
+    encoding_new_decoder_without_bom_handling_into(this, &decoder);
+  }
+
+  /**
+   * Instantiates a new encoder for the output encoding of this encoding.
+   */
+  inline std::unique_ptr<Encoder> new_encoder() const
+  {
+    // Wrap the raw pointer returned by encoding_new_encoder() right away so
+    // the caller receives it as a std::unique_ptr.
+    return std::unique_ptr<Encoder>(encoding_new_encoder(this));
+  }
+
+  /**
+   * Instantiates a new encoder for the output encoding of this encoding
+   * into memory occupied by a previously-instantiated encoder.
+   */
+  inline void new_encoder_into(Encoder& encoder) const
+  {
+    // Forwards to encoding_new_encoder_into(), passing this encoding and
+    // the address of the caller-provided `encoder`.
+    encoding_new_encoder_into(this, &encoder);
+  }
+
+  /**
+   * Validates UTF-8.
+   *
+   * Returns the index of the first byte that makes the input malformed as
+   * UTF-8 or the length of the input if the input is entirely valid.
+   */
+  static inline size_t utf8_valid_up_to(gsl::span<const uint8_t> buffer)
+  {
+    // The span is passed to encoding_utf8_valid_up_to() as a pointer plus
+    // a length.
+    return encoding_utf8_valid_up_to(buffer.data(), buffer.size());
+  }
+
+  /**
+   * Validates ASCII.
+   *
+   * Returns the index of the first byte that makes the input malformed as
+   * ASCII or the length of the input if the input is entirely valid.
+   */
+  static inline size_t ascii_valid_up_to(gsl::span<const uint8_t> buffer)
+  {
+    // The span is passed to encoding_ascii_valid_up_to() as a pointer plus
+    // a length.
+    return encoding_ascii_valid_up_to(buffer.data(), buffer.size());
+  }
+
+  /**
+   * Validates ISO-2022-JP ASCII-state data.
+   *
+   * Returns the index of the first byte that makes the input not
+   * representable in the ASCII state of ISO-2022-JP or the length of the
+   * input if the input is entirely representable in the ASCII state of
+   * ISO-2022-JP.
+   */
+  static inline size_t iso_2022_jp_ascii_valid_up_to(
+    gsl::span<const uint8_t> buffer)
+  {
+    // The span is passed to encoding_iso_2022_jp_ascii_valid_up_to() as a
+    // pointer plus a length.
+    return encoding_iso_2022_jp_ascii_valid_up_to(buffer.data(), buffer.size());
+  }
+
+private:
+  Encoding() = delete;
+  ~Encoding() = delete;
+};
+
+#endif // encoding_rs_cpp_h_
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/include/encoding_rs_statics.h
@@ -0,0 +1,167 @@
+// Copyright 2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+// This file is not meant to be included directly. Instead, encoding_rs.h
+// includes this file.
+
+#ifndef encoding_rs_statics_h_
+#define encoding_rs_statics_h_
+
+#ifndef ENCODING_RS_ENCODING
+#define ENCODING_RS_ENCODING Encoding
+#ifndef __cplusplus
+typedef struct Encoding_ Encoding;
+#endif
+#endif
+
+#ifndef ENCODING_RS_ENCODER
+#define ENCODING_RS_ENCODER Encoder
+#ifndef __cplusplus
+typedef struct Encoder_ Encoder;
+#endif
+#endif
+
+#ifndef ENCODING_RS_DECODER
+#define ENCODING_RS_DECODER Decoder
+#ifndef __cplusplus
+typedef struct Decoder_ Decoder;
+#endif
+#endif
+
+// Result value of the decode/encode functions that corresponds to the Rust
+// result variant `InputEmpty`.
+#define INPUT_EMPTY 0
+
+// Result value of the decode/encode functions that corresponds to the Rust
+// result variant `OutputFull`.
+#define OUTPUT_FULL 0xFFFFFFFF
+
+// Presumably the length of the longest encoding name: "x-mac-cyrillic" is
+// 14 characters long (a terminating NUL is not counted).
+#define ENCODING_NAME_MAX_LENGTH 14
+
+/// The Big5 encoding.
+extern const ENCODING_RS_ENCODING* const BIG5_ENCODING;
+
+/// The EUC-JP encoding.
+extern const ENCODING_RS_ENCODING* const EUC_JP_ENCODING;
+
+/// The EUC-KR encoding.
+extern const ENCODING_RS_ENCODING* const EUC_KR_ENCODING;
+
+/// The GBK encoding.
+extern const ENCODING_RS_ENCODING* const GBK_ENCODING;
+
+/// The IBM866 encoding.
+extern const ENCODING_RS_ENCODING* const IBM866_ENCODING;
+
+/// The ISO-2022-JP encoding.
+extern const ENCODING_RS_ENCODING* const ISO_2022_JP_ENCODING;
+
+/// The ISO-8859-10 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_10_ENCODING;
+
+/// The ISO-8859-13 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_13_ENCODING;
+
+/// The ISO-8859-14 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_14_ENCODING;
+
+/// The ISO-8859-15 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_15_ENCODING;
+
+/// The ISO-8859-16 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_16_ENCODING;
+
+/// The ISO-8859-2 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_2_ENCODING;
+
+/// The ISO-8859-3 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_3_ENCODING;
+
+/// The ISO-8859-4 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_4_ENCODING;
+
+/// The ISO-8859-5 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_5_ENCODING;
+
+/// The ISO-8859-6 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_6_ENCODING;
+
+/// The ISO-8859-7 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_7_ENCODING;
+
+/// The ISO-8859-8 encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_8_ENCODING;
+
+/// The ISO-8859-8-I encoding.
+extern const ENCODING_RS_ENCODING* const ISO_8859_8_I_ENCODING;
+
+/// The KOI8-R encoding.
+extern const ENCODING_RS_ENCODING* const KOI8_R_ENCODING;
+
+/// The KOI8-U encoding.
+extern const ENCODING_RS_ENCODING* const KOI8_U_ENCODING;
+
+/// The Shift_JIS encoding.
+extern const ENCODING_RS_ENCODING* const SHIFT_JIS_ENCODING;
+
+/// The UTF-16BE encoding.
+extern const ENCODING_RS_ENCODING* const UTF_16BE_ENCODING;
+
+/// The UTF-16LE encoding.
+extern const ENCODING_RS_ENCODING* const UTF_16LE_ENCODING;
+
+/// The UTF-8 encoding.
+extern const ENCODING_RS_ENCODING* const UTF_8_ENCODING;
+
+/// The gb18030 encoding.
+extern const ENCODING_RS_ENCODING* const GB18030_ENCODING;
+
+/// The macintosh encoding.
+extern const ENCODING_RS_ENCODING* const MACINTOSH_ENCODING;
+
+/// The replacement encoding.
+extern const ENCODING_RS_ENCODING* const REPLACEMENT_ENCODING;
+
+/// The windows-1250 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1250_ENCODING;
+
+/// The windows-1251 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1251_ENCODING;
+
+/// The windows-1252 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1252_ENCODING;
+
+/// The windows-1253 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1253_ENCODING;
+
+/// The windows-1254 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1254_ENCODING;
+
+/// The windows-1255 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1255_ENCODING;
+
+/// The windows-1256 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1256_ENCODING;
+
+/// The windows-1257 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1257_ENCODING;
+
+/// The windows-1258 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_1258_ENCODING;
+
+/// The windows-874 encoding.
+extern const ENCODING_RS_ENCODING* const WINDOWS_874_ENCODING;
+
+/// The x-mac-cyrillic encoding.
+extern const ENCODING_RS_ENCODING* const X_MAC_CYRILLIC_ENCODING;
+
+/// The x-user-defined encoding.
+extern const ENCODING_RS_ENCODING* const X_USER_DEFINED_ENCODING;
+
+#endif // encoding_rs_statics_h_
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_c/src/lib.rs
@@ -0,0 +1,1149 @@
+// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![doc(html_root_url = "https://docs.rs/encoding_c/0.7.4")]
+
+//! The C API for encoding_rs.
+//!
+//! # Mapping from Rust
+//!
+//! ## Naming convention
+//!
+//! The wrapper function for each method has a name that starts with the name
+//! of the struct lower-cased, followed by an underscore and ends with the
+//! name of the method.
+//!
+//! For example, `Encoding::for_name()` is wrapped as `encoding_for_name()`.
+//!
+//! ## Arguments
+//!
+//! Functions that wrap non-static methods take the `self` object as their
+//! first argument.
+//!
+//! Slice argument `foo` is decomposed into a pointer `foo` and a length
+//! `foo_len`.
+//!
+//! ## Return values
+//!
+//! Multiple return values become out-params. When an out-param is
+//! length-related, `foo_len` for a slice becomes a pointer in order to become
+//! an in/out-param.
+//!
+//! `DecoderResult`, `EncoderResult` and `CoderResult` become `uint32_t`.
+//! `InputEmpty` becomes `INPUT_EMPTY`. `OutputFull` becomes `OUTPUT_FULL`.
+//! `Unmappable` becomes the scalar value of the unmappable character.
+//! `Malformed` becomes a number whose lowest 8 bits, which can have the decimal
+//! value 0, 1, 2 or 3, indicate the number of bytes that were consumed after
+//! the malformed sequence and whose next-lowest 8 bits, when shifted right by
+//! 8 indicate the length of the malformed byte sequence (possible decimal
+//! values 1, 2, 3 or 4). The maximum possible sum of the two is 6.
+
+extern crate encoding_rs;
+
+use encoding_rs::*;
+
+/// Return value for `*_decode_*` and `*_encode_*` functions that indicates that
+/// the input has been exhausted.
+///
+/// (This is zero as a micro optimization. U+0000 is never unmappable and
+/// malformed sequences always have a positive length.)
+///
+/// Because of the above, a zero return can be tested for directly without
+/// being confused with an `Unmappable` or `Malformed` result.
+pub const INPUT_EMPTY: u32 = 0;
+
+/// Return value for `*_decode_*` and `*_encode_*` functions that indicates that
+/// the output space has been exhausted.
+///
+/// All 32 bits are set. This is larger than any Unicode scalar value (which
+/// is what an `Unmappable` result is converted to) and, per the crate-level
+/// documentation, a `Malformed` result only occupies the two lowest bytes.
+pub const OUTPUT_FULL: u32 = 0xFFFFFFFF;
+
+/// Newtype for `*const Encoding` in order to be able to implement `Sync` for
+/// it.
+///
+/// Raw pointers are not `Sync`, so a bare `*const Encoding` cannot be the type
+/// of a `static`. The `#[no_mangle]` statics of this type are the symbols
+/// that the C header declares as `const ENCODING_RS_ENCODING* const`.
+pub struct ConstEncoding(*const Encoding);
+
+/// Required for `static` fields.
+///
+/// NOTE(review): soundness presumably rests on every wrapped pointer
+/// referring to an immutable `static` `Encoding` -- confirm for any new use.
+unsafe impl Sync for ConstEncoding {}
+
+// BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+/// The minimum length of buffers that may be passed to `encoding_name()`.
+pub const ENCODING_NAME_MAX_LENGTH: usize = 14; // x-mac-cyrillic
+
+/// The Big5 encoding.
+#[no_mangle]
+pub static BIG5_ENCODING: ConstEncoding = ConstEncoding(&BIG5_INIT);
+
+/// The EUC-JP encoding.
+#[no_mangle]
+pub static EUC_JP_ENCODING: ConstEncoding = ConstEncoding(&EUC_JP_INIT);
+
+/// The EUC-KR encoding.
+#[no_mangle]
+pub static EUC_KR_ENCODING: ConstEncoding = ConstEncoding(&EUC_KR_INIT);
+
+/// The GBK encoding.
+#[no_mangle]
+pub static GBK_ENCODING: ConstEncoding = ConstEncoding(&GBK_INIT);
+
+/// The IBM866 encoding.
+#[no_mangle]
+pub static IBM866_ENCODING: ConstEncoding = ConstEncoding(&IBM866_INIT);
+
+/// The ISO-2022-JP encoding.
+#[no_mangle]
+pub static ISO_2022_JP_ENCODING: ConstEncoding = ConstEncoding(&ISO_2022_JP_INIT);
+
+/// The ISO-8859-10 encoding.
+#[no_mangle]
+pub static ISO_8859_10_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_10_INIT);
+
+/// The ISO-8859-13 encoding.
+#[no_mangle]
+pub static ISO_8859_13_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_13_INIT);
+
+/// The ISO-8859-14 encoding.
+#[no_mangle]
+pub static ISO_8859_14_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_14_INIT);
+
+/// The ISO-8859-15 encoding.
+#[no_mangle]
+pub static ISO_8859_15_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_15_INIT);
+
+/// The ISO-8859-16 encoding.
+#[no_mangle]
+pub static ISO_8859_16_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_16_INIT);
+
+/// The ISO-8859-2 encoding.
+#[no_mangle]
+pub static ISO_8859_2_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_2_INIT);
+
+/// The ISO-8859-3 encoding.
+#[no_mangle]
+pub static ISO_8859_3_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_3_INIT);
+
+/// The ISO-8859-4 encoding.
+#[no_mangle]
+pub static ISO_8859_4_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_4_INIT);
+
+/// The ISO-8859-5 encoding.
+#[no_mangle]
+pub static ISO_8859_5_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_5_INIT);
+
+/// The ISO-8859-6 encoding.
+#[no_mangle]
+pub static ISO_8859_6_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_6_INIT);
+
+/// The ISO-8859-7 encoding.
+#[no_mangle]
+pub static ISO_8859_7_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_7_INIT);
+
+/// The ISO-8859-8 encoding.
+#[no_mangle]
+pub static ISO_8859_8_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_8_INIT);
+
+/// The ISO-8859-8-I encoding.
+#[no_mangle]
+pub static ISO_8859_8_I_ENCODING: ConstEncoding = ConstEncoding(&ISO_8859_8_I_INIT);
+
+/// The KOI8-R encoding.
+#[no_mangle]
+pub static KOI8_R_ENCODING: ConstEncoding = ConstEncoding(&KOI8_R_INIT);
+
+/// The KOI8-U encoding.
+#[no_mangle]
+pub static KOI8_U_ENCODING: ConstEncoding = ConstEncoding(&KOI8_U_INIT);
+
+/// The Shift_JIS encoding.
+#[no_mangle]
+pub static SHIFT_JIS_ENCODING: ConstEncoding = ConstEncoding(&SHIFT_JIS_INIT);
+
+/// The UTF-16BE encoding.
+#[no_mangle]
+pub static UTF_16BE_ENCODING: ConstEncoding = ConstEncoding(&UTF_16BE_INIT);
+
+/// The UTF-16LE encoding.
+#[no_mangle]
+pub static UTF_16LE_ENCODING: ConstEncoding = ConstEncoding(&UTF_16LE_INIT);
+
+/// The UTF-8 encoding.
+#[no_mangle]
+pub static UTF_8_ENCODING: ConstEncoding = ConstEncoding(&UTF_8_INIT);
+
+/// The gb18030 encoding.
+#[no_mangle]
+pub static GB18030_ENCODING: ConstEncoding = ConstEncoding(&GB18030_INIT);
+
+/// The macintosh encoding.
+#[no_mangle]
+pub static MACINTOSH_ENCODING: ConstEncoding = ConstEncoding(&MACINTOSH_INIT);
+
+/// The replacement encoding.
+#[no_mangle]
+pub static REPLACEMENT_ENCODING: ConstEncoding = ConstEncoding(&REPLACEMENT_INIT);
+
+/// The windows-1250 encoding.
+#[no_mangle]
+pub static WINDOWS_1250_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1250_INIT);
+
+/// The windows-1251 encoding.
+#[no_mangle]
+pub static WINDOWS_1251_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1251_INIT);
+
+/// The windows-1252 encoding.
+#[no_mangle]
+pub static WINDOWS_1252_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1252_INIT);
+
+/// The windows-1253 encoding.
+#[no_mangle]
+pub static WINDOWS_1253_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1253_INIT);
+
+/// The windows-1254 encoding.
+#[no_mangle]
+pub static WINDOWS_1254_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1254_INIT);
+
+/// The windows-1255 encoding.
+#[no_mangle]
+pub static WINDOWS_1255_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1255_INIT);
+
+/// The windows-1256 encoding.
+#[no_mangle]
+pub static WINDOWS_1256_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1256_INIT);
+
+/// The windows-1257 encoding.
+#[no_mangle]
+pub static WINDOWS_1257_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1257_INIT);
+
+/// The windows-1258 encoding.
+#[no_mangle]
+pub static WINDOWS_1258_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_1258_INIT);
+
+/// The windows-874 encoding.
+#[no_mangle]
+pub static WINDOWS_874_ENCODING: ConstEncoding = ConstEncoding(&WINDOWS_874_INIT);
+
+/// The x-mac-cyrillic encoding.
+#[no_mangle]
+pub static X_MAC_CYRILLIC_ENCODING: ConstEncoding = ConstEncoding(&X_MAC_CYRILLIC_INIT);
+
+/// The x-user-defined encoding.
+#[no_mangle]
+pub static X_USER_DEFINED_ENCODING: ConstEncoding = ConstEncoding(&X_USER_DEFINED_INIT);
+
+// END GENERATED CODE
+
+/// Converts a `CoderResult` into the `uint32_t` status code used by the C API:
+/// `OUTPUT_FULL` or `INPUT_EMPTY`.
+#[inline(always)]
+fn coder_result_to_u32(result: CoderResult) -> u32 {
+    match result {
+        CoderResult::OutputFull => OUTPUT_FULL,
+        CoderResult::InputEmpty => INPUT_EMPTY,
+    }
+}
+
+/// Converts a `DecoderResult` into the `uint32_t` status code used by the C
+/// API.
+///
+/// `InputEmpty` and `OutputFull` map to the constants of the same meaning.
+/// `Malformed(bad, good)` is packed as `bad` in the lowest 8 bits and `good`
+/// in the next-lowest 8 bits.
+///
+/// NOTE(review): assuming `encoding_rs` orders the fields as
+/// `Malformed(length_of_malformed_sequence, bytes_consumed_after_it)`, this
+/// packing is the reverse of what the crate-level documentation describes
+/// (which puts the consumed-after count in the lowest byte). Confirm which of
+/// the two is intended before relying on the individual bytes.
+#[inline(always)]
+fn decoder_result_to_u32(result: DecoderResult) -> u32 {
+    match result {
+        DecoderResult::Malformed(bad, good) => {
+            let upper = (good as u32) << 8;
+            let lower = bad as u32;
+            upper | lower
+        }
+        DecoderResult::OutputFull => OUTPUT_FULL,
+        DecoderResult::InputEmpty => INPUT_EMPTY,
+    }
+}
+
+/// Converts an `EncoderResult` into the `uint32_t` status code used by the C
+/// API. An unmappable character is reported as its scalar value.
+#[inline(always)]
+fn encoder_result_to_u32(result: EncoderResult) -> u32 {
+    match result {
+        EncoderResult::Unmappable(unmappable) => unmappable as u32,
+        EncoderResult::OutputFull => OUTPUT_FULL,
+        EncoderResult::InputEmpty => INPUT_EMPTY,
+    }
+}
+
+/// Flattens an optional encoding reference into a raw pointer for C callers:
+/// `Some(e)` becomes a pointer to `e`, `None` becomes `NULL`.
+#[inline(always)]
+fn option_to_ptr(opt: Option<&'static Encoding>) -> *const Encoding {
+    if let Some(found) = opt {
+        found as *const Encoding
+    } else {
+        ::std::ptr::null()
+    }
+}
+
+/// Implements the
+/// [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
+/// algorithm.
+///
+/// The argument is ASCII-lowercased and stripped of leading and trailing
+/// whitespace. If the result matches a label defined in the Encoding
+/// Standard, the `const Encoding*` representing the corresponding encoding is
+/// returned. If there is no match, `NULL` is returned.
+///
+/// Use this function when a `NULL` return is handled by falling back to
+/// another encoding (e.g. `WINDOWS_1252_ENCODING`). When a `NULL` return is
+/// instead handled by refusing to process the input,
+/// `encoding_for_label_no_replacement()` is more appropriate.
+///
+/// The argument buffer can be in any ASCII-compatible encoding. It is not
+/// required to be UTF-8.
+///
+/// `label` must be non-`NULL` even if `label_len` is zero. When `label_len`
+/// is zero, it is OK for `label` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `label` and `label_len` don't designate a valid memory block
+/// or if `label` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_for_label(label: *const u8, label_len: usize) -> *const Encoding {
+    let bytes = ::std::slice::from_raw_parts(label, label_len);
+    match Encoding::for_label(bytes) {
+        Some(found) => found as *const Encoding,
+        None => ::std::ptr::null(),
+    }
+}
+
+/// Behaves like `encoding_for_label()`, except that whenever
+/// `encoding_for_label()` would return `REPLACEMENT_ENCODING`, this function
+/// returns `NULL` instead.
+///
+/// This is useful in scenarios where an invalid label must result in a fatal
+/// error, because in those cases the caller typically wishes to treat the
+/// labels that map to the replacement encoding as fatal errors, too.
+///
+/// It is not OK to use this function when the action upon a `NULL` return is
+/// to use a fallback encoding (e.g. `WINDOWS_1252_ENCODING`). In such a case,
+/// `encoding_for_label()` should be used instead in order to avoid unsafe
+/// fallback for labels that `encoding_for_label()` maps to
+/// `REPLACEMENT_ENCODING`.
+///
+/// The argument buffer can be in any ASCII-compatible encoding. It is not
+/// required to be UTF-8.
+///
+/// `label` must be non-`NULL` even if `label_len` is zero. When `label_len`
+/// is zero, it is OK for `label` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `label` and `label_len` don't designate a valid memory block
+/// or if `label` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_for_label_no_replacement(label: *const u8,
+                                                           label_len: usize)
+                                                           -> *const Encoding {
+    let bytes = ::std::slice::from_raw_parts(label, label_len);
+    if let Some(found) = Encoding::for_label_no_replacement(bytes) {
+        found as *const Encoding
+    } else {
+        ::std::ptr::null()
+    }
+}
+
+/// Performs non-incremental BOM sniffing.
+///
+/// The argument must either be a buffer representing the entire input
+/// stream (non-streaming case) or a buffer representing at least the first
+/// three bytes of the input stream (streaming case).
+///
+/// Returns `UTF_8_ENCODING`, `UTF_16LE_ENCODING` or `UTF_16BE_ENCODING` if the
+/// argument starts with the UTF-8, UTF-16LE or UTF-16BE BOM, and `NULL`
+/// otherwise. `buffer_len` is an in/out-param: on entry `*buffer_len` is the
+/// length of `buffer`; upon return it is the length of the BOM (zero if there
+/// is no BOM).
+///
+/// `buffer` must be non-`NULL` even if `*buffer_len` is zero. When
+/// `*buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `*buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_for_bom(buffer: *const u8,
+                                          buffer_len: *mut usize)
+                                          -> *const Encoding {
+    let input = ::std::slice::from_raw_parts(buffer, *buffer_len);
+    if let Some((sniffed, bom_len)) = Encoding::for_bom(input) {
+        *buffer_len = bom_len;
+        sniffed as *const Encoding
+    } else {
+        // No BOM: report a zero-length BOM alongside the NULL return.
+        *buffer_len = 0;
+        ::std::ptr::null()
+    }
+}
+
+/// Looks up an encoding by its exact name.
+///
+/// If the argument matches exactly (case-sensitively; no whitespace
+/// removal performed) the name of an encoding, returns the
+/// `const Encoding*` representing that encoding. Otherwise panics.
+///
+/// The motivating use case for this function is interoperability with
+/// legacy Gecko code that represents encodings as name strings instead of
+/// type-safe `Encoding` objects. Using this function for other purposes is
+/// most likely the wrong thing to do.
+///
+/// `name` must be non-`NULL` even if `name_len` is zero. When `name_len`
+/// is zero, it is OK for `name` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `name` and `name_len` don't designate a valid memory block
+/// or if `name` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_for_name(name: *const u8, name_len: usize) -> *const Encoding {
+    let wanted = ::std::slice::from_raw_parts(name, name_len);
+    let found: &'static Encoding = Encoding::for_name(wanted);
+    found as *const Encoding
+}
+
+/// Copies the name of the given `Encoding` into a caller-supplied buffer as
+/// ASCII and returns the number of bytes / ASCII characters written.
+///
+/// The output is not null-terminated.
+///
+/// The caller _MUST_ ensure that `name_out` points to a buffer whose length
+/// is at least `ENCODING_NAME_MAX_LENGTH` bytes.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL` or if `name_out` doesn't point to
+/// a valid block of memory whose length is at least
+/// `ENCODING_NAME_MAX_LENGTH` bytes.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_name(encoding: *const Encoding, name_out: *mut u8) -> usize {
+    let name = (*encoding).name();
+    let written = name.len();
+    ::std::ptr::copy_nonoverlapping(name.as_ptr(), name_out, written);
+    written
+}
+
+/// Reports whether the _output encoding_ of this encoding can encode every
+/// Unicode scalar. (Only true if the output encoding is UTF-8.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_can_encode_everything(encoding: *const Encoding) -> bool {
+    let this = &*encoding;
+    this.can_encode_everything()
+}
+
+/// Reports whether the bytes 0x00...0x7F map exclusively to the characters
+/// U+0000...U+007F and vice versa.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_is_ascii_compatible(encoding: *const Encoding) -> bool {
+    let this = &*encoding;
+    this.is_ascii_compatible()
+}
+
+/// Returns the _output encoding_ of this encoding. This is UTF-8 for
+/// UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_output_encoding(encoding: *const Encoding) -> *const Encoding {
+    let output: &'static Encoding = (*encoding).output_encoding();
+    output as *const Encoding
+}
+
+/// Heap-allocates a new `Decoder` for the given `Encoding` with BOM sniffing
+/// enabled and returns a pointer to the newly-allocated `Decoder`.
+///
+/// BOM sniffing may cause the returned decoder to morph into a decoder
+/// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+///
+/// Once the allocated `Decoder` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_decoder(encoding: *const Encoding) -> *mut Decoder {
+    let boxed = Box::new((*encoding).new_decoder());
+    // Ownership passes to the caller until `decoder_free()` is called.
+    Box::into_raw(boxed)
+}
+
+/// Heap-allocates a new `Decoder` for the given `Encoding` with BOM removal
+/// and returns a pointer to the newly-allocated `Decoder`.
+///
+/// If the input starts with bytes that are the BOM for this encoding,
+/// those bytes are removed. However, the decoder never morphs into a
+/// decoder for another encoding: A BOM for another encoding is treated as
+/// (potentially malformed) input to the decoding algorithm for this
+/// encoding.
+///
+/// Once the allocated `Decoder` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_decoder_with_bom_removal(encoding: *const Encoding)
+                                                               -> *mut Decoder {
+    let boxed = Box::new((*encoding).new_decoder_with_bom_removal());
+    // Ownership passes to the caller until `decoder_free()` is called.
+    Box::into_raw(boxed)
+}
+
+/// Heap-allocates a new `Decoder` for the given `Encoding` with BOM handling
+/// disabled and returns a pointer to the newly-allocated `Decoder`.
+///
+/// If the input starts with bytes that look like a BOM, those bytes are
+/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
+/// for another encoding.)
+///
+/// _Note:_ If the caller has performed BOM sniffing on its own but has not
+/// removed the BOM, the caller should use
+/// `encoding_new_decoder_with_bom_removal()` instead of this function to cause
+/// the BOM to be removed.
+///
+/// Once the allocated `Decoder` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `decoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_decoder_without_bom_handling(encoding: *const Encoding)
+                                                                   -> *mut Decoder {
+    let boxed = Box::new((*encoding).new_decoder_without_bom_handling());
+    // Ownership passes to the caller until `decoder_free()` is called.
+    Box::into_raw(boxed)
+}
+
+/// Creates a new `Decoder` for the given `Encoding`, with BOM sniffing
+/// enabled, in memory provided by the caller. (In practice, the target should
+/// likely be a pointer previously returned by `encoding_new_decoder()`.)
+///
+/// Note: If the caller has already performed BOM sniffing but has
+/// not removed the BOM, the caller should still use this function in
+/// order to cause the BOM to be ignored.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_decoder_into(encoding: *const Encoding,
+                                                   decoder: *mut Decoder) {
+    let fresh = (*encoding).new_decoder();
+    *decoder = fresh;
+}
+
+/// Creates a new `Decoder` for the given `Encoding`, with BOM removal, in
+/// memory provided by the caller.
+///
+/// If the input starts with bytes that are the BOM for this encoding,
+/// those bytes are removed. However, the decoder never morphs into a
+/// decoder for another encoding: A BOM for another encoding is treated as
+/// (potentially malformed) input to the decoding algorithm for this
+/// encoding.
+///
+/// This function returns nothing and performs no heap allocation of its own;
+/// the memory that `decoder` points to remains the caller's to manage.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_decoder_with_bom_removal_into(encoding: *const Encoding,
+                                                                    decoder: *mut Decoder) {
+    let fresh = (*encoding).new_decoder_with_bom_removal();
+    *decoder = fresh;
+}
+
+/// Creates a new `Decoder` for the given `Encoding`, with BOM handling
+/// disabled, in memory provided by the caller.
+///
+/// If the input starts with bytes that look like a BOM, those bytes are
+/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
+/// for another encoding.)
+///
+/// _Note:_ If the caller has performed BOM sniffing on its own but has not
+/// removed the BOM, the caller should use
+/// `encoding_new_decoder_with_bom_removal_into()` instead of this function to
+/// cause the BOM to be removed.
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_decoder_without_bom_handling_into(encoding: *const Encoding,
+                                                                        decoder: *mut Decoder) {
+    let fresh = (*encoding).new_decoder_without_bom_handling();
+    *decoder = fresh;
+}
+
+/// Heap-allocates a new `Encoder` for the given `Encoding` and returns a
+/// pointer to the newly-allocated `Encoder`. (Exception: if the `Encoding` is
+/// `replacement`, a new `Encoder` for UTF-8 is instantiated, and that
+/// `Encoder` reports `UTF_8` as its `Encoding`.)
+///
+/// Once the allocated `Encoder` is no longer needed, the caller _MUST_
+/// deallocate it by passing the pointer returned by this function to
+/// `encoder_free()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_encoder(encoding: *const Encoding) -> *mut Encoder {
+    let boxed = Box::new((*encoding).new_encoder());
+    // Ownership passes to the caller until `encoder_free()` is called.
+    Box::into_raw(boxed)
+}
+
+/// Creates a new `Encoder` for the given `Encoding` in memory provided by
+/// the caller. (In practice, the target should likely be a pointer previously
+/// returned by `encoding_new_encoder()`.)
+///
+/// # Undefined behavior
+///
+/// UB ensues if either argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_new_encoder_into(encoding: *const Encoding,
+                                                   encoder: *mut Encoder) {
+    let fresh = (*encoding).new_encoder();
+    *encoder = fresh;
+}
+
+/// Validates UTF-8.
+///
+/// Returns the index of the first byte that makes the input malformed as
+/// UTF-8, or `buffer_len` if `buffer` is entirely valid.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_utf8_valid_up_to(buffer: *const u8, buffer_len: usize) -> usize {
+    let input = ::std::slice::from_raw_parts(buffer, buffer_len);
+    let valid_prefix_len = Encoding::utf8_valid_up_to(input);
+    valid_prefix_len
+}
+
+/// Validates ASCII.
+///
+/// Returns the index of the first byte that makes the input malformed as
+/// ASCII, or `buffer_len` if `buffer` is entirely valid.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_ascii_valid_up_to(buffer: *const u8, buffer_len: usize) -> usize {
+    let input = ::std::slice::from_raw_parts(buffer, buffer_len);
+    let valid_prefix_len = Encoding::ascii_valid_up_to(input);
+    valid_prefix_len
+}
+
+/// Validates ISO-2022-JP ASCII-state data.
+///
+/// Returns the index of the first byte that makes the input not representable
+/// in the ASCII state of ISO-2022-JP, or `buffer_len` if `buffer` is entirely
+/// representable in the ASCII state of ISO-2022-JP.
+///
+/// `buffer` must be non-`NULL` even if `buffer_len` is zero. When
+/// `buffer_len` is zero, it is OK for `buffer` to be something
+/// non-dereferencable, such as `0x1`. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory
+/// block or if `buffer` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoding_iso_2022_jp_ascii_valid_up_to(buffer: *const u8,
+                                                                buffer_len: usize)
+                                                                -> usize {
+    let input = ::std::slice::from_raw_parts(buffer, buffer_len);
+    let valid_prefix_len = Encoding::iso_2022_jp_ascii_valid_up_to(input);
+    valid_prefix_len
+}
+
+/// Deallocates a `Decoder` previously allocated by `encoding_new_decoder()`,
+/// `encoding_new_decoder_with_bom_removal()` or
+/// `encoding_new_decoder_without_bom_handling()`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn decoder_free(decoder: *mut Decoder) {
+    // Re-box the pointer so that dropping the box releases the allocation.
+    drop(Box::from_raw(decoder));
+}
+
+/// Returns the `Encoding` this `Decoder` is for.
+///
+/// BOM sniffing can change the return value of this function during the life
+/// of the decoder.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn decoder_encoding(decoder: *const Decoder) -> *const Encoding {
+    let current: &'static Encoding = (*decoder).encoding();
+    current as *const Encoding
+}
+
+/// Query the worst-case UTF-8 output size _with replacement_.
+///
+/// Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+/// that will not overflow given the current state of the decoder and
+/// `byte_length` number of additional input bytes when decoding with
+/// errors handled by outputting a REPLACEMENT CHARACTER for each malformed
+/// sequence, or `SIZE_MAX` if `size_t` would overflow.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn decoder_max_utf8_buffer_length(decoder: *const Decoder,
+                                                        byte_length: usize)
+                                                        -> usize {
+    match (*decoder).max_utf8_buffer_length(byte_length) {
+        Some(needed) => needed,
+        // Overflow is signalled to C as SIZE_MAX.
+        None => ::std::usize::MAX,
+    }
+}
+
+/// Query the worst-case UTF-8 output size _without replacement_.
+///
+/// Returns the size of the output buffer in UTF-8 code units (`uint8_t`)
+/// that will not overflow given the current state of the decoder and
+/// `byte_length` number of additional input bytes when decoding without
+/// replacement error handling, or `SIZE_MAX` if `size_t` would overflow.
+///
+/// Note that this value may be too small for the `_with_replacement` case.
+/// Use `decoder_max_utf8_buffer_length()` for that case.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn decoder_max_utf8_buffer_length_without_replacement(decoder: *const Decoder,
+                                                                            byte_length: usize)
+                                                                            -> usize {
+    match (*decoder).max_utf8_buffer_length_without_replacement(byte_length) {
+        Some(needed) => needed,
+        // Overflow is signalled to C as SIZE_MAX.
+        None => ::std::usize::MAX,
+    }
+}
+
+/// Incrementally decode a byte stream into UTF-8 with malformed sequences
+/// replaced with the REPLACEMENT CHARACTER.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`Decoder`][1] struct for the semantics.
+///
+/// `src_len` and `dst_len` are in/out-params: on entry they hold the lengths
+/// of `src` and `dst`; upon return they hold the number of bytes read and
+/// written, respectively. `*had_replacements` is set to whether replacements
+/// were made.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+#[no_mangle]
+pub unsafe extern "C" fn decoder_decode_to_utf8(decoder: *mut Decoder,
+                                                src: *const u8,
+                                                src_len: *mut usize,
+                                                dst: *mut u8,
+                                                dst_len: *mut usize,
+                                                last: bool,
+                                                had_replacements: *mut bool)
+                                                -> u32 {
+    let input = ::std::slice::from_raw_parts(src, *src_len);
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let (status, consumed, produced, replaced) = (*decoder).decode_to_utf8(input, output, last);
+    // Report progress back through the in/out-params.
+    *src_len = consumed;
+    *dst_len = produced;
+    *had_replacements = replaced;
+    coder_result_to_u32(status)
+}
+
+/// Incrementally decode a byte stream into UTF-8 _without replacement_.
+///
+/// See the top-level FFI documentation for documentation for how the
+/// `decoder_decode_*` functions are mapped from Rust and the documentation
+/// for the [`Decoder`][1] struct for the semantics.
+///
+/// `src_len` and `dst_len` are in/out-params: on entry they hold the lengths
+/// of `src` and `dst`; upon return they hold the number of bytes read and
+/// written, respectively.
+///
+/// `src` must be non-`NULL` even if `src_len` is zero. When `src_len` is zero,
+/// it is OK for `src` to be something non-dereferencable, such as `0x1`.
+/// Likewise for `dst` when `dst_len` is zero. This is required due to Rust's
+/// optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, `src` and `src_len`
+/// don't designate a valid block of memory or `dst` and `dst_len` don't
+/// designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+#[no_mangle]
+pub unsafe extern "C" fn decoder_decode_to_utf8_without_replacement(decoder: *mut Decoder,
+                                                                    src: *const u8,
+                                                                    src_len: *mut usize,
+                                                                    dst: *mut u8,
+                                                                    dst_len: *mut usize,
+                                                                    last: bool)
+                                                                    -> u32 {
+    let input = ::std::slice::from_raw_parts(src, *src_len);
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let outcome = (*decoder).decode_to_utf8_without_replacement(input, output, last);
+    let (status, consumed, produced) = outcome;
+    // Report progress back through the in/out-params.
+    *src_len = consumed;
+    *dst_len = produced;
+    decoder_result_to_u32(status)
+}
+
+/// Query the worst-case UTF-16 output size (with or without replacement).
+///
+/// Returns the size of the output buffer in UTF-16 code units (`char16_t`)
+/// that will not overflow given the current state of the decoder and
+/// `u16_length` additional input bytes, or `SIZE_MAX` if `size_t` would
+/// overflow.
+///
+/// Note that, despite its name, `u16_length` is documented here as a count
+/// of input _bytes_; it is passed unchanged to
+/// `Decoder::max_utf16_buffer_length()`.
+///
+/// Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the
+/// return value of this method applies also in the
+/// `_without_replacement` case.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `decoder` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn decoder_max_utf16_buffer_length(decoder: *const Decoder,
+                                                         u16_length: usize)
+                                                         -> usize {
+    match (*decoder).max_utf16_buffer_length(u16_length) {
+        Some(length) => length,
+        // No representable answer: signal overflow with `SIZE_MAX`.
+        None => ::std::usize::MAX,
+    }
+}
+
+/// Incrementally decode a byte stream into UTF-16 with malformed sequences
+/// replaced with the REPLACEMENT CHARACTER.
+///
+/// The top-level FFI documentation explains how the `decoder_decode_*`
+/// functions are mapped from Rust; the semantics are documented on the
+/// [`Decoder`][1] struct.
+///
+/// On entry, `*src_len` gives the length of `src` in bytes and `*dst_len`
+/// the length of `dst` in `u16` units. On return, `*src_len` holds the
+/// amount read, `*dst_len` the amount written and `*had_replacements` the
+/// replacement flag returned by `Decoder::decode_to_utf16()`.
+///
+/// `src` must be non-`NULL` even if `*src_len` is zero. When `*src_len` is
+/// zero, it is OK for `src` to be something non-dereferenceable, such as
+/// `0x1`. Likewise for `dst` when `*dst_len` is zero. This is required due
+/// to Rust's optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, if `src` and
+/// `*src_len` don't designate a valid block of memory or if `dst` and
+/// `*dst_len` don't designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+#[no_mangle]
+pub unsafe extern "C" fn decoder_decode_to_utf16(decoder: *mut Decoder,
+                                                 src: *const u8,
+                                                 src_len: *mut usize,
+                                                 dst: *mut u16,
+                                                 dst_len: *mut usize,
+                                                 last: bool,
+                                                 had_replacements: *mut bool)
+                                                 -> u32 {
+    // View the caller's buffers as Rust slices of the advertised lengths.
+    let input = ::std::slice::from_raw_parts(src, *src_len);
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let (status, consumed, produced, any_replaced) =
+        (*decoder).decode_to_utf16(input, output, last);
+    // Report progress through the out-parameters.
+    *src_len = consumed;
+    *dst_len = produced;
+    *had_replacements = any_replaced;
+    coder_result_to_u32(status)
+}
+
+/// Incrementally decode a byte stream into UTF-16 _without replacement_.
+///
+/// The top-level FFI documentation explains how the `decoder_decode_*`
+/// functions are mapped from Rust; the semantics are documented on the
+/// [`Decoder`][1] struct.
+///
+/// On entry, `*src_len` gives the length of `src` in bytes and `*dst_len`
+/// the length of `dst` in `u16` units. On return, `*src_len` holds the
+/// amount read and `*dst_len` the amount written.
+///
+/// `src` must be non-`NULL` even if `*src_len` is zero. When `*src_len` is
+/// zero, it is OK for `src` to be something non-dereferenceable, such as
+/// `0x1`. Likewise for `dst` when `*dst_len` is zero. This is required due
+/// to Rust's optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, if `src` and
+/// `*src_len` don't designate a valid block of memory or if `dst` and
+/// `*dst_len` don't designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Decoder.html
+#[no_mangle]
+pub unsafe extern "C" fn decoder_decode_to_utf16_without_replacement(decoder: *mut Decoder,
+                                                                     src: *const u8,
+                                                                     src_len: *mut usize,
+                                                                     dst: *mut u16,
+                                                                     dst_len: *mut usize,
+                                                                     last: bool)
+                                                                     -> u32 {
+    // View the caller's buffers as Rust slices of the advertised lengths.
+    let input = ::std::slice::from_raw_parts(src, *src_len);
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let (status, consumed, produced) =
+        (*decoder).decode_to_utf16_without_replacement(input, output, last);
+    // Report progress through the in/out length pointers.
+    *src_len = consumed;
+    *dst_len = produced;
+    decoder_result_to_u32(status)
+}
+
+/// Deallocates an `Encoder` previously allocated by `encoding_new_encoder()`.
+///
+/// The pointer is turned back into a `Box` that is dropped immediately, so
+/// `encoder` must not be used after this call.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoder_free(encoder: *mut Encoder) {
+    // Reclaim ownership of the allocation and release it right away.
+    drop(Box::from_raw(encoder));
+}
+
+/// Returns a pointer to the `Encoding` that this `Encoder` is for.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoder_encoding(encoder: *const Encoder) -> *const Encoding {
+    let this: &Encoder = &*encoder;
+    this.encoding()
+}
+
+/// Reports whether the encoder has pending state.
+///
+/// Returns `true` if this is an ISO-2022-JP encoder that's not in the
+/// ASCII state and `false` otherwise.
+///
+/// # Undefined behavior
+///
+/// UB ensues if the argument is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoder_has_pending_state(encoder: *const Encoder) -> bool {
+    let this: &Encoder = &*encoder;
+    this.has_pending_state()
+}
+
+/// Query the worst-case output size when encoding from UTF-8 with
+/// replacement, assuming that nothing actually needs replacing.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `byte_length` additional
+/// input code units, provided that there are no unmappable characters in
+/// the input. Returns `SIZE_MAX` if `size_t` would overflow.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `encoder` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoder_max_buffer_length_from_utf8_if_no_unmappables
+    (encoder: *const Encoder,
+     byte_length: usize)
+     -> usize {
+    match (*encoder).max_buffer_length_from_utf8_if_no_unmappables(byte_length) {
+        Some(length) => length,
+        // No representable answer: signal overflow with `SIZE_MAX`.
+        None => ::std::usize::MAX,
+    }
+}
+
+/// Query the worst-case output size when encoding from UTF-8 without
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `byte_length` additional
+/// input code units. Returns `SIZE_MAX` if `size_t` would overflow.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `encoder` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoder_max_buffer_length_from_utf8_without_replacement(encoder: *const Encoder,
+                                                             byte_length: usize)
+                                                             -> usize {
+    match (*encoder).max_buffer_length_from_utf8_without_replacement(byte_length) {
+        Some(length) => length,
+        // No representable answer: signal overflow with `SIZE_MAX`.
+        None => ::std::usize::MAX,
+    }
+}
+
+/// Incrementally encode into byte stream from UTF-8 with unmappable
+/// characters replaced with HTML (decimal) numeric character references.
+///
+/// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe!
+/// If in doubt, check the validity of input before using!
+///
+/// The top-level FFI documentation explains how the `encoder_encode_*`
+/// functions are mapped from Rust; the semantics are documented on the
+/// [`Encoder`][1] struct.
+///
+/// On entry, `*src_len` and `*dst_len` give the lengths (in bytes) of the
+/// buffers at `src` and `dst`. On return, `*src_len` holds the number of
+/// bytes read, `*dst_len` the number of bytes written and
+/// `*had_replacements` the replacement flag returned by
+/// `Encoder::encode_from_utf8()`.
+///
+/// `src` must be non-`NULL` even if `*src_len` is zero. When `*src_len` is
+/// zero, it is OK for `src` to be something non-dereferenceable, such as
+/// `0x1`. Likewise for `dst` when `*dst_len` is zero. This is required due
+/// to Rust's optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, if `src` and
+/// `*src_len` don't designate a valid block of memory or if `dst` and
+/// `*dst_len` don't designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+#[no_mangle]
+pub unsafe extern "C" fn encoder_encode_from_utf8(encoder: *mut Encoder,
+                                                  src: *const u8,
+                                                  src_len: *mut usize,
+                                                  dst: *mut u8,
+                                                  dst_len: *mut usize,
+                                                  last: bool,
+                                                  had_replacements: *mut bool)
+                                                  -> u32 {
+    // The input bytes are reinterpreted as `str` without any validation.
+    let text = ::std::str::from_utf8_unchecked(::std::slice::from_raw_parts(src, *src_len));
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let (status, consumed, produced, any_replaced) =
+        (*encoder).encode_from_utf8(text, output, last);
+    // Report progress through the out-parameters.
+    *src_len = consumed;
+    *dst_len = produced;
+    *had_replacements = any_replaced;
+    coder_result_to_u32(status)
+}
+
+/// Incrementally encode into byte stream from UTF-8 _without replacement_.
+///
+/// The input absolutely _MUST_ be valid UTF-8 or the behavior is memory-unsafe!
+/// If in doubt, check the validity of input before using!
+///
+/// The top-level FFI documentation explains how the `encoder_encode_*`
+/// functions are mapped from Rust; the semantics are documented on the
+/// [`Encoder`][1] struct.
+///
+/// On entry, `*src_len` and `*dst_len` give the lengths (in bytes) of the
+/// buffers at `src` and `dst`. On return, `*src_len` holds the number of
+/// bytes read and `*dst_len` the number of bytes written.
+///
+/// `src` must be non-`NULL` even if `*src_len` is zero. When `*src_len` is
+/// zero, it is OK for `src` to be something non-dereferenceable, such as
+/// `0x1`. Likewise for `dst` when `*dst_len` is zero. This is required due
+/// to Rust's optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, if `src` and
+/// `*src_len` don't designate a valid block of memory or if `dst` and
+/// `*dst_len` don't designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+#[no_mangle]
+pub unsafe extern "C" fn encoder_encode_from_utf8_without_replacement(encoder: *mut Encoder,
+                                                                      src: *const u8,
+                                                                      src_len: *mut usize,
+                                                                      dst: *mut u8,
+                                                                      dst_len: *mut usize,
+                                                                      last: bool)
+                                                                      -> u32 {
+    // The input bytes are reinterpreted as `str` without any validation.
+    let text = ::std::str::from_utf8_unchecked(::std::slice::from_raw_parts(src, *src_len));
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let (status, consumed, produced) =
+        (*encoder).encode_from_utf8_without_replacement(text, output, last);
+    // Report progress through the in/out length pointers.
+    *src_len = consumed;
+    *dst_len = produced;
+    encoder_result_to_u32(status)
+}
+
+/// Query the worst-case output size when encoding from UTF-16 with
+/// replacement, assuming that nothing actually needs replacing.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `u16_length` additional
+/// input code units, provided that there are no unmappable characters in
+/// the input. Returns `SIZE_MAX` if `size_t` would overflow.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `encoder` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoder_max_buffer_length_from_utf16_if_no_unmappables
+    (encoder: *const Encoder,
+     u16_length: usize)
+     -> usize {
+    match (*encoder).max_buffer_length_from_utf16_if_no_unmappables(u16_length) {
+        Some(length) => length,
+        // No representable answer: signal overflow with `SIZE_MAX`.
+        None => ::std::usize::MAX,
+    }
+}
+
+/// Query the worst-case output size when encoding from UTF-16 without
+/// replacement.
+///
+/// Returns the size of the output buffer in bytes that will not overflow
+/// given the current state of the encoder and `u16_length` additional
+/// input code units. Returns `SIZE_MAX` if `size_t` would overflow.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `encoder` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn encoder_max_buffer_length_from_utf16_without_replacement(encoder: *const Encoder,
+                                                              u16_length: usize)
+                                                              -> usize {
+    match (*encoder).max_buffer_length_from_utf16_without_replacement(u16_length) {
+        Some(length) => length,
+        // No representable answer: signal overflow with `SIZE_MAX`.
+        None => ::std::usize::MAX,
+    }
+}
+
+/// Incrementally encode into byte stream from UTF-16 with unmappable
+/// characters replaced with HTML (decimal) numeric character references.
+///
+/// The top-level FFI documentation explains how the `encoder_encode_*`
+/// functions are mapped from Rust; the semantics are documented on the
+/// [`Encoder`][1] struct.
+///
+/// On entry, `*src_len` gives the length of `src` in `u16` units and
+/// `*dst_len` the length of `dst` in bytes. On return, `*src_len` holds the
+/// amount read, `*dst_len` the amount written and `*had_replacements` the
+/// replacement flag returned by `Encoder::encode_from_utf16()`.
+///
+/// `src` must be non-`NULL` even if `*src_len` is zero. When `*src_len` is
+/// zero, it is OK for `src` to be something non-dereferenceable, such as
+/// `0x1`. Likewise for `dst` when `*dst_len` is zero. This is required due
+/// to Rust's optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, if `src` and
+/// `*src_len` don't designate a valid block of memory or if `dst` and
+/// `*dst_len` don't designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+#[no_mangle]
+pub unsafe extern "C" fn encoder_encode_from_utf16(encoder: *mut Encoder,
+                                                   src: *const u16,
+                                                   src_len: *mut usize,
+                                                   dst: *mut u8,
+                                                   dst_len: *mut usize,
+                                                   last: bool,
+                                                   had_replacements: *mut bool)
+                                                   -> u32 {
+    // View the caller's buffers as Rust slices of the advertised lengths.
+    let input = ::std::slice::from_raw_parts(src, *src_len);
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let (status, consumed, produced, any_replaced) =
+        (*encoder).encode_from_utf16(input, output, last);
+    // Report progress through the out-parameters.
+    *src_len = consumed;
+    *dst_len = produced;
+    *had_replacements = any_replaced;
+    coder_result_to_u32(status)
+}
+
+/// Incrementally encode into byte stream from UTF-16 _without replacement_.
+///
+/// The top-level FFI documentation explains how the `encoder_encode_*`
+/// functions are mapped from Rust; the semantics are documented on the
+/// [`Encoder`][1] struct.
+///
+/// On entry, `*src_len` gives the length of `src` in `u16` units and
+/// `*dst_len` the length of `dst` in bytes. On return, `*src_len` holds the
+/// amount read and `*dst_len` the amount written.
+///
+/// `src` must be non-`NULL` even if `*src_len` is zero. When `*src_len` is
+/// zero, it is OK for `src` to be something non-dereferenceable, such as
+/// `0x1`. Likewise for `dst` when `*dst_len` is zero. This is required due
+/// to Rust's optimization for slices within `Option`.
+///
+/// # Undefined behavior
+///
+/// UB ensues if any of the pointer arguments is `NULL`, if `src` and
+/// `*src_len` don't designate a valid block of memory or if `dst` and
+/// `*dst_len` don't designate a valid block of memory.
+///
+/// [1]: https://docs.rs/encoding_rs/0.6.10/encoding_rs/struct.Encoder.html
+#[no_mangle]
+pub unsafe extern "C" fn encoder_encode_from_utf16_without_replacement(encoder: *mut Encoder,
+                                                                       src: *const u16,
+                                                                       src_len: *mut usize,
+                                                                       dst: *mut u8,
+                                                                       dst_len: *mut usize,
+                                                                       last: bool)
+                                                                       -> u32 {
+    // View the caller's buffers as Rust slices of the advertised lengths.
+    let input = ::std::slice::from_raw_parts(src, *src_len);
+    let output = ::std::slice::from_raw_parts_mut(dst, *dst_len);
+    let (status, consumed, produced) =
+        (*encoder).encode_from_utf16_without_replacement(input, output, last);
+    // Report progress through the in/out length pointers.
+    *src_len = consumed;
+    *dst_len = produced;
+    encoder_result_to_u32(status)
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/.cargo-checksum.json
@@ -0,0 +1,1 @@
+{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"306f3b1134b5f75d0b98763cd2c9589c022676624e5117ec93ee57c0c5bdc2b8",".travis.yml":"4d1af7257c9619f7ae66fc271ba2c1be5f063640ae8ceaa235c8c8aaf32f44ea","CONTRIBUTING.md":"e4ffa92c979c7e6ca7b676842a708ea05b84181327fcde43dfcd8038b678a057","COPYRIGHT":"20d4fff11cca11529df3f02096fbe8ffe350219cdb07cdedea34e6a762866da5","Cargo.toml":"b74676e1affb0a2b528507be488bd9588db646b3b05807dada63cbe7b0747fc6","Ideas.md":"c1be4cc91621f52f38ea7febda7a4bb68086189cacc834c7edac4ba1a9da02fe","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"74aa8b6d04c36bb640ee81187a3f24a2fa94e36d4c1d4f2ca164c3784ae87a83","README.md":"276d801faf4d6347c3ea32ae252cab95df653c846beaac535c5d70cf32094f5e","generate-encoding-data.py":"0b62de6d3b6368166b78a9259f06dc8d0f558504a0ed866dbe75dc2efb4bf464","rustfmt.toml":"c01c06dfbdfcf30730535aab911d69068febb921e2faef9571ceeb6a5c2a3eab","src/ascii.rs":"240c607d3bad850c57d1e96871d5c0371278ed3923c38354bbb4c8a876c9a515","src/big5.rs":"614d479aabc63007f778d1f776a37b885e13d20b7c6c7a2818a729bde342f8a6","src/data.rs":"412c842c698c3ce1cec4a27ab19ca275372ac28940ac49cdf3e0dad71a2c2812","src/euc_jp.rs":"feda0ade5e1c3e4abd7637c59373b977662007990fd164ea7db1acc502ba3534","src/euc_kr.rs":"23e08359ccbe7602f3a90fce78dc76fd4065c236820ac0d11c9d9325045da0e6","src/gb18030.rs":"aa9de27a41715dfb02a3b9161d86e3775f635f625f70d3abaadcd583ee7022c0","src/handles.rs":"8b0691ab21d638bd20078e33247f13afbc8012ff4b843a2fd03e3314353e8520","src/iso_2022_jp.rs":"285e7cea6df41d182a345a0f394a2348b1c313f0d55ed48c349824f2a6aff526","src/lib.rs":"dad6465f541ccdb171312879999d842dcbf11bc09119d81963df3a20f7d4e474","src/macros.rs":"9ab30e7194f61f268cd7d899cabb06ff9ca7717663926fd583b20334f49ac8d3","src/replacement.rs":"782f03f04d110e9a0656262bf4296aa0ab8199e196cb63239c30d9649996caa4","src/shift_jis.rs":"84df4ff58b60e0827d6c0c7049f2cf19033f2b9e25a9186bcfb0bbb05e87b380","sr
c/simd_funcs.rs":"ff30e10bfb58fb8f56f0cc0b4dbcc4af6b343487562ee279ace8b31afd7bcccc","src/single_byte.rs":"0342a921427ed160f5cbe4532490aff5db00886a36b70273f54d8f6a9dcf6974","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis02
08_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"0bcf7eeb8bb33cbc88bd3bd8462437501a43055db02c40a12a15fae8e68dd1cb","src/testing.rs":"60f85c6fb63fd4ab62e90dfa005920e79e0e1885795dc13a7a3c1980507925b1","src/utf_16.rs":"8155c1c0acaab2826ee9f99ba911fbd3125707d797327e630977bc2f3f9b1064","src/utf_8.rs":"14cd64de87d8fc5f814f52f76390bda0b4c705da98e73e376fb424ca02119ba5","src/utf_8_core.rs":"0229de223eef17ad16751a646bcd3839c24f24069d660a4dc61b8a5fad19d16f","src/variant.rs":"93dfec2dcfc9fd9711bb55d48177f4a0e9479c7fbd055f08db3853338569da83","src/x_user_defined.rs":"420fae797ea94e7a51eb005b97621ab32d68a8532c565afc60ecce6bdd84b6bd"},"package":"e00a1b1e95eb46988805ceee6f34cd95c46a6753e290cb3ff0486931989d4a4c"}
\ No newline at end of file
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/.gitignore
@@ -0,0 +1,10 @@
+target
+Cargo.lock
+.project
+.settings
+*~
+*.bk
+fuzz/target
+fuzz/Cargo.lock
+fuzz/artifacts
+fuzz/corpus
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/.travis.yml
@@ -0,0 +1,8 @@
+language: rust
+rust:
+  - stable
+  - beta
+  - nightly
+matrix:
+  allow_failures:
+    - rust: nightly
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/CONTRIBUTING.md
@@ -0,0 +1,45 @@
+If you send a pull request / patch, please observe the following.
+
+## Licensing
+
+Since this crate is dual-licensed,
+[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions)
+is considered to apply in the sense of Contributions being automatically
+under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file).
+That is, by the act of offering a Contribution, you place your Contribution
+under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT`
+file. Please do not contribute if you aren't willing or allowed to license your
+contributions in this manner.
+
+You are encouraged to dedicate test code that you contribute to the Public
+Domain using the CC0 dedication. If you contribute test code that is not
+dedicated to the Public Domain, please be sure not to put it in a part of
+source code that the comments designate as being dedicated to the Public
+Domain.
+
+## Copyright Notices
+
+If you require the addition of your copyright notice, it's up to you to edit in
+your notice as part of your Contribution. Not adding a copyright notice is
+taken as a waiver of copyright notice.
+
+## No Encodings Beyond The Encoding Standard
+
+Please do not contribute implementations of encodings that are not specified
+in the [Encoding Standard](https://encoding.spec.whatwg.org/).
+
+For example, an implementation of UTF-7 would be explicitly not welcome.
+
+## Compatibility with Stable Rust
+
+Please ensure that your Contribution compiles with the latest stable-channel
+rustc.
+
+## rustfmt
+
+Please install [`rustfmt`](https://github.com/rust-lang-nursery/rustfmt) and
+run `cargo fmt` before creating a pull request.
+
+## Unit tests
+
+Please ensure that `cargo test` succeeds.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/COPYRIGHT
@@ -0,0 +1,26 @@
+encoding_rs is copyright 2013-2016 Mozilla Foundation.
+
+Licensed under the Apache License, Version 2.0
+<LICENSE-APACHE or
+https://www.apache.org/licenses/LICENSE-2.0> or the MIT
+license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
+at your option. All files in the project carrying such
+notice may not be copied, modified, or distributed except
+according to those terms.
+
+Test code within encoding_rs is dedicated to the Public Domain when so
+designated (see the individual files for PD/CC0-dedicated sections).
+
+The file utf_8_core.rs was extracted from the Rust project at revision
+7ad7232422f7e5bbfa0e52dabe36c12677df19e2, whose COPYRIGHT file said (in part):
+
+The Rust Project is copyright 2010, The Rust Project
+Developers.
+
+Licensed under the Apache License, Version 2.0
+<LICENSE-APACHE or
+http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
+at your option. All files in the project carrying such
+notice may not be copied, modified, or distributed except
+according to those terms.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "encoding_rs"
+description = "A Gecko-oriented implementation of the Encoding Standard"
+version = "0.6.11" # Remember to keep html_root_url in lib.rs in sync!
+authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
+license = "MIT/Apache-2.0"
+readme = "README.md"
+documentation = "https://docs.rs/encoding_rs/"
+homepage = "https://docs.rs/encoding_rs/"
+repository = "https://github.com/hsivonen/encoding_rs"
+keywords = ["encoding", "web", "unicode", "charset"]
+categories = ["text-processing", "encoding", "web-programming", "email"]
+
+[badges]
+travis-ci = { repository = "hsivonen/encoding_rs" }
+
+[features]
+simd-accel = ["simd"]
+no-static-ideograph-encoder-tables = []
+parallel-utf8 = ["rayon"]
+
+[dependencies]
+cfg-if = "0.1.0"
+simd = { version = "0.2.0", optional = true }
+rayon = { version = "0.7.0", optional = true }
+
+[profile.release]
+lto = true
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/Ideas.md
@@ -0,0 +1,77 @@
+This document contains notes about various ideas that for one reason or another
+are not being actively pursued.
+
+## Next byte is non-ASCII after ASCII optimization
+
+The current plan for a SIMD-accelerated inner loop for handling ASCII bytes
+makes no use of the bit of information that if the buffers didn't end but the
+ASCII loop exited, the next byte will not be an ASCII byte.
+
+## The structure of handles.rs and bound checks
+
+handles.rs is designed to make it possible to avoid bound checks when writing
+to the slices. While it would be possible to omit the bound checks manually,
+it probably makes more sense to carry out an investigation to make sure that
+the compiler performs the omission. If not, it makes more sense to file a bug
+on the compiler than to omit the checks manually.
+
+## Handling ASCII with table lookups when decoding single-byte to UTF-16
+
+Both uconv and ICU outperform encoding_rs when decoding single-byte to UTF-16.
+uconv doesn't even do anything fancy to manually unroll the loop (see below).
+Both handle even the ASCII range using table lookup. That is, there's no branch
+for checking if we're in the lower or upper half of the encoding.
+
+However, adding SIMD acceleration for the ASCII half will likely be a bigger
+win than eliminating the branch to decide ASCII vs. non-ASCII.
+
+## Manual loop unrolling for single-byte encodings
+
+ICU currently outperforms encoding_rs (by over x2!) when decoding a single-byte
+encoding to UTF-16. This appears to be thanks to manually unrolling the
+conversion loop by 16. See [ucnv_MBCSSingleToBMPWithOffsets][1].
+
+[1]: https://ssl.icu-project.org/repos/icu/icu/tags/release-55-1/source/common/ucnvmbcs.cpp
+
+Notably, none of the single-byte encodings have bytes that'd decode to the
+upper half of BMP. Therefore, if the unmappable marker has the highest bit set
+instead of being zero, the check for unmappables within a 16-character stride
+can be done either by ORing the BMP characters in the stride together and
+checking the high bit or by loading the upper halves of the BMP characters
+in a `u8x8` register and checking the high bits using the `_mm_movemask_epi8`
+/ `pmovmskb` SSE2 instruction.
+
+## After non-ASCII, handle ASCII punctuation without SIMD
+
+Since the failure mode of SIMD ASCII acceleration involves wasted alignment
+checks and a wasted SIMD read when the next code unit is non-ASCII and non-Latin
+scripts have runs of non-ASCII even if ASCII spaces and punctuation are used,
+consider handling the next two or three bytes following non-ASCII as non-SIMD
+before looping back to the SIMD mode. Maybe move back to SIMD ASCII faster if
+there's ASCII that's not space or punctuation. Maybe with the "space or
+punctuation" check in place, this code can be allowed to be in place even for
+UTF-8 and Latin single-byte (i.e. not having different code for Latin and
+non-Latin single-byte).
+
+## Prefer maintaining alignment
+
+Instead of returning to acceleration directly after non-ASCII, consider
+continuing to the alignment boundary without acceleration.
+
+## Read from SIMD lanes instead of RAM (cache) when ASCII check fails
+
+When the SIMD ASCII check fails, the data has already been read from memory.
+Test whether it's faster to read the data by lane from the SIMD register than
+to read it again from RAM (cache).
+
+## Use Level 2 Hanzi and Level 2 Kanji ordering
+
+These two are ordered by radical and then by stroke count, so in principle,
+they should be mostly Unicode-ordered, although at least Level 2 Hanzi isn't
+fully Unicode-ordered. Is "mostly" good enough for encode acceleration?
+
+## Create a `divmod_94()` function
+
+Experiment with a function that computes `(i / 94, i % 94)` more efficiently
+than generic code.
+
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/LICENSE-APACHE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/LICENSE-MIT
@@ -0,0 +1,54 @@
+Copyright (c) 2013-2016 Mozilla Foundation
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+The file utf_8_core.rs was extracted from the Rust project at revision
+7ad7232422f7e5bbfa0e52dabe36c12677df19e2, whose LICENSE-MIT file said:
+
+Copyright (c) 2010 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/README.md
@@ -0,0 +1,256 @@
+# encoding_rs
+
+[![Build Status](https://travis-ci.org/hsivonen/encoding_rs.svg?branch=master)](https://travis-ci.org/hsivonen/encoding_rs)
+[![crates.io](https://meritbadge.herokuapp.com/encoding_rs)](https://crates.io/crates/encoding_rs)
+[![docs.rs](https://docs.rs/encoding_rs/badge.svg)](https://docs.rs/encoding_rs/)
+[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/encoding_rs/blob/master/COPYRIGHT)
+
+encoding_rs aspires to become an implementation of the
+[Encoding Standard](https://encoding.spec.whatwg.org/) that
+
+1. Is written in Rust.
+2. Is suitable for use in Gecko as a replacement of uconv. (I.e. supports
+   decoding to UTF-16 and encoding from UTF-16.)
+3. Is suitable for use in Rust code (both in Gecko and independently of Gecko).
+   (I.e. supports decoding to UTF-8 and encoding from UTF-8 and provides an API
+   compatible with at least the most common ways of using
+   [rust-encoding](https://github.com/lifthrasiir/rust-encoding/).)
+
+## Licensing
+
+Please see the file named
+[COPYRIGHT](https://github.com/hsivonen/encoding_rs/blob/master/COPYRIGHT).
+
+## API Documentation
+
+Generated [API documentation](https://docs.rs/encoding_rs/) is available
+online.
+
+## Design
+
+For design considerations, please see the associated [technical proposal to
+rewrite uconv in Rust](https://docs.google.com/document/d/13GCbdvKi83a77ZcKOxaEteXp1SOGZ_9Fmztb9iX22v0/edit#).
+
+## Performance goals
+
+For decoding to UTF-16, the goal is to perform at least as well as Gecko's old
+uconv. For decoding to UTF-8, the goal is to perform at least as well as
+rust-encoding.
+
+Encoding to UTF-8 should be fast. (UTF-8 to UTF-8 encode should be equivalent
+to `memcpy` and UTF-16 to UTF-8 should be fast.)
+
+Speed is a non-goal when encoding to legacy encodings. Encoding to legacy
+encodings should not be optimized for speed at the expense of code size as long
+as form submission and URL parsing in Gecko don't become noticeably too slow
+in real-world use.
+
+A framework for measuring performance is [available separately][1].
+
+[1]: https://github.com/hsivonen/encoding_bench/
+
+## C binding
+
+An FFI layer for encoding_rs is available as a
+[separate crate](https://github.com/hsivonen/encoding_c).
+
+## Compatibility with rust-encoding
+
+A compatibility layer that implements the rust-encoding API on top of
+encoding_rs is
+[provided as a separate crate](https://github.com/hsivonen/encoding_rs_compat)
+(cannot be uploaded to crates.io).
+
+## Roadmap
+
+- [x] Design the low-level API.
+- [x] Provide Rust-only convenience features (some BOM sniffing variants still
+      TODO).
+- [x] Provide an stl/gsl-flavored C++ API.
+- [x] Implement all decoders and encoders.
+- [x] Add unit tests for all decoders and encoders.
+- [x] Finish BOM sniffing variants in Rust-only convenience features.
+- [x] Document the API.
+- [x] Publish the crate on crates.io.
+- [x] Create a solution for measuring performance.
+- [x] Accelerate ASCII conversions using SSE2 on x86.
+- [x] Accelerate ASCII conversions using ALU register-sized operations on
+      non-x86 architectures (process a `usize` instead of a `u8` at a time).
+- [x] Split FFI into a separate crate so that the FFI doesn't interfere with
+      LTO in pure-Rust usage.
+- [x] Compress CJK indices by making use of sequential code points as well
+      as Unicode-ordered parts of indices.
+- [x] Make lookups by label or name use binary search that searches from the
+      end of the label/name to the start.
+- [x] Make labels with non-ASCII bytes fail fast.
+- [x] Parallelize UTF-8 validation using [Rayon](https://github.com/nikomatsakis/rayon).
+- [x] Provide an XPCOM/MFBT-flavored C++ API.
+- [ ] Investigate accelerating single-byte encode with a single fast-tracked
+      range per encoding.
+- [ ] Replace uconv with encoding_rs in Gecko.
+- [x] Implement the rust-encoding API in terms of encoding_rs.
+- [ ] Investigate the use of NEON on newer ARM CPUs that have a lesser penalty
+      on data flow from NEON to ALU registers.
+- [ ] Investigate Björn Höhrmann's lookup table acceleration for UTF-8 as
+      adapted to Rust in rust-encoding.
+
+## Release Notes
+
+### 0.6.11
+
+* Make `Encoder::has_pending_state()` public.
+* Update the `simd` crate dependency to 0.2.0.
+
+### 0.6.10
+
+* Reserve enough space for NCRs when encoding to ISO-2022-JP.
+* Correct max length calculations for multibyte decoders.
+* Correct max length calculations before BOM sniffing has been
+  performed.
+* Correctly calculate max length when encoding from UTF-16 to GBK.
+
+### 0.6.9
+
+* [Don't prepend anything when gb18030 range decode
+  fails](https://github.com/whatwg/encoding/issues/110). (Spec change.)
+
+### 0.6.8
+
+* Correctly handle the case where the first buffer contains a potentially
+  partial BOM and the next buffer is the last buffer.
+* Decode byte `7F` correctly in ISO-2022-JP.
+* Make UTF-16 to UTF-8 encode write closer to the end of the buffer.
+* Implement `Hash` for `Encoding`.
+
+### 0.6.7
+
+* [Map half-width katakana to full-width katakana in ISO-2022-JP
+  encoder](https://github.com/whatwg/encoding/issues/105). (Spec change.)
+* Give `InputEmpty` correct precedence over `OutputFull` when encoding
+  with replacement and the output buffer passed in is too short or the
+  remaining space in the output buffer is too small after a replacement.
+
+### 0.6.6
+
+* Correct max length calculation when a partial BOM prefix is part of
+  the decoder's state.
+
+### 0.6.5
+
+* Correct max length calculation in various encoders.
+* Correct max length calculation in the UTF-16 decoder.
+* Derive `PartialEq` and `Eq` for the `CoderResult`, `DecoderResult`
+  and `EncoderResult` types.
+
+### 0.6.4
+
+* Avoid panic when encoding with replacement and the destination buffer is
+  too short to hold one numeric character reference.
+
+### 0.6.3
+
+* Add support for 32-bit big-endian hosts. (For real this time.)
+
+### 0.6.2
+
+* Fix a panic from subslicing with bad indices in
+  `Encoder::encode_from_utf16`. (Due to an oversight, it lacked the fix that
+  `Encoder::encode_from_utf8` already had.)
+* Micro-optimize error status accumulation in non-streaming case.
+
+### 0.6.1
+
+* Avoid panic near integer overflow in a case that's unlikely to actually
+  happen.
+* Address Clippy lints.
+
+### 0.6.0
+
+* Make the methods for computing worst-case buffer size requirements check
+  for integer overflow.
+* Upgrade rayon to 0.7.0.
+
+### 0.5.1
+
+* Reorder methods for better documentation readability.
+* Add support for big-endian hosts. (Only 64-bit case actually tested.)
+* Optimize the ALU (non-SIMD) case for 32-bit ARM instead of x86_64.
+
+### 0.5.0
+
+* Avoid allocating excessively long buffers in non-streaming decode.
+* Fix the behavior of ISO-2022-JP and replacement decoders near the end of the
+  output buffer.
+* Annotate the result structs with `#[must_use]`.
+
+### 0.4.0
+
+* Split FFI into a separate crate.
+* Performance tweaks.
+* CJK binary size and encoding performance changes.
+* Parallelize UTF-8 validation in the case of long buffers (with optional
+  feature `parallel-utf8`).
+* Borrow even with ISO-2022-JP when possible.
+
+### 0.3.2
+
+* Fix moving pointers to alignment in ALU-based ASCII acceleration.
+* Fix errors in documentation and improve documentation.
+
+### 0.3.1
+
+* Fix UTF-8 to UTF-16 decode for byte sequences beginning with 0xEE.
+* Make UTF-8 to UTF-8 decode SSE2-accelerated when feature `simd-accel` is used.
+* When decoding and encoding ASCII-only input from or to an ASCII-compatible
+  encoding using the non-streaming API, return a borrow of the input.
+* Make encode from UTF-16 to UTF-8 faster.
+
+### 0.3
+
+* Change the references to the instances of `Encoding` from `const` to `static`
+  to make the referents unique across crates that use the references.
+* Introduce non-reference-typed `FOO_INIT` instances of `Encoding` to allow
+  foreign crates to initialize `static` arrays with references to `Encoding`
+  instances even under Rust's constraints that prohibit the initialization of
+  `&'static Encoding`-typed array items with `&'static Encoding`-typed
+  `statics`.
+* Document that the above two points will be reverted if Rust changes `const`
+  to work so that cross-crate usage keeps the referents unique.
+* Return `Cow`s from Rust-only non-streaming methods for encode and decode.
+* `Encoding::for_bom()` returns the length of the BOM.
+* ASCII-accelerated conversions for encodings other than UTF-16LE, UTF-16BE,
+  ISO-2022-JP and x-user-defined.
+* Add SSE2 acceleration behind the `simd-accel` feature flag. (Requires
+  nightly Rust.)
+* Fix panic with long bogus labels.
+* Map [0xCA to U+05BA in windows-1255](https://github.com/whatwg/encoding/issues/73).
+  (Spec change.)
+* Correct the [end of the Shift_JIS EUDC range](https://github.com/whatwg/encoding/issues/53).
+  (Spec change.)
+
+### 0.2.4
+
+* Polish FFI documentation.
+
+### 0.2.3
+
+* Fix UTF-16 to UTF-8 encode.
+
+### 0.2.2
+
+* Add `Encoder.encode_from_utf8_to_vec_without_replacement()`.
+
+### 0.2.1
+
+* Add `Encoding.is_ascii_compatible()`.
+
+* Add `Encoding::for_bom()`.
+
+* Make `==` for `Encoding` use name comparison instead of pointer comparison,
+  because uses of the encoding constants in different crates result in
+  different addresses and the constants cannot be turned into statics without
+  breaking other things.
+
+### 0.2.0
+
+The initial release.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/generate-encoding-data.py
@@ -0,0 +1,2043 @@
+#!/usr/bin/python
+
+# Copyright 2013-2016 Mozilla Foundation. See the COPYRIGHT
+# file at the top-level directory of this distribution.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import json
+import subprocess
+import sys
+
+def cmp_from_end(one, other):
+  """Python 2 `cmp`-style ordering: by length first, then by the items
+  compared from the last index back to the first."""
+  by_length = cmp(len(one), len(other))
+  if by_length:
+    return by_length
+  for pos in reversed(xrange(len(one))):
+    by_item = cmp(one[pos], other[pos])
+    if by_item:
+      return by_item
+  return 0
+
+
+class Label:
+  """An encoding label plus its preferred encoding name; sorts via cmp_from_end."""
+  def __init__(self, label, preferred):
+    self.label, self.preferred = label, preferred
+  def __cmp__(self, other):
+    return cmp_from_end(self.label, other.label)
+
+def static_u16_table(name, data):
+  """Write `data` to data_file as a `pub static` Rust `[u16; N]` array
+  called `name`, one four-digit uppercase hex literal per line."""
+  data_file.write('pub static %s: [u16; %d] = [\n  ' % (name, len(data)))
+
+  for value in data:
+    data_file.write('0x%04X,\n' % value)
+
+  # Closing bracket, a blank line, then a two-space indent.
+  data_file.write('];\n\n  ')
+
+def static_u16_table_from_indexable(name, data, item):
+  """Write element `item` of every entry in `data` to data_file as a private,
+  feature-gated Rust `[u16; N]` static called `name`."""
+  data_file.write('#[cfg(not(feature = "no-static-ideograph-encoder-tables"))]\n'
+                  'static %s: [u16; %d] = [\n  ' % (name, len(data)))
+
+  for entry in data:
+    data_file.write('0x%04X,\n' % entry[item])
+
+  # Closing bracket, a blank line, then a two-space indent.
+  data_file.write('];\n\n  ')
+
+def static_u8_pair_table_from_indexable(name, data, item):
+  """Write element `item` (a two-value tuple) of every entry in `data` to
+  data_file as a private, feature-gated Rust `[[u8; 2]; N]` static `name`."""
+  data_file.write('#[cfg(not(feature = "no-static-ideograph-encoder-tables"))]\n'
+                  'static %s: [[u8; 2]; %d] = [\n  ' % (name, len(data)))
+
+  for entry in data:
+    data_file.write('[0x%02X, 0x%02X],\n' % entry[item])
+
+  # Closing bracket, a blank line, then a two-space indent.
+  data_file.write('];\n\n  ')
+
+# Accumulators filled in from `data` by the loop that follows the helpers.
+preferred = []
+dom = []
+labels = []
+single_byte = []
+multi_byte = []
+
+# Inputs are read from a sibling ../encoding directory (relative to the cwd).
+with open("../encoding/encodings.json", "r") as encodings_json:
+  data = json.load(encodings_json)
+
+with open("../encoding/indexes.json", "r") as indexes_json:
+  indexes = json.load(indexes_json)
+
+def to_camel_name(name):
+  """Turn an encoding name into a CamelCase Rust identifier."""
+  iso_prefix = u"iso-8859-"
+  if name.startswith(iso_prefix):
+    return u"Iso8I" if name == u"iso-8859-8-i" else name.replace(iso_prefix, u"Iso")
+  return name.title().replace(u"X-", u"").replace(u"-", u"").replace(u"_", u"")
+
+def to_constant_name(name):  # e.g. u"windows-1252" -> u"WINDOWS_1252"
+  return name.upper().replace(u"-", u"_")
+
+def to_snake_name(name):  # e.g. u"ISO-8859-2" -> u"iso_8859_2"
+  return name.lower().replace(u"-", u"_")
+
+def to_dom_name(name):  # identity mapping: the name is returned unchanged
+  return name
+
+# Collect encoding names and labels from `data`, then sort them.
+
+for group in data:
+  if group["heading"] == "Legacy single-byte encodings":
+    single_byte = group["encodings"]  # rebinds the global that is_single_byte() reads
+  else:
+    multi_byte.extend(group["encodings"])
+  for encoding in group["encodings"]:
+    preferred.append(encoding["name"])
+    for label in encoding["labels"]:
+      labels.append(Label(label, encoding["name"]))
+
+for name in preferred:
+  dom.append(to_dom_name(name))
+
+preferred.sort()  # plain string order
+labels.sort()  # Python 2 Label.__cmp__: by length, then from the last character backwards
+dom.sort(cmp=cmp_from_end)  # the same end-first order, applied to the names
+
+longest_label_length = 0
+longest_name_length = 0
+longest_label = None
+longest_name = None
+
+for name in preferred:  # strict `>` keeps the first of several equally long names
+  if len(name) > longest_name_length:
+    longest_name_length = len(name)
+    longest_name = name
+
+for label in labels:  # likewise keeps the first of several equally long labels
+  if len(label.label) > longest_label_length:
+    longest_label_length = len(label.label)
+    longest_label = label.label
+
+def is_single_byte(name):
+  """Tell whether `name` equals the "name" of any entry in the module-level
+  `single_byte` list."""
+  # Short-circuits on the first match.
+  return any(name == entry["name"] for entry in single_byte)
+
+def read_non_generated(path):
+  """Return `(head, tail)` for the file at `path`: `head` runs through the
+  BEGIN marker, `tail` starts at the END marker. Prints a message and calls
+  `sys.exit(-1)` when a marker is missing."""
+  generated_begin = "// BEGIN GENERATED CODE. PLEASE DO NOT EDIT."
+  generated_end = "// END GENERATED CODE"
+  with open(path, "r") as partially_generated_file:
+    full = partially_generated_file.read()
+  head_end = full.find(generated_begin)
+  if head_end < 0:
+    print "Can't find generated code start marker in %s. Exiting." % path
+    sys.exit(-1)
+  head_end += len(generated_begin)
+  # Search after the BEGIN marker so an earlier stray END marker is ignored.
+  tail_start = full.find(generated_end, head_end)
+  if tail_start < 0:
+    print "Can't find generated code end marker in %s. Exiting." % path
+    sys.exit(-1)
+  return (full[:head_end], full[tail_start:])
+
+(lib_rs_begin, lib_rs_end) = read_non_generated("src/lib.rs")  # text through the BEGIN marker, and from the END marker on
+
+label_file = open("src/lib.rs", "w")  # truncates lib.rs; everything is rewritten below
+
+label_file.write(lib_rs_begin)
+label_file.write("""
+// Instead, please regenerate using generate-encoding-data.py
+
+const LONGEST_LABEL_LENGTH: usize = %d; // %s
+
+""" % (longest_label_length, longest_label))  # the longest label itself goes into the Rust comment
+
+for name in preferred:  # emit an `_INIT` static plus a reference static per encoding name
+  variant = None
+  if is_single_byte(name):  # ISO-8859-8-I is pointed at the ISO_8859_8_DATA table
+    variant = "SingleByte(data::%s_DATA)" % to_constant_name(u"iso-8859-8" if name == u"ISO-8859-8-I" else name)
+  else:
+    variant = to_camel_name(name)
+
+  label_file.write('''/// The initializer for the %s encoding.
+///
+/// For use only for taking the address of this form when
+/// Rust prohibits the use of the non-`_INIT` form directly,
+/// such as in initializers of other `static`s. If in doubt,
+/// use the corresponding non-`_INIT` reference-typed `static`.
+///
+/// This part of the public API will go away if Rust changes
+/// to make the referent of `pub const FOO: &'static Encoding`
+/// unique cross-crate or if Rust starts allowing static arrays
+/// to be initialized with `pub static FOO: &'static Encoding`
+/// items.
+pub static %s_INIT: Encoding = Encoding {
+    name: "%s",
+    variant: VariantEncoding::%s,
+};
+
+/// The %s encoding.
+///
+/// This will change from `static` to `const` if Rust changes
+/// to make the referent of `pub const FOO: &'static Encoding`
+/// unique cross-crate, so don't take the address of this
+/// `static`.
+pub static %s: &'static Encoding = &%s_INIT;
+
+''' % (to_dom_name(name), to_constant_name(name), to_dom_name(name), variant, to_dom_name(name), to_constant_name(name), to_constant_name(name)))
+
+label_file.write("""static ENCODINGS_SORTED_BY_NAME: [&'static Encoding; %d] = [
+""" % (len(dom) - 1))  # one fewer than len(dom): UTF-8 is left out of this array below
+
+for dom_name in dom:  # `dom` was sorted with cmp_from_end
+  if dom_name != "UTF-8":
+    label_file.write("&%s_INIT,\n" % to_constant_name(dom_name))
+
+label_file.write("""];
+
+static LABELS_SORTED: [&'static str; %d] = [
+""" % len(labels))
+
+for label in labels:
+  label_file.write('''"%s",\n''' % label.label)
+
+label_file.write("""];
+
+static ENCODINGS_IN_LABEL_SORT: [&'static Encoding; %d] = [
+""" % len(labels))
+
+for label in labels:  # same iteration order as LABELS_SORTED, so the indices line up
+  label_file.write('''&%s_INIT,\n''' % to_constant_name(label.preferred))
+
+label_file.write('''];
+
+''')
+label_file.write(lib_rs_end)  # tail begins with the END marker line
+label_file.close()
+
+label_test_file = open("src/test_labels_names.rs", "w")  # fully generated file, overwritten on every run
+label_test_file.write('''// Any copyright to the test code below this comment is dedicated to the
+// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+use super::*;
+
+#[test]
+fn test_all_labels() {
+''')
+
+for label in labels:  # one assertion per label: it must resolve to its preferred encoding
+  label_test_file.write('''assert_eq!(Encoding::for_label(b"%s"), Some(%s));\n''' % (label.label, to_constant_name(label.preferred)))
+
+label_test_file.write('''}
+
+#[test]
+fn test_all_names() {
+''')
+
+for dom_name in dom:  # one assertion per name; unlike ENCODINGS_SORTED_BY_NAME this includes UTF-8
+  label_test_file.write('''assert_eq!(Encoding::for_name(b"%s"), %s);\n''' % (dom_name, to_constant_name(dom_name)))
+
+label_test_file.write('''}
+''')
+label_test_file.close()
+
+def null_to_zero(code_point):
+  """Return `code_point`, or 0 when it is falsy (e.g. a JSON null)."""
+  # `x or 0` yields 0 for None, 0 and any other falsy value, else x itself.
+  return code_point or 0
+
+data_file = open("src/data.rs", "w")  # module-level handle that the static_*_table helpers write to
+data_file.write('''// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+''')
+
+# Single-byte: one 128-entry u16 table per encoding
+
+for encoding in single_byte:
+  name = encoding["name"]
+  if name == u"ISO-8859-8-I":  # no table of its own; lib.rs points it at ISO_8859_8_DATA
+    continue
+
+  data_file.write('''pub const %s_DATA: &'static [u16; 128] = &[
+''' % to_constant_name(name))
+
+  for code_point in indexes[name.lower()]:  # index key is the lower-cased name; nulls become 0
+    data_file.write('0x%04X,\n' % null_to_zero(code_point))
+
+  data_file.write('''];
+
+''')
+
+# Big5
+
+index = indexes["big5"]
+
+astralness = []
+low_bits = []
+
+for code_point in index[942:19782]:
+  if code_point:
+    astralness.append(1 if code_point > 0xFFFF else 0)
+    low_bits.append(code_point & 0xFFFF)
+  else:
+    astralness.append(0)
+    low_bits.append(0)
+
+# pad length to multiple of 32
+for j in xrange(32 - (len(astralness) % 32)):
+  astralness.append(0)
+
+data_file.write('''static BIG5_ASTRALNESS: [u32; %d] = [
+''' % (len(astralness) / 32))
+
+i = 0
+while i < len(astralness):
+  accu = 0
+  for j in xrange(32):
+    accu |= astralness[i + j] << j
+  data_file.write('0x%08X,\n' % accu)
+  i += 32
+
+data_file.write('''];
+
+''')
+
+static_u16_table("BIG5_LOW_BITS", low_bits)
+
+# Encoder table for Level 1 Hanzi
+# Note: If we were OK with doubling this table, we
+# could use a directly-indexable table instead...
+level1_hanzi_index = index[5495:10896]
+level1_hanzi_pairs = []
+for i in xrange(len(level1_hanzi_index)):
+  hanzi_lead = (i / 157) + 0xA4
+  hanzi_trail = (i % 157)
+  hanzi_trail += 0x40 if hanzi_trail < 0x3F else 0x62
+  level1_hanzi_pairs.append((level1_hanzi_index[i], (hanzi_lead, hanzi_trail)))
+level1_hanzi_pairs.append((0x4E5A, (0xC8, 0x7B)))
+level1_hanzi_pairs.append((0x5202, (0xC8, 0x7D)))
+level1_hanzi_pairs.append((0x9FB0, (0xC8, 0xA1)))
+level1_hanzi_pairs.append((0x5188, (0xC8, 0xA2)))
+level1_hanzi_pairs.append((0x9FB1, (0xC8, 0xA3)))
+level1_hanzi_pairs.sort(key=lambda x: x[0])
+
+static_u16_table_from_indexable("BIG5_LEVEL1_HANZI_CODE_POINTS", level1_hanzi_pairs, 0)
+static_u8_pair_table_from_indexable("BIG5_LEVEL1_HANZI_BYTES", level1_hanzi_pairs, 1)
+
+# JIS0208
+
+index = indexes["jis0208"]
+
+# JIS 0208 Level 1 Kanji
+static_u16_table("JIS0208_LEVEL1_KANJI", index[1410:4375])
+
+# JIS 0208 Level 2 Kanji and Additional Kanji
+static_u16_table("JIS0208_LEVEL2_AND_ADDITIONAL_KANJI", index[4418:7808])
+
+# IBM Kanji
+static_u16_table("IBM_KANJI", index[8272:8632])
+
+# Check that the other instance is the same
+if index[8272:8632] != index[10744:11104]:
+  raise Error()
+
+# JIS 0208 symbols (all non-Kanji, non-range items), stored as runs of consecutive mapped pointers
+symbol_index = []  # code points of all runs, concatenated in scan order
+symbol_triples = []  # flat list of (run start pointer, run length, run start offset in symbol_index)
+pointers_to_scan = [  # half-open [start, end) pointer ranges of the jis0208 index to scan
+  (0, 188),
+  (658, 691),
+  (1159, 1221),
+]
+in_run = False  # True while the scan is inside a run of mapped pointers
+run_start_pointer = 0
+run_start_array_index = 0
+for (start, end) in pointers_to_scan:
+  for i in range(start, end):
+    code_point = index[i]
+    if in_run:
+      if code_point:
+        symbol_index.append(code_point)
+      else:  # unmapped pointer: close the current run
+        symbol_triples.append(run_start_pointer)
+        symbol_triples.append(i - run_start_pointer)
+        symbol_triples.append(run_start_array_index)
+        in_run = False
+    else:
+      if code_point:  # first mapped pointer after a gap: open a new run
+        in_run = True
+        run_start_pointer = i
+        run_start_array_index = len(symbol_index)
+        symbol_index.append(code_point)
+  if in_run:  # a run still open when a scan range ends is closed at that range's end
+    symbol_triples.append(run_start_pointer)
+    symbol_triples.append(end - run_start_pointer)
+    symbol_triples.append(run_start_array_index)
+    in_run = False
+if in_run:  # sanity check only: every scan range resets in_run above
+  raise Error()
+
+# Now add manually the two overlapping slices of
+# index from the NEC/IBM extensions.
+run_start_array_index = len(symbol_index)
+symbol_index.extend(index[10736:10744])
+# Later
+symbol_triples.append(10736)
+symbol_triples.append(8)
+symbol_triples.append(run_start_array_index)
+# Earlier
+symbol_triples.append(8644)
+symbol_triples.append(4)
+symbol_triples.append(run_start_array_index)  # same offset as the "Later" triple: both triples read the one stored slice
+
+static_u16_table("JIS0208_SYMBOLS", symbol_index)
+static_u16_table("JIS0208_SYMBOL_TRIPLES", symbol_triples)
+
+# Write down the magic numbers needed when preferring the earlier case
+data_file.write('''const IBM_SYMBOL_START: usize = %d;''' % (run_start_array_index + 1))  # NOTE(review): none of these three writes ends in a newline, so the constants share one output line
+data_file.write('''const IBM_SYMBOL_END: usize = %d;''' % (run_start_array_index + 4))  # START..END spans entries 1..3 of the appended slice
+data_file.write('''const IBM_SYMBOL_POINTER_START: usize = %d;''' % 8645)  # 8645 is the "Earlier" start pointer 8644 plus the same offset of 1
+
+# JIS 0208 ranges (excluding kana): runs where both pointers and code points are consecutive
+range_triples = []  # flat list of (run start pointer, run length, run start code point)
+pointers_to_scan = [  # half-open [start, end) pointer ranges of the jis0208 index to scan
+  (188, 281),
+  (470, 657),
+  (1128, 1159),
+  (8634, 8644),
+  (10716, 10736),
+]
+in_run = False  # True while the scan is inside a run
+run_start_pointer = 0
+run_start_code_point = 0
+previous_code_point = 0
+for (start, end) in pointers_to_scan:
+  for i in range(start, end):
+    code_point = index[i]
+    if in_run:
+      if code_point:
+        if previous_code_point + 1 != code_point:  # code points stopped being consecutive: emit the run and start a new one at i
+          range_triples.append(run_start_pointer)
+          range_triples.append(i - run_start_pointer)
+          range_triples.append(run_start_code_point)
+          run_start_pointer = i
+          run_start_code_point = code_point
+        previous_code_point = code_point
+      else:  # unmapped pointer: emit the run and reset the run state
+          range_triples.append(run_start_pointer)
+          range_triples.append(i - run_start_pointer)
+          range_triples.append(run_start_code_point)
+          run_start_pointer = 0
+          run_start_code_point = 0
+          previous_code_point = 0
+          in_run = False
+    else:
+      if code_point:  # first mapped pointer after a gap: open a new run
+        in_run = True
+        run_start_pointer = i
+        run_start_code_point = code_point
+        previous_code_point = code_point
+  if in_run:  # a run still open when a scan range ends is closed at that range's end
+    range_triples.append(run_start_pointer)
+    range_triples.append(end - run_start_pointer)
+    range_triples.append(run_start_code_point)
+    run_start_pointer = 0
+    run_start_code_point = 0
+    previous_code_point = 0
+    in_run = False
+if in_run:  # sanity check only: every scan range resets in_run above
+  raise Error()
+
+static_u16_table("JIS0208_RANGE_TRIPLES", range_triples)
+
+# Encoder table for Level 1 Kanji
+# Note: If we were OK with 30 KB more footprint, we
+# could use a directly-indexable table instead...
+level1_kanji_index = index[1410:4375]
+level1_kanji_pairs = []
+for i in xrange(len(level1_kanji_index)):
+  pointer = 1410 + i
+  (lead, trail) = divmod(pointer, 188)
+  lead += 0x81 if lead < 0x1F else 0xC1
+  trail += 0x40 if trail < 0x3F else 0x41
+  level1_kanji_pairs.append((level1_kanji_index[i], (lead, trail)))
+level1_kanji_pairs.sort(key=lambda x: x[0])
+
+static_u16_table_from_indexable("JIS0208_LEVEL1_KANJI_CODE_POINTS", level1_kanji_pairs, 0)
+static_u8_pair_table_from_indexable("JIS0208_LEVEL1_KANJI_SHIFT_JIS_BYTES", level1_kanji_pairs, 1)
+
+# ISO-2022-JP half-width katakana
+
+# index is still jis0208
+half_width_index = indexes["iso-2022-jp-katakana"]
+
+data_file.write('''pub static ISO_2022_JP_HALF_WIDTH_TRAIL: [u8; %d] = [
+''' % len(half_width_index))
+
+for i in xrange(len(half_width_index)):
+  code_point = half_width_index[i]
+  pointer = index.index(code_point)
+  trail = pointer % 94 + 0x21
+  data_file.write('0x%02X,\n' % trail)
+
+data_file.write('''];
+
+''')
+
+# EUC-KR
+
+index = indexes["euc-kr"]
+
+# Unicode 1.1 Hangul above the old KS X 1001 block
+# Compressed form takes 35% of uncompressed form
+pointers = []  # gap-free pointer at which each run of consecutive code points starts
+offsets = []  # code point at the start of each run (parallel to pointers)
+previous_code_point = 0
+for row in xrange(0x20):
+  for column in xrange(190):
+    i = column + (row * 190)  # pointer into the euc-kr index, 190 cells per row
+    # Skip the gaps
+    if (column >= 0x1A and column < 0x20) or (column >= 0x3A and column < 0x40):
+      continue
+    code_point = index[i]
+    if previous_code_point > code_point:  # the emitted offsets are binary-searched by the Rust encode helper, so they must ascend
+      raise Error()
+    if code_point - previous_code_point != 1:  # not a continuation of the previous run: record a new run start
+      adjustment = 0  # number of gap cells skipped earlier in this row
+      if column >= 0x40:
+        adjustment = 12
+      elif column >= 0x20:
+        adjustment = 6
+      pointers.append(column - adjustment + (row * (190 - 12)))  # pointer in the gap-free layout, 178 cells per row
+      offsets.append(code_point)
+    previous_code_point = code_point
+
+static_u16_table("CP949_TOP_HANGUL_POINTERS", pointers)
+static_u16_table("CP949_TOP_HANGUL_OFFSETS", offsets)
+
+# Unicode 1.1 Hangul to the left of the old KS X 1001 block
+pointers = []  # gap-free pointer at which each run of consecutive code points starts
+offsets = []  # code point at the start of each run (parallel to pointers)
+previous_code_point = 0
+for row in xrange(0x46 - 0x20):
+  for column in xrange(190 - 94):
+    i = 6080 + column + (row * 190)  # only the first 96 cells of each 190-cell row are visited
+    # Skip the gaps
+    if (column >= 0x1A and column < 0x20) or (column >= 0x3A and column < 0x40):
+      continue
+    if i > 13127:
+      # Exclude unassigned on partial last row
+      break
+    code_point = index[i]
+    if previous_code_point > code_point:  # the emitted offsets are binary-searched by the Rust encode helper, so they must ascend
+      raise Error()
+    if code_point - previous_code_point != 1:  # not a continuation of the previous run: record a new run start
+      adjustment = 0  # number of gap cells skipped earlier in this row
+      if column >= 0x40:
+        adjustment = 12
+      elif column >= 0x20:
+        adjustment = 6
+      pointers.append(column - adjustment + (row * (190 - 94 - 12)))  # pointer in the gap-free layout, 84 cells per row
+      offsets.append(code_point)
+    previous_code_point = code_point
+
+static_u16_table("CP949_LEFT_HANGUL_POINTERS", pointers)
+static_u16_table("CP949_LEFT_HANGUL_OFFSETS", offsets)
+
+# KS X 1001 Hangul
+hangul_index = []
+previous_code_point = 0
+for row in xrange(0x48 - 0x2F):
+  for column in xrange(94):
+    code_point = index[9026 + column + (row * 190)]
+    if previous_code_point >= code_point:
+      raise Error()
+    hangul_index.append(code_point)
+    previous_code_point = code_point
+
+static_u16_table("KSX1001_HANGUL", hangul_index)
+
+# KS X 1001 Hanja
+hanja_index = []
+for row in xrange(0x7D - 0x49):
+  for column in xrange(94):
+    hanja_index.append(index[13966 + column + (row * 190)])
+
+static_u16_table("KSX1001_HANJA", hanja_index)
+
+# KS X 1001 symbols
+symbol_index = []
+for i in range(6176, 6270):
+  symbol_index.append(index[i])
+for i in range(6366, 6437):
+  symbol_index.append(index[i])
+
+static_u16_table("KSX1001_SYMBOLS", symbol_index)
+
+# KS X 1001 Uppercase Latin
+subindex = []
+for i in range(7506, 7521):
+  subindex.append(null_to_zero(index[i]))
+
+static_u16_table("KSX1001_UPPERCASE", subindex)
+
+# KS X 1001 Lowercase Latin
+subindex = []
+for i in range(7696, 7712):
+  subindex.append(index[i])
+
+static_u16_table("KSX1001_LOWERCASE", subindex)
+
+# KS X 1001 Box drawing
+subindex = []
+for i in range(7126, 7194):
+  subindex.append(index[i])
+
+static_u16_table("KSX1001_BOX", subindex)
+
+# KS X 1001 other
+pointers = []
+offsets = []
+previous_code_point = 0
+for row in xrange(10):
+  for column in xrange(94):
+    i = 6556 + column + (row * 190)
+    code_point = index[i]
+    # Exclude ranges that were processed as lookup tables
+    # or that contain unmapped cells by filling them with
+    # ASCII. Upon encode, ASCII code points will
+    # never appear as the search key.
+    if (i >= 6946 and i <= 6950):
+      code_point = i - 6946
+    elif (i >= 6961 and i <= 6967):
+      code_point = i - 6961
+    elif (i >= 6992 and i <= 6999):
+      code_point = i - 6992
+    elif (i >= 7024 and i <= 7029):
+      code_point = i - 7024
+    elif (i >= 7126 and i <= 7219):
+      code_point = i - 7126
+    elif (i >= 7395 and i <= 7409):
+      code_point = i - 7395
+    elif (i >= 7506 and i <= 7521):
+      code_point = i - 7506
+    elif (i >= 7696 and i <= 7711):
+      code_point = i - 7696
+    elif (i >= 7969 and i <= 7979):
+      code_point = i - 7969
+    elif (i >= 8162 and i <= 8169):
+      code_point = i - 8162
+    elif (i >= 8299 and i <= 8313):
+      code_point = i - 8299
+    elif (i >= 8347 and i <= 8359):
+      code_point = i - 8347
+    if code_point - previous_code_point != 1:
+      pointers.append(column + (row * 94))
+      offsets.append(code_point)
+    previous_code_point = code_point
+
+static_u16_table("KSX1001_OTHER_POINTERS", pointers)
+# Omit the last offset, because the end of the last line
+# is unmapped, so we don't want to look at it.
+static_u16_table("KSX1001_OTHER_UNSORTED_OFFSETS", offsets[:-1])
+
+# JIS 0212
+
+index = indexes["jis0212"]
+
+# JIS 0212 Kanji
+static_u16_table("JIS0212_KANJI", index[1410:7211])
+
+# JIS 0212 accented (all non-Kanji, non-range items), stored as runs that may contain isolated 0 placeholders
+symbol_index = []  # code points of all runs, concatenated in scan order
+symbol_triples = []  # flat list of (run start pointer, run length, run start offset in symbol_index)
+pointers_to_scan = [  # half-open [start, end) pointer ranges of the jis0212 index to scan
+  (0, 596),
+  (608, 644),
+  (656, 1409),
+]
+in_run = False  # True while the scan is inside a run
+run_start_pointer = 0
+run_start_array_index = 0
+for (start, end) in pointers_to_scan:
+  for i in range(start, end):
+    code_point = index[i]
+    if in_run:
+      if code_point:
+        symbol_index.append(code_point)
+      elif index[i + 1]:  # unmapped pointer followed by a mapped one: keep the run going with a 0 placeholder; NOTE(review): at i == end - 1 this peeks one pointer past the scan range
+        symbol_index.append(0)
+      else:  # two unmapped pointers in a row: close the current run
+        symbol_triples.append(run_start_pointer)
+        symbol_triples.append(i - run_start_pointer)
+        symbol_triples.append(run_start_array_index)
+        in_run = False
+    else:
+      if code_point:  # first mapped pointer after a gap: open a new run
+        in_run = True
+        run_start_pointer = i
+        run_start_array_index = len(symbol_index)
+        symbol_index.append(code_point)
+  if in_run:  # a run still open when a scan range ends is closed at that range's end
+    symbol_triples.append(run_start_pointer)
+    symbol_triples.append(end - run_start_pointer)
+    symbol_triples.append(run_start_array_index)
+    in_run = False
+if in_run:  # sanity check only: every scan range resets in_run above
+  raise Error()
+
+static_u16_table("JIS0212_ACCENTED", symbol_index)  # the Rust jis0212_accented_decode helper treats a stored 0 as unmapped
+static_u16_table("JIS0212_ACCENTED_TRIPLES", symbol_triples)
+
+# gb18030
+
+index = indexes["gb18030"]
+
+# Unicode 1.1 ideographs above the old GB2312 block
+# Compressed form takes 63% of uncompressed form
+pointers = []  # pointer at which each run of consecutive code points starts
+offsets = []  # code point at the start of each run (parallel to pointers)
+previous_code_point = 0
+for i in xrange(6080):
+  code_point = index[i]
+  if previous_code_point > code_point:  # the emitted offsets are binary-searched by the Rust encode helper, so they must ascend
+    raise Error()
+  if code_point - previous_code_point != 1:  # not a continuation of the previous run: record a new run start
+    pointers.append(i)
+    offsets.append(code_point)
+  previous_code_point = code_point
+
+static_u16_table("GBK_TOP_IDEOGRAPH_POINTERS", pointers)
+static_u16_table("GBK_TOP_IDEOGRAPH_OFFSETS", offsets)
+
+# Unicode 1.1 ideographs to the left of the old GB2312 block
+# Compressed form takes 40% of uncompressed form
+pointers = []  # compacted pointer (96 cells per row) at which each run of consecutive code points starts
+offsets = []  # code point at the start of each run (parallel to pointers)
+previous_code_point = 0
+for row in xrange(0x7D - 0x29):
+  for column in xrange(190 - 94):
+    i = 7790 + column + (row * 190)  # only the first 96 cells of each 190-cell row are visited
+    if i > 23650:
+      # Exclude compatibility ideographs at the end
+      break
+    code_point = index[i]
+    if previous_code_point > code_point:  # the emitted offsets are binary-searched by the Rust encode helper, so they must ascend
+      raise Error()
+    if code_point - previous_code_point != 1:  # not a continuation of the previous run: record a new run start
+      pointers.append(column + (row * (190 - 94)))
+      offsets.append(code_point)
+    previous_code_point = code_point
+
+static_u16_table("GBK_LEFT_IDEOGRAPH_POINTERS", pointers)
+static_u16_table("GBK_LEFT_IDEOGRAPH_OFFSETS", offsets)
+
+# GBK other (excl. Ext A, Compat & PUA at the bottom)
+pointers = []  # compacted pointer (96 cells per row) at which each run starts
+offsets = []  # code point at the start of each run; not checked for ascending order here, hence "UNSORTED"
+previous_code_point = 0
+for row in xrange(0x29 - 0x20):
+  for column in xrange(190 - 94):
+    i = 6080 + column + (row * 190)  # only the first 96 cells of each 190-cell row are visited
+    code_point = index[i]
+    if code_point - previous_code_point != 1:  # not a continuation of the previous run: record a new run start
+      pointers.append(column + (row * (190 - 94)))
+      offsets.append(code_point)
+    previous_code_point = code_point
+
+pointers.append((190 - 94) * (0x29 - 0x20))  # end sentinel: one more pointer than offsets, as the Rust map_with_unsorted_ranges helper asserts
+static_u16_table("GBK_OTHER_POINTERS", pointers)
+static_u16_table("GBK_OTHER_UNSORTED_OFFSETS", offsets)
+
+# GBK bottom: Compatibility ideagraphs, Ext A and PUA
+bottom_index = []
+# 5 compat following Unified Ideographs
+for i in range(23651, 23656):
+  bottom_index.append(index[i])
+# Last row
+for i in range(23750, 23846):
+  bottom_index.append(index[i])
+
+static_u16_table("GBK_BOTTOM", bottom_index)
+
+# GB2312 Hanzi
+# (and the 5 PUA code points in between Level 1 and Level 2)
+hanzi_index = []
+for row in xrange(0x77 - 0x2F):
+  for column in xrange(94):
+    hanzi_index.append(index[9026 + column + (row * 190)])
+
+static_u16_table("GB2312_HANZI", hanzi_index)
+
+# GB2312 symbols
+symbol_index = []
+for i in xrange(94):
+  symbol_index.append(index[6176 + i])
+
+static_u16_table("GB2312_SYMBOLS", symbol_index)
+
+# GB2312 symbols on Greek row (incl. PUA)
+symbol_index = []
+for i in xrange(22):
+  symbol_index.append(index[7189 + i])
+
+static_u16_table("GB2312_SYMBOLS_AFTER_GREEK", symbol_index)
+
+# GB2312 Pinyin
+pinyin_index = []
+for i in xrange(32):
+  pinyin_index.append(index[7506 + i])
+
+static_u16_table("GB2312_PINYIN", pinyin_index)
+
+# GB2312 other (excl. bottom PUA)
+pointers = []  # compacted pointer (94 cells per row) at which each run starts
+offsets = []  # code point at the start of each run; not checked for ascending order here, hence "UNSORTED"
+previous_code_point = 0
+for row in xrange(14):
+  for column in xrange(94):
+    i = 6366 + column + (row * 190)  # 94 cells are visited in each 190-cell row
+    code_point = index[i]
+    # Exclude the two ranges that were processed as
+    # lookup tables above by filling them with
+    # ASCII. Upon encode, ASCII code points will
+    # never appear as the search key.
+    if (i >= 7189 and i < 7189 + 22):  # the span emitted as GB2312_SYMBOLS_AFTER_GREEK
+      code_point = i - 7189
+    elif (i >= 7506 and i < 7506 + 32):  # the span emitted as GB2312_PINYIN
+      code_point = i - 7506
+    if code_point - previous_code_point != 1:  # not a continuation of the previous run: record a new run start
+      pointers.append(column + (row * 94))
+      offsets.append(code_point)
+    previous_code_point = code_point
+
+pointers.append(14 * 94)  # end sentinel: one more pointer than offsets, as the Rust map_with_unsorted_ranges helper asserts
+static_u16_table("GB2312_OTHER_POINTERS", pointers)
+static_u16_table("GB2312_OTHER_UNSORTED_OFFSETS", offsets)
+
+# Non-gbk code points
+pointers = []
+offsets = []
+for pair in indexes["gb18030-ranges"]:
+  if pair[1] == 0x10000:
+    break # the last entry doesn't fit in u16
+  pointers.append(pair[0])
+  offsets.append(pair[1])
+
+static_u16_table("GB18030_RANGE_POINTERS", pointers)
+static_u16_table("GB18030_RANGE_OFFSETS", offsets)
+
+# Encoder table for Level 1 Hanzi
+# The units here really fit into 12 bits, but since we're
+# looking for speed here, let's use 16 bits per unit.
+# Once we use 16 bits per unit, we might as well precompute
+# the output bytes.
+level1_hanzi_index = hanzi_index[:(94 * (0xD8 - 0xB0) - 5)]
+level1_hanzi_pairs = []
+for i in xrange(len(level1_hanzi_index)):
+  hanzi_lead = (i / 94) + 0xB0
+  hanzi_trail = (i % 94) + 0xA1
+  level1_hanzi_pairs.append((level1_hanzi_index[i], (hanzi_lead, hanzi_trail)))
+level1_hanzi_pairs.sort(key=lambda x: x[0])
+
+static_u16_table_from_indexable("GB2312_LEVEL1_HANZI_CODE_POINTS", level1_hanzi_pairs, 0)
+static_u8_pair_table_from_indexable("GB2312_LEVEL1_HANZI_BYTES", level1_hanzi_pairs, 1)
+
+data_file.write('''#[inline(always)]
+fn map_with_ranges(haystack: &[u16], other: &[u16], needle: u16) -> u16 {
+    debug_assert_eq!(haystack.len(), other.len());
+    match haystack.binary_search(&needle) {
+        Ok(i) => other[i],
+        Err(i) => other[i - 1] + (needle - haystack[i - 1]),
+    }
+}
+
+#[inline(always)]
+fn map_with_unsorted_ranges(haystack: &[u16], other: &[u16], needle: u16) -> Option<u16> {
+    debug_assert_eq!(haystack.len() + 1, other.len());
+    for i in 0..haystack.len() {
+        let start = other[i];
+        let end = other[i + 1];
+        let length = end - start;
+        let offset = needle.wrapping_sub(haystack[i]);
+        if offset < length {
+            return Some(start + offset);
+        }
+    }
+    None
+}
+
+#[inline(always)]
+pub fn position(haystack: &[u16], needle: u16) -> Option<usize> {
+    haystack.iter().position(|&x| x == needle)
+}
+
+#[inline(always)]
+pub fn gb18030_range_decode(pointer: u16) -> u16 {
+    map_with_ranges(&GB18030_RANGE_POINTERS[..],
+                    &GB18030_RANGE_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn gb18030_range_encode(bmp: u16) -> usize {
+    if bmp == 0xE7C7 {
+        return 7457;
+    }
+    map_with_ranges(&GB18030_RANGE_OFFSETS[..], &GB18030_RANGE_POINTERS[..], bmp) as usize
+}
+
+#[inline(always)]
+pub fn gbk_top_ideograph_decode(pointer: u16) -> u16 {
+    map_with_ranges(&GBK_TOP_IDEOGRAPH_POINTERS[..],
+                    &GBK_TOP_IDEOGRAPH_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn gbk_top_ideograph_encode(bmp: u16) -> u16 {
+    map_with_ranges(&GBK_TOP_IDEOGRAPH_OFFSETS[..],
+                    &GBK_TOP_IDEOGRAPH_POINTERS[..],
+                    bmp)
+}
+
+#[inline(always)]
+pub fn gbk_left_ideograph_decode(pointer: u16) -> u16 {
+    map_with_ranges(&GBK_LEFT_IDEOGRAPH_POINTERS[..],
+                    &GBK_LEFT_IDEOGRAPH_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn gbk_left_ideograph_encode(bmp: u16) -> u16 {
+    map_with_ranges(&GBK_LEFT_IDEOGRAPH_OFFSETS[..],
+                    &GBK_LEFT_IDEOGRAPH_POINTERS[..],
+                    bmp)
+}
+
+#[inline(always)]
+pub fn cp949_top_hangul_decode(pointer: u16) -> u16 {
+    map_with_ranges(&CP949_TOP_HANGUL_POINTERS[..],
+                    &CP949_TOP_HANGUL_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn cp949_top_hangul_encode(bmp: u16) -> u16 {
+    map_with_ranges(&CP949_TOP_HANGUL_OFFSETS[..],
+                    &CP949_TOP_HANGUL_POINTERS[..],
+                    bmp)
+}
+
+#[inline(always)]
+pub fn cp949_left_hangul_decode(pointer: u16) -> u16 {
+    map_with_ranges(&CP949_LEFT_HANGUL_POINTERS[..],
+                    &CP949_LEFT_HANGUL_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn cp949_left_hangul_encode(bmp: u16) -> u16 {
+    map_with_ranges(&CP949_LEFT_HANGUL_OFFSETS[..],
+                    &CP949_LEFT_HANGUL_POINTERS[..],
+                    bmp)
+}
+
+#[inline(always)]
+pub fn gbk_other_decode(pointer: u16) -> u16 {
+    map_with_ranges(&GBK_OTHER_POINTERS[..GBK_OTHER_POINTERS.len() - 1],
+                    &GBK_OTHER_UNSORTED_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn gbk_other_encode(bmp: u16) -> Option<u16> {
+    map_with_unsorted_ranges(&GBK_OTHER_UNSORTED_OFFSETS[..],
+                             &GBK_OTHER_POINTERS[..],
+                             bmp)
+}
+
+#[inline(always)]
+pub fn gb2312_other_decode(pointer: u16) -> u16 {
+    map_with_ranges(&GB2312_OTHER_POINTERS[..GB2312_OTHER_POINTERS.len() - 1],
+                    &GB2312_OTHER_UNSORTED_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn gb2312_other_encode(bmp: u16) -> Option<u16> {
+    map_with_unsorted_ranges(&GB2312_OTHER_UNSORTED_OFFSETS[..],
+                             &GB2312_OTHER_POINTERS[..],
+                             bmp)
+}
+
+#[cfg(feature = "no-static-ideograph-encoder-tables")]
+#[inline(always)]
+pub fn gb2312_level1_hanzi_encode(bmp: u16) -> Option<(u8, u8)> {
+    position(&GB2312_HANZI[..(94 * (0xD8 - 0xB0) - 5)], bmp).map(|hanzi_pointer| {
+        let hanzi_lead = (hanzi_pointer / 94) + 0xB0;
+        let hanzi_trail = (hanzi_pointer % 94) + 0xA1;
+        (hanzi_lead as u8, hanzi_trail as u8)
+    })
+}
+
+#[cfg(not(feature = "no-static-ideograph-encoder-tables"))]
+#[inline(always)]
+pub fn gb2312_level1_hanzi_encode(bmp: u16) -> Option<(u8, u8)> {
+    match GB2312_LEVEL1_HANZI_CODE_POINTS.binary_search(&bmp) {
+        Ok(i) => {
+            let pair = &GB2312_LEVEL1_HANZI_BYTES[i];
+            Some((pair[0], pair[1]))
+        }
+        Err(_) => None,
+    }
+}
+
+#[inline(always)]
+pub fn gb2312_level2_hanzi_encode(bmp: u16) -> Option<usize> {
+    // TODO: optimize
+    position(&GB2312_HANZI[(94 * (0xD8 - 0xB0))..], bmp)
+}
+
+#[inline(always)]
+pub fn ksx1001_other_decode(pointer: u16) -> u16 {
+    map_with_ranges(&KSX1001_OTHER_POINTERS[..KSX1001_OTHER_POINTERS.len() - 1],
+                    &KSX1001_OTHER_UNSORTED_OFFSETS[..],
+                    pointer)
+}
+
+#[inline(always)]
+pub fn ksx1001_other_encode(bmp: u16) -> Option<u16> {
+    map_with_unsorted_ranges(&KSX1001_OTHER_UNSORTED_OFFSETS[..],
+                             &KSX1001_OTHER_POINTERS[..],
+                             bmp)
+}
+
+#[cfg(feature = "no-static-ideograph-encoder-tables")]
+#[inline(always)]
+pub fn jis0208_level1_kanji_shift_jis_encode(bmp: u16) -> Option<(u8, u8)> {
+    position(&JIS0208_LEVEL1_KANJI[..], bmp).map(|kanji_pointer| {
+        let pointer = 1410 + kanji_pointer;
+        let lead = pointer / 188;
+        let lead_offset = if lead < 0x1F {
+            0x81
+        } else {
+            0xC1
+        };
+        let trail = pointer % 188;
+        let trail_offset = if trail < 0x3F {
+            0x40
+        } else {
+            0x41
+        };
+        ((lead + lead_offset) as u8, (trail + trail_offset) as u8)
+    })
+}
+
+#[cfg(not(feature = "no-static-ideograph-encoder-tables"))]
+#[inline(always)]
+pub fn jis0208_level1_kanji_shift_jis_encode(bmp: u16) -> Option<(u8, u8)> {
+    match JIS0208_LEVEL1_KANJI_CODE_POINTS.binary_search(&bmp) {
+        Ok(i) => {
+            let pair = &JIS0208_LEVEL1_KANJI_SHIFT_JIS_BYTES[i];
+            Some((pair[0], pair[1]))
+        }
+        Err(_) => None,
+    }
+}
+
+#[cfg(feature = "no-static-ideograph-encoder-tables")]
+#[inline(always)]
+pub fn jis0208_level1_kanji_euc_jp_encode(bmp: u16) -> Option<(u8, u8)> {
+    position(&JIS0208_LEVEL1_KANJI[..], bmp).map(|kanji_pointer| {
+        let lead = (kanji_pointer / 94) + 0xB0;
+        let trail = (kanji_pointer % 94) + 0xA1;
+        (lead as u8, trail as u8)
+    })
+}
+
+#[cfg(not(feature = "no-static-ideograph-encoder-tables"))]
+#[inline(always)]
+pub fn jis0208_level1_kanji_euc_jp_encode(bmp: u16) -> Option<(u8, u8)> {
+    jis0208_level1_kanji_shift_jis_encode(bmp).map(|(shift_jis_lead, shift_jis_trail)| {
+        let mut lead = shift_jis_lead as usize;
+        if shift_jis_lead >= 0xA0 {
+            lead -= 0xC1 - 0x81;
+        }
+        // The next line would overflow u8. Letting it go over allows us to
+        // subtract fewer times.
+        lead <<= 1;
+        // Bring it back to u8 range
+        lead -= 0x61;
+        let trail = if shift_jis_trail >= 0x9F {
+            lead += 1;
+            shift_jis_trail + (0xA1 - 0x9F)
+        } else if shift_jis_trail < 0x7F {
+            shift_jis_trail + (0xA1 - 0x40)
+        } else {
+            shift_jis_trail + (0xA1 - 0x41)
+        };
+        (lead as u8, trail)
+    })
+}
+
+#[cfg(feature = "no-static-ideograph-encoder-tables")]
+#[inline(always)]
+pub fn jis0208_level1_kanji_iso_2022_jp_encode(bmp: u16) -> Option<(u8, u8)> {
+    position(&JIS0208_LEVEL1_KANJI[..], bmp).map(|kanji_pointer| {
+        let lead = (kanji_pointer / 94) + (0xB0 - 0x80);
+        let trail = (kanji_pointer % 94) + 0x21;
+        (lead as u8, trail as u8)
+    })
+}
+
+#[cfg(not(feature = "no-static-ideograph-encoder-tables"))]
+#[inline(always)]
+pub fn jis0208_level1_kanji_iso_2022_jp_encode(bmp: u16) -> Option<(u8, u8)> {
+    jis0208_level1_kanji_shift_jis_encode(bmp).map(|(shift_jis_lead, shift_jis_trail)| {
+        let mut lead = shift_jis_lead as usize;
+        if shift_jis_lead >= 0xA0 {
+            lead -= 0xC1 - 0x81;
+        }
+        // The next line would overflow u8. Letting it go over allows us to
+        // subtract fewer times.
+        lead <<= 1;
+        // Bring it back to u8 range
+        lead -= 0xE1;
+        let trail = if shift_jis_trail >= 0x9F {
+            lead += 1;
+            shift_jis_trail - (0x9F - 0x21)
+        } else if shift_jis_trail < 0x7F {
+            shift_jis_trail - (0x40 - 0x21)
+        } else {
+            shift_jis_trail - (0x41 - 0x21)
+        };
+        (lead as u8, trail)
+    })
+}
+
+#[inline(always)]
+pub fn jis0208_level2_and_additional_kanji_encode(bmp: u16) -> Option<usize> {
+    // TODO: optimize
+    position(&JIS0208_LEVEL2_AND_ADDITIONAL_KANJI[..], bmp)
+}
+
+pub fn jis0208_symbol_decode(pointer: usize) -> Option<u16> {
+    let mut i = 0;
+    while i < JIS0208_SYMBOL_TRIPLES.len() {
+        let start = JIS0208_SYMBOL_TRIPLES[i] as usize;
+        let length = JIS0208_SYMBOL_TRIPLES[i + 1] as usize;
+        let pointer_minus_start = pointer.wrapping_sub(start);
+        if pointer_minus_start < length {
+            let offset = JIS0208_SYMBOL_TRIPLES[i + 2] as usize;
+            return Some(JIS0208_SYMBOLS[pointer_minus_start + offset]);
+        }
+        i += 3;
+    }
+    None
+}
+
+/// Prefers Shift_JIS pointers for the three symbols that are in both ranges.
+#[inline(always)]
+pub fn jis0208_symbol_encode(bmp: u16) -> Option<usize> {
+    let mut i = 0;
+    while i < JIS0208_SYMBOL_TRIPLES.len() {
+        let pointer_start = JIS0208_SYMBOL_TRIPLES[i] as usize;
+        let length = JIS0208_SYMBOL_TRIPLES[i + 1] as usize;
+        let symbol_start = JIS0208_SYMBOL_TRIPLES[i + 2] as usize;
+        let symbol_end = symbol_start + length;
+        let mut symbol_pos = symbol_start;
+        while symbol_pos < symbol_end {
+            if JIS0208_SYMBOLS[symbol_pos] == bmp {
+                return Some(symbol_pos - symbol_start + pointer_start);
+            }
+            symbol_pos += 1;
+        }
+        i += 3;
+    }
+    None
+}
+
+#[inline(always)]
+pub fn ibm_symbol_encode(bmp: u16) -> Option<usize> {
+    position(&JIS0208_SYMBOLS[IBM_SYMBOL_START..IBM_SYMBOL_END], bmp)
+        .map(|x| x + IBM_SYMBOL_POINTER_START)
+}
+
+#[inline(always)]
+pub fn jis0208_range_decode(pointer: usize) -> Option<u16> {
+    let mut i = 0;
+    while i < JIS0208_RANGE_TRIPLES.len() {
+        let start = JIS0208_RANGE_TRIPLES[i] as usize;
+        let length = JIS0208_RANGE_TRIPLES[i + 1] as usize;
+        let pointer_minus_start = pointer.wrapping_sub(start);
+        if pointer_minus_start < length {
+            let offset = JIS0208_RANGE_TRIPLES[i + 2] as usize;
+            return Some((pointer_minus_start + offset) as u16);
+        }
+        i += 3;
+    }
+    None
+}
+
+#[inline(always)]
+pub fn jis0208_range_encode(bmp: u16) -> Option<usize> {
+    let mut i = 0;
+    while i < JIS0208_RANGE_TRIPLES.len() {
+        let start = JIS0208_RANGE_TRIPLES[i + 2] as usize;
+        let length = JIS0208_RANGE_TRIPLES[i + 1] as usize;
+        let bmp_minus_start = (bmp as usize).wrapping_sub(start);
+        if bmp_minus_start < length {
+            let offset = JIS0208_RANGE_TRIPLES[i] as usize;
+            return Some(bmp_minus_start + offset);
+        }
+        i += 3;
+    }
+    None
+}
+
+pub fn jis0212_accented_decode(pointer: usize) -> Option<u16> {
+    let mut i = 0;
+    while i < JIS0212_ACCENTED_TRIPLES.len() {
+        let start = JIS0212_ACCENTED_TRIPLES[i] as usize;
+        let length = JIS0212_ACCENTED_TRIPLES[i + 1] as usize;
+        let pointer_minus_start = pointer.wrapping_sub(start);
+        if pointer_minus_start < length {
+            let offset = JIS0212_ACCENTED_TRIPLES[i + 2] as usize;
+            let candidate = JIS0212_ACCENTED[pointer_minus_start + offset];
+            if candidate == 0 {
+                return None;
+            }
+            return Some(candidate);
+        }
+        i += 3;
+    }
+    None
+}
+
+#[inline(always)]
+pub fn big5_is_astral(rebased_pointer: usize) -> bool {
+    (BIG5_ASTRALNESS[rebased_pointer >> 5] & (1 << (rebased_pointer & 0x1F))) != 0
+}
+
+#[inline(always)]
+pub fn big5_low_bits(rebased_pointer: usize) -> u16 {
+    if rebased_pointer < BIG5_LOW_BITS.len() {
+        BIG5_LOW_BITS[rebased_pointer]
+    } else {
+        0
+    }
+}
+
+#[inline(always)]
+pub fn big5_astral_encode(low_bits: u16) -> Option<usize> {
+    match low_bits {
+        0x00CC => Some(11205 - 942),
+        0x008A => Some(11207 - 942),
+        0x7607 => Some(11213 - 942),
+        _ => {
+            let mut i = 18997 - 942;
+            while i < BIG5_LOW_BITS.len() - 1 {
+                if BIG5_LOW_BITS[i] == low_bits && big5_is_astral(i) {
+                    return Some(i);
+                }
+                i += 1;
+            }
+            None
+        }
+    }
+}
+
+#[cfg(feature = "no-static-ideograph-encoder-tables")]
+#[inline(always)]
+pub fn big5_level1_hanzi_encode(bmp: u16) -> Option<(u8, u8)> {
+    if super::in_inclusive_range16(bmp, 0x4E00, 0x9FB1) {
+        if let Some(hanzi_pointer) = position(&BIG5_LOW_BITS[(5495 - 942)..(10951 - 942)], bmp) {
+            let lead = hanzi_pointer / 157 + 0xA4;
+            let remainder = hanzi_pointer % 157;
+            let trail = if remainder < 0x3F {
+                remainder + 0x40
+            } else {
+                remainder + 0x62
+            };
+            return Some((lead as u8, trail as u8));
+        }
+        match bmp {
+            0x4E5A => {
+                return Some((0xC8, 0x7B));
+            }
+            0x5202 => {
+                return Some((0xC8, 0x7D));
+            }
+            0x9FB0 => {
+                return Some((0xC8, 0xA1));
+            }
+            0x5188 => {
+                return Some((0xC8, 0xA2));
+            }
+            0x9FB1 => {
+                return Some((0xC8, 0xA3));
+            }
+            _ => {
+                return None;
+            }
+        }
+    }
+    None
+}
+
+#[cfg(not(feature = "no-static-ideograph-encoder-tables"))]
+#[inline(always)]
+pub fn big5_level1_hanzi_encode(bmp: u16) -> Option<(u8, u8)> {
+    if super::in_inclusive_range16(bmp, 0x4E00, 0x9FB1) {
+        match BIG5_LEVEL1_HANZI_CODE_POINTS.binary_search(&bmp) {
+            Ok(i) => {
+                let pair = &BIG5_LEVEL1_HANZI_BYTES[i];
+                Some((pair[0], pair[1]))
+            }
+            Err(_) => None,
+        }
+    } else {
+        None
+    }
+}
+
+#[inline(always)]
+pub fn big5_box_encode(bmp: u16) -> Option<usize> {
+    position(&BIG5_LOW_BITS[(18963 - 942)..(18992 - 942)], bmp).map(|x| x + 18963)
+}
+
+#[inline(always)]
+pub fn big5_other_encode(bmp: u16) -> Option<usize> {
+    if 0x4491 == bmp {
+        return Some(11209);
+    }
+    if let Some(pos) = position(&BIG5_LOW_BITS[(5024 - 942)..(5466 - 942)], bmp) {
+        return Some(pos + 5024);
+    }
+    if let Some(pos) = position(&BIG5_LOW_BITS[(10896 - 942)..(11205 - 942)], bmp) {
+        return Some(pos + 10896);
+    }
+    if let Some(pos) = position(&BIG5_LOW_BITS[(11254 - 942)..(18963 - 942)], bmp) {
+        return Some(pos + 11254);
+    }
+    let mut i = 18996 - 942;
+    while i < BIG5_LOW_BITS.len() {
+        if BIG5_LOW_BITS[i] == bmp && !big5_is_astral(i) {
+            return Some(i + 942);
+        }
+        i += 1;
+    }
+    None
+}
+
+#[inline(always)]
+pub fn mul_94(lead: u8) -> usize {
+    lead as usize * 94
+}
+''')
+
+data_file.close()
+
+# Variant
+#
+# Start writing src/variant.rs with its license header and module docs. The
+# statements that follow append the VariantDecoder, VariantEncoder and
+# VariantEncoding definitions before the file is closed.
+
+variant_file = open("src/variant.rs", "w")
+variant_file.write('''// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+//! This module provides enums that wrap the various decoders and encoders.
+//! The purpose is to make `Decoder` and `Encoder` `Sized` by writing the
+//! dispatch explicitly for a finite set of specialized decoders and encoders.
+//! Unfortunately, this means the compiler doesn't generate the dispatch code
+//! and it has to be written here instead.
+//!
+//! The purpose of making `Decoder` and `Encoder` `Sized` is to allow stack
+//! allocation in Rust code, including the convenience methods on `Encoding`.
+
+''')
+
+encoding_variants = [u"single-byte",]
+for encoding in multi_byte:
+  if encoding["name"] in [u"UTF-16LE", u"UTF-16BE"]:
+    continue
+  else:
+    encoding_variants.append(encoding["name"])
+encoding_variants.append(u"UTF-16")
+
+decoder_variants = []
+for variant in encoding_variants:
+  if variant == u"GBK":
+    continue
+  decoder_variants.append(variant)
+
+encoder_variants = []
+for variant in encoding_variants:
+  if variant in [u"replacement", u"GBK", u"UTF-16"]:
+    continue
+  encoder_variants.append(variant)
+
+for variant in decoder_variants:
+  variant_file.write("use %s::*;\n" % to_snake_name(variant))
+
+variant_file.write('''use super::*;
+
+pub enum VariantDecoder {
+''')
+
+for variant in decoder_variants:
+  variant_file.write("   %s(%sDecoder),\n" % (to_camel_name(variant), to_camel_name(variant)))
+
+variant_file.write('''}
+
+impl VariantDecoder {
+''')
+
+def write_variant_method(name, mut, arg_list, ret, variants, excludes, kind):
+  variant_file.write('''pub fn %s(&''' % name)
+  if mut:
+    variant_file.write('''mut ''')
+  variant_file.write('''self''')
+  for arg in arg_list:
+    variant_file.write(''', %s: %s''' % (arg[0], arg[1]))
+  variant_file.write(''')''')
+  if ret:
+    variant_file.write(''' -> %s''' % ret)
+  variant_file.write(''' {\nmatch *self {\n''')
+  for variant in variants:
+    variant_file.write('''Variant%s::%s(ref ''' % (kind, to_camel_name(variant)))
+    if mut:
+      variant_file.write('''mut ''')
+    if variant in excludes:
+      variant_file.write('''v) => (),''')
+      continue
+    variant_file.write('''v) => v.%s(''' % name)
+    first = True
+    for arg in arg_list:
+      if not first:
+        variant_file.write(''', ''')
+      first = False
+      variant_file.write(arg[0])
+    variant_file.write('''),\n''')
+  variant_file.write('''}\n}\n\n''')
+
+write_variant_method("max_utf16_buffer_length", False, [("byte_length", "usize")], "Option<usize>", decoder_variants, [], "Decoder")
+
+write_variant_method("max_utf8_buffer_length_without_replacement", False, [("byte_length", "usize")], "Option<usize>", decoder_variants, [], "Decoder")
+
+write_variant_method("max_utf8_buffer_length", False, [("byte_length", "usize")], "Option<usize>", decoder_variants, [], "Decoder")
+
+write_variant_method("decode_to_utf16_raw", True, [("src", "&[u8]"),
+                           ("dst", "&mut [u16]"),
+                           ("last", "bool")], "(DecoderResult, usize, usize)", decoder_variants, [], "Decoder")
+
+write_variant_method("decode_to_utf8_raw", True, [("src", "&[u8]"),
+                           ("dst", "&mut [u8]"),
+                           ("last", "bool")], "(DecoderResult, usize, usize)", decoder_variants, [], "Decoder")
+
+variant_file.write('''
+}
+
+pub enum VariantEncoder {
+''')
+
+for variant in encoder_variants:
+  variant_file.write("   %s(%sEncoder),\n" % (to_camel_name(variant), to_camel_name(variant)))
+
+variant_file.write('''}
+
+impl VariantEncoder {
+    pub fn has_pending_state(&self) -> bool {
+        match *self {
+            VariantEncoder::Iso2022Jp(ref v) => {
+                v.has_pending_state()
+            }
+            _ => false,
+        }
+    }
+''')
+
+write_variant_method("max_buffer_length_from_utf16_without_replacement", False, [("u16_length", "usize")], "Option<usize>", encoder_variants, [], "Encoder")
+
+write_variant_method("max_buffer_length_from_utf8_without_replacement", False, [("byte_length", "usize")], "Option<usize>", encoder_variants, [], "Encoder")
+
+write_variant_method("encode_from_utf16_raw", True, [("src", "&[u16]"),
+                           ("dst", "&mut [u8]"),
+                           ("last", "bool")], "(EncoderResult, usize, usize)", encoder_variants, [], "Encoder")
+
+write_variant_method("encode_from_utf8_raw", True, [("src", "&str"),
+                           ("dst", "&mut [u8]"),
+                           ("last", "bool")], "(EncoderResult, usize, usize)", encoder_variants, [], "Encoder")
+
+
+# Close `impl VariantEncoder` and open the VariantEncoding enum. The SingleByte
+# arm is written literally here; the remaining arms are generated from
+# multi_byte by the loop below.
+variant_file.write('''}
+
+pub enum VariantEncoding {
+    SingleByte(&'static [u16; 128]),''')
+
+for encoding in multi_byte:
+  variant_file.write("%s,\n" % to_camel_name(encoding["name"]))
+
+# NOTE(review): unlike the enum arms above, the match arms in this literal are
+# spelled out by hand rather than derived from multi_byte, so they presumably
+# have to be kept in sync manually if the set of multi-byte encodings changes.
+variant_file.write('''}
+
+impl VariantEncoding {
+    pub fn new_variant_decoder(&self) -> VariantDecoder {
+        match *self {
+            VariantEncoding::SingleByte(table) => SingleByteDecoder::new(table),
+            VariantEncoding::Utf8 => Utf8Decoder::new(),
+            VariantEncoding::Gbk | VariantEncoding::Gb18030 => Gb18030Decoder::new(),
+            VariantEncoding::Big5 => Big5Decoder::new(),
+            VariantEncoding::EucJp => EucJpDecoder::new(),
+            VariantEncoding::Iso2022Jp => Iso2022JpDecoder::new(),
+            VariantEncoding::ShiftJis => ShiftJisDecoder::new(),
+            VariantEncoding::EucKr => EucKrDecoder::new(),
+            VariantEncoding::Replacement => ReplacementDecoder::new(),
+            VariantEncoding::UserDefined => UserDefinedDecoder::new(),
+            VariantEncoding::Utf16Be => Utf16Decoder::new(true),
+            VariantEncoding::Utf16Le => Utf16Decoder::new(false),
+        }
+    }
+
+    pub fn new_encoder(&self, encoding: &'static Encoding) -> Encoder {
+        match *self {
+            VariantEncoding::SingleByte(table) => SingleByteEncoder::new(encoding, table),
+            VariantEncoding::Utf8 => Utf8Encoder::new(encoding),
+            VariantEncoding::Gbk => Gb18030Encoder::new(encoding, false),
+            VariantEncoding::Gb18030 => Gb18030Encoder::new(encoding, true),
+            VariantEncoding::Big5 => Big5Encoder::new(encoding),
+            VariantEncoding::EucJp => EucJpEncoder::new(encoding),
+            VariantEncoding::Iso2022Jp => Iso2022JpEncoder::new(encoding),
+            VariantEncoding::ShiftJis => ShiftJisEncoder::new(encoding),
+            VariantEncoding::EucKr => EucKrEncoder::new(encoding),
+            VariantEncoding::UserDefined => UserDefinedEncoder::new(encoding),
+            VariantEncoding::Utf16Be | VariantEncoding::Replacement |
+            VariantEncoding::Utf16Le => unreachable!(),
+        }
+    }
+}
+''')
+
+# Last write to src/variant.rs.
+variant_file.close()
+
+(ffi_rs_begin, ffi_rs_end) = read_non_generated("../encoding_c/src/lib.rs")
+
+ffi_file = open("../encoding_c/src/lib.rs", "w")
+
+ffi_file.write(ffi_rs_begin)
+ffi_file.write("""
+// Instead, please regenerate using generate-encoding-data.py
+
+/// The minimum length of buffers that may be passed to `encoding_name()`.
+pub const ENCODING_NAME_MAX_LENGTH: usize = %d; // %s
+
+""" % (longest_name_length, longest_name))
+
+for name in preferred:
+  ffi_file.write('''/// The %s encoding.
+#[no_mangle]
+pub static %s_ENCODING: ConstEncoding = ConstEncoding(&%s_INIT);
+
+''' % (to_dom_name(name), to_constant_name(name), to_constant_name(name)))
+
+ffi_file.write(ffi_rs_end)
+ffi_file.close()
+
+(single_byte_rs_begin, single_byte_rs_end) = read_non_generated("src/single_byte.rs")
+
+single_byte_file = open("src/single_byte.rs", "w")
+
+single_byte_file.write(single_byte_rs_begin)
+single_byte_file.write("""
+// Instead, please regenerate using generate-encoding-data.py
+
+    #[test]
+    fn test_single_byte_decode() {""")
+
+for name in preferred:
+  if name == u"ISO-8859-8-I":
+    continue;
+  if is_single_byte(name):
+    single_byte_file.write("""
+        decode_single_byte(%s, %s_DATA);""" % (to_constant_name(name), to_constant_name(name)))
+
+single_byte_file.write("""
+    }
+
+    #[test]
+    fn test_single_byte_encode() {""")
+
+for name in preferred:
+  if name == u"ISO-8859-8-I":
+    continue;
+  if is_single_byte(name):
+    single_byte_file.write("""
+        encode_single_byte(%s, %s_DATA);""" % (to_constant_name(name), to_constant_name(name)))
+
+
+single_byte_file.write("""
+    }
+""")
+
+single_byte_file.write(single_byte_rs_end)
+single_byte_file.close()
+
+static_file = open("../encoding_c/include/encoding_rs_statics.h", "w")
+
+static_file.write("""// Copyright 2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+// This file is not meant to be included directly. Instead, encoding_rs.h
+// includes this file.
+
+#ifndef encoding_rs_statics_h_
+#define encoding_rs_statics_h_
+
+#ifndef ENCODING_RS_ENCODING
+#define ENCODING_RS_ENCODING Encoding
+#ifndef __cplusplus
+typedef struct Encoding_ Encoding;
+#endif
+#endif
+
+#ifndef ENCODING_RS_ENCODER
+#define ENCODING_RS_ENCODER Encoder
+#ifndef __cplusplus
+typedef struct Encoder_ Encoder;
+#endif
+#endif
+
+#ifndef ENCODING_RS_DECODER
+#define ENCODING_RS_DECODER Decoder
+#ifndef __cplusplus
+typedef struct Decoder_ Decoder;
+#endif
+#endif
+
+#define INPUT_EMPTY 0
+
+#define OUTPUT_FULL 0xFFFFFFFF
+
+// %s
+#define ENCODING_NAME_MAX_LENGTH %d
+
+""" % (longest_name, longest_name_length))
+
+for name in preferred:
+  static_file.write('''/// The %s encoding.
+extern const ENCODING_RS_ENCODING* const %s_ENCODING;
+
+''' % (to_dom_name(name), to_constant_name(name)))
+
+static_file.write("""#endif // encoding_rs_statics_h_
+""")
+static_file.close()
+
+(utf_8_rs_begin, utf_8_rs_end) = read_non_generated("src/utf_8.rs")
+
+utf_8_file = open("src/utf_8.rs", "w")
+
+utf_8_file.write(utf_8_rs_begin)
+utf_8_file.write("""
+// Instead, please regenerate using generate-encoding-data.py
+
+/// Bit is 1 if the trail is invalid.
+static UTF8_TRAIL_INVALID: [u8; 256] = [""")
+
+for i in range(256):
+  combined = 0
+  if i < 0x80 or i > 0xBF:
+    combined |= (1 << 3)
+  if i < 0xA0 or i > 0xBF:
+    combined |= (1 << 4)
+  if i < 0x80 or i > 0x9F:
+    combined |= (1 << 5)
+  if i < 0x90 or i > 0xBF:
+    combined |= (1 << 6)
+  if i < 0x80 or i > 0x8F:
+    combined |= (1 << 7)
+  utf_8_file.write("%d," % combined)
+
+utf_8_file.write("""
+];
+""")
+
+utf_8_file.write(utf_8_rs_end)
+utf_8_file.close()
+
+# Unit tests
+
+# Boilerplate written at the top of every generated file under
+# src/test_data/ before any test lines are emitted.
+TEST_HEADER = '''Any copyright to the test code below this comment is dedicated to the
+Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
+
+This is a generated file. Please do not edit.
+Instead, please regenerate using generate-encoding-data.py
+'''
+
+index = indexes["jis0208"]
+
+jis0208_in_file = open("src/test_data/jis0208_in.txt", "w")
+jis0208_in_file.write(TEST_HEADER)
+for pointer in range(0, 94 * 94):
+  (lead, trail) = divmod(pointer, 94)
+  lead += 0xA1
+  trail += 0xA1
+  jis0208_in_file.write("%s%s\n" % (chr(lead), chr(trail)))
+jis0208_in_file.close()
+
+jis0208_in_ref_file = open("src/test_data/jis0208_in_ref.txt", "w")
+jis0208_in_ref_file.write(TEST_HEADER)
+for pointer in range(0, 94 * 94):
+  code_point = index[pointer]
+  if code_point:
+    jis0208_in_ref_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+  else:
+    jis0208_in_ref_file.write(u"\uFFFD\n".encode("utf-8"))
+jis0208_in_ref_file.close()
+
+jis0208_out_file = open("src/test_data/jis0208_out.txt", "w")
+jis0208_out_ref_file = open("src/test_data/jis0208_out_ref.txt", "w")
+jis0208_out_file.write(TEST_HEADER)
+jis0208_out_ref_file.write(TEST_HEADER)
+for pointer in range(0, 94 * 94):
+  code_point = index[pointer]
+  if code_point:
+    revised_pointer = pointer
+    if revised_pointer == 8644 or (revised_pointer >= 1207 and revised_pointer < 1220):
+      revised_pointer = index.index(code_point)
+    (lead, trail) = divmod(revised_pointer, 94)
+    lead += 0xA1
+    trail += 0xA1
+    jis0208_out_ref_file.write("%s%s\n" % (chr(lead), chr(trail)))
+    jis0208_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+jis0208_out_file.close()
+jis0208_out_ref_file.close()
+
+shift_jis_in_file = open("src/test_data/shift_jis_in.txt", "w")
+shift_jis_in_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  (lead, trail) = divmod(pointer, 188)
+  lead += 0x81 if lead < 0x1F else 0xC1
+  trail += 0x40 if trail < 0x3F else 0x41
+  shift_jis_in_file.write("%s%s\n" % (chr(lead), chr(trail)))
+shift_jis_in_file.close()
+
+shift_jis_in_ref_file = open("src/test_data/shift_jis_in_ref.txt", "w")
+shift_jis_in_ref_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  code_point = 0xE000 - 8836 + pointer if pointer >= 8836 and pointer <= 10715 else index[pointer]
+  if code_point:
+    shift_jis_in_ref_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+  else:
+    trail = pointer % 188
+    trail += 0x40 if trail < 0x3F else 0x41
+    if trail < 0x80:
+      shift_jis_in_ref_file.write((u"\uFFFD%s\n" % unichr(trail)).encode("utf-8"))
+    else:
+      shift_jis_in_ref_file.write(u"\uFFFD\n".encode("utf-8"))
+shift_jis_in_ref_file.close()
+
+shift_jis_out_file = open("src/test_data/shift_jis_out.txt", "w")
+shift_jis_out_ref_file = open("src/test_data/shift_jis_out_ref.txt", "w")
+shift_jis_out_file.write(TEST_HEADER)
+shift_jis_out_ref_file.write(TEST_HEADER)
+for pointer in range(0, 8272):
+  code_point = index[pointer]
+  if code_point:
+    revised_pointer = pointer
+    if revised_pointer >= 1207 and revised_pointer < 1220:
+      revised_pointer = index.index(code_point)
+    (lead, trail) = divmod(revised_pointer, 188)
+    lead += 0x81 if lead < 0x1F else 0xC1
+    trail += 0x40 if trail < 0x3F else 0x41
+    shift_jis_out_ref_file.write("%s%s\n" % (chr(lead), chr(trail)))
+    shift_jis_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+for pointer in range(8836, len(index)):
+  code_point = index[pointer]
+  if code_point:
+    revised_pointer = index.index(code_point)
+    if revised_pointer >= 8272 and revised_pointer < 8836:
+      revised_pointer = pointer
+    (lead, trail) = divmod(revised_pointer, 188)
+    lead += 0x81 if lead < 0x1F else 0xC1
+    trail += 0x40 if trail < 0x3F else 0x41
+    shift_jis_out_ref_file.write("%s%s\n" % (chr(lead), chr(trail)))
+    shift_jis_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+shift_jis_out_file.close()
+shift_jis_out_ref_file.close()
+
+iso_2022_jp_in_file = open("src/test_data/iso_2022_jp_in.txt", "w")
+iso_2022_jp_in_file.write(TEST_HEADER)
+for pointer in range(0, 94 * 94):
+  (lead, trail) = divmod(pointer, 94)
+  lead += 0x21
+  trail += 0x21
+  iso_2022_jp_in_file.write("\x1B$B%s%s\x1B(B\n" % (chr(lead), chr(trail)))
+iso_2022_jp_in_file.close()
+
+iso_2022_jp_in_ref_file = open("src/test_data/iso_2022_jp_in_ref.txt", "w")
+iso_2022_jp_in_ref_file.write(TEST_HEADER)
+for pointer in range(0, 94 * 94):
+  code_point = index[pointer]
+  if code_point:
+    iso_2022_jp_in_ref_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+  else:
+    iso_2022_jp_in_ref_file.write(u"\uFFFD\n".encode("utf-8"))
+iso_2022_jp_in_ref_file.close()
+
+iso_2022_jp_out_file = open("src/test_data/iso_2022_jp_out.txt", "w")
+iso_2022_jp_out_ref_file = open("src/test_data/iso_2022_jp_out_ref.txt", "w")
+iso_2022_jp_out_file.write(TEST_HEADER)
+iso_2022_jp_out_ref_file.write(TEST_HEADER)
+for pointer in range(0, 94 * 94):
+  code_point = index[pointer]
+  if code_point:
+    revised_pointer = pointer
+    if revised_pointer == 8644 or (revised_pointer >= 1207 and revised_pointer < 1220):
+      revised_pointer = index.index(code_point)
+    (lead, trail) = divmod(revised_pointer, 94)
+    lead += 0x21
+    trail += 0x21
+    iso_2022_jp_out_ref_file.write("\x1B$B%s%s\x1B(B\n" % (chr(lead), chr(trail)))
+    iso_2022_jp_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+for i in xrange(len(half_width_index)):
+  code_point = i + 0xFF61
+  normalized_code_point = half_width_index[i]
+  pointer = index.index(normalized_code_point)
+  (lead, trail) = divmod(pointer, 94)
+  lead += 0x21
+  trail += 0x21
+  iso_2022_jp_out_ref_file.write("\x1B$B%s%s\x1B(B\n" % (chr(lead), chr(trail)))
+  iso_2022_jp_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+iso_2022_jp_out_file.close()
+iso_2022_jp_out_ref_file.close()
+
+index = indexes["euc-kr"]
+
+euc_kr_in_file = open("src/test_data/euc_kr_in.txt", "w")
+euc_kr_in_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  (lead, trail) = divmod(pointer, 190)
+  lead += 0x81
+  trail += 0x41
+  euc_kr_in_file.write("%s%s\n" % (chr(lead), chr(trail)))
+euc_kr_in_file.close()
+
+euc_kr_in_ref_file = open("src/test_data/euc_kr_in_ref.txt", "w")
+euc_kr_in_ref_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  code_point = index[pointer]
+  if code_point:
+    euc_kr_in_ref_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+  else:
+    trail = pointer % 190
+    trail += 0x41
+    if trail < 0x80:
+      euc_kr_in_ref_file.write((u"\uFFFD%s\n" % unichr(trail)).encode("utf-8"))
+    else:
+      euc_kr_in_ref_file.write(u"\uFFFD\n".encode("utf-8"))
+euc_kr_in_ref_file.close()
+
+euc_kr_out_file = open("src/test_data/euc_kr_out.txt", "w")
+euc_kr_out_ref_file = open("src/test_data/euc_kr_out_ref.txt", "w")
+euc_kr_out_file.write(TEST_HEADER)
+euc_kr_out_ref_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  code_point = index[pointer]
+  if code_point:
+    (lead, trail) = divmod(pointer, 190)
+    lead += 0x81
+    trail += 0x41
+    euc_kr_out_ref_file.write("%s%s\n" % (chr(lead), chr(trail)))
+    euc_kr_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+euc_kr_out_file.close()
+euc_kr_out_ref_file.close()
+
+index = indexes["gb18030"]
+
+gb18030_in_file = open("src/test_data/gb18030_in.txt", "w")
+gb18030_in_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  (lead, trail) = divmod(pointer, 190)
+  lead += 0x81
+  trail += 0x40 if trail < 0x3F else 0x41
+  gb18030_in_file.write("%s%s\n" % (chr(lead), chr(trail)))
+gb18030_in_file.close()
+
+gb18030_in_ref_file = open("src/test_data/gb18030_in_ref.txt", "w")
+gb18030_in_ref_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  code_point = index[pointer]
+  if code_point:
+    gb18030_in_ref_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+  else:
+    trail = pointer % 190
+    trail += 0x40 if trail < 0x3F else 0x41
+    if trail < 0x80:
+      gb18030_in_ref_file.write((u"\uFFFD%s\n" % unichr(trail)).encode("utf-8"))
+    else:
+      gb18030_in_ref_file.write(u"\uFFFD\n".encode("utf-8"))
+gb18030_in_ref_file.close()
+
+gb18030_out_file = open("src/test_data/gb18030_out.txt", "w")
+gb18030_out_ref_file = open("src/test_data/gb18030_out_ref.txt", "w")
+gb18030_out_file.write(TEST_HEADER)
+gb18030_out_ref_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  if pointer == 6555:
+    continue
+  code_point = index[pointer]
+  if code_point:
+    (lead, trail) = divmod(pointer, 190)
+    lead += 0x81
+    trail += 0x40 if trail < 0x3F else 0x41
+    gb18030_out_ref_file.write("%s%s\n" % (chr(lead), chr(trail)))
+    gb18030_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+gb18030_out_file.close()
+gb18030_out_ref_file.close()
+
+index = indexes["big5"]
+
+big5_in_file = open("src/test_data/big5_in.txt", "w")
+big5_in_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  (lead, trail) = divmod(pointer, 157)
+  lead += 0x81
+  trail += 0x40 if trail < 0x3F else 0x62
+  big5_in_file.write("%s%s\n" % (chr(lead), chr(trail)))
+big5_in_file.close()
+
+big5_two_characters = {
+  1133: u"\u00CA\u0304",
+  1135: u"\u00CA\u030C",
+  1164: u"\u00EA\u0304",
+  1166: u"\u00EA\u030C",
+}
+
+big5_in_ref_file = open("src/test_data/big5_in_ref.txt", "w")
+big5_in_ref_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  if pointer in big5_two_characters.keys():
+    big5_in_ref_file.write((u"%s\n" % big5_two_characters[pointer]).encode("utf-8"))
+    continue
+  code_point = index[pointer]
+  if code_point:
+    big5_in_ref_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+  else:
+    trail = pointer % 157
+    trail += 0x40 if trail < 0x3F else 0x62
+    if trail < 0x80:
+      big5_in_ref_file.write((u"\uFFFD%s\n" % unichr(trail)).encode("utf-8"))
+    else:
+      big5_in_ref_file.write(u"\uFFFD\n".encode("utf-8"))
+big5_in_ref_file.close()
+
+prefer_last = [
+  0x2550,
+  0x255E,
+  0x2561,
+  0x256A,
+  0x5341,
+  0x5345,
+]
+
+pointer_for_prefer_last = []
+
+for code_point in prefer_last:
+  # Python lists don't have .rindex() :-(
+  for i in xrange(len(index) - 1, -1, -1):
+    candidate = index[i]
+    if candidate == code_point:
+       pointer_for_prefer_last.append(i)
+       break
+
+big5_out_file = open("src/test_data/big5_out.txt", "w")
+big5_out_ref_file = open("src/test_data/big5_out_ref.txt", "w")
+big5_out_file.write(TEST_HEADER)
+big5_out_ref_file.write(TEST_HEADER)
+for pointer in range(((0xA1 - 0x81) * 157), len(index)):
+  code_point = index[pointer]
+  if code_point:
+    if code_point in prefer_last:
+      if pointer != pointer_for_prefer_last[prefer_last.index(code_point)]:
+        continue
+    else:
+      if pointer != index.index(code_point):
+        continue
+    (lead, trail) = divmod(pointer, 157)
+    lead += 0x81
+    trail += 0x40 if trail < 0x3F else 0x62
+    big5_out_ref_file.write("%s%s\n" % (chr(lead), chr(trail)))
+    big5_out_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+big5_out_file.close()
+big5_out_ref_file.close()
+
+index = indexes["jis0212"]
+
+jis0212_in_file = open("src/test_data/jis0212_in.txt", "w")
+jis0212_in_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  (lead, trail) = divmod(pointer, 94)
+  lead += 0xA1
+  trail += 0xA1
+  jis0212_in_file.write("\x8F%s%s\n" % (chr(lead), chr(trail)))
+jis0212_in_file.close()
+
+jis0212_in_ref_file = open("src/test_data/jis0212_in_ref.txt", "w")
+jis0212_in_ref_file.write(TEST_HEADER)
+for pointer in range(0, len(index)):
+  code_point = index[pointer]
+  if code_point:
+    jis0212_in_ref_file.write((u"%s\n" % unichr(code_point)).encode("utf-8"))
+  else:
+    jis0212_in_ref_file.write(u"\uFFFD\n".encode("utf-8"))
+jis0212_in_ref_file.close()
+
+subprocess.call(["cargo", "fmt"])
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/rustfmt.toml
@@ -0,0 +1,2 @@
+fn_call_style = "Block"
+error_on_line_overflow = false
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/src/ascii.rs
@@ -0,0 +1,847 @@
+// Copyright 2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// It's assumed that in due course Rust will have explicit SIMD but will not
+// be good at run-time selection of SIMD vs. no-SIMD. In such a future,
+// x86_64 will always use SSE2 and 32-bit x86 will use SSE2 when compiled with
+// a Mozilla-shipped rustc. SIMD support and especially detection on ARM is a
+// mess. Under the circumstances, it seems to make sense to optimize the ALU
+// case for ARMv7 rather than x86. Annoyingly, I was unable to get useful
+// numbers from the actual ARMv7 CPU I have access to, because (thermal?)
+// throttling kept interfering. Since Raspberry Pi 3 (ARMv8 core but running
+// ARMv7 code) produced reproducible performance numbers, that's the ARM
+// computer that this code ended up being optimized for in the ALU case.
+// Less popular CPU architectures simply get the approach that was chosen based
+// on Raspberry Pi 3 measurements. The UTF-16 and UTF-8 ALU cases take
+// different approaches based on benchmarking on Raspberry Pi 3.
+
+#[cfg(feature = "simd-accel")]
+use simd_funcs::*;
+
+macro_rules! ascii_naive { // Generates `$name`: a unit-by-unit copy from src to dst that stops at the first code unit above 127.
+    ($name:ident,
+     $src_unit:ty,
+     $dst_unit:ty) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) -> Option<($src_unit, usize)> { // unsafe: reads src[0..len] and writes dst[0..len] through raw pointers
+        // Yes, manually omitting the bound check here matters
+        // a lot for perf.
+        for i in 0..len {
+            let code_unit = *(src.offset(i as isize));
+            if code_unit > 127 {
+                return Some((code_unit, i)); // the offending unit and its index; that unit is not written to dst
+            }
+            *(dst.offset(i as isize)) = code_unit as $dst_unit;
+        }
+        return None; // every one of the `len` units was <= 127 and has been copied
+    });
+}
+
+macro_rules! ascii_alu { // Generates `$name`: copies units <= 127 from src to dst, using `$stride_fn` on aligned words when src and dst share the same misalignment.
+    ($name:ident,
+     $src_unit:ty,
+     $dst_unit:ty,
+     $stride_fn:ident) => (
+    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
+    #[inline(always)]
+    pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) -> Option<($src_unit, usize)> { // Some((unit, index)) for the first unit > 127, else None
+        let mut offset = 0usize;
+        // This loop is only broken out of as a `goto` forward
+        loop {
+            let mut until_alignment = {
+                // Check if the other unit aligns if we move the narrower unit
+                // to alignment.
+                //               if ::std::mem::size_of::<$src_unit>() == ::std::mem::size_of::<$dst_unit>() {
+                // ascii_to_ascii
+                let src_alignment = (src as usize) & ALIGNMENT_MASK;
+                let dst_alignment = (dst as usize) & ALIGNMENT_MASK;
+                if src_alignment != dst_alignment {
+                    break; // the pointers can never be aligned together; fall through to the unit-by-unit loop at the end
+                }
+                (ALIGNMENT - src_alignment) & ALIGNMENT_MASK // units to process before both pointers are aligned (0 when already aligned)
+                //               } else if ::std::mem::size_of::<$src_unit>() < ::std::mem::size_of::<$dst_unit>() {
+                // ascii_to_basic_latin
+                //                   let src_until_alignment = (ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK;
+                //                   if (dst.offset(src_until_alignment as isize) as usize) & ALIGNMENT_MASK != 0 {
+                //                       break;
+                //                   }
+                //                   src_until_alignment
+                //               } else {
+                // basic_latin_to_ascii
+                //                   let dst_until_alignment = (ALIGNMENT - ((dst as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK;
+                //                   if (src.offset(dst_until_alignment as isize) as usize) & ALIGNMENT_MASK != 0 {
+                //                       break;
+                //                   }
+                //                   dst_until_alignment
+                //               }
+                };
+                if until_alignment + STRIDE_SIZE <= len { // only take the word path if at least one full stride fits after aligning
+                // Moving pointers to alignment seems to be a pessimization on
+                // x86_64 for operations that have UTF-16 as the internal
+                // Unicode representation. However, since it seems to be a win
+                // on ARM (tested ARMv7 code running on ARMv8 [rpi3]), except
+                // mixed results when encoding from UTF-16 and since x86 and
+                // x86_64 should be using SSE2 in due course, keeping the move
+                // to alignment here. It would be good to test on more ARM CPUs
+                // and on real MIPS and POWER hardware.
+                while until_alignment != 0 {
+                    let code_unit = *(src.offset(offset as isize));
+                    if code_unit > 127 {
+                        return Some((code_unit, offset));
+                    }
+                    *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+                    offset += 1;
+                    until_alignment -= 1;
+                }
+                loop {
+                    if let Some(num_ascii) = $stride_fn(src.offset(offset as isize) as *const usize,
+                                   dst.offset(offset as isize) as *mut usize) {
+                        offset += num_ascii; // Some(n) is used as the count of units in this stride that precede the first non-ASCII unit
+                        return Some((*(src.offset(offset as isize)), offset));
+                    }
+                    offset += STRIDE_SIZE;
+                    if offset + STRIDE_SIZE > len {
+                        break; // fewer than STRIDE_SIZE units remain
+                    }
+                }
+            }
+            break;
+        }
+        while offset < len { // unit-by-unit handling of whatever the word path did not cover
+            let code_unit = *(src.offset(offset as isize));
+            if code_unit > 127 {
+                return Some((code_unit, offset));
+            }
+            *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+            offset += 1;
+        }
+        None
+    });
+}
+
+macro_rules! basic_latin_alu { // Generates `$name`: copies units <= 127 between buffers of different unit width, using `$stride_fn` on aligned words when both pointers can be aligned together.
+    ($name:ident,
+     $src_unit:ty,
+     $dst_unit:ty,
+     $stride_fn:ident) => (
+    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
+    #[inline(always)]
+    pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) -> Option<($src_unit, usize)> { // Some((unit, index)) for the first unit > 127, else None
+        let mut offset = 0usize;
+        // This loop is only broken out of as a `goto` forward
+        loop {
+            let mut until_alignment = {
+                // Check if the other unit aligns if we move the narrower unit
+                // to alignment.
+                //               if ::std::mem::size_of::<$src_unit>() == ::std::mem::size_of::<$dst_unit>() {
+                // ascii_to_ascii
+                //                   let src_alignment = (src as usize) & ALIGNMENT_MASK;
+                //                   let dst_alignment = (dst as usize) & ALIGNMENT_MASK;
+                //                   if src_alignment != dst_alignment {
+                //                       break;
+                //                   }
+                //                   (ALIGNMENT - src_alignment) & ALIGNMENT_MASK
+                //               } else
+                if ::std::mem::size_of::<$src_unit>() < ::std::mem::size_of::<$dst_unit>() {
+                    // ascii_to_basic_latin
+                    let src_until_alignment = (ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK;
+                    if (dst.wrapping_offset(src_until_alignment as isize) as usize) & ALIGNMENT_MASK != 0 { // wrapping_offset: len is not checked yet, so this address may lie past the buffer; only its low bits are inspected
+                        break;
+                    }
+                    src_until_alignment
+                } else {
+                    // basic_latin_to_ascii
+                    let dst_until_alignment = (ALIGNMENT - ((dst as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK;
+                    if (src.wrapping_offset(dst_until_alignment as isize) as usize) & ALIGNMENT_MASK != 0 { // wrapping_offset for the same reason: the pointer is never dereferenced here
+                        break;
+                    }
+                    dst_until_alignment
+                }
+            };
+            if until_alignment + STRIDE_SIZE <= len { // only take the word path if at least one full stride fits after aligning
+                // Moving pointers to alignment seems to be a pessimization on
+                // x86_64 for operations that have UTF-16 as the internal
+                // Unicode representation. However, since it seems to be a win
+                // on ARM (tested ARMv7 code running on ARMv8 [rpi3]), except
+                // mixed results when encoding from UTF-16 and since x86 and
+                // x86_64 should be using SSE2 in due course, keeping the move
+                // to alignment here. It would be good to test on more ARM CPUs
+                // and on real MIPS and POWER hardware.
+                while until_alignment != 0 {
+                    let code_unit = *(src.offset(offset as isize));
+                    if code_unit > 127 {
+                        return Some((code_unit, offset));
+                    }
+                    *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+                    offset += 1;
+                    until_alignment -= 1;
+                }
+                loop {
+                    if !$stride_fn(src.offset(offset as isize) as *const usize,
+                                   dst.offset(offset as isize) as *mut usize) {
+                        break; // a false return leaves `offset` at the start of this stride; the loop below rescans it unit by unit
+                    }
+                    offset += STRIDE_SIZE;
+                    if offset + STRIDE_SIZE > len {
+                        break; // fewer than STRIDE_SIZE units remain
+                    }
+                }
+            }
+            break;
+        }
+        while offset < len { // unit-by-unit handling of whatever the word path did not cover
+            let code_unit = *(src.offset(offset as isize));
+            if code_unit > 127 {
+                return Some((code_unit, offset));
+            }
+            *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+            offset += 1;
+        }
+        None
+    });
+}
+
+macro_rules! ascii_simd { // Generates `$name`: copies units <= 127 from src to dst, choosing one of four stride functions by the initial alignment of src and dst.
+    ($name:ident,
+     $src_unit:ty,
+     $dst_unit:ty,
+     $stride_both_aligned:ident,
+     $stride_src_aligned:ident,
+     $stride_dst_aligned:ident,
+     $stride_neither_aligned:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) -> Option<($src_unit, usize)> { // Some((unit, index)) for the first unit > 127, else None
+        let mut offset = 0usize;
+        // XXX should we have more branchy code to move the pointers to
+        // alignment if they aren't aligned but could align after
+        // processing a few code units?
+        if STRIDE_SIZE <= len {
+            // XXX Should we first process one stride unconditionally as unaligned to
+            // avoid the cost of the branchiness below if the first stride fails anyway?
+            // XXX Should we just use unaligned SSE2 access unconditionally? It seems that
+            // on Haswell, it would make sense to just use unaligned and not bother
+            // checking. Need to benchmark older architectures before deciding.
+            let dst_masked = (dst as usize) & ALIGNMENT_MASK;
+            if ((src as usize) & ALIGNMENT_MASK) == 0 {
+                if dst_masked == 0 {
+                    loop {
+                        if !$stride_both_aligned(src.offset(offset as isize),
+                                                 dst.offset(offset as isize)) {
+                            break; // a false return leaves `offset` at the start of this stride; the loop at the end rescans it
+                        }
+                        offset += STRIDE_SIZE;
+                        if offset + STRIDE_SIZE > len {
+                            break;
+                        }
+                    }
+                } else {
+                    loop {
+                        if !$stride_src_aligned(src.offset(offset as isize),
+                                                dst.offset(offset as isize)) {
+                            break;
+                        }
+                        offset += STRIDE_SIZE;
+                        if offset + STRIDE_SIZE > len {
+                            break;
+                        }
+                    }
+                }
+            } else {
+                if dst_masked == 0 {
+                    loop {
+                        if !$stride_dst_aligned(src.offset(offset as isize),
+                                                dst.offset(offset as isize)) {
+                            break;
+                        }
+                        offset += STRIDE_SIZE;
+                        if offset + STRIDE_SIZE > len {
+                            break;
+                        }
+                    }
+                } else {
+                    loop {
+                        if !$stride_neither_aligned(src.offset(offset as isize),
+                                                    dst.offset(offset as isize)) {
+                            break;
+                        }
+                        offset += STRIDE_SIZE;
+                        if offset + STRIDE_SIZE > len {
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+        while offset < len { // unit-by-unit handling of the tail and of any stride that reported failure
+            let code_unit = *(src.offset(offset as isize));
+            if code_unit > 127 {
+                return Some((code_unit, offset));
+            }
+            *(dst.offset(offset as isize)) = code_unit as $dst_unit;
+            offset += 1;
+        }
+        None
+    });
+}
+
+macro_rules! ascii_to_ascii_simd_stride { // Generates `$name`: one u8 -> u8 stride built from the given `$load` / `$store` functions.
+    ($name:ident,
+     $load:ident,
+     $store:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const u8, dst: *mut u8) -> bool { // true when the stride was stored, false when `is_ascii` rejected it
+        let simd = $load(src);
+        if !is_ascii(simd) {
+            return false; // nothing is stored to dst in this case
+        }
+        $store(dst, simd);
+        true
+    });
+}
+
+macro_rules! ascii_to_basic_latin_simd_stride { // Generates `$name`: one u8 -> u16 stride built from the given `$load` / `$store` functions.
+    ($name:ident,
+     $load:ident,
+     $store:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const u8, dst: *mut u16) -> bool { // true when the stride was stored, false when `is_ascii` rejected it
+        let simd = $load(src);
+        if !is_ascii(simd) {
+            return false; // nothing is stored to dst in this case
+        }
+        let (first, second) = unpack(simd);
+        $store(dst, first);
+        $store(dst.offset(8), second); // the second half is stored 8 u16 units after the first
+        true
+    });
+}
+
+macro_rules! basic_latin_to_ascii_simd_stride { // Generates `$name`: one u16 -> u8 stride built from the given `$load` / `$store` functions.
+    ($name:ident,
+     $load:ident,
+     $store:ident) => (
+    #[inline(always)]
+    pub unsafe fn $name(src: *const u16, dst: *mut u8) -> bool { // true when the stride was stored, false when `pack_basic_latin` returned None
+        let first = $load(src);
+        let second = $load(src.offset(8)); // the second load starts 8 u16 units after the first
+        match pack_basic_latin(first, second) {
+            Some(packed) => {
+                $store(dst, packed);
+                true
+            },
+            None => false, // nothing is stored to dst in this case
+        }
+    });
+}
+
+cfg_if! {
+    if #[cfg(all(feature = "simd-accel", target_feature = "sse2"))] {
+        // SIMD
+
+        pub const STRIDE_SIZE: usize = 16; // code units handled per stride call
+
+        const ALIGNMENT_MASK: usize = 15; // an address with none of these bits set is treated as aligned
+
+        ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_both_aligned, load16_aligned, store16_aligned);
+        ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_src_aligned, load16_aligned, store16_unaligned);
+        ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_dst_aligned, load16_unaligned, store16_aligned);
+        ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_neither_aligned, load16_unaligned, store16_unaligned);
+
+        ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_both_aligned, load16_aligned, store8_aligned);
+        ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_src_aligned, load16_aligned, store8_unaligned);
+        ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_dst_aligned, load16_unaligned, store8_aligned);
+        ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_neither_aligned, load16_unaligned, store8_unaligned);
+
+        basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_both_aligned, load8_aligned, store16_aligned);
+        basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_src_aligned, load8_aligned, store16_unaligned);
+        basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_dst_aligned, load8_unaligned, store16_aligned);
+        basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_neither_aligned, load8_unaligned, store16_unaligned);
+
+        ascii_simd!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride_both_aligned, ascii_to_ascii_stride_src_aligned, ascii_to_ascii_stride_dst_aligned, ascii_to_ascii_stride_neither_aligned); // public entry points, one per unit-width combination
+        ascii_simd!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_both_aligned, ascii_to_basic_latin_stride_src_aligned, ascii_to_basic_latin_stride_dst_aligned, ascii_to_basic_latin_stride_neither_aligned);
+        ascii_simd!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_both_aligned, basic_latin_to_ascii_stride_src_aligned, basic_latin_to_ascii_stride_dst_aligned, basic_latin_to_ascii_stride_neither_aligned);
+    } else if #[cfg(all(target_endian = "little", target_pointer_width = "64"))] {
+        // Aligned ALU word, little-endian, 64-bit
+
+        pub const STRIDE_SIZE: usize = 16; // code units per stride call: two 8-byte words of u8, or four words of u16
+
+        const ALIGNMENT: usize = 8;
+
+        const ALIGNMENT_MASK: usize = 7;
+
+        #[inline(always)]
+        unsafe fn ascii_to_basic_latin_stride_little_64(src: *const usize, dst: *mut usize) -> bool { // reads 2 words from src; on success writes 4 words to dst and returns true
+            let word = *src;
+            let second_word = *(src.offset(1));
+            // Check if the words contain non-ASCII
+            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
+                return false; // nothing is written to dst in this case
+            }
+            let first = ((0x00000000_FF000000usize & word) << 24) | // each of the low four bytes of `word` moves into its own 16-bit lane
+                        ((0x00000000_00FF0000usize & word) << 16) |
+                        ((0x00000000_0000FF00usize & word) << 8) |
+                        (0x00000000_000000FFusize & word);
+            let second = ((0xFF000000_00000000usize & word) >> 8) | // the high four bytes of `word`, likewise one per 16-bit lane
+                         ((0x00FF0000_00000000usize & word) >> 16) |
+                         ((0x0000FF00_00000000usize & word) >> 24) |
+                         ((0x000000FF_00000000usize & word) >> 32);
+            let third = ((0x00000000_FF000000usize & second_word) << 24) |
+                        ((0x00000000_00FF0000usize & second_word) << 16) |
+                        ((0x00000000_0000FF00usize & second_word) << 8) |
+                        (0x00000000_000000FFusize & second_word);
+            let fourth = ((0xFF000000_00000000usize & second_word) >> 8) |
+                         ((0x00FF0000_00000000usize & second_word) >> 16) |
+                         ((0x0000FF00_00000000usize & second_word) >> 24) |
+                         ((0x000000FF_00000000usize & second_word) >> 32);
+            *dst = first;
+            *(dst.offset(1)) = second;
+            *(dst.offset(2)) = third;
+            *(dst.offset(3)) = fourth;
+            true
+        }
+
+        #[inline(always)]
+        unsafe fn basic_latin_to_ascii_stride_little_64(src: *const usize, dst: *mut usize) -> bool { // reads 4 words from src; on success writes 2 words to dst and returns true
+            let first = *src;
+            let second = *(src.offset(1));
+            let third = *(src.offset(2));
+            let fourth = *(src.offset(3));
+            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
+                return false; // some BASIC_LATIN_MASK bit was set in an input word; nothing is written to dst
+            }
+            let word = ((0x00FF0000_00000000usize & second) << 8) | // low byte of each 16-bit lane of `second` goes to the high four bytes
+                       ((0x000000FF_00000000usize & second) << 16) |
+                       ((0x00000000_00FF0000usize & second) << 24) |
+                       ((0x00000000_000000FFusize & second) << 32) |
+                       ((0x00FF0000_00000000usize & first) >> 24) | // low byte of each 16-bit lane of `first` goes to the low four bytes
+                       ((0x000000FF_00000000usize & first) >> 16) |
+                       ((0x00000000_00FF0000usize & first) >> 8) |
+                       (0x00000000_000000FFusize & first);
+            let second_word = ((0x00FF0000_00000000usize & fourth) << 8) |
+                              ((0x000000FF_00000000usize & fourth) << 16) |
+                              ((0x00000000_00FF0000usize & fourth) << 24) |
+                              ((0x00000000_000000FFusize & fourth) << 32) |
+                              ((0x00FF0000_00000000usize & third) >> 24) |
+                              ((0x000000FF_00000000usize & third) >> 16) |
+                              ((0x00000000_00FF0000usize & third) >> 8) |
+                              (0x00000000_000000FFusize & third);
+            *dst = word;
+            *(dst.offset(1)) = second_word;
+            true
+        }
+
+        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_little_64);
+        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_little_64);
+    } else if #[cfg(all(target_endian = "little", target_pointer_width = "32"))] {
+        // Aligned ALU word, little-endian, 32-bit
+
+        pub const STRIDE_SIZE: usize = 8; // code units per stride call: two 4-byte words of u8, or four words of u16
+
+        const ALIGNMENT: usize = 4;
+
+        const ALIGNMENT_MASK: usize = 3;
+
+        #[inline(always)]
+        unsafe fn ascii_to_basic_latin_stride_little_32(src: *const usize, dst: *mut usize) -> bool { // reads 2 words from src; on success writes 4 words to dst and returns true
+            let word = *src;
+            let second_word = *(src.offset(1));
+            // Check if the words contain non-ASCII
+            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
+                return false; // nothing is written to dst in this case
+            }
+            let first = ((0x0000FF00usize & word) << 8) | // the low two bytes of `word`, one per 16-bit lane
+                        (0x000000FFusize & word);
+            let second = ((0xFF000000usize & word) >> 8) | // the high two bytes of `word`, one per 16-bit lane
+                         ((0x00FF0000usize & word) >> 16);
+            let third = ((0x0000FF00usize & second_word) << 8) |
+                        (0x000000FFusize & second_word);
+            let fourth = ((0xFF000000usize & second_word) >> 8) |
+                         ((0x00FF0000usize & second_word) >> 16);
+            *dst = first;
+            *(dst.offset(1)) = second;
+            *(dst.offset(2)) = third;
+            *(dst.offset(3)) = fourth;
+            return true;
+        }
+
+        #[inline(always)]
+        unsafe fn basic_latin_to_ascii_stride_little_32(src: *const usize, dst: *mut usize) -> bool { // reads 4 words from src; on success writes 2 words to dst and returns true
+            let first = *src;
+            let second = *(src.offset(1));
+            let third = *(src.offset(2));
+            let fourth = *(src.offset(3));
+            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
+                return false; // some BASIC_LATIN_MASK bit was set in an input word; nothing is written to dst
+            }
+            let word = ((0x00FF0000usize & second) << 8) | // low byte of each 16-bit lane of `second` goes to the high two bytes
+                       ((0x000000FFusize & second) << 16) |
+                       ((0x00FF0000usize & first) >> 8) | // low byte of each 16-bit lane of `first` goes to the low two bytes
+                       (0x000000FFusize & first);
+            let second_word = ((0x00FF0000usize & fourth) << 8) |
+                              ((0x000000FFusize & fourth) << 16) |
+                              ((0x00FF0000usize & third) >> 8) |
+                              (0x000000FFusize & third);
+            *dst = word;
+            *(dst.offset(1)) = second_word;
+            return true;
+        }
+
+        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_little_32);
+        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_little_32);
+    } else if #[cfg(all(target_endian = "big", target_pointer_width = "64"))] {
+        // Aligned ALU word, big-endian, 64-bit
+
+        pub const STRIDE_SIZE: usize = 16; // code units per stride call: two 8-byte words of u8, or four words of u16
+
+        const ALIGNMENT: usize = 8;
+
+        const ALIGNMENT_MASK: usize = 7;
+
+        #[inline(always)]
+        unsafe fn ascii_to_basic_latin_stride_big_64(src: *const usize, dst: *mut usize) -> bool { // reads 2 words from src; on success writes 4 words to dst and returns true
+            let word = *src;
+            let second_word = *(src.offset(1));
+            // Check if the words contain non-ASCII
+            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
+                return false; // nothing is written to dst in this case
+            }
+            let first = ((0xFF000000_00000000usize & word) >> 8) | // the high four bytes of `word` come first here, one per 16-bit lane
+                         ((0x00FF0000_00000000usize & word) >> 16) |
+                         ((0x0000FF00_00000000usize & word) >> 24) |
+                         ((0x000000FF_00000000usize & word) >> 32);
+            let second = ((0x00000000_FF000000usize & word) << 24) | // then the low four bytes of `word`
+                        ((0x00000000_00FF0000usize & word) << 16) |
+                        ((0x00000000_0000FF00usize & word) << 8) |
+                        (0x00000000_000000FFusize & word);
+            let third = ((0xFF000000_00000000usize & second_word) >> 8) |
+                         ((0x00FF0000_00000000usize & second_word) >> 16) |
+                         ((0x0000FF00_00000000usize & second_word) >> 24) |
+                         ((0x000000FF_00000000usize & second_word) >> 32);
+            let fourth = ((0x00000000_FF000000usize & second_word) << 24) |
+                        ((0x00000000_00FF0000usize & second_word) << 16) |
+                        ((0x00000000_0000FF00usize & second_word) << 8) |
+                        (0x00000000_000000FFusize & second_word);
+            *dst = first;
+            *(dst.offset(1)) = second;
+            *(dst.offset(2)) = third;
+            *(dst.offset(3)) = fourth;
+            return true;
+        }
+
+        #[inline(always)]
+        unsafe fn basic_latin_to_ascii_stride_big_64(src: *const usize, dst: *mut usize) -> bool { // reads 4 words from src; on success writes 2 words to dst and returns true
+            let first = *src;
+            let second = *(src.offset(1));
+            let third = *(src.offset(2));
+            let fourth = *(src.offset(3));
+            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
+                return false; // some BASIC_LATIN_MASK bit was set in an input word; nothing is written to dst
+            }
+            let word = ((0x00FF0000_00000000usize & first) << 8) | // low byte of each 16-bit lane of `first` goes to the high four bytes
+                       ((0x000000FF_00000000usize & first) << 16) |
+                       ((0x00000000_00FF0000usize & first) << 24) |
+                       ((0x00000000_000000FFusize & first) << 32) |
+                       ((0x00FF0000_00000000usize & second) >> 24) | // low byte of each 16-bit lane of `second` goes to the low four bytes
+                       ((0x000000FF_00000000usize & second) >> 16) |
+                       ((0x00000000_00FF0000usize & second) >> 8) |
+                       (0x00000000_000000FFusize & second);
+            let second_word = ((0x00FF0000_00000000usize & third) << 8) |
+                              ((0x000000FF_00000000usize & third) << 16) |
+                              ((0x00000000_00FF0000usize & third) << 24) |
+                              ((0x00000000_000000FFusize & third) << 32) |
+                              ((0x00FF0000_00000000usize & fourth) >> 24) |
+                              ((0x000000FF_00000000usize & fourth) >> 16) |
+                              ((0x00000000_00FF0000usize & fourth) >> 8) |
+                              (0x00000000_000000FFusize &  fourth);
+            *dst = word;
+            *(dst.offset(1)) = second_word;
+            return true;
+        }
+
+        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_big_64);
+        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_big_64);
+    } else if #[cfg(all(target_endian = "big", target_pointer_width = "32"))] {
+        // Aligned ALU word, big-endian, 32-bit
+
+        pub const STRIDE_SIZE: usize = 8; // code units per stride call: two 4-byte words of u8, or four words of u16
+
+        const ALIGNMENT: usize = 4;
+
+        const ALIGNMENT_MASK: usize = 3;
+
+        #[inline(always)]
+        unsafe fn ascii_to_basic_latin_stride_big_32(src: *const usize, dst: *mut usize) -> bool { // reads 2 words from src; on success writes 4 words to dst and returns true
+            let word = *src;
+            let second_word = *(src.offset(1));
+            // Check if the words contain non-ASCII
+            if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
+                return false; // nothing is written to dst in this case
+            }
+            let first = ((0xFF000000usize & word) >> 8) | // the high two bytes of `word` come first here, one per 16-bit lane
+                         ((0x00FF0000usize & word) >> 16);
+            let second = ((0x0000FF00usize & word) << 8) | // then the low two bytes of `word`
+                        (0x000000FFusize & word);
+            let third = ((0xFF000000usize & second_word) >> 8) |
+                         ((0x00FF0000usize & second_word) >> 16);
+            let fourth = ((0x0000FF00usize & second_word) << 8) |
+                        (0x000000FFusize & second_word);
+            *dst = first;
+            *(dst.offset(1)) = second;
+            *(dst.offset(2)) = third;
+            *(dst.offset(3)) = fourth;
+            return true;
+        }
+
+        #[inline(always)]
+        unsafe fn basic_latin_to_ascii_stride_big_32(src: *const usize, dst: *mut usize) -> bool { // reads 4 words from src; on success writes 2 words to dst and returns true
+            let first = *src;
+            let second = *(src.offset(1));
+            let third = *(src.offset(2));
+            let fourth = *(src.offset(3));
+            if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
+                return false; // some BASIC_LATIN_MASK bit was set in an input word; nothing is written to dst
+            }
+            let word = ((0x00FF0000usize & first) << 8) | // low byte of each 16-bit lane of `first` goes to the high two bytes
+                       ((0x000000FFusize & first) << 16) |
+                       ((0x00FF0000usize & second) >> 8) | // low byte of each 16-bit lane of `second` goes to the low two bytes
+                       (0x000000FFusize & second);
+            let second_word = ((0x00FF0000usize & third) << 8) |
+                              ((0x000000FFusize & third) << 16) |
+                              ((0x00FF0000usize & fourth) >> 8) |
+                              (0x000000FFusize & fourth);
+            *dst = word;
+            *(dst.offset(1)) = second_word;
+            return true;
+        }
+
+        basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_big_32);
+        basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_big_32);
+    } else {
+        ascii_naive!(ascii_to_ascii, u8, u8); // reached only when none of the SSE2 / endianness + pointer-width conditions above matched
+        ascii_naive!(ascii_to_basic_latin, u8, u16);
+        ascii_naive!(basic_latin_to_ascii, u16, u8);
+    }
+}
+
+cfg_if! { // Defines `count_zeros` for every configuration except the SSE2 one, which gets no definition.
+    if #[cfg(all(feature = "simd-accel", target_feature = "sse2"))] {
+    } else if #[cfg(target_endian = "little")] {
+        #[inline(always)]
+        fn count_zeros(word: usize) -> u32 { // little-endian: zero bits counted from the least significant end
+            word.trailing_zeros()
+        }
+    } else {
+        #[inline(always)]
+        fn count_zeros(word: usize) -> u32 { // otherwise: zero bits counted from the most significant end
+            word.leading_zeros()
+        }
+    }
+}
+
+cfg_if! {
+    if #[cfg(all(feature = "simd-accel", target_feature = "sse2"))] {
+        #[inline(always)]
+        pub fn validate_ascii(slice: &[u8]) -> Option<(u8, usize)> { // Some((byte, index)) for the first byte > 127 in `slice`, else None
+            let src = slice.as_ptr();
+            let len = slice.len();
+            let mut offset = 0usize;
+            if STRIDE_SIZE <= len {
+                // XXX Should we first process one stride unconditionally as unaligned to
+                // avoid the cost of the branchiness below if the first stride fails anyway?
+                // XXX Should we just use unaligned SSE2 access unconditionally? It seems that
+                // on Haswell, it would make sense to just use unaligned and not bother
+                // checking. Need to benchmark older architectures before deciding.
+                if ((src as usize) & ALIGNMENT_MASK) == 0 {
+                    loop {
+                        let simd = unsafe { load16_aligned(src.offset(offset as isize)) };
+                        if let Some(consumed) = check_ascii(simd) {
+                            offset += consumed; // `consumed` is used as the position of the non-ASCII byte within this stride
+                            let non_ascii = unsafe { *src.offset(offset as isize) };
+                            return Some((non_ascii, offset));
+                        }
+                        offset += STRIDE_SIZE;
+                        if offset + STRIDE_SIZE > len {
+                            break; // fewer than STRIDE_SIZE bytes remain
+                        }
+                    }
+                } else {
+                    loop {
+                        let simd = unsafe { load16_unaligned(src.offset(offset as isize)) };
+                        if let Some(consumed) = check_ascii(simd) {
+                            offset += consumed;
+                            let non_ascii = unsafe { *src.offset(offset as isize) };
+                            return Some((non_ascii, offset));
+                        }
+                        offset += STRIDE_SIZE;
+                        if offset + STRIDE_SIZE > len {
+                            break;
+                        }
+                    }
+                }
+            }
+            while offset < len { // bounds-checked byte-by-byte scan of the remaining tail
+                let code_unit = slice[offset];
+                if code_unit > 127 {
+                    return Some((code_unit, offset));
+                }
+                offset += 1;
+            }
+            None
+        }
+    } else {
+        // The `as usize` cast truncates, so the same literals also work on 32-bit targets.
+        const ASCII_MASK: usize = 0x80808080_80808080u64 as usize; // High bit of every byte.
+        const BASIC_LATIN_MASK: usize = 0xFF80FF80_FF80FF80u64 as usize; // Bits 7..=15 of every 16-bit unit.
+
+        #[inline(always)]
+        unsafe fn ascii_to_ascii_stride(src: *const usize, dst: *mut usize) -> Option<usize> { // Copies two words, then reports the first non-ASCII byte, if any.
+            let word = *src; // Caller must ensure `src` and `dst` are valid for two `usize` words.
+            let second_word = *(src.offset(1));
+            *dst = word; // Both words are stored before the ASCII check, so non-ASCII bytes get copied too.
+            *(dst.offset(1)) = second_word;
+            find_non_ascii(word, second_word)
+        }
+
+        #[inline(always)]
+        unsafe fn validate_ascii_stride(src: *const usize) -> Option<usize> { // Read-only check of two words; no copy is made.
+            let word = *src; // Caller must ensure `src` is valid for reading two `usize` words.
+            let second_word = *(src.offset(1));
+            find_non_ascii(word, second_word) // Some(count of ASCII bytes before the first non-ASCII byte) or None.
+        }
+
+        /// Finds the first byte, in text order, whose high bit is set in
+        /// either of two consecutive words.
+        ///
+        /// Returns `None` if there is no such byte; otherwise the number of
+        /// ASCII bytes before it, offset by `ALIGNMENT` for `second_word`.
+        #[inline(always)]
+        fn find_non_ascii(word: usize, second_word: usize) -> Option<usize> {
+            let first_hits = word & ASCII_MASK;
+            let second_hits = second_word & ASCII_MASK;
+            // A single combined test keeps the all-ASCII case to one branch.
+            if (first_hits | second_hits) == 0 {
+                return None;
+            }
+            // Select the earlier word containing a hit and its byte offset.
+            let (base, hits) = if first_hits != 0 {
+                (0usize, first_hits)
+            } else {
+                (ALIGNMENT, second_hits)
+            };
+            // Counting trailing zeros (little-endian) gives 8 per ASCII byte
+            // plus 7; counting leading zeros (big-endian) gives exactly 8 per
+            // ASCII byte. Shifting right by 3 yields the byte count in both.
+            Some(base + (count_zeros(hits) >> 3) as usize)
+        }
+
+        ascii_alu!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride); // NOTE(review): presumably defines `ascii_to_ascii` on top of the stride helper; the macro is defined outside this chunk - confirm.
+
+        #[inline(always)]
+        pub fn validate_ascii(slice: &[u8]) -> Option<(u8, usize)> { // Some((first byte > 127, its index)), or None if all ASCII.
+           let src = slice.as_ptr();
+           let len = slice.len();
+           let mut offset = 0usize;
+           let mut until_alignment = (ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK; // Bytes up to the next aligned address; 0 if already aligned (assuming ALIGNMENT_MASK == ALIGNMENT - 1).
+           if until_alignment + STRIDE_SIZE <= len { // Word-wise path only if a full stride fits after aligning.
+               while until_alignment != 0 { // Check the unaligned head one byte at a time.
+                   let code_unit = slice[offset];
+                   if code_unit > 127 {
+                       return Some((code_unit, offset));
+                   }
+                   offset += 1;
+                   until_alignment -= 1;
+               }
+               loop {
+                   let ptr = unsafe { src.offset(offset as isize) as *const usize };
+                   if let Some(num_ascii) = unsafe { validate_ascii_stride(ptr) } {
+                       offset += num_ascii; // Advance past the ASCII bytes preceding the hit.
+                       return Some((unsafe { *(src.offset(offset as isize)) }, offset));
+                   }
+                   offset += STRIDE_SIZE;
+                   if offset + STRIDE_SIZE > len { // Less than a full stride remains.
+                       break;
+                   }
+               }
+           }
+           while offset < len { // Byte-wise scan of whatever the word-wise path did not cover.
+               let code_unit = slice[offset];
+               if code_unit > 127 {
+                   return Some((code_unit, offset));
+               }
+               offset += 1;
+           }
+           None
+        }
+
+    }
+}
+
+/// Returns the length of the longest all-ASCII prefix of `bytes`.
+pub fn ascii_valid_up_to(bytes: &[u8]) -> usize {
+    // `validate_ascii` yields the index of the first byte above 127, if any.
+    let first_non_ascii = validate_ascii(bytes).map(|(_, index)| index);
+    first_non_ascii.unwrap_or(bytes.len())
+}
+
+/// Returns the length of the prefix of `bytes` that contains neither
+/// bytes >= 0x80 nor any of the bytes 0x1B, 0x0E and 0x0F.
+pub fn iso_2022_jp_ascii_valid_up_to(bytes: &[u8]) -> usize {
+    let stops_prefix = |unit: &u8| match *unit {
+        0x0E | 0x0F | 0x1B => true,
+        other => other >= 0x80,
+    };
+    bytes.iter().position(stops_prefix).unwrap_or(bytes.len())
+}
+
+// Any copyright to the test code below this comment is dedicated to the
+// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    macro_rules! test_ascii { // Generates a #[test] that plants one non-ASCII unit at each of 32 positions in turn.
+        ($test_name:ident,
+         $fn_tested:ident,
+         $src_unit:ty,
+         $dst_unit:ty) => (
+        #[test]
+        fn $test_name() {
+            let mut src: Vec<$src_unit> = Vec::with_capacity(32);
+            let mut dst: Vec<$dst_unit> = Vec::with_capacity(32);
+            for i in 0..32 { // `i` is the position of the single non-ASCII unit.
+                src.clear();
+                dst.clear();
+                dst.resize(32, 0);
+                for j in 0..32 {
+                    let c = if i == j {
+                        0xAA // Non-ASCII marker value.
+                    } else {
+                        j + 0x40 // ASCII values 0x40..=0x5F.
+                    };
+                    src.push(c as $src_unit);
+                }
+                match unsafe { $fn_tested(src.as_ptr(), dst.as_mut_ptr(), 32) } {
+                    None => unreachable!("Should always find non-ASCII"),
+                    Some((non_ascii, num_ascii)) => {
+                        assert_eq!(non_ascii, 0xAA);
+                        assert_eq!(num_ascii, i);
+                        for j in 0..i { // Only the units before the non-ASCII one are checked in `dst`.
+                            assert_eq!(dst[j], (j + 0x40) as $dst_unit);
+                        }
+                    }
+                }
+            }
+        });
+    }
+
+    test_ascii!(test_ascii_to_ascii, ascii_to_ascii, u8, u8); // u8 source to u8 destination.
+    test_ascii!(test_ascii_to_basic_latin, ascii_to_basic_latin, u8, u16); // u8 source to u16 destination.
+    test_ascii!(test_basic_latin_to_ascii, basic_latin_to_ascii, u16, u8); // u16 source to u8 destination.
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/src/big5.rs
@@ -0,0 +1,393 @@
+// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use handles::*;
+use data::*;
+use variant::*;
+use super::*;
+// Rust 1.14.0 requires the following despite the asterisk above.
+use super::in_inclusive_range32;
+
+pub struct Big5Decoder { // Decoder state for Big5; constructed via `Big5Decoder::new()`.
+    lead: Option<u8>, // NOTE(review): presumably a lead byte still awaiting its trail byte - confirm against the decoder macro.
+}
+
+impl Big5Decoder {
+    pub fn new() -> VariantDecoder { // Builds a decoder with `lead` unset, wrapped in the `Big5` variant.
+        VariantDecoder::Big5(Big5Decoder { lead: None })
+    }
+
+    /// Adds one to `byte_length` when `self.lead` holds a byte; yields
+    /// `None` if the addition would overflow `usize`.
+    fn plus_one_if_lead(&self, byte_length: usize) -> Option<usize> {
+        // A stored lead counts as one more input byte.
+        let pending = if self.lead.is_some() { 1 } else { 0 };
+        // `checked_add` reports overflow instead of wrapping.
+        byte_length.checked_add(pending)
+    }
+
+    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
+        // Worst-case UTF-16 output length for `byte_length` input bytes.
+        // A pending lead followed by a byte that isn't a valid trail yields an
+        // error for the lead alone (+1). The next iteration then checks for
+        // space for astral output or a combining pair, which needs another +1.
+        checked_add(1, self.plus_one_if_lead(byte_length))
+    }
+
+    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
+        // No need to account for REPLACEMENT CHARACTERs here.
+        // Cases:
+        // ASCII: 1 to 1
+        // Valid pair: 2 to 2, 2 to 3 or 2 to 4, i.e. worst case 2 to 4
+        // Lead set and first byte is a trail: 1 to 4 in the worst case
+        //
+        // When checking for space for the last byte:
+        // No lead: the last byte must be ASCII (or a fatal error): 1 to 1
+        // Lead set: space for 4 bytes was already checked when reading the
+        // lead, hence the last lead and the last trail together are worst
+        // case 2 to 4.
+        //
+        // If the lead is set and the input is a single trail byte, the
+        // worst-case output is 4, so one is added before multiplying when
+        // the lead is set.
+        //
+        // Finally, two is added so that if the input is non-zero, the output
+        // is at least 4.
+        checked_add(2, checked_mul(2, self.plus_one_if_lead(byte_length)))
+    }
+
+    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
+        // Worst case between start and end: every byte is bad, and each bad
+        // byte costs three output bytes (*3). A pending lead followed by a
+        // byte that isn't a valid trail yields an error for the lead alone
+        // (+(1*3)). The next iteration then checks space, which needs +3 to
+        // account for the possibility of astral output or a combining pair.
+        checked_add(3, checked_mul(3, self.plus_one_if_lead(byte_length)))
+    }
+
+    ascii_compatible_two_byte_decoder_functions!(
+        {
+            // If lead is between 0x81 and 0xFE, inclusive,
+            // subtract offset 0x81.
+            let non_ascii_minus_offset =
+                non_ascii.wrapping_sub(0x81);
+            if non_ascii_minus_offset > (0xFE - 0x81) { // After the wrapping subtraction, one comparison covers both bounds.
+                return (DecoderResult::Malformed(1, 0),
+                        source.consumed(),
+                        handle.written());
+            }
+            non_ascii_minus_offset
+        },
+        {
+            // If trail is between 0x40 and 0x7E, inclusive,
+            // subtract offset 0x40. Else if trail is
+            // between 0xA1 and 0xFE, inclusive, subtract
+            // offset 0x62.
+            // TODO: Find out which range is more probable.
+            let mut trail_minus_offset =
+                byte.wrapping_sub(0x40);
+            if trail_minus_offset > (0x7E - 0x40) {
+                let trail_minus_range_start =
+                    byte.wrapping_sub(0xA1);
+                if trail_minus_range_start >
+                   (0xFE - 0xA1) {
+                    if byte < 0x80 { // ASCII byte: it is unread and only the lead is reported as malformed.
+                        return (DecoderResult::Malformed(1, 0),
+                                unread_handle_trail.unread(),
+                                handle.written());
+                    }
+                    return (DecoderResult::Malformed(2, 0), // Non-ASCII bad trail: both bytes are reported.
+                            unread_handle_trail.consumed(),
+                            handle.written());
+                }
+                trail_minus_offset = byte - 0x62; // Cannot underflow: byte is in 0xA1..=0xFE here.
+            }
+            let pointer = lead_minus_offset as usize *
+                          157usize + // 157 = 63 trails in 0x40..=0x7E plus 94 trails in 0xA1..=0xFE.
+                          trail_minus_offset as usize;
+            let rebased_pointer = pointer.wrapping_sub(942); // 942 == 6 * 157; smaller pointers wrap to huge values.
+            let low_bits = big5_low_bits(rebased_pointer);
+            if low_bits == 0 {
+                match pointer { // Four pointers decode to a base letter plus a combining mark.
+                    1133 => { // Bytes 0x88 0x62.
+                        handle.write_big5_combination(0x00CAu16,
+                                                      0x0304u16)
+                    }
+                    1135 => { // Bytes 0x88 0x64.
+                        handle.write_big5_combination(0x00CAu16,
+                                                      0x030Cu16)
+                    }
+                    1164 => { // Bytes 0x88 0xA3.
+                        handle.write_big5_combination(0x00EAu16,
+                                                      0x0304u16)
+                    }
+                    1166 => { // Bytes 0x88 0xA5.
+                        handle.write_big5_combination(0x00EAu16,
+                                                      0x030Cu16)
+                    }
+                    _ => { // No mapping: same malformed-sequence handling as for an out-of-range trail.
+                        if byte < 0x80 {
+                            return (DecoderResult::Malformed(1, 0),
+                                    unread_handle_trail.unread(),
+                                    handle.written());
+                        }
+                        return (DecoderResult::Malformed(2, 0),
+                                unread_handle_trail.consumed(),
+                                handle.written());
+                    }
+                }
+            } else if big5_is_astral(rebased_pointer) {
+                handle.write_astral(low_bits as u32 | // The table stores the low 16 bits; the 0x20000 base is added back.
+                                    0x20000u32)
+            } else {
+                handle.write_bmp_excl_ascii(low_bits)
+            }
+        },
+        self,
+        non_ascii,
+        byte,
+        lead_minus_offset,
+        unread_handle_trail,
+        source,
+        handle,
+        'outermost,
+        copy_ascii_from_check_space_astral,
+        check_space_astral,
+        false); // NOTE(review): meaning of this flag is defined by the macro, which is outside this chunk.
+}
+
+pub struct Big5Encoder; // Unit struct: the Big5 encoder carries no fields.
+
+impl Big5Encoder {
+    pub fn new(encoding: &'static Encoding) -> Encoder { // Wraps a `Big5Encoder` in the `Big5` variant and builds an `Encoder` for `encoding`.
+        Encoder::new(encoding, VariantEncoder::Big5(Big5Encoder))
+    }
+
+    pub fn max_buffer_length_from_utf16_without_replacement(&self,
+                                                            u16_length: usize)
+                                                            -> Option<usize> {
+        // Astral: 2 code units to 2 bytes
+        // ASCII: 1 code unit to 1 byte
+        // Other: 1 code unit to 2 bytes, the worst case; hence times two.
+        u16_length.checked_mul(2) // `None` if the multiplication overflows.
+    }
+
+    pub fn max_buffer_length_from_utf8_without_replacement(&self,
+                                                           byte_length: usize)
+                                                           -> Option<usize> {
+        // Astral: 4 bytes to 2 bytes
+        // Upper BMP: 3 bytes to 2 bytes
+        // Lower BMP: 2 bytes to 2 bytes
+        // ASCII: 1 byte to 1 byte, so the output never exceeds the input.
+        byte_length.checked_add(1) // NOTE(review): the +1 is not explained by the cases above - presumably headroom for the two-byte space check; confirm.
+    }
+
+    ascii_compatible_encoder_functions!(
+        {
+            // For simplicity, unified ideographs
+            // in the pointer range 11206...11212 are handled
+            // as Level 1 Hanzi.
+            if let Some((lead, trail)) = big5_level1_hanzi_encode(bmp) {
+                handle.write_two(lead, trail)
+            } else {
+                let pointer = if let Some(pointer) = big5_box_encode(bmp) { // Lookups are tried in order: box, then other.
+                    pointer
+                } else if let Some(pointer) = big5_other_encode(bmp) {
+                    pointer
+                } else {
+                    return (EncoderResult::unmappable_from_bmp(bmp),
+                            source.consumed(),
+                            handle.written());
+                };
+                let lead = pointer / 157 + 0x81; // 157 trail positions per lead byte.
+                let remainder = pointer % 157;
+                let trail = if remainder < 0x3F { // First 0x3F positions map to 0x40..=0x7E, the rest to 0xA1..=0xFE.
+                    remainder + 0x40
+                } else {
+                    remainder + 0x62
+                };
+                handle.write_two(lead as u8, trail as u8)
+            }
+        },
+        {
+            if in_inclusive_range32(astral as u32, 0x2008A, 0x2F8A6) { // Code points outside this range are unmappable without a lookup.
+                if let Some(rebased_pointer) = big5_astral_encode(astral as u16) { // The cast keeps only the low 16 bits.
+                    // big5_astral_encode returns rebased pointer,
+                    // so adding 0x87 instead of 0x81.
+                    let lead = rebased_pointer / 157 + 0x87;
+                    let remainder = rebased_pointer % 157;
+                    let trail = if remainder < 0x3F { // Same trail mapping as in the BMP branch.
+                        remainder + 0x40
+                    } else {
+                        remainder + 0x62
+                    };
+                    handle.write_two(lead as u8, trail as u8)
+                } else {
+                    return (EncoderResult::Unmappable(astral), source.consumed(), handle.written());
+                }
+            } else {
+                return (EncoderResult::Unmappable(astral), source.consumed(), handle.written());
+            }
+        },
+        bmp,
+        astral,
+        self,
+        source,
+        handle,
+        copy_ascii_to_check_space_two,
+        check_space_two,
+        false // NOTE(review): meaning of this flag is defined by the macro, which is outside this chunk.
+    );
+}
+
+// Any copyright to the test code below this comment is dedicated to the
+// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
+
+#[cfg(test)]
+mod tests {
+    use super::super::testing::*;
+    use super::super::*;
+
+    fn decode_big5(bytes: &[u8], expect: &str) { // Test helper: runs the shared `decode` helper with the BIG5 encoding.
+        decode(BIG5, bytes, expect);
+    }
+
+    fn encode_big5(string: &str, expect: &[u8]) { // Test helper: runs the shared `encode` helper with the BIG5 encoding.
+        encode(BIG5, string, expect);
+    }
+
+    #[test]
+    fn test_big5_decode() {
+        // Empty input
+        decode_big5(b"", &"");
+
+        // ASCII passes through unchanged
+        decode_big5(&[0x61u8, 0x62u8], &"\u{0061}\u{0062}");
+
+        // Edge cases: BMP, astral and base-plus-combining-mark outputs
+        decode_big5(&[0x87u8, 0x40u8], &"\u{43F0}");
+        decode_big5(&[0xFEu8, 0xFEu8], &"\u{79D4}");
+        decode_big5(&[0xFEu8, 0xFDu8], &"\u{2910D}");
+        decode_big5(&[0x88u8, 0x62u8], &"\u{00CA}\u{0304}");
+        decode_big5(&[0x88u8, 0x64u8], &"\u{00CA}\u{030C}");
+        decode_big5(&[0x88u8, 0x66u8], &"\u{00CA}");
+        decode_big5(&[0x88u8, 0xA3u8], &"\u{00EA}\u{0304}");
+        decode_big5(&[0x88u8, 0xA5u8], &"\u{00EA}\u{030C}");
+        decode_big5(&[0x88u8, 0xA7u8], &"\u{00EA}");
+        decode_big5(&[0x99u8, 0xD4u8], &"\u{8991}");
+        decode_big5(&[0x99u8, 0xD5u8], &"\u{27967}");
+        decode_big5(&[0x99u8, 0xD6u8], &"\u{8A29}");
+
+        // The same edge cases with an ASCII byte before and after
+        decode_big5(
+            &[0x61u8, 0x87u8, 0x40u8, 0x62u8],
+            &"\u{0061}\u{43F0}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0xFEu8, 0xFEu8, 0x62u8],
+            &"\u{0061}\u{79D4}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0xFEu8, 0xFDu8, 0x62u8],
+            &"\u{0061}\u{2910D}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x88u8, 0x62u8, 0x62u8],
+            &"\u{0061}\u{00CA}\u{0304}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x88u8, 0x64u8, 0x62u8],
+            &"\u{0061}\u{00CA}\u{030C}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x88u8, 0x66u8, 0x62u8],
+            &"\u{0061}\u{00CA}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x88u8, 0xA3u8, 0x62u8],
+            &"\u{0061}\u{00EA}\u{0304}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x88u8, 0xA5u8, 0x62u8],
+            &"\u{0061}\u{00EA}\u{030C}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x88u8, 0xA7u8, 0x62u8],
+            &"\u{0061}\u{00EA}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x99u8, 0xD4u8, 0x62u8],
+            &"\u{0061}\u{8991}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x99u8, 0xD5u8, 0x62u8],
+            &"\u{0061}\u{27967}\u{0062}",
+        );
+        decode_big5(
+            &[0x61u8, 0x99u8, 0xD6u8, 0x62u8],
+            &"\u{0061}\u{8A29}\u{0062}",
+        );
+
+        // Bad sequences: U+FFFD is emitted and a following ASCII byte is kept
+        decode_big5(&[0x80u8, 0x61u8], &"\u{FFFD}\u{0061}");
+        decode_big5(&[0xFFu8, 0x61u8], &"\u{FFFD}\u{0061}");
+        decode_big5(&[0xFEu8, 0x39u8], &"\u{FFFD}\u{0039}");
+        decode_big5(&[0x87u8, 0x66u8], &"\u{FFFD}\u{0066}");
+        decode_big5(&[0x81u8, 0x40u8], &"\u{FFFD}\u{0040}");
+        decode_big5(&[0x61u8, 0x81u8], &"\u{0061}\u{FFFD}");
+    }
+
+    #[test]
+    fn test_big5_encode() {
+        // Empty input
+        encode_big5("", b"");
+
+        // ASCII passes through unchanged
+        encode_big5("\u{0061}\u{0062}", b"\x61\x62");
+
+        // Edge cases; the first two expect a decimal numeric character reference
+        encode_big5("\u{9EA6}\u{0061}", b"&#40614;\x61");
+        encode_big5("\u{2626B}\u{0061}", b"&#156267;\x61");
+        encode_big5("\u{3000}", b"\xA1\x40");
+        encode_big5("\u{20AC}", b"\xA3\xE1");
+        encode_big5("\u{4E00}", b"\xA4\x40");
+        encode_big5("\u{27607}", b"\xC8\xA4");
+        encode_big5("\u{FFE2}", b"\xC8\xCD");
+        encode_big5("\u{79D4}", b"\xFE\xFE");
+
+        // Not in index: also expects a numeric character reference
+        encode_big5("\u{2603}\u{0061}", b"&#9731;\x61");
+
+        // Astral code points with duplicate low bits (0x03B5, 0x5605)
+        encode_big5("\u{203B5}", b"\xFD\x6A");
+        encode_big5("\u{25605}", b"\xFE\x46");
+
+        // Prefer the last pointer for this code point
+        encode_big5("\u{2550}", b"\xF9\xF9");
+    }
+
+    #[test]
+    fn test_big5_decode_all() { // Decodes a bundled input file and compares it with the bundled reference text.
+        let input = include_bytes!("test_data/big5_in.txt");
+        let expectation = include_str!("test_data/big5_in_ref.txt");
+        let (cow, had_errors) = BIG5.decode_without_bom_handling(input);
+        assert!(had_errors, "Should have had errors."); // The input file is expected to contain malformed sequences.
+        assert_eq!(&cow[..], expectation);
+    }
+
+    #[test]
+    fn test_big5_encode_all() { // Encodes a bundled input file and compares it with the bundled reference bytes.
+        let input = include_str!("test_data/big5_out.txt");
+        let expectation = include_bytes!("test_data/big5_out_ref.txt");
+        let (cow, encoding, had_errors) = BIG5.encode(input);
+        assert!(!had_errors, "Should not have had errors."); // Every character in the input file must be mappable.
+        assert_eq!(encoding, BIG5); // The reported output encoding must still be BIG5.
+        assert_eq!(&cow[..], &expectation[..]);
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/encoding_rs/src/data.rs
@@ -0,0 +1,19582 @@
+// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
+// Instead, please regenerate using generate-encoding-data.py
+
+pub const IBM866_DATA: &'static [u16; 128] = // Presumably the code points for IBM866 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A,
+      0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425,
+      0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 0x0430,
+      0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B,
+      0x043C, 0x043D, 0x043E, 0x043F, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562,
+      0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, 0x2514, 0x2534,
+      0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560,
+      0x2550, 0x256C, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
+      0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, 0x0440, 0x0441, 0x0442,
+      0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D,
+      0x044E, 0x044F, 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, 0x00B0,
+      0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0];
+
+pub const ISO_8859_2_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-2 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, 0x00A8, 0x0160, 0x015E, 0x0164,
+      0x0179, 0x00AD, 0x017D, 0x017B, 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B,
+      0x02C7, 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, 0x0154, 0x00C1,
+      0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A,
+      0x00CD, 0x00CE, 0x010E, 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
+      0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, 0x0155, 0x00E1, 0x00E2,
+      0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED,
+      0x00EE, 0x010F, 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159,
+      0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9];
+
+pub const ISO_8859_3_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-3 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0126, 0x02D8, 0x00A3, 0x00A4, 0x0000, 0x0124, 0x00A7, 0x00A8, 0x0130, 0x015E, 0x011E,
+      0x0134, 0x00AD, 0x0000, 0x017B, 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125,
+      0x00B7, 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0x0000, 0x017C, 0x00C0, 0x00C1,
+      0x00C2, 0x0000, 0x00C4, 0x010A, 0x0108, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC,
+      0x00CD, 0x00CE, 0x00CF, 0x0000, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
+      0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, 0x00E0, 0x00E1, 0x00E2,
+      0x0000, 0x00E4, 0x010B, 0x0109, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
+      0x00EE, 0x00EF, 0x0000, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, 0x011D,
+      0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9];
+
+pub const ISO_8859_4_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-4 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, 0x00A8, 0x0160, 0x0112, 0x0122,
+      0x0166, 0x00AD, 0x017D, 0x00AF, 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C,
+      0x02C7, 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, 0x0100, 0x00C1,
+      0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116,
+      0x00CD, 0x00CE, 0x012A, 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+      0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, 0x0101, 0x00E1, 0x00E2,
+      0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED,
+      0x00EE, 0x012B, 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
+      0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9];
+
+pub const ISO_8859_5_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-5 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B,
+      0x040C, 0x00AD, 0x040E, 0x040F, 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416,
+      0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x0420, 0x0421,
+      0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C,
+      0x042D, 0x042E, 0x042F, 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+      0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x0440, 0x0441, 0x0442,
+      0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D,
+      0x044E, 0x044F, 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458,
+      0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F];
+
+pub const ISO_8859_6_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-6 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0000, 0x0000, 0x0000, 0x00A4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+      0x060C, 0x00AD, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+      0x0000, 0x0000, 0x0000, 0x0000, 0x061B, 0x0000, 0x0000, 0x0000, 0x061F, 0x0000, 0x0621,
+      0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
+      0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
+      0x0638, 0x0639, 0x063A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0640, 0x0641, 0x0642,
+      0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D,
+      0x064E, 0x064F, 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000];
+
+pub const ISO_8859_7_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-7 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x037A, 0x00AB,
+      0x00AC, 0x00AD, 0x0000, 0x2015, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386,
+      0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, 0x0390, 0x0391,
+      0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C,
+      0x039D, 0x039E, 0x039F, 0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
+      0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, 0x03B0, 0x03B1, 0x03B2,
+      0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD,
+      0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8,
+      0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000];
+
+pub const ISO_8859_8_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-8 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0000, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x0000, 0x0000, 0x0000,
+      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 0x05D0, 0x05D1, 0x05D2,
+      0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD,
+      0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8,
+      0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000];
+
+pub const ISO_8859_10_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-10 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, 0x013B, 0x0110, 0x0160, 0x0166,
+      0x017D, 0x00AD, 0x016A, 0x014A, 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137,
+      0x00B7, 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, 0x0100, 0x00C1,
+      0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116,
+      0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
+      0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x0101, 0x00E1, 0x00E2,
+      0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED,
+      0x00EE, 0x00EF, 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8,
+      0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138];
+
+pub const ISO_8859_13_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-13 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00C6, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6,
+      0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, 0x0104, 0x012E,
+      0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122,
+      0x0136, 0x012A, 0x013B, 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
+      0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, 0x0105, 0x012F, 0x0101,
+      0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137,
+      0x012B, 0x013C, 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173,
+      0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019];
+
+pub const ISO_8859_14_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-14 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B,
+      0x1EF2, 0x00AD, 0x00AE, 0x0178, 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6,
+      0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, 0x00C0, 0x00C1,
+      0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC,
+      0x00CD, 0x00CE, 0x00CF, 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
+      0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, 0x00E0, 0x00E1, 0x00E2,
+      0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
+      0x00EE, 0x00EF, 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8,
+      0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF];
+
+pub const ISO_8859_15_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-15 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x00AA, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6,
+      0x00B7, 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF, 0x00C0, 0x00C1,
+      0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC,
+      0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+      0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0, 0x00E1, 0x00E2,
+      0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
+      0x00EE, 0x00EF, 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
+      0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF];
+
+pub const ISO_8859_16_DATA: &'static [u16; 128] = // Presumably the code points for ISO-8859-16 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+      0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x0218, 0x00AB,
+      0x0179, 0x00AD, 0x017A, 0x017B, 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6,
+      0x00B7, 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, 0x00C0, 0x00C1,
+      0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC,
+      0x00CD, 0x00CE, 0x00CF, 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
+      0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, 0x00E0, 0x00E1, 0x00E2,
+      0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
+      0x00EE, 0x00EF, 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171,
+      0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF];
+
+pub const KOI8_R_DATA: &'static [u16; 128] = // Presumably the code points for KOI8-R bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C,
+      0x2580, 0x2584, 0x2588, 0x258C, 0x2590, 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219,
+      0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, 0x2550,
+      0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255A,
+      0x255B, 0x255C, 0x255D, 0x255E, 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564,
+      0x2565, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, 0x044E, 0x0430,
+      0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B,
+      0x043C, 0x043D, 0x043E, 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
+      0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, 0x042E, 0x0410, 0x0411,
+      0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C,
+      0x041D, 0x041E, 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C,
+      0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A];
+
+pub const KOI8_U_DATA: &'static [u16; 128] = // Presumably the code points for KOI8-U bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C,
+      0x2580, 0x2584, 0x2588, 0x258C, 0x2590, 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219,
+      0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, 0x2550,
+      0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, 0x2557, 0x2558, 0x2559, 0x255A,
+      0x255B, 0x0491, 0x045E, 0x255E, 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406,
+      0x0407, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x040E, 0x00A9, 0x044E, 0x0430,
+      0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B,
+      0x043C, 0x043D, 0x043E, 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
+      0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, 0x042E, 0x0410, 0x0411,
+      0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C,
+      0x041D, 0x041E, 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C,
+      0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A];
+
+pub const MACINTOSH_DATA: &'static [u16; 128] = // Presumably the code points for macintosh bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4,
+      0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF,
+      0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, 0x2020,
+      0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4,
+      0x00A8, 0x2260, 0x00C6, 0x00D8, 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202,
+      0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, 0x00BF, 0x00A1,
+      0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3,
+      0x00D5, 0x0152, 0x0153, 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
+      0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, 0x2021, 0x00B7, 0x201A,
+      0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC,
+      0x00D3, 0x00D4, 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF,
+      0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7];
+
+pub const WINDOWS_874_DATA: &'static [u16; 128] = // Presumably the code points for windows-874 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x0082, 0x0083, 0x0084, 0x2026, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A,
+      0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B,
+      0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16,
+      0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, 0x0E20, 0x0E21,
+      0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C,
+      0x0E2D, 0x0E2E, 0x0E2F, 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
+      0x0E38, 0x0E39, 0x0E3A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0E3F, 0x0E40, 0x0E41, 0x0E42,
+      0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D,
+      0x0E4E, 0x0E4F, 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58,
+      0x0E59, 0x0E5A, 0x0E5B, 0x0000, 0x0000, 0x0000, 0x0000];
+
+pub const WINDOWS_1250_DATA: &'static [u16; 128] = // Presumably the code points for windows-1250 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x0160,
+      0x2039, 0x015A, 0x0164, 0x017D, 0x0179, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x0098, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, 0x00A0,
+      0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x015E, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x017B, 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, 0x0154, 0x00C1,
+      0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A,
+      0x00CD, 0x00CE, 0x010E, 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
+      0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, 0x0155, 0x00E1, 0x00E2,
+      0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED,
+      0x00EE, 0x010F, 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159,
+      0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9];
+
+pub const WINDOWS_1251_DATA: &'static [u16; 128] = // Presumably the code points for windows-1251 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x0409,
+      0x2039, 0x040A, 0x040C, 0x040B, 0x040F, 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x0098, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, 0x00A0,
+      0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, 0x0401, 0x00A9, 0x0404, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x0407, 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6,
+      0x00B7, 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, 0x0410, 0x0411,
+      0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C,
+      0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+      0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 0x0430, 0x0431, 0x0432,
+      0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D,
+      0x043E, 0x043F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448,
+      0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F];
+
+pub const WINDOWS_1252_DATA: &'static [u16; 128] = // Presumably the code points for windows-1252 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160,
+      0x2039, 0x0152, 0x008D, 0x017D, 0x008F, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, 0x00A0,
+      0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1,
+      0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC,
+      0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+      0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0, 0x00E1, 0x00E2,
+      0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
+      0x00EE, 0x00EF, 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
+      0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF];
+
+pub const WINDOWS_1253_DATA: &'static [u16; 128] = // Presumably the code points for windows-1253 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x008A,
+      0x2039, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x0000, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x2015, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6,
+      0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, 0x0390, 0x0391,
+      0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C,
+      0x039D, 0x039E, 0x039F, 0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
+      0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, 0x03B0, 0x03B1, 0x03B2,
+      0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD,
+      0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8,
+      0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000];
+
+pub const WINDOWS_1254_DATA: &'static [u16; 128] = // Presumably the code points for windows-1254 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160,
+      0x2039, 0x0152, 0x008D, 0x008E, 0x008F, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178, 0x00A0,
+      0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1,
+      0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC,
+      0x00CD, 0x00CE, 0x00CF, 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+      0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, 0x00E0, 0x00E1, 0x00E2,
+      0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
+      0x00EE, 0x00EF, 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
+      0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF];
+
+pub const WINDOWS_1255_DATA: &'static [u16; 128] = // Presumably the code points for windows-1255 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x008A,
+      0x2039, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x02DC, 0x2122, 0x009A, 0x203A, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0,
+      0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x05B0, 0x05B1,
+      0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC,
+      0x05BD, 0x05BE, 0x05BF, 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
+      0x05F4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x05D0, 0x05D1, 0x05D2,
+      0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD,
+      0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8,
+      0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000];
+
+pub const WINDOWS_1256_DATA: &'static [u16; 128] = // Presumably the code points for windows-1256 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0679,
+      0x2039, 0x0152, 0x0686, 0x0698, 0x0688, 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, 0x00A0,
+      0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x06BE, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, 0x06C1, 0x0621,
+      0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
+      0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
+      0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, 0x00E0, 0x0644, 0x00E2,
+      0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A,
+      0x00EE, 0x00EF, 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, 0x0651,
+      0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2];
+
+pub const WINDOWS_1257_DATA: &'static [u16; 128] = // Presumably the code points for windows-1257 bytes 0x80..=0xFF, with 0x0000 marking unmapped bytes; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x201A, 0x0083, 0x201E, 0x2026, 0x2020, 0x2021, 0x0088, 0x2030, 0x008A,
+      0x2039, 0x008C, 0x00A8, 0x02C7, 0x00B8, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x0098, 0x2122, 0x009A, 0x203A, 0x009C, 0x00AF, 0x02DB, 0x009F, 0x00A0,
+      0x0000, 0x00A2, 0x00A3, 0x00A4, 0x0000, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00C6, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, 0x0104, 0x012E,
+      0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122,
+      0x0136, 0x012A, 0x013B, 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
+      0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, 0x0105, 0x012F, 0x0101,
+      0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137,
+      0x012B, 0x013C, 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173,
+      0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9];
+
+pub const WINDOWS_1258_DATA: &'static [u16; 128] = // Presumably the code points for windows-1258 bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x008A,
+      0x2039, 0x0152, 0x008D, 0x008E, 0x008F, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022,
+      0x2013, 0x2014, 0x02DC, 0x2122, 0x009A, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178, 0x00A0,
+      0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB,
+      0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6,
+      0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1,
+      0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300,
+      0x00CD, 0x00CE, 0x00CF, 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
+      0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, 0x00E0, 0x00E1, 0x00E2,
+      0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED,
+      0x00EE, 0x00EF, 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, 0x00F8,
+      0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF];
+
+pub const X_MAC_CYRILLIC_DATA: &'static [u16; 128] = // Presumably the code points for x-mac-cyrillic bytes 0x80..=0xFF; confirm in the consumer.
+    &[0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A,
+      0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425,
+      0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 0x2020,
+      0x00B0, 0x0490, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x0406, 0x00AE, 0x00A9, 0x2122, 0x0402,
+      0x0452, 0x2260, 0x0403, 0x0453, 0x221E, 0x00B1, 0x2264, 0x2265, 0x0456, 0x00B5, 0x0491,
+      0x0408, 0x0404, 0x0454, 0x0407, 0x0457, 0x0409, 0x0459, 0x040A, 0x045A, 0x0458, 0x0405,
+      0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x040B, 0x045B,
+      0x040C, 0x045C, 0x0455, 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x201E,
+      0x040E, 0x045E, 0x040F, 0x045F, 0x2116, 0x0401, 0x0451, 0x044F, 0x0430, 0x0431, 0x0432,
+      0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D,
+      0x043E, 0x043F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448,
+      0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x20AC];
+
+static BIG5_ASTRALNESS: [u32; 589] =
+    [0x445F0520, 0xB882520F, 0x400000F8, 0x044EA920, 0x00000000, 0x00010B34, 0x00000000,
+     0x00000000, 0x00000000, 0x0C000000, 0x00000040, 0x00000000, 0x00580400, 0x0000003C,
+     0x5C800000, 0xBBF3DCAD, 0xEDEE43C9, 0xC1260FA4, 0xEFF2769B, 0xF7FAFBDE, 0xAF44320F,
+     0xFEFDEFFE, 0x00B06011, 0x81192100, 0xA8881020, 0x24692160, 0xC4894400, 0x40030000,
+     0x84430035, 0x68935131, 0x00000202, 0x00000000, 0x00000000, 0x01004000, 0x8264AC80,
+     0x90A18C40, 0xD4605004, 0x182AD200, 0x9735689D, 0x20F8D84F, 0x281C82C4, 0x02947582,
+     0xA1A04038, 0x4D7110C5, 0xB9980DC4, 0x43015B5E, 0x7803999F, 0x081FE220, 0x40C90189,
+     0x9A0D0307, 0x4FD15290, 0x2102037C, 0x136EC020, 0xF5DBB583, 0x4F9077E5, 0x17369A17,
+     0xBA6E000C, 0x54602F09, 0x68781E00, 0x9E567022, 0xB78EF325, 0xE6EF0B17, 0x1B5B1777,
+     0x8D9810C1, 0xCFB61812, 0x1A723101, 0xC0B62C5B, 0x5CC3E220, 0xDAE01B82, 0x40D685CE,
+     0xD00B54A9, 0x4044480F, 0x40D40910, 0x1E643BAE, 0x08172B44, 0x18368286, 0x9CE789A0,
+     0x0384085B, 0xEC90DA02, 0xBF8587DB, 0x8E9DADA2, 0x2520989D, 0x9F460046, 0xC9D5E37F,
+     0x4DD6F484, 0x18B6E306, 0x00000912, 0x00000400, 0x00000020, 0x00000200, 0x00000000,
+     0x20000000, 0x00000000, 0x00000000, 0x00000000, 0x8E000008, 0x3F5F6E78, 0xF73DDD7E,
+     0xAEFC9BFC, 0x6B8FFFD2, 0xBABFE7F1, 0xEF176EAE, 0xB63EA7DC, 0x92C977B5, 0x4AB6A5D7,
+     0x81B99B6E, 0x54FE674D, 0xFE629BF1, 0x36D7526F, 0x4529BFFE, 0x65BDF981, 0x003942A6,
+     0x8F604200, 0x40409400, 0xD1D7AFD2, 0x98A000DB, 0x52067BB2, 0xC8169820, 0x0000093D,
+     0x00000001, 0x08B20A01, 0x00000080, 0x14208804, 0x00004C10, 0x44A10004, 0x01410010,
+     0xF0400800, 0x00024FDF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x82800000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xBE2B0680, 0xFEB016BE, 0x0008100C,
+     0x990829CA, 0x0C0B4010, 0x0025603B, 0x1882D42A, 0x822DDE48, 0x18872415, 0x2180E118,
+     0x7892D301, 0x93038213, 0x34C550CD, 0xADA228C0, 0x5818997C, 0x42C42212, 0x3E8E53A2,
+     0x4542E1CD, 0x0F7E9034, 0x01748100, 0x39858212, 0xDE002947, 0x88014162, 0x819D43A1,
+     0x0048A1C8];
+
+pub static BIG5_LOW_BITS: [u16; 18840] =
+    [0x43F0, 0x4C32, 0x4603, 0x45A6, 0x4578, 0x7267, 0x4D77, 0x45B3, 0x7CB1, 0x4CE2, 0x7CC5,
+     0x3B95, 0x4736, 0x4744, 0x4C47, 0x4C40, 0x42BF, 0x3617, 0x7352, 0x6E8B, 0x70D2, 0x4C57,
+     0xA351, 0x474F, 0x45DA, 0x4C85, 0x7C6C, 0x4D07, 0x4AA4, 0x46A1, 0x6B23, 0x7225, 0x5A54,
+     0x1A63, 0x3E06, 0x3F61, 0x664D, 0x56FB, 0x0000, 0x7D95, 0x591D, 0x8BB9, 0x3DF4, 0x9734,
+     0x7BEF, 0x5BDB, 0x1D5E, 0x5AA4, 0x3625, 0x9EB0, 0x5AD1, 0x5BB7, 0x5CFC, 0x676E, 0x8593,
+     0x9945, 0x7461, 0x749D, 0x3875, 0x1D53, 0x369E, 0x6021, 0x3EEC, 0x58DE, 0x3AF5, 0x7AFC,
+     0x9F97, 0x4161, 0x890D, 0x31EA, 0x0A8A, 0x325E, 0x430A, 0x8484, 0x9F96, 0x942F, 0x4930,
+     0x8613, 0x5896, 0x974A, 0x9218, 0x79D0, 0x7A32, 0x6660, 0x6A29, 0x889D, 0x744C, 0x7BC5,
+     0x6782, 0x7A2C, 0x524F, 0x9046, 0x34E6, 0x73C4, 0x5DB9, 0x74C6, 0x9FC7, 0x57B3, 0x492F,
+     0x544C, 0x4131, 0x368E, 0x5818, 0x7A72, 0x7B65, 0x8B8F, 0x46AE, 0x6E88, 0x4181, 0x5D99,
+     0x7BAE, 0x24BC, 0x9FC8, 0x24C1, 0x24C9, 0x24CC, 0x9FC9, 0x8504, 0x35BB, 0x40B4, 0x9FCA,
+     0x44E1, 0xADFF, 0x62C1, 0x706E, 0x9FCB, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x31C0, 0x31C1, 0x31C2, 0x31C3, 0x31C4, 0x010C, 0x31C5, 0x00D1,
+     0x00CD, 0x31C6, 0x31C7, 0x00CB, 0x1FE8, 0x31C8, 0x00CA, 0x31C9, 0x31CA, 0x31CB, 0x31CC,
+     0x010E, 0x31CD, 0x31CE, 0x0100, 0x00C1, 0x01CD, 0x00C0, 0x0112, 0x00C9, 0x011A, 0x00C8,
+     0x014C, 0x00D3, 0x01D1, 0x00D2, 0x0000, 0x1EBE, 0x0000, 0x1EC0, 0x00CA, 0x0101, 0x00E1,
+     0x01CE, 0x00E0, 0x0251, 0x0113, 0x00E9, 0x011B, 0x00E8, 0x012B, 0x00ED, 0x01D0, 0x00EC,
+     0x014D, 0x00F3, 0x01D2, 0x00F2, 0x016B, 0x00FA, 0x01D4, 0x00F9, 0x01D6, 0x01D8, 0x01DA,
+     0x01DC, 0x00FC, 0x0000, 0x1EBF, 0x0000, 0x1EC1, 0x00EA, 0x0261, 0x23DA, 0x23DB, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xA3A9, 0x1145, 0x0000, 0x650A, 0x0000,
+     0x0000, 0x4E3D, 0x6EDD, 0x9D4E, 0x91DF, 0x0000, 0x0000, 0x7735, 0x6491, 0x4F1A, 0x4F28,
+     0x4FA8, 0x5156, 0x5174, 0x519C, 0x51E4, 0x52A1, 0x52A8, 0x533B, 0x534E, 0x53D1, 0x53D8,
+     0x56E2, 0x58F0, 0x5904, 0x5907, 0x5932, 0x5934, 0x5B66, 0x5B9E, 0x5B9F, 0x5C9A, 0x5E86,
+     0x603B, 0x6589, 0x67FE, 0x6804, 0x6865, 0x6D4E, 0x70BC, 0x7535, 0x7EA4, 0x7EAC, 0x7EBA,
+     0x7EC7, 0x7ECF, 0x7EDF, 0x7F06, 0x7F37, 0x827A, 0x82CF, 0x836F, 0x89C6, 0x8BBE, 0x8BE2,
+     0x8F66, 0x8F67, 0x8F6E, 0x7411, 0x7CFC, 0x7DCD, 0x6946, 0x7AC9, 0x5227, 0x0000, 0x0000,
+     0x0000, 0x0000, 0x918C, 0x78B8, 0x915E, 0x80BC, 0x0000, 0x8D0B, 0x80F6, 0x09E7, 0x0000,
+     0x0000, 0x809F, 0x9EC7, 0x4CCD, 0x9DC9, 0x9E0C, 0x4C3E, 0x9DF6, 0x700E, 0x9E0A, 0xA133,
+     0x35C1, 0x0000, 0x6E9A, 0x823E, 0x7519, 0x0000, 0x4911, 0x9A6C, 0x9A8F, 0x9F99, 0x7987,
+     0x846C, 0x1DCA, 0x05D0, 0x2AE6, 0x4E24, 0x4E81, 0x4E80, 0x4E87, 0x4EBF, 0x4EEB, 0x4F37,
+     0x344C, 0x4FBD, 0x3E48, 0x5003, 0x5088, 0x347D, 0x3493, 0x34A5, 0x5186, 0x5905, 0x51DB,
+     0x51FC, 0x5205, 0x4E89, 0x5279, 0x5290, 0x5327, 0x35C7, 0x53A9, 0x3551, 0x53B0, 0x3553,
+     0x53C2, 0x5423, 0x356D, 0x3572, 0x3681, 0x5493, 0x54A3, 0x54B4, 0x54B9, 0x54D0, 0x54EF,
+     0x5518, 0x5523, 0x5528, 0x3598, 0x553F, 0x35A5, 0x35BF, 0x55D7, 0x35C5, 0x7D84, 0x5525,
+     0x0000, 0x0C42, 0x0D15, 0x512B, 0x5590, 0x2CC6, 0x39EC, 0x0341, 0x8E46, 0x4DB8, 0x94E5,
+     0x4053, 0x80BE, 0x777A, 0x2C38, 0x3A34, 0x47D5, 0x815D, 0x69F2, 0x4DEA, 0x64DD, 0x0D7C,
+     0x0FB4, 0x0CD5, 0x10F4, 0x648D, 0x8E7E, 0x0E96, 0x0C0B, 0x0F64, 0x2CA9, 0x8256, 0x44D3,
+     0x0000, 0x0D46, 0x9A4D, 0x80E9, 0x47F4, 0x4EA7, 0x2CC2, 0x9AB2, 0x3A67, 0x95F4, 0x3FED,
+     0x3506, 0x52C7, 0x97D4, 0x78C8, 0x2D44, 0x9D6E, 0x9815, 0x0000, 0x43D9, 0x60A5, 0x64B4,
+     0x54E3, 0x2D4C, 0x2BCA, 0x1077, 0x39FB, 0x106F, 0x66DA, 0x6716, 0x79A0, 0x64EA, 0x5052,
+     0x0C43, 0x8E68, 0x21A1, 0x8B4C, 0x0731, 0x0000, 0x480B, 0x01A9, 0x3FFA, 0x5873, 0x2D8D,
+     0x0000, 0x45C8, 0x04FC, 0x6097, 0x0F4C, 0x0D96, 0x5579, 0x40BB, 0x43BA, 0x0000, 0x4AB4,
+     0x2A66, 0x109D, 0x81AA, 0x98F5, 0x0D9C, 0x6379, 0x39FE, 0x2775, 0x8DC0, 0x56A1, 0x647C,
+     0x3E43, 0x0000, 0xA601, 0x0E09, 0x2ACF, 0x2CC9, 0x0000, 0x10C8, 0x39C2, 0x3992, 0x3A06,
+     0x829B, 0x3578, 0x5E49, 0x20C7, 0x5652, 0x0F31, 0x2CB2, 0x9720, 0x34BC, 0x6C3D, 0x4E3B,
+     0x0000, 0x0000, 0x7574, 0x2E8B, 0x2208, 0xA65B, 0x8CCD, 0x0E7A, 0x0C34, 0x681C, 0x7F93,
+     0x10CF, 0x2803, 0x2939, 0x35FB, 0x51E3, 0x0E8C, 0x0F8D, 0x0EAA, 0x3F93, 0x0F30, 0x0D47,
+     0x114F, 0x0E4C, 0x0000, 0x0EAB, 0x0BA9, 0x0D48, 0x10C0, 0x113D, 0x3FF9, 0x2696, 0x6432,
+     0x0FAD, 0x33F4, 0x7639, 0x2BCE, 0x0D7E, 0x0D7F, 0x2C51, 0x2C55, 0x3A18, 0x0E98, 0x10C7,
+     0x0F2E, 0xA632, 0x6B50, 0x8CD2, 0x8D99, 0x8CCA, 0x95AA, 0x54CC, 0x82C4, 0x55B9, 0x0000,
+     0x9EC3, 0x9C26, 0x9AB6, 0x775E, 0x2DEE, 0x7140, 0x816D, 0x80EC, 0x5C1C, 0x6572, 0x8134,
+     0x3797, 0x535F, 0x80BD, 0x91B6, 0x0EFA, 0x0E0F, 0x0E77, 0x0EFB, 0x35DD, 0x4DEB, 0x3609,
+     0x0CD6, 0x56AF, 0x27B5, 0x10C9, 0x0E10, 0x0E78, 0x1078, 0x1148, 0x8207, 0x1455, 0x0E79,
+     0x4E50, 0x2DA4, 0x5A54, 0x101D, 0x101E, 0x10F5, 0x10F6, 0x579C, 0x0E11, 0x7694, 0x82CD,
+     0x0FB5, 0x0E7B, 0x517E, 0x3703, 0x0FB6, 0x1180, 0x52D8, 0xA2BD, 0x49DA, 0x183A, 0x4177,
+     0x827C, 0x5899, 0x5268, 0x361A, 0x573D, 0x7BB2, 0x5B68, 0x4800, 0x4B2C, 0x9F27, 0x49E7,
+     0x9C1F, 0x9B8D, 0x5B74, 0x313D, 0x55FB, 0x35F2, 0x5689, 0x4E28, 0x5902, 0x1BC1, 0xF878,
+     0x9751, 0x0086, 0x4E5B, 0x4EBB, 0x353E, 0x5C23, 0x5F51, 0x5FC4, 0x38FA, 0x624C, 0x6535,
+     0x6B7A, 0x6C35, 0x6C3A, 0x706C, 0x722B, 0x4E2C, 0x72AD, 0x48E9, 0x7F52, 0x793B, 0x7CF9,
+     0x7F53, 0x626A, 0x34C1, 0x0000, 0x634B, 0x8002, 0x8080, 0x6612, 0x6951, 0x535D, 0x8864,
+     0x89C1, 0x78B2, 0x8BA0, 0x8D1D, 0x9485, 0x9578, 0x957F, 0x95E8, 0x8E0F, 0x97E6, 0x9875,
+     0x98CE, 0x98DE, 0x9963, 0x9810, 0x9C7C, 0x9E1F, 0x9EC4, 0x6B6F, 0xF907, 0x4E37, 0x0087,
+     0x961D, 0x6237, 0x94A2, 0x0000, 0x503B, 0x6DFE, 0x9C73, 0x9FA6, 0x3DC9, 0x888F, 0x414E,
+     0x7077, 0x5CF5, 0x4B20, 0x51CD, 0x3559, 0x5D30, 0x6122, 0x8A32, 0x8FA7, 0x91F6, 0x7191,
+     0x6719, 0x73BA, 0x3281, 0xA107, 0x3C8B, 0x1980, 0x4B10, 0x78E4, 0x7402, 0x51AE, 0x870F,
+     0x4009, 0x6A63, 0xA2BA, 0x4223, 0x860F, 0x0A6F, 0x7A2A, 0x9947, 0x8AEA, 0x9755, 0x704D,
+     0x5324, 0x207E, 0x93F4, 0x76D9, 0x89E3, 0x9FA7, 0x77DD, 0x4EA3, 0x4FF0, 0x50BC, 0x4E2F,
+     0x4F17, 0x9FA8, 0x5434, 0x7D8B, 0x5892, 0x58D0, 0x1DB6, 0x5E92, 0x5E99, 0x5FC2, 0x2712,
+     0x658B, 0x33F9, 0x6919, 0x6A43, 0x3C63, 0x6CFF, 0x0000, 0x7200, 0x4505, 0x738C, 0x3EDB,
+     0x4A13, 0x5B15, 0x74B9, 0x8B83, 0x5CA4, 0x5695, 0x7A93, 0x7BEC, 0x7CC3, 0x7E6C, 0x82F8,
+     0x8597, 0x9FA9, 0x8890, 0x9FAA, 0x8EB9, 0x9FAB, 0x8FCF, 0x855F, 0x99E0, 0x9221, 0x9FAC,
+     0x8DB9, 0x143F, 0x4071, 0x42A2, 0x5A1A, 0x0000, 0x0000, 0x0000, 0x9868, 0x676B, 0x4276,
+     0x573D, 0x0000, 0x85D6, 0x497B, 0x82BF, 0x710D, 0x4C81, 0x6D74, 0x5D7B, 0x6B15, 0x6FBE,
+     0x9FAD, 0x9FAE, 0x5B96, 0x9FAF, 0x66E7, 0x7E5B, 0x6E57, 0x79CA, 0x3D88, 0x44C3, 0x3256,
+     0x2796, 0x439A, 0x4536, 0x0000, 0x5CD5, 0x3B1A, 0x8AF9, 0x5C78, 0x3D12, 0x3551, 0x5D78,
+     0x9FB2, 0x7157, 0x4558, 0x40EC, 0x1E23, 0x4C77, 0x3978, 0x344A, 0x01A4, 0x6C41, 0x8ACC,
+     0x4FB4, 0x0239, 0x59BF, 0x816C, 0x9856, 0x98FA, 0x5F3B, 0x0B9F, 0x0000, 0x21C1, 0x896D,
+     0x4102, 0x46BB, 0x9079, 0x3F07, 0x9FB3, 0xA1B5, 0x40F8, 0x37D6, 0x46F7, 0x6C46, 0x417C,
+     0x86B2, 0x73FF, 0x456D, 0x38D4, 0x549A, 0x4561, 0x451B, 0x4D89, 0x4C7B, 0x4D76, 0x45EA,
+     0x3FC8, 0x4B0F, 0x3661, 0x44DE, 0x44BD, 0x41ED, 0x5D3E, 0x5D48, 0x5D56, 0x3DFC, 0x380F,
+     0x5DA4, 0x5DB9, 0x3820, 0x3838, 0x5E42, 0x