Bug 1441204 - Upgrade unicode-segmentation from 1.1.0 to 1.2.1. r?maja_zf draft
author Andreas Tolfsen <ato@sny.no>
Thu, 14 Jun 2018 13:01:20 -0700
changeset 807592 a7e0bb330b45a3b8fe4de3ec21ae35614b364990
parent 807591 43e606fdb03c76830f99db4a80c5781d472a223f
child 807593 2f41b1a51e9a4589d62bf39286e237ca44e55bb1
push id 113168
push user bmo:ato@sny.no
push date Fri, 15 Jun 2018 03:04:58 +0000
reviewers maja_zf
bugs 1441204
milestone 62.0a1
Bug 1441204 - Upgrade unicode-segmentation from 1.1.0 to 1.2.1. r?maja_zf MozReview-Commit-ID: 1KUU2U7AVz4
Cargo.lock
testing/webdriver/Cargo.toml
third_party/rust/unicode-segmentation/.cargo-checksum.json
third_party/rust/unicode-segmentation/Cargo.toml
third_party/rust/unicode-segmentation/README.md
third_party/rust/unicode-segmentation/scripts/unicode.py
third_party/rust/unicode-segmentation/src/grapheme.rs
third_party/rust/unicode-segmentation/src/lib.rs
third_party/rust/unicode-segmentation/src/tables.rs
third_party/rust/unicode-segmentation/src/test.rs
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1945,17 +1945,17 @@ dependencies = [
  "smallbitvec 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "smallvec 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "style_derive 0.0.1",
  "style_traits 0.0.1",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
  "toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "uluru 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "style_derive"
 version = "0.0.1"
 dependencies = [
@@ -2205,17 +2205,17 @@ dependencies = [
 
 [[package]]
 name = "unicode-normalization"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "unicode-segmentation"
-version = "1.1.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "unicode-width"
 version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
@@ -2289,17 +2289,17 @@ name = "webdriver"
 version = "0.35.1"
 dependencies = [
  "cookie 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
- "unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webidl"
 version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
@@ -2702,17 +2702,17 @@ dependencies = [
 "checksum toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a7540f4ffc193e0d3c94121edb19b055670d369f77d5804db11ae053a45b6e7e"
 "checksum traitobject 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "efd1f82c56340fdf16f2a953d7bda4f8fdffba13d93b00844c25572110b26079"
 "checksum typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1410f6f91f21d1612654e7cc69193b0334f909dcf2c790c4826254fbb86f8887"
 "checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
 "checksum uluru 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "519130f0ea964ba540a9d8af1373738c2226f1d465eda07e61db29feb5479db9"
 "checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33"
 "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
 "checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f"
-"checksum unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18127285758f0e2c6cf325bb3f3d138a12fee27de4f23e146cd6a179f26c2cf3"
+"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
 "checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f"
 "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
 "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
 "checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7"
 "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
 "checksum uuid 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)" = "78c590b5bd79ed10aad8fb75f078a59d8db445af6c743e55c4a53227fc01c13f"
 "checksum vcpkg 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9e0a7d8bed3178a8fb112199d466eeca9ed09a14ba8ad67718179b4fd5487d0b"
 "checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c"
--- a/testing/webdriver/Cargo.toml
+++ b/testing/webdriver/Cargo.toml
@@ -11,10 +11,10 @@ license = "MPL-2.0"
 
 [dependencies]
 cookie = { version = "0.10", default-features = false }
 hyper = "0.10"
 log = "0.4"
 regex = "1.0"
 rustc-serialize = "0.3"
 time = "0.1"
-unicode-segmentation = "1.1.0"
+unicode-segmentation = "1.2"
 url = "1"
--- a/third_party/rust/unicode-segmentation/.cargo-checksum.json
+++ b/third_party/rust/unicode-segmentation/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{".travis.yml":"45a9e28a40dc51950223f939e74e09e3792d797db4c43c6a2e7d2ebc82e405e7","COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"5d3ae74bc112f9c3e728e6a2e596a23ee8b2fab7df0e238b09306c2b6d3e59b7","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"32f1640f92c102be60895ceda728dce63820bb13a8a24065fe8a33d69a0e36e2","scripts/unicode.py":"5b4f051b7c934df8579f14051bc65ccda46583b8d216f5d6caceffa010c48a2e","scripts/unicode_gen_breaktests.py":"a8d9c88aec31f4d33b9850e71d497efbc9e6e9268a1b67ce55bd2a45ad552c67","src/grapheme.rs":"d7ad64cf1345d4821e24c21cb3186ba3a923955b6f1aeefba88abee5dcef6c3f","src/lib.rs":"62047530c83a33e756dd3b13f19cfc95c15b6f26c28e76949ff910f917816cf0","src/tables.rs":"9d08e3f87c25b165b7902b7a0fb1682ac3a4003aa598ed9d32b465e8911f33e3","src/test.rs":"1e203fdc5ccdf5fe6f3da23b307c807cafa4b2250f55698f4ba385978f23e80c","src/testdata.rs":"33d4c58cf9416b342f31b1139b428676deecec34b0994c40ebe0c5cf4a639c79","src/word.rs":"eefecfcf85554fc529a44892fa7e3be50377b24a095610b7feb5f184c82818b9"},"package":"18127285758f0e2c6cf325bb3f3d138a12fee27de4f23e146cd6a179f26c2cf3"}
\ No newline at end of file
+{"files":{".travis.yml":"45a9e28a40dc51950223f939e74e09e3792d797db4c43c6a2e7d2ebc82e405e7","COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"f5cdd71581b22dd5aedcf9cc9d5b817cfc6df3163185b069b668a10c21912979","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"9fe1c77a06b34cbe0dca1bb987e7551a210d37287f517014345402c74d25eb1f","scripts/unicode.py":"4bbbbb274c358fa46eea130e99c7f77c1c2c4bb4f60f7380e77ac93e47a20143","scripts/unicode_gen_breaktests.py":"a8d9c88aec31f4d33b9850e71d497efbc9e6e9268a1b67ce55bd2a45ad552c67","src/grapheme.rs":"5b86f40dff85a539d80c5bdac496311295a5358a2558032111d4bec39767f15f","src/lib.rs":"445d37ea35cd787d8b5f43f853b74e4e21e374d65e78e45dbd77122bedf96b31","src/tables.rs":"a8fc42a3f3d788bf3154ee4012e9e90c1f311eba67ede3ff1d69fd4eb8cefe74","src/test.rs":"58d5d80e17df64a95c86728416afac0a4744adf5020bbbc053b2514ddeecd2da","src/testdata.rs":"33d4c58cf9416b342f31b1139b428676deecec34b0994c40ebe0c5cf4a639c79","src/word.rs":"eefecfcf85554fc529a44892fa7e3be50377b24a095610b7feb5f184c82818b9"},"package":"aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"}
\ No newline at end of file
--- a/third_party/rust/unicode-segmentation/Cargo.toml
+++ b/third_party/rust/unicode-segmentation/Cargo.toml
@@ -1,25 +1,29 @@
-[package]
-
-name = "unicode-segmentation"
-version = "1.1.0"
-authors = ["kwantam <kwantam@gmail.com>"]
-
-homepage = "https://github.com/unicode-rs/unicode-segmentation"
-repository = "https://github.com/unicode-rs/unicode-segmentation"
-documentation = "https://unicode-rs.github.io/unicode-segmentation"
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
 
-license = "MIT/Apache-2.0"
-keywords = ["text", "unicode", "grapheme", "word", "boundary"]
+[package]
+name = "unicode-segmentation"
+version = "1.2.1"
+authors = ["kwantam <kwantam@gmail.com>"]
+exclude = ["target/*", "Cargo.lock", "scripts/tmp", "*.txt"]
+description = "This crate provides Grapheme Cluster and Word boundaries\naccording to Unicode Standard Annex #29 rules.\n"
+homepage = "https://github.com/unicode-rs/unicode-segmentation"
+documentation = "https://unicode-rs.github.io/unicode-segmentation"
 readme = "README.md"
-description = """
-This crate provides Grapheme Cluster and Word boundaries
-according to Unicode Standard Annex #29 rules.
-"""
-
-exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "*.txt" ]
+keywords = ["text", "unicode", "grapheme", "word", "boundary"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/unicode-rs/unicode-segmentation"
+[dev-dependencies.quickcheck]
+version = "0.4"
 
 [features]
-no_std = [] # This is a no-op, preserved for backward compatibility only.
-
-[dev-dependencies]
-quickcheck = "0.4"
+no_std = []
--- a/third_party/rust/unicode-segmentation/README.md
+++ b/third_party/rust/unicode-segmentation/README.md
@@ -40,16 +40,21 @@ to your `Cargo.toml`:
 
 ```toml
 [dependencies]
 unicode-segmentation = "1.1.0"
 ```
 
 # Change Log
 
+## 1.2.0
+
+* New `GraphemeCursor` API allows random access and bidirectional iteration.
+* Fixed incorrect splitting of certain emoji modifier sequences.
+
 ## 1.1.0
 
 * Add `as_str` methods to the iterator types.
 
 ## 1.0.3
 
 * Code cleanup and additional tests.
 
--- a/third_party/rust/unicode-segmentation/scripts/unicode.py
+++ b/third_party/rust/unicode-segmentation/scripts/unicode.py
@@ -325,31 +325,23 @@ pub const UNICODE_VERSION: (u64, u64, u6
                                   ("derived_property", derived, ["Alphabetic"]):
             emit_property_module(rf, name, cat, pfuns)
 
         ### grapheme cluster module
         # from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values
         grapheme_cats = load_properties("auxiliary/GraphemeBreakProperty.txt", [])
 
         # Control
-        #  Note 1:
+        #  Note:
         # This category also includes Cs (surrogate codepoints), but Rust's `char`s are
         # Unicode Scalar Values only, and surrogates are thus invalid `char`s.
         # Thus, we have to remove Cs from the Control category
-        #  Note 2:
-        # 0x0a and 0x0d (CR and LF) are not in the Control category for Graphemes.
-        # However, the Graphemes iterator treats these as a special case, so they
-        # should be included in grapheme_cats["Control"] for our implementation.
         grapheme_cats["Control"] = group_cat(list(
-            (set(ungroup_cat(grapheme_cats["Control"]))
-             | set(ungroup_cat(grapheme_cats["CR"]))
-             | set(ungroup_cat(grapheme_cats["LF"])))
+            set(ungroup_cat(grapheme_cats["Control"]))
             - set(ungroup_cat([surrogate_codepoints]))))
-        del(grapheme_cats["CR"])
-        del(grapheme_cats["LF"])
 
         grapheme_table = []
         for cat in grapheme_cats:
             grapheme_table.extend([(x, y, cat) for (x, y) in grapheme_cats[cat]])
         grapheme_table.sort(key=lambda w: w[0])
         emit_break_module(rf, grapheme_table, grapheme_cats.keys(), "grapheme")
         rf.write("\n")
 
--- a/third_party/rust/unicode-segmentation/src/grapheme.rs
+++ b/third_party/rust/unicode-segmentation/src/grapheme.rs
@@ -59,20 +59,18 @@ impl<'a> DoubleEndedIterator for Graphem
     }
 }
 
 /// External iterator for a string's
 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries).
 #[derive(Clone)]
 pub struct Graphemes<'a> {
     string: &'a str,
-    extended: bool,
-    cat: Option<GraphemeCat>,
-    catb: Option<GraphemeCat>,
-    regional_count_back: Option<usize>,
+    cursor: GraphemeCursor,
+    cursor_back: GraphemeCursor,
 }
 
 impl<'a> Graphemes<'a> {
     #[inline]
     /// View the underlying data (the part yet to be iterated) as a slice of the original string.
     ///
     /// ```rust
     /// # use unicode_segmentation::UnicodeSegmentation;
@@ -80,356 +78,627 @@ impl<'a> Graphemes<'a> {
     /// assert_eq!(iter.as_str(), "abc");
     /// iter.next();
     /// assert_eq!(iter.as_str(), "bc");
     /// iter.next();
     /// iter.next();
     /// assert_eq!(iter.as_str(), "");
     /// ```
     pub fn as_str(&self) -> &'a str {
-        self.string
+        &self.string[self.cursor.cur_cursor()..self.cursor_back.cur_cursor()]
     }
 }
 
-// state machine for cluster boundary rules
-#[derive(Copy,Clone,PartialEq,Eq)]
-enum GraphemeState {
-    Start,
-    FindExtend,
-    HangulL,
-    HangulLV,
-    HangulLVT,
-    Prepend,
-    Regional,
-    Emoji,
-    Zwj,
-}
-
 impl<'a> Iterator for Graphemes<'a> {
     type Item = &'a str;
 
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
-        let slen = self.string.len();
+        let slen = self.cursor_back.cur_cursor() - self.cursor.cur_cursor();
         (cmp::min(slen, 1), Some(slen))
     }
 
     #[inline]
     fn next(&mut self) -> Option<&'a str> {
-        use self::GraphemeState::*;
-        use tables::grapheme as gr;
-        if self.string.len() == 0 {
+        let start = self.cursor.cur_cursor();
+        if start == self.cursor_back.cur_cursor() {
             return None;
         }
-
-        let mut take_curr = true;
-        let mut idx = 0;
-        let mut state = Start;
-        let mut cat = gr::GC_Any;
-
-        // caching used by next_back() should be invalidated
-        self.regional_count_back = None;
-        self.catb = None;
-
-        for (curr, ch) in self.string.char_indices() {
-            idx = curr;
-
-            // retrieve cached category, if any
-            // We do this because most of the time we would end up
-            // looking up each character twice.
-            cat = match self.cat {
-                None => gr::grapheme_category(ch),
-                _ => self.cat.take().unwrap()
-            };
-
-            if (state, cat) == (Emoji, gr::GC_Extend) {
-                continue;                   // rule GB10
-            }
-
-            if let Some(new_state) = match cat {
-                gr::GC_Extend => Some(FindExtend),                       // rule GB9
-                gr::GC_SpacingMark if self.extended => Some(FindExtend), // rule GB9a
-                gr::GC_ZWJ => Some(Zwj),                                 // rule GB9/GB11
-                _ => None
-            } {
-                state = new_state;
-                continue;
-            }
-
-            state = match state {
-                Start if '\r' == ch => {
-                    let slen = self.string.len();
-                    let nidx = idx + 1;
-                    if nidx != slen && self.string[nidx..].chars().next().unwrap() == '\n' {
-                        idx = nidx;             // rule GB3
-                    }
-                    break;                      // rule GB4
-                }
-                Start | Prepend => match cat {
-                    gr::GC_Control => {         // rule GB5
-                        take_curr = state == Start;
-                        break;
-                    }
-                    gr::GC_L => HangulL,
-                    gr::GC_LV | gr::GC_V => HangulLV,
-                    gr::GC_LVT | gr::GC_T => HangulLVT,
-                    gr::GC_Prepend if self.extended => Prepend,
-                    gr::GC_Regional_Indicator => Regional,
-                    gr::GC_E_Base | gr::GC_E_Base_GAZ => Emoji,
-                    _ => FindExtend
-                },
-                FindExtend => {         // found non-extending when looking for extending
-                    take_curr = false;
-                    break;
-                },
-                HangulL => match cat {      // rule GB6: L x (L|V|LV|LVT)
-                    gr::GC_L => continue,
-                    gr::GC_LV | gr::GC_V => HangulLV,
-                    gr::GC_LVT => HangulLVT,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLV => match cat {     // rule GB7: (LV|V) x (V|T)
-                    gr::GC_V => continue,
-                    gr::GC_T => HangulLVT,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLVT => match cat {    // rule GB8: (LVT|T) x T
-                    gr::GC_T => continue,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Regional => match cat {     // rule GB12/GB13
-                    gr::GC_Regional_Indicator => FindExtend,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Emoji => match cat {        // rule GB10: (E_Base|EBG) Extend* x E_Modifier
-                    gr::GC_E_Modifier => continue,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Zwj => match cat {          // rule GB11: ZWJ x (GAZ|EBG)
-                    gr::GC_Glue_After_Zwj => continue,
-                    gr::GC_E_Base_GAZ => Emoji,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-            }
-        }
-
-        self.cat = if take_curr {
-            idx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
-            None
-        } else {
-            Some(cat)
-        };
-
-        let retstr = &self.string[..idx];
-        self.string = &self.string[idx..];
-        Some(retstr)
+        let next = self.cursor.next_boundary(self.string, 0).unwrap().unwrap();
+        Some(&self.string[start..next])
     }
 }
 
 impl<'a> DoubleEndedIterator for Graphemes<'a> {
     #[inline]
     fn next_back(&mut self) -> Option<&'a str> {
-        use self::GraphemeState::*;
-        use tables::grapheme as gr;
-        if self.string.len() == 0 {
+        let end = self.cursor_back.cur_cursor();
+        if end == self.cursor.cur_cursor() {
             return None;
         }
-
-        let mut take_curr = true;
-        let mut idx = self.string.len();
-        let mut previdx = idx;
-        let mut state = Start;
-        let mut cat = gr::GC_Any;
-
-        // caching used by next() should be invalidated
-        self.cat = None;
-
-        'outer: for (curr, ch) in self.string.char_indices().rev() {
-            previdx = idx;
-            idx = curr;
-
-            // cached category, if any
-            cat = match self.catb {
-                None => gr::grapheme_category(ch),
-                _ => self.catb.take().unwrap()
-            };
-
-            // a matching state machine that runs *backwards* across an input string
-            // note that this has some implications for the Hangul matching, since
-            // we now need to know what the rightward letter is:
-            //
-            // Right to left, we have:
-            //      L x L
-            //      V x (L|V|LV)
-            //      T x (V|T|LV|LVT)
-            // HangulL means the letter to the right is L
-            // HangulLV means the letter to the right is V
-            // HangulLVT means the letter to the right is T
-            state = match state {
-                Start if '\n' == ch => {
-                    if idx > 0 && '\r' == self.string[..idx].chars().next_back().unwrap() {
-                        idx -= 1;       // rule GB3
-                    }
-                    break;              // rule GB4
-                },
-                Start | FindExtend => match cat {
-                    gr::GC_Extend => FindExtend,
-                    gr::GC_SpacingMark if self.extended => FindExtend,
-                    gr::GC_ZWJ => FindExtend,
-                    gr::GC_E_Modifier => Emoji,
-                    gr::GC_Glue_After_Zwj | gr::GC_E_Base_GAZ => Zwj,
-                    gr::GC_L | gr::GC_LV | gr::GC_LVT => HangulL,
-                    gr::GC_V => HangulLV,
-                    gr::GC_T => HangulLVT,
-                    gr::GC_Regional_Indicator => Regional,
-                    gr::GC_Control => {
-                        take_curr = Start == state;
-                        break;
-                    },
-                    _ => break
-                },
-                HangulL => match cat {      // char to right is an L
-                    gr::GC_L => continue,               // L x L is the only legal match
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLV => match cat {     // char to right is a V
-                    gr::GC_V => continue,               // V x V, right char is still V
-                    gr::GC_L | gr::GC_LV => HangulL,    // (L|V) x V, right char is now L
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLVT => match cat {    // char to right is a T
-                    gr::GC_T => continue,               // T x T, right char is still T
-                    gr::GC_V => HangulLV,               // V x T, right char is now V
-                    gr::GC_LV | gr::GC_LVT => HangulL,  // (LV|LVT) x T, right char is now L
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Prepend => {
-                    // not used in reverse iteration
-                    unreachable!()
-                },
-                Regional => {               // rule GB12/GB13
-                    // Need to scan backward to find if this is preceded by an odd or even number
-                    // of Regional_Indicator characters.
-                    let count = match self.regional_count_back {
-                        Some(count) => count,
-                        None => self.string[..previdx].chars().rev().take_while(|c| {
-                                    gr::grapheme_category(*c) == gr::GC_Regional_Indicator
-                                }).count()
-                    };
-                    // Cache the count to avoid re-scanning the same chars on the next iteration.
-                    self.regional_count_back = count.checked_sub(1);
-
-                    if count % 2 == 0 {
-                        take_curr = false;
-                        break;
-                    }
-                    continue;
-                },
-                Emoji => {                  // char to right is E_Modifier
-                    // In order to decide whether to break before this E_Modifier char, we need to
-                    // scan backward past any Extend chars to look for (E_Base|(ZWJ? EBG)).
-                    let mut ebg_idx = None;
-                    for (startidx, prev) in self.string[..previdx].char_indices().rev() {
-                        match (ebg_idx, gr::grapheme_category(prev)) {
-                            (None, gr::GC_Extend) => continue,
-                            (None, gr::GC_E_Base) => {      // rule GB10
-                                // Found an Emoji modifier sequence. Return the whole sequence.
-                                idx = startidx;
-                                break 'outer;
-                            }
-                            (None, gr::GC_E_Base_GAZ) => {  // rule GB10
-                                // Keep scanning in case this is part of an ZWJ x EBJ pair.
-                                ebg_idx = Some(startidx);
-                            }
-                            (Some(_), gr::GC_ZWJ) => {      // rule GB11
-                                idx = startidx;
-                                break 'outer;
-                            }
-                            _ => break
-                        }
-                    }
-                    if let Some(ebg_idx) = ebg_idx {
-                        // Found an EBG without a ZWJ before it.
-                        idx = ebg_idx;
-                        break;
-                    }
-                    // Not part of an Emoji modifier sequence. Break here.
-                    take_curr = false;
-                    break;
-                },
-                Zwj => match cat {            // char to right is (GAZ|EBG)
-                    gr::GC_ZWJ => FindExtend, // rule GB11: ZWJ x (GAZ|EBG)
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                }
-            }
-        }
-
-        self.catb = if take_curr {
-            None
-        } else  {
-            idx = previdx;
-            Some(cat)
-        };
-
-        if self.extended && cat != gr::GC_Control {
-            // rule GB9b: include any preceding Prepend characters
-            for (i, c) in self.string[..idx].char_indices().rev() {
-                match gr::grapheme_category(c) {
-                    gr::GC_Prepend => idx = i,
-                    cat => {
-                        self.catb = Some(cat);
-                        break;
-                    }
-                }
-            }
-        }
-
-        let retstr = &self.string[idx..];
-        self.string = &self.string[..idx];
-        Some(retstr)
+        let prev = self.cursor_back.prev_boundary(self.string, 0).unwrap().unwrap();
+        Some(&self.string[prev..end])
     }
 }
 
 #[inline]
 pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
+    let len = s.len();
     Graphemes {
         string: s,
-        extended: is_extended,
-        cat: None,
-        catb: None,
-        regional_count_back: None
+        cursor: GraphemeCursor::new(0, len, is_extended),
+        cursor_back: GraphemeCursor::new(len, len, is_extended),
     }
 }
 
 #[inline]
 pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
     GraphemeIndices { start_offset: s.as_ptr() as usize, iter: new_graphemes(s, is_extended) }
 }
+
+// maybe unify with PairResult?
+// An enum describing information about a potential boundary.
+#[derive(PartialEq, Eq, Clone)]
+enum GraphemeState {
+    // No information is known.
+    Unknown,
+    // It is known to not be a boundary.
+    NotBreak,
+    // It is known to be a boundary.
+    Break,
+    // The codepoint after is a Regional Indicator Symbol, so a boundary iff
+    // it is preceded by an even number of RIS codepoints. (GB12, GB13)
+    Regional,
+    // The codepoint after is in the E_Modifier category, so whether it's a boundary
+    // depends on pre-context according to GB10.
+    Emoji,
+}
+
+/// Cursor-based segmenter for grapheme clusters.
+#[derive(Clone)]
+pub struct GraphemeCursor {
+    // Current cursor position.
+    offset: usize,
+    // Total length of the string.
+    len: usize,
+    // A config flag indicating whether this cursor computes legacy or extended
+    // grapheme cluster boundaries (enables GB9a and GB9b if set).
+    is_extended: bool,
+    // Information about the potential boundary at `offset`
+    state: GraphemeState,
+    // Category of codepoint immediately preceding cursor, if known.
+    cat_before: Option<GraphemeCat>,
+    // Category of codepoint immediately after cursor, if known.
+    cat_after: Option<GraphemeCat>,
+    // If set, at least one more codepoint immediately preceding this offset
+    // is needed to resolve whether there's a boundary at `offset`.
+    pre_context_offset: Option<usize>,
+    // The number of RIS codepoints preceding `offset`. If `pre_context_offset`
+    // is set, then counts the number of RIS between that and `offset`, otherwise
+    // is an accurate count relative to the string.
+    ris_count: Option<usize>,
+    // Set if a call to `prev_boundary` or `next_boundary` was suspended due
+    // to needing more input.
+    resuming: bool,
+}
+
+/// An error return indicating that not enough content was available in the
+/// provided chunk to satisfy the query, and that more content must be provided.
+#[derive(PartialEq, Eq, Debug)]
+pub enum GraphemeIncomplete {
+    /// More pre-context is needed. The caller should call `provide_context`
+    /// with a chunk ending at the offset given, then retry the query. This
+    /// will only be returned if the `chunk_start` parameter is nonzero.
+    PreContext(usize),
+
+    /// When requesting `prev_boundary`, the cursor is moving past the beginning
+    /// of the current chunk, so the chunk before that is requested. This will
+    /// only be returned if the `chunk_start` parameter is nonzero.
+    PrevChunk,
+
+    /// When requesting `next_boundary`, the cursor is moving past the end of the
+    /// current chunk, so the chunk after that is requested. This will only be
+    /// returned if the chunk ends before the `len` parameter provided on
+    /// creation of the cursor.
+    NextChunk,  // requesting chunk following the one given
+
+    /// An error returned when the chunk given does not contain the cursor position.
+    InvalidOffset,
+}
+
+// An enum describing the result from lookup of a pair of categories.
+// `NotBreak` and `Break` are final answers; the other three variants mean the
+// pair alone is not enough and the cursor mode or preceding text decides.
+#[derive(PartialEq, Eq)]
+enum PairResult {
+    NotBreak,  // definitely not a break
+    Break,  // definitely a break
+    Extended,  // a break iff not in extended mode
+    Regional,  // a break iff preceded by an even number of RIS
+    Emoji,  // not a break iff preceded by emoji base and (Extend)*
+}
+
+// Classifies the pair of categories on either side of a candidate boundary.
+// Arms are tried top to bottom, so an earlier rule always wins over a later
+// one; the GBn labels name the grapheme-break rule each group implements.
+fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
+    use self::PairResult::*;
+    use tables::grapheme::GraphemeCat::*;
+    match (before, after) {
+        // GB3
+        (GC_CR, GC_LF) => NotBreak,
+        // GB4 (left-hand side), GB5 (right-hand side)
+        (GC_Control, _) | (GC_CR, _) | (GC_LF, _) |
+        (_, GC_Control) | (_, GC_CR) | (_, GC_LF) => Break,
+        // GB6, GB7, GB8, GB9
+        (GC_L, GC_L) | (GC_L, GC_V) | (GC_L, GC_LV) | (GC_L, GC_LVT) |
+        (GC_LV, GC_V) | (GC_LV, GC_T) | (GC_V, GC_V) | (GC_V, GC_T) |
+        (GC_LVT, GC_T) | (GC_T, GC_T) |
+        (_, GC_Extend) | (_, GC_ZWJ) => NotBreak,
+        // GB9a, GB9b: only joined when the cursor is in extended mode
+        (_, GC_SpacingMark) | (GC_Prepend, _) => Extended,
+        // GB10: a modifier directly after a base joins; after Extend the
+        // answer depends on what precedes the run of Extend
+        (GC_E_Base, GC_E_Modifier) | (GC_E_Base_GAZ, GC_E_Modifier) => NotBreak,
+        (GC_Extend, GC_E_Modifier) => Emoji,
+        // GB11
+        (GC_ZWJ, GC_Glue_After_Zwj) | (GC_ZWJ, GC_E_Base_GAZ) => NotBreak,
+        // GB12, GB13: depends on how many RIS precede the pair
+        (GC_Regional_Indicator, GC_Regional_Indicator) => Regional,
+        // GB999
+        _ => Break,
+    }
+}
+
+impl GraphemeCursor {
+    /// Create a new cursor. The string and initial offset are given at creation
+    /// time, but the contents of the string are not. The `is_extended` parameter
+    /// controls whether extended grapheme clusters are selected.
+    ///
+    /// The `offset` parameter must be on a codepoint boundary.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let s = "हिन्दी";
+    /// let mut legacy = GraphemeCursor::new(0, s.len(), false);
+    /// assert_eq!(legacy.next_boundary(s, 0), Ok(Some("ह".len())));
+    /// let mut extended = GraphemeCursor::new(0, s.len(), true);
+    /// assert_eq!(extended.next_boundary(s, 0), Ok(Some("हि".len())));
+    /// ```
+    pub fn new(offset: usize, len: usize, is_extended: bool) -> GraphemeCursor {
+        // The two ends of the string are boundaries without looking at any
+        // text; every other position has to be resolved once text is supplied.
+        let at_edge = offset == 0 || offset == len;
+        GraphemeCursor {
+            offset: offset,
+            len: len,
+            is_extended: is_extended,
+            state: if at_edge { GraphemeState::Break } else { GraphemeState::Unknown },
+            // Nothing about the surrounding text has been seen yet.
+            cat_before: None,
+            cat_after: None,
+            ris_count: None,
+            pre_context_offset: None,
+            resuming: false,
+        }
+    }
+
+    // Not sure I'm gonna keep this, the advantage over new() seems thin.
+
+    /// Set the cursor to a new location in the same string.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let s = "abcd";
+    /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
+    /// assert_eq!(cursor.cur_cursor(), 0);
+    /// cursor.set_cursor(2);
+    /// assert_eq!(cursor.cur_cursor(), 2);
+    /// ```
+    pub fn set_cursor(&mut self, offset: usize) {
+        // Setting the position it already has keeps everything cached so far.
+        if offset == self.offset {
+            return;
+        }
+        self.offset = offset;
+        let at_edge = offset == 0 || offset == self.len;
+        self.state = if at_edge { GraphemeState::Break } else { GraphemeState::Unknown };
+        // Forget what was learned from the text around the old position.
+        // NOTE(review): `pre_context_offset` and `resuming` are left as they
+        // were; confirm that carrying them across a move is intended.
+        self.ris_count = None;
+        self.cat_after = None;
+        self.cat_before = None;
+    }
+
+    /// The current offset of the cursor. Equal to the last value provided to
+    /// `new()` or `set_cursor()`, or returned from `next_boundary()` or
+    /// `prev_boundary()`.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// // Two flags (🇷🇸🇮🇴), each flag is two RIS codepoints, each RIS is 4 bytes.
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
+    /// assert_eq!(cursor.cur_cursor(), 4);
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
+    /// assert_eq!(cursor.cur_cursor(), 8);
+    /// ```
+    pub fn cur_cursor(&self) -> usize {
+        // A byte offset into the whole string, not into any particular chunk.
+        self.offset
+    }
+
+    /// Provide additional pre-context when it is needed to decide a boundary.
+    /// The end of the chunk must coincide with the value given in the
+    /// `GraphemeIncomplete::PreContext` request.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
+    /// // Not enough pre-context to decide if there's a boundary between the two flags.
+    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(8)));
+    /// // Provide one more Regional Indicator Symbol of pre-context
+    /// cursor.provide_context(&flags[4..8], 4);
+    /// // Still not enough context to decide.
+    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(4)));
+    /// // Provide additional requested context.
+    /// cursor.provide_context(&flags[0..4], 0);
+    /// // That's enough to decide (it always is when context goes to the start of the string)
+    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
+    /// ```
+    pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
+        use tables::grapheme as gr;
+        // Only valid as the answer to an outstanding `PreContext` request.
+        assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
+        self.pre_context_offset = None;
+        let ends_at_cursor = chunk_start + chunk.len() == self.offset;
+        // Category of the final codepoint of a chunk; panics on an empty one.
+        let last_cat = |s: &str| gr::grapheme_category(s.chars().next_back().unwrap());
+        // GB9b: in extended mode nothing breaks directly after a Prepend.
+        if self.is_extended && ends_at_cursor && last_cat(chunk) == gr::GC_Prepend {
+            self.decide(false);
+            return;
+        }
+        match self.state {
+            // A backwards scan is in progress; let it continue into this chunk.
+            GraphemeState::Regional => self.handle_regional(chunk, chunk_start),
+            GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
+            // Otherwise only the codepoint just before the cursor matters.
+            _ => {
+                if self.cat_before.is_none() && ends_at_cursor {
+                    self.cat_before = Some(last_cat(chunk));
+                }
+            }
+        }
+    }
+
+    // Stores a final verdict for the current offset.
+    fn decide(&mut self, is_break: bool) {
+        self.state = match is_break {
+            true => GraphemeState::Break,
+            false => GraphemeState::NotBreak,
+        };
+    }
+
+    // Stores the verdict and hands it back in the form `is_boundary` returns.
+    fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
+        let outcome = Ok(is_break);
+        self.decide(is_break);
+        outcome
+    }
+
+    // Turns the cursor's current state into an `is_boundary` return value:
+    // a stored verdict if there is one, otherwise the pending pre-context
+    // request. Having neither is treated as a logic error.
+    fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
+        match self.state {
+            GraphemeState::Break => Ok(true),
+            GraphemeState::NotBreak => Ok(false),
+            _ => match self.pre_context_offset {
+                Some(pre_context_offset) => Err(GraphemeIncomplete::PreContext(pre_context_offset)),
+                None => unreachable!("inconsistent state"),
+            },
+        }
+    }
+
+    // Counts regional indicators backwards from the end of `chunk`, adding to
+    // any count already held. The scan is conclusive once a different category
+    // is met or the chunk starts at the beginning of the string; otherwise more
+    // pre-context is requested and the state is parked as `Regional`.
+    fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
+        use tables::grapheme as gr;
+        let mut count = self.ris_count.unwrap_or(0);
+        // Reaching offset 0 is as conclusive as hitting a non-RIS codepoint.
+        let mut conclusive = chunk_start == 0;
+        for ch in chunk.chars().rev() {
+            if gr::grapheme_category(ch) != gr::GC_Regional_Indicator {
+                conclusive = true;
+                break;
+            }
+            count += 1;
+        }
+        self.ris_count = Some(count);
+        if conclusive {
+            // An even number of preceding RIS means the pair starts a new flag.
+            self.decide((count % 2) == 0);
+        } else {
+            self.pre_context_offset = Some(chunk_start);
+            self.state = GraphemeState::Regional;
+        }
+    }
+
+    // Looks backwards through `chunk` past any run of Extend. An emoji base
+    // found there means no break; any other category means a break. If the
+    // chunk is all Extend, the start of the string counts as a break and
+    // otherwise more pre-context is requested with the state parked as `Emoji`.
+    fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
+        use tables::grapheme as gr;
+        let verdict = chunk.chars().rev()
+            .map(gr::grapheme_category)
+            .find(|&cat| cat != gr::GC_Extend)
+            .map(|cat| cat != gr::GC_E_Base && cat != gr::GC_E_Base_GAZ);
+        match verdict {
+            Some(is_break) => self.decide(is_break),
+            None if chunk_start == 0 => self.decide(true),
+            None => {
+                self.pre_context_offset = Some(chunk_start);
+                self.state = GraphemeState::Emoji;
+            }
+        }
+    }
+
+    /// Determine whether the current cursor location is a grapheme cluster boundary.
+    /// Only a part of the string need be supplied. If `chunk_start` is nonzero or
+    /// the length of `chunk` is not equal to `len` on creation, then this method
+    /// may return `GraphemeIncomplete::PreContext`. The caller should then
+    /// call `provide_context` with the requested chunk, then retry calling this
+    /// method.
+    ///
+    /// For partial chunks, if the cursor is not at the beginning or end of the
+    /// string, the chunk should contain at least the codepoint following the cursor.
+    /// If the string is nonempty, the chunk must be nonempty.
+    ///
+    /// All calls should have consistent chunk contents (ie, if a chunk provides
+    /// content for a given slice, all further chunks covering that slice must have
+    /// the same content for it).
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
+    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(true));
+    /// cursor.set_cursor(12);
+    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
+    /// ```
+    pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> {
+        use tables::grapheme as gr;
+        // A verdict already stored for this offset short-circuits everything.
+        if self.state == GraphemeState::Break {
+            return Ok(true)
+        }
+        if self.state == GraphemeState::NotBreak {
+            return Ok(false)
+        }
+        // The cursor has to lie inside the chunk. The tolerated exception is a
+        // cursor exactly at the chunk's end whose following category is cached.
+        // NOTE(review): an offset before `chunk_start` also gets through here
+        // when `cat_after` is cached, and the subtraction below would then
+        // underflow; confirm callers never do that.
+        if self.offset < chunk_start || self.offset >= chunk_start + chunk.len() {
+            if self.offset > chunk_start + chunk.len() || self.cat_after.is_none() {
+                return Err(GraphemeIncomplete::InvalidOffset)
+            }
+        }
+        // A pre-context request stays outstanding until `provide_context`
+        // clears it, so repeat it rather than recomputing.
+        if let Some(pre_context_offset) = self.pre_context_offset {
+            return Err(GraphemeIncomplete::PreContext(pre_context_offset));
+        }
+        let offset_in_chunk = self.offset - chunk_start;
+        if self.cat_after.is_none() {
+            let ch = chunk[offset_in_chunk..].chars().next().unwrap();
+            self.cat_after = Some(gr::grapheme_category(ch));
+        }
+        // Cursor on the first byte of the chunk: the preceding codepoint is not
+        // in this chunk. For RIS and emoji modifiers the state is set so that
+        // `provide_context` continues with the matching backwards scan; for
+        // everything else context is only needed if `cat_before` is unknown.
+        if self.offset == chunk_start {
+            let mut need_pre_context = true;
+            match self.cat_after.unwrap() {
+                gr::GC_Regional_Indicator => self.state = GraphemeState::Regional,
+                gr::GC_E_Modifier => self.state = GraphemeState::Emoji,
+                _ => need_pre_context = self.cat_before.is_none(),
+            }
+            if need_pre_context {
+                self.pre_context_offset = Some(chunk_start);
+                return Err(GraphemeIncomplete::PreContext(chunk_start));
+            }
+        }
+        // Otherwise the codepoint just before the cursor is inside the chunk.
+        if self.cat_before.is_none() {
+            let ch = chunk[..offset_in_chunk].chars().rev().next().unwrap();
+            self.cat_before = Some(gr::grapheme_category(ch));
+        }
+        match check_pair(self.cat_before.unwrap(), self.cat_after.unwrap()) {
+            PairResult::NotBreak => return self.decision(false),
+            PairResult::Break => return self.decision(true),
+            PairResult::Extended => {
+                // These pairs only stay together in extended mode.
+                let is_extended = self.is_extended;
+                return self.decision(!is_extended);
+            }
+            PairResult::Regional => {
+                // Use a known RIS count if there is one; otherwise count
+                // backwards through the part of the chunk before the cursor.
+                if let Some(ris_count) = self.ris_count {
+                    return self.decision((ris_count % 2) == 0);
+                }
+                self.handle_regional(&chunk[..offset_in_chunk], chunk_start);
+                self.is_boundary_result()
+            }
+            PairResult::Emoji => {
+                // Look backwards past the run of Extend for an emoji base.
+                self.handle_emoji(&chunk[..offset_in_chunk], chunk_start);
+                self.is_boundary_result()
+            }
+        }
+    }
+
+    /// Find the next boundary after the current cursor position. Only a part of
+    /// the string need be supplied. If the chunk is incomplete, then this
+    /// method might return `GraphemeIncomplete::PreContext` or
+    /// `GraphemeIncomplete::NextChunk`. In the former case, the caller should
+    /// call `provide_context` with the requested chunk, then retry. In the
+    /// latter case, the caller should provide the chunk following the one
+    /// given, then retry.
+    ///
+    /// See `is_boundary` for expectations on the provided chunk.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(16)));
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(None));
+    /// ```
+    ///
+    /// And an example that uses partial strings:
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+    /// let s = "abcd";
+    /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
+    /// assert_eq!(cursor.next_boundary(&s[..2], 0), Ok(Some(1)));
+    /// assert_eq!(cursor.next_boundary(&s[..2], 0), Err(GraphemeIncomplete::NextChunk));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(2)));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(3)));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4)));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
+    /// ```
+    pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
+        use tables::grapheme as gr;
+        // There is no boundary beyond the end of the string.
+        if self.offset == self.len {
+            return Ok(None);
+        }
+        let mut iter = chunk[self.offset - chunk_start..].chars();
+        // If the chunk ends exactly at the cursor there is no codepoint to step
+        // over. Ask for the following chunk instead of panicking; no state has
+        // been touched, so the retry simply starts over from the same offset.
+        let mut ch = match iter.next() {
+            Some(ch) => ch,
+            None => return Err(GraphemeIncomplete::NextChunk),
+        };
+        loop {
+            if self.resuming {
+                // A previous call already advanced the cursor and was then
+                // suspended; only the category after the cursor may be missing.
+                if self.cat_after.is_none() {
+                    self.cat_after = Some(gr::grapheme_category(ch));
+                }
+            } else {
+                // Step over `ch`: it becomes the codepoint before the cursor.
+                self.offset += ch.len_utf8();
+                self.state = GraphemeState::Unknown;
+                self.cat_before = self.cat_after.take();
+                if self.cat_before.is_none() {
+                    self.cat_before = Some(gr::grapheme_category(ch));
+                }
+                // Keep the RIS count in step: extend a known count across
+                // another RIS, or restart it at zero after anything else.
+                if self.cat_before.unwrap() == GraphemeCat::GC_Regional_Indicator {
+                    self.ris_count = self.ris_count.map(|c| c + 1);
+                } else {
+                    self.ris_count = Some(0);
+                }
+                if let Some(next_ch) = iter.next() {
+                    ch = next_ch;
+                    self.cat_after = Some(gr::grapheme_category(ch));
+                } else if self.offset == self.len {
+                    // The end of the string is always a boundary.
+                    self.decide(true);
+                } else {
+                    // The chunk ran out before the string did. Mark the call
+                    // as suspended so the retry does not advance a second time.
+                    self.resuming = true;
+                    return Err(GraphemeIncomplete::NextChunk);
+                }
+            }
+            // Set before the call so that an error propagated by `?` leaves the
+            // cursor marked as suspended at this offset.
+            self.resuming = true;
+            if self.is_boundary(chunk, chunk_start)? {
+                self.resuming = false;
+                return Ok(Some(self.offset));
+            }
+            self.resuming = false;
+        }
+    }
+
+    /// Find the previous boundary before the current cursor position. Only a part
+    /// of the string need be supplied. If the chunk is incomplete, then this
+    /// method might return `GraphemeIncomplete::PreContext` or
+    /// `GraphemeIncomplete::PrevChunk`. In the former case, the caller should
+    /// call `provide_context` with the requested chunk, then retry. In the
+    /// latter case, the caller should provide the chunk preceding the one
+    /// given, then retry.
+    ///
+    /// See `is_boundary` for expectations on the provided chunk.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(12, flags.len(), false);
+    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(8)));
+    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(0)));
+    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(None));
+    /// ```
+    ///
+    /// And an example that uses partial strings (note the exact return is not
+    /// guaranteed, and may be `PrevChunk` or `PreContext` arbitrarily):
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+    /// let s = "abcd";
+    /// let mut cursor = GraphemeCursor::new(4, s.len(), false);
+    /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Ok(Some(3)));
+    /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Err(GraphemeIncomplete::PrevChunk));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(2)));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(1)));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0)));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
+    /// ```
+    pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
+        use tables::grapheme as gr;
+        // There is no boundary before the start of the string.
+        if self.offset == 0 {
+            return Ok(None);
+        }
+        // The cursor sits on the first byte of the chunk, so the codepoint to
+        // step back over lives in the preceding chunk.
+        if self.offset == chunk_start {
+            return Err(GraphemeIncomplete::PrevChunk);
+        }
+        // Walk backwards from the cursor through the part of the chunk before it.
+        let mut iter = chunk[..self.offset - chunk_start].chars().rev();
+        let mut ch = iter.next().unwrap();
+        loop {
+            if self.offset == chunk_start {
+                self.resuming = true;
+                return Err(GraphemeIncomplete::PrevChunk);
+            }
+            if self.resuming {
+                // A previous call already moved the cursor and was then
+                // suspended; `ch` is the codepoint just before it.
+                self.cat_before = Some(gr::grapheme_category(ch));
+            } else {
+                // Step back over `ch`: what was before the cursor is now after it.
+                self.offset -= ch.len_utf8();
+                self.cat_after = self.cat_before.take();
+                self.state = GraphemeState::Unknown;
+                // One fewer codepoint precedes the cursor; a count of zero
+                // cannot be decremented and becomes unknown instead.
+                if let Some(ris_count) = self.ris_count {
+                    self.ris_count = if ris_count > 0 { Some(ris_count - 1) } else { None };
+                }
+                if let Some(prev_ch) = iter.next() {
+                    ch = prev_ch;
+                    self.cat_before = Some(gr::grapheme_category(ch));
+                } else if self.offset == 0 {
+                    // The start of the string is always a boundary.
+                    self.decide(true);
+                } else {
+                    // The chunk ran out before the string did. Remember the
+                    // category of the codepoint just stepped over (now after
+                    // the cursor) and mark the call as suspended so the retry
+                    // does not move the cursor a second time.
+                    self.resuming = true;
+                    self.cat_after = Some(gr::grapheme_category(ch));
+                    return Err(GraphemeIncomplete::PrevChunk);
+                }
+            }
+            // Set before the call so that an error propagated by `?` leaves the
+            // cursor marked as suspended at this offset.
+            self.resuming = true;
+            if self.is_boundary(chunk, chunk_start)? {
+                self.resuming = false;
+                return Ok(Some(self.offset));
+            }
+            self.resuming = false;
+        }
+    }
+}
+
+#[test]
+fn test_grapheme_cursor_ris_precontext() {
+    // Six regional indicators of four bytes each, split after the first one.
+    let flags = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}";
+    let (head, tail) = flags.split_at(4);
+    let mut cursor = GraphemeCursor::new(8, flags.len(), true);
+    // `tail` shows a single RIS before the cursor and does not start at 0,
+    // so the count cannot be settled without the text before it.
+    assert_eq!(cursor.is_boundary(tail, 4), Err(GraphemeIncomplete::PreContext(4)));
+    cursor.provide_context(head, 0);
+    assert_eq!(cursor.is_boundary(tail, 4), Ok(true));
+}
+
+#[test]
+fn test_grapheme_cursor_chunk_start_require_precontext() {
+    let crlf = "\r\n";
+    let (cr, lf) = crlf.split_at(1);
+    let mut cursor = GraphemeCursor::new(1, crlf.len(), true);
+    // The cursor is on the first byte of the chunk, so what precedes it is unknown.
+    assert_eq!(cursor.is_boundary(lf, 1), Err(GraphemeIncomplete::PreContext(1)));
+    cursor.provide_context(cr, 0);
+    // CR followed by LF must not be split.
+    assert_eq!(cursor.is_boundary(lf, 1), Ok(false));
+}
+
+#[test]
+fn test_grapheme_cursor_prev_boundary() {
+    let text = "abcd";
+    let (front, back) = text.split_at(2);
+    let mut cursor = GraphemeCursor::new(3, text.len(), true);
+    // Stepping back from 3 lands on the chunk start, where the previous chunk is needed.
+    assert_eq!(cursor.prev_boundary(back, 2), Err(GraphemeIncomplete::PrevChunk));
+    assert_eq!(cursor.prev_boundary(front, 0), Ok(Some(2)));
+}
+
+#[test]
+fn test_grapheme_cursor_prev_boundary_chunk_start() {
+    let text = "abcd";
+    let (front, back) = text.split_at(2);
+    let mut cursor = GraphemeCursor::new(2, text.len(), true);
+    // The cursor already sits on the chunk start, so nothing can be stepped over yet.
+    assert_eq!(cursor.prev_boundary(back, 2), Err(GraphemeIncomplete::PrevChunk));
+    assert_eq!(cursor.prev_boundary(front, 0), Ok(Some(1)));
+}
--- a/third_party/rust/unicode-segmentation/src/lib.rs
+++ b/third_party/rust/unicode-segmentation/src/lib.rs
@@ -59,16 +59,17 @@
 #[macro_use]
 extern crate std;
 
 #[cfg(test)]
 #[macro_use]
 extern crate quickcheck;
 
 pub use grapheme::{Graphemes, GraphemeIndices};
+pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
 pub use tables::UNICODE_VERSION;
 pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords};
 
 mod grapheme;
 mod tables;
 mod word;
 
 #[cfg(test)]
--- a/third_party/rust/unicode-segmentation/src/tables.rs
+++ b/third_party/rust/unicode-segmentation/src/tables.rs
@@ -291,23 +291,25 @@ pub mod grapheme {
     use core::result::Result::{Ok, Err};
 
     pub use self::GraphemeCat::*;
 
     #[allow(non_camel_case_types)]
     #[derive(Clone, Copy, PartialEq, Eq)]
     pub enum GraphemeCat {
         GC_Any,
+        GC_CR,
         GC_Control,
         GC_E_Base,
         GC_E_Base_GAZ,
         GC_E_Modifier,
         GC_Extend,
         GC_Glue_After_Zwj,
         GC_L,
+        GC_LF,
         GC_LV,
         GC_LVT,
         GC_Prepend,
         GC_Regional_Indicator,
         GC_SpacingMark,
         GC_T,
         GC_V,
         GC_ZWJ,
@@ -328,81 +330,83 @@ pub mod grapheme {
         }
     }
 
     /// Returns the grapheme category that `grapheme_cat_table` records for `c`.
     // NOTE(review): what `bsearch_range_value_table` yields for a codepoint
     // that no table range covers is not visible from here; confirm.
     pub fn grapheme_category(c: char) -> GraphemeCat {
         bsearch_range_value_table(c, grapheme_cat_table)
     }
 
     const grapheme_cat_table: &'static [(char, char, GraphemeCat)] = &[
-        ('\u{0}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}', GC_Control), ('\u{ad}', '\u{ad}',
-        GC_Control), ('\u{300}', '\u{36f}', GC_Extend), ('\u{483}', '\u{489}', GC_Extend),
-        ('\u{591}', '\u{5bd}', GC_Extend), ('\u{5bf}', '\u{5bf}', GC_Extend), ('\u{5c1}', '\u{5c2}',
-        GC_Extend), ('\u{5c4}', '\u{5c5}', GC_Extend), ('\u{5c7}', '\u{5c7}', GC_Extend),
-        ('\u{600}', '\u{605}', GC_Prepend), ('\u{610}', '\u{61a}', GC_Extend), ('\u{61c}',
-        '\u{61c}', GC_Control), ('\u{64b}', '\u{65f}', GC_Extend), ('\u{670}', '\u{670}',
-        GC_Extend), ('\u{6d6}', '\u{6dc}', GC_Extend), ('\u{6dd}', '\u{6dd}', GC_Prepend),
-        ('\u{6df}', '\u{6e4}', GC_Extend), ('\u{6e7}', '\u{6e8}', GC_Extend), ('\u{6ea}', '\u{6ed}',
-        GC_Extend), ('\u{70f}', '\u{70f}', GC_Prepend), ('\u{711}', '\u{711}', GC_Extend),
-        ('\u{730}', '\u{74a}', GC_Extend), ('\u{7a6}', '\u{7b0}', GC_Extend), ('\u{7eb}', '\u{7f3}',
-        GC_Extend), ('\u{816}', '\u{819}', GC_Extend), ('\u{81b}', '\u{823}', GC_Extend),
-        ('\u{825}', '\u{827}', GC_Extend), ('\u{829}', '\u{82d}', GC_Extend), ('\u{859}', '\u{85b}',
-        GC_Extend), ('\u{8d4}', '\u{8e1}', GC_Extend), ('\u{8e2}', '\u{8e2}', GC_Prepend),
-        ('\u{8e3}', '\u{902}', GC_Extend), ('\u{903}', '\u{903}', GC_SpacingMark), ('\u{93a}',
-        '\u{93a}', GC_Extend), ('\u{93b}', '\u{93b}', GC_SpacingMark), ('\u{93c}', '\u{93c}',
-        GC_Extend), ('\u{93e}', '\u{940}', GC_SpacingMark), ('\u{941}', '\u{948}', GC_Extend),
-        ('\u{949}', '\u{94c}', GC_SpacingMark), ('\u{94d}', '\u{94d}', GC_Extend), ('\u{94e}',
-        '\u{94f}', GC_SpacingMark), ('\u{951}', '\u{957}', GC_Extend), ('\u{962}', '\u{963}',
-        GC_Extend), ('\u{981}', '\u{981}', GC_Extend), ('\u{982}', '\u{983}', GC_SpacingMark),
-        ('\u{9bc}', '\u{9bc}', GC_Extend), ('\u{9be}', '\u{9be}', GC_Extend), ('\u{9bf}', '\u{9c0}',
-        GC_SpacingMark), ('\u{9c1}', '\u{9c4}', GC_Extend), ('\u{9c7}', '\u{9c8}', GC_SpacingMark),
-        ('\u{9cb}', '\u{9cc}', GC_SpacingMark), ('\u{9cd}', '\u{9cd}', GC_Extend), ('\u{9d7}',
-        '\u{9d7}', GC_Extend), ('\u{9e2}', '\u{9e3}', GC_Extend), ('\u{a01}', '\u{a02}', GC_Extend),
-        ('\u{a03}', '\u{a03}', GC_SpacingMark), ('\u{a3c}', '\u{a3c}', GC_Extend), ('\u{a3e}',
-        '\u{a40}', GC_SpacingMark), ('\u{a41}', '\u{a42}', GC_Extend), ('\u{a47}', '\u{a48}',
-        GC_Extend), ('\u{a4b}', '\u{a4d}', GC_Extend), ('\u{a51}', '\u{a51}', GC_Extend),
-        ('\u{a70}', '\u{a71}', GC_Extend), ('\u{a75}', '\u{a75}', GC_Extend), ('\u{a81}', '\u{a82}',
-        GC_Extend), ('\u{a83}', '\u{a83}', GC_SpacingMark), ('\u{abc}', '\u{abc}', GC_Extend),
-        ('\u{abe}', '\u{ac0}', GC_SpacingMark), ('\u{ac1}', '\u{ac5}', GC_Extend), ('\u{ac7}',
-        '\u{ac8}', GC_Extend), ('\u{ac9}', '\u{ac9}', GC_SpacingMark), ('\u{acb}', '\u{acc}',
-        GC_SpacingMark), ('\u{acd}', '\u{acd}', GC_Extend), ('\u{ae2}', '\u{ae3}', GC_Extend),
-        ('\u{b01}', '\u{b01}', GC_Extend), ('\u{b02}', '\u{b03}', GC_SpacingMark), ('\u{b3c}',
-        '\u{b3c}', GC_Extend), ('\u{b3e}', '\u{b3f}', GC_Extend), ('\u{b40}', '\u{b40}',
-        GC_SpacingMark), ('\u{b41}', '\u{b44}', GC_Extend), ('\u{b47}', '\u{b48}', GC_SpacingMark),
-        ('\u{b4b}', '\u{b4c}', GC_SpacingMark), ('\u{b4d}', '\u{b4d}', GC_Extend), ('\u{b56}',
-        '\u{b57}', GC_Extend), ('\u{b62}', '\u{b63}', GC_Extend), ('\u{b82}', '\u{b82}', GC_Extend),
-        ('\u{bbe}', '\u{bbe}', GC_Extend), ('\u{bbf}', '\u{bbf}', GC_SpacingMark), ('\u{bc0}',
-        '\u{bc0}', GC_Extend), ('\u{bc1}', '\u{bc2}', GC_SpacingMark), ('\u{bc6}', '\u{bc8}',
-        GC_SpacingMark), ('\u{bca}', '\u{bcc}', GC_SpacingMark), ('\u{bcd}', '\u{bcd}', GC_Extend),
-        ('\u{bd7}', '\u{bd7}', GC_Extend), ('\u{c00}', '\u{c00}', GC_Extend), ('\u{c01}', '\u{c03}',
-        GC_SpacingMark), ('\u{c3e}', '\u{c40}', GC_Extend), ('\u{c41}', '\u{c44}', GC_SpacingMark),
-        ('\u{c46}', '\u{c48}', GC_Extend), ('\u{c4a}', '\u{c4d}', GC_Extend), ('\u{c55}', '\u{c56}',
-        GC_Extend), ('\u{c62}', '\u{c63}', GC_Extend), ('\u{c81}', '\u{c81}', GC_Extend),
-        ('\u{c82}', '\u{c83}', GC_SpacingMark), ('\u{cbc}', '\u{cbc}', GC_Extend), ('\u{cbe}',
-        '\u{cbe}', GC_SpacingMark), ('\u{cbf}', '\u{cbf}', GC_Extend), ('\u{cc0}', '\u{cc1}',
-        GC_SpacingMark), ('\u{cc2}', '\u{cc2}', GC_Extend), ('\u{cc3}', '\u{cc4}', GC_SpacingMark),
-        ('\u{cc6}', '\u{cc6}', GC_Extend), ('\u{cc7}', '\u{cc8}', GC_SpacingMark), ('\u{cca}',
-        '\u{ccb}', GC_SpacingMark), ('\u{ccc}', '\u{ccd}', GC_Extend), ('\u{cd5}', '\u{cd6}',
-        GC_Extend), ('\u{ce2}', '\u{ce3}', GC_Extend), ('\u{d01}', '\u{d01}', GC_Extend),
-        ('\u{d02}', '\u{d03}', GC_SpacingMark), ('\u{d3e}', '\u{d3e}', GC_Extend), ('\u{d3f}',
-        '\u{d40}', GC_SpacingMark), ('\u{d41}', '\u{d44}', GC_Extend), ('\u{d46}', '\u{d48}',
-        GC_SpacingMark), ('\u{d4a}', '\u{d4c}', GC_SpacingMark), ('\u{d4d}', '\u{d4d}', GC_Extend),
-        ('\u{d4e}', '\u{d4e}', GC_Prepend), ('\u{d57}', '\u{d57}', GC_Extend), ('\u{d62}',
-        '\u{d63}', GC_Extend), ('\u{d82}', '\u{d83}', GC_SpacingMark), ('\u{dca}', '\u{dca}',
-        GC_Extend), ('\u{dcf}', '\u{dcf}', GC_Extend), ('\u{dd0}', '\u{dd1}', GC_SpacingMark),
-        ('\u{dd2}', '\u{dd4}', GC_Extend), ('\u{dd6}', '\u{dd6}', GC_Extend), ('\u{dd8}', '\u{dde}',
-        GC_SpacingMark), ('\u{ddf}', '\u{ddf}', GC_Extend), ('\u{df2}', '\u{df3}', GC_SpacingMark),
-        ('\u{e31}', '\u{e31}', GC_Extend), ('\u{e33}', '\u{e33}', GC_SpacingMark), ('\u{e34}',
-        '\u{e3a}', GC_Extend), ('\u{e47}', '\u{e4e}', GC_Extend), ('\u{eb1}', '\u{eb1}', GC_Extend),
-        ('\u{eb3}', '\u{eb3}', GC_SpacingMark), ('\u{eb4}', '\u{eb9}', GC_Extend), ('\u{ebb}',
-        '\u{ebc}', GC_Extend), ('\u{ec8}', '\u{ecd}', GC_Extend), ('\u{f18}', '\u{f19}', GC_Extend),
-        ('\u{f35}', '\u{f35}', GC_Extend), ('\u{f37}', '\u{f37}', GC_Extend), ('\u{f39}', '\u{f39}',
-        GC_Extend), ('\u{f3e}', '\u{f3f}', GC_SpacingMark), ('\u{f71}', '\u{f7e}', GC_Extend),
-        ('\u{f7f}', '\u{f7f}', GC_SpacingMark), ('\u{f80}', '\u{f84}', GC_Extend), ('\u{f86}',
-        '\u{f87}', GC_Extend), ('\u{f8d}', '\u{f97}', GC_Extend), ('\u{f99}', '\u{fbc}', GC_Extend),
+        ('\u{0}', '\u{9}', GC_Control), ('\u{a}', '\u{a}', GC_LF), ('\u{b}', '\u{c}', GC_Control),
+        ('\u{d}', '\u{d}', GC_CR), ('\u{e}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}',
+        GC_Control), ('\u{ad}', '\u{ad}', GC_Control), ('\u{300}', '\u{36f}', GC_Extend),
+        ('\u{483}', '\u{489}', GC_Extend), ('\u{591}', '\u{5bd}', GC_Extend), ('\u{5bf}', '\u{5bf}',
+        GC_Extend), ('\u{5c1}', '\u{5c2}', GC_Extend), ('\u{5c4}', '\u{5c5}', GC_Extend),
+        ('\u{5c7}', '\u{5c7}', GC_Extend), ('\u{600}', '\u{605}', GC_Prepend), ('\u{610}',
+        '\u{61a}', GC_Extend), ('\u{61c}', '\u{61c}', GC_Control), ('\u{64b}', '\u{65f}',
+        GC_Extend), ('\u{670}', '\u{670}', GC_Extend), ('\u{6d6}', '\u{6dc}', GC_Extend),
+        ('\u{6dd}', '\u{6dd}', GC_Prepend), ('\u{6df}', '\u{6e4}', GC_Extend), ('\u{6e7}',
+        '\u{6e8}', GC_Extend), ('\u{6ea}', '\u{6ed}', GC_Extend), ('\u{70f}', '\u{70f}',
+        GC_Prepend), ('\u{711}', '\u{711}', GC_Extend), ('\u{730}', '\u{74a}', GC_Extend),
+        ('\u{7a6}', '\u{7b0}', GC_Extend), ('\u{7eb}', '\u{7f3}', GC_Extend), ('\u{816}', '\u{819}',
+        GC_Extend), ('\u{81b}', '\u{823}', GC_Extend), ('\u{825}', '\u{827}', GC_Extend),
+        ('\u{829}', '\u{82d}', GC_Extend), ('\u{859}', '\u{85b}', GC_Extend), ('\u{8d4}', '\u{8e1}',
+        GC_Extend), ('\u{8e2}', '\u{8e2}', GC_Prepend), ('\u{8e3}', '\u{902}', GC_Extend),
+        ('\u{903}', '\u{903}', GC_SpacingMark), ('\u{93a}', '\u{93a}', GC_Extend), ('\u{93b}',
+        '\u{93b}', GC_SpacingMark), ('\u{93c}', '\u{93c}', GC_Extend), ('\u{93e}', '\u{940}',
+        GC_SpacingMark), ('\u{941}', '\u{948}', GC_Extend), ('\u{949}', '\u{94c}', GC_SpacingMark),
+        ('\u{94d}', '\u{94d}', GC_Extend), ('\u{94e}', '\u{94f}', GC_SpacingMark), ('\u{951}',
+        '\u{957}', GC_Extend), ('\u{962}', '\u{963}', GC_Extend), ('\u{981}', '\u{981}', GC_Extend),
+        ('\u{982}', '\u{983}', GC_SpacingMark), ('\u{9bc}', '\u{9bc}', GC_Extend), ('\u{9be}',
+        '\u{9be}', GC_Extend), ('\u{9bf}', '\u{9c0}', GC_SpacingMark), ('\u{9c1}', '\u{9c4}',
+        GC_Extend), ('\u{9c7}', '\u{9c8}', GC_SpacingMark), ('\u{9cb}', '\u{9cc}', GC_SpacingMark),
+        ('\u{9cd}', '\u{9cd}', GC_Extend), ('\u{9d7}', '\u{9d7}', GC_Extend), ('\u{9e2}', '\u{9e3}',
+        GC_Extend), ('\u{a01}', '\u{a02}', GC_Extend), ('\u{a03}', '\u{a03}', GC_SpacingMark),
+        ('\u{a3c}', '\u{a3c}', GC_Extend), ('\u{a3e}', '\u{a40}', GC_SpacingMark), ('\u{a41}',
+        '\u{a42}', GC_Extend), ('\u{a47}', '\u{a48}', GC_Extend), ('\u{a4b}', '\u{a4d}', GC_Extend),
+        ('\u{a51}', '\u{a51}', GC_Extend), ('\u{a70}', '\u{a71}', GC_Extend), ('\u{a75}', '\u{a75}',
+        GC_Extend), ('\u{a81}', '\u{a82}', GC_Extend), ('\u{a83}', '\u{a83}', GC_SpacingMark),
+        ('\u{abc}', '\u{abc}', GC_Extend), ('\u{abe}', '\u{ac0}', GC_SpacingMark), ('\u{ac1}',
+        '\u{ac5}', GC_Extend), ('\u{ac7}', '\u{ac8}', GC_Extend), ('\u{ac9}', '\u{ac9}',
+        GC_SpacingMark), ('\u{acb}', '\u{acc}', GC_SpacingMark), ('\u{acd}', '\u{acd}', GC_Extend),
+        ('\u{ae2}', '\u{ae3}', GC_Extend), ('\u{b01}', '\u{b01}', GC_Extend), ('\u{b02}', '\u{b03}',
+        GC_SpacingMark), ('\u{b3c}', '\u{b3c}', GC_Extend), ('\u{b3e}', '\u{b3f}', GC_Extend),
+        ('\u{b40}', '\u{b40}', GC_SpacingMark), ('\u{b41}', '\u{b44}', GC_Extend), ('\u{b47}',
+        '\u{b48}', GC_SpacingMark), ('\u{b4b}', '\u{b4c}', GC_SpacingMark), ('\u{b4d}', '\u{b4d}',
+        GC_Extend), ('\u{b56}', '\u{b57}', GC_Extend), ('\u{b62}', '\u{b63}', GC_Extend),
+        ('\u{b82}', '\u{b82}', GC_Extend), ('\u{bbe}', '\u{bbe}', GC_Extend), ('\u{bbf}', '\u{bbf}',
+        GC_SpacingMark), ('\u{bc0}', '\u{bc0}', GC_Extend), ('\u{bc1}', '\u{bc2}', GC_SpacingMark),
+        ('\u{bc6}', '\u{bc8}', GC_SpacingMark), ('\u{bca}', '\u{bcc}', GC_SpacingMark), ('\u{bcd}',
+        '\u{bcd}', GC_Extend), ('\u{bd7}', '\u{bd7}', GC_Extend), ('\u{c00}', '\u{c00}', GC_Extend),
+        ('\u{c01}', '\u{c03}', GC_SpacingMark), ('\u{c3e}', '\u{c40}', GC_Extend), ('\u{c41}',
+        '\u{c44}', GC_SpacingMark), ('\u{c46}', '\u{c48}', GC_Extend), ('\u{c4a}', '\u{c4d}',
+        GC_Extend), ('\u{c55}', '\u{c56}', GC_Extend), ('\u{c62}', '\u{c63}', GC_Extend),
+        ('\u{c81}', '\u{c81}', GC_Extend), ('\u{c82}', '\u{c83}', GC_SpacingMark), ('\u{cbc}',
+        '\u{cbc}', GC_Extend), ('\u{cbe}', '\u{cbe}', GC_SpacingMark), ('\u{cbf}', '\u{cbf}',
+        GC_Extend), ('\u{cc0}', '\u{cc1}', GC_SpacingMark), ('\u{cc2}', '\u{cc2}', GC_Extend),
+        ('\u{cc3}', '\u{cc4}', GC_SpacingMark), ('\u{cc6}', '\u{cc6}', GC_Extend), ('\u{cc7}',
+        '\u{cc8}', GC_SpacingMark), ('\u{cca}', '\u{ccb}', GC_SpacingMark), ('\u{ccc}', '\u{ccd}',
+        GC_Extend), ('\u{cd5}', '\u{cd6}', GC_Extend), ('\u{ce2}', '\u{ce3}', GC_Extend),
+        ('\u{d01}', '\u{d01}', GC_Extend), ('\u{d02}', '\u{d03}', GC_SpacingMark), ('\u{d3e}',
+        '\u{d3e}', GC_Extend), ('\u{d3f}', '\u{d40}', GC_SpacingMark), ('\u{d41}', '\u{d44}',
+        GC_Extend), ('\u{d46}', '\u{d48}', GC_SpacingMark), ('\u{d4a}', '\u{d4c}', GC_SpacingMark),
+        ('\u{d4d}', '\u{d4d}', GC_Extend), ('\u{d4e}', '\u{d4e}', GC_Prepend), ('\u{d57}',
+        '\u{d57}', GC_Extend), ('\u{d62}', '\u{d63}', GC_Extend), ('\u{d82}', '\u{d83}',
+        GC_SpacingMark), ('\u{dca}', '\u{dca}', GC_Extend), ('\u{dcf}', '\u{dcf}', GC_Extend),
+        ('\u{dd0}', '\u{dd1}', GC_SpacingMark), ('\u{dd2}', '\u{dd4}', GC_Extend), ('\u{dd6}',
+        '\u{dd6}', GC_Extend), ('\u{dd8}', '\u{dde}', GC_SpacingMark), ('\u{ddf}', '\u{ddf}',
+        GC_Extend), ('\u{df2}', '\u{df3}', GC_SpacingMark), ('\u{e31}', '\u{e31}', GC_Extend),
+        ('\u{e33}', '\u{e33}', GC_SpacingMark), ('\u{e34}', '\u{e3a}', GC_Extend), ('\u{e47}',
+        '\u{e4e}', GC_Extend), ('\u{eb1}', '\u{eb1}', GC_Extend), ('\u{eb3}', '\u{eb3}',
+        GC_SpacingMark), ('\u{eb4}', '\u{eb9}', GC_Extend), ('\u{ebb}', '\u{ebc}', GC_Extend),
+        ('\u{ec8}', '\u{ecd}', GC_Extend), ('\u{f18}', '\u{f19}', GC_Extend), ('\u{f35}', '\u{f35}',
+        GC_Extend), ('\u{f37}', '\u{f37}', GC_Extend), ('\u{f39}', '\u{f39}', GC_Extend),
+        ('\u{f3e}', '\u{f3f}', GC_SpacingMark), ('\u{f71}', '\u{f7e}', GC_Extend), ('\u{f7f}',
+        '\u{f7f}', GC_SpacingMark), ('\u{f80}', '\u{f84}', GC_Extend), ('\u{f86}', '\u{f87}',
+        GC_Extend), ('\u{f8d}', '\u{f97}', GC_Extend), ('\u{f99}', '\u{fbc}', GC_Extend),
         ('\u{fc6}', '\u{fc6}', GC_Extend), ('\u{102d}', '\u{1030}', GC_Extend), ('\u{1031}',
         '\u{1031}', GC_SpacingMark), ('\u{1032}', '\u{1037}', GC_Extend), ('\u{1039}', '\u{103a}',
         GC_Extend), ('\u{103b}', '\u{103c}', GC_SpacingMark), ('\u{103d}', '\u{103e}', GC_Extend),
         ('\u{1056}', '\u{1057}', GC_SpacingMark), ('\u{1058}', '\u{1059}', GC_Extend), ('\u{105e}',
         '\u{1060}', GC_Extend), ('\u{1071}', '\u{1074}', GC_Extend), ('\u{1082}', '\u{1082}',
         GC_Extend), ('\u{1084}', '\u{1084}', GC_SpacingMark), ('\u{1085}', '\u{1086}', GC_Extend),
         ('\u{108d}', '\u{108d}', GC_Extend), ('\u{109d}', '\u{109d}', GC_Extend), ('\u{1100}',
         '\u{115f}', GC_L), ('\u{1160}', '\u{11a7}', GC_V), ('\u{11a8}', '\u{11ff}', GC_T),
@@ -863,17 +867,17 @@ pub mod grapheme {
 }
 
 pub mod word {
     use core::result::Result::{Ok, Err};
 
     pub use self::WordCat::*;
 
     #[allow(non_camel_case_types)]
-    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
+    #[derive(Clone, Copy, PartialEq, Eq)]
     pub enum WordCat {
         WC_ALetter,
         WC_Any,
         WC_CR,
         WC_Double_Quote,
         WC_E_Base,
         WC_E_Base_GAZ,
         WC_E_Modifier,
--- a/third_party/rust/unicode-segmentation/src/test.rs
+++ b/third_party/rust/unicode-segmentation/src/test.rs
@@ -29,16 +29,20 @@ fn test_graphemes() {
          &["\u{600}\u{20}", "\u{20}"],
          &["\u{600}", "\u{20}", "\u{20}"]),
     ];
 
     pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[
         // family emoji (more than two emoji joined by ZWJ)
         ("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
          &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]),
+        // cartwheel emoji followed by two fitzpatrick skin tone modifiers
+        // (test case from issue #19)
+        ("\u{1F938}\u{1F3FE}\u{1F3FE}",
+         &["\u{1F938}\u{1F3FE}", "\u{1F3FE}"]),
     ];
 
     for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
         // test forward iterator
         assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned()));
         assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));
 
         // test reverse iterator