Bug 1441204 - Upgrade unicode-segmentation from 1.1.0 to 1.2.1. r=maja_zf
authorAndreas Tolfsen <ato@sny.no>
Thu, 14 Jun 2018 13:01:20 -0700
changeset 476953 66765efe2e494b3d59386ba8a3f65b665a0e55c7
parent 476952 05e3f0f3268cdad89ea8a4f67cc736153169a4fd
child 476954 371a38d383d5327f5dd056721e650859ffdc90f1
push id9374
push userjlund@mozilla.com
push dateMon, 18 Jun 2018 21:43:20 +0000
treeherdermozilla-beta@160e085dfb0b [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmaja_zf
bugs1441204
milestone62.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1441204 - Upgrade unicode-segmentation from 1.1.0 to 1.2.1. r=maja_zf MozReview-Commit-ID: 1KUU2U7AVz4
Cargo.lock
testing/webdriver/Cargo.toml
third_party/rust/unicode-segmentation/.cargo-checksum.json
third_party/rust/unicode-segmentation/Cargo.toml
third_party/rust/unicode-segmentation/README.md
third_party/rust/unicode-segmentation/scripts/unicode.py
third_party/rust/unicode-segmentation/src/grapheme.rs
third_party/rust/unicode-segmentation/src/lib.rs
third_party/rust/unicode-segmentation/src/tables.rs
third_party/rust/unicode-segmentation/src/test.rs
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1962,17 +1962,17 @@ dependencies = [
  "smallbitvec 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "smallvec 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "style_derive 0.0.1",
  "style_traits 0.0.1",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
  "toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "uluru 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "style_derive"
 version = "0.0.1"
 dependencies = [
@@ -2223,17 +2223,17 @@ dependencies = [
 
 [[package]]
 name = "unicode-normalization"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "unicode-segmentation"
-version = "1.1.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "unicode-width"
 version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
@@ -2307,17 +2307,17 @@ name = "webdriver"
 version = "0.35.1"
 dependencies = [
  "cookie 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
- "unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webidl"
 version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
@@ -2722,17 +2722,17 @@ dependencies = [
 "checksum toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a7540f4ffc193e0d3c94121edb19b055670d369f77d5804db11ae053a45b6e7e"
 "checksum traitobject 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "efd1f82c56340fdf16f2a953d7bda4f8fdffba13d93b00844c25572110b26079"
 "checksum typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1410f6f91f21d1612654e7cc69193b0334f909dcf2c790c4826254fbb86f8887"
 "checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
 "checksum uluru 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "519130f0ea964ba540a9d8af1373738c2226f1d465eda07e61db29feb5479db9"
 "checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33"
 "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
 "checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f"
-"checksum unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18127285758f0e2c6cf325bb3f3d138a12fee27de4f23e146cd6a179f26c2cf3"
+"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
 "checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f"
 "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
 "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
 "checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7"
 "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
 "checksum uuid 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)" = "78c590b5bd79ed10aad8fb75f078a59d8db445af6c743e55c4a53227fc01c13f"
 "checksum vcpkg 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9e0a7d8bed3178a8fb112199d466eeca9ed09a14ba8ad67718179b4fd5487d0b"
 "checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c"
--- a/testing/webdriver/Cargo.toml
+++ b/testing/webdriver/Cargo.toml
@@ -11,10 +11,10 @@ license = "MPL-2.0"
 
 [dependencies]
 cookie = { version = "0.10", default-features = false }
 hyper = "0.10"
 log = "0.4"
 regex = "1.0"
 rustc-serialize = "0.3"
 time = "0.1"
-unicode-segmentation = "1.1.0"
+unicode-segmentation = "1.2"
 url = "1"
--- a/third_party/rust/unicode-segmentation/.cargo-checksum.json
+++ b/third_party/rust/unicode-segmentation/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{".travis.yml":"45a9e28a40dc51950223f939e74e09e3792d797db4c43c6a2e7d2ebc82e405e7","COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"5d3ae74bc112f9c3e728e6a2e596a23ee8b2fab7df0e238b09306c2b6d3e59b7","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"32f1640f92c102be60895ceda728dce63820bb13a8a24065fe8a33d69a0e36e2","scripts/unicode.py":"5b4f051b7c934df8579f14051bc65ccda46583b8d216f5d6caceffa010c48a2e","scripts/unicode_gen_breaktests.py":"a8d9c88aec31f4d33b9850e71d497efbc9e6e9268a1b67ce55bd2a45ad552c67","src/grapheme.rs":"d7ad64cf1345d4821e24c21cb3186ba3a923955b6f1aeefba88abee5dcef6c3f","src/lib.rs":"62047530c83a33e756dd3b13f19cfc95c15b6f26c28e76949ff910f917816cf0","src/tables.rs":"9d08e3f87c25b165b7902b7a0fb1682ac3a4003aa598ed9d32b465e8911f33e3","src/test.rs":"1e203fdc5ccdf5fe6f3da23b307c807cafa4b2250f55698f4ba385978f23e80c","src/testdata.rs":"33d4c58cf9416b342f31b1139b428676deecec34b0994c40ebe0c5cf4a639c79","src/word.rs":"eefecfcf85554fc529a44892fa7e3be50377b24a095610b7feb5f184c82818b9"},"package":"18127285758f0e2c6cf325bb3f3d138a12fee27de4f23e146cd6a179f26c2cf3"}
\ No newline at end of file
+{"files":{".travis.yml":"45a9e28a40dc51950223f939e74e09e3792d797db4c43c6a2e7d2ebc82e405e7","COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"f5cdd71581b22dd5aedcf9cc9d5b817cfc6df3163185b069b668a10c21912979","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"9fe1c77a06b34cbe0dca1bb987e7551a210d37287f517014345402c74d25eb1f","scripts/unicode.py":"4bbbbb274c358fa46eea130e99c7f77c1c2c4bb4f60f7380e77ac93e47a20143","scripts/unicode_gen_breaktests.py":"a8d9c88aec31f4d33b9850e71d497efbc9e6e9268a1b67ce55bd2a45ad552c67","src/grapheme.rs":"5b86f40dff85a539d80c5bdac496311295a5358a2558032111d4bec39767f15f","src/lib.rs":"445d37ea35cd787d8b5f43f853b74e4e21e374d65e78e45dbd77122bedf96b31","src/tables.rs":"a8fc42a3f3d788bf3154ee4012e9e90c1f311eba67ede3ff1d69fd4eb8cefe74","src/test.rs":"58d5d80e17df64a95c86728416afac0a4744adf5020bbbc053b2514ddeecd2da","src/testdata.rs":"33d4c58cf9416b342f31b1139b428676deecec34b0994c40ebe0c5cf4a639c79","src/word.rs":"eefecfcf85554fc529a44892fa7e3be50377b24a095610b7feb5f184c82818b9"},"package":"aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"}
\ No newline at end of file
--- a/third_party/rust/unicode-segmentation/Cargo.toml
+++ b/third_party/rust/unicode-segmentation/Cargo.toml
@@ -1,25 +1,29 @@
-[package]
-
-name = "unicode-segmentation"
-version = "1.1.0"
-authors = ["kwantam <kwantam@gmail.com>"]
-
-homepage = "https://github.com/unicode-rs/unicode-segmentation"
-repository = "https://github.com/unicode-rs/unicode-segmentation"
-documentation = "https://unicode-rs.github.io/unicode-segmentation"
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
 
-license = "MIT/Apache-2.0"
-keywords = ["text", "unicode", "grapheme", "word", "boundary"]
+[package]
+name = "unicode-segmentation"
+version = "1.2.1"
+authors = ["kwantam <kwantam@gmail.com>"]
+exclude = ["target/*", "Cargo.lock", "scripts/tmp", "*.txt"]
+description = "This crate provides Grapheme Cluster and Word boundaries\naccording to Unicode Standard Annex #29 rules.\n"
+homepage = "https://github.com/unicode-rs/unicode-segmentation"
+documentation = "https://unicode-rs.github.io/unicode-segmentation"
 readme = "README.md"
-description = """
-This crate provides Grapheme Cluster and Word boundaries
-according to Unicode Standard Annex #29 rules.
-"""
-
-exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "*.txt" ]
+keywords = ["text", "unicode", "grapheme", "word", "boundary"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/unicode-rs/unicode-segmentation"
+[dev-dependencies.quickcheck]
+version = "0.4"
 
 [features]
-no_std = [] # This is a no-op, preserved for backward compatibility only.
-
-[dev-dependencies]
-quickcheck = "0.4"
+no_std = []
--- a/third_party/rust/unicode-segmentation/README.md
+++ b/third_party/rust/unicode-segmentation/README.md
@@ -40,16 +40,21 @@ to your `Cargo.toml`:
 
 ```toml
 [dependencies]
 unicode-segmentation = "1.1.0"
 ```
 
 # Change Log
 
+## 1.2.0
+
+* New `GraphemeCursor` API allows random access and bidirectional iteration.
+* Fixed incorrect splitting of certain emoji modifier sequences.
+
 ## 1.1.0
 
 * Add `as_str` methods to the iterator types.
 
 ## 1.0.3
 
 * Code cleanup and additional tests.
 
--- a/third_party/rust/unicode-segmentation/scripts/unicode.py
+++ b/third_party/rust/unicode-segmentation/scripts/unicode.py
@@ -325,31 +325,23 @@ pub const UNICODE_VERSION: (u64, u64, u6
                                   ("derived_property", derived, ["Alphabetic"]):
             emit_property_module(rf, name, cat, pfuns)
 
         ### grapheme cluster module
         # from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values
         grapheme_cats = load_properties("auxiliary/GraphemeBreakProperty.txt", [])
 
         # Control
-        #  Note 1:
+        #  Note:
         # This category also includes Cs (surrogate codepoints), but Rust's `char`s are
         # Unicode Scalar Values only, and surrogates are thus invalid `char`s.
         # Thus, we have to remove Cs from the Control category
-        #  Note 2:
-        # 0x0a and 0x0d (CR and LF) are not in the Control category for Graphemes.
-        # However, the Graphemes iterator treats these as a special case, so they
-        # should be included in grapheme_cats["Control"] for our implementation.
         grapheme_cats["Control"] = group_cat(list(
-            (set(ungroup_cat(grapheme_cats["Control"]))
-             | set(ungroup_cat(grapheme_cats["CR"]))
-             | set(ungroup_cat(grapheme_cats["LF"])))
+            set(ungroup_cat(grapheme_cats["Control"]))
             - set(ungroup_cat([surrogate_codepoints]))))
-        del(grapheme_cats["CR"])
-        del(grapheme_cats["LF"])
 
         grapheme_table = []
         for cat in grapheme_cats:
             grapheme_table.extend([(x, y, cat) for (x, y) in grapheme_cats[cat]])
         grapheme_table.sort(key=lambda w: w[0])
         emit_break_module(rf, grapheme_table, grapheme_cats.keys(), "grapheme")
         rf.write("\n")
 
--- a/third_party/rust/unicode-segmentation/src/grapheme.rs
+++ b/third_party/rust/unicode-segmentation/src/grapheme.rs
@@ -59,20 +59,18 @@ impl<'a> DoubleEndedIterator for Graphem
     }
 }
 
 /// External iterator for a string's
 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries).
 #[derive(Clone)]
 pub struct Graphemes<'a> {
     string: &'a str,
-    extended: bool,
-    cat: Option<GraphemeCat>,
-    catb: Option<GraphemeCat>,
-    regional_count_back: Option<usize>,
+    cursor: GraphemeCursor,
+    cursor_back: GraphemeCursor,
 }
 
 impl<'a> Graphemes<'a> {
     #[inline]
     /// View the underlying data (the part yet to be iterated) as a slice of the original string.
     ///
     /// ```rust
     /// # use unicode_segmentation::UnicodeSegmentation;
@@ -80,356 +78,627 @@ impl<'a> Graphemes<'a> {
     /// assert_eq!(iter.as_str(), "abc");
     /// iter.next();
     /// assert_eq!(iter.as_str(), "bc");
     /// iter.next();
     /// iter.next();
     /// assert_eq!(iter.as_str(), "");
     /// ```
     pub fn as_str(&self) -> &'a str {
-        self.string
+        &self.string[self.cursor.cur_cursor()..self.cursor_back.cur_cursor()]
     }
 }
 
-// state machine for cluster boundary rules
-#[derive(Copy,Clone,PartialEq,Eq)]
-enum GraphemeState {
-    Start,
-    FindExtend,
-    HangulL,
-    HangulLV,
-    HangulLVT,
-    Prepend,
-    Regional,
-    Emoji,
-    Zwj,
-}
-
 impl<'a> Iterator for Graphemes<'a> {
     type Item = &'a str;
 
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
-        let slen = self.string.len();
+        let slen = self.cursor_back.cur_cursor() - self.cursor.cur_cursor();
         (cmp::min(slen, 1), Some(slen))
     }
 
     #[inline]
     fn next(&mut self) -> Option<&'a str> {
-        use self::GraphemeState::*;
-        use tables::grapheme as gr;
-        if self.string.len() == 0 {
+        let start = self.cursor.cur_cursor();
+        if start == self.cursor_back.cur_cursor() {
             return None;
         }
-
-        let mut take_curr = true;
-        let mut idx = 0;
-        let mut state = Start;
-        let mut cat = gr::GC_Any;
-
-        // caching used by next_back() should be invalidated
-        self.regional_count_back = None;
-        self.catb = None;
-
-        for (curr, ch) in self.string.char_indices() {
-            idx = curr;
-
-            // retrieve cached category, if any
-            // We do this because most of the time we would end up
-            // looking up each character twice.
-            cat = match self.cat {
-                None => gr::grapheme_category(ch),
-                _ => self.cat.take().unwrap()
-            };
-
-            if (state, cat) == (Emoji, gr::GC_Extend) {
-                continue;                   // rule GB10
-            }
-
-            if let Some(new_state) = match cat {
-                gr::GC_Extend => Some(FindExtend),                       // rule GB9
-                gr::GC_SpacingMark if self.extended => Some(FindExtend), // rule GB9a
-                gr::GC_ZWJ => Some(Zwj),                                 // rule GB9/GB11
-                _ => None
-            } {
-                state = new_state;
-                continue;
-            }
-
-            state = match state {
-                Start if '\r' == ch => {
-                    let slen = self.string.len();
-                    let nidx = idx + 1;
-                    if nidx != slen && self.string[nidx..].chars().next().unwrap() == '\n' {
-                        idx = nidx;             // rule GB3
-                    }
-                    break;                      // rule GB4
-                }
-                Start | Prepend => match cat {
-                    gr::GC_Control => {         // rule GB5
-                        take_curr = state == Start;
-                        break;
-                    }
-                    gr::GC_L => HangulL,
-                    gr::GC_LV | gr::GC_V => HangulLV,
-                    gr::GC_LVT | gr::GC_T => HangulLVT,
-                    gr::GC_Prepend if self.extended => Prepend,
-                    gr::GC_Regional_Indicator => Regional,
-                    gr::GC_E_Base | gr::GC_E_Base_GAZ => Emoji,
-                    _ => FindExtend
-                },
-                FindExtend => {         // found non-extending when looking for extending
-                    take_curr = false;
-                    break;
-                },
-                HangulL => match cat {      // rule GB6: L x (L|V|LV|LVT)
-                    gr::GC_L => continue,
-                    gr::GC_LV | gr::GC_V => HangulLV,
-                    gr::GC_LVT => HangulLVT,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLV => match cat {     // rule GB7: (LV|V) x (V|T)
-                    gr::GC_V => continue,
-                    gr::GC_T => HangulLVT,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLVT => match cat {    // rule GB8: (LVT|T) x T
-                    gr::GC_T => continue,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Regional => match cat {     // rule GB12/GB13
-                    gr::GC_Regional_Indicator => FindExtend,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Emoji => match cat {        // rule GB10: (E_Base|EBG) Extend* x E_Modifier
-                    gr::GC_E_Modifier => continue,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Zwj => match cat {          // rule GB11: ZWJ x (GAZ|EBG)
-                    gr::GC_Glue_After_Zwj => continue,
-                    gr::GC_E_Base_GAZ => Emoji,
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-            }
-        }
-
-        self.cat = if take_curr {
-            idx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
-            None
-        } else {
-            Some(cat)
-        };
-
-        let retstr = &self.string[..idx];
-        self.string = &self.string[idx..];
-        Some(retstr)
+        let next = self.cursor.next_boundary(self.string, 0).unwrap().unwrap();
+        Some(&self.string[start..next])
     }
 }
 
 impl<'a> DoubleEndedIterator for Graphemes<'a> {
     #[inline]
     fn next_back(&mut self) -> Option<&'a str> {
-        use self::GraphemeState::*;
-        use tables::grapheme as gr;
-        if self.string.len() == 0 {
+        let end = self.cursor_back.cur_cursor();
+        if end == self.cursor.cur_cursor() {
             return None;
         }
-
-        let mut take_curr = true;
-        let mut idx = self.string.len();
-        let mut previdx = idx;
-        let mut state = Start;
-        let mut cat = gr::GC_Any;
-
-        // caching used by next() should be invalidated
-        self.cat = None;
-
-        'outer: for (curr, ch) in self.string.char_indices().rev() {
-            previdx = idx;
-            idx = curr;
-
-            // cached category, if any
-            cat = match self.catb {
-                None => gr::grapheme_category(ch),
-                _ => self.catb.take().unwrap()
-            };
-
-            // a matching state machine that runs *backwards* across an input string
-            // note that this has some implications for the Hangul matching, since
-            // we now need to know what the rightward letter is:
-            //
-            // Right to left, we have:
-            //      L x L
-            //      V x (L|V|LV)
-            //      T x (V|T|LV|LVT)
-            // HangulL means the letter to the right is L
-            // HangulLV means the letter to the right is V
-            // HangulLVT means the letter to the right is T
-            state = match state {
-                Start if '\n' == ch => {
-                    if idx > 0 && '\r' == self.string[..idx].chars().next_back().unwrap() {
-                        idx -= 1;       // rule GB3
-                    }
-                    break;              // rule GB4
-                },
-                Start | FindExtend => match cat {
-                    gr::GC_Extend => FindExtend,
-                    gr::GC_SpacingMark if self.extended => FindExtend,
-                    gr::GC_ZWJ => FindExtend,
-                    gr::GC_E_Modifier => Emoji,
-                    gr::GC_Glue_After_Zwj | gr::GC_E_Base_GAZ => Zwj,
-                    gr::GC_L | gr::GC_LV | gr::GC_LVT => HangulL,
-                    gr::GC_V => HangulLV,
-                    gr::GC_T => HangulLVT,
-                    gr::GC_Regional_Indicator => Regional,
-                    gr::GC_Control => {
-                        take_curr = Start == state;
-                        break;
-                    },
-                    _ => break
-                },
-                HangulL => match cat {      // char to right is an L
-                    gr::GC_L => continue,               // L x L is the only legal match
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLV => match cat {     // char to right is a V
-                    gr::GC_V => continue,               // V x V, right char is still V
-                    gr::GC_L | gr::GC_LV => HangulL,    // (L|V) x V, right char is now L
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                HangulLVT => match cat {    // char to right is a T
-                    gr::GC_T => continue,               // T x T, right char is still T
-                    gr::GC_V => HangulLV,               // V x T, right char is now V
-                    gr::GC_LV | gr::GC_LVT => HangulL,  // (LV|LVT) x T, right char is now L
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                },
-                Prepend => {
-                    // not used in reverse iteration
-                    unreachable!()
-                },
-                Regional => {               // rule GB12/GB13
-                    // Need to scan backward to find if this is preceded by an odd or even number
-                    // of Regional_Indicator characters.
-                    let count = match self.regional_count_back {
-                        Some(count) => count,
-                        None => self.string[..previdx].chars().rev().take_while(|c| {
-                                    gr::grapheme_category(*c) == gr::GC_Regional_Indicator
-                                }).count()
-                    };
-                    // Cache the count to avoid re-scanning the same chars on the next iteration.
-                    self.regional_count_back = count.checked_sub(1);
-
-                    if count % 2 == 0 {
-                        take_curr = false;
-                        break;
-                    }
-                    continue;
-                },
-                Emoji => {                  // char to right is E_Modifier
-                    // In order to decide whether to break before this E_Modifier char, we need to
-                    // scan backward past any Extend chars to look for (E_Base|(ZWJ? EBG)).
-                    let mut ebg_idx = None;
-                    for (startidx, prev) in self.string[..previdx].char_indices().rev() {
-                        match (ebg_idx, gr::grapheme_category(prev)) {
-                            (None, gr::GC_Extend) => continue,
-                            (None, gr::GC_E_Base) => {      // rule GB10
-                                // Found an Emoji modifier sequence. Return the whole sequence.
-                                idx = startidx;
-                                break 'outer;
-                            }
-                            (None, gr::GC_E_Base_GAZ) => {  // rule GB10
-                                // Keep scanning in case this is part of an ZWJ x EBJ pair.
-                                ebg_idx = Some(startidx);
-                            }
-                            (Some(_), gr::GC_ZWJ) => {      // rule GB11
-                                idx = startidx;
-                                break 'outer;
-                            }
-                            _ => break
-                        }
-                    }
-                    if let Some(ebg_idx) = ebg_idx {
-                        // Found an EBG without a ZWJ before it.
-                        idx = ebg_idx;
-                        break;
-                    }
-                    // Not part of an Emoji modifier sequence. Break here.
-                    take_curr = false;
-                    break;
-                },
-                Zwj => match cat {            // char to right is (GAZ|EBG)
-                    gr::GC_ZWJ => FindExtend, // rule GB11: ZWJ x (GAZ|EBG)
-                    _ => {
-                        take_curr = false;
-                        break;
-                    }
-                }
-            }
-        }
-
-        self.catb = if take_curr {
-            None
-        } else  {
-            idx = previdx;
-            Some(cat)
-        };
-
-        if self.extended && cat != gr::GC_Control {
-            // rule GB9b: include any preceding Prepend characters
-            for (i, c) in self.string[..idx].char_indices().rev() {
-                match gr::grapheme_category(c) {
-                    gr::GC_Prepend => idx = i,
-                    cat => {
-                        self.catb = Some(cat);
-                        break;
-                    }
-                }
-            }
-        }
-
-        let retstr = &self.string[idx..];
-        self.string = &self.string[..idx];
-        Some(retstr)
+        let prev = self.cursor_back.prev_boundary(self.string, 0).unwrap().unwrap();
+        Some(&self.string[prev..end])
     }
 }
 
 #[inline]
 pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
+    let len = s.len();
     Graphemes {
         string: s,
-        extended: is_extended,
-        cat: None,
-        catb: None,
-        regional_count_back: None
+        cursor: GraphemeCursor::new(0, len, is_extended),
+        cursor_back: GraphemeCursor::new(len, len, is_extended),
     }
 }
 
 #[inline]
 pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
     GraphemeIndices { start_offset: s.as_ptr() as usize, iter: new_graphemes(s, is_extended) }
 }
+
+// maybe unify with PairResult?
+// An enum describing information about a potential boundary.
+#[derive(PartialEq, Eq, Clone)]
+enum GraphemeState {
+    // No information is known.
+    Unknown,
+    // It is known to not be a boundary.
+    NotBreak,
+    // It is known to be a boundary.
+    Break,
+    // The codepoint after is a Regional Indicator Symbol, so a boundary iff
+    // it is preceded by an even number of RIS codepoints. (GB12, GB13)
+    Regional,
+    // The codepoint after is in the E_Modifier category, so whether it's a boundary
+    // depends on pre-context according to GB10.
+    Emoji,
+}
+
+/// Cursor-based segmenter for grapheme clusters.
+#[derive(Clone)]
+pub struct GraphemeCursor {
+    // Current cursor position.
+    offset: usize,
+    // Total length of the string.
+    len: usize,
+    // A config flag indicating whether this cursor computes legacy or extended
+    // grapheme cluster boundaries (enables GB9a and GB9b if set).
+    is_extended: bool,
+    // Information about the potential boundary at `offset`
+    state: GraphemeState,
+    // Category of codepoint immediately preceding cursor, if known.
+    cat_before: Option<GraphemeCat>,
+    // Category of codepoint immediately after cursor, if known.
+    cat_after: Option<GraphemeCat>,
+    // If set, at least one more codepoint immediately preceding this offset
+    // is needed to resolve whether there's a boundary at `offset`.
+    pre_context_offset: Option<usize>,
+    // The number of RIS codepoints preceding `offset`. If `pre_context_offset`
+    // is set, then counts the number of RIS between that and `offset`, otherwise
+    // is an accurate count relative to the string.
+    ris_count: Option<usize>,
+    // Set if a call to `prev_boundary` or `next_boundary` was suspended due
+    // to needing more input.
+    resuming: bool,
+}
+
+/// An error return indicating that not enough content was available in the
+/// provided chunk to satisfy the query, and that more content must be provided.
+#[derive(PartialEq, Eq, Debug)]
+pub enum GraphemeIncomplete {
+    /// More pre-context is needed. The caller should call `provide_context`
+    /// with a chunk ending at the offset given, then retry the query. This
+    /// will only be returned if the `chunk_start` parameter is nonzero.
+    PreContext(usize),
+
+    /// When requesting `prev_boundary`, the cursor is moving past the beginning
+    /// of the current chunk, so the chunk before that is requested. This will
+    /// only be returned if the `chunk_start` parameter is nonzero.
+    PrevChunk,
+
+    /// When requesting `next_boundary`, the cursor is moving past the end of the
+    /// current chunk, so the chunk after that is requested. This will only be
+    /// returned if the chunk ends before the `len` parameter provided on
+    /// creation of the cursor.
+    NextChunk,  // requesting chunk following the one given
+
+    /// An error returned when the chunk given does not contain the cursor position.
+    InvalidOffset,
+}
+
+// An enum describing the result from lookup of a pair of categories.
+#[derive(PartialEq, Eq)]
+enum PairResult {
+    NotBreak,  // definitely not a break
+    Break,  // definitely a break
+    Extended,  // a break iff not in extended mode
+    Regional,  // a break if preceded by an even number of RIS
+    Emoji,  // a break if preceded by emoji base and (Extend)*
+}
+
+fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
+    use tables::grapheme::GraphemeCat::*;
+    use self::PairResult::*;
+    match (before, after) {
+        (GC_CR, GC_LF) => NotBreak,  // GB3
+        (GC_Control, _) => Break,  // GB4
+        (GC_CR, _) => Break,  // GB4
+        (GC_LF, _) => Break,  // GB4
+        (_, GC_Control) => Break,  // GB5
+        (_, GC_CR) => Break,  // GB5
+        (_, GC_LF) => Break,  // GB5
+        (GC_L, GC_L) => NotBreak,  // GB6
+        (GC_L, GC_V) => NotBreak,  // GB6
+        (GC_L, GC_LV) => NotBreak,  // GB6
+        (GC_L, GC_LVT) => NotBreak,  // GB6
+        (GC_LV, GC_V) => NotBreak,  // GB7
+        (GC_LV, GC_T) => NotBreak,  // GB7
+        (GC_V, GC_V) => NotBreak,  // GB7
+        (GC_V, GC_T) => NotBreak,  // GB7
+        (GC_LVT, GC_T) => NotBreak,  // GB8
+        (GC_T, GC_T) => NotBreak,  // GB8
+        (_, GC_Extend) => NotBreak, // GB9
+        (_, GC_ZWJ) => NotBreak,  // GB9
+        (_, GC_SpacingMark) => Extended,  // GB9a
+        (GC_Prepend, _) => Extended,  // GB9b
+        (GC_E_Base, GC_E_Modifier) => NotBreak,  // GB10
+        (GC_E_Base_GAZ, GC_E_Modifier) => NotBreak,  // GB10
+        (GC_Extend, GC_E_Modifier) => Emoji,  // GB10
+        (GC_ZWJ, GC_Glue_After_Zwj) => NotBreak,  // GB11
+        (GC_ZWJ, GC_E_Base_GAZ) => NotBreak,  // GB11
+        (GC_Regional_Indicator, GC_Regional_Indicator) => Regional,  // GB12, GB13
+        (_, _) => Break,  // GB999
+    }
+}
+
+impl GraphemeCursor {
+    /// Create a new cursor. The string and initial offset are given at creation
+    /// time, but the contents of the string are not. The `is_extended` parameter
+    /// controls whether extended grapheme clusters are selected.
+    ///
+    /// The `offset` parameter must be on a codepoint boundary.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let s = "हिन्दी";
+    /// let mut legacy = GraphemeCursor::new(0, s.len(), false);
+    /// assert_eq!(legacy.next_boundary(s, 0), Ok(Some("ह".len())));
+    /// let mut extended = GraphemeCursor::new(0, s.len(), true);
+    /// assert_eq!(extended.next_boundary(s, 0), Ok(Some("हि".len())));
+    /// ```
+    pub fn new(offset: usize, len: usize, is_extended: bool) -> GraphemeCursor {
+        let state = if offset == 0 || offset == len {
+            GraphemeState::Break
+        } else {
+            GraphemeState::Unknown
+        };
+        GraphemeCursor {
+            offset: offset,
+            len: len,
+            state: state,
+            is_extended: is_extended,
+            cat_before: None,
+            cat_after: None,
+            pre_context_offset: None,
+            ris_count: None,
+            resuming: false,
+        }
+    }
+
+    // Not sure I'm gonna keep this, the advantage over new() seems thin.
+
+    /// Set the cursor to a new location in the same string.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let s = "abcd";
+    /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
+    /// assert_eq!(cursor.cur_cursor(), 0);
+    /// cursor.set_cursor(2);
+    /// assert_eq!(cursor.cur_cursor(), 2);
+    /// ```
+    pub fn set_cursor(&mut self, offset: usize) {
+        if offset != self.offset {
+            self.offset = offset;
+            self.state = if offset == 0 || offset == self.len {
+                GraphemeState::Break
+            } else {
+                GraphemeState::Unknown
+            };
+            // reset state derived from text around cursor
+            self.cat_before = None;
+            self.cat_after = None;
+            self.ris_count = None;
+        }
+    }
+
+    /// The current offset of the cursor. Equal to the last value provided to
+    /// `new()` or `set_cursor()`, or returned from `next_boundary()` or
+    /// `prev_boundary()`.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// // Two flags (🇷🇸🇮🇴), each flag is two RIS codepoints, each RIS is 4 bytes.
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
+    /// assert_eq!(cursor.cur_cursor(), 4);
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
+    /// assert_eq!(cursor.cur_cursor(), 8);
+    /// ```
+    pub fn cur_cursor(&self) -> usize {
+        self.offset
+    }
+
+    /// Provide additional pre-context when it is needed to decide a boundary.
+    /// The end of the chunk must coincide with the value given in the
+    /// `GraphemeIncomplete::PreContext` request.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
+    /// // Not enough pre-context to decide if there's a boundary between the two flags.
+    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(8)));
+    /// // Provide one more Regional Indicator Symbol of pre-context
+    /// cursor.provide_context(&flags[4..8], 4);
+    /// // Still not enough context to decide.
+    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(4)));
+    /// // Provide additional requested context.
+    /// cursor.provide_context(&flags[0..4], 0);
+    /// // That's enough to decide (it always is when context goes to the start of the string)
+    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
+    /// ```
+    pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
+        use tables::grapheme as gr;
+        assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
+        self.pre_context_offset = None;
+        if self.is_extended && chunk_start + chunk.len() == self.offset {
+            let ch = chunk.chars().rev().next().unwrap();
+            if gr::grapheme_category(ch) == gr::GC_Prepend {
+                self.decide(false);  // GB9b
+                return;
+            }
+        }
+        match self.state {
+            GraphemeState::Regional => self.handle_regional(chunk, chunk_start),
+            GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
+            _ => if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
+                let ch = chunk.chars().rev().next().unwrap();
+                self.cat_before = Some(gr::grapheme_category(ch));
+            },
+        }
+    }
+
+    fn decide(&mut self, is_break: bool) {
+        self.state = if is_break {
+            GraphemeState::Break
+        } else {
+            GraphemeState::NotBreak
+        };
+    }
+
+    fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
+        self.decide(is_break);
+        Ok(is_break)
+    }
+
+    fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
+        if self.state == GraphemeState::Break {
+            Ok(true)
+        } else if self.state == GraphemeState::NotBreak {
+            Ok(false)
+        } else if let Some(pre_context_offset) = self.pre_context_offset {
+            Err(GraphemeIncomplete::PreContext(pre_context_offset))
+        } else {
+            unreachable!("inconsistent state");
+        }
+    }
+
+    fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
+        use tables::grapheme as gr;
+        let mut ris_count = self.ris_count.unwrap_or(0);
+        for ch in chunk.chars().rev() {
+            if gr::grapheme_category(ch) != gr::GC_Regional_Indicator {
+                self.ris_count = Some(ris_count);
+                self.decide((ris_count % 2) == 0);
+                return;
+            }
+            ris_count += 1;
+        }
+        self.ris_count = Some(ris_count);
+        if chunk_start == 0 {
+            self.decide((ris_count % 2) == 0);
+            return;
+        }
+        self.pre_context_offset = Some(chunk_start);
+        self.state = GraphemeState::Regional;
+    }
+
+    fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
+        use tables::grapheme as gr;
+        for ch in chunk.chars().rev() {
+            match gr::grapheme_category(ch) {
+                gr::GC_Extend => (),
+                gr::GC_E_Base | gr::GC_E_Base_GAZ => {
+                    self.decide(false);
+                    return;
+                }
+                _ => {
+                    self.decide(true);
+                    return;
+                }
+            }
+        }
+        if chunk_start == 0 {
+            self.decide(true);
+            return;
+        }
+        self.pre_context_offset = Some(chunk_start);
+        self.state = GraphemeState::Emoji;
+    }
+
+    /// Determine whether the current cursor location is a grapheme cluster boundary.
+    /// Only a part of the string need be supplied. If `chunk_start` is nonzero or
+    /// the length of `chunk` is not equal to `len` on creation, then this method
+    /// may return `GraphemeIncomplete::PreContext`. The caller should then
+    /// call `provide_context` with the requested chunk, then retry calling this
+    /// method.
+    ///
+    /// For partial chunks, if the cursor is not at the beginning or end of the
+    /// string, the chunk should contain at least the codepoint following the cursor.
+    /// If the string is nonempty, the chunk must be nonempty.
+    ///
+    /// All calls should have consistent chunk contents (ie, if a chunk provides
+    /// content for a given slice, all further chunks covering that slice must have
+    /// the same content for it).
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
+    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(true));
+    /// cursor.set_cursor(12);
+    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
+    /// ```
+    pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> {
+        use tables::grapheme as gr;
+        if self.state == GraphemeState::Break {
+            return Ok(true)
+        }
+        if self.state == GraphemeState::NotBreak {
+            return Ok(false)
+        }
+        if self.offset < chunk_start || self.offset >= chunk_start + chunk.len() {
+            if self.offset > chunk_start + chunk.len() || self.cat_after.is_none() {
+                return Err(GraphemeIncomplete::InvalidOffset)
+            }
+        }
+        if let Some(pre_context_offset) = self.pre_context_offset {
+            return Err(GraphemeIncomplete::PreContext(pre_context_offset));
+        }
+        let offset_in_chunk = self.offset - chunk_start;
+        if self.cat_after.is_none() {
+            let ch = chunk[offset_in_chunk..].chars().next().unwrap();
+            self.cat_after = Some(gr::grapheme_category(ch));
+        }
+        if self.offset == chunk_start {
+            let mut need_pre_context = true;
+            match self.cat_after.unwrap() {
+                gr::GC_Regional_Indicator => self.state = GraphemeState::Regional,
+                gr::GC_E_Modifier => self.state = GraphemeState::Emoji,
+                _ => need_pre_context = self.cat_before.is_none(),
+            }
+            if need_pre_context {
+                self.pre_context_offset = Some(chunk_start);
+                return Err(GraphemeIncomplete::PreContext(chunk_start));
+            }
+        }
+        if self.cat_before.is_none() {
+            let ch = chunk[..offset_in_chunk].chars().rev().next().unwrap();
+            self.cat_before = Some(gr::grapheme_category(ch));
+        }
+        match check_pair(self.cat_before.unwrap(), self.cat_after.unwrap()) {
+            PairResult::NotBreak => return self.decision(false),
+            PairResult::Break => return self.decision(true),
+            PairResult::Extended => {
+                let is_extended = self.is_extended;
+                return self.decision(!is_extended);
+            }
+            PairResult::Regional => {
+                if let Some(ris_count) = self.ris_count {
+                    return self.decision((ris_count % 2) == 0);
+                }
+                self.handle_regional(&chunk[..offset_in_chunk], chunk_start);
+                self.is_boundary_result()
+            }
+            PairResult::Emoji => {
+                self.handle_emoji(&chunk[..offset_in_chunk], chunk_start);
+                self.is_boundary_result()
+            }
+        }
+    }
+
+    /// Find the next boundary after the current cursor position. Only a part of
+    /// the string need be supplied. If the chunk is incomplete, then this
+    /// method might return `GraphemeIncomplete::PreContext` or
+    /// `GraphemeIncomplete::NextChunk`. In the former case, the caller should
+    /// call `provide_context` with the requested chunk, then retry. In the
+    /// latter case, the caller should provide the chunk following the one
+    /// given, then retry.
+    ///
+    /// See `is_boundary` for expectations on the provided chunk.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(16)));
+    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(None));
+    /// ```
+    ///
+    /// And an example that uses partial strings:
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+    /// let s = "abcd";
+    /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
+    /// assert_eq!(cursor.next_boundary(&s[..2], 0), Ok(Some(1)));
+    /// assert_eq!(cursor.next_boundary(&s[..2], 0), Err(GraphemeIncomplete::NextChunk));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(2)));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(3)));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4)));
+    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
+    /// ```
+    pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
+        use tables::grapheme as gr;
+        if self.offset == self.len {
+            return Ok(None);
+        }
+        let mut iter = chunk[self.offset - chunk_start..].chars();
+        let mut ch = iter.next().unwrap();
+        loop {
+            if self.resuming {
+                if self.cat_after.is_none() {
+                    self.cat_after = Some(gr::grapheme_category(ch));
+                }
+            } else {
+                self.offset += ch.len_utf8();
+                self.state = GraphemeState::Unknown;
+                self.cat_before = self.cat_after.take();
+                if self.cat_before.is_none() {
+                    self.cat_before = Some(gr::grapheme_category(ch));
+                }
+                if self.cat_before.unwrap() == GraphemeCat::GC_Regional_Indicator {
+                    self.ris_count = self.ris_count.map(|c| c + 1);
+                } else {
+                    self.ris_count = Some(0);
+                }
+                if let Some(next_ch) = iter.next() {
+                    ch = next_ch;
+                    self.cat_after = Some(gr::grapheme_category(ch));
+                } else if self.offset == self.len {
+                    self.decide(true);
+                } else {
+                    self.resuming = true;
+                    return Err(GraphemeIncomplete::NextChunk);
+                }
+            }
+            self.resuming = true;
+            if self.is_boundary(chunk, chunk_start)? {
+                self.resuming = false;
+                return Ok(Some(self.offset));
+            }
+            self.resuming = false;
+        }
+    }
+
+    /// Find the previous boundary after the current cursor position. Only a part
+    /// of the string need be supplied. If the chunk is incomplete, then this
+    /// method might return `GraphemeIncomplete::PreContext` or
+    /// `GraphemeIncomplete::PrevChunk`. In the former case, the caller should
+    /// call `provide_context` with the requested chunk, then retry. In the
+    /// latter case, the caller should provide the chunk preceding the one
+    /// given, then retry.
+    ///
+    /// See `is_boundary` for expectations on the provided chunk.
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::GraphemeCursor;
+    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
+    /// let mut cursor = GraphemeCursor::new(12, flags.len(), false);
+    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(8)));
+    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(0)));
+    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(None));
+    /// ```
+    ///
+    /// And an example that uses partial strings (note the exact return is not
+    /// guaranteed, and may be `PrevChunk` or `PreContext` arbitrarily):
+    ///
+    /// ```rust
+    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
+    /// let s = "abcd";
+    /// let mut cursor = GraphemeCursor::new(4, s.len(), false);
+    /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Ok(Some(3)));
+    /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Err(GraphemeIncomplete::PrevChunk));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(2)));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(1)));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0)));
+    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
+    /// ```
+    pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
+        use tables::grapheme as gr;
+        if self.offset == 0 {
+            return Ok(None);
+        }
+        if self.offset == chunk_start {
+            return Err(GraphemeIncomplete::PrevChunk);
+        }
+        let mut iter = chunk[..self.offset - chunk_start].chars().rev();
+        let mut ch = iter.next().unwrap();
+        loop {
+            if self.offset == chunk_start {
+                self.resuming = true;
+                return Err(GraphemeIncomplete::PrevChunk);
+            }
+            if self.resuming {
+                self.cat_before = Some(gr::grapheme_category(ch));
+            } else {
+                self.offset -= ch.len_utf8();
+                self.cat_after = self.cat_before.take();
+                self.state = GraphemeState::Unknown;
+                if let Some(ris_count) = self.ris_count {
+                    self.ris_count = if ris_count > 0 { Some(ris_count - 1) } else { None };
+                }
+                if let Some(prev_ch) = iter.next() {
+                    ch = prev_ch;
+                    self.cat_before = Some(gr::grapheme_category(ch));
+                } else if self.offset == 0 {
+                    self.decide(true);
+                } else {
+                    self.resuming = true;
+                    self.cat_after = Some(gr::grapheme_category(ch));
+                    return Err(GraphemeIncomplete::PrevChunk);
+                }
+            }
+            self.resuming = true;
+            if self.is_boundary(chunk, chunk_start)? {
+                self.resuming = false;
+                return Ok(Some(self.offset));
+            }
+            self.resuming = false;
+        }
+    }
+}
+
+#[test]
+fn test_grapheme_cursor_ris_precontext() {
+    let s = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}";
+    let mut c = GraphemeCursor::new(8, s.len(), true);
+    assert_eq!(c.is_boundary(&s[4..], 4), Err(GraphemeIncomplete::PreContext(4)));
+    c.provide_context(&s[..4], 0);
+    assert_eq!(c.is_boundary(&s[4..], 4), Ok(true));
+}
+
+#[test]
+fn test_grapheme_cursor_chunk_start_require_precontext() {
+    let s = "\r\n";
+    let mut c = GraphemeCursor::new(1, s.len(), true);
+    assert_eq!(c.is_boundary(&s[1..], 1), Err(GraphemeIncomplete::PreContext(1)));
+    c.provide_context(&s[..1], 0);
+    assert_eq!(c.is_boundary(&s[1..], 1), Ok(false));
+}
+
+#[test]
+fn test_grapheme_cursor_prev_boundary() {
+    let s = "abcd";
+    let mut c = GraphemeCursor::new(3, s.len(), true);
+    assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk));
+    assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(2)));
+}
+
+#[test]
+fn test_grapheme_cursor_prev_boundary_chunk_start() {
+    let s = "abcd";
+    let mut c = GraphemeCursor::new(2, s.len(), true);
+    assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk));
+    assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(1)));
+}
--- a/third_party/rust/unicode-segmentation/src/lib.rs
+++ b/third_party/rust/unicode-segmentation/src/lib.rs
@@ -59,16 +59,17 @@
 #[macro_use]
 extern crate std;
 
 #[cfg(test)]
 #[macro_use]
 extern crate quickcheck;
 
 pub use grapheme::{Graphemes, GraphemeIndices};
+pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
 pub use tables::UNICODE_VERSION;
 pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords};
 
 mod grapheme;
 mod tables;
 mod word;
 
 #[cfg(test)]
--- a/third_party/rust/unicode-segmentation/src/tables.rs
+++ b/third_party/rust/unicode-segmentation/src/tables.rs
@@ -291,23 +291,25 @@ pub mod grapheme {
     use core::result::Result::{Ok, Err};
 
     pub use self::GraphemeCat::*;
 
     #[allow(non_camel_case_types)]
     #[derive(Clone, Copy, PartialEq, Eq)]
     pub enum GraphemeCat {
         GC_Any,
+        GC_CR,
         GC_Control,
         GC_E_Base,
         GC_E_Base_GAZ,
         GC_E_Modifier,
         GC_Extend,
         GC_Glue_After_Zwj,
         GC_L,
+        GC_LF,
         GC_LV,
         GC_LVT,
         GC_Prepend,
         GC_Regional_Indicator,
         GC_SpacingMark,
         GC_T,
         GC_V,
         GC_ZWJ,
@@ -328,81 +330,83 @@ pub mod grapheme {
         }
     }
 
     pub fn grapheme_category(c: char) -> GraphemeCat {
         bsearch_range_value_table(c, grapheme_cat_table)
     }
 
     const grapheme_cat_table: &'static [(char, char, GraphemeCat)] = &[
-        ('\u{0}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}', GC_Control), ('\u{ad}', '\u{ad}',
-        GC_Control), ('\u{300}', '\u{36f}', GC_Extend), ('\u{483}', '\u{489}', GC_Extend),
-        ('\u{591}', '\u{5bd}', GC_Extend), ('\u{5bf}', '\u{5bf}', GC_Extend), ('\u{5c1}', '\u{5c2}',
-        GC_Extend), ('\u{5c4}', '\u{5c5}', GC_Extend), ('\u{5c7}', '\u{5c7}', GC_Extend),
-        ('\u{600}', '\u{605}', GC_Prepend), ('\u{610}', '\u{61a}', GC_Extend), ('\u{61c}',
-        '\u{61c}', GC_Control), ('\u{64b}', '\u{65f}', GC_Extend), ('\u{670}', '\u{670}',
-        GC_Extend), ('\u{6d6}', '\u{6dc}', GC_Extend), ('\u{6dd}', '\u{6dd}', GC_Prepend),
-        ('\u{6df}', '\u{6e4}', GC_Extend), ('\u{6e7}', '\u{6e8}', GC_Extend), ('\u{6ea}', '\u{6ed}',
-        GC_Extend), ('\u{70f}', '\u{70f}', GC_Prepend), ('\u{711}', '\u{711}', GC_Extend),
-        ('\u{730}', '\u{74a}', GC_Extend), ('\u{7a6}', '\u{7b0}', GC_Extend), ('\u{7eb}', '\u{7f3}',
-        GC_Extend), ('\u{816}', '\u{819}', GC_Extend), ('\u{81b}', '\u{823}', GC_Extend),
-        ('\u{825}', '\u{827}', GC_Extend), ('\u{829}', '\u{82d}', GC_Extend), ('\u{859}', '\u{85b}',
-        GC_Extend), ('\u{8d4}', '\u{8e1}', GC_Extend), ('\u{8e2}', '\u{8e2}', GC_Prepend),
-        ('\u{8e3}', '\u{902}', GC_Extend), ('\u{903}', '\u{903}', GC_SpacingMark), ('\u{93a}',
-        '\u{93a}', GC_Extend), ('\u{93b}', '\u{93b}', GC_SpacingMark), ('\u{93c}', '\u{93c}',
-        GC_Extend), ('\u{93e}', '\u{940}', GC_SpacingMark), ('\u{941}', '\u{948}', GC_Extend),
-        ('\u{949}', '\u{94c}', GC_SpacingMark), ('\u{94d}', '\u{94d}', GC_Extend), ('\u{94e}',
-        '\u{94f}', GC_SpacingMark), ('\u{951}', '\u{957}', GC_Extend), ('\u{962}', '\u{963}',
-        GC_Extend), ('\u{981}', '\u{981}', GC_Extend), ('\u{982}', '\u{983}', GC_SpacingMark),
-        ('\u{9bc}', '\u{9bc}', GC_Extend), ('\u{9be}', '\u{9be}', GC_Extend), ('\u{9bf}', '\u{9c0}',
-        GC_SpacingMark), ('\u{9c1}', '\u{9c4}', GC_Extend), ('\u{9c7}', '\u{9c8}', GC_SpacingMark),
-        ('\u{9cb}', '\u{9cc}', GC_SpacingMark), ('\u{9cd}', '\u{9cd}', GC_Extend), ('\u{9d7}',
-        '\u{9d7}', GC_Extend), ('\u{9e2}', '\u{9e3}', GC_Extend), ('\u{a01}', '\u{a02}', GC_Extend),
-        ('\u{a03}', '\u{a03}', GC_SpacingMark), ('\u{a3c}', '\u{a3c}', GC_Extend), ('\u{a3e}',
-        '\u{a40}', GC_SpacingMark), ('\u{a41}', '\u{a42}', GC_Extend), ('\u{a47}', '\u{a48}',
-        GC_Extend), ('\u{a4b}', '\u{a4d}', GC_Extend), ('\u{a51}', '\u{a51}', GC_Extend),
-        ('\u{a70}', '\u{a71}', GC_Extend), ('\u{a75}', '\u{a75}', GC_Extend), ('\u{a81}', '\u{a82}',
-        GC_Extend), ('\u{a83}', '\u{a83}', GC_SpacingMark), ('\u{abc}', '\u{abc}', GC_Extend),
-        ('\u{abe}', '\u{ac0}', GC_SpacingMark), ('\u{ac1}', '\u{ac5}', GC_Extend), ('\u{ac7}',
-        '\u{ac8}', GC_Extend), ('\u{ac9}', '\u{ac9}', GC_SpacingMark), ('\u{acb}', '\u{acc}',
-        GC_SpacingMark), ('\u{acd}', '\u{acd}', GC_Extend), ('\u{ae2}', '\u{ae3}', GC_Extend),
-        ('\u{b01}', '\u{b01}', GC_Extend), ('\u{b02}', '\u{b03}', GC_SpacingMark), ('\u{b3c}',
-        '\u{b3c}', GC_Extend), ('\u{b3e}', '\u{b3f}', GC_Extend), ('\u{b40}', '\u{b40}',
-        GC_SpacingMark), ('\u{b41}', '\u{b44}', GC_Extend), ('\u{b47}', '\u{b48}', GC_SpacingMark),
-        ('\u{b4b}', '\u{b4c}', GC_SpacingMark), ('\u{b4d}', '\u{b4d}', GC_Extend), ('\u{b56}',
-        '\u{b57}', GC_Extend), ('\u{b62}', '\u{b63}', GC_Extend), ('\u{b82}', '\u{b82}', GC_Extend),
-        ('\u{bbe}', '\u{bbe}', GC_Extend), ('\u{bbf}', '\u{bbf}', GC_SpacingMark), ('\u{bc0}',
-        '\u{bc0}', GC_Extend), ('\u{bc1}', '\u{bc2}', GC_SpacingMark), ('\u{bc6}', '\u{bc8}',
-        GC_SpacingMark), ('\u{bca}', '\u{bcc}', GC_SpacingMark), ('\u{bcd}', '\u{bcd}', GC_Extend),
-        ('\u{bd7}', '\u{bd7}', GC_Extend), ('\u{c00}', '\u{c00}', GC_Extend), ('\u{c01}', '\u{c03}',
-        GC_SpacingMark), ('\u{c3e}', '\u{c40}', GC_Extend), ('\u{c41}', '\u{c44}', GC_SpacingMark),
-        ('\u{c46}', '\u{c48}', GC_Extend), ('\u{c4a}', '\u{c4d}', GC_Extend), ('\u{c55}', '\u{c56}',
-        GC_Extend), ('\u{c62}', '\u{c63}', GC_Extend), ('\u{c81}', '\u{c81}', GC_Extend),
-        ('\u{c82}', '\u{c83}', GC_SpacingMark), ('\u{cbc}', '\u{cbc}', GC_Extend), ('\u{cbe}',
-        '\u{cbe}', GC_SpacingMark), ('\u{cbf}', '\u{cbf}', GC_Extend), ('\u{cc0}', '\u{cc1}',
-        GC_SpacingMark), ('\u{cc2}', '\u{cc2}', GC_Extend), ('\u{cc3}', '\u{cc4}', GC_SpacingMark),
-        ('\u{cc6}', '\u{cc6}', GC_Extend), ('\u{cc7}', '\u{cc8}', GC_SpacingMark), ('\u{cca}',
-        '\u{ccb}', GC_SpacingMark), ('\u{ccc}', '\u{ccd}', GC_Extend), ('\u{cd5}', '\u{cd6}',
-        GC_Extend), ('\u{ce2}', '\u{ce3}', GC_Extend), ('\u{d01}', '\u{d01}', GC_Extend),
-        ('\u{d02}', '\u{d03}', GC_SpacingMark), ('\u{d3e}', '\u{d3e}', GC_Extend), ('\u{d3f}',
-        '\u{d40}', GC_SpacingMark), ('\u{d41}', '\u{d44}', GC_Extend), ('\u{d46}', '\u{d48}',
-        GC_SpacingMark), ('\u{d4a}', '\u{d4c}', GC_SpacingMark), ('\u{d4d}', '\u{d4d}', GC_Extend),
-        ('\u{d4e}', '\u{d4e}', GC_Prepend), ('\u{d57}', '\u{d57}', GC_Extend), ('\u{d62}',
-        '\u{d63}', GC_Extend), ('\u{d82}', '\u{d83}', GC_SpacingMark), ('\u{dca}', '\u{dca}',
-        GC_Extend), ('\u{dcf}', '\u{dcf}', GC_Extend), ('\u{dd0}', '\u{dd1}', GC_SpacingMark),
-        ('\u{dd2}', '\u{dd4}', GC_Extend), ('\u{dd6}', '\u{dd6}', GC_Extend), ('\u{dd8}', '\u{dde}',
-        GC_SpacingMark), ('\u{ddf}', '\u{ddf}', GC_Extend), ('\u{df2}', '\u{df3}', GC_SpacingMark),
-        ('\u{e31}', '\u{e31}', GC_Extend), ('\u{e33}', '\u{e33}', GC_SpacingMark), ('\u{e34}',
-        '\u{e3a}', GC_Extend), ('\u{e47}', '\u{e4e}', GC_Extend), ('\u{eb1}', '\u{eb1}', GC_Extend),
-        ('\u{eb3}', '\u{eb3}', GC_SpacingMark), ('\u{eb4}', '\u{eb9}', GC_Extend), ('\u{ebb}',
-        '\u{ebc}', GC_Extend), ('\u{ec8}', '\u{ecd}', GC_Extend), ('\u{f18}', '\u{f19}', GC_Extend),
-        ('\u{f35}', '\u{f35}', GC_Extend), ('\u{f37}', '\u{f37}', GC_Extend), ('\u{f39}', '\u{f39}',
-        GC_Extend), ('\u{f3e}', '\u{f3f}', GC_SpacingMark), ('\u{f71}', '\u{f7e}', GC_Extend),
-        ('\u{f7f}', '\u{f7f}', GC_SpacingMark), ('\u{f80}', '\u{f84}', GC_Extend), ('\u{f86}',
-        '\u{f87}', GC_Extend), ('\u{f8d}', '\u{f97}', GC_Extend), ('\u{f99}', '\u{fbc}', GC_Extend),
+        ('\u{0}', '\u{9}', GC_Control), ('\u{a}', '\u{a}', GC_LF), ('\u{b}', '\u{c}', GC_Control),
+        ('\u{d}', '\u{d}', GC_CR), ('\u{e}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}',
+        GC_Control), ('\u{ad}', '\u{ad}', GC_Control), ('\u{300}', '\u{36f}', GC_Extend),
+        ('\u{483}', '\u{489}', GC_Extend), ('\u{591}', '\u{5bd}', GC_Extend), ('\u{5bf}', '\u{5bf}',
+        GC_Extend), ('\u{5c1}', '\u{5c2}', GC_Extend), ('\u{5c4}', '\u{5c5}', GC_Extend),
+        ('\u{5c7}', '\u{5c7}', GC_Extend), ('\u{600}', '\u{605}', GC_Prepend), ('\u{610}',
+        '\u{61a}', GC_Extend), ('\u{61c}', '\u{61c}', GC_Control), ('\u{64b}', '\u{65f}',
+        GC_Extend), ('\u{670}', '\u{670}', GC_Extend), ('\u{6d6}', '\u{6dc}', GC_Extend),
+        ('\u{6dd}', '\u{6dd}', GC_Prepend), ('\u{6df}', '\u{6e4}', GC_Extend), ('\u{6e7}',
+        '\u{6e8}', GC_Extend), ('\u{6ea}', '\u{6ed}', GC_Extend), ('\u{70f}', '\u{70f}',
+        GC_Prepend), ('\u{711}', '\u{711}', GC_Extend), ('\u{730}', '\u{74a}', GC_Extend),
+        ('\u{7a6}', '\u{7b0}', GC_Extend), ('\u{7eb}', '\u{7f3}', GC_Extend), ('\u{816}', '\u{819}',
+        GC_Extend), ('\u{81b}', '\u{823}', GC_Extend), ('\u{825}', '\u{827}', GC_Extend),
+        ('\u{829}', '\u{82d}', GC_Extend), ('\u{859}', '\u{85b}', GC_Extend), ('\u{8d4}', '\u{8e1}',
+        GC_Extend), ('\u{8e2}', '\u{8e2}', GC_Prepend), ('\u{8e3}', '\u{902}', GC_Extend),
+        ('\u{903}', '\u{903}', GC_SpacingMark), ('\u{93a}', '\u{93a}', GC_Extend), ('\u{93b}',
+        '\u{93b}', GC_SpacingMark), ('\u{93c}', '\u{93c}', GC_Extend), ('\u{93e}', '\u{940}',
+        GC_SpacingMark), ('\u{941}', '\u{948}', GC_Extend), ('\u{949}', '\u{94c}', GC_SpacingMark),
+        ('\u{94d}', '\u{94d}', GC_Extend), ('\u{94e}', '\u{94f}', GC_SpacingMark), ('\u{951}',
+        '\u{957}', GC_Extend), ('\u{962}', '\u{963}', GC_Extend), ('\u{981}', '\u{981}', GC_Extend),
+        ('\u{982}', '\u{983}', GC_SpacingMark), ('\u{9bc}', '\u{9bc}', GC_Extend), ('\u{9be}',
+        '\u{9be}', GC_Extend), ('\u{9bf}', '\u{9c0}', GC_SpacingMark), ('\u{9c1}', '\u{9c4}',
+        GC_Extend), ('\u{9c7}', '\u{9c8}', GC_SpacingMark), ('\u{9cb}', '\u{9cc}', GC_SpacingMark),
+        ('\u{9cd}', '\u{9cd}', GC_Extend), ('\u{9d7}', '\u{9d7}', GC_Extend), ('\u{9e2}', '\u{9e3}',
+        GC_Extend), ('\u{a01}', '\u{a02}', GC_Extend), ('\u{a03}', '\u{a03}', GC_SpacingMark),
+        ('\u{a3c}', '\u{a3c}', GC_Extend), ('\u{a3e}', '\u{a40}', GC_SpacingMark), ('\u{a41}',
+        '\u{a42}', GC_Extend), ('\u{a47}', '\u{a48}', GC_Extend), ('\u{a4b}', '\u{a4d}', GC_Extend),
+        ('\u{a51}', '\u{a51}', GC_Extend), ('\u{a70}', '\u{a71}', GC_Extend), ('\u{a75}', '\u{a75}',
+        GC_Extend), ('\u{a81}', '\u{a82}', GC_Extend), ('\u{a83}', '\u{a83}', GC_SpacingMark),
+        ('\u{abc}', '\u{abc}', GC_Extend), ('\u{abe}', '\u{ac0}', GC_SpacingMark), ('\u{ac1}',
+        '\u{ac5}', GC_Extend), ('\u{ac7}', '\u{ac8}', GC_Extend), ('\u{ac9}', '\u{ac9}',
+        GC_SpacingMark), ('\u{acb}', '\u{acc}', GC_SpacingMark), ('\u{acd}', '\u{acd}', GC_Extend),
+        ('\u{ae2}', '\u{ae3}', GC_Extend), ('\u{b01}', '\u{b01}', GC_Extend), ('\u{b02}', '\u{b03}',
+        GC_SpacingMark), ('\u{b3c}', '\u{b3c}', GC_Extend), ('\u{b3e}', '\u{b3f}', GC_Extend),
+        ('\u{b40}', '\u{b40}', GC_SpacingMark), ('\u{b41}', '\u{b44}', GC_Extend), ('\u{b47}',
+        '\u{b48}', GC_SpacingMark), ('\u{b4b}', '\u{b4c}', GC_SpacingMark), ('\u{b4d}', '\u{b4d}',
+        GC_Extend), ('\u{b56}', '\u{b57}', GC_Extend), ('\u{b62}', '\u{b63}', GC_Extend),
+        ('\u{b82}', '\u{b82}', GC_Extend), ('\u{bbe}', '\u{bbe}', GC_Extend), ('\u{bbf}', '\u{bbf}',
+        GC_SpacingMark), ('\u{bc0}', '\u{bc0}', GC_Extend), ('\u{bc1}', '\u{bc2}', GC_SpacingMark),
+        ('\u{bc6}', '\u{bc8}', GC_SpacingMark), ('\u{bca}', '\u{bcc}', GC_SpacingMark), ('\u{bcd}',
+        '\u{bcd}', GC_Extend), ('\u{bd7}', '\u{bd7}', GC_Extend), ('\u{c00}', '\u{c00}', GC_Extend),
+        ('\u{c01}', '\u{c03}', GC_SpacingMark), ('\u{c3e}', '\u{c40}', GC_Extend), ('\u{c41}',
+        '\u{c44}', GC_SpacingMark), ('\u{c46}', '\u{c48}', GC_Extend), ('\u{c4a}', '\u{c4d}',
+        GC_Extend), ('\u{c55}', '\u{c56}', GC_Extend), ('\u{c62}', '\u{c63}', GC_Extend),
+        ('\u{c81}', '\u{c81}', GC_Extend), ('\u{c82}', '\u{c83}', GC_SpacingMark), ('\u{cbc}',
+        '\u{cbc}', GC_Extend), ('\u{cbe}', '\u{cbe}', GC_SpacingMark), ('\u{cbf}', '\u{cbf}',
+        GC_Extend), ('\u{cc0}', '\u{cc1}', GC_SpacingMark), ('\u{cc2}', '\u{cc2}', GC_Extend),
+        ('\u{cc3}', '\u{cc4}', GC_SpacingMark), ('\u{cc6}', '\u{cc6}', GC_Extend), ('\u{cc7}',
+        '\u{cc8}', GC_SpacingMark), ('\u{cca}', '\u{ccb}', GC_SpacingMark), ('\u{ccc}', '\u{ccd}',
+        GC_Extend), ('\u{cd5}', '\u{cd6}', GC_Extend), ('\u{ce2}', '\u{ce3}', GC_Extend),
+        ('\u{d01}', '\u{d01}', GC_Extend), ('\u{d02}', '\u{d03}', GC_SpacingMark), ('\u{d3e}',
+        '\u{d3e}', GC_Extend), ('\u{d3f}', '\u{d40}', GC_SpacingMark), ('\u{d41}', '\u{d44}',
+        GC_Extend), ('\u{d46}', '\u{d48}', GC_SpacingMark), ('\u{d4a}', '\u{d4c}', GC_SpacingMark),
+        ('\u{d4d}', '\u{d4d}', GC_Extend), ('\u{d4e}', '\u{d4e}', GC_Prepend), ('\u{d57}',
+        '\u{d57}', GC_Extend), ('\u{d62}', '\u{d63}', GC_Extend), ('\u{d82}', '\u{d83}',
+        GC_SpacingMark), ('\u{dca}', '\u{dca}', GC_Extend), ('\u{dcf}', '\u{dcf}', GC_Extend),
+        ('\u{dd0}', '\u{dd1}', GC_SpacingMark), ('\u{dd2}', '\u{dd4}', GC_Extend), ('\u{dd6}',
+        '\u{dd6}', GC_Extend), ('\u{dd8}', '\u{dde}', GC_SpacingMark), ('\u{ddf}', '\u{ddf}',
+        GC_Extend), ('\u{df2}', '\u{df3}', GC_SpacingMark), ('\u{e31}', '\u{e31}', GC_Extend),
+        ('\u{e33}', '\u{e33}', GC_SpacingMark), ('\u{e34}', '\u{e3a}', GC_Extend), ('\u{e47}',
+        '\u{e4e}', GC_Extend), ('\u{eb1}', '\u{eb1}', GC_Extend), ('\u{eb3}', '\u{eb3}',
+        GC_SpacingMark), ('\u{eb4}', '\u{eb9}', GC_Extend), ('\u{ebb}', '\u{ebc}', GC_Extend),
+        ('\u{ec8}', '\u{ecd}', GC_Extend), ('\u{f18}', '\u{f19}', GC_Extend), ('\u{f35}', '\u{f35}',
+        GC_Extend), ('\u{f37}', '\u{f37}', GC_Extend), ('\u{f39}', '\u{f39}', GC_Extend),
+        ('\u{f3e}', '\u{f3f}', GC_SpacingMark), ('\u{f71}', '\u{f7e}', GC_Extend), ('\u{f7f}',
+        '\u{f7f}', GC_SpacingMark), ('\u{f80}', '\u{f84}', GC_Extend), ('\u{f86}', '\u{f87}',
+        GC_Extend), ('\u{f8d}', '\u{f97}', GC_Extend), ('\u{f99}', '\u{fbc}', GC_Extend),
         ('\u{fc6}', '\u{fc6}', GC_Extend), ('\u{102d}', '\u{1030}', GC_Extend), ('\u{1031}',
         '\u{1031}', GC_SpacingMark), ('\u{1032}', '\u{1037}', GC_Extend), ('\u{1039}', '\u{103a}',
         GC_Extend), ('\u{103b}', '\u{103c}', GC_SpacingMark), ('\u{103d}', '\u{103e}', GC_Extend),
         ('\u{1056}', '\u{1057}', GC_SpacingMark), ('\u{1058}', '\u{1059}', GC_Extend), ('\u{105e}',
         '\u{1060}', GC_Extend), ('\u{1071}', '\u{1074}', GC_Extend), ('\u{1082}', '\u{1082}',
         GC_Extend), ('\u{1084}', '\u{1084}', GC_SpacingMark), ('\u{1085}', '\u{1086}', GC_Extend),
         ('\u{108d}', '\u{108d}', GC_Extend), ('\u{109d}', '\u{109d}', GC_Extend), ('\u{1100}',
         '\u{115f}', GC_L), ('\u{1160}', '\u{11a7}', GC_V), ('\u{11a8}', '\u{11ff}', GC_T),
@@ -863,17 +867,17 @@ pub mod grapheme {
 }
 
 pub mod word {
     use core::result::Result::{Ok, Err};
 
     pub use self::WordCat::*;
 
     #[allow(non_camel_case_types)]
-    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
+    #[derive(Clone, Copy, PartialEq, Eq)]
     pub enum WordCat {
         WC_ALetter,
         WC_Any,
         WC_CR,
         WC_Double_Quote,
         WC_E_Base,
         WC_E_Base_GAZ,
         WC_E_Modifier,
--- a/third_party/rust/unicode-segmentation/src/test.rs
+++ b/third_party/rust/unicode-segmentation/src/test.rs
@@ -29,16 +29,20 @@ fn test_graphemes() {
          &["\u{600}\u{20}", "\u{20}"],
          &["\u{600}", "\u{20}", "\u{20}"]),
     ];
 
     pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[
         // family emoji (more than two emoji joined by ZWJ)
         ("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
          &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]),
+        // cartwheel emoji followed by two fitzpatrick skin tone modifiers
+        // (test case from issue #19)
+        ("\u{1F938}\u{1F3FE}\u{1F3FE}",
+         &["\u{1F938}\u{1F3FE}", "\u{1F3FE}"]),
     ];
 
     for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
         // test forward iterator
         assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned()));
         assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));
 
         // test reverse iterator