Bug 1348751 - [Form Autofill] A utility library for handling full name and separated names, r=MattN
author Luke Chang <lchang@mozilla.com>
Wed, 12 Apr 2017 20:05:54 +0800
changeset 355274 93378ffa235a6106297d55f3285bee40c5742957
parent 355273 ea481e3d0cda15e328b838dd8637ee768310eddb
child 355275 fe5a5bcd4cb39d8be7dcea8e269ed5f57ff71495
push id 31724
push user kwierso@gmail.com
push date Thu, 27 Apr 2017 19:44:12 +0000
treeherder mozilla-central@ffdedb9c5aad [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers MattN
bugs 1348751
milestone 55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1348751 - [Form Autofill] A utility library for handling full name and separated names, r=MattN MozReview-Commit-ID: 3rcuxbFHKOq
browser/extensions/formautofill/FormAutofillNameUtils.jsm
browser/extensions/formautofill/ProfileStorage.jsm
browser/extensions/formautofill/content/nameReferences.js
browser/extensions/formautofill/test/unit/test_isCJKName.js
browser/extensions/formautofill/test/unit/test_nameUtils.js
browser/extensions/formautofill/test/unit/xpcshell.ini
toolkit/content/license.html
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/FormAutofillNameUtils.jsm
@@ -0,0 +1,280 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components;
+
+// Cu.import loads jsm files based on ISO-Latin-1 for now (see bug 530257).
+// However, the references about name parts include multi-byte characters.
+// Thus, we use |loadSubScript| to load the references instead.
+const NAME_REFERENCES = "chrome://formautofill/content/nameReferences.js";
+
+this.EXPORTED_SYMBOLS = ["FormAutofillNameUtils"];
+
+// FormAutofillNameUtils is initially translated from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util.cc?rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
+var FormAutofillNameUtils = {
+  // Will be loaded from NAME_REFERENCES.
+  NAME_PREFIXES: [],
+  NAME_SUFFIXES: [],
+  FAMILY_NAME_PREFIXES: [],
+  COMMON_CJK_MULTI_CHAR_SURNAMES: [],
+  KOREAN_MULTI_CHAR_SURNAMES: [],
+
+  // The whitespace definition based on
+  // https://cs.chromium.org/chromium/src/base/strings/string_util_constants.cc?l=9&rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
+  WHITESPACE: [
+    "\u0009", // CHARACTER TABULATION
+    "\u000A", // LINE FEED (LF)
+    "\u000B", // LINE TABULATION
+    "\u000C", // FORM FEED (FF)
+    "\u000D", // CARRIAGE RETURN (CR)
+    "\u0020", // SPACE
+    "\u0085", // NEXT LINE (NEL)
+    "\u00A0", // NO-BREAK SPACE
+    "\u1680", // OGHAM SPACE MARK
+    "\u2000", // EN QUAD
+    "\u2001", // EM QUAD
+    "\u2002", // EN SPACE
+    "\u2003", // EM SPACE
+    "\u2004", // THREE-PER-EM SPACE
+    "\u2005", // FOUR-PER-EM SPACE
+    "\u2006", // SIX-PER-EM SPACE
+    "\u2007", // FIGURE SPACE
+    "\u2008", // PUNCTUATION SPACE
+    "\u2009", // THIN SPACE
+    "\u200A", // HAIR SPACE
+    "\u2028", // LINE SEPARATOR
+    "\u2029", // PARAGRAPH SEPARATOR
+    "\u202F", // NARROW NO-BREAK SPACE
+    "\u205F", // MEDIUM MATHEMATICAL SPACE
+    "\u3000", // IDEOGRAPHIC SPACE
+  ],
+
+  // The middle dot is used as a separator for foreign names in Japanese.
+  MIDDLE_DOT: [
+    "\u30FB", // KATAKANA MIDDLE DOT
+    "\u00B7", // A (common?) typo for "KATAKANA MIDDLE DOT"
+  ],
+
+  // The Unicode ranges are based on Wikipedia:
+  // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
+  // https://en.wikipedia.org/wiki/Hangul
+  // https://en.wikipedia.org/wiki/Japanese_writing_system
+  CJK_RANGE: [
+    "\u1100-\u11FF", // Hangul Jamo
+    "\u3040-\u309F", // Hiragana
+    "\u30A0-\u30FF", // Katakana
+    "\u3105-\u312C", // Bopomofo
+    "\u3130-\u318F", // Hangul Compatibility Jamo
+    "\u31F0-\u31FF", // Katakana Phonetic Extensions
+    "\u3200-\u32FF", // Enclosed CJK Letters and Months
+    "\u3400-\u4DBF", // CJK unified ideographs Extension A
+    "\u4E00-\u9FFF", // CJK Unified Ideographs
+    "\uA960-\uA97F", // Hangul Jamo Extended-A
+    "\uAC00-\uD7AF", // Hangul Syllables
+    "\uD7B0-\uD7FF", // Hangul Jamo Extended-B
+    "\uFF00-\uFFEF", // Halfwidth and Fullwidth Forms
+  ],
+
+  HANGUL_RANGE: [
+    "\u1100-\u11FF", // Hangul Jamo
+    "\u3130-\u318F", // Hangul Compatibility Jamo
+    "\uA960-\uA97F", // Hangul Jamo Extended-A
+    "\uAC00-\uD7AF", // Hangul Syllables
+    "\uD7B0-\uD7FF", // Hangul Jamo Extended-B
+  ],
+
+  _dataLoaded: false,
+
+  // Returns true if |set| contains the lower-cased |token|, modulo a final period.
+  _containsString(set, token) {
+    let target = token.replace(/\.$/, "").toLowerCase();
+    return set.includes(target);
+  },
+
+  // Removes common name prefixes from |nameTokens|.
+  _stripPrefixes(nameTokens) {
+    for (let i in nameTokens) {
+      if (!this._containsString(this.NAME_PREFIXES, nameTokens[i])) {
+        return nameTokens.slice(i);
+      }
+    }
+    return [];
+  },
+
+  // Removes common name suffixes from |nameTokens|.
+  _stripSuffixes(nameTokens) {
+    for (let i = nameTokens.length - 1; i >= 0; i--) {
+      if (!this._containsString(this.NAME_SUFFIXES, nameTokens[i])) {
+        return nameTokens.slice(0, i + 1);
+      }
+    }
+    return [];
+  },
+
+  _isCJKName(name) {
+    // The name is considered to be a CJK name if it is only CJK characters,
+    // spaces, and "middle dot" separators, with at least one CJK character, and
+    // no more than 2 words.
+    //
+    // Chinese and Japanese names are usually spelled out using the Han
+    // characters (logographs), which constitute the "CJK Unified Ideographs"
+    // block in Unicode, also referred to as Unihan. Korean names are usually
+    // spelled out in the Korean alphabet (Hangul), although they do have a Han
+    // equivalent as well.
+
+    let previousWasCJK = false;
+    let wordCount = 0;
+
+    for (let c of name) {
+      let isMiddleDot = this.MIDDLE_DOT.includes(c);
+      let isCJK = !isMiddleDot && this.reCJK.test(c);
+      if (!isCJK && !isMiddleDot && !this.WHITESPACE.includes(c)) {
+        return false;
+      }
+      if (isCJK && !previousWasCJK) {
+        wordCount++;
+      }
+      previousWasCJK = isCJK;
+    }
+
+    return wordCount > 0 && wordCount < 3;
+  },
+
+  // Tries to split a Chinese, Japanese, or Korean name into its given name &
+  // surname parts. If splitting did not work for whatever reason, returns null.
+  _splitCJKName(nameTokens) {
+    // The convention for CJK languages is to put the surname (last name) first,
+    // and the given name (first name) second. In a continuous text, there is
+    // normally no space between the two parts of the name. When entering their
+    // name into a field, though, some people add a space to disambiguate. CJK
+    // names (almost) never have a middle name.
+
+    let reHangulName = new RegExp(
+      "^[" + this.HANGUL_RANGE.join("") + this.WHITESPACE.join("") + "]+$", "u");
+    let nameParts = {
+      given: "",
+      middle: "",
+      family: "",
+    };
+
+    if (nameTokens.length == 1) {
+      // There is no space between the surname and given name. Try to infer
+      // where to separate between the two. Most Chinese and Korean surnames
+      // have only one character, but there are a few that have 2. If the name
+      // does not start with a surname from a known list, default to one
+      // character.
+      let name = nameTokens[0];
+      let isKorean = reHangulName.test(name);
+      let surnameLength = 0;
+
+      // 4-character Korean names are more likely to be 2/2 than 1/3, so use
+      // the full list of Korean 2-char surnames (instead of only the common
+      // ones).
+      let multiCharSurnames = (isKorean && name.length > 3) ?
+        this.KOREAN_MULTI_CHAR_SURNAMES :
+        this.COMMON_CJK_MULTI_CHAR_SURNAMES;
+
+      // Default to 1 character if the surname is not in the list.
+      surnameLength =
+        multiCharSurnames.some(surname => name.startsWith(surname)) ? 2 : 1;
+
+      nameParts.family = name.substr(0, surnameLength);
+      nameParts.given = name.substr(surnameLength);
+    } else if (nameTokens.length == 2) {
+      // The user entered a space between the two name parts. This makes our job
+      // easier. Family name first, given name second.
+      nameParts.family = nameTokens[0];
+      nameParts.given = nameTokens[1];
+    } else {
+      return null;
+    }
+
+    return nameParts;
+  },
+
+  init() {
+    if (this._dataLoaded) {
+      return;
+    }
+    let sandbox = {};
+    let scriptLoader = Cc["@mozilla.org/moz/jssubscript-loader;1"]
+                         .getService(Ci.mozIJSSubScriptLoader);
+    scriptLoader.loadSubScript(NAME_REFERENCES, sandbox, "utf-8");
+    Object.assign(this, sandbox.nameReferences);
+    this._dataLoaded = true;
+
+    this.reCJK = new RegExp("[" + this.CJK_RANGE.join("") + "]", "u");
+  },
+
+  splitName(name) {
+    let nameTokens = name.trim().split(/[ ,\u3000\u30FB\u00B7]+/);
+    let nameParts = {
+      given: "",
+      middle: "",
+      family: "",
+    };
+
+    nameTokens = this._stripPrefixes(nameTokens);
+
+    if (this._isCJKName(name)) {
+      let parts = this._splitCJKName(nameTokens);
+      if (parts) {
+        return parts;
+      }
+    }
+
+    // Don't assume "Ma" is a suffix in John Ma.
+    if (nameTokens.length > 2) {
+      nameTokens = this._stripSuffixes(nameTokens);
+    }
+
+    if (!nameTokens.length) {
+      // Bad things have happened; just assume the whole thing is a given name.
+      nameParts.given = name;
+      return nameParts;
+    }
+
+    // Only one token, assume given name.
+    if (nameTokens.length == 1) {
+      nameParts.given = nameTokens[0];
+      return nameParts;
+    }
+
+    // 2 or more tokens. Grab the family, which is the last word plus any
+    // recognizable family prefixes.
+    let familyTokens = [nameTokens.pop()];
+    while (nameTokens.length) {
+      let lastToken = nameTokens[nameTokens.length - 1];
+      if (!this._containsString(this.FAMILY_NAME_PREFIXES, lastToken)) {
+        break;
+      }
+      familyTokens.unshift(lastToken);
+      nameTokens.pop();
+    }
+    nameParts.family = familyTokens.join(" ");
+
+    // Take the last remaining token as the middle name (if there are at least 2
+    // tokens).
+    if (nameTokens.length >= 2) {
+      nameParts.middle = nameTokens.pop();
+    }
+
+    // Remainder is given name.
+    nameParts.given = nameTokens.join(" ");
+
+    return nameParts;
+  },
+
+  joinNameParts({given, middle, family}) {
+    if (this._isCJKName(given) && this._isCJKName(family) && middle == "") {
+      return family + given;
+    }
+    return [given, middle, family].filter(part => part && part.length).join(" ");
+  },
+};
+
+FormAutofillNameUtils.init();
--- a/browser/extensions/formautofill/ProfileStorage.jsm
+++ b/browser/extensions/formautofill/ProfileStorage.jsm
@@ -49,16 +49,18 @@ const {classes: Cc, interfaces: Ci, util
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://gre/modules/Services.jsm");
 Cu.import("resource://gre/modules/Task.jsm");
 
 Cu.import("resource://formautofill/FormAutofillUtils.jsm");
 
 XPCOMUtils.defineLazyModuleGetter(this, "JSONFile",
                                   "resource://gre/modules/JSONFile.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "FormAutofillNameUtils",
+                                  "resource://formautofill/FormAutofillNameUtils.jsm");
 
 XPCOMUtils.defineLazyServiceGetter(this, "gUUIDGenerator",
                                    "@mozilla.org/uuid-generator;1",
                                    "nsIUUIDGenerator");
 
 this.log = null;
 FormAutofillUtils.defineLazyLogGetter(this, this.EXPORTED_SYMBOLS[0]);
 
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/content/nameReferences.js
@@ -0,0 +1,144 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* exported nameReferences */
+
+"use strict";
+
+// The data below is initially copied from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util.cc?rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
+var nameReferences = {
+  NAME_PREFIXES: [
+    "1lt",
+    "1st",
+    "2lt",
+    "2nd",
+    "3rd",
+    "admiral",
+    "capt",
+    "captain",
+    "col",
+    "cpt",
+    "dr",
+    "gen",
+    "general",
+    "lcdr",
+    "lt",
+    "ltc",
+    "ltg",
+    "ltjg",
+    "maj",
+    "major",
+    "mg",
+    "mr",
+    "mrs",
+    "ms",
+    "pastor",
+    "prof",
+    "rep",
+    "reverend",
+    "rev",
+    "sen",
+    "st",
+  ],
+
+  NAME_SUFFIXES: [
+    "b.a",
+    "ba",
+    "d.d.s",
+    "dds",
+    "i",
+    "ii",
+    "iii",
+    "iv",
+    "ix",
+    "jr",
+    "m.a",
+    "m.d",
+    "ma",
+    "md",
+    "ms",
+    "ph.d",
+    "phd",
+    "sr",
+    "v",
+    "vi",
+    "vii",
+    "viii",
+    "x",
+  ],
+
+  FAMILY_NAME_PREFIXES: [
+    "d'",
+    "de",
+    "del",
+    "der",
+    "di",
+    "la",
+    "le",
+    "mc",
+    "san",
+    "st",
+    "ter",
+    "van",
+    "von",
+  ],
+
+  // The common and non-ambiguous CJK surnames (last names) that have more than
+  // one character.
+  COMMON_CJK_MULTI_CHAR_SURNAMES: [
+    // Korean, taken from the list of surnames:
+    // https://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EC%84%B1%EC%94%A8_%EB%AA%A9%EB%A1%9D
+    "남궁",
+    "사공",
+    "서문",
+    "선우",
+    "제갈",
+    "황보",
+    "독고",
+    "망절",
+
+    // Chinese, taken from the top 10 Chinese 2-character surnames:
+    // https://zh.wikipedia.org/wiki/%E8%A4%87%E5%A7%93#.E5.B8.B8.E8.A6.8B.E7.9A.84.E8.A4.87.E5.A7.93
+    // Simplified Chinese (mostly mainland China)
+    "欧阳",
+    "令狐",
+    "皇甫",
+    "上官",
+    "司徒",
+    "诸葛",
+    "司马",
+    "宇文",
+    "呼延",
+    "端木",
+    // Traditional Chinese (mostly Taiwan)
+    "張簡",
+    "歐陽",
+    "諸葛",
+    "申屠",
+    "尉遲",
+    "司馬",
+    "軒轅",
+    "夏侯",
+  ],
+
+  // All Korean surnames that have more than one character, even the
+  // rare/ambiguous ones.
+  KOREAN_MULTI_CHAR_SURNAMES: [
+    "강전",
+    "남궁",
+    "독고",
+    "동방",
+    "망절",
+    "사공",
+    "서문",
+    "선우",
+    "소봉",
+    "어금",
+    "장곡",
+    "제갈",
+    "황목",
+    "황보",
+  ],
+};
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/test/unit/test_isCJKName.js
@@ -0,0 +1,76 @@
+/**
+ * Tests the "_isCJKName" function of the FormAutofillNameUtils object.
+ */
+
+"use strict";
+
+Cu.import("resource://gre/modules/Task.jsm");
+Cu.import("resource://formautofill/FormAutofillNameUtils.jsm");
+
+// Test cases are initially copied from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util_unittest.cc
+const TESTCASES = [
+  {
+    // Non-CJK language with only ASCII characters.
+    fullName: "Homer Jay Simpson",
+    expectedResult: false,
+  },
+  {
+    // Non-CJK language with some ASCII characters.
+    fullName: "Éloïse Paré",
+    expectedResult: false,
+  },
+  {
+    // Non-CJK language with no ASCII characters.
+    fullName: "Σωκράτης",
+    expectedResult: false,
+  },
+  {
+    // (Simplified) Chinese name, Unihan.
+    fullName: "刘翔",
+    expectedResult: true,
+  },
+  {
+    // (Simplified) Chinese name, Unihan, with an ASCII space.
+    fullName: "成 龙",
+    expectedResult: true,
+  },
+  {
+    // Korean name, Hangul.
+    fullName: "송지효",
+    expectedResult: true,
+  },
+  {
+    // Korean name, Hangul, with an 'IDEOGRAPHIC SPACE' (U+3000).
+    fullName: "김 종국",
+    expectedResult: true,
+  },
+  {
+    // Japanese name, Unihan.
+    fullName: "山田貴洋",
+    expectedResult: true,
+  },
+  {
+    // Japanese name, Katakana, with a 'KATAKANA MIDDLE DOT' (U+30FB).
+    fullName: "ビル・ゲイツ",
+    expectedResult: true,
+  },
+  {
+    // Japanese name, Katakana, with a 'MIDDLE DOT' (U+00B7) (likely a typo).
+    fullName: "ビル·ゲイツ",
+    expectedResult: true,
+  },
+  {
+    // CJK names don't have a middle name, so a 3-part name is bogus to us.
+    fullName: "반 기 문",
+    expectedResult: false,
+  },
+];
+
+add_task(function* test_isCJKName() {
+  TESTCASES.forEach(testcase => {
+    do_print("Starting testcase: " + testcase.fullName);
+    let result = FormAutofillNameUtils._isCJKName(testcase.fullName);
+    do_check_eq(result, testcase.expectedResult);
+  });
+});
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/test/unit/test_nameUtils.js
@@ -0,0 +1,285 @@
+/**
+ * Tests the FormAutofillNameUtils object.
+ */
+
+"use strict";
+
+Cu.import("resource://gre/modules/Task.jsm");
+Cu.import("resource://formautofill/FormAutofillNameUtils.jsm");
+
+// Test cases initially copied from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util_unittest.cc
+const TESTCASES = [
+  {
+    description: "Full name including given, middle and family names",
+    fullName: "Homer Jay Simpson",
+    nameParts: {
+      given: "Homer",
+      middle: "Jay",
+      family: "Simpson",
+    },
+  },
+  {
+    description: "No middle name",
+    fullName: "Moe Szyslak",
+    nameParts: {
+      given: "Moe",
+      middle: "",
+      family: "Szyslak",
+    },
+  },
+  {
+    description: "Common name prefixes removed",
+    fullName: "Reverend Timothy Lovejoy",
+    nameParts: {
+      given: "Timothy",
+      middle: "",
+      family: "Lovejoy",
+    },
+    expectedFullName: "Timothy Lovejoy",
+  },
+  {
+    description: "Common name suffixes removed",
+    fullName: "John Frink Phd",
+    nameParts: {
+      given: "John",
+      middle: "",
+      family: "Frink",
+    },
+    expectedFullName: "John Frink",
+  },
+  {
+    description: "Exception to the name suffix removal",
+    fullName: "John Ma",
+    nameParts: {
+      given: "John",
+      middle: "",
+      family: "Ma",
+    },
+  },
+  {
+    description: "Common family name prefixes not considered a middle name",
+    fullName: "Milhouse Van Houten",
+    nameParts: {
+      given: "Milhouse",
+      middle: "",
+      family: "Van Houten",
+    },
+  },
+
+  // CJK names have reverse order (surname goes first, given name goes second).
+  {
+    description: "Chinese name, Unihan",
+    fullName: "孫 德明",
+    nameParts: {
+      given: "德明",
+      middle: "",
+      family: "孫",
+    },
+    expectedFullName: "孫德明",
+  },
+  {
+    description: "Chinese name, Unihan, \"IDEOGRAPHIC SPACE\"",
+    fullName: "孫 德明",
+    nameParts: {
+      given: "德明",
+      middle: "",
+      family: "孫",
+    },
+    expectedFullName: "孫德明",
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "홍 길동",
+    nameParts: {
+      given: "길동",
+      middle: "",
+      family: "홍",
+    },
+    expectedFullName: "홍길동",
+  },
+  {
+    description: "Japanese name, Unihan",
+    fullName: "山田 貴洋",
+    nameParts: {
+      given: "貴洋",
+      middle: "",
+      family: "山田",
+    },
+    expectedFullName: "山田貴洋",
+  },
+
+  // In Japanese, foreign names use 'KATAKANA MIDDLE DOT' (U+30FB) as a
+  // separator. There is no consensus for the ordering. For now, we use the same
+  // ordering as regular Japanese names ("last・first").
+  {
+    description: "Foreign name in Japanese, Katakana",
+    fullName: "ゲイツ・ビル",
+    nameParts: {
+      given: "ビル",
+      middle: "",
+      family: "ゲイツ",
+    },
+    expectedFullName: "ゲイツビル",
+  },
+
+  // 'KATAKANA MIDDLE DOT' is occasionally typoed as 'MIDDLE DOT' (U+00B7).
+  {
+    description: "Foreign name in Japanese, Katakana",
+    fullName: "ゲイツ·ビル",
+    nameParts: {
+      given: "ビル",
+      middle: "",
+      family: "ゲイツ",
+    },
+    expectedFullName: "ゲイツビル",
+  },
+
+  // CJK names don't usually have a space in the middle, but most of the time,
+  // the surname is only one character (in Chinese & Korean).
+  {
+    description: "Korean name, Hangul",
+    fullName: "최성훈",
+    nameParts: {
+      given: "성훈",
+      middle: "",
+      family: "최",
+    },
+  },
+  {
+    description: "(Simplified) Chinese name, Unihan",
+    fullName: "刘翔",
+    nameParts: {
+      given: "翔",
+      middle: "",
+      family: "刘",
+    },
+  },
+  {
+    description: "(Traditional) Chinese name, Unihan",
+    fullName: "劉翔",
+    nameParts: {
+      given: "翔",
+      middle: "",
+      family: "劉",
+    },
+  },
+
+  // There are a few exceptions. Occasionally, the surname has two characters.
+  {
+    description: "Korean name, Hangul",
+    fullName: "남궁도",
+    nameParts: {
+      given: "도",
+      middle: "",
+      family: "남궁",
+    },
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "황보혜정",
+    nameParts: {
+      given: "혜정",
+      middle: "",
+      family: "황보",
+    },
+  },
+  {
+    description: "(Traditional) Chinese name, Unihan",
+    fullName: "歐陽靖",
+    nameParts: {
+      given: "靖",
+      middle: "",
+      family: "歐陽",
+    },
+  },
+
+  // In Korean, some 2-character surnames are rare/ambiguous, like "강전": "강"
+  // is a common surname, and "전" can be part of a given name. In those cases,
+  // we assume it's 1/2 for 3-character names, or 2/2 for 4-character names.
+  {
+    description: "Korean name, Hangul",
+    fullName: "강전희",
+    nameParts: {
+      given: "전희",
+      middle: "",
+      family: "강",
+    },
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "황목치승",
+    nameParts: {
+      given: "치승",
+      middle: "",
+      family: "황목",
+    },
+  },
+
+  // It occasionally happens that a full name is 2 characters, 1/1.
+  {
+    description: "Korean name, Hangul",
+    fullName: "이도",
+    nameParts: {
+      given: "도",
+      middle: "",
+      family: "이",
+    },
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "孫文",
+    nameParts: {
+      given: "文",
+      middle: "",
+      family: "孫",
+    },
+  },
+
+  // These are not CJK names for us; they're just bogus.
+  {
+    description: "Bogus",
+    fullName: "Homer シンプソン",
+    nameParts: {
+      given: "Homer",
+      middle: "",
+      family: "シンプソン",
+    },
+  },
+  {
+    description: "Bogus",
+    fullName: "ホーマー Simpson",
+    nameParts: {
+      given: "ホーマー",
+      middle: "",
+      family: "Simpson",
+    },
+  },
+  {
+    description: "CJK has a middle-name, too unusual",
+    fullName: "반 기 문",
+    nameParts: {
+      given: "반",
+      middle: "기",
+      family: "문",
+    },
+  },
+];
+
+add_task(function* test_splitName() {
+  TESTCASES.forEach(testcase => {
+    if (testcase.fullName) {
+      do_print("Starting testcase: " + testcase.description);
+      let nameParts = FormAutofillNameUtils.splitName(testcase.fullName);
+      Assert.deepEqual(nameParts, testcase.nameParts);
+    }
+  });
+});
+
+add_task(function* test_joinName() {
+  TESTCASES.forEach(testcase => {
+    do_print("Starting testcase: " + testcase.description);
+    let name = FormAutofillNameUtils.joinNameParts(testcase.nameParts);
+    do_check_eq(name, testcase.expectedFullName || testcase.fullName);
+  });
+});
--- a/browser/extensions/formautofill/test/unit/xpcshell.ini
+++ b/browser/extensions/formautofill/test/unit/xpcshell.ini
@@ -3,14 +3,15 @@ firefox-appdir = browser
 head = head.js
 support-files =
 
 [test_autofillFormFields.js]
 [test_collectFormFields.js]
 [test_enabledStatus.js]
 [test_findLabelElements.js]
 [test_getFormInputDetails.js]
+[test_isCJKName.js]
 [test_markAsAutofillField.js]
+[test_nameUtils.js]
 [test_onFormSubmitted.js]
 [test_profileAutocompleteResult.js]
 [test_profileStorage.js]
 [test_savedFieldNames.js]
-
--- a/toolkit/content/license.html
+++ b/toolkit/content/license.html
@@ -2719,16 +2719,18 @@ WITH THE USE OR PERFORMANCE OF THIS SOFT
 
 
     <hr>
 
     <h1><a id="chromium"></a>Chromium License</h1>
 
     <p>This license applies to parts of the code in:</p>
     <ul>
+        <li><span class="path">browser/extensions/formautofill/content/nameReferences.js</span></li>
+        <li><span class="path">browser/extensions/formautofill/FormAutofillNameUtils.jsm</span></li>
         <li><span class="path">browser/extensions/mortar/host/common/opengles2-utils.jsm</span></li>
         <li><span class="path">editor/libeditor/EditorEventListener.cpp</span></li>
         <li><span class="path">security/sandbox/</span></li>
         <li><span class="path">widget/cocoa/GfxInfo.mm</span></li>
     </ul>
     <p>and also some files in these directories:</p>
     <ul>
         <li><span class="path">browser/extensions/mortar/ppapi/</span></li>