Bug 1423204 - Improve the address line parser to detect address-line* more correctly. r=lchang
author Sean Lee <selee@mozilla.com>
Sun, 07 Jan 2018 15:42:41 +0800
changeset 449926 7560a29b0f9755c158996781e2e73b3b3a3d49ee
parent 449919 e14fbde5349138be557996ca3f55c687ebcfb7be
child 449927 7a30d386b21a20eca4852969885413f606bdbd99
push id 8527
push user Callek@gmail.com
push date Thu, 11 Jan 2018 21:05:50 +0000
treeherder mozilla-beta@95342d212a7a [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers lchang
bugs 1423204
milestone 59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1423204 - Improve the address line parser to detect address-line* more correctly. r=lchang MozReview-Commit-ID: JHqKSnBZsVV
browser/extensions/formautofill/FormAutofillHeuristics.jsm
browser/extensions/formautofill/content/heuristicsRegexp.js
browser/extensions/formautofill/test/fixtures/autocomplete_basic.html
browser/extensions/formautofill/test/unit/heuristics/test_basic.js
browser/extensions/formautofill/test/unit/test_getAdaptedProfiles.js
--- a/browser/extensions/formautofill/FormAutofillHeuristics.jsm
+++ b/browser/extensions/formautofill/FormAutofillHeuristics.jsm
@@ -529,27 +529,66 @@ this.FormAutofillHeuristics = {
    * @param {FieldScanner} fieldScanner
    *        The current parsing status for all elements
    * @returns {boolean}
    *          Return true if there is any field can be recognized in the parser,
    *          otherwise false.
    */
   _parseAddressFields(fieldScanner) {
     let parsedFields = false;
-    let addressLines = ["address-line1", "address-line2", "address-line3"];
-    for (let i = 0; !fieldScanner.parsingFinished && i < addressLines.length; i++) {
+    const addressLines = ["address-line1", "address-line2", "address-line3"];
+
+    // TODO: These address-line* regexps only cover the numbered address lines,
+    // and they are a subset of the regexps in `heuristicsRegexp.js`. We have to
+    // find a better way to keep the two sets consistent.
+    const addressLineRegexps = {
+      "address-line1": new RegExp(
+        "address[_-]?line(1|one)|address1|addr1" +
+        "|addrline1|address_1" + // Extra rules by Firefox
+        "|indirizzo1" + // it-IT
+        "|住所1" + // ja-JP
+        "|地址1" + // zh-CN
+        "|주소.?1", // ko-KR
+        "iu"
+      ),
+      "address-line2": new RegExp(
+        "address[_-]?line(2|two)|address2|addr2" +
+        "|addrline2|address_2" + // Extra rules by Firefox
+        "|indirizzo2" + // it-IT
+        "|住所2" + // ja-JP
+        "|地址2" + // zh-CN
+        "|주소.?2", // ko-KR
+        "iu"
+      ),
+      "address-line3": new RegExp(
+        "address[_-]?line(3|three)|address3|addr3" +
+        "|addrline3|address_3" + // Extra rules by Firefox
+        "|indirizzo3" + // it-IT
+        "|住所3" + // ja-JP
+        "|地址3" + // zh-CN
+        "|주소.?3", // ko-KR
+        "iu"
+      ),
+    };
+    while (!fieldScanner.parsingFinished) {
       let detail = fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex);
-      if (!detail || !addressLines.includes(detail.fieldName)) {
-        // When the field is not related to any address-line[1-3] fields, it
-        // means the parsing process can be terminated.
+      if (!detail || !addressLines.includes(detail.fieldName) || detail._reason == "autocomplete") {
+        // When the field is not one of the address-line[1-3] fields, or it
+        // was determined by the autocomplete attribute, the parsing process
+        // can be terminated.
         break;
       }
-      fieldScanner.updateFieldName(fieldScanner.parsingIndex, addressLines[i]);
+      const elem = detail.elementWeakRef.get();
+      for (let regexp of Object.keys(addressLineRegexps)) {
+        if (this._matchRegexp(elem, addressLineRegexps[regexp])) {
+          fieldScanner.updateFieldName(fieldScanner.parsingIndex, regexp);
+          parsedFields = true;
+        }
+      }
       fieldScanner.parsingIndex++;
-      parsedFields = true;
     }
 
     return parsedFields;
   },
 
   /**
    * Try to look for expiration date fields and revise the field names if needed.
    *
--- a/browser/extensions/formautofill/content/heuristicsRegexp.js
+++ b/browser/extensions/formautofill/content/heuristicsRegexp.js
@@ -61,44 +61,47 @@ var HeuristicsRegExp = {
       "iu"
     ),
     "street-address": new RegExp(
       "streetaddress|street-address",
       "iu"
     ),
     "address-line1": new RegExp(
       "^address$|address[_-]?line(one)?|address1|addr1|street" +
+      "|addrline1|address_1" + // Extra rules by Firefox
       "|(?:shipping|billing)address$" +
       "|strasse|straße|hausnummer|housenumber" + // de-DE
       "|house.?name" + // en-GB
       "|direccion|dirección" + // es
       "|adresse" + // fr-FR
       "|indirizzo" + // it-IT
       "|^住所$|住所1" + // ja-JP
       "|morada|endereço" + // pt-BR, pt-PT
       "|Адрес" + // ru
       "|地址" + // zh-CN
       "|^주소.?$|주소.?1", // ko-KR
       "iu"
     ),
     "address-line2": new RegExp(
       "address[_-]?line(2|two)|address2|addr2|street|suite|unit" +
+      "|addrline2|address_2" + // Extra rules by Firefox
       "|adresszusatz|ergänzende.?angaben" + // de-DE
       "|direccion2|colonia|adicional" + // es
       "|addresssuppl|complementnom|appartement" + // fr-FR
       "|indirizzo2" + // it-IT
       "|住所2" + // ja-JP
       "|complemento|addrcomplement" + // pt-BR, pt-PT
       "|Улица" + // ru
       "|地址2" + // zh-CN
       "|주소.?2", // ko-KR
       "iu"
     ),
     "address-line3": new RegExp(
       "address[_-]?line(3|three)|address3|addr3|street|suite|unit" +
+      "|addrline3|address_3" + // Extra rules by Firefox
       "|adresszusatz|ergänzende.?angaben" + // de-DE
       "|direccion3|colonia|adicional" + // es
       "|addresssuppl|complementnom|appartement" + // fr-FR
       "|indirizzo3" + // it-IT
       "|住所3" + // ja-JP
       "|complemento|addrcomplement" + // pt-BR, pt-PT
       "|Улица" + // ru
       "|地址3" + // zh-CN
--- a/browser/extensions/formautofill/test/fixtures/autocomplete_basic.html
+++ b/browser/extensions/formautofill/test/fixtures/autocomplete_basic.html
@@ -35,11 +35,19 @@
     <p><label>cc-name <input type="text" id="B_cc-name" autocomplete="cc-name" /></label></p>
     <p><label>cc-exp-month <input type="text" id="B_cc-exp-month" autocomplete="cc-exp-month" /></label></p>
     <p><label>cc-exp-year <input type="text" id="B_cc-exp-year" autocomplete="cc-exp-year" /></label></p>
     <hr>
     <p><input type="submit" /></p>
     <p><button type="reset">Reset</button></p>
   </form>
 
+  <form id="formC">
+    <p><label><input type="text" name="someprefixAddrLine1" /></label></p>
+    <p><label>City: <input type="text" name="address-level2" /></label></p>
+    <p><label><input type="text" name="someprefixAddrLine2" /></label></p>
+    <p><label>Organization: <input type="text" name="organization" /></label></p>
+    <p><label><input type="text" name="someprefixAddrLine3" /></label></p>
+  </form>
+
 </body>
 </html>
 
--- a/browser/extensions/formautofill/test/unit/heuristics/test_basic.js
+++ b/browser/extensions/formautofill/test/unit/heuristics/test_basic.js
@@ -27,12 +27,19 @@ runHeuristicsTest([
         {"section": "", "addressType": "", "contactType": "", "fieldName": "country"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "tel"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "email"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-number"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-name"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp-month"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp-year"},
       ]],
+      [[
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-line1"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-level2"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-line2"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "organization"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-line3"},
+      ]],
     ],
   },
 ], "../../fixtures/");
 
--- a/browser/extensions/formautofill/test/unit/test_getAdaptedProfiles.js
+++ b/browser/extensions/formautofill/test/unit/test_getAdaptedProfiles.js
@@ -115,29 +115,56 @@ const TESTCASES = [
       "address-line3": "line3",
       "address-level1": "CA",
       "country": "US",
       "tel": "+19876543210",
       "tel-national": "9876543210",
     }],
   },
   {
-    description: "Address form with street-address, address-line[1, 3]",
+    description: "Address form with street-address, address-line[1, 3]" +
+                 ", determined by autocomplete attr",
     document: `<form>
                <input id="street-addr" autocomplete="street-address">
                <input id="line1" autocomplete="address-line1">
                <input id="line3" autocomplete="address-line3">
                </form>`,
     profileData: [Object.assign({}, DEFAULT_ADDRESS_RECORD)],
     expectedResult: [{
       "guid": "123",
       "street-address": "2 Harrison St line2 line3",
       "-moz-street-address-one-line": "2 Harrison St line2 line3",
-      "address-line1": "2 Harrison St",
-      "address-line2": "line2 line3",
+      // Since the form is missing an address-line2 field, the value of
+      // address-line1 should contain the line2 value as well.
+      "address-line1": "2 Harrison St line2",
+      "address-line2": "line2",
+      "address-line3": "line3",
+      "address-level1": "CA",
+      "country": "US",
+      "tel": "+19876543210",
+      "tel-national": "9876543210",
+    }],
+  },
+  {
+    description: "Address form with street-address, address-line[1, 3]" +
+                 ", determined by heuristics",
+    document: `<form>
+               <input id="street-address">
+               <input id="address-line1">
+               <input id="address-line3">
+               </form>`,
+    profileData: [Object.assign({}, DEFAULT_ADDRESS_RECORD)],
+    expectedResult: [{
+      "guid": "123",
+      "street-address": "2 Harrison St line2 line3",
+      "-moz-street-address-one-line": "2 Harrison St line2 line3",
+      // Since the form is missing an address-line2 field, the value of
+      // address-line1 should contain the line2 value as well.
+      "address-line1": "2 Harrison St line2",
+      "address-line2": "line2",
       "address-line3": "line3",
       "address-level1": "CA",
       "country": "US",
       "tel": "+19876543210",
       "tel-national": "9876543210",
     }],
   },
   {