Bug 1040721 - The search service should provide details for parsing search result URLs. r=gavin
authorPaolo Amadini <paolo.mozmail@amadzone.org>
Thu, 31 Jul 2014 15:57:02 +0100
changeset 197037 482533cb9495fe0448baeb5054d69c003f96a95a
parent 197036 65ae48644b2e1f61c6db50cb1a5c337379dbc9a0
child 197038 2c54d1bdd33962cc8473ab36b68dcfca77db1238
push id7993
push userpaolo.mozmail@amadzone.org
push dateThu, 31 Jul 2014 15:42:31 +0000
treeherderfx-team@482533cb9495 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgavin
bugs1040721
milestone34.0a1
Bug 1040721 - The search service should provide details for parsing search result URLs. r=gavin
browser/components/search/test/browser_google.js
netwerk/base/public/nsIBrowserSearchService.idl
toolkit/components/search/SearchStaticData.jsm
toolkit/components/search/moz.build
toolkit/components/search/nsSearchService.js
toolkit/components/search/tests/xpcshell/data/engine-fr.xml
toolkit/components/search/tests/xpcshell/test_SearchStaticData.js
toolkit/components/search/tests/xpcshell/test_parseSubmissionURL.js
toolkit/components/search/tests/xpcshell/xpcshell.ini
--- a/browser/components/search/test/browser_google.js
+++ b/browser/components/search/test/browser_google.js
@@ -69,16 +69,23 @@ function test() {
   is(url, base + "&channel=np&source=hp", "Check homepage search URL for 'foo'");
   url = engine.getSubmission("foo", null, "newtab").uri.spec;
   is(url, base + "&channel=nts", "Check newtab search URL for 'foo'");
 
   // Check search suggestion URL.
   url = engine.getSubmission("foo", "application/x-suggestions+json").uri.spec;
   is(url, "https://www.google.com/complete/search?client=firefox&q=foo", "Check search suggestion URL for 'foo'");
 
+  // Check result parsing and alternate domains.
+  let alternateBase = base.replace("www.google.com", "www.google.fr");
+  is(Services.search.parseSubmissionURL(base).terms, "foo",
+     "Check result parsing");
+  is(Services.search.parseSubmissionURL(alternateBase).terms, "foo",
+     "Check alternate domain");
+
   // Check all other engine properties.
   const EXPECTED_ENGINE = {
     name: "Google",
     alias: null,
     description: "Google Search",
     searchForm: "https://www.google.com/",
     type: Ci.nsISearchEngine.TYPE_MOZSEARCH,
     hidden: false,
--- a/netwerk/base/public/nsIBrowserSearchService.idl
+++ b/netwerk/base/public/nsIBrowserSearchService.idl
@@ -179,16 +179,33 @@ interface nsISearchEngine : nsISupports
    * @param  responseType [optional]
    *         The MIME type to get resultDomain for.  Defaults to "text/html".
    *
    * @return the resultDomain for the given responseType.
    */
   AString getResultDomain([optional] in AString responseType);
 };
 
+[scriptable, uuid(856a31ff-b451-4101-b12e-ff399485ac8a)]
+interface nsISearchParseSubmissionResult : nsISupports
+{
+  /**
+   * The search engine associated with the URL passed in to
+   * nsIBrowserSearchService::parseSubmissionURL, or null if the URL does not
+   * represent a search submission.
+   */
+  readonly attribute nsISearchEngine engine;
+
+  /**
+   * String containing the sought terms.  This can be an empty string in case no
+   * terms were specified or the URL does not represent a search submission.
+   */
+  readonly attribute AString terms;
+};
+
 [scriptable, uuid(9fc39136-f08b-46d3-b232-96f4b7b0e235)]
 interface nsISearchInstallCallback : nsISupports
 {
   const unsigned long ERROR_UNKNOWN_FAILURE = 0x1;
   const unsigned long ERROR_DUPLICATE_ENGINE = 0x2;
 
   /**
    * Called to indicate that the engine addition process succeeded.
@@ -404,16 +421,32 @@ interface nsIBrowserSearchService : nsIS
    */
   attribute nsISearchEngine defaultEngine;
 
   /**
    * The currently active search engine. May be null if there are no visible
    * search engines.
    */
   attribute nsISearchEngine currentEngine;
+
+  /**
+   * Determines if the provided URL represents results from a search engine, and
+   * provides details about the match.
+   *
+   * The lookup mechanism checks whether the domain name and path of the
+   * provided HTTP or HTTPS URL matches one of the known values for the visible
+   * search engines.  The match does not depend on which of the schemes is used.
+   * The expected URI parameter for the search terms must exist in the query
+   * string, but other parameters are ignored.
+   *
+   * @param url
+   *        String containing the URL to parse, for example
+   *        "https://www.google.com/search?q=terms".
+   *
+   * @return An nsISearchParseSubmissionResult describing the match.  If the URL
+   *         does not represent a search submission, its engine is null and its
+   *         terms are an empty string.
+   */
+  nsISearchParseSubmissionResult parseSubmissionURL(in AString url);
 };
 
 %{ C++
 /**
  * The observer topic to listen to for actions performed on installed
  * search engines.
  */
 #define SEARCH_ENGINE_TOPIC "browser-search-engine-modified"
new file mode 100644
--- /dev/null
+++ b/toolkit/components/search/SearchStaticData.jsm
@@ -0,0 +1,43 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This module contains additional data about default search engines that is the
+ * same across all languages.  This information is defined outside of the actual
+ * search engine definition files, so that localizers don't need to update them
+ * when a change is made.
+ *
+ * This separate module is also easily overridable, in case a hotfix is needed.
+ * No high-level processing logic is applied here.
+ */
+
+"use strict";
+
+this.EXPORTED_SYMBOLS = [
+  "SearchStaticData",
+];
+
+const { classes: Cc, interfaces: Ci, utils: Cu, results: Cr } = Components;
+
+// To update this list of known alternate domains, just cut-and-paste from
+// https://www.google.com/supported_domains
+const gGoogleDomainsSource = ".google.com .google.ad .google.ae .google.com.af .google.com.ag .google.com.ai .google.al .google.am .google.co.ao .google.com.ar .google.as .google.at .google.com.au .google.az .google.ba .google.com.bd .google.be .google.bf .google.bg .google.com.bh .google.bi .google.bj .google.com.bn .google.com.bo .google.com.br .google.bs .google.bt .google.co.bw .google.by .google.com.bz .google.ca .google.cd .google.cf .google.cg .google.ch .google.ci .google.co.ck .google.cl .google.cm .google.cn .google.com.co .google.co.cr .google.com.cu .google.cv .google.com.cy .google.cz .google.de .google.dj .google.dk .google.dm .google.com.do .google.dz .google.com.ec .google.ee .google.com.eg .google.es .google.com.et .google.fi .google.com.fj .google.fm .google.fr .google.ga .google.ge .google.gg .google.com.gh .google.com.gi .google.gl .google.gm .google.gp .google.gr .google.com.gt .google.gy .google.com.hk .google.hn .google.hr .google.ht .google.hu .google.co.id .google.ie .google.co.il .google.im .google.co.in .google.iq .google.is .google.it .google.je .google.com.jm .google.jo .google.co.jp .google.co.ke .google.com.kh .google.ki .google.kg .google.co.kr .google.com.kw .google.kz .google.la .google.com.lb .google.li .google.lk .google.co.ls .google.lt .google.lu .google.lv .google.com.ly .google.co.ma .google.md .google.me .google.mg .google.mk .google.ml .google.com.mm .google.mn .google.ms .google.com.mt .google.mu .google.mv .google.mw .google.com.mx .google.com.my .google.co.mz .google.com.na .google.com.nf .google.com.ng .google.com.ni .google.ne .google.nl .google.no .google.com.np .google.nr .google.nu .google.co.nz .google.com.om .google.com.pa .google.com.pe .google.com.pg .google.com.ph .google.com.pk .google.pl .google.pn .google.com.pr .google.ps .google.pt .google.com.py .google.com.qa .google.ro .google.ru .google.rw .google.com.sa .google.com.sb .google.sc .google.se .google.com.sg .google.sh .google.si .google.sk 
.google.com.sl .google.sn .google.so .google.sm .google.sr .google.st .google.com.sv .google.td .google.tg .google.co.th .google.com.tj .google.tk .google.tl .google.tm .google.tn .google.to .google.com.tr .google.tt .google.com.tw .google.co.tz .google.com.ua .google.co.ug .google.co.uk .google.com.uy .google.co.uz .google.com.vc .google.co.ve .google.vg .google.co.vi .google.com.vn .google.vu .google.ws .google.rs .google.co.za .google.co.zm .google.co.zw .google.cat";
+const gGoogleDomains = gGoogleDomainsSource.split(" ").map(d => "www" + d);
+
+this.SearchStaticData = {
+  /**
+   * Returns a list of alternate domains for a given search engine domain.
+   *
+   * @param aDomain
+   *        Lowercase host name to look up. For example, if this argument is
+   *        "www.google.com" or "www.google.co.uk", the function returns the
+   *        full list of supported Google domains.
+   *
+   * @return Array containing one entry for each alternate host name, or empty
+   *         array if none is known.  The returned array should not be modified.
+   */
+  getAlternateDomains: function (aDomain) {
+    return gGoogleDomains.indexOf(aDomain) == -1 ? [] : gGoogleDomains;
+  },
+};
--- a/toolkit/components/search/moz.build
+++ b/toolkit/components/search/moz.build
@@ -14,12 +14,16 @@ EXTRA_COMPONENTS += [
 EXTRA_JS_MODULES += [
     'SearchSuggestionController.jsm',
 ]
 
 EXTRA_PP_COMPONENTS += [
     'nsSearchService.js',
 ]
 
+EXTRA_JS_MODULES += [
+    'SearchStaticData.jsm',
+]
+
 DEFINES['MOZ_DISTRIBUTION_ID'] = CONFIG['MOZ_DISTRIBUTION_ID']
 
 if CONFIG['MOZ_BUILD_APP'] == 'mobile/android':
     DEFINES['MOZ_FENNEC'] = True
--- a/toolkit/components/search/nsSearchService.js
+++ b/toolkit/components/search/nsSearchService.js
@@ -18,16 +18,22 @@ XPCOMUtils.defineLazyModuleGetter(this, 
 XPCOMUtils.defineLazyModuleGetter(this, "OS",
   "resource://gre/modules/osfile.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "Task",
   "resource://gre/modules/Task.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch",
   "resource://gre/modules/TelemetryStopwatch.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "Deprecated",
   "resource://gre/modules/Deprecated.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "SearchStaticData",
+  "resource://gre/modules/SearchStaticData.jsm");
+
+XPCOMUtils.defineLazyServiceGetter(this, "gTextToSubURI",
+                                   "@mozilla.org/intl/texttosuburi;1",
+                                   "nsITextToSubURI");
 
 // A text encoder to UTF8, used whenever we commit the
 // engine metadata to disk.
 XPCOMUtils.defineLazyGetter(this, "gEncoder",
                             function() {
                               return new TextEncoder();
                             });
 
@@ -899,16 +905,21 @@ EngineURL.prototype = {
       postData.addHeader("Content-Type", "application/x-www-form-urlencoded");
       postData.addContentLength = true;
       postData.setData(stringStream);
     }
 
     return new Submission(makeURI(url), postData);
   },
 
+  _getTermsParameterName: function SRCH_EURL__getTermsParameterName() {
+    let queryParam = this.params.find(p => p.value == USER_DEFINED);
+    return queryParam ? queryParam.name : "";
+  },
+
   _hasRelation: function SRC_EURL__hasRelation(aRel)
     this.rels.some(function(e) e == aRel.toLowerCase()),
 
   _initWithJSON: function SRC_EURL__initWithJSON(aJson, aEngine) {
     if (!aJson.params)
       return;
 
     this.rels = aJson.rels;
@@ -2454,17 +2465,17 @@ Engine.prototype = {
     if (this._file) {
       return this._file.leafName;
     }
     if (this._uri && this._uri instanceof Ci.nsIURL) {
       return this._uri.fileName;
     }
     return null;
   },
-    
+
   // The file that the plugin is loaded from is a unique identifier for it.  We
   // use this as the identifier to store data in the sqlite database
   __id: null,
   get _id() {
     if (this.__id) {
       return this.__id;
     }
 
@@ -2636,24 +2647,22 @@ Engine.prototype = {
       return null;
 
     if (!aData) {
       // Return a dummy submission object with our searchForm attribute
       return new Submission(makeURI(this.searchForm), null);
     }
 
     LOG("getSubmission: In data: \"" + aData + "\"; Purpose: \"" + aPurpose + "\"");
-    var textToSubURI = Cc["@mozilla.org/intl/texttosuburi;1"].
-                       getService(Ci.nsITextToSubURI);
     var data = "";
     try {
-      data = textToSubURI.ConvertAndEscape(this.queryCharset, aData);
+      data = gTextToSubURI.ConvertAndEscape(this.queryCharset, aData);
     } catch (ex) {
       LOG("getSubmission: Falling back to default queryCharset!");
-      data = textToSubURI.ConvertAndEscape(DEFAULT_QUERY_CHARSET, aData);
+      data = gTextToSubURI.ConvertAndEscape(DEFAULT_QUERY_CHARSET, aData);
     }
     LOG("getSubmission: Out data: \"" + data + "\"");
     return url.getSubmission(data, this, aPurpose);
   },
 
   // from nsISearchEngine
   supportsResponseType: function SRCH_ENG_supportsResponseType(type) {
     return (this._getURLOfType(type) != null);
@@ -2673,16 +2682,46 @@ Engine.prototype = {
     LOG("getResultDomain: responseType: \"" + aResponseType + "\"");
 
     let url = this._getURLOfType(aResponseType);
     if (url)
       return url.resultDomain;
     return "";
   },
 
+  /**
+   * Returns URL parsing properties used by _buildParseSubmissionMap.
+   *
+   * @return null if the engine has no URL for the response type selected
+   *         below, if that URL does not use the GET method, or if none of its
+   *         parameters carries the search terms.  Otherwise, an object with
+   *         these properties:
+   *         {
+   *           mainDomain: Host name of the URL template.
+   *           path: Lowercase file path of the URL template.
+   *           termsParameterName: Name of the parameter holding the terms.
+   *         }
+   */
+  getURLParsingInfo: function () {
+#ifdef ANDROID
+    let responseType = this._defaultMobileResponseType;
+#else
+    let responseType = URLTYPE_SEARCH_HTML;
+#endif
+
+    LOG("getURLParsingInfo: responseType: \"" + responseType + "\"");
+
+    let url = this._getURLOfType(responseType);
+    if (!url || url.method != "GET") {
+      return null;
+    }
+
+    let termsParameterName = url._getTermsParameterName();
+    if (!termsParameterName) {
+      return null;
+    }
+
+    let templateUrl = NetUtil.newURI(url.template).QueryInterface(Ci.nsIURL);
+    return {
+      mainDomain: templateUrl.host,
+      path: templateUrl.filePath.toLowerCase(),
+      termsParameterName: termsParameterName,
+    };
+  },
+
   // nsISupports
   QueryInterface: function SRCH_ENG_QI(aIID) {
     if (aIID.equals(Ci.nsISearchEngine) ||
         aIID.equals(Ci.nsISupports))
       return this;
     throw Cr.NS_ERROR_NO_INTERFACE;
   },
 
@@ -2784,16 +2823,34 @@ Submission.prototype = {
   QueryInterface: function SRCH_SUBM_QI(aIID) {
     if (aIID.equals(Ci.nsISearchSubmission) ||
         aIID.equals(Ci.nsISupports))
       return this;
     throw Cr.NS_ERROR_NO_INTERFACE;
   }
 }
 
+// nsISearchParseSubmissionResult
+//
+// Holds the search engine and the search terms extracted from a URL.  The
+// constructor arguments are exposed through the read-only "engine" and "terms"
+// getters.
+function ParseSubmissionResult(aEngine, aTerms) {
+  this._engine = aEngine;
+  this._terms = aTerms;
+}
+ParseSubmissionResult.prototype = {
+  get engine() {
+    return this._engine;
+  },
+  get terms() {
+    return this._terms;
+  },
+  QueryInterface: XPCOMUtils.generateQI([Ci.nsISearchParseSubmissionResult]),
+}
+
+// Shared frozen instance with a null engine and empty terms, returned by
+// parseSubmissionURL whenever the URL does not match a search submission.
+const gEmptyParseSubmissionResult =
+      Object.freeze(new ParseSubmissionResult(null, ""));
+
 function executeSoon(func) {
   Services.tm.mainThread.dispatch(func, Ci.nsIThread.DISPATCH_NORMAL);
 }
 
 /**
  * Check for sync initialization has completed or not.
  *
  * @param {aPromise} A promise.
@@ -3194,17 +3251,17 @@ SearchService.prototype = {
     // Start by clearing the initialized state, so we don't abort early.
     gInitialized = false;
 
     // Clear the engines, too, so we don't stick with the stale ones.
     this._engines = {};
     this.__sortedEngines = null;
 
     // Typically we'll re-init as a result of a pref observer,
-    // so signal to 'callers' that we're done. 
+    // so signal to 'callers' that we're done.
     return this._asyncLoadEngines()
                .then(() => {
                        Services.obs.notifyObservers(null, SEARCH_SERVICE_TOPIC, "reinit-complete");
                        gInitialized = true;
                      },
                      (err) => {
                        LOG("Reinit failed: " + err);
                        Services.obs.notifyObservers(null, SEARCH_SERVICE_TOPIC, "reinit-failed");
@@ -3308,17 +3365,17 @@ SearchService.prototype = {
       }
       notifyAction(aEngine, SEARCH_ENGINE_ADDED);
     }
 
     if (aEngine._hasUpdates) {
       // Schedule the engine's next update, if it isn't already.
       if (!engineMetadataService.getAttr(aEngine, "updateexpir"))
         engineUpdateService.scheduleNextUpdate(aEngine);
-  
+
       // We need to save the engine's _dataType, if this is the first time the
       // engine is added to the dataStore, since ._dataType isn't persisted
       // and will change on the next startup (since the engine will then be
       // XML). We need this so that we know how to load any future updates from
       // this engine.
       if (!engineMetadataService.getAttr(aEngine, "updatedatatype"))
         engineMetadataService.setAttr(aEngine, "updatedatatype",
                                       aEngine._dataType);
@@ -3528,17 +3585,17 @@ SearchService.prototype = {
       } catch (ex) {
         LOG("_findJAREngines: failed to retrieve list.txt from " + listURL + ": " + ex);
 
         return;
       }
 
       names.forEach(function (n) uris.push(root + n + ".xml"));
     });
-    
+
     return [chromeFiles, uris];
   },
 
   /**
    * Loads jar engines asynchronously.
    *
    * @returns {Promise} A promise, resolved successfully if finding jar engines
    * succeeds.
@@ -3637,17 +3694,17 @@ SearchService.prototype = {
     var engine;
 
     // If the user has specified a custom engine order, read the order
     // information from the engineMetadataService instead of the default
     // prefs.
     if (getBoolPref(BROWSER_SEARCH_PREF + "useDBForOrder", false)) {
       LOG("_buildSortedEngineList: using db for order");
 
-      // Flag to keep track of whether or not we need to call _saveSortedEngineList. 
+      // Flag to keep track of whether or not we need to call _saveSortedEngineList.
       let needToSaveEngineList = false;
 
       for each (engine in this._engines) {
         var orderNumber = engineMetadataService.getAttr(engine, "order");
 
         // Since the DB isn't regularly cleared, and engine files may disappear
         // without us knowing, we may already have an engine in this slot. If
         // that happens, we just skip it - it will be added later on as an
@@ -3695,17 +3752,17 @@ SearchService.prototype = {
       while (true) {
         engineName = getLocalizedPref(BROWSER_SEARCH_PREF + "order." + (++i));
         if (!engineName)
           break;
 
         engine = this._engines[engineName];
         if (!engine || engine.name in addedEngines)
           continue;
-        
+
         this.__sortedEngines.push(engine);
         addedEngines[engine.name] = engine;
       }
     }
 
     // Array for the remaining engines, alphabetically sorted
     var alphaEngines = [];
 
@@ -3884,18 +3941,16 @@ SearchService.prototype = {
       FAIL("Invalid template passed to addEngineWithDetails!");
     if (this._engines[aName])
       FAIL("An engine with that name already exists!", Cr.NS_ERROR_FILE_ALREADY_EXISTS);
 
     var engine = new Engine(getSanitizedFile(aName), SEARCH_DATA_XML, false);
     engine._initFromMetadata(aName, aIconURL, aAlias, aDescription,
                              aMethod, aTemplate);
     this._addEngineToStore(engine);
-    this.batchTask.disarm();
-    this.batchTask.arm();
   },
 
   addEngine: function SRCH_SVC_addEngine(aEngineURL, aDataType, aIconURL,
                                          aConfirm, aCallback) {
     LOG("addEngine: Adding \"" + aEngineURL + "\".");
     this._ensureInitialized();
     try {
       var uri = makeURI(aEngineURL);
@@ -3995,17 +4050,17 @@ SearchService.prototype = {
     if (currentIndex == -1)
       FAIL("moveEngine: Can't find engine to move!", Cr.NS_ERROR_UNEXPECTED);
 
     // Our callers only take into account non-hidden engines when calculating
     // aNewIndex, but we need to move it in the array of all engines, so we
     // need to adjust aNewIndex accordingly. To do this, we count the number
     // of hidden engines in the list before the engine that we're taking the
     // place of. We do this by first finding newIndexEngine (the engine that
-    // we were supposed to replace) and then iterating through the complete 
+    // we were supposed to replace) and then iterating through the complete
     // engine list until we reach it, increasing aNewIndex for each hidden
     // engine we find on our way there.
     //
     // This could be further simplified by having our caller pass in
     // newIndexEngine directly instead of aNewIndex.
     var newIndexEngine = this._getSortedEngines(false)[aNewIndex];
     if (!newIndexEngine)
       FAIL("moveEngine: Can't find engine to replace!", Cr.NS_ERROR_UNEXPECTED);
@@ -4136,37 +4191,185 @@ SearchService.prototype = {
     else {
       setLocalizedPref(currentEnginePref, this._currentEngine.name);
     }
     this._changingCurrentEngine = false;
 
     notifyAction(this._currentEngine, SEARCH_ENGINE_CURRENT);
   },
 
+  /**
+   * This map is built lazily after the available search engines change.  It
+   * allows quick parsing of an URL representing a search submission into the
+   * search engine name and original terms.
+   *
+   * The keys are strings containing the domain name and lowercase path of the
+   * engine submission, for example "www.google.com/search".
+   *
+   * The values are objects with these properties:
+   * {
+   *   engine: The associated nsISearchEngine.
+   *   termsParameterName: Name of the URL parameter containing the search
+   *                       terms, for example "q".
+   * }
+   */
+  _parseSubmissionMap: null,
+
+  _buildParseSubmissionMap: function SRCH_SVC__buildParseSubmissionMap() {
+    LOG("_buildParseSubmissionMap");
+    this._parseSubmissionMap = new Map();
+
+    // Used only while building the map, indicates which entries do not refer to
+    // the main domain of the engine but to an alternate domain, for example
+    // "www.google.fr" for the "www.google.com" search engine.
+    let keysOfAlternates = new Set();
+
+    for (let engine of this._sortedEngines) {
+      LOG("Processing engine: " + engine.name);
+
+      if (engine.hidden) {
+        LOG("Engine is hidden.");
+        continue;
+      }
+
+      let urlParsingInfo = engine.getURLParsingInfo();
+      if (!urlParsingInfo) {
+        LOG("Engine does not support URL parsing.");
+        continue;
+      }
+
+      // Store the same object on each matching map key, as an optimization.
+      let mapValueForEngine = {
+        engine: engine,
+        termsParameterName: urlParsingInfo.termsParameterName,
+      };
+
+      let processDomain = (domain, isAlternate) => {
+        let key = domain + urlParsingInfo.path;
+
+        // Apply the logic for which main domains take priority over alternate
+        // domains, even if they are found later in the ordered engine list.
+        let existingEntry = this._parseSubmissionMap.get(key);
+        if (!existingEntry) {
+          LOG("Adding new entry: " + key);
+          if (isAlternate) {
+            keysOfAlternates.add(key);
+          }
+        } else if (!isAlternate && keysOfAlternates.has(key)) {
+          LOG("Overriding alternate entry: " + key +
+              " (" + existingEntry.engine.name + ")");
+          keysOfAlternates.delete(key);
+        } else {
+          LOG("Keeping existing entry: " + key +
+              " (" + existingEntry.engine.name + ")");
+          return;
+        }
+
+        this._parseSubmissionMap.set(key, mapValueForEngine);
+      };
+
+      processDomain(urlParsingInfo.mainDomain, false);
+      SearchStaticData.getAlternateDomains(urlParsingInfo.mainDomain)
+                      .forEach(d => processDomain(d, true));
+    }
+  },
+
+  parseSubmissionURL: function SRCH_SVC_parseSubmissionURL(aURL) {
+    this._ensureInitialized();
+    LOG("parseSubmissionURL: Parsing \"" + aURL + "\".");
+
+    if (!this._parseSubmissionMap) {
+      this._buildParseSubmissionMap();
+    }
+
+    // Extract the elements of the provided URL first.
+    let soughtKey, soughtQuery;
+    try {
+      let soughtUrl = NetUtil.newURI(aURL).QueryInterface(Ci.nsIURL);
+
+      // Exclude any URL that is not HTTP or HTTPS from the beginning.
+      if (soughtUrl.scheme != "http" && soughtUrl.scheme != "https") {
+        LOG("The URL scheme is not HTTP or HTTPS.");
+        return gEmptyParseSubmissionResult;
+      }
+
+      // Reading these URL properties may fail and raise an exception.
+      soughtKey = soughtUrl.host + soughtUrl.filePath.toLowerCase();
+      soughtQuery = soughtUrl.query;
+    } catch (ex) {
+      // Errors while parsing the URL or accessing the properties are not fatal.
+      LOG("The value does not look like a structured URL.");
+      return gEmptyParseSubmissionResult;
+    }
+
+    // Look up the domain and path in the map to identify the search engine.
+    let mapEntry = this._parseSubmissionMap.get(soughtKey);
+    if (!mapEntry) {
+      LOG("No engine associated with domain and path: " + soughtKey);
+      return gEmptyParseSubmissionResult;
+    }
+
+    // Extract the search terms from the parameter, for example "caff%C3%A8"
+    // from the URL "https://www.google.com/search?q=caff%C3%A8&client=firefox".
+    let encodedTerms = null;
+    for (let param of soughtQuery.split("&")) {
+      let equalPos = param.indexOf("=");
+      if (equalPos != -1 &&
+          param.substr(0, equalPos) == mapEntry.termsParameterName) {
+        // This is the parameter we are looking for.
+        encodedTerms = param.substr(equalPos + 1);
+        break;
+      }
+    }
+    if (encodedTerms === null) {
+      LOG("Missing terms parameter: " + mapEntry.termsParameterName);
+      return gEmptyParseSubmissionResult;
+    }
+
+    // Decode the terms using the charset defined in the search engine.
+    let terms;
+    try {
+      terms = gTextToSubURI.UnEscapeAndConvert(
+                                       mapEntry.engine.queryCharset,
+                                       encodedTerms.replace("+", " "));
+    } catch (ex) {
+      // Decoding errors will cause this match to be ignored.
+      LOG("Parameter decoding failed. Charset: " +
+          mapEntry.engine.queryCharset);
+      return gEmptyParseSubmissionResult;
+    }
+
+    LOG("Match found. Terms: " + terms);
+    return new ParseSubmissionResult(mapEntry.engine, terms);
+  },
+
   // nsIObserver
   observe: function SRCH_SVC_observe(aEngine, aTopic, aVerb) {
     switch (aTopic) {
       case SEARCH_ENGINE_TOPIC:
         switch (aVerb) {
           case SEARCH_ENGINE_LOADED:
             var engine = aEngine.QueryInterface(Ci.nsISearchEngine);
             LOG("nsSearchService::observe: Done installation of " + engine.name
                 + ".");
             this._addEngineToStore(engine.wrappedJSObject);
             if (engine.wrappedJSObject._useNow) {
               LOG("nsSearchService::observe: setting current");
               this.currentEngine = aEngine;
             }
-            this.batchTask.disarm();
-            this.batchTask.arm();
+            // The addition of the engine to the store always triggers an ADDED
+            // or a CHANGED notification, that will trigger the task below.
             break;
+          case SEARCH_ENGINE_ADDED:
           case SEARCH_ENGINE_CHANGED:
           case SEARCH_ENGINE_REMOVED:
             this.batchTask.disarm();
             this.batchTask.arm();
+            // Invalidate the map used to parse URLs to search engines.
+            this._parseSubmissionMap = null;
             break;
         }
         break;
 
       case QUIT_APPLICATION_TOPIC:
         this._removeObservers();
         break;
 
@@ -4586,17 +4789,17 @@ var engineUpdateService = {
     // We use the cache to store updated app engines, so refuse to update if the
     // cache is disabled.
     if (engine._readOnly &&
         !getBoolPref(BROWSER_SEARCH_PREF + "cache.enabled", true))
       return;
 
     let testEngine = null;
     let updateURL = engine._getURLOfType(URLTYPE_OPENSEARCH);
-    let updateURI = (updateURL && updateURL._hasRelation("self")) ? 
+    let updateURI = (updateURL && updateURL._hasRelation("self")) ?
                      updateURL.getSubmission("", engine).uri :
                      makeURI(engine._updateURL);
     if (updateURI) {
       if (engine._isDefault && !updateURI.schemeIs("https")) {
         ULOG("Invalid scheme for default engine update");
         return;
       }
 
new file mode 100644
--- /dev/null
+++ b/toolkit/components/search/tests/xpcshell/data/engine-fr.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SearchPlugin xmlns="http://www.mozilla.org/2006/browser/search/">
+<ShortName>Test search engine (fr)</ShortName>
+<Description>A test search engine (based on Google search for a different locale)</Description>
+<InputEncoding>ISO-8859-1</InputEncoding>
+<Url type="text/html" method="GET" template="http://www.google.fr/search" resultdomain="google.fr">
+  <Param name="q" value="{searchTerms}"/>
+  <Param name="ie" value="iso-8859-1"/>
+  <Param name="oe" value="iso-8859-1"/>
+</Url>
+<SearchForm>http://www.google.fr/</SearchForm>
+</SearchPlugin>
new file mode 100644
--- /dev/null
+++ b/toolkit/components/search/tests/xpcshell/test_SearchStaticData.js
@@ -0,0 +1,20 @@
+/* Any copyright is dedicated to the Public Domain.
+ *    http://creativecommons.org/publicdomain/zero/1.0/ */
+
+/*
+ * Tests the SearchStaticData module.
+ */
+
+"use strict";
+
+Cu.import("resource://gre/modules/SearchStaticData.jsm", this);
+
+function run_test() {
+  do_check_true(SearchStaticData.getAlternateDomains("www.google.com")
+                                .indexOf("www.google.fr") != -1);
+  do_check_true(SearchStaticData.getAlternateDomains("www.google.fr")
+                                .indexOf("www.google.com") != -1);
+  do_check_true(SearchStaticData.getAlternateDomains("www.google.com")
+                                .every(d => d.startsWith("www.google.")));
+  do_check_true(SearchStaticData.getAlternateDomains("google.com").length == 0);
+}
new file mode 100644
--- /dev/null
+++ b/toolkit/components/search/tests/xpcshell/test_parseSubmissionURL.js
@@ -0,0 +1,110 @@
+/* Any copyright is dedicated to the Public Domain.
+ *    http://creativecommons.org/publicdomain/zero/1.0/ */
+
+/*
+ * Tests the parseSubmissionURL API of the search service.
+ */
+
+"use strict";
+
+function run_test() {  // Entry point: set up the environment, then run tasks.
+  removeMetadata();    // NOTE(review): helpers not defined in this file;
+  updateAppInfo();     // presumably provided by head_search.js (listed as
+  useHttpServer();     // "head" in xpcshell.ini) — confirm their effects.
+
+  run_next_test();     // Continue with the test registered via add_task().
+}
+
+add_task(function* test_parseSubmissionURL() {
+  // Hide the default engines to prevent them from being used in the search.
+  for (let engine of Services.search.getEngines()) {
+    Services.search.removeEngine(engine);
+  }
+
+  let [engine1, engine2, engine3] = yield addTestEngines([
+    { name: "Test search engine", xmlFileName: "engine.xml" },
+    { name: "Test search engine (fr)", xmlFileName: "engine-fr.xml" },
+    { name: "bacon_addParam", details: ["", "bacon_addParam", "Search Bacon",
+                                        "GET", "http://www.bacon.test/find"] },
+    // The following engines cannot identify the search parameter.
+    { name: "A second test engine", xmlFileName: "engine2.xml" },
+    { name: "Sherlock test search engine", srcFileName: "engine.src",
+      iconFileName: "ico-size-16x16-png.ico" },
+    { name: "bacon", details: ["", "bacon", "Search Bacon", "GET",
+                               "http://www.bacon.moz/search?q={searchTerms}"] },
+  ]);
+
+  engine3.addParam("q", "{searchTerms}", null);  // Declares "q" as terms param.
+
+  // Test the first engine (UTF-8 URLs); "result" is reassigned by later checks.
+  let result = Services.search.parseSubmissionURL(
+                               "http://www.google.com/search?q=caff%C3%A8");
+  do_check_eq(result.engine, engine1);
+  do_check_eq(result.terms, "caff\u00E8");
+
+  // The second engine uses a locale-specific domain that is an alternate domain
+  // of the first one, but the second engine should get priority when matching.
+  // The URL used with this engine uses ISO-8859-1 encoding instead.
+  result = Services.search.parseSubmissionURL(
+                           "http://www.google.fr/search?q=caff%E8");
+  do_check_eq(result.engine, engine2);
+  do_check_eq(result.terms, "caff\u00E8");
+
+  // Test a domain that is an alternate domain of those defined.  In this case,
+  // the first matching engine from the ordered list should be returned.
+  result = Services.search.parseSubmissionURL(
+                           "http://www.google.co.uk/search?q=caff%C3%A8");
+  do_check_eq(result.engine, engine1);
+  do_check_eq(result.terms, "caff\u00E8");
+
+  // We support parsing URLs from a dynamically added engine.  Those engines use
+  // windows-1252 encoding by default.
+  result = Services.search.parseSubmissionURL(
+                           "http://www.bacon.test/find?q=caff%E8");
+  do_check_eq(result.engine, engine3);
+  do_check_eq(result.terms, "caff\u00E8");
+
+  // Parsing of parameters from an engine template URL is not supported.
+  do_check_eq(Services.search.parseSubmissionURL(
+                              "http://www.bacon.moz/search?q=").engine, null);
+  do_check_eq(Services.search.parseSubmissionURL(
+                              "https://duckduckgo.com?q=test").engine, null);
+  do_check_eq(Services.search.parseSubmissionURL(
+                              "https://duckduckgo.com/?q=test").engine, null);
+
+  // Sherlock engines are not supported.
+  do_check_eq(Services.search.parseSubmissionURL(
+                              "http://getfirefox.com?q=test").engine, null);
+  do_check_eq(Services.search.parseSubmissionURL(
+                              "http://getfirefox.com/?q=test").engine, null);
+
+  // HTTP and HTTPS schemes are interchangeable.
+  result = Services.search.parseSubmissionURL(
+                           "https://www.google.com/search?q=caff%C3%A8");
+  do_check_eq(result.engine, engine1);
+  do_check_eq(result.terms, "caff\u00E8");
+
+  // An empty query parameter should work the same.
+  result = Services.search.parseSubmissionURL(
+                           "http://www.google.com/search?q=");
+  do_check_eq(result.engine, engine1);
+  do_check_eq(result.terms, "");
+
+  // There should be no match when the path is different.
+  result = Services.search.parseSubmissionURL(
+                           "http://www.google.com/search/?q=test");
+  do_check_eq(result.engine, null);
+  do_check_eq(result.terms, "");
+
+  // There should be no match when the argument is different.
+  result = Services.search.parseSubmissionURL(
+                           "http://www.google.com/search?q2=test");
+  do_check_eq(result.engine, null);
+  do_check_eq(result.terms, "");
+
+  // There should be no match for URIs that are not HTTP or HTTPS.
+  result = Services.search.parseSubmissionURL(
+                           "file://localhost/search?q=test");
+  do_check_eq(result.engine, null);
+  do_check_eq(result.terms, "");
+});
--- a/toolkit/components/search/tests/xpcshell/xpcshell.ini
+++ b/toolkit/components/search/tests/xpcshell/xpcshell.ini
@@ -2,16 +2,17 @@
 head = head_search.js
 tail =
 firefox-appdir = browser
 support-files =
   data/chrome.manifest
   data/engine.src
   data/engine.xml
   data/engine2.xml
+  data/engine-fr.xml
   data/engineMaker.sjs
   data/engine-rel-searchform.xml
   data/engine-rel-searchform-post.xml
   data/engineImages.xml
   data/ico-size-16x16-png.ico
   data/invalid-engine.xml
   data/search-metadata.json
   data/search.json
@@ -28,16 +29,18 @@ support-files =
 [test_json_cache.js]
 [test_nodb.js]
 [test_nodb_pluschanges.js]
 [test_save_sorted_engines.js]
 [test_purpose.js]
 [test_defaultEngine.js]
 [test_prefSync.js]
 [test_notifications.js]
+[test_parseSubmissionURL.js]
+[test_SearchStaticData.js]
 [test_addEngine_callback.js]
 [test_multipleIcons.js]
 [test_resultDomain.js]
 [test_serialize_file.js]
 [test_searchSuggest.js]
 [test_async.js]
 [test_sync.js]
 [test_sync_fallback.js]