Bug 1532170 - Move search ignorelist data to RemoteSettings. r=mikedeboer,glasserc,a=pascalc
authorMark Banner <standard8@mozilla.com>
Tue, 23 Apr 2019 13:39:30 +0100
changeset 526295 327e3dce985ba4c81a2d8a46a56ea764e9c0aef4
parent 526294 e6a1ef8866c069bead8c3a9de23f1ba697c56703
child 526296 a64500e3a18b4355db601dcdeda5b348e31bb523
push id2032
push userffxbld-merge
push dateMon, 13 May 2019 09:36:57 +0000
treeherdermozilla-release@455c1065dcbe [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmikedeboer, glasserc, pascalc
bugs1532170
milestone67.0
Bug 1532170 - Move search ignorelist data to RemoteSettings. r=mikedeboer,glasserc,a=pascalc Differential Revision: https://phabricator.services.mozilla.com/D28474
services/settings/dumps/main/hijack-blocklists.json
services/settings/dumps/main/moz.build
toolkit/components/search/SearchService.jsm
toolkit/components/search/tests/xpcshell/head_search.js
toolkit/components/search/tests/xpcshell/test_ignorelist.js
toolkit/components/search/tests/xpcshell/test_ignorelist_update.js
toolkit/components/search/tests/xpcshell/test_json_cache_ignorelist.js
new file mode 100644
--- /dev/null
+++ b/services/settings/dumps/main/hijack-blocklists.json
@@ -0,0 +1,1 @@
+{"data":[{"schema":1554459974103,"matches":["[https]opensearch.startpageweb.com/bing-search.xml","[https]opensearch.startwebsearch.com/bing-search.xml","[https]opensearch.webstartsearch.com/bing-search.xml","[https]opensearch.webofsearch.com/bing-search.xml","[profile]/searchplugins/Yahoo! Powered.xml","[profile]/searchplugins/yahoo! powered.xml"],"id":"load-paths","last_modified":1554460008541},{"schema":1554320493893,"matches":["hspart=lvs","pc=COSP","clid=2308146","fr=mca","PC=MC0","lavasoft.gosearchresults","securedsearch.lavasoft"],"id":"submission-urls","last_modified":1554459974090}]}
\ No newline at end of file
--- a/services/settings/dumps/main/moz.build
+++ b/services/settings/dumps/main/moz.build
@@ -1,13 +1,14 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 FINAL_TARGET_FILES.defaults.settings.main += [
     'example.json',
+    'hijack-blocklists.json',
     'language-dictionaries.json',
     'onboarding.json',
     'sites-classification.json',
 ]
 
 if CONFIG['MOZ_BUILD_APP'] == 'browser':
     DIST_SUBDIR = 'browser'
--- a/toolkit/components/search/SearchService.jsm
+++ b/toolkit/components/search/SearchService.jsm
@@ -9,16 +9,17 @@ const {AppConstants} = ChromeUtils.impor
 
 XPCOMUtils.defineLazyModuleGetters(this, {
   DeferredTask: "resource://gre/modules/DeferredTask.jsm",
   OS: "resource://gre/modules/osfile.jsm",
   SearchStaticData: "resource://gre/modules/SearchStaticData.jsm",
   setTimeout: "resource://gre/modules/Timer.jsm",
   clearTimeout: "resource://gre/modules/Timer.jsm",
   ExtensionParent: "resource://gre/modules/ExtensionParent.jsm",
+  RemoteSettings: "resource://services-settings/remote-settings.js",
 });
 
 XPCOMUtils.defineLazyServiceGetters(this, {
   gEnvironment: ["@mozilla.org/process/environment;1", "nsIEnvironment"],
   gChromeReg: ["@mozilla.org/chrome/chrome-registry;1", "nsIChromeRegistry"],
 });
 
 const BROWSER_SEARCH_PREF = "browser.search.";
@@ -167,16 +168,22 @@ const SEARCH_DEFAULT_UPDATE_INTERVAL = 7
 const SEARCH_GEO_DEFAULT_UPDATE_INTERVAL = 2592000; // 30 days.
 
 /**
  * Prefixed to all search debug output.
  */
 const SEARCH_LOG_PREFIX = "*** Search: ";
 
 /**
+ * This is the Remote Settings key that we use to get the ignore lists for
+ * engines.
+ */
+const SETTINGS_IGNORELIST_KEY = "hijack-blocklists";
+
+/**
  * Outputs aText to the JavaScript console as well as to stdout.
  */
 function LOG(aText) {
   if (loggingEnabled) {
     dump(SEARCH_LOG_PREFIX + aText + "\n");
     Services.console.logStringMessage(aText);
   }
 }
@@ -2527,17 +2534,22 @@ ParseSubmissionResult.prototype = {
     return this._termsLength;
   },
   QueryInterface: ChromeUtils.generateQI([Ci.nsISearchParseSubmissionResult]),
 };
 
 const gEmptyParseSubmissionResult =
       Object.freeze(new ParseSubmissionResult(null, "", -1, 0));
 
-// nsISearchService
+/**
+ * The search service handles loading and maintaining of search engines. It will
+ * also work out the default lists for each locale/region.
+ *
+ * @implements {nsISearchService}
+ */
 function SearchService() {
   this._initObservers = PromiseUtils.defer();
 }
 
 SearchService.prototype = {
   classID: Components.ID("{7319788a-fe93-4db3-9f39-818cf08f4256}"),
 
   // The current status of initialization. Note that it does not determine if
@@ -2551,16 +2563,28 @@ SearchService.prototype = {
 
   // Reading the JSON cache file is the first thing done during initialization.
   // During the async init, we save it in a field so that if we have to do a
   // sync init before the async init finishes, we can avoid reading the cache
   // with sync disk I/O and handling lz4 decompression synchronously.
   // This is set back to null as soon as the initialization is finished.
   _cacheFileJSON: null,
 
+  /**
+   * Various search engines may be ignored if their submission urls contain a
+   * string that is in the list. The list is controlled via remote settings.
+   */
+  _submissionURLIgnoreList: [],
+
+  /**
+   * Various search engines may be ignored if their load path is contained
+   * in this list. The list is controlled via remote settings.
+   */
+  _loadPathIgnoreList: [],
+
   // If initialization has not been completed yet, perform synchronous
   // initialization.
   // Throws in case of initialization error.
   _ensureInitialized: function SRCH_SVC__ensureInitialized() {
     if (gInitialized) {
       if (!Components.isSuccessCode(this._initRV)) {
         LOG("_ensureInitialized: failure");
         throw this._initRV;
@@ -2597,16 +2621,18 @@ SearchService.prototype = {
     // much as possible.
     this._ensureKnownRegionPromise = ensureKnownRegion(this)
       .catch(ex => LOG("_init: failure determining region: " + ex))
       .finally(() => this._ensureKnownRegionPromise = null);
     if (!skipRegionCheck) {
       await this._ensureKnownRegionPromise;
     }
 
+    this._setupRemoteSettings().catch(Cu.reportError);
+
     try {
       await this._loadEngines(cache);
     } catch (ex) {
       this._initRV = Cr.NS_ERROR_FAILURE;
       LOG("_init: failure loading engines: " + ex);
     }
     // Make sure the current list of engines is persisted, without the need to wait.
     this._buildCache();
@@ -2618,16 +2644,100 @@ SearchService.prototype = {
       this._initObservers.reject(this._initRV);
     }
     Services.obs.notifyObservers(null, SEARCH_SERVICE_TOPIC, "init-complete");
 
     LOG("_init: Completed _init");
     return this._initRV;
   },
 
+  /**
+   * Obtains the remote settings for the search service. This should only be
+   * called from init(). Any subsequent updates to the remote settings are
+   * handled via a sync listener.
+   *
+   * For desktop, the initial remote settings are obtained from dumps in
+   * `services/settings/dumps/main/`. These are not shipped with Android, and
+   * hence the `get` may take a while to return.
+   */
+  async _setupRemoteSettings() {
+    const ignoreListSettings = RemoteSettings(SETTINGS_IGNORELIST_KEY);
+    // Trigger a get of the initial value.
+    const current = await ignoreListSettings.get();
+
+    // Now we have the values, listen for future updates.
+    this._ignoreListListener = this._handleIgnoreListUpdated.bind(this);
+    ignoreListSettings.on("sync", this._ignoreListListener);
+
+    await this._handleIgnoreListUpdated({data: {current}});
+    Services.obs.notifyObservers(null, SEARCH_SERVICE_TOPIC, "settings-update-complete");
+  },
+
+  /**
+   * This handles updating of the ignore list settings, and removing any ignored
+   * engines.
+   *
+   * @param {object} eventData
+   *   The event in the format received from RemoteSettings.
+   */
+  async _handleIgnoreListUpdated(eventData) {
+    LOG("_handleIgnoreListUpdated");
+    const {data: {current}} = eventData;
+
+    for (const entry of current) {
+      if (entry.id == "load-paths") {
+        this._loadPathIgnoreList = [...entry.matches];
+      } else if (entry.id == "submission-urls") {
+        this._submissionURLIgnoreList = [...entry.matches];
+      }
+    }
+
+    // If we have not finished initializing, then we wait for the initialization
+    // to complete.
+    if (!this.isInitialized) {
+      await this._initObservers;
+    }
+    // We try to remove engines manually, as this should be more efficient and
+    // we don't really want to cause a re-init as this upsets unit tests.
+    let engineRemoved = false;
+    for (let name in this._engines) {
+      let engine = this._engines[name];
+      if (this._engineMatchesIgnoreLists(engine)) {
+        await this.removeEngine(engine);
+        engineRemoved = true;
+      }
+    }
+    // If we've removed an engine, and we don't have any left, we need to do
+    // a re-init - it is possible the cache just had one engine in it, and that
+    // is now empty, so we need to load from our main list.
+    if (engineRemoved && !Object.keys(this._engines).length) {
+      this._reInit();
+    }
+  },
+
+  /**
+   * Determines if a given engine matches the ignorelists or not.
+   *
+   * @param {Engine} engine
+   *   The engine to check against the ignorelists.
+   * @returns {boolean}
+   *   Returns true if the engine matches a ignorelists entry.
+   */
+  _engineMatchesIgnoreLists(engine) {
+    if (this._loadPathIgnoreList.includes(engine._loadPath)) {
+      return true;
+    }
+    let url = engine._getURLOfType("text/html")
+                    .getSubmission("dummy", engine).uri.spec.toLowerCase();
+    if (this._submissionURLIgnoreList.some(code => url.includes(code.toLowerCase()))) {
+      return true;
+    }
+    return false;
+  },
+
   _metaData: { },
   setGlobalAttr(name, val) {
     this._metaData[name] = val;
     this.batchTask.disarm();
     this.batchTask.arm();
   },
   setVerifiedGlobalAttr(name, val) {
     this.setGlobalAttr(name, val);
@@ -2960,44 +3070,18 @@ SearchService.prototype = {
         LOG("batchTask: Invalidating engine cache");
         await this._buildCache();
       };
       this._batchTask = new DeferredTask(task, CACHE_INVALIDATION_DELAY);
     }
     return this._batchTask;
   },
 
-  _submissionURLIgnoreList: [
-    "ignore=true",
-    "hspart=lvs",
-    "pc=COSP",
-    "clid=2308146",
-    "fr=mca",
-    "PC=MC0",
-    "lavasoft.gosearchresults",
-    "securedsearch.lavasoft",
-  ],
-
-  _loadPathIgnoreList: [
-    "[other]addEngineWithDetails:searchignore@mozilla.com",
-    "[https]opensearch.startpageweb.com/bing-search.xml",
-    "[https]opensearch.startwebsearch.com/bing-search.xml",
-    "[https]opensearch.webstartsearch.com/bing-search.xml",
-    "[https]opensearch.webofsearch.com/bing-search.xml",
-    "[profile]/searchplugins/Yahoo! Powered.xml",
-    "[profile]/searchplugins/yahoo! powered.xml",
-  ],
-
   _addEngineToStore: function SRCH_SVC_addEngineToStore(aEngine) {
-    let url = aEngine._getURLOfType("text/html").getSubmission("dummy", aEngine).uri.spec.toLowerCase();
-    if (this._submissionURLIgnoreList.some(code => url.includes(code.toLowerCase()))) {
-      LOG("_addEngineToStore: Ignoring engine");
-      return;
-    }
-    if (this._loadPathIgnoreList.includes(aEngine._loadPath)) {
+    if (this._engineMatchesIgnoreLists(aEngine)) {
       LOG("_addEngineToStore: Ignoring engine");
       return;
     }
 
     LOG("_addEngineToStore: Adding engine: \"" + aEngine.name + "\"");
 
     // See if there is an existing engine with the same name. However, if this
     // engine is updating another engine, it's allowed to have the same name.
@@ -4409,16 +4493,21 @@ SearchService.prototype = {
       })(),
 
       () => shutdownState
     );
   },
   _observersAdded: false,
 
   _removeObservers: function SRCH_SVC_removeObservers() {
+    if (this._ignoreListListener) {
+      RemoteSettings(SETTINGS_IGNORELIST_KEY).off("sync", this._ignoreListListener);
+      delete this._ignoreListListener;
+    }
+
     Services.obs.removeObserver(this, SEARCH_ENGINE_TOPIC);
     Services.obs.removeObserver(this, QUIT_APPLICATION_TOPIC);
     Services.obs.removeObserver(this, TOPIC_LOCALES_CHANGE);
   },
 
   QueryInterface: ChromeUtils.generateQI([
     Ci.nsISearchService,
     Ci.nsIObserver,
--- a/toolkit/components/search/tests/xpcshell/head_search.js
+++ b/toolkit/components/search/tests/xpcshell/head_search.js
@@ -1,16 +1,17 @@
 /* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et: */
 
 const {XPCOMUtils} = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
 
 XPCOMUtils.defineLazyModuleGetters(this, {
   FileUtils: "resource://gre/modules/FileUtils.jsm",
   NetUtil: "resource://gre/modules/NetUtil.jsm",
+  RemoteSettings: "resource://services-settings/remote-settings.js",
   SearchTestUtils: "resource://testing-common/SearchTestUtils.jsm",
   Services: "resource://gre/modules/Services.jsm",
   setTimeout: "resource://gre/modules/Timer.jsm",
   TestUtils: "resource://testing-common/TestUtils.jsm",
 });
 
 var {OS, require} = ChromeUtils.import("resource://gre/modules/osfile.jsm");
 var {getAppInfo, newAppInfo, updateAppInfo} = ChromeUtils.import("resource://testing-common/AppInfo.jsm");
@@ -531,8 +532,42 @@ function checkCountryResultTelemetry(aEx
   let histogram = Services.telemetry.getHistogramById("SEARCH_SERVICE_COUNTRY_FETCH_RESULT");
   let snapshot = histogram.snapshot();
   if (aExpectedValue != null) {
     equal(snapshot.values[aExpectedValue], 1);
   } else {
     deepEqual(snapshot.values, {});
   }
 }
+
+/**
+ * Provides a basic set of remote settings for use in tests.
+ */
+async function setupRemoteSettings() {
+  const collection = await RemoteSettings("hijack-blocklists").openCollection();
+  await collection.clear();
+  await collection.create({
+    "id": "submission-urls",
+    "matches": [
+      "ignore=true",
+    ],
+  }, { synced: true });
+  await collection.create({
+    "id": "load-paths",
+    "matches": [
+      "[other]addEngineWithDetails:searchignore@mozilla.com",
+    ],
+  }, { synced: true });
+  await collection.db.saveLastModified(42);
+}
+
+/**
+ * Some tests might trigger initialisation which will trigger the search settings
+ * update. We need to make sure we wait for that to finish before we exit, otherwise
+ * it may cause shutdown issues.
+ */
+let updatePromise = SearchTestUtils.promiseSearchNotification("settings-update-complete");
+
+registerCleanupFunction(async () => {
+  if (!isChild && Services.search.isInitialized) {
+    await updatePromise;
+  }
+});
--- a/toolkit/components/search/tests/xpcshell/test_ignorelist.js
+++ b/toolkit/components/search/tests/xpcshell/test_ignorelist.js
@@ -6,30 +6,37 @@
 const kSearchEngineID1 = "ignorelist_test_engine1";
 const kSearchEngineID2 = "ignorelist_test_engine2";
 const kSearchEngineID3 = "ignorelist_test_engine3";
 const kSearchEngineURL1 = "http://example.com/?search={searchTerms}&ignore=true";
 const kSearchEngineURL2 = "http://example.com/?search={searchTerms}&IGNORE=TRUE";
 const kSearchEngineURL3 = "http://example.com/?search={searchTerms}";
 const kExtensionID = "searchignore@mozilla.com";
 
-add_task(async function test_ignorelistEngineLowerCase() {
-  Assert.ok(!Services.search.isInitialized);
+add_task(async function test_ignoreList() {
+  await setupRemoteSettings();
+
+  Assert.ok(!Services.search.isInitialized,
+    "Search service should not be initialized to begin with.");
+
+  let updatePromise = SearchTestUtils.promiseSearchNotification("settings-update-complete");
 
   await Services.search.addEngineWithDetails(kSearchEngineID1, "", "", "", "get", kSearchEngineURL1);
 
+  await updatePromise;
+
   // An ignored engine shouldn't be available at all
   let engine = Services.search.getEngineByName(kSearchEngineID1);
-  Assert.equal(engine, null, "Engine should not exist");
+  Assert.equal(engine, null, "Engine with ignored search params should not exist");
 
   await Services.search.addEngineWithDetails(kSearchEngineID2, "", "", "", "get", kSearchEngineURL2);
 
   // An ignored engine shouldn't be available at all
   engine = Services.search.getEngineByName(kSearchEngineID2);
-  Assert.equal(engine, null, "Engine should not exist");
+  Assert.equal(engine, null, "Engine with ignored search params of a different case should not exist");
 
   await Services.search.addEngineWithDetails(kSearchEngineID3, "", "", "", "get",
                                              kSearchEngineURL3, kExtensionID);
 
   // An ignored engine shouldn't be available at all
   engine = Services.search.getEngineByName(kSearchEngineID3);
-  Assert.equal(engine, null, "Engine should not exist");
+  Assert.equal(engine, null, "Engine with ignored extension id should not exist");
 });
new file mode 100644
--- /dev/null
+++ b/toolkit/components/search/tests/xpcshell/test_ignorelist_update.js
@@ -0,0 +1,58 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+const kSearchEngineID1 = "ignorelist_test_engine1";
+const kSearchEngineID2 = "ignorelist_test_engine2";
+const kSearchEngineID3 = "ignorelist_test_engine3";
+const kSearchEngineURL1 = "http://example.com/?search={searchTerms}&ignore=true";
+const kSearchEngineURL2 = "http://example.com/?search={searchTerms}&IGNORE=TRUE";
+const kSearchEngineURL3 = "http://example.com/?search={searchTerms}";
+const kExtensionID = "searchignore@mozilla.com";
+
+add_task(async function test_ignoreList() {
+  Assert.ok(!Services.search.isInitialized,
+    "Search service should not be initialized to begin with.");
+
+  let updatePromise = SearchTestUtils.promiseSearchNotification("settings-update-complete");
+  await Services.search.addEngineWithDetails(kSearchEngineID1, "", "", "", "get", kSearchEngineURL1);
+  await Services.search.addEngineWithDetails(kSearchEngineID2, "", "", "", "get", kSearchEngineURL2);
+  await Services.search.addEngineWithDetails(kSearchEngineID3, "", "", "", "get",
+                                             kSearchEngineURL3, kExtensionID);
+
+  // Ensure that the initial remote settings update from default values is
+  // complete. The defaults do not include the special inclusions inserted below.
+  await updatePromise;
+
+  for (let engineName of [kSearchEngineID1, kSearchEngineID2, kSearchEngineID3]) {
+    Assert.ok(await Services.search.getEngineByName(engineName),
+      `Engine ${engineName} should be present`);
+  }
+
+  // Simulate an ignore list update.
+  await RemoteSettings("hijack-blocklists").emit("sync", {
+    data: {
+      current: [{
+        "id": "load-paths",
+        "schema": 1553857697843,
+        "last_modified": 1553859483588,
+        "matches": [
+          "[other]addEngineWithDetails:searchignore@mozilla.com",
+        ],
+      }, {
+        "id": "submission-urls",
+        "schema": 1553857697843,
+        "last_modified": 1553859435500,
+        "matches": [
+          "ignore=true",
+        ],
+      }],
+    },
+  });
+
+  for (let engineName of [kSearchEngineID1, kSearchEngineID2, kSearchEngineID3]) {
+    Assert.equal(await Services.search.getEngineByName(engineName), null,
+      `Engine ${engineName} should not be present`);
+  }
+});
--- a/toolkit/components/search/tests/xpcshell/test_json_cache_ignorelist.js
+++ b/toolkit/components/search/tests/xpcshell/test_json_cache_ignorelist.js
@@ -7,53 +7,53 @@
 
 "use strict";
 
 var cacheTemplate, appPluginsPath, profPlugins;
 
 /**
  * Test reading from search.json.mozlz4
  */
-function run_test() {
+add_task(async function setup() {
+  await setupRemoteSettings();
+
   let cacheTemplateFile = do_get_file("data/search_ignorelist.json");
   cacheTemplate = readJSONFile(cacheTemplateFile);
   cacheTemplate.buildID = getAppInfo().platformBuildID;
 
   let engineFile = gProfD.clone();
   engineFile.append("searchplugins");
   engineFile.append("test-search-engine.xml");
   engineFile.parent.create(Ci.nsIFile.DIRECTORY_TYPE, FileUtils.PERMS_DIRECTORY);
 
   // Copy the test engine to the test profile.
   let engineTemplateFile = do_get_file("data/engine.xml");
   engineTemplateFile.copyTo(engineFile.parent, "test-search-engine.xml");
 
   // The list of visibleDefaultEngines needs to match or the cache will be ignored.
   cacheTemplate.visibleDefaultEngines = getDefaultEngineList(false);
 
-  run_next_test();
-}
-
-add_test(function prepare_test_data() {
-  promiseSaveCacheData(cacheTemplate).then(run_next_test);
+  await promiseSaveCacheData(cacheTemplate);
 });
 
 /**
  * Start the search service and confirm the cache was reset
  */
-add_test(function test_cache_rest() {
+add_task(async function test_cache_rest() {
   info("init search service");
 
-  Services.search.init().then(function initComplete(aResult) {
-    info("init'd search service");
-    Assert.ok(Components.isSuccessCode(aResult));
+  let updatePromise = SearchTestUtils.promiseSearchNotification("settings-update-complete");
+
+  let result = await Services.search.init();
+
+  Assert.ok(Components.isSuccessCode(result),
+    "Search service should be successfully initialized");
+  await updatePromise;
 
-    Services.search.getEngines().then(engines => {
-      // Engine list will have been reset to the default,
-      // Not the one engine in the cache.
-      // It should have more than one engine.
-      Assert.ok(engines.length > 1);
+  const engines = await Services.search.getEngines();
 
-      removeCacheFile();
-      run_next_test();
-    });
-  });
+  // Engine list will have been reset to the default,
+  // Not the one engine in the cache.
+  // It should have more than one engine.
+  Assert.greater(engines.length, 1, "Should have more than one engine in the list");
+
+  removeCacheFile();
 });