Bug 1514780 - Use the Quantum Bar tokenizer in UnifiedComplete. r=adw
author: Marco Bonardo <mbonardo@mozilla.com>
Thu, 17 Jan 2019 11:26:29 +0000
changeset 511370 f2f4a6eb1576cba580ee99971f5fc035cbaeeab1
parent 511369 d11bb3bd81951e0e5a66bc9e3ae80ca22d2ad13f
child 511378 1bbec154fb73b0fa3756a9ed417815c87fc82b4e
child 511379 ff26ee3e478c19baf73700cbd9467e897711ad66
push id: 10547
push user: ffxbld-merge
push date: Mon, 21 Jan 2019 13:03:58 +0000
treeherder: mozilla-beta@24ec1916bffe [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: adw
bugs: 1514780
milestone: 66.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1514780 - Use the Quantum Bar tokenizer in UnifiedComplete. r=adw Differential Revision: https://phabricator.services.mozilla.com/D16676
browser/components/urlbar/UrlbarTokenizer.jsm
browser/components/urlbar/tests/unit/test_tokenizer.js
toolkit/components/places/UnifiedComplete.js
toolkit/components/places/tests/unifiedcomplete/test_search_suggestions.js
toolkit/components/places/tests/unifiedcomplete/test_special_search.js
toolkit/components/places/tests/unifiedcomplete/test_word_boundary_search.js
--- a/browser/components/urlbar/UrlbarTokenizer.jsm
+++ b/browser/components/urlbar/UrlbarTokenizer.jsm
@@ -20,18 +20,19 @@ ChromeUtils.defineModuleGetter(this, "Lo
 XPCOMUtils.defineLazyGetter(this, "logger", () =>
   Log.repository.getLogger("Places.Urlbar.Tokenizer"));
 
 var UrlbarTokenizer = {
   // Regex matching on whitespaces.
   REGEXP_SPACES: /\s+/,
 
   // Regex used to guess url-like strings.
-  // These are not expected to cover 100% of the cases.
-  REGEXP_PROTOCOL: /^[A-Z+.-]+:(\/\/)?(?!\/)/i,
+  // These are not expected to be 100% correct, we accept some user mistypes
+  // and we're unlikely to be able to cover 100% of the cases.
+  REGEXP_LIKE_PROTOCOL: /^[A-Z+.-]+:\/{0,2}(?!\/)/i,
   REGEXP_USERINFO_INVALID_CHARS: /[^\w.~%!$&'()*+,;=:-]/,
   REGEXP_HOSTPORT_INVALID_CHARS: /[^\[\]A-Z0-9.:-]/i,
   REGEXP_HOSTPORT_IP_LIKE: /^[a-f0-9\.\[\]:]+$/i,
   REGEXP_HOSTPORT_INVALID_IP: /\.{2,}|\d{5,}|\d{4,}(?![:\]])|^\.|\.$|^(\d+\.){4,}\d+$|^\d+$/,
   REGEXP_HOSTPORT_IPV4: /^(\d{1,3}\.){3,}\d{1,3}(:\d+)?$/,
   REGEXP_HOSTPORT_IPV6: /^[0-9A-F:\[\]]{1,4}$/i,
   REGEXP_COMMON_EMAIL: /^[\w!#$%&'*+\/=?^`{|}~-]+@[\[\]A-Z0-9.-]+$/i,
 
@@ -79,17 +80,17 @@ var UrlbarTokenizer = {
    */
   looksLikeUrl(token, options = {}) {
     if (token.length < 2)
       return false;
     // It should be a single word.
     if (this.REGEXP_SPACES.test(token))
       return false;
     // If it starts with something that looks like a protocol, it's likely a url.
-    if (this.REGEXP_PROTOCOL.test(token))
+    if (this.REGEXP_LIKE_PROTOCOL.test(token))
       return true;
     // Guess path and prePath. At this point we should be analyzing strings not
     // having a protocol.
     let slashIndex = token.indexOf("/");
     let prePath = slashIndex != -1 ? token.slice(0, slashIndex) : token;
     if (!this.looksLikeOrigin(prePath))
       return false;
 
@@ -98,16 +99,21 @@ var UrlbarTokenizer = {
     if (options.requirePath && !path)
       return false;
     // If there are both path and userinfo, it's likely a url.
     let atIndex = prePath.indexOf("@");
     let userinfo = atIndex != -1 ? prePath.slice(0, atIndex) : "";
     if (path.length && userinfo.length)
       return true;
 
+    // If the first character after the slash in the path is a letter, then the
+    // token may be an "abc/def" url.
+    if (/^\/[a-z]/i.test(path)) {
+      return true;
+    }
     // If the path contains special chars, it is likely a url.
     if (["%", "?", "#"].some(c => path.includes(c)))
       return true;
 
     // The above looksLikeOrigin call told us the prePath looks like an origin,
     // now we go into details checking some common origins.
     let hostPort = atIndex != -1 ? prePath.slice(atIndex + 1) : prePath;
     if (this.REGEXP_HOSTPORT_IPV4.test(hostPort))
@@ -127,31 +133,36 @@ var UrlbarTokenizer = {
    * returns false, it's surely not an origin, if it returns true, the result
    * must still be verified through URIFixup.
    *
    * @param {string} token
    *        The string token to verify
    * @returns {boolean} whether the token looks like an origin.
    */
   looksLikeOrigin(token) {
+    if (token.length == 0) {
+      return false;
+    }
     let atIndex = token.indexOf("@");
     if (atIndex != -1 && this.REGEXP_COMMON_EMAIL.test(token)) {
       // We prefer handling it as an email rather than an origin with userinfo.
       return false;
     }
     let userinfo = atIndex != -1 ? token.slice(0, atIndex) : "";
     let hostPort = atIndex != -1 ? token.slice(atIndex + 1) : token;
     logger.debug("userinfo", userinfo);
     logger.debug("hostPort", hostPort);
-    if (this.REGEXP_HOSTPORT_IPV4.test(hostPort))
+    if (this.REGEXP_HOSTPORT_IPV4.test(hostPort) ||
+        this.REGEXP_HOSTPORT_IPV6.test(hostPort)) {
       return true;
-    if (this.REGEXP_HOSTPORT_IPV6.test(hostPort))
-      return true;
+    }
+
     // Check for invalid chars.
-    return !this.REGEXP_USERINFO_INVALID_CHARS.test(userinfo) &&
+    return !this.REGEXP_LIKE_PROTOCOL.test(hostPort) &&
+           !this.REGEXP_USERINFO_INVALID_CHARS.test(userinfo) &&
            !this.REGEXP_HOSTPORT_INVALID_CHARS.test(hostPort) &&
            (!this.REGEXP_HOSTPORT_IP_LIKE.test(hostPort) ||
             !this.REGEXP_HOSTPORT_INVALID_IP.test(hostPort));
   },
 
   /**
    * Tokenizes the searchString from a QueryContext.
    * @param {object} queryContext
@@ -232,43 +243,76 @@ function splitString(searchString) {
 
 /**
  * Given an array of unfiltered tokens, this function filters them and converts
  * to token objects with a type.
  *
  * @param {array} tokens
  *        An array of strings, representing search tokens.
  * @returns {array} An array of token objects.
+ * @note restriction characters are only considered if they appear at the start
+ *       or at the end of the tokens list. In case of restriction characters
+ *       conflict, the most external ones win. Leading ones win over trailing
+ *       ones. Discarded restriction characters are considered text.
  */
 function filterTokens(tokens) {
   let filtered = [];
-  let foundRestriction = [];
-  // Tokens that can be combined with others (but not with themselves).
-  // We can have a maximum of 2 tokens, one combinable and one non-combinable.
-  let combinables = new Set([
-    UrlbarTokenizer.TYPE.RESTRICT_TITLE,
-    UrlbarTokenizer.TYPE.RESTRICT_URL,
-  ]);
-  for (let token of tokens) {
+  let restrictions = [];
+  for (let i = 0; i < tokens.length; ++i) {
+    let token = tokens[i];
     let tokenObj = {
       value: token,
       type: UrlbarTokenizer.TYPE.TEXT,
     };
     let restrictionType = CHAR_TO_TYPE_MAP.get(token);
-    let firstRestriction = foundRestriction.length > 0 ? foundRestriction[0] : null;
-    if (tokens.length > 1 &&
-        restrictionType &&
-        !firstRestriction ||
-        (foundRestriction.length == 1 &&
-         (combinables.has(firstRestriction) && !combinables.has(restrictionType)) ||
-         (!combinables.has(firstRestriction) && combinables.has(restrictionType)))) {
-      tokenObj.type = restrictionType;
-      foundRestriction.push(restrictionType);
+    if (restrictionType) {
+      restrictions.push({index: i, type: restrictionType});
     } else if (UrlbarTokenizer.looksLikeOrigin(token)) {
       tokenObj.type = UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN;
     } else if (UrlbarTokenizer.looksLikeUrl(token, {requirePath: true})) {
       tokenObj.type = UrlbarTokenizer.TYPE.POSSIBLE_URL;
     }
     filtered.push(tokenObj);
   }
+
+  // Handle restriction characters.
+  if (restrictions.length > 0) {
+    // We can apply two kind of restrictions: type (bookmark, search, ...) and
+    // matching (url, title). These kind of restrictions can be combined, but we
+    // can only have one restriction per kind.
+    let matchingRestrictionFound = false;
+    let typeRestrictionFound = false;
+    function assignRestriction(r) {
+      if (r && !(matchingRestrictionFound && typeRestrictionFound)) {
+        if ([UrlbarTokenizer.TYPE.RESTRICT_TITLE,
+             UrlbarTokenizer.TYPE.RESTRICT_URL].includes(r.type)) {
+          if (!matchingRestrictionFound) {
+            matchingRestrictionFound = true;
+            filtered[r.index].type = r.type;
+            return true;
+          }
+        } else if (!typeRestrictionFound) {
+          typeRestrictionFound = true;
+          filtered[r.index].type = r.type;
+          return true;
+        }
+      }
+      return false;
+    }
+
+    // Look at the first token.
+    let found = assignRestriction(restrictions.find(r => r.index == 0));
+    if (found) {
+      // If the first token was assigned, look at the next one.
+      assignRestriction(restrictions.find(r => r.index == 1));
+    }
+    // Then look at the last token.
+    let lastIndex = tokens.length - 1;
+    found = assignRestriction(restrictions.find(r => r.index == lastIndex));
+    if (found) {
+      // If the last token was assigned, look at the previous one.
+      assignRestriction(restrictions.find(r => r.index == lastIndex - 1));
+    }
+  }
+
   logger.info("Filtered Tokens", tokens);
   return filtered;
 }
--- a/browser/components/urlbar/tests/unit/test_tokenizer.js
+++ b/browser/components/urlbar/tests/unit/test_tokenizer.js
@@ -1,18 +1,16 @@
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
 add_task(async function test_tokenizer() {
   let testContexts = [
     { desc: "Empty string",
-      searchString: "test",
-      expectedTokens: [
-        { value: "test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
-      ],
+      searchString: "",
+      expectedTokens: [],
     },
     { desc: "Single word string",
       searchString: "test",
       expectedTokens: [
         { value: "test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
       ],
     },
     { desc: "Multi word string with mixed whitespace types",
@@ -40,16 +38,30 @@ add_task(async function test_tokenizer()
     },
     { desc: "boundary restriction char at end",
       searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
       expectedTokens: [
         { value: "test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
         { value: UrlbarTokenizer.RESTRICT.BOOKMARK, type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK },
       ],
     },
+    { desc: "separate restriction char in the middle",
+      searchString: `test ${UrlbarTokenizer.RESTRICT.BOOKMARK} test`,
+      expectedTokens: [
+        { value: "test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+        { value: UrlbarTokenizer.RESTRICT.BOOKMARK, type: UrlbarTokenizer.TYPE.TEXT },
+        { value: "test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+      ],
+    },
+    { desc: "restriction char in the middle",
+      searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`,
+      expectedTokens: [
+        { value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`, type: UrlbarTokenizer.TYPE.TEXT },
+      ],
+    },
     { desc: "double boundary restriction char",
       searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK}test${UrlbarTokenizer.RESTRICT.TITLE}`,
       expectedTokens: [
         { value: UrlbarTokenizer.RESTRICT.BOOKMARK, type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK },
         { value: `test${UrlbarTokenizer.RESTRICT.TITLE}`, type: UrlbarTokenizer.TYPE.TEXT },
       ],
     },
     { desc: "double non-combinable restriction char, single char string",
@@ -64,17 +76,17 @@ add_task(async function test_tokenizer()
       expectedTokens: [
         { value: UrlbarTokenizer.RESTRICT.BOOKMARK, type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK },
         { value: UrlbarTokenizer.RESTRICT.TITLE, type: UrlbarTokenizer.TYPE.RESTRICT_TITLE },
       ],
     },
     { desc: "only the boundary restriction char",
       searchString: UrlbarTokenizer.RESTRICT.BOOKMARK,
       expectedTokens: [
-        { value: UrlbarTokenizer.RESTRICT.BOOKMARK, type: UrlbarTokenizer.TYPE.TEXT },
+        { value: UrlbarTokenizer.RESTRICT.BOOKMARK, type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK },
       ],
     },
     // Some restriction chars may be # or ?, that are also valid path parts.
     // The next 2 tests will check we consider those as part of url paths.
     { desc: "boundary # char on path",
       searchString: "test/#",
       expectedTokens: [
         { value: "test/#", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
@@ -85,18 +97,18 @@ add_task(async function test_tokenizer()
       expectedTokens: [
         { value: "test/?", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
       ],
     },
     { desc: "multiple boundary restriction chars suffix",
       searchString: `test ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG}`,
       expectedTokens: [
         { value: "test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
-        { value: UrlbarTokenizer.RESTRICT.HISTORY, type: UrlbarTokenizer.TYPE.RESTRICT_HISTORY },
-        { value: UrlbarTokenizer.RESTRICT.TAG, type: UrlbarTokenizer.TYPE.TEXT },
+        { value: UrlbarTokenizer.RESTRICT.HISTORY, type: UrlbarTokenizer.TYPE.TEXT },
+        { value: UrlbarTokenizer.RESTRICT.TAG, type: UrlbarTokenizer.TYPE.RESTRICT_TAG },
       ],
     },
     { desc: "multiple boundary restriction chars prefix",
       searchString: `${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG} test`,
       expectedTokens: [
         { value: UrlbarTokenizer.RESTRICT.HISTORY, type: UrlbarTokenizer.TYPE.RESTRICT_HISTORY },
         { value: UrlbarTokenizer.RESTRICT.TAG, type: UrlbarTokenizer.TYPE.TEXT },
         { value: "test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
@@ -122,16 +134,28 @@ add_task(async function test_tokenizer()
       ],
     },
     { desc: "protocol",
       searchString: "http://test",
       expectedTokens: [
         { value: "http://test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
       ],
     },
+    { desc: "almost a protocol",
+      searchString: "http:",
+      expectedTokens: [
+        { value: "http:", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
+    { desc: "almost a protocol 2",
+      searchString: "http:/",
+      expectedTokens: [
+        { value: "http:/", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
     { desc: "bogus protocol",
       searchString: "http:///",
       expectedTokens: [
         { value: "http:///", type: UrlbarTokenizer.TYPE.TEXT },
       ],
     },
     { desc: "userinfo",
       searchString: "user:pass@test",
@@ -164,16 +188,28 @@ add_task(async function test_tokenizer()
       ],
     },
     { desc: "invalid ip",
       searchString: "192.2134.1.2",
       expectedTokens: [
         { value: "192.2134.1.2", type: UrlbarTokenizer.TYPE.TEXT },
       ],
     },
+    { desc: "ipv4",
+      searchString: "1.2.3.4",
+      expectedTokens: [
+        { value: "1.2.3.4", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
+      ],
+    },
+    { desc: "host/path",
+      searchString: "test/test",
+      expectedTokens: [
+        { value: "test/test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
+      ],
+    },
   ];
 
   for (let queryContext of testContexts) {
     info(queryContext.desc);
     let newQueryContext = UrlbarTokenizer.tokenize(queryContext);
     Assert.equal(queryContext, newQueryContext,
                  "The queryContext object is the same");
     Assert.deepEqual(queryContext.tokens, queryContext.expectedTokens,
--- a/toolkit/components/places/UnifiedComplete.js
+++ b/toolkit/components/places/UnifiedComplete.js
@@ -31,38 +31,30 @@ const FRECENCY_DEFAULT = 1000;
 
 // After this time, we'll give up waiting for the extension to return matches.
 const MAXIMUM_ALLOWED_EXTENSION_TIME_MS = 3000;
 
 // By default we add remote tabs that have been used less than this time ago.
 // Any remaining remote tabs are added in queue if no other results are found.
 const RECENT_REMOTE_TAB_THRESHOLD_MS = 259200000; // 72 hours.
 
-// A regex that matches "single word" hostnames for whitelisting purposes.
-// The hostname will already have been checked for general validity, so we
-// don't need to be exhaustive here, so allow dashes anywhere.
-const REGEXP_SINGLEWORD_HOST = new RegExp("^[a-z0-9-]+$", "i");
-
 // Regex used to match userContextId.
 const REGEXP_USER_CONTEXT_ID = /(?:^| )user-context-id:(\d+)/;
 
 // Regex used to match maxResults.
 const REGEXP_MAX_RESULTS = /(?:^| )max-results:(\d+)/;
 
 // Regex used to match insertMethod.
 const REGEXP_INSERT_METHOD = /(?:^| )insert-method:(\d+)/;
 
 // Regex used to match one or more whitespace.
 const REGEXP_SPACES = /\s+/;
 
 // Regex used to strip prefixes from URLs.  See stripPrefix().
-const REGEXP_STRIP_PREFIX = /^[a-zA-Z]+:(?:\/\/)?/;
-
-// Cannot contains spaces or path delims.
-const REGEXP_ORIGIN = /^[^\s\/\?\#]+$/;
+const REGEXP_STRIP_PREFIX = /^[a-z]+:(?:\/){0,2}/i;
 
 // The result is notified on a delay, to avoid rebuilding the panel at every match.
 const NOTIFYRESULT_DELAY_MS = 16;
 
 // Sqlite result row index constants.
 const QUERYINDEX_QUERYTYPE     = 0;
 const QUERYINDEX_URL           = 1;
 const QUERYINDEX_TITLE         = 2;
@@ -339,25 +331,16 @@ XPCOMUtils.defineLazyModuleGetters(this,
   UrlbarProvidersManager: "resource:///modules/UrlbarProvidersManager.jsm",
   UrlbarTokenizer: "resource:///modules/UrlbarTokenizer.jsm",
   UrlbarUtils: "resource:///modules/UrlbarUtils.jsm",
 });
 
 XPCOMUtils.defineLazyPreferenceGetter(this, "syncUsernamePref",
                                       "services.sync.username");
 
-// The special characters below can be typed into the urlbar to either restrict
-// the search to visited history, bookmarked, tagged pages; or force a match on
-// just the title text or url.
-XPCOMUtils.defineLazyGetter(this, "TOKEN_TO_BEHAVIOR_MAP", () => new Map(
-  Object.entries(UrlbarTokenizer.RESTRICT).map(
-    ([type, char]) => [char, type.toLowerCase()]
-  )
-));
-
 function setTimeout(callback, ms) {
   let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
   timer.initWithCallback(callback, ms, timer.TYPE_ONE_SHOT);
   return timer;
 }
 
 const kProtocolsWithIcons = ["chrome:", "moz-extension:", "about:", "http:", "https:", "ftp:"];
 function iconHelper(url) {
@@ -404,31 +387,30 @@ XPCOMUtils.defineLazyGetter(this, "Prelo
   },
 }));
 
 XPCOMUtils.defineLazyGetter(this, "ProfileAgeCreatedPromise", async () => {
   let times = await ProfileAge();
   return times.created;
 });
 
-// Helper functions
+// Maps restriction character types to textual behaviors.
+XPCOMUtils.defineLazyGetter(this, "typeToBehaviorMap", () => {
+  return new Map([
+    [UrlbarTokenizer.TYPE.RESTRICT_HISTORY, "history"],
+    [UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK, "bookmark"],
+    [UrlbarTokenizer.TYPE.RESTRICT_TAG, "tag"],
+    [UrlbarTokenizer.TYPE.RESTRICT_OPENPAGE, "openpage"],
+    [UrlbarTokenizer.TYPE.RESTRICT_SEARCH, "search"],
+    [UrlbarTokenizer.TYPE.RESTRICT_TITLE, "title"],
+    [UrlbarTokenizer.TYPE.RESTRICT_URL, "url"],
+  ]);
+});
 
-/**
- * Generates the tokens used in searching from a given string.
- *
- * @param searchString
- *        The string to generate tokens from.
- * @return an array of tokens.
- * @note Calling split on an empty string will return an array containing one
- *       empty string.  We don't want that, as it'll break our logic, so return
- *       an empty array then.
- */
-function getUnfilteredSearchTokens(searchString) {
-  return searchString.length ? searchString.split(REGEXP_SPACES) : [];
-}
+// Helper functions
 
 /**
  * Strips the prefix from a URL and returns the prefix and the remainder of the
  * URL.  "Prefix" is defined to be the scheme and colon, plus, if present, two
  * slashes.  If the given string is not actually a URL, then an empty prefix and
  * the string itself is returned.
  *
  * @param  str
@@ -522,24 +504,16 @@ function looksLikeUrl(str, ignoreAlphanu
   return !REGEXP_SPACES.test(str) &&
          (["/", "@", ":", "["].some(c => str.includes(c)) ||
           (ignoreAlphanumericHosts ?
             /^([\[\]A-Z0-9.:-]+[\.:]){3,}[\[\]A-Z0-9.:-]+$/i.test(str) :
             str.includes(".")));
 }
 
 /**
- * Returns whether the passed in string looks like an origin.
- */
-function looksLikeOrigin(str) {
-  // Single word not including path delimiters.
-  return REGEXP_ORIGIN.test(str);
-}
-
-/**
  * Returns the portion of a string starting at the index where another string
  * begins.
  *
  * @param   {string} sourceStr
  *          The string to search within.
  * @param   {string} targetStr
  *          The string to search for.
  * @returns {string} The substring within sourceStr starting at targetStr, or
@@ -595,18 +569,20 @@ function substringAfter(sourceStr, targe
  * @param [optional] previousResult
  *        The result object from the previous search. if available.
  */
 function Search(searchString, searchParam, autocompleteListener,
                 autocompleteSearch, prohibitSearchSuggestions, previousResult) {
   // We want to store the original string for case sensitive searches.
   this._originalSearchString = searchString;
   this._trimmedOriginalSearchString = searchString.trim();
-  let [prefix, suffix] = stripPrefix(this._trimmedOriginalSearchString);
-  this._searchString = Services.textToSubURI.unEscapeURIForUI("UTF-8", suffix);
+  let unescapedSearchString =
+    Services.textToSubURI.unEscapeURIForUI("UTF-8", this._trimmedOriginalSearchString);
+  let [prefix, suffix] = stripPrefix(unescapedSearchString);
+  this._searchString = suffix;
   this._strippedPrefix = prefix.toLowerCase();
 
   this._matchBehavior = Ci.mozIPlacesAutoComplete.MATCH_BOUNDARY;
   // Set the default behavior for this search.
   this._behavior = this._searchString ? UrlbarPrefs.get("defaultBehavior")
                                       : UrlbarPrefs.get("emptySearchDefaultBehavior");
 
   let params = new Set(searchParam.split(" "));
@@ -622,44 +598,46 @@ function Search(searchString, searchPara
                                 : UrlbarPrefs.get("maxRichResults");
 
   // Extract the user-context-id param.
   let userContextId = searchParam.match(REGEXP_USER_CONTEXT_ID);
   this._userContextId = userContextId ?
                           parseInt(userContextId[1], 10) :
                           Ci.nsIScriptSecurityManager.DEFAULT_USER_CONTEXT_ID;
 
-  let unfilteredTokens = getUnfilteredSearchTokens(this._searchString);
+  // Use the original string here, not the stripped one, so the tokenizer can
+  // properly recognize token types.
+  let {tokens} = UrlbarTokenizer.tokenize({searchString: unescapedSearchString});
 
-  // We handle any leading restriction character specially, in particular for
-  // a search restriction we also handle the case where there's no space before
-  // the query, like "?porcupine".
+  // This allows to handle a leading restriction character specially.
   this._leadingRestrictionToken = null;
-  if (unfilteredTokens.length > 1 &&
-      this._trimmedOriginalSearchString.startsWith(unfilteredTokens[0]) &&
-      Object.values(UrlbarTokenizer.RESTRICT).includes(unfilteredTokens[0])) {
-    this._leadingRestrictionToken = unfilteredTokens[0];
-  } else if (this._trimmedOriginalSearchString.startsWith(UrlbarTokenizer.RESTRICT.SEARCH)) {
-    this._leadingRestrictionToken = UrlbarTokenizer.RESTRICT.SEARCH;
+  if (tokens.length > 0) {
+    if (UrlbarTokenizer.isRestrictionToken(tokens[0]) &&
+        (tokens.length > 1 || tokens[0].type == UrlbarTokenizer.TYPE.RESTRICT_SEARCH)) {
+      this._leadingRestrictionToken = tokens[0].value;
+    }
+    // Check if the first token has a strippable prefix and remove it, but don't
+    // create an empty token.
+    if (prefix && tokens[0].value.length > prefix.length) {
+      tokens[0].value = tokens[0].value.substring(prefix.length);
+    }
   }
 
-  this._searchTokens = this.filterTokens(unfilteredTokens);
+  this._searchTokens = this.filterTokens(tokens);
 
   // The heuristic token is the first filtered search token, but only when it's
   // actually the first thing in the search string.  If a prefix or restriction
   // character occurs first, then the heurstic token is null.  We use the
   // heuristic token to help determine the heuristic result.  It may be a Places
   // keyword, a search engine alias, an extension keyword, or simply a URL or
   // part of the search string the user has typed.  We won't know until we
   // create the heuristic result.
-  this._heuristicToken =
-    this._searchTokens[0] &&
-      this._trimmedOriginalSearchString.startsWith(this._searchTokens[0]) ?
-    this._searchTokens[0] :
-    null;
+  let firstToken = this._searchTokens.length > 0 && this._searchTokens[0].value;
+  this._heuristicToken = firstToken &&
+    this._trimmedOriginalSearchString.startsWith(firstToken) ? firstToken : null;
 
   this._keywordSubstitute = null;
 
   this._prohibitSearchSuggestions = prohibitSearchSuggestions;
 
   this._listener = autocompleteListener;
   this._autocompleteSearch = autocompleteSearch;
 
@@ -763,52 +741,56 @@ Search.prototype = {
       this._sleepResolve = resolve;
       this._sleepTimer.initWithCallback(resolve, aTimeMs,
                                         Ci.nsITimer.TYPE_ONE_SHOT);
     });
   },
 
   /**
    * Given an array of tokens, this function determines which query should be
-   * ran.  It also removes any special search tokens.  The given array of tokens
-   * is modified in place and returned.
+   * ran.  It also removes any special search tokens.
    *
    * @param tokens
-   *        An array of search tokens.  This array is modified in place.
-   * @return The given array of tokens, modified to remove special search tokens.
+   *        An array of search tokens.
+   * @return A new, filtered array of tokens.
    */
   filterTokens(tokens) {
     let foundToken = false;
     // Set the proper behavior while filtering tokens.
-    for (let i = tokens.length - 1; i >= 0; i--) {
-      let behavior = TOKEN_TO_BEHAVIOR_MAP.get(tokens[i]);
+    let filtered = [];
+    for (let token of tokens) {
+      if (!UrlbarTokenizer.isRestrictionToken(token)) {
+        filtered.push(token);
+        continue;
+      }
+      let behavior = typeToBehaviorMap.get(token.type);
+      if (!behavior) {
+        throw new Error(`Unknown token type ${token.type}`);
+      }
       // Don't remove the token if it didn't match, or if it's an action but
       // actions are not enabled.
-      if (behavior && (behavior != "openpage" || this._enableActions)) {
+      if (behavior != "openpage" || this._enableActions) {
         // Don't use the suggest preferences if it is a token search and
         // set the restrict bit to 1 (to intersect the search results).
         if (!foundToken) {
           foundToken = true;
           // Do not take into account previous behavior (e.g.: history, bookmark)
           this._behavior = 0;
           this.setBehavior("restrict");
         }
         this.setBehavior(behavior);
-        tokens.splice(i, 1);
       }
     }
-
     // Set the right JavaScript behavior based on our preference.  Note that the
     // preference is whether or not we should filter JavaScript, and the
     // behavior is if we should search it or not.
     if (!UrlbarPrefs.get("filter.javascript")) {
       this.setBehavior("javascript");
     }
-
-    return tokens;
+    return filtered;
   },
 
   /**
    * Stop this search.
    * After invoking this method, we won't run any more searches or heuristics,
    * and no new matches may be added to the current result.
    */
   stop() {
@@ -962,17 +944,17 @@ Search.prototype = {
     // Start adding search suggestions, unless they aren't required or the
     // window is private.
     let searchSuggestionsCompletePromise = Promise.resolve();
     if (this._enableActions &&
         this.hasBehavior("search") &&
         !this._inPrivateWindow) {
       let query =
         this._searchEngineAliasMatch ? this._searchEngineAliasMatch.query :
-        substringAt(this._originalSearchString, this._searchTokens[0]);
+        substringAt(this._originalSearchString, this._searchTokens[0].value);
       if (query) {
         // Limit the string sent for search suggestions to a maximum length.
         query = query.substr(0, UrlbarPrefs.get("maxCharsForSearchSuggestions"));
         // Don't add suggestions if the query may expose sensitive information.
         if (!this._prohibitSearchSuggestionsFor(query)) {
           let engine;
           if (this._searchEngineAliasMatch) {
             engine = this._searchEngineAliasMatch.engine;
@@ -1412,41 +1394,46 @@ Search.prototype = {
     }
 
     // Suggestions for a single letter are unlikely to be useful.
     if (searchString.length < 2)
       return true;
 
     // The first token may be a whitelisted host.
     if (this._searchTokens.length == 1 &&
-        REGEXP_SINGLEWORD_HOST.test(this._searchTokens[0]) &&
-        Services.uriFixup.isDomainWhitelisted(this._searchTokens[0], -1)) {
+        this._searchTokens[0].type == UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN &&
+        Services.uriFixup.isDomainWhitelisted(this._searchTokens[0].value, -1)) {
       return true;
     }
 
     // Disallow fetching search suggestions for strings that start off looking
     // like urls.
     if (DISALLOWED_URLLIKE_PREFIXES.some(prefix => this._trimmedOriginalSearchString == prefix) ||
         DISALLOWED_URLLIKE_PREFIXES.some(prefix => this._trimmedOriginalSearchString.startsWith(prefix + ":"))) {
       return true;
     }
 
-    // Disallow fetching search suggestions for strings looking like URLs, to
-    // avoid disclosing information about networks or passwords.
-    return this._searchTokens.some(looksLikeUrl);
+    // Disallow fetching search suggestions for strings looking like URLs, or
+    // non-alphanumeric origins, to avoid disclosing information about networks
+    // or passwords.
+    return this._searchTokens.some(t => {
+      return t.type == UrlbarTokenizer.TYPE.POSSIBLE_URL ||
+             (t.type == UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN &&
+              !/^[a-z0-9-]+$/i.test(t.value));
+    });
   },
 
   async _matchKnownUrl(conn) {
     let gotResult = false;
 
     // If search string looks like an origin, try to autofill against origins.
     // Otherwise treat it as a possible URL.  When the string has only one slash
     // at the end, we still treat it as an URL.
     let query, params;
-    if (looksLikeOrigin(this._searchString)) {
+    if (UrlbarTokenizer.looksLikeOrigin(this._searchString)) {
       [query, params] = this._originQuery;
     } else {
       [query, params] = this._urlQuery;
     }
 
     // _urlQuery doesn't always return a query.
     if (query) {
       await conn.executeCached(query, params, (row, cancel) => {
@@ -1534,17 +1521,17 @@ Search.prototype = {
     // prefix) and check if the resulting string is worth matching.
     // Later, we'll verify that the found result matches the original
     // searchString and eventually discard it.
     let searchStr = this._searchString;
     if (searchStr.indexOf("/") == searchStr.length - 1) {
       searchStr = searchStr.slice(0, -1);
     }
     // If the search string looks more like a url than a domain, bail out.
-    if (!looksLikeOrigin(searchStr)) {
+    if (!UrlbarTokenizer.looksLikeOrigin(searchStr)) {
       return false;
     }
 
     let engine =
       await PlacesSearchAutocompleteProvider.engineForDomainPrefix(searchStr);
     if (!engine) {
       return false;
     }
@@ -1755,16 +1742,21 @@ Search.prototype = {
         this._extraRemoteTabRows.push(match);
       }
     }
   },
 
   // TODO (bug 1054814): Use visited URLs to inform which scheme to use, if the
   // scheme isn't specificed.
   _matchUnknownUrl() {
+    if (!this._searchString && this._strippedPrefix) {
+      // The user just typed a stripped protocol; don't build a nonsense URL
+      // like http://http/ for it.
+      return false;
+    }
     let flags = Ci.nsIURIFixup.FIXUP_FLAG_FIX_SCHEME_TYPOS |
                 Ci.nsIURIFixup.FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
     let fixupInfo = null;
     let searchUrl = this._trimmedOriginalSearchString;
     try {
       fixupInfo = Services.uriFixup.getFixupURIInfo(searchUrl,
                                                     flags);
     } catch (e) {
@@ -1874,17 +1866,17 @@ Search.prototype = {
     }
 
     // Do not apply the special style if the user is doing a search from the
     // location bar but the entered terms match an irrelevant portion of the
     // URL. For example, "https://www.google.com/search?q=terms&client=firefox"
     // when searching for "Firefox".
     let terms = parseResult.terms.toLowerCase();
     if (this._searchTokens.length > 0 &&
-        this._searchTokens.every(token => !terms.includes(token))) {
+        this._searchTokens.every(token => !terms.includes(token.value))) {
       return;
     }
 
     // Turn the match into a searchengine action with a favicon.
     match.value = PlacesUtils.mozActionURI("searchengine", {
       engineName: parseResult.engineName,
       input: parseResult.terms,
       searchQuery: parseResult.terms,
@@ -2282,19 +2274,19 @@ Search.prototype = {
   /**
    * Get the search string with the keyword substitution applied.
    * If the user-provided string starts with a keyword that gave a heuristic
    * result, it can provide a substitute string (e.g. the domain that keyword
    * will search) so that the history/bookmark results we show will correspond
    * to the keyword search rather than searching for the literal keyword.
    */
   get _keywordSubstitutedSearchString() {
-    let tokens = this._searchTokens;
+    let tokens = this._searchTokens.map(t => t.value);
     if (this._keywordSubstitute) {
-      tokens = [this._keywordSubstitute, ...this._searchTokens.slice(1)];
+      tokens = [this._keywordSubstitute, ...tokens.slice(1)];
     }
     return tokens.join(" ");
   },
 
   /**
    * Obtains the search query to be used based on the previously set search
    * preferences (accessed by this.hasBehavior).
    *
--- a/toolkit/components/places/tests/unifiedcomplete/test_search_suggestions.js
+++ b/toolkit/components/places/tests/unifiedcomplete/test_search_suggestions.js
@@ -916,21 +916,17 @@ add_task(async function avoid_http_url_s
       makeSearchMatch("ftp:", { engineName: ENGINE_NAME, heuristic: true }),
     ],
   });
 
   await check_autocomplete({
     search: "ftp:/",
     searchParam: "enable-actions",
     matches: [
-      {
-        uri: makeActionURI("visiturl", { url: "http://ftp/", input: "ftp:/" }),
-        style: [ "action", "visiturl", "heuristic" ],
-        title: "http://ftp/",
-      },
+      makeSearchMatch("ftp:/", { engineName: ENGINE_NAME, heuristic: true }),
     ],
   });
 
   await check_autocomplete({
     search: "ftp://",
     searchParam: "enable-actions",
     matches: [
       makeSearchMatch("ftp://", { engineName: ENGINE_NAME, heuristic: true }),
@@ -982,33 +978,25 @@ add_task(async function avoid_http_url_s
       makeSearchMatch("ftp://test", { engineName: ENGINE_NAME, heuristic: true }),
     ],
   });
 
   await check_autocomplete({
     search: "http:/",
     searchParam: "enable-actions",
     matches: [
-      {
-        uri: makeActionURI("visiturl", { url: "http://http/", input: "http:/" }),
-        style: [ "action", "visiturl", "heuristic" ],
-        title: "http://http/",
-      },
+      makeSearchMatch("http:/", { engineName: ENGINE_NAME, heuristic: true }),
     ],
   });
 
   await check_autocomplete({
     search: "https:/",
     searchParam: "enable-actions",
     matches: [
-      {
-        uri: makeActionURI("visiturl", { url: "http://https/", input: "https:/" }),
-        style: [ "action", "visiturl", "heuristic" ],
-        title: "http://https/",
-      },
+      makeSearchMatch("https:/", { engineName: ENGINE_NAME, heuristic: true }),
     ],
   });
 
   await check_autocomplete({
     search: "http://",
     searchParam: "enable-actions",
     matches: [
       makeSearchMatch("http://", { engineName: ENGINE_NAME, heuristic: true }),
--- a/toolkit/components/places/tests/unifiedcomplete/test_special_search.js
+++ b/toolkit/components/places/tests/unifiedcomplete/test_special_search.js
@@ -40,17 +40,18 @@ add_task(async function test_special_sea
   await addBookmark( { uri: uri6, title: "foo.bar" } );
   await addBookmark( { uri: uri7, title: "title" } );
   await addBookmark( { uri: uri8, title: "foo.bar" } );
   await addBookmark( { uri: uri9, title: "title", tags: [ "foo.bar" ] } );
   await addBookmark( { uri: uri10, title: "foo.bar", tags: [ "foo.bar" ] } );
   await addBookmark( { uri: uri11, title: "title", tags: [ "foo.bar" ] } );
   await addBookmark( { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ] } );
 
-  // Test restricting searches
+  // Test restricting searches.
+
   info("History restrict");
   await check_autocomplete({
     search: UrlbarTokenizer.RESTRICT.HISTORY,
     matches: [ { uri: uri1, title: "title" },
                { uri: uri2, title: "foo.bar" },
                { uri: uri3, title: "title" },
                { uri: uri4, title: "foo.bar" },
                { uri: uri6, title: "foo.bar" },
@@ -74,48 +75,38 @@ add_task(async function test_special_sea
   await check_autocomplete({
     search: UrlbarTokenizer.RESTRICT.TAG,
     matches: [ { uri: uri9, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri10, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] } ],
   });
 
-  // Test specials as any word position
   info("Special as first word");
   await check_autocomplete({
     search: `${UrlbarTokenizer.RESTRICT.HISTORY} foo bar`,
     matches: [ { uri: uri2, title: "foo.bar" },
                { uri: uri3, title: "title" },
                { uri: uri4, title: "foo.bar" },
                { uri: uri6, title: "foo.bar" },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] } ],
   });
 
-  info("Special as middle word");
-  await check_autocomplete({
-    search: `foo ${UrlbarTokenizer.RESTRICT.HISTORY} bar`,
-    matches: [ { uri: uri2, title: "foo.bar" },
-               { uri: uri3, title: "title" },
-               { uri: uri4, title: "foo.bar" },
-               { uri: uri6, title: "foo.bar" },
-               { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] } ],
-  });
-
   info("Special as last word");
   await check_autocomplete({
     search: `foo bar ${UrlbarTokenizer.RESTRICT.HISTORY}`,
     matches: [ { uri: uri2, title: "foo.bar" },
                { uri: uri3, title: "title" },
                { uri: uri4, title: "foo.bar" },
                { uri: uri6, title: "foo.bar" },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] } ],
   });
 
-  // Test restricting and matching searches with a term
+  // Test restricting and matching searches with a term.
+
   info(`foo ${UrlbarTokenizer.RESTRICT.HISTORY} -> history`);
   await check_autocomplete({
     search: `foo ${UrlbarTokenizer.RESTRICT.HISTORY}`,
     matches: [ { uri: uri2, title: "foo.bar" },
                { uri: uri3, title: "title" },
                { uri: uri4, title: "foo.bar" },
                { uri: uri6, title: "foo.bar" },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] } ],
@@ -162,22 +153,16 @@ add_task(async function test_special_sea
     search: `foo ${UrlbarTokenizer.RESTRICT.TAG}`,
     matches: [ { uri: uri9, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri10, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] } ],
   });
 
   // Test various pairs of special searches
-  info(`foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.BOOKMARK} -> history, is star`);
-  await check_autocomplete({
-    search: `foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
-    matches: [ { uri: uri6, title: "foo.bar", style: [ "bookmark" ] },
-               { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] } ],
-  });
 
   info(`foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TITLE} -> history, in title`);
   await check_autocomplete({
     search: `foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TITLE}`,
     matches: [ { uri: uri2, title: "foo.bar" },
                { uri: uri4, title: "foo.bar" },
                { uri: uri6, title: "foo.bar" },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] } ],
@@ -186,22 +171,16 @@ add_task(async function test_special_sea
   info(`foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.URL} -> history, in url`);
   await check_autocomplete({
     search: `foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.URL}`,
     matches: [ { uri: uri3, title: "title" },
                { uri: uri4, title: "foo.bar" },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] } ],
   });
 
-  info(`foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG} -> history, is tag`);
-  await check_autocomplete({
-    search: `foo ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG}`,
-    matches: [ { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] } ],
-  });
-
   info(`foo ${UrlbarTokenizer.RESTRICT.BOOKMARK} ${UrlbarTokenizer.RESTRICT.TITLE} -> is star, in title`);
   await check_autocomplete({
     search: `foo ${UrlbarTokenizer.RESTRICT.BOOKMARK} ${UrlbarTokenizer.RESTRICT.TITLE}`,
     matches: [ { uri: uri6, title: "foo.bar", style: [ "bookmark" ] },
                { uri: uri8, title: "foo.bar", style: [ "bookmark" ] },
                { uri: uri9, title: "title", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] },
                { uri: uri10, title: "foo.bar", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] },
@@ -212,50 +191,65 @@ add_task(async function test_special_sea
   await check_autocomplete({
     search: `foo ${UrlbarTokenizer.RESTRICT.BOOKMARK} ${UrlbarTokenizer.RESTRICT.URL}`,
     matches: [ { uri: uri7, title: "title", style: [ "bookmark" ] },
                { uri: uri8, title: "foo.bar", style: [ "bookmark" ] },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] },
                { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] } ],
   });
 
-  info(`foo ${UrlbarTokenizer.RESTRICT.BOOKMARK} ${UrlbarTokenizer.RESTRICT.TAG} -> same as ${UrlbarTokenizer.RESTRICT.TAG}`);
-  await check_autocomplete({
-    search: `foo ${UrlbarTokenizer.RESTRICT.BOOKMARK} ${UrlbarTokenizer.RESTRICT.TAG}`,
-    matches: [ { uri: uri9, title: "title", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] },
-               { uri: uri10, title: "foo.bar", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] },
-               { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] },
-               { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ], style: [ "bookmark-tag" ] } ],
-  });
-
-  info(`foo ${UrlbarTokenizer.RESTRICT.TITLE} ${UrlbarTokenizer.RESTRICT.URL} -> in title, in url`);
-  await check_autocomplete({
-    search: `foo ${UrlbarTokenizer.RESTRICT.TITLE} ${UrlbarTokenizer.RESTRICT.URL}`,
-    matches: [ { uri: uri4, title: "foo.bar" },
-               { uri: uri8, title: "foo.bar", style: [ "bookmark" ] },
-               { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
-               { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] } ],
-  });
-
   info(`foo ${UrlbarTokenizer.RESTRICT.TITLE} ${UrlbarTokenizer.RESTRICT.TAG} -> in title, is tag`);
   await check_autocomplete({
     search: `foo ${UrlbarTokenizer.RESTRICT.TITLE} ${UrlbarTokenizer.RESTRICT.TAG}`,
     matches: [ { uri: uri9, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri10, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] } ],
   });
 
   info(`foo ${UrlbarTokenizer.RESTRICT.URL} ${UrlbarTokenizer.RESTRICT.TAG} -> in url, is tag`);
   await check_autocomplete({
     search: `foo ${UrlbarTokenizer.RESTRICT.URL} ${UrlbarTokenizer.RESTRICT.TAG}`,
     matches: [ { uri: uri11, title: "title", tags: [ "foo.bar" ], style: [ "tag" ] },
                { uri: uri12, title: "foo.bar", tags: [ "foo.bar" ], style: [ "tag" ] } ],
   });
 
+  // Test conflicting restrictions.
+
+  info(`conflict ${UrlbarTokenizer.RESTRICT.TITLE} ${UrlbarTokenizer.RESTRICT.URL} -> url wins`);
+  await PlacesTestUtils.addVisits([
+    { uri: `http://conflict.com/${UrlbarTokenizer.RESTRICT.TITLE}`, title: "test" },
+    { uri: "http://conflict.com/", title: `test${UrlbarTokenizer.RESTRICT.TITLE}` },
+  ]);
+  await check_autocomplete({
+    search: `conflict ${UrlbarTokenizer.RESTRICT.TITLE} ${UrlbarTokenizer.RESTRICT.URL}`,
+    matches: [
+      { uri: `http://conflict.com/${UrlbarTokenizer.RESTRICT.TITLE}`, title: "test" },
+    ],
+  });
+
+  info(`conflict ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.BOOKMARK} -> bookmark wins`);
+  await addBookmark( { uri: "http://bookmark.conflict.com/", title: `conflict ${UrlbarTokenizer.RESTRICT.HISTORY}` } );
+  await check_autocomplete({
+    search: `conflict ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
+    matches: [
+      { uri: "http://bookmark.conflict.com/", title: `conflict ${UrlbarTokenizer.RESTRICT.HISTORY}`, style: [ "bookmark" ] },
+    ],
+  });
+
+  info(`conflict ${UrlbarTokenizer.RESTRICT.BOOKMARK} ${UrlbarTokenizer.RESTRICT.TAG} -> tag wins`);
+  await addBookmark( { uri: "http://tag.conflict.com/", title: `conflict ${UrlbarTokenizer.RESTRICT.BOOKMARK}`, tags: [ "one" ] } );
+  await addBookmark( { uri: "http://nontag.conflict.com/", title: `conflict ${UrlbarTokenizer.RESTRICT.BOOKMARK}` } );
+  await check_autocomplete({
+    search: `conflict ${UrlbarTokenizer.RESTRICT.BOOKMARK} ${UrlbarTokenizer.RESTRICT.TAG}`,
+    matches: [
+      { uri: "http://tag.conflict.com/", title: `conflict ${UrlbarTokenizer.RESTRICT.BOOKMARK}`, tags: [ "one" ], style: [ "tag" ] },
+    ],
+  });
+
   // Disable autoFill for the next tests, see test_autoFill_default_behavior.js
   // for specific tests.
   Services.prefs.setBoolPref("browser.urlbar.autoFill", false);
 
   // Test default usage by setting certain browser.urlbar.suggest.* prefs
   info("foo -> default history");
   setSuggestPrefsToFalse();
   Services.prefs.setBoolPref("browser.urlbar.suggest.history", true);
--- a/toolkit/components/places/tests/unifiedcomplete/test_word_boundary_search.js
+++ b/toolkit/components/places/tests/unifiedcomplete/test_word_boundary_search.js
@@ -110,19 +110,19 @@ add_task(async function test_escape() {
       { uri: "http://crazytitle/", title: "!@#$%^&*()_+{}|:<>?word" },
       { uri: "http://title/2", title: "dontmatchme3" },
       { uri: "http://title/1", title: "matchme2" },
       { uri: "http://dontmatchme/", title: "title1" },
       { uri: "http://matchme/", title: "title1" },
     ],
   });
 
-  info("Match word boundaries '()_+' that are among word boundaries");
+  info("Match word boundaries '()_' that are among word boundaries");
   await check_autocomplete({
-    search: "()_+",
+    search: "()_",
     checkSorting: true,
     matches: [
       { uri: "http://crazytitle/", title: "!@#$%^&*()_+{}|:<>?word" },
     ],
   });
 
   info("Katakana characters form a string, so match the beginning");
   await check_autocomplete({