Bug 1570868 - Ignore redundant wildcards in MatchGlob, avoid regex backtracking r=robwu
authorTomislav Jovanovic <tomica@gmail.com>
Thu, 04 Mar 2021 11:55:52 +0000
changeset 569658 fdde8c6c8ddb742c31c07858cc809202cf1526ca
parent 569657 18c481824b45dec528fe0eb2c4c8625299520d6d
child 569659 ba96e3947d75e89168e31c88effbb9450c8439fd
push id137736
push usertjovanovic@mozilla.com
push dateThu, 04 Mar 2021 11:58:14 +0000
treeherderautoland@fdde8c6c8ddb [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersrobwu
bugs1570868
milestone88.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1570868 - Ignore redundant wildcards in MatchGlob, avoid regex backtracking r=robwu Differential Revision: https://phabricator.services.mozilla.com/D106725
toolkit/components/extensions/MatchPattern.cpp
toolkit/components/extensions/test/xpcshell/test_MatchPattern.js
--- a/toolkit/components/extensions/MatchPattern.cpp
+++ b/toolkit/components/extensions/MatchPattern.cpp
@@ -637,27 +637,37 @@ void MatchGlob::Init(JSContext* aCx, con
   }
 
   // Fall back to the regexp slow path.
   constexpr auto metaChars = ".+*?^${}()|[]\\"_ns;
 
   nsAutoString escaped;
   escaped.Append('^');
 
+  // For any continuous string of * (and ? if aAllowQuestion) wildcards, only
+  // emit the first *, later ones are redundant, and can hang regex matching.
+  bool emittedFirstStar = false;
+
   for (uint32_t i = 0; i < mGlob.Length(); i++) {
     auto c = mGlob[i];
     if (c == '*') {
-      escaped.AppendLiteral(".*");
+      if (!emittedFirstStar) {
+        escaped.AppendLiteral(".*");
+        emittedFirstStar = true;
+      }
     } else if (c == '?' && aAllowQuestion) {
       escaped.Append('.');
     } else {
       if (metaChars.Contains(c)) {
         escaped.Append('\\');
       }
       escaped.Append(c);
+
+      // String of wildcards broken by a non-wildcard char, reset tracking flag.
+      emittedFirstStar = false;
     }
   }
 
   escaped.Append('$');
 
   // TODO: Switch to the Rust regexp crate, when Rust integration is easier.
   // It uses a much more efficient, linear time matching algorithm, and
   // doesn't require special casing for the literal and prefix cases.
--- a/toolkit/components/extensions/test/xpcshell/test_MatchPattern.js
+++ b/toolkit/components/extensions/test/xpcshell/test_MatchPattern.js
@@ -462,16 +462,43 @@ add_task(async function test_MatchGlob()
   fail({ url: moz, pattern: ["*.org"] });
 
   // Wrong TLD
   fail({ url: moz, pattern: ["*oz*.com/"] });
   // Case sensitive
   fail({ url: moz, pattern: ["*.ORG/"] });
 });
 
+add_task(async function test_MatchGlob_redundant_wildcards_backtracking() {
+  {
+    // Bug 1570868 - repeated * in tabs.query glob causes too much backtracking.
+    let title = `Monster${"*".repeat(99)}Mash`;
+
+    let start = Date.now();
+    let glob = new MatchGlob(title);
+    let matches = glob.matches(title);
+    let duration = Date.now() - start;
+
+    ok(matches, `Expected match: ${title}, ${title}`);
+    ok(duration < 10, `Matching duration: ${duration}ms`);
+  }
+  {
+    // Similarly with any continuous combination of ?**???****? wildcards.
+    let title = `Monster${"?*".repeat(99)}Mash`;
+
+    let start = Date.now();
+    let glob = new MatchGlob(title);
+    let matches = glob.matches(title);
+    let duration = Date.now() - start;
+
+    ok(matches, `Expected match: ${title}, ${title}`);
+    ok(duration < 10, `Matching duration: ${duration}ms`);
+  }
+});
+
 add_task(async function test_MatchPattern_subsumes() {
   function test(oldPat, newPat) {
     let m = new MatchPatternSet(oldPat);
     return m.subsumes(new MatchPattern(newPat));
   }
 
   function pass({ oldPat, newPat }) {
     ok(test(oldPat, newPat), `${JSON.stringify(oldPat)} subsumes "${newPat}"`);