Bug 339069 - Optimization for RFindInReadable
authorasqueella@gmail.com
Mon, 23 Jul 2007 18:30:19 -0700
changeset 3809 1a650bf8678d8d4c16543d102c864b8e31899c53
parent 3808 e6ab3de32462885721fc284104ae1c43525ae740
child 3810 a1a66b69f304454715e2f3fc32808d24ef73f5a4
push id1
push userbsmedberg@mozilla.com
push dateThu, 20 Mar 2008 16:49:24 +0000
treeherdermozilla-central@61007906a1f8 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
bugs339069
milestone1.9a7pre
Bug 339069 - Optimization for RFindInReadable p=Alfred Kayser <alfredkayser@nl.ibm.com> r=darin, sr=dbaron
xpcom/string/public/nsReadableUtils.h
xpcom/string/src/nsReadableUtils.cpp
xpcom/tests/TestStrings.cpp
--- a/xpcom/string/public/nsReadableUtils.h
+++ b/xpcom/string/public/nsReadableUtils.h
@@ -305,18 +305,16 @@ inline PRBool FindInReadable( const nsAC
 
 NS_COM PRBool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& );
 
   /**
    * Finds the rightmost occurrence of |aPattern| 
    * Returns |PR_TRUE| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
    * point to the match.  If no match was found, returns |PR_FALSE| and makes |aSearchStart == aSearchEnd|.
    *
-   * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
-   * If we need something faster, then we can implement that later.
    */
 NS_COM PRBool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
 NS_COM PRBool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
 
    /**
    * Finds the leftmost occurrence of |aChar|, if any in the range 
    * |aSearchStart|..|aSearchEnd|.
    *
--- a/xpcom/string/src/nsReadableUtils.cpp
+++ b/xpcom/string/src/nsReadableUtils.cpp
@@ -892,16 +892,72 @@ FindInReadable_Impl( const StringT& aPat
                   }
               }
           }
       }
 
     return found_it;
   }
 
+  /**
+   * This searches the entire string from right to left, and returns the first match found, if any.
+   */
+template <class StringT, class IteratorT, class Comparator>
+PRBool
+RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
+  {
+    IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
+    aPattern.BeginReading(patternStart);
+    aPattern.EndReading(patternEnd);
+
+      // Point to the last character in the pattern
+    --patternEnd;
+      // outer loop keeps searching till we run out of string to search
+    while ( aSearchStart != searchEnd )
+      {
+          // Point to the end position of the next possible match
+        --searchEnd;
+    
+          // Check last character, if a match, explore further from here
+        if ( compare(*patternEnd, *searchEnd) == 0 )
+          {  
+              // We're at a potential match, let's see if we really hit one
+            IteratorT testPattern(patternEnd);
+            IteratorT testSearch(searchEnd);
+
+              // inner loop verifies the potential match at the current position
+            do
+              {
+                  // if we verified all the way to the end of the pattern, then we found it!
+                if ( testPattern == patternStart )
+                  {
+                    aSearchStart = testSearch;  // point to start of match
+                    aSearchEnd = ++searchEnd;   // point to end of match
+                    return PR_TRUE;
+                  }
+    
+                  // if we got to end of the string we're searching before we hit the end of the
+                  //  pattern, we'll never find what we're looking for
+                if ( testSearch == aSearchStart )
+                  {
+                    aSearchStart = aSearchEnd;
+                    return PR_FALSE;
+                  }
+    
+                  // test previous character for a match
+                --testPattern;
+                --testSearch;
+              }
+            while ( compare(*testPattern, *testSearch) == 0 );
+          }
+      }
+
+    aSearchStart = aSearchEnd;
+    return PR_FALSE;
+  }
 
 NS_COM
 PRBool
 FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
   {
     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
   }
 
@@ -914,85 +970,28 @@ FindInReadable( const nsACString& aPatte
 
 NS_COM
 PRBool
 CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
   {
     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());
   }
 
-  /**
-   * This implementation is simple, but does too much work.
-   * It searches the entire string from left to right, and returns the last match found, if any.
-   * This implementation will be replaced when I get |reverse_iterator|s working.
-   */
 NS_COM
 PRBool
 RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
   {
-    PRBool found_it = PR_FALSE;
-
-    nsAString::const_iterator savedSearchEnd(aSearchEnd);
-    nsAString::const_iterator searchStart(aSearchStart), searchEnd(aSearchEnd);
-
-    while ( searchStart != searchEnd )
-      {
-        if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
-          {
-            found_it = PR_TRUE;
-
-              // this is the best match so far, so remember it
-            aSearchStart = searchStart;
-            aSearchEnd = searchEnd;
-
-              // ...and get ready to search some more
-              //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
-            ++searchStart;
-            searchEnd = savedSearchEnd;
-          }
-      }
-
-      // if we never found it, return an empty range
-    if ( !found_it )
-      aSearchStart = aSearchEnd;
-
-    return found_it;
+    return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
   }
 
 NS_COM
 PRBool
 RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
   {
-    PRBool found_it = PR_FALSE;
-
-    nsACString::const_iterator savedSearchEnd(aSearchEnd);
-    nsACString::const_iterator searchStart(aSearchStart), searchEnd(aSearchEnd);
-
-    while ( searchStart != searchEnd )
-      {
-        if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
-          {
-            found_it = PR_TRUE;
-
-              // this is the best match so far, so remember it
-            aSearchStart = searchStart;
-            aSearchEnd = searchEnd;
-
-              // ...and get ready to search some more
-              //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
-            ++searchStart;
-            searchEnd = savedSearchEnd;
-          }
-      }
-
-      // if we never found it, return an empty range
-    if ( !found_it )
-      aSearchStart = aSearchEnd;
-
-    return found_it;
+    return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
   }
 
 NS_COM 
 PRBool 
 FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
   {
     PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
 
--- a/xpcom/tests/TestStrings.cpp
+++ b/xpcom/tests/TestStrings.cpp
@@ -181,16 +181,208 @@ PRBool test_rfind_4()
       {
         printf("i=%d\n", i);
         return PR_FALSE;
       }
 
     return PR_TRUE;
   }
 
+PRBool test_findinreadable()
+  {
+    const char text[] = "jar:jar:file:///c:/software/mozilla/mozilla_2006_02_21.jar!/browser/chrome/classic.jar!/";
+    nsCAutoString value(text);
+
+    nsACString::const_iterator begin, end;
+    value.BeginReading(begin);
+    value.EndReading(end);
+    nsACString::const_iterator delim_begin (begin),
+                               delim_end   (end);
+
+    // Search for last !/ at the end of the string
+    if (!FindInReadable(NS_LITERAL_CSTRING("!/"), delim_begin, delim_end))
+        return PR_FALSE;
+    char *r = ToNewCString(Substring(delim_begin, delim_end));
+    // Should match the first "!/" but not the last
+    if ((delim_end == end) || (strcmp(r, "!/")!=0))
+      {
+        printf("r = %s\n", r);
+        nsMemory::Free(r);
+        return PR_FALSE;
+      }
+    nsMemory::Free(r);
+
+    delim_begin = begin;
+    delim_end = end;
+
+    // Search for first jar:
+    if (!FindInReadable(NS_LITERAL_CSTRING("jar:"), delim_begin, delim_end))
+        return PR_FALSE;
+
+    r = ToNewCString(Substring(delim_begin, delim_end));
+    // Should not match the first jar:, but the second one
+    if ((delim_begin != begin) || (strcmp(r, "jar:")!=0))
+      {
+        printf("r = %s\n", r);
+        nsMemory::Free(r);
+        return PR_FALSE;
+      }
+    nsMemory::Free(r);
+
+    // Search for jar: in a Substring
+    delim_begin = begin; delim_begin++;
+    delim_end = end;
+    if (!FindInReadable(NS_LITERAL_CSTRING("jar:"), delim_begin, delim_end))
+        return PR_FALSE;
+
+    r = ToNewCString(Substring(delim_begin, delim_end));
+    // Should not match the first jar:, but the second one
+    if ((delim_begin == begin) || (strcmp(r, "jar:")!=0))
+      {
+        printf("r = %s\n", r);
+        nsMemory::Free(r);
+        return PR_FALSE;
+      }
+    nsMemory::Free(r);
+
+    // Should not find a match
+    if (FindInReadable(NS_LITERAL_CSTRING("gecko"), delim_begin, delim_end))
+        return PR_FALSE;
+
+    // When no match is found, range should be empty
+    if (delim_begin != delim_end) 
+        return PR_FALSE;
+
+    // Should not find a match (search not beyond Substring)
+    delim_begin = begin; for (int i=0;i<6;i++) delim_begin++;
+    delim_end = end;
+    if (FindInReadable(NS_LITERAL_CSTRING("jar:"), delim_begin, delim_end))
+        return PR_FALSE;
+
+    // When no match is found, range should be empty
+    if (delim_begin != delim_end) 
+        return PR_FALSE;
+
+    // Should not find a match (search not beyond Substring)
+    delim_begin = begin;
+    delim_end = end; for (int i=0;i<7;i++) delim_end--;
+    if (FindInReadable(NS_LITERAL_CSTRING("classic"), delim_begin, delim_end))
+        return PR_FALSE;
+
+    // When no match is found, range should be empty
+    if (delim_begin != delim_end) 
+        return PR_FALSE;
+
+    return PR_TRUE;
+  }
+
+PRBool test_rfindinreadable()
+  {
+    const char text[] = "jar:jar:file:///c:/software/mozilla/mozilla_2006_02_21.jar!/browser/chrome/classic.jar!/";
+    nsCAutoString value(text);
+
+    nsACString::const_iterator begin, end;
+    value.BeginReading(begin);
+    value.EndReading(end);
+    nsACString::const_iterator delim_begin (begin),
+                               delim_end   (end);
+
+    // Search for last !/ at the end of the string
+    if (!RFindInReadable(NS_LITERAL_CSTRING("!/"), delim_begin, delim_end))
+        return PR_FALSE;
+    char *r = ToNewCString(Substring(delim_begin, delim_end));
+    // Should match the last "!/"
+    if ((delim_end != end) || (strcmp(r, "!/")!=0))
+      {
+        printf("r = %s\n", r);
+        nsMemory::Free(r);
+        return PR_FALSE;
+      }
+    nsMemory::Free(r);
+
+    delim_begin = begin;
+    delim_end = end;
+
+    // Search for last jar: but not the first one...
+    if (!RFindInReadable(NS_LITERAL_CSTRING("jar:"), delim_begin, delim_end))
+        return PR_FALSE;
+
+    r = ToNewCString(Substring(delim_begin, delim_end));
+    // Should not match the first jar:, but the second one
+    if ((delim_begin == begin) || (strcmp(r, "jar:")!=0))
+      {
+        printf("r = %s\n", r);
+        nsMemory::Free(r);
+        return PR_FALSE;
+      }
+    nsMemory::Free(r);
+
+    // Search for jar: in a Substring
+    delim_begin = begin;
+    delim_end = begin; for (int i=0;i<6;i++) delim_end++;
+    if (!RFindInReadable(NS_LITERAL_CSTRING("jar:"), delim_begin, delim_end)) {
+        printf("Search for jar: in a Substring\n");
+        return PR_FALSE;
+    }
+
+    r = ToNewCString(Substring(delim_begin, delim_end));
+    // Should not match the first jar:, but the second one
+    if ((delim_begin != begin) || (strcmp(r, "jar:")!=0))
+      {
+        printf("r = %s\n", r);
+        nsMemory::Free(r);
+        return PR_FALSE;
+      }
+    nsMemory::Free(r);
+
+    // Should not find a match
+    delim_begin = begin;
+    delim_end = end;
+    if (RFindInReadable(NS_LITERAL_CSTRING("gecko"), delim_begin, delim_end)) {
+        printf("Should not find a match\n");
+        return PR_FALSE;
+    }
+
+    // When no match is found, range should be empty
+    if (delim_begin != delim_end) {
+        printf("1: When no match is found, range should be empty\n");
+        return PR_FALSE;
+    }
+
+    // Should not find a match (search not before Substring)
+    delim_begin = begin; for (int i=0;i<6;i++) delim_begin++;
+    delim_end = end;
+    if (RFindInReadable(NS_LITERAL_CSTRING("jar:"), delim_begin, delim_end)) {
+        printf("Should not find a match (search not before Substring)\n");
+        return PR_FALSE;
+    }
+
+    // When no match is found, range should be empty
+    if (delim_begin != delim_end) {
+        printf("2: When no match is found, range should be empty\n");
+        return PR_FALSE;
+    }
+
+    // Should not find a match (search not beyond Substring)
+    delim_begin = begin;
+    delim_end = end; for (int i=0;i<7;i++) delim_end--;
+    if (RFindInReadable(NS_LITERAL_CSTRING("classic"), delim_begin, delim_end)) {
+        printf("Should not find a match (search not beyond Substring)\n");
+        return PR_FALSE;
+    }
+
+    // When no match is found, range should be empty
+    if (delim_begin != delim_end) {
+        printf("3: When no match is found, range should be empty\n");
+        return PR_FALSE;
+    }
+
+    return PR_TRUE;
+  }
+
 PRBool test_distance()
   {
     const char text[] = "abc-xyz";
     nsCString s(text);
     nsCString::const_iterator begin, end;
     s.BeginReading(begin);
     s.EndReading(end);
     size_t d = Distance(begin, end);
@@ -716,16 +908,18 @@ tests[] =
     { "test_assign_c", test_assign_c },
     { "test1", test1 },
     { "test2", test2 },
     { "test_find", test_find },
     { "test_rfind", test_rfind },
     { "test_rfind_2", test_rfind_2 },
     { "test_rfind_3", test_rfind_3 },
     { "test_rfind_4", test_rfind_4 },
+    { "test_findinreadable", test_findinreadable },
+    { "test_rfindinreadable", test_rfindinreadable },
     { "test_distance", test_distance },
     { "test_length", test_length },
     { "test_trim", test_trim },
     { "test_replace_substr", test_replace_substr },
     { "test_replace_substr_2", test_replace_substr_2 },
     { "test_strip_ws", test_strip_ws },
     { "test_equals_ic", test_equals_ic },
     { "test_fixed_string", test_fixed_string },