Bug 1101337 - Make the ReplaceSubstring() XPCOM string API linear; r=froydnj
☠☠ backed out by 60d66fb4083b ☠ ☠
authorEhsan Akhgari <ehsan@mozilla.com>
Fri, 12 Dec 2014 11:47:55 -0500
changeset 219418 2954a37bc2df49c799e3ffbc29b8abaad56ccb6b
parent 219417 cc019512abab61ebf8d13eed58fe9d5013fd952b
child 219419 36d7afaaca18d40f8902409843e175cfa030b09a
push id52826
push usereakhgari@mozilla.com
push dateFri, 12 Dec 2014 16:52:56 +0000
treeherdermozilla-inbound@2954a37bc2df [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfroydnj
bugs1101337
milestone37.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1101337 - Make the ReplaceSubstring() XPCOM string API linear; r=froydnj ReplaceSubstring() is an O(n*m) algorithm (n being the length of the string and m being the number of occurrences of aTarget) because we have to move the remainder of the string, search it again and potentially memmove most of it again as we find more matches. This patch rewrites that function to make it O(n+m). Note that we currently don't build TestStrings.cpp, so the test case in this patch is not run automatically, but the test case has been verified to pass separately by moving the test function into Gecko and calling it during startup and stepping through it in the debugger.
xpcom/string/nsSubstring.cpp
xpcom/string/nsTString.h
xpcom/string/nsTStringObsolete.cpp
xpcom/tests/TestStrings.cpp
--- a/xpcom/string/nsSubstring.cpp
+++ b/xpcom/string/nsSubstring.cpp
@@ -101,17 +101,17 @@ public:
 static nsStringStats gStringStats;
 #define STRING_STAT_INCREMENT(_s) (gStringStats.m ## _s ## Count)++
 #else
 #define STRING_STAT_INCREMENT(_s)
 #endif
 
 // ---------------------------------------------------------------------------
 
-inline void
+void
 ReleaseData(void* aData, uint32_t aFlags)
 {
   if (aFlags & nsSubstring::F_SHARED) {
     nsStringBuffer::FromData(aData)->Release();
   } else if (aFlags & nsSubstring::F_OWNED) {
     nsMemory::Free(aData);
     STRING_STAT_INCREMENT(AdoptFree);
 #ifdef NS_BUILD_REFCNT_LOGGING
--- a/xpcom/string/nsTString.h
+++ b/xpcom/string/nsTString.h
@@ -381,16 +381,21 @@ public:
    *  swaps occurence of 1 string for another
    */
 
   void ReplaceChar(char_type aOldChar, char_type aNewChar);
   void ReplaceChar(const char* aSet, char_type aNewChar);
 #ifdef CharT_is_PRUnichar
   void ReplaceChar(const char16_t* aSet, char16_t aNewChar);
 #endif
+  /**
+   * Replace all occurrences of aTarget with aNewValue.
+   * The complexity of this function is O(n+m), n being the length of the string
+   * and m being the length of aNewValue.
+   */
   void ReplaceSubstring(const self_type& aTarget, const self_type& aNewValue);
   void ReplaceSubstring(const char_type* aTarget, const char_type* aNewValue);
 
 
   /**
    *  This method trims characters found in aTrimSet from
    *  either end of the underlying string.
    *
--- a/xpcom/string/nsTStringObsolete.cpp
+++ b/xpcom/string/nsTStringObsolete.cpp
@@ -1,15 +1,15 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-
+#include "nsTArray.h"
 
 /**
  * nsTString::Find
  *
  * aOffset specifies starting index
  * aCount specifies number of string compares (iterations)
  */
 
@@ -446,42 +446,124 @@ nsTString_CharT::ReplaceChar( const char
       break;
 
     data[i++] = aNewChar;
     data += i;
     lenRemaining -= i;
   }
 }
 
+void ReleaseData(void* aData, uint32_t aFlags);
+
 void
 nsTString_CharT::ReplaceSubstring( const char_type* aTarget, const char_type* aNewValue )
 {
   ReplaceSubstring(nsTDependentString_CharT(aTarget),
                    nsTDependentString_CharT(aNewValue));
 }
 
 void
 nsTString_CharT::ReplaceSubstring( const self_type& aTarget, const self_type& aNewValue )
 {
   if (aTarget.Length() == 0)
     return;
 
+  struct Segment {
+    uint32_t mBegin, mLength;
+    Segment(uint32_t aBegin, uint32_t aLength)
+      : mBegin(aBegin)
+      , mLength(aLength)
+    {}
+  };
+
+  // Remember all of the non-matching parts.
+  nsAutoTArray<Segment, 16> nonMatching;
   uint32_t i = 0;
-  while (i < mLength)
+  uint32_t newLength = 0;
+  while (true)
   {
     int32_t r = FindSubstring(mData + i, mLength - i, static_cast<const char_type*>(aTarget.Data()), aTarget.Length(), false);
-    if (r == kNotFound)
+    int32_t until = (r == kNotFound) ? mLength - i : r;
+    nonMatching.AppendElement(Segment(i, until));
+    newLength += until;
+    if (r == kNotFound) {
+      break;
+    }
+
+    newLength += aNewValue.Length();
+    i += r + aTarget.Length();
+    if (i >= mLength) {
+      // Add an auxiliary entry at the end of the list to help as an edge case
+      // for the algorithms below.
+      nonMatching.AppendElement(Segment(mLength, 0));
       break;
+    }
+  }
+
+  // If there's only one non-matching segment, then the target string was not
+  // found, and there's nothing to do.
+  if (nonMatching.Length() == 1) {
+    MOZ_ASSERT(nonMatching[0].mBegin == 0 && nonMatching[0].mLength == mLength,
+               "We should have the correct non-matching segment.");
+    return;
+  }
+
+  // Make sure that we can mutate our buffer.
+  char_type* oldData;
+  uint32_t oldFlags;
+  if (!MutatePrep(newLength, &oldData, &oldFlags))
+    return;
+  if (oldData) {
+    // Copy all of the old data to the new buffer.
+    char_traits::copy(mData, oldData, mLength);
+    ::ReleaseData(oldData, oldFlags);
+  }
 
-    Replace(i + r, aTarget.Length(), aNewValue);
-    i += r + aNewValue.Length();
+  if (aTarget.Length() >= aNewValue.Length()) {
+    // In the shrinking case, start filling the buffer from the beginning.
+    const uint32_t delta = (aTarget.Length() - aNewValue.Length());
+    for (i = 1; i < nonMatching.Length(); ++i) {
+      // When we move the i'th non-matching segment into position, we need to
+      // account for the characters deleted by the previous |i| replacements by
+      // subtracting |i * delta|.
+      const char_type* sourceSegmentPtr = mData + nonMatching[i].mBegin;
+      char_type* destinationSegmentPtr = mData + nonMatching[i].mBegin - i * delta;
+      memmove(destinationSegmentPtr, sourceSegmentPtr,
+              sizeof(char_type) * nonMatching[i].mLength);
+      // Write the i'th replacement immediately before the new i'th non-matching
+      // segment.
+      char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
+                        aNewValue.Data(), aNewValue.Length());
+    }
+  } else {
+    // In the growing case, start filling the buffer from the end.
+    const uint32_t delta = (aNewValue.Length() - aTarget.Length());
+    const uint32_t numNonMatching = nonMatching.Length();
+    if (numNonMatching > 1) {
+      for (i = numNonMatching - 1; i > 0; --i) {
+        // When we move the i'th non-matching segment into position, we need to
+        // account for the characters added by the previous |i| replacements
+        // by adding |i * delta|.
+        const char_type* sourceSegmentPtr = mData + nonMatching[i].mBegin;
+        char_type* destinationSegmentPtr = mData + nonMatching[i].mBegin + i * delta;
+        memmove(destinationSegmentPtr, sourceSegmentPtr,
+                sizeof(char_type) * nonMatching[i].mLength);
+        // Write the i'th replacement immediately before the new i'th
+        // non-matching segment.
+        char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
+                          aNewValue.Data(), aNewValue.Length());
+      }
+    }
   }
+
+  // Adjust the length and make sure the string is null terminated.
+  mLength = newLength;
+  mData[mLength] = char_type(0);
 }
 
-
 /**
  * nsTString::Trim
  */
 
 void
 nsTString_CharT::Trim( const char* aSet, bool aTrimLeading, bool aTrimTrailing, bool aIgnoreQuotes )
 {
   // the old implementation worried about aSet being null :-/
--- a/xpcom/tests/TestStrings.cpp
+++ b/xpcom/tests/TestStrings.cpp
@@ -446,16 +446,166 @@ bool test_replace_substr_2()
 
     // we expect that newAcctName will be unchanged.
     if (!newAcctName.Equals(acctName))
       return false;
 
     return true;
   }
 
+bool test_replace_substr_3()
+  {
+    nsCString s;
+    s.Assign("abcabcabc");
+    s.ReplaceSubstring("ca", "X");
+    bool r = strcmp(s.get(), "abXbXbc") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcabcabc");
+    s.ReplaceSubstring("ca", "XYZ");
+    r = strcmp(s.get(), "abXYZbXYZbc") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcabcabc");
+    s.ReplaceSubstring("ca", "XY");
+    r = strcmp(s.get(), "abXYbXYbc") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcabcabc");
+    s.ReplaceSubstring("ca", "XYZ!");
+    r = strcmp(s.get(), "abXYZ!bXYZ!bc") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("bcd", "X");
+    r = strcmp(s.get(), "aXaXaX") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("bcd", "XYZ!");
+    r = strcmp(s.get(), "aXYZ!aXYZ!aXYZ!") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("bcd", "XY");
+    r = strcmp(s.get(), "aXYaXYaXY") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("bcd", "XYZABC");
+    r = strcmp(s.get(), "aXYZABCaXYZABCaXYZABC") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("bcd", "XYZ");
+    r = strcmp(s.get(), "aXYZaXYZaXYZ") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("bcd", "XYZ!");
+    r = strcmp(s.get(), "aXYZ!aXYZ!aXYZ!") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("ab", "X");
+    r = strcmp(s.get(), "XcdXcdXcd") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("ab", "XYZABC");
+    r = strcmp(s.get(), "XYZABCcdXYZABCcdXYZABCcd") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("ab", "XY");
+    r = strcmp(s.get(), "XYcdXYcdXYcd") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("ab", "XYZ!");
+    r = strcmp(s.get(), "XYZ!cdXYZ!cdXYZ!cd") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("notfound", "X");
+    r = strcmp(s.get(), "abcdabcdabcd") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    s.Assign("abcdabcdabcd");
+    s.ReplaceSubstring("notfound", "longlongstring");
+    r = strcmp(s.get(), "abcdabcdabcd") == 0;
+    if (!r)
+      {
+        printf("[s=%s]\n", s.get());
+        return false;
+      }
+
+    return true;
+  }
+
 bool test_strip_ws()
   {
     const char text[] = " a    $   ";
     nsCString s(text);
     s.StripWhitespace();
     bool r = strcmp(s.get(), "a$") == 0;
     if (!r)
       printf("[s=%s]\n", s.get());
@@ -1196,16 +1346,17 @@ tests[] =
     { "test_rfind_4", test_rfind_4 },
     { "test_findinreadable", test_findinreadable },
     { "test_rfindinreadable", test_rfindinreadable },
     { "test_distance", test_distance },
     { "test_length", test_length },
     { "test_trim", test_trim },
     { "test_replace_substr", test_replace_substr },
     { "test_replace_substr_2", test_replace_substr_2 },
+    { "test_replace_substr_3", test_replace_substr_3 },
     { "test_strip_ws", test_strip_ws },
     { "test_equals_ic", test_equals_ic },
     { "test_fixed_string", test_fixed_string },
     { "test_concat", test_concat },
     { "test_concat_2", test_concat_2 },
     { "test_concat_3", test_concat_3 },
     { "test_xpidl_string", test_xpidl_string },
     { "test_empty_assign", test_empty_assign },