Bug 755480 - Part 3: Make mozInlineSpellWordUtil::SplitDOMWord O(n) for strings with no special characters (as opposed to O(n^2)); r=roc
author Ehsan Akhgari <ehsan@mozilla.com>
Tue, 15 May 2012 16:38:43 -0400
changeset 94274 942b6e232d80e92939f7f0bd1a41fe37f957119b
parent 94273 7b6dd63e140001c87ce43f92706404b28ac53b4e
child 94275 a56ebbf0f293694d2c54ece61edf28080ba99c38
push id 9549
push user eakhgari@mozilla.com
push date Fri, 18 May 2012 00:09:16 +0000
treeherder mozilla-inbound@942b6e232d80 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers roc
bugs 755480
milestone 15.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 755480 - Part 3: Make mozInlineSpellWordUtil::SplitDOMWord O(n) for strings with no special characters (as opposed to O(n^2)); r=roc
extensions/spellcheck/src/mozInlineSpellWordUtil.cpp
--- a/extensions/spellcheck/src/mozInlineSpellWordUtil.cpp
+++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.cpp
@@ -756,17 +756,17 @@ mozInlineSpellWordUtil::FindRealWordCont
 
 // classifies a given character in the DOM word
 enum CharClass {
   CHAR_CLASS_WORD,
   CHAR_CLASS_SEPARATOR,
   CHAR_CLASS_END_OF_INPUT };
 
 // Encapsulates DOM-word to real-word splitting
-struct WordSplitState
+struct NS_STACK_CLASS WordSplitState
 {
   mozInlineSpellWordUtil*    mWordUtil;
   const nsDependentSubstring mDOMWordText;
   PRInt32                    mDOMWordOffset;
   CharClass                  mCurCharClass;
 
   WordSplitState(mozInlineSpellWordUtil* aWordUtil,
                  const nsString& aString, PRInt32 aStart, PRInt32 aLen)
@@ -777,19 +777,19 @@ struct WordSplitState
   void Advance();
   void AdvanceThroughSeparators();
   void AdvanceThroughWord();
 
   // Finds special words like email addresses and URLs that may start at the
   // current position, and returns their length, or 0 if not found. This allows
   // arbitrary word breaking rules to be used for these special entities, as
   // long as they can not contain whitespace.
-  PRInt32 FindSpecialWord();
+  bool IsSpecialWord();
 
-  // Similar to FindSpecialWord except that this takes a split word as
+  // Similar to IsSpecialWord except that this takes a split word as
   // input. This checks for things that do not require special word-breaking
   // rules.
   bool ShouldSkipWord(PRInt32 aStart, PRInt32 aLength);
 };
 
 // WordSplitState::ClassifyCharacter
 
 CharClass
@@ -907,53 +907,51 @@ WordSplitState::AdvanceThroughSeparators
 void
 WordSplitState::AdvanceThroughWord()
 {
   while (mCurCharClass == CHAR_CLASS_WORD)
     Advance();
 }
 
 
-// WordSplitState::FindSpecialWord
+// WordSplitState::IsSpecialWord
 
-PRInt32
-WordSplitState::FindSpecialWord()
+bool
+WordSplitState::IsSpecialWord()
 {
-  PRInt32 i;
-
   // Search for email addresses. We simply define these as any sequence of
   // characters with an '@' character in the middle. The DOM word is already
   // split on whitepace, so we know that everything to the end is the address
   PRInt32 firstColon = -1;
-  for (i = mDOMWordOffset;
+  for (PRInt32 i = mDOMWordOffset;
        i < PRInt32(mDOMWordText.Length()); i ++) {
     if (mDOMWordText[i] == '@') {
       // only accept this if there are unambiguous word characters (don't bother
       // recursing to disambiguate apostrophes) on each side. This prevents
       // classifying, e.g. "@home" as an email address
 
       // Use this condition to only accept words with '@' in the middle of
       // them. It works, but the inlinespellcker doesn't like this. The problem
       // is that you type "fhsgfh@" that's a misspelled word followed by a
       // symbol, but when you type another letter "fhsgfh@g" that first word
       // need to be unmarked misspelled. It doesn't do this. it only checks the
       // current position for potentially removing a spelling range.
       if (i > 0 && ClassifyCharacter(i - 1, false) == CHAR_CLASS_WORD &&
           i < (PRInt32)mDOMWordText.Length() - 1 &&
-          ClassifyCharacter(i + 1, false) == CHAR_CLASS_WORD)
-
-      return mDOMWordText.Length() - mDOMWordOffset;
+          ClassifyCharacter(i + 1, false) == CHAR_CLASS_WORD) {
+        return true;
+      }
     } else if (mDOMWordText[i] == ':' && firstColon < 0) {
       firstColon = i;
 
       // If the first colon is followed by a slash, consider it a URL
       // This will catch things like asdf://foo.com
       if (firstColon < (PRInt32)mDOMWordText.Length() - 1 &&
           mDOMWordText[firstColon + 1] == '/') {
-        return mDOMWordText.Length() - mDOMWordOffset;
+        return true;
       }
     }
   }
 
   // Check the text before the first colon against some known protocols. It
   // is impossible to check against all protocols, especially since you can
   // plug in new protocols. We also don't want to waste time here checking
   // against a lot of obscure protocols.
@@ -962,22 +960,22 @@ WordSplitState::FindSpecialWord()
                       firstColon - mDOMWordOffset));
     if (protocol.EqualsIgnoreCase("http") ||
         protocol.EqualsIgnoreCase("https") ||
         protocol.EqualsIgnoreCase("news") ||
         protocol.EqualsIgnoreCase("file") ||
         protocol.EqualsIgnoreCase("javascript") ||
         protocol.EqualsIgnoreCase("data") ||
         protocol.EqualsIgnoreCase("ftp")) {
-      return mDOMWordText.Length() - mDOMWordOffset;
+      return true;
     }
   }
 
   // not anything special
-  return -1;
+  return false;
 }
 
 // WordSplitState::ShouldSkipWord
 
 bool
 WordSplitState::ShouldSkipWord(PRInt32 aStart, PRInt32 aLength)
 {
   PRInt32 last = aStart + aLength;
@@ -997,35 +995,31 @@ WordSplitState::ShouldSkipWord(PRInt32 a
 // mozInlineSpellWordUtil::SplitDOMWord
 
 void
 mozInlineSpellWordUtil::SplitDOMWord(PRInt32 aStart, PRInt32 aEnd)
 {
   WordSplitState state(this, mSoftText, aStart, aEnd - aStart);
   state.mCurCharClass = state.ClassifyCharacter(0, true);
 
+  state.AdvanceThroughSeparators();
+  if (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT &&
+      state.IsSpecialWord()) {
+    PRInt32 specialWordLength = state.mDOMWordText.Length() - state.mDOMWordOffset;
+    mRealWords.AppendElement(
+        RealWord(aStart + state.mDOMWordOffset, specialWordLength, false));
+
+    return;
+  }
+
   while (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT) {
     state.AdvanceThroughSeparators();
     if (state.mCurCharClass == CHAR_CLASS_END_OF_INPUT)
       break;
 
-    PRInt32 specialWordLength = state.FindSpecialWord();
-    if (specialWordLength > 0) {
-      mRealWords.AppendElement(
-        RealWord(aStart + state.mDOMWordOffset, specialWordLength, false));
-
-      // skip the special word
-      state.mDOMWordOffset += specialWordLength;
-      if (state.mDOMWordOffset + aStart >= aEnd)
-        state.mCurCharClass = CHAR_CLASS_END_OF_INPUT;
-      else
-        state.mCurCharClass = state.ClassifyCharacter(state.mDOMWordOffset, true);
-      continue;
-    }
-
     // save the beginning of the word
     PRInt32 wordOffset = state.mDOMWordOffset;
 
     // find the end of the word
     state.AdvanceThroughWord();
     PRInt32 wordLen = state.mDOMWordOffset - wordOffset;
     mRealWords.AppendElement(
       RealWord(aStart + wordOffset, wordLen,