Bug 145975 - Implement nsCaseInsensitiveUTF8StringComparator r=smontagu
authorJustin Lebar <justin.lebar@gmail.com>, Kyle Huey <me@kylehuey.com>
Tue, 31 Aug 2010 18:03:40 -0700
changeset 56399 57bff252b161d1a0c30fd3261edd2720a56469a3
parent 56398 092596c1faefeccdeaed18a4843271d4f82f4f70
child 56400 e7bf870a8bb4deecbe4ca489ef24312bdff5312e
push idunknown
push userunknown
push dateunknown
reviewerssmontagu
bugs145975
milestone2.0b8pre
Bug 145975 - Implement nsCaseInsensitiveUTF8StringComparator r=smontagu
intl/unicharutil/src/casetable.h
intl/unicharutil/tests/UnicharSelfTest.cpp
intl/unicharutil/tools/README.txt
intl/unicharutil/tools/gencasetable.pl
intl/unicharutil/util/nsUnicharUtils.cpp
intl/unicharutil/util/nsUnicharUtils.h
parser/htmlparser/src/nsScannerString.cpp
toolkit/components/places/src/SQLFunctions.cpp
widget/src/windows/nsWindow.cpp
xpcom/string/public/nsAString.h
xpcom/string/public/nsTSubstring.h
xpcom/string/src/nsReadableUtils.cpp
xpcom/string/src/nsStringComparator.cpp
xpcom/string/src/nsTStringComparator.cpp
xpcom/string/src/nsTSubstring.cpp
--- a/intl/unicharutil/src/casetable.h
+++ b/intl/unicharutil/src/casetable.h
@@ -358,8 +358,21 @@ 0xE001003F,
 0x00003012,
 0x00000000,
 0x00000000,
 0x00000000,
 0x000000C0,
 0x00000000,
 0x80000000
 };
+
+// We map x -> x, except for upper-case letters,
+// which we map to their lower-case equivalents.
+static const PRUint8 gASCIIToLower [128] = {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+};
--- a/intl/unicharutil/tests/UnicharSelfTest.cpp
+++ b/intl/unicharutil/tests/UnicharSelfTest.cpp
@@ -276,78 +276,249 @@ static PRUnichar t4result[T4LEN+2] =  {
   0x01F2 ,  // 27
   0x01F2 ,  // 28
   0x2C6F ,  // 29
   0x2C6E ,  // 30
   0xA640 ,  // 31
   0x0041 ,  // Dummy entry to prevent overflow
   0x00  
 };
+ 
+static unsigned char t6lhs[] = {
+  0x31 ,       //  0
+  0x19 ,       //  1
+  0x43 ,       //  2
+  0x67 ,       //  3
+  0xC3, 0x88 , //  4
+  0xC3, 0xA9 , //  5
+  0xC5, 0x87 , //  6
+  0xC7, 0x84 , //  7
+  0xC7, 0x86 , //  8
+  0xC7, 0x85 , //  9
+  0xCF, 0x80 ,  // 10
+  0xCE, 0xB2 ,  // 11
+  0xD0, 0xB8 ,  // 12
+  0xD2, 0xA5 ,  // 13
+  0xD7, 0x90 ,  // 14
+  0xE0, 0xA8, 0xA0 ,  // 15
+  0xE3, 0x82, 0xB0 ,  // 16
+  0xE5, 0x86, 0x85 ,  // 17
+  0xEC, 0x80, 0xA1 ,  // 18
+  0xEF, 0xBD, 0x88 ,  // 19
+  0xC7, 0x87 ,  // 20
+  0xC7, 0x88 ,  // 21
+  0xC7, 0x89 ,  // 22
+  0xC7, 0x8A ,  // 23
+  0xC7, 0x8B ,  // 24
+  0xC7, 0x8C ,  // 25
+  0xC7, 0xB1 ,  // 26
+  0xC7, 0xB2 ,  // 27
+  0xC7, 0xB3 ,  // 28
+  0xC9, 0x90 ,  // 29
+  0xC9, 0xB1 ,  // 30
+  0xEA, 0x99, 0x81 ,  // 31
+  0x00  
+};
+
+static unsigned char t6rhs[] =  {
+  0x31 ,  //  0
+  0x19 ,  //  1
+  0x43 ,  //  2
+  0x47 ,  //  3
+  0xC3, 0x88 ,  //  4
+  0xC3, 0x89 ,  //  5
+  0xC5, 0x87 ,  //  6
+  0xC7, 0x84 ,  //  7
+  0xC7, 0x84 ,  //  8
+  0xC7, 0x84 ,  //  9
+  0xCE, 0xA0 ,  // 10
+  0xCE, 0x92 ,  // 11
+  0xD0, 0x98 ,  // 12
+  0xD2, 0xA4 ,  // 13
+  0xD7, 0x90 ,  // 14
+  0xE0, 0xA8, 0xA0 ,  // 15
+  0xE3, 0x82, 0xB0 ,  // 16
+  0xE5, 0x86, 0x85 ,  // 17
+  0xEC, 0x80, 0xA1 ,  // 18
+  0xEF, 0xBC, 0xA8 ,  // 19
+  0xC7, 0x87 ,  // 20
+  0xC7, 0x87 ,  // 21
+  0xC7, 0x87 ,  // 22
+  0xC7, 0x8a ,  // 23
+  0xC7, 0x8a ,  // 24
+  0xC7, 0x8a ,  // 25
+  0xC7, 0xB1 ,  // 26
+  0xC7, 0xB1 ,  // 27
+  0xC7, 0xB1 ,  // 28
+  0xE2, 0xB1, 0xAF ,  // 29
+  0xE2, 0xB1, 0xAE ,  // 30
+  0xEA, 0x99, 0x80 ,  // 31
+  0x00  
+};
+
+static const char *t7lhs = "aBcDeFGHIJKL1!!2!!a!uuuu";
+static const char *t7rhs = "AbCdEFghijkL1!!2!!A!UUuU";
+
+static const char *t8lhs = "aazzz";
+static const char *t8rhs = "aBa";
+
+static const char *t9lhs = "@a";
+static const char *t9rhs = "`a";
+
+bool CharByCharCompareEqual(const char *a, const char *b,
+                            PRUint32 aLen, PRUint32 bLen)
+{
+  // Do basically a CaseInsensitiveCompare(), but using
+  // CaseInsensitiveUTF8CharsEqual().
+
+  const char *aEnd = a + aLen;
+  const char *bEnd = b + bLen;
+  while (a < aEnd && b < bEnd) {
+    PRBool err;
+    if (!CaseInsensitiveUTF8CharsEqual(a, b, aEnd, bEnd, &a, &b, &err) || err)
+      return PR_FALSE;
+  }
+  return PR_TRUE;
+}
 
 void TestCaseConversion()
 {
   printf("==========================\n");
   printf("Start case conversion test\n");
   printf("==========================\n");
 
   int i;
   PRUnichar buf[256];
 
-  printf("Test 2 - ToUpper(PRUnichar, PRUnichar*):\n");
+  printf("Test 1 - ToUpper(PRUnichar, PRUnichar*):\n");
   for(i=0;i < T2LEN ; i++)
   {
     PRUnichar ch = ToUpperCase(t2data[i]);
     if(ch != t2result[i])
       printf("\tFailed!! result unexpected %d\n", i);
   }
 
 
-  printf("Test 3 - ToLower(PRUnichar, PRUnichar*):\n");
+  printf("Test 2 - ToLower(PRUnichar, PRUnichar*):\n");
   for(i=0;i < T3LEN; i++)
   {
     PRUnichar ch = ToLowerCase(t3data[i]);
     if(ch != t3result[i])
       printf("\tFailed!! result unexpected %d\n", i);
   }
 
-  printf("Test 4 - ToTitle(PRUnichar, PRUnichar*):\n");
+  printf("Test 3 - ToTitle(PRUnichar, PRUnichar*):\n");
   for(i=0;i < T4LEN; i++)
   {
     PRUnichar ch = ToTitleCase(t4data[i]);
     if(ch != t4result[i])
       printf("\tFailed!! result unexpected %d\n", i);
   }
 
-  printf("Test 5 - ToUpper(PRUnichar*, PRUnichar*, PRUint32):\n");
+  printf("Test 4 - ToUpper(PRUnichar*, PRUnichar*, PRUint32):\n");
   ToUpperCase(t2data, buf, T2LEN);
   for(i = 0; i < T2LEN; i++)
   {
      if(buf[i] != t2result[i])
      {
        printf("\tFailed!! result unexpected %d\n", i);
        break;
      }
   }
 
-  printf("Test 6 - ToLower(PRUnichar*, PRUnichar*, PRUint32):\n");
+  printf("Test 5 - ToLower(PRUnichar*, PRUnichar*, PRUint32):\n");
   ToLowerCase(t3data, buf, T3LEN);
   for(i = 0; i < T3LEN; i++)
   {
      if(buf[i] != t3result[i])
      {
        printf("\tFailed!! result unexpected %d\n", i);
        break;
      }
   }
 
+  printf("Test 6 - CaseInsensitiveCompare UTF-8 (1):\n");
+  if (CaseInsensitiveCompare((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs)))
+    printf("\tFailed!\n");
+  if (!CharByCharCompareEqual((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs)))
+    printf("\tFailed character-by-character comparison!\n");
+
+  printf("Test 7 - CaseInsensitiveCompare UTF-8 (2):\n");
+  if (CaseInsensitiveCompare(t7lhs, t7rhs, strlen(t7lhs), strlen(t7rhs)))
+    printf("\tFailed!\n");
+  if (!CharByCharCompareEqual(t7lhs, t7rhs, sizeof(t7lhs), sizeof(t7rhs)))
+    printf("\tFailed character-by-character comparison!\n");
+
+  printf("Test 8a - CaseInsensitiveCompare UTF-8 (3):\n");
+  if (CaseInsensitiveCompare(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)) != -1)
+    printf("\tFailed!\n");
+  if (CharByCharCompareEqual(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)))
+    printf("\tFailed character-by-character comparison!\n");
+
+  printf("Test 8b - CaseInsensitiveCompare UTF-8 (4):\n");
+  if (CaseInsensitiveCompare(t8rhs, t8lhs, strlen(t8rhs), strlen(t8lhs)) != 1)
+    printf("\tFailed!\n");
+
+  // This test may seem a bit strange.  But it's actually an easy bug to make
+  // if we tried to be clever and say that two ASCII characters x and y are
+  // case-insensitively equal if (x & ~0x20) == (y & ~0x20).
+  printf("Test 9 - CaseInsensitiveCompare UTF-8 (5):\n");
+  if (CaseInsensitiveCompare(t9rhs, t9lhs, strlen(t9lhs), strlen(t9rhs)) != 1)
+    printf("\tFailed!\n");
+  if (CharByCharCompareEqual(t9lhs, t9rhs, strlen(t9lhs), strlen(t9rhs)))
+    printf("\tFailed character-by-character comparison!\n");
+
   printf("===========================\n");
   printf("Finish case conversion test\n");
   printf("===========================\n");
 }
 
+static void FuzzOneInvalidCaseConversion()
+{
+  PRUint32 aLen = rand() % 32;
+  PRUint32 bLen = rand() % 32;
+
+  // We could use a static length-32 buffer for these, but then Valgrind
+  // wouldn't be able to detect errors.
+  unsigned char *aBuf = (unsigned char*)malloc(aLen * sizeof(unsigned char));
+  unsigned char *bBuf = (unsigned char*)malloc(bLen * sizeof(unsigned char));
+
+  for (PRUint32 i = 0; i < aLen; i++) {
+    aBuf[i] = rand() & 0xff;
+  }
+
+  for (PRUint32 i = 0; i < bLen; i++) {
+    bBuf[i] = rand() & 0xff;
+  }
+
+  CaseInsensitiveCompare((char*)aBuf, (char*)bBuf, aLen, bLen);
+  CharByCharCompareEqual((char*)aBuf, (char*)bBuf, aLen, bLen);
+
+  free(aBuf);
+  free(bBuf);
+}
+
+static void FuzzCaseConversion()
+{
+  printf("==========================\n");
+  printf("Start fuzz case conversion\n");
+  printf("==========================\n");
+
+  srand(0);
+
+  printf("Fuzzing invalid UTF8 data...\n");
+  for (PRUint32 i = 0; i < 100000; i++) {
+    FuzzOneInvalidCaseConversion();
+  }
+
+  printf("===========================\n");
+  printf("Finish fuzz case conversion\n");
+  printf("===========================\n");
+}
+
 static void TestEntityConversion(PRUint32 version)
 {
   printf("==============================\n");
   printf("Start nsIEntityConverter Test \n");
   printf("==============================\n");
 
   PRUint32 i;
   nsString inString;
@@ -562,16 +733,20 @@ int main(int argc, char** argv) {
    }
 
    // --------------------------------------------
 
    TestCaseConversion();
 
    // --------------------------------------------
 
+   FuzzCaseConversion();
+
+   // --------------------------------------------
+
    TestEntityConversion(nsIEntityConverter::html40);
 
    // --------------------------------------------
 
    TestSaveAsCharset();
 
    // --------------------------------------------
 
--- a/intl/unicharutil/tools/README.txt
+++ b/intl/unicharutil/tools/README.txt
@@ -1,32 +1,3 @@
-
-* How to generate various properties files in intl/unicharutils/tables and
-  header files in intl/unicharutils/src
-   ( written by Jungshik Shin for bug 210502 
-     https://bugzilla.mozilla.org/show_bug.cgi?id=210502 on 2005-04-05 )
-
-1. Grab the latest version of idnkit at http://www.nic.ad.jp/en/idn/index.html
-   (http://www.nic.ad.jp/ja/idn/idnkit/download/index.html )
-
-2. There are three files we need in the kit:
-   generate_normalize_data.pl, UCD.pm and SparseMap.pm
-
-3. a. Download the following Unicode data files  :
-     CaseFolding.txt,CompositionExclusions.txt, 
-     SpecialCasing.txt, UnicodeData.txt
+Instructions for using these tools are on the Mozilla Wiki.
 
-   b. Rename UnicodeData.txt to UnicodeData-Latest.txt
-
-   The latest version is, as of this writing, in 
-   ftp://ftp.unicode.org/Public/4.1.0/ucd
-
-4. a. Run generate_normalize_data.pl and save the output to a temporary file
-   b. Edit the file 
-      - remove the case folding part (search for 'Lowercase' and delete 
-        all the lines following it) because we have separate scripts for that, 
-      - replace 'unsigned short' and 'unsigned long' with 'PRUnichar' and 
-        'PRUint32'
-   c. Replace the actual source part (after the license) of 
-      intl/unicharutil/src/normalization_data.h with the file you edited.
-
-5. Generate casetable.h and cattable.h with  gencasetable.pl and gencattable.pl
-   Just running them will put casetable.h and cattable.h in the right place.
+https://wiki.mozilla.org/I18n:Updating_Unicode_version
--- a/intl/unicharutil/tools/gencasetable.pl
+++ b/intl/unicharutil/tools/gencasetable.pl
@@ -378,16 +378,48 @@ for($idx=0;$idx<8;$idx++)
 {
    printf OUT "0x%08X", $blk[$idx];
    if($idx != 7) {
       printf OUT ",\n";
    } else {
       printf OUT "\n";
    }
 }
+print OUT "};\n\n";
+
+######################################################################
+#
+# Print out gASCIIToLower table
+#
+######################################################################
+print OUT "// We map x -> x, except for upper-case letters,\n";
+print OUT "// which we map to their lower-case equivalents.\n";
+print OUT "static const PRUint8 gASCIIToLower [128] = {\n";
+
+# Map x -> x, except for upper-case letters, which we map to lower-case
+# letters.
+for($idx=0; $idx < 128; $idx++)
+{
+  if ($idx % 16 == 0) {
+    print OUT "    "
+  }
+
+  if (65 <= $idx && $idx <= 90) {
+    printf OUT "0x%02x", ($idx + 0x20);
+  } else {
+    printf OUT "0x%02x", $idx;
+  }
+
+  if (($idx+1) % 16 != 0) {
+    print OUT ", ";
+  }
+  else {
+    print OUT ",\n";
+  }
+}
 print OUT "};\n";
 
 
 ######################################################################
 #
 # Close files
 #
 ######################################################################
--- a/intl/unicharutil/util/nsUnicharUtils.cpp
+++ b/intl/unicharutil/util/nsUnicharUtils.cpp
@@ -41,137 +41,181 @@
 #include "nsUnicharUtils.h"
 #include "nsUnicharUtilCIID.h"
 
 #include "nsCRT.h"
 #include "nsICaseConversion.h"
 #include "nsServiceManagerUtils.h"
 #include "nsXPCOMStrings.h"
 #include "casetable.h"
+#include "nsUTF8Utils.h"
 
 #include <ctype.h>
 
-// For gUpperToTitle 
+// For gUpperToTitle
 enum {
   kUpperIdx =0,
   kTitleIdx
 };
 
 // For gUpperToTitle
 enum {
   kLowIdx =0,
   kSizeEveryIdx,
   kDiffIdx
 };
 
-#define IS_ASCII(u)       ( 0x0000 == ((u) & 0xFF80))
-#define IS_ASCII_UPPER(u) ((0x0041 <= (u)) && ( (u) <= 0x005a))
-#define IS_ASCII_LOWER(u) ((0x0061 <= (u)) && ( (u) <= 0x007a))
+#define IS_ASCII(u)       ((u) < 0x80)
+#define IS_ASCII_UPPER(u) (('A' <= (u)) && ( (u) <= 'Z' ))
+#define IS_ASCII_LOWER(u) (('a' <= (u)) && ( (u) <= 'z'))
 #define IS_ASCII_ALPHA(u) (IS_ASCII_UPPER(u) || IS_ASCII_LOWER(u))
-#define IS_ASCII_SPACE(u) ( 0x0020 == (u) )
+#define IS_ASCII_SPACE(u) ( ' ' == (u) )
 
 #define IS_NOCASE_CHAR(u)  (0==(1&(gCaseBlocks[(u)>>13]>>(0x001F&((u)>>8)))))
-  
+
 // Size of Tables
 
-#define CASE_MAP_CACHE_SIZE 0x40
-#define CASE_MAP_CACHE_MASK 0x3F
+// Changing these numbers may break UTF-8 caching.  Be careful!
+#define CASE_MAP_CACHE_SIZE 0x100
+#define CASE_MAP_CACHE_MASK 0xFF
 
 struct nsCompressedMap {
   const PRUnichar *mTable;
   PRUint32 mSize;
   PRUint32 mCache[CASE_MAP_CACHE_SIZE];
   PRUint32 mLastBase;
 
   PRUnichar Map(PRUnichar aChar)
   {
-    // no need to worry about thread safety since cached values are
-    // not objects but primitive data types which could be 
-    // accessed in atomic operations. We need to access
-    // the whole 32 bit of cachedData at once in order to make it
-    // thread safe. Never access bits from mCache directly.
+    // We don't need explicit locking here since the cached values are int32s,
+    // which are read and written atomically.  The following code is threadsafe
+    // because we never access bits from mCache directly -- we always first
+    // read the entire entry into a local variable and then mask off the bits
+    // we're interested in.
 
+    // Check the 256-byte cache first and bail with our answer if we can.
     PRUint32 cachedData = mCache[aChar & CASE_MAP_CACHE_MASK];
-    if(aChar == ((cachedData >> 16) & 0x0000FFFF))
-      return (cachedData & 0x0000FFFF);
+    if (aChar == ((cachedData >> 16) & 0x0000FFFF))
+      return cachedData & 0x0000FFFF;
 
-    // try the last index first
-    // store into local variable so we can be thread safe
-    PRUint32 base = mLastBase; 
+    // Now try the last index we looked up, storing it into a local variable
+    // for thread-safety.
+    PRUint32 base = mLastBase;
     PRUnichar res = 0;
-    
-    if (( aChar <=  ((mTable[base+kSizeEveryIdx] >> 8) + 
-                  mTable[base+kLowIdx])) &&
-        ( mTable[base+kLowIdx]  <= aChar )) 
-    {
-        // Hit the last base
-        if(((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) && 
+
+    // Does this character fit in the slot?
+    if ((aChar <= ((mTable[base+kSizeEveryIdx] >> 8) +
+                   mTable[base+kLowIdx])) &&
+        (mTable[base+kLowIdx] <= aChar)) {
+
+      // This character uses the same base as our last lookup, so the
+      // conversion is easy.
+      if (((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) &&
           (0 != ((aChar - mTable[base+kLowIdx]) % 
-                (mTable[base+kSizeEveryIdx] & 0x00FF))))
-        {
-          res = aChar;
-        } else {
-          res = aChar + mTable[base+kDiffIdx];
-        }
+                 (mTable[base+kSizeEveryIdx] & 0x00FF))))
+      {
+        res = aChar;
+      } else {
+        res = aChar + mTable[base+kDiffIdx];
+      }
+
     } else {
-        res = this->Lookup(0, (mSize/2), mSize-1, aChar);
+      // Do the full lookup.
+      res = this->Lookup(0, mSize/2, mSize-1, aChar);
     }
 
+    // Cache the result and return.
     mCache[aChar & CASE_MAP_CACHE_MASK] =
-        (((aChar << 16) & 0xFFFF0000) | (0x0000FFFF & res));
+        ((aChar << 16) & 0xFFFF0000) | (0x0000FFFF & res);
     return res;
   }
 
+  // Takes as arguments the left bound, middle, right bound, and character to
+  // search for.  Executes a binary search.
   PRUnichar Lookup(PRUint32 l,
                    PRUint32 m,
                    PRUint32 r,
                    PRUnichar aChar)
   {
-    PRUint32 base = m*3;
-    if ( aChar >  ((mTable[base+kSizeEveryIdx] >> 8) + 
+    PRUint32 base = m*3; // Every line in the table is 3 units wide.
+
+    // Is aChar past the top of the current table entry?  (The upper byte of
+    // the 'every' entry contains the offset to the end of this entry.)
+    if (aChar > ((mTable[base+kSizeEveryIdx] >> 8) + 
                   mTable[base+kLowIdx])) 
     {
-      if( l > m )
+      if (l > m || l == r)
         return aChar;
+      // Advance one round.
       PRUint32 newm = (m+r+1)/2;
-      if(newm == m)
+      if (newm == m)
         newm++;
-      return this->Lookup(m+1, newm , r, aChar);
-      
-    } else if ( mTable[base+kLowIdx]  > aChar ) {
-      if( r < m )
+      return this->Lookup(m+1, newm, r, aChar);
+
+    // Is aChar below the bottom of the current table entry?
+    } else if (mTable[base+kLowIdx] > aChar) {
+      if (r < m || l == r)
         return aChar;
+      // Advance one round
       PRUint32 newm = (l+m-1)/2;
       if(newm == m)
         newm++;
       return this->Lookup(l, newm, m-1, aChar);
 
-    } else  {
-      if(((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) && 
-        (0 != ((aChar - mTable[base+kLowIdx]) % 
-                (mTable[base+kSizeEveryIdx] & 0x00FF))))
+    // We've found the entry aChar should live in.
+    } else {
+      // Determine if aChar falls in a gap.  (The lower byte of the 'every'
+      // entry contains n for which every nth character from the base is a
+      // character of interest.)
+      if (((mTable[base+kSizeEveryIdx] & 0x00FF) > 0) && 
+          (0 != ((aChar - mTable[base+kLowIdx]) % 
+                 (mTable[base+kSizeEveryIdx] & 0x00FF))))
       {
         return aChar;
       }
-      mLastBase = base; // cache the base
+      // If aChar doesn't fall in the gap, cache and convert.
+      mLastBase = base;
       return aChar + mTable[base+kDiffIdx];
     }
   }
 };
 
 static nsCompressedMap gUpperMap = {
   reinterpret_cast<const PRUnichar*>(&gToUpper[0]),
   gToUpperItems
 };
 
 static nsCompressedMap gLowerMap = {
   reinterpret_cast<const PRUnichar*>(&gToLower[0]),
   gToLowerItems
 };
 
+// We want ToLowerCase(PRUnichar) and ToLowerCaseASCII(PRUnichar) to be fast
+// when they're called from within the case-insensitive comparators, so we
+// define inlined versions.
+static NS_ALWAYS_INLINE PRUnichar
+ToLowerCase_inline(PRUnichar aChar)
+{
+  if (IS_ASCII(aChar)) {
+    return gASCIIToLower[aChar];
+  } else if (IS_NOCASE_CHAR(aChar)) {
+     return aChar;
+  }
+
+  return gLowerMap.Map(aChar);
+}
+
+static NS_ALWAYS_INLINE PRUnichar
+ToLowerCaseASCII_inline(const PRUnichar aChar)
+{
+  if (IS_ASCII(aChar))
+    return gASCIIToLower[aChar];
+  return aChar;
+}
+
 void
 ToLowerCase(nsAString& aString)
 {
   PRUnichar *buf = aString.BeginWriting();
   ToLowerCase(buf, buf, aString.Length());
 }
 
 void
@@ -184,19 +228,17 @@ ToLowerCase(const nsAString& aSource,
   NS_StringGetMutableData(aDest, len, &out);
   NS_ASSERTION(out, "Uh...");
   ToLowerCase(in, out, len);
 }
 
 PRUnichar
 ToLowerCaseASCII(const PRUnichar aChar)
 {
-  if (IS_ASCII_UPPER(aChar))
-    return aChar + 0x0020;
-  return aChar;
+  return ToLowerCaseASCII_inline(aChar);
 }
 
 void
 ToUpperCase(nsAString& aString)
 {
   PRUnichar *buf = aString.BeginWriting();
   ToUpperCase(buf, buf, aString.Length());
 }
@@ -213,141 +255,84 @@ ToUpperCase(const nsAString& aSource,
   ToUpperCase(in, out, len);
 }
 
 #ifdef MOZILLA_INTERNAL_API
 
 PRInt32
 nsCaseInsensitiveStringComparator::operator()(const PRUnichar* lhs,
                                               const PRUnichar* rhs,
-                                              PRUint32 aLength) const
+                                              PRUint32 lLength,
+                                              PRUint32 rLength) const
 {
-  return CaseInsensitiveCompare(lhs, rhs, aLength);
+  return (lLength == rLength) ? CaseInsensitiveCompare(lhs, rhs, lLength) :
+         (lLength > rLength) ? 1 : -1;
 }
 
 PRInt32
-nsCaseInsensitiveStringComparator::operator()(PRUnichar lhs,
-                                              PRUnichar rhs) const
+nsCaseInsensitiveUTF8StringComparator::operator()(const char* lhs,
+                                                  const char* rhs,
+                                                  PRUint32 lLength,
+                                                  PRUint32 rLength) const
 {
-  // see if they're an exact match first
-  if (lhs == rhs)
-    return 0;
-  
-  lhs = ToLowerCase(lhs);
-  rhs = ToLowerCase(rhs);
-  
-  if (lhs == rhs)
-    return 0;
-  else if (lhs < rhs)
-    return -1;
-  else
-    return 1;
+  return CaseInsensitiveCompare(lhs, rhs, lLength, rLength);
 }
 
 PRInt32
 nsASCIICaseInsensitiveStringComparator::operator()(const PRUnichar* lhs,
                                                    const PRUnichar* rhs,
-                                                   PRUint32 aLength) const
+                                                   PRUint32 lLength,
+                                                   PRUint32 rLength) const
 {
-  while (aLength) {
+  if (lLength != rLength) {
+    if (lLength > rLength)
+      return 1;
+    return -1;
+  }
+
+  while (rLength) {
     PRUnichar l = *lhs++;
     PRUnichar r = *rhs++;
     if (l != r) {
-      l = ToLowerCaseASCII(l);
-      r = ToLowerCaseASCII(r);
+      l = ToLowerCaseASCII_inline(l);
+      r = ToLowerCaseASCII_inline(r);
 
       if (l > r)
         return 1;
       else if (r > l)
         return -1;
     }
-    aLength--;
+    rLength--;
   }
 
   return 0;
 }
 
-PRInt32
-nsASCIICaseInsensitiveStringComparator::operator()(PRUnichar lhs,
-                                                   PRUnichar rhs) const
-{
-  // see if they're an exact match first
-  if (lhs == rhs)
-    return 0;
-  
-  lhs = ToLowerCaseASCII(lhs);
-  rhs = ToLowerCaseASCII(rhs);
-  
-  if (lhs == rhs)
-    return 0;
-  else if (lhs < rhs)
-    return -1;
-  else
-    return 1;
-}
-
-
 #endif // MOZILLA_INTERNAL_API
 
-PRInt32
-CaseInsensitiveCompare(const PRUnichar *a,
-                       const PRUnichar *b,
-                       PRUint32 len)
-{
-  NS_ASSERTION(a && b, "Do not pass in invalid pointers!");
-  
-  if (len) {
-    do {
-      PRUnichar c1 = *a++;
-      PRUnichar c2 = *b++;
-      
-      if (c1 != c2) {
-        c1 = ToLowerCase(c1);
-        c2 = ToLowerCase(c2);
-        if (c1 != c2) {
-          if (c1 < c2) {
-            return -1;
-          }
-          return 1;
-        }
-      }
-    } while (--len != 0);
-  }
-  return 0;
-}
-
 PRUnichar
 ToLowerCase(PRUnichar aChar)
 {
-  if (IS_ASCII(aChar)) {
-    if (IS_ASCII_UPPER(aChar))
-      return aChar + 0x0020;
-    else
-      return aChar;
-  } else if (IS_NOCASE_CHAR(aChar)) {
-     return aChar;
-  }
-
-  return gLowerMap.Map(aChar);
+  return ToLowerCase_inline(aChar);
 }
 
 void
 ToLowerCase(const PRUnichar *aIn, PRUnichar *aOut, PRUint32 aLen)
 {
   for (PRUint32 i = 0; i < aLen; i++) {
     aOut[i] = ToLowerCase(aIn[i]);
   }
 }
 
 PRUnichar
 ToUpperCase(PRUnichar aChar)
 {
   if (IS_ASCII(aChar)) {
     if (IS_ASCII_LOWER(aChar))
-      return aChar - 0x0020;
+      return aChar - 0x20;
     else
       return aChar;
   } else if (IS_NOCASE_CHAR(aChar)) {
     return aChar;
   }
 
   return gUpperMap.Map(aChar);
 }
@@ -375,19 +360,183 @@ ToTitleCase(PRUnichar aChar)
     for (PRUint32 i = 0; i < gUpperToTitleItems; i++) {
       if (aChar == gUpperToTitle[(i*2)+kUpperIdx]) {
         return aChar;
       }
     }
   }
 
   PRUnichar upper = gUpperMap.Map(aChar);
-  
+
   if (0x01C0 == ( upper & 0xFFC0)) {
     for (PRUint32 i = 0 ; i < gUpperToTitleItems; i++) {
       if (upper == gUpperToTitle[(i*2)+kUpperIdx]) {
          return gUpperToTitle[(i*2)+kTitleIdx];
       }
     }
   }
 
   return upper;
 }
+
+PRInt32
+CaseInsensitiveCompare(const PRUnichar *a,
+                       const PRUnichar *b,
+                       PRUint32 len)
+{
+  NS_ASSERTION(a && b, "Do not pass in invalid pointers!");
+
+  if (len) {
+    do {
+      PRUnichar c1 = *a++;
+      PRUnichar c2 = *b++;
+
+      if (c1 != c2) {
+        c1 = ToLowerCase_inline(c1);
+        c2 = ToLowerCase_inline(c2);
+        if (c1 != c2) {
+          if (c1 < c2) {
+            return -1;
+          }
+          return 1;
+        }
+      }
+    } while (--len != 0);
+  }
+  return 0;
+}
+
+// Calculates the codepoint of the UTF8 sequence starting at aStr.  Sets aNext
+// to the byte following the end of the sequence.
+//
+// If the sequence is invalid, or if computing the codepoint would take us off
+// the end of the string (as marked by aEnd), returns -1 and does not set
+// aNext.  Note that this function doesn't check that aStr < aEnd -- it assumes
+// you've done that already.
+static NS_ALWAYS_INLINE PRUint32
+GetLowerUTF8Codepoint(const char* aStr, const char* aEnd, const char **aNext)
+{
+  // Convert to unsigned char so that stuffing chars into PRUint32s doesn't
+  // sign extend.
+  const unsigned char *str = (unsigned char*)aStr;
+
+  if (UTF8traits::isASCII(str[0])) {
+    // It's ASCII; just convert to lower-case and return it.
+    *aNext = aStr + 1;
+    return gASCIIToLower[*str];
+  }
+  if (UTF8traits::is2byte(str[0]) && NS_LIKELY(aStr + 1 < aEnd)) {
+    // It's a two-byte sequence, so it looks like
+    //  110XXXXX 10XXXXXX.
+    // This is definitely in the BMP, so we can store straightaway into a
+    // PRUint16.
+
+    PRUint16 c;
+    c  = (str[0] & 0x1F) << 6;
+    c += (str[1] & 0x3F);
+
+    if (!IS_NOCASE_CHAR(c))
+      c = gLowerMap.Map(c);
+
+    *aNext = aStr + 2;
+    return c;
+  }
+  if (UTF8traits::is3byte(str[0]) && NS_LIKELY(aStr + 2 < aEnd)) {
+    // It's a three-byte sequence, so it looks like
+    //  1110XXXX 10XXXXXX 10XXXXXX.
+    // This will just barely fit into 16-bits, so store into a PRUint16.
+
+    PRUint16 c;
+    c  = (str[0] & 0x0F) << 12;
+    c += (str[1] & 0x3F) << 6;
+    c += (str[2] & 0x3F);
+
+    if (!IS_NOCASE_CHAR(c))
+      c = gLowerMap.Map(c);
+
+    *aNext = aStr + 3;
+    return c;
+  }
+  if (UTF8traits::is4byte(str[0]) && NS_LIKELY(aStr + 3 < aEnd)) {
+    // It's a four-byte sequence, so it looks like
+    //   11110XXX 10XXXXXX 10XXXXXX 10XXXXXX.
+    // Unless this is an overlong sequence, the codepoint it encodes definitely
+    // isn't in the BMP, so we don't bother trying to convert it to lower-case.
+
+    PRUint32 c;
+    c  = (str[0] & 0x07) << 18;
+    c += (str[1] & 0x3F) << 12;
+    c += (str[2] & 0x3F) << 6;
+    c += (str[3] & 0x3F);
+
+    *aNext = aStr + 4;
+    return c;
+  }
+
+  // Hm, we don't understand this sequence.
+  return -1;
+}
+
+PRInt32 CaseInsensitiveCompare(const char *aLeft,
+                               const char *aRight,
+                               PRUint32 aLeftBytes,
+                               PRUint32 aRightBytes)
+{
+  const char *leftEnd = aLeft + aLeftBytes;
+  const char *rightEnd = aRight + aRightBytes;
+
+  while (aLeft < leftEnd && aRight < rightEnd) {
+    PRUint32 leftChar = GetLowerUTF8Codepoint(aLeft, leftEnd, &aLeft);
+    if (NS_UNLIKELY(leftChar == PRUint32(-1)))
+      return -1;
+
+    PRUint32 rightChar = GetLowerUTF8Codepoint(aRight, rightEnd, &aRight);
+    if (NS_UNLIKELY(rightChar == PRUint32(-1)))
+      return -1;
+
+    // Now leftChar and rightChar are lower-case, so we can compare them.
+    if (leftChar != rightChar) {
+      if (leftChar > rightChar)
+        return 1;
+      return -1;
+    }
+  }
+
+  // Make sure that if one string is longer than the other we return the
+  // correct result.
+  if (aLeft < leftEnd)
+    return 1;
+  if (aRight < rightEnd)
+    return -1;
+
+  return 0;
+}
+
+PRBool
+CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
+                              const char* aLeftEnd, const char* aRightEnd,
+                              const char** aLeftNext, const char** aRightNext,
+                              PRBool* aErr)
+{
+  NS_ASSERTION(aLeftNext, "Out pointer shouldn't be null.");
+  NS_ASSERTION(aRightNext, "Out pointer shouldn't be null.");
+  NS_ASSERTION(aErr, "Out pointer shouldn't be null.");
+  NS_ASSERTION(aLeft < aLeftEnd, "aLeft must be less than aLeftEnd.");
+  NS_ASSERTION(aRight < aRightEnd, "aRight must be less than aRightEnd.");
+
+  PRUint32 leftChar = GetLowerUTF8Codepoint(aLeft, aLeftEnd, aLeftNext);
+  if (NS_UNLIKELY(leftChar == PRUint32(-1))) {
+    *aErr = PR_TRUE;
+    return PR_FALSE;
+  }
+
+  PRUint32 rightChar = GetLowerUTF8Codepoint(aRight, aRightEnd, aRightNext);
+  if (NS_UNLIKELY(rightChar == PRUint32(-1))) {
+    *aErr = PR_TRUE;
+    return PR_FALSE;
+  }
+
+  // Can't have an error past this point.
+  *aErr = PR_FALSE;
+
+  return leftChar == rightChar;
+}
+
--- a/intl/unicharutil/util/nsUnicharUtils.h
+++ b/intl/unicharutil/util/nsUnicharUtils.h
@@ -73,38 +73,45 @@ inline PRBool IsLowerCase(PRUnichar c) {
 
 #ifdef MOZILLA_INTERNAL_API
 
 class nsCaseInsensitiveStringComparator : public nsStringComparator
 {
 public:
   virtual PRInt32 operator() (const PRUnichar*,
                               const PRUnichar*,
-                              PRUint32 aLength) const;
-  virtual PRInt32 operator() (PRUnichar,
-                              PRUnichar) const;
+                              PRUint32,
+                              PRUint32) const;
+};
+
+class nsCaseInsensitiveUTF8StringComparator : public nsCStringComparator
+{
+public:
+  virtual PRInt32 operator() (const char*,
+                              const char*,
+                              PRUint32,
+                              PRUint32) const;
 };
 
 class nsCaseInsensitiveStringArrayComparator
 {
 public:
   template<class A, class B>
   PRBool Equals(const A& a, const B& b) const {
     return a.Equals(b, nsCaseInsensitiveStringComparator());
   }
 };
 
 class nsASCIICaseInsensitiveStringComparator : public nsStringComparator
 {
 public:
   virtual int operator() (const PRUnichar*,
                           const PRUnichar*,
-                          PRUint32 aLength) const;
-  virtual int operator() (PRUnichar,
-                          PRUnichar) const;
+                          PRUint32,
+                          PRUint32) const;
 };
 
 inline PRBool
 CaseInsensitiveFindInReadable(const nsAString& aPattern,
                               nsAString::const_iterator& aSearchStart,
                               nsAString::const_iterator& aSearchEnd)
 {
   return FindInReadable(aPattern, aSearchStart, aSearchEnd,
@@ -121,9 +128,38 @@ CaseInsensitiveFindInReadable(const nsAS
                         nsCaseInsensitiveStringComparator());
 }
 
 #endif // MOZILLA_INTERNAL_API
 
 PRInt32
 CaseInsensitiveCompare(const PRUnichar *a, const PRUnichar *b, PRUint32 len);
 
+PRInt32
+CaseInsensitiveCompare(const char* aLeft, const char* aRight,
+                       PRUint32 aLeftBytes, PRUint32 aRightBytes);
+
+/**
+ * This function determines whether the UTF-8 sequence pointed to by aLeft is
+ * case-insensitively-equal to the UTF-8 sequence pointed to by aRight.
+ *
+ * aLeftEnd marks the first memory location past aLeft that is not part of
+ * aLeft; aRightEnd similarly marks the end of aRight.
+ *
+ * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd.
+ *
+ * The function stores the addresses of the next characters in the sequence
+ * into aLeftNext and aRightNext.  It's up to the caller to make sure that the
+ * returned pointers are valid -- i.e. the function may return aLeftNext >=
+ * aLeftEnd or aRightNext >= aRightEnd.
+ *
+ * If the function encounters invalid text, it sets aErr to true and returns
+ * false, possibly leaving aLeftNext and aRightNext uninitialized.  If the
+ * function returns true, aErr is guaranteed to be false and both aLeftNext and
+ * aRightNext are guaranteed to be initialized.
+ */
+PRBool
+CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
+                              const char* aLeftEnd, const char* aRightEnd,
+                              const char** aLeftNext, const char** aRightNext,
+                              PRBool* aErr);
+
 #endif  /* nsUnicharUtils_h__ */
--- a/parser/htmlparser/src/nsScannerString.cpp
+++ b/parser/htmlparser/src/nsScannerString.cpp
@@ -589,17 +589,17 @@ FindInReadable( const nsAString& aPatter
         aPattern.BeginReading(aPatternStart);
         aPattern.EndReading(aPatternEnd);
 
           // outer loop keeps searching till we find it or run out of string to search
         while ( !found_it )
           {
               // fast inner loop (that's what it's called, not what it is) looks for a potential match
             while ( aSearchStart != aSearchEnd &&
-                    compare(*aPatternStart, *aSearchStart) )
+                    compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
               ++aSearchStart;
 
               // if we broke out of the `fast' loop because we're out of string ... we're done: no match
             if ( aSearchStart == aSearchEnd )
               break;
 
               // otherwise, we're at a potential match, let's see if we really hit one
             nsAString::const_iterator testPattern(aPatternStart);
@@ -626,17 +626,17 @@ FindInReadable( const nsAString& aPatter
                 if ( testSearch == aSearchEnd )
                   {
                     aSearchStart = aSearchEnd;
                     break;
                   }
 
                   // else if we mismatched ... it's time to advance to the next search position
                   //  and get back into the `fast' loop
-                if ( compare(*testPattern, *testSearch) )
+                if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
                   {
                     ++aSearchStart;
                     break;
                   }
               }
           }
       }
 
--- a/toolkit/components/places/src/SQLFunctions.cpp
+++ b/toolkit/components/places/src/SQLFunctions.cpp
@@ -135,17 +135,17 @@ namespace places {
 
     // The start of aSourceString is considered a word boundary, so start there.
     do {
       // We are on a word boundary, so start by copying the iterators.
       const_wchar_iterator testTokenItr(tokenStart),
                            testSourceItr(sourceStart);
 
       // Keep trying to match the token one by one until it doesn't match.
-      while (!caseInsensitiveCompare(*testTokenItr, *testSourceItr)) {
+      while (!caseInsensitiveCompare(testTokenItr, testSourceItr, 1, 1)) {
         // We matched something, so move down one.
         testTokenItr++;
         testSourceItr++;
 
         // Matched the full token, so we are done!
         if (testTokenItr == tokenEnd)
           return true;
 
--- a/widget/src/windows/nsWindow.cpp
+++ b/widget/src/windows/nsWindow.cpp
@@ -6432,17 +6432,17 @@ PRBool nsWindow::OnMouseWheel(UINT msg, 
 static PRBool
 StringCaseInsensitiveEquals(const PRUnichar* aChars1, const PRUint32 aNumChars1,
                             const PRUnichar* aChars2, const PRUint32 aNumChars2)
 {
   if (aNumChars1 != aNumChars2)
     return PR_FALSE;
 
   nsCaseInsensitiveStringComparator comp;
-  return comp(aChars1, aChars2, aNumChars1) == 0;
+  return comp(aChars1, aChars2, aNumChars1, aNumChars2) == 0;
 }
 
 UINT nsWindow::MapFromNativeToDOM(UINT aNativeKeyCode)
 {
 #ifndef WINCE
   switch (aNativeKeyCode) {
     case VK_OEM_1:     return NS_VK_SEMICOLON;     // 0xBA, For the US standard keyboard, the ';:' key
     case VK_OEM_PLUS:  return NS_VK_ADD;           // 0xBB, For any country/region, the '+' key
--- a/xpcom/string/public/nsAString.h
+++ b/xpcom/string/public/nsAString.h
@@ -75,18 +75,17 @@
    * comparision, see nsUnicharUtils.h)
    */
 class NS_COM nsCaseInsensitiveCStringComparator
     : public nsCStringComparator
   {
     public:
       typedef char char_type;
 
-      virtual int operator()( const char_type*, const char_type*, PRUint32 length ) const;
-      virtual int operator()( char_type, char_type ) const;
+      virtual int operator()( const char_type*, const char_type*, PRUint32, PRUint32 ) const;
   };
 
 class nsCaseInsensitiveCStringArrayComparator
   {
     public:
       template<class A, class B>
       PRBool Equals(const A& a, const B& b) const {
         return a.Equals(b, nsCaseInsensitiveCStringComparator());
--- a/xpcom/string/public/nsTSubstring.h
+++ b/xpcom/string/public/nsTSubstring.h
@@ -45,34 +45,32 @@
    */
 class NS_COM nsTStringComparator_CharT
   {
     public:
       typedef CharT char_type;
 
       nsTStringComparator_CharT() {}
 
-      virtual int operator()( const char_type*, const char_type*, PRUint32 length ) const = 0;
-      virtual int operator()( char_type, char_type ) const = 0;
+      virtual int operator()( const char_type*, const char_type*, PRUint32, PRUint32 ) const = 0;
   };
 
 
   /**
    * The default string comparator (case-sensitive comparision)
    */
 class NS_COM nsTDefaultStringComparator_CharT
     : public nsTStringComparator_CharT
   {
     public:
       typedef CharT char_type;
 
       nsTDefaultStringComparator_CharT() {}
 
-      virtual int operator()( const char_type*, const char_type*, PRUint32 length ) const;
-      virtual int operator()( char_type, char_type ) const;
+      virtual int operator()( const char_type*, const char_type*, PRUint32, PRUint32 ) const;
   };
 
   /**
    * nsTSubstring is the most abstract class in the string hierarchy. It
    * represents a single contiguous array of characters, which may or may not
    * be null-terminated. This type is not instantiated directly.  A sub-class
    * is instantiated instead.  For example, see nsTString.
    *
--- a/xpcom/string/src/nsReadableUtils.cpp
+++ b/xpcom/string/src/nsReadableUtils.cpp
@@ -797,17 +797,17 @@ FindInReadable_Impl( const StringT& aPat
         aPattern.BeginReading(aPatternStart);
         aPattern.EndReading(aPatternEnd);
 
           // outer loop keeps searching till we find it or run out of string to search
         while ( !found_it )
           {
               // fast inner loop (that's what it's called, not what it is) looks for a potential match
             while ( aSearchStart != aSearchEnd &&
-                    compare(*aPatternStart, *aSearchStart) )
+                    compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
               ++aSearchStart;
 
               // if we broke out of the `fast' loop because we're out of string ... we're done: no match
             if ( aSearchStart == aSearchEnd )
               break;
 
               // otherwise, we're at a potential match, let's see if we really hit one
             IteratorT testPattern(aPatternStart);
@@ -834,17 +834,17 @@ FindInReadable_Impl( const StringT& aPat
                 if ( testSearch == aSearchEnd )
                   {
                     aSearchStart = aSearchEnd;
                     break;
                   }
 
                   // else if we mismatched ... it's time to advance to the next search position
                   //  and get back into the `fast' loop
-                if ( compare(*testPattern, *testSearch) )
+                if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
                   {
                     ++aSearchStart;
                     break;
                   }
               }
           }
       }
 
@@ -866,17 +866,17 @@ RFindInReadable_Impl( const StringT& aPa
     --patternEnd;
       // outer loop keeps searching till we run out of string to search
     while ( aSearchStart != searchEnd )
       {
           // Point to the end position of the next possible match
         --searchEnd;
     
           // Check last character, if a match, explore further from here
-        if ( compare(*patternEnd, *searchEnd) == 0 )
+        if ( compare(patternEnd.get(), searchEnd.get(), 1, 1) == 0 )
           {  
               // We're at a potential match, let's see if we really hit one
             IteratorT testPattern(patternEnd);
             IteratorT testSearch(searchEnd);
 
               // inner loop verifies the potential match at the current position
             do
               {
@@ -895,17 +895,17 @@ RFindInReadable_Impl( const StringT& aPa
                     aSearchStart = aSearchEnd;
                     return PR_FALSE;
                   }
     
                   // test previous character for a match
                 --testPattern;
                 --testSearch;
               }
-            while ( compare(*testPattern, *testSearch) == 0 );
+            while ( compare(testPattern.get(), testSearch.get(), 1, 1) == 0 );
           }
       }
 
     aSearchStart = aSearchEnd;
     return PR_FALSE;
   }
 
 NS_COM
--- a/xpcom/string/src/nsStringComparator.cpp
+++ b/xpcom/string/src/nsStringComparator.cpp
@@ -48,28 +48,22 @@
 
   // define nsCStringComparator
 #include "string-template-def-char.h"
 #include "nsTStringComparator.cpp"
 #include "string-template-undef.h"
 
 
 int
-nsCaseInsensitiveCStringComparator::operator()( const char_type* lhs, const char_type* rhs, PRUint32 aLength ) const
+nsCaseInsensitiveCStringComparator::operator()( const char_type* lhs,
+                                                const char_type* rhs,
+                                                PRUint32 lLength,
+                                                PRUint32 rLength ) const
   {
-    PRInt32 result=PRInt32(PL_strncasecmp(lhs, rhs, aLength));
+    if (lLength != rLength)
+      return (lLength > rLength) ? 1 : -1;
+    PRInt32 result=PRInt32(PL_strncasecmp(lhs, rhs, lLength));
     //Egads. PL_strncasecmp is returning *very* negative numbers.
     //Some folks expect -1,0,1, so let's temper its enthusiasm.
     if (result<0) 
       result=-1;
     return result;
   }
-
-int
-nsCaseInsensitiveCStringComparator::operator()( char lhs, char rhs ) const
-  {
-    if (lhs == rhs) return 0;
-    
-    lhs = tolower(lhs);
-    rhs = tolower(rhs);
-
-    return lhs - rhs;
-  }
--- a/xpcom/string/src/nsTStringComparator.cpp
+++ b/xpcom/string/src/nsTStringComparator.cpp
@@ -48,32 +48,27 @@ Compare( const nsTSubstring_CharT::base_
     lhs.BeginReading(leftIter);
     rhs.BeginReading(rightIter);
 
     size_type lLength = leftIter.size_forward();
     size_type rLength = rightIter.size_forward();
     size_type lengthToCompare = NS_MIN(lLength, rLength);
 
     int result;
-    if ( (result = comp(leftIter.get(), rightIter.get(), lengthToCompare)) == 0 )
+    if ( (result = comp(leftIter.get(), rightIter.get(), lengthToCompare, lengthToCompare)) == 0 )
       {
         if ( lLength < rLength )
           result = -1;
         else if ( rLength < lLength )
           result = 1;
         else
           result = 0;
       }
 
     return result;
   }
 
 int
-nsTDefaultStringComparator_CharT::operator()( const char_type* lhs, const char_type* rhs, PRUint32 aLength ) const
+nsTDefaultStringComparator_CharT::operator()( const char_type* lhs, const char_type* rhs, PRUint32 lLength, PRUint32 rLength) const
   {
-    return nsCharTraits<CharT>::compare(lhs, rhs, aLength);
+    return (lLength == rLength) ? nsCharTraits<CharT>::compare(lhs, rhs, lLength) :
+           (lLength > rLength) ? 1 : -1;
   }
-
-int
-nsTDefaultStringComparator_CharT::operator()( char_type lhs, char_type rhs) const
-  {
-    return lhs - rhs;
-  } 
--- a/xpcom/string/src/nsTSubstring.cpp
+++ b/xpcom/string/src/nsTSubstring.cpp
@@ -600,17 +600,17 @@ PRBool
 nsTSubstring_CharT::Equals( const self_type& str ) const
   {
     return mLength == str.mLength && char_traits::compare(mData, str.mData, mLength) == 0;
   }
 
 PRBool
 nsTSubstring_CharT::Equals( const self_type& str, const comparator_type& comp ) const
   {
-    return mLength == str.mLength && comp(mData, str.mData, mLength) == 0;
+    return mLength == str.mLength && comp(mData, str.mData, mLength, str.mLength) == 0;
   }
 
 PRBool
 nsTSubstring_CharT::Equals( const char_type* data ) const
   {
     // unfortunately, some callers pass null :-(
     if (!data)
       {
@@ -630,17 +630,17 @@ nsTSubstring_CharT::Equals( const char_t
     if (!data)
       {
         NS_NOTREACHED("null data pointer");
         return mLength == 0;
       }
 
     // XXX avoid length calculation?
     size_type length = char_traits::length(data);
-    return mLength == length && comp(mData, data, mLength) == 0;
+    return mLength == length && comp(mData, data, mLength, length) == 0;
   }
 
 PRBool
 nsTSubstring_CharT::EqualsASCII( const char* data, size_type len ) const
   {
     return mLength == len && char_traits::compareASCII(mData, data, len) == 0;
   }