Fixing bug 377360. Fix atom table crash due to invalid UTF data in atoms. r=jonas@sicking.cc, sr=peterv@propagandism.org
authorjst@mozilla.org
Wed, 11 Jul 2007 13:46:43 -0700
changeset 3351 0275c640e71262be5e1886fda12d8933be30539c
parent 3350 1f5bb006f7f515d46658477ae676819970218ce2
child 3352 162e4d61b2925c432c524e2d464e9cb249e49376
push idunknown
push userunknown
push dateunknown
reviewersjonas, peterv
bugs377360
milestone1.9a7pre
Fixing bug 377360. Fix atom table crash due to invalid UTF data in atoms. r=jonas@sicking.cc, sr=peterv@propagandism.org
xpcom/ds/nsAtomTable.cpp
xpcom/ds/nsAtomTable.h
xpcom/ds/nsCRT.cpp
xpcom/ds/nsCRT.h
xpcom/string/public/nsUTF8Utils.h
--- a/xpcom/ds/nsAtomTable.cpp
+++ b/xpcom/ds/nsAtomTable.cpp
@@ -35,16 +35,17 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsAtomTable.h"
 #include "nsStaticAtom.h"
 #include "nsString.h"
 #include "nsReadableUtils.h"
+#include "nsUTF8Utils.h"
 #include "nsCRT.h"
 #include "pldhash.h"
 #include "prenv.h"
 
 #define PL_ARENA_CONST_ALIGN_MASK 3
 #include "plarena.h"
 
 class nsStaticAtomWrapper;
@@ -62,18 +63,18 @@ static PLDHashTable gAtomTable;
 
 // this is where we keep the nsStaticAtomWrapper objects
 
 static PLArenaPool* gStaticAtomArena = 0;
 
 class nsStaticAtomWrapper : public nsIAtom
 {
 public:
-  nsStaticAtomWrapper(const nsStaticAtom* aAtom) :
-    mStaticAtom(aAtom)
+  nsStaticAtomWrapper(const nsStaticAtom* aAtom, PRUint32 aLength) :
+    mStaticAtom(aAtom), mLength(aLength)
   {
     MOZ_COUNT_CTOR(nsStaticAtomWrapper);
   }
   ~nsStaticAtomWrapper() {   // no subclasses -> not virtual
     // this is arena allocated and won't be called except in debug
     // builds. If this function ever does anything non-debug, be sure
     // to get rid of the ifdefs in AtomTableClearEntry!
     MOZ_COUNT_DTOR(nsStaticAtomWrapper);
@@ -81,21 +82,31 @@ public:
 
   NS_IMETHOD QueryInterface(REFNSIID aIID,
                             void** aInstancePtr);
   NS_IMETHOD_(nsrefcnt) AddRef(void);
   NS_IMETHOD_(nsrefcnt) Release(void);
 
   NS_DECL_NSIATOM
 
-  const nsStaticAtom* GetStaticAtom() {
+  const nsStaticAtom* GetStaticAtom() const {
     return mStaticAtom;
   }
+
+  PRUint32 getLength() const {
+    return mLength;
+  }
+
 private:
   const nsStaticAtom* mStaticAtom;
+
+  // The length of the string in the static atom. The static atom
+  // (nsStaticAtom) doesn't hold a length, so we keep it here in the
+  // wrapper instead.
+  PRUint32 mLength;
 };
 
 // The |key| pointer in the various PLDHashTable callbacks we use is an
 // AtomTableClearEntry*.  These pointers can come from two places: either a
 // (probably stack-allocated) string key being passed to PL_DHashTableOperate,
 // or an actual entry in the atom table. PLDHashTable reseves the keyHash
 // values 0 and 1 for internal use, which means that the *PLDHashTable code*
 // will never pass an entry whose keyhash is 0 or 1 to our hooks. That means we
@@ -117,25 +128,26 @@ private:
 // XXX This whole mess could be vastly simplified now that pldhash
 // no longer has a getKey callback.
 typedef PRUword PtrBits;
 
 struct AtomTableEntry : public PLDHashEntryHdr {
   // If keyHash > 1, mBits & 0x1 means (mBits & ~0x1) points to an
   // nsStaticAtomWrapper else it points to an nsAtomImpl
   PtrBits mBits;
+  PRUint32 mLength;
 
-  inline AtomTableEntry(const char *aString)
-    : mBits(PtrBits(aString))
+  inline AtomTableEntry(const char *aString, PRUint32 aLength)
+    : mBits(PtrBits(aString)), mLength(aLength)
   {
     keyHash = 0;
   }
 
-  inline AtomTableEntry(const PRUnichar *aString)
-    : mBits(PtrBits(aString))
+  inline AtomTableEntry(const PRUnichar *aString, PRUint32 aLength)
+    : mBits(PtrBits(aString)), mLength(aLength)
   {
     keyHash = 1;
   }
 
   inline PRBool IsStaticAtom() const {
     NS_ASSERTION(keyHash > 1,
                  "IsStaticAtom() called on non-atom AtomTableEntry!");
     return (mBits & 0x1) != 0;
@@ -149,26 +161,28 @@ struct AtomTableEntry : public PLDHashEn
     return keyHash == 1;
   }
 
   inline void SetAtomImpl(AtomImpl* aAtom) {
     NS_ASSERTION(keyHash > 1,
                  "SetAtomImpl() called on non-atom AtomTableEntry!");
     NS_ASSERTION(aAtom, "Setting null atom");
     mBits = PtrBits(aAtom);
+    mLength = aAtom->mLength;
   }
 
   inline void SetStaticAtomWrapper(nsStaticAtomWrapper* aAtom) {
     NS_ASSERTION(keyHash > 1,
                  "SetStaticAtomWrapper() called on non-atom AtomTableEntry!");
     NS_ASSERTION(aAtom, "Setting null atom");
     NS_ASSERTION((PtrBits(aAtom) & ~0x1) == PtrBits(aAtom),
                  "Pointers must align or this is broken");
 
     mBits = PtrBits(aAtom) | 0x1;
+    mLength = aAtom->getLength();
   }
   
   inline void ClearAtom() {
     mBits = nsnull;
   }
 
   inline PRBool HasValue() const {
     NS_ASSERTION(keyHash > 1,
@@ -178,17 +192,17 @@ struct AtomTableEntry : public PLDHashEn
 
   // these accessors assume that you already know the type
   inline AtomImpl *GetAtomImpl() const {
     NS_ASSERTION(keyHash > 1,
                  "GetAtomImpl() called on non-atom AtomTableEntry!");
     NS_ASSERTION(!IsStaticAtom(), "This is a static atom, not an AtomImpl");
     return (AtomImpl*) (mBits & ~0x1);
   }
-  
+
   inline nsStaticAtomWrapper *GetStaticAtomWrapper() const {
     NS_ASSERTION(keyHash > 1,
                  "GetStaticAtomWrapper() called on non-atom AtomTableEntry!");
     NS_ASSERTION(IsStaticAtom(), "This is an AtomImpl, not a static atom");
     return (nsStaticAtomWrapper*) (mBits & ~0x1);
   }
 
   inline const nsStaticAtom* GetStaticAtom() const {
@@ -218,16 +232,21 @@ struct AtomTableEntry : public PLDHashEn
   // get the string buffer
   inline const PRUnichar* getUTF16String() const {
     NS_ASSERTION(keyHash == 1,
                  "getUTF16String() called on non-UTF16 AtomTableEntry!");
 
     return (PRUnichar *)mBits;
   }
 
+  // get the string length
+  inline PRUint32 getLength() const {
+    return mLength;
+  }
+
   // get an addreffed nsIAtom - not using already_AddRef'ed atom
   // because the callers are not (and should not be) using nsCOMPtr
   inline nsIAtom* GetAtom() const {
     NS_ASSERTION(keyHash > 1,
                  "GetAtom() called on non-atom AtomTableEntry!");
 
     nsIAtom* result;
     
@@ -243,45 +262,55 @@ struct AtomTableEntry : public PLDHashEn
 };
 
 PR_STATIC_CALLBACK(PLDHashNumber)
 AtomTableGetHash(PLDHashTable *table, const void *key)
 {
   const AtomTableEntry *e = static_cast<const AtomTableEntry*>(key);
 
   if (e->IsUTF16String()) {
-    return nsCRT::HashCodeAsUTF8(e->getUTF16String());
+    return nsCRT::HashCodeAsUTF8(e->getUTF16String(), e->getLength());;
   }
 
   NS_ASSERTION(e->IsUTF8String(),
                "AtomTableGetHash() called on non-string-key AtomTableEntry!");
 
-  return nsCRT::HashCode(e->getUTF8String());
+  return nsCRT::HashCode(e->getUTF8String(), e->getLength());
 }
 
 PR_STATIC_CALLBACK(PRBool)
 AtomTableMatchKey(PLDHashTable *table, const PLDHashEntryHdr *entry,
                   const void *key)
 {
   const AtomTableEntry *he = static_cast<const AtomTableEntry*>(entry);
   const AtomTableEntry *strKey = static_cast<const AtomTableEntry*>(key);
 
   const char *atomString = he->getAtomString();
 
   if (strKey->IsUTF16String()) {
     return
-      CompareUTF8toUTF16(nsDependentCString(atomString),
-                         nsDependentString(strKey->getUTF16String())) == 0;
+      CompareUTF8toUTF16(nsDependentCString(atomString, he->getLength()),
+                         nsDependentString(strKey->getUTF16String(),
+                                           strKey->getLength())) == 0;
   }
 
+  PRUint32 length = he->getLength();
+  if (length != strKey->getLength()) {
+    return PR_FALSE;
+  }
+
+  const char *str;
+
   if (strKey->IsUTF8String()) {
-    return strcmp(atomString, strKey->getUTF8String()) == 0;
+    str = strKey->getUTF8String();
+  } else {
+    str = strKey->getAtomString();
   }
 
-  return strcmp(atomString, strKey->getAtomString()) == 0;
+  return memcmp(atomString, str, length * sizeof(char)) == 0;
 }
 
 PR_STATIC_CALLBACK(void)
 AtomTableClearEntry(PLDHashTable *table, PLDHashEntryHdr *entry)
 {
   AtomTableEntry *he = static_cast<AtomTableEntry*>(entry);
   
   if (!he->IsStaticAtom()) {
@@ -300,25 +329,38 @@ AtomTableClearEntry(PLDHashTable *table,
   }
   else {
     he->GetStaticAtomWrapper()->~nsStaticAtomWrapper();
   }
   
   he->ClearAtom();
 }
 
+PR_STATIC_CALLBACK(PRBool)
+AtomTableInitEntry(PLDHashTable *table, PLDHashEntryHdr *entry,
+                   const void *key)
+{
+  AtomTableEntry *he = NS_STATIC_CAST(AtomTableEntry*, entry);
+  const AtomTableEntry *strKey = NS_STATIC_CAST(const AtomTableEntry*, key);
+
+  he->mLength = strKey->getLength();
+
+  return PR_TRUE;
+}
+
+
 static const PLDHashTableOps AtomTableOps = {
   PL_DHashAllocTable,
   PL_DHashFreeTable,
   AtomTableGetHash,
   AtomTableMatchKey,
   PL_DHashMoveEntryStub,
   AtomTableClearEntry,
   PL_DHashFinalizeStub,
-  NULL
+  AtomTableInitEntry
 };
 
 
 #ifdef DEBUG
 
 PR_STATIC_CALLBACK(PLDHashOperator)
 DumpAtomLeaks(PLDHashTable *table, PLDHashEntryHdr *he,
               PRUint32 index, void *arg)
@@ -386,17 +428,17 @@ AtomImpl::AtomImpl()
 
 AtomImpl::~AtomImpl()
 {
   NS_PRECONDITION(gAtomTable.ops, "uninitialized atom hashtable");
   // Permanent atoms are removed from the hashtable at shutdown, and we
   // don't want to remove them twice.  See comment above in
   // |AtomTableClearEntry|.
   if (!IsPermanentInDestructor()) {
-    AtomTableEntry key(mString);
+    AtomTableEntry key(mString, mLength);
     PL_DHashTableOperate(&gAtomTable, &key, PL_DHASH_REMOVE);
     if (gAtomTable.entryCount == 0) {
       PL_DHashTableFinish(&gAtomTable);
       NS_ASSERTION(gAtomTable.entryCount == 0,
                    "PL_DHashTableFinish changed the entry count");
     }
   }
 }
@@ -449,60 +491,61 @@ void* AtomImpl::operator new ( size_t si
      */
   size += aString.Length() * sizeof(char);
   AtomImpl* ii = static_cast<AtomImpl*>(::operator new(size));
   NS_ENSURE_TRUE(ii, nsnull);
 
   char* toBegin = &ii->mString[0];
   nsACString::const_iterator fromBegin, fromEnd;
   *copy_string(aString.BeginReading(fromBegin), aString.EndReading(fromEnd), toBegin) = '\0';
+  ii->mLength = aString.Length();
   return ii;
 }
 
 void* PermanentAtomImpl::operator new ( size_t size, AtomImpl* aAtom ) CPP_THROW_NEW {
   NS_ASSERTION(!aAtom->IsPermanent(),
                "converting atom that's already permanent");
 
   // Just let the constructor overwrite the vtable pointer.
   return aAtom;
 }
 
 NS_IMETHODIMP 
 AtomImpl::ToString(nsAString& aBuf)
 {
-  CopyUTF8toUTF16(nsDependentCString(mString), aBuf);
+  CopyUTF8toUTF16(nsDependentCString(mString, mLength), aBuf);
   return NS_OK;
 }
 
 NS_IMETHODIMP
 AtomImpl::ToUTF8String(nsACString& aBuf)
 {
-  aBuf.Assign(mString);
+  aBuf.Assign(mString, mLength);
   return NS_OK;
 }
 
 NS_IMETHODIMP 
 AtomImpl::GetUTF8String(const char **aResult)
 {
   NS_PRECONDITION(aResult, "null out param");
   *aResult = mString;
   return NS_OK;
 }
 
 NS_IMETHODIMP
 AtomImpl::EqualsUTF8(const nsACString& aString, PRBool* aResult)
 {
-  *aResult = aString.Equals(mString);
+  *aResult = aString.Equals(nsDependentCString(mString, mLength));
   return NS_OK;
 }
 
 NS_IMETHODIMP
 AtomImpl::Equals(const nsAString& aString, PRBool* aResult)
 {
-  *aResult = CompareUTF8toUTF16(nsDependentCString(mString),
+  *aResult = CompareUTF8toUTF16(nsDependentCString(mString, mLength),
                                 PromiseFlatString(aString)) == 0;
   return NS_OK;
 }
 
 //----------------------------------------------------------------------
 
 // wrapper class for the nsStaticAtom struct
 
@@ -529,139 +572,149 @@ nsStaticAtomWrapper::GetUTF8String(const
 
 NS_IMETHODIMP
 nsStaticAtomWrapper::ToString(nsAString& aBuf)
 {
   // static should always be always ASCII, to allow tools like gperf
   // to generate the tables, and to avoid unnecessary conversion
   NS_ASSERTION(nsCRT::IsAscii(mStaticAtom->mString),
                "Data loss - atom should be ASCII");
-  CopyASCIItoUTF16(nsDependentCString(mStaticAtom->mString), aBuf);
+  CopyASCIItoUTF16(nsDependentCString(mStaticAtom->mString, mLength), aBuf);
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsStaticAtomWrapper::ToUTF8String(nsACString& aBuf)
 {
   aBuf.Assign(mStaticAtom->mString);
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsStaticAtomWrapper::EqualsUTF8(const nsACString& aString, PRBool* aResult)
 {
-  *aResult = aString.Equals(mStaticAtom->mString);
+  *aResult = aString.Equals(nsDependentCString(mStaticAtom->mString, mLength));
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsStaticAtomWrapper::Equals(const nsAString& aString, PRBool* aResult)
 {
-  *aResult = CompareUTF8toUTF16(nsDependentCString(mStaticAtom->mString),
+  *aResult = CompareUTF8toUTF16(nsDependentCString(mStaticAtom->mString,
+                                                   mLength),
                                 PromiseFlatString(aString)) == 0;
   return NS_OK;
 }
 //----------------------------------------------------------------------
 
 static nsStaticAtomWrapper*
-WrapStaticAtom(const nsStaticAtom* aAtom)
+WrapStaticAtom(const nsStaticAtom* aAtom, PRUint32 aLength)
 {
   if (!gStaticAtomArena) {
     gStaticAtomArena = new PLArenaPool;
     if (!gStaticAtomArena)
       return nsnull;
     
     PL_INIT_ARENA_POOL(gStaticAtomArena, "nsStaticAtomArena", 4096);
   }
-  
+
   void* mem;
-  PL_ARENA_ALLOCATE(mem, gStaticAtomArena, sizeof(nsStaticAtom));
-  
+  PL_ARENA_ALLOCATE(mem, gStaticAtomArena, sizeof(nsStaticAtomWrapper));
+
   nsStaticAtomWrapper* wrapper =
-    new (mem) nsStaticAtomWrapper(aAtom);
-  
+    new (mem) nsStaticAtomWrapper(aAtom, aLength);
+
   return wrapper;
 }
 
 static inline AtomTableEntry*
-GetAtomHashEntry(const char* aString)
+GetAtomHashEntry(const char* aString, PRUint32 aLength)
 {
   if (!gAtomTable.ops &&
       !PL_DHashTableInit(&gAtomTable, &AtomTableOps, 0,
                          sizeof(AtomTableEntry), 2048)) {
     gAtomTable.ops = nsnull;
     return nsnull;
   }
 
-  AtomTableEntry key(aString);
+  AtomTableEntry key(aString, aLength);
   return static_cast<AtomTableEntry*>
                     (PL_DHashTableOperate(&gAtomTable, &key, PL_DHASH_ADD));
 }
 
 static inline AtomTableEntry*
-GetAtomHashEntry(const PRUnichar* aString)
+GetAtomHashEntry(const PRUnichar* aString, PRUint32 aLength)
 {
   if (!gAtomTable.ops &&
       !PL_DHashTableInit(&gAtomTable, &AtomTableOps, 0,
                          sizeof(AtomTableEntry), 2048)) {
     gAtomTable.ops = nsnull;
     return nsnull;
   }
 
-  AtomTableEntry key(aString);
+  AtomTableEntry key(aString, aLength);
   return static_cast<AtomTableEntry*>
                     (PL_DHashTableOperate(&gAtomTable, &key, PL_DHASH_ADD));
 }
 
 NS_COM nsresult
 NS_RegisterStaticAtoms(const nsStaticAtom* aAtoms, PRUint32 aAtomCount)
 {
   // this does two things:
   // 1) wraps each static atom in a wrapper, if necessary
   // 2) initializes the address pointed to by each mBits slot
   
   for (PRUint32 i=0; i<aAtomCount; i++) {
     NS_ASSERTION(nsCRT::IsAscii(aAtoms[i].mString),
                  "Static atoms must be ASCII!");
+
+    PRUint32 stringLen = strlen(aAtoms[i].mString);
+
     AtomTableEntry *he =
-      GetAtomHashEntry(aAtoms[i].mString);
-    
+      GetAtomHashEntry(aAtoms[i].mString, stringLen);
+
     if (he->HasValue() && aAtoms[i].mAtom) {
       // there already is an atom with this name in the table.. but we
       // still have to update mBits
       if (!he->IsStaticAtom() && !he->GetAtomImpl()->IsPermanent()) {
         // since we wanted to create a static atom but there is
         // already one there, we convert it to a non-refcounting
         // permanent atom
         PromoteToPermanent(he->GetAtomImpl());
       }
       
       // and now, if the consumer wants to remember this value in a
       // slot, we do so
       if (aAtoms[i].mAtom)
         *aAtoms[i].mAtom = he->GetAtom();
     }
-    
     else {
-      nsStaticAtomWrapper* atom = WrapStaticAtom(&aAtoms[i]);
+      nsStaticAtomWrapper* atom = WrapStaticAtom(&aAtoms[i], stringLen);
       NS_ASSERTION(atom, "Failed to wrap static atom");
 
       // but even if atom is null, no real difference in code..
       he->SetStaticAtomWrapper(atom);
       if (aAtoms[i].mAtom)
         *aAtoms[i].mAtom = atom;
     }
   }
   return NS_OK;
 }
 
 NS_COM nsIAtom*
 NS_NewAtom(const char* aUTF8String)
 {
-  AtomTableEntry *he = GetAtomHashEntry(aUTF8String);
+  return NS_NewAtom(nsDependentCString(aUTF8String));
+}
+
+NS_COM nsIAtom*
+NS_NewAtom(const nsACString& aUTF8String)
+{
+  AtomTableEntry *he = GetAtomHashEntry(PromiseFlatCString(aUTF8String).get(),
+                                        aUTF8String.Length());
 
   if (!he) {
     return nsnull;
   }
 
   NS_ASSERTION(!he->IsUTF8String() && !he->IsUTF16String(),
                "Atom hash entry is string?  Should be atom!");
 
@@ -678,25 +731,26 @@ NS_NewAtom(const char* aUTF8String)
     return nsnull;
   }
 
   NS_ADDREF(atom);
   return atom;
 }
 
 NS_COM nsIAtom*
-NS_NewAtom(const nsACString& aUTF8String)
+NS_NewAtom(const PRUnichar* aUTF16String)
 {
-  return NS_NewAtom(PromiseFlatCString(aUTF8String).get());
+  return NS_NewAtom(nsDependentString(aUTF16String));
 }
 
 NS_COM nsIAtom*
-NS_NewAtom(const PRUnichar* aUTF16String)
+NS_NewAtom(const nsAString& aUTF16String)
 {
-  AtomTableEntry *he = GetAtomHashEntry(aUTF16String);
+  AtomTableEntry *he = GetAtomHashEntry(PromiseFlatString(aUTF16String).get(),
+                                        aUTF16String.Length());
 
   if (he->HasValue())
     return he->GetAtom();
 
   // MSVC.NET doesn't like passing a temporary NS_ConvertUTF16toUTF8() to
   // operator new, so declare one as a local instead.
   NS_ConvertUTF16toUTF8 str(aUTF16String);
   AtomImpl* atom = new (str) AtomImpl();
@@ -706,31 +760,26 @@ NS_NewAtom(const PRUnichar* aUTF16String
     return nsnull;
   }
 
   NS_ADDREF(atom);
   return atom;
 }
 
 NS_COM nsIAtom*
-NS_NewAtom(const nsAString& aUTF16String)
-{
-  return NS_NewAtom(PromiseFlatString(aUTF16String).get());
-}
-
-NS_COM nsIAtom*
 NS_NewPermanentAtom(const char* aUTF8String)
 {
   return NS_NewPermanentAtom(nsDependentCString(aUTF8String));
 }
 
 NS_COM nsIAtom*
 NS_NewPermanentAtom(const nsACString& aUTF8String)
 {
-  AtomTableEntry *he = GetAtomHashEntry(PromiseFlatCString(aUTF8String).get());
+  AtomTableEntry *he = GetAtomHashEntry(PromiseFlatCString(aUTF8String).get(),
+                                        aUTF8String.Length());
 
   if (he->HasValue() && he->IsStaticAtom())
     return he->GetStaticAtomWrapper();
   
   // either there is no atom and we'll create an AtomImpl,
   // or there is an existing AtomImpl
   AtomImpl* atom = he->GetAtomImpl();
   
--- a/xpcom/ds/nsAtomTable.h
+++ b/xpcom/ds/nsAtomTable.h
@@ -72,16 +72,19 @@ public:
 
   void operator delete(void* ptr) {
     ::operator delete(ptr);
   }
 
   // for |#ifdef NS_BUILD_REFCNT_LOGGING| access to reference count
   nsrefcnt GetRefCount() { return mRefCnt; }
 
+  // The length of the string in the atom.
+  PRUint32 mLength;
+
   // Actually more; 0 terminated. This slot is reserved for the
   // terminating zero.
   char mString[1];
 };
 
 /**
  * A non-refcounted implementation of nsIAtom.
  */
--- a/xpcom/ds/nsCRT.cpp
+++ b/xpcom/ds/nsCRT.cpp
@@ -200,118 +200,148 @@ PRUint32 nsCRT::HashCode(const char* str
   while ( (c = *s++) )
     ADD_TO_HASHVAL(h, c);
 
   if ( resultingStrLen )
     *resultingStrLen = (s-str)-1;
   return h;
 }
 
+PRUint32 nsCRT::HashCode(const char* start, PRUint32 length)
+{
+  PRUint32 h = 0;
+  const char* s = start;
+  const char* end = start + length;
+
+  unsigned char c;
+  while ( s < end ) {
+    c = *s++;
+    ADD_TO_HASHVAL(h, c);
+  }
+
+  return h;
+}
+
 PRUint32 nsCRT::HashCode(const PRUnichar* str, PRUint32* resultingStrLen)
 {
   PRUint32 h = 0;
   const PRUnichar* s = str;
 
   if (!str) return h;
 
   PRUnichar c;
   while ( (c = *s++) )
     ADD_TO_HASHVAL(h, c);
 
   if ( resultingStrLen )
     *resultingStrLen = (s-str)-1;
   return h;
 }
 
-PRUint32 nsCRT::HashCodeAsUTF8(const PRUnichar* str, PRUint32* resultingStrLen)
+PRUint32 nsCRT::HashCodeAsUTF8(const PRUnichar* start, PRUint32 length)
 {
   PRUint32 h = 0;
-  const PRUnichar* s = str;
+  const PRUnichar* s = start;
+  const PRUnichar* end = start + length;
 
-  {
-    PRUint16 W1 = 0;      // the first UTF-16 word in a two word tuple
-    PRUint32 U = 0;       // the current char as UCS-4
-    int code_length = 0;  // the number of bytes in the UTF-8 sequence for the current char
+  PRUint16 W1 = 0;      // the first UTF-16 word in a two word tuple
+  PRUint32 U = 0;       // the current char as UCS-4
+  int code_length = 0;  // the number of bytes in the UTF-8 sequence for the current char
 
-    PRUint16 W;
-    while ( (W = *s++) )
-      {
-          /*
-           * On the fly, decoding from UTF-16 (and/or UCS-2) into UTF-8 as per
-           *  http://www.ietf.org/rfc/rfc2781.txt
-           *  http://www.ietf.org/rfc/rfc3629.txt
-           */
+  PRUint16 W;
+  while ( s < end )
+    {
+      W = *s++;
+        /*
+         * On the fly, decoding from UTF-16 (and/or UCS-2) into UTF-8 as per
+         *  http://www.ietf.org/rfc/rfc2781.txt
+         *  http://www.ietf.org/rfc/rfc3629.txt
+         */
+
+      if ( !W1 )
+        {
+          if ( !IS_SURROGATE(W) )
+            {
+              U = W;
+              if ( W <= 0x007F )
+                code_length = 1;
+              else if ( W <= 0x07FF )
+                code_length = 2;
+              else
+                code_length = 3;
+            }
+          else if ( NS_IS_HIGH_SURROGATE(W) && s < end)
+            {
+              W1 = W;
 
-        if ( !W1 )
-          {
-            if ( !IS_SURROGATE(W) )
-              {
-                U = W;
-                if ( W <= 0x007F )
-                  code_length = 1;
-                else if ( W <= 0x07FF )
-                  code_length = 2;
-                else
-                  code_length = 3;
-              }
-            else if ( NS_IS_HIGH_SURROGATE(W) )
-              W1 = W;
-#ifdef DEBUG
-            else NS_ERROR("Got low surrogate but no previous high surrogate");
-#endif
-          }
-        else
-          {
-              // as required by the standard, this code is careful to
-              //  throw out illegal sequences
+              continue;
+            }
+          else
+            {
+              // Treat broken characters as the Unicode replacement
+              // character 0xFFFD
+              U = 0xFFFD;
+
+              code_length = 3;
+
+              NS_WARNING("Got low surrogate but no previous high surrogate");
+            }
+        }
+      else
+        {
+          // as required by the standard, this code is careful to
+          // throw out illegal sequences
 
-            if ( NS_IS_LOW_SURROGATE(W) )
-              {
-                U = SURROGATE_TO_UCS4(W1, W);
-                NS_ASSERTION(IS_VALID_CHAR(U), "How did this happen?");
-                code_length = 4;
-              }
-#ifdef DEBUG
-            else NS_ERROR("High surrogate not followed by low surrogate");
-#endif
-            W1 = 0;
-          }
+          if ( NS_IS_LOW_SURROGATE(W) )
+            {
+              U = SURROGATE_TO_UCS4(W1, W);
+              NS_ASSERTION(IS_VALID_CHAR(U), "How did this happen?");
+              code_length = 4;
+            }
+          else
+            {
+              // Treat broken characters as the Unicode replacement
+              // character 0xFFFD
+              U = 0xFFFD;
+
+              code_length = 3;
+
+              NS_WARNING("High surrogate not followed by low surrogate");
+            }
+
+          W1 = 0;
+        }
 
 
-        if ( code_length > 0 )
-          {
-            static const PRUint16 sBytePrefix[5]  = { 0x0000, 0x0000, 0x00C0, 0x00E0, 0x00F0  };
-            static const PRUint16 sShift[5]       = { 0, 0, 6, 12, 18 };
+      static const PRUint16 sBytePrefix[5]  = { 0x0000, 0x0000, 0x00C0, 0x00E0, 0x00F0  };
+      static const PRUint16 sShift[5]       = { 0, 0, 6, 12, 18 };
 
-              /*
-               *  Unlike the algorithm in http://www.ietf.org/rfc/rfc3629.txt
-               *  we must calculate the bytes in left to right order so that
-               *  our hash result matches what the narrow version would calculate
-               *  on an already UTF-8 string.
-               */
-
-              // hash the first (and often, only, byte)
-            ADD_TO_HASHVAL(h, (sBytePrefix[code_length] |
-                               (U>>sShift[code_length])));
+      /*
+       *  Unlike the algorithm in
+       *  http://www.ietf.org/rfc/rfc3629.txt we must calculate the
+       *  bytes in left to right order so that our hash result
+       *  matches what the narrow version would calculate on an
+       *  already UTF-8 string.
+       */
 
-              // an unrolled loop for hashing any remaining bytes in this sequence
-            switch ( code_length )
-              {  // falling through in each case
-                case 4:   ADD_TO_HASHVAL(h, (0x80 | ((U>>12) & 0x003F)));
-                case 3:   ADD_TO_HASHVAL(h, (0x80 | ((U>>6 ) & 0x003F)));
-                case 2:   ADD_TO_HASHVAL(h, (0x80 | ( U      & 0x003F)));
-                default:  code_length = 0;
-                  break;
-              }
-          }
-      }
-  }
+      // hash the first (and often, only, byte)
+      ADD_TO_HASHVAL(h, (sBytePrefix[code_length] | (U>>sShift[code_length])));
 
-  if ( resultingStrLen )
-    *resultingStrLen = (s-str)-1;
+      // an unrolled loop for hashing any remaining bytes in this
+      // sequence
+      switch ( code_length )
+        {  // falling through in each case
+          case 4:   ADD_TO_HASHVAL(h, (0x80 | ((U>>12) & 0x003F)));
+          case 3:   ADD_TO_HASHVAL(h, (0x80 | ((U>>6 ) & 0x003F)));
+          case 2:   ADD_TO_HASHVAL(h, (0x80 | ( U      & 0x003F)));
+          default:  code_length = 0;
+            break;
+        }
+    }
+
   return h;
 }
 
 PRUint32 nsCRT::BufferHashCode(const PRUnichar* s, PRUint32 len)
 {
   PRUint32 h = 0;
   const PRUnichar* done = s + len;
 
--- a/xpcom/ds/nsCRT.h
+++ b/xpcom/ds/nsCRT.h
@@ -223,26 +223,30 @@ public:
   	shared_allocator.deallocate(str, 0 /*we never new or kept the size*/);
   }
 
   // Computes the hashcode for a c-string, returns the string length as
   // an added bonus.
   static PRUint32 HashCode(const char* str,
                            PRUint32* resultingStrLen = nsnull);
 
+  // Computes the hashcode for a length number of bytes of c-string data.
+  static PRUint32 HashCode(const char* start, PRUint32 length);
+
   // Computes the hashcode for a ucs2 string, returns the string length
   // as an added bonus.
   static PRUint32 HashCode(const PRUnichar* str,
                            PRUint32* resultingStrLen = nsnull);
 
-  // Computes a hashcode for a ucs2 string that returns the same thing
-  // as the HashCode method taking a |char*| would if the string were
-  // converted to UTF8.  Returns the string length as an added bonus.
-  static PRUint32 HashCodeAsUTF8(const PRUnichar* str,
-                                 PRUint32* resultingStrLen = nsnull);
+  // Computes a hashcode for a length number of UTF16
+  // characters. Returns the same hash code as the HashCode method
+  // taking a |char*| would if the string were converted to UTF8. This
+  // hash function treats invalid UTF16 data as 0xFFFD (0xEFBFBD in
+  // UTF-8).
+  static PRUint32 HashCodeAsUTF8(const PRUnichar* start, PRUint32 length);
 
   // Computes the hashcode for a buffer with a specified length.
   static PRUint32 BufferHashCode(const PRUnichar* str, PRUint32 strLen);
 
   // String to longlong
   static PRInt64 atoll(const char *str);
   
   static char ToUpper(char aChar) { return NS_ToUpper(aChar); }
--- a/xpcom/string/public/nsUTF8Utils.h
+++ b/xpcom/string/public/nsUTF8Utils.h
@@ -549,18 +549,19 @@ class CalculateUTF8Length
       }
 
     private:
       size_t mLength;
       PRBool mErrorEncountered;
   };
 
 /**
- * A character sink (see |copy_string| in nsAlgorithm.h) for converting
- * UTF-16 to UTF-8.
+ * A character sink (see |copy_string| in nsAlgorithm.h) for
+ * converting UTF-16 to UTF-8. Treats invalid UTF-16 data as 0xFFFD
+ * (0xEFBFBD in UTF-8).
  */
 class ConvertUTF16toUTF8
   {
     public:
       typedef nsAString::char_type  value_type;
       typedef nsACString::char_type buffer_type;
 
     // The error handling here is more lenient than that in
@@ -617,25 +618,36 @@ class ConvertUTF16toUTF8
                     // 0001 0000-001F FFFF
                     *out++ = 0xF0 | (char)(ucs4 >> 18);
                     *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
                     *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
                     *out++ = 0x80 | (char)(0x003F & ucs4);
                   }
                 else
                   {
-                    NS_ERROR("got a High Surrogate but no low surrogate");
-                    // output nothing.
+                    // Treat broken characters as the Unicode
+                    // replacement character 0xFFFD (0xEFBFBD in
+                    // UTF-8)
+                    *out++ = 0xEF;
+                    *out++ = 0xBF;
+                    *out++ = 0xBD;
+
+                    NS_WARNING("got a High Surrogate but no low surrogate");
                   }
               }
             else // U+DC00 - U+DFFF
               {
+                // Treat broken characters as the Unicode replacement
+                // character 0xFFFD (0xEFBFBD in UTF-8)
+                *out++ = 0xEF;
+                *out++ = 0xBF;
+                *out++ = 0xBD;
+
                 // DC00- DFFF - Low Surrogate
-                NS_ERROR("got a low Surrogate but no high surrogate");
-                // output nothing.
+                NS_WARNING("got a low Surrogate but no high surrogate");
               }
           }
 
         mBuffer = out;
         return N;
       }
 
     void write_terminator()
@@ -645,17 +657,18 @@ class ConvertUTF16toUTF8
 
     private:
       buffer_type* const mStart;
       buffer_type* mBuffer;
   };
 
 /**
  * A character sink (see |copy_string| in nsAlgorithm.h) for computing
- * the number of bytes a UTF-16 would occupy in UTF-8.
+ * the number of bytes a UTF-16 would occupy in UTF-8. Treats invalid
+ * UTF-16 data as 0xFFFD (0xEFBFBD in UTF-8).
  */
 class CalculateUTF8Size
   {
     public:
       typedef nsAString::char_type value_type;
 
     CalculateUTF8Size()
       : mSize(0) { }
@@ -682,20 +695,33 @@ class CalculateUTF8Size
                     NS_ERROR("Surrogate pair split between fragments");
                     return N;
                   }
                 c = *p;
 
                 if (0xDC00 == (0xFC00 & c))
                   mSize += 4;
                 else
-                  NS_ERROR("got a high Surrogate but no low surrogate");
+                  {
+                    // Treat broken characters as the Unicode
+                    // replacement character 0xFFFD (0xEFBFBD in
+                    // UTF-8)
+                    mSize += 3;
+
+                    NS_WARNING("got a high Surrogate but no low surrogate");
+                  }
               }
             else // U+DC00 - U+DFFF
-              NS_ERROR("got a low Surrogate but no high surrogate");
+              {
+                // Treat broken characters as the Unicode replacement
+                // character 0xFFFD (0xEFBFBD in UTF-8)
+                mSize += 3;
+
+                NS_WARNING("got a low Surrogate but no high surrogate");
+              }
           }
 
         return N;
       }
 
     private:
       size_t mSize;
   };