Bug 1023778 part 3 - Make AtomizeChars and js_NewStringCopyN accept Latin1 chars. r=luke
author Jan de Mooij <jdemooij@mozilla.com>
Fri, 13 Jun 2014 20:51:05 +0200
changeset 209473 4a930d3fe2aadd8fda37a594fcd00bf3b966e2fd
parent 209472 064a0120db2d36bf5bd50f53ce142e6b32b485ed
child 209474 bc9e05912b9af6f6dcad399a486bd049101e1daa
push id 3857
push user raliiev@mozilla.com
push date Tue, 02 Sep 2014 16:39:23 +0000
treeherder mozilla-beta@5638b907b505 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers luke
bugs 1023778
milestone 33.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1023778 part 3 - Make AtomizeChars and js_NewStringCopyN accept Latin1 chars. r=luke
js/src/jsatom.cpp
js/src/jsatom.h
js/src/jsatominlines.h
js/src/jsstr.cpp
js/src/jsstr.h
js/src/vm/String-inl.h
js/src/vm/StringBuffer.cpp
mfbt/HashFunctions.h
--- a/js/src/jsatom.cpp
+++ b/js/src/jsatom.cpp
@@ -347,19 +347,20 @@ AtomizeAndtake(ExclusiveContext *cx, jsc
         js_ReportOutOfMemory(cx); /* SystemAllocPolicy does not report OOM. */
         return nullptr;
     }
 
     return atom;
 }
 
 /* |tbchars| must not point into an inline or short string. */
+template <typename CharT>
 MOZ_ALWAYS_INLINE
 static JSAtom *
-AtomizeAndCopyChars(ExclusiveContext *cx, const jschar *tbchars, size_t length, InternBehavior ib)
+AtomizeAndCopyChars(ExclusiveContext *cx, const CharT *tbchars, size_t length, InternBehavior ib)
 {
     if (JSAtom *s = cx->staticStrings().lookup(tbchars, length))
          return s;
 
     AtomHasher::Lookup lookup(tbchars, length);
 
     AtomSet::Ptr pp = cx->permanentAtoms().readonlyThreadsafeLookup(lookup);
     if (pp)
@@ -391,16 +392,22 @@ AtomizeAndCopyChars(ExclusiveContext *cx
     if (!atoms.add(p, AtomStateEntry(atom, bool(ib)))) {
         js_ReportOutOfMemory(cx); /* SystemAllocPolicy does not report OOM. */
         return nullptr;
     }
 
     return atom;
 }
 
+template JSAtom *
+AtomizeAndCopyChars(ExclusiveContext *cx, const jschar *tbchars, size_t length, InternBehavior ib);
+
+template JSAtom *
+AtomizeAndCopyChars(ExclusiveContext *cx, const Latin1Char *tbchars, size_t length, InternBehavior ib);
+
 JSAtom *
 js::AtomizeString(ExclusiveContext *cx, JSString *str,
                   js::InternBehavior ib /* = js::DoNotInternAtom */)
 {
     if (str->isAtom()) {
         JSAtom &atom = str->asAtom();
         /* N.B. static atoms are effectively always interned. */
         if (ib != InternAtom || js::StaticStrings::isStatic(&atom))
@@ -453,27 +460,34 @@ js::Atomize(ExclusiveContext *cx, const 
     }
 
     jschar *tbcharsZ = InflateString(cx, bytes, &length);
     if (!tbcharsZ)
         return nullptr;
     return AtomizeAndtake(cx, tbcharsZ, length, ib);
 }
 
+template <typename CharT>
 JSAtom *
-js::AtomizeChars(ExclusiveContext *cx, const jschar *chars, size_t length, InternBehavior ib)
+js::AtomizeChars(ExclusiveContext *cx, const CharT *chars, size_t length, InternBehavior ib)
 {
     CHECK_REQUEST(cx);
 
     if (!JSString::validateLength(cx, length))
         return nullptr;
 
     return AtomizeAndCopyChars(cx, chars, length, ib);
 }
 
+template JSAtom *
+js::AtomizeChars(ExclusiveContext *cx, const Latin1Char *chars, size_t length, InternBehavior ib);
+
+template JSAtom *
+js::AtomizeChars(ExclusiveContext *cx, const jschar *chars, size_t length, InternBehavior ib);
+
 bool
 js::IndexToIdSlow(ExclusiveContext *cx, uint32_t index, MutableHandleId idp)
 {
     JS_ASSERT(index > JSID_INT_MAX);
 
     jschar buf[UINT32_CHAR_BUFFER_LENGTH];
     RangedPtr<jschar> end(ArrayEnd(buf), buf, ArrayEnd(buf));
     RangedPtr<jschar> start = BackfillIndexInCharBuffer(index, end);
--- a/js/src/jsatom.h
+++ b/js/src/jsatom.h
@@ -8,16 +8,17 @@
 #define jsatom_h
 
 #include "mozilla/HashFunctions.h"
 
 #include "jsalloc.h"
 
 #include "gc/Barrier.h"
 #include "gc/Rooting.h"
+#include "js/GCAPI.h"
 #include "vm/CommonPropertyNames.h"
 
 class JSAtom;
 class JSAutoByteString;
 
 struct JSIdArray {
     int length;
     js::HeapId vector[1];    /* actually, length jsid words */
@@ -80,24 +81,34 @@ class AtomStateEntry
 
     JSAtom *asPtr() const;
 };
 
 struct AtomHasher
 {
     struct Lookup
     {
-        const jschar    *chars;
-        size_t          length;
-        const JSAtom    *atom; /* Optional. */
+        union {
+            const JS::Latin1Char *latin1Chars;
+            const jschar *twoByteChars;
+        };
+        bool isLatin1;
+        size_t length;
+        const JSAtom *atom; /* Optional. */
+        JS::AutoCheckCannotGC nogc;
 
         HashNumber hash;
 
         Lookup(const jschar *chars, size_t length)
-          : chars(chars), length(length), atom(nullptr)
+          : twoByteChars(chars), isLatin1(false), length(length), atom(nullptr)
+        {
+            hash = mozilla::HashString(chars, length);
+        }
+        Lookup(const JS::Latin1Char *chars, size_t length)
+          : latin1Chars(chars), isLatin1(true), length(length), atom(nullptr)
         {
             hash = mozilla::HashString(chars, length);
         }
         inline explicit Lookup(const JSAtom *atom);
     };
 
     static HashNumber hash(const Lookup &l) { return l.hash; }
     static inline bool match(const AtomStateEntry &entry, const Lookup &lookup);
@@ -182,18 +193,19 @@ enum InternBehavior
     DoNotInternAtom = false,
     InternAtom = true
 };
 
 extern JSAtom *
 Atomize(ExclusiveContext *cx, const char *bytes, size_t length,
         js::InternBehavior ib = js::DoNotInternAtom);
 
+template <typename CharT>
 extern JSAtom *
-AtomizeChars(ExclusiveContext *cx, const jschar *chars, size_t length,
+AtomizeChars(ExclusiveContext *cx, const CharT *chars, size_t length,
              js::InternBehavior ib = js::DoNotInternAtom);
 
 extern JSAtom *
 AtomizeString(ExclusiveContext *cx, JSString *str, js::InternBehavior ib = js::DoNotInternAtom);
 
 template <AllowGC allowGC>
 extern JSAtom *
 ToAtom(ExclusiveContext *cx, typename MaybeRooted<Value, allowGC>::HandleType v);
--- a/js/src/jsatominlines.h
+++ b/js/src/jsatominlines.h
@@ -132,30 +132,47 @@ IdToString(JSContext *cx, jsid id)
     if (!str)
         return nullptr;
 
     return str->ensureFlat(cx);
 }
 
 inline
 AtomHasher::Lookup::Lookup(const JSAtom *atom)
-  : chars(atom->chars()), length(atom->length()), atom(atom)
+  : isLatin1(atom->hasLatin1Chars()), length(atom->length()), atom(atom)
 {
-    hash = mozilla::HashString(chars, length);
+    if (isLatin1) {
+        latin1Chars = atom->latin1Chars(nogc);
+        hash = mozilla::HashString(latin1Chars, length);
+    } else {
+        twoByteChars = atom->twoByteChars(nogc);
+        hash = mozilla::HashString(twoByteChars, length);
+    }
 }
 
 inline bool
 AtomHasher::match(const AtomStateEntry &entry, const Lookup &lookup)
 {
     JSAtom *key = entry.asPtr();
     if (lookup.atom)
         return lookup.atom == key;
     if (key->length() != lookup.length)
         return false;
-    return mozilla::PodEqual(key->chars(), lookup.chars, lookup.length);
+
+    if (key->hasLatin1Chars()) {
+        const Latin1Char *keyChars = key->latin1Chars(lookup.nogc);
+        if (lookup.isLatin1)
+            return mozilla::PodEqual(keyChars, lookup.latin1Chars, lookup.length);
+        return EqualCharsLatin1TwoByte(keyChars, lookup.twoByteChars, lookup.length);
+    }
+
+    const jschar *keyChars = key->twoByteChars(lookup.nogc);
+    if (lookup.isLatin1)
+        return EqualCharsLatin1TwoByte(lookup.latin1Chars, keyChars, lookup.length);
+    return mozilla::PodEqual(keyChars, lookup.twoByteChars, lookup.length);
 }
 
 inline Handle<PropertyName*>
 TypeName(JSType type, const JSAtomState &names)
 {
     JS_ASSERT(type < JSTYPE_LIMIT);
     JS_STATIC_ASSERT(offsetof(JSAtomState, undefined) +
                      JSTYPE_LIMIT * sizeof(ImmutablePropertyNamePtr) <=
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -17,16 +17,17 @@
 
 #include "jsstr.h"
 
 #include "mozilla/Attributes.h"
 #include "mozilla/Casting.h"
 #include "mozilla/CheckedInt.h"
 #include "mozilla/FloatingPoint.h"
 #include "mozilla/PodOperations.h"
+#include "mozilla/Range.h"
 #include "mozilla/TypeTraits.h"
 
 #include <ctype.h>
 #include <string.h>
 #include <wchar.h>
 
 #include "jsapi.h"
 #include "jsarray.h"
@@ -66,16 +67,17 @@ using namespace js::types;
 using namespace js::unicode;
 
 using mozilla::CheckedInt;
 using mozilla::IsNaN;
 using mozilla::IsNegativeZero;
 using mozilla::IsSame;
 using mozilla::PodCopy;
 using mozilla::PodEqual;
+using mozilla::Range;
 using mozilla::SafeCast;
 
 using JS::AutoCheckCannotGC;
 
 typedef Handle<JSLinearString*> HandleLinearString;
 
 static JSLinearString *
 ArgToRootedString(JSContext *cx, CallArgs &args, unsigned argno)
@@ -1698,26 +1700,16 @@ str_lastIndexOf(JSContext *cx, unsigned 
             res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
     }
 
     args.rval().setInt32(res);
     return true;
 }
 
 static bool
-EqualCharsLatin1TwoByte(const Latin1Char *s1, const jschar *s2, size_t len)
-{
-    for (const Latin1Char *s1end = s1 + len; s1 < s1end; s1++, s2++) {
-        if (jschar(*s1) != *s2)
-            return false;
-    }
-    return true;
-}
-
-static bool
 HasSubstringAt(JSLinearString *text, JSLinearString *pat, size_t start)
 {
     MOZ_ASSERT(start + pat->length() <= text->length());
 
     size_t patLen = pat->length();
 
     AutoCheckCannotGC nogc;
     if (text->hasLatin1Chars()) {
@@ -4274,69 +4266,108 @@ js_NewDependentString(JSContext *cx, JSS
         const Latin1Char *chars = base->latin1Chars(nogc) + start;
         if (JSLinearString *staticStr = cx->staticStrings().lookup(chars, length))
             return staticStr;
     }
 
     return JSDependentString::new_(cx, base, start, length);
 }
 
-template <AllowGC allowGC>
+template <typename CharT>
+static void
+CopyCharsMaybeInflate(jschar *dest, const CharT *src, size_t len);
+
+template <>
+void
+CopyCharsMaybeInflate(jschar *dest, const jschar *src, size_t len)
+{
+    PodCopy(dest, src, len);
+}
+
+template <>
+void
+CopyCharsMaybeInflate(jschar *dest, const Latin1Char *src, size_t len)
+{
+    CopyAndInflateChars(dest, src, len);
+}
+
+template <AllowGC allowGC, typename CharT>
 JSFlatString *
-js_NewStringCopyN(ExclusiveContext *cx, const jschar *s, size_t n)
+js_NewStringCopyN(ThreadSafeContext *cx, const CharT *s, size_t n)
 {
+    if (EnableLatin1Strings) {
+        if (JSFatInlineString::lengthFits<CharT>(n))
+            return NewFatInlineString<allowGC>(cx, Range<const CharT>(s, n));
+
+        ScopedJSFreePtr<CharT> news(cx->pod_malloc<CharT>(n + 1));
+        if (!news)
+            return nullptr;
+
+        PodCopy(news.get(), s, n);
+        news[n] = 0;
+
+        JSFlatString *str = js_NewString<allowGC>(cx, news.get(), n);
+        if (!str)
+            return nullptr;
+
+        news.forget();
+        return str;
+    }
+
     if (JSFatInlineString::twoByteLengthFits(n))
-        return NewFatInlineString<allowGC>(cx, TwoByteChars(s, n));
-
-    jschar *news = cx->pod_malloc<jschar>(n + 1);
+        return NewFatInlineString<allowGC>(cx, Range<const CharT>(s, n));
+
+    ScopedJSFreePtr<jschar> news(cx->pod_malloc<jschar>(n + 1));
     if (!news)
         return nullptr;
-    js_strncpy(news, s, n);
+
+    CopyCharsMaybeInflate(news.get(), s, n);
     news[n] = 0;
-    JSFlatString *str = js_NewString<allowGC>(cx, news, n);
+
+    JSFlatString *str = js_NewString<allowGC>(cx, news.get(), n);
     if (!str)
-        js_free(news);
+        return nullptr;
+
+    news.forget();
     return str;
 }
 
 template JSFlatString *
-js_NewStringCopyN<CanGC>(ExclusiveContext *cx, const jschar *s, size_t n);
+js_NewStringCopyN<CanGC>(ThreadSafeContext *cx, const jschar *s, size_t n);
+
+template JSFlatString *
+js_NewStringCopyN<NoGC>(ThreadSafeContext *cx, const jschar *s, size_t n);
+
+template JSFlatString *
+js_NewStringCopyN<CanGC>(ThreadSafeContext *cx, const Latin1Char *s, size_t n);
 
 template JSFlatString *
-js_NewStringCopyN<NoGC>(ExclusiveContext *cx, const jschar *s, size_t n);
-
-template <AllowGC allowGC>
+js_NewStringCopyN<NoGC>(ThreadSafeContext *cx, const Latin1Char *s, size_t n);
+
+template <>
 JSFlatString *
-js_NewStringCopyN(ThreadSafeContext *cx, const char *s, size_t n)
+js_NewStringCopyN<CanGC>(ThreadSafeContext *cx, const char *s, size_t n)
 {
-    if (JSFatInlineString::twoByteLengthFits(n))
-        return NewFatInlineString<allowGC>(cx, JS::Latin1Chars(s, n));
-
-    jschar *chars = InflateString(cx, s, &n);
-    if (!chars)
-        return nullptr;
-    JSFlatString *str = js_NewString<allowGC>(cx, chars, n);
-    if (!str)
-        js_free(chars);
-    return str;
+    return js_NewStringCopyN<CanGC>(cx, reinterpret_cast<const Latin1Char *>(s), n);
 }
 
-template JSFlatString *
-js_NewStringCopyN<CanGC>(ThreadSafeContext *cx, const char *s, size_t n);
-
-template JSFlatString *
-js_NewStringCopyN<NoGC>(ThreadSafeContext *cx, const char *s, size_t n);
+template <>
+JSFlatString *
+js_NewStringCopyN<NoGC>(ThreadSafeContext *cx, const char *s, size_t n)
+{
+    return js_NewStringCopyN<NoGC>(cx, reinterpret_cast<const Latin1Char *>(s), n);
+}
 
 template <AllowGC allowGC>
 JSFlatString *
 js_NewStringCopyZ(ExclusiveContext *cx, const jschar *s)
 {
     size_t n = js_strlen(s);
     if (JSFatInlineString::twoByteLengthFits(n))
-        return NewFatInlineString<allowGC>(cx, TwoByteChars(s, n));
+        return NewFatInlineString<allowGC>(cx, Range<const jschar>(s, n));
 
     size_t m = (n + 1) * sizeof(jschar);
     jschar *news = (jschar *) cx->malloc_(m);
     if (!news)
         return nullptr;
     js_memcpy(news, s, m);
     JSFlatString *str = js_NewString<allowGC>(cx, news, n);
     if (!str)
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@@ -96,23 +96,19 @@ extern const char js_encodeURIComponent_
 template <js::AllowGC allowGC, typename CharT>
 extern JSFlatString *
 js_NewString(js::ThreadSafeContext *cx, CharT *chars, size_t length);
 
 extern JSLinearString *
 js_NewDependentString(JSContext *cx, JSString *base, size_t start, size_t length);
 
 /* Copy a counted string and GC-allocate a descriptor for it. */
-template <js::AllowGC allowGC>
+template <js::AllowGC allowGC, typename CharT>
 extern JSFlatString *
-js_NewStringCopyN(js::ExclusiveContext *cx, const jschar *s, size_t n);
-
-template <js::AllowGC allowGC>
-extern JSFlatString *
-js_NewStringCopyN(js::ThreadSafeContext *cx, const char *s, size_t n);
+js_NewStringCopyN(js::ThreadSafeContext *cx, const CharT *s, size_t n);
 
 /* Copy a C string and GC-allocate a descriptor for it. */
 template <js::AllowGC allowGC>
 extern JSFlatString *
 js_NewStringCopyZ(js::ExclusiveContext *cx, const jschar *s);
 
 template <js::AllowGC allowGC>
 extern JSFlatString *
@@ -232,16 +228,26 @@ js_strncpy(jschar *dst, const jschar *sr
     return mozilla::PodCopy(dst, src, nelem);
 }
 
 extern jschar *
 js_strdup(js::ThreadSafeContext *cx, const jschar *s);
 
 namespace js {
 
+inline bool
+EqualCharsLatin1TwoByte(const Latin1Char *s1, const jschar *s2, size_t len)
+{
+    for (const Latin1Char *s1end = s1 + len; s1 < s1end; s1++, s2++) {
+        if (jschar(*s1) != *s2)
+            return false;
+    }
+    return true;
+}
+
 /*
  * Inflate bytes in ASCII encoding to jschars. Return null on error, otherwise
  * return the jschar that was malloc'ed. length is updated to the length of the
  * new string (in jschars). A null char is appended, but it is not included in
  * the length.
  */
 extern jschar *
 InflateString(ThreadSafeContext *cx, const char *bytes, size_t *length);
--- a/js/src/vm/String-inl.h
+++ b/js/src/vm/String-inl.h
@@ -5,16 +5,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef vm_String_inl_h
 #define vm_String_inl_h
 
 #include "vm/String.h"
 
 #include "mozilla/PodOperations.h"
+#include "mozilla/Range.h"
 
 #include "jscntxt.h"
 
 #include "gc/Marking.h"
 
 #include "jsgcinlines.h"
 
 namespace js {
@@ -37,17 +38,17 @@ AllocateFatInlineString(ThreadSafeContex
     if (!str)
         return nullptr;
     *chars = str->init<CharT>(len);
     return str;
 }
 
 template <AllowGC allowGC>
 static MOZ_ALWAYS_INLINE JSInlineString *
-NewFatInlineString(ThreadSafeContext *cx, JS::Latin1Chars chars)
+NewFatInlineString(ThreadSafeContext *cx, mozilla::Range<const Latin1Char> chars)
 {
     size_t len = chars.length();
 
     if (EnableLatin1Strings) {
         Latin1Char *p;
         JSInlineString *str = AllocateFatInlineString<allowGC>(cx, len, &p);
         if (!str)
             return nullptr;
@@ -65,17 +66,17 @@ NewFatInlineString(ThreadSafeContext *cx
     for (size_t i = 0; i < len; ++i)
         p[i] = static_cast<jschar>(chars[i]);
     p[len] = '\0';
     return str;
 }
 
 template <AllowGC allowGC>
 static MOZ_ALWAYS_INLINE JSInlineString *
-NewFatInlineString(ExclusiveContext *cx, JS::TwoByteChars chars)
+NewFatInlineString(ThreadSafeContext *cx, mozilla::Range<const jschar> chars)
 {
     /*
      * Don't bother trying to find a static atom; measurement shows that not
      * many get here (for one, Atomize is catching them).
      */
 
     size_t len = chars.length();
     jschar *storage;
--- a/js/src/vm/StringBuffer.cpp
+++ b/js/src/vm/StringBuffer.cpp
@@ -1,22 +1,26 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "vm/StringBuffer.h"
 
+#include "mozilla/Range.h"
+
 #include "jsobjinlines.h"
 
 #include "vm/String-inl.h"
 
 using namespace js;
 
+using mozilla::Range;
+
 template <typename CharT, class Buffer>
 static CharT *
 ExtractWellSized(ExclusiveContext *cx, Buffer &cb)
 {
     size_t capacity = cb.capacity();
     size_t length = cb.length();
 
     CharT *buf = cb.extractRawBuffer();
@@ -91,20 +95,20 @@ StringBuffer::finishString()
     if (!JSString::validateLength(cx, len))
         return nullptr;
 
     JS_STATIC_ASSERT(JSFatInlineString::MAX_LENGTH_TWO_BYTE < TwoByteCharBuffer::InlineLength);
     JS_STATIC_ASSERT(JSFatInlineString::MAX_LENGTH_LATIN1 < Latin1CharBuffer::InlineLength);
 
     if (isLatin1()) {
         if (JSFatInlineString::latin1LengthFits(len))
-            return NewFatInlineString<CanGC>(cx, Latin1Chars(latin1Chars().begin(), len));
+            return NewFatInlineString<CanGC>(cx, Range<const Latin1Char>(latin1Chars().begin(), len));
     } else {
         if (JSFatInlineString::twoByteLengthFits(len))
-            return NewFatInlineString<CanGC>(cx, TwoByteChars(twoByteChars().begin(), len));
+            return NewFatInlineString<CanGC>(cx, Range<const jschar>(twoByteChars().begin(), len));
     }
 
     return isLatin1()
         ? FinishStringFlat<Latin1Char>(cx, *this, latin1Chars())
         : FinishStringFlat<jschar>(cx, *this, twoByteChars());
 }
 
 JSAtom *
--- a/mfbt/HashFunctions.h
+++ b/mfbt/HashFunctions.h
@@ -310,16 +310,23 @@ MOZ_WARN_UNUSED_RESULT
 inline uint32_t
 HashString(const char* str, size_t length)
 {
   return detail::HashKnownLength(str, length);
 }
 
 MOZ_WARN_UNUSED_RESULT
 inline uint32_t
+HashString(const unsigned char* str, size_t length)
+{
+  return detail::HashKnownLength(str, length);
+}
+
+MOZ_WARN_UNUSED_RESULT
+inline uint32_t
 HashString(const uint16_t* str)
 {
   return detail::HashUntilZero(str);
 }
 
 MOZ_WARN_UNUSED_RESULT
 inline uint32_t
 HashString(const uint16_t* str, size_t length)