Bug 761552 - Part 1: Provide UTF8ToUnicode functions accepting a buffer. r=jlebar
authorNils Maier <maierman@web.de>
Tue, 29 Jan 2013 10:50:04 -0500
changeset 120229 a502aa076a9416d2af5fc5eddb30f98a5d7dfe56
parent 120228 8331ece7e1c16aad9389fb45906f8faf68f49099
child 120230 42c786efb5d67768f348462234a235d6b808b1e4
push id24243
push userryanvm@gmail.com
push dateWed, 30 Jan 2013 00:49:21 +0000
treeherdermozilla-central@5c248ef0fe62 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjlebar
bugs761552
milestone21.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 761552 - Part 1: Provide UTF8ToUnicode functions accepting a buffer. r=jlebar UTF8ToUnicode does not let the caller specify the buffer to use, but allocates the buffer on its own. Hence the caller cannot choose which allocator to use. Rectify this by providing new APIs. 1. Split UTF8ToNewUnicode into: - CalcUTF8ToUnicodeLength - UTF8ToUnicodeBuffer 2. Rebuild UTF8ToNewUnicode using the new functions
xpcom/string/public/nsReadableUtils.h
xpcom/string/src/nsReadableUtils.cpp
--- a/xpcom/string/public/nsReadableUtils.h
+++ b/xpcom/string/public/nsReadableUtils.h
@@ -116,16 +116,48 @@ PRUnichar* ToNewUnicode( const nsAString
    * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
    *
    * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
    * @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
    */
 PRUnichar* ToNewUnicode( const nsACString& aSource );
 
   /**
+   * Returns the required length for a PRUnichar buffer holding
+   * a copy of aSource, using UTF-8 to UTF-16 conversion.
+   * The length does NOT include any space for zero-termination.
+   *
+   * @param aSource an 8-bit wide string, UTF-8 encoded
+   * @return length of UTF-16 encoded string copy, not zero-terminated
+   */
+uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource );
+
+  /**
+   * Copies the source string into the specified buffer, converting UTF-8 to
+   * UTF-16 in the process. The conversion is well defined for valid UTF-8
+   * strings.
+   * The copied string will be zero-terminated! Any embedded nulls will be
+   * copied nonetheless. It is the caller's responsibility to ensure the buffer
+   * is large enough to hold the string copy plus one PRUnichar for
+   * zero-termination!
+   *
+   * @see CalcUTF8ToUnicodeLength( const nsACString& )
+   * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
+   *
+   * @param aSource an 8-bit wide string, UTF-8 encoded
+   * @param aBuffer the buffer holding the converted string copy
+   * @param aUTF16Count if non-null, receives the number of 16-bit units that
+   *                    were copied
+   * @return aBuffer pointer, for convenience 
+   */
+PRUnichar* UTF8ToUnicodeBuffer( const nsACString& aSource,
+                                PRUnichar *aBuffer,
+                                uint32_t *aUTF16Count = nullptr );
+
+  /**
    * Returns a new |PRUnichar| buffer containing a zero-terminated copy
    * of |aSource|.
    *
    * Allocates and returns a new |char| buffer which you must free with
    * |nsMemory::Free|.  Performs an encoding conversion from UTF-8 to UTF-16 
    * while copying |aSource| to your new buffer.  This conversion is well defined
    * for a valid UTF-8 string.  The new buffer is zero-terminated, but that 
    * may not help you if |aSource| contains embedded nulls.
--- a/xpcom/string/src/nsReadableUtils.cpp
+++ b/xpcom/string/src/nsReadableUtils.cpp
@@ -320,38 +320,55 @@ ToNewUnicode( const nsACString& aSource 
       return nullptr;
 
     nsACString::const_iterator fromBegin, fromEnd;
     LossyConvertEncoding8to16 converter(result);
     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
     return result;
   }
 
-PRUnichar*
-UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count )
+uint32_t
+CalcUTF8ToUnicodeLength( const nsACString& aSource)
   {
     nsACString::const_iterator start, end;
     CalculateUTF8Length calculator;
     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
                 calculator);
+    return calculator.Length();
+  }
+
+PRUnichar*
+UTF8ToUnicodeBuffer( const nsACString& aSource, PRUnichar* aBuffer, uint32_t *aUTF16Count )
+  {
+    nsACString::const_iterator start, end;
+    ConvertUTF8toUTF16 converter(aBuffer);
+    copy_string(aSource.BeginReading(start),
+                aSource.EndReading(end),
+                converter).write_terminator();
+    if (aUTF16Count)
+      *aUTF16Count = converter.Length();
+    return aBuffer;
+  }
+
+PRUnichar*
+UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count )
+  {
+    const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
+    const size_t buffer_size = (length + 1) * sizeof(PRUnichar);
+    PRUnichar *buffer = static_cast<PRUnichar*>(nsMemory::Alloc(buffer_size));
+    if (!buffer)
+      return nullptr;
+
+    uint32_t copied;
+    UTF8ToUnicodeBuffer(aSource, buffer, &copied);
+    NS_ASSERTION(length == copied, "length mismatch");
 
     if (aUTF16Count)
-      *aUTF16Count = calculator.Length();
-
-    PRUnichar *result = static_cast<PRUnichar*>
-                                   (nsMemory::Alloc(sizeof(PRUnichar) * (calculator.Length() + 1)));
-    if (!result)
-      return nullptr;
-
-    ConvertUTF8toUTF16 converter(result);
-    copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-                converter).write_terminator();
-    NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
-
-    return result;
+      *aUTF16Count = copied;
+    return buffer;
   }
 
 PRUnichar*
 CopyUnicodeTo( const nsAString& aSource, uint32_t aSrcOffset, PRUnichar* aDest, uint32_t aLength )
   {
     nsAString::const_iterator fromBegin, fromEnd;
     PRUnichar* toBegin = aDest;    
     copy_string(aSource.BeginReading(fromBegin).advance( int32_t(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( int32_t(aSrcOffset+aLength) ), toBegin);