author Henri Sivonen <hsivonen@hsivonen.fi>
Fri, 06 Jul 2018 10:44:43 +0300
changeset 489140 4ef0f163fdeb9afeddd87b37bfd987298c038542
parent 203608 f20eb963186959bde60b7ce8f505bb4903e19063
child 491356 481d502afc69d7d120b2d80a2b881bcea25c2661
permissions -rw-r--r--
Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions. r=Nika,erahm,froydnj. Correctness improvements: * UTF errors are handled safely per spec instead of dangerously truncating strings. * There are fewer converter implementations. Performance improvements: * The old code did exact buffer length math, which meant doing UTF math twice on each input string (once for length calculation and another time for conversion). Exact length math is more complicated when handling errors properly, which the old code didn't do. The new code does UTF math on the string content only once (when converting) but risks allocating more than once. There are heuristics in place to lower the probability of reallocation in cases where the double math avoidance isn't enough of a saving to absorb an allocation and memcpy. * Previously, in UTF-16 <-> UTF-8 conversions, an ASCII prefix was optimized but a single non-ASCII code point pessimized the rest of the string. The new code tries to get back on the fast ASCII path. * UTF-16 to Latin1 conversion guarantees less about handling of out-of-range input to eliminate an operation from the inner loop on x86/x86_64. * When assigning to a pre-existing string, the new code tries to reuse the old buffer instead of first releasing the old buffer and then allocating a new one. * When reallocating from the new code, the memcpy covers only the data that is part of the logical length of the old string instead of memcpying the whole capacity. (For old callers old excess memcpy behavior is preserved due to bogus callers. See bug 1472113.) * UTF-8 strings in XPConnect that are in the Latin1 range are passed to SpiderMonkey as Latin1. New features: * Conversion between UTF-8 and Latin1 is added in order to enable faster future interop between Rust code (or otherwise UTF-8-using code) and text node and SpiderMonkey code that uses Latin1. MozReview-Commit-ID: JaJuExfILM9

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsSegmentedBuffer_h__
#define nsSegmentedBuffer_h__

#include "nsIMemory.h"

class nsSegmentedBuffer
    : mSegmentSize(0)
    , mMaxSize(0)
    , mSegmentArray(nullptr)
    , mSegmentArrayCount(0)
    , mFirstSegmentIndex(0)
    , mLastSegmentIndex(0)


  nsresult Init(uint32_t aSegmentSize, uint32_t aMaxSize);

  char* AppendNewSegment();   // pushes at end

  // returns true if no more segments remain:
  bool DeleteFirstSegment();  // pops from beginning

  // returns true if no more segments remain:
  bool DeleteLastSegment();  // pops from beginning

  // Call Realloc() on last segment.  This is used to reduce memory
  // consumption when data is not an exact multiple of segment size.
  bool ReallocLastSegment(size_t aNewSize);

  void Empty();               // frees all segments

  inline uint32_t GetSegmentCount()
    if (mFirstSegmentIndex <= mLastSegmentIndex) {
      return mLastSegmentIndex - mFirstSegmentIndex;
    } else {
      return mSegmentArrayCount + mLastSegmentIndex - mFirstSegmentIndex;

  inline uint32_t GetSegmentSize()
    return mSegmentSize;
  inline uint32_t GetMaxSize()
    return mMaxSize;
  inline uint32_t GetSize()
    return GetSegmentCount() * mSegmentSize;

  inline char* GetSegment(uint32_t aIndex)
    NS_ASSERTION(aIndex < GetSegmentCount(), "index out of bounds");
    int32_t i = ModSegArraySize(mFirstSegmentIndex + (int32_t)aIndex);
    return mSegmentArray[i];

  inline int32_t ModSegArraySize(int32_t aIndex)
    uint32_t result = aIndex & (mSegmentArrayCount - 1);
    NS_ASSERTION(result == aIndex % mSegmentArrayCount,
                 "non-power-of-2 mSegmentArrayCount");
    return result;

  inline bool IsFull()
    return ModSegArraySize(mLastSegmentIndex + 1) == mFirstSegmentIndex;

  uint32_t            mSegmentSize;
  uint32_t            mMaxSize;
  char**              mSegmentArray;
  uint32_t            mSegmentArrayCount;
  int32_t             mFirstSegmentIndex;
  int32_t             mLastSegmentIndex;

// NS_SEGMENTARRAY_INITIAL_SIZE: This number needs to start out as a
// power of 2 given how it gets used. We double the segment array
// when we overflow it, and use the fact that it's a power of 2
// to compute a fast modulus operation in ModSegArraySize (used by IsFull).
// 32 segment array entries can accommodate 128k of data if segments
// are 4k in size. That seems like a reasonable amount that will avoid
// needing to grow the segment array.

#endif // nsSegmentedBuffer_h__