xpcom/io/nsScriptableInputStream.cpp
author Henri Sivonen <hsivonen@hsivonen.fi>
Fri, 06 Jul 2018 10:44:43 +0300
changeset 489140 4ef0f163fdeb9afeddd87b37bfd987298c038542
parent 329569 a3dd1df4206901d71d464062589854c6e84f4a12
child 508163 6f3709b3878117466168c40affa7bca0b60cf75b
permissions -rw-r--r--
Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions. r=Nika,erahm,froydnj. Correctness improvements: * UTF errors are handled safely per spec instead of dangerously truncating strings. * There are fewer converter implementations. Performance improvements: * The old code did exact buffer length math, which meant doing UTF math twice on each input string (once for length calculation and another time for conversion). Exact length math is more complicated when handling errors properly, which the old code didn't do. The new code does UTF math on the string content only once (when converting) but risks allocating more than once. There are heuristics in place to lower the probability of reallocation in cases where the double math avoidance isn't enough of a saving to absorb an allocation and memcpy. * Previously, in UTF-16 <-> UTF-8 conversions, an ASCII prefix was optimized but a single non-ASCII code point pessimized the rest of the string. The new code tries to get back on the fast ASCII path. * UTF-16 to Latin1 conversion guarantees less about handling of out-of-range input to eliminate an operation from the inner loop on x86/x86_64. * When assigning to a pre-existing string, the new code tries to reuse the old buffer instead of first releasing the old buffer and then allocating a new one. * When reallocating from the new code, the memcpy covers only the data that is part of the logical length of the old string instead of memcpying the whole capacity. (For old callers old excess memcpy behavior is preserved due to bogus callers. See bug 1472113.) * UTF-8 strings in XPConnect that are in the Latin1 range are passed to SpiderMonkey as Latin1. New features: * Conversion between UTF-8 and Latin1 is added in order to enable faster future interop between Rust code (or otherwise UTF-8-using code) and text node and SpiderMonkey code that uses Latin1. MozReview-Commit-ID: JaJuExfILM9

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsScriptableInputStream.h"
#include "nsMemory.h"
#include "nsString.h"

NS_IMPL_ISUPPORTS(nsScriptableInputStream, nsIScriptableInputStream)

// nsIScriptableInputStream methods
NS_IMETHODIMP
nsScriptableInputStream::Close()
{
  if (!mInputStream) {
    return NS_ERROR_NOT_INITIALIZED;
  }
  return mInputStream->Close();
}

NS_IMETHODIMP
nsScriptableInputStream::Init(nsIInputStream* aInputStream)
{
  if (!aInputStream) {
    return NS_ERROR_NULL_POINTER;
  }
  mInputStream = aInputStream;
  return NS_OK;
}

NS_IMETHODIMP
nsScriptableInputStream::Available(uint64_t* aResult)
{
  if (!mInputStream) {
    return NS_ERROR_NOT_INITIALIZED;
  }
  return mInputStream->Available(aResult);
}

NS_IMETHODIMP
nsScriptableInputStream::Read(uint32_t aCount, char** aResult)
{
  nsresult rv = NS_OK;
  uint64_t count64 = 0;
  char* buffer = nullptr;

  if (!mInputStream) {
    return NS_ERROR_NOT_INITIALIZED;
  }

  rv = mInputStream->Available(&count64);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // bug716556 - Ensure count+1 doesn't overflow
  uint32_t count =
    XPCOM_MIN((uint32_t)XPCOM_MIN<uint64_t>(count64, aCount), UINT32_MAX - 1);
  buffer = (char*)malloc(count + 1);  // make room for '\0'
  if (!buffer) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  rv = ReadHelper(buffer, count);
  if (NS_FAILED(rv)) {
    free(buffer);
    return rv;
  }

  buffer[count] = '\0';
  *aResult = buffer;
  return NS_OK;
}

NS_IMETHODIMP
nsScriptableInputStream::ReadBytes(uint32_t aCount, nsACString& aResult)
{
  if (!mInputStream) {
    return NS_ERROR_NOT_INITIALIZED;
  }

  if (!aResult.SetLength(aCount, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  MOZ_ASSERT(aResult.Length() == aCount);
  char* ptr = aResult.BeginWriting();
  nsresult rv = ReadHelper(ptr, aCount);
  if (NS_FAILED(rv)) {
    aResult.Truncate();
  }
  return rv;
}

nsresult
nsScriptableInputStream::ReadHelper(char* aBuffer, uint32_t aCount)
{
  uint32_t totalBytesRead = 0;
  while (1) {
    uint32_t bytesRead;
    nsresult rv = mInputStream->Read(aBuffer + totalBytesRead,
                                     aCount - totalBytesRead,
                                     &bytesRead);
    if (NS_FAILED(rv)) {
      return rv;
    }

    totalBytesRead += bytesRead;
    if (totalBytesRead == aCount) {
      break;
    }

    // If we have read zero bytes, we have hit EOF.
    if (bytesRead == 0) {
      return NS_ERROR_FAILURE;
    }

  }
  return NS_OK;
}

nsresult
nsScriptableInputStream::Create(nsISupports* aOuter, REFNSIID aIID,
                                void** aResult)
{
  if (aOuter) {
    return NS_ERROR_NO_AGGREGATION;
  }

  RefPtr<nsScriptableInputStream> sis = new nsScriptableInputStream();
  return sis->QueryInterface(aIID, aResult);
}