Bug 1214619 - Remove nsISaveAsCharset as much as possible without breaking extensions in popular use. r=emk.
author Henri Sivonen <hsivonen@hsivonen.fi>
Thu, 22 Oct 2015 11:18:45 +0300
changeset 309243 6644bf5d558c8e0810ea5e44756c3bcd1ea5ee7b
parent 309242 e159456a12299df44b7b6cba1035a0cce87a9455
child 309244 f0c815f451f761c90634789b055090b8ea438187
push id 7579
push user cliu@mozilla.com
push date Tue, 17 Nov 2015 02:28:56 +0000
reviewers emk
bugs 1214619
milestone 45.0a1
Bug 1214619 - Remove nsISaveAsCharset as much as possible without breaking extensions in popular use. r=emk.
intl/unicharutil/nsISaveAsCharset.idl
intl/unicharutil/nsSaveAsCharset.cpp
intl/unicharutil/nsSaveAsCharset.h
--- a/intl/unicharutil/nsISaveAsCharset.idl
+++ b/intl/unicharutil/nsISaveAsCharset.idl
@@ -6,47 +6,45 @@
 #include "nsISupports.idl"
 #include "nsIEntityConverter.idl"
 
 %{C++
 #define NS_SAVEASCHARSET_CID { 0xcd233e0, 0x7a86, 0x11d3, { 0x91, 0x5c, 0x0, 0x60, 0x8, 0xa6, 0xed, 0xf6 } }
 #define NS_SAVEASCHARSET_CONTRACTID "@mozilla.org/intl/saveascharset;1"
 %}
 
-[scriptable, uuid(33B87F70-7A9C-11d3-915C-006008A6EDF6)]
+/**
+ * DO NOT USE! For compat with legacy extension code only.
+ */
+[scriptable, uuid(b3b8124f-0abb-460e-88ac-3cf1a0134b2d)]
 interface nsISaveAsCharset : nsISupports
 {
   // attributes
   const unsigned long mask_Fallback                = 0x000000FF; // mask for fallback (8bits)
   const unsigned long mask_Entity                  = 0x00000300; // mask for entity (2bits)
   const unsigned long mask_CharsetFallback         = 0x00000400; // mask for charset fallback (1bit)
 
-  const unsigned long attr_FallbackNone = 0;                   // no fall back for unconverted chars (skipped)
-  const unsigned long attr_FallbackQuestionMark = 1;           // unconverted chars are replaced by '?'
-  const unsigned long attr_FallbackEscapeU = 2;                // unconverted chars are escaped as \uxxxx
-  const unsigned long attr_FallbackDecimalNCR = 3;             // unconverted chars are replaced by decimal NCR
-  const unsigned long attr_FallbackHexNCR = 4;                 // unconverted chars are replaced by hex NCR
+  const unsigned long attr_FallbackNone = 0;                   // IGNORED
+  const unsigned long attr_FallbackQuestionMark = 1;           // IGNORED
+  const unsigned long attr_FallbackEscapeU = 2;                // IGNORED
+  const unsigned long attr_FallbackDecimalNCR = 3;             // IGNORED
+  const unsigned long attr_FallbackHexNCR = 4;                 // IGNORED
 
-  const unsigned long attr_EntityNone = 0;                       // generate no Named Entity
-  const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // generate Named Entity before charset conversion
-  const unsigned long attr_EntityAfterCharsetConv =  0x00000200; // generate Named Entity after charset conversion
+  const unsigned long attr_EntityNone = 0;                       // IGNORED
+  const unsigned long attr_EntityBeforeCharsetConv = 0x00000100; // IGNORED
+  const unsigned long attr_EntityAfterCharsetConv =  0x00000200; // IGNORED
 
-  const unsigned long attr_CharsetFallback        =  0x00000400; // fallback to other charset and restart the convesion
+  const unsigned long attr_CharsetFallback        =  0x00000400; // IGNORED
 
 
                                                                // default attribute for plain text
   const unsigned long attr_plainTextDefault = attr_FallbackNone + attr_EntityNone;               
   
                                                                // default attribute for html text
                                                                // generate entity before charset conversion, use decimal NCR
   const unsigned long attr_htmlTextDefault = attr_FallbackDecimalNCR + attr_EntityBeforeCharsetConv;
 
-  readonly attribute string charset;                           // charset used for the conversion
-
-  // set up charset, attribute and entity version 
-  // see nsIEntityConverter.idl for possible value of entityVersion (entityNone for plain text).
-  void Init(in string charset, in unsigned long attr, in unsigned long entityVersion);
+  readonly attribute AUTF8String charset;                      // charset used for the conversion
 
-  // convert UCS-2 html to target charset
-  // may return the result code of the unicode converter (NS_ERROR_UENC_NOMAPPING)
-  // if the attribute does not specify any fall back (e.g. attrPlainTextDefault)
-	string Convert(in wstring inString);
+  void Init(in AUTF8String charset, in unsigned long ignored, in unsigned long alsoIgnored);
+
+  ACString Convert(in AString inString);
 };
--- a/intl/unicharutil/nsSaveAsCharset.cpp
+++ b/intl/unicharutil/nsSaveAsCharset.cpp
@@ -1,374 +1,56 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-
-#include "prmem.h"
-#include "mozilla/Snprintf.h"
 #include "nsSaveAsCharset.h"
-#include "nsWhitespaceTokenizer.h"
-#include "nsIUnicodeEncoder.h"
 #include "mozilla/dom/EncodingUtils.h"
-#include "nsComponentManagerUtils.h"
-
-using mozilla::dom::EncodingUtils;
 
 //
 // nsISupports methods
 //
 NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset)
 
 //
 // nsSaveAsCharset
 //
 nsSaveAsCharset::nsSaveAsCharset()
 {
-  mAttribute = attr_htmlTextDefault;
-  mEntityVersion = 0;
-  mCharsetListIndex = -1;
 }
 
 nsSaveAsCharset::~nsSaveAsCharset()
 {
 }
 
 NS_IMETHODIMP
-nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion)
+nsSaveAsCharset::Init(const nsACString& aCharset, uint32_t aIgnored, uint32_t aAlsoIgnored)
 {
-  nsresult rv = NS_OK;
-
-  mAttribute = attr;
-  mEntityVersion = entityVersion;
-
-  rv = SetupCharsetList(charset);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  // set up unicode encoder
-  rv = SetupUnicodeEncoder(GetNextCharset());
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  // set up entity converter
-  if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
-    mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
-
-  return rv;
-}
-
-NS_IMETHODIMP
-nsSaveAsCharset::Convert(const char16_t *inString, char **_retval)
-{
-  NS_ENSURE_ARG_POINTER(_retval);
-  NS_ENSURE_ARG_POINTER(inString);
-  if (0 == *inString)
-    return NS_ERROR_ILLEGAL_VALUE;
-  nsresult rv = NS_OK;
-
-  NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
-  NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
-
-  *_retval = nullptr;
-
-  // make sure to start from the first charset in the list
-  if (mCharsetListIndex > 0) {
-    mCharsetListIndex = -1;
-    rv = SetupUnicodeEncoder(GetNextCharset());
-    NS_ENSURE_SUCCESS(rv, rv);
+  nsAutoCString encoding;
+  if (!mozilla::dom::EncodingUtils::FindEncodingForLabelNoReplacement(aCharset, encoding)) {
+    return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR;
   }
-
-  do {
-    // fallback to the next charset in the list if the last conversion failed by an unmapped character
-    if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
-      const char * charset = GetNextCharset();
-      if (!charset)
-        break;
-      rv = SetupUnicodeEncoder(charset);
-      NS_ENSURE_SUCCESS(rv, rv);
-      PR_FREEIF(*_retval);
-    }
-
-    if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
-      NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
-      NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
-      char16_t *entity = nullptr;
-      // do the entity conversion first
-      rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
-      if(NS_SUCCEEDED(rv)) {
-        rv = DoCharsetConversion(entity, _retval);
-        free(entity);
-      }
-    }
-    else
-      rv = DoCharsetConversion(inString, _retval);
-
-  } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);
-
-  return rv;
-}
-
-NS_IMETHODIMP 
-nsSaveAsCharset::GetCharset(char * *aCharset)
-{
-  NS_ENSURE_ARG(aCharset);
-  NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
-  NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);
-
-  const char* charset = mCharsetList[mCharsetListIndex].get();
-  if (!charset) {
-    *aCharset = nullptr;
-    NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
-    return NS_ERROR_FAILURE;
-  }
-
-  *aCharset = strdup(charset);
-  return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////
-
-#define RESERVE_FALLBACK_BYTES 512
-
-// do the fallback, reallocate the buffer if necessary
-// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
-NS_IMETHODIMP
-nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, 
-                                int32_t *currentPos, int32_t estimatedLength)
-{
-  NS_ENSURE_ARG_POINTER(outString);
-  NS_ENSURE_ARG_POINTER(bufferLength);
-  NS_ENSURE_ARG_POINTER(currentPos);
-
-  char fallbackStr[256];
-  nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
-  if (NS_SUCCEEDED(rv)) {
-    int32_t tempLen = (int32_t) strlen(fallbackStr);
-
-    // reallocate if the buffer is not large enough
-    if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
-      int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES;
-      // + 1 is for the terminating NUL, don't add that to bufferLength
-      char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1);
-      if (temp) {
-        // adjust length/pointer after realloc
-        *bufferLength += addLength;
-        *outString = temp;
-      } else {
-        *outString = nullptr;
-        *bufferLength = 0;
-        return NS_ERROR_OUT_OF_MEMORY;
-      }
-    }
-    memcpy((*outString + *currentPos), fallbackStr, tempLen);
-    *currentPos += tempLen;
-  }
-  return rv;
+  mEncoder = new nsNCRFallbackEncoderWrapper(encoding);
+  mCharset.Assign(encoding);
+  return NS_OK;
 }
 
 NS_IMETHODIMP
-nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString)
+nsSaveAsCharset::Convert(const nsAString& aIn, nsACString& aOut)
 {
-  NS_ENSURE_ARG_POINTER(outString);
-
-  *outString = nullptr;
-
-  nsresult rv;
-  int32_t inStringLength = NS_strlen(inString);       // original input string length
-  int32_t bufferLength;                               // allocated buffer length
-  int32_t srcLength = inStringLength;
-  int32_t dstLength;
-  int32_t pos1, pos2;
-  nsresult saveResult = NS_OK;                         // to remember NS_ERROR_UENC_NOMAPPING
-
-  // estimate and allocate the target buffer (reserve extra memory for fallback)
-  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
-  if (NS_FAILED(rv)) return rv;
-
-  bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
-  // + 1 is for the terminating NUL -- we don't add that to bufferLength so that
-  // we can always write dstPtr[pos2] = '\0' even when the encoder filled the
-  // buffer.
-  char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
-  if (!dstPtr) {
-    return NS_ERROR_OUT_OF_MEMORY;
-  }
-  
-  for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
-    // convert from unicode
-    dstLength = bufferLength - pos2;
-    NS_ASSERTION(dstLength >= 0, "out of bounds write");
-    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
-
-    pos1 += srcLength ? srcLength : 1;
-    pos2 += dstLength;
-    dstPtr[pos2] = '\0';
-
-    // break: this is usually the case (no error) OR unrecoverable error
-    if (NS_ERROR_UENC_NOMAPPING != rv) break;
-
-    // remember this happened and reset the result
-    saveResult = rv;
-    rv = NS_OK;
-
-    // finish encoder, give it a chance to write extra data like escape sequences
-    dstLength = bufferLength - pos2;
-    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
-    if (NS_SUCCEEDED(rv)) {
-      pos2 += dstLength;
-      dstPtr[pos2] = '\0';
-    }
-
-    srcLength = inStringLength - pos1;
-
-    // do the fallback
-    if (!ATTR_NO_FALLBACK(mAttribute)) {
-      uint32_t unMappedChar;
-      if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && 
-          inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
-        unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
-        pos1++;
-      } else {
-        unMappedChar = inString[pos1-1];
-      }
-
-      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
-      if (NS_FAILED(rv)) 
-        break;
-
-      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
-      if (NS_FAILED(rv)) 
-        break;
-      dstPtr[pos2] = '\0';
-    }
-  }
-
-  if (NS_SUCCEEDED(rv)) {
-    // finish encoder, give it a chance to write extra data like escape sequences
-    dstLength = bufferLength - pos2;
-    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
-    if (NS_SUCCEEDED(rv)) {
-      pos2 += dstLength;
-      dstPtr[pos2] = '\0';
-    }
+  if (!mEncoder) {
+    return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR;
   }
 
-  if (NS_FAILED(rv)) {
-    PR_FREEIF(dstPtr);
-    return rv;
-  }
-
-  *outString = dstPtr;      // set the result string
-
-  // set error code so that the caller can do own fall back
-  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
-    rv = NS_ERROR_UENC_NOMAPPING;
-  }
-
-  return rv;
-}
-
-NS_IMETHODIMP
-nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength)
-{
-  NS_ENSURE_ARG_POINTER(outString);
-
-  *outString = '\0';
-
-  nsresult rv = NS_OK;
-
-  if (ATTR_NO_FALLBACK(mAttribute)) {
-    return NS_OK;
-  }
-  if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
-    char *entity = nullptr;
-    rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
-    if (NS_SUCCEEDED(rv)) {
-      if (!entity || (int32_t)strlen(entity) > bufferLength) {
-        return NS_ERROR_OUT_OF_MEMORY;
-      }
-      PL_strcpy(outString, entity);
-      free(entity);
-      return rv;
-    }
+  if (!mEncoder->Encode(aIn, aOut)) {
+    return NS_ERROR_OUT_OF_MEMORY;
   }
-
-  switch (MASK_FALLBACK(mAttribute)) {
-  case attr_FallbackQuestionMark:
-    if(bufferLength>=2) {
-      *outString++='?';
-      *outString='\0';
-      rv = NS_OK;
-    } else {
-      rv = NS_ERROR_FAILURE;
-    }
-    break;
-  case attr_FallbackEscapeU:
-    if (inUCS4 & 0xff0000)
-      rv = (snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
-    else
-      rv = (snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
-    break;
-  case attr_FallbackDecimalNCR:
-    rv = (snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
-    break;
-  case attr_FallbackHexNCR:
-    rv = (snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
-    break;
-  case attr_FallbackNone:
-    rv = NS_OK;
-    break;
-  default:
-    rv = NS_ERROR_ILLEGAL_VALUE;
-    break;
-  }
-
-	return rv;
-}
-
-nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
-{
-  NS_ENSURE_ARG(charset);
-  nsDependentCString label(charset);
-  if (label.EqualsLiteral("replacement")) {
-    // Internal caller. "replacement" doesn't survive another label resolution.
-    mEncoder = EncodingUtils::EncoderForEncoding(label);
-    return NS_OK;
-  }
-  nsAutoCString encoding;
-  if (!EncodingUtils::FindEncodingForLabelNoReplacement(label,
-                                                        encoding)) {
-    return NS_ERROR_UCONV_NOCONV;
-  }
-  mEncoder = EncodingUtils::EncoderForEncoding(encoding);
   return NS_OK;
 }
 
-nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
+NS_IMETHODIMP 
+nsSaveAsCharset::GetCharset(nsACString& aCharset)
 {
-  NS_ENSURE_ARG(charsetList);
-
-  NS_ASSERTION(charsetList[0], "charsetList should not be empty");
-  if (!charsetList[0])
-    return NS_ERROR_INVALID_ARG;
-
-  if (mCharsetListIndex >= 0) {
-    mCharsetList.Clear();
-    mCharsetListIndex = -1;
-  }
-
-  nsCWhitespaceTokenizer tokenizer((nsDependentCString(charsetList)));
-  while (tokenizer.hasMoreTokens()) {
-    ParseString(tokenizer.nextToken(), ',', mCharsetList);
-  }
-
+  aCharset.Assign(mCharset);
   return NS_OK;
 }
-
-const char * nsSaveAsCharset::GetNextCharset()
-{
-  if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length()))
-    return nullptr;
-
-  // bump the index and return the next charset
-  return mCharsetList[++mCharsetListIndex].get();
-}
--- a/intl/unicharutil/nsSaveAsCharset.h
+++ b/intl/unicharutil/nsSaveAsCharset.h
@@ -1,73 +1,37 @@
 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#ifndef nsSaveAsCharset_h__
-#define nsSaveAsCharset_h__
+#ifndef nsSaveAsCharset_h_
+#define nsSaveAsCharset_h_
 
 #include "nsStringFwd.h"
-#include "nsTArray.h"
 #include "nsISaveAsCharset.h"
-#include "nsCOMPtr.h"
-
-#define MASK_FALLBACK(a) (nsISaveAsCharset::mask_Fallback & (a))
-#define MASK_ENTITY(a) (nsISaveAsCharset::mask_Entity & (a))
-#define MASK_CHARSET_FALLBACK(a) (nsISaveAsCharset::mask_CharsetFallback & (a))
-#define ATTR_NO_FALLBACK(a) (nsISaveAsCharset::attr_FallbackNone == MASK_FALLBACK(a) && \
-                             nsISaveAsCharset::attr_EntityAfterCharsetConv != MASK_ENTITY(a))
-
-class nsIUnicodeEncoder;
-class nsIEntityConverter;
+#include "nsAutoPtr.h"
+#include "nsNCRFallbackEncoderWrapper.h"
+#include "nsString.h"
 
 class nsSaveAsCharset : public nsISaveAsCharset
 {
 public:
-	
-	//
-	// implementation methods
-	//
+
   nsSaveAsCharset();
 
-	//
-	// nsISupports
-	//
-	NS_DECL_ISUPPORTS
+  NS_DECL_ISUPPORTS
+
+  NS_IMETHOD Init(const nsACString& aCharset, uint32_t aIgnored, uint32_t aAlsoIgnored) override;
 
-	//
-	// nsIEntityConverter
-	//
-  NS_IMETHOD Init(const char *charset, uint32_t attr, uint32_t entityVersion) override;
+  NS_IMETHOD Convert(const nsAString& ain, nsACString& aOut) override;
 
-  NS_IMETHOD Convert(const char16_t *inString, char **_retval) override;
+  NS_IMETHODIMP GetCharset(nsACString& aCharset) override;
 
-  NS_IMETHODIMP GetCharset(char * *aCharset) override;
-
-protected:
+private:
 
   virtual ~nsSaveAsCharset();
 
-  NS_IMETHOD DoCharsetConversion(const char16_t *inString, char **outString);
-
-  NS_IMETHOD DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength);
-
-  // do the fallback, reallocate the buffer if necessary
-  // need to pass destination buffer info (size, current position and estimation of rest of the conversion)
-  NS_IMETHOD HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, 
-                            int32_t *currentPos, int32_t estimatedLength);
-
-  nsresult SetupUnicodeEncoder(const char* charset);
-
-  nsresult SetupCharsetList(const char *charsetList);
-
-  const char * GetNextCharset();
-
-  uint32_t mAttribute;                    // conversion attribute
-  uint32_t mEntityVersion;                // see nsIEntityConverter
-  nsCOMPtr<nsIUnicodeEncoder> mEncoder;   // encoder (convert from unicode)
-  nsCOMPtr<nsIEntityConverter> mEntityConverter;
-  nsTArray<nsCString> mCharsetList;
-  int32_t        mCharsetListIndex;
+  nsAutoPtr<nsNCRFallbackEncoderWrapper> mEncoder;
+  nsCString mCharset;
 };
 
 #endif