mailnews/base/util/nsMsgI18N.cpp
author Jorg K <jorgk@jorgk.com>
Mon, 23 Oct 2017 18:24:51 +0200
changeset 22410 0b0cba8d70bd35997965afe4c02010c49e402980
parent 22207 fdc9f3574d6ece7b18d60bf6fe94b63324e9536d
child 22540 bc4dff04bede39d57bc4353f8eeae800fb470877
permissions -rw-r--r--
Bug 1381762 - Replace nsIPlatfromCharset in mailnews. r=m_kato

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// as does this
#include "nsICharsetConverterManager.h"
#include "mozilla/dom/FallbackEncoding.h"
#include "nsIServiceManager.h"

#include "nsISupports.h"
#include "nsIPrefBranch.h"
#include "nsIPrefService.h"
#include "nsIMimeConverter.h"
#include "nsMsgUtils.h"
#include "nsMsgI18N.h"
#include "nsMsgMimeCID.h"
#include "nsILineInputStream.h"
#include "nsMimeTypes.h"
#include "nsString.h"
#include "prmem.h"
#include "plstr.h"
#include "nsUTF8Utils.h"
#include "nsNetUtil.h"
#include "nsCRTGlue.h"
#include "nsComponentManagerUtils.h"
#include "nsUnicharUtils.h"
#include "nsIFileStreams.h"
#include "../../intl/nsMUTF7ToUnicode.h"
#include "../../intl/nsUnicodeToMUTF7.h"

//
// International functions necessary for composition
//

nsresult nsMsgI18NConvertFromUnicode(const char* aCharset,
                                     const nsString& inString,
                                     nsACString& outString,
                                     bool aReportUencNoMapping)
{
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }

  auto encoding = mozilla::Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  } else if (encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING) {
    // We shouldn't ever ship anything in these encodings.
    return NS_ERROR_UCONV_NOCONV;
  }

  const mozilla::Encoding* actualEncoding;
  nsresult rv;
  mozilla::Tie(rv, actualEncoding) = encoding->Encode(inString, outString);
  mozilla::Unused << actualEncoding;

  if (rv == NS_OK_HAD_REPLACEMENTS) {
    rv = aReportUencNoMapping ? NS_ERROR_UENC_NOMAPPING : NS_OK;
  }

  return rv;
}

nsresult nsMsgI18NConvertToUnicode(const char* aCharset,
                                   const nsCString& inString,
                                   nsAString& outString)
{
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }
  else if (!*aCharset) {
    // Despite its name, it also works for Latin-1.
    CopyASCIItoUTF16(inString, outString);
    return NS_OK;
  }
  else if (!PL_strcasecmp(aCharset, "UTF-8")) {
    return UTF_8_ENCODING->DecodeWithBOMRemoval(inString, outString);
  }

  auto encoding = mozilla::Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
  if (!encoding)
    return NS_ERROR_UCONV_NOCONV;
  return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(inString,
                                                                 outString);
}

nsresult CopyUTF16toMUTF7(const nsString &aSrc, nsACString& aDest)
{
  #define IMAP_UTF7_BUF_LENGTH 100
  nsUnicodeToMUTF7 converter;
  static char buffer[IMAP_UTF7_BUF_LENGTH];
  char16_t *in = (char16_t *)aSrc.get();
  int32_t inLen = aSrc.Length();
  int32_t outLen;
  aDest.Truncate();
  while (inLen > 0) {
    outLen = IMAP_UTF7_BUF_LENGTH;
    int32_t remaining = inLen;
    converter.ConvertNoBuffNoErr(in, &remaining, buffer, &outLen);
    aDest.Append(buffer, outLen);
    in += remaining;
    inLen -= remaining;
  }
  outLen = IMAP_UTF7_BUF_LENGTH;
  converter.FinishNoBuff(buffer, &outLen);
  if (outLen > 0)
    aDest.Append(buffer, outLen);
  return NS_OK;
}

nsresult CopyMUTF7toUTF16(const nsCString& aSrc, nsAString& aDest)
{
  // UTF-7 encoding size cannot be larger than the size in UTF-16.
  nsMUTF7ToUnicode converter;
  int32_t inLen = aSrc.Length();
  int32_t outLen = inLen;
  aDest.SetCapacity(outLen);
  converter.ConvertNoBuff(aSrc.get(), &inLen, aDest.BeginWriting(), &outLen);
  MOZ_ASSERT(inLen == (int32_t)aSrc.Length(), "UTF-7 should not produce a longer output");
  aDest.SetLength(outLen);
  return NS_OK;
}

// Charset used by the file system.
const char * nsMsgI18NFileSystemCharset()
{
  /* Get a charset used for the file. */
  static nsAutoCString fileSystemCharset;

  if (fileSystemCharset.IsEmpty())
    mozilla::dom::FallbackEncoding::FromLocale()->Name(fileSystemCharset);

  return fileSystemCharset.get();
}

// Charset used by the text file.
void nsMsgI18NTextFileCharset(nsACString& aCharset)
{
  mozilla::dom::FallbackEncoding::FromLocale()->Name(aCharset);
}

// MIME encoder, output string should be freed by PR_FREE
// XXX : fix callers later to avoid allocation and copy
char * nsMsgI18NEncodeMimePartIIStr(const char *header, bool structured, const char *charset, int32_t fieldnamelen, bool usemime)
{
  // No MIME, convert to the outgoing mail charset.
  if (false == usemime) {
    nsAutoCString convertedStr;
    if (NS_SUCCEEDED(ConvertFromUnicode(charset, NS_ConvertUTF8toUTF16(header),
                                        convertedStr)))
      return PL_strdup(convertedStr.get());
    else
      return PL_strdup(header);
  }

  nsAutoCString encodedString;
  nsresult res;
  nsCOMPtr<nsIMimeConverter> converter = do_GetService(NS_MIME_CONVERTER_CONTRACTID, &res);
  if (NS_SUCCEEDED(res) && nullptr != converter)
    res = converter->EncodeMimePartIIStr_UTF8(nsDependentCString(header),
      structured, "UTF-8", fieldnamelen,
      nsIMimeConverter::MIME_ENCODED_WORD_SIZE, encodedString);

  return NS_SUCCEEDED(res) ? PL_strdup(encodedString.get()) : nullptr;
}

// Return True if a charset is stateful (e.g. JIS).
bool nsMsgI18Nstateful_charset(const char *charset)
{
  //TODO: use charset manager's service
  return (PL_strcasecmp(charset, "ISO-2022-JP") == 0);
}

bool nsMsgI18Nmultibyte_charset(const char *charset)
{
  nsresult res;
  nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
  bool result = false;

  if (NS_SUCCEEDED(res)) {
    nsAutoString charsetData;
    res = ccm->GetCharsetData(charset, u".isMultibyte", charsetData);
    if (NS_SUCCEEDED(res)) {
      result = charsetData.LowerCaseEqualsLiteral("true");
    }
  }

  return result;
}

bool nsMsgI18Ncheck_data_in_charset_range(const char *charset, const char16_t* inString)
{
  if (!charset || !*charset || !inString || !*inString)
    return true;

  bool res = true;

  auto encoding = mozilla::Encoding::ForLabelNoReplacement(nsDependentCString(charset));
  if (!encoding)
    return false;
  auto encoder = encoding->NewEncoder();

  uint8_t buffer[512];
  auto src = mozilla::MakeStringSpan(inString);
  auto dst = mozilla::MakeSpan(buffer);
  while (true) {
    uint32_t result;
    size_t read;
    size_t written;
    mozilla::Tie(result, read, written) =
      encoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
    if (result == mozilla::kInputEmpty) {
      // All converted successfully.
      break;
    } else if (result != mozilla::kOutputFull) {
      // Didn't use all the input but the outout isn't full, hence
      // there was an unencodable character.
      res = false;
      break;
    }
    src = src.From(read);
    // dst = dst.From(written); // Just overwrite output since we don't need it.
  }

  return res;
}

// Simple parser to parse META charset.
// It only supports the case when the description is within one line.
const char *
nsMsgI18NParseMetaCharset(nsIFile* file)
{
  static char charset[nsIMimeConverter::MAX_CHARSET_NAME_LENGTH+1];

  *charset = '\0';

  bool isDirectory = false;
  file->IsDirectory(&isDirectory);
  if (isDirectory) {
    NS_ERROR("file is a directory");
    return charset;
  }

  nsresult rv;
  nsCOMPtr <nsIFileInputStream> fileStream = do_CreateInstance(NS_LOCALFILEINPUTSTREAM_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, charset);

  rv = fileStream->Init(file, PR_RDONLY, 0664, false);
  nsCOMPtr <nsILineInputStream> lineStream = do_QueryInterface(fileStream, &rv);

  nsCString curLine;
  bool more = true;
  while (NS_SUCCEEDED(rv) && more) {
    rv = lineStream->ReadLine(curLine, &more);
    if (curLine.IsEmpty())
      continue;

    ToUpperCase(curLine);

    if (curLine.Find("/HEAD") != -1)
      break;

    if (curLine.Find("META") != -1 &&
        curLine.Find("HTTP-EQUIV") != -1 &&
        curLine.Find("CONTENT-TYPE") != -1 &&
        curLine.Find("CHARSET") != -1) {
      char *cp = (char *) PL_strchr(PL_strstr(curLine.get(), "CHARSET"), '=');
      char *token = nullptr;
      if (cp)
      {
        char *newStr = cp + 1;
        token = NS_strtok(" \"\'", &newStr);
      }
      if (token) {
        PL_strncpy(charset, token, sizeof(charset));
        charset[sizeof(charset)-1] = '\0';

        // this function cannot parse a file if it is really
        // encoded by one of the following charsets
        // so we can say that the charset label must be incorrect for
        // the .html if we actually see those charsets parsed
        // and we should ignore them
        if (!PL_strncasecmp("UTF-16", charset, sizeof("UTF-16")-1) ||
            !PL_strncasecmp("UTF-32", charset, sizeof("UTF-32")-1))
          charset[0] = '\0';

        break;
      }
    }
  }

  return charset;
}

nsresult nsMsgI18NShrinkUTF8Str(const nsCString &inString,
                                uint32_t aMaxLength,
                                nsACString &outString)
{
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }
  if (inString.Length() < aMaxLength) {
    outString.Assign(inString);
    return NS_OK;
  }
  NS_ASSERTION(MsgIsUTF8(inString), "Invalid UTF-8 string is inputted");
  const char* start = inString.get();
  const char* end = start + inString.Length();
  const char* last = start + aMaxLength;
  const char* cur = start;
  const char* prev = nullptr;
  bool err = false;
  while (cur < last) {
    prev = cur;
    if (!UTF8CharEnumerator::NextChar(&cur, end, &err) || err)
      break;
  }
  if (!prev || err) {
    outString.Truncate();
    return NS_OK;
  }
  uint32_t len = prev - start;
  outString.Assign(Substring(inString, 0, len));
  return NS_OK;
}

void nsMsgI18NConvertRawBytesToUTF16(const nsCString& inString,
                                     const char* charset,
                                     nsAString& outString)
{
  if (MsgIsUTF8(inString))
  {
    CopyUTF8toUTF16(inString, outString);
    return;
  }

  nsresult rv = ConvertToUnicode(charset, inString, outString);
  if (NS_SUCCEEDED(rv))
    return;

  const char* cur = inString.BeginReading();
  const char* end = inString.EndReading();
  outString.Truncate();
  while (cur < end) {
    char c = *cur++;
    if (c & char(0x80))
      outString.Append(UCS2_REPLACEMENT_CHAR);
    else
      outString.Append(c);
  }
}

void nsMsgI18NConvertRawBytesToUTF8(const nsCString& inString,
                                    const char* charset,
                                    nsACString& outString)
{
  if (MsgIsUTF8(inString))
  {
    outString.Assign(inString);
    return;
  }

  nsAutoString utf16Text;
  nsresult rv = ConvertToUnicode(charset, inString, utf16Text);
  if (NS_SUCCEEDED(rv))
  {
    CopyUTF16toUTF8(utf16Text, outString);
    return;
  }

  // EF BF BD (UTF-8 encoding of U+FFFD)
  NS_NAMED_LITERAL_CSTRING(utf8ReplacementChar, "\357\277\275");
  const char* cur = inString.BeginReading();
  const char* end = inString.EndReading();
  outString.Truncate();
  while (cur < end) {
    char c = *cur++;
    if (c & char(0x80))
      outString.Append(utf8ReplacementChar);
    else
      outString.Append(c);
  }
}