Bug 1489949 - Port bug 1488659: Replace XPCOM use of nsICharsetDetector, part 2, take 2. r=hsivonen
author Jorg K <jorgk@jorgk.com>
Wed, 12 Sep 2018 09:08:20 +0200
changeset 33134 4d4e022e66970521a0962db37b3d07071638a854
parent 33133 9dc7f65220a4a9b888d916d40d08cedbf673b416
child 33135 21c798a31bdf4833368f791db5fddfb35c8997be
push id 387
push user clokep@gmail.com
push date Mon, 10 Dec 2018 21:30:47 +0000
reviewers hsivonen
bugs 1489949, 1488659
Bug 1489949 - Port bug 1488659: Replace XPCOM use of nsICharsetDetector, part 2, take 2. r=hsivonen
mailnews/base/util/nsMsgUtils.cpp
mailnews/mime/src/comi18n.cpp
mailnews/mime/src/comi18n.h
mailnews/mime/src/mimetext.cpp
mailnews/mime/src/moz.build
--- a/mailnews/base/util/nsMsgUtils.cpp
+++ b/mailnews/base/util/nsMsgUtils.cpp
@@ -1876,20 +1876,20 @@ MsgStreamMsgHeaders(nsIInputStream *aInp
 
 class CharsetDetectionObserver : public nsICharsetDetectionObserver
 {
 public:
   NS_DECL_ISUPPORTS
   CharsetDetectionObserver() {};
   NS_IMETHOD Notify(const char* aCharset, nsDetectionConfident aConf) override
   {
-    mCharset = aCharset;
+    mCharset.AssignASCII(aCharset);
     return NS_OK;
   };
-  const char *GetDetectedCharset() { return mCharset.get(); }
+  void GetDetectedCharset(nsACString& aCharset) { aCharset = mCharset; }
 
 private:
   virtual ~CharsetDetectionObserver() {}
   nsCString mCharset;
 };
 
 NS_IMPL_ISUPPORTS(CharsetDetectionObserver, nsICharsetDetectionObserver)
 
@@ -1934,17 +1934,17 @@ MsgDetectCharsetFromFile(nsIFile *aFile,
       detector->DoIt(buffer.get(), buffer.Length(), &dontFeed);
       NS_ENSURE_SUCCESS(rv, rv);
       if (dontFeed)
         break;
     }
     rv = detector->Done();
     NS_ENSURE_SUCCESS(rv, rv);
 
-    aCharset = observer->GetDetectedCharset();
+    observer->GetDetectedCharset(aCharset);
   } else {
     // no charset detector available, check the BOM
     char sniffBuf[3];
     uint32_t numRead;
     rv = inputStream->Read(sniffBuf, sizeof(sniffBuf), &numRead);
 
     if (numRead >= 2 &&
                sniffBuf[0] == (char)0xfe &&
--- a/mailnews/mime/src/comi18n.cpp
+++ b/mailnews/mime/src/comi18n.cpp
@@ -1,21 +1,27 @@
 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "comi18n.h"
+#include "nsICharsetDetector.h"
 #include "nsIStringCharsetDetector.h"
+#include "nsCyrillicDetector.h"
+#include "nsUniversalDetector.h"
+#include "nsUdetXPCOMWrapper.h"
 #include "nsMsgUtils.h"
 #include "nsServiceManagerUtils.h"
 #include "nsComponentManagerUtils.h"
 #include "nsMsgMimeCID.h"
 #include "nsIMimeConverter.h"
+#include "mozilla/Preferences.h"
 
+using namespace mozilla;
 
 ////////////////////////////////////////////////////////////////////////////////
 // BEGIN PUBLIC INTERFACE
 extern "C" {
 
 
 void MIME_DecodeMimeHeader(const char *header, const char *default_charset,
                            bool override_charset, bool eatContinuations,
@@ -30,36 +36,77 @@ void MIME_DecodeMimeHeader(const char *h
   }
   mimeConverter->DecodeMimeHeaderToUTF8(nsDependentCString(header),
                                         default_charset, override_charset,
                                         eatContinuations, result);
 }
 
 // UTF-8 utility functions.
 //detect charset soly based on aBuf. return in aCharset
-nsresult
-MIME_detect_charset(const char *aBuf, int32_t aLength, const char** aCharset)
+class CharsetDetectionObserver : public nsICharsetDetectionObserver
 {
-  nsresult res = NS_ERROR_UNEXPECTED;
-  nsString detector_name;
-  *aCharset = nullptr;
+public:
+  NS_DECL_ISUPPORTS
+  CharsetDetectionObserver() {};
+  NS_IMETHOD Notify(const char* aCharset, nsDetectionConfident aConf) override
+  {
+    mCharset.AssignASCII(aCharset);
+    mConf = aConf;
+    return NS_OK;
+  };
+  void GetDetectedCharset(nsACString& aCharset) { aCharset = mCharset; }
+  nsDetectionConfident GetDetectionConfident() { return mConf; }
 
-  NS_GetLocalizedUnicharPreferenceWithDefault(nullptr, "intl.charset.detector", EmptyString(), detector_name);
+private:
+  virtual ~CharsetDetectionObserver() {}
+  nsCString mCharset;
+  nsDetectionConfident mConf;
+};
+
+nsresult
+MIME_detect_charset(const char *aBuf, int32_t aLength, nsACString& aCharset)
+{
+  nsresult rv = NS_ERROR_UNEXPECTED;
+  nsCOMPtr<nsICharsetDetector> detector;
+  nsAutoCString detectorName;
+  Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
 
-  if (!detector_name.IsEmpty()) {
-    nsAutoCString detector_contractid;
-    detector_contractid.AssignLiteral(NS_STRCDETECTOR_CONTRACTID_BASE);
-    detector_contractid.Append(NS_ConvertUTF16toUTF8(detector_name));
-    nsCOMPtr<nsIStringCharsetDetector> detector = do_CreateInstance(detector_contractid.get(), &res);
-    if (NS_SUCCEEDED(res)) {
-      nsDetectionConfident oConfident;
-      res = detector->DoIt(aBuf, aLength, aCharset, oConfident);
-      if (NS_SUCCEEDED(res) && (eBestAnswer == oConfident || eSureAnswer == oConfident)) {
+  if (!detectorName.IsEmpty()) {
+    // We recognize one of the three magic strings for the following languages.
+    if (detectorName.EqualsLiteral("ruprob")) {
+      detector = new nsRUProbDetector();
+    } else if (detectorName.EqualsLiteral("ukprob")) {
+      detector = new nsUKProbDetector();
+    } else if (detectorName.EqualsLiteral("ja_parallel_state_machine")) {
+      detector = new nsJAPSMDetector();
+    }
+  }
+
+  if (detector) {
+    nsAutoCString buffer;
+
+    RefPtr<CharsetDetectionObserver> observer = new CharsetDetectionObserver();
+
+    rv = detector->Init(observer);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    nsDetectionConfident oConfident;
+    bool dontFeed = false;
+    rv = detector->DoIt(aBuf, aLength, &dontFeed);
+    if (NS_SUCCEEDED(rv)) {
+      rv = detector->Done();
+      NS_ENSURE_SUCCESS(rv, rv);
+      oConfident = observer->GetDetectionConfident();
+      if (oConfident == eBestAnswer || oConfident == eSureAnswer) {
+        observer->GetDetectedCharset(aCharset);
         return NS_OK;
+      } else {
+        // No luck after all.
+        rv = NS_ERROR_UNEXPECTED;
       }
     }
   }
-  return res;
+  return rv;
 }
 
 } /* end of extern "C" */
 // END PUBLIC INTERFACE
 
--- a/mailnews/mime/src/comi18n.h
+++ b/mailnews/mime/src/comi18n.h
@@ -22,16 +22,16 @@ extern "C" {
  * @param override_charset    [IN] If true, default_charset used instead of any charset labeling other than UTF-8
  * @param eatContinuations    [IN] If true, unfold headers
  * @param result      [OUT] Decoded buffer
  */
 void MIME_DecodeMimeHeader(const char *header, const char *default_charset,
                            bool override_charset, bool eatContinuations,
                            nsACString &result);
 
-nsresult MIME_detect_charset(const char *aBuf, int32_t aLength, const char** aCharset);
+nsresult MIME_detect_charset(const char *aBuf, int32_t aLength, nsACString& aCharset);
 
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
 
 #endif // _COMI18N_LOADED_H_
 
--- a/mailnews/mime/src/mimetext.cpp
+++ b/mailnews/mime/src/mimetext.cpp
@@ -378,38 +378,38 @@ MimeInlineText_convert_and_parse_line(ch
 }
 
 //In this function call, all buffered lines in lineDam will be sent to charset detector
 // and a charset will be used to parse all those line and following lines in this mime obj.
 static int
 MimeInlineText_open_dam(char *line, int32_t length, MimeObject *obj)
 {
   MimeInlineText *text = (MimeInlineText *) obj;
-  const char* detectedCharset = nullptr;
+  nsAutoCString detectedCharset;
   nsresult res = NS_OK;
   int status = 0;
   int32_t i;
 
   if (text->curDamOffset <= 0) {
     //there is nothing in dam, use current line for detection
     if (length > 0) {
-      res = MIME_detect_charset(line, length, &detectedCharset);
+      res = MIME_detect_charset(line, length, detectedCharset);
     }
   } else {
     //we have stuff in dam, use the one
-    res = MIME_detect_charset(text->lineDamBuffer, text->curDamOffset, &detectedCharset);
+    res = MIME_detect_charset(text->lineDamBuffer, text->curDamOffset, detectedCharset);
   }
 
   //set the charset for this obj
-  if (NS_SUCCEEDED(res) && detectedCharset && *detectedCharset)  {
+  if (NS_SUCCEEDED(res) && !detectedCharset.IsEmpty()) {
     PR_FREEIF(text->charset);
-    text->charset = strdup(detectedCharset);
+    text->charset = ToNewCString(detectedCharset);
 
     //update MsgWindow charset if we are instructed to do so
-    if (text->needUpdateMsgWinCharset && *text->charset)
+    if (text->needUpdateMsgWinCharset && text->charset)
       SetMailCharacterSetToMsgWindow(obj, text->charset);
   }
 
   //process dam and line using the charset
   if (text->curDamOffset) {
     for (i = 0; i < text->lastLineInDam-1; i++)
     {
       status = MimeInlineText_convert_and_parse_line(
--- a/mailnews/mime/src/moz.build
+++ b/mailnews/mime/src/moz.build
@@ -66,19 +66,22 @@ SOURCES += [
     'nsCMS.cpp',
     'nsCMSSecureMessage.cpp',
     'nsMimeObjectClassAccess.cpp',
     'nsSimpleMimeConverterStub.cpp',
     'nsStreamConverter.cpp',
 ]
 
 LOCAL_INCLUDES += [
-     '/%s/security/certverifier' % CONFIG['mozreltopsrcdir'],
-     '/%s/security/manager/ssl' % CONFIG['mozreltopsrcdir'],
-     '/%s/security/pkix/include' % CONFIG['mozreltopsrcdir'],
+    '/%s/extensions/universalchardet/src/base' % CONFIG['mozreltopsrcdir'],
+    '/%s/extensions/universalchardet/src/xpcom' % CONFIG['mozreltopsrcdir'],
+    '/%s/intl/chardet' % CONFIG['mozreltopsrcdir'],
+    '/%s/security/certverifier' % CONFIG['mozreltopsrcdir'],
+    '/%s/security/manager/ssl' % CONFIG['mozreltopsrcdir'],
+    '/%s/security/pkix/include' % CONFIG['mozreltopsrcdir'],
 ]
 
 EXTRA_COMPONENTS += [
     'mimeJSComponents.js',
     'msgMime.manifest',
 ]
 
 EXTRA_JS_MODULES += [