Bug 1734361 - fix non-utf8 newsgroups. r=freaktechnik
authorMagnus Melin <mkmelin+mozilla@iki.fi>
Sat, 23 Oct 2021 13:35:41 +0300
changeset 34124 d0af6cc5fe02dd6e5b27ec24322c02491d8d7990
parent 34123 09b6a3cf700dc3fccefedd4fcdfb024c202ede5f
child 34125 f5354584e9675e980cd3b2a9b4939d629f7bd7c1
push id19299
push usermkmelin@iki.fi
push dateSat, 23 Oct 2021 10:46:13 +0000
treeherdercomm-central@41f6353fe424 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfreaktechnik
bugs1734361
Bug 1734361 - fix non-utf8 newsgroups. r=freaktechnik * Restore erroneously removed 'text->inputAutodetect = true;'. * Use news server default charset. Based on https://github.com/Betterbird/thunderbird-patches/blob/main/91/bugs/1734361-fix-news-charset-issues.patch Differential Revision: https://phabricator.services.mozilla.com/D128582
mailnews/base/content/menulist-charsetpicker.js
mailnews/db/msgdb/src/nsMsgDatabase.cpp
mailnews/mime/src/mimetext.cpp
mailnews/mime/src/nsStreamConverter.cpp
mailnews/news/public/nsIMsgNewsFolder.idl
mailnews/news/public/nsINntpUrl.idl
mailnews/news/src/nsNewsFolder.cpp
mailnews/news/src/nsNntpUrl.cpp
mailnews/search/src/nsMsgFilter.cpp
mailnews/search/src/nsMsgSearchAdapter.cpp
--- a/mailnews/base/content/menulist-charsetpicker.js
+++ b/mailnews/base/content/menulist-charsetpicker.js
@@ -127,16 +127,17 @@ if (!customElements.get("menulist")) {
    */
   class MozMenulistCharsetpickerViewing extends MozMenulistCharsetpickerBase {
     get charsetValues() {
       return [
         "UTF-8",
         "Big5",
         "EUC-KR",
         "gbk",
+        "KOI8-R",
         "ISO-2022-JP",
         "ISO-8859-1",
         "ISO-8859-2",
         "ISO-8859-7",
         "windows-874",
         "windows-1250",
         "windows-1251",
         "windows-1252",
--- a/mailnews/db/msgdb/src/nsMsgDatabase.cpp
+++ b/mailnews/db/msgdb/src/nsMsgDatabase.cpp
@@ -5,16 +5,17 @@
 
 // this file implements the nsMsgDatabase interface using the MDB Interface.
 
 #include "nscore.h"
 #include "msgCore.h"
 #include "nsIFile.h"
 #include "nsMailDatabase.h"
 #include "nsDBFolderInfo.h"
+#include "nsIMsgNewsFolder.h"
 #include "nsMsgThread.h"
 #include "nsIMsgSearchTerm.h"
 #include "nsMsgBaseCID.h"
 #include "nsMorkCID.h"
 #include "nsIMdbFactoryFactory.h"
 #include "mozilla/Logging.h"
 #include "mozilla/Telemetry.h"
 #include "prprf.h"
@@ -2871,16 +2872,18 @@ nsIMimeConverter* nsMsgDatabase::GetMime
 nsresult nsMsgDatabase::GetEffectiveCharset(nsIMdbRow* row,
                                             nsACString& resultCharset) {
   resultCharset.Truncate();
   nsresult rv = RowCellColumnToCharPtr(row, m_messageCharSetColumnToken,
                                        getter_Copies(resultCharset));
   if (NS_FAILED(rv) || resultCharset.IsEmpty() ||
       resultCharset.EqualsLiteral("us-ascii")) {
     resultCharset.AssignLiteral("UTF-8");
+    nsCOMPtr<nsIMsgNewsFolder> newsfolder(do_QueryInterface(m_folder));
+    if (newsfolder) newsfolder->GetCharset(resultCharset);
   }
   return rv;
 }
 
 nsresult nsMsgDatabase::RowCellColumnToMime2DecodedString(
     nsIMdbRow* row, mdb_token columnToken, nsAString& resultStr) {
   nsresult err = NS_OK;
   const char* nakedString = nullptr;
--- a/mailnews/mime/src/mimetext.cpp
+++ b/mailnews/mime/src/mimetext.cpp
@@ -102,31 +102,23 @@ static int MimeInlineText_initializeChar
       // UTF-8 but omits the charset parameter.
       // When no charset is defined by the container (e.g. iMIP), iCalendar
       // files default to UTF-8 (RFC 2445, section 4.1.4).
       if (!text->charset && obj->content_type &&
           !PL_strcasecmp(obj->content_type, TEXT_CALENDAR))
         text->charset = strdup("UTF-8");
 
       if (!text->charset) {
-        nsresult res;
-
         text->charsetOverridable = true;
-
-        nsCOMPtr<nsIPrefBranch> prefBranch(
-            do_GetService(NS_PREFSERVICE_CONTRACTID, &res));
+        text->inputAutodetect = true;
 
         if (obj->options && obj->options->default_charset)
           text->charset = strdup(obj->options->default_charset);
-        else {
-          if (NS_SUCCEEDED(res)) {
-            text->charset = strdup("UTF-8");
-          } else
-            text->charset = strdup("");
-        }
+        else
+          text->charset = strdup("UTF-8");
       }
     }
   }
 
   if (text->inputAutodetect) {
     // We need to prepare lineDam for charset detection.
     text->lineDamBuffer = (char*)PR_Malloc(DAM_MAX_BUFFER_SIZE);
     text->lineDamPtrs = (char**)PR_Malloc(DAM_MAX_LINES * sizeof(char*));
--- a/mailnews/mime/src/nsStreamConverter.cpp
+++ b/mailnews/mime/src/nsStreamConverter.cpp
@@ -21,16 +21,17 @@
 #include "nsMimeStringResources.h"
 #include "nsIPrefService.h"
 #include "nsIPrefBranch.h"
 #include "nsNetUtil.h"
 #include "nsIMsgQuote.h"
 #include "nsNetUtil.h"
 #include "mozITXTToHTMLConv.h"
 #include "nsIMsgMailNewsUrl.h"
+#include "nsINntpUrl.h"
 #include "nsIMsgWindow.h"
 #include "nsICategoryManager.h"
 #include "nsMsgUtils.h"
 #include "mozilla/ArrayUtils.h"
 
 #define PREF_MAIL_DISPLAY_GLYPH "mail.display_glyph"
 #define PREF_MAIL_DISPLAY_STRUCT "mail.display_struct"
 
@@ -113,17 +114,30 @@ nsresult bridge_new_new_uri(void* bridge
           // check to see if we have a charset override...and if we do, set that
           // field appropriately too...
           nsresult rv = i18nUrl->GetOverRideCharset(&overrideCharset);
           if (NS_SUCCEEDED(rv) && overrideCharset) {
             *override_charset = true;
             *default_charset = nullptr;
           } else {
             *override_charset = false;
-            *default_charset = strdup("UTF-8");
+            // Special treatment for news: URLs. Get the server default charset.
+            nsCOMPtr<nsINntpUrl> nntpURL(do_QueryInterface(aURI));
+            if (nntpURL) {
+              nsCString charset;
+              rv = nntpURL->GetCharset(charset);
+              if (NS_SUCCEEDED(rv)) {
+                *default_charset = ToNewCString(charset);
+                *override_charset = true;
+              } else {
+                *default_charset = strdup("UTF-8");
+              }
+            } else {
+              *default_charset = strdup("UTF-8");
+            }
           }
 
           // if there is no manual override and a folder charset exists
           // then check if we have a folder level override
           if (!(*override_charset) && *default_charset && **default_charset) {
             // notify the default to msgWindow (for the menu check mark)
             // do not set the default in case of nsMimeMessageDraftOrTemplate
             // or nsMimeMessageEditorTemplate because it is already set
--- a/mailnews/news/public/nsIMsgNewsFolder.idl
+++ b/mailnews/news/public/nsIMsgNewsFolder.idl
@@ -91,16 +91,19 @@ interface nsIMsgNewsFolder : nsISupports
   /// @}
 
   void moveFolder(in nsIMsgFolder aNewsgroupToMove, in nsIMsgFolder aRefNewsgroup, in int32_t aOrientation);
 
   nsIMsgFolder addNewsgroup(in AUTF8String newsgroupName, in ACString setStr);
 
   void setReadSetFromStr(in ACString setStr);
 
+  /// returns the server's default charset.
+  readonly attribute ACString charset;
+
   readonly attribute ACString newsrcLine;
   readonly attribute ACString optionLines;
   readonly attribute ACString unsubscribedNewsgroupLines;
   void SetNewsrcHasChanged(in boolean newsrcHasChanged);
   void updateSummaryFromNNTPInfo(in long oldest, in long youngest, in long total);
   void removeMessage(in nsMsgKey key);
   [noscript] void removeMessages(in nsMsgKeyArrayRef aMsgKeys);
   void cancelComplete();
--- a/mailnews/news/public/nsINntpUrl.idl
+++ b/mailnews/news/public/nsINntpUrl.idl
@@ -63,16 +63,19 @@ interface nsINntpUrl : nsISupports {
   readonly attribute ACString group;
 
   /// The message ID portion of the URI, if one is present
   readonly attribute ACString messageID;
 
   /// The message key portion of the URI or nsMsgKey_None if not present
   readonly attribute nsMsgKey key;
 
+  /// returns the server's default charset.
+  readonly attribute ACString charset;
+
   /// The action of this news URI could not be determined
   const nsNewsAction ActionUnknown = 0;
   /// Fetch the contents of an article
   const nsNewsAction ActionFetchArticle = 1;
   /// Fetch the part of an article (requires ?part=)
   const nsNewsAction ActionFetchPart = 2;
   /// Save the contents of an article to disk
   const nsNewsAction ActionSaveMessageToDisk = 3;
--- a/mailnews/news/src/nsNewsFolder.cpp
+++ b/mailnews/news/src/nsNewsFolder.cpp
@@ -1243,16 +1243,25 @@ NS_IMETHODIMP nsMsgNewsFolder::MoveFolde
 
   return rv;
 }
 
 nsresult nsMsgNewsFolder::CreateBaseMessageURI(const nsACString& aURI) {
   return nsCreateNewsBaseMessageURI(nsCString(aURI).get(), mBaseMessageURI);
 }
 
+NS_IMETHODIMP nsMsgNewsFolder::GetCharset(nsACString& charset) {
+  nsCOMPtr<nsIMsgIncomingServer> server;
+  nsresult rv = GetServer(getter_AddRefs(server));
+  NS_ENSURE_SUCCESS(rv, rv);
+  nsCOMPtr<nsINntpIncomingServer> nserver(do_QueryInterface(server));
+  NS_ENSURE_TRUE(nserver, NS_ERROR_NULL_POINTER);
+  return nserver->GetCharset(charset);
+}
+
 NS_IMETHODIMP
 nsMsgNewsFolder::GetNewsrcLine(nsACString& newsrcLine) {
   nsresult rv;
   nsString newsgroupNameUtf16;
   rv = GetName(newsgroupNameUtf16);
   if (NS_FAILED(rv)) return rv;
   NS_ConvertUTF16toUTF8 newsgroupName(newsgroupNameUtf16);
 
--- a/mailnews/news/src/nsNntpUrl.cpp
+++ b/mailnews/news/src/nsNntpUrl.cpp
@@ -257,16 +257,25 @@ NS_IMETHODIMP nsNntpUrl::GetMessageID(ns
 }
 
 NS_IMETHODIMP nsNntpUrl::GetKey(nsMsgKey* key) {
   NS_ENSURE_ARG_POINTER(key);
   *key = m_key;
   return NS_OK;
 }
 
+NS_IMETHODIMP nsNntpUrl::GetCharset(nsACString& charset) {
+  nsCOMPtr<nsIMsgIncomingServer> server;
+  nsresult rv = GetServer(getter_AddRefs(server));
+  NS_ENSURE_SUCCESS(rv, rv);
+  nsCOMPtr<nsINntpIncomingServer> nserver(do_QueryInterface(server));
+  NS_ENSURE_TRUE(nserver, NS_ERROR_NULL_POINTER);
+  return nserver->GetCharset(charset);
+}
+
 NS_IMETHODIMP nsNntpUrl::GetNormalizedSpec(nsACString& aPrincipalSpec) {
   return NS_ERROR_NOT_IMPLEMENTED;
 }
 
 NS_IMETHODIMP nsNntpUrl::SetUri(const nsACString& aURI) {
   mURI = aURI;
   return NS_OK;
 }
--- a/mailnews/search/src/nsMsgFilter.cpp
+++ b/mailnews/search/src/nsMsgFilter.cpp
@@ -18,16 +18,17 @@
 #include "nsMsgSearchValue.h"
 #include "nsMsgI18N.h"
 #include "nsNativeCharsetUtils.h"
 #include "nsIOutputStream.h"
 #include "nsIStringBundle.h"
 #include "nsComponentManagerUtils.h"
 #include "nsServiceManagerUtils.h"
 #include "nsIMsgFilterService.h"
+#include "nsIMsgNewsFolder.h"
 #include "prmem.h"
 #include "mozilla/ArrayUtils.h"
 #include "mozilla/Services.h"
 
 static const char* kImapPrefix = "//imap:";
 static const char* kWhitespace = "\b\t\r\n ";
 
 nsMsgRuleAction::nsMsgRuleAction()
@@ -574,21 +575,22 @@ NS_IMETHODIMP nsMsgFilter::LogRuleHitFai
 }
 
 NS_IMETHODIMP
 nsMsgFilter::MatchHdr(nsIMsgDBHdr* msgHdr, nsIMsgFolder* folder,
                       nsIMsgDatabase* db, const nsACString& headers,
                       bool* pResult) {
   NS_ENSURE_ARG_POINTER(folder);
   NS_ENSURE_ARG_POINTER(msgHdr);
-  // use offlineMail because
-  nsresult rv = nsMsgSearchOfflineMail::MatchTermsForFilter(
-      msgHdr, m_termList, "UTF-8", m_scope, db, headers, &m_expressionTree,
-      pResult);
-  return rv;
+  nsCString folderCharset = "UTF-8"_ns;
+  nsCOMPtr<nsIMsgNewsFolder> newsfolder(do_QueryInterface(folder));
+  if (newsfolder) newsfolder->GetCharset(folderCharset);
+  return nsMsgSearchOfflineMail::MatchTermsForFilter(
+      msgHdr, m_termList, folderCharset.get(), m_scope, db, headers,
+      &m_expressionTree, pResult);
 }
 
 NS_IMETHODIMP
 nsMsgFilter::SetFilterList(nsIMsgFilterList* filterList) {
   // doesn't hold a ref.
   m_filterList = filterList;
   return NS_OK;
 }
--- a/mailnews/search/src/nsMsgSearchAdapter.cpp
+++ b/mailnews/search/src/nsMsgSearchAdapter.cpp
@@ -20,16 +20,17 @@
 #include "mozilla/UniquePtr.h"
 #include "prmem.h"
 #include "MailNewsTypes.h"
 #include "nsComponentManagerUtils.h"
 #include "nsServiceManagerUtils.h"
 #include "nsMemory.h"
 #include "nsMsgMessageFlags.h"
 #include "mozilla/Attributes.h"
+#include "nsIMsgNewsFolder.h"
 
 // This stuff lives in the base class because the IMAP search syntax
 // is used by the Dredd SEARCH command as well as IMAP itself
 
 // km - the NOT and HEADER strings are not encoded with a trailing
 //      <space> because they always precede a mnemonic that has a
 //      preceding <space> and double <space> characters cause some
 //    imap servers to return an error.
@@ -196,16 +197,30 @@ nsresult nsMsgSearchAdapter::GetSearchCh
   nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID, &rv));
   if (NS_SUCCEEDED(rv)) {
     prefs->GetBoolPref("mailnews.force_ascii_search", &forceAsciiSearch);
   }
 
   srcCharset = m_defaultCharset;
   dstCharset.Assign(srcCharset);
 
+  if (m_scope) {
+    nsCOMPtr<nsIMsgFolder> folder;
+    rv = m_scope->GetFolder(getter_AddRefs(folder));
+    if (NS_SUCCEEDED(rv) && folder) {
+      nsCOMPtr<nsIMsgNewsFolder> newsfolder(do_QueryInterface(folder));
+      if (newsfolder) {
+        nsCString folderCharset;
+        rv = newsfolder->GetCharset(folderCharset);
+        if (NS_SUCCEEDED(rv))
+          dstCharset.Assign(NS_ConvertASCIItoUTF16(folderCharset));
+      }
+    }
+  }
+
   if (forceAsciiSearch) {
     // Special cases to use in order to force US-ASCII searching with Latin1
     // or MacRoman text. Eurgh. This only has to happen because IMAP
     // and Dredd servers currently (4/23/97) only support US-ASCII.
     //
     // If the dest csid is ISO Latin 1 or MacRoman, attempt to convert the
     // source text to US-ASCII. (Not for now.)
     // if ((dst_csid == CS_LATIN1) || (dst_csid == CS_MAC_ROMAN))