Bug 844461 - Perform alias resolution on the fallback encoding pref value. r=smontagu, a=lsblakk.
author Henri Sivonen <hsivonen@iki.fi>
Thu, 28 Feb 2013 16:25:19 +0200
changeset 128421 c68679b48ed50b8cdb12d4fac344759f7148f67f
parent 128416 de200581d19bd59517d91b499fe0c4f412a916ed
child 128422 32e6fabc2667e962023960a70f6514612625b065
push id 297
push user lsblakk@mozilla.com
push date Tue, 26 Mar 2013 17:28:00 +0000
treeherder mozilla-release@64d7b45c34e6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smontagu, lsblakk
bugs 844461
milestone 20.0
Bug 844461 - Perform alias resolution on the fallback encoding pref value. r=smontagu, a=lsblakk.
content/base/test/test_bug431701.html
content/html/document/src/nsHTMLDocument.cpp
content/html/document/src/nsHTMLDocument.h
dom/encoding/EncodingUtils.cpp
dom/encoding/EncodingUtils.h
extensions/universalchardet/tests/CharsetDetectionTests.js
extensions/universalchardet/tests/test_bug638318.html
extensions/universalchardet/tests/test_bug811363-1-1.html
extensions/universalchardet/tests/test_bug811363-1-5.html
layout/base/nsDocumentViewer.cpp
--- a/content/base/test/test_bug431701.html
+++ b/content/base/test/test_bug431701.html
@@ -57,17 +57,17 @@ function xhrDoc(idx) {
     return xhr.responseXML;
   };
 }
 
 // Each row has the document getter function, then the characterSet,
 // inputEncoding expected for that document.
 
 var tests = [
- [ frameDoc("one"), "ISO-8859-1", "ISO-8859-1" ],
+ [ frameDoc("one"), "windows-1252", "windows-1252" ],
  [ frameDoc("two"), "UTF-8", "UTF-8" ],
  [ frameDoc("three"), "windows-1252", "windows-1252" ],
  [ frameDoc("four"), "UTF-8", "UTF-8" ],
  [ frameDoc("five"), "UTF-8", "UTF-8" ],
  [ frameDoc("six"), "UTF-8", "UTF-8" ],
  [ frameDoc("seven"), "windows-1252", "windows-1252" ],
  [ createDoc, "UTF-8", null ],
  [ xhrDoc(4), "UTF-8", "UTF-8" ],
--- a/content/html/document/src/nsHTMLDocument.cpp
+++ b/content/html/document/src/nsHTMLDocument.cpp
@@ -78,16 +78,17 @@
 #include "nsIMutableArray.h"
 #include "nsArrayUtils.h"
 #include "nsIEffectiveTLDService.h"
 
 #include "nsIPrompt.h"
 //AHMED 12-2
 #include "nsBidiUtils.h"
 
+#include "mozilla/dom/EncodingUtils.h"
 #include "nsIEditingSession.h"
 #include "nsIEditor.h"
 #include "nsNodeInfoManager.h"
 #include "nsIPlaintextEditor.h"
 #include "nsIHTMLEditor.h"
 #include "nsIEditorDocShell.h"
 #include "nsIEditorStyleSheets.h"
 #include "nsIInlineSpellChecker.h"
@@ -324,17 +325,17 @@ nsHTMLDocument::TryHintCharset(nsIMarkup
     if(NS_SUCCEEDED(rv) && kCharsetUninitialized != requestCharsetSource) {
       nsAutoCString requestCharset;
       rv = aMarkupDV->GetHintCharacterSet(requestCharset);
       aMarkupDV->SetHintCharacterSetSource((int32_t)(kCharsetUninitialized));
 
       if(requestCharsetSource <= aCharsetSource)
         return;
 
-      if(NS_SUCCEEDED(rv) && IsAsciiCompatible(requestCharset)) {
+      if(NS_SUCCEEDED(rv) && EncodingUtils::IsAsciiCompatible(requestCharset)) {
         aCharsetSource = requestCharsetSource;
         aCharset = requestCharset;
 
         return;
       }
     }
   }
   return;
@@ -352,17 +353,17 @@ nsHTMLDocument::TryUserForcedCharset(nsI
   if(kCharsetFromUserForced <= aCharsetSource)
     return true;
 
   nsAutoCString forceCharsetFromDocShell;
   if (aMarkupDV) {
     rv = aMarkupDV->GetForceCharacterSet(forceCharsetFromDocShell);
   }
 
-  // Not making the IsAsciiCompatible() check here to allow the user to
+  // Not making the EncodingUtils::IsAsciiCompatible() check here to allow the user to
   // force UTF-16 from the menu.
   if(NS_SUCCEEDED(rv) && !forceCharsetFromDocShell.IsEmpty()) {
     aCharset = forceCharsetFromDocShell;
     //TODO: we should define appropriate constant for force charset
     aCharsetSource = kCharsetFromUserForced;
   } else if (aDocShell) {
     nsCOMPtr<nsIAtom> csAtom;
     aDocShell->GetForcedCharset(getter_AddRefs(csAtom));
@@ -385,22 +386,22 @@ nsHTMLDocument::TryCacheCharset(nsICachi
   nsresult rv;
 
   if (kCharsetFromCache <= aCharsetSource) {
     return true;
   }
 
   nsCString cachedCharset;
   rv = aCachingChannel->GetCacheTokenCachedCharset(cachedCharset);
-  // Check IsAsciiCompatible() even in the cache case, because the value
+  // Check EncodingUtils::IsAsciiCompatible() even in the cache case, because the value
   // might be stale and in the case of a stale charset that is not a rough
   // ASCII superset, the parser has no way to recover.
   if (NS_SUCCEEDED(rv) &&
       !cachedCharset.IsEmpty() &&
-      IsAsciiCompatible(cachedCharset))
+      EncodingUtils::IsAsciiCompatible(cachedCharset))
   {
     aCharset = cachedCharset;
     aCharsetSource = kCharsetFromCache;
 
     return true;
   }
 
   return false;
@@ -414,26 +415,16 @@ CheckSameOrigin(nsINode* aNode1, nsINode
 
   bool equal;
   return
     NS_SUCCEEDED(aNode1->NodePrincipal()->
                    Equals(aNode2->NodePrincipal(), &equal)) &&
     equal;
 }
 
-bool
-nsHTMLDocument::IsAsciiCompatible(const nsACString& aPreferredName)
-{
-  return !(aPreferredName.LowerCaseEqualsLiteral("utf-16") ||
-           aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
-           aPreferredName.LowerCaseEqualsLiteral("utf-16le") ||
-           aPreferredName.LowerCaseEqualsLiteral("utf-7") ||
-           aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7"));
-}
-
 void
 nsHTMLDocument::TryParentCharset(nsIDocShell*  aDocShell,
                                  nsIDocument* aParentDocument,
                                  int32_t& aCharsetSource,
                                  nsACString& aCharset)
 {
   if (!aDocShell) {
     return;
@@ -449,28 +440,28 @@ nsHTMLDocument::TryParentCharset(nsIDocS
   aDocShell->GetParentCharsetSource(&parentSource);
   csAtom->ToUTF8String(parentCharset);
   if (kCharsetFromParentForced <= parentSource) {
     source = kCharsetFromParentForced;
   } else if (kCharsetFromHintPrevDoc == parentSource) {
     // Make sure that's OK
     if (!aParentDocument ||
         !CheckSameOrigin(this, aParentDocument) ||
-        !IsAsciiCompatible(parentCharset)) {
+        !EncodingUtils::IsAsciiCompatible(parentCharset)) {
       return;
     }
 
     // if parent is posted doc, set this prevent autodetections
     // I'm not sure this makes much sense... but whatever.
     source = kCharsetFromHintPrevDoc;
   } else if (kCharsetFromCache <= parentSource) {
     // Make sure that's OK
     if (!aParentDocument ||
         !CheckSameOrigin(this, aParentDocument) ||
-        !IsAsciiCompatible(parentCharset)) {
+        !EncodingUtils::IsAsciiCompatible(parentCharset)) {
       return;
     }
 
     source = kCharsetFromParentFrame;
   } else {
     return;
   }
 
@@ -489,20 +480,22 @@ nsHTMLDocument::UseWeakDocTypeDefault(in
   if (kCharsetFromWeakDocTypeDefault <= aCharsetSource)
     return;
 
   const nsAdoptingCString& defCharset =
     Preferences::GetLocalizedCString("intl.charset.default");
 
   // Don't let the user break things by setting intl.charset.default to
   // not a rough ASCII superset
-  if (!defCharset.IsEmpty() && IsAsciiCompatible(defCharset)) {
-    aCharset = defCharset;
+  nsAutoCString canonical;
+  if (EncodingUtils::FindEncodingForLabel(defCharset, canonical) &&
+      EncodingUtils::IsAsciiCompatible(canonical)) {
+    aCharset = canonical;
   } else {
-    aCharset.AssignLiteral("ISO-8859-1");
+    aCharset.AssignLiteral("windows-1252");
   }
   aCharsetSource = kCharsetFromWeakDocTypeDefault;
   return;
 }
 
 bool
 nsHTMLDocument::TryDefaultCharset( nsIMarkupDocumentViewer* aMarkupDV,
                                    int32_t& aCharsetSource,
@@ -510,17 +503,17 @@ nsHTMLDocument::TryDefaultCharset( nsIMa
 {
   if(kCharsetFromUserDefault <= aCharsetSource)
     return true;
 
   nsAutoCString defaultCharsetFromDocShell;
   if (aMarkupDV) {
     nsresult rv =
       aMarkupDV->GetDefaultCharacterSet(defaultCharsetFromDocShell);
-    // Not making the IsAsciiCompatible() check here to allow the user to
+    // Not making the EncodingUtils::IsAsciiCompatible() check here to allow the user to
     // force UTF-16 from the menu.
     if(NS_SUCCEEDED(rv)) {
       aCharset = defaultCharsetFromDocShell;
 
       aCharsetSource = kCharsetFromUserDefault;
       return true;
     }
   }
--- a/content/html/document/src/nsHTMLDocument.h
+++ b/content/html/document/src/nsHTMLDocument.h
@@ -288,18 +288,16 @@ protected:
   nsRefPtr<nsContentList> mForms;
   nsRefPtr<nsContentList> mFormControls;
 
   /** # of forms in the document, synchronously set */
   int32_t mNumForms;
 
   static uint32_t gWyciwygSessionCnt;
 
-  static bool IsAsciiCompatible(const nsACString& aPreferredName);
-
   static void TryHintCharset(nsIMarkupDocumentViewer* aMarkupDV,
                                int32_t& aCharsetSource,
                                nsACString& aCharset);
   static bool TryUserForcedCharset(nsIMarkupDocumentViewer* aMarkupDV,
                                      nsIDocShell*  aDocShell,
                                      int32_t& aCharsetSource,
                                      nsACString& aCharset);
   static bool TryCacheCharset(nsICachingChannel* aCachingChannel,
--- a/dom/encoding/EncodingUtils.cpp
+++ b/dom/encoding/EncodingUtils.cpp
@@ -63,10 +63,20 @@ EncodingUtils::FindEncodingForLabel(cons
     return false;
   }
 
   ToLowerCase(label);
   return NS_SUCCEEDED(nsUConvPropertySearch::SearchPropertyValue(
       labelsEncodings, ArrayLength(labelsEncodings), label, aOutEncoding));
 }
 
+bool
+EncodingUtils::IsAsciiCompatible(const nsACString& aPreferredName)
+{
+  return !(aPreferredName.LowerCaseEqualsLiteral("utf-16") ||
+           aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
+           aPreferredName.LowerCaseEqualsLiteral("utf-16le") ||
+           aPreferredName.LowerCaseEqualsLiteral("utf-7") ||
+           aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7"));
+}
+
 } // namespace dom
 } // namespace mozilla
--- a/dom/encoding/EncodingUtils.h
+++ b/dom/encoding/EncodingUtils.h
@@ -65,16 +65,25 @@ public:
    * @param      aString, string to be trimmed.
    */
   template<class T>
   static void TrimSpaceCharacters(T& aString)
   {
     aString.Trim(" \t\n\f\r");
   }
 
+  /**
+   * Check whether the encoding is ASCII-compatible in the sense that Basic Latin
+   * encodes to ASCII bytes. (The reverse may not be true!)
+   *
+   * @param aPreferredName a preferred encoding label
+   * @return whether the encoding is ASCII-compatible
+   */
+  static bool IsAsciiCompatible(const nsACString& aPreferredName);
+
 private:
   EncodingUtils() MOZ_DELETE;
 };
 
 } // dom
 } // mozilla
 
 #endif // mozilla_dom_encodingutils_h_
--- a/extensions/universalchardet/tests/CharsetDetectionTests.js
+++ b/extensions/universalchardet/tests/CharsetDetectionTests.js
@@ -45,18 +45,21 @@ function InitDetectorTests()
     $("testframe").onload = DoDetectionTest;
 
     if (gExpectedCharset == "default") {
         try {
             gExpectedCharset = prefService
                 .getComplexValue("intl.charset.default",
                                  Ci.nsIPrefLocalizedString)
                 .data;
+            if (gExpectedCharset == "ISO-8859-1") {
+                gExpectedCharset = "windows-1252";
+            }
         } catch (e) {
-            gExpectedCharset = "ISO-8859-8";
+            gExpectedCharset = "windows-1252";
         }
     }
 
     // Get the local directory. This needs to be a file: URI because chrome:
     // URIs are always UTF-8 (bug 617339) and we are testing decoding from other
     // charsets.
     var jar = getJar(getRootDirectory(window.location.href));
     var dir = jar ?
--- a/extensions/universalchardet/tests/test_bug638318.html
+++ b/extensions/universalchardet/tests/test_bug638318.html
@@ -18,14 +18,14 @@ https://bugzilla.mozilla.org/show_bug.cg
 <div id="content" style="display: none">  
 </div>
 <iframe id="testframe"></iframe>
 <pre id="test">
 <script class="testbody" type="text/javascript">
 /** Test for Bug 638318 **/
 /* Note! This test uses the chardet test harness but doesn't test chardet! */
 CharsetDetectionTests("bug638318_text.html",
-		      "ISO-8859-1",
+		      "windows-1252",
 		      new Array(""));
 </script>
 </pre>
 </body>
 </html>
--- a/extensions/universalchardet/tests/test_bug811363-1-1.html
+++ b/extensions/universalchardet/tests/test_bug811363-1-1.html
@@ -17,17 +17,17 @@ https://bugzilla.mozilla.org/show_bug.cg
 <p id="display"></p>
 <div id="content" style="display: none">
 </div>
 <iframe id="testframe"></iframe>
 <pre id="test">
 <script class="testbody" type="text/javascript">
 /** Test for Bug 811363 **/
 CharsetDetectionTests("bug811363-invalid-1.text",
-		      "ISO-8859-1",
+		      "windows-1252",
 		      new Array("ja_parallel_state_machine",
 				"zh_parallel_state_machine",
 				"zhtw_parallel_state_machine",
 				"zhcn_parallel_state_machine",
 				"cjk_parallel_state_machine",
 				"universal_charset_detector"));
 </script>
 </pre>
--- a/extensions/universalchardet/tests/test_bug811363-1-5.html
+++ b/extensions/universalchardet/tests/test_bug811363-1-5.html
@@ -17,17 +17,17 @@ https://bugzilla.mozilla.org/show_bug.cg
 <p id="display"></p>
 <div id="content" style="display: none">
 </div>
 <iframe id="testframe"></iframe>
 <pre id="test">
 <script class="testbody" type="text/javascript">
 /** Test for Bug 811363 **/
 CharsetDetectionTests("bug811363-invalid-5.text",
-		      "ISO-8859-1",
+		      "windows-1252",
 		      new Array("ja_parallel_state_machine",
 				"zh_parallel_state_machine",
 				"zhtw_parallel_state_machine",
 				"zhcn_parallel_state_machine",
 				"cjk_parallel_state_machine",
 				"universal_charset_detector"));
 </script>
 </pre>
--- a/layout/base/nsDocumentViewer.cpp
+++ b/layout/base/nsDocumentViewer.cpp
@@ -34,16 +34,17 @@
 #include "nsIDOMHTMLCollection.h"
 #include "nsIDOMHTMLElement.h"
 #include "nsIDOMRange.h"
 #include "nsContentCID.h"
 #include "nsLayoutCID.h"
 #include "nsContentUtils.h"
 #include "nsLayoutStylesheetCache.h"
 #include "mozilla/Preferences.h"
+#include "mozilla/dom/EncodingUtils.h"
 
 #include "nsIDeviceContextSpec.h"
 #include "nsViewManager.h"
 #include "nsView.h"
 
 #include "nsIPageSequenceFrame.h"
 #include "nsIURL.h"
 #include "nsNetUtil.h"
@@ -152,16 +153,17 @@ static const char sPrintOptionsContractI
 
 #include "nsObserverService.h"
 
 #include "mozilla/dom/Element.h"
 
 #include "jsfriendapi.h"
 
 using namespace mozilla;
+using namespace mozilla::dom;
 
 #ifdef DEBUG
 
 #undef NOISY_VIEWER
 #else
 #undef NOISY_VIEWER
 #endif
 
@@ -2974,20 +2976,24 @@ nsDocumentViewer::GetAuthorStyleDisabled
 NS_IMETHODIMP
 nsDocumentViewer::GetDefaultCharacterSet(nsACString& aDefaultCharacterSet)
 {
   if (mDefaultCharacterSet.IsEmpty())
   {
     const nsAdoptingCString& defCharset =
       Preferences::GetLocalizedCString("intl.charset.default");
 
-    if (!defCharset.IsEmpty()) {
-      mDefaultCharacterSet = defCharset;
+    // Don't let the user break things by setting intl.charset.default to
+    // not a rough ASCII superset
+    nsAutoCString canonical;
+    if (EncodingUtils::FindEncodingForLabel(defCharset, canonical) &&
+        EncodingUtils::IsAsciiCompatible(canonical)) {
+      mDefaultCharacterSet = canonical;
     } else {
-      mDefaultCharacterSet.AssignLiteral("ISO-8859-1");
+      mDefaultCharacterSet.AssignLiteral("windows-1252");
     }
   }
   aDefaultCharacterSet = mDefaultCharacterSet;
   return NS_OK;
 }
 
 static void
 SetChildDefaultCharacterSet(nsIMarkupDocumentViewer* aChild, void* aClosure)