Bug 1542733 - Follow-up: Fix Shift_JIS test and add more charset detection tests. r=me
author Jorg K <jorgk@jorgk.com>
Tue, 16 Apr 2019 23:44:01 +0200
changeset 26364 ec9035d2cf50
parent 26363 f41e5068ea11
child 26365 8b5660dec102
push id 15803
push user mozilla@jorgk.com
push date Tue, 16 Apr 2019 22:10:23 +0000
treeherder comm-central@ec9035d2cf50 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers me
bugs 1542733
Bug 1542733 - Follow-up: Fix Shift_JIS test and add more charset detection tests. r=me
mailnews/base/util/nsMsgUtils.cpp
mailnews/compose/test/unit/data/test-ISO-2022-JP.txt
mailnews/compose/test/unit/data/test-KOI8-R.txt
mailnews/compose/test/unit/data/test-windows-1252.txt
mailnews/compose/test/unit/test_detectAttachmentCharset.js
--- a/mailnews/base/util/nsMsgUtils.cpp
+++ b/mailnews/base/util/nsMsgUtils.cpp
@@ -1949,18 +1949,17 @@ static bool IsStreamUTF8(nsIInputStream*
 NS_MSG_BASE nsresult
 MsgDetectCharsetFromFile(nsIFile *aFile, nsACString &aCharset)
 {
   // We do the detection in this order:
   // Check BOM.
   // If no BOM, run localized detection (Russian, Ukranian or Japanese).
   // We need to run this first, since ISO-2022-JP is 7bit ASCII and would be detected as UTF-8.
   // If ISO-2022-JP not detected, check for UTF-8.
-  // If no UTF-8, but detector detected something, use that,
-  // otherwisefall back to a localization-specific value.
+  // If no UTF-8, but detector detected something, use that, otherwise return an error.
   aCharset.Truncate();
 
   nsresult rv;
   nsCOMPtr<nsIInputStream> inputStream;
   rv = NS_NewLocalFileInputStream(getter_AddRefs(inputStream), aFile);
   NS_ENSURE_SUCCESS(rv, rv);
 
   // Check the BOM.
@@ -2042,20 +2041,18 @@ MsgDetectCharsetFromFile(nsIFile *aFile,
     aCharset.AssignLiteral("UTF-8");
     return NS_OK;
   }
 
   // No UTF-8 detected, use previous detection result.
   if (!aCharset.IsEmpty())
     return NS_OK;
 
-  // Use file system charset. Note that this is not very good after bug 1381762,
-  // for example ISO-8859-2 (Latin-2) is returned instead of windows-1250.
-  aCharset = nsMsgI18NFileSystemCharset();
-  return NS_OK;
+  // Nothing found, leave it to the caller.
+  return NS_ERROR_FAILURE;
 }
 
 /*
  * Converts a buffer to plain text. Some conversions may
  * or may not work with certain end charsets which is why we
  * need that as an argument to the function. If charset is
  * unknown or deemed of no importance NULL could be passed.
  */
new file mode 100644
--- /dev/null
+++ b/mailnews/compose/test/unit/data/test-ISO-2022-JP.txt
@@ -0,0 +1,1 @@
+テストテストテストテストテストテストテストテストテストテストテストテスト 
new file mode 100644
--- /dev/null
+++ b/mailnews/compose/test/unit/data/test-KOI8-R.txt
@@ -0,0 +1,2 @@
+     , , , ,  
+ .
new file mode 100644
--- /dev/null
+++ b/mailnews/compose/test/unit/data/test-windows-1252.txt
@@ -0,0 +1,1 @@
+ - This is text in windows-1252.
--- a/mailnews/compose/test/unit/test_detectAttachmentCharset.js
+++ b/mailnews/compose/test/unit/test_detectAttachmentCharset.js
@@ -34,25 +34,47 @@ async function testUTF16BE() {
 }
 
 async function testUTF16LE() {
   await createMessage(do_get_file("data/test-UTF-16LE.txt"));
   checkAttachmentCharset("UTF-16LE");
 }
 
 async function testShiftJIS() {
+  Services.prefs.setStringPref("intl.charset.detector", "ja_parallel_state_machine");
   await createMessage(do_get_file("data/test-SHIFT_JIS.txt"));
-  checkAttachmentCharset(null); // do not detect SHIFT_JIS in this file anymore
+  checkAttachmentCharset("Shift_JIS");
+}
+
+async function testISO2022JP() {
+  Services.prefs.setStringPref("intl.charset.detector", "ja_parallel_state_machine");
+  await createMessage(do_get_file("data/test-ISO-2022-JP.txt"));
+  checkAttachmentCharset("ISO-2022-JP");
+}
+
+async function testKOI8R() {
+  Services.prefs.setStringPref("intl.charset.detector", "ruprob");
+  await createMessage(do_get_file("data/test-KOI8-R.txt"));
+  checkAttachmentCharset("KOI8-R");
+}
+
+async function testWindows1252() {
+  Services.prefs.clearUserPref("intl.charset.detector");
+  await createMessage(do_get_file("data/test-windows-1252.txt"));
+  checkAttachmentCharset(null);  // windows-1252 is not directly detected.
 }
 
 var tests = [
   testUTF8,
   testUTF16BE,
   testUTF16LE,
-  testShiftJIS
+  testShiftJIS,
+  testISO2022JP,
+  testKOI8R,
+  testWindows1252
 ]
 
 function run_test() {
   // Ensure we have at least one mail account
   localAccountUtils.loadLocalMailAccount();
   Services.prefs.setIntPref("mail.strictly_mime.parm_folding", 0);
 
   tests.forEach(x => add_task(x));