Bug 562915 - Escape characters in content-type charset param not parsed properly. r=jduell
author Julian Reschke <julian.reschke@gmx.de>
Sat, 03 Dec 2011 15:59:55 +0100
changeset 81974 abe0e4c2c4037181a519a3859a793cd00f756490
parent 81973 8d316e7fd4b08c476da7f37dabe0656e2be9bbd5
child 81975 109d10e4093779028f9ea1f7aa769d41ae8a37b9
push id unknown
push user unknown
push date unknown
reviewers jduell
bugs 562915
milestone 11.0a1
Bug 562915 - Escape characters in content-type charset param not parsed properly. r=jduell
netwerk/base/src/nsURLHelper.cpp
netwerk/test/unit/test_parse_content_type.js
--- a/netwerk/base/src/nsURLHelper.cpp
+++ b/netwerk/base/src/nsURLHelper.cpp
@@ -883,22 +883,24 @@ net_ParseMediaType(const nsACString &aMe
                 charsetParamStart = curParamStart - 1;
                 charsetParamEnd = curParamEnd;
             }
 
             curParamStart = curParamEnd + 1;
         } while (curParamStart < flatStr.Length());
     }
 
+    bool charsetNeedsQuotedStringUnescaping = false;
     if (typeHasCharset) {
         // Trim LWS leading and trailing whitespace from charset.  We include
         // '(' in the trailing trim set to catch media-type comments, which are
         // not at all standard, but may occur in rare cases.
         charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
         if (*charset == '"') {
+            charsetNeedsQuotedStringUnescaping = true;
             charsetEnd =
                 start + net_FindStringEnd(flatStr, charset - start, *charset);
             charset++;
             NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
         } else {
             charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
         }
     }
@@ -919,17 +921,31 @@ net_ParseMediaType(const nsACString &aMe
                                 nsCaseInsensitiveCStringComparator());
         if (!eq) {
             aContentType.Assign(type, typeEnd - type);
             ToLowerCase(aContentType);
         }
 
         if ((!eq && *aHadCharset) || typeHasCharset) {
             *aHadCharset = true;
-            aContentCharset.Assign(charset, charsetEnd - charset);
+            if (charsetNeedsQuotedStringUnescaping) {
+                // parameters using the "quoted-string" syntax need
+                // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
+                aContentCharset.Truncate();
+                for (const char *c = charset; c != charsetEnd; c++) {
+                    if (*c == '\\' && c + 1 != charsetEnd) {
+                        // eat escape
+                        c++;  
+                    }
+                    aContentCharset.Append(*c);
+                }
+            }
+            else {
+                aContentCharset.Assign(charset, charsetEnd - charset);
+            }
             if (typeHasCharset) {
                 *aCharsetStart = charsetParamStart + aOffset;
                 *aCharsetEnd = charsetParamEnd + aOffset;
             }
         }
         // Only set a new charset position if this is a different type
         // from the last one we had and it doesn't already have a
         // charset param.  If this is the same type, we probably want
--- a/netwerk/test/unit/test_parse_content_type.js
+++ b/netwerk/test/unit/test_parse_content_type.js
@@ -130,16 +130,20 @@ function run_test() {
   check("text/plain", "", false);
 
   type = netutil.parseContentType('text/plain charset=UTF8', charset, hadCharset);
   check("text/plain", "", false);
 
   type = netutil.parseContentType('text/plain, TEXT/HTML; param="charset=UTF8"; ; param2="charset=UTF16", text/html, TEXT/HTML', charset, hadCharset);
   check("text/html", "", false);
 
+  // Bug 562915 - correctness: "\x" is "x"
+  type = netutil.parseContentType('text/plain; charset="UTF\\-8"', charset, hadCharset);
+  check("text/plain", "UTF-8", true);
+
   // Bug 700589
 
   // check that single quote doesn't confuse parsing of subsequent parameters
   type = netutil.parseContentType("text/plain; x='; charset=\"UTF-8\"", charset, hadCharset);
   check("text/plain", "UTF-8", true);
 
   // check that single quotes do not get removed from extracted charset
   type = netutil.parseContentType("text/plain; charset='UTF-8'", charset, hadCharset);