Bug 1210302 - Part 3: Add a NS_ParseRequestContentType API; r=mcmanus,sicking ba=jorge,sylvestre
author Ehsan Akhgari <ehsan@mozilla.com>
Thu, 01 Oct 2015 14:36:19 -0400
changeset 289564 c1cd30406136
parent 289563 71966c03aef5
child 289565 48340283c00e
push id 5189
push user eakhgari@mozilla.com
push date 2015-10-15 19:05 +0000
treeherder mozilla-beta@c1cd30406136 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mcmanus, sicking
bugs 1210302
milestone 42.0
Bug 1210302 - Part 3: Add a NS_ParseRequestContentType API; r=mcmanus,sicking ba=jorge,sylvestre
docshell/base/nsDocShell.cpp
dom/base/Navigator.cpp
dom/base/nsContentUtils.cpp
dom/html/nsHTMLDocument.cpp
dom/manifest/ImageObjectProcessor.jsm
netwerk/base/nsINetUtil.idl
netwerk/base/nsIOService.cpp
netwerk/base/nsNetUtil.cpp
netwerk/base/nsNetUtil.h
netwerk/base/nsURLHelper.cpp
netwerk/base/nsURLHelper.h
--- a/docshell/base/nsDocShell.cpp
+++ b/docshell/base/nsDocShell.cpp
@@ -13428,17 +13428,17 @@ nsDocShell::OnLinkClickSync(nsIContent* 
 
   // If this is an anchor element, grab its type property to use as a hint
   nsAutoString typeHint;
   nsCOMPtr<nsIDOMHTMLAnchorElement> anchor(do_QueryInterface(aContent));
   if (anchor) {
     anchor->GetType(typeHint);
     NS_ConvertUTF16toUTF8 utf8Hint(typeHint);
     nsAutoCString type, dummy;
-    NS_ParseContentType(utf8Hint, type, dummy);
+    NS_ParseRequestContentType(utf8Hint, type, dummy);
     CopyUTF8toUTF16(type, typeHint);
   }
 
   // Clone the URI now, in case a content policy or something messes
   // with it under InternalLoad; we do _not_ want to change the URI
   // our caller passed in.
   nsCOMPtr<nsIURI> clonedURI;
   aURI->Clone(getter_AddRefs(clonedURI));
--- a/dom/base/Navigator.cpp
+++ b/dom/base/Navigator.cpp
@@ -1299,19 +1299,19 @@ Navigator::SendBeacon(const nsAString& a
 
   nsCOMPtr<nsINetworkInterceptController> interceptController = do_QueryInterface(docShell);
   cors->SetInterceptController(interceptController);
 
   // Start a preflight if cross-origin and content type is not whitelisted
   rv = secMan->CheckSameOriginURI(documentURI, uri, false);
   bool crossOrigin = NS_FAILED(rv);
   nsAutoCString contentType, parsedCharset;
-  rv = NS_ParseContentType(mimeType, contentType, parsedCharset);
+  rv = NS_ParseRequestContentType(mimeType, contentType, parsedCharset);
   if (crossOrigin &&
-      contentType.Length() > 0 &&
+      mimeType.Length() > 0 &&
       !contentType.Equals(APPLICATION_WWW_FORM_URLENCODED) &&
       !contentType.Equals(MULTIPART_FORM_DATA) &&
       !contentType.Equals(TEXT_PLAIN)) {
 
     // we need to set the sameOriginChecker as a notificationCallback
     // so we can tell the channel not to follow redirects
     nsCOMPtr<nsIInterfaceRequestor> soc = nsContentUtils::SameOriginChecker();
     channel->SetNotificationCallbacks(soc);
--- a/dom/base/nsContentUtils.cpp
+++ b/dom/base/nsContentUtils.cpp
@@ -7136,17 +7136,17 @@ nsContentUtils::IsForbiddenResponseHeade
 
 // static
 bool
 nsContentUtils::IsAllowedNonCorsContentType(const nsACString& aHeaderValue)
 {
   nsAutoCString contentType;
   nsAutoCString unused;
 
-  nsresult rv = NS_ParseContentType(aHeaderValue, contentType, unused);
+  nsresult rv = NS_ParseRequestContentType(aHeaderValue, contentType, unused);
   if (NS_FAILED(rv)) {
     return false;
   }
 
   return contentType.LowerCaseEqualsLiteral("text/plain") ||
          contentType.LowerCaseEqualsLiteral("application/x-www-form-urlencoded") ||
          contentType.LowerCaseEqualsLiteral("multipart/form-data");
 }
--- a/dom/html/nsHTMLDocument.cpp
+++ b/dom/html/nsHTMLDocument.cpp
@@ -1407,17 +1407,17 @@ nsHTMLDocument::Open(JSContext* cx,
   }
 
   nsAutoCString contentType;
   contentType.AssignLiteral("text/html");
 
   nsAutoString type;
   nsContentUtils::ASCIIToLower(aType, type);
   nsAutoCString actualType, dummy;
-  NS_ParseContentType(NS_ConvertUTF16toUTF8(type), actualType, dummy);
+  NS_ParseRequestContentType(NS_ConvertUTF16toUTF8(type), actualType, dummy);
   if (!actualType.EqualsLiteral("text/html") &&
       !type.EqualsLiteral("replace")) {
     contentType.AssignLiteral("text/plain");
   }
 
   // If we already have a parser we ignore the document.open call.
   if (mParser || mParserAborted) {
     // The WHATWG spec says: "If the document has an active parser that isn't
--- a/dom/manifest/ImageObjectProcessor.jsm
+++ b/dom/manifest/ImageObjectProcessor.jsm
@@ -87,17 +87,17 @@ ImageObjectProcessor.prototype.process =
       objectName: 'image',
       object: aImage,
       property: 'type',
       expectedType: 'string',
       trim: true
     };
     let value = extractor.extractValue(spec);
     if (value) {
-      value = netutil.parseContentType(value, charset, hadCharset);
+      value = netutil.parseRequestContentType(value, charset, hadCharset);
     }
     return value || undefined;
   }
 
   function processDensityMember(aImage) {
     const value = parseFloat(aImage.density);
     const validNum = Number.isNaN(value) || value === +Infinity || value <=
       0;
--- a/netwerk/base/nsINetUtil.idl
+++ b/netwerk/base/nsINetUtil.idl
@@ -6,22 +6,36 @@
 #include "nsISupports.idl"
 
 interface nsIURI;
 interface nsIPrefBranch;
 
 /**
  * nsINetUtil provides various network-related utility methods.
  */
-[scriptable, uuid(ff0b3233-7ec5-4bf4-830f-6b2edaa53661)]
+[scriptable, uuid(fe2625ec-b884-4df1-b39c-9e830e47aa94)]
 interface nsINetUtil : nsISupports
 {
   /**
-   * Parse a content-type header and return the content type and
-   * charset (if any).
+   * Parse a content-type request header and return the content type
+   * and charset (if any).
+   *
+   * @param aTypeHeader the header string to parse
+   * @param [out] aCharset the charset parameter specified in the
+   *              header, if any.
+   * @param [out] aHadCharset whether a charset was explicitly specified.
+   * @return the MIME type specified in the header, in lower-case.
+   */
+  AUTF8String parseRequestContentType(in AUTF8String aTypeHeader,
+                                      out AUTF8String aCharset,
+                                      out boolean aHadCharset);
+
+  /**
+   * Parse a content-type response header and return the content type
+   * and charset (if any).
    *
    * @param aTypeHeader the header string to parse
    * @param [out] aCharset the charset parameter specified in the
    *              header, if any.
    * @param [out] aHadCharset whether a charset was explicitly specified.
    * @return the MIME type specified in the header, in lower-case.
    */
   AUTF8String parseResponseContentType(in AUTF8String aTypeHeader,
--- a/netwerk/base/nsIOService.cpp
+++ b/netwerk/base/nsIOService.cpp
@@ -1489,16 +1489,27 @@ nsIOService::Observe(nsISupports *subjec
 #endif
     }
 
     return NS_OK;
 }
 
 // nsINetUtil interface
 NS_IMETHODIMP
+nsIOService::ParseRequestContentType(const nsACString &aTypeHeader,
+                                     nsACString &aCharset,
+                                     bool *aHadCharset,
+                                     nsACString &aContentType)
+{
+    net_ParseRequestContentType(aTypeHeader, aContentType, aCharset, aHadCharset);
+    return NS_OK;
+}
+
+// nsINetUtil interface
+NS_IMETHODIMP
 nsIOService::ParseResponseContentType(const nsACString &aTypeHeader,
                                       nsACString &aCharset,
                                       bool *aHadCharset,
                                       nsACString &aContentType)
 {
     net_ParseContentType(aTypeHeader, aContentType, aCharset, aHadCharset);
     return NS_OK;
 }
--- a/netwerk/base/nsNetUtil.cpp
+++ b/netwerk/base/nsNetUtil.cpp
@@ -909,16 +909,34 @@ NS_GetReferrerFromChannel(nsIChannel *ch
         if (NS_FAILED(rv))
           *referrer = nullptr;
       }
     }
     return rv;
 }
 
 nsresult
+NS_ParseRequestContentType(const nsACString &rawContentType,
+                           nsCString        &contentType,
+                           nsCString        &contentCharset)
+{
+    // contentCharset is left untouched if not present in rawContentType
+    nsresult rv;
+    nsCOMPtr<nsINetUtil> util = do_GetNetUtil(&rv);
+    NS_ENSURE_SUCCESS(rv, rv);
+    nsCString charset;
+    bool hadCharset;
+    rv = util->ParseRequestContentType(rawContentType, charset, &hadCharset,
+                                       contentType);
+    if (NS_SUCCEEDED(rv) && hadCharset)
+        contentCharset = charset;
+    return rv;
+}
+
+nsresult
 NS_ParseResponseContentType(const nsACString &rawContentType,
                             nsCString        &contentType,
                             nsCString        &contentCharset)
 {
     // contentCharset is left untouched if not present in rawContentType
     nsresult rv;
     nsCOMPtr<nsINetUtil> util = do_GetNetUtil(&rv);
     NS_ENSURE_SUCCESS(rv, rv);
--- a/netwerk/base/nsNetUtil.h
+++ b/netwerk/base/nsNetUtil.h
@@ -484,16 +484,20 @@ nsresult NS_GetURLSpecFromDir(nsIFile   
  * referrer from the property docshell.internalReferrer, and if that doesn't
  * work and the channel is an nsIHTTPChannel, we check it's referrer property.
  *
  * @returns NS_ERROR_NOT_AVAILABLE if no referrer is available.
  */
 nsresult NS_GetReferrerFromChannel(nsIChannel *channel,
                                    nsIURI **referrer);
 
+nsresult NS_ParseRequestContentType(const nsACString &rawContentType,
+                                    nsCString        &contentType,
+                                    nsCString        &contentCharset);
+
 nsresult NS_ParseResponseContentType(const nsACString &rawContentType,
                                      nsCString        &contentType,
                                      nsCString        &contentCharset);
 
 nsresult NS_ExtractCharsetFromContentType(const nsACString &rawContentType,
                                           nsCString        &contentCharset,
                                           bool             *hadCharset,
                                           int32_t          *charsetStart,
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@@ -822,33 +822,36 @@ net_FindMediaDelimiter(const nsCString& 
 // function sets them.
 static void
 net_ParseMediaType(const nsACString &aMediaTypeStr,
                    nsACString       &aContentType,
                    nsACString       &aContentCharset,
                    int32_t          aOffset,
                    bool             *aHadCharset,
                    int32_t          *aCharsetStart,
-                   int32_t          *aCharsetEnd)
+                   int32_t          *aCharsetEnd,
+                   bool             aStrict)
 {
     const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
     const char* start = flatStr.get();
     const char* end = start + flatStr.Length();
 
     // Trim LWS leading and trailing whitespace from type.  We include '(' in
     // the trailing trim set to catch media-type comments, which are not at all
     // standard, but may occur in rare cases.
     const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
     const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
 
     const char* charset = "";
     const char* charsetEnd = charset;
     int32_t charsetParamStart = 0;
     int32_t charsetParamEnd = 0;
 
+    uint32_t consumed = typeEnd - type;
+
     // Iterate over parameters
     bool typeHasCharset = false;
     uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
     if (paramStart != uint32_t(kNotFound)) {
         // We have parameters.  Iterate over them.
         uint32_t curParamStart = paramStart + 1;
         do {
             uint32_t curParamEnd =
@@ -862,16 +865,17 @@ net_ParseMediaType(const nsACString &aMe
                                sizeof(charsetStr) - 1) == 0) {
                 charset = paramName + sizeof(charsetStr) - 1;
                 charsetEnd = start + curParamEnd;
                 typeHasCharset = true;
                 charsetParamStart = curParamStart - 1;
                 charsetParamEnd = curParamEnd;
             }
 
+            consumed = curParamEnd;
             curParamStart = curParamEnd + 1;
         } while (curParamStart < flatStr.Length());
     }
 
     bool charsetNeedsQuotedStringUnescaping = false;
     if (typeHasCharset) {
         // Trim LWS leading and trailing whitespace from charset.  We include
         // '(' in the trailing trim set to catch media-type comments, which are
@@ -891,18 +895,20 @@ net_ParseMediaType(const nsACString &aMe
     // if the server sent "*/*", it is meaningless, so do not store it.
     // also, if type is the same as aContentType, then just update the
     // charset.  however, if charset is empty and aContentType hasn't
     // changed, then don't wipe-out an existing aContentCharset.  We
     // also want to reject a mime-type if it does not include a slash.
     // some servers give junk after the charset parameter, which may
     // include a comma, so this check makes us a bit more tolerant.
 
-    if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&
-        memchr(type, '/', typeEnd - type) != nullptr) {
+    if (type != typeEnd &&
+        memchr(type, '/', typeEnd - type) != nullptr &&
+        (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end) :
+                   (strncmp(type, "*/*", typeEnd - type) != 0))) {
         // Common case here is that aContentType is empty
         bool eq = !aContentType.IsEmpty() &&
             aContentType.Equals(Substring(type, typeEnd),
                                 nsCaseInsensitiveCStringComparator());
         if (!eq) {
             aContentType.Assign(type, typeEnd - type);
             ToLowerCase(aContentType);
         }
@@ -999,23 +1005,69 @@ net_ParseContentType(const nsACString &a
         uint32_t curTypeEnd =
             net_FindMediaDelimiter(flatStr, curTypeStart, ',');
         
         // At this point curTypeEnd points to the spot where the media-type
         // starting at curTypeEnd ends.  Time to parse that!
         net_ParseMediaType(Substring(flatStr, curTypeStart,
                                      curTypeEnd - curTypeStart),
                            aContentType, aContentCharset, curTypeStart,
-                           aHadCharset, aCharsetStart, aCharsetEnd);
+                           aHadCharset, aCharsetStart, aCharsetEnd, false);
 
         // And let's move on to the next media-type
         curTypeStart = curTypeEnd + 1;
     } while (curTypeStart < flatStr.Length());
 }
 
+void
+net_ParseRequestContentType(const nsACString &aHeaderStr,
+                            nsACString       &aContentType,
+                            nsACString       &aContentCharset,
+                            bool             *aHadCharset)
+{
+    //
+    // Augmented BNF (from RFC 7231 section 3.1.1.1):
+    //
+    //   media-type   = type "/" subtype *( OWS ";" OWS parameter )
+    //   type         = token
+    //   subtype      = token
+    //   parameter    = token "=" ( token / quoted-string )
+    //
+    // Examples:
+    //
+    //   text/html
+    //   text/html; charset=ISO-8859-1
+    //   text/html; charset="ISO-8859-1"
+    //   application/octet-stream
+    //
+
+    aContentType.Truncate();
+    aContentCharset.Truncate();
+    *aHadCharset = false;
+    const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
+
+    // A request Content-Type header must hold exactly one media-type, so
+    // bail out if a ',' delimiter is found before the end of the header.
+    nsAutoCString contentType, contentCharset;
+    bool hadCharset = false;
+    int32_t dummy1, dummy2;
+    uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');
+    if (typeEnd != flatStr.Length()) {
+        // We have some stuff left at the end, so this is not a valid
+        // request Content-Type header.
+        return;
+    }
+    net_ParseMediaType(flatStr, contentType, contentCharset, 0,
+                       &hadCharset, &dummy1, &dummy2, true);
+
+    aContentType = contentType;
+    aContentCharset = contentCharset;
+    *aHadCharset = hadCharset;
+}
+
 bool
 net_IsValidHostName(const nsCSubstring &host)
 {
     const char *end = host.EndReading();
     // Use explicit whitelists to select which characters we are
     // willing to send to lower-level DNS logic. This is more
     // self-documenting, and can also be slightly faster than the
     // blacklist approach, since DNS names are the common case, and
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@@ -167,36 +167,52 @@ char * net_RFindCharNotInSet(const char 
 
 /**
  * Parses a content-type header and returns the content type and
  * charset (if any).  aCharset is not modified if no charset is
  * specified in anywhere in aHeaderStr.  In that case (no charset
  * specified), aHadCharset is set to false.  Otherwise, it's set to
  * true.  Note that aContentCharset can be empty even if aHadCharset
  * is true.
+ *
+ * This parsing is suitable for HTTP requests.  Use net_ParseContentType
+ * for parsing this header in HTTP responses.
+ */
+void net_ParseRequestContentType(const nsACString &aHeaderStr,
+                                 nsACString       &aContentType,
+                                 nsACString       &aContentCharset,
+                                 bool*          aHadCharset);
+
+/**
+ * Parses a content-type header and returns the content type and
+ * charset (if any).  aContentCharset is not modified if no charset
+ * is specified anywhere in aHeaderStr.  In that case (no charset
+ * specified), aHadCharset is set to false.  Otherwise, it's set to
+ * true.  Note that aContentCharset can be empty even if aHadCharset
+ * is true.
  */
 void net_ParseContentType(const nsACString &aHeaderStr,
-                                      nsACString       &aContentType,
-                                      nsACString       &aContentCharset,
-                                      bool*          aHadCharset);
+                          nsACString       &aContentType,
+                          nsACString       &aContentCharset,
+                          bool*          aHadCharset);
 /**
  * As above, but also returns the start and end indexes for the charset
  * parameter in aHeaderStr.  These are indices for the entire parameter, NOT
  * just the value.  If there is "effectively" no charset parameter (e.g. if an
  * earlier type with one is overridden by a later type without one),
  * *aHadCharset will be true but *aCharsetStart will be set to -1.  Note that
  * it's possible to have aContentCharset empty and *aHadCharset true when
  * *aCharsetStart is nonnegative; this corresponds to charset="".
  */
 void net_ParseContentType(const nsACString &aHeaderStr,
-                                      nsACString       &aContentType,
-                                      nsACString       &aContentCharset,
-                                      bool             *aHadCharset,
-                                      int32_t          *aCharsetStart,
-                                      int32_t          *aCharsetEnd);
+                          nsACString       &aContentType,
+                          nsACString       &aContentCharset,
+                          bool             *aHadCharset,
+                          int32_t          *aCharsetStart,
+                          int32_t          *aCharsetEnd);
 
 /* inline versions */
 
 /* remember the 64-bit platforms ;-) */
 #define NET_MAX_ADDRESS (((char*)0)-1)
 
 inline char *net_FindCharInSet(const char *str, const char *set)
 {