Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab
authorValentin Gosu <valentin.gosu@gmail.com>
Fri, 05 Feb 2016 14:45:08 +0100
changeset 283187 77f8588dad8c0349f88df7798a2e612ada487ffe
parent 283186 5342641913be9b90f5309a42ae226a1d340f30c6
child 283188 d485e217eda45ac86358e9927e252b636da172f8
push id71449
push uservalentin.gosu@gmail.com
push dateFri, 05 Feb 2016 13:45:54 +0000
treeherdermozilla-inbound@77f8588dad8c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewershonzab
bugs241788
milestone47.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab * net_ExtractURLScheme now uses mozilla::Tokenizer * net_FilterURIString also filters characters in the scheme now * removed startPos and endPos parameters for net_FilterURIString and introduced net_IsAbsoluteURL
netwerk/base/nsIOService.cpp
netwerk/base/nsStandardURL.cpp
netwerk/base/nsURLHelper.cpp
netwerk/base/nsURLHelper.h
netwerk/protocol/http/Http2Stream.cpp
netwerk/protocol/res/SubstitutingProtocolHandler.cpp
netwerk/streamconv/converters/nsIndexedToHTML.cpp
netwerk/test/unit/test_standardurl.js
--- a/netwerk/base/nsIOService.cpp
+++ b/netwerk/base/nsIOService.cpp
@@ -559,17 +559,17 @@ nsIOService::GetProtocolHandler(const ch
         return NS_ERROR_UNKNOWN_PROTOCOL;
 
     return rv;
 }
 
 NS_IMETHODIMP
 nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme)
 {
-    return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme);
+    return net_ExtractURLScheme(inURI, scheme);
 }
 
 NS_IMETHODIMP 
 nsIOService::GetProtocolFlags(const char* scheme, uint32_t *flags)
 {
     nsCOMPtr<nsIProtocolHandler> handler;
     nsresult rv = GetProtocolHandler(scheme, getter_AddRefs(handler));
     if (NS_FAILED(rv)) return rv;
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@@ -2862,30 +2862,18 @@ nsStandardURL::Init(uint32_t urlType,
             IsUTFCharset(mOriginCharset.get())) {
             mOriginCharset.Truncate();
         }
     }
     else if (!IsUTFCharset(charset)) {
         mOriginCharset = charset;
     }
 
-    if (baseURI) {
-        uint32_t start, end;
-        // pull out the scheme and where it ends
-        nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr);
-        if (NS_SUCCEEDED(rv) && spec.Length() > end+2) {
-            nsACString::const_iterator slash;
-            spec.BeginReading(slash);
-            slash.advance(end+1);
-            // then check if // follows
-            // if it follows, aSpec is really absolute ... 
-            // ignore aBaseURI in this case
-            if (*slash == '/' && *(++slash) == '/')
-                baseURI = nullptr;
-        }
+    if (baseURI && net_IsAbsoluteURL(spec)) {
+        baseURI = nullptr;
     }
 
     if (!baseURI)
         return SetSpec(spec);
 
     nsAutoCString buf;
     nsresult rv = baseURI->Resolve(spec, buf);
     if (NS_FAILED(rv)) return rv;
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@@ -9,16 +9,17 @@
 #include "nsURLHelper.h"
 #include "nsIFile.h"
 #include "nsIURLParser.h"
 #include "nsCOMPtr.h"
 #include "nsCRT.h"
 #include "nsNetCID.h"
 #include "mozilla/Preferences.h"
 #include "prnetdb.h"
+#include "mozilla/Tokenizer.h"
 
 using namespace mozilla;
 
 //----------------------------------------------------------------------------
 // Init/Shutdown
 //----------------------------------------------------------------------------
 
 static bool gInitialized = false;
@@ -174,22 +175,22 @@ net_ParseFileURL(const nsACString &inURL
     }
 
     outDirectory.Truncate();
     outFileBaseName.Truncate();
     outFileExtension.Truncate();
 
     const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
     const char *url = flatURL.get();
-    
-    uint32_t schemeBeg, schemeEnd;
-    rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
+
+    nsAutoCString scheme;
+    rv = net_ExtractURLScheme(flatURL, scheme);
     if (NS_FAILED(rv)) return rv;
 
-    if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
+    if (!scheme.EqualsLiteral("file")) {
         NS_ERROR("must be a file:// url");
         return NS_ERROR_UNEXPECTED;
     }
 
     nsIURLParser *parser = net_GetNoAuthURLParser();
     NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
 
     uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
@@ -478,67 +479,65 @@ net_ResolveRelativePath(const nsACString
     result = path;
     return NS_OK;
 }
 
 //----------------------------------------------------------------------------
 // scheme fu
 //----------------------------------------------------------------------------
 
+#if !defined(MOZILLA_XPCOMRT_API)
+static bool isAsciiAlpha(char c) {
+    return nsCRT::IsAsciiAlpha(c);
+}
+
+static bool
+net_IsValidSchemeChar(const char aChar)
+{
+    if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
+        aChar == '+' || aChar == '.' || aChar == '-') {
+        return true;
+    }
+    return false;
+}
+#endif
+
 /* Extract URI-Scheme if possible */
 nsresult
 net_ExtractURLScheme(const nsACString &inURI,
-                     uint32_t *startPos, 
-                     uint32_t *endPos,
-                     nsACString *scheme)
+                     nsACString& scheme)
 {
-    // search for something up to a colon, and call it the scheme
-    const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
-    const char* uri_start = flatURI.get();
-    const char* uri = uri_start;
+#if defined(MOZILLA_XPCOMRT_API)
+    NS_WARNING("net_ExtractURLScheme not implemented");
+    return NS_ERROR_NOT_IMPLEMENTED;
+#else
+    Tokenizer p(inURI, "\r\n\t");
 
-    if (!uri)
-        return NS_ERROR_MALFORMED_URI;
-
-    // skip leading white space
-    while (nsCRT::IsAsciiSpace(*uri))
-        uri++;
-
-    uint32_t start = uri - uri_start;
-    if (startPos) {
-        *startPos = start;
+    while (p.CheckWhite() || p.CheckChar(' ')) {
+        // Skip leading whitespace
     }
 
-    uint32_t length = 0;
-    char c;
-    while ((c = *uri++) != '\0') {
-        // First char must be Alpha
-        if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
-            length++;
-        } 
-        // Next chars can be alpha + digit + some special chars
-        else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 
-                 nsCRT::IsAsciiDigit(c) || c == '+' || 
-                 c == '.' || c == '-')) {
-            length++;
-        }
-        // stop if colon reached but not as first char
-        else if (c == ':' && length > 0) {
-            if (endPos) {
-                *endPos = start + length;
-            }
+    p.Record();
+    if (!p.CheckChar(isAsciiAlpha)) {
+        // First char must be alpha
+        return NS_ERROR_MALFORMED_URI;
+    }
 
-            if (scheme)
-                scheme->Assign(Substring(inURI, start, length));
-            return NS_OK;
-        }
-        else 
-            break;
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
     }
-    return NS_ERROR_MALFORMED_URI;
+
+    if (!p.CheckChar(':')) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    p.Claim(scheme);
+    scheme.StripChars("\r\n\t");
+    return NS_OK;
+#endif
 }
 
 bool
 net_IsValidScheme(const char *scheme, uint32_t schemeLen)
 {
     // first char must be alpha
     if (!nsCRT::IsAsciiAlpha(*scheme))
         return false;
@@ -552,96 +551,82 @@ net_IsValidScheme(const char *scheme, ui
               *scheme == '-'))
             return false;
     }
 
     return true;
 }
 
 bool
+net_IsAbsoluteURL(const nsACString& uri)
+{
+#if !defined(MOZILLA_XPCOMRT_API)
+    Tokenizer p(uri, "\r\n\t");
+
+    while (p.CheckWhite() || p.CheckChar(' ')) {
+        // Skip leading whitespace
+    }
+
+    // First char must be alpha
+    if (!p.CheckChar(isAsciiAlpha)) {
+        return false;
+    }
+
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
+    }
+    if (!p.CheckChar(':')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (!p.CheckChar('/')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (p.CheckChar('/')) {
+        // aSpec is really absolute. Ignore aBaseURI in this case
+        return true;
+    }
+#endif
+    return false;
+}
+
+bool
 net_FilterURIString(const char *str, nsACString& result)
 {
     NS_PRECONDITION(str, "Must have a non-null string!");
-    bool writing = false;
     result.Truncate();
     const char *p = str;
 
-    // Remove leading spaces, tabs, CR, LF if any.
-    while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
-        writing = true;
-        str = p + 1;
+    // Figure out if we need to filter anything.
+    bool writing = false;
+    while (*p) {
+        if (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
+            writing = true;
+            break;
+        }
         p++;
     }
 
-    // Don't strip from the scheme, because other code assumes everything
-    // up to the ':' is the scheme, and it's bad not to have it match.
-    // If there's no ':', strip.
-    bool found_colon = false;
-    const char *first = nullptr;
-    while (*p) {
-        switch (*p) {
-            case '\t': 
-            case '\r': 
-            case '\n':
-                if (found_colon) {
-                    writing = true;
-                    // append chars up to but not including *p
-                    if (p > str)
-                        result.Append(str, p - str);
-                    str = p + 1;
-                } else {
-                    // remember where the first \t\r\n was in case we find no scheme
-                    if (!first)
-                        first = p;
-                }
-                break;
-
-            case ':':
-                found_colon = true;
-                break;
-
-            case '/':
-            case '@':
-                if (!found_colon) {
-                    // colon also has to precede / or @ to be a scheme
-                    found_colon = true; // not really, but means ok to strip
-                    if (first) {
-                        // go back and replace
-                        p = first;
-                        continue; // process *p again
-                    }
-                }
-                break;
-
-            default:
-                break;
-        }
-        p++;
-
-        // At end, if there was no scheme, and we hit a control char, fix
-        // it up now.
-        if (!*p && first != nullptr && !found_colon) {
-            // TRICKY - to avoid duplicating code, we reset the loop back
-            // to the point we found something to do
-            p = first;
-            // This also stops us from looping after we finish
-            found_colon = true; // so we'll replace \t\r\n
-        }
+    if (!writing) {
+        // Nothing to strip or filter
+        return false;
     }
 
-    // Remove trailing spaces if any
-    while (((p-1) >= str) && (*(p-1) == ' ')) {
-        writing = true;
-        p--;
-    }
+    nsAutoCString temp;
 
-    if (writing && p > str)
-        result.Append(str, p - str);
+    temp.Assign(str);
+    temp.Trim("\r\n\t ");
+    temp.StripChars("\r\n\t");
 
-    return writing;
+    result.Assign(temp);
+
+    return true;
 }
 
 #if defined(XP_WIN)
 bool
 net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
 {
     bool writing = false;
 
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@@ -75,27 +75,31 @@ void net_CoalesceDirs(netCoalesceFlags f
  *
  * @return a new string, representing canonical uri
  */
 nsresult net_ResolveRelativePath(const nsACString &relativePath,
                                              const nsACString &basePath,
                                              nsACString &result);
 
 /**
+ * Check if a URL is absolute
+ *
+ * @param inURL     URL spec
+ * @return true if the given spec represents an absolute URL
+ */
+bool net_IsAbsoluteURL(const nsACString& inURL);
+
+/**
  * Extract URI-Scheme if possible
  *
  * @param inURI     URI spec
- * @param startPos  start of scheme (may be null)
- * @param endPos    end of scheme; index of colon (may be null)
  * @param scheme    scheme copied to this buffer on return (may be null)
  */
 nsresult net_ExtractURLScheme(const nsACString &inURI,
-                                          uint32_t *startPos, 
-                                          uint32_t *endPos,
-                                          nsACString *scheme = nullptr);
+                              nsACString &scheme);
 
 /* check that the given scheme conforms to RFC 2396 */
 bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
 
 inline bool net_IsValidScheme(const nsAFlatCString &scheme)
 {
     return net_IsValidScheme(scheme.get(), scheme.Length());
 }
@@ -104,18 +108,17 @@ inline bool net_IsValidScheme(const nsAF
  * Filter out whitespace from a URI string.  The input is the |str|
  * pointer. |result| is written to if and only if there is whitespace that has
  * to be filtered out.  The return value is true if and only if |result| is
  * written to.
  *
  * This function strips out all whitespace at the beginning and end of the URL
  * and strips out \r, \n, \t from the middle of the URL.  This makes it safe to
  * call on things like javascript: urls or data: urls, where we may in fact run
- * into whitespace that is not properly encoded.  Note that stripping does not
- * occur in the scheme portion itself.
+ * into whitespace that is not properly encoded.
  *
  * @param str the pointer to the string to filter.  Must be non-null.
  * @param result the out param to write to if filtering happens
  * @return whether result was written to
  */
 bool net_FilterURIString(const char *str, nsACString& result);
 
 #if defined(XP_WIN)
--- a/netwerk/protocol/http/Http2Stream.cpp
+++ b/netwerk/protocol/http/Http2Stream.cpp
@@ -339,17 +339,17 @@ Http2Stream::WriteSegments(nsAHttpSegmen
   mSegmentWriter = nullptr;
   return rv;
 }
 
 nsresult
 Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr<nsStandardURL> &url)
 {
   nsAutoCString scheme;
-  nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme);
+  nsresult rv = net_ExtractURLScheme(origin, scheme);
   NS_ENSURE_SUCCESS(rv, rv);
   return MakeOriginURL(scheme, origin, url);
 }
 
 nsresult
 Http2Stream::MakeOriginURL(const nsACString &scheme, const nsACString &origin,
                            RefPtr<nsStandardURL> &url)
 {
--- a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
+++ b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
@@ -49,17 +49,17 @@ SubstitutingURL::EnsureFile()
   MOZ_ASSERT(substHandler);
 
   nsAutoCString spec;
   rv = substHandler->ResolveURI(this, spec);
   if (NS_FAILED(rv))
     return rv;
 
   nsAutoCString scheme;
-  rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme);
+  rv = net_ExtractURLScheme(spec, scheme);
   if (NS_FAILED(rv))
     return rv;
 
   // Bug 585869:
   // In most cases, the scheme is jar if it's not file.
   // Regardless, net_GetFileFromURLSpec should be avoided
   // when the scheme isn't file.
   if (!scheme.EqualsLiteral("file"))
--- a/netwerk/streamconv/converters/nsIndexedToHTML.cpp
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
@@ -755,18 +755,20 @@ nsIndexedToHTML::OnIndexAvailable(nsIReq
         loc.Append('/');
     }
 
     // now minimally re-escape the location...
     uint32_t escFlags;
     // for some protocols, we expect the location to be absolute.
     // if so, and if the location indeed appears to be a valid URI, then go
     // ahead and treat it like one.
+
+    nsAutoCString scheme;
     if (mExpectAbsLoc &&
-        NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
+        NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
         // escape as absolute 
         escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
     }
     else {
         // escape as relative
         // esc_Directory is needed because directories have a trailing slash.
         // Without it, the trailing '/' will be escaped, and links from within
         // that directory will be incorrect
--- a/netwerk/test/unit/test_standardurl.js
+++ b/netwerk/test/unit/test_standardurl.js
@@ -295,8 +295,14 @@ add_test(function test_hugeStringThrows(
   for (let prop of properties) {
     Assert.throws(() => url[prop] = hugeString,
                   /NS_ERROR_MALFORMED_URI/,
                   `Passing a huge string to "${prop}" should throw`);
   }
 
   run_next_test();
 });
+
+add_test(function test_filterWhitespace()
+{
+  var url = stringToURL(" \r\n\th\nt\rt\tp://ex\r\n\tample.com/path\r\n\t/\r\n\tto the/fil\r\n\te.e\r\n\txt?que\r\n\try#ha\r\n\tsh \r\n\t ");
+  do_check_eq(url.spec, "http://example.com/path/to%20the/file.ext?query#hash");
+});