Bug 1094067 - CSP: Parser should follow path production from RFC 3986 (r=sstamm)
author: Christoph Kerschbaumer <mozilla@christophkerschbaumer.com>
Fri, 14 Nov 2014 15:05:36 -0800
changeset 218777 80de5f55ca1ef321442f91ebbd2724a530070ad1
parent 218776 c8adfe4f59950fa970b167dd916741d356a29fd6
child 218778 5866fd3c075cccca4a1ad009db4410a021fa426d
push id: 27944
push user: cbook@mozilla.com
push date: Tue, 09 Dec 2014 11:54:28 +0000
treeherder: autoland@acf5660d2048 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: sstamm
bugs: 1094067
milestone: 37.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1094067 - CSP: Parser should follow path production from RFC 3986 (r=sstamm)
dom/security/nsCSPParser.cpp
dom/security/nsCSPParser.h
--- a/dom/security/nsCSPParser.cpp
+++ b/dom/security/nsCSPParser.cpp
@@ -40,16 +40,25 @@ static const char16_t UNDERLINE    = '_'
 static const char16_t TILDE        = '~';
 static const char16_t WILDCARD     = '*';
 static const char16_t WHITESPACE   = ' ';
 static const char16_t SINGLEQUOTE  = '\'';
 static const char16_t OPEN_CURL    = '{';
 static const char16_t CLOSE_CURL   = '}';
 static const char16_t NUMBER_SIGN  = '#';
 static const char16_t QUESTIONMARK = '?';
+static const char16_t PERCENT_SIGN = '%';
+static const char16_t EXCLAMATION  = '!';
+static const char16_t DOLLAR       = '$';
+static const char16_t AMPERSAND    = '&';
+static const char16_t OPENBRACE    = '(';
+static const char16_t CLOSINGBRACE = ')';
+static const char16_t COMMA        = ',';
+static const char16_t EQUALS       = '=';
+static const char16_t ATSYMBOL     = '@';
 
 static uint32_t kSubHostPathCharacterCutoff = 512;
 
 static const char* kHashSourceValidFns [] = { "sha256", "sha384", "sha512" };
 static const uint32_t kHashSourceValidFnsLen = 3;
 
 /* ===== nsCSPTokenizer ==================== */
 
@@ -135,16 +144,24 @@ isCharacterToken(char16_t aSymbol)
 }
 
 bool
 isNumberToken(char16_t aSymbol)
 {
   return (aSymbol >= '0' && aSymbol <= '9');
 }
 
+bool
+isValidHexDig(char16_t aHexDig)
+{
+  return (isNumberToken(aHexDig) ||
+          (aHexDig >= 'A' && aHexDig <= 'F') ||
+          (aHexDig >= 'a' && aHexDig <= 'f'));
+}
+
 void
 nsCSPParser::resetCurChar(const nsAString& aToken)
 {
   mCurChar = aToken.BeginReading();
   mEndChar = aToken.EndReading();
   resetCurValue();
 }
 
@@ -152,24 +169,128 @@ nsCSPParser::resetCurChar(const nsAStrin
 // number sign ("#") character, or by the end of the URI.
 // http://tools.ietf.org/html/rfc3986#section-3.3
 bool
 nsCSPParser::atEndOfPath()
 {
   return (atEnd() || peek(QUESTIONMARK) || peek(NUMBER_SIGN));
 }
 
+void
+nsCSPParser::percentDecodeStr(const nsAString& aEncStr, nsAString& outDecStr)
+{
+  outDecStr.Truncate();
+
+  // helper function that should not be visible outside this method's scope
+  struct local {
+    static inline char16_t convertHexDig(char16_t aHexDig) {
+      if (isNumberToken(aHexDig)) {
+        return aHexDig - '0';
+      }
+      if (aHexDig >= 'A' && aHexDig <= 'F') {
+        return aHexDig - 'A' + 10;
+      }
+      // must be a lower case character
+      // (aHexDig >= 'a' && aHexDig <= 'f')
+      return aHexDig - 'a' + 10;
+    }
+  };
+
+  const char16_t *cur, *end, *hexDig1, *hexDig2;
+  cur = aEncStr.BeginReading();
+  end = aEncStr.EndReading();
+
+  while (cur != end) {
+    // if it's not a percent sign then there is
+    // nothing to do for that character
+    if (*cur != PERCENT_SIGN) {
+      outDecStr.Append(*cur);
+      cur++;
+      continue;
+    }
+
+    // get the two hexDigs following the '%'-sign
+    hexDig1 = cur + 1;
+    hexDig2 = cur + 2;
+
+    // if the '%' is not followed by two valid hexDigs then it is
+    // not a pct-encoding; append the '%' unchanged and move on.
+    if (hexDig1 == end || hexDig2 == end ||
+        !isValidHexDig(*hexDig1) ||
+        !isValidHexDig(*hexDig2)) {
+      outDecStr.Append(PERCENT_SIGN);
+      cur++;
+      continue;
+    }
+
+    // decode "% hexDig1 hexDig2" into a character.
+    char16_t decChar = (local::convertHexDig(*hexDig1) << 4) +
+                       local::convertHexDig(*hexDig2);
+    outDecStr.Append(decChar);
+
+    // increment 'cur' to after the second hexDig
+    cur = ++hexDig2;
+  }
+}
+
+// unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 bool
-nsCSPParser::atValidPathChar()
+nsCSPParser::atValidUnreservedChar()
 {
   return (peek(isCharacterToken) || peek(isNumberToken) ||
           peek(DASH) || peek(DOT) ||
           peek(UNDERLINE) || peek(TILDE));
 }
 
+// sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+//                 / "*" / "+" / "," / ";" / "="
+// Please note that even though ',' and ';' appear to be
+// valid sub-delims according to the RFC production of paths,
+// neither can appear here by itself; each would need to be
+// pct-encoded in order to be part of the path.
+bool
+nsCSPParser::atValidSubDelimChar()
+{
+  return (peek(EXCLAMATION) || peek(DOLLAR) || peek(AMPERSAND) ||
+          peek(SINGLEQUOTE) || peek(OPENBRACE) || peek(CLOSINGBRACE) ||
+          peek(WILDCARD) || peek(PLUS) || peek(EQUALS));
+}
+
+// pct-encoded   = "%" HEXDIG HEXDIG
+bool
+nsCSPParser::atValidPctEncodedChar()
+{
+  const char16_t* pctCurChar = mCurChar;
+
+  if ((pctCurChar + 2) >= mEndChar) {
+    // string too short, can't be a valid pct-encoded char.
+    return false;
+  }
+
+  // Any valid pct-encoding must follow the following format:
+  // "% HEXDIG HEXDIG"
+  if (PERCENT_SIGN != *pctCurChar ||
+     !isValidHexDig(*(pctCurChar+1)) ||
+     !isValidHexDig(*(pctCurChar+2))) {
+    return false;
+  }
+  return true;
+}
+
+// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+// http://tools.ietf.org/html/rfc3986#section-3.3
+bool
+nsCSPParser::atValidPathChar()
+{
+  return (atValidUnreservedChar() ||
+          atValidSubDelimChar() ||
+          atValidPctEncodedChar() ||
+          peek(COLON) || peek(ATSYMBOL));
+}
+
 void
 nsCSPParser::logWarningErrorToConsole(uint32_t aSeverityFlag,
                                       const char* aProperty,
                                       const char16_t* aParams[],
                                       uint32_t aParamsLength)
 {
   CSPPARSERLOG(("nsCSPParser::logWarningErrorToConsole: %s", aProperty));
 
@@ -248,38 +369,54 @@ nsCSPParser::subPath(nsCSPHostSrc* aCspH
   CSPPARSERLOG(("nsCSPParser::subPath, mCurToken: %s, mCurValue: %s",
                NS_ConvertUTF16toUTF8(mCurToken).get(),
                NS_ConvertUTF16toUTF8(mCurValue).get()));
 
   // Emergency exit to avoid endless loops in case a path in a CSP policy
   // is longer than 512 characters, or also to avoid endless loops
   // in case we are parsing unrecognized characters in the following loop.
   uint32_t charCounter = 0;
+  nsString pctDecodedSubPath;
 
   while (!atEndOfPath()) {
     if (peek(SLASH)) {
-      aCspHost->appendPath(mCurValue);
+      // before appending any additional portion of a subpath we have to pct-decode
+      // that portion of the subpath. atValidPathChar() already verified a correct
+      // pct-encoding, now we can safely decode and append the decoded subpath.
+      percentDecodeStr(mCurValue, pctDecodedSubPath);
+      aCspHost->appendPath(pctDecodedSubPath);
       // Resetting current value since we are appending parts of the path
       // to aCspHost, e.g; "http://www.example.com/path1/path2" then the
       // first part is "/path1", second part "/path2"
       resetCurValue();
     }
     else if (!atValidPathChar()) {
       const char16_t* params[] = { mCurToken.get() };
       logWarningErrorToConsole(nsIScriptError::warningFlag,
                                "couldntParseInvalidSource",
                                params, ArrayLength(params));
       return false;
     }
+    // potentially we have encountered a valid pct-encoded character in atValidPathChar();
+    // if so, we have to account for "% HEXDIG HEXDIG" and advance the pointer past
+    // the pct-encoded char.
+    if (peek(PERCENT_SIGN)) {
+      advance();
+      advance();
+    }
     advance();
     if (++charCounter > kSubHostPathCharacterCutoff) {
       return false;
     }
   }
-  aCspHost->appendPath(mCurValue);
+  // before appending any additional portion of a subpath we have to pct-decode
+  // that portion of the subpath. atValidPathChar() already verified a correct
+  // pct-encoding, now we can safely decode and append the decoded subpath.
+  percentDecodeStr(mCurValue, pctDecodedSubPath);
+  aCspHost->appendPath(pctDecodedSubPath);
   resetCurValue();
   return true;
 }
 
 bool
 nsCSPParser::path(nsCSPHostSrc* aCspHost)
 {
   CSPPARSERLOG(("nsCSPParser::path, mCurToken: %s, mCurValue: %s",
@@ -296,17 +433,19 @@ nsCSPParser::path(nsCSPHostSrc* aCspHost
     const char16_t* params[] = { mCurToken.get() };
     logWarningErrorToConsole(nsIScriptError::warningFlag, "couldntParseInvalidSource",
                              params, ArrayLength(params));
     return false;
   }
   if (atEndOfPath()) {
     // one slash right after host [port] is also considered a path, e.g.
     // www.example.com/ should result in www.example.com/
-    aCspHost->appendPath(mCurValue);
+    // please note that we do not have to perform any pct-decoding here
+    // because we are just appending a '/' and not any actual chars.
+    aCspHost->appendPath(NS_LITERAL_STRING("/"));
     return true;
   }
   // path can begin with "/" but not "//"
   // see http://tools.ietf.org/html/rfc3986#section-3.3
   if (!hostChar()) {
     const char16_t* params[] = { mCurToken.get() };
     logWarningErrorToConsole(nsIScriptError::warningFlag, "couldntParseInvalidSource",
                              params, ArrayLength(params));
--- a/dom/security/nsCSPParser.h
+++ b/dom/security/nsCSPParser.h
@@ -123,18 +123,23 @@ class nsCSPParser {
     nsCSPHostSrc*   appHost(); // helper function to support app specific hosts
     nsCSPHostSrc*   host();
     bool            hostChar();
     bool            schemeChar();
     bool            port();
     bool            path(nsCSPHostSrc* aCspHost);
 
     bool subHost();                                       // helper function to parse subDomains
+    bool atValidUnreservedChar();                         // helper function to parse unreserved
+    bool atValidSubDelimChar();                           // helper function to parse sub-delims
+    bool atValidPctEncodedChar();                         // helper function to parse pct-encoded
     bool subPath(nsCSPHostSrc* aCspHost);                 // helper function to parse paths
     void reportURIList(nsTArray<nsCSPBaseSrc*>& outSrcs); // helper function to parse report-uris
+    void percentDecodeStr(const nsAString& aEncStr,       // helper function to percent-decode
+                          nsAString& outDecStr);
 
     inline bool atEnd()
     {
       return mCurChar >= mEndChar;
     }
 
     inline bool accept(char16_t aSymbol)
     {