netwerk/base/nsURLParsers.cpp
author Wes Kocher <wkocher@mozilla.com>
Fri, 12 May 2017 18:24:42 -0700
changeset 358155 d7711b69b0136381c4da5cd292619d514ffee1ee
parent 326940 7cf7d0a3cc0e7adfab01099974149f8e655c776f
child 367749 6a629adbb62a299d7208373d1c6f375149d2afdb
permissions -rw-r--r--
Backed out changeset 95ff98f1c2e5 (bug 1319111) for talos failures a=backout MozReview-Commit-ID: Gs09moFm5rQ

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <string.h>

#include "mozilla/RangedPtr.h"

#include "nsURLParsers.h"
#include "nsURLHelper.h"
#include "nsString.h"
#include "nsCRT.h"

using namespace mozilla;

//----------------------------------------------------------------------------

static uint32_t
CountConsecutiveSlashes(const char *str, int32_t len)
{
    RangedPtr<const char> p(str, len);
    uint32_t count = 0;
    while (len-- && *p++ == '/') ++count;
    return count;
}

//----------------------------------------------------------------------------
// nsBaseURLParser implementation
//----------------------------------------------------------------------------

NS_IMPL_ISUPPORTS(nsAuthURLParser, nsIURLParser)
NS_IMPL_ISUPPORTS(nsNoAuthURLParser, nsIURLParser)

#define SET_RESULT(component, pos, len) \
    PR_BEGIN_MACRO \
        if (component ## Pos) \
           *component ## Pos = uint32_t(pos); \
        if (component ## Len) \
           *component ## Len = int32_t(len); \
    PR_END_MACRO

#define OFFSET_RESULT(component, offset) \
    PR_BEGIN_MACRO \
        if (component ## Pos) \
           *component ## Pos += offset; \
    PR_END_MACRO

NS_IMETHODIMP
nsBaseURLParser::ParseURL(const char *spec, int32_t specLen,
                          uint32_t *schemePos, int32_t *schemeLen,
                          uint32_t *authorityPos, int32_t *authorityLen,
                          uint32_t *pathPos, int32_t *pathLen)
{
    if (NS_WARN_IF(!spec)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (specLen < 0)
        specLen = strlen(spec);

    const char *stop = nullptr;
    const char *colon = nullptr;
    const char *slash = nullptr;
    const char *p = spec;
    uint32_t offset = 0;
    int32_t len = specLen;

    // skip leading whitespace
    while (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
        spec++;
        specLen--;
        offset++;

        p++;
        len--;
    }

    for (; len && *p && !colon && !slash; ++p, --len) {
        switch (*p) {
            case ':':
                if (!colon)
                    colon = p;
                break;
            case '/': // start of filepath
            case '?': // start of query
            case '#': // start of ref
                if (!slash)
                    slash = p;
                break;
            case '@': // username@hostname
            case '[': // start of IPv6 address literal
                if (!stop)
                    stop = p;
                break;
        }
    }
    // disregard the first colon if it follows an '@' or a '['
    if (colon && stop && colon > stop)
        colon = nullptr;

    // if the spec only contained whitespace ...
    if (specLen == 0) {
        SET_RESULT(scheme, 0, -1);
        SET_RESULT(authority, 0, 0);
        SET_RESULT(path, 0, 0);
        return NS_OK;
    }

    // ignore trailing whitespace and control characters
    for (p = spec + specLen - 1; ((unsigned char) *p <= ' ') && (p != spec); --p)
        ;

    specLen = p - spec + 1;

    if (colon && (colon < slash || !slash)) {
        //
        // spec = <scheme>:/<the-rest>
        //
        // or
        //
        // spec = <scheme>:<authority>
        // spec = <scheme>:<path-no-slashes>
        //
        if (!net_IsValidScheme(spec, colon - spec) || (*(colon+1) == ':')) {
            return NS_ERROR_MALFORMED_URI;
        }
        SET_RESULT(scheme, offset, colon - spec);
        if (authorityLen || pathLen) {
            uint32_t schemeLen = colon + 1 - spec;
            offset += schemeLen;
            ParseAfterScheme(colon + 1, specLen - schemeLen,
                             authorityPos, authorityLen,
                             pathPos, pathLen);
            OFFSET_RESULT(authority, offset);
            OFFSET_RESULT(path, offset);
        }
    }
    else {
        // 
        // spec = <authority-no-port-or-password>/<path>
        // spec = <path>
        //
        // or
        //
        // spec = <authority-no-port-or-password>/<path-with-colon>
        // spec = <path-with-colon>
        //
        // or
        //
        // spec = <authority-no-port-or-password>
        // spec = <path-no-slashes-or-colon>
        //
        SET_RESULT(scheme, 0, -1);
        if (authorityLen || pathLen) {
            ParseAfterScheme(spec, specLen,
                             authorityPos, authorityLen,
                             pathPos, pathLen);
            OFFSET_RESULT(authority, offset);
            OFFSET_RESULT(path, offset);
        }
    }
    return NS_OK;
}

NS_IMETHODIMP
nsBaseURLParser::ParseAuthority(const char *auth, int32_t authLen,
                                uint32_t *usernamePos, int32_t *usernameLen,
                                uint32_t *passwordPos, int32_t *passwordLen,
                                uint32_t *hostnamePos, int32_t *hostnameLen,
                                int32_t *port)
{
    if (NS_WARN_IF(!auth)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (authLen < 0)
        authLen = strlen(auth);

    SET_RESULT(username, 0, -1);
    SET_RESULT(password, 0, -1);
    SET_RESULT(hostname, 0, authLen);
    if (port)
       *port = -1;
    return NS_OK;
}

NS_IMETHODIMP
nsBaseURLParser::ParseUserInfo(const char *userinfo, int32_t userinfoLen,
                               uint32_t *usernamePos, int32_t *usernameLen,
                               uint32_t *passwordPos, int32_t *passwordLen)
{
    SET_RESULT(username, 0, -1);
    SET_RESULT(password, 0, -1);
    return NS_OK;
}

NS_IMETHODIMP
nsBaseURLParser::ParseServerInfo(const char *serverinfo, int32_t serverinfoLen,
                                 uint32_t *hostnamePos, int32_t *hostnameLen,
                                 int32_t *port)
{
    SET_RESULT(hostname, 0, -1);
    if (port)
       *port = -1;
    return NS_OK;
}

NS_IMETHODIMP
nsBaseURLParser::ParsePath(const char *path, int32_t pathLen,
                           uint32_t *filepathPos, int32_t *filepathLen,
                           uint32_t *queryPos, int32_t *queryLen,
                           uint32_t *refPos, int32_t *refLen)
{
    if (NS_WARN_IF(!path)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (pathLen < 0)
        pathLen = strlen(path);

    // path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref>

    // XXX PL_strnpbrk would be nice, but it's buggy

    // search for first occurrence of either ? or #
    const char *query_beg = 0, *query_end = 0;
    const char *ref_beg = 0;
    const char *p = 0;
    for (p = path; p < path + pathLen; ++p) {
        // only match the query string if it precedes the reference fragment
        if (!ref_beg && !query_beg && *p == '?')
            query_beg = p + 1;
        else if (*p == '#') {
            ref_beg = p + 1;
            if (query_beg)
                query_end = p;
            break;
        }
    }

    if (query_beg) {
        if (query_end)
            SET_RESULT(query, query_beg - path, query_end - query_beg);
        else
            SET_RESULT(query, query_beg - path, pathLen - (query_beg - path));
    }
    else
        SET_RESULT(query, 0, -1);

    if (ref_beg)
        SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path));
    else
        SET_RESULT(ref, 0, -1);

    const char *end;
    if (query_beg)
        end = query_beg - 1;
    else if (ref_beg)
        end = ref_beg - 1;
    else
        end = path + pathLen;

    // an empty file path is no file path
    if (end != path)
        SET_RESULT(filepath, 0, end - path);
    else
        SET_RESULT(filepath, 0, -1);
    return NS_OK;
}

NS_IMETHODIMP
nsBaseURLParser::ParseFilePath(const char *filepath, int32_t filepathLen,
                               uint32_t *directoryPos, int32_t *directoryLen,
                               uint32_t *basenamePos, int32_t *basenameLen,
                               uint32_t *extensionPos, int32_t *extensionLen)
{
    if (NS_WARN_IF(!filepath)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (filepathLen < 0)
        filepathLen = strlen(filepath);

    if (filepathLen == 0) {
        SET_RESULT(directory, 0, -1);
        SET_RESULT(basename, 0, 0); // assume a zero length file basename
        SET_RESULT(extension, 0, -1);
        return NS_OK;
    }

    const char *p;
    const char *end = filepath + filepathLen;

    // search backwards for filename
    for (p = end - 1; *p != '/' && p > filepath; --p)
        ;
    if (*p == '/') {
        // catch /.. and /.
        if ((p+1 < end && *(p+1) == '.') && 
           (p+2 == end || (*(p+2) == '.' && p+3 == end)))
            p = end - 1;
        // filepath = <directory><filename>.<extension>
        SET_RESULT(directory, 0, p - filepath + 1);
        ParseFileName(p + 1, end - (p + 1),
                      basenamePos, basenameLen,
                      extensionPos, extensionLen);
        OFFSET_RESULT(basename, p + 1 - filepath);
        OFFSET_RESULT(extension, p + 1 - filepath);
    }
    else {
        // filepath = <filename>.<extension>
        SET_RESULT(directory, 0, -1);
        ParseFileName(filepath, filepathLen,
                      basenamePos, basenameLen,
                      extensionPos, extensionLen);
    }
    return NS_OK;
}

nsresult
nsBaseURLParser::ParseFileName(const char *filename, int32_t filenameLen,
                               uint32_t *basenamePos, int32_t *basenameLen,
                               uint32_t *extensionPos, int32_t *extensionLen)
{
    if (NS_WARN_IF(!filename)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (filenameLen < 0)
        filenameLen = strlen(filename);

    // no extension if filename ends with a '.'
    if (filename[filenameLen-1] != '.') {
        // ignore '.' at the beginning
        for (const char *p = filename + filenameLen - 1; p > filename; --p) {
            if (*p == '.') {
                // filename = <basename.extension>
                SET_RESULT(basename, 0, p - filename);
                SET_RESULT(extension, p + 1 - filename, filenameLen - (p - filename + 1));
                return NS_OK;
            }
        }
    }
    // filename = <basename>
    SET_RESULT(basename, 0, filenameLen);
    SET_RESULT(extension, 0, -1);
    return NS_OK;
}

//----------------------------------------------------------------------------
// nsNoAuthURLParser implementation
//----------------------------------------------------------------------------

NS_IMETHODIMP
nsNoAuthURLParser::ParseAuthority(const char *auth, int32_t authLen,
                                 uint32_t *usernamePos, int32_t *usernameLen,
                                 uint32_t *passwordPos, int32_t *passwordLen,
                                 uint32_t *hostnamePos, int32_t *hostnameLen,
                                 int32_t *port)
{
    NS_NOTREACHED("Shouldn't parse auth in a NoAuthURL!");
    return NS_ERROR_UNEXPECTED;
}

void
nsNoAuthURLParser::ParseAfterScheme(const char *spec, int32_t specLen,
                                    uint32_t *authPos, int32_t *authLen,
                                    uint32_t *pathPos, int32_t *pathLen)
{
    NS_PRECONDITION(specLen >= 0, "unexpected");

    // everything is the path
    uint32_t pos = 0;
    switch (CountConsecutiveSlashes(spec, specLen)) {
    case 0:
    case 1:
        break;
    case 2:
        {
            const char *p = nullptr;
            if (specLen > 2) {
                // looks like there is an authority section

                // if the authority looks like a drive number then we
                // really want to treat it as part of the path
                // [a-zA-Z][:|]{/\}
                // i.e one of:   c:   c:\foo  c:/foo  c|  c|\foo  c|/foo
                if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') &&
                    nsCRT::IsAsciiAlpha(spec[2]) &&
                    ((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) {
                    pos = 1;
                    break;  
                }
                // Ignore apparent authority; path is everything after it
                for (p = spec + 2; p < spec + specLen; ++p) {
                    if (*p == '/' || *p == '?' || *p == '#')
                        break;
                }
            }
            SET_RESULT(auth, 0, -1);
            if (p && p != spec+specLen)
                SET_RESULT(path, p - spec, specLen - (p - spec));
            else
                SET_RESULT(path, 0, -1);
            return;
        }
    default:
        pos = 2;
        break;
    }
    SET_RESULT(auth, pos, 0);
    SET_RESULT(path, pos, specLen - pos);
}

#if defined(XP_WIN)
NS_IMETHODIMP
nsNoAuthURLParser::ParseFilePath(const char *filepath, int32_t filepathLen,
                                 uint32_t *directoryPos, int32_t *directoryLen,
                                 uint32_t *basenamePos, int32_t *basenameLen,
                                 uint32_t *extensionPos, int32_t *extensionLen)
{
    if (NS_WARN_IF(!filepath)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (filepathLen < 0)
        filepathLen = strlen(filepath);

    // look for a filepath consisting of only a drive number, which may or
    // may not have a leading slash.
    if (filepathLen > 1 && filepathLen < 4) {
        const char *end = filepath + filepathLen;
        const char *p = filepath;
        if (*p == '/')
            p++;
        if ((end-p == 2) && (p[1]==':' || p[1]=='|') && nsCRT::IsAsciiAlpha(*p)) {
            // filepath = <drive-number>:
            SET_RESULT(directory, 0, filepathLen);
            SET_RESULT(basename, 0, -1);
            SET_RESULT(extension, 0, -1);
            return NS_OK;
        }
    }

    // otherwise fallback on common implementation
    return nsBaseURLParser::ParseFilePath(filepath, filepathLen,
                                          directoryPos, directoryLen,
                                          basenamePos, basenameLen,
                                          extensionPos, extensionLen);
}
#endif

//----------------------------------------------------------------------------
// nsAuthURLParser implementation
//----------------------------------------------------------------------------

NS_IMETHODIMP
nsAuthURLParser::ParseAuthority(const char *auth, int32_t authLen,
                                uint32_t *usernamePos, int32_t *usernameLen,
                                uint32_t *passwordPos, int32_t *passwordLen,
                                uint32_t *hostnamePos, int32_t *hostnameLen,
                                int32_t *port)
{
    nsresult rv;

    if (NS_WARN_IF(!auth)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (authLen < 0)
        authLen = strlen(auth);

    if (authLen == 0) {
        SET_RESULT(username, 0, -1);
        SET_RESULT(password, 0, -1);
        SET_RESULT(hostname, 0, 0);
        if (port)
            *port = -1;
        return NS_OK;
    }

    // search backwards for @
    const char *p = auth + authLen - 1;
    for (; (*p != '@') && (p > auth); --p) {
      continue;
    }
    if ( *p == '@' ) {
        // auth = <user-info@server-info>
        rv = ParseUserInfo(auth, p - auth,
                           usernamePos, usernameLen,
                           passwordPos, passwordLen);
        if (NS_FAILED(rv)) return rv;
        rv = ParseServerInfo(p + 1, authLen - (p - auth + 1),
                             hostnamePos, hostnameLen,
                             port);
        if (NS_FAILED(rv)) return rv;
        OFFSET_RESULT(hostname, p + 1 - auth);

        // malformed if has a username or password
        // but no host info, such as: http://u:p@/
        if ((usernamePos || passwordPos) && (!hostnamePos || !*hostnameLen)) {
            return NS_ERROR_MALFORMED_URI;
        }
    }
    else {
        // auth = <server-info>
        SET_RESULT(username, 0, -1);
        SET_RESULT(password, 0, -1);
        rv = ParseServerInfo(auth, authLen,
                             hostnamePos, hostnameLen,
                             port);
        if (NS_FAILED(rv)) return rv;
    }
    return NS_OK;
}

NS_IMETHODIMP
nsAuthURLParser::ParseUserInfo(const char *userinfo, int32_t userinfoLen,
                               uint32_t *usernamePos, int32_t *usernameLen,
                               uint32_t *passwordPos, int32_t *passwordLen)
{
    if (NS_WARN_IF(!userinfo)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (userinfoLen < 0)
        userinfoLen = strlen(userinfo);

    if (userinfoLen == 0) {
        SET_RESULT(username, 0, -1);
        SET_RESULT(password, 0, -1);
        return NS_OK;
    }

    const char *p = (const char *) memchr(userinfo, ':', userinfoLen);
    if (p) {
        // userinfo = <username:password>
        if (p == userinfo) {
            // must have a username!
            return NS_ERROR_MALFORMED_URI;
        }
        SET_RESULT(username, 0, p - userinfo);
        SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1));
    }
    else {
        // userinfo = <username>
        SET_RESULT(username, 0, userinfoLen);
        SET_RESULT(password, 0, -1);
    }
    return NS_OK;
}

NS_IMETHODIMP
nsAuthURLParser::ParseServerInfo(const char *serverinfo, int32_t serverinfoLen,
                                 uint32_t *hostnamePos, int32_t *hostnameLen,
                                 int32_t *port)
{
    if (NS_WARN_IF(!serverinfo)) {
        return NS_ERROR_INVALID_POINTER;
    }

    if (serverinfoLen < 0)
        serverinfoLen = strlen(serverinfo);

    if (serverinfoLen == 0) {
        SET_RESULT(hostname, 0, 0);
        if (port)
            *port = -1;
        return NS_OK;
    }

    // search backwards for a ':' but stop on ']' (IPv6 address literal
    // delimiter).  check for illegal characters in the hostname.
    const char *p = serverinfo + serverinfoLen - 1;
    const char *colon = nullptr, *bracket = nullptr;
    for (; p > serverinfo; --p) {
        switch (*p) {
            case ']':
                bracket = p;
                break;
            case ':':
                if (bracket == nullptr)
                    colon = p;
                break;
            case ' ':
                // hostname must not contain a space
                return NS_ERROR_MALFORMED_URI;
        }
    }

    if (colon) {
        // serverinfo = <hostname:port>
        SET_RESULT(hostname, 0, colon - serverinfo);
        if (port) {
            // XXX unfortunately ToInteger is not defined for substrings
            nsAutoCString buf(colon+1, serverinfoLen - (colon + 1 - serverinfo));
            if (buf.Length() == 0) {
                *port = -1;
            }
            else {
                const char* nondigit = NS_strspnp("0123456789", buf.get());
                if (nondigit && *nondigit)
                    return NS_ERROR_MALFORMED_URI;

                nsresult err;
                *port = buf.ToInteger(&err);
                if (NS_FAILED(err) || *port < 0 || *port > std::numeric_limits<uint16_t>::max())
                    return NS_ERROR_MALFORMED_URI;
            }
        }
    }
    else {
        // serverinfo = <hostname>
        SET_RESULT(hostname, 0, serverinfoLen);
        if (port)
           *port = -1;
    }

    // In case of IPv6 address check its validity
    if (*hostnameLen > 1 && *(serverinfo + *hostnamePos) == '[' &&
        *(serverinfo + *hostnamePos + *hostnameLen - 1) == ']' &&
        !net_IsValidIPv6Addr(serverinfo + *hostnamePos + 1, *hostnameLen - 2))
            return NS_ERROR_MALFORMED_URI;

    return NS_OK;
}

void
nsAuthURLParser::ParseAfterScheme(const char *spec, int32_t specLen,
                                  uint32_t *authPos, int32_t *authLen,
                                  uint32_t *pathPos, int32_t *pathLen)
{
    NS_PRECONDITION(specLen >= 0, "unexpected");

    uint32_t nslash = CountConsecutiveSlashes(spec, specLen);

    // search for the end of the authority section
    const char *end = spec + specLen;
    const char *p;
    for (p = spec + nslash; p < end; ++p) {
        if (*p == '/' || *p == '?' || *p == '#')
            break;
    }
    if (p < end) {
        // spec = [/]<auth><path>
        SET_RESULT(auth, nslash, p - (spec + nslash));
        SET_RESULT(path, p - spec, specLen - (p - spec));
    }
    else {
        // spec = [/]<auth>
        SET_RESULT(auth, nslash, specLen - nslash);
        SET_RESULT(path, 0, -1);
    }
}

//----------------------------------------------------------------------------
// nsStdURLParser implementation
//----------------------------------------------------------------------------

void
nsStdURLParser::ParseAfterScheme(const char *spec, int32_t specLen,
                                 uint32_t *authPos, int32_t *authLen,
                                 uint32_t *pathPos, int32_t *pathLen)
{
    NS_PRECONDITION(specLen >= 0, "unexpected");

    uint32_t nslash = CountConsecutiveSlashes(spec, specLen);

    // search for the end of the authority section
    const char *end = spec + specLen;
    const char *p;
    for (p = spec + nslash; p < end; ++p) {
        if (strchr("/?#;", *p))
            break;
    }
    switch (nslash) {
    case 0:
    case 2:
        if (p < end) {
            // spec = (//)<auth><path>
            SET_RESULT(auth, nslash, p - (spec + nslash));
            SET_RESULT(path, p - spec, specLen - (p - spec));
        }
        else {
            // spec = (//)<auth>
            SET_RESULT(auth, nslash, specLen - nslash);
            SET_RESULT(path, 0, -1);
        }
        break;
    case 1:
        // spec = /<path>
        SET_RESULT(auth, 0, -1);
        SET_RESULT(path, 0, specLen);
        break;
    default:
        // spec = ///[/]<path>
        SET_RESULT(auth, 2, 0);
        SET_RESULT(path, 2, specLen - 2);
    }
}