/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- *//* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */#include<string.h>#include"mozilla/RangedPtr.h"#include"mozilla/TextUtils.h"#include"nsCRTGlue.h"#include"nsURLParsers.h"#include"nsURLHelper.h"#include"nsString.h"usingnamespacemozilla;//----------------------------------------------------------------------------staticuint32_tCountConsecutiveSlashes(constchar*str,int32_tlen){RangedPtr<constchar>p(str,len);uint32_tcount=0;while(len--&&*p++=='/')++count;returncount;}//----------------------------------------------------------------------------// nsBaseURLParser implementation//----------------------------------------------------------------------------NS_IMPL_ISUPPORTS(nsAuthURLParser,nsIURLParser)NS_IMPL_ISUPPORTS(nsNoAuthURLParser,nsIURLParser)#define SET_RESULT(component, pos, len) \ PR_BEGIN_MACRO \ if (component##Pos) *component##Pos = uint32_t(pos); \ if (component##Len) *component##Len = int32_t(len); \ PR_END_MACRO#define OFFSET_RESULT(component, offset) \ PR_BEGIN_MACRO \ if (component##Pos) *component##Pos += (offset); \ PR_END_MACRONS_IMETHODIMPnsBaseURLParser::ParseURL(constchar*spec,int32_tspecLen,uint32_t*schemePos,int32_t*schemeLen,uint32_t*authorityPos,int32_t*authorityLen,uint32_t*pathPos,int32_t*pathLen){if(NS_WARN_IF(!spec)){returnNS_ERROR_INVALID_POINTER;}if(specLen<0)specLen=strlen(spec);constchar*stop=nullptr;constchar*colon=nullptr;constchar*slash=nullptr;constchar*p=spec;uint32_toffset=0;int32_tlen=specLen;// skip leading whitespacewhile(*p==' '||*p=='\n'||*p=='\r'||*p=='\t'){spec++;specLen--;offset++;p++;len--;}for(;len&&*p&&!colon&&!slash;++p,--len){switch(*p){case':':if(!colon)colon=p;break;case'/':// start of filepathcase'?':// start of querycase'#':// start of refif(!slash)slash=p;break;case'@':// username@hostnamecase'[':// start of IPv6 address literalif(!stop)stop=p;break;}}// disregard the first colon if it follows an '@' or a '['if(colon&&stop&&colon>stop)colon=nullptr;// if the spec only contained whitespace ...if(specLen==0){SET_RESULT(scheme,0,-1);SET_RESULT(authority,0,0);SET_RESULT(path,0,0);returnNS_OK;}// ignore trailing whitespace and control charactersfor(p=spec+specLen-1;((unsignedchar)*p<=' ')&&(p!=spec);--p){;}specLen=p-spec+1;if(colon&&(colon<slash||!slash)){//// spec = <scheme>:/<the-rest>//// or//// spec = <scheme>:<authority>// spec = <scheme>:<path-no-slashes>//if(!net_IsValidScheme(nsDependentCSubstring(spec,colon-spec))){returnNS_ERROR_MALFORMED_URI;}SET_RESULT(scheme,offset,colon-spec);if(authorityLen||pathLen){uint32_tschemeLen=colon+1-spec;offset+=schemeLen;ParseAfterScheme(colon+1,specLen-schemeLen,authorityPos,authorityLen,pathPos,pathLen);OFFSET_RESULT(authority,offset);OFFSET_RESULT(path,offset);}}else{//// spec = <authority-no-port-or-password>/<path>// spec = <path>//// or//// spec = <authority-no-port-or-password>/<path-with-colon>// spec = <path-with-colon>//// or//// spec = <authority-no-port-or-password>// spec = <path-no-slashes-or-colon>//SET_RESULT(scheme,0,-1);if(authorityLen||pathLen){ParseAfterScheme(spec,specLen,authorityPos,authorityLen,pathPos,pathLen);OFFSET_RESULT(authority,offset);OFFSET_RESULT(path,offset);}}returnNS_OK;}NS_IMETHODIMPnsBaseURLParser::ParseAuthority(constchar*auth,int32_tauthLen,uint32_t*usernamePos,int32_t*usernameLen,uint32_t*passwordPos,int32_t*passwordLen,uint32_t*hostnamePos,int32_t*hostnameLen,int32_t*port){if(NS_WARN_IF(!auth)){returnNS_ERROR_INVALID_POINTER;}if(authLen<0)authLen=strlen(auth);SET_RESULT(username,0,-1);SET_RESULT(password,0,-1);SET_RESULT(hostname,0,authLen);if(port)*port=-1;returnNS_OK;}NS_IMETHODIMPnsBaseURLParser::ParseUserInfo(constchar*userinfo,int32_tuserinfoLen,uint32_t*usernamePos,int32_t*usernameLen,uint32_t*passwordPos,int32_t*passwordLen){SET_RESULT(username,0,-1);SET_RESULT(password,0,-1);returnNS_OK;}NS_IMETHODIMPnsBaseURLParser::ParseServerInfo(constchar*serverinfo,int32_tserverinfoLen,uint32_t*hostnamePos,int32_t*hostnameLen,int32_t*port){SET_RESULT(hostname,0,-1);if(port)*port=-1;returnNS_OK;}NS_IMETHODIMPnsBaseURLParser::ParsePath(constchar*path,int32_tpathLen,uint32_t*filepathPos,int32_t*filepathLen,uint32_t*queryPos,int32_t*queryLen,uint32_t*refPos,int32_t*refLen){if(NS_WARN_IF(!path)){returnNS_ERROR_INVALID_POINTER;}if(pathLen<0)pathLen=strlen(path);// path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref>// XXX PL_strnpbrk would be nice, but it's buggy// search for first occurrence of either ? or #constchar*query_beg=nullptr,*query_end=nullptr;constchar*ref_beg=nullptr;constchar*p=nullptr;for(p=path;p<path+pathLen;++p){// only match the query string if it precedes the reference fragmentif(!ref_beg&&!query_beg&&*p=='?'){query_beg=p+1;}elseif(*p=='#'){ref_beg=p+1;if(query_beg)query_end=p;break;}}if(query_beg){if(query_end){SET_RESULT(query,query_beg-path,query_end-query_beg);}else{SET_RESULT(query,query_beg-path,pathLen-(query_beg-path));}}else{SET_RESULT(query,0,-1);}if(ref_beg){SET_RESULT(ref,ref_beg-path,pathLen-(ref_beg-path));}else{SET_RESULT(ref,0,-1);}constchar*end;if(query_beg){end=query_beg-1;}elseif(ref_beg){end=ref_beg-1;}else{end=path+pathLen;}// an empty file path is no file pathif(end!=path){SET_RESULT(filepath,0,end-path);}else{SET_RESULT(filepath,0,-1);}returnNS_OK;}NS_IMETHODIMPnsBaseURLParser::ParseFilePath(constchar*filepath,int32_tfilepathLen,uint32_t*directoryPos,int32_t*directoryLen,uint32_t*basenamePos,int32_t*basenameLen,uint32_t*extensionPos,int32_t*extensionLen){if(NS_WARN_IF(!filepath)){returnNS_ERROR_INVALID_POINTER;}if(filepathLen<0)filepathLen=strlen(filepath);if(filepathLen==0){SET_RESULT(directory,0,-1);SET_RESULT(basename,0,0);// assume a zero length file basenameSET_RESULT(extension,0,-1);returnNS_OK;}constchar*p;constchar*end=filepath+filepathLen;// search backwards for filenamefor(p=end-1;*p!='/'&&p>filepath;--p){;}if(*p=='/'){// catch /.. and /.if((p+1<end&&*(p+1)=='.')&&(p+2==end||(*(p+2)=='.'&&p+3==end))){p=end-1;}// filepath = <directory><filename>.<extension>SET_RESULT(directory,0,p-filepath+1);ParseFileName(p+1,end-(p+1),basenamePos,basenameLen,extensionPos,extensionLen);OFFSET_RESULT(basename,p+1-filepath);OFFSET_RESULT(extension,p+1-filepath);}else{// filepath = <filename>.<extension>SET_RESULT(directory,0,-1);ParseFileName(filepath,filepathLen,basenamePos,basenameLen,extensionPos,extensionLen);}returnNS_OK;}nsresultnsBaseURLParser::ParseFileName(constchar*filename,int32_tfilenameLen,uint32_t*basenamePos,int32_t*basenameLen,uint32_t*extensionPos,int32_t*extensionLen){if(NS_WARN_IF(!filename)){returnNS_ERROR_INVALID_POINTER;}if(filenameLen<0)filenameLen=strlen(filename);// no extension if filename ends with a '.'if(filename[filenameLen-1]!='.'){// ignore '.' at the beginningfor(constchar*p=filename+filenameLen-1;p>filename;--p){if(*p=='.'){// filename = <basename.extension>SET_RESULT(basename,0,p-filename);SET_RESULT(extension,p+1-filename,filenameLen-(p-filename+1));returnNS_OK;}}}// filename = <basename>SET_RESULT(basename,0,filenameLen);SET_RESULT(extension,0,-1);returnNS_OK;}//----------------------------------------------------------------------------// nsNoAuthURLParser implementation//----------------------------------------------------------------------------NS_IMETHODIMPnsNoAuthURLParser::ParseAuthority(constchar*auth,int32_tauthLen,uint32_t*usernamePos,int32_t*usernameLen,uint32_t*passwordPos,int32_t*passwordLen,uint32_t*hostnamePos,int32_t*hostnameLen,int32_t*port){MOZ_ASSERT_UNREACHABLE("Shouldn't parse auth in a NoAuthURL!");returnNS_ERROR_UNEXPECTED;}voidnsNoAuthURLParser::ParseAfterScheme(constchar*spec,int32_tspecLen,uint32_t*authPos,int32_t*authLen,uint32_t*pathPos,int32_t*pathLen){MOZ_ASSERT(specLen>=0,"unexpected");// everything is the pathuint32_tpos=0;switch(CountConsecutiveSlashes(spec,specLen)){case0:case1:break;case2:{constchar*p=nullptr;if(specLen>2){// looks like there is an authority section// if the authority looks like a drive number then we// really want to treat it as part of the path// [a-zA-Z][:|]{/\}// i.e one of: c: c:\foo c:/foo c| c|\foo c|/fooif((specLen>3)&&(spec[3]==':'||spec[3]=='|')&&IsAsciiAlpha(spec[2])&&((specLen==4)||(spec[4]=='/')||(spec[4]=='\\'))){pos=1;break;}// Ignore apparent authority; path is everything after itfor(p=spec+2;p<spec+specLen;++p){if(*p=='/'||*p=='?'||*p=='#')break;}}SET_RESULT(auth,0,-1);if(p&&p!=spec+specLen){SET_RESULT(path,p-spec,specLen-(p-spec));}else{SET_RESULT(path,0,-1);}return;}default:pos=2;break;}SET_RESULT(auth,pos,0);SET_RESULT(path,pos,specLen-pos);}#if defined(XP_WIN)NS_IMETHODIMPnsNoAuthURLParser::ParseFilePath(constchar*filepath,int32_tfilepathLen,uint32_t*directoryPos,int32_t*directoryLen,uint32_t*basenamePos,int32_t*basenameLen,uint32_t*extensionPos,int32_t*extensionLen){if(NS_WARN_IF(!filepath)){returnNS_ERROR_INVALID_POINTER;}if(filepathLen<0)filepathLen=strlen(filepath);// look for a filepath consisting of only a drive number, which may or// may not have a leading slash.if(filepathLen>1&&filepathLen<4){constchar*end=filepath+filepathLen;constchar*p=filepath;if(*p=='/')p++;if((end-p==2)&&(p[1]==':'||p[1]=='|')&&IsAsciiAlpha(*p)){// filepath = <drive-number>:SET_RESULT(directory,0,filepathLen);SET_RESULT(basename,0,-1);SET_RESULT(extension,0,-1);returnNS_OK;}}// otherwise fallback on common implementationreturnnsBaseURLParser::ParseFilePath(filepath,filepathLen,directoryPos,directoryLen,basenamePos,basenameLen,extensionPos,extensionLen);}#endif//----------------------------------------------------------------------------// nsAuthURLParser implementation//----------------------------------------------------------------------------NS_IMETHODIMPnsAuthURLParser::ParseAuthority(constchar*auth,int32_tauthLen,uint32_t*usernamePos,int32_t*usernameLen,uint32_t*passwordPos,int32_t*passwordLen,uint32_t*hostnamePos,int32_t*hostnameLen,int32_t*port){nsresultrv;if(NS_WARN_IF(!auth)){returnNS_ERROR_INVALID_POINTER;}if(authLen<0)authLen=strlen(auth);if(authLen==0){SET_RESULT(username,0,-1);SET_RESULT(password,0,-1);SET_RESULT(hostname,0,0);if(port)*port=-1;returnNS_OK;}// search backwards for @constchar*p=auth+authLen-1;for(;(*p!='@')&&(p>auth);--p){}if(*p=='@'){// auth = <user-info@server-info>rv=ParseUserInfo(auth,p-auth,usernamePos,usernameLen,passwordPos,passwordLen);if(NS_FAILED(rv))returnrv;rv=ParseServerInfo(p+1,authLen-(p-auth+1),hostnamePos,hostnameLen,port);if(NS_FAILED(rv))returnrv;OFFSET_RESULT(hostname,p+1-auth);// malformed if has a username or password// but no host info, such as: http://u:p@/if((usernamePos||passwordPos)&&(!hostnamePos||!*hostnameLen)){returnNS_ERROR_MALFORMED_URI;}}else{// auth = <server-info>SET_RESULT(username,0,-1);SET_RESULT(password,0,-1);rv=ParseServerInfo(auth,authLen,hostnamePos,hostnameLen,port);if(NS_FAILED(rv))returnrv;}returnNS_OK;}NS_IMETHODIMPnsAuthURLParser::ParseUserInfo(constchar*userinfo,int32_tuserinfoLen,uint32_t*usernamePos,int32_t*usernameLen,uint32_t*passwordPos,int32_t*passwordLen){if(NS_WARN_IF(!userinfo)){returnNS_ERROR_INVALID_POINTER;}if(userinfoLen<0)userinfoLen=strlen(userinfo);if(userinfoLen==0){SET_RESULT(username,0,-1);SET_RESULT(password,0,-1);returnNS_OK;}constchar*p=(constchar*)memchr(userinfo,':',userinfoLen);if(p){// userinfo = <username:password>SET_RESULT(username,0,p-userinfo);SET_RESULT(password,p-userinfo+1,userinfoLen-(p-userinfo+1));}else{// userinfo = <username>SET_RESULT(username,0,userinfoLen);SET_RESULT(password,0,-1);}returnNS_OK;}NS_IMETHODIMPnsAuthURLParser::ParseServerInfo(constchar*serverinfo,int32_tserverinfoLen,uint32_t*hostnamePos,int32_t*hostnameLen,int32_t*port){if(NS_WARN_IF(!serverinfo)){returnNS_ERROR_INVALID_POINTER;}if(serverinfoLen<0)serverinfoLen=strlen(serverinfo);if(serverinfoLen==0){SET_RESULT(hostname,0,0);if(port)*port=-1;returnNS_OK;}// search backwards for a ':' but stop on ']' (IPv6 address literal// delimiter). check for illegal characters in the hostname.constchar*p=serverinfo+serverinfoLen-1;constchar*colon=nullptr,*bracket=nullptr;for(;p>serverinfo;--p){switch(*p){case']':bracket=p;break;case':':if(bracket==nullptr)colon=p;break;case' ':// hostname must not contain a spacereturnNS_ERROR_MALFORMED_URI;}}if(colon){// serverinfo = <hostname:port>SET_RESULT(hostname,0,colon-serverinfo);if(port){// XXX unfortunately ToInteger is not defined for substringsnsAutoCStringbuf(colon+1,serverinfoLen-(colon+1-serverinfo));if(buf.Length()==0){*port=-1;}else{constchar*nondigit=NS_strspnp("0123456789",buf.get());if(nondigit&&*nondigit)returnNS_ERROR_MALFORMED_URI;nsresulterr;*port=buf.ToInteger(&err);if(NS_FAILED(err)||*port<0||*port>std::numeric_limits<uint16_t>::max()){returnNS_ERROR_MALFORMED_URI;}}}}else{// serverinfo = <hostname>SET_RESULT(hostname,0,serverinfoLen);if(port)*port=-1;}// In case of IPv6 address check its validityif(*hostnameLen>1&&*(serverinfo+*hostnamePos)=='['&&*(serverinfo+*hostnamePos+*hostnameLen-1)==']'&&!net_IsValidIPv6Addr(Substring(serverinfo+*hostnamePos+1,*hostnameLen-2))){returnNS_ERROR_MALFORMED_URI;}returnNS_OK;}voidnsAuthURLParser::ParseAfterScheme(constchar*spec,int32_tspecLen,uint32_t*authPos,int32_t*authLen,uint32_t*pathPos,int32_t*pathLen){MOZ_ASSERT(specLen>=0,"unexpected");uint32_tnslash=CountConsecutiveSlashes(spec,specLen);// search for the end of the authority sectionconstchar*end=spec+specLen;constchar*p;for(p=spec+nslash;p<end;++p){if(*p=='/'||*p=='?'||*p=='#')break;}if(p<end){// spec = [/]<auth><path>SET_RESULT(auth,nslash,p-(spec+nslash));SET_RESULT(path,p-spec,specLen-(p-spec));}else{// spec = [/]<auth>SET_RESULT(auth,nslash,specLen-nslash);SET_RESULT(path,0,-1);}}//----------------------------------------------------------------------------// nsStdURLParser implementation//----------------------------------------------------------------------------voidnsStdURLParser::ParseAfterScheme(constchar*spec,int32_tspecLen,uint32_t*authPos,int32_t*authLen,uint32_t*pathPos,int32_t*pathLen){MOZ_ASSERT(specLen>=0,"unexpected");uint32_tnslash=CountConsecutiveSlashes(spec,specLen);// search for the end of the authority sectionconstchar*end=spec+specLen;constchar*p;for(p=spec+nslash;p<end;++p){if(strchr("/?#;",*p))break;}switch(nslash){case0:case2:if(p<end){// spec = (//)<auth><path>SET_RESULT(auth,nslash,p-(spec+nslash));SET_RESULT(path,p-spec,specLen-(p-spec));}else{// spec = (//)<auth>SET_RESULT(auth,nslash,specLen-nslash);SET_RESULT(path,0,-1);}break;case1:// spec = /<path>SET_RESULT(auth,0,-1);SET_RESULT(path,0,specLen);break;default:// spec = ///[/]<path>SET_RESULT(auth,2,0);SET_RESULT(path,2,specLen-2);}}