parser/xml/nsSAXXMLReader.cpp
author Michael Comella <michael.l.comella@gmail.com>
Tue, 15 Sep 2015 16:46:58 -0700
changeset 266944 ed8188590f14b1aae2e4f44c8196994f375a99f4
parent 266562 41dea9df27ed995f8315ab4318c187a617937664
child 268184 e8c7dfe727cd970e2c3294934e2927b14143c205
permissions -rw-r--r--
Bug 1201206 - Correct menu button background on 2.3. r=mhaigh One fear is that different devices set different menu colors and text colors. Since we're using the default text color and set an explicit menu color, the text color may not look good on these devices. I was unable to find a way to override the menu text color. It seems the best way to find out if this is a problem is to land it and test though!

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsIInputStream.h"
#include "nsNetCID.h"
#include "nsNetUtil.h"
#include "nsNullPrincipal.h"
#include "nsIParser.h"
#include "nsParserCIID.h"
#include "nsStreamUtils.h"
#include "nsStringStream.h"
#include "nsIScriptError.h"
#include "nsSAXAttributes.h"
#include "nsSAXLocator.h"
#include "nsSAXXMLReader.h"
#include "nsCharsetSource.h"

#include "mozilla/dom/EncodingUtils.h"

using mozilla::dom::EncodingUtils;

#define XMLNS_URI "http://www.w3.org/2000/xmlns/"

static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);

NS_IMPL_CYCLE_COLLECTION(nsSAXXMLReader,
                         mContentHandler,
                         mDTDHandler,
                         mErrorHandler,
                         mLexicalHandler,
                         mDeclarationHandler,
                         mBaseURI,
                         mListener,
                         mParserObserver)
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSAXXMLReader)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSAXXMLReader)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSAXXMLReader)
  NS_INTERFACE_MAP_ENTRY(nsISAXXMLReader)
  NS_INTERFACE_MAP_ENTRY(nsIExpatSink)
  NS_INTERFACE_MAP_ENTRY(nsIExtendedExpatSink)
  NS_INTERFACE_MAP_ENTRY(nsIContentSink)
  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISAXXMLReader)
NS_INTERFACE_MAP_END

nsSAXXMLReader::nsSAXXMLReader() :
    mIsAsyncParse(false),
    mEnableNamespacePrefixes(false)
{
}

// nsIContentSink
NS_IMETHODIMP
nsSAXXMLReader::WillBuildModel(nsDTDMode)
{
  if (mContentHandler)
    return mContentHandler->StartDocument();

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::DidBuildModel(bool aTerminated)
{
  if (mContentHandler)
    return mContentHandler->EndDocument();

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetParser(nsParserBase *aParser)
{
  return NS_OK;
}

// nsIExtendedExpatSink
NS_IMETHODIMP
nsSAXXMLReader::HandleStartElement(const char16_t *aName,
                                   const char16_t **aAtts,
                                   uint32_t aAttsCount,
                                   uint32_t aLineNumber)
{
  if (!mContentHandler)
    return NS_OK;

  nsRefPtr<nsSAXAttributes> atts = new nsSAXAttributes();
  if (!atts)
    return NS_ERROR_OUT_OF_MEMORY;
  nsAutoString uri, localName, qName;
  for (; *aAtts; aAtts += 2) {
    SplitExpatName(aAtts[0], uri, localName, qName);
    // XXX don't have attr type information
    NS_NAMED_LITERAL_STRING(cdataType, "CDATA");
    // could support xmlns reporting, it's a standard SAX feature
    if (mEnableNamespacePrefixes || !uri.EqualsLiteral(XMLNS_URI)) {
      NS_ASSERTION(aAtts[1], "null passed to handler");
      atts->AddAttribute(uri, localName, qName, cdataType,
                         nsDependentString(aAtts[1]));
    }
  }

  // Deal with the element name
  SplitExpatName(aName, uri, localName, qName);
  return mContentHandler->StartElement(uri, localName, qName, atts);
}

NS_IMETHODIMP
nsSAXXMLReader::HandleEndElement(const char16_t *aName)
{
  if (mContentHandler) {
    nsAutoString uri, localName, qName;
    SplitExpatName(aName, uri, localName, qName);
    return mContentHandler->EndElement(uri, localName, qName);
  }
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleComment(const char16_t *aName)
{
  NS_ASSERTION(aName, "null passed to handler");
  if (mLexicalHandler)
    return mLexicalHandler->Comment(nsDependentString(aName));
 
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleCDataSection(const char16_t *aData,
                                   uint32_t aLength)
{
  nsresult rv;
  if (mLexicalHandler) {
    rv = mLexicalHandler->StartCDATA();
    NS_ENSURE_SUCCESS(rv, rv);
  }

  if (mContentHandler) {
    rv = mContentHandler->Characters(Substring(aData, aData+aLength));
    NS_ENSURE_SUCCESS(rv, rv);
  }

  if (mLexicalHandler) {
    rv = mLexicalHandler->EndCDATA();
    NS_ENSURE_SUCCESS(rv, rv);
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleStartDTD(const char16_t *aName,
                               const char16_t *aSystemId,
                               const char16_t *aPublicId)
{
  char16_t nullChar = char16_t(0);
  if (!aName)
    aName = &nullChar;
  if (!aSystemId)
    aSystemId = &nullChar;
  if (!aPublicId)
    aPublicId = &nullChar;

  mSystemId = aSystemId;
  mPublicId = aPublicId;
  if (mLexicalHandler) {
    return mLexicalHandler->StartDTD(nsDependentString(aName),
                                     nsDependentString(aPublicId),
                                     nsDependentString(aSystemId));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleDoctypeDecl(const nsAString & aSubset,
                                  const nsAString & aName,
                                  const nsAString & aSystemId,
                                  const nsAString & aPublicId,
                                  nsISupports* aCatalogData)
{
  if (mLexicalHandler)
    return mLexicalHandler->EndDTD();

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleCharacterData(const char16_t *aData,
                                    uint32_t aLength)
{
  if (mContentHandler)
    return mContentHandler->Characters(Substring(aData, aData+aLength));

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleStartNamespaceDecl(const char16_t *aPrefix,
                                         const char16_t *aUri)
{
  if (!mContentHandler)
    return NS_OK;
  
  char16_t nullChar = char16_t(0);
  if (!aPrefix)
    aPrefix = &nullChar;
  if (!aUri)
    aUri = &nullChar;

  return mContentHandler->StartPrefixMapping(nsDependentString(aPrefix),
                                             nsDependentString(aUri));
}

NS_IMETHODIMP
nsSAXXMLReader::HandleEndNamespaceDecl(const char16_t *aPrefix)
{
  if (!mContentHandler)
    return NS_OK;
  
  if (aPrefix)
    return mContentHandler->EndPrefixMapping(nsDependentString(aPrefix));

  return mContentHandler->EndPrefixMapping(EmptyString());
}

NS_IMETHODIMP
nsSAXXMLReader::HandleProcessingInstruction(const char16_t *aTarget,
                                            const char16_t *aData)
{
  NS_ASSERTION(aTarget && aData, "null passed to handler");
  if (mContentHandler) {
    return mContentHandler->ProcessingInstruction(nsDependentString(aTarget),
                                                  nsDependentString(aData));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleNotationDecl(const char16_t *aNotationName,
                                   const char16_t *aSystemId,
                                   const char16_t *aPublicId)
{
  NS_ASSERTION(aNotationName, "null passed to handler");
  if (mDTDHandler) {
    char16_t nullChar = char16_t(0);
    if (!aSystemId)
      aSystemId = &nullChar;
    if (!aPublicId)
      aPublicId = &nullChar;

    return mDTDHandler->NotationDecl(nsDependentString(aNotationName),
                                     nsDependentString(aSystemId),
                                     nsDependentString(aPublicId));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleUnparsedEntityDecl(const char16_t *aEntityName,
                                         const char16_t *aSystemId,
                                         const char16_t *aPublicId,
                                         const char16_t *aNotationName)
{
  NS_ASSERTION(aEntityName && aNotationName, "null passed to handler");
  if (mDTDHandler) {
    char16_t nullChar = char16_t(0);
    if (!aSystemId)
      aSystemId = &nullChar;
    if (!aPublicId)
      aPublicId = &nullChar;

    return mDTDHandler->UnparsedEntityDecl(nsDependentString(aEntityName),
                                           nsDependentString(aSystemId),
                                           nsDependentString(aPublicId),
                                           nsDependentString(aNotationName));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleXMLDeclaration(const char16_t *aVersion,
                                     const char16_t *aEncoding,
                                     int32_t aStandalone)
{
  NS_ASSERTION(aVersion, "null passed to handler");
  if (mDeclarationHandler) {
    char16_t nullChar = char16_t(0);
    if (!aEncoding)
      aEncoding = &nullChar;
    mDeclarationHandler->HandleXMLDeclaration(nsDependentString(aVersion),
                                              nsDependentString(aEncoding),
                                              aStandalone > 0);
  }
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::ReportError(const char16_t* aErrorText,
                            const char16_t* aSourceText,
                            nsIScriptError *aError,
                            bool *_retval)
{
  NS_PRECONDITION(aError && aSourceText && aErrorText, "Check arguments!!!");
  // Normally, the expat driver should report the error.
  *_retval = true;

  if (mErrorHandler) {
    uint32_t lineNumber;
    nsresult rv = aError->GetLineNumber(&lineNumber);
    NS_ENSURE_SUCCESS(rv, rv);

    uint32_t columnNumber;
    rv = aError->GetColumnNumber(&columnNumber);
    NS_ENSURE_SUCCESS(rv, rv);

    nsCOMPtr<nsISAXLocator> locator = new nsSAXLocator(mPublicId,
                                                       mSystemId,
                                                       lineNumber,
                                                       columnNumber);
    if (!locator)
      return NS_ERROR_OUT_OF_MEMORY;

    rv = mErrorHandler->FatalError(locator, nsDependentString(aErrorText));
    if (NS_SUCCEEDED(rv)) {
      // The error handler has handled the script error.  Don't log to console.
      *_retval = false;
    }
  }

  return NS_OK;
}

// nsISAXXMLReader

NS_IMETHODIMP
nsSAXXMLReader::GetBaseURI(nsIURI **aBaseURI)
{
  NS_IF_ADDREF(*aBaseURI = mBaseURI);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetBaseURI(nsIURI *aBaseURI)
{
  mBaseURI = aBaseURI;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetContentHandler(nsISAXContentHandler **aContentHandler)
{
  NS_IF_ADDREF(*aContentHandler = mContentHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetContentHandler(nsISAXContentHandler *aContentHandler)
{
  mContentHandler = aContentHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetDtdHandler(nsISAXDTDHandler **aDtdHandler)
{
  NS_IF_ADDREF(*aDtdHandler = mDTDHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetDtdHandler(nsISAXDTDHandler *aDtdHandler)
{
  mDTDHandler = aDtdHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetErrorHandler(nsISAXErrorHandler **aErrorHandler)
{
  NS_IF_ADDREF(*aErrorHandler = mErrorHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetErrorHandler(nsISAXErrorHandler *aErrorHandler)
{
  mErrorHandler = aErrorHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetFeature(const nsAString &aName, bool aValue)
{
  if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) {
    mEnableNamespacePrefixes = aValue;
    return NS_OK;
  }
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::GetFeature(const nsAString &aName, bool *aResult)
{
  if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) {
    *aResult = mEnableNamespacePrefixes;
    return NS_OK;
  }
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::GetDeclarationHandler(nsIMozSAXXMLDeclarationHandler **aDeclarationHandler) {
  NS_IF_ADDREF(*aDeclarationHandler = mDeclarationHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetDeclarationHandler(nsIMozSAXXMLDeclarationHandler *aDeclarationHandler) {
  mDeclarationHandler = aDeclarationHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetLexicalHandler(nsISAXLexicalHandler **aLexicalHandler)
{
  NS_IF_ADDREF(*aLexicalHandler = mLexicalHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetLexicalHandler(nsISAXLexicalHandler *aLexicalHandler)
{
  mLexicalHandler = aLexicalHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetProperty(const nsAString &aName, nsISupports* aValue)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::GetProperty(const nsAString &aName, bool *aResult)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::ParseFromString(const nsAString &aStr,
                                const char *aContentType)
{
  // Don't call this in the middle of an async parse
  NS_ENSURE_TRUE(!mIsAsyncParse, NS_ERROR_FAILURE);

  NS_ConvertUTF16toUTF8 data(aStr);

  // The new stream holds a reference to the buffer
  nsCOMPtr<nsIInputStream> stream;
  nsresult rv = NS_NewByteInputStream(getter_AddRefs(stream),
                                      data.get(), data.Length(),
                                      NS_ASSIGNMENT_DEPEND);
  NS_ENSURE_SUCCESS(rv, rv);
  return ParseFromStream(stream, "UTF-8", aContentType);
}

NS_IMETHODIMP
nsSAXXMLReader::ParseFromStream(nsIInputStream *aStream,
                                const char *aCharset,
                                const char *aContentType)
{
  // Don't call this in the middle of an async parse
  NS_ENSURE_TRUE(!mIsAsyncParse, NS_ERROR_FAILURE);

  NS_ENSURE_ARG(aStream);
  NS_ENSURE_ARG(aContentType);

  // Put the nsCOMPtr out here so we hold a ref to the stream as needed
  nsresult rv;
  nsCOMPtr<nsIInputStream> bufferedStream;
  if (!NS_InputStreamIsBuffered(aStream)) {
    rv = NS_NewBufferedInputStream(getter_AddRefs(bufferedStream),
                                   aStream, 4096);
    NS_ENSURE_SUCCESS(rv, rv);
    aStream = bufferedStream;
  }
 
  rv = EnsureBaseURI();
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIPrincipal> nullPrincipal = nsNullPrincipal::Create();
  NS_ENSURE_TRUE(nullPrincipal, NS_ERROR_FAILURE);

  nsCOMPtr<nsIChannel> parserChannel;
  rv = NS_NewInputStreamChannel(getter_AddRefs(parserChannel),
                                mBaseURI,
                                aStream,
                                nullPrincipal,
                                nsILoadInfo::SEC_NORMAL,
                                nsIContentPolicy::TYPE_OTHER,
                                nsDependentCString(aContentType));
  if (!parserChannel || NS_FAILED(rv))
    return NS_ERROR_FAILURE;

  if (aCharset)
    parserChannel->SetContentCharset(nsDependentCString(aCharset));

  rv = InitParser(nullptr, parserChannel);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mListener->OnStartRequest(parserChannel, nullptr);
  if (NS_FAILED(rv))
    parserChannel->Cancel(rv);

  /* When parsing a new document, we need to clear the XML identifiers.
     HandleStartDTD will set these values from the DTD declaration tag.
     We won't have them, of course, if there's a well-formedness error
     before the DTD tag (such as a space before an XML declaration).
   */
  mSystemId.Truncate();
  mPublicId.Truncate();

  nsresult status;
  parserChannel->GetStatus(&status);
  
  uint64_t offset = 0;
  while (NS_SUCCEEDED(rv) && NS_SUCCEEDED(status)) {
    uint64_t available;
    rv = aStream->Available(&available);
    if (rv == NS_BASE_STREAM_CLOSED) {
      rv = NS_OK;
      available = 0;
    }
    if (NS_FAILED(rv)) {
      parserChannel->Cancel(rv);
      break;
    }
    if (! available)
      break; // blocking input stream has none available when done

    if (available > UINT32_MAX)
      available = UINT32_MAX;

    rv = mListener->OnDataAvailable(parserChannel, nullptr,
                                    aStream,
                                    offset,
                                    (uint32_t)available);
    if (NS_SUCCEEDED(rv))
      offset += available;
    else
      parserChannel->Cancel(rv);
    parserChannel->GetStatus(&status);
  }
  rv = mListener->OnStopRequest(parserChannel, nullptr, status);
  mListener = nullptr;

  return rv;
}

NS_IMETHODIMP
nsSAXXMLReader::ParseAsync(nsIRequestObserver *aObserver)
{
  mParserObserver = aObserver;
  mIsAsyncParse = true;
  return NS_OK;
}

// nsIRequestObserver

NS_IMETHODIMP
nsSAXXMLReader::OnStartRequest(nsIRequest *aRequest, nsISupports *aContext)
{
  NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
  nsresult rv;
  rv = EnsureBaseURI();
  NS_ENSURE_SUCCESS(rv, rv);
  nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
  rv = InitParser(mParserObserver, channel);
  NS_ENSURE_SUCCESS(rv, rv);
  // we don't need or want this anymore
  mParserObserver = nullptr;
  return mListener->OnStartRequest(aRequest, aContext);
}

NS_IMETHODIMP
nsSAXXMLReader::OnStopRequest(nsIRequest *aRequest, nsISupports *aContext,
                              nsresult status)
{
  NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
  NS_ENSURE_STATE(mListener);
  nsresult rv = mListener->OnStopRequest(aRequest, aContext, status);
  mListener = nullptr;
  mIsAsyncParse = false;
  return rv;
}

// nsIStreamListener

NS_IMETHODIMP
nsSAXXMLReader::OnDataAvailable(nsIRequest *aRequest, nsISupports *aContext,
                                nsIInputStream *aInputStream, uint64_t offset,
                                uint32_t count)
{
  NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
  NS_ENSURE_STATE(mListener);
  return mListener->OnDataAvailable(aRequest, aContext, aInputStream, offset,
                                    count);
}

nsresult
nsSAXXMLReader::InitParser(nsIRequestObserver *aObserver, nsIChannel *aChannel)
{
  nsresult rv;

  // setup the parser
  nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  parser->SetContentSink(this);

  int32_t charsetSource = kCharsetFromDocTypeDefault;
  nsAutoCString charset(NS_LITERAL_CSTRING("UTF-8"));
  TryChannelCharset(aChannel, charsetSource, charset);
  parser->SetDocumentCharset(charset, charsetSource);

  rv = parser->Parse(mBaseURI, aObserver);
  NS_ENSURE_SUCCESS(rv, rv);

  mListener = do_QueryInterface(parser, &rv);

  return rv;
}

// from nsDocument.cpp
bool
nsSAXXMLReader::TryChannelCharset(nsIChannel *aChannel,
                                  int32_t& aCharsetSource,
                                  nsACString& aCharset)
{
  if (aCharsetSource >= kCharsetFromChannel)
    return true;
  
  if (aChannel) {
    nsAutoCString charsetVal;
    nsresult rv = aChannel->GetContentCharset(charsetVal);
    if (NS_SUCCEEDED(rv)) {
      nsAutoCString preferred;
      if (!EncodingUtils::FindEncodingForLabel(charsetVal, preferred))
        return false;

      aCharset = preferred;
      aCharsetSource = kCharsetFromChannel;
      return true;
    }
  }

  return false;
}

nsresult
nsSAXXMLReader::EnsureBaseURI()
{
  if (mBaseURI) 
    return NS_OK;

  return NS_NewURI(getter_AddRefs(mBaseURI), "about:blank");
}

nsresult
nsSAXXMLReader::SplitExpatName(const char16_t *aExpatName,
                               nsString &aURI,
                               nsString &aLocalName,
                               nsString &aQName)
{
  /**
   * Adapted from RDFContentSinkImpl
   *
   * Expat can send the following:
   *    localName
   *    namespaceURI<separator>localName
   *    namespaceURI<separator>localName<separator>prefix
   *
   * and we use 0xFFFF for the <separator>.
   *
   */

  NS_ASSERTION(aExpatName, "null passed to handler");
  nsDependentString expatStr(aExpatName);
  int32_t break1, break2 = kNotFound;
  break1 = expatStr.FindChar(char16_t(0xFFFF));

  if (break1 == kNotFound) {
    aLocalName = expatStr; // no namespace
    aURI.Truncate();
    aQName = expatStr;
  } else {
    aURI = StringHead(expatStr, break1);
    break2 = expatStr.FindChar(char16_t(0xFFFF), break1 + 1);
    if (break2 == kNotFound) { // namespace, but no prefix
      aLocalName = Substring(expatStr, break1 + 1);
      aQName = aLocalName;
    } else { // namespace with prefix
      aLocalName = Substring(expatStr, break1 + 1, break2 - break1 - 1);
      aQName = Substring(expatStr, break2 + 1) +
        NS_LITERAL_STRING(":") + aLocalName;
    }
  }

  return NS_OK;
}