Merge cedar into mozilla-central
author Ehsan Akhgari <ehsan@mozilla.com>
Wed, 30 Mar 2011 13:53:35 -0400
changeset 64446 422bbd8245a728ebedbd64433dec253baa6c10d0
parent 64445 cbb7ffa045d365f3b027b95485f27e8a62386666 (current diff)
parent 64429 c2fcc089efaeabd4468d11a7948885e8fcfa0e96 (diff)
child 64448 ad4889b72e448992095fa7acd792aa3e562ff0c4
child 64492 d1dee201929c3002d47e5f7e49e16920985e2103
push id unknown
push user unknown
push date unknown
milestone 2.2a1pre
first release with
nightly linux32
422bbd8245a7 / 4.2a1pre / 20110331030432 / files
nightly linux64
422bbd8245a7 / 4.2a1pre / 20110331030432 / files
nightly mac
422bbd8245a7 / 4.2a1pre / 20110331030432 / files
nightly win32
422bbd8245a7 / 4.2a1pre / 20110331030432 / files
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
releases
nightly linux32
nightly linux64
nightly mac
nightly win32
Merge cedar into mozilla-central
content/base/src/nsContentUtils.cpp
content/canvas/src/nsCanvasRenderingContext2D.cpp
content/html/content/src/nsHTMLCanvasElement.cpp
dom/base/nsGlobalWindow.cpp
intl/uconv/tests/unit/test_bug335531.js
intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp
intl/uconv/ucvlatin/nsUTF32ToUnicode.h
intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp
intl/uconv/ucvlatin/nsUnicodeToUTF32.h
jpeg/cdjpeg.c
jpeg/change.log
jpeg/cjpeg.c
jpeg/ckconfig.c
jpeg/coderules.doc
jpeg/djpeg.c
jpeg/example.c
jpeg/filelist.doc
jpeg/install.doc
jpeg/jconfig-mac-cw.h
jpeg/jconfig.doc
jpeg/jconfig.wat
jpeg/jmemansi.c
jpeg/jmemdos.c
jpeg/jmemdosa.asm
jpeg/jmemname.c
jpeg/jos2fig.h
jpeg/jwinfig.h
jpeg/libjpeg.doc
jpeg/makefile.gen
jpeg/netscape_mods.doc
jpeg/structure.doc
jpeg/usage.doc
jpeg/wizard.doc
js/src/jit-test/tests/basic/bug633890.js
js/src/tests/js1_8/extensions/regress-476871-01.js
layout/base/nsCSSFrameConstructor.cpp
layout/generic/nsFrame.cpp
toolkit/themes/pinstripe/global/tree/sort-asc.gif
toolkit/themes/pinstripe/global/tree/sort-dsc.gif
--- a/README.txt
+++ b/README.txt
@@ -18,8 +18,10 @@ on http://developer.mozilla.org, you can
 mozilla.* Usenet group, or on IRC at irc.mozilla.org. [The Mozilla news groups
 are accessible on Google Groups, or news.mozilla.org with a NNTP reader.]
 
 You can download nightly development builds from the Mozilla FTP server.
 Keep in mind that nightly builds, which are used by Mozilla developers for
 testing, may be buggy. Firefox nightlies, for example, can be found at:
 
     ftp://ftp.mozilla.org/pub/firefox/nightly/latest-trunk/
+            - or -
+    http://nightly.mozilla.org/
--- a/browser/base/content/test/Makefile.in
+++ b/browser/base/content/test/Makefile.in
@@ -200,16 +200,17 @@ endif
                  browser_pluginnotification.js \
                  browser_relatedTabs.js \
                  browser_sanitize-passwordDisabledHosts.js \
                  browser_sanitize-sitepermissions.js \
                  browser_sanitize-timespans.js \
                  browser_clearplugindata.js \
                  browser_clearplugindata.html \
                  browser_clearplugindata_noage.html \
+                 browser_popupUI.js \
                  browser_sanitizeDialog.js \
                  browser_scope.js \
                  browser_selectTabAtIndex.js \
                  browser_tab_dragdrop.js \
                  browser_tab_dragdrop2.js \
                  browser_tab_dragdrop2_frame1.xul \
                  browser_tabfocus.js \
                  browser_tabs_isActive.js \
@@ -244,19 +245,16 @@ endif
                  app_subframe_bug575561.html \
                  browser_contentAreaClick.js \
                  browser_addon_bar_close_button.js \
                  browser_addon_bar_shortcut.js \
                  browser_addon_bar_aomlistener.js \
                  test_bug628179.html \
                  $(NULL)
 
-# compartment-disabled
-#                 browser_popupUI.js \
-
 ifneq (cocoa,$(MOZ_WIDGET_TOOLKIT))
 _BROWSER_FILES += \
 		browser_bug462289.js \
 		$(NULL)
 else
 _BROWSER_FILES += \
 		browser_bug565667.js \
 		browser_customize.js \
--- a/browser/base/content/test/browser_popupUI.js
+++ b/browser/base/content/test/browser_popupUI.js
@@ -16,17 +16,17 @@ function test() {
     "data:text/html,<html><script>popup=open('about:blank','','width=300,height=200')</script>";
 }
 
 function findPopup() {
   var enumerator = Services.wm.getEnumerator("navigator:browser");
 
   while (enumerator.hasMoreElements()) {
     let win = enumerator.getNext();
-    if (win.content == content.wrappedJSObject.popup) {
+    if (win.content.wrappedJSObject == content.wrappedJSObject.popup) {
       testPopupUI(win);
       return;
     }
   }
 
   throw "couldn't find the popup";
 }
 
--- a/browser/components/preferences/advanced.xul
+++ b/browser/components/preferences/advanced.xul
@@ -122,19 +122,16 @@
                   name="security.disable_button.openCertManager"
                   type="bool"/>
       <preference id="security.OCSP.disable_button.managecrl"
                   name="security.OCSP.disable_button.managecrl"
                   type="bool"/>
       <preference id="security.disable_button.openDeviceManager"
                   name="security.disable_button.openDeviceManager"
                   type="bool"/>
-      <preference id="privacy.donottrackheader.enabled"
-                  name="privacy.donottrackheader.enabled"
-                  type="bool"/>
     </preferences>
     
 #ifdef HAVE_SHELL_SERVICE
     <stringbundle id="bundleShell" src="chrome://browser/locale/shellservice.properties"/>
     <stringbundle id="bundleBrand" src="chrome://branding/locale/brand.properties"/>
 #endif
 
     <script type="application/javascript" src="chrome://browser/content/preferences/advanced.js"/>
@@ -189,20 +186,16 @@
                       accesskey="&allowHWAccel.accesskey;"
                       preference="layers.acceleration.disabled"/>
             <checkbox id="checkSpelling"
                       label="&checkSpelling.label;"
                       accesskey="&checkSpelling.accesskey;"
                       onsyncfrompreference="return gAdvancedPane.readCheckSpelling();"
                       onsynctopreference="return gAdvancedPane.writeCheckSpelling();"
                       preference="layout.spellcheckDefault"/>
-            <checkbox id="privacyDoNotTrackPrefs"
-                      label="&doNotTrack.label;"
-                      accesskey="&doNotTrack.accesskey;"
-                      preference="privacy.donottrackheader.enabled"/>
           </groupbox>
 
 #ifdef HAVE_SHELL_SERVICE
           <!-- System Defaults -->
           <groupbox id="systemDefaultsGroup" orient="vertical">
             <caption label="&systemDefaults.label;"/>
 
             <hbox id="checkDefaultBox" align="center" flex="1">      
--- a/browser/components/preferences/privacy.xul
+++ b/browser/components/preferences/privacy.xul
@@ -52,16 +52,21 @@
          xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul"
          xmlns:html="http://www.w3.org/1999/xhtml">
 
   <prefpane id="panePrivacy"
             onpaneload="gPrivacyPane.init();"
             helpTopic="prefs-privacy">
 
     <preferences id="privacyPreferences">
+  
+      <!-- Tracking -->
+      <preference id="privacy.donottrackheader.enabled"
+                  name="privacy.donottrackheader.enabled"
+                  type="bool"/>
 
       <!-- XXX button prefs -->
       <preference id="pref.privacy.disable_button.cookie_exceptions"
                   name="pref.privacy.disable_button.cookie_exceptions"
                   type="bool"/>
       <preference id="pref.privacy.disable_button.view_cookies"
                   name="pref.privacy.disable_button.view_cookies"
                   type="bool"/>
@@ -111,16 +116,26 @@
                   type="bool"/>
 
     </preferences>
     
     <stringbundle id="bundlePreferences" src="chrome://browser/locale/preferences/preferences.properties"/>
     
     <script type="application/javascript" src="chrome://browser/content/preferences/privacy.js"/>
 
+    <!-- Tracking -->
+    <groupbox id="trackingGroup">
+      <caption label="&tracking.label;"/>
+
+      <checkbox id="privacyDoNotTrackPrefs"
+                label="&doNotTrack.label;"
+                accesskey="&doNotTrack.accesskey;"
+                preference="privacy.donottrackheader.enabled"/>
+    </groupbox>
+
     <!-- History -->
     <groupbox id="historyGroup">
       <caption label="&history.label;"/>
 
       <hbox align="center">
         <label id="historyModeLabel"
                control="historyMode"
                accesskey="&historyHeader.pre.accesskey;">&historyHeader.pre.label;</label>
--- a/browser/locales/en-US/chrome/browser/preferences/advanced.dtd
+++ b/browser/locales/en-US/chrome/browser/preferences/advanced.dtd
@@ -16,18 +16,16 @@
 <!ENTITY useAutoScroll.label             "Use autoscrolling">
 <!ENTITY useAutoScroll.accesskey         "a">
 <!ENTITY useSmoothScrolling.label        "Use smooth scrolling">
 <!ENTITY useSmoothScrolling.accesskey    "m">
 <!ENTITY allowHWAccel.label              "Use hardware acceleration when available">
 <!ENTITY allowHWAccel.accesskey          "h">
 <!ENTITY checkSpelling.label             "Check my spelling as I type">
 <!ENTITY checkSpelling.accesskey         "t">
-<!ENTITY doNotTrack.label                "Tell web sites I do not want to be tracked">
-<!ENTITY doNotTrack.accesskey            "d">
 
 <!ENTITY systemDefaults.label            "System Defaults">
 <!ENTITY alwaysCheckDefault.label        "Always check to see if &brandShortName; is the default browser on startup"><!--XXX-->
 <!ENTITY alwaysCheckDefault.accesskey    "w">
 <!ENTITY checkNow.label                  "Check Now">
 <!ENTITY checkNow.accesskey              "N">
 <!ENTITY submitCrashes.label             "Submit crash reports">
 <!ENTITY submitCrashes.accesskey         "S">
--- a/browser/locales/en-US/chrome/browser/preferences/privacy.dtd
+++ b/browser/locales/en-US/chrome/browser/preferences/privacy.dtd
@@ -1,8 +1,13 @@
+<!ENTITY tracking.label                 "Tracking">
+
+<!ENTITY doNotTrack.label               "Tell web sites I do not want to be tracked">
+<!ENTITY doNotTrack.accesskey           "d">
+
 <!ENTITY  history.label                 "History">
 
 <!ENTITY  locationBar.label             "Location Bar">
 
 <!ENTITY  locbar.pre.label              "When using the location bar, suggest:">
 <!ENTITY  locbar.pre.accessKey          "u">
 <!ENTITY  locbar.post.label             "">
 <!ENTITY  locbar.both.label             "History and Bookmarks">
@@ -25,17 +30,16 @@
 <!ENTITY  askEachTime.label             "ask me every time">
 
 <!ENTITY  cookieExceptions.label        "Exceptions…">
 <!ENTITY  cookieExceptions.accesskey    "E">
 
 <!ENTITY  showCookies.label             "Show Cookies…">
 <!ENTITY  showCookies.accesskey         "S">
 
-
 <!ENTITY  historyHeader.pre.label          "&brandShortName; will:">
 <!ENTITY  historyHeader.pre.accesskey      "w">
 <!ENTITY  historyHeader.remember.label     "Remember history">
 <!ENTITY  historyHeader.dontremember.label "Never remember history">
 <!ENTITY  historyHeader.custom.label       "Use custom settings for history">
 <!ENTITY  historyHeader.post.label         "">
 
 <!ENTITY  rememberDescription.label      "&brandShortName; will remember your browsing, download, form and search history, and keep cookies from Web sites you visit.">
--- a/browser/themes/winstripe/browser/browser.css
+++ b/browser/themes/winstripe/browser/browser.css
@@ -51,17 +51,17 @@
 %include ../../browserShared.inc
 %filter substitution
 %define toolbarHighlight rgba(255,255,255,.5)
 %define selectedTabHighlight rgba(255,255,255,.7)
 %define toolbarShadowColor rgba(10%,10%,10%,.4)
 %define toolbarShadowOnTab -moz-linear-gradient(bottom, rgba(10%,10%,10%,.4) 1px, transparent 1px)
 %define bgTabTexture -moz-linear-gradient(transparent, hsla(0,0%,45%,.1) 1px, hsla(0,0%,32%,.2) 80%, hsla(0,0%,0%,.2))
 %define bgTabTextureHover -moz-linear-gradient(hsla(0,0%,100%,.3) 1px, hsla(0,0%,75%,.2) 80%, hsla(0,0%,60%,.2))
-%define navbarTextboxCustomBorder border-color: rgba(0,0,0,.25) rgba(0,0,0,.32) rgba(0,0,0,.37);
+%define navbarTextboxCustomBorder border-color: rgba(0,0,0,.32);
 
 #menubar-items {
   -moz-box-orient: vertical; /* for flex hack */
 }
 
 #main-menubar {
   -moz-box-flex: 1; /* make menu items expand to fill toolbar height */
 }
@@ -600,22 +600,21 @@ menuitem.bookmark-item {
 #nav-bar .toolbarbutton-1 > .toolbarbutton-menubutton-dropmarker,
 #nav-bar .toolbarbutton-1 {
   -moz-appearance: none;
   padding: 1px 5px;
   background: rgba(151,152,153,.05)
               -moz-linear-gradient(rgba(251,252,253,.95), rgba(246,247,248,.47) 49%, 
                                    rgba(231,232,233,.45) 51%, rgba(225,226,229,.3));
   background-clip: padding-box;
-  border-radius: 4.5px;
+  border-radius: 3.5px;
   border: 1px solid;
   border-color: rgba(0,0,0,.12) rgba(0,0,0,.19) rgba(0,0,0,.38);
   box-shadow: 0 0 0 1px rgba(255,255,255,.3) inset,
-              0 0 0 2px rgba(255,255,255,.1) inset,
-              0 1px 0 rgba(0,0,0,.15);
+              0 0 0 2px rgba(255,255,255,.1) inset;
   color: black;
   text-shadow: 0 0 2px white;
 }
 
 #nav-bar .toolbarbutton-1 > .toolbarbutton-menubutton-dropmarker,
 #navigator-toolbox[iconsize="small"][mode="icons"] > #nav-bar .toolbarbutton-1 > .toolbarbutton-menubutton-button,
 #navigator-toolbox[iconsize="small"][mode="icons"] > #nav-bar .toolbarbutton-1 {
   padding-left: 3px;
@@ -666,17 +665,16 @@ menuitem.bookmark-item {
 
 #nav-bar .toolbarbutton-1 > .toolbarbutton-menubutton-button:not([disabled="true"]):not(:active):hover,
 #nav-bar .toolbarbutton-1:not([open="true"]):not(:active):hover > .toolbarbutton-menubutton-dropmarker:not([disabled="true"]),
 #nav-bar .toolbarbutton-1:not([type="menu-button"]):not([disabled="true"]):not([checked="true"]):not([open="true"]):not(:active):hover {
   background-color: hsla(190,60%,70%,.5);
   border-color: hsla(190,50%,65%,.8) hsla(190,50%,50%,.8) hsla(190,50%,40%,.8);
   box-shadow: 0 0 0 1px rgba(255,255,255,.3) inset,
               0 0 0 1.5px rgba(255,255,255,.1) inset,
-              0 1px 0 rgba(0,0,0,.1),
               0 0 3.5px hsl(190,90%,80%);
   -moz-transition: background-color .4s ease-in,
                    border-color .3s ease-in,
                    box-shadow .3s ease-in;
 }
 
 #nav-bar .toolbarbutton-1 > .toolbarbutton-menubutton-button:not([disabled="true"]):hover:active,
 #nav-bar .toolbarbutton-1:hover:active > .toolbarbutton-menubutton-dropmarker:not([disabled="true"]),
@@ -796,27 +794,25 @@ toolbar[mode="full"] .toolbarbutton-1 > 
   margin-bottom: -2px;
   border: none;
   background-image: -moz-linear-gradient(rgba(251,252,253,.97), rgba(246,247,248,.5) 49%, 
                                          rgba(231,232,233,.45) 51%, rgba(225,226,229,.2));
   box-shadow: 0 0 0 1px rgba(255,255,255,.3) inset,
               0 0 0 2px rgba(255,255,255,.1) inset,
               0 0 0 1px rgba(0,0,0,.15),
               0 1px 0 rgba(0,0,0,.4),
-              0 1px 1px rgba(0,0,0,.3),
-              1px 2px 1px rgba(0,0,0,.2);
+              0 1px 1px rgba(0,0,0,.3);
 }
 
 #navigator-toolbox[iconsize="large"][mode="icons"] > #nav-bar #back-button:not([disabled="true"]):not([open="true"]):not(:active):hover {
   box-shadow: 0 0 0 1px rgba(255,255,255,.3) inset,
               0 0 0 2px rgba(255,255,255,.1) inset,
               0 0 0 1px hsla(190,50%,40%,.3),
               0 1px 0 rgba(0,0,0,.4),
               0 1px 1px rgba(0,0,0,.3),
-              1px 2px 1px rgba(0,0,0,.2),
               0 0 5px 1px hsl(190,90%,80%);
 }
 
 #navigator-toolbox[iconsize="large"][mode="icons"] > #nav-bar #back-button:not([disabled="true"]):hover:active,
 #navigator-toolbox[iconsize="large"][mode="icons"] > #nav-bar #back-button[open="true"] {
   box-shadow: 0 0 6.5px rgba(0,0,0,.4) inset,
               0 0 2px rgba(0,0,0,.4) inset,
               0 0 0 1px rgba(0,0,0,.65),
@@ -1097,19 +1093,17 @@ toolbar[mode="full"] .toolbarbutton-1 > 
 
 #urlbar,
 .searchbar-textbox {
   -moz-appearance: none;
   margin: 1px 3px;
   padding: 2px;
   background-clip: padding-box;
   border: 1px solid ThreeDDarkShadow;
-  border-radius: 4px;
-  box-shadow: 0 1px 0 rgba(0,0,0,.1) inset,
-              0 1px 0 rgba(255,255,255,.4);
+  border-radius: 3.5px;
 }
 
 @media all and (-moz-windows-default-theme) {
   #urlbar,
   .searchbar-textbox {
     @navbarTextboxCustomBorder@
   }
 }
--- a/config/autoconf.mk.in
+++ b/config/autoconf.mk.in
@@ -163,16 +163,20 @@ MOZ_TREMOR = @MOZ_TREMOR@
 MOZ_WEBM = @MOZ_WEBM@
 VPX_AS = @VPX_AS@
 VPX_ASFLAGS = @VPX_ASFLAGS@
 VPX_DASH_C_FLAG = @VPX_DASH_C_FLAG@
 VPX_AS_CONVERSION = @VPX_AS_CONVERSION@
 VPX_ASM_SUFFIX = @VPX_ASM_SUFFIX@
 VPX_X86_ASM = @VPX_X86_ASM@
 VPX_ARM_ASM = @VPX_ARM_ASM@
+LIBJPEG_TURBO_AS = @LIBJPEG_TURBO_AS@
+LIBJPEG_TURBO_ASFLAGS = @LIBJPEG_TURBO_ASFLAGS@
+LIBJPEG_TURBO_X86_ASM = @LIBJPEG_TURBO_X86_ASM@
+LIBJPEG_TURBO_X64_ASM = @LIBJPEG_TURBO_X64_ASM@
 NS_PRINTING = @NS_PRINTING@
 MOZ_PDF_PRINTING = @MOZ_PDF_PRINTING@
 MOZ_CRASHREPORTER = @MOZ_CRASHREPORTER@
 MOZ_HELP_VIEWER = @MOZ_HELP_VIEWER@
 MOC= @MOC@
 MOZ_NSS_PATCH = @MOZ_NSS_PATCH@
 MOZ_WEBGL = @MOZ_WEBGL@
 MOZ_ANGLE = @MOZ_ANGLE@
new file mode 100755
--- /dev/null
+++ b/config/find_OOM_errors.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+
+usage = """%prog: A test for OOM conditions in the shell.
+
+%prog finds segfaults and other errors caused by incorrect handling of
+allocation during OOM (out-of-memory) conditions.
+"""
+
+help = """Check for regressions only. This runs a set of files with a known
+number of OOM errors (specified by REGRESSION_COUNT), and exits with a non-zero
+result if more or less errors are found. See js/src/Makefile.in for invocation.
+"""
+
+
+import hashlib
+import re
+import shlex
+import subprocess
+import sys
+import threading
+import time
+
+from optparse import OptionParser
+
+#####################################################################
+# Utility functions
+#####################################################################
+def run(args, stdin=None):  # Run a command given as a shell-style string or an argv list; returns (stdout, stderr, returncode).
+  class ThreadWorker(threading.Thread):  # Reads one pipe line by line on its own thread.
+    def __init__(self, pipe):
+      super(ThreadWorker, self).__init__()
+      self.all = ""  # everything read from the pipe so far
+      self.pipe = pipe
+      self.setDaemon(True)
+
+    def run(self):
+      while True:
+        line = self.pipe.readline()
+        if line == '': break  # empty string from readline() means end of stream
+        else:
+          self.all += line
+
+  try:
+    if type(args) == str:
+      args = shlex.split(args)  # split a command string the way a shell would
+
+    args = [str(a) for a in args] # convert to strs
+
+    stdin_pipe = subprocess.PIPE if stdin else None  # only attach a stdin pipe when there is input to send
+    proc = subprocess.Popen(args, stdin=stdin_pipe, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if stdin_pipe:
+      proc.stdin.write(stdin)
+      proc.stdin.close()  # closing signals end of input to the child
+
+    stdout_worker = ThreadWorker(proc.stdout)
+    stderr_worker = ThreadWorker(proc.stderr)
+    stdout_worker.start()  # both readers run while the child does, one per output pipe
+    stderr_worker.start()
+
+    proc.wait()
+    stdout_worker.join()
+    stderr_worker.join()
+
+  except KeyboardInterrupt, e:  # Python 2 except syntax; Ctrl-C ends the whole script
+    sys.exit(-1)
+
+  stdout, stderr = stdout_worker.all, stderr_worker.all
+  result = (stdout, stderr, proc.returncode)
+  return result
+
+def get_js_files():  # Returns the paths of all *.js files under ../jit-test/tests (relative to the current directory).
+  (out, err, exit) = run('find ../jit-test/tests -name "*.js"')
+  if (err, exit) != ("", 0):  # give up only when find failed or wrote to stderr, not when it succeeded
+    sys.exit("Wrong directory, run from an objdir")
+  return out.split()  # whitespace-split, so paths containing spaces are not supported
+
+
+
+#####################################################################
+# Blacklisting
+#####################################################################
+def in_blacklist(sig):  # True when this signature string has already been recorded in the module-level blacklist dict.
+  return sig in blacklist
+
+def add_to_blacklist(sig):  # Record one more occurrence of this signature (first sighting starts at 1).
+  seen_so_far = blacklist.get(sig, 0)
+  blacklist[sig] = seen_so_far + 1
+
+# How often each individual line occurs is what matters here.
+def count_lines():
+  """Keep track of the amount of times individual lines occur, in order to
+     prioritize the errors which occur most frequently."""
+  counts = {}
+  for string,count in blacklist.items():  # each recorded signature and how many times it was seen
+    for line in string.split("\n"):
+      counts[line] = counts.get(line, 0) + count  # a line inherits the count of every signature containing it
+
+  lines = []
+  for k,v in counts.items():
+    lines.append("%6d: %s" % (v,k))  # right-aligned count first, so the text sort below orders by count (up to 6 digits)
+
+  lines.sort()
+
+  countlog = file("../OOM_count_log", "w")  # Python 2 built-in file(); mode "w" rewrites the log on every call
+  countlog.write("\n".join(lines))
+  countlog.flush()
+  countlog.close()
+
+
+#####################################################################
+# Output cleaning
+#####################################################################
+def clean_voutput(err):  # Normalize valgrind log text: strip run-specific numbers so identical problems compare equal.
+  # Skip what we can't reproduce
+  err = re.sub(r"^--\d+-- run: /usr/bin/dsymutil \"shell/js\"$", "", err, flags=re.MULTILINE)  # flags= on re.sub needs Python 2.7+
+  err = re.sub(r"^==\d+==", "", err, flags=re.MULTILINE)  # drop the ==N== line prefix
+  err = re.sub(r"^\*\*\d+\*\*", "", err, flags=re.MULTILINE)  # drop the **N** line prefix
+  err = re.sub(r"^\s+by 0x[0-9A-Fa-f]+: ", "by: ", err, flags=re.MULTILINE)  # stack frames: replace the address with a bare "by: "
+  err = re.sub(r"^\s+at 0x[0-9A-Fa-f]+: ", "at: ", err, flags=re.MULTILINE)  # same for the innermost "at" frame
+  err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is not stack'd)", r"\1\2", err, flags=re.MULTILINE)  # remove the address digits
+  err = re.sub(r"(^\s+Invalid write of size )\d+", r"\1x", err, flags=re.MULTILINE)  # size becomes a literal "x"
+  err = re.sub(r"(^\s+Invalid read of size )\d+", r"\1x", err, flags=re.MULTILINE)
+  err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is )\d+( bytes inside a block of size )[0-9,]+( free'd)", r"\1\2\3\4", err, flags=re.MULTILINE)  # remove address, offset and block size
+
+  # Skip the repeating bit due to the segfault
+  lines = []
+  for l in err.split('\n'):
+    if l == " Process terminating with default action of signal 11 (SIGSEGV)":  # exact match, after the prefix was stripped above
+      break  # everything from this line on is discarded
+    lines.append(l)
+  err = '\n'.join(lines)
+
+  return err
+
+def remove_failed_allocation_backtraces(err):  # Returns err without the "site of the failed allocation" header and its by:/at: frames.
+  lines = []
+
+  add = True  # whether the current line is copied to the result
+  for l in err.split('\n'):
+
+    # Set start and end conditions for including text
+    if l == " The site of the failed allocation is:":
+      add = False  # start skipping: the header and the frames that follow it
+    elif l[:3] not in ['by:', 'at:']:  # compare a 3-char prefix; the old l[:2] test could never match a frame line
+      add = True  # first non-frame line ends the skipped backtrace
+
+    if add:
+      lines.append(l)
+
+
+  err = '\n'.join(lines)
+
+  return err
+
+
+def clean_output(err):  # Collapse the two-line malloc "pointer being freed was not allocated" report into one fixed string.
+  err = re.sub(r"^js\(\d+,0x[0-9a-f]+\) malloc: \*\*\* error for object 0x[0-9a-f]+: pointer being freed was not allocated\n\*\*\* set a breakpoint in malloc_error_break to debug\n$", "pointer being freed was not allocated", err, flags=re.MULTILINE)  # "breakpoint" was misspelled, so this never matched
+
+  return err
+
+
+#####################################################################
+# Consts, etc
+#####################################################################
+
+command_template = 'shell/js' \
+                 + ' -m -j -p' \
+                 + ' -e "const platform=\'darwin\'; const libdir=\'../jit-test/lib/\';"' \
+                 + ' -f ../jit-test/lib/prolog.js' \
+                 + ' -f %s'  # %s is the test file; callers append further flags before formatting
+
+
+# Blacklists are things we don't want to see in our logs again (though we do
+# want to count them when they happen). Whitelists we do want to see in our
+# logs again, principally because the information we have isn't enough.
+
+blacklist = {}  # maps a signature string to the number of times it has been seen
+add_to_blacklist(r"('', '', 1)") # 1 means OOM if the shell hasn't launched yet.
+add_to_blacklist(r"('', 'out of memory\n', 1)")  # keys are str() of an (stdout, stderr, exitcode) tuple
+
+whitelist = set()  # signatures that are always reported even when already blacklisted
+whitelist.add(r"('', 'out of memory\n', -11)") # -11 means OOM
+whitelist.add(r"('', 'out of memory\nout of memory\n', -11)")
+
+
+
+#####################################################################
+# Program
+#####################################################################
+
+# Options
+parser = OptionParser(usage=usage)
+parser.add_option("-r", "--regression", action="store", metavar="REGRESSION_COUNT", help=help,
+                  type="int", dest="regression", default=0) # TODO: support a value of zero, eventually
+
+(OPTIONS, args) = parser.parse_args()  # a default of 0 means "manual mode"; any non-zero count selects regression mode
+
+
+if OPTIONS.regression:
+  # TODO: This should be expanded as we get a better hang of the OOM problems.
+  # For now, we'll just check that the number of OOMs in one short file does not
+  # increase.
+  files = ["../jit-test/tests/arguments/args-createontrace.js"]
+else:
+  files = get_js_files()
+
+  # Use a command-line arg to reduce the set of files
+  if len (args):
+    files = [f for f in files if f.find(args[0]) != -1]  # keep only paths containing the first positional argument
+
+
+if not OPTIONS.regression:
+  # Only manual runs write a logfile; regression runs are automated for tinderbox and never use it.
+  log = file("../OOM_log", "w")
+
+
+num_failures = 0  # failures counted in regression mode only
+for f in files:
+
+  # Run it once to establish boundaries
+  command = (command_template + ' -O') % (f)
+  out, err, exit = run(command)
+  max = re.match(".*OOM max count: (\d+).*", out, flags=re.DOTALL).groups()[0]  # raises AttributeError if the shell printed no max count
+  max = int(max)
+
+  # OOMs don't recover well for the first 20 allocations or so.
+  # TODO: revisit this.
+  for i in range(20, max):
+
+    if not OPTIONS.regression:  # progress output for manual runs only (the default is 0, never None)
+      print "Testing allocation %d/%d in %s" % (i,max,f)
+
+    command = (command_template + ' -A %d') % (f, i)  # passes allocation index i to the shell via -A
+    out, err, exit = run(command)
+
+    # Success (5 is SM's exit code for controlled errors)
+    if exit == 5 and err.find("out of memory") != -1:
+      continue
+
+    # Failure
+    else:
+
+      if OPTIONS.regression:
+        # Just count them
+        num_failures += 1
+        continue
+
+      #########################################################################
+      # The regression test ends above. The rest of this is for running the
+      # script manually.
+      #########################################################################
+
+      problem = str((out, err, exit))  # signature string used as the blacklist key
+      if in_blacklist(problem) and problem not in whitelist:
+        add_to_blacklist(problem)  # still counted, just not reported again
+        continue
+
+      add_to_blacklist(problem)
+
+
+      # Get valgrind output for a good stack trace
+      vcommand = "valgrind --dsymutil=yes -q --log-file=OOM_valgrind_log_file " + command
+      run(vcommand)
+      vout = file("OOM_valgrind_log_file").read()
+      vout = clean_voutput(vout)
+      sans_alloc_sites = remove_failed_allocation_backtraces(vout)
+
+      # Don't print duplicate information
+      if in_blacklist(sans_alloc_sites):
+        add_to_blacklist(sans_alloc_sites)
+        continue
+
+      add_to_blacklist(sans_alloc_sites)
+
+      log.write ("\n")
+      log.write ("\n")
+      log.write ("=========================================================================")
+      log.write ("\n")
+      log.write ("An allocation failure at\n\tallocation %d/%d in %s\n\tcauses problems (detected using bug 624094)" % (i, max, f))
+      log.write ("\n")
+      log.write ("\n")
+
+      log.write ("Command (from obj directory, using patch from bug 624094):\n  " + command)
+      log.write ("\n")
+      log.write ("\n")
+      log.write ("stdout, stderr, exitcode:\n  " + problem)
+      log.write ("\n")
+      log.write ("\n")
+
+      double_free = err.find("pointer being freed was not allocated") != -1
+      oom_detected = err.find("out of memory") != -1
+      multiple_oom_detected = err.find("out of memory\nout of memory") != -1
+      segfault_detected = exit == -11
+
+      log.write ("Diagnosis: ")
+      log.write ("\n")
+      if multiple_oom_detected:
+        log.write ("  - Multiple OOMs reported")
+        log.write ("\n")
+      if segfault_detected:
+        log.write ("  - segfault")
+        log.write ("\n")
+      if not oom_detected:
+        log.write ("  - No OOM checking")
+        log.write ("\n")
+      if double_free:
+        log.write ("  - Double free")
+        log.write ("\n")
+
+      log.write ("\n")
+
+      log.write ("Valgrind info:\n" + vout)
+      log.write ("\n")
+      log.write ("\n")
+      log.flush()
+
+  if not OPTIONS.regression:  # the per-line tally is a manual-mode aid; refreshed after each file
+    count_lines()
+
+
+
+# Do the actual regression check
+if OPTIONS.regression:
+  expected_num_failures = OPTIONS.regression  # the count passed with -r/--regression
+
+  if num_failures != expected_num_failures:  # both more and fewer failures than expected are reported as a failure
+
+    print "TEST-UNEXPECTED-FAIL |",
+    if num_failures > expected_num_failures:
+      print "More out-of-memory errors were found (%s) than expected (%d). This probably means an allocation site has been added without a NULL-check. If this is unavoidable, you can account for it by updating Makefile.in." % (num_failures, expected_num_failures),
+    else:
+      print "Congratulations, you have removed %d out-of-memory error(s) (%d remain)! Please account for it by updating Makefile.in." % (expected_num_failures - num_failures, num_failures),
+    sys.exit(-1)  # non-zero exit status on any mismatch
+  else:
+    print 'TEST-PASS | find_OOM_errors | Found the expected number of OOM errors (%d)' % (expected_num_failures)
+
--- a/configure.in
+++ b/configure.in
@@ -4966,16 +4966,21 @@ MOZ_MEDIA=
 MOZ_WEBM=1
 VPX_AS=
 VPX_ASFLAGS=
 VPX_AS_DASH_C_FLAG=
 VPX_AS_CONVERSION=
 VPX_ASM_SUFFIX=
 VPX_X86_ASM=
 VPX_ARM_ASM=
+MOZ_LIBJPEG_TURBO=1
+LIBJPEG_TURBO_AS=
+LIBJPEG_TURBO_ASFLAGS=
+LIBJPEG_TURBO_X86_ASM=
+LIBJPEG_TURBO_X64_ASM=
 MOZ_PANGO=1
 MOZ_PERMISSIONS=1
 MOZ_PLACES=1
 MOZ_PLUGINS=1
 MOZ_PREF_EXTENSIONS=1
 MOZ_PROFILELOCKING=1
 MOZ_PSM=1
 MOZ_RDF=1
@@ -6437,16 +6442,95 @@ MOZ_ARG_WITH_STRING(crashreporter-enable
     MOZ_CRASHREPORTER_ENABLE_PERCENT="$val"])
 
 if test -z "$MOZ_CRASHREPORTER_ENABLE_PERCENT"; then
    MOZ_CRASHREPORTER_ENABLE_PERCENT=100
 fi
 AC_DEFINE_UNQUOTED(MOZ_CRASHREPORTER_ENABLE_PERCENT, $MOZ_CRASHREPORTER_ENABLE_PERCENT)
 
 dnl ========================================================
+dnl = libjpeg-turbo configuration
+dnl ========================================================
+
+MOZ_ARG_DISABLE_BOOL(libjpeg_turbo,
+[ --disable-libjpeg-turbo  Disable optimized jpeg decoding routines],
+    MOZ_LIBJPEG_TURBO=,
+    MOZ_LIBJPEG_TURBO=1)
+
+dnl Detect if we can use yasm to compile libjpeg-turbo's optimized assembly
+dnl files.
+
+if test -n "$MOZ_LIBJPEG_TURBO"; then
+
+  dnl Do we support libjpeg-turbo on this platform?
+  case "$OS_ARCH:$OS_TEST" in
+  Linux:x86|Linux:i?86)
+    LIBJPEG_TURBO_ASFLAGS="-f elf32 -rnasm -pnasm -DPIC -DELF"
+    LIBJPEG_TURBO_X86_ASM=1
+  ;;
+  Linux:x86_64)
+    LIBJPEG_TURBO_ASFLAGS="-f elf64 -rnasm -pnasm -D__x86_64__ -DPIC -DELF"
+    LIBJPEG_TURBO_X64_ASM=1
+  ;;
+  SunOS:i?86)
+    LIBJPEG_TURBO_ASFLAGS="-f elf32 -rnasm -pnasm -DPIC -DELF"
+    LIBJPEG_TURBO_X86_ASM=1
+  ;;
+  SunOS:x86_64)
+    LIBJPEG_TURBO_ASFLAGS="-f elf64 -rnasm -pnasm -D__x86_64__ -DPIC -DELF"
+    LIBJPEG_TURBO_X64_ASM=1
+  ;;
+  Darwin:i?86)
+    LIBJPEG_TURBO_ASFLAGS="-f macho32 -rnasm -pnasm -DPIC -DMACHO"
+    LIBJPEG_TURBO_X86_ASM=1
+  ;;
+  Darwin:x86_64)
+    LIBJPEG_TURBO_ASFLAGS="-f macho64 -rnasm -pnasm -D__x86_64__ -DPIC -DMACHO"
+    LIBJPEG_TURBO_X64_ASM=1
+  ;;
+  WINNT:x86|WINNT:i?86)
+    LIBJPEG_TURBO_ASFLAGS="-f win32 -rnasm -pnasm -DPIC -DWIN32"
+    LIBJPEG_TURBO_X86_ASM=1
+  ;;
+  WINNT:x86_64)
+    LIBJPEG_TURBO_ASFLAGS="-f win64 -rnasm -pnasm -D__x86_64__ -DPIC -DWIN64"
+    LIBJPEG_TURBO_X64_ASM=1
+  ;;
+  esac
+
+fi
+
+dnl If we're on a system which supports libjpeg-turbo's asm routines and
+dnl --disable-libjpeg-turbo wasn't passed, check for yasm, and error out if it
+dnl doesn't exist or we have too old of a version.
+if test -n "$LIBJPEG_TURBO_X86_ASM" -o -n "$LIBJPEG_TURBO_X64_ASM" ; then
+    AC_MSG_CHECKING([for YASM assembler])
+    AC_CHECK_PROGS(LIBJPEG_TURBO_AS, yasm, "")
+
+    if test -z "$LIBJPEG_TURBO_AS" ; then
+        AC_MSG_ERROR([yasm is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you do not appear to have yasm installed.  Either install it or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
+    fi
+
+    dnl Check for yasm 1.1 or greater.
+    if test "$_YASM_MAJOR_VERSION" -lt "1" -o \( "$_YASM_MAJOR_VERSION" -eq "1" -a "$_YASM_MINOR_VERSION" -lt "1" \) ; then
+        AC_MSG_ERROR([yasm 1.1 or greater is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you appear to have version $_YASM_MAJOR_VERSION.$_YASM_MINOR_VERSION.  Upgrade to the newest version or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
+    fi
+fi
+
+if test -n "$LIBJPEG_TURBO_X86_ASM"; then
+    AC_DEFINE(LIBJPEG_TURBO_X86_ASM)
+elif test -n "$LIBJPEG_TURBO_X64_ASM"; then
+    AC_DEFINE(LIBJPEG_TURBO_X64_ASM)
+elif test -n "$MOZ_LIBJPEG_TURBO"; then
+    dnl Warn if we're not building the optimized routines, even though the user
+    dnl didn't specify --disable-libjpeg-turbo.
+    AC_MSG_WARN([No assembler or assembly support for libjpeg-turbo.  Using unoptimized C routines.])
+fi
+
+dnl ========================================================
 dnl = Enable compilation of specific extension modules
 dnl ========================================================
 
 MOZ_ARG_ENABLE_STRING(extensions,
 [  --enable-extensions     Enable extensions],
 [ for option in `echo $enableval | sed 's/,/ /g'`; do
     if test "$option" = "yes" -o "$option" = "all"; then
         AC_MSG_ERROR([--enable-extensions=$option is no longer supported.])
@@ -9204,16 +9288,20 @@ AC_SUBST(MOZ_OGG)
 AC_SUBST(MOZ_ALSA_LIBS)
 AC_SUBST(VPX_AS)
 AC_SUBST(VPX_ASFLAGS)
 AC_SUBST(VPX_DASH_C_FLAG)
 AC_SUBST(VPX_AS_CONVERSION)
 AC_SUBST(VPX_ASM_SUFFIX)
 AC_SUBST(VPX_X86_ASM)
 AC_SUBST(VPX_ARM_ASM)
+AC_SUBST(LIBJPEG_TURBO_AS)
+AC_SUBST(LIBJPEG_TURBO_ASFLAGS)
+AC_SUBST(LIBJPEG_TURBO_X86_ASM)
+AC_SUBST(LIBJPEG_TURBO_X64_ASM)
 
 if test "$USING_HCC"; then
    CC='${topsrcdir}/build/hcc'
    CC="$CC '$_OLDCC'"
    CXX='${topsrcdir}/build/hcpp'
    CXX="$CXX '$_OLDCXX'"
    AC_SUBST(CC)
    AC_SUBST(CXX)
--- a/content/base/public/nsContentUtils.h
+++ b/content/base/public/nsContentUtils.h
@@ -540,17 +540,17 @@ public:
    * @param aCharset the name of the charset; if empty, we assume UTF8
    */
   static nsresult ConvertStringFromCharset(const nsACString& aCharset,
                                            const nsACString& aInput,
                                            nsAString& aOutput);
 
   /**
    * Determine whether a buffer begins with a BOM for UTF-8, UTF-16LE,
-   * UTF-16BE, UTF-32LE, UTF-32BE.
+   * or UTF-16BE.
    *
    * @param aBuffer the buffer to check
    * @param aLength the length of the buffer
    * @param aCharset empty if not found
    * @return boolean indicating whether a BOM was detected.
    */
   static PRBool CheckForBOM(const unsigned char* aBuffer, PRUint32 aLength,
                             nsACString& aCharset, PRBool *bigEndian = nsnull);
--- a/content/base/src/nsContentUtils.cpp
+++ b/content/base/src/nsContentUtils.cpp
@@ -3529,34 +3529,16 @@ nsContentUtils::CheckForBOM(const unsign
   PRBool found = PR_TRUE;
   aCharset.Truncate();
   if (aLength >= 3 &&
       aBuffer[0] == 0xEF &&
       aBuffer[1] == 0xBB &&
       aBuffer[2] == 0xBF) {
     aCharset = "UTF-8";
   }
-  else if (aLength >= 4 &&
-           aBuffer[0] == 0x00 &&
-           aBuffer[1] == 0x00 &&
-           aBuffer[2] == 0xFE &&
-           aBuffer[3] == 0xFF) {
-    aCharset = "UTF-32";
-    if (bigEndian)
-      *bigEndian = PR_TRUE;
-  }
-  else if (aLength >= 4 &&
-           aBuffer[0] == 0xFF &&
-           aBuffer[1] == 0xFE &&
-           aBuffer[2] == 0x00 &&
-           aBuffer[3] == 0x00) {
-    aCharset = "UTF-32";
-    if (bigEndian)
-      *bigEndian = PR_FALSE;
-  }
   else if (aLength >= 2 &&
            aBuffer[0] == 0xFE && aBuffer[1] == 0xFF) {
     aCharset = "UTF-16";
     if (bigEndian)
       *bigEndian = PR_TRUE;
   }
   else if (aLength >= 2 &&
            aBuffer[0] == 0xFF && aBuffer[1] == 0xFE) {
--- a/content/base/src/nsDOMFileReader.cpp
+++ b/content/base/src/nsDOMFileReader.cpp
@@ -708,33 +708,21 @@ nsDOMFileReader::GuessCharset(const char
     NS_ENSURE_SUCCESS(rv, rv);
 
     rv = detector->Done();
     NS_ENSURE_SUCCESS(rv, rv);
 
     aCharset = mCharset;
   } else {
     // no charset detector available, check the BOM
-    unsigned char sniffBuf[4];
+    unsigned char sniffBuf[3];
     PRUint32 numRead = (aDataLen >= sizeof(sniffBuf) ? sizeof(sniffBuf) : aDataLen);
     memcpy(sniffBuf, aFileData, numRead);
 
-    if (numRead >= 4 &&
-        sniffBuf[0] == 0x00 &&
-        sniffBuf[1] == 0x00 &&
-        sniffBuf[2] == 0xfe &&
-        sniffBuf[3] == 0xff) {
-      aCharset = "UTF-32BE";
-    } else if (numRead >= 4 &&
-               sniffBuf[0] == 0xff &&
-               sniffBuf[1] == 0xfe &&
-               sniffBuf[2] == 0x00 &&
-               sniffBuf[3] == 0x00) {
-      aCharset = "UTF-32LE";
-    } else if (numRead >= 2 &&
+    if (numRead >= 2 &&
                sniffBuf[0] == 0xfe &&
                sniffBuf[1] == 0xff) {
       aCharset = "UTF-16BE";
     } else if (numRead >= 2 &&
                sniffBuf[0] == 0xff &&
                sniffBuf[1] == 0xfe) {
       aCharset = "UTF-16LE";
     } else if (numRead >= 3 &&
--- a/content/base/src/nsFrameMessageManager.cpp
+++ b/content/base/src/nsFrameMessageManager.cpp
@@ -622,19 +622,17 @@ nsFrameScriptExecutor::LoadFrameScriptIn
     nsContentUtils::ThreadJSContextStack()->Push(mCx);
     {
       // Need to scope JSAutoRequest to happen after Push but before Pop,
       // at least for now. See bug 584673.
       JSAutoRequest ar(mCx);
       JSObject* global = nsnull;
       mGlobal->GetJSObject(&global);
       if (global) {
-        JS_ExecuteScript(mCx, global,
-                         (JSScript*)JS_GetPrivate(mCx, holder->mObject),
-                         nsnull);
+        JS_ExecuteScript(mCx, global, holder->mObject, nsnull);
       }
     }
     JSContext* unused;
     nsContentUtils::ThreadJSContextStack()->Pop(&unused);
     return;
   }
 
   nsCString url = NS_ConvertUTF16toUTF8(aURL);
@@ -680,41 +678,37 @@ nsFrameScriptExecutor::LoadFrameScriptIn
       if (global) {
         JSPrincipals* jsprin = nsnull;
         mPrincipal->GetJSPrincipals(mCx, &jsprin);
         nsContentUtils::XPConnect()->FlagSystemFilenamePrefix(url.get(), PR_TRUE);
 
         uint32 oldopts = JS_GetOptions(mCx);
         JS_SetOptions(mCx, oldopts | JSOPTION_NO_SCRIPT_RVAL);
 
-        JSScript* script =
+        JSObject* scriptObj =
           JS_CompileUCScriptForPrincipals(mCx, nsnull, jsprin,
                                          (jschar*)dataString.get(),
                                           dataString.Length(),
                                           url.get(), 1);
 
         JS_SetOptions(mCx, oldopts);
 
-        if (script) {
-          JSObject* scriptObj = JS_NewScriptObject(mCx, script);
-          JS_AddObjectRoot(mCx, &scriptObj);
+        if (scriptObj) {
           nsCAutoString scheme;
           uri->GetScheme(scheme);
           // We don't cache data: scripts!
           if (!scheme.EqualsLiteral("data")) {
             nsFrameScriptExecutorJSObjectHolder* holder =
               new nsFrameScriptExecutorJSObjectHolder(scriptObj);
             // Root the object also for caching.
             JS_AddNamedObjectRoot(mCx, &(holder->mObject),
                                   "Cached message manager script");
             sCachedScripts->Put(aURL, holder);
           }
-          JS_ExecuteScript(mCx, global,
-                           (JSScript*)JS_GetPrivate(mCx, scriptObj), nsnull);
-          JS_RemoveObjectRoot(mCx, &scriptObj);
+          JS_ExecuteScript(mCx, global, scriptObj, nsnull);
         }
         //XXX Argh, JSPrincipals are manually refcounted!
         JSPRINCIPALS_DROP(mCx, jsprin);
       }
     } 
     JSContext* unused;
     nsContentUtils::ThreadJSContextStack()->Pop(&unused);
   }
--- a/content/base/src/nsGenericElement.cpp
+++ b/content/base/src/nsGenericElement.cpp
@@ -5478,18 +5478,17 @@ nsGenericElement::GetLinkTarget(nsAStrin
 }
 
 // NOTE: The aPresContext pointer is NOT addrefed.
 // *aSelectorList might be null even if NS_OK is returned; this
 // happens when all the selectors were pseudo-element selectors.
 static nsresult
 ParseSelectorList(nsINode* aNode,
                   const nsAString& aSelectorString,
-                  nsCSSSelectorList** aSelectorList,
-                  nsPresContext** aPresContext)
+                  nsCSSSelectorList** aSelectorList)
 {
   NS_ENSURE_ARG(aNode);
 
   nsIDocument* doc = aNode->GetOwnerDoc();
   NS_ENSURE_STATE(doc);
 
   nsCSSParser parser(doc->CSSLoader());
   NS_ENSURE_TRUE(parser, NS_ERROR_OUT_OF_MEMORY);
@@ -5510,195 +5509,96 @@ ParseSelectorList(nsINode* aNode,
       cur->mNext = nsnull;
       delete cur;
     } else {
       slot = &cur->mNext;
     }
   } while (*slot);
   *aSelectorList = selectorList;
 
-  // It's not strictly necessary to have a prescontext here, but it's
-  // a bit of an optimization for various stuff.
-  *aPresContext = nsnull;
-  nsIPresShell* shell = doc->GetShell();
-  if (shell) {
-    *aPresContext = shell->GetPresContext();
-  }
-
   return NS_OK;
 }
 
-/*
- * Callback to be called as we iterate over the tree and match elements.  If
- * the callbacks returns false, the iteration should be stopped.
- */
-typedef PRBool
-(* ElementMatchedCallback)(nsIContent* aMatchingElement, void* aClosure);
-
-// returning false means stop iteration
-static PRBool
-TryMatchingElementsInSubtree(nsINode* aRoot,
-                             RuleProcessorData* aParentData,
-                             nsPresContext* aPresContext,
-                             nsCSSSelectorList* aSelectorList,
-                             ElementMatchedCallback aCallback,
-                             void* aClosure)
-{
-  /* To improve the performance of '+' and '~' combinators and the :nth-*
-   * selectors, we keep track of the immediately previous sibling data.  That's
-   * cheaper than heap-allocating all the datas and keeping track of them all,
-   * and helps a good bit in the common cases.  We also keep track of the whole
-   * parent data chain, since we have those Around anyway */
-  union { char c[2 * sizeof(RuleProcessorData)]; void *p; } databuf;
-  RuleProcessorData* prevSibling = nsnull;
-  RuleProcessorData* data = reinterpret_cast<RuleProcessorData*>(databuf.c);
-
-  PRBool continueIteration = PR_TRUE;
-  for (nsINode::ChildIterator iter(aRoot); !iter.IsDone(); iter.Next()) {
-    nsIContent* kid = iter;
-    if (!kid->IsElement()) {
-      continue;
-    }
-    /* See whether we match */
-    new (data) RuleProcessorData(aPresContext, kid->AsElement(), nsnull);
-    NS_ASSERTION(!data->mParentData, "Shouldn't happen");
-    NS_ASSERTION(!data->mPreviousSiblingData, "Shouldn't happen");
-    data->mParentData = aParentData;
-    data->mPreviousSiblingData = prevSibling;
-
-    if (nsCSSRuleProcessor::SelectorListMatches(*data, aSelectorList)) {
-      continueIteration = (*aCallback)(kid, aClosure);
-    }
-
-    if (continueIteration) {
-      continueIteration =
-        TryMatchingElementsInSubtree(kid, data, aPresContext, aSelectorList,
-                                     aCallback, aClosure);
-    }
-    
-    /* Clear out the parent and previous sibling data if we set them, so that
-     * ~RuleProcessorData won't try to delete a placement-new'd object. Make
-     * sure this happens before our possible early break.  Note that we can
-     * have null aParentData but non-null data->mParentData if we're scoped to
-     * an element.  However, prevSibling and data->mPreviousSiblingData must
-     * always match.
-     */
-    NS_ASSERTION(!aParentData || data->mParentData == aParentData,
-                 "Unexpected parent");
-    NS_ASSERTION(data->mPreviousSiblingData == prevSibling,
-                 "Unexpected prev sibling");
-    data->mPreviousSiblingData = nsnull;
-    if (prevSibling) {
-      if (aParentData) {
-        prevSibling->mParentData = nsnull;
-      }
-      prevSibling->~RuleProcessorData();
-    } else {
-      /* This is the first time through, so point |prevSibling| to the location
-         we want to have |data| end up pointing to. */
-      prevSibling = data + 1;
-    }
-
-    /* Now swap |prevSibling| and |data|.  Again, before the early break */
-    RuleProcessorData* temp = prevSibling;
-    prevSibling = data;
-    data = temp;
-    if (!continueIteration) {
-      break;
-    }
-  }
-  if (prevSibling) {
-    if (aParentData) {
-      prevSibling->mParentData = nsnull;
-    }
-    /* Make sure to clean this up */
-    prevSibling->~RuleProcessorData();
-  }
-
-  return continueIteration;
-}
-
-static PRBool
-FindFirstMatchingElement(nsIContent* aMatchingElement,
-                         void* aClosure)
-{
-  NS_PRECONDITION(aMatchingElement && aClosure, "How did that happen?");
-  nsIContent** slot = static_cast<nsIContent**>(aClosure);
-  *slot = aMatchingElement;
-  return PR_FALSE;
-}
-
 /* static */
 nsIContent*
 nsGenericElement::doQuerySelector(nsINode* aRoot, const nsAString& aSelector,
                                   nsresult *aResult)
 {
   NS_PRECONDITION(aResult, "Null out param?");
 
   nsAutoPtr<nsCSSSelectorList> selectorList;
-  nsPresContext* presContext;
   *aResult = ParseSelectorList(aRoot, aSelector,
-                               getter_Transfers(selectorList),
-                               &presContext);
+                               getter_Transfers(selectorList));
   NS_ENSURE_SUCCESS(*aResult, nsnull);
 
-  nsIContent* foundElement = nsnull;
-  TryMatchingElementsInSubtree(aRoot, nsnull, presContext, selectorList,
-                               FindFirstMatchingElement, &foundElement);
-
-  return foundElement;
-}
-
-static PRBool
-AppendAllMatchingElements(nsIContent* aMatchingElement,
-                          void* aClosure)
-{
-  NS_PRECONDITION(aMatchingElement && aClosure, "How did that happen?");
-  static_cast<nsBaseContentList*>(aClosure)->AppendElement(aMatchingElement);
-  return PR_TRUE;
+  TreeMatchContext matchingContext(PR_FALSE,
+                                   nsRuleWalker::eRelevantLinkUnvisited,
+                                   aRoot->GetOwnerDoc());
+  for (nsIContent* cur = aRoot->GetFirstChild();
+       cur;
+       cur = cur->GetNextNode(aRoot)) {
+    if (cur->IsElement() &&
+        nsCSSRuleProcessor::SelectorListMatches(cur->AsElement(),
+                                                matchingContext,
+                                                selectorList)) {
+      return cur;
+    }
+  }
+
+  return nsnull;
 }
 
 /* static */
 nsresult
 nsGenericElement::doQuerySelectorAll(nsINode* aRoot,
                                      const nsAString& aSelector,
                                      nsIDOMNodeList **aReturn)
 {
   NS_PRECONDITION(aReturn, "Null out param?");
 
   nsBaseContentList* contentList = new nsBaseContentList();
   NS_ENSURE_TRUE(contentList, NS_ERROR_OUT_OF_MEMORY);
   NS_ADDREF(*aReturn = contentList);
   
   nsAutoPtr<nsCSSSelectorList> selectorList;
-  nsPresContext* presContext;
   nsresult rv = ParseSelectorList(aRoot, aSelector,
-                                  getter_Transfers(selectorList),
-                                  &presContext);
+                                  getter_Transfers(selectorList));
   NS_ENSURE_SUCCESS(rv, rv);
 
-  TryMatchingElementsInSubtree(aRoot, nsnull, presContext, selectorList,
-                               AppendAllMatchingElements, contentList);
+  TreeMatchContext matchingContext(PR_FALSE,
+                                   nsRuleWalker::eRelevantLinkUnvisited,
+                                   aRoot->GetOwnerDoc());
+  for (nsIContent* cur = aRoot->GetFirstChild();
+       cur;
+       cur = cur->GetNextNode(aRoot)) {
+    if (cur->IsElement() &&
+        nsCSSRuleProcessor::SelectorListMatches(cur->AsElement(),
+                                                matchingContext,
+                                                selectorList)) {
+      contentList->AppendElement(cur);
+    }
+  }
   return NS_OK;
 }
 
 
 PRBool
 nsGenericElement::MozMatchesSelector(const nsAString& aSelector, nsresult* aResult)
 {
   nsAutoPtr<nsCSSSelectorList> selectorList;
-  nsPresContext* presContext;
   PRBool matches = PR_FALSE;
 
-  *aResult = ParseSelectorList(this, aSelector, getter_Transfers(selectorList),
-                               &presContext);
+  *aResult = ParseSelectorList(this, aSelector, getter_Transfers(selectorList));
 
   if (NS_SUCCEEDED(*aResult)) {
-    RuleProcessorData data(presContext, this, nsnull);
-    matches = nsCSSRuleProcessor::SelectorListMatches(data, selectorList);
+    TreeMatchContext matchingContext(PR_FALSE,
+                                     nsRuleWalker::eRelevantLinkUnvisited,
+                                     GetOwnerDoc());
+    matches = nsCSSRuleProcessor::SelectorListMatches(this, matchingContext,
+                                                      selectorList);
   }
 
   return matches;
 }
 
 NS_IMETHODIMP
 nsNSElementTearoff::MozMatchesSelector(const nsAString& aSelector, PRBool* aReturn)
 {
--- a/content/base/src/nsTextFragment.cpp
+++ b/content/base/src/nsTextFragment.cpp
@@ -226,17 +226,18 @@ nsTextFragment::SetTo(const PRUnichar* a
 
     return;
   }
 
   const PRUnichar *ucp = aBuffer;
   const PRUnichar *uend = aBuffer + aLength;
 
   // Check if we can use a shared string
-  if (firstChar == ' ' || firstChar == '\n' || firstChar == '\t') {
+  if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
+     (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
     if (firstChar == ' ') {
       ++ucp;
     }
 
     const PRUnichar* start = ucp;
     while (ucp < uend && *ucp == '\n') {
       ++ucp;
     }
--- a/content/base/test/test_fileapi.html
+++ b/content/base/test/test_fileapi.html
@@ -110,23 +110,16 @@ expectedTestCount++;
 
 r = new FileReader();
 r.readAsText(createFileWithData(convertToUTF16(testTextData)), "utf-16");
 r.onload = getLoadHandler(testTextData,
                           convertToUTF16(testTextData).length,
                           "utf16 reading");
 expectedTestCount++;
 
-r = new FileReader();
-r.onload = getLoadHandler(testTextData,
-                          convertToUTF32(testTextData).length,
-                          "utf32 reading");
-r.readAsText(createFileWithData(convertToUTF32(testTextData)), "UTF-32");
-expectedTestCount++;
-
 
 // Test loading an empty file works (and doesn't crash!)
 var emptyFile = createFileWithData("");
 dump("hello nurse");
 r = new FileReader();
 r.onload = getLoadHandler("", 0, "empty no encoding reading");
 r.readAsText(emptyFile, "");
 expectedTestCount++;
@@ -346,25 +339,16 @@ function convertToUTF16(s) {
   res = "";
   for (var i = 0; i < s.length; ++i) {
     c = s.charCodeAt(i);
     res += String.fromCharCode(c >>> 8, c & 255);
   }
   return res;
 }
 
-function convertToUTF32(s) {
-  res = "";
-  for (var i = 0; i < s.length; ++i) {
-    c = s.charCodeAt(i);
-    res += "\0\0" + String.fromCharCode(c >>> 8, c & 255);
-  }
-  return res;
-}
-
 function convertToUTF8(s) {
   return unescape(encodeURIComponent(s));
 }
 
 function convertToDataURL(s) {
   return "data:application/octet-stream;base64," + btoa(s);
 }
 
--- a/content/canvas/public/nsICanvasRenderingContextInternal.h
+++ b/content/canvas/public/nsICanvasRenderingContextInternal.h
@@ -38,25 +38,24 @@
 #ifndef nsICanvasRenderingContextInternal_h___
 #define nsICanvasRenderingContextInternal_h___
 
 #include "nsISupports.h"
 #include "nsIInputStream.h"
 #include "nsIDocShell.h"
 #include "gfxPattern.h"
 
+// {EC90F32E-7848-4819-A1E3-02E64C682A72}
 #define NS_ICANVASRENDERINGCONTEXTINTERNAL_IID \
-{ 0xffb42d3c, 0x8281, 0x44c8, \
-  { 0xac, 0xba, 0x73, 0x15, 0x31, 0xaa, 0xe5, 0x07 } }
+{ 0xec90f32e, 0x7848, 0x4819, { 0xa1, 0xe3, 0x2, 0xe6, 0x4c, 0x68, 0x2a, 0x72 } }
 
 class nsHTMLCanvasElement;
 class gfxContext;
 class gfxASurface;
 class nsIPropertyBag;
-class nsDisplayListBuilder;
 
 namespace mozilla {
 namespace layers {
 class CanvasLayer;
 class LayerManager;
 }
 namespace ipc {
 class Shmem;
@@ -104,18 +103,17 @@ public:
   NS_IMETHOD SetIsOpaque(PRBool isOpaque) = 0;
 
   // Invalidate this context and release any held resources, in preperation
   // for possibly reinitializing with SetDimensions/InitializeWithSurface.
   NS_IMETHOD Reset() = 0;
 
   // Return the CanvasLayer for this context, creating
   // one for the given layer manager if not available.
-  virtual already_AddRefed<CanvasLayer> GetCanvasLayer(nsDisplayListBuilder* aBuilder,
-                                                       CanvasLayer *aOldLayer,
+  virtual already_AddRefed<CanvasLayer> GetCanvasLayer(CanvasLayer *aOldLayer,
                                                        LayerManager *aManager) = 0;
 
   virtual void MarkContextClean() = 0;
 
   // Redraw the dirty rectangle of this canvas.
   NS_IMETHOD Redraw(const gfxRect &dirty) = 0;
 
   // Passes a generic nsIPropertyBag options argument, along with the
--- a/content/canvas/src/WebGLContext.cpp
+++ b/content/canvas/src/WebGLContext.cpp
@@ -53,17 +53,16 @@
 
 #include "imgIEncoder.h"
 
 #include "gfxContext.h"
 #include "gfxPattern.h"
 #include "gfxUtils.h"
 
 #include "CanvasUtils.h"
-#include "nsDisplayList.h"
 
 #include "GLContextProvider.h"
 
 #include "gfxCrashReporterUtils.h"
 
 #ifdef MOZ_SVG
 #include "nsSVGEffects.h"
 #endif
@@ -243,26 +242,26 @@ WebGLContext::DestroyResourcesAndContext
 #endif
 
     gl = nsnull;
 }
 
 void
 WebGLContext::Invalidate()
 {
-    if (mInvalidated)
-        return;
-
     if (!mCanvasElement)
         return;
 
 #ifdef MOZ_SVG
     nsSVGEffects::InvalidateDirectRenderingObservers(HTMLCanvasElement());
 #endif
 
+    if (mInvalidated)
+        return;
+
     mInvalidated = PR_TRUE;
     HTMLCanvasElement()->InvalidateFrame();
 }
 
 /* readonly attribute nsIDOMHTMLCanvasElement canvas; */
 NS_IMETHODIMP
 WebGLContext::GetCanvas(nsIDOMHTMLCanvasElement **canvas)
 {
@@ -615,64 +614,37 @@ WebGLContext::GetInputStream(const char*
 NS_IMETHODIMP
 WebGLContext::GetThebesSurface(gfxASurface **surface)
 {
     return NS_ERROR_NOT_AVAILABLE;
 }
 
 static PRUint8 gWebGLLayerUserData;
 
-class WebGLContextUserData : public LayerUserData {
-public:
-    WebGLContextUserData(nsHTMLCanvasElement *aContent)
-    : mContent(aContent) {}
-  static void DidTransactionCallback(void* aData)
-  {
-    static_cast<WebGLContextUserData*>(aData)->mContent->MarkContextClean();
-  }
-
-private:
-  nsRefPtr<nsHTMLCanvasElement> mContent;
-};
-
 already_AddRefed<layers::CanvasLayer>
-WebGLContext::GetCanvasLayer(nsDisplayListBuilder* aBuilder,
-                             CanvasLayer *aOldLayer,
+WebGLContext::GetCanvasLayer(CanvasLayer *aOldLayer,
                              LayerManager *aManager)
 {
     if (!mResetLayer && aOldLayer &&
         aOldLayer->HasUserData(&gWebGLLayerUserData)) {
         NS_ADDREF(aOldLayer);
+        if (mInvalidated) {
+            aOldLayer->Updated(nsIntRect(0, 0, mWidth, mHeight));
+            mInvalidated = PR_FALSE;
+            HTMLCanvasElement()->GetPrimaryCanvasFrame()->MarkLayersActive();
+        }
         return aOldLayer;
     }
 
     nsRefPtr<CanvasLayer> canvasLayer = aManager->CreateCanvasLayer();
     if (!canvasLayer) {
         NS_WARNING("CreateCanvasLayer returned null!");
         return nsnull;
     }
-    WebGLContextUserData *userData = nsnull;
-    if (aBuilder->IsPaintingToWindow()) {
-      // Make the layer tell us whenever a transaction finishes (including
-      // the current transaction), so we can clear our invalidation state and
-      // start invalidating again. We need to do this for the layer that is
-      // being painted to a window (there shouldn't be more than one at a time,
-      // and if there is, flushing the invalidation state more often than
-      // necessary is harmless).
-
-      // The layer will be destroyed when we tear down the presentation
-      // (at the latest), at which time this userData will be destroyed,
-      // releasing the reference to the element.
-      // The userData will receive DidTransactionCallbacks, which flush the
-      // the invalidation state to indicate that the canvas is up to date.
-      userData = new WebGLContextUserData(HTMLCanvasElement());
-      canvasLayer->SetDidTransactionCallback(
-              WebGLContextUserData::DidTransactionCallback, userData);
-    }
-    canvasLayer->SetUserData(&gWebGLLayerUserData, userData);
+    canvasLayer->SetUserData(&gWebGLLayerUserData, nsnull);
 
     CanvasLayer::Data data;
 
     // the gl context may either provide a native PBuffer, in which case we want to initialize
     // data with the gl context directly, or may provide a surface to which it renders (this is the case
     // of OSMesa contexts), in which case we want to initialize data with that surface.
 
     void* native_surface = gl->GetNativeData(gl::GLContext::NativeImageSurface);
@@ -684,18 +656,19 @@ WebGLContext::GetCanvasLayer(nsDisplayLi
     }
 
     data.mSize = nsIntSize(mWidth, mHeight);
     data.mGLBufferIsPremultiplied = mOptions.premultipliedAlpha ? PR_TRUE : PR_FALSE;
 
     canvasLayer->Initialize(data);
     PRUint32 flags = gl->CreationFormat().alpha == 0 ? Layer::CONTENT_OPAQUE : 0;
     canvasLayer->SetContentFlags(flags);
-    canvasLayer->Updated();
+    canvasLayer->Updated(nsIntRect(0, 0, mWidth, mHeight));
 
+    mInvalidated = PR_FALSE;
     mResetLayer = PR_FALSE;
 
     return canvasLayer.forget().get();
 }
 
 NS_IMETHODIMP
 WebGLContext::GetContextAttributes(jsval *aResult)
 {
--- a/content/canvas/src/WebGLContext.h
+++ b/content/canvas/src/WebGLContext.h
@@ -363,20 +363,19 @@ public:
     }
     nsresult ErrorOutOfMemory(const char *fmt = 0, ...);
 
     WebGLTexture *activeBoundTextureForTarget(WebGLenum target) {
         return target == LOCAL_GL_TEXTURE_2D ? mBound2DTextures[mActiveTexture]
                                              : mBoundCubeMapTextures[mActiveTexture];
     }
 
-    already_AddRefed<CanvasLayer> GetCanvasLayer(nsDisplayListBuilder* aBuilder,
-                                                 CanvasLayer *aOldLayer,
+    already_AddRefed<CanvasLayer> GetCanvasLayer(CanvasLayer *aOldLayer,
                                                  LayerManager *aManager);
-    void MarkContextClean() { mInvalidated = PR_FALSE; }
+    void MarkContextClean() { }
 
     // a number that increments every time we have an event that causes
     // all context resources to be lost.
     PRUint32 Generation() { return mGeneration.value(); }
 
 protected:
     void SetDontKnowIfNeedFakeBlack() {
         mFakeBlackStatus = DontKnowIfNeedFakeBlack;
--- a/content/canvas/src/nsCanvasRenderingContext2D.cpp
+++ b/content/canvas/src/nsCanvasRenderingContext2D.cpp
@@ -91,17 +91,16 @@
 #include "nsIDocShell.h"
 #include "nsIDOMWindow.h"
 #include "nsPIDOMWindow.h"
 #include "nsIDocShell.h"
 #include "nsIDocShellTreeItem.h"
 #include "nsIDocShellTreeNode.h"
 #include "nsIXPConnect.h"
 #include "jsapi.h"
-#include "nsDisplayList.h"
 
 #include "nsTArray.h"
 
 #include "imgIEncoder.h"
 
 #include "gfxContext.h"
 #include "gfxASurface.h"
 #include "gfxImageSurface.h"
@@ -403,18 +402,17 @@ public:
     NS_IMETHOD InitializeWithSurface(nsIDocShell *shell, gfxASurface *surface, PRInt32 width, PRInt32 height);
     NS_IMETHOD Render(gfxContext *ctx, gfxPattern::GraphicsFilter aFilter);
     NS_IMETHOD GetInputStream(const char* aMimeType,
                               const PRUnichar* aEncoderOptions,
                               nsIInputStream **aStream);
     NS_IMETHOD GetThebesSurface(gfxASurface **surface);
     NS_IMETHOD SetIsOpaque(PRBool isOpaque);
     NS_IMETHOD Reset();
-    already_AddRefed<CanvasLayer> GetCanvasLayer(nsDisplayListBuilder* aBuilder,
-                                                 CanvasLayer *aOldLayer,
+    already_AddRefed<CanvasLayer> GetCanvasLayer(CanvasLayer *aOldLayer,
                                                  LayerManager *aManager);
     void MarkContextClean();
     NS_IMETHOD SetIsIPC(PRBool isIPC);
     // this rect is in canvas device space
     NS_IMETHOD Redraw(const gfxRect &r);
     // this rect is in mThebes's current user space
     NS_IMETHOD RedrawUser(const gfxRect &r);
 
@@ -452,16 +450,17 @@ public:
         }
     private:
         gfxContext *mContext;
         nsRefPtr<gfxPath> mPath;
     };
     friend class PathAutoSaveRestore;
 
 protected:
+
     /**
      * The number of living nsCanvasRenderingContexts.  When this goes down to
      * 0, we free the premultiply and unpremultiply tables, if they exist.
      */
     static PRUint32 sNumLivingContexts;
 
     /**
      * Lookup table used to speed up GetImageData().
@@ -4118,81 +4117,59 @@ nsCanvasRenderingContext2D::SetMozImageS
         DirtyAllStyles();
     }
 
     return NS_OK;
 }
 
 static PRUint8 g2DContextLayerUserData;
 
-class CanvasRenderingContext2DUserData : public LayerUserData {
-public:
-  CanvasRenderingContext2DUserData(nsHTMLCanvasElement *aContent)
-    : mContent(aContent) {}
-  static void DidTransactionCallback(void* aData)
-  {
-    static_cast<CanvasRenderingContext2DUserData*>(aData)->mContent->MarkContextClean();
-  }
-
-private:
-  nsRefPtr<nsHTMLCanvasElement> mContent;
-};
-
 already_AddRefed<CanvasLayer>
-nsCanvasRenderingContext2D::GetCanvasLayer(nsDisplayListBuilder* aBuilder,
-                                           CanvasLayer *aOldLayer,
+nsCanvasRenderingContext2D::GetCanvasLayer(CanvasLayer *aOldLayer,
                                            LayerManager *aManager)
 {
     if (!mValid)
         return nsnull;
 
     if (!mResetLayer && aOldLayer &&
         aOldLayer->HasUserData(&g2DContextLayerUserData)) {
         NS_ADDREF(aOldLayer);
+        if (mIsEntireFrameInvalid || mInvalidateCount > 0) {
+            // XXX Need to just update the changed area here; we should keep track
+            // of the rectangle based on Redraw args.
+            aOldLayer->Updated(nsIntRect(0, 0, mWidth, mHeight));
+            MarkContextClean();
+            HTMLCanvasElement()->GetPrimaryCanvasFrame()->MarkLayersActive();
+        }
+
         return aOldLayer;
     }
 
     nsRefPtr<CanvasLayer> canvasLayer = aManager->CreateCanvasLayer();
     if (!canvasLayer) {
         NS_WARNING("CreateCanvasLayer returned null!");
         return nsnull;
     }
-    CanvasRenderingContext2DUserData *userData = nsnull;
-    if (aBuilder->IsPaintingToWindow()) {
-      // Make the layer tell us whenever a transaction finishes (including
-      // the current transaction), so we can clear our invalidation state and
-      // start invalidating again. We need to do this for the layer that is
-      // being painted to a window (there shouldn't be more than one at a time,
-      // and if there is, flushing the invalidation state more often than
-      // necessary is harmless).
-
-      // The layer will be destroyed when we tear down the presentation
-      // (at the latest), at which time this userData will be destroyed,
-      // releasing the reference to the element.
-      // The userData will receive DidTransactionCallbacks, which flush the
-      // the invalidation state to indicate that the canvas is up to date.
-      userData = new CanvasRenderingContext2DUserData(HTMLCanvasElement());
-      canvasLayer->SetDidTransactionCallback(
-              CanvasRenderingContext2DUserData::DidTransactionCallback, userData);
-    }
-    canvasLayer->SetUserData(&g2DContextLayerUserData, userData);
+    canvasLayer->SetUserData(&g2DContextLayerUserData, nsnull);
 
     CanvasLayer::Data data;
 
     data.mSurface = mSurface.get();
     data.mSize = nsIntSize(mWidth, mHeight);
 
     canvasLayer->Initialize(data);
     PRUint32 flags = mOpaque ? Layer::CONTENT_OPAQUE : 0;
     canvasLayer->SetContentFlags(flags);
-    canvasLayer->Updated();
+    canvasLayer->Updated(nsIntRect(0, 0, mWidth, mHeight));
 
     mResetLayer = PR_FALSE;
 
-    return canvasLayer.forget();
+    MarkContextClean();
+
+    return canvasLayer.forget().get();
 }
 
 void
 nsCanvasRenderingContext2D::MarkContextClean()
 {
     if (mInvalidateCount > 0) {
         mPredictManyRedrawCalls = mInvalidateCount > kCanvasMaxInvalidateCount;
     }
--- a/content/html/content/public/nsHTMLCanvasElement.h
+++ b/content/html/content/public/nsHTMLCanvasElement.h
@@ -152,22 +152,22 @@ public:
                            PRBool aNotify);
   virtual nsresult Clone(nsINodeInfo *aNodeInfo, nsINode **aResult) const;
   nsresult CopyInnerTo(nsGenericElement* aDest) const;
 
   /*
    * Helpers called by various users of Canvas
    */
 
-  already_AddRefed<CanvasLayer> GetCanvasLayer(nsDisplayListBuilder* aBuilder,
-                                               CanvasLayer *aOldLayer,
+  already_AddRefed<CanvasLayer> GetCanvasLayer(CanvasLayer *aOldLayer,
                                                LayerManager *aManager);
 
-  // Any invalidates requested by the context have been processed by updating
-  // the window. Future changes to the canvas need to trigger more invalidation.
+  // Tell the context that everything it has invalidated so far has been
+  // displayed on screen, so that it should start requesting invalidates
+  // again as needed.
   void MarkContextClean();
 
   virtual nsXPCClassInfo* GetClassInfo();
 protected:
   nsIntSize GetWidthHeight();
 
   nsresult UpdateContext(nsIPropertyBag *aNewContextOptions = nsnull);
   nsresult ExtractData(const nsAString& aType,
--- a/content/html/content/src/nsFormSubmission.cpp
+++ b/content/html/content/src/nsFormSubmission.cpp
@@ -705,20 +705,19 @@ nsEncodingFormSubmission::nsEncodingForm
 {
   nsCAutoString charset(aCharset);
   // canonical name is passed so that we just have to check against
   // *our* canonical names listed in charsetaliases.properties
   if (charset.EqualsLiteral("ISO-8859-1")) {
     charset.AssignLiteral("windows-1252");
   }
 
-  // use UTF-8 for UTF-16* and UTF-32* (per WHATWG and existing practice of
+  // use UTF-8 for UTF-16* (per WHATWG and existing practice of
   // MS IE/Opera). 
-  if (StringBeginsWith(charset, NS_LITERAL_CSTRING("UTF-16")) || 
-      StringBeginsWith(charset, NS_LITERAL_CSTRING("UTF-32"))) {
+  if (StringBeginsWith(charset, NS_LITERAL_CSTRING("UTF-16"))) {
     charset.AssignLiteral("UTF-8");
   }
 
   mEncoder = do_CreateInstance(NS_SAVEASCHARSET_CONTRACTID);
   if (mEncoder) {
     nsresult rv =
       mEncoder->Init(charset.get(),
                      (nsISaveAsCharset::attr_EntityAfterCharsetConv + 
--- a/content/html/content/src/nsHTMLCanvasElement.cpp
+++ b/content/html/content/src/nsHTMLCanvasElement.cpp
@@ -645,42 +645,39 @@ void
 nsHTMLCanvasElement::InvalidateFrame(const gfxRect* damageRect)
 {
   // We don't need to flush anything here; if there's no frame or if
   // we plan to reframe we don't need to invalidate it anyway.
   nsIFrame *frame = GetPrimaryFrame();
   if (!frame)
     return;
 
-  frame->MarkLayersActive();
-
-  nsRect invalRect;
-  nsRect contentArea = frame->GetContentRect();
   if (damageRect) {
+    nsRect contentArea(frame->GetContentRect());
     nsIntSize size = GetWidthHeight();
 
     // damageRect and size are in CSS pixels; contentArea is in appunits
     // We want a rect in appunits; so avoid doing pixels-to-appunits and
     // vice versa conversion here.
     gfxRect realRect(*damageRect);
     realRect.Scale(contentArea.width / gfxFloat(size.width),
                    contentArea.height / gfxFloat(size.height));
     realRect.RoundOut();
 
     // then make it a nsRect
-    invalRect = nsRect(realRect.X(), realRect.Y(),
-                       realRect.Width(), realRect.Height());
+    nsRect invalRect(realRect.X(), realRect.Y(),
+                     realRect.Width(), realRect.Height());
+
+    // account for border/padding
+    invalRect.MoveBy(contentArea.TopLeft() - frame->GetPosition());
+
+    frame->InvalidateLayer(invalRect, nsDisplayItem::TYPE_CANVAS);
   } else {
-    invalRect = nsRect(nsPoint(0, 0), contentArea.Size());
-  }
-  invalRect.MoveBy(contentArea.TopLeft() - frame->GetPosition());
-
-  Layer* layer = frame->InvalidateLayer(invalRect, nsDisplayItem::TYPE_CANVAS);
-  if (layer) {
-    static_cast<CanvasLayer*>(layer)->Updated();
+    nsRect r(frame->GetContentRect() - frame->GetPosition());
+    frame->InvalidateLayer(r, nsDisplayItem::TYPE_CANVAS);
   }
 }
 
 PRInt32
 nsHTMLCanvasElement::CountContexts()
 {
   if (mCurrentContext)
     return 1;
@@ -699,24 +696,23 @@ nsHTMLCanvasElement::GetContextAtIndex (
 
 PRBool
 nsHTMLCanvasElement::GetIsOpaque()
 {
   return HasAttr(kNameSpaceID_None, nsGkAtoms::moz_opaque);
 }
 
 already_AddRefed<CanvasLayer>
-nsHTMLCanvasElement::GetCanvasLayer(nsDisplayListBuilder* aBuilder,
-                                    CanvasLayer *aOldLayer,
+nsHTMLCanvasElement::GetCanvasLayer(CanvasLayer *aOldLayer,
                                     LayerManager *aManager)
 {
   if (!mCurrentContext)
     return nsnull;
 
-  return mCurrentContext->GetCanvasLayer(aBuilder, aOldLayer, aManager);
+  return mCurrentContext->GetCanvasLayer(aOldLayer, aManager);
 }
 
 void
 nsHTMLCanvasElement::MarkContextClean()
 {
   if (!mCurrentContext)
     return;
 
--- a/content/media/nsBuiltinDecoder.h
+++ b/content/media/nsBuiltinDecoder.h
@@ -431,17 +431,20 @@ class nsBuiltinDecoder : public nsMediaD
   // state.
   Monitor& GetMonitor() { 
     return mMonitor; 
   }
 
   // Constructs the time ranges representing what segments of the media
   // are buffered and playable.
   virtual nsresult GetBuffered(nsTimeRanges* aBuffered) {
-    return mDecoderStateMachine->GetBuffered(aBuffered);
+    if (mDecoderStateMachine) {
+      return mDecoderStateMachine->GetBuffered(aBuffered);
+    }
+    return NS_ERROR_FAILURE;
   }
 
   virtual void NotifyDataArrived(const char* aBuffer, PRUint32 aLength, PRUint32 aOffset) {
     return mDecoderStateMachine->NotifyDataArrived(aBuffer, aLength, aOffset);
   }
 
  public:
   // Return the current state. Can be called on any thread. If called from
--- a/content/media/nsBuiltinDecoderReader.h
+++ b/content/media/nsBuiltinDecoderReader.h
@@ -114,25 +114,27 @@ typedef short SoundDataValue;
 
 #define MOZ_SOUND_DATA_FORMAT (nsAudioStream::FORMAT_S16_LE)
 #define MOZ_CLIP_TO_15(x) ((x)<-32768?-32768:(x)<=32767?(x):32767)
 // Convert the output of vorbis_synthesis_pcmout to a SoundDataValue
 #define MOZ_CONVERT_VORBIS_SAMPLE(x) \
  (static_cast<SoundDataValue>(MOZ_CLIP_TO_15((x)>>9)))
 // Convert a SoundDataValue to a float for the Audio API
 #define MOZ_CONVERT_SOUND_SAMPLE(x) ((x)*(1.F/32768))
+#define MOZ_SAMPLE_TYPE_S16LE 1
 
 #else /*MOZ_VORBIS*/
 
 typedef float VorbisPCMValue;
 typedef float SoundDataValue;
 
 #define MOZ_SOUND_DATA_FORMAT (nsAudioStream::FORMAT_FLOAT32)
 #define MOZ_CONVERT_VORBIS_SAMPLE(x) (x)
 #define MOZ_CONVERT_SOUND_SAMPLE(x) (x)
+#define MOZ_SAMPLE_TYPE_FLOAT32 1
 
 #endif
 
 // Holds chunk a decoded sound samples.
 class SoundData {
 public:
   SoundData(PRInt64 aOffset,
             PRInt64 aTime,
--- a/content/media/nsBuiltinDecoderStateMachine.cpp
+++ b/content/media/nsBuiltinDecoderStateMachine.cpp
@@ -592,17 +592,17 @@ void nsBuiltinDecoderStateMachine::Audio
 }
 
 PRUint32 nsBuiltinDecoderStateMachine::PlaySilence(PRUint32 aSamples,
                                                    PRUint32 aChannels,
                                                    PRUint64 aSampleOffset)
 
 {
   MonitorAutoEnter audioMon(mAudioMonitor);
-  if (mAudioStream->IsPaused()) {
+  if (!mAudioStream || mAudioStream->IsPaused()) {
     // The state machine has paused since we've released the decoder
     // monitor and acquired the audio monitor. Don't write any audio.
     return 0;
   }
   PRUint32 maxSamples = SILENCE_BYTES_CHUNK / aChannels;
   PRUint32 samples = NS_MIN(aSamples, maxSamples);
   PRUint32 numValues = samples * aChannels;
   nsAutoArrayPtr<SoundDataValue> buf(new SoundDataValue[numValues]);
--- a/content/media/nsMediaDecoder.cpp
+++ b/content/media/nsMediaDecoder.cpp
@@ -84,26 +84,26 @@ nsMediaDecoder::nsMediaDecoder() :
   mShuttingDown(PR_FALSE)
 {
   MOZ_COUNT_CTOR(nsMediaDecoder);
 }
 
 nsMediaDecoder::~nsMediaDecoder()
 {
   if (mVideoUpdateLock) {
-    PR_DestroyLock(mVideoUpdateLock);
+    nsAutoLock::DestroyLock(mVideoUpdateLock);
     mVideoUpdateLock = nsnull;
   }
   MOZ_COUNT_DTOR(nsMediaDecoder);
 }
 
 PRBool nsMediaDecoder::Init(nsHTMLMediaElement* aElement)
 {
   mElement = aElement;
-  mVideoUpdateLock = PR_NewLock();
+  mVideoUpdateLock = nsAutoLock::NewLock("nsMediaDecoder::mVideoUpdateLock");
 
   return mVideoUpdateLock != nsnull;
 }
 
 void nsMediaDecoder::Shutdown()
 {
   StopProgress();
   mElement = nsnull;
--- a/content/media/test/Makefile.in
+++ b/content/media/test/Makefile.in
@@ -224,16 +224,18 @@ endif
 		big.wav \
 		bogus.wav \
 		r11025_msadpcm_c1.wav \
 		r11025_s16_c1.wav \
 		r11025_s16_c1_trailing.wav \
 		r11025_u8_c1.wav \
 		r11025_u8_c1_trunc.wav \
 		r16000_u8_c1_list.wav \
+		wavedata_u8.wav \
+		wavedata_s16.wav \
 		$(NULL)
 
 # Other files
 _TEST_FILES += \
 		bogus.duh \
 		$(NULL)
 
 # These tests contain backend-specific tests. Try to write backend
@@ -275,16 +277,18 @@ else
 _TEST_FILES += \
 		test_can_play_type_no_webm.html \
 		$(NULL)
 endif
 
 ifdef MOZ_WAVE
 _TEST_FILES += \
 		test_can_play_type_wave.html \
+		test_wave_data_u8.html \
+		test_wave_data_s16.html \
 		$(NULL)
 else
 _TEST_FILES += \
 		test_can_play_type_no_wave.html \
 		$(NULL)
 endif
 
 libs:: $(_TEST_FILES)
--- a/content/media/test/test_autoplay_contentEditable.html
+++ b/content/media/test/test_autoplay_contentEditable.html
@@ -11,17 +11,17 @@
 <pre id="test">
 
 <script>
 
 var manager = new MediaTestManager;
 
 var tokens = {
   0:                ["canplay"],
-  "canplay":        ["canplaythrough"],
+  "canplay":        ["canplay", "canplaythrough"],
   "canplaythrough": ["canplay", "canplaythrough"]
 };
 
 function gotPlayEvent(event) {
   var v = event.target;
   ok(tokens[v._state].indexOf(event.type) >= 0,
      "Check expected event got " + event.type + " at " + v._state + " for " + v.src +
      " uneval(event.type)=" + uneval(event.type) + " typeof(event.type)=" + typeof(event.type) +
new file mode 100644
--- /dev/null
+++ b/content/media/test/test_wave_data_s16.html
@@ -0,0 +1,53 @@
+<!DOCTYPE HTML>
+<html>
+<head>
+  <title>Wave Media test: s16 sample data</title>
+  <script type="text/javascript" src="/MochiKit/packed.js"></script>
+  <script type="text/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+// Test that the first samples decoded from wavedata_s16.wav have the
+// expected values. 'completed' is set once they have been checked.
+var completed = false;
+
+// MozAudioAvailable handler: checks the first three samples, then finishes.
+function audioavailable(e) {
+  if (completed)  // only the first event is checked
+    return false;
+
+  completed = true;
+  var samples = e.frameBuffer;
+
+  ok(samples.length >= 3, "Must be 3 or more samples. There were " + samples.length);
+  if (samples.length >= 3) {
+    ok(samples[0] > 0.99 && samples[0] < 1.01, "First sound sample should be close to 1.0. It was " + samples[0]);
+    ok(samples[1] > -1.01 && samples[1] < -0.99, "Second sound sample should be close to -1.0. It was " + samples[1]);
+    ok(samples[2] > -0.01 && samples[2] < 0.01, "Third sound sample should be close to 0. It was " + samples[2]);
+  }
+
+  // Only care about the first few samples
+  SimpleTest.finish();
+  return false;
+}
+
+function startTest() {  // invoked from the <audio> onloadedmetadata attribute
+  if (!completed) {  // nothing to set up once the samples have been checked
+    var audio = document.getElementById('v');
+    audio.addEventListener('MozAudioAvailable', audioavailable, false);
+    audio.play();
+  }
+  return false;
+}
+
+SimpleTest.waitForExplicitFinish();  // audioavailable() calls SimpleTest.finish() when done
+</script>
+</pre>
+<audio id='v'
+       onloadedmetadata='return startTest();'>
+  <source type='audio/x-wav' src='wavedata_s16.wav'>
+</audio>
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/content/media/test/test_wave_data_u8.html
@@ -0,0 +1,53 @@
+<!DOCTYPE HTML>
+<html>
+<head>
+  <title>Wave Media test: u8 sample data</title>
+  <script type="text/javascript" src="/MochiKit/packed.js"></script>
+  <script type="text/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+// Test that the first samples decoded from wavedata_u8.wav have the
+// expected values. 'completed' is set once they have been checked.
+var completed = false;
+
+// MozAudioAvailable handler: checks the first three samples, then finishes.
+function audioavailable(e) {
+  if (completed)  // only the first event is checked
+    return false;
+
+  completed = true;
+  var samples = e.frameBuffer;
+
+  ok(samples.length >= 3, "Must be 3 or more samples. There were " + samples.length);
+  if (samples.length >= 3) {
+    ok(samples[0] > 0.99 && samples[0] < 1.01, "First sound sample should be close to 1.0. It was " + samples[0]);
+    ok(samples[1] > -1.01 && samples[1] < -0.99, "Second sound sample should be close to -1.0. It was " + samples[1]);
+    ok(samples[2] > -0.01 && samples[2] < 0.01, "Third sound sample should be close to 0. It was " + samples[2]);
+  }
+
+  // Only care about the first few samples
+  SimpleTest.finish();
+  return false;
+}
+
+function startTest() {  // invoked from the <audio> onloadedmetadata attribute
+  if (!completed) {  // nothing to set up once the samples have been checked
+    var audio = document.getElementById('v');
+    audio.addEventListener('MozAudioAvailable', audioavailable, false);
+    audio.play();
+  }
+  return false;
+}
+
+SimpleTest.waitForExplicitFinish();  // audioavailable() calls SimpleTest.finish() when done
+</script>
+</pre>
+<audio id='v'
+       onloadedmetadata='return startTest();'>
+  <source type='audio/x-wav' src='wavedata_u8.wav'>
+</audio>
+</body>
+</html>
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6a69cd78f6e29f9851f231d67837654a55d87a82
GIT binary patch
literal 22062
zc%1FYAr6B;07TI#C!jYVSWZ#FCbbCABm`H7hnu(CJ0M@4`I(vCrIa~YUh}wrj;>Ae
ssGdX8*S$%bZ9Ue1f6rX?z5oCK00000000000000000000006k#52p}`fdBvi
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1d895c2ce0867c03ba384b84b81918fa51bbb12a
GIT binary patch
literal 11037
zc%1FXs||zz07SvXA)$lw2owb%Ai<shO>#MhqX0VR(*<JYWzBx4aqOC8>F43U?Vl=1
faaZ5pvm#}F>%P`)a^?sC00000000000KC=(p0)@M
--- a/content/media/wave/Makefile.in
+++ b/content/media/wave/Makefile.in
@@ -47,16 +47,17 @@ LIBXUL_LIBRARY 	= 1
 
 
 EXPORTS		+= \
 		nsWaveDecoder.h \
 		$(NULL)
 
 CPPSRCS		= \
 		nsWaveDecoder.cpp \
+		nsWaveReader.cpp \
 		$(NULL)
 
 FORCE_STATIC_LIB = 1
 
 include $(topsrcdir)/config/rules.mk
 
 INCLUDES	+= \
 		-I$(srcdir)/../../base/src \
--- a/content/media/wave/nsWaveDecoder.cpp
+++ b/content/media/wave/nsWaveDecoder.cpp
@@ -10,18 +10,18 @@
  *
  * Software distributed under the License is distributed on an "AS IS" basis,
  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  * for the specific language governing rights and limitations under the
  * License.
  *
  * The Original Code is Mozilla code.
  *
- * The Initial Developer of the Original Code is the Mozilla Corporation.
- * Portions created by the Initial Developer are Copyright (C) 2008
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
  *  Matthew Gregan <kinetik@flim.org>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -30,1731 +30,16 @@
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
-#include "limits"
-#include "prlog.h"
-#include "prmem.h"
-#include "nsIDOMHTMLMediaElement.h"
-#include "nsIDocument.h"
-#include "nsIFrame.h"
-#include "nsIObserver.h"
-#include "nsISeekableStream.h"
-#include "nsAudioStream.h"
-#include "nsAutoLock.h"
-#include "nsHTMLMediaElement.h"
-#include "nsNetUtil.h"
-#include "nsThreadUtils.h"
+#include "nsBuiltinDecoderStateMachine.h"
+#include "nsWaveReader.h"
 #include "nsWaveDecoder.h"
-#include "nsTimeRanges.h"
-
-using mozilla::TimeDuration;
-using mozilla::TimeStamp;
-
-#ifdef PR_LOGGING
-static PRLogModuleInfo* gWaveDecoderLog;
-#define LOG(type, msg) PR_LOG(gWaveDecoderLog, type, msg)
-#else
-#define LOG(type, msg)
-#endif
-
-// Maximum number of seconds to wait when buffering.
-#define BUFFERING_TIMEOUT 3
-
-// Duration the playback loop will sleep after refilling the backend's audio
-// buffers.  The loop's goal is to keep AUDIO_BUFFER_LENGTH milliseconds of
-// audio buffered to allow time to refill before the backend underruns.
-// Should be a multiple of 10 to deal with poor timer granularity on some
-// platforms.
-#define AUDIO_BUFFER_WAKEUP 100
-#define AUDIO_BUFFER_LENGTH (2 * AUDIO_BUFFER_WAKEUP)
-
-// Magic values that identify RIFF chunks we're interested in.
-#define RIFF_CHUNK_MAGIC 0x52494646
-#define WAVE_CHUNK_MAGIC 0x57415645
-#define FRMT_CHUNK_MAGIC 0x666d7420
-#define DATA_CHUNK_MAGIC 0x64617461
-
-// Size of RIFF chunk header.  4 byte chunk header type and 4 byte size field.
-#define RIFF_CHUNK_HEADER_SIZE 8
-
-// Size of RIFF header.  RIFF chunk and 4 byte RIFF type.
-#define RIFF_INITIAL_SIZE (RIFF_CHUNK_HEADER_SIZE + 4)
-
-// Size of required part of format chunk.  Actual format chunks may be
-// extended (for non-PCM encodings), but we skip any extended data.
-#define WAVE_FORMAT_CHUNK_SIZE 16
-
-// PCM encoding type from format chunk.  Linear PCM is the only encoding
-// supported by nsAudioStream.
-#define WAVE_FORMAT_ENCODING_PCM 1
-
-enum State {
-  STATE_LOADING_METADATA,
-  STATE_BUFFERING,
-  STATE_PLAYING,
-  STATE_SEEKING,
-  STATE_PAUSED,
-  STATE_ENDED,
-  STATE_ERROR,
-  STATE_SHUTDOWN
-};
-
-/*
-  A single nsWaveStateMachine instance is owned by the decoder, created
-   on-demand at load time.  Upon creation, the decoder immediately
-   dispatches the state machine event to the decode thread to begin
-   execution.  Once running, metadata loading begins immediately.  If this
-   completes successfully, the state machine will move into a paused state
-   awaiting further commands.  The state machine provides a small set of
-   threadsafe methods enabling the main thread to play, pause, seek, and
-   query parameters.
-
-   An weak (raw) pointer to the decoder's nsMediaStream is used by the state
-   machine to read data, seek, and query stream information.  The decoder is
-   responsible for creating and opening the stream, and may also cancel it.
-   Every other stream operation is performed on the playback thread by the
-   state machine.  A cancel from the main thread will force any in-flight
-   stream operations to abort.
- */
-class nsWaveStateMachine : public nsRunnable
-{
-public:
-  nsWaveStateMachine(nsWaveDecoder* aDecoder,
-                     TimeDuration aBufferWaitTime, double aInitialVolume);
-  ~nsWaveStateMachine();
-
-  void SetStream(nsMediaStream* aStream) { mStream = aStream; }
-
-  // Set specified volume.  aVolume must be in range [0.0, 1.0].
-  // Threadsafe.
-  void SetVolume(double aVolume);
-
-  /*
-    The following four member functions initiate the appropriate state
-    transition suggested by the function name.  Threadsafe.
-   */
-  void Play();
-  void Pause();
-  void Seek(double aTime);
-  void Shutdown();
-
-  // Returns the playback length of the audio data in seconds, calculated
-  // from the length extracted from the metadata.  Returns NaN if called
-  // before metadata validation has completed.  Threadsafe.
-  double GetDuration();
-
-  // Returns the number of channels extracted from the metadata.  Returns 0
-  // if called before metadata validation has completed.  Threadsafe.
-  PRUint32 GetChannels();
-
-  // Returns the audio sample rate (number of samples per second) extracted
-  // from the metadata.  Returns 0 if called before metadata validation has
-  // completed.  Threadsafe.
-  PRUint32 GetSampleRate();
-
-  // Returns true if the state machine is seeking.  Threadsafe.
-  PRBool IsSeeking();
-
-  // Returns true if the state machine has reached the end of playback.  Threadsafe.
-  PRBool IsEnded();
-
-  // Main state machine loop. Runs forever, until shutdown state is reached.
-  NS_IMETHOD Run();
-
-  // Called by the decoder, on the main thread.
-  nsMediaDecoder::Statistics GetStatistics();
-
-  // Called on the decoder thread
-  void NotifyBytesConsumed(PRInt64 aBytes);
-
-  // Called by decoder and main thread.
-  nsHTMLMediaElement::NextFrameStatus GetNextFrameStatus();
-
-  // Clear the flag indicating that a playback position change event is
-  // currently queued and return the current time. This is called from the
-  // main thread.
-  double GetTimeForPositionChange();
-
-  nsresult GetBuffered(nsTimeRanges* aBuffered);
-
-private:
-  // Returns PR_TRUE if we're in shutdown state. Threadsafe.
-  PRBool IsShutdown();
-
-  // Reads from the media stream. Returns PR_FALSE on failure or EOF.  If
-  // aBytesRead is non-null, the number of bytes read will be returned via
-  // this.
-  PRBool ReadAll(char* aBuf, PRInt64 aSize, PRInt64* aBytesRead);
-
-  void UpdateReadyState() {
-    PR_ASSERT_CURRENT_THREAD_IN_MONITOR(mMonitor);
-
-    nsCOMPtr<nsIRunnable> event;
-    switch (GetNextFrameStatus()) {
-      case nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE_BUFFERING:
-        event = NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::NextFrameUnavailableBuffering);
-        break;
-      case nsHTMLMediaElement::NEXT_FRAME_AVAILABLE:
-        event = NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::NextFrameAvailable);
-        break;
-      case nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE:
-        event = NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::NextFrameUnavailable);
-        break;
-      default:
-        PR_NOT_REACHED("unhandled frame state");
-    }
-
-    NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
-  }
-
-  // Change the current state and wake the playback thread if it is waiting
-  // on mMonitor.  Used by public member functions called from both threads,
-  // so must hold mMonitor.  Threadsafe.
-  void ChangeState(State aState);
-
-  // Create and initialize audio stream using current audio parameters.
-  void OpenAudioStream(nsAutoMonitor& aMonitor);
-
-  // Shut down and dispose audio stream.
-  void CloseAudioStream();
-
-  // Read RIFF_INITIAL_SIZE from the beginning of the stream and verify that
-  // the stream data is a RIFF bitstream containing WAVE data.
-  PRBool LoadRIFFChunk();
-
-  // Read forward in the stream until aWantedChunk is found.  Return chunk
-  // size in aChunkSize.  aChunkSize will not be rounded up if the chunk
-  // size is odd.
-  PRBool ScanForwardUntil(PRUint32 aWantedChunk, PRUint32* aChunkSize);
-
-  // Scan forward in the stream looking for the WAVE format chunk.  If
-  // found, parse and validate required metadata, then use it to set
-  // mSampleRate, mChannels, mSampleSize, and mSampleFormat.
-  PRBool LoadFormatChunk();
-
-  // Scan forward in the stream looking for the start of the PCM data.  If
-  // found, record the data length and offset in mWaveLength and
-  // mWavePCMOffset.
-  PRBool FindDataOffset();
-
-  // Return the length of the PCM data.
-  PRInt64 GetDataLength();
-
-  // Fire a PlaybackPositionChanged event.  If aCoalesce is true and a
-  // PlaybackPositionChanged event is already pending, an event is not
-  // fired.
-  void FirePositionChanged(PRBool aCoalesce);
-
-  // Returns the number of seconds that aBytes represents based on the
-  // current audio parameters.  e.g.  176400 bytes is 1 second at 16-bit
-  // stereo 44.1kHz.
-  double BytesToTime(PRInt64 aBytes) const
-  {
-    NS_ABORT_IF_FALSE(mMetadataValid, "Requires valid metadata");
-    NS_ABORT_IF_FALSE(aBytes >= 0, "Must be >= 0");
-    return double(aBytes) / mSampleRate / mSampleSize;
-  }
-
-  // Returns the number of bytes that aTime represents based on the current
-  // audio parameters.  e.g.  1 second is 176400 bytes at 16-bit stereo
-  // 44.1kHz.
-  PRInt64 TimeToBytes(double aTime) const
-  {
-    NS_ABORT_IF_FALSE(mMetadataValid, "Requires valid metadata");
-    NS_ABORT_IF_FALSE(aTime >= 0.0f, "Must be >= 0");
-    return RoundDownToSample(PRInt64(aTime * mSampleRate * mSampleSize));
-  }
-
-  // Rounds aBytes down to the nearest complete sample.  Assumes beginning
-  // of byte range is already sample aligned by caller.
-  PRInt64 RoundDownToSample(PRInt64 aBytes) const
-  {
-    NS_ABORT_IF_FALSE(mMetadataValid, "Requires valid metadata");
-    NS_ABORT_IF_FALSE(aBytes >= 0, "Must be >= 0");
-    return aBytes - (aBytes % mSampleSize);
-  }
-
-  // Weak (raw) pointer to our decoder instance.  The decoder manages the
-  // lifetime of the state machine object, so it is guaranteed that the
-  // state machine will not outlive the decoder.  The decoder is not
-  // threadsafe, so this pointer must only be used to create runnable events
-  // targeted at the main thread.
-  nsWaveDecoder* mDecoder;
-
-  // Weak (raw) pointer to a media stream.  The decoder manages the lifetime
-  // of the stream, so it is guaranteed that the stream will live as long as
-  // the state machine.  The stream is threadsafe, but is only used on the
-  // playback thread except for create, open, and cancel, which are called
-  // from the main thread.
-  nsMediaStream* mStream;
-
-  // Our audio stream.  Created on demand when entering playback state.  It
-  // is destroyed when seeking begins and will not be reinitialized until
-  // playback resumes, so it is possible for this to be null.
-  nsRefPtr<nsAudioStream> mAudioStream;
-
-  // Maximum time to spend waiting for data during buffering.
-  TimeDuration mBufferingWait;
-
-  // Machine time that buffering began, used with mBufferingWait to time out
-  // buffering.
-  TimeStamp mBufferingStart;
-
-  // Download position where we should stop buffering.  Only accessed
-  // in the decoder thread.
-  PRInt64 mBufferingEndOffset;
-
-  /*
-    Metadata extracted from the WAVE header.  Used to initialize the audio
-    stream, and for byte<->time domain conversions.
-  */
-
-  // Number of samples per second.  Limited to range [100, 96000] in LoadFormatChunk.
-  PRUint32 mSampleRate;
-
-  // Number of channels.  Limited to range [1, 2] in LoadFormatChunk.
-  PRUint32 mChannels;
-
-  // Size of a single sample segment, which includes a sample for each
-  // channel (interleaved).
-  PRUint32 mSampleSize;
-
-  // The sample format of the PCM data.
-  nsAudioStream::SampleFormat mSampleFormat;
-
-  // Size of PCM data stored in the WAVE as reported by the data chunk in
-  // the media.
-  PRInt64 mWaveLength;
-
-  // Start offset of the PCM data in the media stream.  Extends mWaveLength
-  // bytes.
-  PRInt64 mWavePCMOffset;
-
-  /*
-    All member variables following this comment are accessed by both
-    threads and must be synchronized via mMonitor.
-  */
-  PRMonitor* mMonitor;
-
-  // The state to enter when the state machine loop iterates next.
-  State mState;
-
-  // A queued state transition.  This is used to record the next state
-  // transition when play or pause is requested during seeking or metadata
-  // loading to ensure a completed metadata load or seek returns to the most
-  // recently requested state on completion.
-  State mNextState;
-
-  // Current playback position in the stream.
-  PRInt64 mPlaybackPosition;
-
-  // Volume that the audio backend will be initialized with.
-  double mInitialVolume;
-
-  // Time position (in seconds) to seek to.  Set by Seek(double).
-  double mSeekTime;
-
-  // True once metadata has been parsed and validated. Users of mSampleRate,
-  // mChannels, mSampleSize, mSampleFormat, mWaveLength, mWavePCMOffset must
-  // check this flag before assuming the values are valid.
-  PRPackedBool mMetadataValid;
-
-  // True if an event to notify about a change in the playback position has
-  // been queued, but not yet run.  It is set to false when the event is
-  // run.  This allows coalescing of these events as they can be produced
-  // many times per second.
-  PRPackedBool mPositionChangeQueued;
-
-  // True if paused.  Tracks only the play/paused state.
-  PRPackedBool mPaused;
-
-  // True if playback of the audio stream has finished, and the audio stream
-  // has been drained. This means playback of the file has ended.
-  PRPackedBool mPlaybackEnded;
-};
-
-nsWaveStateMachine::nsWaveStateMachine(nsWaveDecoder* aDecoder,
-                                       TimeDuration aBufferWaitTime,
-                                       double aInitialVolume)
-  : mDecoder(aDecoder),
-    mStream(nsnull),
-    mBufferingWait(aBufferWaitTime),
-    mBufferingStart(),
-    mBufferingEndOffset(0),
-    mSampleRate(0),
-    mChannels(0),
-    mSampleSize(0),
-    mSampleFormat(nsAudioStream::FORMAT_S16_LE),
-    mWaveLength(0),
-    mWavePCMOffset(0),
-    mMonitor(nsnull),
-    mState(STATE_LOADING_METADATA),
-    mNextState(STATE_PAUSED),
-    mPlaybackPosition(0),
-    mInitialVolume(aInitialVolume),
-    mSeekTime(0.0f),
-    mMetadataValid(PR_FALSE),
-    mPositionChangeQueued(PR_FALSE),
-    mPaused(mNextState == STATE_PAUSED),
-    mPlaybackEnded(PR_FALSE)
-{
-  mMonitor = nsAutoMonitor::NewMonitor("nsWaveStateMachine");
-}
-
-nsWaveStateMachine::~nsWaveStateMachine()
-{
-  nsAutoMonitor::DestroyMonitor(mMonitor);
-}
-
-void
-nsWaveStateMachine::Shutdown()
-{
-  ChangeState(STATE_SHUTDOWN);
-}
-
-void
-nsWaveStateMachine::Play()
-{
-  nsAutoMonitor monitor(mMonitor);
-  mPaused = PR_FALSE;
-  mPlaybackEnded = PR_FALSE;
-  if (mState == STATE_ENDED) {
-    Seek(0);
-    return;
-  }
-  if (mState == STATE_LOADING_METADATA || mState == STATE_SEEKING) {
-    mNextState = STATE_PLAYING;
-  } else {
-    ChangeState(STATE_PLAYING);
-  }
-}
-
-void
-nsWaveStateMachine::SetVolume(double aVolume)
-{
-  nsAutoMonitor monitor(mMonitor);
-  mInitialVolume = aVolume;
-  if (mAudioStream) {
-    mAudioStream->SetVolume(aVolume);
-  }
-}
-
-void
-nsWaveStateMachine::Pause()
-{
-  nsAutoMonitor monitor(mMonitor);
-  mPaused = PR_TRUE;
-  if (mState == STATE_LOADING_METADATA || mState == STATE_SEEKING ||
-      mState == STATE_BUFFERING || mState == STATE_ENDED) {
-    mNextState = STATE_PAUSED;
-  } else if (mState == STATE_PLAYING) {
-    ChangeState(STATE_PAUSED);
-  }
-}
 
-void
-nsWaveStateMachine::Seek(double aTime)
-{
-  nsAutoMonitor monitor(mMonitor);
-  mPlaybackEnded = PR_FALSE;
-  mSeekTime = aTime;
-  if (mSeekTime < 0.0f) {
-    mSeekTime = 0.0f;
-  }
-  if (mState == STATE_LOADING_METADATA) {
-    mNextState = STATE_SEEKING;
-  } else if (mState != STATE_SEEKING) {
-    if (mState == STATE_ENDED) {
-      mNextState = mPaused ? STATE_PAUSED : STATE_PLAYING;
-    } else if (mState != STATE_BUFFERING) {
-      mNextState = mState;
-    }
-    ChangeState(STATE_SEEKING);
-  }
-  NS_ASSERTION(IsSeeking(), "IsSeeking() must return true when seeking");
-}
-
-double
-nsWaveStateMachine::GetDuration()
-{
-  nsAutoMonitor monitor(mMonitor);
-  if (mMetadataValid) {
-    return BytesToTime(GetDataLength());
-  }
-  return std::numeric_limits<double>::quiet_NaN();
-}
-
-PRUint32
-nsWaveStateMachine::GetChannels()
-{
-  nsAutoMonitor monitor(mMonitor);
-  if (mMetadataValid) {
-    return mChannels;
-  }
-  return 0;
-}
-
-PRUint32
-nsWaveStateMachine::GetSampleRate()
-{
-  nsAutoMonitor monitor(mMonitor);
-  if (mMetadataValid) {
-    return mSampleRate;
-  }
-  return 0;
-}
-
-PRBool
-nsWaveStateMachine::IsSeeking()
-{
-  nsAutoMonitor monitor(mMonitor);
-  return mState == STATE_SEEKING || mNextState == STATE_SEEKING;
-}
-
-PRBool
-nsWaveStateMachine::IsEnded()
-{
-  nsAutoMonitor monitor(mMonitor);
-  return mPlaybackEnded;
-}
-
-nsHTMLMediaElement::NextFrameStatus
-nsWaveStateMachine::GetNextFrameStatus()
-{
-  nsAutoMonitor monitor(mMonitor);
-  if (mState == STATE_BUFFERING)
-    return nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE_BUFFERING;
-  // If mMetadataValid is false then we can't call GetDataLength because
-  // we haven't got the length from the Wave header yet. But we know that
-  // if we haven't read the metadata then we don't have playable data.
-  if (mMetadataValid &&
-      mPlaybackPosition < mStream->GetCachedDataEnd(mPlaybackPosition) &&
-      mPlaybackPosition < mWavePCMOffset + GetDataLength())
-    return nsHTMLMediaElement::NEXT_FRAME_AVAILABLE;
-  return nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE;
-}
-
-double
-nsWaveStateMachine::GetTimeForPositionChange()
-{
-  nsAutoMonitor monitor(mMonitor);
-  mPositionChangeQueued = PR_FALSE;
-  return BytesToTime(mPlaybackPosition - mWavePCMOffset);
-}
-
-NS_IMETHODIMP
-nsWaveStateMachine::Run()
-{
-  // Monitor is held by this thread almost permanently, but must be manually
-  // dropped during long operations to prevent the main thread from blocking
-  // when calling methods on the state machine object.
-  nsAutoMonitor monitor(mMonitor);
-
-  for (;;) {
-    switch (mState) {
-    case STATE_LOADING_METADATA:
-      {
-        monitor.Exit();
-        PRBool loaded = LoadRIFFChunk() && LoadFormatChunk() && FindDataOffset();
-        monitor.Enter();
-
-        if (!loaded) {
-          ChangeState(STATE_ERROR);
-        }
-
-        if (mState == STATE_LOADING_METADATA) {
-          mMetadataValid = PR_TRUE;
-          if (mNextState != STATE_SEEKING) {
-            nsCOMPtr<nsIRunnable> event = NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::MetadataLoaded);
-            NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
-          }
-          ChangeState(mNextState);
-        }
-      }
-      break;
-
-    case STATE_BUFFERING: {
-      TimeStamp now = TimeStamp::Now();
-      if (now - mBufferingStart < mBufferingWait &&
-          mStream->GetCachedDataEnd(mPlaybackPosition) < mBufferingEndOffset &&
-          !mStream->IsDataCachedToEndOfStream(mPlaybackPosition) &&
-          !mStream->IsSuspendedByCache()) {
-        LOG(PR_LOG_DEBUG,
-            ("In buffering: buffering data until %d bytes available or %f seconds\n",
-             PRUint32(mBufferingEndOffset - mStream->GetCachedDataEnd(mPlaybackPosition)),
-             (mBufferingWait - (now - mBufferingStart)).ToSeconds()));
-        monitor.Wait(PR_MillisecondsToInterval(1000));
-      } else {
-        ChangeState(mNextState);
-        UpdateReadyState();
-      }
-
-      break;
-    }
-
-    case STATE_PLAYING: {
-      if (!mAudioStream) {
-        OpenAudioStream(monitor);
-        if (!mAudioStream) {
-          ChangeState(STATE_ERROR);
-          break;
-        }
-      }
-
-      TimeStamp now = TimeStamp::Now();
-      TimeStamp lastWakeup = now -
-        TimeDuration::FromMilliseconds(AUDIO_BUFFER_LENGTH);
-
-      do {
-        TimeDuration sleepTime = now - lastWakeup;
-        lastWakeup = now;
-
-        // We aim to have AUDIO_BUFFER_LENGTH milliseconds of audio
-        // buffered, but only sleep for AUDIO_BUFFER_WAKEUP milliseconds
-        // (waking early to refill before the backend underruns).  Since we
-        // wake early, we only buffer sleepTime milliseconds of audio since
-        // there is still AUDIO_BUFFER_LENGTH - sleepTime milliseconds of
-        // audio buffered.
-        TimeDuration targetTime =
-          TimeDuration::FromMilliseconds(AUDIO_BUFFER_LENGTH);
-        if (sleepTime < targetTime) {
-          targetTime = sleepTime;
-        }
-
-        PRInt64 len = TimeToBytes(double(targetTime.ToSeconds()));
-
-        PRInt64 leftToPlay =
-          GetDataLength() - (mPlaybackPosition - mWavePCMOffset);
-        if (leftToPlay <= len) {
-          len = leftToPlay;
-          ChangeState(STATE_ENDED);
-        }
-
-        PRInt64 availableOffset = mStream->GetCachedDataEnd(mPlaybackPosition);
-
-        // Don't buffer if we're at the end of the stream, or if the
-        // load has been suspended by the cache (in the latter case
-        // we need to advance playback to free up cache space).
-        if (mState != STATE_ENDED &&
-            availableOffset < mPlaybackPosition + len &&
-            !mStream->IsSuspendedByCache()) {
-          mBufferingStart = now;
-          mBufferingEndOffset = mPlaybackPosition +
-            TimeToBytes(double(mBufferingWait.ToSeconds()));
-          mBufferingEndOffset = PR_MAX(mPlaybackPosition + len,
-                                       mBufferingEndOffset);
-          mNextState = mState;
-          ChangeState(STATE_BUFFERING);
-
-          UpdateReadyState();
-          break;
-        }
-
-        if (len > 0) {
-          nsAutoArrayPtr<char> buf(new char[size_t(len)]);
-          PRInt64 got = 0;
-
-          monitor.Exit();
-          PRBool ok = ReadAll(buf.get(), len, &got);
-          monitor.Enter();
-
-          // Reached EOF.
-          if (!ok) {
-            ChangeState(STATE_ENDED);
-            if (got == 0) {
-              break;
-            }
-          }
-
-          // Calculate difference between the current media stream position
-          // and the expected end of the PCM data.
-          PRInt64 endDelta = mWavePCMOffset + mWaveLength - mPlaybackPosition;
-          if (endDelta < 0) {
-            // Read past the end of PCM data.  Adjust got to avoid playing
-            // back trailing data.
-            got -= -endDelta;
-            ChangeState(STATE_ENDED);
-          }
-
-          if (mState == STATE_ENDED) {
-            got = RoundDownToSample(got);
-          }
-
-          PRUint32 sampleSize = mSampleFormat == nsAudioStream::FORMAT_U8 ? 1 : 2;
-          NS_ABORT_IF_FALSE(got % sampleSize == 0, "Must write complete samples");
-          PRUint32 lengthInSamples = PRUint32(got / sampleSize);
-
-          monitor.Exit();
-          mAudioStream->Write(buf.get(), lengthInSamples, PR_FALSE);
-          monitor.Enter();
-
-          FirePositionChanged(PR_FALSE);
-        }
-
-        if (mState == STATE_PLAYING) {
-          monitor.Wait(PR_MillisecondsToInterval(AUDIO_BUFFER_WAKEUP));
-          now = TimeStamp::Now();
-        }
-      } while (mState == STATE_PLAYING);
-      break;
-    }
-
-    case STATE_SEEKING:
-      {
-        CloseAudioStream();
-
-        mSeekTime = NS_MIN(mSeekTime, GetDuration());
-        double seekTime = mSeekTime;
-
-        // Calculate relative offset within PCM data.
-        PRInt64 position = RoundDownToSample(TimeToBytes(seekTime));
-        NS_ABORT_IF_FALSE(position >= 0 && position <= GetDataLength(),
-                          "Invalid seek position");
-        // Convert to absolute offset within stream.
-        position += mWavePCMOffset;
-
-        // If in the midst of a seek, report the requested seek time
-        // as the current time as required by step 8 of 4.8.10.9 'Seeking'
-        // in the WHATWG spec.
-        PRInt64 oldPosition = mPlaybackPosition;
-        mPlaybackPosition = position;
-        FirePositionChanged(PR_TRUE);
-
-        monitor.Exit();
-        nsCOMPtr<nsIRunnable> startEvent =
-          NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::SeekingStarted);
-        NS_DispatchToMainThread(startEvent, NS_DISPATCH_SYNC);
-        monitor.Enter();
-
-        if (mState == STATE_SHUTDOWN) {
-          break;
-        }
-
-        monitor.Exit();
-        nsresult rv;
-        rv = mStream->Seek(nsISeekableStream::NS_SEEK_SET, position);
-        monitor.Enter();
-        if (NS_FAILED(rv)) {
-          NS_WARNING("Seek failed");
-          mPlaybackPosition = oldPosition;
-          FirePositionChanged(PR_TRUE);
-        }
-
-        if (mState == STATE_SHUTDOWN) {
-          break;
-        }
-
-        if (mState == STATE_SEEKING && mSeekTime == seekTime) {
-          // Special case #1: if a seek was requested during metadata load,
-          // mNextState will have been clobbered.  This can only happen when
-          // we're instantiating a decoder to service a seek request after
-          // playback has ended, so we know that the clobbered mNextState
-          // was PAUSED.
-          // Special case #2: if a seek is requested after the state machine
-          // entered STATE_ENDED but before the user has seen the ended
-          // event, playback has not ended as far as the user's
-          // concerned--the state machine needs to return to the last
-          // playback state.
-          // Special case #3: if seeking to the end of the media, transition
-          // directly into STATE_ENDED.
-          State nextState = mNextState;
-          if (nextState == STATE_SEEKING) {
-            nextState = STATE_PAUSED;
-          } else if (nextState == STATE_ENDED) {
-            nextState = mPaused ? STATE_PAUSED : STATE_PLAYING;
-          } else if (GetDuration() == seekTime) {
-            nextState = STATE_ENDED;
-          }
-          ChangeState(nextState);
-        }
-
-        if (mState != STATE_SEEKING) {
-          monitor.Exit();
-          nsCOMPtr<nsIRunnable> stopEvent =
-            NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::SeekingStopped);
-          NS_DispatchToMainThread(stopEvent, NS_DISPATCH_SYNC);
-          monitor.Enter();
-        }
-      }
-      break;
-
-    case STATE_PAUSED:
-      monitor.Wait();
-      break;
-
-    case STATE_ENDED:
-      FirePositionChanged(PR_TRUE);
-
-      if (mAudioStream) {
-        monitor.Exit();
-        mAudioStream->Drain();
-        monitor.Enter();
-
-        // After the drain call the audio stream is unusable. Close it so that
-        // next time audio is used a new stream is created.
-        CloseAudioStream();
-      }
-
-      mPlaybackEnded = PR_TRUE;
-
-      if (mState == STATE_ENDED) {
-        nsCOMPtr<nsIRunnable> event =
-          NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::PlaybackEnded);
-        NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
-
-        // We've finished playback. Shutdown the state machine thread, 
-        // in order to save memory on thread stacks, particuarly on Linux.
-        event = new ShutdownThreadEvent(mDecoder->mPlaybackThread);
-        NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
-        mDecoder->mPlaybackThread = nsnull;
-        return NS_OK;
-      }
-      break;
-
-    case STATE_ERROR:
-      {
-        nsCOMPtr<nsIRunnable> event = NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::DecodeError);
-        NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
-
-        monitor.Wait();
-
-        if (mState != STATE_SHUTDOWN) {
-          NS_WARNING("Invalid state transition");
-          ChangeState(STATE_ERROR);
-        }
-      }
-      break;
-
-    case STATE_SHUTDOWN:
-      mPlaybackEnded = PR_TRUE;
-      CloseAudioStream();
-      return NS_OK;
-    }
-  }
-
-  return NS_OK;
-}
-
-#if defined(DEBUG)
-static PRBool
-IsValidStateTransition(State aStartState, State aEndState)
-{
-  if (aEndState == STATE_SHUTDOWN) {
-    return PR_TRUE;
-  }
-
-  if (aStartState == aEndState) {
-    LOG(PR_LOG_WARNING, ("Transition to current state requested"));
-    return PR_TRUE;
-  }
-
-  switch (aStartState) {
-  case STATE_LOADING_METADATA:
-    if (aEndState == STATE_PLAYING || aEndState == STATE_SEEKING ||
-        aEndState == STATE_PAUSED || aEndState == STATE_ERROR)
-      return PR_TRUE;
-    break;
-  case STATE_BUFFERING:
-    if (aEndState == STATE_PLAYING || aEndState == STATE_PAUSED ||
-        aEndState == STATE_SEEKING)
-      return PR_TRUE;
-    break;
-  case STATE_PLAYING:
-    if (aEndState == STATE_BUFFERING || aEndState == STATE_SEEKING ||
-        aEndState == STATE_ENDED || aEndState == STATE_PAUSED)
-      return PR_TRUE;
-    break;
-  case STATE_SEEKING:
-    if (aEndState == STATE_PLAYING || aEndState == STATE_PAUSED ||
-        aEndState == STATE_ENDED)
-      return PR_TRUE;
-    break;
-  case STATE_PAUSED:
-    if (aEndState == STATE_PLAYING || aEndState == STATE_SEEKING)
-      return PR_TRUE;
-    break;
-  case STATE_ENDED:
-    if (aEndState == STATE_SEEKING)
-      return PR_TRUE;
-    /* fallthrough */
-  case STATE_ERROR:
-  case STATE_SHUTDOWN:
-    break;
-  }
-
-  LOG(PR_LOG_ERROR, ("Invalid state transition from %d to %d", aStartState, aEndState));
-  return PR_FALSE;
-}
-#endif
-
-void
-nsWaveStateMachine::ChangeState(State aState)
+nsDecoderStateMachine* nsWaveDecoder::CreateStateMachine()
 {
-  nsAutoMonitor monitor(mMonitor);
-  if (mState == STATE_SHUTDOWN) {
-    LOG(PR_LOG_WARNING, ("In shutdown, state transition ignored"));
-    return;
-  }
-#if defined(DEBUG)
-  NS_ABORT_IF_FALSE(IsValidStateTransition(mState, aState), "Invalid state transition");
-#endif
-  mState = aState;
-  monitor.NotifyAll();
-}
-
-void
-nsWaveStateMachine::OpenAudioStream(nsAutoMonitor& aMonitor)
-{
-  NS_ABORT_IF_FALSE(mMetadataValid,
-                    "Attempting to initialize audio stream with invalid metadata");
-
-  nsRefPtr<nsAudioStream> audioStream = nsAudioStream::AllocateStream();
-  if (!audioStream) {
-    LOG(PR_LOG_ERROR, ("Could not create audio stream"));
-    return;
-  }
-
-  // Drop the monitor while initializing the stream because remote
-  // audio streams wait on a synchronous event running on the main
-  // thread, and holding the decoder monitor while waiting for this
-  // can result in deadlocks.
-  aMonitor.Exit();
-  audioStream->Init(mChannels, mSampleRate, mSampleFormat);
-  aMonitor.Enter();
-
-  mAudioStream = audioStream;
-  mAudioStream->SetVolume(mInitialVolume);
-}
-
-void
-nsWaveStateMachine::CloseAudioStream()
-{
-  if (mAudioStream) {
-    mAudioStream->Shutdown();
-    mAudioStream = nsnull;
-  }
-}
-
-nsMediaDecoder::Statistics
-nsWaveStateMachine::GetStatistics()
-{
-  nsMediaDecoder::Statistics result;
-  nsAutoMonitor monitor(mMonitor);
-  result.mDownloadRate = mStream->GetDownloadRate(&result.mDownloadRateReliable);
-  result.mPlaybackRate = mSampleRate*mChannels*mSampleSize;
-  result.mPlaybackRateReliable = PR_TRUE;
-  result.mTotalBytes = mStream->GetLength();
-  result.mDownloadPosition = mStream->GetCachedDataEnd(mPlaybackPosition);
-  result.mDecoderPosition = mPlaybackPosition;
-  result.mPlaybackPosition = mPlaybackPosition;
-  return result;
-}
-
-void
-nsWaveStateMachine::NotifyBytesConsumed(PRInt64 aBytes)
-{
-  nsAutoMonitor monitor(mMonitor);
-  mPlaybackPosition += aBytes;
-}
-
-static PRUint32
-ReadUint32BE(const char** aBuffer)
-{
-  PRUint32 result =
-    PRUint8((*aBuffer)[0]) << 24 |
-    PRUint8((*aBuffer)[1]) << 16 |
-    PRUint8((*aBuffer)[2]) << 8 |
-    PRUint8((*aBuffer)[3]);
-  *aBuffer += sizeof(PRUint32);
-  return result;
-}
-
-static PRUint32
-ReadUint32LE(const char** aBuffer)
-{
-  PRUint32 result =
-    PRUint8((*aBuffer)[3]) << 24 |
-    PRUint8((*aBuffer)[2]) << 16 |
-    PRUint8((*aBuffer)[1]) << 8 |
-    PRUint8((*aBuffer)[0]);
-  *aBuffer += sizeof(PRUint32);
-  return result;
-}
-
-static PRUint16
-ReadUint16LE(const char** aBuffer)
-{
-  PRUint16 result =
-    PRUint8((*aBuffer)[1]) << 8 |
-    PRUint8((*aBuffer)[0]) << 0;
-  *aBuffer += sizeof(PRUint16);
-  return result;
-}
-
-PRBool
-nsWaveStateMachine::IsShutdown()
-{
-  nsAutoMonitor monitor(mMonitor);
-  return mState == STATE_SHUTDOWN;
-}
-
-PRBool
-nsWaveStateMachine::ReadAll(char* aBuf, PRInt64 aSize, PRInt64* aBytesRead = nsnull)
-{
-  PRUint32 got = 0;
-  if (aBytesRead) {
-    *aBytesRead = 0;
-  }
-  do {
-    PRUint32 read = 0;
-    if (NS_FAILED(mStream->Read(aBuf + got, PRUint32(aSize - got), &read))) {
-      NS_WARNING("Stream read failed");
-      return PR_FALSE;
-    }
-    if (IsShutdown() || read == 0) {
-      return PR_FALSE;
-    }
-    NotifyBytesConsumed(read);
-    got += read;
-    if (aBytesRead) {
-      *aBytesRead = got;
-    }
-  } while (got != aSize);
-  return PR_TRUE;
-}
-
-PRBool
-nsWaveStateMachine::LoadRIFFChunk()
-{
-  char riffHeader[RIFF_INITIAL_SIZE];
-  const char* p = riffHeader;
-
-  NS_ABORT_IF_FALSE(mStream->Tell() == 0,
-                    "LoadRIFFChunk called when stream in invalid state");
-
-  if (!ReadAll(riffHeader, sizeof(riffHeader))) {
-    return PR_FALSE;
-  }
-
-  if (ReadUint32BE(&p) != RIFF_CHUNK_MAGIC) {
-    NS_WARNING("Stream data not in RIFF format");
-    return PR_FALSE;
-  }
-
-  // Skip over RIFF size field.
-  p += 4;
-
-  if (ReadUint32BE(&p) != WAVE_CHUNK_MAGIC) {
-    NS_WARNING("Expected WAVE chunk");
-    return PR_FALSE;
-  }
-
-  return PR_TRUE;
-}
-
-PRBool
-nsWaveStateMachine::ScanForwardUntil(PRUint32 aWantedChunk, PRUint32* aChunkSize)
-{
-  NS_ABORT_IF_FALSE(aChunkSize, "Require aChunkSize argument");
-  *aChunkSize = 0;
-
-  for (;;) {
-    char chunkHeader[8];
-    const char* p = chunkHeader;
-
-    if (!ReadAll(chunkHeader, sizeof(chunkHeader))) {
-      return PR_FALSE;
-    }
-
-    PRUint32 magic = ReadUint32BE(&p);
-    PRUint32 chunkSize = ReadUint32LE(&p);
-
-    if (magic == aWantedChunk) {
-      *aChunkSize = chunkSize;
-      return PR_TRUE;
-    }
-
-    // RIFF chunks are two-byte aligned, so round up if necessary.
-    chunkSize += chunkSize % 2;
-
-    while (chunkSize > 0) {
-      PRUint32 size = PR_MIN(chunkSize, 1 << 16);
-      nsAutoArrayPtr<char> chunk(new char[size]);
-      if (!ReadAll(chunk.get(), size)) {
-        return PR_FALSE;
-      }
-      chunkSize -= size;
-    }
-  }
-}
-
-PRBool
-nsWaveStateMachine::LoadFormatChunk()
-{
-  PRUint32 fmtSize, rate, channels, sampleSize, sampleFormat;
-  char waveFormat[WAVE_FORMAT_CHUNK_SIZE];
-  const char* p = waveFormat;
-
-  // RIFF chunks are always word (two byte) aligned.
-  NS_ABORT_IF_FALSE(mStream->Tell() % 2 == 0,
-                    "LoadFormatChunk called with unaligned stream");
-
-  // The "format" chunk may not directly follow the "riff" chunk, so skip
-  // over any intermediate chunks.
-  if (!ScanForwardUntil(FRMT_CHUNK_MAGIC, &fmtSize)) {
-      return PR_FALSE;
-  }
-
-  if (!ReadAll(waveFormat, sizeof(waveFormat))) {
-    return PR_FALSE;
-  }
-
-  if (ReadUint16LE(&p) != WAVE_FORMAT_ENCODING_PCM) {
-    NS_WARNING("WAVE is not uncompressed PCM, compressed encodings are not supported");
-    return PR_FALSE;
-  }
-
-  channels = ReadUint16LE(&p);
-  rate = ReadUint32LE(&p);
-
-  // Skip over average bytes per second field.
-  p += 4;
-
-  sampleSize = ReadUint16LE(&p);
-
-  sampleFormat = ReadUint16LE(&p);
-
-  // PCM encoded WAVEs are not expected to have an extended "format" chunk,
-  // but I have found WAVEs that have a extended "format" chunk with an
-  // extension size of 0 bytes.  Be polite and handle this rather than
-  // considering the file invalid.  This code skips any extension of the
-  // "format" chunk.
-  if (fmtSize > WAVE_FORMAT_CHUNK_SIZE) {
-    char extLength[2];
-    const char* p = extLength;
-
-    if (!ReadAll(extLength, sizeof(extLength))) {
-      return PR_FALSE;
-    }
-
-    PRUint16 extra = ReadUint16LE(&p);
-    if (fmtSize - (WAVE_FORMAT_CHUNK_SIZE + 2) != extra) {
-      NS_WARNING("Invalid extended format chunk size");
-      return PR_FALSE;
-    }
-    extra += extra % 2;
-
-    if (extra > 0) {
-      nsAutoArrayPtr<char> chunkExtension(new char[extra]);
-      if (!ReadAll(chunkExtension.get(), extra)) {
-        return PR_FALSE;
-      }
-    }
-  }
-
-  // RIFF chunks are always word (two byte) aligned.
-  NS_ABORT_IF_FALSE(mStream->Tell() % 2 == 0,
-                    "LoadFormatChunk left stream unaligned");
-
-  // Make sure metadata is fairly sane.  The rate check is fairly arbitrary,
-  // but the channels check is intentionally limited to mono or stereo
-  // because that's what the audio backend currently supports.
-  if (rate < 100 || rate > 96000 ||
-      channels < 1 || channels > 2 ||
-      (sampleSize != 1 && sampleSize != 2 && sampleSize != 4) ||
-      (sampleFormat != 8 && sampleFormat != 16)) {
-    NS_WARNING("Invalid WAVE metadata");
-    return PR_FALSE;
-  }
-
-  nsAutoMonitor monitor(mMonitor);
-  mSampleRate = rate;
-  mChannels = channels;
-  mSampleSize = sampleSize;
-  if (sampleFormat == 8) {
-    mSampleFormat = nsAudioStream::FORMAT_U8;
-  } else {
-    mSampleFormat = nsAudioStream::FORMAT_S16_LE;
-  }
-  return PR_TRUE;
-}
-
-PRBool
-nsWaveStateMachine::FindDataOffset()
-{
-  // RIFF chunks are always word (two byte) aligned.
-  NS_ABORT_IF_FALSE(mStream->Tell() % 2 == 0,
-                    "FindDataOffset called with unaligned stream");
-
-  // The "data" chunk may not directly follow the "format" chunk, so skip
-  // over any intermediate chunks.
-  PRUint32 length;
-  if (!ScanForwardUntil(DATA_CHUNK_MAGIC, &length)) {
-    return PR_FALSE;
-  }
-
-  PRInt64 offset = mStream->Tell();
-  if (offset <= 0 || offset > PR_UINT32_MAX) {
-    NS_WARNING("PCM data offset out of range");
-    return PR_FALSE;
-  }
-
-  nsAutoMonitor monitor(mMonitor);
-  mWaveLength = length;
-  mWavePCMOffset = PRUint32(offset);
-  return PR_TRUE;
-}
-
-PRInt64
-nsWaveStateMachine::GetDataLength()
-{
-  NS_ABORT_IF_FALSE(mMetadataValid,
-                    "Attempting to initialize audio stream with invalid metadata");
-
-  PRInt64 length = mWaveLength;
-  // If the decoder has a valid content length, and it's shorter than the
-  // expected length of the PCM data, calculate the playback duration from
-  // the content length rather than the expected PCM data length.
-  PRInt64 streamLength = mStream->GetLength();
-  if (streamLength >= 0) {
-    PRInt64 dataLength = PR_MAX(0, streamLength - mWavePCMOffset);
-    length = PR_MIN(dataLength, length);
-  }
-  return length;
-}
-
-void
-nsWaveStateMachine::FirePositionChanged(PRBool aCoalesce)
-{
-  if (aCoalesce && mPositionChangeQueued) {
-    return;
-  }
-
-  mPositionChangeQueued = PR_TRUE;
-  nsCOMPtr<nsIRunnable> event = NS_NewRunnableMethod(mDecoder, &nsWaveDecoder::PlaybackPositionChanged);
-  NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
-}
-
-nsresult
-nsWaveStateMachine::GetBuffered(nsTimeRanges* aBuffered)
-{
-  PRInt64 startOffset = mStream->GetNextCachedData(mWavePCMOffset);
-  while (startOffset >= 0) {
-    PRInt64 endOffset = mStream->GetCachedDataEnd(startOffset);
-    // Bytes [startOffset..endOffset] are cached.
-    aBuffered->Add(BytesToTime(startOffset - mWavePCMOffset),
-                   BytesToTime(endOffset - mWavePCMOffset));
-    startOffset = mStream->GetNextCachedData(endOffset);
-  }
-  return NS_OK;
-}
-
-NS_IMPL_THREADSAFE_ISUPPORTS1(nsWaveDecoder, nsIObserver)
-
-nsWaveDecoder::nsWaveDecoder()
-  : mInitialVolume(1.0f),
-    mCurrentTime(0.0f),
-    mEndedDuration(std::numeric_limits<double>::quiet_NaN()),
-    mEnded(PR_FALSE),
-    mSeekable(PR_TRUE),
-    mResourceLoaded(PR_FALSE),
-    mMetadataLoadedReported(PR_FALSE),
-    mResourceLoadedReported(PR_FALSE)
-{
-  MOZ_COUNT_CTOR(nsWaveDecoder);
-
-#ifdef PR_LOGGING
-  if (!gWaveDecoderLog) {
-    gWaveDecoderLog = PR_NewLogModule("nsWaveDecoder");
-  }
-#endif
-}
-
-nsWaveDecoder::~nsWaveDecoder()
-{
-  MOZ_COUNT_DTOR(nsWaveDecoder);
-  UnpinForSeek();
-}
-
-PRBool
-nsWaveDecoder::Init(nsHTMLMediaElement* aElement)
-{
-  nsMediaDecoder::Init(aElement);
-
-  nsContentUtils::RegisterShutdownObserver(this);
-
-  mPlaybackStateMachine = new nsWaveStateMachine(this,
-    TimeDuration::FromMilliseconds(BUFFERING_TIMEOUT),
-    mInitialVolume);
-  NS_ENSURE_TRUE(mPlaybackStateMachine, PR_FALSE);
-
-  return PR_TRUE;
-}
-
-nsMediaStream*
-nsWaveDecoder::GetCurrentStream()
-{
-  return mStream;
-}
-
-already_AddRefed<nsIPrincipal>
-nsWaveDecoder::GetCurrentPrincipal()
-{
-  if (!mStream) {
-    return nsnull;
-  }
-  return mStream->GetCurrentPrincipal();
-}
-
-double
-nsWaveDecoder::GetCurrentTime()
-{
-  return mCurrentTime;
+  return new nsBuiltinDecoderStateMachine(this, new nsWaveReader(this));
 }
-
-nsresult
-nsWaveDecoder::StartStateMachineThread()
-{
-  NS_ASSERTION(mPlaybackStateMachine, "Must have state machine");
-  if (mPlaybackThread) {
-    return NS_OK;
-  }
-  nsresult rv = NS_NewThread(getter_AddRefs(mPlaybackThread));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  return mPlaybackThread->Dispatch(mPlaybackStateMachine, NS_DISPATCH_NORMAL);
-}
-
-nsresult
-nsWaveDecoder::Seek(double aTime)
-{
-  if (mPlaybackStateMachine) {
-    mEnded = PR_FALSE;
-    mCurrentTime = aTime;
-    PinForSeek();
-    mPlaybackStateMachine->Seek(aTime);
-    return StartStateMachineThread();
-  }
-
-  return NS_ERROR_FAILURE;
-}
-
-nsresult
-nsWaveDecoder::PlaybackRateChanged()
-{
-  return NS_ERROR_NOT_IMPLEMENTED;
-}
-
-double
-nsWaveDecoder::GetDuration()
-{
-  if (mPlaybackStateMachine) {
-    return mPlaybackStateMachine->GetDuration();
-  }
-  return mEndedDuration;
-}
-
-void
-nsWaveDecoder::Pause()
-{
-  if (mPlaybackStateMachine) {
-    mPlaybackStateMachine->Pause();
-  }
-}
-
-void
-nsWaveDecoder::SetVolume(double aVolume)
-{
-  mInitialVolume = aVolume;
-  if (mPlaybackStateMachine) {
-    mPlaybackStateMachine->SetVolume(aVolume);
-  }
-}
-
-nsresult
-nsWaveDecoder::Play()
-{
-  if (mPlaybackStateMachine) {
-    mEnded = PR_FALSE;
-    mPlaybackStateMachine->Play();
-    return StartStateMachineThread();
-  }
-
-  return NS_ERROR_FAILURE;
-}
-
-void
-nsWaveDecoder::Stop()
-{
-  if (mPlaybackStateMachine) {
-    mPlaybackStateMachine->Shutdown();
-  }
-
-  if (mStream) {
-    mStream->Close();
-  }
-
-  if (mPlaybackThread) {
-    mPlaybackThread->Shutdown();
-  }
-
-  if (mPlaybackStateMachine) {
-    mEndedDuration = mPlaybackStateMachine->GetDuration();
-    mEnded = mPlaybackStateMachine->IsEnded();
-  }
-
-  mPlaybackThread = nsnull;
-  mPlaybackStateMachine = nsnull;
-  mStream = nsnull;
-
-  nsContentUtils::UnregisterShutdownObserver(this);
-}
-
-nsresult
-nsWaveDecoder::Load(nsMediaStream* aStream, nsIStreamListener** aStreamListener,
-                    nsMediaDecoder* aCloneDonor)
-{
-  NS_ASSERTION(aStream, "A stream should be provided");
-
-  if (aStreamListener) {
-    *aStreamListener = nsnull;
-  }
-
-  mStream = aStream;
-
-  nsresult rv = mStream->Open(aStreamListener);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  mPlaybackStateMachine->SetStream(mStream);
-
-  rv = NS_NewThread(getter_AddRefs(mPlaybackThread));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = mPlaybackThread->Dispatch(mPlaybackStateMachine, NS_DISPATCH_NORMAL);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  return NS_OK;
-}
-
-void
-nsWaveDecoder::MetadataLoaded()
-{
-  if (mShuttingDown) {
-    return;
-  }
-
-  if (mElement) {
-    mElement->MetadataLoaded(mPlaybackStateMachine->GetChannels(),
-                             mPlaybackStateMachine->GetSampleRate());
-    mElement->FirstFrameLoaded(mResourceLoaded);
-  }
-
-  mMetadataLoadedReported = PR_TRUE;
-
-  if (mResourceLoaded) {
-    ResourceLoaded();
-  } else {
-    StartProgress();
-  }
-}
-
-void
-nsWaveDecoder::PlaybackEnded()
-{
-  if (mShuttingDown) {
-    return;
-  }
-
-  if (!mPlaybackStateMachine->IsEnded()) {
-    return;
-  }
-  mEnded = PR_TRUE;
-
-  // Update ready state; now that we've finished playback, we should
-  // switch to HAVE_CURRENT_DATA.
-  UpdateReadyStateForData();
-  if (mElement) {
-    mElement->PlaybackEnded();
-  }
-}
-
-void
-nsWaveDecoder::ResourceLoaded()
-{
-  if (mShuttingDown) {
-    return;
-  }
-
-  mResourceLoaded = PR_TRUE;
-
-  if (!mMetadataLoadedReported || mResourceLoadedReported)
-    return;
-
-  StopProgress();
-
-  if (mElement) {
-    // Ensure the final progress event gets fired
-    mElement->ResourceLoaded();
-  }
-
-  mResourceLoadedReported = PR_TRUE;
-}
-
-void
-nsWaveDecoder::NetworkError()
-{
-  if (mShuttingDown) {
-    return;
-  }
-  if (mElement) {
-    mElement->NetworkError();
-  }
-  Shutdown();
-}
-
-PRBool
-nsWaveDecoder::IsSeeking() const
-{
-  if (mPlaybackStateMachine) {
-    return mPlaybackStateMachine->IsSeeking();
-  }
-  return PR_FALSE;
-}
-
-PRBool
-nsWaveDecoder::IsEnded() const
-{
-  return mEnded;
-}
-
-nsMediaDecoder::Statistics
-nsWaveDecoder::GetStatistics()
-{
-  if (!mPlaybackStateMachine)
-    return Statistics();
-  return mPlaybackStateMachine->GetStatistics();
-}
-
-void
-nsWaveDecoder::NotifySuspendedStatusChanged()
-{
-  if (mStream->IsSuspendedByCache() && mElement) {
-    // if this is an autoplay element, we need to kick off its autoplaying
-    // now so we consume data and hopefully free up cache space
-    mElement->NotifyAutoplayDataReady();
-  }
-}
-
-void
-nsWaveDecoder::NotifyBytesDownloaded()
-{
-  UpdateReadyStateForData();
-  Progress(PR_FALSE);
-}
-
-void
-nsWaveDecoder::NotifyDownloadEnded(nsresult aStatus)
-{
-  if (NS_SUCCEEDED(aStatus)) {
-    ResourceLoaded();
-  } else if (aStatus == NS_BINDING_ABORTED) {
-    // Download has been cancelled by user.
-    if (mElement) {
-      mElement->LoadAborted();
-    }
-  } else if (aStatus != NS_BASE_STREAM_CLOSED) {
-    NetworkError();
-  }
-  UpdateReadyStateForData();
-}
-
-void
-nsWaveDecoder::Shutdown()
-{
-  if (mShuttingDown)
-    return;
-
-  mShuttingDown = PR_TRUE;
-
-  nsMediaDecoder::Shutdown();
-
-  // An event that gets posted to the main thread, when the media element is
-  // being destroyed, to destroy the decoder. Since the decoder shutdown can
-  // block and post events this cannot be done inside destructor calls. So
-  // this event is posted asynchronously to the main thread to perform the
-  // shutdown.
-  nsCOMPtr<nsIRunnable> event =
-    NS_NewRunnableMethod(this, &nsWaveDecoder::Stop);
-  NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
-}
-
-nsresult
-nsWaveDecoder::Observe(nsISupports* aSubject, const char* aTopic, const PRUnichar* aData)
-{
-  if (strcmp(aTopic, NS_XPCOM_SHUTDOWN_OBSERVER_ID) == 0) {
-    Shutdown();
-  }
-  return NS_OK;
-}
-
-void
-nsWaveDecoder::NextFrameUnavailableBuffering()
-{
-  NS_ASSERTION(NS_IsMainThread(), "Should be called on main thread");
-  if (!mElement || mShuttingDown || !mPlaybackStateMachine)
-    return;
-
-  mElement->UpdateReadyStateForData(nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE_BUFFERING);
-}
-
-void
-nsWaveDecoder::NextFrameAvailable()
-{
-  NS_ASSERTION(NS_IsMainThread(), "Should be called on main thread");
-  if (!mElement || mShuttingDown || !mPlaybackStateMachine)
-    return;
-
-  if (!mMetadataLoadedReported) {
-    mElement->UpdateReadyStateForData(nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE);
-  } else {
-    mElement->UpdateReadyStateForData(nsHTMLMediaElement::NEXT_FRAME_AVAILABLE);
-  }
-}
-
-void
-nsWaveDecoder::NextFrameUnavailable()
-{
-  NS_ASSERTION(NS_IsMainThread(), "Should be called on main thread");
-  if (!mElement || mShuttingDown || !mPlaybackStateMachine)
-    return;
-
-  mElement->UpdateReadyStateForData(nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE);
-}
-
-void
-nsWaveDecoder::UpdateReadyStateForData()
-{
-  NS_ASSERTION(NS_IsMainThread(), "Should be called on main thread");
-  if (!mElement || mShuttingDown || !mPlaybackStateMachine)
-    return;
-
-  nsHTMLMediaElement::NextFrameStatus frameStatus =
-    mPlaybackStateMachine->GetNextFrameStatus();
-  if (frameStatus == nsHTMLMediaElement::NEXT_FRAME_AVAILABLE &&
-      !mMetadataLoadedReported) {
-    frameStatus = nsHTMLMediaElement::NEXT_FRAME_UNAVAILABLE;
-  }
-  mElement->UpdateReadyStateForData(frameStatus);
-}
-
-void
-nsWaveDecoder::SeekingStarted()
-{
-  if (mShuttingDown) {
-    return;
-  }
-
-  if (mElement) {
-    UpdateReadyStateForData();
-    mElement->SeekStarted();
-  }
-}
-
-void
-nsWaveDecoder::SeekingStopped()
-{
-  UnpinForSeek();
-  if (mShuttingDown) {
-    return;
-  }
-
-  if (mElement) {
-    UpdateReadyStateForData();
-    mElement->SeekCompleted();
-  }
-}
-
-void
-nsWaveDecoder::DecodeError()
-{
-  if (mShuttingDown) {
-    return;
-  }
-  if (mElement) {
-    mElement->DecodeError();
-  }
-  Shutdown();
-}
-
-void
-nsWaveDecoder::PlaybackPositionChanged()
-{
-  if (mShuttingDown) {
-    return;
-  }
-
-  double lastTime = mCurrentTime;
-
-  if (mPlaybackStateMachine) {
-    mCurrentTime = mPlaybackStateMachine->GetTimeForPositionChange();
-  }
-
-  if (mElement && lastTime != mCurrentTime) {
-    UpdateReadyStateForData();
-    FireTimeUpdate();
-  }
-}
-
-void
-nsWaveDecoder::SetDuration(PRInt64 /* aDuration */)
-{
-  // Ignored by the wave decoder since we can compute the
-  // duration directly from the wave data itself.
-}
-
-void
-nsWaveDecoder::SetSeekable(PRBool aSeekable)
-{
-  mSeekable = aSeekable;
-}
-
-PRBool
-nsWaveDecoder::GetSeekable()
-{
-  return mSeekable;
-}
-
-void
-nsWaveDecoder::Suspend()
-{
-  if (mStream) {
-    mStream->Suspend(PR_TRUE);
-  }
-}
-
-void
-nsWaveDecoder::Resume(PRBool aForceBuffering)
-{
-  if (mStream) {
-    mStream->Resume();
-  }
-}
-
-void 
-nsWaveDecoder::MoveLoadsToBackground()
-{
-  if (mStream) {
-    mStream->MoveLoadsToBackground();
-  }
-}
-
-nsresult
-nsWaveDecoder::GetBuffered(nsTimeRanges* aBuffered)
-{
-  NS_ASSERTION(NS_IsMainThread(), "Only call on main thread");
-  return mPlaybackStateMachine->GetBuffered(aBuffered);
-}
--- a/content/media/wave/nsWaveDecoder.h
+++ b/content/media/wave/nsWaveDecoder.h
@@ -10,18 +10,18 @@
  *
  * Software distributed under the License is distributed on an "AS IS" basis,
  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  * for the specific language governing rights and limitations under the
  * License.
  *
  * The Original Code is Mozilla code.
  *
- * The Initial Developer of the Original Code is the Mozilla Corporation.
- * Portions created by the Initial Developer are Copyright (C) 2008
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
  *  Matthew Gregan <kinetik@flim.org>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -33,276 +33,30 @@
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 #if !defined(nsWaveDecoder_h_)
 #define nsWaveDecoder_h_
 
-#include "nsISupports.h"
-#include "nsCOMPtr.h"
-#include "nsMediaDecoder.h"
-#include "nsMediaStream.h"
-
-/*
-  nsWaveDecoder provides an implementation of the abstract nsMediaDecoder
-  class that supports parsing and playback of Waveform Audio (WAVE) chunks
-  embedded in Resource Interchange File Format (RIFF) bitstreams as
-  specified by the Multimedia Programming Interface and Data Specification
-  1.0.
-
-  Each decoder instance starts one thread (the playback thread).  A single
-  nsWaveStateMachine event is dispatched to this thread to start the
-  thread's state machine running.  The Run method of the event is a loop
-  that executes the current state.  The state can be changed by the state
-  machine, or from the main thread via threadsafe methods on the event.
-  During playback, the playback thread reads data from the network and
-  writes it to the audio backend, attempting to keep the backend's audio
-  buffers full.  It is also responsible for seeking, buffering, and
-  pausing/resuming audio.
-
-  The decoder also owns an nsMediaStream instance that provides a threadsafe
-  blocking interface to read from network channels.  The state machine is
-  the primary user of this stream and holds a weak (raw) pointer to it as
-  the thread, state machine, and stream's lifetimes are all managed by the
-  decoder.
-
-  nsWaveStateMachine has the following states:
-
-  LOADING_METADATA
-    RIFF/WAVE chunks are being read from the stream, the metadata describing
-    the audio data is parsed.
-
-  BUFFERING
-    Playback is paused while waiting for additional data.
-
-  PLAYING
-    If data is available in the stream and the audio backend can consume
-    more data, it is read from the stream and written to the audio backend.
-    Sleep until approximately half of the backend's buffers have drained.
-
-  SEEKING
-    Decoder is seeking to a specified time in the media.
-
-  PAUSED
-    Pause the audio backend, then wait for a state transition.
-
-  ENDED
-    Expected PCM data (or stream EOF) reached, wait for the audio backend to
-    play any buffered data, then wait for shutdown.
-
-  ERROR
-    Metadata loading/parsing failed, wait for shutdown.
-
-  SHUTDOWN
-    Close the audio backend and return from the run loop.
-
-  State transitions within the state machine are:
-
-  LOADING_METADATA -> PLAYING
-                   -> PAUSED
-                   -> ERROR
-
-  BUFFERING        -> PLAYING
-                   -> PAUSED
-
-  PLAYING          -> BUFFERING
-                   -> ENDED
-
-  SEEKING          -> PLAYING
-                   -> PAUSED
-
-  PAUSED           -> waits for caller to play, seek, or shutdown
-
-  ENDED            -> waits for caller to shutdown
-
-  ERROR            -> waits for caller to shutdown
-
-  SHUTDOWN         -> exits state machine
-
-  In addition, the following methods cause state transitions:
-
-  Shutdown(), Play(), Pause(), Seek(double)
-
-  The decoder implementation is currently limited to Linear PCM encoded
-  audio data with one or two channels of 8- or 16-bit samples at sample
-  rates from 100 Hz to 96 kHz.  The number of channels is limited by what
-  the audio backend (sydneyaudio via nsAudioStream) currently supports.  The
-  supported sample rate is artificially limited to arbitrarily selected sane
-  values.  Support for additional channels (and other new features) would
-  require extending nsWaveDecoder to support parsing the newer
-  WAVE_FORMAT_EXTENSIBLE chunk format.
- */
-
-class nsWaveStateMachine;
-class nsTimeRanges;
-
-class nsWaveDecoder : public nsMediaDecoder
-{
-  friend class nsWaveStateMachine;
-
-  NS_DECL_ISUPPORTS
-  NS_DECL_NSIOBSERVER
-
- public:
-  nsWaveDecoder();
-  ~nsWaveDecoder();
-
-  virtual nsMediaDecoder* Clone() { return new nsWaveDecoder(); }
-
-  virtual PRBool Init(nsHTMLMediaElement* aElement);
-
-  virtual nsMediaStream* GetCurrentStream();
-  virtual already_AddRefed<nsIPrincipal> GetCurrentPrincipal();
-
-  // Return the current playback position in the media in seconds.
-  virtual double GetCurrentTime();
-
-  // Return the total playback length of the media in seconds.
-  virtual double GetDuration();
-
-  // Set the audio playback volume; must be in range [0.0, 1.0].
-  virtual void SetVolume(double aVolume);
-
-  virtual nsresult Play();
-  virtual void Pause();
-
-  // Set the current time of the media to aTime.  This may cause mStream to
-  // create a new channel to fetch data from the appropriate position in the
-  // stream.
-  virtual nsresult Seek(double aTime);
+#include "nsBuiltinDecoder.h"
 
-  // Report whether the decoder is currently seeking.
-  virtual PRBool IsSeeking() const;
-
-  // Report whether the decoder has reached end of playback.
-  virtual PRBool IsEnded() const;
-
-  // Start downloading the media at the specified URI.  The media's metadata
-  // will be parsed and made available as the load progresses.
-  virtual nsresult Load(nsMediaStream* aStream,
-                        nsIStreamListener** aStreamListener,
-                        nsMediaDecoder* aCloneDonor);
-
-  // Called by mStream (and possibly the nsChannelToPipeListener used
-  // internally by mStream) when the stream has completed loading.
-  virtual void ResourceLoaded();
-
-  // Called by mStream (and possibly the nsChannelToPipeListener used
-  // internally by mStream) if the stream encounters a network error.
-  virtual void NetworkError();
-
-  // Element is notifying us that the requested playback rate has changed.
-  virtual nsresult PlaybackRateChanged();
-
-  virtual void NotifySuspendedStatusChanged();
-  virtual void NotifyBytesDownloaded();
-  virtual void NotifyDownloadEnded(nsresult aStatus);
-
-  virtual Statistics GetStatistics();
-
-  void PlaybackPositionChanged();
-
-  // Setter for the duration. This is ignored by the wave decoder since it can
-  // compute the duration directly from the wave data.
-  virtual void SetDuration(PRInt64 aDuration);
-
-  // Getter/setter for mSeekable.
-  virtual void SetSeekable(PRBool aSeekable);
-  virtual PRBool GetSeekable();
-
-  // Must be called by the owning object before disposing the decoder.
-  virtual void Shutdown();
-
-  // Suspend any media downloads that are in progress. Called by the
-  // media element when it is sent to the bfcache. Call on the main
-  // thread only.
-  virtual void Suspend();
-
-  // Resume any media downloads that have been suspended. Called by the
-  // media element when it is restored from the bfcache. Call on the
-  // main thread only.
-  virtual void Resume(PRBool aForceBuffering);
-
-  // Calls mElement->UpdateReadyStateForData, telling it which state we have
-  // entered.  Main thread only.
-  void NextFrameUnavailableBuffering();
-  void NextFrameAvailable();
-  void NextFrameUnavailable();
-
-  // Change the element's ready state as necessary. Main thread only.
-  void UpdateReadyStateForData();
-
-  // Tells mStream to put all loads in the background.
-  virtual void MoveLoadsToBackground();
+/**
+ * The decoder implementation is currently limited to Linear PCM encoded
+ * audio data with one or two channels of 8- or 16-bit samples at sample
+ * rates from 100 Hz to 96 kHz.  The number of channels is limited by what
+ * the audio backend (sydneyaudio via nsAudioStream) currently supports.  The
+ * supported sample rate is artificially limited to arbitrarily selected sane
+ * values.  Support for additional channels (and other new features) would
+ * require extending nsWaveDecoder to support parsing the newer
+ * WAVE_FORMAT_EXTENSIBLE chunk format.
+**/
 
-  // Called asynchronously to shut down the decoder
-  void Stop();
 
-  // Constructs the time ranges representing what segments of the media
-  // are buffered and playable.
-  virtual nsresult GetBuffered(nsTimeRanges* aBuffered);
-
-  virtual void NotifyDataArrived(const char* aBuffer, PRUint32 aLength, PRUint32 aOffset) {}
-
-private:
-  // Notifies the element that seeking has started.
-  void SeekingStarted();
-
-  // Notifies the element that seeking has completed.
-  void SeekingStopped();
-
-  // Notifies the element that metadata loading has completed.  Only fired
-  // if metadata is valid.
-  void MetadataLoaded();
-
-  // Notifies the element that playback has completed.
-  void PlaybackEnded();
-
-  // Notifies the element that decoding has failed.
-  void DecodeError();
-
-  // Ensures that state machine thread is running, starting a new one
-  // if necessary.
-  nsresult StartStateMachineThread();
-
-  // Volume that the audio backend will be initialized with.
-  double mInitialVolume;
-
-  // Thread that handles audio playback, including data download.
-  nsCOMPtr<nsIThread> mPlaybackThread;
-
-  // State machine that runs on mPlaybackThread.  Methods on this object are
-  // safe to call from any thread.
-  nsCOMPtr<nsWaveStateMachine> mPlaybackStateMachine;
-
-  // Threadsafe wrapper around channels that provides seeking based on the
-  // underlying channel type.
-  nsAutoPtr<nsMediaStream> mStream;
-
-  // The current playback position of the media resource in units of
-  // seconds. This is updated every time a block of audio is passed to the
-  // backend (unless an prior update is still pending).  It is read and
-  // written from the main thread only.
-  double mCurrentTime;
-
-  // Copy of the duration and ended state when the state machine was
-  // disposed.  Used to respond to duration and ended queries with sensible
-  // values after the state machine has been destroyed.
-  double mEndedDuration;
-  PRPackedBool mEnded;
-
-  // True if the media resource is seekable.
-  PRPackedBool mSeekable;
-
-  // True when the media resource has completely loaded. Accessed on
-  // the main thread only.
-  PRPackedBool mResourceLoaded;
-
-  // True if MetadataLoaded has been reported to the element.
-  PRPackedBool mMetadataLoadedReported;
-
-  // True if ResourceLoaded has been reported to the element.
-  PRPackedBool mResourceLoadedReported;
+class nsWaveDecoder : public nsBuiltinDecoder
+{
+public:
+   virtual nsMediaDecoder* Clone() { return new nsWaveDecoder(); }
+   virtual nsDecoderStateMachine* CreateStateMachine();
 };
 
 #endif
new file mode 100644
--- /dev/null
+++ b/content/media/wave/nsWaveReader.cpp
@@ -0,0 +1,551 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla code.
+ *
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *  Matthew Gregan <kinetik@flim.org>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+#include "nsError.h"
+#include "nsBuiltinDecoderStateMachine.h"
+#include "nsBuiltinDecoder.h"
+#include "nsMediaStream.h"
+#include "nsWaveReader.h"
+#include "nsTimeRanges.h"
+#include "VideoUtils.h"
+
+using namespace mozilla;
+
+// Un-comment to enable logging of seek bisections.
+//#define SEEK_LOGGING
+
+#ifdef PR_LOGGING
+extern PRLogModuleInfo* gBuiltinDecoderLog;
+#define LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
+#ifdef SEEK_LOGGING
+#define SEEK_LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
+#else
+#define SEEK_LOG(type, msg)
+#endif
+#else
+#define LOG(type, msg)
+#define SEEK_LOG(type, msg)
+#endif
+
+// Magic values that identify RIFF chunks we're interested in.
+#define RIFF_CHUNK_MAGIC 0x52494646
+#define WAVE_CHUNK_MAGIC 0x57415645
+#define FRMT_CHUNK_MAGIC 0x666d7420
+#define DATA_CHUNK_MAGIC 0x64617461
+
+// Size of RIFF chunk header.  4 byte chunk header type and 4 byte size field.
+#define RIFF_CHUNK_HEADER_SIZE 8
+
+// Size of RIFF header.  RIFF chunk and 4 byte RIFF type.
+#define RIFF_INITIAL_SIZE (RIFF_CHUNK_HEADER_SIZE + 4)
+
+// Size of required part of format chunk.  Actual format chunks may be
+// extended (for non-PCM encodings), but we skip any extended data.
+#define WAVE_FORMAT_CHUNK_SIZE 16
+
+// PCM encoding type from format chunk.  Linear PCM is the only encoding
+// supported by nsAudioStream.
+#define WAVE_FORMAT_ENCODING_PCM 1
+
+// Maximum number of channels supported
+#define MAX_CHANNELS 2
+
+namespace {
+  PRUint32
+  ReadUint32BE(const char** aBuffer)
+  {
+    PRUint32 result =
+      PRUint8((*aBuffer)[0]) << 24 |
+      PRUint8((*aBuffer)[1]) << 16 |
+      PRUint8((*aBuffer)[2]) << 8 |
+      PRUint8((*aBuffer)[3]);
+    *aBuffer += sizeof(PRUint32);
+    return result;
+  }
+
+  PRUint32
+  ReadUint32LE(const char** aBuffer)
+  {
+    PRUint32 result =
+      PRUint8((*aBuffer)[3]) << 24 |
+      PRUint8((*aBuffer)[2]) << 16 |
+      PRUint8((*aBuffer)[1]) << 8 |
+      PRUint8((*aBuffer)[0]);
+    *aBuffer += sizeof(PRUint32);
+    return result;
+  }
+
+  PRUint16
+  ReadUint16LE(const char** aBuffer)
+  {
+    PRUint16 result =
+      PRUint8((*aBuffer)[1]) << 8 |
+      PRUint8((*aBuffer)[0]) << 0;
+    *aBuffer += sizeof(PRUint16);
+    return result;
+  }
+
+  PRInt16
+  ReadInt16LE(const char** aBuffer)
+  {
+    return static_cast<PRInt16>(ReadUint16LE(aBuffer));
+  }
+
+  PRUint8
+  ReadUint8(const char** aBuffer)
+  {
+    PRUint8 result = PRUint8((*aBuffer)[0]);
+    *aBuffer += sizeof(PRUint8);
+    return result;
+  }
+}
+
+nsWaveReader::nsWaveReader(nsBuiltinDecoder* aDecoder)
+  : nsBuiltinDecoderReader(aDecoder)
+{
+  MOZ_COUNT_CTOR(nsWaveReader);
+}
+
+nsWaveReader::~nsWaveReader()
+{
+  MOZ_COUNT_DTOR(nsWaveReader);
+}
+
+nsresult nsWaveReader::Init(nsBuiltinDecoderReader* aCloneDonor)
+{
+  return NS_OK;
+}
+
+nsresult nsWaveReader::ReadMetadata(nsVideoInfo* aInfo)
+{
+  NS_ASSERTION(mDecoder->OnStateMachineThread(), "Should be on state machine thread.");
+  MonitorAutoEnter mon(mMonitor);
+
+  PRBool loaded = LoadRIFFChunk() && LoadFormatChunk() && FindDataOffset();
+  if (!loaded) {
+    return NS_ERROR_FAILURE;
+  }
+
+  mInfo.mHasAudio = PR_TRUE;
+  mInfo.mHasVideo = PR_FALSE;
+  mInfo.mAudioRate = mSampleRate;
+  mInfo.mAudioChannels = mChannels;
+  mInfo.mDataOffset = -1;
+
+  *aInfo = mInfo;
+
+  MonitorAutoExit exitReaderMon(mMonitor);
+  MonitorAutoEnter decoderMon(mDecoder->GetMonitor());
+
+  float d = floorf(BytesToTime(GetDataLength() * 1000));
+  NS_ASSERTION(d <= PR_INT64_MAX, "Duration overflow");
+  mDecoder->GetStateMachine()->SetDuration(static_cast<PRInt64>(d));
+
+  return NS_OK;
+}
+
+PRBool nsWaveReader::DecodeAudioData()
+{
+  MonitorAutoEnter mon(mMonitor);
+  NS_ASSERTION(mDecoder->OnStateMachineThread() || mDecoder->OnDecodeThread(),
+               "Should be on state machine thread or decode thread.");
+  // pos is an absolute stream offset; len counts only the PCM payload bytes.
+  PRInt64 pos = GetPosition();
+  PRInt64 len = GetDataLength();
+  PRInt64 remaining = len + mWavePCMOffset - pos;
+  NS_ASSERTION(remaining >= 0, "Current wave position is greater than wave file length");
+
+  static const PRInt64 BLOCK_SIZE = 4096;
+  PRInt64 readSize = NS_MIN(BLOCK_SIZE, remaining);
+  PRInt64 samples = readSize / mSampleSize;  // whole frames in this block
+
+  PR_STATIC_ASSERT(PRUint64(BLOCK_SIZE) < UINT_MAX / sizeof(SoundDataValue) / MAX_CHANNELS);
+  const size_t bufferSize = static_cast<size_t>(samples * mChannels);
+  nsAutoArrayPtr<SoundDataValue> sampleBuffer(new SoundDataValue[bufferSize]);
+
+  PR_STATIC_ASSERT(PRUint64(BLOCK_SIZE) < UINT_MAX / sizeof(char));
+  nsAutoArrayPtr<char> dataBuffer(new char[static_cast<size_t>(readSize)]);
+
+  if (!ReadAll(dataBuffer, readSize)) {
+    mAudioQueue.Finish();  // read failure or end of stream: no more audio
+    return PR_FALSE;
+  }
+
+  // Convert the raw PCM bytes to SoundDataValue samples, one frame at a time.
+  const char* d = dataBuffer.get();
+  SoundDataValue* s = sampleBuffer.get();
+  for (int i = 0; i < samples; ++i) {
+    for (unsigned int j = 0; j < mChannels; ++j) {
+      if (mSampleFormat == nsAudioStream::FORMAT_U8) {
+        PRUint8 v =  ReadUint8(&d);
+#if defined(MOZ_SAMPLE_TYPE_S16LE)
+        *s++ = (v * (1.F/PR_UINT8_MAX)) * PR_UINT16_MAX + PR_INT16_MIN;
+#elif defined(MOZ_SAMPLE_TYPE_FLOAT32)
+        *s++ = (v * (1.F/PR_UINT8_MAX)) * 2.F - 1.F;
+#endif
+      }
+      else if (mSampleFormat == nsAudioStream::FORMAT_S16_LE) {
+        PRInt16 v =  ReadInt16LE(&d);
+#if defined(MOZ_SAMPLE_TYPE_S16LE)
+        *s++ = v;
+#elif defined(MOZ_SAMPLE_TYPE_FLOAT32)
+        *s++ = (PRInt32(v) - PR_INT16_MIN) / float(PR_UINT16_MAX) * 2.F - 1.F;
+#endif
+      }
+    }
+  }
+
+  float posTime = BytesToTime(pos - mWavePCMOffset);  // time within the PCM data
+  float readSizeTime = BytesToTime(readSize);
+  NS_ASSERTION(posTime <= PR_INT64_MAX / 1000, "posTime overflow");
+  NS_ASSERTION(readSizeTime <= PR_INT64_MAX / 1000, "readSizeTime overflow");
+  NS_ASSERTION(samples < PR_INT32_MAX, "samples overflow");
+
+  mAudioQueue.Push(new SoundData(pos, static_cast<PRInt64>(posTime * 1000),
+                                 static_cast<PRInt64>(readSizeTime * 1000),
+                                 static_cast<PRInt32>(samples),
+                                 sampleBuffer.forget(), mChannels));
+
+  return PR_TRUE;
+}
+
+PRBool nsWaveReader::DecodeVideoFrame(PRBool &aKeyframeSkip,
+                                      PRInt64 aTimeThreshold)
+{
+  MonitorAutoEnter mon(mMonitor);
+  NS_ASSERTION(mDecoder->OnStateMachineThread() || mDecoder->OnDecodeThread(),
+               "Should be on state machine or decode thread.");
+
+  return PR_FALSE;
+}
+
+nsresult nsWaveReader::Seek(PRInt64 aTarget, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime)
+{
+  MonitorAutoEnter mon(mMonitor);
+  NS_ASSERTION(mDecoder->OnStateMachineThread(),
+               "Should be on state machine thread.");
+  LOG(PR_LOG_DEBUG, ("%p About to seek to %lldms", mDecoder, aTarget));
+  if (NS_FAILED(ResetDecode())) {
+    return NS_ERROR_FAILURE;
+  }
+  float d = BytesToTime(GetDataLength());  // PCM duration in seconds
+  NS_ASSERTION(d < PR_INT64_MAX / 1000, "Duration overflow");
+  PRInt64 duration = static_cast<PRInt64>(d * 1000);  // ms; scale before truncating
+  PRInt64 seekTime = NS_MIN(aTarget, duration);  // clamp target to end of data
+  PRInt64 position = RoundDownToSample(static_cast<PRInt64>(TimeToBytes(seekTime) / 1000.f));
+  NS_ASSERTION(PR_INT64_MAX - mWavePCMOffset > position, "Integer overflow during wave seek");
+  position += mWavePCMOffset;  // convert PCM-relative offset to stream offset
+  return mDecoder->GetCurrentStream()->Seek(nsISeekableStream::NS_SEEK_SET, position);
+}
+
+nsresult nsWaveReader::GetBuffered(nsTimeRanges* aBuffered, PRInt64 aStartTime)
+{
+  PRInt64 startOffset = mDecoder->GetCurrentStream()->GetNextCachedData(mWavePCMOffset);
+  while (startOffset >= 0) {
+    PRInt64 endOffset = mDecoder->GetCurrentStream()->GetCachedDataEnd(startOffset);
+    // Bytes [startOffset..endOffset] are cached.
+    NS_ASSERTION(startOffset >= mWavePCMOffset, "Integer underflow in GetBuffered");
+    NS_ASSERTION(endOffset >= mWavePCMOffset, "Integer underflow in GetBuffered");
+
+    aBuffered->Add(floorf(BytesToTime(startOffset - mWavePCMOffset) * 1000.f) / 1000.0,
+                   floorf(BytesToTime(endOffset - mWavePCMOffset) * 1000.f) / 1000.0);
+    startOffset = mDecoder->GetCurrentStream()->GetNextCachedData(endOffset);
+  }
+  return NS_OK;
+}
+
+PRBool
+nsWaveReader::ReadAll(char* aBuf, PRInt64 aSize, PRInt64* aBytesRead)
+{
+  PRUint32 got = 0;
+  if (aBytesRead) {
+    *aBytesRead = 0;
+  }
+  do {
+    PRUint32 read = 0;
+    if (NS_FAILED(mDecoder->GetCurrentStream()->Read(aBuf + got, PRUint32(aSize - got), &read))) {
+      NS_WARNING("Stream read failed");
+      return PR_FALSE;
+    }
+    if (read == 0) {
+      return PR_FALSE;
+    }
+    mDecoder->NotifyBytesConsumed(read);
+    got += read;
+    if (aBytesRead) {
+      *aBytesRead = got;
+    }
+  } while (got != aSize);
+  return PR_TRUE;
+}
+
+PRBool
+nsWaveReader::LoadRIFFChunk()
+{
+  char riffHeader[RIFF_INITIAL_SIZE];
+  const char* p = riffHeader;
+
+  NS_ABORT_IF_FALSE(mDecoder->GetCurrentStream()->Tell() == 0,
+                    "LoadRIFFChunk called when stream in invalid state");
+
+  if (!ReadAll(riffHeader, sizeof(riffHeader))) {
+    return PR_FALSE;
+  }
+
+  PR_STATIC_ASSERT(sizeof(PRUint32) * 2 <= RIFF_INITIAL_SIZE);
+  if (ReadUint32BE(&p) != RIFF_CHUNK_MAGIC) {
+    NS_WARNING("Stream data not in RIFF format");
+    return PR_FALSE;
+  }
+
+  // Skip over RIFF size field.
+  p += 4;
+
+  if (ReadUint32BE(&p) != WAVE_CHUNK_MAGIC) {
+    NS_WARNING("Expected WAVE chunk");
+    return PR_FALSE;
+  }
+
+  return PR_TRUE;
+}
+
+PRBool
+nsWaveReader::ScanForwardUntil(PRUint32 aWantedChunk, PRUint32* aChunkSize)
+{
+  NS_ABORT_IF_FALSE(aChunkSize, "Require aChunkSize argument");
+  *aChunkSize = 0;
+
+  for (;;) {
+    static const unsigned int CHUNK_HEADER_SIZE = 8;
+    char chunkHeader[CHUNK_HEADER_SIZE];
+    const char* p = chunkHeader;
+
+    if (!ReadAll(chunkHeader, sizeof(chunkHeader))) {
+      return PR_FALSE;
+    }
+
+    PR_STATIC_ASSERT(sizeof(PRUint32) * 2 <= CHUNK_HEADER_SIZE);
+    PRUint32 magic = ReadUint32BE(&p);
+    PRUint32 chunkSize = ReadUint32LE(&p);
+
+    if (magic == aWantedChunk) {
+      *aChunkSize = chunkSize;
+      return PR_TRUE;
+    }
+
+    // RIFF chunks are two-byte aligned, so round up if necessary.
+    chunkSize += chunkSize % 2;
+
+    static const unsigned int MAX_CHUNK_SIZE = 1 << 16;
+    PR_STATIC_ASSERT(MAX_CHUNK_SIZE < UINT_MAX / sizeof(char));
+    nsAutoArrayPtr<char> chunk(new char[MAX_CHUNK_SIZE]);
+    while (chunkSize > 0) {
+      PRUint32 size = PR_MIN(chunkSize, MAX_CHUNK_SIZE);
+      if (!ReadAll(chunk.get(), size)) {
+        return PR_FALSE;
+      }
+      chunkSize -= size;
+    }
+  }
+}
+
+PRBool
+nsWaveReader::LoadFormatChunk()
+{
+  PRUint32 fmtSize, rate, channels, sampleSize, sampleFormat;
+  char waveFormat[WAVE_FORMAT_CHUNK_SIZE];
+  const char* p = waveFormat;
+
+  // RIFF chunks are always word (two byte) aligned.
+  NS_ABORT_IF_FALSE(mDecoder->GetCurrentStream()->Tell() % 2 == 0,
+                    "LoadFormatChunk called with unaligned stream");
+
+  // The "format" chunk may not directly follow the "riff" chunk, so skip
+  // over any intermediate chunks.
+  if (!ScanForwardUntil(FRMT_CHUNK_MAGIC, &fmtSize)) {
+    return PR_FALSE;
+  }
+
+  if (!ReadAll(waveFormat, sizeof(waveFormat))) {
+    return PR_FALSE;
+  }
+
+  PR_STATIC_ASSERT(sizeof(PRUint16) +
+                   sizeof(PRUint16) +
+                   sizeof(PRUint32) +
+                   4 +
+                   sizeof(PRUint16) +
+                   sizeof(PRUint16) <= sizeof(waveFormat));
+  if (ReadUint16LE(&p) != WAVE_FORMAT_ENCODING_PCM) {
+    NS_WARNING("WAVE is not uncompressed PCM, compressed encodings are not supported");
+    return PR_FALSE;
+  }
+
+  channels = ReadUint16LE(&p);
+  rate = ReadUint32LE(&p);
+
+  // Skip over average bytes per second field.
+  p += 4;
+
+  sampleSize = ReadUint16LE(&p);  // bytes per frame (one sample per channel)
+
+  sampleFormat = ReadUint16LE(&p);  // bits per individual sample
+
+  // PCM encoded WAVEs are not expected to have an extended "format" chunk,
+  // but I have found WAVEs that have an extended "format" chunk with an
+  // extension size of 0 bytes.  Be polite and handle this rather than
+  // considering the file invalid.  This code skips any extension of the
+  // "format" chunk.
+  if (fmtSize > WAVE_FORMAT_CHUNK_SIZE) {
+    char extLength[2];
+    const char* p = extLength;
+
+    if (!ReadAll(extLength, sizeof(extLength))) {
+      return PR_FALSE;
+    }
+
+    PR_STATIC_ASSERT(sizeof(PRUint16) <= sizeof(extLength));
+    PRUint16 extra = ReadUint16LE(&p);
+    if (fmtSize - (WAVE_FORMAT_CHUNK_SIZE + 2) != extra) {
+      NS_WARNING("Invalid extended format chunk size");
+      return PR_FALSE;
+    }
+    extra += extra % 2;
+
+    if (extra > 0) {
+      PR_STATIC_ASSERT(PR_UINT16_MAX + (PR_UINT16_MAX % 2) < UINT_MAX / sizeof(char));
+      nsAutoArrayPtr<char> chunkExtension(new char[extra]);
+      if (!ReadAll(chunkExtension.get(), extra)) {
+        return PR_FALSE;
+      }
+    }
+  }
+
+  // RIFF chunks are always word (two byte) aligned.
+  NS_ABORT_IF_FALSE(mDecoder->GetCurrentStream()->Tell() % 2 == 0,
+                    "LoadFormatChunk left stream unaligned");
+
+  // Make sure metadata is sane.  The rate check is fairly arbitrary; channels
+  // is limited to mono or stereo, as supported by the audio backend.  The frame
+  // size must equal channels * bytes-per-sample or DecodeAudioData over-reads.
+  if (rate < 100 || rate > 96000 || channels < 1 || channels > MAX_CHANNELS ||
+      (sampleSize != 1 && sampleSize != 2 && sampleSize != 4) ||
+      (sampleFormat != 8 && sampleFormat != 16) ||
+      sampleSize != channels * (sampleFormat / 8)) {
+    NS_WARNING("Invalid WAVE metadata");
+    return PR_FALSE;
+  }
+
+  MonitorAutoEnter monitor(mDecoder->GetMonitor());
+  mSampleRate = rate;
+  mChannels = channels;
+  mSampleSize = sampleSize;
+  if (sampleFormat == 8) {
+    mSampleFormat = nsAudioStream::FORMAT_U8;
+  } else {
+    mSampleFormat = nsAudioStream::FORMAT_S16_LE;
+  }
+  return PR_TRUE;
+}
+
+PRBool
+nsWaveReader::FindDataOffset()
+{
+  // RIFF chunks are always word (two byte) aligned.
+  NS_ABORT_IF_FALSE(mDecoder->GetCurrentStream()->Tell() % 2 == 0,
+                    "FindDataOffset called with unaligned stream");
+
+  // The "data" chunk may not directly follow the "format" chunk, so skip
+  // over any intermediate chunks.
+  PRUint32 length;
+  if (!ScanForwardUntil(DATA_CHUNK_MAGIC, &length)) {
+    return PR_FALSE;
+  }
+
+  PRInt64 offset = mDecoder->GetCurrentStream()->Tell();
+  if (offset <= 0 || offset > PR_UINT32_MAX) {
+    NS_WARNING("PCM data offset out of range");
+    return PR_FALSE;
+  }
+
+  MonitorAutoEnter monitor(mDecoder->GetMonitor());
+  mWaveLength = length;
+  mWavePCMOffset = PRUint32(offset);
+  return PR_TRUE;
+}
+
+float
+nsWaveReader::BytesToTime(PRInt64 aBytes) const
+{
+  NS_ABORT_IF_FALSE(aBytes >= 0, "Must be >= 0");
+  return float(aBytes) / mSampleRate / mSampleSize;
+}
+
+PRInt64
+nsWaveReader::TimeToBytes(float aTime) const
+{
+  NS_ABORT_IF_FALSE(aTime >= 0.0f, "Must be >= 0");
+  return RoundDownToSample(PRInt64(aTime * mSampleRate * mSampleSize));
+}
+
+PRInt64
+nsWaveReader::RoundDownToSample(PRInt64 aBytes) const
+{
+  NS_ABORT_IF_FALSE(aBytes >= 0, "Must be >= 0");
+  return aBytes - (aBytes % mSampleSize);
+}
+
+PRInt64
+nsWaveReader::GetDataLength()
+{
+  PRInt64 length = mWaveLength;
+  // If the decoder has a valid content length, and it's shorter than the
+  // expected length of the PCM data, calculate the playback duration from
+  // the content length rather than the expected PCM data length.
+  PRInt64 streamLength = mDecoder->GetCurrentStream()->GetLength();
+  if (streamLength >= 0) {
+    PRInt64 dataLength = PR_MAX(0, streamLength - mWavePCMOffset);
+    length = PR_MIN(dataLength, length);
+  }
+  return length;
+}
+
+PRInt64
+nsWaveReader::GetPosition()
+{
+  return mDecoder->GetCurrentStream()->Tell();
+}
new file mode 100644
--- /dev/null
+++ b/content/media/wave/nsWaveReader.h
@@ -0,0 +1,120 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla code.
+ *
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *  Matthew Gregan <kinetik@flim.org>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+#if !defined(nsWaveReader_h_)
+#define nsWaveReader_h_
+
+#include "nsBuiltinDecoderReader.h"
+
+class nsMediaDecoder;
+
+class nsWaveReader : public nsBuiltinDecoderReader
+{
+public:
+  nsWaveReader(nsBuiltinDecoder* aDecoder);
+  ~nsWaveReader();
+
+  virtual nsresult Init(nsBuiltinDecoderReader* aCloneDonor);
+  virtual PRBool DecodeAudioData();
+  virtual PRBool DecodeVideoFrame(PRBool &aKeyframeSkip,
+                                  PRInt64 aTimeThreshold);
+
+  virtual PRBool HasAudio()
+  {
+    return PR_TRUE;
+  }
+
+  virtual PRBool HasVideo()
+  {
+    return PR_FALSE;
+  }
+
+  virtual nsresult ReadMetadata(nsVideoInfo* aInfo);
+  virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime);
+  virtual nsresult GetBuffered(nsTimeRanges* aBuffered, PRInt64 aStartTime);
+
+private:
+  PRBool ReadAll(char* aBuf, PRInt64 aSize, PRInt64* aBytesRead = nsnull);
+  PRBool LoadRIFFChunk();
+  PRBool ScanForwardUntil(PRUint32 aWantedChunk, PRUint32* aChunkSize);
+  PRBool LoadFormatChunk();
+  PRBool FindDataOffset();
+
+  // Returns the number of seconds that aBytes represents based on the
+  // current audio parameters.  e.g.  176400 bytes is 1 second at 16-bit
+  // stereo 44.1kHz.  The result is not rounded; callers round as needed.
+  float BytesToTime(PRInt64 aBytes) const;
+
+  // Returns the number of bytes that aTime represents based on the current
+  // audio parameters.  e.g.  1 second is 176400 bytes at 16-bit stereo
+  // 44.1kHz.
+  PRInt64 TimeToBytes(float aTime) const;
+
+  // Rounds aBytes down to the nearest complete sample.  Assumes beginning
+  // of byte range is already sample aligned by caller.
+  PRInt64 RoundDownToSample(PRInt64 aBytes) const;
+  PRInt64 GetDataLength();
+  PRInt64 GetPosition();
+
+  /*
+    Metadata extracted from the WAVE header.  Used to initialize the audio
+    stream, and for byte<->time domain conversions.
+  */
+
+  // Number of samples per second.  Limited to range [100, 96000] in LoadFormatChunk.
+  PRUint32 mSampleRate;
+
+  // Number of channels.  Limited to range [1, 2] in LoadFormatChunk.
+  PRUint32 mChannels;
+
+  // Size of a single sample segment, which includes a sample for each
+  // channel (interleaved).
+  PRUint32 mSampleSize;
+
+  // The sample format of the PCM data.
+  nsAudioStream::SampleFormat mSampleFormat;
+
+  // Size of PCM data stored in the WAVE as reported by the data chunk in
+  // the media.
+  PRInt64 mWaveLength;
+
+  // Start offset of the PCM data in the media stream.  Extends mWaveLength
+  // bytes.
+  PRInt64 mWavePCMOffset;
+};
+
+#endif
--- a/content/svg/content/src/DOMSVGAnimatedLengthList.cpp
+++ b/content/svg/content/src/DOMSVGAnimatedLengthList.cpp
@@ -117,17 +117,23 @@ DOMSVGAnimatedLengthList::InternalBaseVa
 {
   // When the number of items in our internal counterpart's baseVal changes,
   // we MUST keep our baseVal in sync. If we don't, script will either see a
   // list that is too short and be unable to access indexes that should be
   // valid, or else, MUCH WORSE, script will see a list that is too long and be
   // able to access "items" at indexes that are out of bounds (read/write to
   // bad memory)!!
 
+  nsRefPtr<DOMSVGAnimatedLengthList> kungFuDeathGrip;
   if (mBaseVal) {
+    if (!aNewValue.Length()) {
+      // InternalListLengthWillChange might clear last reference to |this|.
+      // Retain a temporary reference to keep from dying before returning.
+      kungFuDeathGrip = this;
+    }
     mBaseVal->InternalListLengthWillChange(aNewValue.Length());
   }
 
   // If our attribute is not animating, then our animVal mirrors our baseVal
   // and we must sync its length too. (If our attribute is animating, then the
   // SMIL engine takes care of calling InternalAnimValListWillChangeTo() if
   // necessary.)
 
--- a/content/svg/content/src/DOMSVGAnimatedNumberList.cpp
+++ b/content/svg/content/src/DOMSVGAnimatedNumberList.cpp
@@ -116,17 +116,23 @@ DOMSVGAnimatedNumberList::InternalBaseVa
 {
   // When the number of items in our internal counterpart's baseVal changes,
   // we MUST keep our baseVal in sync. If we don't, script will either see a
   // list that is too short and be unable to access indexes that should be
   // valid, or else, MUCH WORSE, script will see a list that is too long and be
   // able to access "items" at indexes that are out of bounds (read/write to
   // bad memory)!!
 
+  nsRefPtr<DOMSVGAnimatedNumberList> kungFuDeathGrip;
   if (mBaseVal) {
+    if (!aNewValue.Length()) {
+      // InternalListLengthWillChange might clear last reference to |this|.
+      // Retain a temporary reference to keep from dying before returning.
+      kungFuDeathGrip = this;
+    }
     mBaseVal->InternalListLengthWillChange(aNewValue.Length());
   }
 
   // If our attribute is not animating, then our animVal mirrors our baseVal
   // and we must sync its length too. (If our attribute is animating, then the
   // SMIL engine takes care of calling InternalAnimValListWillChangeTo() if
   // necessary.)
 
--- a/content/svg/content/src/DOMSVGLengthList.cpp
+++ b/content/svg/content/src/DOMSVGLengthList.cpp
@@ -98,16 +98,23 @@ DOMSVGLengthList::InternalListLengthWill
   PRUint32 oldLength = mItems.Length();
 
   if (aNewLength > DOMSVGLength::MaxListIndex()) {
     // It's safe to get out of sync with our internal list as long as we have
     // FEWER items than it does.
     aNewLength = DOMSVGLength::MaxListIndex();
   }
 
+  nsRefPtr<DOMSVGLengthList> kungFuDeathGrip;
+  if (oldLength && !aNewLength) {
+    // RemovingFromList() might clear last reference to |this|.
+    // Retain a temporary reference to keep from dying before returning.
+    kungFuDeathGrip = this;
+  }
+
   // If our length will decrease, notify the items that will be removed:
   for (PRUint32 i = aNewLength; i < oldLength; ++i) {
     if (mItems[i]) {
       mItems[i]->RemovingFromList();
     }
   }
 
   if (!mItems.SetLength(aNewLength)) {
@@ -388,17 +395,19 @@ DOMSVGLengthList::MaybeInsertNullInAnimV
   UpdateListIndicesFromIndex(animVal->mItems, aIndex + 1);
 }
 
 void
 DOMSVGLengthList::MaybeRemoveItemFromAnimValListAt(PRUint32 aIndex)
 {
   NS_ABORT_IF_FALSE(!IsAnimValList(), "call from baseVal to animVal");
 
-  DOMSVGLengthList* animVal = mAList->mAnimVal;
+  // This needs to be a strong reference; otherwise, the RemovingFromList call
+  // below might drop the last reference to animVal before we're done with it.
+  nsRefPtr<DOMSVGLengthList> animVal = mAList->mAnimVal;
 
   if (!animVal || mAList->IsAnimating()) {
     // No animVal list wrapper, or animVal not a clone of baseVal
     return;
   }
 
   NS_ABORT_IF_FALSE(animVal->mItems.Length() == mItems.Length(),
                     "animVal list not in sync!");
--- a/content/svg/content/src/DOMSVGNumberList.cpp
+++ b/content/svg/content/src/DOMSVGNumberList.cpp
@@ -98,16 +98,23 @@ DOMSVGNumberList::InternalListLengthWill
   PRUint32 oldLength = mItems.Length();
 
   if (aNewLength > DOMSVGNumber::MaxListIndex()) {
     // It's safe to get out of sync with our internal list as long as we have
     // FEWER items than it does.
     aNewLength = DOMSVGNumber::MaxListIndex();
   }
 
+  nsRefPtr<DOMSVGNumberList> kungFuDeathGrip;
+  if (oldLength && !aNewLength) {
+    // RemovingFromList() might clear last reference to |this|.
+    // Retain a temporary reference to keep from dying before returning.
+    kungFuDeathGrip = this;
+  }
+
   // If our length will decrease, notify the items that will be removed:
   for (PRUint32 i = aNewLength; i < oldLength; ++i) {
     if (mItems[i]) {
       mItems[i]->RemovingFromList();
     }
   }
 
   if (!mItems.SetLength(aNewLength)) {
@@ -388,17 +395,19 @@ DOMSVGNumberList::MaybeInsertNullInAnimV
   UpdateListIndicesFromIndex(animVal->mItems, aIndex + 1);
 }
 
 void
 DOMSVGNumberList::MaybeRemoveItemFromAnimValListAt(PRUint32 aIndex)
 {
   NS_ABORT_IF_FALSE(!IsAnimValList(), "call from baseVal to animVal");
 
-  DOMSVGNumberList* animVal = mAList->mAnimVal;
+  // This needs to be a strong reference; otherwise, the RemovingFromList call
+  // below might drop the last reference to animVal before we're done with it.
+  nsRefPtr<DOMSVGNumberList> animVal = mAList->mAnimVal;
 
   if (!animVal || mAList->IsAnimating()) {
     // No animVal list wrapper, or animVal not a clone of baseVal
     return;
   }
 
   NS_ABORT_IF_FALSE(animVal->mItems.Length() == mItems.Length(),
                     "animVal list not in sync!");
--- a/content/svg/content/src/DOMSVGPathSegList.cpp
+++ b/content/svg/content/src/DOMSVGPathSegList.cpp
@@ -135,16 +135,23 @@ DOMSVGPathSegList::InternalListWillChang
   PRUint32 length = mItems.Length();
   PRUint32 index = 0;
 
   PRUint32 dataLength = aNewValue.mData.Length();
   PRUint32 dataIndex = 0; // index into aNewValue's raw data array
 
   PRUint32 newSegType;
 
+  nsRefPtr<DOMSVGPathSegList> kungFuDeathGrip;
+  if (length && aNewValue.IsEmpty()) {
+    // RemovingFromList() might clear last reference to |this|.
+    // Retain a temporary reference to keep from dying before returning.
+    kungFuDeathGrip = this;
+  }
+
   while (index < length && dataIndex < dataLength) {
     newSegType = SVGPathSegUtils::DecodeType(aNewValue.mData[dataIndex]);
     if (ItemAt(index) && ItemAt(index)->Type() != newSegType) {
       ItemAt(index)->RemovingFromList();
       ItemAt(index) = nsnull;
     }
     // Only after the RemovingFromList() can we touch mInternalDataIndex!
     mItems[index].mInternalDataIndex = dataIndex;
@@ -535,17 +542,19 @@ DOMSVGPathSegList::
 {
   NS_ABORT_IF_FALSE(!IsAnimValList(), "call from baseVal to animVal");
 
   if (AttrIsAnimating()) {
     // animVal not a clone of baseVal
     return;
   }
 
-  DOMSVGPathSegList *animVal =
+  // This needs to be a strong reference; otherwise, the RemovingFromList call
+  // below might drop the last reference to animVal before we're done with it.
+  nsRefPtr<DOMSVGPathSegList> animVal =
     GetDOMWrapperIfExists(InternalAList().GetAnimValKey());
   if (!animVal) {
     // No animVal list wrapper
     return;
   }
 
   NS_ABORT_IF_FALSE(animVal->mItems.Length() == mItems.Length(),
                     "animVal list not in sync!");
--- a/content/svg/content/src/DOMSVGPointList.cpp
+++ b/content/svg/content/src/DOMSVGPointList.cpp
@@ -127,16 +127,23 @@ DOMSVGPointList::InternalListWillChangeT
 
   PRUint32 newLength = aNewValue.Length();
   if (newLength > DOMSVGPoint::MaxListIndex()) {
     // It's safe to get out of sync with our internal list as long as we have
     // FEWER items than it does.
     newLength = DOMSVGPoint::MaxListIndex();
   }
 
+  nsRefPtr<DOMSVGPointList> kungFuDeathGrip;
+  if (oldLength && !newLength) {
+    // RemovingFromList() might clear last reference to |this|.
+    // Retain a temporary reference to keep from dying before returning.
+    kungFuDeathGrip = this;
+  }
+
   // If our length will decrease, notify the items that will be removed:
   for (PRUint32 i = newLength; i < oldLength; ++i) {
     if (mItems[i]) {
       mItems[i]->RemovingFromList();
     }
   }
 
   if (!mItems.SetLength(newLength)) {
@@ -450,17 +457,19 @@ DOMSVGPointList::MaybeRemoveItemFromAnim
 {
   NS_ABORT_IF_FALSE(!IsAnimValList(), "call from baseVal to animVal");
 
   if (AttrIsAnimating()) {
     // animVal not a clone of baseVal
     return;
   }
 
-  DOMSVGPointList *animVal =
+  // This needs to be a strong reference; otherwise, the RemovingFromList call
+  // below might drop the last reference to animVal before we're done with it.
+  nsRefPtr<DOMSVGPointList> animVal =
     GetDOMWrapperIfExists(InternalAList().GetAnimValKey());
   if (!animVal) {
     // No animVal list wrapper
     return;
   }
 
   NS_ABORT_IF_FALSE(animVal->mItems.Length() == mItems.Length(),
                     "animVal list not in sync!");
--- a/content/svg/content/src/nsSVGAnimationElement.cpp
+++ b/content/svg/content/src/nsSVGAnimationElement.cpp
@@ -265,26 +265,24 @@ nsSVGAnimationElement::BindToTree(nsIDoc
   NS_ABORT_IF_FALSE(!mHrefTarget.get(),
                     "Shouldn't have href-target yet "
                     "(or it should've been cleared)");
   nsresult rv = nsSVGAnimationElementBase::BindToTree(aDocument, aParent,
                                                       aBindingParent,
                                                       aCompileEventHandlers);
   NS_ENSURE_SUCCESS(rv,rv);
 
-  // XXXdholbert is ownerDOMSVG (as a check for SVG parent) still needed here?
-  nsCOMPtr<nsIDOMSVGSVGElement> ownerDOMSVG;
-  rv = GetOwnerSVGElement(getter_AddRefs(ownerDOMSVG));
-
-  if (NS_FAILED(rv) || !ownerDOMSVG)
+  // XXXdholbert is GetCtx (as a check for SVG parent) still needed here?
+  if (!GetCtx()) {
     // No use proceeding. We don't have an SVG parent (yet) so we won't be able
     // to register ourselves etc. Maybe next time we'll have more luck.
     // (This sort of situation will arise a lot when trees are being constructed
     // piece by piece via script)
     return NS_OK;
+  }
 
   // Add myself to the animation controller's master set of animation elements.
   if (aDocument) {
     nsSMILAnimationController *controller = aDocument->GetAnimationController();
     if (controller) {
       controller->RegisterAnimationElement(this);
     }
     const nsAttrValue* href = mAttrsAndChildren.GetAttr(nsGkAtoms::href,
@@ -415,28 +413,23 @@ nsSVGAnimationElement::IsNodeOfType(PRUi
 }
 
 //----------------------------------------------------------------------
 // Implementation helpers
 
 nsSMILTimeContainer*
 nsSVGAnimationElement::GetTimeContainer()
 {
-  nsSMILTimeContainer *result = nsnull;
-  nsCOMPtr<nsIDOMSVGSVGElement> ownerDOMSVG;
-
-  nsresult rv = GetOwnerSVGElement(getter_AddRefs(ownerDOMSVG));
+  nsSVGSVGElement *element = nsSVGUtils::GetOuterSVGElement(this);
 
-  if (NS_SUCCEEDED(rv) && ownerDOMSVG) {
-    nsSVGSVGElement *ownerSVG =
-      static_cast<nsSVGSVGElement*>(ownerDOMSVG.get());
-    result = ownerSVG->GetTimedDocumentRoot();
+  if (element) {
+    return element->GetTimedDocumentRoot();
   }
 
-  return result;
+  return nsnull;
 }
 
 // nsIDOMElementTimeControl
 /* void beginElement (); */
 NS_IMETHODIMP
 nsSVGAnimationElement::BeginElement(void)
 {
   return BeginElementAt(0.f);
--- a/content/svg/content/src/nsSVGElement.cpp
+++ b/content/svg/content/src/nsSVGElement.cpp
@@ -1019,44 +1019,23 @@ NS_IMETHODIMP nsSVGElement::SetId(const 
 {
   return SetAttr(kNameSpaceID_None, nsGkAtoms::id, aId, PR_TRUE);
 }
 
 /* readonly attribute nsIDOMSVGSVGElement ownerSVGElement; */
 NS_IMETHODIMP
 nsSVGElement::GetOwnerSVGElement(nsIDOMSVGSVGElement * *aOwnerSVGElement)
 {
-  *aOwnerSVGElement = nsnull;
-
-  nsIContent* ancestor = nsSVGUtils::GetParentElement(this);
+  NS_IF_ADDREF(*aOwnerSVGElement = GetCtx());
 
-  while (ancestor && ancestor->GetNameSpaceID() == kNameSpaceID_SVG) {
-    nsIAtom* tag = ancestor->Tag();
-    if (tag == nsGkAtoms::foreignObject) {
-      // SVG in a foreignObject must have its own <svg> (nsSVGOuterSVGFrame).
-      // Leave *aOwnerSVGElement nulled out, but don't throw.
-      return NS_OK;
-    }
-    if (tag == nsGkAtoms::svg) {
-      *aOwnerSVGElement = static_cast<nsSVGSVGElement*>(ancestor);
-      NS_ADDREF(*aOwnerSVGElement);
-      return NS_OK;
-    }
-    ancestor = nsSVGUtils::GetParentElement(ancestor);
-  }
-
-  // we don't have a parent SVG element...
-
-  // are _we_ the outermost SVG element? If yes, return nsnull, but don't fail
-  if (Tag() == nsGkAtoms::svg) {
+  if (*aOwnerSVGElement || Tag() == nsGkAtoms::svg) {
+    // If we found something or we're the outermost SVG element, that's OK.
     return NS_OK;
   }
-  
-  // no owner found and we aren't the outermost SVG element either.
-  // this situation can e.g. occur during content tree teardown. 
+  // Otherwise, we've got an invalid structure
   return NS_ERROR_FAILURE;
 }
 
 /* readonly attribute nsIDOMSVGElement viewportElement; */
 NS_IMETHODIMP
 nsSVGElement::GetViewportElement(nsIDOMSVGElement * *aViewportElement)
 {
   *aViewportElement = nsSVGUtils::GetNearestViewportElement(this).get();
@@ -1427,19 +1406,31 @@ nsIAtom* nsSVGElement::GetEventNameForAt
 #endif // MOZ_SMIL
 
   return aAttr;
 }
 
 nsSVGSVGElement *
 nsSVGElement::GetCtx()
 {
-  nsCOMPtr<nsIDOMSVGSVGElement> svg;
-  GetOwnerSVGElement(getter_AddRefs(svg));
-  return static_cast<nsSVGSVGElement*>(svg.get());
+  dom::Element* ancestor = nsSVGUtils::GetParentElement(this);
+
+  while (ancestor && ancestor->GetNameSpaceID() == kNameSpaceID_SVG) {
+    nsIAtom* tag = ancestor->Tag();
+    if (tag == nsGkAtoms::foreignObject) {
+      return nsnull;
+    }
+    if (tag == nsGkAtoms::svg) {
+      return static_cast<nsSVGSVGElement*>(ancestor);
+    }
+    ancestor = nsSVGUtils::GetParentElement(ancestor);
+  }
+
+  // we don't have an ancestor <svg> element...
+  return nsnull;
 }
 
 /* virtual */ gfxMatrix
 nsSVGElement::PrependLocalTransformTo(const gfxMatrix &aMatrix)
 {
   return aMatrix;
 }
 
--- a/content/svg/content/src/nsSVGGraphicElement.cpp
+++ b/content/svg/content/src/nsSVGGraphicElement.cpp
@@ -33,16 +33,17 @@
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsSVGGraphicElement.h"
+#include "nsSVGSVGElement.h"
 #include "nsSVGTransformList.h"
 #include "nsSVGAnimatedTransformList.h"
 #include "nsGkAtoms.h"
 #include "nsSVGMatrix.h"
 #include "nsIDOMEventTarget.h"
 #include "nsIFrame.h"
 #include "nsISVGChildFrame.h"
 #include "nsIDOMSVGPoint.h"
@@ -77,17 +78,17 @@ NS_IMETHODIMP nsSVGGraphicElement::GetNe
 {
   *aNearestViewportElement = nsSVGUtils::GetNearestViewportElement(this).get();
   return NS_OK;
 }
 
 /* readonly attribute nsIDOMSVGElement farthestViewportElement; */
 NS_IMETHODIMP nsSVGGraphicElement::GetFarthestViewportElement(nsIDOMSVGElement * *aFarthestViewportElement)
 {
-  *aFarthestViewportElement = nsSVGUtils::GetFarthestViewportElement(this).get();
+  NS_IF_ADDREF(*aFarthestViewportElement = nsSVGUtils::GetOuterSVGElement(this));
   return NS_OK;
 }
 
 /* nsIDOMSVGRect getBBox (); */
 NS_IMETHODIMP nsSVGGraphicElement::GetBBox(nsIDOMSVGRect **_retval)
 {
   *_retval = nsnull;
 
--- a/content/svg/content/src/nsSVGSVGElement.cpp
+++ b/content/svg/content/src/nsSVGSVGElement.cpp
@@ -634,17 +634,17 @@ nsSVGSVGElement::CreateSVGNumber(nsIDOMS
   NS_ADDREF(*_retval = new DOMSVGNumber());
   return NS_OK;
 }
 
 /* nsIDOMSVGLength createSVGLength (); */
 NS_IMETHODIMP
 nsSVGSVGElement::CreateSVGLength(nsIDOMSVGLength **_retval)
 {
-  NS_IF_ADDREF(*_retval = new DOMSVGLength());
+  NS_ADDREF(*_retval = new DOMSVGLength());
   return NS_OK;
 }
 
 /* nsIDOMSVGAngle createSVGAngle (); */
 NS_IMETHODIMP
 nsSVGSVGElement::CreateSVGAngle(nsIDOMSVGAngle **_retval)
 {
   return NS_NewDOMSVGAngle(_retval);
@@ -729,17 +729,17 @@ nsSVGSVGElement::GetNearestViewportEleme
   *aNearestViewportElement = nsSVGUtils::GetNearestViewportElement(this).get();
   return NS_OK;
 }
 
 /* readonly attribute nsIDOMSVGElement farthestViewportElement; */
 NS_IMETHODIMP
 nsSVGSVGElement::GetFarthestViewportElement(nsIDOMSVGElement * *aFarthestViewportElement)
 {
-  *aFarthestViewportElement = nsSVGUtils::GetFarthestViewportElement(this).get();
+  NS_IF_ADDREF(*aFarthestViewportElement = nsSVGUtils::GetOuterSVGElement(this));
   return NS_OK;
 }
 
 /* nsIDOMSVGRect getBBox (); */
 NS_IMETHODIMP
 nsSVGSVGElement::GetBBox(nsIDOMSVGRect **_retval)
 {
   *_retval = nsnull;
@@ -877,34 +877,29 @@ nsSVGSVGElement::SetCurrentTranslate(flo
 {
   return SetCurrentScaleTranslate(mCurrentScale, x, y);
 }
 
 #ifdef MOZ_SMIL
 nsSMILTimeContainer*
 nsSVGSVGElement::GetTimedDocumentRoot()
 {
-  nsSMILTimeContainer *result = nsnull;
-
   if (mTimedDocumentRoot) {
-    result = mTimedDocumentRoot;
-  } else {
-    // We must not be the outermost SVG element, try to find it
-    nsCOMPtr<nsIDOMSVGSVGElement> outerSVGDOM;
-
-    nsresult rv = GetOwnerSVGElement(getter_AddRefs(outerSVGDOM));
-
-    if (NS_SUCCEEDED(rv) && outerSVGDOM) {
-      nsSVGSVGElement *outerSVG =
-        static_cast<nsSVGSVGElement*>(outerSVGDOM.get());
-      result = outerSVG->GetTimedDocumentRoot();
-    }
+    return mTimedDocumentRoot;
   }
 
-  return result;
+  // We must not be the outermost <svg> element, try to find it
+  nsSVGSVGElement *outerSVGElement =
+    nsSVGUtils::GetOuterSVGElement(this);
+
+  if (outerSVGElement) {
+    return outerSVGElement->GetTimedDocumentRoot();
+  }
+  // invalid structure
+  return nsnull;
 }
 #endif // MOZ_SMIL
 
 //----------------------------------------------------------------------
 // nsIContent methods
 
 NS_IMETHODIMP_(PRBool)
 nsSVGSVGElement::IsAttributeMapped(const nsIAtom* name) const
@@ -991,17 +986,17 @@ nsSVGSVGElement::GetViewBoxTransform()
   // Do we have an override preserveAspectRatio value?
   const SVGPreserveAspectRatio* overridePARPtr =
     GetImageOverridePreserveAspectRatio();
 
   // May assign this to overridePARPtr if we have no viewBox but are faking one:
   SVGPreserveAspectRatio tmpPAR;
 
   float viewportWidth, viewportHeight;
-  if (nsSVGUtils::IsInnerSVG(this)) {
+  if (IsInner()) {
     nsSVGSVGElement *ctx = GetCtx();
     viewportWidth = mLengthAttributes[WIDTH].GetAnimValue(ctx);
     viewportHeight = mLengthAttributes[HEIGHT].GetAnimValue(ctx);
   } else {
     viewportWidth = mViewportWidth;
     viewportHeight = mViewportHeight;
   }
 
@@ -1164,17 +1159,17 @@ float
 nsSVGSVGElement::GetLength(PRUint8 aCtxType)
 {
   float h, w;
 
   if (mViewBox.IsValid()) {
     const nsSVGViewBoxRect& viewbox = mViewBox.GetAnimValue();
     w = viewbox.width;
     h = viewbox.height;
-  } else if (nsSVGUtils::IsInnerSVG(this)) {
+  } else if (IsInner()) {
     nsSVGSVGElement *ctx = GetCtx();
     w = mLengthAttributes[WIDTH].GetAnimValue(ctx);
     h = mLengthAttributes[HEIGHT].GetAnimValue(ctx);
   } else if (ShouldSynthesizeViewBox()) {
     w = ComputeSynthesizedViewBoxDimension(mLengthAttributes[WIDTH],
                                            mViewportWidth, this);
     h = ComputeSynthesizedViewBoxDimension(mLengthAttributes[HEIGHT],
                                            mViewportHeight, this);
@@ -1198,17 +1193,17 @@ nsSVGSVGElement::GetLength(PRUint8 aCtxT
 }
 
 //----------------------------------------------------------------------
 // nsSVGElement methods
 
 /* virtual */ gfxMatrix
 nsSVGSVGElement::PrependLocalTransformTo(const gfxMatrix &aMatrix)
 {
-  if (nsSVGUtils::IsInnerSVG(this)) {
+  if (IsInner()) {
     float x, y;
     GetAnimatedLengthValues(&x, &y, nsnull);
     return GetViewBoxTransform() * gfxMatrix().Translate(gfxPoint(x, y)) * aMatrix;
   }
 
   if (IsRoot()) {
     gfxMatrix zoomPanTM;
     zoomPanTM.Translate(gfxPoint(mCurrentTranslate.GetX(), mCurrentTranslate.GetY()));
--- a/content/svg/content/src/nsSVGSVGElement.h
+++ b/content/svg/content/src/nsSVGSVGElement.h
@@ -52,20 +52,16 @@
 #include "nsSVGViewBox.h"
 #include "SVGAnimatedPreserveAspectRatio.h"
 #include "mozilla/dom/FromParser.h"
 
 #ifdef MOZ_SMIL
 class nsSMILTimeContainer;
 #endif // MOZ_SMIL
 
-#define QI_AND_CAST_TO_NSSVGSVGELEMENT(base)                                  \
-  (nsCOMPtr<nsIDOMSVGSVGElement>(do_QueryInterface(base)) ?                   \
-   static_cast<nsSVGSVGElement*>(base.get()) : nsnull)
-
 typedef nsSVGStylableElement nsSVGSVGElementBase;
 
 class nsSVGSVGElement;
 
 class nsSVGTranslatePoint {
 public:
   nsSVGTranslatePoint(float aX, float aY) :
     mX(aX), mY(aY) {}
@@ -266,16 +262,26 @@ protected:
 
   PRBool IsRoot() {
     NS_ASSERTION((IsInDoc() && !GetParent()) ==
                  (GetOwnerDoc() && (GetOwnerDoc()->GetRootElement() == this)),
                  "Can't determine if we're root");
     return IsInDoc() && !GetParent();
   }
 
+  /**
+   * Returns true if this is an SVG <svg> element that is the child of
+   * another non-foreignObject SVG element.
+   */
+  PRBool IsInner() {
+    const mozilla::dom::Element *parent = nsSVGUtils::GetParentElement(this);
+    return parent && parent->GetNameSpaceID() == kNameSpaceID_SVG &&
+           parent->Tag() != nsGkAtoms::foreignObject;
+  }
+
 #ifdef MOZ_SMIL
   /* 
    * While binding to the tree we need to determine if we will be the outermost
    * <svg> element _before_ the children are bound (as they want to know what
    * timed document root to register with) and therefore _before_ our parent is
    * set (both actions are performed by nsGenericElement::BindToTree) so we
    * can't use GetOwnerSVGElement() as it relies on GetParent(). This code is
    * basically a simplified version of GetOwnerSVGElement that uses the parent
--- a/content/svg/content/src/nsSVGTransformList.cpp
+++ b/content/svg/content/src/nsSVGTransformList.cpp
@@ -298,21 +298,21 @@ NS_IMETHODIMP nsSVGTransformList::Insert
 
 /* nsIDOMSVGTransform replaceItem (in nsIDOMSVGTransform newItem, in unsigned long index); */
 NS_IMETHODIMP nsSVGTransformList::ReplaceItem(nsIDOMSVGTransform *newItem,
                                               PRUint32 index,
                                               nsIDOMSVGTransform **_retval)
 {
   NS_ENSURE_NATIVE_TRANSFORM(newItem, _retval);
 
-  nsSVGValueAutoNotifier autonotifier(this);
-
   if (index >= mTransforms.Length())
     return NS_ERROR_DOM_INDEX_SIZE_ERR;
 
+  nsSVGValueAutoNotifier autonotifier(this);
+
   nsIDOMSVGTransform* oldItem = ElementAt(index);
 
   mTransforms.ElementAt(index) = newItem;
 
   nsCOMPtr<nsISVGValue> val = do_QueryInterface(oldItem);
   val->RemoveObserver(this);
   NS_RELEASE(oldItem);
   val = do_QueryInterface(newItem);
@@ -322,23 +322,23 @@ NS_IMETHODIMP nsSVGTransformList::Replac
   *_retval = newItem;
   NS_ADDREF(*_retval);
   return NS_OK;
 }
 
 /* nsIDOMSVGTransform removeItem (in unsigned long index); */
 NS_IMETHODIMP nsSVGTransformList::RemoveItem(PRUint32 index, nsIDOMSVGTransform **_retval)
 {
-  nsSVGValueAutoNotifier autonotifier(this);
-
   if (index >= mTransforms.Length()) {
     *_retval = nsnull;
     return NS_ERROR_DOM_INDEX_SIZE_ERR;
   }
 
+  nsSVGValueAutoNotifier autonotifier(this);
+
   *_retval = ElementAt(index);
 
   mTransforms.RemoveElementAt(index);
 
   nsCOMPtr<nsISVGValue> val = do_QueryInterface(*_retval);
   val->RemoveObserver(this);
 
   // don't NS_ADDREF(*_retval)
--- a/content/xbl/src/nsBindingManager.cpp
+++ b/content/xbl/src/nsBindingManager.cpp
@@ -1282,17 +1282,17 @@ nsBindingManager::WalkRules(nsIStyleRule
 
   // Walk the binding scope chain, starting with the binding attached to our
   // content, up till we run out of scopes or we get cut off.
   nsIContent *content = aData->mElement;
   
   do {
     nsXBLBinding *binding = GetBinding(content);
     if (binding) {
-      aData->mScopedRoot = content;
+      aData->mTreeMatchContext.mScopedRoot = content;
       binding->WalkRules(aFunc, aData);
       // If we're not looking at our original content, allow the binding to cut
       // off style inheritance
       if (content != aData->mElement) {
         if (!binding->InheritsStyle()) {
           // Go no further; we're not inheriting style from anything above here
           break;
         }
@@ -1306,17 +1306,17 @@ nsBindingManager::WalkRules(nsIStyleRule
     content = content->GetBindingParent();
   } while (content);
 
   // If "content" is non-null that means we cut off inheritance at some point
   // in the loop.
   *aCutOffInheritance = (content != nsnull);
 
   // Null out the scoped root that we set repeatedly
-  aData->mScopedRoot = nsnull;
+  aData->mTreeMatchContext.mScopedRoot = nsnull;
 
   return NS_OK;
 }
 
 typedef nsTHashtable<nsVoidPtrHashKey> RuleProcessorSet;
 
 static PLDHashOperator
 EnumRuleProcessors(nsISupports *aKey, nsXBLBinding *aBinding, void* aClosure)
--- a/dom/base/nsGlobalWindow.cpp
+++ b/dom/base/nsGlobalWindow.cpp
@@ -25,16 +25,17 @@
  *   Brendan Eich <brendan@mozilla.org>
  *   David Hyatt (hyatt@netscape.com)
  *   Dan Rosen <dr@netscape.com>
  *   Vidur Apparao <vidur@netscape.com>
  *   Johnny Stenback <jst@netscape.com>
  *   Mark Hammond <mhammond@skippinet.com.au>
  *   Ryan Jones <sciguyryan@gmail.com>
  *   Jeff Walden <jwalden+code@mit.edu>
+ *   Ben Bucksch <ben.bucksch  beonex.com>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either of the GNU General Public License Version 2 or later (the "GPL"),
  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -62,16 +63,18 @@
 #include "nsXPIDLString.h"
 #include "nsJSUtils.h"
 #include "prmem.h"
 #include "jsapi.h"              // for JSAutoRequest
 #include "jsdbgapi.h"           // for JS_ClearWatchPointsForObject
 #include "nsReadableUtils.h"
 #include "nsDOMClassInfo.h"
 #include "nsJSEnvironment.h"
+#include "nsCharSeparatedTokenizer.h" // for Accept-Language parsing
+#include "nsUnicharUtils.h"
 
 // Other Classes
 #include "nsIEventListenerManager.h"
 #include "nsEscape.h"
 #include "nsStyleCoord.h"
 #include "nsMimeTypeArray.h"
 #include "nsNetUtil.h"
 #include "nsICachingChannel.h"
@@ -10559,29 +10562,66 @@ nsNavigator::GetAppVersion(nsAString& aA
 }
 
 NS_IMETHODIMP
 nsNavigator::GetAppName(nsAString& aAppName)
 {
   return NS_GetNavigatorAppName(aAppName);
 }
 
+/**
+ * JS property navigator.language, exposed to web content.
+ * Take first value from Accept-Language (HTTP header), which is
+ * the "content language" freely set by the user in the Pref window.
+ *
+ * Do not use UI language (chosen app locale) here.
+ * See RFC 2616, Section 15.1.4 "Privacy Issues Connected to Accept Headers"
+ *
+ * "en", "en-US" and "i-cherokee" and "" are valid.
+ * Fallback in case of invalid pref should be "" (empty string), to
+ * let site do fallback, e.g. to site's local language.
+ */
 NS_IMETHODIMP
 nsNavigator::GetLanguage(nsAString& aLanguage)
 {
-  nsresult rv;
-  nsCOMPtr<nsIHttpProtocolHandler>
-    service(do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &rv));
-  if (NS_SUCCEEDED(rv)) {
-    nsCAutoString lang;
-    rv = service->GetLanguage(lang);
-    CopyASCIItoUTF16(lang, aLanguage);
-  }
-
-  return rv;
+  // e.g. "de-de, en-us,en"
+  const nsAdoptingString& acceptLang =
+      nsContentUtils::GetLocalizedStringPref("intl.accept_languages");
+  // take everything before the first "," or ";", without trailing space
+  nsCharSeparatedTokenizer langTokenizer(acceptLang, ',');
+  const nsSubstring &firstLangPart = langTokenizer.nextToken();
+  nsCharSeparatedTokenizer qTokenizer(firstLangPart, ';');
+  aLanguage.Assign(qTokenizer.nextToken());
+
+  // checks and fixups
+  // replace "_" with "-", to avoid POSIX/Windows "en_US" notation
+  if (aLanguage.Length() > 2 && aLanguage[2] == PRUnichar('_'))
+    aLanguage.Replace(2, 1, PRUnichar('-')); // TODO replace all
+  // use uppercase for country part, e.g. "en-US", not "en-us", see BCP47
+  // only uppercase 2-letter country codes, not "zh-Hant", "de-DE-x-goethe"
+  if (aLanguage.Length() > 2)
+  {
+    nsCharSeparatedTokenizer localeTokenizer(aLanguage, '-');
+    PRInt32 pos = 0;
+    bool first = true;
+    while (localeTokenizer.hasMoreTokens())
+    {
+      const nsSubstring &code = localeTokenizer.nextToken();
+      if (code.Length() == 2 && !first)
+      {
+        nsAutoString upper(code);
+        ::ToUpperCase(upper);
+        aLanguage.Replace(pos, code.Length(), upper);
+      }
+      pos += code.Length() + 1; // 1 is the separator
+      if (first)
+        first = false;
+    }
+  }
+  return NS_OK;
 }
 
 NS_IMETHODIMP
 nsNavigator::GetPlatform(nsAString& aPlatform)
 {
   return NS_GetNavigatorPlatform(aPlatform);
 }
 
--- a/dom/base/nsJSEnvironment.cpp
+++ b/dom/base/nsJSEnvironment.cpp
@@ -1537,35 +1537,29 @@ nsJSContext::CompileScript(const PRUnich
   aScriptObject.drop(); // ensure old object not used on failure...
 
   // SecurityManager said "ok", but don't compile if aVersion is unknown.
   // Since the caller is responsible for parsing the version strings, we just
   // check it isn't JSVERSION_UNKNOWN.
   if (ok && ((JSVersion)aVersion) != JSVERSION_UNKNOWN) {
     JSAutoRequest ar(mContext);
 
-    JSScript* script =
+    JSObject* scriptObj =
         ::JS_CompileUCScriptForPrincipalsVersion(mContext,
                                                  (JSObject *)aScopeObject,
                                                  jsprin,
                                                  (jschar*) aText,
                                                  aTextLength,
                                                  aURL,
                                                  aLineNo,
                                                  JSVersion(aVersion));
-    if (script) {
-      JSObject *scriptObject = ::JS_NewScriptObject(mContext, script);
-      if (scriptObject) {
-        NS_ASSERTION(aScriptObject.getScriptTypeID()==JAVASCRIPT,
-                     "Expecting JS script object holder");
-        rv = aScriptObject.set(scriptObject);
-      } else {
-        ::JS_DestroyScript(mContext, script);
-        script = nsnull;
-      }
+    if (scriptObj) {
+      NS_ASSERTION(aScriptObject.getScriptTypeID()==JAVASCRIPT,
+                   "Expecting JS script object holder");
+      rv = aScriptObject.set(scriptObj);
     } else {
       rv = NS_ERROR_OUT_OF_MEMORY;
     }
   }
 
   // Whew!  Finally done.
   JSPRINCIPALS_DROP(mContext, jsprin);
   return rv;
@@ -1617,20 +1611,17 @@ nsJSContext::ExecuteScript(void *aScript
   NS_ENSURE_SUCCESS(rv, rv);
 
   rv = sSecurityManager->PushContextPrincipal(mContext, nsnull, principal);
   NS_ENSURE_SUCCESS(rv, rv);
 
   nsJSContext::TerminationFuncHolder holder(this);
   JSAutoRequest ar(mContext);
   ++mExecuteDepth;
-  ok = ::JS_ExecuteScript(mContext,
-                          (JSObject *)aScopeObject,
-                          (JSScript*)::JS_GetPrivate(mContext, scriptObj),
-                          &val);
+  ok = ::JS_ExecuteScript(mContext, (JSObject *)aScopeObject, scriptObj, &val);
 
   if (ok) {
     // If all went well, convert val to a string (XXXbe unless undefined?).
     rv = JSValueToAString(mContext, val, aRetValue, aIsUndefined);
   } else {
     if (aIsUndefined) {
       *aIsUndefined = PR_TRUE;
     }
@@ -2063,19 +2054,17 @@ nsJSContext::Serialize(nsIObjectOutputSt
 
     JSContext* cx = mContext;
     JSXDRState *xdr = ::JS_XDRNewMem(cx, JSXDR_ENCODE);
     if (! xdr)
         return NS_ERROR_OUT_OF_MEMORY;
     xdr->userdata = (void*) aStream;
 
     JSAutoRequest ar(cx);
-    JSScript *script = reinterpret_cast<JSScript*>
-                                       (::JS_GetPrivate(cx, mJSObject));
-    if (! ::JS_XDRScript(xdr, &script)) {
+    if (! ::JS_XDRScriptObject(xdr, &mJSObject)) {
         rv = NS_ERROR_FAILURE;  // likely to be a principals serialization error
     } else {
         // Get the encoded JSXDRState data and write it.  The JSXDRState owns
         // this buffer memory and will free it beneath ::JS_XDRDestroy.
         //
         // If an XPCOM object needs to be written in the midst of the JS XDR
         // encoding process, the C++ code called back from the JS engine (e.g.,
         // nsEncodeJSPrincipals in caps/src/nsJSPrincipals.cpp) will flush data
@@ -2127,25 +2116,18 @@ nsJSContext::Deserialize(nsIObjectInputS
     JSXDRState *xdr = ::JS_XDRNewMem(cx, JSXDR_DECODE);
     if (! xdr) {
         rv = NS_ERROR_OUT_OF_MEMORY;
     } else {
         xdr->userdata = (void*) aStream;
         JSAutoRequest ar(cx);
         ::JS_XDRMemSetData(xdr, data, size);
 
-        JSScript *script = nsnull;
-        if (! ::JS_XDRScript(xdr, &script)) {
+        if (! ::JS_XDRScriptObject(xdr, &result)) {
             rv = NS_ERROR_FAILURE;  // principals deserialization error?
-        } else {
-            result = ::JS_NewScriptObject(cx, script);
-            if (! result) {
-                rv = NS_ERROR_OUT_OF_MEMORY;    // certain error
-                ::JS_DestroyScript(cx, script);
-            }
         }
 
         // Update data in case ::JS_XDRScript called back into C++ code to
         // read an XPCOM object.
         //
         // In that case, the serialization process must have flushed a run
         // of counted bytes containing JS data at the point where the XPCOM
         // object starts, after which an encoding C++ callback from the JS
--- a/dom/base/nsJSTimeoutHandler.cpp
+++ b/dom/base/nsJSTimeoutHandler.cpp
@@ -45,19 +45,16 @@
 #include "nsIJSRuntimeService.h"
 #include "nsJSUtils.h"
 #include "nsDOMJSUtils.h"
 #include "nsContentUtils.h"
 #include "nsJSEnvironment.h"
 #include "nsServiceManagerUtils.h"
 #include "nsDOMError.h"
 #include "nsGlobalWindow.h"
-#include "jsobj.h"
-#include "jsatom.h"
-#include "jsfun.h"
 #include "nsIContentSecurityPolicy.h"
 
 static const char kSetIntervalStr[] = "setInterval";
 static const char kSetTimeoutStr[] = "setTimeout";
 
 // Our JS nsIScriptTimeoutHandler implementation.
 class nsJSScriptTimeoutHandler: public nsIScriptTimeoutHandler
 {
@@ -129,20 +126,21 @@ NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_
       foo.Append(tmp->mFileName);
       foo.AppendLiteral(":");
       foo.AppendInt(tmp->mLineNo);
       foo.AppendLiteral("]");
     }
     else if (tmp->mFunObj) {
       JSFunction* fun = (JSFunction*)tmp->mFunObj->getPrivate();
       if (fun->atom) {
-        size_t size = 1 + JS_PutEscapedFlatString(NULL, 0, ATOM_TO_STRING(fun->atom), 0);
+        JSFlatString *funId = JS_ASSERT_STRING_IS_FLAT(JS_GetFunctionId(fun));
+        size_t size = 1 + JS_PutEscapedFlatString(NULL, 0, funId, 0);
         char *name = new char[size];
         if (name) {
-          JS_PutEscapedFlatString(name, size, ATOM_TO_STRING(fun->atom), 0);
+          JS_PutEscapedFlatString(name, size, funId, 0);
           foo.AppendLiteral(" [");
           foo.Append(name);
           delete[] name;
           foo.AppendLiteral("]");
         }
       }
     }
     cb.DescribeNode(RefCounted, tmp->mRefCnt.get(),
--- a/dom/interfaces/core/nsIDOMXMLDocument.idl
+++ b/dom/interfaces/core/nsIDOMXMLDocument.idl
@@ -33,17 +33,17 @@
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsIDOMDocument.idl"
 
-[scriptable, uuid(0aa12c21-4710-4214-b21e-372c48332927)]
+[scriptable, uuid(5eb33612-2f3c-424d-acd8-3ff9cc358ad1)]
 interface nsIDOMXMLDocument : nsIDOMDocument
 {
   // DOM Level 3 Load & Save, DocumentLS
   // http://www.w3.org/TR/DOM-Level-3-LS/load-save.html#LS-DocumentLS
   /**
    * Whether to load synchronously or asynchronously.
    * The default is async==true.
    */
--- a/dom/interfaces/html/nsIDOMHTMLDocument.idl
+++ b/dom/interfaces/html/nsIDOMHTMLDocument.idl
@@ -45,17 +45,17 @@
  *
  * This interface is trying to follow the DOM Level 2 HTML specification:
  * http://www.w3.org/TR/DOM-Level-2-HTML/
  *
  * with changes from the work-in-progress WHATWG HTML specification:
  * http://www.whatwg.org/specs/web-apps/current-work/
  */
 
-[scriptable, uuid(a6cf9084-15b3-11d2-932e-00805f8add32)]
+[scriptable, uuid(fdab9ba2-c6ff-42c1-83ad-dea0b22af986)]
 interface nsIDOMHTMLDocument : nsIDOMDocument
 {
            attribute DOMString            title;
   readonly attribute DOMString            referrer;
   // domain is readonly per spec, but it's settable in
   // nsIDOMNSHTMLDocument
   [noscript] readonly attribute DOMString domain;
   readonly attribute DOMString            URL;
--- a/dom/interfaces/svg/nsIDOMSVGDocument.idl
+++ b/dom/interfaces/svg/nsIDOMSVGDocument.idl
@@ -34,17 +34,17 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsIDOMDocument.idl"
 
 interface nsIDOMSVGSVGElement;
 
-[scriptable, uuid(12d3b664-1dd2-11b2-a7cf-ceee7e90f396)]
+[scriptable, uuid(f9aa3be8-ab84-4e92-a50b-a7481171407a)]
 interface nsIDOMSVGDocument : nsIDOMDocument
                          /* , nsIDOMDocumentEvent */
 {
   readonly attribute DOMString title;
   readonly attribute DOMString referrer;
   readonly attribute DOMString domain;
   readonly attribute DOMString URL;
   readonly attribute nsIDOMSVGSVGElement rootElement;
--- a/dom/locales/en-US/chrome/charsetTitles.properties
+++ b/dom/locales/en-US/chrome/charsetTitles.properties
@@ -66,30 +66,27 @@ x-mac-icelandic.title   = Icelandic (Mac
 iso-2022-jp.title = Japanese (ISO-2022-JP)
 shift_jis.title = Japanese (Shift_JIS)
 euc-jp.title = Japanese (EUC-JP)
 big5.title = Chinese Traditional (Big5)
 big5-hkscs.title = Chinese Traditional (Big5-HKSCS)
 x-euc-tw.title = Chinese Traditional (EUC-TW)
 gb2312.title = Chinese Simplified (GB2312)
 hz-gb-2312.title = Chinese Simplified (HZ)
-x-gbk.title = Chinese Simplified (GBK)
+gbk.title = Chinese Simplified (GBK)
 iso-2022-cn.title = Chinese Simplified (ISO-2022-CN)
 euc-kr.title = Korean (EUC-KR)
 x-johab.title = Korean (JOHAB)
 x-windows-949.title = Korean (UHC)
 iso-2022-kr.title = Korean (ISO-2022-KR)
 utf-7.title = Unicode (UTF-7)
 utf-8.title = Unicode (UTF-8)
 utf-16.title = Unicode (UTF-16)
 utf-16le.title = Unicode (UTF-16LE)
 utf-16be.title = Unicode (UTF-16BE)
-utf-32.title = Unicode (UTF-32)
-utf-32le.title = Unicode (UTF-32LE)
-utf-32be.title = Unicode (UTF-32BE)
 iso-8859-5.title = Cyrillic (ISO-8859-5)
 iso-ir-111.title = Cyrillic (ISO-IR-111)
 windows-1251.title = Cyrillic (Windows-1251)
 x-mac-cyrillic.title = Cyrillic (MacCyrillic)
 x-mac-ukrainian.title = Cyrillic/Ukrainian (MacUkrainian)
 koi8-r.title = Cyrillic (KOI8-R)
 koi8-u.title = Cyrillic/Ukrainian (KOI8-U)
 iso-8859-7.title = Greek (ISO-8859-7)
--- a/dom/src/json/nsJSON.cpp
+++ b/dom/src/json/nsJSON.cpp
@@ -108,27 +108,23 @@ nsJSON::Encode(nsAString &aJSON)
   }
 
   return rv;
 }
 
 static const char UTF8BOM[] = "\xEF\xBB\xBF";
 static const char UTF16LEBOM[] = "\xFF\xFE";
 static const char UTF16BEBOM[] = "\xFE\xFF";
-static const char UTF32LEBOM[] = "\xFF\xFE\0\0";
-static const char UTF32BEBOM[] = "\0\0\xFE\xFF";
 
 static nsresult CheckCharset(const char* aCharset)
 {
   // Check that the charset is permissible
   if (!(strcmp(aCharset, "UTF-8") == 0 ||
         strcmp(aCharset, "UTF-16LE") == 0 ||
-        strcmp(aCharset, "UTF-16BE") == 0 ||
-        strcmp(aCharset, "UTF-32LE") == 0 ||
-        strcmp(aCharset, "UTF-32BE") == 0)) {
+        strcmp(aCharset, "UTF-16BE") == 0)) {
     return NS_ERROR_INVALID_ARG;
   }
 
   return NS_OK;
 }
 
 //
 // void EncodeToStream(in nsIOutputStream stream
@@ -161,20 +157,16 @@ nsJSON::EncodeToStream(nsIOutputStream *
   PRUint32 ignored;
   if (aWriteBOM) {
     if (strcmp(aCharset, "UTF-8") == 0)
       rv = aStream->Write(UTF8BOM, 3, &ignored);
     else if (strcmp(aCharset, "UTF-16LE") == 0)
       rv = aStream->Write(UTF16LEBOM, 2, &ignored);
     else if (strcmp(aCharset, "UTF-16BE") == 0)
       rv = aStream->Write(UTF16BEBOM, 2, &ignored);
-    else if (strcmp(aCharset, "UTF-32LE") == 0)
-      rv = aStream->Write(UTF32LEBOM, 4, &ignored);
-    else if (strcmp(aCharset, "UTF-32BE") == 0)
-      rv = aStream->Write(UTF32BEBOM, 4, &ignored);
     NS_ENSURE_SUCCESS(rv, rv);
   }
 
   nsJSONWriter writer(bufferedStream);
   rv = writer.SetCharset(aCharset);
   NS_ENSURE_SUCCESS(rv, rv);
 
   rv = EncodeInternal(&writer);
@@ -699,26 +691,20 @@ nsJSONListener::ProcessBytes(const char*
   nsCAutoString charset;
   if (mNeedsConverter && !mDecoder) {
     if (!nsContentUtils::CheckForBOM((const unsigned char*) mSniffBuffer.get(),
                                       mSniffBuffer.Length(), charset)) {
       // OK, found no BOM, sniff the first character to see what this is
       // See section 3 of RFC4627 for details on why this works.
       const char *buffer = mSniffBuffer.get();
       if (mSniffBuffer.Length() >= 4) {
-        if (buffer[0] == 0x00 && buffer[1] == 0x00 &&
+        if (buffer[0] == 0x00 && buffer[1] != 0x00 &&
             buffer[2] == 0x00 && buffer[3] != 0x00) {
-          charset = "UTF-32BE";
-        } else if (buffer[0] == 0x00 && buffer[1] != 0x00 &&
-                   buffer[2] == 0x00 && buffer[3] != 0x00) {
           charset = "UTF-16BE";
         } else if (buffer[0] != 0x00 && buffer[1] == 0x00 &&
-                   buffer[2] == 0x00 && buffer[3] == 0x00) {
-          charset = "UTF-32LE";
-        } else if (buffer[0] != 0x00 && buffer[1] == 0x00 &&
                    buffer[2] != 0x00 && buffer[3] == 0x00) {
           charset = "UTF-16LE";
         } else if (buffer[0] != 0x00 && buffer[1] != 0x00 &&
                    buffer[2] != 0x00 && buffer[3] != 0x00) {
           charset = "UTF-8";
         }
       }
     }
--- a/dom/src/json/test/unit/test_encode.js
+++ b/dom/src/json/test/unit/test_encode.js
@@ -99,23 +99,17 @@ function getTestPairs() {
 
 function testStringEncode() {
   // test empty arg
   do_check_eq(null, nativeJSON.encode());
 
   var pairs = getTestPairs();
   for each(pair in pairs) {
     var nativeResult = nativeJSON.encode(pair[1]);
-    var crockfordResult = crockfordJSON.stringify(pair[1]);
     do_check_eq(pair[0], nativeResult);
-    
-    // Don't follow json2.js handling of non-objects
-    if (pair[1] && (typeof pair[1] == "object")) {
-      do_check_eq(crockfordResult, nativeResult);
-    }
   }
 }
 
 function testOutputStreams() {
   function writeToFile(obj, charset, writeBOM) {
     var jsonFile = Cc["@mozilla.org/file/local;1"].createInstance(Ci.nsILocalFile);
     jsonFile.initWithFile(outputDir);
     jsonFile.append("test.json");
@@ -131,38 +125,30 @@ function testOutputStreams() {
   }
 
   var pairs = getTestPairs();
   for each(pair in pairs) {
     if (pair[1] && (typeof pair[1] == "object")) {
       var utf8File = writeToFile(pair[1], "UTF-8", false);
       var utf16LEFile = writeToFile(pair[1], "UTF-16LE", false);
       var utf16BEFile = writeToFile(pair[1], "UTF-16BE", false);
-      var utf32LEFile = writeToFile(pair[1], "UTF-32LE", false);
-      var utf32BEFile = writeToFile(pair[1], "UTF-32BE", false);
 
       // all ascii with no BOMs, so this will work
       do_check_eq(utf16LEFile.fileSize / 2, utf8File.fileSize);
-      do_check_eq(utf32LEFile.fileSize / 4, utf8File.fileSize);
       do_check_eq(utf16LEFile.fileSize, utf16BEFile.fileSize);
-      do_check_eq(utf32LEFile.fileSize, utf32BEFile.fileSize);
     }
   }
 
   // check BOMs
   var f = writeToFile({},"UTF-8", true);
   do_check_eq(f.fileSize, 5);
   var f = writeToFile({},"UTF-16LE", true);
   do_check_eq(f.fileSize, 6);
   var f = writeToFile({},"UTF-16BE", true);
   do_check_eq(f.fileSize, 6);
-  var f = writeToFile({},"UTF-32LE", true);
-  do_check_eq(f.fileSize, 12);
-  var f = writeToFile({},"UTF-32BE", true);
-  do_check_eq(f.fileSize, 12);
   
   outputDir.remove(true);
 }
 
 function throwingToJSON() {
   var a = {
     "b": 1,
     "c": 2,
--- a/dom/src/json/test/unit/test_wrappers.js
+++ b/dom/src/json/test/unit/test_wrappers.js
@@ -90,19 +90,17 @@ function getTestPairs() {
   return testPairs;
 }
 
 function testStringEncode() {
   var pairs = getTestPairs();
   for each(pair in pairs) {
     print(pair)
     var nativeResult = JSON.stringify(pair[1]);
-    var crockfordResult = crockfordJSON.stringify(pair[1]);
     do_check_eq(pair[0], nativeResult);
-    do_check_eq(crockfordResult, nativeResult);
   }
 }
 
 function decode_strings() {
   // empty object
   var x = JSON.parse("{}");
   do_check_eq(typeof x, "object");
 
--- a/dom/src/threads/nsDOMWorkerScriptLoader.cpp
+++ b/dom/src/threads/nsDOMWorkerScriptLoader.cpp
@@ -250,31 +250,30 @@ nsDOMWorkerScriptLoader::ExecuteScripts(
   NS_ASSERTION(aCx, "Shouldn't be null!");
 
   // Now execute all the scripts.
   for (PRUint32 index = 0; index < mScriptCount; index++) {
     ScriptLoadInfo& loadInfo = mLoadInfos[index];
 
     JSAutoRequest ar(aCx);
 
-    JSScript* script =
-      static_cast<JSScript*>(JS_GetPrivate(aCx, loadInfo.scriptObj.ToJSObject()));
-    NS_ASSERTION(script, "This shouldn't ever be null!");
+    JSObject* scriptObj = loadInfo.scriptObj.ToJSObject();
+    NS_ASSERTION(scriptObj, "This shouldn't ever be null!");
 
     JSObject* global = mWorker->mGlobal ?
                        mWorker->mGlobal :
                        JS_GetGlobalObject(aCx);
     NS_ENSURE_STATE(global);
 
     // Because we may have nested calls to this function we don't want the
     // execution to automatically report errors. We let them propagate instead.
     uint32 oldOpts =
       JS_SetOptions(aCx, JS_GetOptions(aCx) | JSOPTION_DONT_REPORT_UNCAUGHT);
 
-    PRBool success = JS_ExecuteScript(aCx, global, script, NULL);
+    PRBool success = JS_ExecuteScript(aCx, global, scriptObj, NULL);
 
     JS_SetOptions(aCx, oldOpts);
 
     if (!success) {
       return NS_ERROR_FAILURE;
     }
   }
   return NS_OK;
@@ -822,30 +821,29 @@ nsDOMWorkerScriptLoader::ScriptCompiler:
   // Because we may have nested calls to this function we don't want the
   // execution to automatically report errors. We let them propagate instead.
   uint32 oldOpts =
     JS_SetOptions(cx, JS_GetOptions(cx) | JSOPTION_DONT_REPORT_UNCAUGHT |
                       JSOPTION_NO_SCRIPT_RVAL);
 
   JSPrincipals* principal = nsDOMWorkerSecurityManager::WorkerPrincipal();
 
-  JSScript* script =
+  JSObject* scriptObj =
     JS_CompileUCScriptForPrincipals(cx, global, principal,
                                     reinterpret_cast<const jschar*>
                                                (mScriptText.BeginReading()),
                                     mScriptText.Length(), mFilename.get(), 1);
 
   JS_SetOptions(cx, oldOpts);
 
-  if (!script) {
+  if (!scriptObj) {
     return NS_ERROR_FAILURE;
   }
 
-  mScriptObj = JS_NewScriptObject(cx, script);
-  NS_ENSURE_STATE(mScriptObj.ToJSObject());
+  mScriptObj = scriptObj;
 
   return NS_OK;
 }
 
 nsDOMWorkerScriptLoader::
 ScriptLoaderDone::ScriptLoaderDone(nsDOMWorkerScriptLoader* aLoader,
                                    volatile PRBool* aDoneFlag)
 : ScriptLoaderRunnable(aLoader),
--- a/dom/src/threads/test/regExpStatics_worker.js
+++ b/dom/src/threads/test/regExpStatics_worker.js
@@ -8,17 +8,17 @@ onmessage = function() {
 };
 
 function run() {
   if (RegExp.$1) {
     throw "RegExp.$1 already set!";
     cancelTimeout(timeout);
   }
 
-  var match = /a(sd)f/("asdf");
+  var match = /a(sd)f/.exec("asdf");
   if (!RegExp.$1) {
     throw "RegExp.$1 didn't get set!";
     cancelTimeout(timeout);
   }
 
   if (++runCount == 3) {
     postMessage("done");
   }
--- a/embedding/components/windowwatcher/src/nsWindowWatcher.cpp
+++ b/embedding/components/windowwatcher/src/nsWindowWatcher.cpp
@@ -344,23 +344,23 @@ nsWindowWatcher::nsWindowWatcher() :
 
 nsWindowWatcher::~nsWindowWatcher()
 {
   // delete data
   while (mOldestWindow)
     RemoveWindow(mOldestWindow);
 
   if (mListLock)
-    PR_DestroyLock(mListLock);
+    nsAutoLock::DestroyLock(mListLock);
 }
 
 nsresult
 nsWindowWatcher::Init()
 {
-  mListLock = PR_NewLock();
+  mListLock = nsAutoLock::NewLock("nsWindowWatcher::mListLock");
   if (!mListLock)
     return NS_ERROR_OUT_OF_MEMORY;
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsWindowWatcher::OpenWindow(nsIDOMWindow *aParent,
                             const char *aUrl,
--- a/extensions/java/xpcom/src/nsJavaXPCOMBindingUtils.cpp
+++ b/extensions/java/xpcom/src/nsJavaXPCOMBindingUtils.cpp
@@ -329,17 +329,17 @@ InitializeJavaGlobals(JNIEnv *env)
       }
     }
     if (NS_FAILED(rv)) {
       NS_WARNING("Failed to populate JavaKeywords hash");
       goto init_error;
     }
   }
 
-  gJavaXPCOMLock = PR_NewLock();
+  gJavaXPCOMLock = nsAutoLock::NewLock("gJavaXPCOMLock");
   gJavaXPCOMInitialized = PR_TRUE;
   return PR_TRUE;
 
 init_error:
   // If we encounter an error during initialization, then free any globals that
   // were allocated, and return false.
   FreeJavaGlobals(env);
   return PR_FALSE;
@@ -435,17 +435,17 @@ FreeJavaGlobals(JNIEnv* env)
 
   if (gJavaKeywords) {
     delete gJavaKeywords;
     gJavaKeywords = nsnull;
   }
 
   if (tempLock) {
     PR_Unlock(tempLock);
-    PR_DestroyLock(tempLock);
+    nsAutoLock::DestroyLock(tempLock);
   }
 }
 
 
 /**************************************
  *  Java<->XPCOM object mappings
  **************************************/
 
--- a/extensions/universalchardet/src/base/nsUniversalDetector.cpp
+++ b/extensions/universalchardet/src/base/nsUniversalDetector.cpp
@@ -106,45 +106,31 @@ nsresult nsUniversalDetector::HandleData
 
   if (aLen > 0)
     mGotData = PR_TRUE;
 
   //If the data starts with BOM, we know it is UTF
   if (mStart)
   {
     mStart = PR_FALSE;
-    if (aLen > 3)
+    if (aLen > 2)
       switch (aBuf[0])
         {
         case '\xEF':
           if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
             // EF BB BF  UTF-8 encoded BOM
             mDetectedCharset = "UTF-8";
         break;
         case '\xFE':
-          if (('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
-            // FE FF 00 00  UCS-4, unusual octet order BOM (3412)
-            mDetectedCharset = "X-ISO-10646-UCS-4-3412";
-          else if ('\xFF' == aBuf[1])
+          if ('\xFF' == aBuf[1])
             // FE FF  UTF-16, big endian BOM
             mDetectedCharset = "UTF-16";
         break;
-        case '\x00':
-          if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
-            // 00 00 FE FF  UTF-32, big-endian BOM
-            mDetectedCharset = "UTF-32";
-          else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
-            // 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
-            mDetectedCharset = "X-ISO-10646-UCS-4-2143";
-        break;
         case '\xFF':
-          if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
-            // FF FE 00 00  UTF-32, little-endian BOM
-            mDetectedCharset = "UTF-32";
-          else if ('\xFE' == aBuf[1])
+          if ('\xFE' == aBuf[1])
             // FF FE  UTF-16, little endian BOM
             mDetectedCharset = "UTF-16";
         break;
       }  // switch
 
       if (mDetectedCharset)
       {
         mDone = PR_TRUE;
--- a/gfx/cairo/libpixman/src/Makefile.in
+++ b/gfx/cairo/libpixman/src/Makefile.in
@@ -68,18 +68,25 @@ DEFINES += -DPIXMAN_NO_TLS
 # Build MMX code either with VC or with gcc-on-x86
 ifdef _MSC_VER
 ifeq (86,$(findstring 86,$(OS_TEST)))
 ifneq (64,$(findstring 64,$(OS_TEST)))
 USE_MMX=1
 endif
 USE_SSE2=1
 MMX_CFLAGS=
+ifneq (,$(filter 1400 1500, $(_MSC_VER)))
+# MSVC 2005 and 2008 generate code that breaks alignment
+# restrictions in debug mode so always optimize.
+# See bug 640250 for more info.
+SSE2_CFLAGS=-O2
+else
 SSE2_CFLAGS=
 endif
+endif
 ifeq (arm,$(findstring arm,$(OS_TEST)))
 USE_ARM_SIMD_MSVC=1
 endif
 endif
 
 ifdef GNU_CC
 ifeq (86,$(findstring 86,$(OS_TEST)))
 USE_MMX=1
--- a/gfx/cairo/libpixman/src/pixman-access.c
+++ b/gfx/cairo/libpixman/src/pixman-access.c
@@ -206,16 +206,56 @@ fetch_scanline_b8g8r8x8 (pixman_image_t 
 	*buffer++ = (0xff000000 |
 	             ((p & 0xff000000) >> 24)	|
 	             ((p & 0x00ff0000) >> 8)	|
 	             ((p & 0x0000ff00) << 8));
     }
 }
 
 static void
+fetch_scanline_r8g8b8a8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+
+	*buffer++ = (((p & 0x000000ff) << 24) | (p >> 8));
+    }
+}
+
+static void
+fetch_scanline_r8g8b8x8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	
+	*buffer++ = (0xff000000 | (p >> 8));
+    }
+}
+
+static void
 fetch_scanline_x14r6g6b6 (pixman_image_t *image,
                           int             x,
                           int             y,
                           int             width,
                           uint32_t *      buffer,
                           const uint32_t *mask)
 {
     const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
@@ -1287,16 +1327,38 @@ fetch_pixel_b8g8r8x8 (bits_image_t *imag
     
     return ((0xff000000) |
 	    (pixel & 0xff000000) >> 24 |
 	    (pixel & 0x00ff0000) >> 8 |
 	    (pixel & 0x0000ff00) << 8);
 }
 
 static uint32_t
+fetch_pixel_r8g8b8a8 (bits_image_t *image,
+		      int           offset,
+		      int           line)
+{
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t pixel = READ (image, (uint32_t *)bits + offset);
+    
+    return (((pixel & 0x000000ff) << 24) | (pixel >> 8));
+}
+
+static uint32_t
+fetch_pixel_r8g8b8x8 (bits_image_t *image,
+		      int           offset,
+		      int           line)
+{
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t pixel = READ (image, (uint32_t *)bits + offset);
+    
+    return (0xff000000 | (pixel >> 8));
+}
+
+static uint32_t
 fetch_pixel_x14r6g6b6 (bits_image_t *image,
                        int           offset,
                        int           line)
 {
     uint32_t *bits = image->bits + line * image->rowstride;
     uint32_t pixel = READ (image, (uint32_t *) bits + offset);
     uint32_t r, g, b;
 
@@ -2023,16 +2085,49 @@ store_scanline_b8g8r8x8 (bits_image_t * 
 	WRITE (image, pixel++,
 	       ((values[i] >>  8) & 0x0000ff00) |
 	       ((values[i] <<  8) & 0x00ff0000) |
 	       ((values[i] << 24) & 0xff000000));
     }
 }
 
 static void
+store_scanline_r8g8b8a8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
+{
+    uint32_t *bits = image->bits + image->rowstride * y;
+    uint32_t *pixel = (uint32_t *)bits + x;
+    int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	WRITE (image, pixel++,
+	       ((values[i] >> 24) & 0x000000ff) | (values[i] << 8));
+    }
+}
+
+static void
+store_scanline_r8g8b8x8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
+{
+    uint32_t *bits = image->bits + image->rowstride * y;
+    uint32_t *pixel = (uint32_t *)bits + x;
+    int i;
+    
+    for (i = 0; i < width; ++i)
+	WRITE (image, pixel++, (values[i] << 8));
+}
+
+static void
 store_scanline_x14r6g6b6 (bits_image_t *  image,
                           int             x,
                           int             y,
                           int             width,
                           const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = ((uint32_t *) bits) + x;
@@ -2840,16 +2935,18 @@ static const format_info_t accessors[] =
 {
 /* 32 bpp formats */
     FORMAT_INFO (a8r8g8b8),
     FORMAT_INFO (x8r8g8b8),
     FORMAT_INFO (a8b8g8r8),
     FORMAT_INFO (x8b8g8r8),
     FORMAT_INFO (b8g8r8a8),
     FORMAT_INFO (b8g8r8x8),
+    FORMAT_INFO (r8g8b8a8),
+    FORMAT_INFO (r8g8b8x8),
     FORMAT_INFO (x14r6g6b6),
 
 /* 24bpp formats */
     FORMAT_INFO (r8g8b8),
     FORMAT_INFO (b8g8r8),
     
 /* 16bpp formats */
     FORMAT_INFO (r5g6b5),
--- a/gfx/cairo/libpixman/src/pixman-arm-common.h
+++ b/gfx/cairo/libpixman/src/pixman-arm-common.h
@@ -21,16 +21,18 @@
  * DEALINGS IN THE SOFTWARE.
  *
  * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
  */
 
 #ifndef PIXMAN_ARM_COMMON_H
 #define PIXMAN_ARM_COMMON_H
 
+#include "pixman-fast-path.h"
+
 /* Define some macros which can expand into proxy functions between
  * ARM assembly optimized functions and the rest of pixman fast path API.
  *
  * All the low level ARM assembly functions have to use ARM EABI
  * calling convention and take up to 8 arguments:
  *    width, height, dst, dst_stride, src, src_stride, mask, mask_stride
  *
  * The arguments are ordered with the most important coming first (the
@@ -40,16 +42,19 @@
  * omitted when doing a function call.
  *
  * Arguments 'src' and 'mask' contain either a pointer to the top left
  * pixel of the composited rectangle or a pixel color value depending
  * on the function type. In the case of just a color value (solid source
  * or mask), the corresponding stride argument is unused.
  */
 
+#define SKIP_ZERO_SRC  1
+#define SKIP_ZERO_MASK 2
+
 #define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name,                \
                                           src_type, src_cnt,            \
                                           dst_type, dst_cnt)            \
 void                                                                    \
 pixman_composite_##name##_asm_##cputype (int32_t   w,                   \
                                          int32_t   h,                   \
                                          dst_type *dst,                 \
                                          int32_t   dst_stride,          \
@@ -80,17 +85,17 @@ cputype##_composite_##name (pixman_imple
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                            dst_stride, dst_line, dst_cnt);              \
                                                                         \
     pixman_composite_##name##_asm_##cputype (width, height,             \
                                              dst_line, dst_stride,      \
                                              src_line, src_stride);     \
 }
 
-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name,                  \
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name,           \
                                         dst_type, dst_cnt)              \
 void                                                                    \
 pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
                                          int32_t    h,                  \
                                          dst_type  *dst,                \
                                          int32_t    dst_stride,         \
                                          uint32_t   src);               \
                                                                         \
@@ -108,30 +113,31 @@ cputype##_composite_##name (pixman_imple
                             int32_t                  dest_y,            \
                             int32_t                  width,             \
                             int32_t                  height)            \
 {                                                                       \
     dst_type  *dst_line;                                                \
     int32_t    dst_stride;                                              \
     uint32_t   src;                                                     \
                                                                         \
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
+    src = _pixman_image_get_solid (					\
+	imp, src_image, dst_image->bits.format);			\
                                                                         \
-    if (src == 0)                                                       \
+    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
 	return;                                                         \
                                                                         \
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                            dst_stride, dst_line, dst_cnt);              \
                                                                         \
     pixman_composite_##name##_asm_##cputype (width, height,             \
                                              dst_line, dst_stride,      \
                                              src);                      \
 }
 
-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name,             \
+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name,      \
                                              mask_type, mask_cnt,       \
                                              dst_type, dst_cnt)         \
 void                                                                    \
 pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
                                          int32_t    h,                  \
                                          dst_type  *dst,                \
                                          int32_t    dst_stride,         \
                                          uint32_t   src,                \
@@ -154,33 +160,34 @@ cputype##_composite_##name (pixman_imple
                             int32_t                  width,             \
                             int32_t                  height)            \
 {                                                                       \
     dst_type  *dst_line;                                                \
     mask_type *mask_line;                                               \
     int32_t    dst_stride, mask_stride;                                 \
     uint32_t   src;                                                     \
                                                                         \
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
+    src = _pixman_image_get_solid (					\
+	imp, src_image, dst_image->bits.format);			\
                                                                         \
-    if (src == 0)                                                       \
+    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
 	return;                                                         \
                                                                         \
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                            dst_stride, dst_line, dst_cnt);              \
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
                            mask_stride, mask_line, mask_cnt);           \
                                                                         \
     pixman_composite_##name##_asm_##cputype (width, height,             \
                                              dst_line, dst_stride,      \
                                              src, 0,                    \
                                              mask_line, mask_stride);   \
 }
 
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name,              \
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name,       \
                                             src_type, src_cnt,          \
                                             dst_type, dst_cnt)          \
 void                                                                    \
 pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
                                          int32_t    h,                  \
                                          dst_type  *dst,                \
                                          int32_t    dst_stride,         \
                                          src_type  *src,                \
@@ -202,19 +209,20 @@ cputype##_composite_##name (pixman_imple
                             int32_t                  width,             \
                             int32_t                  height)            \
 {                                                                       \
     dst_type  *dst_line;                                                \
     src_type  *src_line;                                                \
     int32_t    dst_stride, src_stride;                                  \
     uint32_t   mask;                                                    \
                                                                         \
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
+    mask = _pixman_image_get_solid (					\
+	imp, mask_image, dst_image->bits.format);			\
                                                                         \
-    if (mask == 0)                                                      \
+    if ((flags & SKIP_ZERO_MASK) && mask == 0)                          \
 	return;                                                         \
                                                                         \
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                            dst_stride, dst_line, dst_cnt);              \
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
                            src_stride, src_line, src_cnt);              \
                                                                         \
     pixman_composite_##name##_asm_##cputype (width, height,             \
@@ -265,9 +273,137 @@ cputype##_composite_##name (pixman_imple
                            mask_stride, mask_line, mask_cnt);           \
                                                                         \
     pixman_composite_##name##_asm_##cputype (width, height,             \
                                              dst_line, dst_stride,      \
                                              src_line, src_stride,      \
                                              mask_line, mask_stride);   \
 }
 
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op,             \
+                                               src_type, dst_type)            \
+void                                                                          \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
+                                                   int32_t          w,        \
+                                                   dst_type *       dst,      \
+                                                   const src_type * src,      \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x);  \
+/* Adapter: drops max_vx/zero_src and reorders args for the asm scanline. */  \
+static force_inline void                                                      \
+scaled_nearest_scanline_##cputype##_##name##_##op (dst_type *       pd,       \
+                                                   const src_type * ps,       \
+                                                   int32_t          w,        \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x,   \
+                                                   pixman_fixed_t   max_vx,   \
+                                                   pixman_bool_t    zero_src) \
+{                                                                             \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
+                                                                  vx, unit_x);\
+}                                                                             \
+/* Instantiate one main loop per variant: COVER, NONE and PAD. */             \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op,                         \
+                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
+                       src_type, dst_type, COVER)                             \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op,                          \
+                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
+                       src_type, dst_type, NONE)                              \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op,                           \
+                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
+                       src_type, dst_type, PAD)
+
+/* Provide the COVER, NONE and PAD entries for the fast path table */
+#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                      \
+    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                             \
+    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                              \
+    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op,   \
+                                                  src_type, dst_type)         \
+void                                                                          \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
+                                                   int32_t          w,        \
+                                                   dst_type *       dst,      \
+                                                   const src_type * src,      \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x,   \
+                                                   const uint8_t *  mask);    \
+/* Adapter: optionally skips zero sources, then forwards to the asm code. */  \
+static force_inline void                                                      \
+scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t *  mask,     \
+                                                   dst_type *       pd,       \
+                                                   const src_type * ps,       \
+                                                   int32_t          w,        \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x,   \
+                                                   pixman_fixed_t   max_vx,   \
+                                                   pixman_bool_t    zero_src) \
+{                                                                             \
+    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
+	return;                                                               \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
+                                                                  vx, unit_x, \
+                                                                  mask);      \
+}                                                                             \
+/* Instantiate one main loop (uint8_t mask) per variant: COVER, NONE, PAD. */ \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                  \
+                              scaled_nearest_scanline_##cputype##_##name##_##op,\
+                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op,                   \
+                              scaled_nearest_scanline_##cputype##_##name##_##op,\
+                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                    \
+                              scaled_nearest_scanline_##cputype##_##name##_##op,\
+                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Provide the COVER, NONE and PAD A8-mask entries for the fast path table */
+#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)              \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),                     \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+/*****************************************************************************/
+
+#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op,     \
+                                                src_type, dst_type)           \
+void                                                                          \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (               \
+                                                dst_type *       dst,         \
+                                                const src_type * top,         \
+                                                const src_type * bottom,      \
+                                                int              wt,          \
+                                                int              wb,          \
+                                                pixman_fixed_t   x,           \
+                                                pixman_fixed_t   ux,          \
+                                                int              width);      \
+/* Adapter: optionally skips zero sources; the mask argument is unused. */    \
+static force_inline void                                                      \
+scaled_bilinear_scanline_##cputype##_##name##_##op (                          \
+                                                dst_type *       dst,         \
+                                                const uint32_t * mask,        \
+                                                const src_type * src_top,     \
+                                                const src_type * src_bottom,  \
+                                                int32_t          w,           \
+                                                int              wt,          \
+                                                int              wb,          \
+                                                pixman_fixed_t   vx,          \
+                                                pixman_fixed_t   unit_x,      \
+                                                pixman_fixed_t   max_vx,      \
+                                                pixman_bool_t    zero_src)    \
+{                                                                             \
+    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
+	return;                                                               \
+    pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (           \
+                            dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
+}                                                                             \
+/* Instantiate one main loop per variant: COVER, NONE and PAD. */             \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                 \
+                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
+                       src_type, uint32_t, dst_type, COVER, FALSE, FALSE)     \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op,                  \
+                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
+                       src_type, uint32_t, dst_type, NONE, FALSE, FALSE)      \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                   \
+                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
+                       src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
+
 #endif
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
@@ -248,17 +248,17 @@
 
 #if 1
 
 .macro pixman_composite_over_8888_0565_process_pixblock_tail_head
         vqadd.u8    d16, d2, d20
     vld1.16     {d4, d5}, [DST_R, :128]!
         vqadd.u8    q9, q0, q11
     vshrn.u16   d6, q2, #8
-    vld4.8      {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     vshrn.u16   d7, q2, #3
     vsli.u16    q2, q2, #5
         vshll.u8    q14, d16, #8
                                     PF add PF_X, PF_X, #8
         vshll.u8    q8, d19, #8
                                     PF tst PF_CTL, #0xF
     vsri.u8     d6, d6, #5
                                     PF addne PF_X, PF_X, #8
@@ -290,17 +290,17 @@
 
 #else
 
 /* If we did not care much about the performance, we would just use this... */
 .macro pixman_composite_over_8888_0565_process_pixblock_tail_head
     pixman_composite_over_8888_0565_process_pixblock_tail
     vst1.16     {d28, d29}, [DST_W, :128]!
     vld1.16     {d4, d5}, [DST_R, :128]!
-    vld4.32     {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     pixman_composite_over_8888_0565_process_pixblock_head
     cache_preload 8, 8
 .endm
 
 #endif
 
 /*
  * And now the final part. We are using 'generate_composite_function' macro
@@ -428,17 +428,17 @@ generate_composite_function \
     vsri.u16    q14, q8, #5
     vsri.u16    q14, q9, #11
 .endm
 
 .macro pixman_composite_src_8888_0565_process_pixblock_tail_head
         vsri.u16    q14, q8, #5
                                     PF add PF_X, PF_X, #8
                                     PF tst PF_CTL, #0xF
-    vld4.8      {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
                                     PF addne PF_X, PF_X, #8
                                     PF subne PF_CTL, PF_CTL, #1
         vsri.u16    q14, q9, #11
                                     PF cmp PF_X, ORIG_W
                                     PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
     vshll.u8    q8, d1, #8
         vst1.16     {d28, d29}, [DST_W, :128]!
                                     PF subge PF_X, PF_X, ORIG_W
@@ -473,17 +473,17 @@ generate_composite_function \
 
 .macro pixman_composite_src_0565_8888_process_pixblock_tail
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_src_0565_8888_process_pixblock_tail_head
     pixman_composite_src_0565_8888_process_pixblock_tail
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
-    vld1.16    {d0, d1}, [SRC]!
+    fetch_src_pixblock
     pixman_composite_src_0565_8888_process_pixblock_head
     cache_preload 8, 8
 .endm
 
 generate_composite_function \
     pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
     FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
@@ -500,17 +500,17 @@ generate_composite_function \
     vqadd.u8    q14, q0, q2
     vqadd.u8    q15, q1, q3
 .endm
 
 .macro pixman_composite_add_8_8_process_pixblock_tail
 .endm
 
 .macro pixman_composite_add_8_8_process_pixblock_tail_head
-    vld1.8      {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
                                     PF add PF_X, PF_X, #32
                                     PF tst PF_CTL, #0xF
     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
                                     PF addne PF_X, PF_X, #32
                                     PF subne PF_CTL, PF_CTL, #1
         vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
                                     PF cmp PF_X, ORIG_W
                                     PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
@@ -532,23 +532,23 @@ generate_composite_function \
     default_cleanup, \
     pixman_composite_add_8_8_process_pixblock_head, \
     pixman_composite_add_8_8_process_pixblock_tail, \
     pixman_composite_add_8_8_process_pixblock_tail_head
 
 /******************************************************************************/
 
 .macro pixman_composite_add_8888_8888_process_pixblock_tail_head
-    vld1.8      {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
                                     PF add PF_X, PF_X, #8
                                     PF tst PF_CTL, #0xF
-    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
+    vld1.32     {d4, d5, d6, d7}, [DST_R, :128]!
                                     PF addne PF_X, PF_X, #8
                                     PF subne PF_CTL, PF_CTL, #1
-        vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
+        vst1.32     {d28, d29, d30, d31}, [DST_W, :128]!
                                     PF cmp PF_X, ORIG_W
                                     PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
                                     PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
                                     PF subge PF_X, PF_X, ORIG_W
                                     PF subges PF_CTL, PF_CTL, #0x10
     vqadd.u8    q14, q0, q2
                                     PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
                                     PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
@@ -608,17 +608,17 @@ generate_composite_function_single_scanl
         vrshr.u16   q13, q11, #8
                                     PF addne PF_X, PF_X, #8
                                     PF subne PF_CTL, PF_CTL, #1
         vraddhn.u16 d28, q14, q8
         vraddhn.u16 d29, q15, q9
                                     PF cmp PF_X, ORIG_W
         vraddhn.u16 d30, q12, q10
         vraddhn.u16 d31, q13, q11
-    vld4.8      {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
                                     PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
     vmvn.8      d22, d3
                                     PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
         vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
                                     PF subge PF_X, PF_X, ORIG_W
     vmull.u8    q8, d22, d4
                                     PF subges PF_CTL, PF_CTL, #0x10
     vmull.u8    q9, d22, d5
@@ -662,17 +662,17 @@ generate_composite_function_single_scanl
                                     PF subne PF_CTL, PF_CTL, #1
         vraddhn.u16 d28, q14, q8
         vraddhn.u16 d29, q15, q9
                                     PF cmp PF_X, ORIG_W
         vraddhn.u16 d30, q12, q10
         vraddhn.u16 d31, q13, q11
         vqadd.u8    q14, q0, q14
         vqadd.u8    q15, q1, q15
-    vld4.8      {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
                                     PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
     vmvn.8      d22, d3
                                     PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
         vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
                                     PF subge PF_X, PF_X, ORIG_W
     vmull.u8    q8, d22, d4
                                     PF subges PF_CTL, PF_CTL, #0x10
     vmull.u8    q9, d22, d5
@@ -786,70 +786,121 @@ generate_composite_function \
     pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
     28, /* dst_w_basereg */ \
     0,  /* dst_r_basereg */ \
     4,  /* src_basereg   */ \
     24  /* mask_basereg  */
 
 /******************************************************************************/
 
-.macro pixman_composite_over_n_8_0565_process_pixblock_head
-    /* in */
-    vmull.u8    q0, d24, d8
-    vmull.u8    q1, d24, d9
-    vmull.u8    q6, d24, d10
-    vmull.u8    q7, d24, d11
-    vrshr.u16   q10, q0, #8
-    vrshr.u16   q11, q1, #8
-    vrshr.u16   q12, q6, #8
-    vrshr.u16   q13, q7, #8
-    vraddhn.u16 d0, q0, q10
-    vraddhn.u16 d1, q1, q11
-    vraddhn.u16 d2, q6, q12
-    vraddhn.u16 d3, q7, q13
+.macro pixman_composite_over_8888_8_0565_process_pixblock_head
+    vmull.u8    q0,  d24, d8    /* IN for SRC pixels (part1) */
+    vmull.u8    q1,  d24, d9
+    vmull.u8    q6,  d24, d10
+    vmull.u8    q7,  d24, d11
+        vshrn.u16   d6,  q2, #8 /* convert DST_R data to 32-bpp (part1) */
+        vshrn.u16   d7,  q2, #3
+        vsli.u16    q2,  q2, #5
+    vrshr.u16   q8,  q0,  #8    /* IN for SRC pixels (part2) */
+    vrshr.u16   q9,  q1,  #8
+    vrshr.u16   q10, q6,  #8
+    vrshr.u16   q11, q7,  #8
+    vraddhn.u16 d0,  q0,  q8
+    vraddhn.u16 d1,  q1,  q9
+    vraddhn.u16 d2,  q6,  q10
+    vraddhn.u16 d3,  q7,  q11
+        vsri.u8     d6,  d6, #5 /* convert DST_R data to 32-bpp (part2) */
+        vsri.u8     d7,  d7, #6
+    vmvn.8      d3,  d3
+        vshrn.u16   d30, q2, #2
+    vmull.u8    q8,  d3, d6     /* now do alpha blending */
+    vmull.u8    q9,  d3, d7
+    vmull.u8    q10, d3, d30
+.endm
 
-    vshrn.u16   d6, q2, #8
-    vshrn.u16   d7, q2, #3
-    vsli.u16    q2, q2, #5
-    vsri.u8     d6, d6, #5
-    vmvn.8      d3, d3
-    vsri.u8     d7, d7, #6
-    vshrn.u16   d30, q2, #2
-    /* now do alpha blending */
-    vmull.u8    q10, d3, d6
-    vmull.u8    q11, d3, d7
-    vmull.u8    q12, d3, d30
-    vrshr.u16   q13, q10, #8
-    vrshr.u16   q3, q11, #8
-    vrshr.u16   q15, q12, #8
-    vraddhn.u16 d20, q10, q13
-    vraddhn.u16 d23, q11, q3
-    vraddhn.u16 d22, q12, q15
+.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
+    /* 3 cycle bubble (after vmull.u8) */
+    vrshr.u16   q13, q8,  #8
+    vrshr.u16   q11, q9,  #8
+    vrshr.u16   q15, q10, #8
+    vraddhn.u16 d16, q8,  q13
+    vraddhn.u16 d27, q9,  q11
+    vraddhn.u16 d26, q10, q15
+    vqadd.u8    d16, d2,  d16
+    /* 1 cycle bubble */
+    vqadd.u8    q9,  q0,  q13
+    vshll.u8    q14, d16, #8    /* convert to 16bpp */
+    vshll.u8    q8,  d19, #8
+    vshll.u8    q9,  d18, #8
+    vsri.u16    q14, q8,  #5
+    /* 1 cycle bubble */
+    vsri.u16    q14, q9,  #11
 .endm
 
-.macro pixman_composite_over_n_8_0565_process_pixblock_tail
-    vqadd.u8    d16, d2, d20
-    vqadd.u8    q9, q0, q11
-    /* convert to r5g6b5 */
-    vshll.u8    q14, d16, #8
-    vshll.u8    q8, d19, #8
-    vshll.u8    q9, d18, #8
-    vsri.u16    q14, q8, #5
-    vsri.u16    q14, q9, #11
+.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+    vld1.16     {d4, d5}, [DST_R, :128]!
+    vshrn.u16   d6,  q2,  #8
+    fetch_mask_pixblock
+    vshrn.u16   d7,  q2,  #3
+    fetch_src_pixblock
+    vmull.u8    q6,  d24, d10
+        vrshr.u16   q13, q8,  #8
+        vrshr.u16   q11, q9,  #8
+        vrshr.u16   q15, q10, #8
+        vraddhn.u16 d16, q8,  q13
+        vraddhn.u16 d27, q9,  q11
+        vraddhn.u16 d26, q10, q15
+        vqadd.u8    d16, d2,  d16
+    vmull.u8    q1,  d24, d9
+        vqadd.u8    q9,  q0,  q13
+        vshll.u8    q14, d16, #8
+    vmull.u8    q0,  d24, d8
+        vshll.u8    q8,  d19, #8
+        vshll.u8    q9,  d18, #8
+        vsri.u16    q14, q8,  #5
+    vmull.u8    q7,  d24, d11
+        vsri.u16    q14, q9,  #11
+
+    cache_preload 8, 8
+
+    vsli.u16    q2,  q2,  #5
+    vrshr.u16   q8,  q0,  #8
+    vrshr.u16   q9,  q1,  #8
+    vrshr.u16   q10, q6,  #8
+    vrshr.u16   q11, q7,  #8
+    vraddhn.u16 d0,  q0,  q8
+    vraddhn.u16 d1,  q1,  q9
+    vraddhn.u16 d2,  q6,  q10
+    vraddhn.u16 d3,  q7,  q11
+    vsri.u8     d6,  d6,  #5
+    vsri.u8     d7,  d7,  #6
+    vmvn.8      d3,  d3
+    vshrn.u16   d30, q2,  #2
+    vst1.16     {d28, d29}, [DST_W, :128]!
+    vmull.u8    q8,  d3,  d6
+    vmull.u8    q9,  d3,  d7
+    vmull.u8    q10, d3,  d30
 .endm
 
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8_0565_process_pixblock_tail_head
-    pixman_composite_over_n_8_0565_process_pixblock_tail
-    vst1.16     {d28, d29}, [DST_W, :128]!
-    vld1.16     {d4, d5}, [DST_R, :128]!
-    vld1.8      {d24}, [MASK]!
-    cache_preload 8, 8
-    pixman_composite_over_n_8_0565_process_pixblock_head
-.endm
+generate_composite_function \
+    pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
+    pixman_composite_over_8888_8_0565_process_pixblock_head, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    24  /* mask_basereg  */
+
+/******************************************************************************/
 
 /*
  * This function needs a special initialization of solid mask.
  * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
  * offset, split into color components and replicated in d8-d11
  * registers. Additionally, this function needs all the NEON registers,
  * so it has to save d8-d15 registers which are callee saved according
  * to ABI. These registers are restored from 'cleanup' macro. All the
@@ -872,59 +923,59 @@ generate_composite_function \
 
 generate_composite_function \
     pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
     FLAG_DST_READWRITE, \
     8, /* number of pixels, processed in a single block */ \
     5, /* prefetch distance */ \
     pixman_composite_over_n_8_0565_init, \
     pixman_composite_over_n_8_0565_cleanup, \
-    pixman_composite_over_n_8_0565_process_pixblock_head, \
-    pixman_composite_over_n_8_0565_process_pixblock_tail, \
-    pixman_composite_over_n_8_0565_process_pixblock_tail_head
+    pixman_composite_over_8888_8_0565_process_pixblock_head, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail_head
 
 /******************************************************************************/
 
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
-    vld1.16     {d4, d5}, [DST_R, :128]!
-    pixman_composite_over_n_8_0565_process_pixblock_tail
-    vld4.8      {d8, d9, d10, d11}, [SRC]!
-    cache_preload 8, 8
-    vld1.8      {d24}, [MASK]!
-    pixman_composite_over_n_8_0565_process_pixblock_head
-    vst1.16     {d28, d29}, [DST_W, :128]!
+.macro pixman_composite_over_8888_n_0565_init
+    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8) /* address of a stack argument; presumably the solid mask - TODO confirm */
+    vpush       {d8-d15}           /* save d8-d15; the cleanup macro pops them again */
+    vld1.32     {d24[0]}, [DUMMY]  /* load one 32-bit word into lane 0 of d24 */
+    vdup.8      d24, d24[3]        /* replicate byte 3 of that word (presumably alpha - confirm) across d24 */
+.endm
+
+.macro pixman_composite_over_8888_n_0565_cleanup
+    vpop        {d8-d15}           /* restore d8-d15, pushed by pixman_composite_over_8888_n_0565_init */
 .endm
 
 generate_composite_function \
-    pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
+    pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
     5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_n_8_0565_process_pixblock_head, \
-    pixman_composite_over_n_8_0565_process_pixblock_tail, \
+    pixman_composite_over_8888_n_0565_init, \
+    pixman_composite_over_8888_n_0565_cleanup, \
+    pixman_composite_over_8888_8_0565_process_pixblock_head, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
     pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
     28, /* dst_w_basereg */ \
     4,  /* dst_r_basereg */ \
     8,  /* src_basereg   */ \
     24  /* mask_basereg  */
 
 /******************************************************************************/
 
 .macro pixman_composite_src_0565_0565_process_pixblock_head
 .endm
 
 .macro pixman_composite_src_0565_0565_process_pixblock_tail
 .endm
 
 .macro pixman_composite_src_0565_0565_process_pixblock_tail_head
     vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
-    vld1.16 {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     cache_preload 16, 16
 .endm
 
 generate_composite_function \
     pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
     FLAG_DST_WRITEONLY, \
     16, /* number of pixels, processed in a single block */ \
     10, /* prefetch distance */ \
@@ -1060,17 +1111,17 @@ generate_composite_function \
 .macro pixman_composite_src_8888_8888_process_pixblock_head
 .endm
 
 .macro pixman_composite_src_8888_8888_process_pixblock_tail
 .endm
 
 .macro pixman_composite_src_8888_8888_process_pixblock_tail_head
     vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
-    vld1.32 {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     cache_preload 8, 8
 .endm
 
 generate_composite_function \
     pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
     FLAG_DST_WRITEONLY, \
     8, /* number of pixels, processed in a single block */ \
     10, /* prefetch distance */ \
@@ -1091,17 +1142,17 @@ generate_composite_function \
     vorr     q1, q1, q2
 .endm
 
 .macro pixman_composite_src_x888_8888_process_pixblock_tail
 .endm
 
 .macro pixman_composite_src_x888_8888_process_pixblock_tail_head
     vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
-    vld1.32 {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     vorr     q0, q0, q2
     vorr     q1, q1, q2
     cache_preload 8, 8
 .endm
 
 .macro pixman_composite_src_x888_8888_init
     vmov.u8  q2, #0xFF
     vshl.u32 q2, q2, #24
@@ -1166,17 +1217,17 @@ generate_composite_function \
     vqadd.u8    q15, q1, q15
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
     pixman_composite_over_n_8_8888_process_pixblock_tail
     vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
     vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    vld1.8      {d24}, [MASK]!
+    fetch_mask_pixblock
     cache_preload 8, 8
     pixman_composite_over_n_8_8888_process_pixblock_head
 .endm
 
 .macro pixman_composite_over_n_8_8888_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
     vpush       {d8-d15}
     vld1.32     {d11[0]}, [DUMMY]
@@ -1198,16 +1249,84 @@ generate_composite_function \
     pixman_composite_over_n_8_8888_init, \
     pixman_composite_over_n_8_8888_cleanup, \
     pixman_composite_over_n_8_8888_process_pixblock_head, \
     pixman_composite_over_n_8_8888_process_pixblock_tail, \
     pixman_composite_over_n_8_8888_process_pixblock_tail_head
 
 /******************************************************************************/
 
+.macro pixman_composite_over_n_8_8_process_pixblock_head /* first half of one 32-byte block; the matching _tail finishes it */
+    vmull.u8    q0,  d24, d8 /* 16-bit products of d24..d27 (presumably the a8 mask pixels) with d8, the byte splatted by _init */
+    vmull.u8    q1,  d25, d8
+    vmull.u8    q6,  d26, d8
+    vmull.u8    q7,  d27, d8
+    vrshr.u16   q10, q0,  #8 /* rounding >>8; with the rounding add-narrow below this is a rounded divide by 255 */
+    vrshr.u16   q11, q1,  #8
+    vrshr.u16   q12, q6,  #8 /* q12/q13 alias d24-d27: overwritten only after all four were read above */
+    vrshr.u16   q13, q7,  #8
+    vraddhn.u16 d0,  q0,  q10 /* d0-d3 (q0, q1) = d24..d27 * d8 / 255, one byte per pixel */
+    vraddhn.u16 d1,  q1,  q11
+    vraddhn.u16 d2,  q6,  q12
+    vraddhn.u16 d3,  q7,  q13
+    vmvn.8      q12, q0 /* d24-d27 = bitwise NOT of d0-d3, i.e. 255 - value per byte */
+    vmvn.8      q13, q1
+    vmull.u8    q8,  d24, d4 /* q8-q11 = inverted value * d4..d7 (destination bytes, loaded from DST_R by tail_head) */
+    vmull.u8    q9,  d25, d5
+    vmull.u8    q10, d26, d6
+    vmull.u8    q11, d27, d7
+.endm
+
+.macro pixman_composite_over_n_8_8_process_pixblock_tail /* consumes q8-q11 and q0/q1 left by _head; result ends up in d28-d31 */
+    vrshr.u16   q14, q8,  #8 /* rounding >>8 of the 16-bit products; first step of the rounded divide by 255 */
+    vrshr.u16   q15, q9,  #8
+    vrshr.u16   q12, q10, #8
+    vrshr.u16   q13, q11, #8
+    vraddhn.u16 d28, q14, q8 /* rounding add + narrow to bytes: d28-d31 = q8..q11 / 255 */
+    vraddhn.u16 d29, q15, q9
+    vraddhn.u16 d30, q12, q10
+    vraddhn.u16 d31, q13, q11
+    vqadd.u8    q14, q0,  q14 /* saturating byte add of q0/q1 (computed in _head) onto d28-d31 */
+    vqadd.u8    q15, q1,  q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8_8_process_pixblock_tail_head /* finish the previous block and start the next one */
+    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]! /* next 32 destination bytes; safe before _tail, which does not read d4-d7 */
+    pixman_composite_over_n_8_8_process_pixblock_tail /* previous block's result -> d28-d31 */
+    fetch_mask_pixblock /* helper defined outside this excerpt; presumably loads the next mask block - confirm */
+    cache_preload 32, 32 /* prefetch helper defined outside this excerpt */
+    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]! /* write the previous block's 32 result bytes */
+    pixman_composite_over_n_8_8_process_pixblock_head /* begin the next block with the freshly loaded d4-d7 */
+.endm
+
+.macro pixman_composite_over_n_8_8_init /* one-time setup: leaves a splatted byte in d8 for the pixblock macros */
+    add         DUMMY, sp, #ARGS_STACK_OFFSET /* address of a stack argument, computed before vpush moves sp */
+    vpush       {d8-d15} /* saved here, restored by the _cleanup macro (d8-d15 are callee-saved in the AAPCS) */
+    vld1.32     {d8[0]}, [DUMMY] /* one 32-bit word into lane 0 of d8; presumably the solid source colour - confirm */
+    vdup.8      d8, d8[3] /* replicate byte 3 of that word (presumably its alpha) into all 8 lanes of d8 */
+.endm
+
+.macro pixman_composite_over_n_8_8_cleanup /* undo the register save done in the matching _init */
+    vpop        {d8-d15} /* pairs with the vpush in pixman_composite_over_n_8_8_init */
+.endm
+
+generate_composite_function /* generator macro defined outside this excerpt; emits the function named on the next line */ \
+    pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, /* presumably src, mask, dst bits per pixel - confirm against the generator */ \
+    FLAG_DST_READWRITE, \
+    32, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_over_n_8_8_init, \
+    pixman_composite_over_n_8_8_cleanup, \
+    pixman_composite_over_n_8_8_process_pixblock_head, \
+    pixman_composite_over_n_8_8_process_pixblock_tail, \
+    pixman_composite_over_n_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
 .macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
     /*
      * 'combine_mask_ca' replacement
      *
      * input:  solid src (n) in {d8,  d9,  d10, d11}
      *         dest in          {d4,  d5,  d6,  d7 }
      *         mask in          {d24, d25, d26, d27}
      * output: updated src in   {d0,  d1,  d2,  d3 }
@@ -1268,17 +1387,17 @@ generate_composite_function \
         vrshr.u16   q15, q9, #8
     vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
         vrshr.u16   q6, q10, #8
         vrshr.u16   q7, q11, #8
         vraddhn.u16 d28, q14, q8
         vraddhn.u16 d29, q15, q9
         vraddhn.u16 d30, q6, q10
         vraddhn.u16 d31, q7, q11
-    vld4.8      {d24, d25, d26, d27}, [MASK]!
+    fetch_mask_pixblock
         vqadd.u8    q14, q0, q14
         vqadd.u8    q15, q1, q15
     cache_preload 8, 8
     pixman_composite_over_n_8888_8888_ca_process_pixblock_head
     vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
 .macro pixman_composite_over_n_8888_8888_ca_init
@@ -1303,16 +1422,68 @@ generate_composite_function \
     pixman_composite_over_n_8888_8888_ca_init, \
     pixman_composite_over_n_8888_8888_ca_cleanup, \
     pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
     pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
     pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
 
 /******************************************************************************/
 
+.macro pixman_composite_in_n_8_process_pixblock_head /* first half of one 32-byte block; _tail finishes it */
+    /* expecting source data in {d0, d1, d2, d3} */
+    /* and destination data in {d4, d5, d6, d7} */
+    vmull.u8    q8,  d4,  d3 /* q8-q11 = d4..d7 * d3 as 16-bit products; only d3 of the source set is read here */
+    vmull.u8    q9,  d5,  d3
+    vmull.u8    q10, d6,  d3
+    vmull.u8    q11, d7,  d3
+.endm
+
+.macro pixman_composite_in_n_8_process_pixblock_tail /* turns the 16-bit products in q8-q11 into bytes in d28-d31 */
+    vrshr.u16   q14, q8,  #8 /* rounding >>8; first step of the rounded divide by 255 */
+    vrshr.u16   q15, q9,  #8
+    vrshr.u16   q12, q10, #8
+    vrshr.u16   q13, q11, #8
+    vraddhn.u16 d28, q8,  q14 /* rounding add + narrow: d28-d31 = q8..q11 / 255 */
+    vraddhn.u16 d29, q9,  q15
+    vraddhn.u16 d30, q10, q12
+    vraddhn.u16 d31, q11, q13
+.endm
+
+.macro pixman_composite_in_n_8_process_pixblock_tail_head /* finish the previous block and start the next one */
+    pixman_composite_in_n_8_process_pixblock_tail /* previous block's result -> d28-d31 */
+    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]! /* next 32 destination bytes, 128-bit alignment hint, post-increment */
+    cache_preload 32, 32 /* prefetch helper defined outside this excerpt */
+    pixman_composite_in_n_8_process_pixblock_head /* writes q8-q11 only, so d28-d31 stay intact for the store below */
+    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]! /* write the previous block's 32 result bytes */
+.endm
+
+.macro pixman_composite_in_n_8_init /* one-time setup: leaves a splatted byte in d3; nothing is pushed, so _cleanup is empty */
+    add         DUMMY, sp, #ARGS_STACK_OFFSET /* address of a stack argument */
+    vld1.32     {d3[0]}, [DUMMY] /* one 32-bit word into lane 0 of d3; presumably the solid source colour - confirm */
+    vdup.8      d3, d3[3] /* replicate byte 3 of that word (presumably its alpha) into all 8 lanes of d3 */
+.endm
+
+.macro pixman_composite_in_n_8_cleanup /* intentionally empty: the matching _init pushes no registers */
+.endm
+
+generate_composite_function /* generator macro defined outside this excerpt; emits the function named on the next line */ \
+    pixman_composite_in_n_8_asm_neon, 0, 0, 8, /* presumably src, mask, dst bits per pixel - confirm against the generator */ \
+    FLAG_DST_READWRITE, \
+    32, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_in_n_8_init, \
+    pixman_composite_in_n_8_cleanup, \
+    pixman_composite_in_n_8_process_pixblock_head, \
+    pixman_composite_in_n_8_process_pixblock_tail, \
+    pixman_composite_in_n_8_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    24  /* mask_basereg  */
+
 .macro pixman_composite_add_n_8_8_process_pixblock_head
     /* expecting source data in {d8, d9, d10, d11} */
     /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
     /* and destination data in {d4, d5, d6, d7} */
     /* mask is in d24, d25, d26, d27 */
     vmull.u8    q0, d24, d11
     vmull.u8    q1, d25, d11
     vmull.u8    q6, d26, d11
@@ -1332,17 +1503,17 @@ generate_composite_function \
 .macro pixman_composite_add_n_8_8_process_pixblock_tail
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_add_n_8_8_process_pixblock_tail_head
     pixman_composite_add_n_8_8_process_pixblock_tail
     vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    vld1.8      {d24, d25, d26, d27}, [MASK]!
+    fetch_mask_pixblock
     cache_preload 32, 32
     pixman_composite_add_n_8_8_process_pixblock_head
 .endm
 
 .macro pixman_composite_add_n_8_8_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
     vpush       {d8-d15}
     vld1.32     {d11[0]}, [DUMMY]
@@ -1389,18 +1560,18 @@ generate_composite_function \
 .macro pixman_composite_add_8_8_8_process_pixblock_tail
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_add_8_8_8_process_pixblock_tail_head
     pixman_composite_add_8_8_8_process_pixblock_tail
     vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    vld1.8      {d24, d25, d26, d27}, [MASK]!
-    vld1.8      {d0, d1, d2, d3}, [SRC]!
+    fetch_mask_pixblock
+    fetch_src_pixblock
     cache_preload 32, 32
     pixman_composite_add_8_8_8_process_pixblock_head
 .endm
 
 .macro pixman_composite_add_8_8_8_init
 .endm
 
 .macro pixman_composite_add_8_8_8_cleanup
@@ -1418,44 +1589,60 @@ generate_composite_function \
     pixman_composite_add_8_8_8_process_pixblock_tail_head
 
 /******************************************************************************/
 
 .macro pixman_composite_add_8888_8888_8888_process_pixblock_head
     /* expecting source data in {d0, d1, d2, d3} */
     /* destination data in {d4, d5, d6, d7} */
     /* mask in {d24, d25, d26, d27} */
-    vmull.u8    q8, d27, d0
-    vmull.u8    q9, d27, d1
+    vmull.u8    q8,  d27, d0
+    vmull.u8    q9,  d27, d1
     vmull.u8    q10, d27, d2
     vmull.u8    q11, d27, d3
-    vrshr.u16   q0, q8, #8
-    vrshr.u16   q1, q9, #8
-    vrshr.u16   q12, q10, #8
-    vrshr.u16   q13, q11, #8
-    vraddhn.u16 d0, q0, q8
-    vraddhn.u16 d1, q1, q9
-    vraddhn.u16 d2, q12, q10
-    vraddhn.u16 d3, q13, q11
-    vqadd.u8    q14, q0, q2
-    vqadd.u8    q15, q1, q3
+    /* 1 cycle bubble */
+    vrsra.u16   q8,  q8,  #8
+    vrsra.u16   q9,  q9,  #8
+    vrsra.u16   q10, q10, #8
+    vrsra.u16   q11, q11, #8
 .endm
 
 .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
+    /* 2 cycle bubble */
+    vrshrn.u16  d28, q8,  #8
+    vrshrn.u16  d29, q9,  #8
+    vrshrn.u16  d30, q10, #8
+    vrshrn.u16  d31, q11, #8
+    vqadd.u8    q14, q2,  q14
+    /* 1 cycle bubble */
+    vqadd.u8    q15, q3,  q15
 .endm
 
-/* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail
-    vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
+    fetch_src_pixblock
+        vrshrn.u16  d28, q8,  #8
+    fetch_mask_pixblock
+        vrshrn.u16  d29, q9,  #8
+    vmull.u8    q8,  d27, d0
+        vrshrn.u16  d30, q10, #8
+    vmull.u8    q9,  d27, d1
+        vrshrn.u16  d31, q11, #8
+    vmull.u8    q10, d27, d2
+        vqadd.u8    q14, q2,  q14
+    vmull.u8    q11, d27, d3
+        vqadd.u8    q15, q3,  q15
+    vrsra.u16   q8,  q8,  #8
     vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    vld4.8      {d24, d25, d26, d27}, [MASK]!
-    vld4.8      {d0, d1, d2, d3}, [SRC]!
+    vrsra.u16   q9,  q9,  #8
+        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
+    vrsra.u16   q10, q10, #8
+
     cache_preload 8, 8
-    pixman_composite_add_8888_8888_8888_process_pixblock_head
+
+    vrsra.u16   q11, q11, #8
 .endm
 
 generate_composite_function \
     pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
     10, /* prefetch distance */ \
     default_init, \
@@ -1471,16 +1658,88 @@ generate_composite_function_single_scanl
     default_init, \
     default_cleanup, \
     pixman_composite_add_8888_8888_8888_process_pixblock_head, \
     pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
     pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
 
 /******************************************************************************/
 
+generate_composite_function /* reuses the add_8888_8888_8888 pixblock macros, which read only d27 from the mask set */ \
+    pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, /* presumably src, mask, dst bits per pixel - confirm */ \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    27  /* mask_basereg  */
+
+/******************************************************************************/
+
+.macro pixman_composite_add_n_8_8888_init /* one-time setup: splats the four bytes of a stack word into d0..d3 */
+    add         DUMMY, sp, #ARGS_STACK_OFFSET /* address of a stack argument */
+    vld1.32     {d3[0]}, [DUMMY] /* one 32-bit word into lane 0 of d3; presumably the solid source colour - confirm */
+    vdup.8      d0, d3[0] /* byte 0 of the word replicated across d0 */
+    vdup.8      d1, d3[1] /* byte 1 replicated across d1 */
+    vdup.8      d2, d3[2] /* byte 2 replicated across d2 */
+    vdup.8      d3, d3[3] /* byte 3 replicated across d3; done last because d3 is the source of all four lanes */
+.endm
+
+.macro pixman_composite_add_n_8_8888_cleanup /* intentionally empty: the matching _init pushes no registers */
+.endm
+
+generate_composite_function /* reuses the add_8888_8888_8888 pixblock macros with d0-d3 preset by the _init above */ \
+    pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, /* presumably src, mask, dst bits per pixel - confirm */ \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_add_n_8_8888_init, \
+    pixman_composite_add_n_8_8888_cleanup, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    27  /* mask_basereg  */
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_n_8888_init /* one-time setup: leaves a splatted byte in d27, the register the reused pixblock macros multiply by */
+    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8) /* stack argument 8 bytes past ARGS_STACK_OFFSET; presumably the solid mask - confirm */
+    vld1.32     {d27[0]}, [DUMMY] /* one 32-bit word into lane 0 of d27 */
+    vdup.8      d27, d27[3] /* replicate byte 3 of that word (presumably its alpha) into all 8 lanes of d27 */
+.endm
+
+.macro pixman_composite_add_8888_n_8888_cleanup /* intentionally empty: the matching _init pushes no registers */
+.endm
+
+generate_composite_function /* reuses the add_8888_8888_8888 pixblock macros with d27 preset by the _init above */ \
+    pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, /* presumably src, mask, dst bits per pixel - confirm */ \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_add_8888_n_8888_init, \
+    pixman_composite_add_8888_n_8888_cleanup, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    27  /* mask_basereg  */
+
+/******************************************************************************/
+
 .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
     /* expecting source data in {d0, d1, d2, d3} */
     /* destination data in {d4, d5, d6, d7} */
     /* solid mask is in d15 */
 
     /* 'in' */
     vmull.u8    q8, d15, d3
     vmull.u8    q6, d15, d2
@@ -1512,19 +1771,19 @@ generate_composite_function_single_scanl
     vraddhn.u16 d30, q12, q10
     vraddhn.u16 d31, q13, q11
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
     vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
     pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
-    vld4.8     {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     cache_preload 8, 8
-    vld4.8     {d12, d13, d14, d15}, [MASK]!
+    fetch_mask_pixblock
     pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
 generate_composite_function_single_scanline \
     pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
@@ -1549,17 +1808,17 @@ generate_composite_function_single_scanl
     vqadd.u8    q14, q0, q14
     vqadd.u8    q15, q1, q15
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
     vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
     pixman_composite_over_8888_n_8888_process_pixblock_tail
-    vld4.8     {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     cache_preload 8, 8
     pixman_composite_over_8888_n_8888_process_pixblock_head
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
 .macro pixman_composite_over_8888_n_8888_init
     add         DUMMY, sp, #48
     vpush       {d8-d15}
@@ -1583,19 +1842,19 @@ generate_composite_function \
     pixman_composite_over_8888_n_8888_process_pixblock_tail_head
 
 /******************************************************************************/
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
     vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
     pixman_composite_over_8888_n_8888_process_pixblock_tail
-    vld4.8     {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     cache_preload 8, 8
-    vld4.8     {d12, d13, d14, d15}, [MASK]!
+    fetch_mask_pixblock
     pixman_composite_over_8888_n_8888_process_pixblock_head
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
 generate_composite_function \
     pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
@@ -1625,19 +1884,19 @@ generate_composite_function_single_scanl
     12  /* mask_basereg  */
 
 /******************************************************************************/
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
     vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
     pixman_composite_over_8888_n_8888_process_pixblock_tail
-    vld4.8     {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
     cache_preload 8, 8
-    vld1.8     {d15}, [MASK]!
+    fetch_mask_pixblock
     pixman_composite_over_8888_n_8888_process_pixblock_head
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
 generate_composite_function \
     pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
@@ -1657,17 +1916,17 @@ generate_composite_function \
 .macro pixman_composite_src_0888_0888_process_pixblock_head
 .endm
 
 .macro pixman_composite_src_0888_0888_process_pixblock_tail
 .endm
 
 .macro pixman_composite_src_0888_0888_process_pixblock_tail_head
     vst3.8 {d0, d1, d2}, [DST_W]!
-    vld3.8 {d0, d1, d2}, [SRC]!
+    fetch_src_pixblock
     cache_preload 8, 8
 .endm
 
 generate_composite_function \
     pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
     FLAG_DST_WRITEONLY, \
     8, /* number of pixels, processed in a single block */ \
     10, /* prefetch distance */ \
@@ -1687,17 +1946,17 @@ generate_composite_function \
     vswp   d0, d2
 .endm
 
 .macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
 .endm
 
 .macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
     vst4.8 {d0, d1, d2, d3}, [DST_W]!
-    vld3.8 {d0, d1, d2}, [SRC]!
+    fetch_src_pixblock
     vswp   d0, d2
     cache_preload 8, 8
 .endm
 
 .macro pixman_composite_src_0888_8888_rev_init
     veor   d3, d3, d3
 .endm
 
@@ -1726,17 +1985,17 @@ generate_composite_function \
 .macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
     vshll.u8    q14, d0, #8
     vsri.u16    q14, q8, #5
     vsri.u16    q14, q9, #11
 .endm
 
 .macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
         vshll.u8    q14, d0, #8
-    vld3.8 {d0, d1, d2}, [SRC]!
+    fetch_src_pixblock
         vsri.u16    q14, q8, #5
         vsri.u16    q14, q9, #11
     vshll.u8    q8, d1, #8
         vst1.16 {d28, d29}, [DST_W, :128]!
     vshll.u8    q9, d2, #8
 .endm
 
 generate_composite_function \
@@ -1772,17 +2031,17 @@ generate_composite_function \
     vraddhn.u16 d28, q13, q10
 .endm
 
 .macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
         vrshr.u16   q11, q8, #8
         vswp        d3, d31
         vrshr.u16   q12, q9, #8
         vrshr.u16   q13, q10, #8
-    vld4.8 {d0, d1, d2, d3}, [SRC]!
+    fetch_src_pixblock
         vraddhn.u16 d30, q11, q8
                                     PF add PF_X, PF_X, #8
                                     PF tst PF_CTL, #0xF
                                     PF addne PF_X, PF_X, #8
                                     PF subne PF_CTL, PF_CTL, #1
         vraddhn.u16 d29, q12, q9
         vraddhn.u16 d28, q13, q10
     vmull.u8    q8, d3, d0
@@ -1808,16 +2067,73 @@ generate_composite_function \
     pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
     28, /* dst_w_basereg */ \
     0, /* dst_r_basereg */ \
     0, /* src_basereg   */ \
     0  /* mask_basereg  */
 
 /******************************************************************************/
 
+.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head /* first half of one block; _tail finishes it */
+    vmull.u8    q8, d3, d0 /* q8-q10 = d0..d2 * d3 as 16-bit products; d3 is presumably the alpha plane - confirm */
+    vmull.u8    q9, d3, d1
+    vmull.u8    q10, d3, d2
+.endm
+
+.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail /* turns the products in q8-q10 into bytes in d28-d30, with d3 moved to d31 */
+    vrshr.u16   q11, q8, #8 /* rounding >>8; first step of the rounded divide by 255 */
+    vswp        d3, d31 /* exchange d3 and d31, so the multiplier channel becomes the 4th output register */
+    vrshr.u16   q12, q9, #8
+    vrshr.u16   q13, q10, #8
+    vraddhn.u16 d28, q11, q8 /* d28 <- q8, d29 <- q9, d30 <- q10: same channel order as d0, d1, d2 */
+    vraddhn.u16 d29, q12, q9
+    vraddhn.u16 d30, q13, q10
+.endm
+
+.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head /* _tail and _head interleaved by hand, plus inline PF prefetch bookkeeping */
+        vrshr.u16   q11, q8, #8 /* deeper-indented lines repeat the _tail sequence for the previous block */
+        vswp        d3, d31 /* move the old d3 into d31 before the fetch below; presumably the fetch reloads d0-d3 - confirm */
+        vrshr.u16   q12, q9, #8
+        vrshr.u16   q13, q10, #8
+    fetch_src_pixblock /* helper defined outside this excerpt */
+        vraddhn.u16 d28, q11, q8
+                                    PF add PF_X, PF_X, #8
+                                    PF tst PF_CTL, #0xF
+                                    PF addne PF_X, PF_X, #8
+                                    PF subne PF_CTL, PF_CTL, #1
+        vraddhn.u16 d29, q12, q9
+        vraddhn.u16 d30, q13, q10
+    vmull.u8    q8, d3, d0 /* shallower-indented lines repeat the _head sequence for the next block */
+    vmull.u8    q9, d3, d1
+    vmull.u8    q10, d3, d2
+        vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! /* interleaving 4-register store of the previous block's result */
+                                    PF cmp PF_X, ORIG_W
+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+                                    PF subge PF_X, PF_X, ORIG_W
+                                    PF subges PF_CTL, PF_CTL, #0x10
+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endm
+
+generate_composite_function /* generator macro defined outside this excerpt; emits the function named on the next line */ \
+    pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, /* presumably src, mask, dst bits per pixel - confirm */ \
+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_rpixbuf_8888_process_pixblock_head, \
+    pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \
+    pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    0, /* dst_r_basereg */ \
+    0, /* src_basereg   */ \
+    0  /* mask_basereg  */
+
+/******************************************************************************/
+
 .macro pixman_composite_over_0565_8_0565_process_pixblock_head
     /* mask is in d15 */
     convert_0565_to_x888 q4, d2, d1, d0
     convert_0565_to_x888 q5, d6, d5, d4
     /* source pixel data is in      {d0, d1, d2, XX} */
     /* destination pixel data is in {d4, d5, d6, XX} */
     vmvn.8      d7,  d15
     vmull.u8    q6,  d15, d2
@@ -1844,19 +2160,19 @@ generate_composite_function \
     vqadd.u8    q0,  q0,  q14
     vqadd.u8    q1,  q1,  q15
     /* 32bpp result is in {d0, d1, d2, XX} */
     convert_8888_to_0565 d2, d1, d0, q14, q15, q3
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
-    vld1.8     {d15}, [MASK]!
+    fetch_mask_pixblock
     pixman_composite_over_0565_8_0565_process_pixblock_tail
-    vld1.16    {d8, d9}, [SRC]!
+    fetch_src_pixblock
     vld1.16    {d10, d11}, [DST_R, :128]!
     cache_preload 8, 8
     pixman_composite_over_0565_8_0565_process_pixblock_head
     vst1.16    {d28, d29}, [DST_W, :128]!
 .endm
 
 generate_composite_function \
     pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
@@ -1870,16 +2186,44 @@ generate_composite_function \
     pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
     28, /* dst_w_basereg */ \
     10,  /* dst_r_basereg */ \
     8,  /* src_basereg   */ \
     15  /* mask_basereg  */
 
 /******************************************************************************/
 
+.macro pixman_composite_over_0565_n_0565_init /* one-time setup: leaves a splatted byte in d15, where the over_0565_8_0565 macros expect the mask */
+    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8) /* stack argument address, computed before vpush moves sp; presumably the solid mask - confirm */
+    vpush       {d8-d15} /* saved here, restored by the _cleanup macro (d8-d15 are callee-saved in the AAPCS) */
+    vld1.32     {d15[0]}, [DUMMY] /* one 32-bit word into lane 0 of d15 */
+    vdup.8      d15, d15[3] /* replicate byte 3 of that word (presumably its alpha) into all 8 lanes of d15 */
+.endm
+
+.macro pixman_composite_over_0565_n_0565_cleanup /* undo the register save done in the matching _init */
+    vpop        {d8-d15} /* pairs with the vpush in pixman_composite_over_0565_n_0565_init */
+.endm
+
+generate_composite_function /* reuses the over_0565_8_0565 pixblock macros with d15 preset by the _init above */ \
+    pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, /* presumably src, mask, dst bits per pixel - confirm */ \
+    FLAG_DST_READWRITE, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_over_0565_n_0565_init, \
+    pixman_composite_over_0565_n_0565_cleanup, \
+    pixman_composite_over_0565_8_0565_process_pixblock_head, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    10, /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    15  /* mask_basereg  */
+
+/******************************************************************************/
+
 .macro pixman_composite_add_0565_8_0565_process_pixblock_head
     /* mask is in d15 */
     convert_0565_to_x888 q4, d2, d1, d0
     convert_0565_to_x888 q5, d6, d5, d4
     /* source pixel data is in      {d0, d1, d2, XX} */
     /* destination pixel data is in {d4, d5, d6, XX} */
     vmull.u8    q6,  d15, d2
     vmull.u8    q5,  d15, d1
@@ -1896,19 +2240,19 @@ generate_composite_function \
     vqadd.u8    q0,  q0,  q2
     vqadd.u8    q1,  q1,  q3
     /* 32bpp result is in {d0, d1, d2, XX} */
     convert_8888_to_0565 d2, d1, d0, q14, q15, q3
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
-    vld1.8     {d15}, [MASK]!
+    fetch_mask_pixblock
     pixman_composite_add_0565_8_0565_process_pixblock_tail
-    vld1.16    {d8, d9}, [SRC]!
+    fetch_src_pixblock
     vld1.16    {d10, d11}, [DST_R, :128]!
     cache_preload 8, 8
     pixman_composite_add_0565_8_0565_process_pixblock_head
     vst1.16    {d28, d29}, [DST_W, :128]!
 .endm
 
 generate_composite_function \
     pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
@@ -1946,17 +2290,17 @@ generate_composite_function \
     vraddhn.u16 d1, q15, q9
     vraddhn.u16 d2, q12, q10
     /* 32bpp result is in {d0, d1, d2, XX} */
     convert_8888_to_0565 d2, d1, d0, q14, q15, q3
 .endm
 
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
-    vld1.8     {d15}, [SRC]!
+    fetch_src_pixblock
     pixman_composite_out_reverse_8_0565_process_pixblock_tail
     vld1.16    {d10, d11}, [DST_R, :128]!
     cache_preload 8, 8
     pixman_composite_out_reverse_8_0565_process_pixblock_head
     vst1.16    {d28, d29}, [DST_W, :128]!
 .endm
 
 generate_composite_function \
@@ -1968,8 +2312,407 @@ generate_composite_function \
     default_cleanup_need_all_regs, \
     pixman_composite_out_reverse_8_0565_process_pixblock_head, \
     pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
     pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
     28, /* dst_w_basereg */ \
     10, /* dst_r_basereg */ \
     15, /* src_basereg   */ \
     0   /* mask_basereg  */
+
+/******************************************************************************/
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, /* fname, src_bpp, mask_bpp, dst_w_bpp */ \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_over_8888_8888_process_pixblock_head, \
+    pixman_composite_over_8888_8888_process_pixblock_tail, \
+    pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, /* fname, src_bpp, mask_bpp, dst_w_bpp */ \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_over_8888_0565_process_pixblock_head, \
+    pixman_composite_over_8888_0565_process_pixblock_tail, \
+    pixman_composite_over_8888_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    24  /* mask_basereg  */
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, /* fname, src_bpp, mask_bpp, dst_w_bpp */ \
+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_8888_0565_process_pixblock_head, \
+    pixman_composite_src_8888_0565_process_pixblock_tail, \
+    pixman_composite_src_8888_0565_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, /* fname, src_bpp, mask_bpp, dst_w_bpp */ \
+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_0565_8888_process_pixblock_head, \
+    pixman_composite_src_0565_8888_process_pixblock_tail, \
+    pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, /* fname, src_bpp, mask_bpp, dst_w_bpp */ \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
+    pixman_composite_over_8888_8_0565_process_pixblock_head, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    24  /* mask_basereg  */
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, /* fname, src_bpp, mask_bpp, dst_w_bpp */ \
+    FLAG_DST_READWRITE, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
+    pixman_composite_over_0565_8_0565_process_pixblock_head, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    10,  /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    15  /* mask_basereg  */
+
+/******************************************************************************/
+
+/* Supplementary macro: emits the symbol attributes and the entry label for function fname */
+.macro pixman_asm_function fname
+    .func fname /* opens a function record; generate_bilinear_scanline_func closes it with .endfunc */
+    .global fname /* make the symbol visible to the linker */
+#ifdef __ELF__
+    .hidden fname /* ELF only: hidden visibility */
+    .type fname, %function /* ELF only: mark the symbol as a function */
+#endif
+fname: /* entry point label; the caller of this macro supplies the body that follows */
+.endm
+
+/*
+ * Bilinear scaling support code which tries to provide pixel fetching, color
+ * format conversion, and interpolation as separate macros which can be used
+ * as the basic building blocks for constructing bilinear scanline functions.
+ */
+
+.macro bilinear_load_8888 reg1, reg2, tmp /* load reg1 from the TOP row and reg2 from the BOTTOM row at pixel index X >> 16, then step X; tmp is not used */
+    mov       TMP2, X, asr #16 /* TMP2 = pixel index (X >> 16) */
+    add       X, X, UX /* advance X by UX for the next fetch */
+    add       TMP1, TOP, TMP2, asl #2 /* TMP1 = TOP + index * 4 (4 bytes per pixel) */
+    add       TMP2, BOTTOM, TMP2, asl #2 /* TMP2 = BOTTOM + index * 4 */
+    vld1.32   {reg1}, [TMP1] /* whole-register load; callers in this file pass D registers, i.e. two adjacent pixels */
+    vld1.32   {reg2}, [TMP2]
+.endm
+
+.macro bilinear_load_0565 reg1, reg2, tmp /* 16bpp counterpart of bilinear_load_8888: fetch from TOP and BOTTOM at index X >> 16, step X, expand to x888 */
+    mov       TMP2, X, asr #16 /* TMP2 = pixel index (X >> 16) */
+    add       X, X, UX /* advance X by UX for the next fetch */
+    add       TMP1, TOP, TMP2, asl #1 /* TMP1 = TOP + index * 2 (2 bytes per pixel) */
+    add       TMP2, BOTTOM, TMP2, asl #1 /* TMP2 = BOTTOM + index * 2 */
+    vld1.32   {reg2[0]}, [TMP1] /* lane 0: 32 bits (two 16bpp pixels) from the top row */
+    vld1.32   {reg2[1]}, [TMP2] /* lane 1: 32 bits (two 16bpp pixels) from the bottom row */
+    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp /* in=reg2, out0=reg1, out1=reg2: reg1 gets the first two pixels, reg2 the last two */
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_two_8888 \
+                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 /* two loads via bilinear_load_8888, each blended vertically into a 16-bit accumulator */
+
+    bilinear_load_8888 reg1, reg2, tmp1 /* reg1 = top data, reg2 = bottom data; X advances once */
+    vmull.u8  acc1, reg1, d28 /* acc1 = top * d28 (d28 is filled from WT in generate_bilinear_scanline_func) */
+    vmlal.u8  acc1, reg2, d29 /* acc1 += bottom * d29 (d29 is filled from WB) */
+    bilinear_load_8888 reg3, reg4, tmp2 /* second fetch, X advances again */
+    vmull.u8  acc2, reg3, d28 /* acc2 = top * d28 */
+    vmlal.u8  acc2, reg4, d29 /* acc2 += bottom * d29 */
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_four_8888 \
+                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi /* four fetches: the two-pixel macro applied to the x* set, then to the y* set */
+
+    bilinear_load_and_vertical_interpolate_two_8888 \
+                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi /* first and second fetch; xacc2lo/xacc2hi land in the unused tmp1/tmp2 slots */
+    bilinear_load_and_vertical_interpolate_two_8888 \
+                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi /* third and fourth fetch */
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_two_0565 \
+                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi /* 16bpp variant: two fetches, conversion to 8-bit channels, vertical blend into acc1/acc2 */
+
+    mov       TMP2, X, asr #16 /* index of the first fetch (X >> 16) */
+    add       X, X, UX
+    mov       TMP4, X, asr #16 /* index of the second fetch */
+    add       X, X, UX
+    add       TMP1, TOP, TMP2, asl #1 /* top-row address, first fetch (2 bytes per pixel) */
+    add       TMP2, BOTTOM, TMP2, asl #1 /* bottom-row address, first fetch */
+    add       TMP3, TOP, TMP4, asl #1 /* top-row address, second fetch */
+    add       TMP4, BOTTOM, TMP4, asl #1 /* bottom-row address, second fetch */
+    vld1.32   {acc2lo[0]}, [TMP1] /* each load brings in 32 bits, i.e. two adjacent 16bpp pixels */
+    vld1.32   {acc2hi[0]}, [TMP3]
+    vld1.32   {acc2lo[1]}, [TMP2]
+    vld1.32   {acc2hi[1]}, [TMP4]
+    convert_0565_to_x888 acc2, reg3, reg2, reg1 /* helper defined outside this view; presumably splits acc2 into per-channel bytes in reg3, reg2, reg1 - TODO confirm */
+    vzip.u8   reg1, reg3 /* the four zips below presumably re-pack the channel planes into 32bpp pixels - TODO confirm */
+    vzip.u8   reg2, reg4 /* NOTE(review): reg4 is not written earlier in this macro, so its incoming value is zipped in */
+    vzip.u8   reg3, reg4
+    vzip.u8   reg1, reg2
+    vmull.u8  acc1, reg1, d28 /* acc1 = reg1 * d28 (d28 is filled from WT in generate_bilinear_scanline_func) */
+    vmlal.u8  acc1, reg2, d29 /* acc1 += reg2 * d29 (d29 is filled from WB) */
+    vmull.u8  acc2, reg3, d28 /* acc2 is reused as an output; its loaded 0565 data was consumed above */
+    vmlal.u8  acc2, reg4, d29
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_four_0565 \
+                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi /* same steps as the two_0565 macro done for the x* and y* sets, with the two instruction streams interleaved */
+
+    mov       TMP2, X, asr #16 /* index of the first fetch (X >> 16) */
+    add       X, X, UX
+    mov       TMP4, X, asr #16 /* index of the second fetch */
+    add       X, X, UX
+    add       TMP1, TOP, TMP2, asl #1 /* top/bottom addresses, 2 bytes per pixel */
+    add       TMP2, BOTTOM, TMP2, asl #1
+    add       TMP3, TOP, TMP4, asl #1
+    add       TMP4, BOTTOM, TMP4, asl #1
+    vld1.32   {xacc2lo[0]}, [TMP1] /* each load brings in 32 bits, i.e. two adjacent 16bpp pixels */
+    vld1.32   {xacc2hi[0]}, [TMP3]
+    vld1.32   {xacc2lo[1]}, [TMP2]
+    vld1.32   {xacc2hi[1]}, [TMP4]
+    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 /* helper defined outside this view; presumably splits xacc2 into per-channel bytes - TODO confirm */
+    mov       TMP2, X, asr #16 /* index of the third fetch */
+    add       X, X, UX
+    mov       TMP4, X, asr #16 /* index of the fourth fetch */
+    add       X, X, UX
+    add       TMP1, TOP, TMP2, asl #1
+    add       TMP2, BOTTOM, TMP2, asl #1
+    add       TMP3, TOP, TMP4, asl #1
+    add       TMP4, BOTTOM, TMP4, asl #1
+    vld1.32   {yacc2lo[0]}, [TMP1] /* y* loads alternate with the x* zips from here on */
+    vzip.u8   xreg1, xreg3
+    vld1.32   {yacc2hi[0]}, [TMP3]
+    vzip.u8   xreg2, xreg4 /* NOTE(review): xreg4 is not written earlier in this macro */
+    vld1.32   {yacc2lo[1]}, [TMP2]
+    vzip.u8   xreg3, xreg4
+    vld1.32   {yacc2hi[1]}, [TMP4]
+    vzip.u8   xreg1, xreg2
+    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
+    vmull.u8  xacc1, xreg1, d28 /* x* vertical blend with d28/d29, alternating with the y* zips */
+    vzip.u8   yreg1, yreg3
+    vmlal.u8  xacc1, xreg2, d29
+    vzip.u8   yreg2, yreg4 /* NOTE(review): yreg4 is not written earlier in this macro */
+    vmull.u8  xacc2, xreg3, d28
+    vzip.u8   yreg3, yreg4
+    vmlal.u8  xacc2, xreg4, d29
+    vzip.u8   yreg1, yreg2
+    vmull.u8  yacc1, yreg1, d28 /* y* vertical blend */
+    vmlal.u8  yacc1, yreg2, d29
+    vmull.u8  yacc2, yreg3, d28
+    vmlal.u8  yacc2, yreg4, d29
+.endm
+
+.macro bilinear_store_8888 numpix, tmp1, tmp2 /* write numpix (4, 2 or 1) 32bpp pixels from d0/d1 to OUT with post-increment; tmp1 and tmp2 are not used */
+.if numpix == 4
+    vst1.32   {d0, d1}, [OUT]! /* 16 bytes */
+.elseif numpix == 2
+    vst1.32   {d0}, [OUT]! /* 8 bytes */
+.elseif numpix == 1
+    vst1.32   {d0[0]}, [OUT, :32]! /* one 32-bit lane, with a 32-bit alignment hint */
+.else
+    .error bilinear_store_8888 numpix is unsupported
+.endif
+.endm
+
+.macro bilinear_store_0565 numpix, tmp1, tmp2 /* convert the 32bpp pixels in d0/d1 to r5g6b5 and write numpix (4, 2 or 1) of them to OUT; clobbers d0-d3, tmp1, tmp2 */
+    vuzp.u8 d0, d1 /* the four unzips presumably split the packed pixels into per-channel planes for the converter - TODO confirm */
+    vuzp.u8 d2, d3
+    vuzp.u8 d1, d3
+    vuzp.u8 d0, d2
+    convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 /* in_r=d2, in_g=d1, in_b=d0, out=q1 (d2/d3) */
+.if numpix == 4
+    vst1.16   {d2}, [OUT]! /* 8 bytes = four 16bpp pixels */
+.elseif numpix == 2
+    vst1.32   {d2[0]}, [OUT]! /* 4 bytes = two 16bpp pixels */
+.elseif numpix == 1
+    vst1.16   {d2[0]}, [OUT]! /* 2 bytes = one 16bpp pixel */
+.else
+    .error bilinear_store_0565 numpix is unsupported
+.endif
+.endm
+
+.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt /* produce one output pixel: load, vertical blend, horizontal blend, store */
+    bilinear_load_&src_fmt d0, d1, d2 /* d0 = top pair, d1 = bottom pair of neighbouring pixels */
+    vmull.u8  q1, d0, d28 /* q1 = top * d28; d2 = left pixel, d3 = right pixel as 16-bit channels */
+    vmlal.u8  q1, d1, d29 /* q1 += bottom * d29 */
+    vshr.u16  d30, d24, #8 /* horizontal weight = upper 8 bits of the 16-bit values kept in d24 */
+    /* 4 cycles bubble */
+    vshll.u16 q0, d2, #8 /* q0 = left * 256 */
+    vmlsl.u16 q0, d2, d30 /* q0 -= left * weight */
+    vmlal.u16 q0, d3, d30 /* q0 += right * weight, so q0 = left * (256 - weight) + right * weight */
+    /* 5 cycles bubble */
+    vshrn.u32 d0, q0, #16 /* narrow to 16 bits, keeping bits 16..31 of each sum */
+    /* 3 cycles bubble */
+    vmovn.u16 d0, q0 /* narrow to 8 bits per channel; only the low half (from d0) is the pixel being stored */
+    /* 1 cycle bubble */
+    bilinear_store_&dst_fmt 1, q2, q3 /* q2/q3 are scratch for the store macro */
+.endm
+
+.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt /* produce two output pixels and advance the weight accumulators in q12 */
+    bilinear_load_and_vertical_interpolate_two_&src_fmt \
+                q1, q11, d0, d1, d20, d21, d22, d23 /* vertically blended data: q1 for the first pixel, q11 for the second */
+    vshr.u16  q15, q12, #8 /* d30 = horizontal weight for the first pixel, d31 for the second */
+    vadd.u16  q12, q12, q13 /* step the per-lane X values by q13 (set to 2 * UX in generate_bilinear_scanline_func) */
+    vshll.u16 q0, d2, #8 /* q0 = left * 256 */
+    vmlsl.u16 q0, d2, d30 /* q0 -= left * weight */
+    vmlal.u16 q0, d3, d30 /* q0 += right * weight */
+    vshll.u16 q10, d22, #8 /* same blend for the second pixel using d22/d23 and d31 */
+    vmlsl.u16 q10, d22, d31
+    vmlal.u16 q10, d23, d31
+    vshrn.u32 d30, q0, #16 /* narrow both results to 16 bits into q15 (d30/d31) */
+    vshrn.u32 d31, q10, #16
+    vmovn.u16 d0, q15 /* narrow to 8 bits: d0 now holds the two output pixels */
+    bilinear_store_&dst_fmt 2, q2, q3 /* q2/q3 are scratch for the store macro */
+.endm
+
+.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt /* produce four output pixels, issue two prefetches, advance q12 twice */
+    bilinear_load_and_vertical_interpolate_four_&src_fmt \
+                q1, q11, d0, d1, d20, d21, d22, d23 \
+                q3, q9,  d4, d5, d16, d17, d18, d19 /* vertically blended data in q1, q11, q3, q9 for pixels 0..3 */
+    pld       [TMP1, PF_OFFS] /* prefetch PF_OFFS bytes past the address the loader left in TMP1 */
+    vshr.u16  q15, q12, #8 /* d30/d31 = horizontal weights for pixels 0 and 1 */
+    vadd.u16  q12, q12, q13 /* step the per-lane X values by q13 (set to 2 * UX in generate_bilinear_scanline_func) */
+    vshll.u16 q0, d2, #8 /* pixel 0: left * 256 - left * weight + right * weight */
+    vmlsl.u16 q0, d2, d30
+    vmlal.u16 q0, d3, d30
+    vshll.u16 q10, d22, #8 /* pixel 1, same blend with d22/d23 and d31 */
+    vmlsl.u16 q10, d22, d31
+    vmlal.u16 q10, d23, d31
+    vshr.u16  q15, q12, #8 /* d30/d31 = horizontal weights for pixels 2 and 3 */
+    vshll.u16 q2, d6, #8 /* pixel 2, from q3 (d6/d7) */
+    vmlsl.u16 q2, d6, d30
+    vmlal.u16 q2, d7, d30
+    vshll.u16 q8, d18, #8 /* pixel 3, from q9 (d18/d19) */
+    pld       [TMP2, PF_OFFS] /* prefetch PF_OFFS bytes past the address the loader left in TMP2 */
+    vmlsl.u16 q8, d18, d31
+    vmlal.u16 q8, d19, d31
+    vadd.u16  q12, q12, q13 /* second step of the X values, ready for the next invocation */
+    vshrn.u32 d0, q0, #16 /* narrow the four sums to 16 bits: d0, d1, d4, d5 */
+    vshrn.u32 d1, q10, #16
+    vshrn.u32 d4, q2, #16
+    vshrn.u32 d5, q8, #16
+    vmovn.u16 d0, q0 /* narrow to 8 bits: d0 = pixels 0 and 1 */
+    vmovn.u16 d1, q2 /* d1 = pixels 2 and 3 */
+    bilinear_store_&dst_fmt 4, q2, q3 /* q2/q3 are scratch for the store macro */
+.endm
+
+/*
+ * Main template macro for generating NEON optimized bilinear scanline
+ * functions.
+ *
+ * TODO: use software pipelining and aligned writes to the destination buffer
+ *       in order to improve performance
+ *
+ * Bilinear scanline scaler macro template uses the following arguments:
+ *  fname             - name of the function to generate
+ *  src_fmt           - source color format (8888 or 0565)
+ *  dst_fmt           - destination color format (8888 or 0565)
+ *  bpp_shift         - (1 << bpp_shift) is the size of source pixel in bytes
+ *  prefetch_distance - prefetch in the source image by that many
+ *                      pixels ahead
+ */
+
+.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
+                                       bpp_shift, prefetch_distance /* emits one complete function named fname: prologue, 4/2/1-pixel loops, epilogue */
+
+pixman_asm_function fname
+    OUT       .req      r0 /* destination pointer, post-incremented by the store macros */
+    TOP       .req      r1 /* base of the top source row */
+    BOTTOM    .req      r2 /* base of the bottom source row */
+    WT        .req      r3 /* weight duplicated into d28 below */
+    WB        .req      r4 /* weight duplicated into d29 below; loaded from the stack */
+    X         .req      r5 /* source position; X >> 16 is used as the pixel index */
+    UX        .req      r6 /* per-pixel increment of X */
+    WIDTH     .req      ip /* pixels left to produce */
+    TMP1      .req      r3 /* shares r3 with WT, which is consumed by vdup before TMP1 is first written */
+    TMP2      .req      r4 /* shares r4 with WB, likewise consumed before TMP2 is first written */
+    PF_OFFS   .req      r7 /* offset operand for the pld instructions */
+    TMP3      .req      r8
+    TMP4      .req      r9
+
+    mov       ip, sp /* keep the incoming sp so the stack arguments can be read after the push */
+    push      {r4, r5, r6, r7, r8, r9}
+    mov       PF_OFFS, #prefetch_distance
+    ldmia     ip, {WB, X, UX, WIDTH} /* stack arguments in order: WB, X, UX, WIDTH (WIDTH overwrites ip itself) */
+    mul       PF_OFFS, PF_OFFS, UX /* PF_OFFS = prefetch_distance * UX */
+
+    cmp       WIDTH, #0
+    ble       3f /* nothing to do for WIDTH <= 0 */
+
+    vdup.u16  q12, X /* low 16 bits of X in every lane */
+    vdup.u16  q13, UX /* low 16 bits of UX in every lane */
+    vdup.u8   d28, WT
+    vdup.u8   d29, WB
+    vadd.u16  d25, d25, d26 /* upper half of q12 becomes X + UX, i.e. the value for the following pixel */
+    vadd.u16  q13, q13, q13 /* q13 = 2 * UX, the step applied per pixel pair */
+
+    subs      WIDTH, WIDTH, #4
+    blt       1f /* fewer than four pixels: go straight to the tail */
+    mov       PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) /* (prefetch_distance * UX) >> 16 pixels, scaled to bytes by bpp_shift */
+0:
+    bilinear_interpolate_four_pixels src_fmt, dst_fmt
+    subs      WIDTH, WIDTH, #4
+    bge       0b /* loop while at least four pixels remained */
+1:
+    tst       WIDTH, #2 /* WIDTH is now negative, but bits 0-1 still equal the leftover pixel count */
+    beq       2f
+    bilinear_interpolate_two_pixels src_fmt, dst_fmt
+2:
+    tst       WIDTH, #1
+    beq       3f
+    bilinear_interpolate_last_pixel src_fmt, dst_fmt
+3:
+    pop       {r4, r5, r6, r7, r8, r9}
+    bx        lr
+
+    .unreq    OUT
+    .unreq    TOP
+    .unreq    BOTTOM
+    .unreq    WT
+    .unreq    WB
+    .unreq    X
+    .unreq    UX
+    .unreq    WIDTH
+    .unreq    TMP1
+    .unreq    TMP2
+    .unreq    PF_OFFS
+    .unreq    TMP3
+    .unreq    TMP4
+.endfunc
+
+.endm
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28 /* fname, src_fmt, dst_fmt, bpp_shift, prefetch_distance */
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28 /* 4-byte source pixels (bpp_shift 2), 0565 store macro */
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28 /* 2-byte source pixels (bpp_shift 1), 8888 store macro */
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28 /* 2-byte source pixels (bpp_shift 1), 0565 store macro */
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
@@ -200,16 +200,131 @@
 .macro pixst_a numpix, bpp, basereg, mem_operand
 .if (bpp * numpix) <= 128
     pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
 .else
     pixst numpix, bpp, basereg, mem_operand, 128
 .endif
 .endm
 
+/*
+ * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register
+ * aliases to be defined)
+ */
+.macro pixld1_s elem_size, reg1, mem_operand /* fill one D register (d<reg1>) with pixels fetched at indices VX >> 16, stepping VX by UNIT_X per pixel */
+.if elem_size == 16 /* four 16-bit pixels, lanes 0..3; address computation alternates between TMP1 and TMP2 */
+    mov     TMP1, VX, asr #16 /* index of pixel 0 */
+    add     VX, VX, UNIT_X
+    add     TMP1, mem_operand, TMP1, asl #1 /* address = base + index * 2 */
+    mov     TMP2, VX, asr #16 /* index of pixel 1 */
+    add     VX, VX, UNIT_X
+    add     TMP2, mem_operand, TMP2, asl #1
+    vld1.16 {d&reg1&[0]}, [TMP1, :16]
+    mov     TMP1, VX, asr #16 /* index of pixel 2 */
+    add     VX, VX, UNIT_X
+    add     TMP1, mem_operand, TMP1, asl #1
+    vld1.16 {d&reg1&[1]}, [TMP2, :16]
+    mov     TMP2, VX, asr #16 /* index of pixel 3 */
+    add     VX, VX, UNIT_X
+    add     TMP2, mem_operand, TMP2, asl #1
+    vld1.16 {d&reg1&[2]}, [TMP1, :16]
+    vld1.16 {d&reg1&[3]}, [TMP2, :16]
+.elseif elem_size == 32 /* two 32-bit pixels, lanes 0..1 */
+    mov     TMP1, VX, asr #16 /* index of pixel 0 */
+    add     VX, VX, UNIT_X
+    add     TMP1, mem_operand, TMP1, asl #2 /* address = base + index * 4 */
+    mov     TMP2, VX, asr #16 /* index of pixel 1 */
+    add     VX, VX, UNIT_X
+    add     TMP2, mem_operand, TMP2, asl #2
+    vld1.32 {d&reg1&[0]}, [TMP1, :32]
+    vld1.32 {d&reg1&[1]}, [TMP2, :32]
+.else
+    .error "unsupported"
+.endif
+.endm
+
+.macro pixld2_s elem_size, reg1, reg2, mem_operand /* fill two D registers with consecutive scaled pixels; net effect on VX in the 32-bit path is +4 * UNIT_X */
+.if elem_size == 32 /* hand-interleaved path: d<reg1> = pixels 0,1 and d<reg2> = pixels 2,3 */
+    mov     TMP1, VX, asr #16 /* index of pixel 0 */
+    add     VX, VX, UNIT_X, asl #1 /* jump ahead to pixel 2 */
+    add     TMP1, mem_operand, TMP1, asl #2
+    mov     TMP2, VX, asr #16 /* index of pixel 2 */
+    sub     VX, VX, UNIT_X /* step back to pixel 1 */
+    add     TMP2, mem_operand, TMP2, asl #2
+    vld1.32 {d&reg1&[0]}, [TMP1, :32] /* pixel 0 */
+    mov     TMP1, VX, asr #16 /* index of pixel 1 */
+    add     VX, VX, UNIT_X, asl #1 /* jump ahead to pixel 3 */
+    add     TMP1, mem_operand, TMP1, asl #2
+    vld1.32 {d&reg2&[0]}, [TMP2, :32] /* pixel 2 */
+    mov     TMP2, VX, asr #16 /* index of pixel 3 */
+    add     VX, VX, UNIT_X /* VX now points at pixel 4 */
+    add     TMP2, mem_operand, TMP2, asl #2
+    vld1.32 {d&reg1&[1]}, [TMP1, :32] /* pixel 1 */
+    vld1.32 {d&reg2&[1]}, [TMP2, :32] /* pixel 3 */
+.else
+    pixld1_s elem_size, reg1, mem_operand /* other element sizes: two plain single-register fetches */
+    pixld1_s elem_size, reg2, mem_operand
+.endif
+.endm
+
+.macro pixld0_s elem_size, reg1, idx, mem_operand /* fetch a single scaled pixel into lane idx of d<reg1> and step VX by UNIT_X */
+.if elem_size == 16
+    mov     TMP1, VX, asr #16 /* pixel index */
+    add     VX, VX, UNIT_X
+    add     TMP1, mem_operand, TMP1, asl #1 /* address = base + index * 2 */
+    vld1.16 {d&reg1&[idx]}, [TMP1, :16]
+.elseif elem_size == 32
+    mov     TMP1, VX, asr #16 /* pixel index */
+    add     VX, VX, UNIT_X
+    add     TMP1, mem_operand, TMP1, asl #2 /* address = base + index * 4 */
+    vld1.32 {d&reg1&[idx]}, [TMP1, :32]
+.endif /* NOTE(review): no .else branch, so any other elem_size expands to nothing and VX is left unchanged */
+.endm
+
+.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand /* dispatch on the total byte count and pick the registers/lanes, relative to basereg, that receive the scaled pixels */
+.if numbytes == 32 /* four D registers: basereg+4 .. basereg+7 */
+    pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand
+    pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand
+    pixdeinterleave elem_size, %(basereg+4) /* helper defined outside this view */
+.elseif numbytes == 16 /* two D registers: basereg+2, basereg+3 */
+    pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand
+.elseif numbytes == 8 /* one D register: basereg+1 */
+    pixld1_s elem_size, %(basereg+1), mem_operand
+.elseif numbytes == 4 /* upper 4 bytes of d<basereg+0>, one lane at a time */
+    .if elem_size == 32
+        pixld0_s elem_size, %(basereg+0), 1, mem_operand
+    .elseif elem_size == 16
+        pixld0_s elem_size, %(basereg+0), 2, mem_operand
+        pixld0_s elem_size, %(basereg+0), 3, mem_operand
+    .else /* NOTE(review): pixld0_s only emits code for elem_size 16 and 32, so these four calls expand to nothing */
+        pixld0_s elem_size, %(basereg+0), 4, mem_operand
+        pixld0_s elem_size, %(basereg+0), 5, mem_operand
+        pixld0_s elem_size, %(basereg+0), 6, mem_operand
+        pixld0_s elem_size, %(basereg+0), 7, mem_operand
+    .endif
+.elseif numbytes == 2 /* bytes 2..3 of d<basereg+0> */
+    .if elem_size == 16
+        pixld0_s elem_size, %(basereg+0), 1, mem_operand
+    .else /* NOTE(review): also a no-op unless elem_size is 16 or 32 */
+        pixld0_s elem_size, %(basereg+0), 2, mem_operand
+        pixld0_s elem_size, %(basereg+0), 3, mem_operand
+    .endif
+.elseif numbytes == 1 /* single element into lane 1 of d<basereg+0> */
+    pixld0_s elem_size, %(basereg+0), 1, mem_operand
+.else
+    .error "unsupported size: numbytes"
+.endif
+.endm
+
+.macro pixld_s numpix, bpp, basereg, mem_operand /* nearest-scaling pixel fetch: takes the same four operands that pixld_src forwards to either pixld or pixld_s */
+.if bpp > 0 /* a bpp of 0 means there is nothing to fetch, so no code is emitted */
+    pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand /* first operand is the total byte count, second the element size in bits */
+.endif
+.endm
+
 .macro vuzp8 reg1, reg2
     vuzp.8 d&reg1, d&reg2
 .endm
 
 .macro vzip8 reg1, reg2
     vzip.8 d&reg1, d&reg2
 .endm
 
@@ -311,16 +426,21 @@
     pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
 .endif
 .if mask_bpp > 0
     pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
 .endif
 .endif
 .endm
 
+.macro fetch_mask_pixblock /* load one pixel block of mask data with pixld, using the same operands as the inline mask loads in the main loops */
+    pixld       pixblock_size, mask_bpp, \
+                (mask_basereg - pixblock_size * mask_bpp / 64), MASK /* pixblock_size, mask_bpp, mask_basereg are the .set symbols of the function being generated */
+.endm
+
 /*
  * Macro which is used to process leading pixels until destination
  * pointer is properly aligned (at 16 bytes boundary). When destination
  * buffer uses 16bpp format, this is unnecessary, or even pointless.
  */
 .macro ensure_destination_ptr_alignment process_pixblock_head, \
                                         process_pixblock_tail, \
                                         process_pixblock_tail_head
@@ -330,17 +450,17 @@
 
 .irp lowbit, 1, 2, 4, 8, 16
 local skip1
 .if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
 .if lowbit < 16 /* we don't need more than 16-byte alignment */
     tst         DST_R, #lowbit
     beq         1f
 .endif
-    pixld       (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
+    pixld_src   (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
     pixld       (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
 .if dst_r_bpp > 0
     pixld_a     (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
 .else
     add         DST_R, DST_R, #lowbit
 .endif
     PF add      PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
     sub         W, W, #(lowbit * 8 / dst_w_bpp)
@@ -392,17 +512,17 @@ 2:
                                process_pixblock_tail, \
                                process_pixblock_tail_head
     tst         W, #(pixblock_size - 1)
     beq         2f
 .irp chunk_size, 16, 8, 4, 2, 1
 .if pixblock_size > chunk_size
     tst         W, #chunk_size
     beq         1f
-    pixld       chunk_size, src_bpp, src_basereg, SRC
+    pixld_src   chunk_size, src_bpp, src_basereg, SRC
     pixld       chunk_size, mask_bpp, mask_basereg, MASK
 .if dst_aligned_flag != 0
     pixld_a     chunk_size, dst_r_bpp, dst_r_basereg, DST_R
 .else
     pixld       chunk_size, dst_r_bpp, dst_r_basereg, DST_R
 .endif
 .if cache_preload_flag != 0
     PF add      PF_X, PF_X, #chunk_size
@@ -526,16 +646,23 @@ fname:
     .set mask_bpp, mask_bpp_
     .set dst_w_bpp, dst_w_bpp_
     .set pixblock_size, pixblock_size_
     .set dst_w_basereg, dst_w_basereg_
     .set dst_r_basereg, dst_r_basereg_
     .set src_basereg, src_basereg_
     .set mask_basereg, mask_basereg_
 
+    .macro pixld_src x:vararg
+        pixld x
+    .endm
+    .macro fetch_src_pixblock
+        pixld_src   pixblock_size, src_bpp, \
+                    (src_basereg - pixblock_size * src_bpp / 64), SRC
+    .endm
 /*
  * Assign symbolic names to registers
  */
     W           .req        r0      /* width (is updated during processing) */
     H           .req        r1      /* height (is updated during processing) */
     DST_W       .req        r2      /* destination buffer pointer for writes */
     DST_STRIDE  .req        r3      /* destination image stride */
     SRC         .req        r4      /* source buffer pointer */
@@ -691,18 +818,17 @@ fname:
 0:
     ensure_destination_ptr_alignment process_pixblock_head, \
                                      process_pixblock_tail, \
                                      process_pixblock_tail_head
 
     /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
     pixld_a     pixblock_size, dst_r_bpp, \
                 (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    pixld       pixblock_size, src_bpp, \
-                (src_basereg - pixblock_size * src_bpp / 64), SRC
+    fetch_src_pixblock
     pixld       pixblock_size, mask_bpp, \
                 (mask_basereg - pixblock_size * mask_bpp / 64), MASK
     PF add      PF_X, PF_X, #pixblock_size
     process_pixblock_head
     cache_preload 0, pixblock_size
     cache_preload_simple
     subs        W, W, #(pixblock_size * 2)
     blt         2f
@@ -734,18 +860,17 @@ 2:
  * nor prefetch are used.
  */
 8:
     /* Process exactly pixblock_size pixels if needed */
     tst         W, #pixblock_size
     beq         1f
     pixld       pixblock_size, dst_r_bpp, \
                 (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    pixld       pixblock_size, src_bpp, \
-                (src_basereg - pixblock_size * src_bpp / 64), SRC
+    fetch_src_pixblock
     pixld       pixblock_size, mask_bpp, \
                 (mask_basereg - pixblock_size * mask_bpp / 64), MASK
     process_pixblock_head
     process_pixblock_tail
     pixst       pixblock_size, dst_w_bpp, \
                 (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
 1:
     /* Process the remaining trailing pixels in the scanline */
@@ -756,16 +881,19 @@ 1:
     advance_to_next_scanline 8b
 9:
 .if regs_shortage
     pop         {r0, r1}
 .endif
     cleanup
     pop         {r4-r12, pc}  /* exit */
 
+    .purgem     fetch_src_pixblock
+    .purgem     pixld_src
+
     .unreq      SRC
     .unreq      MASK
     .unreq      DST_R
     .unreq      DST_W
     .unreq      ORIG_W
     .unreq      W
     .unreq      H
     .unreq      SRC_STRIDE
@@ -779,17 +907,18 @@ 9:
     .unreq      DUMMY
     .endfunc
 .endm
 
 /*
  * A simplified variant of function generation template for a single
  * scanline processing (for implementing pixman combine functions)
  */
-.macro generate_composite_function_single_scanline fname, \
+.macro generate_composite_function_scanline        use_nearest_scaling, \
+                                                   fname, \
                                                    src_bpp_, \
                                                    mask_bpp_, \
                                                    dst_w_bpp_, \
                                                    flags, \
                                                    pixblock_size_, \
                                                    init, \
                                                    cleanup, \
                                                    process_pixblock_head, \
@@ -816,54 +945,88 @@ fname:
     .set src_bpp, src_bpp_
     .set mask_bpp, mask_bpp_
     .set dst_w_bpp, dst_w_bpp_
     .set pixblock_size, pixblock_size_
     .set dst_w_basereg, dst_w_basereg_
     .set dst_r_basereg, dst_r_basereg_
     .set src_basereg, src_basereg_
     .set mask_basereg, mask_basereg_
-/*
- * Assign symbolic names to registers
- */
+
+.if use_nearest_scaling != 0
+    /*
+     * Assign symbolic names to registers for nearest scaling
+     */
+    W           .req        r0
+    DST_W       .req        r1
+    SRC         .req        r2
+    VX          .req        r3
+    UNIT_X      .req        ip
+    MASK        .req        lr
+    TMP1        .req        r4
+    TMP2        .req        r5
+    DST_R       .req        r6
+
+    .macro pixld_src x:vararg
+        pixld_s x
+    .endm
+
+    ldr         UNIT_X, [sp]
+    push        {r4-r6, lr}
+    .if mask_bpp != 0
+    ldr         MASK, [sp, #(16 + 4)]
+    .endif
+.else
+    /*
+     * Assign symbolic names to registers
+     */
     W           .req        r0      /* width (is updated during processing) */
     DST_W       .req        r1      /* destination buffer pointer for writes */
     SRC         .req        r2      /* source buffer pointer */
     DST_R       .req        ip      /* destination buffer pointer for reads */
     MASK        .req        r3      /* mask pointer */
 
+    .macro pixld_src x:vararg
+        pixld x
+    .endm
+.endif
+
 .if (((flags) & FLAG_DST_READWRITE) != 0)
     .set dst_r_bpp, dst_w_bpp
 .else
     .set dst_r_bpp, 0
 .endif
 .if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
     .set DEINTERLEAVE_32BPP_ENABLED, 1
 .else
     .set DEINTERLEAVE_32BPP_ENABLED, 0
 .endif
 
+    .macro fetch_src_pixblock
+        pixld_src   pixblock_size, src_bpp, \
+                    (src_basereg - pixblock_size * src_bpp / 64), SRC
+    .endm
+
     init
     mov         DST_R, DST_W
 
     cmp         W, #pixblock_size
     blt         8f
 
     ensure_destination_ptr_alignment process_pixblock_head, \
                                      process_pixblock_tail, \
                                      process_pixblock_tail_head
 
     subs        W, W, #pixblock_size
     blt         7f
 
     /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
     pixld_a     pixblock_size, dst_r_bpp, \
                 (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    pixld       pixblock_size, src_bpp, \
-                (src_basereg - pixblock_size * src_bpp / 64), SRC
+    fetch_src_pixblock
     pixld       pixblock_size, mask_bpp, \
                 (mask_basereg - pixblock_size * mask_bpp / 64), MASK
     process_pixblock_head
     subs        W, W, #pixblock_size
     blt         2f
 1:
     process_pixblock_tail_head
     subs        W, W, #pixblock_size
@@ -875,35 +1038,67 @@ 2:
 7:
     /* Process the remaining trailing pixels in the scanline (dst aligned) */
     process_trailing_pixels 0, 1, \
                             process_pixblock_head, \
                             process_pixblock_tail, \
                             process_pixblock_tail_head
 
     cleanup
-    bx         lr  /* exit */
+.if use_nearest_scaling != 0
+    pop         {r4-r6, pc}  /* exit */
+.else
+    bx          lr  /* exit */
+.endif
 8:
     /* Process the remaining trailing pixels in the scanline (dst unaligned) */
     process_trailing_pixels 0, 0, \
                             process_pixblock_head, \
                             process_pixblock_tail, \
                             process_pixblock_tail_head
 
     cleanup
+
+.if use_nearest_scaling != 0
+    pop         {r4-r6, pc}  /* exit */
+
+    .unreq      DST_R
+    .unreq      SRC
+    .unreq      W
+    .unreq      VX
+    .unreq      UNIT_X
+    .unreq      TMP1
+    .unreq      TMP2
+    .unreq      DST_W
+    .unreq      MASK
+
+.else
     bx          lr  /* exit */
 
     .unreq      SRC
     .unreq      MASK
     .unreq      DST_R
     .unreq      DST_W
     .unreq      W
+.endif
+
+    .purgem     fetch_src_pixblock
+    .purgem     pixld_src
+
     .endfunc
 .endm
 
+.macro generate_composite_function_single_scanline x:vararg /* keeps the old macro name: forwards all arguments with use_nearest_scaling = 0 */
+    generate_composite_function_scanline 0, x
+.endm
+
+.macro generate_composite_function_nearest_scanline x:vararg /* same template with use_nearest_scaling = 1, so source pixels are fetched via pixld_s */
+    generate_composite_function_scanline 1, x
+.endm
+
 /* Default prologue/epilogue, nothing special needs to be done */
 
 .macro default_init
 .endm
 
 .macro default_cleanup
 .endm
 
@@ -958,8 +1153,25 @@ 8:
  */
 .macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
     vshll.u8    tmp1, in_g, #8
     vshll.u8    out, in_r, #8
     vshll.u8    tmp2, in_b, #8
     vsri.u16    out, tmp1, #5
     vsri.u16    out, tmp2, #11
 .endm
+
+/*
+ * Convert four r5g6b5 pixels (in) to four x8r8g8b8 pixels, returned in
+ * the (out0, out1) register pair. Requires one temporary 64-bit
+ * register (tmp). 'out1' and 'in' may overlap; 'in' is overwritten.
+ */
+.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
+    vshl.u16    out0, in,   #5  /* G top 6 bits */
+    vshl.u16    tmp,  in,   #11 /* B top 5 bits */
+    vsri.u16    in,   in,   #5  /* R is ready in top bits */
+    vsri.u16    out0, out0, #6  /* G is ready in top bits */
+    vsri.u16    tmp,  tmp,  #5  /* B is ready in top bits */
+    vshr.u16    out1, in,   #8  /* R is in place */
+    vsri.u16    out0, tmp,  #8  /* G & B is in place */
+    vzip.u16    out0, out1      /* everything is in place */
+.endm
--- a/gfx/cairo/libpixman/src/pixman-arm-neon.c
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c
@@ -47,61 +47,100 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon,
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
                                    uint16_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
                                    uint8_t, 3, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
                                    uint8_t, 3, uint16_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
                                    uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
+                                   uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
                                    uint8_t, 1, uint8_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
                                    uint32_t, 1, uint16_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
                                    uint8_t, 1, uint16_t, 1)
 
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
                                  uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
                                  uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
                                  uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
+                                 uint8_t, 1)
 
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
                                       uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
                                       uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
                                       uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
+                                      uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
                                       uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
 
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
+                                     uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
+                                     uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
+                                     uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
                                      uint32_t, 1, uint32_t, 1)
 
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
+                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
                                         uint32_t, 1, uint8_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
                                         uint32_t, 1, uint8_t, 1, uint16_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
 
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
+                                        uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
+                                        uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
+                                        uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
+                                        uint16_t, uint32_t)
+
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+                                           OVER, uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+                                           OVER, uint16_t, uint16_t)
+
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+                                         uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+                                         uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
+                                         uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
+                                         uint16_t, uint16_t)
+
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
                                    int32_t   h,
                                    uint8_t  *dst,
                                    int32_t   dst_stride,
                                    uint8_t   src);
 
 void
@@ -221,31 +260,39 @@ static const pixman_fast_path_t arm_neon
     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     a8b8g8r8, neon_composite_src_8888_8888),
     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
     PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8b8g8r8, neon_composite_src_rpixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8r8g8b8, neon_composite_src_rpixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8b8g8r8, neon_composite_src_pixbuf_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       neon_composite_over_n_8_8),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, neon_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, neon_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, neon_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   neon_composite_over_0565_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   neon_composite_over_0565_n_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   neon_composite_over_0565_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   neon_composite_over_0565_8_0565),
@@ -254,28 +301,72 @@ static const pixman_fast_path_t arm_neon
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, neon_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, neon_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, neon_composite_add_n_8_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
     PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
     PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, neon_composite_add_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, neon_composite_add_8888_n_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (IN,   solid,    null,     a8,       neon_composite_in_n_8),
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, r5g6b5,   neon_composite_out_reverse_8_0565),
     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, b5g6r5,   neon_composite_out_reverse_8_0565),
 
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
+    /* Note: NONE repeat is not supported yet */
+    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
+
     { PIXMAN_OP_NONE },
 };
 
 static pixman_bool_t
 arm_neon_blt (pixman_implementation_t *imp,
               uint32_t *               src_bits,
               uint32_t *               dst_bits,
               int                      src_stride,
@@ -348,23 +439,18 @@ neon_combine_##name##_u (pixman_implemen
 	pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
 }
 
 BIND_COMBINE_U (over)
 BIND_COMBINE_U (add)
 BIND_COMBINE_U (out_reverse)
 
 pixman_implementation_t *
-_pixman_implementation_create_arm_neon (void)
+_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
 {
-#ifdef USE_ARM_SIMD
-    pixman_implementation_t *fallback = _pixman_implementation_create_arm_simd ();
-#else
-    pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
-#endif
     pixman_implementation_t *imp =
 	_pixman_implementation_create (fallback, arm_neon_fast_paths);
 
     imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
     imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
     imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
 
     imp->blt = arm_neon_blt;
--- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S
@@ -1,10 +1,11 @@
 /*
  * Copyright © 2008 Mozilla Corporation
+ * Copyright © 2010 Nokia Corporation
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
  * documentation for any purpose is hereby granted without fee, provided that
  * the above copyright notice appear in all copies and that both that
  * copyright notice and this permission notice appear in supporting
  * documentation, and that the name of Mozilla Corporation not be used in
  * advertising or publicity pertaining to distribution of the software without
  * specific, written prior permission.  Mozilla Corporation makes no
@@ -323,8 +324,115 @@ 3:	ldr	r4, [sp, #12]
 	cmp	r0, r4
 	ldr	r12, [sp, #8]
 	ldr	r2, [sp]
 	bne	5b
 0:	add	sp, sp, #28
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
 	bx	lr
 .endfunc
+
+/*
+ * Note: This code is only using armv5te instructions (not even armv6),
+ *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
+ *       be split into a few variants, tuned for each microarchitecture.
+ *
+ * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
+ * have efficient write combining), it needs to be changed to use 16-byte
+ * aligned writes using STM instruction.
+ *
+ * Nearest scanline scaler macro template uses the following arguments:
+ *  fname                     - name of the function to generate
+ *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
+ *  t                         - type suffix for LDR/STR instructions
+ *  prefetch_distance         - prefetch in the source image by that many
+ *                              pixels ahead
+ *  prefetch_braking_distance - stop prefetching when that many pixels are
+ *                              remaining before the end of scanline
+ */
+
+.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
+                                      prefetch_distance,        \
+                                      prefetch_braking_distance
+
+pixman_asm_function fname
+	W	.req	r0	/* pixel count, kept biased while looping */
+	DST	.req	r1	/* destination pointer, post-incremented by stores */
+	SRC	.req	r2	/* base address for all source loads */
+	VX	.req	r3	/* source x position, integer part above bit 15 */
+	UNIT_X	.req	ip	/* added to VX once per pixel */
+	TMP1	.req	r4
+	TMP2	.req	r5
+	VXMASK	.req	r6	/* clears the low bpp_shift bits of a byte offset */
+	PF_OFFS	.req	r7	/* position in VX units used for prefetching */
+
+	ldr	UNIT_X, [sp]	/* read from the stack before push moves sp */
+	push	{r4, r5, r6, r7}
+	mvn	VXMASK, #((1 << bpp_shift) - 1)
+
+	/* helper: copy 2 pixels, TMP1 holds the next byte offset on entry and exit */
+	.macro	scale_2_pixels
+		ldr&t	TMP1, [SRC, TMP1]	/* pixel at the precomputed offset */
+		and	TMP2, VXMASK, VX, lsr #(16 - bpp_shift)	/* next byte offset */
+		add	VX, VX, UNIT_X
+		str&t	TMP1, [DST], #(1 << bpp_shift)
+
+		ldr&t	TMP2, [SRC, TMP2]	/* same again with TMP1 and TMP2 swapped */
+		and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+		add	VX, VX, UNIT_X
+		str&t	TMP2, [DST], #(1 << bpp_shift)
+	.endm
+
+	/* now do the scaling: prime TMP1 with the byte offset of the first pixel */
+	and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+	add	VX, VX, UNIT_X
+	subs	W, W, #(8 + prefetch_braking_distance)
+	blt	2f	/* too few pixels for the prefetching loop */
+	/* calculate prefetch offset: VX + UNIT_X * prefetch_distance */
+	mov	PF_OFFS, #prefetch_distance
+	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
+1:	/* main loop, process 8 pixels per iteration with prefetch */
+	subs	W, W, #8
+	add	PF_OFFS, UNIT_X, lsl #3	/* move the prefetch position 8 pixels on */
+	scale_2_pixels
+	scale_2_pixels
+	scale_2_pixels
+	scale_2_pixels
+	pld	[SRC, PF_OFFS, lsr #(16 - bpp_shift)]
+	bge	1b	/* flags still come from the subs at the loop top */
+2:	/* undo the bias applied above and bias by 4 for the next loop */
+	subs	W, W, #(4 - 8 - prefetch_braking_distance)
+	blt	2f
+1:	/* process the remaining pixels, 4 per iteration */
+	scale_2_pixels
+	scale_2_pixels
+	subs	W, W, #4
+	bge	1b
+2:	/* W is negative here, its low 2 bits still give the leftover count */
+	tst	W, #2
+	beq	2f
+	scale_2_pixels
+2:
+	tst	W, #1	/* one last pixel, its byte offset is already in TMP1 */
+	ldrne&t	TMP1, [SRC, TMP1]
+	strne&t	TMP1, [DST]
+	/* cleanup helper macro */
+	.purgem	scale_2_pixels
+	.unreq	DST
+	.unreq	SRC
+	.unreq	W
+	.unreq	VX
+	.unreq	UNIT_X
+	.unreq	TMP1
+	.unreq	TMP2
+	.unreq	VXMASK
+	.unreq	PF_OFFS
+	/* return */
+	pop	{r4, r5, r6, r7}
+	bx	lr
+.endfunc
+.endm
+
+generate_nearest_scanline_func \
+    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
+
+generate_nearest_scanline_func \
+    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32
--- a/gfx/cairo/libpixman/src/pixman-arm-simd.c
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd.c
@@ -24,16 +24,17 @@
  *
  */
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include "pixman-private.h"
 #include "pixman-arm-common.h"
+#include "pixman-fast-path.h"
 
 #if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
 
 void
 pixman_composite_add_8_8_asm_armv6 (int32_t  width,
 				    int32_t  height,
 				    uint8_t *dst_line,
 				    int32_t  dst_stride,
@@ -375,22 +376,27 @@ pixman_composite_over_n_8_8888_asm_armv6
 
 #endif
 
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
                                    uint8_t, 1, uint8_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
                                      uint32_t, 1, uint32_t, 1)
 
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
                                       uint8_t, 1, uint32_t, 1)
 
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
+                                        uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
+                                        uint32_t, uint32_t)
+
 static const pixman_fast_path_t arm_simd_fast_paths[] =
 {
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
@@ -399,19 +405,28 @@ static const pixman_fast_path_t arm_simd
 
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
 
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
 
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
+
     { PIXMAN_OP_NONE },
 };
 
 pixman_implementation_t *
-_pixman_implementation_create_arm_simd (void)
+_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback)
 {
-    pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
-    pixman_implementation_t *imp = _pixman_implementation_create (general, arm_simd_fast_paths);
+    pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths);
 
     return imp;
 }
--- a/gfx/cairo/libpixman/src/pixman-bits-image.c
+++ b/gfx/cairo/libpixman/src/pixman-bits-image.c
@@ -30,53 +30,51 @@
 #include <config.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "pixman-private.h"
 #include "pixman-combine32.h"
 
-/* Store functions */
-void
-_pixman_image_store_scanline_32 (bits_image_t *  image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 const uint32_t *buffer)
+/*
+ * Generic 64bpp getter: fetch the scanline at 32bpp and expand it in place.
+ * Image types can plug in a better getter if they want to; for example, we
+ * could produce smoother gradients by evaluating them at higher color
+ * depth, but that's a project for the future.
+ */
+static void
+_pixman_image_get_scanline_generic_64 (pixman_image_t * image,
+                                       int              x,
+                                       int              y,
+                                       int              width,
+                                       uint32_t *       buffer,
+                                       const uint32_t * mask)
 {
-    image->store_scanline_32 (image, x, y, width, buffer);
+    uint32_t *mask8 = NULL;
 
-    if (image->common.alpha_map)
+    /* Contract the mask image, if one exists, so that the 32-bit fetch
+     * function can use it ('mask' is read as an array of uint64_t).
+     */
+    if (mask)
     {
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
+	mask8 = pixman_malloc_ab (width, sizeof(uint32_t));
+	if (!mask8)
+	    return; /* allocation failed: buffer is left unfilled */
 
-	image->common.alpha_map->store_scanline_32 (
-	    image->common.alpha_map, x, y, width, buffer);
+	pixman_contract (mask8, (uint64_t *)mask, width);
     }
-}
+
+    /* Fetch the source image into the first half of buffer. */
+    image->bits.get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8);
 
-void
-_pixman_image_store_scanline_64 (bits_image_t *  image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 const uint32_t *buffer)
-{
-    image->store_scanline_64 (image, x, y, width, buffer);
+    /* Expand from 32bpp to 64bpp in place (source and destination alias). */
+    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width);
 
-    if (image->common.alpha_map)
-    {
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
-
-	image->common.alpha_map->store_scanline_64 (
-	    image->common.alpha_map, x, y, width, buffer);
-    }
+    free (mask8); /* still NULL when no mask was given; free (NULL) is a no-op */
 }
 
 /* Fetch functions */
 
 static force_inline uint32_t
 fetch_pixel_no_alpha (bits_image_t *image,
 		      int x, int y, pixman_bool_t check_bounds)
 {
@@ -292,16 +290,17 @@ bits_image_fetch_bilinear_no_repeat_8888
     pixman_fixed_t x_top, x_bottom, x;
     pixman_fixed_t ux_top, ux_bottom, ux;
     pixman_vector_t v;
     uint32_t top_mask, bottom_mask;
     uint32_t *top_row;
     uint32_t *bottom_row;
     uint32_t *end;
     uint32_t zero[2] = { 0, 0 };
+    uint32_t one = 1;
     int y, y1, y2;
     int disty;
     int mask_inc;
     int w;
 
     /* reference point is the center of the pixel */
     v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
     v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
@@ -357,20 +356,18 @@ bits_image_fetch_bilinear_no_repeat_8888
     }
 
     /* Instead of checking whether the operation uses the mast in
      * each loop iteration, verify this only once and prepare the
      * variables to make the code smaller inside the loop.
      */
     if (!mask)
     {
-	uint32_t mask_bits = 1;
-
         mask_inc = 0;
-        mask = &mask_bits;
+        mask = &one;
     }
     else
     {
         /* If have a mask, prepare the variables to check it */
         mask_inc = 1;
     }
 
     /* If both are zero, then the whole thing is zero */
@@ -902,16 +899,87 @@ bits_image_fetch_bilinear_affine (pixman
 	    tl, tr, bl, br, distx, disty);
 
     next:
 	x += ux;
 	y += uy;
     }
 }
 
+/* Nearest-neighbour scanline fetcher for affine-transformed bits images;
+ * MAKE_NEAREST_FETCHER passes constant convert_pixel/format/repeat_mode. */
+static force_inline void
+bits_image_fetch_nearest_affine (pixman_image_t * image,
+				 int              offset,
+				 int              line,
+				 int              width,
+				 uint32_t *       buffer,
+				 const uint32_t * mask,
+				 convert_pixel_t	convert_pixel,
+				 pixman_format_code_t	format,
+				 pixman_repeat_t	repeat_mode)
+{
+    /* Loop invariants, named so as not to shadow 'width' and 'mask'. */
+    bits_image_t *bits = &image->bits;
+    const int src_width = bits->width;
+    const int src_height = bits->height;
+
+    /* OR-ed into each fetched pixel; non-zero when PIXMAN_FORMAT_A is 0. */
+    const uint32_t alpha_or = PIXMAN_FORMAT_A (format) ? 0 : 0xff000000;
+    pixman_fixed_t x, y;
+    pixman_fixed_t ux, uy;
+    pixman_vector_t v;
+    int i;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    /* Nothing is written to buffer if the transform fails. */
+    if (!pixman_transform_point_3d (image->common.transform, &v))
+	return;
+
+    /* x and y advance by (ux, uy) for each destination pixel. */
+    ux = image->common.transform->matrix[0][0];
+    uy = image->common.transform->matrix[1][0];
+
+    x = v.vector[0];
+    y = v.vector[1];
+
+    for (i = 0; i < width; ++i, x += ux, y += uy)
+    {
+	const uint8_t *row;
+	int x0, y0;
+
+	/* Masked-out pixels are skipped; buffer[i] is left untouched. */
+	if (mask && !mask[i])
+	    continue;
+
+	x0 = pixman_fixed_to_int (x - pixman_fixed_e);
+	y0 = pixman_fixed_to_int (y - pixman_fixed_e);
+
+	if (repeat_mode != PIXMAN_REPEAT_NONE)
+	{
+	    repeat (repeat_mode, src_width, &x0);
+	    repeat (repeat_mode, src_height, &y0);
+	}
+	else if (y0 < 0 || y0 >= src_height || x0 < 0 || x0 >= src_width)
+	{
+	    /* No repeat: samples outside the image are written as 0. */
+	    buffer[i] = 0;
+	    continue;
+	}
+
+	/* Byte address of row y0 (rowstride presumably in 32-bit units). */
+	row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0;
+
+	buffer[i] = convert_pixel (row, x0) | alpha_or;
+    }
+}
+
 static force_inline uint32_t
 convert_a8r8g8b8 (const uint8_t *row, int x)
 {
     return *(((uint32_t *)row) + x);
 }
 
 static force_inline uint32_t
 convert_x8r8g8b8 (const uint8_t *row, int x)
@@ -935,39 +1003,59 @@ convert_r5g6b5 (const uint8_t *row, int 
     static void								\
     bits_image_fetch_bilinear_affine_ ## name (pixman_image_t *image,	\
 					       int              offset,	\
 					       int              line,	\
 					       int              width,	\
 					       uint32_t *       buffer,	\
 					       const uint32_t * mask)	\
     {									\
-	bits_image_fetch_bilinear_affine (image, offset, line, width, buffer, mask, \
+	bits_image_fetch_bilinear_affine (image, offset, line,		\
+					  width, buffer, mask,		\
 					  convert_ ## format,		\
 					  PIXMAN_ ## format,		\
 					  repeat_mode);			\
-    }									\
-    extern int no_such_variable
+    }
+/* Defines bits_image_fetch_nearest_affine_<name> for one format/repeat mode. */
+#define MAKE_NEAREST_FETCHER(name, format, repeat_mode)			\
+    static void								\
+    bits_image_fetch_nearest_affine_ ## name (pixman_image_t *image,	\
+					      int              offset,	\
+					      int              line,	\
+					      int              width,	\
+					      uint32_t *       buffer,	\
+					      const uint32_t * mask)	\
+    {									\
+	bits_image_fetch_nearest_affine (image, offset, line,		\
+					 width, buffer, mask,		\
+					 convert_ ## format,		\
+					 PIXMAN_ ## format,		\
+					 repeat_mode);			\
+    }
 
-MAKE_BILINEAR_FETCHER (pad_a8r8g8b8,     a8r8g8b8, PIXMAN_REPEAT_PAD);
-MAKE_BILINEAR_FETCHER (none_a8r8g8b8,    a8r8g8b8, PIXMAN_REPEAT_NONE);
-MAKE_BILINEAR_FETCHER (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT);
-MAKE_BILINEAR_FETCHER (normal_a8r8g8b8,  a8r8g8b8, PIXMAN_REPEAT_NORMAL);
-MAKE_BILINEAR_FETCHER (pad_x8r8g8b8,     x8r8g8b8, PIXMAN_REPEAT_PAD);
-MAKE_BILINEAR_FETCHER (none_x8r8g8b8,    x8r8g8b8, PIXMAN_REPEAT_NONE);
-MAKE_BILINEAR_FETCHER (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT);
-MAKE_BILINEAR_FETCHER (normal_x8r8g8b8,  x8r8g8b8, PIXMAN_REPEAT_NORMAL);
-MAKE_BILINEAR_FETCHER (pad_a8,           a8,       PIXMAN_REPEAT_PAD);
-MAKE_BILINEAR_FETCHER (none_a8,          a8,       PIXMAN_REPEAT_NONE);
-MAKE_BILINEAR_FETCHER (reflect_a8,	 a8,       PIXMAN_REPEAT_REFLECT);
-MAKE_BILINEAR_FETCHER (normal_a8,	 a8,       PIXMAN_REPEAT_NORMAL);
-MAKE_BILINEAR_FETCHER (pad_r5g6b5,       r5g6b5,   PIXMAN_REPEAT_PAD);
-MAKE_BILINEAR_FETCHER (none_r5g6b5,      r5g6b5,   PIXMAN_REPEAT_NONE);
-MAKE_BILINEAR_FETCHER (reflect_r5g6b5,   r5g6b5,   PIXMAN_REPEAT_REFLECT);
-MAKE_BILINEAR_FETCHER (normal_r5g6b5,    r5g6b5,   PIXMAN_REPEAT_NORMAL);
+#define MAKE_FETCHERS(name, format, repeat_mode)			\
+    MAKE_NEAREST_FETCHER (name, format, repeat_mode)			\
+    MAKE_BILINEAR_FETCHER (name, format, repeat_mode)
+/* One nearest and one bilinear fetcher per format (4) and repeat mode (4). */
+MAKE_FETCHERS (pad_a8r8g8b8,     a8r8g8b8, PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_a8r8g8b8,    a8r8g8b8, PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_a8r8g8b8,  a8r8g8b8, PIXMAN_REPEAT_NORMAL)
+MAKE_FETCHERS (pad_x8r8g8b8,     x8r8g8b8, PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_x8r8g8b8,    x8r8g8b8, PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_x8r8g8b8,  x8r8g8b8, PIXMAN_REPEAT_NORMAL)
+MAKE_FETCHERS (pad_a8,           a8,       PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_a8,          a8,       PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_a8,	 a8,       PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_a8,	 a8,       PIXMAN_REPEAT_NORMAL)
+MAKE_FETCHERS (pad_r5g6b5,       r5g6b5,   PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_r5g6b5,      r5g6b5,   PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_r5g6b5,   r5g6b5,   PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_r5g6b5,    r5g6b5,   PIXMAN_REPEAT_NORMAL)
 
 static void
 bits_image_fetch_solid_32 (pixman_image_t * image,
                            int              x,
                            int              y,
                            int              width,
                            uint32_t *       buffer,
                            const uint32_t * mask)
@@ -1171,39 +1259,57 @@ static const fetcher_info_t fetcher_info
 
 #define GENERAL_BILINEAR_FLAGS						\
     (FAST_PATH_NO_ALPHA_MAP		|				\
      FAST_PATH_NO_ACCESSORS		|				\
      FAST_PATH_HAS_TRANSFORM		|				\
      FAST_PATH_AFFINE_TRANSFORM		|				\
      FAST_PATH_BILINEAR_FILTER)
 
+#define GENERAL_NEAREST_FLAGS						\
+    (FAST_PATH_NO_ALPHA_MAP		|				\
+     FAST_PATH_NO_ACCESSORS		|				\
+     FAST_PATH_HAS_TRANSFORM		|				\
+     FAST_PATH_AFFINE_TRANSFORM		|				\
+     FAST_PATH_NEAREST_FILTER) /* same set as GENERAL_BILINEAR_FLAGS, but requiring the nearest filter instead of the bilinear one */
+
 #define BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
     { PIXMAN_ ## format,						\
       GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
       bits_image_fetch_bilinear_affine_ ## name,			\
       _pixman_image_get_scanline_generic_64				\
     },
 
-    BILINEAR_AFFINE_FAST_PATH (pad_a8r8g8b8, a8r8g8b8, PAD)
-    BILINEAR_AFFINE_FAST_PATH (none_a8r8g8b8, a8r8g8b8, NONE)
-    BILINEAR_AFFINE_FAST_PATH (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
-    BILINEAR_AFFINE_FAST_PATH (normal_a8r8g8b8, a8r8g8b8, NORMAL)
-    BILINEAR_AFFINE_FAST_PATH (pad_x8r8g8b8, x8r8g8b8, PAD)
-    BILINEAR_AFFINE_FAST_PATH (none_x8r8g8b8, x8r8g8b8, NONE)
-    BILINEAR_AFFINE_FAST_PATH (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
-    BILINEAR_AFFINE_FAST_PATH (normal_x8r8g8b8, x8r8g8b8, NORMAL)
-    BILINEAR_AFFINE_FAST_PATH (pad_a8, a8, PAD)
-    BILINEAR_AFFINE_FAST_PATH (none_a8, a8, NONE)
-    BILINEAR_AFFINE_FAST_PATH (reflect_a8, a8, REFLECT)
-    BILINEAR_AFFINE_FAST_PATH (normal_a8, a8, NORMAL)
-    BILINEAR_AFFINE_FAST_PATH (pad_r5g6b5, r5g6b5, PAD)
-    BILINEAR_AFFINE_FAST_PATH (none_r5g6b5, r5g6b5, NONE)
-    BILINEAR_AFFINE_FAST_PATH (reflect_r5g6b5, r5g6b5, REFLECT)
-    BILINEAR_AFFINE_FAST_PATH (normal_r5g6b5, r5g6b5, NORMAL)
+#define NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
+    { PIXMAN_ ## format,						\
+      GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
+      bits_image_fetch_nearest_affine_ ## name,			\
+      _pixman_image_get_scanline_generic_64				\
+    }, /* table entry: format, required flags (incl. the repeat mode), nearest-affine fetcher for <name>, generic 64-bit scanline function */
+
+#define AFFINE_FAST_PATHS(name, format, repeat)				\
+    BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
+    NEAREST_AFFINE_FAST_PATH(name, format, repeat) /* emits the bilinear entry followed by the nearest entry for the same (name, format, repeat) */
+    
+    AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
+    AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
+    AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
+    AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
+    AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
+    AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
+    AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
+    AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
+    AFFINE_FAST_PATHS (pad_a8, a8, PAD)
+    AFFINE_FAST_PATHS (none_a8, a8, NONE)
+    AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
+    AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
+    AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
+    AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
+    AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
+    AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)
 
     /* Affine, no alpha */
     { PIXMAN_any,
       (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
       bits_image_fetch_affine_no_alpha,
       _pixman_image_get_scanline_generic_64
     },
 
@@ -1223,26 +1329,190 @@ bits_image_property_changed (pixman_imag
     _pixman_bits_image_setup_accessors (&image->bits);
 
     info = fetcher_info;
     while (info->format != PIXMAN_null)
     {
 	if ((info->format == format || info->format == PIXMAN_any)	&&
 	    (info->flags & flags) == info->flags)
 	{
-	    image->common.get_scanline_32 = info->fetch_32;
-	    image->common.get_scanline_64 = info->fetch_64;
+	    image->bits.get_scanline_32 = info->fetch_32;
+	    image->bits.get_scanline_64 = info->fetch_64;
 	    break;
 	}
 
 	info++;
     }
 }
 
 static uint32_t *
+src_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{ /* Source-iterator get_scanline callback (ITER_NARROW case): fills iter->buffer via bits.get_scanline_32 and returns it. */
+    iter->image->bits.get_scanline_32 (
+	iter->image, iter->x, iter->y++, iter->width, iter->buffer, mask); /* iter->y++: row iter->y is fetched, then the iterator moves to the next row */
+
+    return iter->buffer;
+}
+
+static uint32_t *
+src_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{ /* Source-iterator get_scanline callback (non-ITER_NARROW case): same as the narrow variant but goes through bits.get_scanline_64. */
+    iter->image->bits.get_scanline_64 (
+	iter->image, iter->x, iter->y++, iter->width, iter->buffer, mask); /* iter->y++ advances to the next row; presumably iter->buffer is sized for 64-bit pixels here -- TODO confirm */
+
+    return iter->buffer;
+}
+
+void
+_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+{ /* Installs the source get_scanline callback on iter, chosen by iter->flags; the image argument is not used in this body. */
+    if (iter->flags & ITER_NARROW)
+	iter->get_scanline = src_get_scanline_narrow; /* narrow: goes through get_scanline_32 */
+    else
+	iter->get_scanline = src_get_scanline_wide; /* otherwise: goes through get_scanline_64 */
+}
+
+static uint32_t *
+dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{ /* Destination-iterator get_scanline callback (narrow case): reads row iter->y into iter->buffer; iter->y is not advanced here (the write-back callbacks do that). */
+    pixman_image_t *image  = iter->image;
+    int             x      = iter->x;
+    int             y      = iter->y;
+    int             width  = iter->width;
+    uint32_t *	    buffer = iter->buffer;
+
+    image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask);
+    if (image->common.alpha_map) /* an alpha map is attached: it is read too, at coordinates shifted by alpha_origin_x/y */
+    { /* NOTE(review): this second fetch targets the same buffer as the fetch above, so it looks like it replaces the pixels instead of merging only their alpha -- confirm */
+	x -= image->common.alpha_origin_x;
+	y -= image->common.alpha_origin_y;
+
+	image->common.alpha_map->fetch_scanline_32 (
+	    (pixman_image_t *)image->common.alpha_map,
+	    x, y, width, buffer, mask);
+    }
+
+    return iter->buffer;
+}
+
+static uint32_t *
+dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{ /* Destination-iterator get_scanline callback (wide case): reads row iter->y through fetch_scanline_64 into iter->buffer; iter->y is not advanced here. */
+    bits_image_t *  image  = &iter->image->bits;
+    int             x      = iter->x;
+    int             y      = iter->y;
+    int             width  = iter->width;
+    uint32_t *	    buffer = iter->buffer;
+
+    image->fetch_scanline_64 (
+	(pixman_image_t *)image, x, y, width, buffer, mask);
+    if (image->common.alpha_map) /* an alpha map is attached: it is read too, at coordinates shifted by alpha_origin_x/y */
+    { /* NOTE(review): this second fetch targets the same buffer as the fetch above, so it looks like it replaces the pixels instead of merging only their alpha -- confirm */
+	x -= image->common.alpha_origin_x;
+	y -= image->common.alpha_origin_y;
+
+	image->common.alpha_map->fetch_scanline_64 (
+	    (pixman_image_t *)image->common.alpha_map, x, y, width, buffer, mask);
+    }
+
+    return iter->buffer;
+}
+
+static void
+dest_write_back_narrow (pixman_iter_t *iter)
+{ /* Destination-iterator write_back callback (narrow case): stores iter->buffer to row iter->y via store_scanline_32, then advances iter->y. */
+    bits_image_t *  image  = &iter->image->bits;
+    int             x      = iter->x;
+    int             y      = iter->y;
+    int             width  = iter->width;
+    const uint32_t *buffer = iter->buffer;
+
+    image->store_scanline_32 (image, x, y, width, buffer);
+
+    if (image->common.alpha_map) /* the same buffer is also stored into the alpha map, at coordinates shifted by alpha_origin_x/y */
+    {
+	x -= image->common.alpha_origin_x;
+	y -= image->common.alpha_origin_y;
+
+	image->common.alpha_map->store_scanline_32 (
+	    image->common.alpha_map, x, y, width, buffer);
+    }
+
+    iter->y++; /* next call operates on the following row */
+}
+
+static void
+dest_write_back_wide (pixman_iter_t *iter)
+{ /* Destination-iterator write_back callback (wide case): stores iter->buffer to row iter->y via store_scanline_64, then advances iter->y. */
+    bits_image_t *  image  = &iter->image->bits;
+    int             x      = iter->x;
+    int             y      = iter->y;
+    int             width  = iter->width;
+    const uint32_t *buffer = iter->buffer;
+
+    image->store_scanline_64 (image, x, y, width, buffer);
+
+    if (image->common.alpha_map) /* the same buffer is also stored into the alpha map, at coordinates shifted by alpha_origin_x/y */
+    {
+	x -= image->common.alpha_origin_x;
+	y -= image->common.alpha_origin_y;
+
+	image->common.alpha_map->store_scanline_64 (
+	    image->common.alpha_map, x, y, width, buffer);
+    }
+
+    iter->y++; /* next call operates on the following row */
+}
+
+static void
+dest_write_back_direct (pixman_iter_t *iter)
+{ /* write_back callback for the direct path set up in _pixman_bits_image_dest_iter_init, where iter->buffer points into the image's own bits: nothing is copied. */
+    iter->buffer += iter->image->bits.rowstride; /* step the uint32_t pointer by one rowstride, i.e. to the same x on the next row */
+}
+
+void
+_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+{ /* Installs the destination get_scanline / write_back callbacks on iter, based on iter->flags and on the image's flags and format. */
+    if (iter->flags & ITER_NARROW) /* narrow: the _32 fetch/store paths, or direct access when possible */
+    {
+	if (((image->common.flags &
+	      (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) ==
+	     (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) &&
+	    (image->bits.format == PIXMAN_a8r8g8b8	||
+	     (image->bits.format == PIXMAN_x8r8g8b8	&&
+	      (iter->flags & ITER_LOCALIZED_ALPHA))))
+	{ /* direct path: both NO_ALPHA_MAP and NO_ACCESSORS are set and the format is a8r8g8b8 (or x8r8g8b8 with ITER_LOCALIZED_ALPHA), so the buffer aliases the image storage */
+	    iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
+
+	    iter->get_scanline = _pixman_iter_get_scanline_noop; /* defined elsewhere; presumably just hands back iter->buffer -- TODO confirm */
+	    iter->write_back = dest_write_back_direct; /* only steps the buffer pointer to the next row */
+	}
+	else
+	{
+	    if ((iter->flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+		(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
+	    { /* both IGNORE flags set: presumably the existing destination pixels are not needed, so no fetch is installed -- TODO confirm */
+		iter->get_scanline = _pixman_iter_get_scanline_noop;
+	    }
+	    else
+	    {
+		iter->get_scanline = dest_get_scanline_narrow;
+	    }
+
+	    iter->write_back = dest_write_back_narrow; /* the store still happens in either case */
+	}
+    }
+    else
+    { /* wide: always fetch and store through the _64 callbacks; no direct or no-fetch shortcut here */
+	iter->get_scanline = dest_get_scanline_wide;
+	iter->write_back = dest_write_back_wide;
+    }
+}
+
+static uint32_t *
 create_bits (pixman_format_code_t format,
              int                  width,
              int                  height,
              int *                rowstride_bytes)
 {
     int stride;
     int buf_size;
     int bpp;
--- a/gfx/cairo/libpixman/src/pixman-combine.c.template
+++ b/gfx/cairo/libpixman/src/pixman-combine.c.template
@@ -128,16 +128,27 @@ combine_clear (pixman_implementation_t *
                const comp4_t *          src,
                const comp4_t *          mask,
                int                      width)
 {
     memset (dest, 0, width * sizeof(comp4_t));
 }
 
 static void
+combine_dst (pixman_implementation_t *imp,
+	     pixman_op_t	      op,
+	     comp4_t *		      dest,
+	     const comp4_t *	      src,
+	     const comp4_t *          mask,
+	     int		      width)
+{ /* No-op combiner: none of the arguments are read and dest is left untouched. */
+    return; /* nothing to do; the function exists so that combiner table slots can hold a callable instead of being left unset */
+}
+
+static void
 combine_src_u (pixman_implementation_t *imp,
                pixman_op_t              op,
                comp4_t *                dest,
                const comp4_t *          src,
                const comp4_t *          mask,
                int                      width)
 {
     int i;
@@ -943,25 +954,43 @@ set_lum (comp4_t dest[3], comp4_t src[3]
 
     /* clip_color */
     l = LUM (tmp);
     min = CH_MIN (tmp);
     max = CH_MAX (tmp);
 
     if (min < 0)
     {
-	tmp[0] = l + (tmp[0] - l) * l / (l - min);
-	tmp[1] = l + (tmp[1] - l) * l / (l - min);
-	tmp[2] = l + (tmp[2] - l) * l / (l - min);
+	if (l - min == 0.0)
+	{
+	    tmp[0] = 0;
+	    tmp[1] = 0;
+	    tmp[2] = 0;
+	}
+	else
+	{
+	    tmp[0] = l + (tmp[0] - l) * l / (l - min);
+	    tmp[1] = l + (tmp[1] - l) * l / (l - min);
+	    tmp[2] = l + (tmp[2] - l) * l / (l - min);
+	}
     }
     if (max > a)
     {
-	tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
-	tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
-	tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
+	if (max - l == 0.0)
+	{
+	    tmp[0] = a;
+	    tmp[1] = a;
+	    tmp[2] = a;
+	}
+	else
+	{
+	    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
+	    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
+	    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
+	}
     }
 
     dest[0] = tmp[0] * MASK + 0.5;
     dest[1] = tmp[1] * MASK + 0.5;
     dest[2] = tmp[2] * MASK + 0.5;
 }
 
 static void
@@ -1291,27 +1320,23 @@ combine_disjoint_over_u (pixman_implemen
 {
     int i;
 
     for (i = 0; i < width; ++i)
     {
 	comp4_t s = combine_mask (src, mask, i);
 	comp2_t a = s >> A_SHIFT;
 
-	if (a != 0x00)
+	if (s != 0x00)
 	{
-	    if (a != MASK)
-	    {
-		comp4_t d = *(dest + i);
-		a = combine_disjoint_out_part (d >> A_SHIFT, a);
-		UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s);
-		s = d;
-	    }
+	    comp4_t d = *(dest + i);
+	    a = combine_disjoint_out_part (d >> A_SHIFT, a);
+	    UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s);
 
-	    *(dest + i) = s;
+	    *(dest + i) = d;
 	}
     }
 }
 
 static void
 combine_disjoint_in_u (pixman_implementation_t *imp,
                        pixman_op_t              op,
                        comp4_t *                dest,
@@ -2309,47 +2334,47 @@ combine_conjoint_xor_ca (pixman_implemen
 }
 
 void
 _pixman_setup_combiner_functions_width (pixman_implementation_t *imp)
 {
     /* Unified alpha */
     imp->combine_width[PIXMAN_OP_CLEAR] = combine_clear;
     imp->combine_width[PIXMAN_OP_SRC] = combine_src_u;
-    /* dest */
+    imp->combine_width[PIXMAN_OP_DST] = combine_dst;
     imp->combine_width[PIXMAN_OP_OVER] = combine_over_u;
     imp->combine_width[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
     imp->combine_width[PIXMAN_OP_IN] = combine_in_u;
     imp->combine_width[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
     imp->combine_width[PIXMAN_OP_OUT] = combine_out_u;
     imp->combine_width[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
     imp->combine_width[PIXMAN_OP_ATOP] = combine_atop_u;
     imp->combine_width[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
     imp->combine_width[PIXMAN_OP_XOR] = combine_xor_u;
     imp->combine_width[PIXMAN_OP_ADD] = combine_add_u;
     imp->combine_width[PIXMAN_OP_SATURATE] = combine_saturate_u;
 
     /* Disjoint, unified */
     imp->combine_width[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
     imp->combine_width[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
-    /* dest */
+    imp->combine_width[PIXMAN_OP_DISJOINT_DST] = combine_dst;
     imp->combine_width[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
     imp->combine_width[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
 
     /* Conjoint, unified */
     imp->combine_width[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
     imp->combine_width[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
-    /* dest */
+    imp->combine_width[PIXMAN_OP_CONJOINT_DST] = combine_dst;
     imp->combine_width[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
     imp->combine_width[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
     imp->combine_width[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
     imp->combine_width[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
     imp->combine_width[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
     imp->combine_width[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
     imp->combine_width[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
     imp->combine_width[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
@@ -2385,31 +2410,31 @@ void
     imp->combine_width_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
     imp->combine_width_ca[PIXMAN_OP_XOR] = combine_xor_ca;
     imp->combine_width_ca[PIXMAN_OP_ADD] = combine_add_ca;
     imp->combine_width_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
 
     /* Disjoint CA */
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
-    /* dest */
+    imp->combine_width_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
     imp->combine_width_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
 
     /* Conjoint CA */
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
-    /* dest */
+    imp->combine_width_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
     imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
@@ -2422,15 +2447,15 @@ void
     imp->combine_width_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
     imp->combine_width_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
     imp->combine_width_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
     imp->combine_width_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
     imp->combine_width_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
     imp->combine_width_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
     imp->combine_width_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
 
-    /* It is not clear that these make sense, so leave them out for now */
-    imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = NULL;
-    imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = NULL;
-    imp->combine_width_ca[PIXMAN_OP_HSL_COLOR] = NULL;
-    imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = NULL;
+    /* It is not clear that these make sense, so make them noops for now */
+    imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
+    imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;