Merge mozilla-central to autoland. a=merge CLOSED TREE
author Noemi Erli <nerli@mozilla.com>
Thu, 04 Oct 2018 07:10:05 +0300
changeset 495248 2735d9fa61d7b0623b2a9c63cc68ff6b5ba83f7d
parent 495247 9c9d32068acf37740896df161189fb413ac169a2 (current diff)
parent 495241 8b1f1ebed0f0d6c8abc7e201d70d999f92f2817e (diff)
child 495249 323691f11567176e1e70a6d49a628e2ed653de6e
push id 9984
push user ffxbld-merge
push date Mon, 15 Oct 2018 21:07:35 +0000
treeherder mozilla-beta@183d27ea8570
reviewers merge
milestone 64.0a1
devtools/client/inspector/shared/three-pane-onboarding-tooltip.js
gfx/webrender/res/resource_cache.glsl
js/src/jit/Disassembler.cpp
js/src/jit/Disassembler.h
js/src/jit/arm64/Disassembler-arm64.cpp
js/src/jit/x86-shared/Disassembler-x86-shared.cpp
--- a/browser/app/winlauncher/LauncherProcessWin.cpp
+++ b/browser/app/winlauncher/LauncherProcessWin.cpp
@@ -35,21 +35,22 @@
  * @return true if browser startup should proceed, otherwise false.
  */
 static bool
 PostCreationSetup(HANDLE aChildProcess, HANDLE aChildMainThread,
                   const bool aIsSafeMode)
 {
   // The launcher process's DLL blocking code is incompatible with ASAN because
   // it is able to execute before ASAN itself has even initialized.
-#if defined(MOZ_ASAN)
+  // Also, the AArch64 build doesn't yet have a working interceptor.
+#if defined(MOZ_ASAN) || defined(_M_ARM64)
   return true;
 #else
   return mozilla::InitializeDllBlocklistOOP(aChildProcess);
-#endif // defiend(MOZ_ASAN)
+#endif // defined(MOZ_ASAN) || defined(_M_ARM64)
 }
 
 #if !defined(PROCESS_CREATION_MITIGATION_POLICY_IMAGE_LOAD_PREFER_SYSTEM32_ALWAYS_ON)
 # define PROCESS_CREATION_MITIGATION_POLICY_IMAGE_LOAD_PREFER_SYSTEM32_ALWAYS_ON (0x00000001ULL << 60)
 #endif // !defined(PROCESS_CREATION_MITIGATION_POLICY_IMAGE_LOAD_PREFER_SYSTEM32_ALWAYS_ON)
 
 #if (_WIN32_WINNT < 0x0602)
 BOOL WINAPI
--- a/browser/base/content/browser.xul
+++ b/browser/base/content/browser.xul
@@ -6,16 +6,22 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 <!-- The "global.css" stylesheet is imported first to allow other stylesheets to
      override rules using selectors with the same specificity. This applies to
      both "content" and "skin" packages, which bug 1385444 will unify later. -->
 <?xml-stylesheet href="chrome://global/skin/" type="text/css"?>
 
+<!-- While these stylesheets are defined in Toolkit, they are only used in the
+     main browser window, so we can load them here. Bug 1474241 is on file to
+     consider moving these widgets to the "browser" folder. -->
+<?xml-stylesheet href="chrome://global/content/tabprompts.css" type="text/css"?>
+<?xml-stylesheet href="chrome://global/skin/tabprompts.css" type="text/css"?>
+
 <?xml-stylesheet href="chrome://browser/content/browser.css" type="text/css"?>
 <?xml-stylesheet href="chrome://browser/content/tabbrowser.css" type="text/css"?>
 <?xml-stylesheet href="chrome://browser/content/downloads/downloads.css" type="text/css"?>
 <?xml-stylesheet href="chrome://browser/content/places/places.css" type="text/css"?>
 <?xml-stylesheet href="chrome://browser/content/usercontext/usercontext.css" type="text/css"?>
 <?xml-stylesheet href="chrome://browser/skin/" type="text/css"?>
 <?xml-stylesheet href="chrome://browser/skin/controlcenter/panel.css" type="text/css"?>
 <?xml-stylesheet href="chrome://browser/skin/customizableui/panelUI.css" type="text/css"?>
--- a/browser/base/content/content.js
+++ b/browser/base/content/content.js
@@ -29,27 +29,43 @@ XPCOMUtils.defineLazyGetter(this, "Login
 });
 
 // NOTE: Much of this logic is duplicated in BrowserCLH.js for Android.
 addMessageListener("RemoteLogins:fillForm", function(message) {
   // intercept if ContextMenu.jsm had sent a plain object for remote targets
   message.objects.inputElement = ContextMenuChild.getTarget(global, message, "inputElement");
   LoginManagerContent.receiveMessage(message, content);
 });
+
+function shouldIgnoreLoginManagerEvent(event) {
+  // If the target has a null principal, stop any further password manager
+  // code from running, since it would incorrectly use the document `location`.
+  return event.target.nodePrincipal.isNullPrincipal;
+}
+
 addEventListener("DOMFormHasPassword", function(event) {
+  if (shouldIgnoreLoginManagerEvent(event)) {
+    return;
+  }
   LoginManagerContent.onDOMFormHasPassword(event, content);
   let formLike = LoginFormFactory.createFromForm(event.originalTarget);
   InsecurePasswordUtils.reportInsecurePasswords(formLike);
 });
 addEventListener("DOMInputPasswordAdded", function(event) {
+  if (shouldIgnoreLoginManagerEvent(event)) {
+    return;
+  }
   LoginManagerContent.onDOMInputPasswordAdded(event, content);
   let formLike = LoginFormFactory.createFromField(event.originalTarget);
   InsecurePasswordUtils.reportInsecurePasswords(formLike);
 });
 addEventListener("DOMAutoComplete", function(event) {
+  if (shouldIgnoreLoginManagerEvent(event)) {
+    return;
+  }
   LoginManagerContent.onUsernameInput(event);
 });
 
 ContentMetaHandler.init(this);
 
 // This is a temporary hack to prevent regressions (bug 1471327).
 void content;
 
--- a/devtools/client/inspector/inspector.js
+++ b/devtools/client/inspector/inspector.js
@@ -18,17 +18,16 @@ const Store = require("devtools/client/i
 const InspectorStyleChangeTracker = require("devtools/client/inspector/shared/style-change-tracker");
 
 // Use the privileged promise in panel documents to prevent them from freezing
 // during toolbox destruction. See bug 1402779.
 const Promise = require("Promise");
 
 loader.lazyRequireGetter(this, "initCssProperties", "devtools/shared/fronts/css-properties", true);
 loader.lazyRequireGetter(this, "HTMLBreadcrumbs", "devtools/client/inspector/breadcrumbs", true);
-loader.lazyRequireGetter(this, "ThreePaneOnboardingTooltip", "devtools/client/inspector/shared/three-pane-onboarding-tooltip");
 loader.lazyRequireGetter(this, "KeyShortcuts", "devtools/client/shared/key-shortcuts");
 loader.lazyRequireGetter(this, "InspectorSearch", "devtools/client/inspector/inspector-search", true);
 loader.lazyRequireGetter(this, "ToolSidebar", "devtools/client/inspector/toolsidebar", true);
 loader.lazyRequireGetter(this, "MarkupView", "devtools/client/inspector/markup/markup");
 loader.lazyRequireGetter(this, "HighlightersOverlay", "devtools/client/inspector/shared/highlighters-overlay");
 loader.lazyRequireGetter(this, "nodeConstants", "devtools/shared/dom-node-constants");
 loader.lazyRequireGetter(this, "Menu", "devtools/client/framework/menu");
 loader.lazyRequireGetter(this, "MenuItem", "devtools/client/framework/menu-item");
@@ -58,18 +57,16 @@ const LAZY_RESIZE_INTERVAL_MS = 200;
 // If the toolbox's width is smaller than the given amount of pixels, the sidebar
 // automatically switches from 'landscape/horizontal' to 'portrait/vertical' mode.
 const PORTRAIT_MODE_WIDTH_THRESHOLD = 700;
 // If the toolbox's width docked to the side is smaller than the given amount of pixels,
 // the sidebar automatically switches from 'landscape/horizontal' to 'portrait/vertical'
 // mode.
 const SIDE_PORTAIT_MODE_WIDTH_THRESHOLD = 1000;
 
-const THREE_PANE_FIRST_RUN_PREF = "devtools.inspector.three-pane-first-run";
-const SHOW_THREE_PANE_ONBOARDING_PREF = "devtools.inspector.show-three-pane-tooltip";
 const THREE_PANE_ENABLED_PREF = "devtools.inspector.three-pane-enabled";
 const THREE_PANE_ENABLED_SCALAR = "devtools.inspector.three_pane_enabled";
 const THREE_PANE_CHROME_ENABLED_PREF = "devtools.inspector.chrome.three-pane-enabled";
 const TELEMETRY_EYEDROPPER_OPENED = "devtools.toolbar.eyedropper.opened";
 const TRACK_CHANGES_ENABLED = "devtools.inspector.changes.enabled";
 
 /**
  * Represents an open instance of the Inspector for a tab.
@@ -128,19 +125,16 @@ function Inspector(toolbox) {
   if (Services.prefs.getBoolPref(TRACK_CHANGES_ENABLED)) {
     this.changesManager = new ChangesManager(this);
   }
 
   // Store the URL of the target page prior to navigation in order to ensure
   // telemetry counts in the Grid Inspector are not double counted on reload.
   this.previousURL = this.target.url;
 
-  this.is3PaneModeFirstRun = Services.prefs.getBoolPref(THREE_PANE_FIRST_RUN_PREF);
-  this.show3PaneTooltip = Services.prefs.getBoolPref(SHOW_THREE_PANE_ONBOARDING_PREF);
-
   this.nodeMenuTriggerInfo = null;
 
   this._clearSearchResultsLabel = this._clearSearchResultsLabel.bind(this);
   this._handleRejectionIfNotDestroyed = this._handleRejectionIfNotDestroyed.bind(this);
   this._onBeforeNavigate = this._onBeforeNavigate.bind(this);
   this._onContextMenu = this._onContextMenu.bind(this);
   this._onMarkupFrameLoad = this._onMarkupFrameLoad.bind(this);
   this._updateSearchResultsLabel = this._updateSearchResultsLabel.bind(this);
@@ -290,24 +284,16 @@ Inspector.prototype = {
 
     if (this.target.isLocalTab) {
       this.target.on("thread-paused", this._updateDebuggerPausedWarning);
       this.target.on("thread-resumed", this._updateDebuggerPausedWarning);
       this.toolbox.on("select", this._updateDebuggerPausedWarning);
       this._updateDebuggerPausedWarning();
     }
 
-    // Resets the inspector sidebar widths if this is the first run of the 3 pane mode.
-    if (this.is3PaneModeFirstRun) {
-      Services.prefs.clearUserPref("devtools.toolsidebar-width.inspector");
-      Services.prefs.clearUserPref("devtools.toolsidebar-height.inspector");
-      Services.prefs.clearUserPref("devtools.toolsidebar-width.inspector.splitsidebar");
-      Services.prefs.setBoolPref(THREE_PANE_FIRST_RUN_PREF, false);
-    }
-
     this._initMarkup();
     this.isReady = false;
 
     this.setupSearchBox();
 
     // Set up the splitter before the sidebar is displayed so
     // we don't miss any events.
     this.setupSplitter();
@@ -329,23 +315,16 @@ Inspector.prototype = {
       this.selection.setNodeFront(defaultSelection, { reason: "inspector-open" });
       await onAllPanelsUpdated;
       await this.markup.expandNode(this.selection.nodeFront);
     }
 
     // Setup the toolbar only now because it may depend on the document.
     await this.setupToolbar();
 
-    // Show the 3 pane onboarding tooltip only if the inspector is visisble since the
-    // Accessibility panel initializes the Inspector and if it is not the browser toolbox.
-    if (this.show3PaneTooltip && !this.target.chrome &&
-        this.toolbox.currentToolId === "inspector") {
-      this.threePaneTooltip = new ThreePaneOnboardingTooltip(this.toolbox, this.panelDoc);
-    }
-
     // Log the 3 pane inspector setting on inspector open. The question we want to answer
     // is:
     // "What proportion of users use the 3 pane vs 2 pane inspector on inspector open?"
     this.telemetry.keyedScalarAdd(THREE_PANE_ENABLED_SCALAR, this.is3PaneModeEnabled, 1);
 
     this.emit("ready");
     return this;
   },
@@ -1455,20 +1434,16 @@ Inspector.prototype = {
     if (this.fontinspector) {
       this.fontinspector.destroy();
     }
 
     if (this.animationinspector) {
       this.animationinspector.destroy();
     }
 
-    if (this.threePaneTooltip) {
-      this.threePaneTooltip.destroy();
-    }
-
     if (this._highlighters) {
       this._highlighters.destroy();
       this._highlighters = null;
     }
 
     if (this._search) {
       this._search.destroy();
       this._search = null;
@@ -1491,27 +1466,25 @@ Inspector.prototype = {
     }
 
     this._is3PaneModeChromeEnabled = null;
     this._is3PaneModeEnabled = null;
     this._notificationBox = null;
     this._target = null;
     this._toolbox = null;
     this.breadcrumbs = null;
-    this.is3PaneModeFirstRun = null;
     this.panelDoc = null;
     this.panelWin.inspector = null;
     this.panelWin = null;
     this.resultsLength = null;
     this.searchBox = null;
     this.show3PaneTooltip = null;
     this.sidebar = null;
     this.store = null;
     this.telemetry = null;
-    this.threePaneTooltip = null;
 
     this._panelDestroyer = promise.all([
       cssPropertiesDestroyer,
       markupDestroyer,
       sidebarDestroyer,
       ruleViewSideBarDestroyer
     ]);
 
--- a/devtools/client/inspector/shared/moz.build
+++ b/devtools/client/inspector/shared/moz.build
@@ -5,14 +5,13 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 DevToolsModules(
     'highlighters-overlay.js',
     'node-types.js',
     'reflow-tracker.js',
     'style-change-tracker.js',
     'style-inspector-menu.js',
-    'three-pane-onboarding-tooltip.js',
     'tooltips-overlay.js',
     'utils.js'
 )
 
 BROWSER_CHROME_MANIFESTS += ['test/browser.ini']
deleted file mode 100644
--- a/devtools/client/inspector/shared/three-pane-onboarding-tooltip.js
+++ /dev/null
@@ -1,111 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-"use strict";
-
-const Services = require("Services");
-const { openDocLink } = require("devtools/client/shared/link");
-const { HTMLTooltip } = require("devtools/client/shared/widgets/tooltip/HTMLTooltip");
-
-const { LocalizationHelper } = require("devtools/shared/l10n");
-const L10N = new LocalizationHelper("devtools/client/locales/inspector.properties");
-
-const SHOW_THREE_PANE_ONBOARDING_PREF = "devtools.inspector.show-three-pane-tooltip";
-
-const XHTML_NS = "http://www.w3.org/1999/xhtml";
-const CONTAINER_WIDTH = 300;
-const LEARN_MORE_LINK = "https://developer.mozilla.org/en-US/docs/Tools/Page_Inspector/Use_the_3-pane_inspector?utm_source=devtools&utm_medium=3-pane-onboarding";
-
-/**
- * Three pane inspector onboarding tooltip that is shown on the 3 pane inspector toggle
- * button when the pref is on.
- */
-class ThreePaneOnboardingTooltip {
-  constructor(toolbox, doc) {
-    this.toolbox = toolbox;
-    this.doc = doc;
-    this.tooltip = new HTMLTooltip(this.toolbox.doc, {
-      type: "arrow",
-      useXulWrapper: true,
-    });
-
-    this.onCloseButtonClick = this.onCloseButtonClick.bind(this);
-    this.onLearnMoreLinkClick = this.onLearnMoreLinkClick.bind(this);
-
-    const container = doc.createElementNS(XHTML_NS, "div");
-    container.className = "onboarding-container";
-
-    const icon = doc.createElementNS(XHTML_NS, "span");
-    icon.className = "onboarding-icon";
-    container.appendChild(icon);
-
-    const content = doc.createElementNS(XHTML_NS, "div");
-    content.className = "onboarding-content";
-    container.appendChild(content);
-
-    const message = doc.createElementNS(XHTML_NS, "div");
-    const learnMoreString = L10N.getStr("inspector.threePaneOnboarding.learnMoreLink");
-    const messageString = L10N.getFormatStr("inspector.threePaneOnboarding.content",
-      learnMoreString);
-    const learnMoreStartIndex = messageString.indexOf(learnMoreString);
-
-    message.append(messageString.substring(0, learnMoreStartIndex));
-
-    this.learnMoreLink = doc.createElementNS(XHTML_NS, "a");
-    this.learnMoreLink.className = "onboarding-link";
-    this.learnMoreLink.href = "#";
-    this.learnMoreLink.textContent = learnMoreString;
-
-    message.append(this.learnMoreLink);
-    message.append(messageString.substring(learnMoreStartIndex + learnMoreString.length));
-    content.append(message);
-
-    this.closeButton = doc.createElementNS(XHTML_NS, "button");
-    this.closeButton.className = "onboarding-close-button devtools-button";
-    container.appendChild(this.closeButton);
-
-    this.closeButton.addEventListener("click", this.onCloseButtonClick);
-    this.learnMoreLink.addEventListener("click", this.onLearnMoreLinkClick);
-
-    this.tooltip.panel.appendChild(container);
-    this.tooltip.setContentSize({ width: CONTAINER_WIDTH });
-    this.tooltip.show(this.doc.querySelector("#inspector-sidebar .sidebar-toggle"), {
-      position: "top",
-    });
-  }
-
-  destroy() {
-    this.closeButton.removeEventListener("click", this.onCloseButtonClick);
-    this.learnMoreLink.removeEventListener("click", this.onLearnMoreLinkClick);
-
-    this.tooltip.destroy();
-
-    this.closeButton = null;
-    this.doc = null;
-    this.learnMoreLink = null;
-    this.toolbox = null;
-    this.tooltip = null;
-  }
-
-  /**
-   * Handler for the "click" event on the close button. Hides the onboarding tooltip
-   * and sets the show three pane onboarding tooltip pref to false.
-   */
-  onCloseButtonClick() {
-    Services.prefs.setBoolPref(SHOW_THREE_PANE_ONBOARDING_PREF, false);
-    this.tooltip.hide();
-  }
-
-  /**
-   * Handler for the "click" event on the learn more button. Hides the onboarding tooltip
-   * and opens the link to the mdn page in a new tab.
-   */
-  onLearnMoreLinkClick() {
-    Services.prefs.setBoolPref(SHOW_THREE_PANE_ONBOARDING_PREF, false);
-    this.tooltip.hide();
-    openDocLink(LEARN_MORE_LINK);
-  }
-}
-
-module.exports = ThreePaneOnboardingTooltip;
--- a/devtools/client/locales/en-US/inspector.properties
+++ b/devtools/client/locales/en-US/inspector.properties
@@ -471,16 +471,8 @@ inspector.classPanel.newClass.placeholde
 # LOCALIZATION NOTE (inspector.classPanel.noClasses): This is the text displayed in the
 # class panel when the current element has no classes applied.
 inspector.classPanel.noClasses=No classes on this element
 
 # LOCALIZATION NOTE (inspector.noProperties): In the case where there are no CSS
 # properties to display e.g. due to search criteria this message is
 # displayed.
 inspector.noProperties=No CSS properties found.
-
-# LOCALIZATION NOTE (inspector.threePaneOnboarding.content,
-# inspector.threePaneOnboarding.learnMoreLink): This is the content shown in the 3 pane
-# inspector onboarding tooltip that is displayed on top of the 3 pane inspector toggle
-# button. %S in the content will be replaced by a link at run time with the learnMoreLink
-# string.
-inspector.threePaneOnboarding.content=New: 3-pane mode lets you see both CSS rules and Layout tools. Click this button to toggle. %S
-inspector.threePaneOnboarding.learnMoreLink=Learn more
--- a/devtools/client/preferences/devtools-client.js
+++ b/devtools/client/preferences/devtools-client.js
@@ -31,29 +31,20 @@ pref("devtools.command-button-noautohide
 
 // Inspector preferences
 // Enable the Inspector
 pref("devtools.inspector.enabled", true);
 // What was the last active sidebar in the inspector
 pref("devtools.inspector.activeSidebar", "ruleview");
 pref("devtools.inspector.remote", false);
 
-// Show the 3 pane onboarding tooltip in the inspector only in release or beta builds.
-#if defined(RELEASE_OR_BETA)
-pref("devtools.inspector.show-three-pane-tooltip", true);
-#else
-pref("devtools.inspector.show-three-pane-tooltip", false);
-#endif
 // Enable the 3 pane mode in the inspector
 pref("devtools.inspector.three-pane-enabled", true);
 // Enable the 3 pane mode in the chrome inspector
 pref("devtools.inspector.chrome.three-pane-enabled", false);
-// Whether or not this is the first run of the 3 pane mode. Used to reset the default
-// inspector sidebar widths for its first run.
-pref("devtools.inspector.three-pane-first-run", true);
 // Collapse pseudo-elements by default in the rule-view
 pref("devtools.inspector.show_pseudo_elements", false);
 // The default size for image preview tooltips in the rule-view/computed-view/markup-view
 pref("devtools.inspector.imagePreviewTooltipSize", 300);
 // Enable user agent style inspection in rule-view
 pref("devtools.inspector.showUserAgentStyles", false);
 // Show all native anonymous content (like controls in <video> tags)
 pref("devtools.inspector.showAllAnonymousContent", false);
--- a/devtools/client/shared/test/shared-head.js
+++ b/devtools/client/shared/test/shared-head.js
@@ -118,21 +118,19 @@ function loadFrameScriptUtils(browser = 
   mm.loadFrameScript(frameURL, false);
   SimpleTest.registerCleanupFunction(() => {
     mm = null;
   });
   return mm;
 }
 
 Services.prefs.setBoolPref("devtools.inspector.three-pane-enabled", true);
-Services.prefs.setBoolPref("devtools.inspector.show-three-pane-tooltip", false);
 registerCleanupFunction(() => {
   Services.prefs.clearUserPref("devtools.dump.emit");
   Services.prefs.clearUserPref("devtools.inspector.three-pane-enabled");
-  Services.prefs.clearUserPref("devtools.inspector.show-three-pane-tooltip");
   Services.prefs.clearUserPref("devtools.toolbox.host");
   Services.prefs.clearUserPref("devtools.toolbox.previousHost");
   Services.prefs.clearUserPref("devtools.toolbox.splitconsoleEnabled");
   Services.prefs.clearUserPref("devtools.toolbox.splitconsoleHeight");
 });
 
 registerCleanupFunction(async function cleanup() {
   while (gBrowser.tabs.length > 1) {
--- a/dom/plugins/ipc/IpdlTuple.h
+++ b/dom/plugins/ipc/IpdlTuple.h
@@ -2,16 +2,66 @@
 #define dom_plugins_ipc_ipdltuple_h
 
 #include "mozilla/plugins/FunctionBrokerIPCUtils.h"
 #include "mozilla/Variant.h"
 
 namespace mozilla {
 namespace plugins {
 
+// The contents of this "internal" namespace used to live inside the IpdlTuple
+// class, but that prevented the DECLARE_USE_COPY_CONSTRUCTORS declaration
+// that the IpdlTupleElement struct needs. Without it, nsTArray can end up
+// using a move constructor on this struct, which is not memmovable on
+// Windows.
+namespace internal {
+
+struct InvalidType {};
+
+// Like Variant but with a default constructor.
+template <typename ... Types>
+struct MaybeVariant
+{
+public:
+  MaybeVariant() : mValue(InvalidType()) {}
+  MaybeVariant(MaybeVariant&& o) : mValue(std::move(o.mValue)) {}
+
+  template <typename Param> void Set(const Param& aParam)
+  {
+    mValue = mozilla::AsVariant(aParam);
+  }
+
+  typedef mozilla::Variant<InvalidType, Types...> MaybeVariantType;
+  MaybeVariantType& GetVariant() { return mValue; }
+  const MaybeVariantType& GetVariant() const { return mValue; }
+
+private:
+  MaybeVariantType mValue;
+};
+
+#if defined(XP_WIN)
+typedef MaybeVariant<int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,
+                     int64_t,uint64_t,nsCString,bool,OpenFileNameIPC,
+                     OpenFileNameRetIPC,NativeWindowHandle,
+                     IPCSchannelCred,IPCInternetBuffers,StringArray,
+                     IPCPrintDlg> IpdlTupleElement;
+#else
+typedef MaybeVariant<int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,
+                     int64_t,uint64_t,nsCString,bool> IpdlTupleElement;
+#endif // defined(XP_WIN)
+
+} // namespace internal
+} // namespace plugins
+} // namespace mozilla
+
+DECLARE_USE_COPY_CONSTRUCTORS(mozilla::plugins::internal::IpdlTupleElement)
+
+namespace mozilla {
+namespace plugins {
+
 /**
  * IpdlTuple is used by automatic function brokering to pass parameter
  * lists for brokered functions.  It supports a limited set of types
  * (see IpdlTuple::IpdlTupleElement).
  */
 class IpdlTuple
 {
 public:
@@ -36,62 +86,33 @@ public:
   template <typename EltType>
   void AddElement(const EltType& aElt)
   {
     IpdlTupleElement* newEntry = mTupleElements.AppendElement();
     newEntry->Set(aElt);
   }
 
 private:
-  struct InvalidType {};
-
-  // Like Variant but with a default constructor.
-  template <typename ... Types>
-  struct MaybeVariant
-  {
-  public:
-    MaybeVariant() : mValue(InvalidType()) {}
-    MaybeVariant(MaybeVariant&& o) : mValue(std::move(o.mValue)) {}
-
-    template <typename Param> void Set(const Param& aParam)
-    {
-      mValue = mozilla::AsVariant(aParam);
-    }
-
-    typedef mozilla::Variant<InvalidType, Types...> MaybeVariantType;
-    MaybeVariantType& GetVariant() { return mValue; }
-    const MaybeVariantType& GetVariant() const { return mValue; }
-
-  private:
-    MaybeVariantType mValue;
-  };
-
-#if defined(XP_WIN)
-  typedef MaybeVariant<int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,
-                       int64_t,uint64_t,nsCString,bool,OpenFileNameIPC,
-                       OpenFileNameRetIPC,NativeWindowHandle,
-                       IPCSchannelCred,IPCInternetBuffers,StringArray,
-                       IPCPrintDlg> IpdlTupleElement;
-#else
-  typedef MaybeVariant<int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,
-                       int64_t,uint64_t,nsCString,bool> IpdlTupleElement;
-#endif // defined(XP_WIN)
+  typedef mozilla::plugins::internal::InvalidType InvalidType;
+  typedef mozilla::plugins::internal::IpdlTupleElement IpdlTupleElement;
 
   friend struct IPC::ParamTraits<IpdlTuple>;
   friend struct IPC::ParamTraits<IpdlTuple::IpdlTupleElement>;
   friend struct IPC::ParamTraits<IpdlTuple::InvalidType>;
 
   nsTArray<IpdlTupleElement> mTupleElements;
 };
 
+namespace internal {
 template <> template<>
-inline void IpdlTuple::IpdlTupleElement::Set<nsDependentCSubstring>(const nsDependentCSubstring& aParam)
+inline void IpdlTupleElement::Set<nsDependentCSubstring>(const nsDependentCSubstring& aParam)
 {
   mValue = MaybeVariantType(mozilla::VariantType<nsCString>(), aParam);
 }
+} // namespace internal
 
 } // namespace plugins
 } // namespace mozilla
 
 namespace IPC {
 
 using namespace mozilla::plugins;
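
A hedged aside on the hunks above: per the comment at the top of this file, DECLARE_USE_COPY_CONSTRUCTORS specializes a trait that tells nsTArray to copy elements rather than move/memmove them, and C++ only allows an explicit specialization at namespace scope, visible before nsTArray<IpdlTupleElement> is instantiated; that is impossible while the element type was nested inside IpdlTuple. A toy analogue of the pattern (invented names; this is not the real nsTArray machinery):

#include <cstdio>

// Primary trait: by default a container may relocate elements by move/memmove.
template <typename T>
struct UseCopyConstructors { static constexpr bool value = false; };

// Toy stand-in for DECLARE_USE_COPY_CONSTRUCTORS: an explicit specialization,
// which must appear at namespace scope.
#define TOY_DECLARE_USE_COPY_CONSTRUCTORS(T)                             \
  template <>                                                            \
  struct UseCopyConstructors<T> { static constexpr bool value = true; };

namespace internal {
struct Element {};  // stand-in for IpdlTupleElement
} // namespace internal

// Must be visible before any array of internal::Element is instantiated.
TOY_DECLARE_USE_COPY_CONSTRUCTORS(internal::Element)

int main() {
  printf("copy instead of memmove: %d\n",
         UseCopyConstructors<internal::Element>::value);
}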
 
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -32,17 +32,17 @@ void main(void) {
     int brush_flags = (aData.z >> 24) & 0xff;
     PrimitiveHeader ph = fetch_prim_header(prim_header_address);
 
     // Fetch the segment of this brush primitive we are drawing.
     int segment_address = ph.specific_prim_address +
                           VECS_PER_SPECIFIC_BRUSH +
                           segment_index * VECS_PER_SEGMENT;
 
-    vec4[2] segment_data = fetch_from_resource_cache_2(segment_address);
+    vec4[2] segment_data = fetch_from_gpu_cache_2(segment_address);
     RectWithSize local_segment_rect = RectWithSize(segment_data[0].xy, segment_data[0].zw);
 
     VertexInfo vi;
 
     // Fetch the dynamic picture that we are drawing on.
     PictureTask pic_task = fetch_picture_task(ph.render_task_index);
     ClipArea clip_area = fetch_clip_area(clip_address);
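
As an aside, the segment fetch above is plain linear addressing into the renamed GPU cache: per-brush data comes first, then segments at a fixed stride. A minimal C++ mirror of that arithmetic (the two constants are illustrative assumptions; the real VECS_PER_* values are shader #defines):

#include <cstdio>

constexpr int kVecsPerSpecificBrush = 3;  // assumed size of per-brush data
constexpr int kVecsPerSegment = 2;        // assumed size of one segment

int segment_address(int specific_prim_address, int segment_index) {
  // Skip the brush-specific data, then index segments by stride.
  return specific_prim_address + kVecsPerSpecificBrush +
         segment_index * kVecsPerSegment;
}

int main() {
  // Segment 2 of a primitive whose data starts at address 100: 100 + 3 + 2*2.
  printf("%d\n", segment_address(100, 2));  // prints 107
}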
 
--- a/gfx/webrender/res/brush_blend.glsl
+++ b/gfx/webrender/res/brush_blend.glsl
@@ -96,18 +96,18 @@ void brush_vs(
                 vec3(0.769 - 0.769 * invAmount, 0.686 + 0.314 * invAmount, 0.534 - 0.534 * invAmount),
                 vec3(0.189 - 0.189 * invAmount, 0.168 - 0.168 * invAmount, 0.131 + 0.869 * invAmount)
             );
             vColorOffset = vec3(0.0);
             break;
         }
         case 10: {
             // Color Matrix
-            vec4 mat_data[3] = fetch_from_resource_cache_3(user_data.z);
-            vec4 offset_data = fetch_from_resource_cache_1(user_data.z + 4);
+            vec4 mat_data[3] = fetch_from_gpu_cache_3(user_data.z);
+            vec4 offset_data = fetch_from_gpu_cache_1(user_data.z + 4);
             vColorMat = mat3(mat_data[0].xyz, mat_data[1].xyz, mat_data[2].xyz);
             vColorOffset = offset_data.rgb;
             break;
         }
         default: break;
     }
 }
 #endif
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -28,17 +28,17 @@ flat varying vec2 vTileRepeat;
 
 struct ImageBrushData {
     vec4 color;
     vec4 background_color;
     vec2 stretch_size;
 };
 
 ImageBrushData fetch_image_data(int address) {
-    vec4[3] raw_data = fetch_from_resource_cache_3(address);
+    vec4[3] raw_data = fetch_from_gpu_cache_3(address);
     ImageBrushData data = ImageBrushData(
         raw_data[0],
         raw_data[1],
         raw_data[2].xy
     );
     return data;
 }
 
--- a/gfx/webrender/res/brush_linear_gradient.glsl
+++ b/gfx/webrender/res/brush_linear_gradient.glsl
@@ -27,17 +27,17 @@ flat varying vec2 vTileRepeat;
 
 struct Gradient {
     vec4 start_end_point;
     int extend_mode;
     vec2 stretch_size;
 };
 
 Gradient fetch_gradient(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return Gradient(
         data[0],
         int(data[1].x),
         data[1].yz
     );
 }
 
 void brush_vs(
--- a/gfx/webrender/res/brush_mix_blend.glsl
+++ b/gfx/webrender/res/brush_mix_blend.glsl
@@ -19,17 +19,17 @@ void brush_vs(
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 unused
 ) {
     vec2 snapped_device_pos = snap_device_pos(vi);
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0));
     vOp = user_data.x;
 
     PictureTask src_task = fetch_picture_task(user_data.z);
     vec2 src_uv = snapped_device_pos +
                   src_task.common_data.task_rect.p0 -
                   src_task.content_origin;
     vSrcUv = vec3(src_uv / texture_size, src_task.common_data.texture_layer_index);
 
@@ -195,18 +195,18 @@ const int MixBlendMode_SoftLight   = 9;
 const int MixBlendMode_Difference  = 10;
 const int MixBlendMode_Exclusion   = 11;
 const int MixBlendMode_Hue         = 12;
 const int MixBlendMode_Saturation  = 13;
 const int MixBlendMode_Color       = 14;
 const int MixBlendMode_Luminosity  = 15;
 
 Fragment brush_fs() {
-    vec4 Cb = textureLod(sCacheRGBA8, vBackdropUv, 0.0);
-    vec4 Cs = textureLod(sCacheRGBA8, vSrcUv, 0.0);
+    vec4 Cb = textureLod(sPrevPassColor, vBackdropUv, 0.0);
+    vec4 Cs = textureLod(sPrevPassColor, vSrcUv, 0.0);
 
     if (Cb.a == 0.0) {
         return Fragment(Cs);
     }
     if (Cs.a == 0.0) {
         return Fragment(vec4(0.0));
     }
 
--- a/gfx/webrender/res/brush_radial_gradient.glsl
+++ b/gfx/webrender/res/brush_radial_gradient.glsl
@@ -26,17 +26,17 @@ flat varying vec2 vTileRepeat;
 struct RadialGradient {
     vec4 center_start_end_radius;
     float ratio_xy;
     int extend_mode;
     vec2 stretch_size;
 };
 
 RadialGradient fetch_radial_gradient(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return RadialGradient(
         data[0],
         data[1].x,
         int(data[1].y),
         data[1].zw
     );
 }
 
--- a/gfx/webrender/res/brush_solid.glsl
+++ b/gfx/webrender/res/brush_solid.glsl
@@ -14,17 +14,17 @@ varying vec2 vLocalPos;
 
 #ifdef WR_VERTEX_SHADER
 
 struct SolidBrush {
     vec4 color;
 };
 
 SolidBrush fetch_solid_primitive(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
+    vec4 data = fetch_from_gpu_cache_1(address);
     return SolidBrush(data);
 }
 
 void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     RectWithSize segment_rect,
--- a/gfx/webrender/res/brush_yuv_image.glsl
+++ b/gfx/webrender/res/brush_yuv_image.glsl
@@ -72,17 +72,17 @@ void write_uv_rect(
     #endif
 }
 
 struct YuvPrimitive {
     float coefficient;
 };
 
 YuvPrimitive fetch_yuv_primitive(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
+    vec4 data = fetch_from_gpu_cache_1(address);
     return YuvPrimitive(data.x);
 }
 
 void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     RectWithSize segment_rect,
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,render_task,resource_cache,snap,transform
+#include rect,render_task,gpu_cache,snap,transform
 
 #ifdef WR_VERTEX_SHADER
 
 #define SEGMENT_ALL         0
 #define SEGMENT_CORNER_TL   1
 #define SEGMENT_CORNER_TR   2
 #define SEGMENT_CORNER_BL   3
 #define SEGMENT_CORNER_BR   4
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -41,19 +41,19 @@ BlurTask fetch_blur_task(int address) {
 void main(void) {
     BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
     RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress);
 
     RectWithSize src_rect = src_task.task_rect;
     RectWithSize target_rect = blur_task.common_data.task_rect;
 
 #if defined WR_FEATURE_COLOR_TARGET
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0).xy);
 #else
-    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
 #endif
     vUv.z = src_task.texture_layer_index;
     vSigma = blur_task.blur_radius;
 
     // Ensure that the support is an even number of pixels to simplify the
     // fragment shader logic.
     //
     // TODO(pcwalton): Actually make use of this fact and use the texture
@@ -84,20 +84,20 @@ void main(void) {
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 #if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
-#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassColor, uv)
 #else
 #define SAMPLE_TYPE float
-#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassAlpha, uv).r
 #endif
 
 // TODO(gw): Write a fast path blur that handles smaller blur radii
 //           with a offset / weight uniform table and a constant
 //           loop iteration count!
 
 // TODO(gw): Make use of the bilinear sampling trick to reduce
 //           the number of texture fetches needed for a gaussian blur.
--- a/gfx/webrender/res/cs_clip_box_shadow.glsl
+++ b/gfx/webrender/res/cs_clip_box_shadow.glsl
@@ -21,17 +21,17 @@ struct BoxShadowData {
     vec2 src_rect_size;
     float clip_mode;
     int stretch_mode_x;
     int stretch_mode_y;
     RectWithSize dest_rect;
 };
 
 BoxShadowData fetch_data(ivec2 address) {
-    vec4 data[3] = fetch_from_resource_cache_3_direct(address);
+    vec4 data[3] = fetch_from_gpu_cache_3_direct(address);
     RectWithSize dest_rect = RectWithSize(data[2].xy, data[2].zw);
     BoxShadowData bs_data = BoxShadowData(
         data[0].xy,
         data[0].z,
         int(data[1].x),
         int(data[1].y),
         dest_rect
     );
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -12,17 +12,17 @@ flat varying vec4 vClipMaskUvInnerRect;
 flat varying float vLayer;
 
 #ifdef WR_VERTEX_SHADER
 struct ImageMaskData {
     RectWithSize local_rect;
 };
 
 ImageMaskData fetch_mask_data(ivec2 address) {
-    vec4 data = fetch_from_resource_cache_1_direct(address);
+    vec4 data = fetch_from_gpu_cache_1_direct(address);
     RectWithSize local_rect = RectWithSize(data.xy, data.zw);
     ImageMaskData mask_data = ImageMaskData(local_rect);
     return mask_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
--- a/gfx/webrender/res/cs_clip_line.glsl
+++ b/gfx/webrender/res/cs_clip_line.glsl
@@ -23,17 +23,17 @@ flat varying vec2 vLocalOrigin;
 struct LineDecorationData {
     RectWithSize local_rect;
     float wavyLineThickness;
     float style;
     float orientation;
 };
 
 LineDecorationData fetch_data(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     RectWithSize local_rect = RectWithSize(data[0].xy, data[0].zw);
     LineDecorationData line_data = LineDecorationData(
         local_rect,
         data[1].x,
         data[1].y,
         data[1].z
     );
     return line_data;
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -13,31 +13,31 @@ flat varying vec4 vClipCenter_Radius_BR;
 
 #ifdef WR_VERTEX_SHADER
 struct ClipRect {
     RectWithSize rect;
     vec4 mode;
 };
 
 ClipRect fetch_clip_rect(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     ClipRect rect = ClipRect(RectWithSize(data[0].xy, data[0].zw), data[1]);
     return rect;
 }
 
 struct ClipCorner {
     RectWithSize rect;
     vec4 outer_inner_radius;
 };
 
 // index is of type float instead of int because using an int led to shader
 // miscompilations with a macOS 10.12 Intel driver.
 ClipCorner fetch_clip_corner(ivec2 address, float index) {
     address += ivec2(2 + 2 * int(index), 0);
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     ClipCorner corner = ClipCorner(RectWithSize(data[0].xy, data[0].zw), data[1]);
     return corner;
 }
 
 struct ClipData {
     ClipRect rect;
     ClipCorner top_left;
     ClipCorner top_right;
--- a/gfx/webrender/res/cs_scale.glsl
+++ b/gfx/webrender/res/cs_scale.glsl
@@ -27,19 +27,19 @@ ScaleTask fetch_scale_task(int address) 
 void main(void) {
     ScaleTask scale_task = fetch_scale_task(aScaleRenderTaskAddress);
     RenderTaskCommonData src_task = fetch_render_task_common_data(aScaleSourceTaskAddress);
 
     RectWithSize src_rect = src_task.task_rect;
     RectWithSize target_rect = scale_task.common_data.task_rect;
 
 #if defined WR_FEATURE_COLOR_TARGET
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0).xy);
 #else
-    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
 #endif
 
     vUv.z = src_task.texture_layer_index;
 
     vUvRect = vec4(src_rect.p0 + vec2(0.5),
                    src_rect.p0 + src_rect.size - vec2(0.5)) / texture_size.xyxy;
 
     vec2 pos = target_rect.p0 + target_rect.size * aPosition.xy;
@@ -49,20 +49,20 @@ void main(void) {
 }
 
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 #if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
-#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassColor, uv)
 #else
 #define SAMPLE_TYPE float
-#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassAlpha, uv).r
 #endif
 
 void main(void) {
     vec2 st = clamp(vUv.xy, vUvRect.xy, vUvRect.zw);
     oFragColor = vec4(SAMPLE_TEXTURE(vec3(st, vUv.z)));
 }
 
 #endif
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/gpu_cache.glsl
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache;
+
+#define VECS_PER_IMAGE_RESOURCE     2
+
+// TODO(gw): This is here temporarily while we have
+//           both GPU store and cache. When the GPU
+//           store code is removed, we can change the
+//           PrimitiveInstance instance structure to
+//           use 2x unsigned shorts as vertex attributes
+//           instead of an int, and encode the UV directly
+//           in the vertices.
+ivec2 get_gpu_cache_uv(int address) {
+    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
+                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
+}
+
+vec4[2] fetch_from_gpu_cache_2_direct(ivec2 address) {
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0))
+    );
+}
+
+vec4[2] fetch_from_gpu_cache_2(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0))
+    );
+}
+
+#ifdef WR_VERTEX_SHADER
+
+vec4[8] fetch_from_gpu_cache_8(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[8](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(4, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(5, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(6, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(7, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3_direct(ivec2 address) {
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0))
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4_direct(ivec2 address) {
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(3, 0))
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0))
+    );
+}
+
+vec4 fetch_from_gpu_cache_1_direct(ivec2 address) {
+    return texelFetch(sGpuCache, address, 0);
+}
+
+vec4 fetch_from_gpu_cache_1(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return texelFetch(sGpuCache, uv, 0);
+}
+
+//TODO: image resource is too specific for this module
+
+struct ImageResource {
+    RectWithEndpoint uv_rect;
+    float layer;
+    vec3 user_data;
+};
+
+ImageResource fetch_image_resource(int address) {
+    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
+}
+
+ImageResource fetch_image_resource_direct(ivec2 address) {
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
+}
+
+// Fetch optional extra data for a texture cache resource. This can contain
+// a polygon defining a UV rect within the texture cache resource.
+struct ImageResourceExtra {
+    vec2 st_tl;
+    vec2 st_tr;
+    vec2 st_bl;
+    vec2 st_br;
+};
+
+ImageResourceExtra fetch_image_resource_extra(int address) {
+    vec4 data[2] = fetch_from_gpu_cache_2(address + VECS_PER_IMAGE_RESOURCE);
+    return ImageResourceExtra(
+        data[0].xy,
+        data[0].zw,
+        data[1].xy,
+        data[1].zw
+    );
+}
+
+#endif //WR_VERTEX_SHADER
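
For a concrete feel, the address-to-texel mapping above is row-major arithmetic over a fixed-width texture; a quick C++ mirror (1024 is only an assumed width; the real WR_MAX_VERTEX_TEXTURE_WIDTH is injected as a #define when the shader is built):

#include <cstdio>

constexpr unsigned kMaxVertexTextureWidth = 1024;  // assumption, see above

struct Ivec2 { unsigned x, y; };

// Mirror of get_gpu_cache_uv: split a linear cache address into column/row.
Ivec2 get_gpu_cache_uv(int address) {
  return { static_cast<unsigned>(address) % kMaxVertexTextureWidth,
           static_cast<unsigned>(address) / kMaxVertexTextureWidth };
}

int main() {
  Ivec2 uv = get_gpu_cache_uv(1030);
  printf("(%u, %u)\n", uv.x, uv.y);  // prints (6, 1): row 1, six texels in
}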
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -1,30 +1,27 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,render_task,resource_cache,snap,transform
+#include rect,render_task,gpu_cache,snap,transform
 
 #define EXTEND_MODE_CLAMP  0
 #define EXTEND_MODE_REPEAT 1
 
 #define SUBPX_DIR_NONE        0
 #define SUBPX_DIR_HORIZONTAL  1
 #define SUBPX_DIR_VERTICAL    2
 #define SUBPX_DIR_MIXED       3
 
 #define RASTER_LOCAL            0
 #define RASTER_SCREEN           1
 
-uniform sampler2DArray sCacheA8;
-uniform sampler2DArray sCacheRGBA8;
-
-// An A8 target for standalone tasks that is available to all passes.
-uniform sampler2DArray sSharedCacheA8;
+uniform sampler2DArray sPrevPassAlpha;
+uniform sampler2DArray sPrevPassColor;
 
 vec2 clamp_rect(vec2 pt, RectWithSize rect) {
     return clamp(pt, rect.p0, rect.p0 + rect.size);
 }
 
 // TODO: convert back to RectWithEndPoint if driver issues are resolved, if ever.
 flat varying vec4 vClipMaskUvBounds;
 // XY and W are homogeneous coordinates, Z is the layer index
@@ -116,24 +113,22 @@ VertexInfo write_vertex(RectWithSize ins
         transform.m,
         snap_rect
     );
 
     // Transform the current vertex to world space.
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
-    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
+    vec2 device_pos = world_pos.xy * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_pos + snap_offset -
-                     task.content_origin +
-                     task.common_data.task_rect.p0;
+    vec2 final_offset = snap_offset - task.content_origin + task.common_data.task_rect.p0;
 
-    gl_Position = uTransform * vec4(final_pos, z, 1.0);
+    gl_Position = uTransform * vec4(device_pos + final_offset * world_pos.w, z * world_pos.w, world_pos.w);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
         snap_offset,
         world_pos
     );
 
     return vi;
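
One way to read the new gl_Position math above: instead of dividing by w in the vertex shader and emitting w = 1, write_vertex now emits a genuinely homogeneous position. Writing p for world_pos, r for uDevicePixelRatio, and o for the combined offset snap_offset - task.content_origin + task.common_data.task_rect.p0:

$$ \text{old: } \Big(\tfrac{p_{xy}}{p_w}\,r + o,\; z,\; 1\Big) \qquad \text{new: } \big(p_{xy}\,r + o\,p_w,\; z\,p_w,\; p_w\big) = p_w\Big(\tfrac{p_{xy}}{p_w}\,r + o,\; z,\; 1\Big) $$

Because uTransform is linear, scaling its input by p_w scales gl_Position by p_w, so the position after the hardware perspective divide is unchanged. What changes is that the rasterizer now sees the true w, which gives perspective-correct interpolation of varyings and proper clipping as w approaches zero.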
@@ -249,17 +244,17 @@ float do_clip() {
     if (!all(bvec4(left, right))) {
         return 0.0;
     }
     // finally, the slow path - fetch the mask value from an image
     // Note the Z getting rounded to the nearest integer because the variable
     // is still interpolated and becomes a subject of precision-caused
     // fluctuations, see https://bugzilla.mozilla.org/show_bug.cgi?id=1491911
     ivec3 tc = ivec3(mask_uv, vClipMaskUv.z + 0.5);
-    return texelFetch(sCacheA8, tc, 0).r;
+    return texelFetch(sPrevPassAlpha, tc, 0).r;
 }
 
 #ifdef WR_FEATURE_DITHERING
 vec4 dither(vec4 color) {
     const int matrix_mask = 7;
 
     ivec2 pos = ivec2(gl_FragCoord.xy) & ivec2(matrix_mask);
     float noise_normalized = (texelFetch(sDither, pos, 0).r * 255.0 + 0.5) / 64.0;
@@ -292,15 +287,15 @@ vec4 sample_gradient(int address, float 
     //     floor(x) is the gradient color entry index
     //     fract(x) is the linear filtering factor between start and end
     int lut_offset = 2 * int(floor(x));     // There is a [start, end] color per entry.
 
     // Ensure we don't fetch outside the valid range of the LUT.
     lut_offset = clamp(lut_offset, 0, 2 * (GRADIENT_ENTRIES + 1));
 
     // Fetch the start and end color.
-    vec4 texels[2] = fetch_from_resource_cache_2(address + lut_offset);
+    vec4 texels[2] = fetch_from_gpu_cache_2(address + lut_offset);
 
     // Finally interpolate and apply dithering
     return dither(mix(texels[0], texels[1], fract(x)));
 }
 
 #endif //WR_FRAGMENT_SHADER
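
The LUT addressing in sample_gradient above, mirrored in C++ for a worked example (GRADIENT_ENTRIES = 128 is an assumption; the shader receives the real value as a #define):

#include <algorithm>
#include <cmath>
#include <cstdio>

constexpr int kGradientEntries = 128;  // assumed table size

// Each gradient entry is a [start, end] color pair, i.e. two vec4 texels,
// so entry i begins at texel offset 2*i from the gradient's base address.
int gradient_lut_offset(float x) {
  int offset = 2 * static_cast<int>(std::floor(x));
  // Never address past the last [start, end] pair.
  return std::clamp(offset, 0, 2 * (kGradientEntries + 1));
}

int main() {
  printf("%d %d %d\n",
         gradient_lut_offset(-0.25f),   // 0, clamped up from -2
         gradient_lut_offset(3.75f),    // 6, entry 3 (fract 0.75 blends)
         gradient_lut_offset(1.0e6f));  // 258, clamped to the table end
}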
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -9,21 +9,21 @@ flat varying vec4 vUvSampleBounds;
 
 #ifdef WR_VERTEX_SHADER
 struct SplitGeometry {
     vec2 local[4];
     RectWithSize local_rect;
 };
 
 SplitGeometry fetch_split_geometry(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
+    ivec2 uv = get_gpu_cache_uv(address);
 
-    vec4 data0 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0));
-    vec4 data1 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0));
-    vec4 data2 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0));
+    vec4 data0 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0));
+    vec4 data1 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0));
+    vec4 data2 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0));
 
     SplitGeometry geo;
     geo.local = vec2[4](
         data0.xy,
         data0.zw,
         data1.xy,
         data1.zw
     );
@@ -81,17 +81,17 @@ void main(void) {
     write_clip(
         world_pos,
         vec2(0.0),
         clip_area
     );
 
     gl_Position = uTransform * final_pos;
 
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0));
     vec2 uv0 = res.uv_rect.p0;
     vec2 uv1 = res.uv_rect.p1;
 
     vec2 min_uv = min(uv0, uv1);
     vec2 max_uv = max(uv0, uv1);
 
     vUvSampleBounds = vec4(
         min_uv + vec2(0.5),
@@ -110,11 +110,11 @@ void main(void) {
     vUv = vec3(uv / texture_size, res.layer);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float alpha = do_clip();
     vec2 uv = clamp(vUv.xy, vUvSampleBounds.xy, vUvSampleBounds.zw);
-    oFragColor = alpha * textureLod(sCacheRGBA8, vec3(uv, vUv.z), 0.0);
+    oFragColor = alpha * textureLod(sPrevPassColor, vec3(uv, vUv.z), 0.0);
 }
 #endif
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -23,17 +23,17 @@ struct Glyph {
 };
 
 Glyph fetch_glyph(int specific_prim_address,
                   int glyph_index) {
     // Two glyphs are packed in each texel in the GPU cache.
     int glyph_address = specific_prim_address +
                         VECS_PER_TEXT_RUN +
                         int(uint(glyph_index) / GLYPHS_PER_GPU_BLOCK);
-    vec4 data = fetch_from_resource_cache_1(glyph_address);
+    vec4 data = fetch_from_gpu_cache_1(glyph_address);
     // Select XY or ZW based on glyph index.
     // We use "!= 0" instead of "== 1" here in order to work around a driver
     // bug with equality comparisons on integers.
     vec2 glyph = mix(data.xy, data.zw,
                      bvec2(uint(glyph_index) % GLYPHS_PER_GPU_BLOCK != 0U));
 
     return Glyph(glyph);
 }
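
A plain C++ sketch of the packing that fetch_glyph decodes (per the comment above, two glyph offsets share one texel, and the parity of the glyph index selects .xy or .zw):

#include <cstdio>

constexpr unsigned kGlyphsPerBlock = 2;  // matches GLYPHS_PER_GPU_BLOCK

struct Vec4 { float x, y, z, w; };
struct Vec2 { float x, y; };

// Glyph 2n lives in the texel's .xy, glyph 2n+1 in its .zw; this mirrors the
// shader's mix(data.xy, data.zw, glyph_index % 2 != 0).
Vec2 unpack_glyph(const Vec4& texel, unsigned glyph_index) {
  return (glyph_index % kGlyphsPerBlock != 0) ? Vec2{texel.z, texel.w}
                                              : Vec2{texel.x, texel.y};
}

int main() {
  Vec4 texel{1.0f, 2.0f, 3.0f, 4.0f};
  Vec2 even = unpack_glyph(texel, 4);  // glyph 4 -> (1, 2)
  Vec2 odd  = unpack_glyph(texel, 5);  // glyph 5 -> (3, 4)
  printf("(%g, %g) (%g, %g)\n", even.x, even.y, odd.x, odd.y);
}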
@@ -41,28 +41,28 @@ Glyph fetch_glyph(int specific_prim_addr
 struct GlyphResource {
     vec4 uv_rect;
     float layer;
     vec2 offset;
     float scale;
 };
 
 GlyphResource fetch_glyph_resource(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
 }
 
 struct TextRun {
     vec4 color;
     vec4 bg_color;
     vec2 offset;
 };
 
 TextRun fetch_text_run(int address) {
-    vec4 data[3] = fetch_from_resource_cache_3(address);
+    vec4 data[3] = fetch_from_gpu_cache_3(address);
     return TextRun(data[0], data[1], data[2].xy);
 }
 
 VertexInfo write_text_vertex(RectWithSize local_clip_rect,
                              float z,
                              Transform transform,
                              PictureTask task,
                              vec2 text_offset,
deleted file mode 100644
--- a/gfx/webrender/res/resource_cache.glsl
+++ /dev/null
@@ -1,137 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-uniform HIGHP_SAMPLER_FLOAT sampler2D sResourceCache;
-
-#define VECS_PER_IMAGE_RESOURCE     2
-
-// TODO(gw): This is here temporarily while we have
-//           both GPU store and cache. When the GPU
-//           store code is removed, we can change the
-//           PrimitiveInstance instance structure to
-//           use 2x unsigned shorts as vertex attributes
-//           instead of an int, and encode the UV directly
-//           in the vertices.
-ivec2 get_resource_cache_uv(int address) {
-    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
-                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
-}
-
-vec4[2] fetch_from_resource_cache_2_direct(ivec2 address) {
-    return vec4[2](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0))
-    );
-}
-
-vec4[2] fetch_from_resource_cache_2(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[2](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0))
-    );
-}
-
-#ifdef WR_VERTEX_SHADER
-
-vec4[8] fetch_from_resource_cache_8(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[8](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(3, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(4, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(5, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(6, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(7, 0))
-    );
-}
-
-vec4[3] fetch_from_resource_cache_3(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[3](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0))
-    );
-}
-
-vec4[3] fetch_from_resource_cache_3_direct(ivec2 address) {
-    return vec4[3](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(2, 0))
-    );
-}
-
-vec4[4] fetch_from_resource_cache_4_direct(ivec2 address) {
-    return vec4[4](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(3, 0))
-    );
-}
-
-vec4[4] fetch_from_resource_cache_4(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[4](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(3, 0))
-    );
-}
-
-vec4 fetch_from_resource_cache_1_direct(ivec2 address) {
-    return texelFetch(sResourceCache, address, 0);
-}
-
-vec4 fetch_from_resource_cache_1(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return texelFetch(sResourceCache, uv, 0);
-}
-
-//TODO: image resource is too specific for this module
-
-struct ImageResource {
-    RectWithEndpoint uv_rect;
-    float layer;
-    vec3 user_data;
-};
-
-ImageResource fetch_image_resource(int address) {
-    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw);
-}
-
-ImageResource fetch_image_resource_direct(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
-    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw);
-}
-
-// Fetch optional extra data for a texture cache resource. This can contain
-// a polygon defining a UV rect within the texture cache resource.
-struct ImageResourceExtra {
-    vec2 st_tl;
-    vec2 st_tr;
-    vec2 st_bl;
-    vec2 st_br;
-};
-
-ImageResourceExtra fetch_image_resource_extra(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address + VECS_PER_IMAGE_RESOURCE);
-    return ImageResourceExtra(
-        data[0].xy,
-        data[0].zw,
-        data[1].xy,
-        data[1].zw
-    );
-}
-
-#endif //WR_VERTEX_SHADER
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -9,17 +9,17 @@ use clip::{ClipDataStore, ClipNodeFlags,
 use clip_scroll_tree::{ClipScrollTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
 use euclid::vec3;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheHandle, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders};
 use gpu_types::{ClipMaskInstance, SplitCompositeInstance};
 use gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
 use gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{FastHashMap, SavedTargetIndex, TextureSource};
 use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Clipper, Polygon, Splitter};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentTaskId, DeferredResolve};
 use prim_store::{EdgeAaSegmentMask, ImageSource};
 use prim_store::{PrimitiveMetadata, VisibleGradientTile, PrimitiveInstance};
 use prim_store::{BorderSource, Primitive, PrimitiveDetails};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind, ShaderColorMode};
@@ -61,39 +61,39 @@ pub enum BatchKind {
 }
 
 /// Optional textures that can be used as a source in the shaders.
 /// Textures that are not used by the batch are equal to TextureId::invalid().
 #[derive(Copy, Clone, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BatchTextures {
-    pub colors: [SourceTexture; 3],
+    pub colors: [TextureSource; 3],
 }
 
 impl BatchTextures {
     pub fn no_texture() -> Self {
         BatchTextures {
-            colors: [SourceTexture::Invalid; 3],
+            colors: [TextureSource::Invalid; 3],
         }
     }
 
     pub fn render_target_cache() -> Self {
         BatchTextures {
             colors: [
-                SourceTexture::CacheRGBA8,
-                SourceTexture::CacheA8,
-                SourceTexture::Invalid,
+                TextureSource::PrevPassColor,
+                TextureSource::PrevPassAlpha,
+                TextureSource::Invalid,
             ],
         }
     }
 
-    pub fn color(texture: SourceTexture) -> Self {
+    pub fn color(texture: TextureSource) -> Self {
         BatchTextures {
-            colors: [texture, texture, SourceTexture::Invalid],
+            colors: [texture, texture, TextureSource::Invalid],
         }
     }
 }
 
 #[derive(Copy, Clone, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BatchKey {
@@ -115,18 +115,18 @@ impl BatchKey {
         self.kind == other.kind && self.blend_mode == other.blend_mode &&
             textures_compatible(self.textures.colors[0], other.textures.colors[0]) &&
             textures_compatible(self.textures.colors[1], other.textures.colors[1]) &&
             textures_compatible(self.textures.colors[2], other.textures.colors[2])
     }
 }
 
 #[inline]
-fn textures_compatible(t1: SourceTexture, t2: SourceTexture) -> bool {
-    t1 == SourceTexture::Invalid || t2 == SourceTexture::Invalid || t1 == t2
+fn textures_compatible(t1: TextureSource, t2: TextureSource) -> bool {
+    t1 == TextureSource::Invalid || t2 == TextureSource::Invalid || t1 == t2
 }
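
The `Invalid` variant doubles as a wildcard during batching: a slot with no texture bound can merge with any concrete texture. A minimal standalone sketch of that rule, with a hypothetical `Tex` enum standing in for `TextureSource`:

    // Hypothetical stand-in for TextureSource, for illustration only.
    #[derive(Copy, Clone, PartialEq, Debug)]
    enum Tex { Invalid, Cache(usize) }

    // Mirrors textures_compatible(): Invalid acts as a wildcard slot.
    fn compatible(a: Tex, b: Tex) -> bool {
        a == Tex::Invalid || b == Tex::Invalid || a == b
    }

    fn main() {
        // A batch bound to cache texture 7 can absorb work that binds
        // nothing in that slot...
        assert!(compatible(Tex::Cache(7), Tex::Invalid));
        // ...but two different concrete textures force separate batches.
        assert!(!compatible(Tex::Cache(7), Tex::Cache(8)));
    }
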
 
 pub struct AlphaBatchList {
     pub batches: Vec<PrimitiveBatch>,
     pub item_rects: Vec<Vec<WorldRect>>,
 }
 
 impl AlphaBatchList {
@@ -756,19 +756,19 @@ impl AlphaBatchBuilder {
                                                 let secondary_id = picture.secondary_render_task_id.expect("no secondary!?");
                                                 let saved_index = render_tasks[secondary_id].saved_index.expect("no saved index!?");
                                                 debug_assert_ne!(saved_index, SavedTargetIndex::PENDING);
 
                                                 // Build BatchTextures for shadow/content
                                                 let shadow_textures = BatchTextures::render_target_cache();
                                                 let content_textures = BatchTextures {
                                                     colors: [
-                                                        SourceTexture::RenderTaskCache(saved_index),
-                                                        SourceTexture::Invalid,
-                                                        SourceTexture::Invalid,
+                                                        TextureSource::RenderTaskCache(saved_index),
+                                                        TextureSource::Invalid,
+                                                        TextureSource::Invalid,
                                                     ],
                                                 };
 
                                                 // Build batch keys for shadow/content
                                                 let shadow_key = BatchKey::new(kind, non_segmented_blend_mode, shadow_textures);
                                                 let content_key = BatchKey::new(kind, non_segmented_blend_mode, content_textures);
 
                                                 // Retrieve the UV rect addresses for shadow/content.
@@ -1084,30 +1084,30 @@ impl AlphaBatchBuilder {
                 let batch_list = &mut self.batch_list;
 
                 ctx.resource_cache.fetch_glyphs(
                     text_cpu.used_font.clone(),
                     &text_cpu.glyph_keys,
                     glyph_fetch_buffer,
                     gpu_cache,
                     |texture_id, mut glyph_format, glyphs| {
-                        debug_assert_ne!(texture_id, SourceTexture::Invalid);
+                        debug_assert_ne!(texture_id, TextureSource::Invalid);
 
                         // Ignore color and only sample alpha when shadowing.
                         if text_cpu.shadow {
                             glyph_format = glyph_format.ignore_color();
                         }
 
                         let subpx_dir = subpx_dir.limit_by(glyph_format);
 
                         let textures = BatchTextures {
                             colors: [
                                 texture_id,
-                                SourceTexture::Invalid,
-                                SourceTexture::Invalid,
+                                TextureSource::Invalid,
+                                TextureSource::Invalid,
                             ],
                         };
 
                         let kind = BatchKind::TextRun(glyph_format);
 
                         let (blend_mode, color_mode) = match glyph_format {
                             GlyphFormat::Subpixel |
                             GlyphFormat::TransformedSubpixel => {
@@ -1339,17 +1339,17 @@ fn get_image_tile_params(
 
     let cache_item = resolve_image(
         request,
         resource_cache,
         gpu_cache,
         deferred_resolves,
     );
 
-    if cache_item.texture_id == SourceTexture::Invalid {
+    if cache_item.texture_id == TextureSource::Invalid {
         None
     } else {
         let textures = BatchTextures::color(cache_item.texture_id);
         Some((
             BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
             textures,
             [
                 cache_item.uv_rect_handle.as_int(gpu_cache),
@@ -1388,17 +1388,17 @@ impl BrushPrimitive {
                             .get_cached_render_task(rt_handle);
                         resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
                     }
                 };
                 if cfg!(debug_assertions) && is_chased {
                     println!("\tsource {:?}", cache_item);
                 }
 
-                if cache_item.texture_id == SourceTexture::Invalid {
+                if cache_item.texture_id == TextureSource::Invalid {
                     None
                 } else {
                     let textures = BatchTextures::color(cache_item.texture_id);
 
                     Some((
                         BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
                         textures,
                         [
@@ -1426,17 +1426,17 @@ impl BrushPrimitive {
                             None => return None,
                         };
                         let rt_cache_entry = resource_cache
                             .get_cached_render_task(rt_handle);
                         resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
                     }
                 };
 
-                if cache_item.texture_id == SourceTexture::Invalid {
+                if cache_item.texture_id == TextureSource::Invalid {
                     None
                 } else {
                     let textures = BatchTextures::color(cache_item.texture_id);
 
                     Some((
                         BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
                         textures,
                         [
@@ -1503,17 +1503,17 @@ impl BrushPrimitive {
                             rendering: image_rendering,
                             tile: None,
                         },
                         resource_cache,
                         gpu_cache,
                         deferred_resolves,
                     );
 
-                    if cache_item.texture_id == SourceTexture::Invalid {
+                    if cache_item.texture_id == TextureSource::Invalid {
                         warn!("Warnings: skip a PrimitiveKind::YuvImage");
                         return None;
                     }
 
                     textures.colors[channel] = cache_item.texture_id;
                     uv_rect_addresses[channel] = cache_item.uv_rect_handle.as_int(gpu_cache);
                 }
 
@@ -1659,17 +1659,17 @@ pub fn resolve_image(
             // by the render thread.
             match image_properties.external_image {
                 Some(external_image) => {
                     // This is an external texture - we will add it to
                     // the deferred resolves list to be patched by
                     // the render thread...
                     let cache_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
                     let cache_item = CacheItem {
-                        texture_id: SourceTexture::External(external_image),
+                        texture_id: TextureSource::External(external_image),
                         uv_rect_handle: cache_handle,
                         uv_rect: DeviceUintRect::new(
                             DeviceUintPoint::zero(),
                             image_properties.descriptor.size,
                         ),
                         texture_layer: 0,
                     };
 
@@ -1701,18 +1701,18 @@ pub fn resolve_image(
 /// Batcher managing draw calls into the clip mask (in the RT cache).
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ClipBatcher {
     /// Rectangle draws fill up the rectangles with rounded corners.
     pub rectangles: Vec<ClipMaskInstance>,
     /// Image draws apply the image masking.
-    pub images: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
-    pub box_shadows: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
+    pub images: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
+    pub box_shadows: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
     pub line_decorations: Vec<ClipMaskInstance>,
 }
 
 impl ClipBatcher {
     pub fn new() -> Self {
         ClipBatcher {
             rectangles: Vec::new(),
             images: FastHashMap::default(),
@@ -1810,17 +1810,17 @@ impl ClipBatcher {
                     let rt_handle = info
                         .cache_handle
                         .as_ref()
                         .expect("bug: render task handle not allocated");
                     let rt_cache_entry = resource_cache
                         .get_cached_render_task(rt_handle);
                     let cache_item = resource_cache
                         .get_texture_cache_item(&rt_cache_entry.handle);
-                    debug_assert_ne!(cache_item.texture_id, SourceTexture::Invalid);
+                    debug_assert_ne!(cache_item.texture_id, TextureSource::Invalid);
 
                     self.box_shadows
                         .entry(cache_item.texture_id)
                         .or_insert(Vec::new())
                         .push(ClipMaskInstance {
                             clip_data_address: gpu_address,
                             resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
                             ..instance
@@ -1841,19 +1841,19 @@ impl ClipBatcher {
                         ..instance
                     });
                 }
             }
         }
     }
 }
 
-fn get_buffer_kind(texture: SourceTexture) -> ImageBufferKind {
+fn get_buffer_kind(texture: TextureSource) -> ImageBufferKind {
     match texture {
-        SourceTexture::External(ext_image) => {
+        TextureSource::External(ext_image) => {
             match ext_image.image_type {
                 ExternalImageType::TextureHandle(target) => {
                     target.into()
                 }
                 ExternalImageType::Buffer => {
                     // The ExternalImageType::Buffer should be handled by resource_cache.
                     // It should go through the non-external case.
                     panic!("Unexpected non-texture handle type");
--- a/gfx/webrender/src/device/gl.rs
+++ b/gfx/webrender/src/device/gl.rs
@@ -429,16 +429,21 @@ impl ExternalTexture {
     }
 
     #[cfg(feature = "replay")]
     pub fn internal_id(&self) -> gl::GLuint {
         self.id
     }
 }
 
+/// WebRender interface to an OpenGL texture.
+///
+/// Because freeing a texture requires various device handles that are not
+/// reachable from this struct, manual destruction via `Device` is required.
+/// Our `Drop` implementation asserts that this has happened.
 pub struct Texture {
     id: gl::GLuint,
     target: gl::GLuint,
     layer_count: i32,
     format: ImageFormat,
     width: u32,
     height: u32,
     filter: TextureFilter,
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -20,54 +20,60 @@ use std::sync::Arc;
 use capture::{CaptureConfig, ExternalCaptureImage};
 #[cfg(feature = "replay")]
 use capture::PlainExternalImage;
 use tiling;
 
 pub type FastHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
 pub type FastHashSet<K> = HashSet<K, BuildHasherDefault<FxHasher>>;
 
-// An ID for a texture that is owned by the
-// texture cache module. This can include atlases
-// or standalone textures allocated via the
-// texture cache (e.g. if an image is too large
-// to be added to an atlas). The texture cache
-// manages the allocation and freeing of these
-// IDs, and the rendering thread maintains a
-// map from cache texture ID to native texture.
-
+/// An ID for a texture that is owned by the `texture_cache` module.
+///
+/// This can include atlases or standalone textures allocated via the texture
+/// cache (e.g. if an image is too large to be added to an atlas). The texture
+/// cache manages the allocation and freeing of these IDs, and the rendering
+/// thread maintains a map from cache texture ID to native texture.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CacheTextureId(pub usize);
 
+/// Identifies a render pass target that is persisted until the end of the frame.
+///
+/// By default, only the targets of the immediately-preceding pass are bound as
+/// inputs to the next pass. However, tasks can opt into having their target
+/// preserved in a list until the end of the frame, and this type specifies the
+/// index in that list.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct SavedTargetIndex(pub usize);
 
 impl SavedTargetIndex {
     pub const PENDING: Self = SavedTargetIndex(!0);
 }
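
A standalone sketch of the intended lifecycle of this sentinel; `mark_for_saving()` is the real entry point, and everything else here is illustrative:

    #[derive(Debug, Copy, Clone, Eq, PartialEq)]
    struct SavedTargetIndex(usize);

    impl SavedTargetIndex {
        const PENDING: Self = SavedTargetIndex(!0);
    }

    fn main() {
        // Opting in (mark_for_saving() in the real code) records the sentinel...
        let mut saved = SavedTargetIndex::PENDING;
        assert_eq!(saved, SavedTargetIndex::PENDING);
        // ...and pass building later replaces it with a real slot in the
        // saved-targets list; batching code asserts this has happened.
        saved = SavedTargetIndex(0);
        assert_ne!(saved, SavedTargetIndex::PENDING);
        assert_eq!(saved.0, 0);
    }
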
 
-// Represents the source for a texture.
-// These are passed from throughout the
-// pipeline until they reach the rendering
-// thread, where they are resolved to a
-// native texture ID.
-
+/// Identifies the source of an input texture to a shader.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum SourceTexture {
+pub enum TextureSource {
+    /// Equivalent to `None`, allowing us to avoid using `Option`s everywhere.
     Invalid,
+    /// An entry in the texture cache.
     TextureCache(CacheTextureId),
+    /// An external image texture, managed by the embedding.
     External(ExternalImageData),
-    CacheA8,
-    CacheRGBA8,
+    /// The alpha target of the immediately-preceding pass.
+    PrevPassAlpha,
+    /// The color target of the immediately-preceding pass.
+    PrevPassColor,
+    /// A render target from an earlier pass. Unlike the targets of the
+    /// immediately-preceding pass, these are not made available automatically,
+    /// but are instead opted into by the `RenderTask` (see `mark_for_saving()`).
     RenderTaskCache(SavedTargetIndex),
 }
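
A condensed sketch of how the render thread consumes these variants, anticipating the `TextureResolver::resolve()` logic further down in this patch; a plain `u32` stands in for a native texture handle:

    // Condensed stand-in covering three of the TextureSource variants.
    enum TextureSource { Invalid, PrevPassAlpha, PrevPassColor }

    struct Resolver {
        prev_pass_alpha: Option<u32>,
        prev_pass_color: Option<u32>,
        dummy: u32, // 1x1 fallback for shaders running in the first pass
    }

    impl Resolver {
        fn resolve(&self, src: TextureSource) -> Option<u32> {
            match src {
                // Invalid behaves like None, as the comment above describes.
                TextureSource::Invalid => None,
                TextureSource::PrevPassAlpha => Some(self.prev_pass_alpha.unwrap_or(self.dummy)),
                TextureSource::PrevPassColor => Some(self.prev_pass_color.unwrap_or(self.dummy)),
            }
        }
    }

    fn main() {
        let r = Resolver { prev_pass_alpha: None, prev_pass_color: Some(42), dummy: 1 };
        assert_eq!(r.resolve(TextureSource::PrevPassAlpha), Some(1)); // dummy fallback
        assert_eq!(r.resolve(TextureSource::PrevPassColor), Some(42));
        assert_eq!(r.resolve(TextureSource::Invalid), None);
    }
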
 
 pub const ORTHO_NEAR_PLANE: f32 = -100000.0;
 pub const ORTHO_FAR_PLANE: f32 = 100000.0;
 
 #[derive(Copy, Clone, Debug, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -274,20 +274,29 @@ impl PicturePrimitive {
             raster_spatial_node_index,
             surface_spatial_node_index,
         ).expect("todo");
 
         // Establish a new rasterization root if we have
         // a surface, and we have perspective or local raster
         // space request.
         let raster_space = self.requested_raster_space;
-        let local_scale = raster_space.local_scale();
 
-        let wants_raster_root = xf.has_perspective_component() ||
-                                local_scale.is_some();
+        // TODO(gw): A temporary hack here to revert behavior to
+        //           always raster in screen-space. This is not
+        //           a problem yet, since we're not taking advantage
+        //           of this for caching yet. This is a workaround
+        //           for some existing issues with handling scale
+        //           when rasterizing in local space mode. Once
+        //           the fixes for those are in-place, we can
+        //           remove this hack!
+        // let local_scale = raster_space.local_scale();
+        // let wants_raster_root = xf.has_perspective_component() ||
+        //                         local_scale.is_some();
+        let wants_raster_root = xf.has_perspective_component();
 
         let establishes_raster_root = has_surface && wants_raster_root;
 
         let raster_spatial_node_index = if establishes_raster_root {
             surface_spatial_node_index
         } else {
             raster_spatial_node_index
         };
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1,12 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+//! The high-level module responsible for managing the pipeline and preparing
+//! commands to be issued by the `Renderer`.
+//!
+//! See the comment at the top of the `renderer` module for a description of
+//! how these two pieces interact.
+
 use api::{ApiMsg, BuiltDisplayList, ClearCache, DebugCommand};
 #[cfg(feature = "debugger")]
 use api::{BuiltDisplayListIter, SpecificDisplayItem};
 use api::{DeviceIntPoint, DevicePixelScale, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{DocumentId, DocumentLayer, ExternalScrollId, FrameMsg, HitTestFlags, HitTestResult};
 use api::{IdNamespace, LayoutPoint, PipelineId, RenderNotifier, SceneMsg, ScrollClamping};
 use api::{MemoryReport, VoidPtrToSizeFn};
 use api::{ScrollLocation, ScrollNodeState, TransactionMsg, ResourceUpdate, ImageKey};
@@ -656,16 +662,20 @@ impl RenderBackend {
                         );
                     },
                     SceneBuilderResult::FlushComplete(tx) => {
                         tx.send(()).ok();
                     }
                     SceneBuilderResult::ExternalEvent(evt) => {
                         self.notifier.external_event(evt);
                     }
+                    SceneBuilderResult::ClearNamespace(id) => {
+                        self.resource_cache.clear_namespace(id);
+                        self.documents.retain(|doc_id, _doc| doc_id.0 != id);
+                    }
                     SceneBuilderResult::Stopped => {
                         panic!("We haven't sent a Stop yet, how did we get a Stopped back?");
                     }
                 }
             }
 
             keep_going = match self.api_rx.recv() {
                 Ok(msg) => {
@@ -765,19 +775,18 @@ impl RenderBackend {
                 self.documents.remove(&document_id);
                 self.low_priority_scene_tx.send(
                     SceneBuilderRequest::DeleteDocument(document_id)
                 ).unwrap();
             }
             ApiMsg::ExternalEvent(evt) => {
                 self.low_priority_scene_tx.send(SceneBuilderRequest::ExternalEvent(evt)).unwrap();
             }
-            ApiMsg::ClearNamespace(namespace_id) => {
-                self.resource_cache.clear_namespace(namespace_id);
-                self.documents.retain(|did, _doc| did.0 != namespace_id);
+            ApiMsg::ClearNamespace(id) => {
+                self.low_priority_scene_tx.send(SceneBuilderRequest::ClearNamespace(id)).unwrap();
             }
             ApiMsg::MemoryPressure => {
                 // This is drastic. It will basically flush everything out of the cache,
                 // and the next frame will have to rebuild all of its resources.
                 // We may want to look into something less extreme, but on the other hand this
                 // should only be used in situations where we are running low enough on memory
                 // that we risk crashing if we don't do something about it.
                 // The advantage of clearing the cache completely is that it gets rid of any
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -11,17 +11,17 @@ use clip::{ClipDataStore, ClipItem, Clip
 use clip_scroll_tree::SpatialNodeIndex;
 use device::TextureFilter;
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use glyph_rasterizer::GpuGlyphCacheKey;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::{BorderInstance, ImageSource, RasterizationSpace, UvRectKind};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{CacheTextureId, FastHashMap, SavedTargetIndex};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use picture::PictureCacheKey;
 use prim_store::{PrimitiveIndex, ImageCacheKey};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use render_backend::FrameId;
 use resource_cache::{CacheItem, ResourceCache};
@@ -111,26 +111,16 @@ impl RenderTaskTree {
                 debug_assert!(pass_index == passes.len() - 1);
             }
             RenderTaskLocation::Dynamic(..) |
             RenderTaskLocation::TextureCache(..) => {
                 debug_assert!(pass_index < passes.len() - 1);
             }
         }
 
-        // If this task can be shared between multiple
-        // passes, render it in the first pass so that
-        // it is available to all subsequent passes.
-        let pass_index = if task.is_shared() {
-            debug_assert!(task.children.is_empty());
-            0
-        } else {
-            pass_index
-        };
-
         let pass = &mut passes[pass_index];
         pass.add_render_task(id, task.get_dynamic_size(), task.target_kind());
     }
 
     pub fn prepare_for_render(&mut self) {
         for task in &mut self.tasks {
             task.prepare_for_render();
         }
@@ -169,23 +159,37 @@ impl ops::Index<RenderTaskId> for Render
 
 impl ops::IndexMut<RenderTaskId> for RenderTaskTree {
     fn index_mut(&mut self, id: RenderTaskId) -> &mut RenderTask {
         debug_assert_eq!(self.frame_id, id.1);
         &mut self.tasks[id.0 as usize]
     }
 }
 
+/// Identifies the output buffer location for a given `RenderTask`.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTaskLocation {
+    /// The `RenderTask` should be drawn to a fixed region in a specific render
+    /// target. This is used for the root `RenderTask`, where the main
+    /// framebuffer is used as the render target.
     Fixed(DeviceIntRect),
+    /// The `RenderTask` should be drawn to a target provided by the atlas
+    /// allocator. This is the most common case.
+    ///
+    /// The second member specifies the width and height of the task
+    /// output, and the first member is initially left as `None`. During the
+    /// build phase, we invoke `RenderTargetList::alloc()` and store the
+    /// resulting location in the first member. That location identifies the
+    /// render target and the offset of the allocated region within that target.
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
-    TextureCache(SourceTexture, i32, DeviceIntRect),
+    /// The output of the `RenderTask` will be persisted beyond this frame, and
+    /// thus should be drawn into the `TextureCache`.
+    TextureCache(CacheTextureId, i32, DeviceIntRect),
 }
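
A toy sketch of the two-phase `Dynamic` pattern described above; `alloc()` here is a hypothetical stand-in for `RenderTargetList::alloc()`:

    struct RenderTargetIndex(usize);

    // Condensed stand-in for RenderTaskLocation::Dynamic: the allocation slot
    // starts as None and is filled in during the build phase.
    enum Location {
        Dynamic(Option<((i32, i32), RenderTargetIndex)>, (i32, i32)),
    }

    // Hypothetical allocator: put every task at the origin of target 0.
    fn alloc(_size: (i32, i32)) -> ((i32, i32), RenderTargetIndex) {
        ((0, 0), RenderTargetIndex(0))
    }

    fn main() {
        let mut loc = Location::Dynamic(None, (256, 256));
        // Build phase: resolve the allocation and store it back.
        match loc {
            Location::Dynamic(ref mut slot, size) => *slot = Some(alloc(size)),
        }
        match loc {
            Location::Dynamic(Some((origin, RenderTargetIndex(target))), _) => {
                assert_eq!((origin, target), ((0, 0), 0));
            }
            Location::Dynamic(None, _) => panic!("allocation was not filled in"),
        }
    }
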
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CacheMaskTask {
     actual_rect: DeviceIntRect,
     pub root_spatial_node_index: SpatialNodeIndex,
@@ -866,43 +870,16 @@ impl RenderTask {
             }
 
             RenderTaskKind::Blit(..) => {
                 RenderTargetKind::Color
             }
         }
     }
 
-    // Check if this task wants to be made available as an input
-    // to all passes (except the first) in the render task tree.
-    // To qualify for this, the task needs to have no children / dependencies.
-    // Currently, this is only supported for A8 targets, but it can be
-    // trivially extended to also support RGBA8 targets in the future
-    // if we decide that is useful.
-    pub fn is_shared(&self) -> bool {
-        match self.kind {
-            RenderTaskKind::Picture(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Scaling(..) |
-            RenderTaskKind::ClipRegion(..) |
-            RenderTaskKind::Blit(..) |
-            RenderTaskKind::Border(..) |
-            RenderTaskKind::Glyph(..) => false,
-
-            // TODO(gw): For now, we've disabled the shared clip mask
-            //           optimization. It's of dubious value in the
-            //           future once we start to cache clip tasks anyway.
-            //           I have left shared texture support here though,
-            //           just in case we want it in the future.
-            RenderTaskKind::CacheMask(..) => false,
-        }
-    }
-
     // Optionally, prepare the render task for drawing. This is executed
     // after all resource cache items (textures and glyphs) have been
     // resolved and can be queried. It also allows certain render tasks
     // to defer calculating an exact size until now, if desired.
     pub fn prepare_for_render(&mut self) {
     }
 
     pub fn write_gpu_blocks(
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -1,18 +1,31 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-//! The webrender API.
+//! The high-level module responsible for interfacing with the GPU.
 //!
-//! The `webrender::renderer` module provides the interface to webrender, which
-//! is accessible through [`Renderer`][renderer]
+//! Much of WebRender's design is driven by separating work into different
+//! threads. To avoid the complexities of multi-threaded GPU access, we restrict
+//! all communication with the GPU to one thread, the render thread. But since
+//! issuing GPU commands is often a bottleneck, we move everything else (i.e.
+//! the computation of what commands to issue) to another thread, the
+//! RenderBackend thread. The RenderBackend, in turn, may delegate work to
+//! other threads (like the SceneBuilder threads or Rayon workers), but the
+//! Renderer-vs-RenderBackend distinction is the most important.
 //!
-//! [renderer]: struct.Renderer.html
+//! The consumer is responsible for initializing the render thread before
+//! calling into WebRender, which means that this module also serves as the
+//! initial entry point into WebRender, and is responsible for spawning the
+//! various other threads discussed above. That said, WebRender initialization
+//! returns both the `Renderer` instance as well as a channel for communicating
+//! directly with the `RenderBackend`. Aside from a few high-level operations
+//! like 'render now', most of the interesting commands from the consumer go
+//! over that channel and operate on the `RenderBackend`.
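
As a toy illustration of this threading split, with plain `std::sync::mpsc` channels standing in for WebRender's actual channel types, and the main thread playing the role of the render thread:

    use std::sync::mpsc;
    use std::thread;

    // Messages the consumer sends to the backend, and the results it gets back.
    enum ApiMsg { GenerateFrame }
    struct RenderedFrame(u32);

    fn main() {
        let (api_tx, api_rx) = mpsc::channel::<ApiMsg>();
        let (result_tx, result_rx) = mpsc::channel::<RenderedFrame>();

        // The RenderBackend thread: computes what to draw, never touches the GPU.
        let backend = thread::spawn(move || {
            let mut frame = 0;
            while let Ok(ApiMsg::GenerateFrame) = api_rx.recv() {
                frame += 1;
                result_tx.send(RenderedFrame(frame)).unwrap();
            }
        });

        // The consumer drives the backend over the channel...
        api_tx.send(ApiMsg::GenerateFrame).unwrap();
        // ...and the render thread (here: main) consumes the results and is
        // the only place GPU commands would be issued.
        let RenderedFrame(n) = result_rx.recv().unwrap();
        assert_eq!(n, 1);

        drop(api_tx); // hanging up stops the backend loop
        backend.join().unwrap();
    }
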
 
 use api::{BlobImageHandler, ColorF, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, Epoch, ExternalImageId};
 use api::{ExternalImageType, FontRenderMode, FrameMsg, ImageFormat, PipelineId};
 use api::{ImageRendering, Checkpoint, NotificationRequest};
 use api::{MemoryReport, VoidPtrToSizeFn};
 use api::{RenderApiSender, RenderNotifier, TexelRect, TextureTarget};
 use api::{channel};
@@ -34,17 +47,17 @@ use frame_builder::{ChasePrimitive, Fram
 use gleam::gl;
 use glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 #[cfg(feature = "debug_renderer")]
 use gpu_cache::GpuDebugChunk;
 #[cfg(feature = "pathfinder")]
 use gpu_glyph_renderer::GpuGlyphRenderer;
 use gpu_types::ScalingInstance;
-use internal_types::{SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
+use internal_types::{TextureSource, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
 use internal_types::{CacheTextureId, DebugOutput, FastHashMap, RenderedDocument, ResultMsg};
 use internal_types::{TextureUpdateList, TextureUpdateOp, TextureUpdateSource};
 use internal_types::{RenderTargetInfo, SavedTargetIndex};
 use prim_store::DeferredResolve;
 use profiler::{BackendProfileCounters, FrameProfileCounters,
                GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use device::query::GpuProfiler;
 use rayon::{ThreadPool, ThreadPoolBuilder};
@@ -269,31 +282,32 @@ impl From<GlyphFormat> for ShaderColorMo
                 panic!("Subpixel glyph formats must be handled separately.");
             }
             GlyphFormat::Bitmap => ShaderColorMode::Bitmap,
             GlyphFormat::ColorBitmap => ShaderColorMode::ColorBitmap,
         }
     }
 }
 
+/// Enumeration of the texture samplers used across the various WebRender shaders.
+///
+/// Each variant corresponds to a uniform declared in shader source. We only bind
+/// the variants we need for a given shader, so not every variant is bound for every
+/// batch.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub(crate) enum TextureSampler {
     Color0,
     Color1,
     Color2,
-    CacheA8,
-    CacheRGBA8,
-    ResourceCache,
+    PrevPassAlpha,
+    PrevPassColor,
+    GpuCache,
     TransformPalette,
     RenderTasks,
     Dither,
-    // A special sampler that is bound to the A8 output of
-    // the *first* pass. Items rendered in this target are
-    // available as inputs to tasks in any subsequent pass.
-    SharedCacheA8,
     PrimitiveHeadersF,
     PrimitiveHeadersI,
 }
 
 impl TextureSampler {
     pub(crate) fn color(n: usize) -> TextureSampler {
         match n {
             0 => TextureSampler::Color0,
@@ -307,25 +321,24 @@ impl TextureSampler {
 }
 
 impl Into<TextureSlot> for TextureSampler {
     fn into(self) -> TextureSlot {
         match self {
             TextureSampler::Color0 => TextureSlot(0),
             TextureSampler::Color1 => TextureSlot(1),
             TextureSampler::Color2 => TextureSlot(2),
-            TextureSampler::CacheA8 => TextureSlot(3),
-            TextureSampler::CacheRGBA8 => TextureSlot(4),
-            TextureSampler::ResourceCache => TextureSlot(5),
+            TextureSampler::PrevPassAlpha => TextureSlot(3),
+            TextureSampler::PrevPassColor => TextureSlot(4),
+            TextureSampler::GpuCache => TextureSlot(5),
             TextureSampler::TransformPalette => TextureSlot(6),
             TextureSampler::RenderTasks => TextureSlot(7),
             TextureSampler::Dither => TextureSlot(8),
-            TextureSampler::SharedCacheA8 => TextureSlot(9),
-            TextureSampler::PrimitiveHeadersF => TextureSlot(10),
-            TextureSampler::PrimitiveHeadersI => TextureSlot(11),
+            TextureSampler::PrimitiveHeadersF => TextureSlot(9),
+            TextureSampler::PrimitiveHeadersI => TextureSlot(10),
         }
     }
 }
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedVertex {
     pub pos: [f32; 2],
@@ -698,107 +711,120 @@ impl GpuGlyphRenderer {
     fn new(_: &mut Device, _: &VAO, _: bool) -> Result<GpuGlyphRenderer, RendererError> {
         Ok(GpuGlyphRenderer)
     }
 }
 
 #[cfg(not(feature = "pathfinder"))]
 struct StenciledGlyphPage;
 
+/// A Texture that has been initialized by the `device` module and is ready to
+/// be used.
 struct ActiveTexture {
     texture: Texture,
     saved_index: Option<SavedTargetIndex>,
-    is_shared: bool,
 }
 
-struct SourceTextureResolver {
-    /// A vector for fast resolves of texture cache IDs to
-    /// native texture IDs. This maps to a free-list managed
-    /// by the backend thread / texture cache. We free the
-    /// texture memory associated with a TextureId when its
-    /// texture cache ID is freed by the texture cache, but
-    /// reuse the TextureId when the texture caches's free
-    /// list reuses the texture cache ID. This saves having to
-    /// use a hashmap, and allows a flat vector for performance.
-    cache_texture_map: Vec<Texture>,
+/// Helper struct for resolving device Textures for use during rendering passes.
+///
+/// Manages the mapping between the at-a-distance texture handles used by the
+/// `RenderBackend` (which does not directly interface with the GPU) and actual
+/// device texture handles.
+struct TextureResolver {
+    /// A vector for fast resolves of texture cache IDs to native texture IDs.
+    /// This maps to a free-list managed by the backend thread / texture cache.
+    /// We free the texture memory associated with a TextureId when its texture
+    /// cache ID is freed by the texture cache, but reuse the TextureId when the
+    /// texture caches's free list reuses the texture cache ID. This saves
+    /// having to use a hashmap, and allows a flat vector for performance.
+    texture_cache_map: Vec<Texture>,
 
     /// Map of external image IDs to native textures.
     external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
 
-    /// A special 1x1 dummy cache texture used for shaders that expect to work
-    /// with the cache but are actually running in the first pass
-    /// when no target is yet provided as a cache texture input.
+    /// A special 1x1 dummy texture used for shaders that expect to work with
+    /// the output of the previous pass but are actually running in the first
+    /// pass.
     dummy_cache_texture: Texture,
 
-    /// The current cache textures.
-    cache_rgba8_texture: Option<ActiveTexture>,
-    cache_a8_texture: Option<ActiveTexture>,
-
-    /// An alpha texture shared between all passes.
-    //TODO: just use the standard texture saving logic instead.
-    shared_alpha_texture: Option<Texture>,
-
-    /// Saved cache textures that are to be re-used.
-    saved_textures: Vec<Texture>,
-
-    /// General pool of render targets.
+    /// The outputs of the previous pass, if applicable.
+    prev_pass_color: Option<ActiveTexture>,
+    prev_pass_alpha: Option<ActiveTexture>,
+
+    /// Saved render targets from previous passes. This is used when a pass
+    /// needs access to the result of a pass other than the immediately-preceding
+    /// one. In this case, the `RenderTask` will get a non-`None` `saved_index`,
+    /// which will cause the resulting render target to be persisted in this list
+    /// (at that index) until the end of the frame.
+    saved_targets: Vec<Texture>,
+
+    /// Pool of idle render target textures ready for re-use.
+    ///
+    /// Naively, it would seem like we only ever need two pairs of (color,
+    /// alpha) render targets: one for the output of the previous pass (serving
+    /// as input to the current pass), and one for the output of the current
+    /// pass. However, there are cases where the output of one pass is used as
+    /// the input to multiple future passes. For example, drop-shadows draw the
+    /// picture in pass X, then reference it in pass X+1 to create the blurred
+    /// shadow, and pass the results of both X and X+1 to pass X+2 to draw the
+    /// actual content.
+    ///
+    /// See the comments in `allocate_target_texture` for more insight on why
+    /// reuse is a win.
     render_target_pool: Vec<Texture>,
 }
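
A small sketch of the flat-vector mapping described for `texture_cache_map`, mirroring the `update_texture_cache` handling further down in this patch; a `String` stands in for a native texture handle:

    struct CacheTextureId(usize);

    // Flat vector keyed by CacheTextureId: the texture cache hands IDs out of a
    // free list, so they stay dense and a Vec lookup beats a hash map.
    struct Resolver { texture_cache_map: Vec<String> }

    impl Resolver {
        fn add(&mut self, id: CacheTextureId, tex: String) {
            // Fresh IDs are allocated densely; reused IDs overwrite in place.
            if self.texture_cache_map.len() == id.0 {
                self.texture_cache_map.push(tex);
            } else {
                self.texture_cache_map[id.0] = tex;
            }
        }

        fn resolve(&self, id: CacheTextureId) -> &str {
            &self.texture_cache_map[id.0]
        }
    }

    fn main() {
        let mut r = Resolver { texture_cache_map: Vec::new() };
        r.add(CacheTextureId(0), "atlas-0".to_owned());
        r.add(CacheTextureId(1), "standalone-1".to_owned());
        r.add(CacheTextureId(0), "atlas-0-reused".to_owned()); // ID reuse
        assert_eq!(r.resolve(CacheTextureId(0)), "atlas-0-reused");
        assert_eq!(r.resolve(CacheTextureId(1)), "standalone-1");
    }
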
 
-impl SourceTextureResolver {
-    fn new(device: &mut Device) -> SourceTextureResolver {
+impl TextureResolver {
+    fn new(device: &mut Device) -> TextureResolver {
         let mut dummy_cache_texture = device
             .create_texture(TextureTarget::Array, ImageFormat::BGRA8);
         device.init_texture::<u8>(
             &mut dummy_cache_texture,
             1,
             1,
             TextureFilter::Linear,
             None,
             1,
             None,
         );
 
-        SourceTextureResolver {
-            cache_texture_map: Vec::new(),
+        TextureResolver {
+            texture_cache_map: Vec::new(),
             external_images: FastHashMap::default(),
             dummy_cache_texture,
-            cache_a8_texture: None,
-            cache_rgba8_texture: None,
-            shared_alpha_texture: None,
-            saved_textures: Vec::default(),
+            prev_pass_alpha: None,
+            prev_pass_color: None,
+            saved_targets: Vec::default(),
             render_target_pool: Vec::new(),
         }
     }
 
     fn deinit(self, device: &mut Device) {
         device.delete_texture(self.dummy_cache_texture);
 
-        for texture in self.cache_texture_map {
+        for texture in self.texture_cache_map {
             device.delete_texture(texture);
         }
 
         for texture in self.render_target_pool {
             device.delete_texture(texture);
         }
     }
 
     fn begin_frame(&mut self) {
-        assert!(self.cache_rgba8_texture.is_none());
-        assert!(self.cache_a8_texture.is_none());
-        assert!(self.saved_textures.is_empty());
+        assert!(self.prev_pass_color.is_none());
+        assert!(self.prev_pass_alpha.is_none());
+        assert!(self.saved_targets.is_empty());
     }
 
     fn end_frame(&mut self, device: &mut Device, frame_id: FrameId) {
         // return the cached targets to the pool
         self.end_pass(None, None);
-        // return the global alpha texture
-        self.render_target_pool.extend(self.shared_alpha_texture.take());
         // return the saved targets as well
-        self.render_target_pool.extend(self.saved_textures.drain(..));
+        self.render_target_pool.extend(self.saved_targets.drain(..));
 
         // GC the render target pool.
         //
         // We use a simple scheme whereby we drop any texture that hasn't been used
         // in the last 30 frames. This should generally prevent any sustained build-
         // up of unused textures, unless we don't generate frames for a long period.
         // This can happen when the window is minimized, and we probably want to
         // flush all the WebRender caches in that case [1].
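
A hedged sketch of that retention policy; the `last_frame_used` field is hypothetical, and the real code frees textures through the `Device` rather than by simply dropping them:

    struct Texture { last_frame_used: u64 }

    // Drop pool textures that have not been used within the last 30 frames.
    fn gc_render_target_pool(pool: &mut Vec<Texture>, current_frame: u64) {
        const FRAME_COUNT_BEFORE_EVICTION: u64 = 30;
        pool.retain(|t| current_frame - t.last_frame_used < FRAME_COUNT_BEFORE_EVICTION);
    }

    fn main() {
        let mut pool = vec![
            Texture { last_frame_used: 100 },
            Texture { last_frame_used: 60 },
        ];
        gc_render_target_pool(&mut pool, 100);
        assert_eq!(pool.len(), 1); // the stale target was evicted
    }
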
@@ -825,115 +851,110 @@ impl SourceTextureResolver {
         &mut self,
         a8_texture: Option<ActiveTexture>,
         rgba8_texture: Option<ActiveTexture>,
     ) {
         // If we have cache textures from the previous pass, return them to the pool.
         // Also assign the pool index of those cache textures to the last pass's index,
         // because this is the result of the last pass.
         // Note: the order here is important; it needs to match the logic in `RenderPass::build()`.
-        if let Some(at) = self.cache_rgba8_texture.take() {
-            assert!(!at.is_shared);
+        if let Some(at) = self.prev_pass_color.take() {
             if let Some(index) = at.saved_index {
-                assert_eq!(self.saved_textures.len(), index.0);
-                self.saved_textures.push(at.texture);
+                assert_eq!(self.saved_targets.len(), index.0);
+                self.saved_targets.push(at.texture);
             } else {
                 self.render_target_pool.push(at.texture);
             }
         }
-        if let Some(at) = self.cache_a8_texture.take() {
+        if let Some(at) = self.prev_pass_alpha.take() {
             if let Some(index) = at.saved_index {
-                assert!(!at.is_shared);
-                assert_eq!(self.saved_textures.len(), index.0);
-                self.saved_textures.push(at.texture);
-            } else if at.is_shared {
-                assert!(self.shared_alpha_texture.is_none());
-                self.shared_alpha_texture = Some(at.texture);
+                assert_eq!(self.saved_targets.len(), index.0);
+                self.saved_targets.push(at.texture);
             } else {
                 self.render_target_pool.push(at.texture);
             }
         }
 
         // We have another pass to process, make these textures available
         // as inputs to the next pass.
-        self.cache_rgba8_texture = rgba8_texture;
-        self.cache_a8_texture = a8_texture;
+        self.prev_pass_color = rgba8_texture;
+        self.prev_pass_alpha = a8_texture;
     }
 
     // Bind a source texture to the device.
-    fn bind(&self, texture_id: &SourceTexture, sampler: TextureSampler, device: &mut Device) {
+    fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) {
         match *texture_id {
-            SourceTexture::Invalid => {}
-            SourceTexture::CacheA8 => {
-                let texture = match self.cache_a8_texture {
+            TextureSource::Invalid => {}
+            TextureSource::PrevPassAlpha => {
+                let texture = match self.prev_pass_alpha {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 };
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::CacheRGBA8 => {
-                let texture = match self.cache_rgba8_texture {
+            TextureSource::PrevPassColor => {
+                let texture = match self.prev_pass_color {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 };
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::External(external_image) => {
+            TextureSource::External(external_image) => {
                 let texture = self.external_images
                     .get(&(external_image.id, external_image.channel_index))
                     .expect(&format!("BUG: External image should be resolved by now"));
                 device.bind_external_texture(sampler, texture);
             }
-            SourceTexture::TextureCache(index) => {
-                let texture = &self.cache_texture_map[index.0];
+            TextureSource::TextureCache(index) => {
+                let texture = &self.texture_cache_map[index.0];
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::RenderTaskCache(saved_index) => {
-                let texture = &self.saved_textures[saved_index.0];
+            TextureSource::RenderTaskCache(saved_index) => {
+                let texture = &self.saved_targets[saved_index.0];
                 device.bind_texture(sampler, texture)
             }
         }
     }
 
     // Get the real (OpenGL) texture ID for a given source texture.
     // For a texture cache texture, the IDs are stored in a vector
     // map for fast access.
-    fn resolve(&self, texture_id: &SourceTexture) -> Option<&Texture> {
+    fn resolve(&self, texture_id: &TextureSource) -> Option<&Texture> {
         match *texture_id {
-            SourceTexture::Invalid => None,
-            SourceTexture::CacheA8 => Some(
-                match self.cache_a8_texture {
+            TextureSource::Invalid => None,
+            TextureSource::PrevPassAlpha => Some(
+                match self.prev_pass_alpha {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 }
             ),
-            SourceTexture::CacheRGBA8 => Some(
-                match self.cache_rgba8_texture {
+            TextureSource::PrevPassColor => Some(
+                match self.prev_pass_color {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 }
             ),
-            SourceTexture::External(..) => {
+            TextureSource::External(..) => {
                 panic!("BUG: External textures cannot be resolved, they can only be bound.");
             }
-            SourceTexture::TextureCache(index) => {
-                Some(&self.cache_texture_map[index.0])
+            TextureSource::TextureCache(index) => {
+                Some(&self.texture_cache_map[index.0])
             }
-            SourceTexture::RenderTaskCache(saved_index) => {
-                Some(&self.saved_textures[saved_index.0])
+            TextureSource::RenderTaskCache(saved_index) => {
+                Some(&self.saved_targets[saved_index.0])
             }
         }
     }
 
     fn report_memory(&self) -> MemoryReport {
         let mut report = MemoryReport::default();
 
         // We're reporting GPU memory rather than heap-allocations, so we don't
         // use size_of_op.
-        for t in self.cache_texture_map.iter() {
+        for t in self.texture_cache_map.iter() {
             report.texture_cache_textures += t.size_in_bytes();
         }
         for t in self.render_target_pool.iter() {
             report.render_target_textures += t.size_in_bytes();
         }
 
         report
     }
@@ -958,19 +979,19 @@ struct CacheRow {
 }
 
 impl CacheRow {
     fn new() -> Self {
         CacheRow { is_dirty: false }
     }
 }
 
-/// The bus over which CPU and GPU versions of the cache
+/// The bus over which CPU and GPU versions of the GPU cache
 /// get synchronized.
-enum CacheBus {
+enum GpuCacheBus {
     /// PBO-based updates, currently operate on a row granularity.
     /// Therefore, are subject to fragmentation issues.
     PixelBuffer {
         /// PBO used for transfers.
         buffer: PBO,
         /// Meta-data about the cached rows.
         rows: Vec<CacheRow>,
         /// Mirrored block data on CPU.
@@ -988,65 +1009,65 @@ enum CacheBus {
         /// VBO for gpu block data.
         buf_value: VBO<GpuBlockData>,
         /// Currently stored block count.
         count: usize,
     },
 }
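
A tiny standalone sketch of the row-granularity dirty tracking used by the `PixelBuffer` bus; the PBO upload itself is elided, and plain `f32`s stand in for `GpuBlockData`:

    const ROW_WIDTH: usize = 4; // stand-in for MAX_VERTEX_TEXTURE_WIDTH

    struct CacheRow { is_dirty: bool }

    struct PixelBufferBus {
        rows: Vec<CacheRow>,
        cpu_blocks: Vec<f32>, // mirrored block data on the CPU
    }

    impl PixelBufferBus {
        // Mirrors GpuCacheUpdate::Copy: write the blocks, mark the row dirty.
        fn copy(&mut self, row: usize, offset: usize, blocks: &[f32]) {
            while self.rows.len() <= row {
                self.rows.push(CacheRow { is_dirty: false });
                self.cpu_blocks.extend(vec![0.0; ROW_WIDTH]);
            }
            let base = row * ROW_WIDTH + offset;
            self.cpu_blocks[base .. base + blocks.len()].copy_from_slice(blocks);
            self.rows[row].is_dirty = true;
        }

        // Mirrors flush(): only dirty rows would be re-uploaded through the PBO.
        fn dirty_rows(&self) -> usize {
            self.rows.iter().filter(|r| r.is_dirty).count()
        }
    }

    fn main() {
        let mut bus = PixelBufferBus { rows: Vec::new(), cpu_blocks: Vec::new() };
        bus.copy(2, 1, &[1.0, 2.0]);
        assert_eq!(bus.dirty_rows(), 1); // rows 0 and 1 exist but are clean
    }
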
 
 /// The device-specific representation of the cache texture in gpu_cache.rs
-struct CacheTexture {
+struct GpuCacheTexture {
     texture: Texture,
-    bus: CacheBus,
+    bus: GpuCacheBus,
 }
 
-impl CacheTexture {
+impl GpuCacheTexture {
     fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
         let texture = device.create_texture(TextureTarget::Default, ImageFormat::RGBAF32);
 
         let bus = if use_scatter {
             let program = device
                 .create_program("gpu_cache_update", "", &desc::GPU_CACHE_UPDATE)?;
             let buf_position = device.create_vbo();
             let buf_value = device.create_vbo();
             //Note: the vertex attributes have to be supplied in the same order
             // as for program creation, but each assigned to a different stream.
             let vao = device.create_custom_vao(&[
                 buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                 buf_value   .stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[1..2]),
             ]);
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 program,
                 vao,
                 buf_position,
                 buf_value,
                 count: 0,
             }
         } else {
             let buffer = device.create_pbo();
-            CacheBus::PixelBuffer {
+            GpuCacheBus::PixelBuffer {
                 buffer,
                 rows: Vec::new(),
                 cpu_blocks: Vec::new(),
             }
         };
 
-        Ok(CacheTexture {
+        Ok(GpuCacheTexture {
             texture,
             bus,
         })
     }
 
     fn deinit(self, device: &mut Device) {
         device.delete_texture(self.texture);
         match self.bus {
-            CacheBus::PixelBuffer { buffer, ..} => {
+            GpuCacheBus::PixelBuffer { buffer, ..} => {
                 device.delete_pbo(buffer);
             }
-            CacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
+            GpuCacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
                 device.delete_program(program);
                 device.delete_custom_vao(vao);
                 device.delete_vbo(buf_position);
                 device.delete_vbo(buf_value);
             }
         }
     }
 
@@ -1060,17 +1081,17 @@ impl CacheTexture {
         total_block_count: usize,
         max_height: u32,
     ) {
         // See if we need to create or resize the texture.
         let old_size = self.texture.get_dimensions();
         let new_size = DeviceUintSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, max_height);
 
         match self.bus {
-            CacheBus::PixelBuffer { ref mut rows, .. } => {
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                 if max_height > old_size.height {
                     // Create a f32 texture that can be used for the vertex shader
                     // to fetch data from.
                     device.init_texture::<u8>(
                         &mut self.texture,
                         new_size.width,
                         new_size.height,
                         TextureFilter::Nearest,
@@ -1082,17 +1103,17 @@ impl CacheTexture {
                     // If we had to resize the texture, just mark all rows
                     // as dirty so they will be uploaded to the texture
                     // during the next flush.
                     for row in rows.iter_mut() {
                         row.is_dirty = true;
                     }
                 }
             }
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 ref mut buf_position,
                 ref mut buf_value,
                 ref mut count,
                 ..
             } => {
                 *count = 0;
                 if total_block_count > buf_value.allocated_count() {
                     device.allocate_vbo(buf_position, total_block_count, VertexUsageHint::Stream);
@@ -1117,17 +1138,17 @@ impl CacheTexture {
                     }
                 }
             }
         }
     }
 
     fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
         match self.bus {
-            CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+            GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
                 for update in &updates.updates {
                     match *update {
                         GpuCacheUpdate::Copy {
                             block_index,
                             block_count,
                             address,
                         } => {
                             let row = address.v as usize;
@@ -1150,17 +1171,17 @@ impl CacheTexture {
                             let data = &mut cpu_blocks[block_offset .. (block_offset + block_count)];
                             for i in 0 .. block_count {
                                 data[i] = updates.blocks[block_index + i];
                             }
                         }
                     }
                 }
             }
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 ref buf_position,
                 ref buf_value,
                 ref mut count,
                 ..
             } => {
                 //TODO: re-use this heap allocation
                 // Unused positions will be left as 0xFFFF, which translates to
                 // (1.0, 1.0) in the vertex output position and gets culled out
@@ -1188,17 +1209,17 @@ impl CacheTexture {
                 device.fill_vbo(buf_position, &position_data, *count);
                 *count += position_data.len();
             }
         }
     }
 
     fn flush(&mut self, device: &mut Device) -> usize {
         match self.bus {
-            CacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
+            GpuCacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
                 let rows_dirty = rows
                     .iter()
                     .filter(|row| row.is_dirty)
                     .count();
                 if rows_dirty == 0 {
                     return 0
                 }
 
@@ -1223,17 +1244,17 @@ impl CacheTexture {
 
                     uploader.upload(rect, 0, None, cpu_blocks);
 
                     row.is_dirty = false;
                 }
 
                 rows_dirty
             }
-            CacheBus::Scatter { ref program, ref vao, count, .. } => {
+            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                 device.disable_depth();
                 device.set_blend(false);
                 device.bind_program(program);
                 device.bind_custom_vao(vao);
                 device.bind_draw_target(
                     Some((&self.texture, 0)),
                     Some(self.texture.get_dimensions()),
                 );
@@ -1392,16 +1413,19 @@ pub struct RendererVAOs {
     blur_vao: VAO,
     clip_vao: VAO,
     border_vao: VAO,
     scale_vao: VAO,
 }
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
+///
+/// We have a separate `Renderer` instance for each instance of WebRender (generally
+/// one per OS window), and all instances share the same thread.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     debug_server: DebugServer,
     pub device: Device,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     active_documents: Vec<(DocumentId, RenderedDocument)>,
@@ -1431,27 +1455,27 @@ pub struct Renderer {
 
     pub gpu_profile: GpuProfiler<GpuProfileTag>,
     vaos: RendererVAOs,
 
     prim_header_f_texture: VertexDataTexture,
     prim_header_i_texture: VertexDataTexture,
     transforms_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
-    gpu_cache_texture: CacheTexture,
+    gpu_cache_texture: GpuCacheTexture,
     #[cfg(feature = "debug_renderer")]
     gpu_cache_debug_chunks: Vec<GpuDebugChunk>,
 
     gpu_cache_frame_id: FrameId,
     gpu_cache_overflow: bool,
 
     pipeline_info: PipelineInfo,
 
     // Manages and resolves source textures IDs to real texture IDs.
-    texture_resolver: SourceTextureResolver,
+    texture_resolver: TextureResolver,
 
     // A PBO used to do asynchronous texture cache uploads.
     texture_cache_upload_pbo: PBO,
 
     dither_matrix_texture: Option<Texture>,
 
     /// Optional trait object that allows the client
     /// application to provide external buffers for image data.
@@ -1693,24 +1717,24 @@ impl Renderer {
                                                             options.precache_shaders));
 
         let blur_vao = device.create_vao_with_new_instances(&desc::BLUR, &prim_vao);
         let clip_vao = device.create_vao_with_new_instances(&desc::CLIP, &prim_vao);
         let border_vao = device.create_vao_with_new_instances(&desc::BORDER, &prim_vao);
         let scale_vao = device.create_vao_with_new_instances(&desc::SCALE, &prim_vao);
         let texture_cache_upload_pbo = device.create_pbo();
 
-        let texture_resolver = SourceTextureResolver::new(&mut device);
+        let texture_resolver = TextureResolver::new(&mut device);
 
         let prim_header_f_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let prim_header_i_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAI32);
         let transforms_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let render_task_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
 
-        let gpu_cache_texture = CacheTexture::new(
+        let gpu_cache_texture = GpuCacheTexture::new(
             &mut device,
             options.scatter_gpu_cache_updates,
         )?;
 
         device.end_frame();
 
         let backend_notifier = notifier.clone();
 
@@ -2295,23 +2319,23 @@ impl Renderer {
             DebugCommand::LoadCapture(..) => {
                 panic!("Capture commands are not welcome here! Did you build with 'capture' feature?")
             }
             DebugCommand::ClearCaches(_)
             | DebugCommand::SimulateLongSceneBuild(_)
             | DebugCommand::SimulateLongLowPrioritySceneBuild(_) => {}
             DebugCommand::InvalidateGpuCache => {
                 match self.gpu_cache_texture.bus {
-                    CacheBus::PixelBuffer { ref mut rows, .. } => {
+                    GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                         info!("Invalidating GPU caches");
                         for row in rows {
                             row.is_dirty = true;
                         }
                     }
-                    CacheBus::Scatter { .. } => {
+                    GpuCacheBus::Scatter { .. } => {
                         warn!("Unable to invalidate scattered GPU cache");
                     }
                 }
             }
         }
     }
 
     /// Set a callback for handling external images.
@@ -2646,17 +2670,17 @@ impl Renderer {
         let deferred_update_list = self.update_deferred_resolves(&frame.deferred_resolves);
         self.pending_gpu_cache_updates.extend(deferred_update_list);
 
         self.update_gpu_cache();
 
         // Note: the texture might have changed during the `update`,
         // so we need to bind it here.
         self.device.bind_texture(
-            TextureSampler::ResourceCache,
+            TextureSampler::GpuCache,
             &self.gpu_cache_texture.texture,
         );
     }
 
     fn update_texture_cache(&mut self) {
         let _gm = self.gpu_profile.start_marker("texture cache update");
         let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
 
@@ -2667,23 +2691,23 @@ impl Renderer {
                         width,
                         height,
                         layer_count,
                         format,
                         filter,
                         render_target,
                     } => {
                         let CacheTextureId(cache_texture_index) = update.id;
-                        if self.texture_resolver.cache_texture_map.len() == cache_texture_index {
+                        if self.texture_resolver.texture_cache_map.len() == cache_texture_index {
                             // Create a new native texture, as requested by the texture cache.
                             let texture = self.device.create_texture(TextureTarget::Array, format);
-                            self.texture_resolver.cache_texture_map.push(texture);
+                            self.texture_resolver.texture_cache_map.push(texture);
                         }
                         let texture =
-                            &mut self.texture_resolver.cache_texture_map[cache_texture_index];
+                            &mut self.texture_resolver.texture_cache_map[cache_texture_index];
                         assert_eq!(texture.get_format(), format);
 
                         // Ensure no PBO is bound when creating the texture storage,
                         // or GL will attempt to read data from there.
                         self.device.init_texture::<u8>(
                             texture,
                             width,
                             height,
@@ -2695,17 +2719,17 @@ impl Renderer {
                     }
                     TextureUpdateOp::Update {
                         rect,
                         source,
                         stride,
                         layer_index,
                         offset,
                     } => {
-                        let texture = &self.texture_resolver.cache_texture_map[update.id.0];
+                        let texture = &self.texture_resolver.texture_cache_map[update.id.0];
                         let mut uploader = self.device.upload_texture(
                             texture,
                             &self.texture_cache_upload_pbo,
                             0,
                         );
 
                         let bytes_uploaded = match source {
                             TextureUpdateSource::Bytes { data } => {
@@ -2743,17 +2767,17 @@ impl Renderer {
                                 handler.unlock(id, channel_index);
                                 size
                             }
                         };
 
                         self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10);
                     }
                     TextureUpdateOp::Free => {
-                        let texture = &mut self.texture_resolver.cache_texture_map[update.id.0];
+                        let texture = &mut self.texture_resolver.texture_cache_map[update.id.0];
                         self.device.free_texture_storage(texture);
                     }
                 }
             }
         }
     }
 
     pub(crate) fn draw_instanced_batch<T>(
@@ -2826,17 +2850,17 @@ impl Renderer {
         backdrop: &RenderTask,
         readback: &RenderTask,
     ) {
         if scissor_rect.is_some() {
             self.device.disable_scissor();
         }
 
         let cache_texture = self.texture_resolver
-            .resolve(&SourceTexture::CacheRGBA8)
+            .resolve(&TextureSource::PrevPassColor)
             .unwrap();
 
         // Before submitting the composite batch, do the
         // framebuffer readbacks that are needed for each
         // composite operation in this batch.
         let (readback_rect, readback_layer) = readback.get_target_rect();
         let (backdrop_rect, _) = backdrop.get_target_rect();
         let backdrop_screen_origin = match backdrop.kind {
@@ -2904,17 +2928,17 @@ impl Renderer {
                     self.device.bind_read_target(Some((src_texture, layer)));
                     source_rect
                 }
                 BlitJobSource::RenderTask(task_id) => {
                     // A blit from the child render task into this target.
                     // TODO(gw): Support R8 format here once we start
                     //           creating mips for alpha masks.
                     let src_texture = self.texture_resolver
-                        .resolve(&SourceTexture::CacheRGBA8)
+                        .resolve(&TextureSource::PrevPassColor)
                         .expect("BUG: invalid source texture");
                     let source = &render_tasks[task_id];
                     let (source_rect, layer) = source.get_target_rect();
                     self.device.bind_read_target(Some((src_texture, layer.0 as i32)));
                     source_rect
                 }
             };
             debug_assert_eq!(source_rect.size, blit.target_rect.size);
@@ -2923,31 +2947,31 @@ impl Renderer {
                 blit.target_rect,
             );
         }
     }
 
     fn handle_scaling(
         &mut self,
         scalings: &[ScalingInstance],
-        source: SourceTexture,
+        source: TextureSource,
         projection: &Transform3D<f32>,
         stats: &mut RendererStats,
     ) {
         if scalings.is_empty() {
             return
         }
 
         match source {
-            SourceTexture::CacheRGBA8 => {
+            TextureSource::PrevPassColor => {
                 self.shaders.cs_scale_rgba8.bind(&mut self.device,
                                                  &projection,
                                                  &mut self.renderer_errors);
             }
-            SourceTexture::CacheA8 => {
+            TextureSource::PrevPassAlpha => {
                 self.shaders.cs_scale_a8.bind(&mut self.device,
                                               &projection,
                                               &mut self.renderer_errors);
             }
             _ => unreachable!(),
         }
 
         self.draw_instanced_batch(
@@ -3059,17 +3083,17 @@ impl Renderer {
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
         }
 
-        self.handle_scaling(&target.scalings, SourceTexture::CacheRGBA8, projection, stats);
+        self.handle_scaling(&target.scalings, TextureSource::PrevPassColor, projection, stats);
 
         //TODO: record the pixel count for cached primitives
 
         if target.needs_depth() {
             let _gl = self.gpu_profile.start_marker("opaque batches");
             let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
             self.set_blend(false, framebuffer_kind);
             //Note: depth equality is needed for split planes
@@ -3351,17 +3375,17 @@ impl Renderer {
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
         }
 
-        self.handle_scaling(&target.scalings, SourceTexture::CacheA8, projection, stats);
+        self.handle_scaling(&target.scalings, TextureSource::PrevPassAlpha, projection, stats);
 
         // Draw the clip items into the tiled alpha mask.
         {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);
 
             // switch to multiplicative blending
             self.set_blend(true, FramebufferKind::Other);
             self.set_blend_mode_multiply(FramebufferKind::Other);
@@ -3382,18 +3406,18 @@ impl Renderer {
                 );
             }
             // draw box-shadow clips
             for (mask_texture_id, items) in target.clip_batcher.box_shadows.iter() {
                 let _gm2 = self.gpu_profile.start_marker("box-shadows");
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
-                        SourceTexture::Invalid,
-                        SourceTexture::Invalid,
+                        TextureSource::Invalid,
+                        TextureSource::Invalid,
                     ],
                 };
                 self.shaders.cs_clip_box_shadow
                     .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     items,
                     VertexArrayKind::Clip,
                     &textures,
@@ -3418,18 +3442,18 @@ impl Renderer {
             }
 
             // draw image masks
             for (mask_texture_id, items) in target.clip_batcher.images.iter() {
                 let _gm2 = self.gpu_profile.start_marker("clip images");
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
-                        SourceTexture::Invalid,
-                        SourceTexture::Invalid,
+                        TextureSource::Invalid,
+                        TextureSource::Invalid,
                     ],
                 };
                 self.shaders.cs_clip_image
                     .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     items,
                     VertexArrayKind::Clip,
                     &textures,
@@ -3438,25 +3462,26 @@ impl Renderer {
             }
         }
 
         self.gpu_profile.finish_sampler(alpha_sampler);
     }
 
     fn draw_texture_cache_target(
         &mut self,
-        texture: &SourceTexture,
+        texture: &CacheTextureId,
         layer: i32,
         target: &TextureCacheRenderTarget,
         render_tasks: &RenderTaskTree,
         stats: &mut RendererStats,
     ) {
+        let texture_source = TextureSource::TextureCache(*texture);
         let (target_size, projection) = {
             let texture = self.texture_resolver
-                .resolve(texture)
+                .resolve(&texture_source)
                 .expect("BUG: invalid target texture");
             let target_size = texture.get_dimensions();
             let projection = Transform3D::ortho(
                 0.0,
                 target_size.width as f32,
                 0.0,
                 target_size.height as f32,
                 ORTHO_NEAR_PLANE,
@@ -3470,17 +3495,17 @@ impl Renderer {
 
         self.set_blend(false, FramebufferKind::Other);
 
         // Handle any Pathfinder glyphs.
         let stencil_page = self.stencil_glyphs(&target.glyphs, &projection, &target_size, stats);
 
         {
             let texture = self.texture_resolver
-                .resolve(texture)
+                .resolve(&texture_source)
                 .expect("BUG: invalid target texture");
             self.device
                 .bind_draw_target(Some((texture, layer)), Some(target_size));
         }
 
         self.device.disable_depth();
         self.device.disable_depth_write();
         self.set_blend(false, FramebufferKind::Other);
@@ -3655,16 +3680,28 @@ impl Renderer {
                 .expect("Found external image, but no handler set!");
 
             for (ext_data, _) in self.texture_resolver.external_images.drain() {
                 handler.unlock(ext_data.0, ext_data.1);
             }
         }
     }
 
+    /// Allocates a texture to be used as the output for a rendering pass.
+    ///
+/// We make an effort to reuse render target textures across passes and
+    /// across frames. Reusing a texture with the same dimensions (width,
+    /// height, and layer-count) and format is obviously ideal. Reusing a
+    /// texture with different dimensions but the same format can be faster
+    /// than allocating a new texture, since it basically boils down to
+    /// a realloc in GPU memory, which can be very cheap if the existing
+    /// region can be resized. However, some drivers/GPUs require textures
+/// with different formats to be allocated in different arenas, so
+    /// reinitializing with a different format can force a large copy. As
+    /// such, we just allocate a new texture in that case.
     fn allocate_target_texture<T: RenderTarget>(
         &mut self,
         list: &mut RenderTargetList<T>,
         counters: &mut FrameProfileCounters,
         frame_id: FrameId,
     ) -> Option<ActiveTexture> {
         debug_assert_ne!(list.max_size, DeviceUintSize::zero());
         if list.targets.is_empty() {
@@ -3720,17 +3757,16 @@ impl Renderer {
             list.targets.len() as _,
             None,
         );
 
         list.check_ready(&texture);
         Some(ActiveTexture {
             texture,
             saved_index: list.saved_index.clone(),
-            is_shared: list.is_shared,
         })
     }
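
A minimal standalone sketch (not part of this patch) of the reuse policy
described in the doc comment on allocate_target_texture above. PoolTexture
and select_for_reuse are hypothetical names; the point is the preference
order: exact match, then any texture of the same format, then (by returning
None) a fresh allocation by the caller.

#[derive(Clone, Copy, PartialEq)]
enum Format { Rgba8, R8 }

struct PoolTexture {
    width: u32,
    height: u32,
    layers: u32,
    format: Format,
}

fn select_for_reuse(
    pool: &mut Vec<PoolTexture>,
    width: u32,
    height: u32,
    layers: u32,
    format: Format,
) -> Option<PoolTexture> {
    // Ideal case: same dimensions, layer count, and format.
    if let Some(i) = pool.iter().position(|t| {
        t.format == format && t.width == width && t.height == height && t.layers == layers
    }) {
        return Some(pool.swap_remove(i));
    }
    // Same format but different dimensions: still cheap to reinitialize,
    // since it amounts to a realloc within the same arena.
    pool.iter()
        .position(|t| t.format == format)
        .map(|i| pool.swap_remove(i))
    // A format mismatch yields None: the caller allocates a new texture.
}
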
 
     fn bind_frame_data(&mut self, frame: &mut Frame) {
         let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
         self.device.set_device_pixel_ratio(frame.device_pixel_ratio);
 
         self.prim_header_f_texture.update(
@@ -3762,18 +3798,18 @@ impl Renderer {
 
         self.render_task_texture
             .update(&mut self.device, &mut frame.render_tasks.task_data);
         self.device.bind_texture(
             TextureSampler::RenderTasks,
             &self.render_task_texture.texture,
         );
 
-        debug_assert!(self.texture_resolver.cache_a8_texture.is_none());
-        debug_assert!(self.texture_resolver.cache_rgba8_texture.is_none());
+        debug_assert!(self.texture_resolver.prev_pass_alpha.is_none());
+        debug_assert!(self.texture_resolver.prev_pass_color.is_none());
     }
 
     fn draw_tile_frame(
         &mut self,
         frame: &mut Frame,
         framebuffer_size: Option<DeviceUintSize>,
         framebuffer_depth_is_ready: bool,
         frame_id: FrameId,
@@ -3792,23 +3828,23 @@ impl Renderer {
 
         self.bind_frame_data(frame);
         self.texture_resolver.begin_frame();
 
         for (pass_index, pass) in frame.passes.iter_mut().enumerate() {
             self.gpu_profile.place_marker(&format!("pass {}", pass_index));
 
             self.texture_resolver.bind(
-                &SourceTexture::CacheA8,
-                TextureSampler::CacheA8,
+                &TextureSource::PrevPassAlpha,
+                TextureSampler::PrevPassAlpha,
                 &mut self.device,
             );
             self.texture_resolver.bind(
-                &SourceTexture::CacheRGBA8,
-                TextureSampler::CacheRGBA8,
+                &TextureSource::PrevPassColor,
+                TextureSampler::PrevPassColor,
                 &mut self.device,
             );
 
             let (cur_alpha, cur_color) = match pass.kind {
                 RenderPassKind::MainFramebuffer(ref target) => {
                     if let Some(framebuffer_size) = framebuffer_size {
                         stats.color_target_count += 1;
 
@@ -3904,22 +3940,16 @@ impl Renderer {
                             stats,
                         );
                     }
 
                     (alpha_tex, color_tex)
                 }
             };
 
-            //Note: the `end_pass` will make sure this texture is not recycled this frame
-            if let Some(ActiveTexture { ref texture, is_shared: true, .. }) = cur_alpha {
-                self.device
-                    .bind_texture(TextureSampler::SharedCacheA8, texture);
-            }
-
             self.texture_resolver.end_pass(
                 cur_alpha,
                 cur_color,
             );
         }
 
         self.texture_resolver.end_frame(&mut self.device, frame_id);
 
@@ -4034,29 +4064,29 @@ impl Renderer {
         if !self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
             return;
         }
 
         let mut spacing = 16;
         let mut size = 512;
         let fb_width = framebuffer_size.width as i32;
         let num_layers: i32 = self.texture_resolver
-            .cache_texture_map
+            .texture_cache_map
             .iter()
             .map(|texture| texture.get_layer_count())
             .sum();
 
         if num_layers * (size + spacing) > fb_width {
             let factor = fb_width as f32 / (num_layers * (size + spacing)) as f32;
             size = (size as f32 * factor) as i32;
             spacing = (spacing as f32 * factor) as i32;
         }
 
         let mut i = 0;
-        for texture in &self.texture_resolver.cache_texture_map {
+        for texture in &self.texture_resolver.texture_cache_map {
             let y = spacing + if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
                 528
             } else {
                 0
             };
             let dimensions = texture.get_dimensions();
             let src_rect = DeviceIntRect::new(
                 DeviceIntPoint::zero(),
@@ -4225,17 +4255,17 @@ impl Renderer {
         unsafe { op(ptr as *const c_void) }
     }
 
     /// Collects a memory report.
     pub fn report_memory(&self) -> MemoryReport {
         let mut report = MemoryReport::default();
 
         // GPU cache CPU memory.
-        if let CacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
+        if let GpuCacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
             report.gpu_cache_cpu_mirror += self.size_of(cpu_blocks.as_ptr());
         }
 
         // GPU cache GPU memory.
         report.gpu_cache_textures += self.gpu_cache_texture.texture.size_in_bytes();
 
         // Render task CPU memory.
         for (_id, doc) in &self.active_documents {
@@ -4735,17 +4765,17 @@ impl Renderer {
                     "gpu", &config.root, &mut self.device,
                 ),
                 gpu_cache_frame_id: self.gpu_cache_frame_id,
                 textures: Vec::new(),
                 external_images: deferred_images,
             };
 
             info!("saving cached textures");
-            for texture in &self.texture_resolver.cache_texture_map {
+            for texture in &self.texture_resolver.texture_cache_map {
                 let file_name = format!("cache-{}", plain_self.textures.len() + 1);
                 info!("\t{}", file_name);
                 let plain = Self::save_texture(texture, &file_name, &config.root, &mut self.device);
                 plain_self.textures.push(plain);
             }
 
             config.serialize(&plain_self, "renderer");
         }
@@ -4789,49 +4819,49 @@ impl Renderer {
             let value = (CapturedExternalImageData::Buffer(data), plain_ext.uv);
             image_handler.data.insert((ext.id, ext.channel_index), value);
         }
 
         if let Some(renderer) = CaptureConfig::deserialize::<PlainRenderer, _>(&root, "renderer") {
             info!("loading cached textures");
             self.device.begin_frame();
 
-            for texture in self.texture_resolver.cache_texture_map.drain(..) {
+            for texture in self.texture_resolver.texture_cache_map.drain(..) {
                 self.device.delete_texture(texture);
             }
             for texture in renderer.textures {
                 info!("\t{}", texture.data);
                 let mut t = self.device.create_texture(TextureTarget::Array, texture.format);
                 Self::load_texture(&mut t, &texture, &root, &mut self.device);
-                self.texture_resolver.cache_texture_map.push(t);
+                self.texture_resolver.texture_cache_map.push(t);
             }
 
             info!("loading gpu cache");
             let gpu_cache_data = Self::load_texture(
                 &mut self.gpu_cache_texture.texture,
                 &renderer.gpu_cache,
                 &root,
                 &mut self.device,
             );
             match self.gpu_cache_texture.bus {
-                CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+                GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
                     let dim = self.gpu_cache_texture.texture.get_dimensions();
                     let blocks = unsafe {
                         slice::from_raw_parts(
                             gpu_cache_data.as_ptr() as *const GpuBlockData,
                             gpu_cache_data.len() / mem::size_of::<GpuBlockData>(),
                         )
                     };
                     // fill up the CPU cache from the contents we just loaded
                     rows.clear();
                     cpu_blocks.clear();
                     rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                     cpu_blocks.extend_from_slice(blocks);
                 }
-                CacheBus::Scatter { .. } => {}
+                GpuCacheBus::Scatter { .. } => {}
             }
             self.gpu_cache_frame_id = renderer.gpu_cache_frame_id;
 
             info!("loading external texture-backed images");
             let mut native_map = FastHashMap::<String, gl::GLuint>::default();
             for ExternalCaptureImage { short_path, external, descriptor } in renderer.external_images {
                 let target = match external.image_type {
                     ExternalImageType::TextureHandle(target) => target,
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -23,17 +23,17 @@ use device::TextureFilter;
 use euclid::{point2, size2};
 use glyph_cache::GlyphCache;
 #[cfg(not(feature = "pathfinder"))]
 use glyph_cache::GlyphCacheEntry;
 use glyph_rasterizer::{FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::UvRectKind;
 use image::{compute_tile_range, for_each_tile_in_range};
-use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
+use internal_types::{FastHashMap, FastHashSet, TextureSource, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTaskCache, RenderTaskCacheKey, RenderTaskId};
 use render_task::{RenderTaskCacheEntry, RenderTaskCacheEntryHandle, RenderTaskTree};
 use smallvec::SmallVec;
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::collections::hash_map::IterMut;
 use std::{cmp, mem};
@@ -62,26 +62,26 @@ pub struct GlyphFetchResult {
 // for this is that the texture may change
 // dimensions (e.g. the pages in a texture
 // atlas can grow). When this happens, by
 // storing the coordinates as texel values
 // we don't need to go through and update
 // various CPU-side structures.
 #[derive(Debug, Clone)]
 pub struct CacheItem {
-    pub texture_id: SourceTexture,
+    pub texture_id: TextureSource,
     pub uv_rect_handle: GpuCacheHandle,
     pub uv_rect: DeviceUintRect,
     pub texture_layer: i32,
 }
 
 impl CacheItem {
     pub fn invalid() -> Self {
         CacheItem {
-            texture_id: SourceTexture::Invalid,
+            texture_id: TextureSource::Invalid,
             uv_rect_handle: GpuCacheHandle::new(),
             uv_rect: DeviceUintRect::zero(),
             texture_layer: 0,
         }
     }
 }
 
 #[derive(Debug)]
@@ -1227,23 +1227,23 @@ impl ResourceCache {
     pub fn fetch_glyphs<F>(
         &self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         fetch_buffer: &mut Vec<GlyphFetchResult>,
         gpu_cache: &mut GpuCache,
         mut f: F,
     ) where
-        F: FnMut(SourceTexture, GlyphFormat, &[GlyphFetchResult]),
+        F: FnMut(TextureSource, GlyphFormat, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
 
         self.glyph_rasterizer.prepare_font(&mut font);
 
-        let mut current_texture_id = SourceTexture::Invalid;
+        let mut current_texture_id = TextureSource::Invalid;
         let mut current_glyph_format = GlyphFormat::Subpixel;
         debug_assert!(fetch_buffer.is_empty());
 
         for (loop_index, key) in glyph_keys.iter().enumerate() {
            let (cache_item, glyph_format) =
                 match self.glyph_rasterizer.get_cache_item_for_glyph(key,
                                                                      &font,
                                                                      &self.cached_glyphs,
@@ -1277,24 +1277,24 @@ impl ResourceCache {
     pub fn fetch_glyphs<F>(
         &self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         fetch_buffer: &mut Vec<GlyphFetchResult>,
         gpu_cache: &mut GpuCache,
         mut f: F,
     ) where
-        F: FnMut(SourceTexture, GlyphFormat, &[GlyphFetchResult]),
+        F: FnMut(TextureSource, GlyphFormat, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
 
         self.glyph_rasterizer.prepare_font(&mut font);
         let glyph_key_cache = self.cached_glyphs.get_glyph_key_cache_for_font(&font);
 
-        let mut current_texture_id = SourceTexture::Invalid;
+        let mut current_texture_id = TextureSource::Invalid;
         let mut current_glyph_format = GlyphFormat::Subpixel;
         debug_assert!(fetch_buffer.is_empty());
 
         for (loop_index, key) in glyph_keys.iter().enumerate() {
             let (cache_item, glyph_format) = match *glyph_key_cache.get(key) {
                 GlyphCacheEntry::Cached(ref glyph) => {
                     (self.texture_cache.get(&glyph.texture_cache_handle), glyph.format)
                 }
@@ -1621,16 +1621,20 @@ impl ResourceCache {
     pub fn clear_namespace(&mut self, namespace: IdNamespace) {
         self.resources
             .image_templates
             .images
             .retain(|key, _| key.0 != namespace);
         self.cached_images
             .clear_keys(|key| key.0 == namespace);
 
+        self.blob_image_templates.retain(|key, _| key.0 != namespace);
+
+        self.rasterized_blob_images.retain(|key, _| key.0 != namespace);
+
         self.resources.font_instances
             .write()
             .unwrap()
             .retain(|key, _| key.0 != namespace);
         for &key in self.resources.font_templates.keys().filter(|key| key.0 == namespace) {
             self.glyph_rasterizer.delete_font(key);
         }
         self.resources
--- a/gfx/webrender/src/scene_builder.rs
+++ b/gfx/webrender/src/scene_builder.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AsyncBlobImageRasterizer, BlobImageRequest, BlobImageParams, BlobImageResult};
 use api::{DocumentId, PipelineId, ApiMsg, FrameMsg, ResourceUpdate, ExternalEvent, Epoch};
-use api::{BuiltDisplayList, ColorF, LayoutSize, NotificationRequest, Checkpoint};
+use api::{BuiltDisplayList, ColorF, LayoutSize, NotificationRequest, Checkpoint, IdNamespace};
 use api::channel::MsgSender;
 #[cfg(feature = "capture")]
 use capture::CaptureConfig;
 use frame_builder::{FrameBuilderConfig, FrameBuilder};
 use clip::{ClipDataInterner, ClipDataUpdateList};
 use clip_scroll_tree::ClipScrollTree;
 use display_list_flattener::DisplayListFlattener;
 use internal_types::{FastHashMap, FastHashSet};
@@ -117,31 +117,33 @@ pub struct BuiltScene {
 
 // Message from render backend to scene builder.
 pub enum SceneBuilderRequest {
     Transaction(Box<Transaction>),
     ExternalEvent(ExternalEvent),
     DeleteDocument(DocumentId),
     WakeUp,
     Flush(MsgSender<()>),
+    ClearNamespace(IdNamespace),
     SetFrameBuilderConfig(FrameBuilderConfig),
     SimulateLongSceneBuild(u32),
     SimulateLongLowPrioritySceneBuild(u32),
     Stop,
     #[cfg(feature = "capture")]
     SaveScene(CaptureConfig),
     #[cfg(feature = "replay")]
     LoadScenes(Vec<LoadScene>),
 }
 
 // Message from scene builder to render backend.
 pub enum SceneBuilderResult {
     Transaction(Box<BuiltTransaction>, Option<Sender<SceneSwapResult>>),
     ExternalEvent(ExternalEvent),
     FlushComplete(MsgSender<()>),
+    ClearNamespace(IdNamespace),
     Stopped,
 }
 
 // Message from render backend to scene builder to indicate the
 // scene swap was completed. We need a separate channel for this
 // so that they don't get mixed with SceneBuilderRequest messages.
 pub enum SceneSwapResult {
     Complete(Sender<()>),
@@ -216,50 +218,61 @@ impl SceneBuilder {
                 picture_id_generator: PictureIdGenerator::new(),
                 simulate_slow_ms: 0,
             },
             in_tx,
             out_rx,
         )
     }
 
+    /// Send a message to the render backend thread.
+    ///
+    /// We first push the result onto the result queue and then send a wake-up
+    /// message to the api queue that the render backend is blocking on.
+    pub fn send(&self, msg: SceneBuilderResult) {
+        self.tx.send(msg).unwrap();
+        let _ = self.api_tx.send(ApiMsg::WakeUp);
+    }
+
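+
A standalone sketch (not part of this patch) of the two-queue hand-off that
send() performs, using hypothetical SceneResult/ApiMsg stand-ins rather than
this crate's real message types. The ordering is the important part: the
result is queued before the wake-up, so the backend is guaranteed to find it
when it drains the result queue.

use std::sync::mpsc::Sender;

enum ApiMsg { WakeUp }
enum SceneResult { FlushComplete }

fn send_result(result_tx: &Sender<SceneResult>, api_tx: &Sender<ApiMsg>, msg: SceneResult) {
    // Queue the result first...
    result_tx.send(msg).unwrap();
    // ...then unblock the thread waiting on the api queue. Failure here just
    // means the receiver is shutting down, so the error is ignored.
    let _ = api_tx.send(ApiMsg::WakeUp);
}
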
     /// The scene builder thread's event loop.
     pub fn run(&mut self) {
         if let Some(ref hooks) = self.hooks {
             hooks.register();
         }
 
         loop {
             match self.rx.recv() {
                 Ok(SceneBuilderRequest::WakeUp) => {}
                 Ok(SceneBuilderRequest::Flush(tx)) => {
-                    self.tx.send(SceneBuilderResult::FlushComplete(tx)).unwrap();
-                    let _ = self.api_tx.send(ApiMsg::WakeUp);
+                    self.send(SceneBuilderResult::FlushComplete(tx));
                 }
                 Ok(SceneBuilderRequest::Transaction(mut txn)) => {
                     let built_txn = self.process_transaction(&mut txn);
                     self.forward_built_transaction(built_txn);
                 }
                 Ok(SceneBuilderRequest::DeleteDocument(document_id)) => {
                     self.documents.remove(&document_id);
                 }
                 Ok(SceneBuilderRequest::SetFrameBuilderConfig(cfg)) => {
                     self.config = cfg;
                 }
+                Ok(SceneBuilderRequest::ClearNamespace(id)) => {
+                    self.documents.retain(|doc_id, _doc| doc_id.0 != id);
+                    self.send(SceneBuilderResult::ClearNamespace(id));
+                }
                 #[cfg(feature = "replay")]
                 Ok(SceneBuilderRequest::LoadScenes(msg)) => {
                     self.load_scenes(msg);
                 }
                 #[cfg(feature = "capture")]
                 Ok(SceneBuilderRequest::SaveScene(config)) => {
                     self.save_scene(config);
                 }
                 Ok(SceneBuilderRequest::ExternalEvent(evt)) => {
-                    self.tx.send(SceneBuilderResult::ExternalEvent(evt)).unwrap();
-                    self.api_tx.send(ApiMsg::WakeUp).unwrap();
+                    self.send(SceneBuilderResult::ExternalEvent(evt));
                 }
                 Ok(SceneBuilderRequest::Stop) => {
                     self.tx.send(SceneBuilderResult::Stopped).unwrap();
                     // We don't need to send a WakeUp to api_tx because we only
                     // get the Stop when the RenderBackend loop is exiting.
                     break;
                 }
                 Ok(SceneBuilderRequest::SimulateLongSceneBuild(time_ms)) => {
--- a/gfx/webrender/src/shade.rs
+++ b/gfx/webrender/src/shade.rs
@@ -377,51 +377,48 @@ fn create_prim_shader(
     if let Ok(ref program) = program {
         device.bind_shader_samplers(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
                 ("sColor1", TextureSampler::Color1),
                 ("sColor2", TextureSampler::Color2),
                 ("sDither", TextureSampler::Dither),
-                ("sCacheA8", TextureSampler::CacheA8),
-                ("sCacheRGBA8", TextureSampler::CacheRGBA8),
+                ("sPrevPassAlpha", TextureSampler::PrevPassAlpha),
+                ("sPrevPassColor", TextureSampler::PrevPassColor),
                 ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sGpuCache", TextureSampler::GpuCache),
                 ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
                 ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
         );
     }
 
     program
 }
 
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
     let prefix = format!(
-        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}U\n
-        #define WR_FEATURE_TRANSFORM\n",
+        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}U\n",
         MAX_VERTEX_TEXTURE_WIDTH
     );
 
     debug!("ClipShader {}", name);
 
     let program = device.create_program(name, &prefix, &desc::CLIP);
 
     if let Ok(ref program) = program {
         device.bind_shader_samplers(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
                 ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sGpuCache", TextureSampler::GpuCache),
                 ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
                 ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
         );
     }
 
     program
 }
--- a/gfx/webrender/src/spatial_node.rs
+++ b/gfx/webrender/src/spatial_node.rs
@@ -251,22 +251,30 @@ impl SpatialNode {
             SpatialNodeType::ReferenceFrame(ref mut info) => {
                 // Resolve the transform against any property bindings.
                 let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
                 info.resolved_transform =
                     LayoutFastTransform::with_vector(info.origin_in_parent_reference_frame)
                     .pre_mul(&source_transform.into())
                     .pre_mul(&info.source_perspective);
 
-                // The transformation for this viewport in world coordinates is the transformation for
-                // our parent reference frame, plus any accumulated scrolling offsets from nodes
-                // between our reference frame and this node. Finally, we also include
-                // whatever local transformation this reference frame provides.
+                // In order to compute a transformation to world coordinates, we need to apply the
+                // following transforms in order:
+                //   state.parent_accumulated_scroll_offset
+                //   info.source_perspective
+                //   info.source_transform
+                //   info.origin_in_parent_reference_frame
+                //   state.parent_reference_frame_transform
+                // The first one incorporates the scrolling effect of any scrollframes/sticky nodes
+                // between this reference frame and the parent reference frame. The middle three
+                // transforms (which are combined into info.resolved_transform) do the conversion
+                // into the parent reference frame's coordinate space, and then the last one
+                // applies the parent reference frame's transform to reach world space.
                 let relative_transform = info.resolved_transform
-                    .post_translate(state.parent_accumulated_scroll_offset)
+                    .pre_translate(&state.parent_accumulated_scroll_offset)
                     .to_transform()
                     .with_destination::<LayoutPixel>();
                 self.world_viewport_transform =
                     state.parent_reference_frame_transform.pre_mul(&relative_transform.into());
                 self.world_content_transform = self.world_viewport_transform;
 
                 info.invertible = self.world_viewport_transform.is_invertible();
                 if !info.invertible {
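
A toy sketch (not part of this patch) of the composition order described in
the comment above, using closures over 2D points instead of the real
LayoutFastTransform type. pre_translate makes the scroll offset the innermost
transform, and pre_mul applies the parent's transform last.

type Xf = Box<dyn Fn((f32, f32)) -> (f32, f32)>;

fn compose(outer: Xf, inner: Xf) -> Xf {
    Box::new(move |p: (f32, f32)| outer(inner(p)))
}

fn translate(dx: f32, dy: f32) -> Xf {
    Box::new(move |(x, y): (f32, f32)| (x + dx, y + dy))
}

fn world_viewport_transform(parent_world: Xf, resolved: Xf, scroll: (f32, f32)) -> Xf {
    // pre_translate: the accumulated scroll offset is applied first, in the
    // coordinate space local to this reference frame's parent content.
    let relative = compose(resolved, translate(scroll.0, scroll.1));
    // pre_mul: the parent reference frame's transform is applied last,
    // taking the result into world space.
    compose(parent_world, relative)
}
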
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -5,17 +5,17 @@
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageType, ImageData, ImageFormat};
 use api::ImageDescriptor;
 use device::TextureFilter;
 use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle};
 use gpu_types::{ImageSource, UvRectKind};
 use internal_types::{CacheTextureId, FastHashMap, TextureUpdateList, TextureUpdateSource};
-use internal_types::{RenderTargetInfo, SourceTexture, TextureUpdate, TextureUpdateOp};
+use internal_types::{RenderTargetInfo, TextureSource, TextureUpdate, TextureUpdateOp};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use resource_cache::CacheItem;
 use std::cell::Cell;
 use std::cmp;
 use std::mem;
 use std::rc::Rc;
 
@@ -570,31 +570,31 @@ impl TextureCache {
                     EntryKind::Cache {
                         layer_index,
                         origin,
                         ..
                     } => (layer_index, origin),
                 };
                 CacheItem {
                     uv_rect_handle: entry.uv_rect_handle,
-                    texture_id: SourceTexture::TextureCache(entry.texture_id),
+                    texture_id: TextureSource::TextureCache(entry.texture_id),
                     uv_rect: DeviceUintRect::new(origin, entry.size),
                     texture_layer: layer_index as i32,
                 }
             }
             None => panic!("BUG: handle not requested earlier in frame"),
         }
     }
 
     // A more detailed version of get(). This allows access to the actual
     // device rect of the cache allocation.
     pub fn get_cache_location(
         &self,
         handle: &TextureCacheHandle,
-    ) -> (SourceTexture, i32, DeviceUintRect) {
+    ) -> (CacheTextureId, i32, DeviceUintRect) {
         let handle = handle
             .entry
             .as_ref()
             .expect("BUG: handle not requested earlier in frame");
 
         let entry = self.entries
             .get_opt(handle)
             .expect("BUG: was dropped from cache or not updated!");
@@ -604,17 +604,17 @@ impl TextureCache {
                 (0, DeviceUintPoint::zero())
             }
             EntryKind::Cache {
                 layer_index,
                 origin,
                 ..
             } => (layer_index, origin),
         };
-        (SourceTexture::TextureCache(entry.texture_id),
+        (entry.texture_id,
          layer_index as i32,
          DeviceUintRect::new(origin, entry.size))
     }
 
     pub fn mark_unused(&mut self, handle: &TextureCacheHandle) {
         if let Some(ref handle) = handle.entry {
             if let Some(entry) = self.entries.get_opt_mut(handle) {
                 // Set a very low last accessed frame to make it very likely that this entry
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -9,17 +9,17 @@ use batch::{AlphaBatchBuilder, AlphaBatc
 use clip::ClipStore;
 use clip_scroll_tree::{ClipScrollTree};
 use device::{FrameId, Texture};
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use gpu_cache::{GpuCache};
 use gpu_types::{BorderInstance, BlurDirection, BlurInstance, PrimitiveHeaders, ScalingInstance};
 use gpu_types::{TransformData, TransformPalette};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{CacheTextureId, FastHashMap, SavedTargetIndex, TextureSource};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use prim_store::{PrimitiveStore, DeferredResolve};
 use profiler::FrameProfileCounters;
 use render_backend::FrameResources;
 use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree, ScalingTask};
 use resource_cache::ResourceCache;
@@ -27,16 +27,17 @@ use std::{cmp, usize, f32, i32, mem};
 use texture_allocator::GuillotineAllocator;
 #[cfg(feature = "pathfinder")]
 use webrender_api::{DevicePixel, FontRenderMode};
 
 const MIN_TARGET_SIZE: u32 = 2048;
 const STYLE_SOLID: i32 = ((BorderStyle::Solid as i32) << 8) | ((BorderStyle::Solid as i32) << 16);
 const STYLE_MASK: i32 = 0x00FF_FF00;
 
+/// Identifies a given `RenderTarget` in a `RenderTargetList`.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTargetIndex(pub usize);
 
 pub struct RenderTargetContext<'a, 'rc> {
     pub device_pixel_scale: DevicePixelScale,
     pub prim_store: &'a PrimitiveStore,
@@ -81,84 +82,134 @@ impl TextureAllocator {
             let rect = DeviceIntRect::new(origin, size);
             self.used_rect = rect.union(&self.used_rect);
         }
 
         origin
     }
 }
 
+/// Represents a number of rendering operations on a surface.
+///
+/// In graphics parlance, a "render target" usually means "a surface (texture or
+/// framebuffer) bound to the output of a shader". This trait has a slightly
+/// different meaning, in that it represents the operations on that surface
+/// _before_ it's actually bound and rendered. So a `RenderTarget` is built by
+/// the `RenderBackend` by inserting tasks, and then shipped over to the
+/// `Renderer` where a device surface is resolved and the tasks are transformed
+/// into draw commands on that surface.
+///
+/// We express this as a trait to generalize over color and alpha surfaces.
+/// A given `RenderTask` will draw to one or the other, depending on its type
+/// and sometimes on its parameters. See `RenderTask::target_kind`.
 pub trait RenderTarget {
+    /// Creates a new RenderTarget of the given type.
     fn new(
         size: Option<DeviceUintSize>,
         screen_size: DeviceIntSize,
     ) -> Self;
+
+    /// Allocates a region of the given size in this target, and returns either
+    /// the offset of that region or `None` if it won't fit.
+    ///
+    /// If a non-`None` result is returned, that value is generally stored in
+    /// a task which is then added to this target via `add_task()`.
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint>;
+
+    /// Optional hook to provide additional processing for the target at the
+    /// end of the build phase.
     fn build(
         &mut self,
         _ctx: &mut RenderTargetContext,
         _gpu_cache: &mut GpuCache,
         _render_tasks: &mut RenderTaskTree,
         _deferred_resolves: &mut Vec<DeferredResolve>,
         _prim_headers: &mut PrimitiveHeaders,
         _transforms: &mut TransformPalette,
     ) {
     }
-    // TODO(gw): It's a bit odd that we need the deferred resolves and mutable
-    //           GPU cache here. They are typically used by the build step
-    //           above. They are used for the blit jobs to allow resolve_image
-    //           to be called. It's a bit of extra overhead to store the image
-    //           key here and the resolve them in the build step separately.
-    //           BUT: if/when we add more texture cache target jobs, we might
-    //           want to tidy this up.
+
+    /// Associates a `RenderTask` with this target. That task must be assigned
+    /// to a region returned by invoking `allocate()` on this target.
+    ///
+    /// TODO(gw): It's a bit odd that we need the deferred resolves and mutable
+    /// GPU cache here. They are typically used by the build step above. They
+    /// are used for the blit jobs to allow resolve_image to be called. It's a
+    /// bit of extra overhead to store the image key here and then resolve them
+    /// in the build step separately. BUT: if/when we add more texture cache
+    /// target jobs, we might want to tidy this up.
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
         transforms: &mut TransformPalette,
         deferred_resolves: &mut Vec<DeferredResolve>,
     );
     fn used_rect(&self) -> DeviceIntRect;
     fn needs_depth(&self) -> bool;
 }
 
+/// A tag used to identify the output format of a `RenderTarget`.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTargetKind {
-    Color, // RGBA32
+    Color, // RGBA8
     Alpha, // R8
 }
 
+/// A series of `RenderTarget` instances, serving as the high-level container
+/// into which `RenderTasks` are assigned.
+///
+/// During the build phase, we iterate over the tasks in each `RenderPass`. For
+/// each task, we invoke `allocate()` on the `RenderTargetList`, which in turn
+/// attempts to allocate an output region in the last `RenderTarget` in the
+/// list. If allocation fails (or if the list is empty), a new `RenderTarget` is
+/// created and appended to the list. The build phase then assigns the task into
+/// the target associated with the final allocation.
+///
+/// The result is that each `RenderPass` is associated with one or two
+/// `RenderTargetLists`, depending on whether all of our tasks have the
+/// same `RenderTargetKind`. The lists are then shipped to the `Renderer`, which
+/// allocates a device texture array, with one slice per render target in the
+/// list.
+///
+/// The upshot of this scheme is that it maximizes batching. In a given pass,
+/// we need to do a separate batch for each individual render target. But with
+/// the texture array, we can expose the entirety of the previous pass to each
+/// task in the current pass in a single batch, which generally allows each
+/// task to be drawn in a single batch regardless of how many results from the
+/// previous pass it depends on.
+///
+/// Note that in some cases (like drop-shadows), we can depend on the output of
+/// a pass earlier than the immediately-preceding pass. See `SavedTargetIndex`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTargetList<T> {
     screen_size: DeviceIntSize,
     pub format: ImageFormat,
     pub max_size: DeviceUintSize,
     pub targets: Vec<T>,
     pub saved_index: Option<SavedTargetIndex>,
-    pub is_shared: bool,
 }
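
A simplified sketch (not part of this patch) of the allocate-or-append scheme
described above. SimpleTarget and allocate_in_list are illustrative stand-ins
for the real RenderTarget implementations and RenderTargetList logic.

struct SimpleTarget {
    free_cursor: u32,
    capacity: u32,
}

impl SimpleTarget {
    fn allocate(&mut self, size: u32) -> Option<u32> {
        if self.free_cursor + size <= self.capacity {
            let origin = self.free_cursor;
            self.free_cursor += size;
            Some(origin)
        } else {
            None
        }
    }
}

// Returns (index of the target in the list, offset within that target).
fn allocate_in_list(targets: &mut Vec<SimpleTarget>, size: u32) -> (usize, u32) {
    // Try the last target in the list first.
    if let Some(origin) = targets.last_mut().and_then(|t| t.allocate(size)) {
        return (targets.len() - 1, origin);
    }
    // No room (or empty list): append a new target, which becomes one more
    // slice of the texture array the Renderer allocates for this pass.
    let mut t = SimpleTarget { free_cursor: 0, capacity: 2048 };
    let origin = t.allocate(size).expect("task larger than a whole target");
    targets.push(t);
    (targets.len() - 1, origin)
}
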
 
 impl<T: RenderTarget> RenderTargetList<T> {
     fn new(
         screen_size: DeviceIntSize,
         format: ImageFormat,
     ) -> Self {
         RenderTargetList {
             screen_size,
             format,
             max_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
             targets: Vec::new(),
             saved_index: None,
-            is_shared: false,
         }
     }
 
     fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
@@ -251,17 +302,17 @@ pub struct FrameOutput {
     pub task_id: RenderTaskId,
     pub pipeline_id: PipelineId,
 }
 
 // Defines where the source data for a blit job can be found.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BlitJobSource {
-    Texture(SourceTexture, i32, DeviceIntRect),
+    Texture(TextureSource, i32, DeviceIntRect),
     RenderTask(RenderTaskId),
 }
 
 // Information required to do a blit from a source to a target.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BlitJob {
     pub source: BlitJobSource,
@@ -280,17 +331,20 @@ pub struct GlyphJob {
     pub embolden_amount: TypedVector2D<f32, DevicePixel>,
 }
 
 #[cfg(not(feature = "pathfinder"))]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct GlyphJob;
 
-/// A render target represents a number of rendering operations on a surface.
+/// Contains the work (in the form of instance arrays) needed to fill a
+/// color output surface (RGBA8).
+///
+/// See `RenderTarget`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ColorRenderTarget {
     pub alpha_batch_containers: Vec<AlphaBatchContainer>,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub readbacks: Vec<DeviceIntRect>,
@@ -495,16 +549,20 @@ impl RenderTarget for ColorRenderTarget 
 
     fn needs_depth(&self) -> bool {
         self.alpha_batch_containers.iter().any(|ab| {
             !ab.opaque_batches.is_empty()
         })
     }
 }
 
+/// Contains the work (in the form of instance arrays) needed to fill an alpha
+/// output surface (R8).
+///
+/// See `RenderTarget`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub scalings: Vec<ScalingInstance>,
@@ -722,59 +780,71 @@ impl TextureCacheRenderTarget {
             embolden_amount: task_info.embolden_amount,
         })
     }
 
     #[cfg(not(feature = "pathfinder"))]
     fn add_glyph_task(&mut self, _: &mut GlyphTask, _: DeviceIntRect) {}
 }
 
+/// Contains the set of `RenderTarget`s specific to the kind of pass.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderPassKind {
+    /// The final pass to the main frame buffer, where we have a single color
+    /// target for display to the user.
     MainFramebuffer(ColorRenderTarget),
+    /// An intermediate pass, where we may have multiple targets.
     OffScreen {
         alpha: RenderTargetList<AlphaRenderTarget>,
         color: RenderTargetList<ColorRenderTarget>,
-        texture_cache: FastHashMap<(SourceTexture, i32), TextureCacheRenderTarget>,
+        texture_cache: FastHashMap<(CacheTextureId, i32), TextureCacheRenderTarget>,
     },
 }
 
 /// A render pass represents a set of rendering operations that don't depend on one
 /// another.
 ///
 /// A render pass can have several render targets if there wasn't enough space in one
-/// target to do all of the rendering for that pass.
+/// target to do all of the rendering for that pass. See `RenderTargetList`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderPass {
+    /// The kind of pass, as well as the set of targets associated with that
+    /// kind of pass.
     pub kind: RenderPassKind,
+    /// The set of tasks to be performed in this pass, as indices into the
+    /// `RenderTaskTree`.
     tasks: Vec<RenderTaskId>,
 }
 
 impl RenderPass {
+    /// Creates a pass for the main framebuffer. There is only one of these, and
+    /// it is always the last pass.
     pub fn new_main_framebuffer(screen_size: DeviceIntSize) -> Self {
         let target = ColorRenderTarget::new(None, screen_size);
         RenderPass {
             kind: RenderPassKind::MainFramebuffer(target),
             tasks: vec![],
         }
     }
 
+    /// Creates an intermediate off-screen pass.
     pub fn new_off_screen(screen_size: DeviceIntSize) -> Self {
         RenderPass {
             kind: RenderPassKind::OffScreen {
                 color: RenderTargetList::new(screen_size, ImageFormat::BGRA8),
                 alpha: RenderTargetList::new(screen_size, ImageFormat::R8),
                 texture_cache: FastHashMap::default(),
             },
             tasks: vec![],
         }
     }
 
+    /// Adds a task to this pass.
     pub fn add_render_task(
         &mut self,
         task_id: RenderTaskId,
         size: DeviceIntSize,
         target_kind: RenderTargetKind,
     ) {
         if let RenderPassKind::OffScreen { ref mut color, ref mut alpha, .. } = self.kind {
             let max_size = match target_kind {
@@ -783,16 +853,21 @@ impl RenderPass {
             };
             max_size.width = cmp::max(max_size.width, size.width as u32);
             max_size.height = cmp::max(max_size.height, size.height as u32);
         }
 
         self.tasks.push(task_id);
     }
 
+    /// Processes this pass to prepare it for rendering.
+    ///
+    /// Among other things, this allocates output regions for each of our tasks
+    /// (added via `add_render_task`) in a `RenderTarget` and assigns each
+    /// task to that target.
     pub fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
         clip_store: &ClipStore,
         transforms: &mut TransformPalette,
@@ -819,21 +894,16 @@ impl RenderPass {
                     gpu_cache,
                     render_tasks,
                     deferred_resolves,
                     prim_headers,
                     transforms,
                 );
             }
             RenderPassKind::OffScreen { ref mut color, ref mut alpha, ref mut texture_cache } => {
-                let is_shared_alpha = self.tasks.iter().any(|&task_id| {
-                    let task = &render_tasks[task_id];
-                    task.is_shared() &&
-                        task.target_kind() == RenderTargetKind::Alpha
-                });
                 let saved_color = if self.tasks.iter().any(|&task_id| {
                     let t = &render_tasks[task_id];
                     t.target_kind() == RenderTargetKind::Color && t.saved_index.is_some()
                 }) {
                     Some(render_tasks.save_target())
                 } else {
                     None
                 };
@@ -935,17 +1005,16 @@ impl RenderPass {
                     ctx,
                     gpu_cache,
                     render_tasks,
                     deferred_resolves,
                     saved_alpha,
                     prim_headers,
                     transforms,
                 );
-                alpha.is_shared = is_shared_alpha;
             }
         }
     }
 }
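
To make the lifecycle above concrete, here is a minimal sketch of driving a pass, not part of this changeset. It assumes the `RenderPass`, `RenderTargetKind`, and `RenderTaskId` types shown above plus a euclid-style `DeviceIntSize::new`; the sizes are hypothetical values, and the heavyweight arguments to `build` are elided.

    // Hypothetical driver code; sizes are illustrative.
    fn drive_pass(task_id: RenderTaskId) {
        let screen_size = DeviceIntSize::new(1024, 768);
        let mut pass = RenderPass::new_off_screen(screen_size);
        // Queue the task; its output region is not allocated yet.
        pass.add_render_task(task_id, DeviceIntSize::new(256, 256), RenderTargetKind::Color);
        // Later, RenderPass::build(...) walks the queued tasks, allocates a
        // region for each in the pass's color or alpha RenderTargetList, and
        // assigns the task to that target.
    }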
 
 #[derive(Debug, Clone, Default)]
 pub struct CompositeOps {
     // Requires only a single texture as input (e.g. most filters)
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -535,16 +535,17 @@ impl<Src, Dst> FastTransform<Src, Dst> {
             FastTransform::Transform { inverse: Some(ref inverse), is_2d: true, .. }  =>
                 inverse.transform_rect(rect),
             FastTransform::Transform { ref transform, is_2d: false, .. } =>
                 transform.inverse_rect_footprint(rect),
             FastTransform::Transform { inverse: None, .. }  => None,
         }
     }
 
+    #[allow(dead_code)]
     pub fn post_translate(&self, new_offset: TypedVector2D<f32, Dst>) -> Self {
         match *self {
             FastTransform::Offset(offset) => {
                 let offset = offset.to_untyped() + new_offset.to_untyped();
                 FastTransform::Offset(TypedVector2D::from_untyped(&offset))
             }
             FastTransform::Transform { ref transform, .. } => {
                 let transform = transform.post_translate(new_offset.to_3d());
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -6,17 +6,16 @@ extern crate serde_bytes;
 
 use app_units::Au;
 use channel::{self, MsgSender, Payload, PayloadSender, PayloadSenderHelperMethods};
 use std::cell::Cell;
 use std::fmt;
 use std::marker::PhantomData;
 use std::os::raw::c_void;
 use std::path::PathBuf;
-use std::sync::Arc;
 use std::u32;
 use {BuiltDisplayList, BuiltDisplayListDescriptor, ColorF, DeviceIntPoint, DeviceUintRect};
 use {DeviceUintSize, ExternalScrollId, FontInstanceKey, FontInstanceOptions};
 use {FontInstancePlatformOptions, FontKey, FontVariation, GlyphDimensions, GlyphIndex, ImageData};
 use {ImageDescriptor, ImageKey, ItemTag, LayoutPoint, LayoutSize, LayoutTransform, LayoutVector2D};
 use {NativeFontHandle, WorldPoint};
 
 pub type TileSize = u16;
@@ -1264,39 +1263,52 @@ pub enum Checkpoint {
     /// notified. This provides the guarantee that if a request is created it will get notified.
     TransactionDropped,
 }
 
 pub trait NotificationHandler : Send + Sync {
     fn notify(&self, when: Checkpoint);
 }
 
-#[derive(Clone)]
 pub struct NotificationRequest {
-    handler: Arc<NotificationHandler>,
+    handler: Option<Box<NotificationHandler>>,
     when: Checkpoint,
-    done: bool,
 }
 
 impl NotificationRequest {
-    pub fn new(when: Checkpoint, handler: Arc<NotificationHandler>) -> Self {
+    pub fn new(when: Checkpoint, handler: Box<NotificationHandler>) -> Self {
         NotificationRequest {
-            handler,
+            handler: Some(handler),
             when,
-            done: false,
         }
     }
 
     pub fn when(&self) -> Checkpoint { self.when }
 
     pub fn notify(mut self) {
-        self.handler.notify(self.when);
-        self.done = true;
+        if let Some(handler) = self.handler.take() {
+            handler.notify(self.when);
+        }
     }
 }
 
 impl Drop for NotificationRequest {
     fn drop(&mut self) {
-        if !self.done {
-            self.handler.notify(Checkpoint::TransactionDropped);
+        if let Some(ref mut handler) = self.handler {
+            handler.notify(Checkpoint::TransactionDropped);
         }
     }
 }
+
+// This Clone impl yields an "empty" request: we don't want a request to be
+// notified twice, so after a clone the handler remains owned by only one of
+// the API messages (the original one).
+// This works in practice because notification requests are used for
+// synchronization, so we don't need to include them in wrench's recording
+// mechanism, which clones the messages.
+impl Clone for NotificationRequest {
+    fn clone(&self) -> Self {
+        NotificationRequest {
+            when: self.when,
+            handler: None,
+        }
+    }
+}
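
A minimal sketch of the ownership model described in the comment above: the boxed handler fires at most once, and a clone (as made by wrench's recorder) is inert. `Noop` is a hypothetical handler; `TransactionDropped` is the only `Checkpoint` variant visible in this hunk, so it stands in for whatever checkpoint a real caller waits on.

    // Hypothetical usage, not part of this changeset.
    struct Noop;
    impl NotificationHandler for Noop {
        fn notify(&self, _when: Checkpoint) {
            // React to the checkpoint; a request dropped without being
            // notified arrives here as Checkpoint::TransactionDropped.
        }
    }

    fn example() {
        let request = NotificationRequest::new(Checkpoint::TransactionDropped, Box::new(Noop));
        let recorded = request.clone(); // the handler stays with `request`
        request.notify();               // notifies exactly once
        drop(recorded);                 // inert clone: its handler is None
    }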
--- a/gfx/webrender_api/src/image.rs
+++ b/gfx/webrender_api/src/image.rs
@@ -122,34 +122,38 @@ impl ImageFormat {
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ColorDepth {
     /// 8 bits image (most common)
     Color8,
     /// 10 bits image
     Color10,
     /// 12 bits image
     Color12,
+    /// 16 bits image
+    Color16,
 }
 
 impl ColorDepth {
     /// Return the numerical bit depth value for the type.
     pub fn bit_depth(self) -> u32 {
         match self {
             ColorDepth::Color8 => 8,
             ColorDepth::Color10 => 10,
             ColorDepth::Color12 => 12,
+            ColorDepth::Color16 => 16,
         }
     }
     /// 10- and 12-bit images are encoded using 16-bit integers, so we need
     /// to rescale the 10- or 12-bit value to extend it to 16 bits.
     pub fn rescaling_factor(self) -> f32 {
         match self {
             ColorDepth::Color8 => 1.0,
             ColorDepth::Color10 => 64.0,
             ColorDepth::Color12 => 16.0,
+            ColorDepth::Color16 => 1.0,
         }
     }
 }
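
A worked example of the factor: for the formats stored in 16-bit integers it is 2^(16 - bit_depth()), so a full-scale 10-bit sample stretches to nearly the full 16-bit range.

    // Illustrative only, not part of this changeset.
    fn example() {
        let depth = ColorDepth::Color10;
        let sample: u16 = 1023; // maximum 10-bit value
        let rescaled = sample as f32 * depth.rescaling_factor();
        assert_eq!(rescaled, 65472.0); // 1023 * 64, near the 16-bit maximum
    }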
 
 /// Metadata (but not storage) describing an image In WebRender.
 #[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ImageDescriptor {
     /// Format of the image data.
--- a/gfx/webrender_api/src/lib.rs
+++ b/gfx/webrender_api/src/lib.rs
@@ -1,12 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+//! The `webrender_api` crate contains an assortment of types and functions used
+//! by WebRender consumers as well as, in many cases, WebRender itself.
+//!
+//! This separation allows Servo to parallelize compilation across `webrender`
+//! and other crates that depend on `webrender_api`. So in practice, we put
+//! things in this crate when Servo needs to use them. Firefox depends on the
+//! `webrender` crate directly, and so this distinction is not really relevant
+//! there.
+
 #![cfg_attr(feature = "nightly", feature(nonzero))]
 #![cfg_attr(feature = "cargo-clippy", allow(float_cmp, too_many_arguments, unreadable_literal))]
 
 extern crate app_units;
 extern crate bincode;
 #[macro_use]
 extern crate bitflags;
 extern crate byteorder;
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-d7a6d081384ce0da9dd359b0cf4b9f758aab1b67
+9536249e3ed920a920346f6cc0a79473cad16099
--- a/gfx/webrender_bindings/src/bindings.rs
+++ b/gfx/webrender_bindings/src/bindings.rs
@@ -1132,17 +1132,17 @@ pub extern "C" fn wr_transaction_notify(
     impl NotificationHandler for GeckoNotification {
         fn notify(&self, when: Checkpoint) {
             unsafe {
                 wr_transaction_notification_notified(self.0, when);
             }
         }
     }
 
-    let handler = Arc::new(GeckoNotification(event));
+    let handler = Box::new(GeckoNotification(event));
     txn.notify(NotificationRequest::new(when, handler));
 }
 
 #[no_mangle]
 pub extern "C" fn wr_transaction_update_epoch(
     txn: &mut Transaction,
     pipeline_id: WrPipelineId,
     epoch: WrEpoch,
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -72,16 +72,18 @@ enum class ClipMode {
 // Specifies the color depth of an image. Currently only used for YUV images.
 enum class ColorDepth : uint8_t {
   // 8 bits image (most common)
   Color8,
   // 10 bits image
   Color10,
   // 12 bits image
   Color12,
+  // 16 bits image
+  Color16,
 
   Sentinel /* this must be last for serialization purposes. */
 };
 
 enum class ExtendMode : uint32_t {
   Clamp,
   Repeat,
 
@@ -274,16 +276,19 @@ struct DevicePixel;
 
 struct DocumentHandle;
 
 // Geometry in a stacking context's local coordinate space (logical pixels).
 struct LayoutPixel;
 
 // The renderer is responsible for submitting to the GPU the work prepared by the
 // RenderBackend.
+//
+// We have a separate `Renderer` instance for each instance of WebRender (generally
+// one per OS window), and all instances share the same thread.
 struct Renderer;
 
 // Offset in number of tiles.
 struct Tiles;
 
 // Represents the work associated to a transaction before scene building.
 struct Transaction;
 
deleted file mode 100644
--- a/js/src/jit/Disassembler.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * vim: set ts=8 sts=4 et sw=4 tw=99:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "jit/Disassembler.h"
-
-using namespace js;
-using namespace js::jit;
-using namespace js::jit::Disassembler;
-
-#ifdef DEBUG
-bool
-Disassembler::ComplexAddress::operator==(const ComplexAddress& other) const
-{
-    return base_ == other.base_ &&
-           index_ == other.index_ &&
-           scale_ == other.scale_ &&
-           disp_ == other.disp_ &&
-           isPCRelative_ == other.isPCRelative_;
-}
-
-bool
-Disassembler::ComplexAddress::operator!=(const ComplexAddress& other) const
-{
-    return !operator==(other);
-}
-
-bool
-Disassembler::OtherOperand::operator==(const OtherOperand& other) const
-{
-    if (kind_ != other.kind_) {
-        return false;
-    }
-    switch (kind_) {
-      case Imm: return u_.imm == other.u_.imm;
-      case GPR: return u_.gpr == other.u_.gpr;
-      case FPR: return u_.fpr == other.u_.fpr;
-    }
-    MOZ_CRASH("Unexpected OtherOperand kind");
-}
-
-bool
-Disassembler::OtherOperand::operator!=(const OtherOperand& other) const
-{
-    return !operator==(other);
-}
-
-bool
-Disassembler::HeapAccess::operator==(const HeapAccess& other) const
-{
-    return kind_ == other.kind_ &&
-           size_ == other.size_ &&
-           address_ == other.address_ &&
-           otherOperand_ == other.otherOperand_;
-}
-
-bool
-Disassembler::HeapAccess::operator!=(const HeapAccess& other) const
-{
-    return !operator==(other);
-}
-
-#endif
deleted file mode 100644
--- a/js/src/jit/Disassembler.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * vim: set ts=8 sts=4 et sw=4 tw=99:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef jit_Disassembler_h
-#define jit_Disassembler_h
-
-#include "jit/MacroAssembler.h"
-#include "jit/Registers.h"
-
-namespace js {
-namespace jit {
-
-namespace Disassembler {
-
-class ComplexAddress {
-    int32_t disp_;
-    Register::Encoding base_ : 8;
-    Register::Encoding index_ : 8;
-    int8_t scale_; // log2 encoding
-    bool isPCRelative_;
-
-  public:
-    ComplexAddress()
-      : disp_(0),
-        base_(Registers::Invalid),
-        index_(Registers::Invalid),
-        scale_(0),
-        isPCRelative_(false)
-    {
-        MOZ_ASSERT(*this == *this);
-    }
-
-    ComplexAddress(int32_t disp, Register::Encoding base)
-      : disp_(disp),
-        base_(base),
-        index_(Registers::Invalid),
-        scale_(0),
-        isPCRelative_(false)
-    {
-        MOZ_ASSERT(*this == *this);
-        MOZ_ASSERT(base != Registers::Invalid);
-        MOZ_ASSERT(base_ == base);
-    }
-
-    ComplexAddress(int32_t disp, Register::Encoding base, Register::Encoding index, int scale)
-      : disp_(disp),
-        base_(base),
-        index_(index),
-        scale_(scale),
-        isPCRelative_(false)
-    {
-        MOZ_ASSERT(scale >= 0 && scale < 4);
-        MOZ_ASSERT_IF(index == Registers::Invalid, scale == 0);
-        MOZ_ASSERT(*this == *this);
-        MOZ_ASSERT(base_ == base);
-        MOZ_ASSERT(index_ == index);
-    }
-
-    explicit ComplexAddress(const void* addr)
-      : disp_(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr))),
-        base_(Registers::Invalid),
-        index_(Registers::Invalid),
-        scale_(0),
-        isPCRelative_(false)
-    {
-        MOZ_ASSERT(*this == *this);
-        MOZ_ASSERT(reinterpret_cast<const void*>(uintptr_t(disp_)) == addr);
-    }
-
-    explicit ComplexAddress(const Operand& op) {
-#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
-        switch (op.kind()) {
-          case Operand::MEM_REG_DISP:
-            *this = ComplexAddress(op.disp(), op.base());
-            return;
-          case Operand::MEM_SCALE:
-            *this = ComplexAddress(op.disp(), op.base(), op.index(), op.scale());
-            return;
-          case Operand::MEM_ADDRESS32:
-            *this = ComplexAddress(op.address());
-            return;
-          default:
-            break;
-        }
-#endif
-        MOZ_CRASH("Unexpected Operand kind");
-    }
-
-    bool isPCRelative() const {
-        return isPCRelative_;
-    }
-
-    int32_t disp() const {
-        return disp_;
-    }
-
-    bool hasBase() const {
-        return base_ != Registers::Invalid;
-    }
-
-    Register::Encoding base() const {
-        MOZ_ASSERT(hasBase());
-        return base_;
-    }
-
-    bool hasIndex() const {
-        return index_ != Registers::Invalid;
-    }
-
-    Register::Encoding index() const {
-        MOZ_ASSERT(hasIndex());
-        return index_;
-    }
-
-    uint32_t scale() const {
-        return scale_;
-    }
-
-#ifdef DEBUG
-    bool operator==(const ComplexAddress& other) const;
-    bool operator!=(const ComplexAddress& other) const;
-#endif
-};
-
-// An operand other than a memory operand -- a register or an immediate.
-class OtherOperand {
-  public:
-    enum Kind {
-        Imm,
-        GPR,
-        FPR,
-    };
-
-  private:
-    Kind kind_;
-    union {
-        int32_t imm;
-        Register::Encoding gpr;
-        FloatRegister::Encoding fpr;
-    } u_;
-
-  public:
-    OtherOperand()
-      : kind_(Imm)
-    {
-        u_.imm = 0;
-        MOZ_ASSERT(*this == *this);
-    }
-
-    explicit OtherOperand(int32_t imm)
-      : kind_(Imm)
-    {
-        u_.imm = imm;
-        MOZ_ASSERT(*this == *this);
-    }
-
-    explicit OtherOperand(Register::Encoding gpr)
-      : kind_(GPR)
-    {
-        u_.gpr = gpr;
-        MOZ_ASSERT(*this == *this);
-    }
-
-    explicit OtherOperand(FloatRegister::Encoding fpr)
-      : kind_(FPR)
-    {
-        u_.fpr = fpr;
-        MOZ_ASSERT(*this == *this);
-    }
-
-    Kind kind() const {
-        return kind_;
-    }
-
-    int32_t imm() const {
-        MOZ_ASSERT(kind_ == Imm);
-        return u_.imm;
-    }
-
-    Register::Encoding gpr() const {
-        MOZ_ASSERT(kind_ == GPR);
-        return u_.gpr;
-    }
-
-    FloatRegister::Encoding fpr() const {
-        MOZ_ASSERT(kind_ == FPR);
-        return u_.fpr;
-    }
-
-#ifdef DEBUG
-    bool operator==(const OtherOperand& other) const;
-    bool operator!=(const OtherOperand& other) const;
-#endif
-};
-
-class HeapAccess {
-  public:
-    enum Kind {
-        Unknown,
-        Load,       // any bits not covered by the load are zeroed
-        LoadSext32, // like Load, but sign-extend to 32 bits
-        LoadSext64, // like Load, but sign-extend to 64 bits
-        Store
-    };
-
-  private:
-    Kind kind_;
-    size_t size_; // The number of bytes of memory accessed
-    ComplexAddress address_;
-    OtherOperand otherOperand_;
-
-  public:
-    HeapAccess()
-      : kind_(Unknown),
-        size_(0)
-    {
-        MOZ_ASSERT(*this == *this);
-    }
-
-    HeapAccess(Kind kind, size_t size, const ComplexAddress& address, const OtherOperand& otherOperand)
-      : kind_(kind),
-        size_(size),
-        address_(address),
-        otherOperand_(otherOperand)
-    {
-        MOZ_ASSERT(kind != Unknown);
-        MOZ_ASSERT_IF(kind == LoadSext32, otherOperand.kind() != OtherOperand::FPR);
-        MOZ_ASSERT_IF(kind == Load || kind == LoadSext32, otherOperand.kind() != OtherOperand::Imm);
-        MOZ_ASSERT(*this == *this);
-    }
-
-    Kind kind() const {
-        return kind_;
-    }
-
-    size_t size() const {
-        MOZ_ASSERT(kind_ != Unknown);
-        return size_;
-    }
-
-    const ComplexAddress& address() const {
-        return address_;
-    }
-
-    const OtherOperand& otherOperand() const {
-        return otherOperand_;
-    }
-
-#ifdef DEBUG
-    bool operator==(const HeapAccess& other) const;
-    bool operator!=(const HeapAccess& other) const;
-#endif
-};
-
-MOZ_COLD uint8_t* DisassembleHeapAccess(uint8_t* ptr, HeapAccess* access);
-
-#ifdef DEBUG
-void DumpHeapAccess(const HeapAccess& access);
-
-inline void
-VerifyHeapAccess(uint8_t* begin, uint8_t* end, const HeapAccess& expected)
-{
-    HeapAccess disassembled;
-    uint8_t* e = DisassembleHeapAccess(begin, &disassembled);
-    MOZ_ASSERT(e == end);
-    MOZ_ASSERT(disassembled == expected);
-}
-#endif
-
-} // namespace Disassembler
-
-} // namespace jit
-} // namespace js
-
-#endif /* jit_Disassembler_h */
--- a/js/src/jit/EffectiveAddressAnalysis.cpp
+++ b/js/src/jit/EffectiveAddressAnalysis.cpp
@@ -190,83 +190,31 @@ AnalyzeLoadUnboxedScalar(MLoadUnboxedSca
     if (!add->hasLiveDefUses() && DeadIfUnused(add) && add->canRecoverOnBailout()) {
         JitSpew(JitSpew_EAA, "mark as recovered on bailout: %s%u",
                 add->opName(), add->id());
         add->setRecoveredOnBailoutUnchecked();
     }
 }
 
 template<typename AsmJSMemoryAccess>
-bool
-EffectiveAddressAnalysis::tryAddDisplacement(AsmJSMemoryAccess* ins, int32_t o)
-{
-#ifdef WASM_HUGE_MEMORY
-    // Compute the new offset. Check for overflow.
-    uint32_t oldOffset = ins->offset();
-    uint32_t newOffset = oldOffset + o;
-    if (o < 0 ? (newOffset >= oldOffset) : (newOffset < oldOffset)) {
-        return false;
-    }
-
-    // The offset must ultimately be written into the offset immediate of a load
-    // or store instruction so don't allow folding of the offset is bigger.
-    if (newOffset >= wasm::OffsetGuardLimit) {
-        return false;
-    }
-
-    // Everything checks out. This is the new offset.
-    ins->setOffset(newOffset);
-    return true;
-#else
-    return false;
-#endif
-}
-
-template<typename AsmJSMemoryAccess>
 void
 EffectiveAddressAnalysis::analyzeAsmJSHeapAccess(AsmJSMemoryAccess* ins)
 {
     MDefinition* base = ins->base();
 
     if (base->isConstant()) {
-        // Look for heap[i] where i is a constant offset, and fold the offset.
-        // By doing the folding now, we simplify the task of codegen; the offset
-        // is always the address mode immediate. This also allows it to avoid
-        // a situation where the sum of a constant pointer value and a non-zero
-        // offset doesn't actually fit into the address mode immediate.
-        int32_t imm = base->toConstant()->toInt32();
-        if (imm != 0 && tryAddDisplacement(ins, imm)) {
-            MInstruction* zero = MConstant::New(graph_.alloc(), Int32Value(0));
-            ins->block()->insertBefore(ins, zero);
-            ins->replaceBase(zero);
-        }
-
         // If the index is within the minimum heap length, we can optimize
         // away the bounds check.
+        int32_t imm = base->toConstant()->toInt32();
         if (imm >= 0) {
             int32_t end = (uint32_t)imm + ins->byteSize();
             if (end >= imm && (uint32_t)end <= mir_->minWasmHeapLength()) {
                  ins->removeBoundsCheck();
             }
         }
-    } else if (base->isAdd()) {
-        // Look for heap[a+i] where i is a constant offset, and fold the offset.
-        // Alignment masks have already been moved out of the way by the
-        // Alignment Mask Analysis pass.
-        MDefinition* op0 = base->toAdd()->getOperand(0);
-        MDefinition* op1 = base->toAdd()->getOperand(1);
-        if (op0->isConstant()) {
-            mozilla::Swap(op0, op1);
-        }
-        if (op1->isConstant()) {
-            int32_t imm = op1->toConstant()->toInt32();
-            if (tryAddDisplacement(ins, imm)) {
-                ins->replaceBase(op0);
-            }
-        }
     }
 }
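
The bounds-check elision that survives this change is a single overflow-safe range test. A sketch of the same idea, written in Rust for brevity and not part of this changeset; the names are illustrative, with `min_heap_length` standing in for `mir_->minWasmHeapLength()`:

    // Illustrative restatement of the C++ condition above.
    fn can_remove_bounds_check(imm: i32, byte_size: u32, min_heap_length: u32) -> bool {
        if imm < 0 {
            return false;
        }
        // The access must end within the minimum heap length, without the
        // end-of-access computation wrapping around.
        match (imm as u32).checked_add(byte_size) {
            Some(end) => end <= min_heap_length,
            None => false,
        }
    }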
 
 // This analysis converts patterns of the form:
 //   truncate(x + (y << {0,1,2,3}))
 //   truncate(x + (y << {0,1,2,3}) + imm32)
 // into a single lea instruction, and patterns of the form:
 //   asmload(x + imm32)
--- a/js/src/jit/EffectiveAddressAnalysis.h
+++ b/js/src/jit/EffectiveAddressAnalysis.h
@@ -15,19 +15,16 @@ namespace jit {
 class MIRGraph;
 
 class EffectiveAddressAnalysis
 {
     MIRGenerator* mir_;
     MIRGraph& graph_;
 
     template <typename AsmJSMemoryAccess>
-    MOZ_MUST_USE bool tryAddDisplacement(AsmJSMemoryAccess* ins, int32_t o);
-
-    template <typename AsmJSMemoryAccess>
     void analyzeAsmJSHeapAccess(AsmJSMemoryAccess* ins);
 
   public:
     EffectiveAddressAnalysis(MIRGenerator* mir, MIRGraph& graph)
       : mir_(mir), graph_(graph)
     {}
 
     MOZ_MUST_USE bool analyze();
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -4631,21 +4631,16 @@ LIRGenerator::visitWasmAddOffset(MWasmAd
     MOZ_ASSERT(ins->type() == MIRType::Int32);
     MOZ_ASSERT(ins->offset());
     define(new(alloc()) LWasmAddOffset(useRegisterAtStart(ins->base())), ins);
 }
 
 void
 LIRGenerator::visitWasmLoadTls(MWasmLoadTls* ins)
 {
-#ifdef WASM_HUGE_MEMORY
-    // This will disappear once we remove HeapReg and replace it with a load
-    // from Tls, but in the mean time it keeps us sane.
-    MOZ_CRASH("No WasmLoadTls here at the moment");
-#endif
     auto* lir = new(alloc()) LWasmLoadTls(useRegisterAtStart(ins->tlsPtr()));
     define(lir, ins);
 }
 
 void
 LIRGenerator::visitWasmBoundsCheck(MWasmBoundsCheck* ins)
 {
 #ifdef WASM_HUGE_MEMORY
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -13513,124 +13513,112 @@ class MAsmJSMemoryAccess
 };
 
 class MAsmJSLoadHeap
   : public MVariadicInstruction, // 1 plus optional memoryBase and boundsCheckLimit
     public MAsmJSMemoryAccess,
     public NoTypePolicy::Data
 {
     uint32_t memoryBaseIndex_;
-    uint32_t boundsCheckIndex_;
-
-    explicit MAsmJSLoadHeap(uint32_t memoryBaseIndex, uint32_t boundsCheckIndex,
-                            Scalar::Type accessType)
+
+    explicit MAsmJSLoadHeap(uint32_t memoryBaseIndex, Scalar::Type accessType)
       : MVariadicInstruction(classOpcode),
         MAsmJSMemoryAccess(accessType),
-        memoryBaseIndex_(memoryBaseIndex),
-        boundsCheckIndex_(boundsCheckIndex)
+        memoryBaseIndex_(memoryBaseIndex)
     {
         setResultType(ScalarTypeToMIRType(accessType));
     }
 
   public:
     INSTRUCTION_HEADER(AsmJSLoadHeap)
 
     static MAsmJSLoadHeap* New(TempAllocator& alloc,
                                MDefinition* memoryBase,
                                MDefinition* base,
                                MDefinition* boundsCheckLimit,
                                Scalar::Type accessType)
     {
-        uint32_t nextIndex = 1;
+        uint32_t nextIndex = 2;
         uint32_t memoryBaseIndex = memoryBase ? nextIndex++ : UINT32_MAX;
-        uint32_t boundsCheckIndex = boundsCheckLimit ? nextIndex++ : UINT32_MAX;
-
-        MAsmJSLoadHeap* load = new(alloc) MAsmJSLoadHeap(memoryBaseIndex, boundsCheckIndex,
-                                                         accessType);
+
+        MAsmJSLoadHeap* load = new(alloc) MAsmJSLoadHeap(memoryBaseIndex, accessType);
         if (!load->init(alloc, nextIndex)) {
             return nullptr;
         }
 
         load->initOperand(0, base);
+        load->initOperand(1, boundsCheckLimit);
         if (memoryBase) {
             load->initOperand(memoryBaseIndex, memoryBase);
         }
-        if (boundsCheckLimit) {
-            load->initOperand(boundsCheckIndex, boundsCheckLimit);
-        }
 
         return load;
     }
 
     MDefinition* base() const { return getOperand(0); }
     void replaceBase(MDefinition* newBase) { replaceOperand(0, newBase); }
-    MDefinition* memoryBase() const { return getOperand(memoryBaseIndex_); }
-    MDefinition* boundsCheckLimit() const { return getOperand(boundsCheckIndex_); }
+    bool hasMemoryBase() const { return memoryBaseIndex_ != UINT32_MAX; }
+    MDefinition* memoryBase() const { MOZ_ASSERT(hasMemoryBase()); return getOperand(memoryBaseIndex_); }
+    MDefinition* boundsCheckLimit() const { return getOperand(1); }
 
     bool congruentTo(const MDefinition* ins) const override;
     AliasSet getAliasSet() const override {
         return AliasSet::Load(AliasSet::WasmHeap);
     }
     AliasType mightAlias(const MDefinition* def) const override;
 };
 
 class MAsmJSStoreHeap
   : public MVariadicInstruction, // 2 plus optional memoryBase and boundsCheckLimit
     public MAsmJSMemoryAccess,
     public NoTypePolicy::Data
 {
     uint32_t memoryBaseIndex_;
-    uint32_t boundsCheckIndex_;
-
-    explicit MAsmJSStoreHeap(uint32_t memoryBaseIndex, uint32_t boundsCheckIndex,
-                             Scalar::Type accessType)
+
+    explicit MAsmJSStoreHeap(uint32_t memoryBaseIndex, Scalar::Type accessType)
       : MVariadicInstruction(classOpcode),
         MAsmJSMemoryAccess(accessType),
-        memoryBaseIndex_(memoryBaseIndex),
-        boundsCheckIndex_(boundsCheckIndex)
+        memoryBaseIndex_(memoryBaseIndex)
     {
     }
 
   public:
     INSTRUCTION_HEADER(AsmJSStoreHeap)
 
     static MAsmJSStoreHeap* New(TempAllocator& alloc,
                                 MDefinition* memoryBase,
                                 MDefinition* base,
                                 MDefinition* boundsCheckLimit,
                                 Scalar::Type accessType,
                                 MDefinition* v)
     {
-        uint32_t nextIndex = 2;
+        uint32_t nextIndex = 3;
         uint32_t memoryBaseIndex = memoryBase ? nextIndex++ : UINT32_MAX;
-        uint32_t boundsCheckIndex = boundsCheckLimit ? nextIndex++ : UINT32_MAX;
-
-        MAsmJSStoreHeap* store = new(alloc) MAsmJSStoreHeap(memoryBaseIndex, boundsCheckIndex,
-                                                            accessType);
+
+        MAsmJSStoreHeap* store = new(alloc) MAsmJSStoreHeap(memoryBaseIndex, accessType);
         if (!store->init(alloc, nextIndex)) {
             return nullptr;
         }
 
         store->initOperand(0, base);
         store->initOperand(1, v);
+        store->initOperand(2, boundsCheckLimit);
         if (memoryBase) {
             store->initOperand(memoryBaseIndex, memoryBase);
         }
-        if (boundsCheckLimit) {
-            store->initOperand(boundsCheckIndex, boundsCheckLimit);
-        }
 
         return store;
     }
 
     MDefinition* base() const { return getOperand(0); }
     void replaceBase(MDefinition* newBase) { replaceOperand(0, newBase); }
     MDefinition* value() const { return getOperand(1); }
-    MDefinition* memoryBase() const { return getOperand(memoryBaseIndex_); }
-    MDefinition* boundsCheckLimit() const { return getOperand(boundsCheckIndex_); }
+    bool hasMemoryBase() const { return memoryBaseIndex_ != UINT32_MAX; }
+    MDefinition* memoryBase() const { MOZ_ASSERT(hasMemoryBase()); return getOperand(memoryBaseIndex_); }
+    MDefinition* boundsCheckLimit() const { return getOperand(2); }
 
     AliasSet getAliasSet() const override {
         return AliasSet::Store(AliasSet::WasmHeap);
     }
 };
 
 class MWasmCompareExchangeHeap
   : public MVariadicInstruction,
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -1492,24 +1492,23 @@ class MacroAssembler : public MacroAssem
 
     CodeOffset wasmTrapInstruction() PER_SHARED_ARCH;
 
     void wasmTrap(wasm::Trap trap, wasm::BytecodeOffset bytecodeOffset);
     void wasmInterruptCheck(Register tls, wasm::BytecodeOffset bytecodeOffset);
     void wasmReserveStackChecked(uint32_t amount, wasm::BytecodeOffset trapOffset);
 
     // Emit a bounds check against the wasm heap limit, jumping to 'label' if
-    // 'cond' holds. Required when WASM_HUGE_MEMORY is not defined. If
-    // JitOptions.spectreMaskIndex is true, in speculative executions 'index' is
-    // saturated in-place to 'boundsCheckLimit'.
+    // 'cond' holds. If JitOptions.spectreMaskIndex is true, in speculative
+    // executions 'index' is saturated in-place to 'boundsCheckLimit'.
     void wasmBoundsCheck(Condition cond, Register index, Register boundsCheckLimit, Label* label)
-        DEFINED_ON(arm, arm64, mips32, mips64, x86);
+        DEFINED_ON(arm, arm64, mips32, mips64, x86_shared);
 
     void wasmBoundsCheck(Condition cond, Register index, Address boundsCheckLimit, Label* label)
-        DEFINED_ON(arm, arm64, mips32, mips64, x86);
+        DEFINED_ON(arm, arm64, mips32, mips64, x86_shared);
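
A sketch of what the Spectre index masking described above computes; this is an illustration in Rust, not the emitted assembly. The architectural bounds check still decides whether to trap, but the index value that speculative loads can observe is saturated branchlessly.

    // Illustrative only; the real saturation is emitted by the MacroAssembler.
    fn spectre_saturate(index: u32, bounds_check_limit: u32) -> u32 {
        // min typically lowers to cmp + cmov rather than a branch, so a
        // mispredicted bounds check cannot feed an out-of-bounds index
        // to the subsequent load.
        index.min(bounds_check_limit)
    }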
 
     // Each wasm load/store instruction appends its own wasm::Trap::OutOfBounds.
     void wasmLoad(const wasm::MemoryAccessDesc& access, Operand srcAddr, AnyRegister out) DEFINED_ON(x86, x64);
     void wasmLoadI64(const wasm::MemoryAccessDesc& access, Operand srcAddr, Register64 out) DEFINED_ON(x86, x64);
     void wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, Operand dstAddr) DEFINED_ON(x86, x64);
     void wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, Operand dstAddr) DEFINED_ON(x86);
 
     // For all the ARM and ARM64 wasmLoad and wasmStore functions, `ptr` MUST
--- a/js/src/jit/arm/Simulator-arm.cpp
+++ b/js/src/jit/arm/Simulator-arm.cpp
@@ -410,19 +410,19 @@ class AutoLockSimulatorCache : public Lo
 
 mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
     SimulatorProcess::ICacheCheckingDisableCount(1); // Checking is disabled by default.
 SimulatorProcess* SimulatorProcess::singleton_ = nullptr;
 
 int64_t Simulator::StopSimAt = -1L;
 
 Simulator*
-Simulator::Create(JSContext* cx)
+Simulator::Create()
 {
-    auto sim = MakeUnique<Simulator>(cx);
+    auto sim = MakeUnique<Simulator>();
     if (!sim) {
         return nullptr;
     }
 
     if (!sim->init()) {
         return nullptr;
     }
 
@@ -1147,18 +1147,17 @@ SimulatorProcess::FlushICache(void* star
 {
     JitSpewCont(JitSpew_CacheFlush, "[%p %zx]", start_addr, size);
     if (!ICacheCheckingDisableCount) {
         AutoLockSimulatorCache als;
         js::jit::FlushICacheLocked(icache(), start_addr, size);
     }
 }
 
-Simulator::Simulator(JSContext* cx)
-  : cx_(cx)
+Simulator::Simulator()
 {
     // Set up simulator support first. Some of this information is needed to
     // setup the architecture state.
 
     // Note, allocation and anything that depends on allocated memory is
     // deferred until init(), in order to handle OOM properly.
 
     stack_ = nullptr;
@@ -1594,100 +1593,16 @@ Simulator::registerState()
     wasm::RegisterState state;
     state.pc = (void*) get_pc();
     state.fp = (void*) get_register(fp);
     state.sp = (void*) get_register(sp);
     state.lr = (void*) get_register(lr);
     return state;
 }
 
-static inline JitActivation*
-GetJitActivation(JSContext* cx)
-{
-    if (!wasm::CodeExists) {
-        return nullptr;
-    }
-    if (!cx->activation() || !cx->activation()->isJit()) {
-        return nullptr;
-    }
-    return cx->activation()->asJit();
-}
-
-// WebAssembly memories contain an extra region of guard pages (see
-// WasmArrayRawBuffer comment). The guard pages catch out-of-bounds accesses
-// using a signal handler that redirects PC to a stub that safely reports an
-// error. However, if the handler is hit by the simulator, the PC is in C++ code
-// and cannot be redirected. Therefore, we must avoid hitting the handler by
-// redirecting in the simulator before the real handler would have been hit.
-bool
-Simulator::handleWasmSegFault(int32_t addr, unsigned numBytes)
-{
-    JitActivation* act = GetJitActivation(cx_);
-    if (!act) {
-        return false;
-    }
-
-    void* pc = reinterpret_cast<void*>(get_pc());
-    uint8_t* fp = reinterpret_cast<uint8_t*>(get_register(r11));
-
-    const wasm::CodeSegment* segment = wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule()) {
-        return false;
-    }
-    const wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    wasm::Instance* instance = wasm::LookupFaultingInstance(*moduleSegment, pc, fp);
-    if (!instance) {
-        return false;
-    }
-
-    MOZ_RELEASE_ASSERT(&instance->code() == &moduleSegment->code());
-
-    if (!instance->memoryAccessInGuardRegion((uint8_t*)addr, numBytes)) {
-        return false;
-    }
-
-    wasm::Trap trap;
-    wasm::BytecodeOffset bytecode;
-    MOZ_ALWAYS_TRUE(moduleSegment->code().lookupTrap(pc, &trap, &bytecode));
-
-    MOZ_RELEASE_ASSERT(trap == wasm::Trap::OutOfBounds);
-
-    act->startWasmTrap(wasm::Trap::OutOfBounds, bytecode.offset(), registerState());
-    set_pc(int32_t(moduleSegment->trapCode()));
-    return true;
-}
-
-bool
-Simulator::handleWasmIllFault()
-{
-    JitActivation* act = GetJitActivation(cx_);
-    if (!act) {
-        return false;
-    }
-
-    void* pc = reinterpret_cast<void*>(get_pc());
-
-    const wasm::CodeSegment* segment = wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule()) {
-        return false;
-    }
-    const wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    wasm::Trap trap;
-    wasm::BytecodeOffset bytecode;
-    if (!moduleSegment->code().lookupTrap(pc, &trap, &bytecode)) {
-        return false;
-    }
-
-    act->startWasmTrap(trap, bytecode.offset(), registerState());
-    set_pc(int32_t(moduleSegment->trapCode()));
-    return true;
-}
-
 uint64_t
 Simulator::readQ(int32_t addr, SimInstruction* instr, UnalignedPolicy f)
 {
     if (handleWasmSegFault(addr, 8)) {
         return UINT64_MAX;
     }
 
     if ((addr & 3) == 0 || (f == AllowUnaligned && !HasAlignmentFault())) {
@@ -3672,17 +3587,19 @@ rotateBytes(uint32_t val, int32_t rotate
         return (val >> 24) | (val << 8);
     }
 }
 
 void
 Simulator::decodeType3(SimInstruction* instr)
 {
     if (MOZ_UNLIKELY(instr->isUDF())) {
-        if (handleWasmIllFault()) {
+        uint8_t* newPC;
+        if (wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+            set_pc((int32_t)newPC);
             return;
         }
         MOZ_CRASH("illegal instruction encountered");
     }
 
     int rd = instr->rdValue();
     int rn = instr->rnValue();
     int32_t rn_val = get_register(rn);
--- a/js/src/jit/arm/Simulator-arm.h
+++ b/js/src/jit/arm/Simulator-arm.h
@@ -34,17 +34,17 @@
 #include "mozilla/Atomics.h"
 
 #include "jit/arm/Architecture-arm.h"
 #include "jit/arm/disasm/Disasm-arm.h"
 #include "jit/IonTypes.h"
 #include "js/ProfilingFrameIterator.h"
 #include "threading/Thread.h"
 #include "vm/MutexIDs.h"
-#include "wasm/WasmCode.h"
+#include "wasm/WasmSignalHandlers.h"
 
 namespace js {
 namespace jit {
 
 class JitActivation;
 class Simulator;
 class Redirection;
 class CachePage;
@@ -100,22 +100,22 @@ class Simulator
         d24, d25, d26, d27, d28, d29, d30, d31,
         num_d_registers = 32,
         q0 = 0, q1, q2, q3, q4, q5, q6, q7,
         q8, q9, q10, q11, q12, q13, q14, q15,
         num_q_registers = 16
     };
 
     // Returns nullptr on OOM.
-    static Simulator* Create(JSContext* cx);
+    static Simulator* Create();
 
     static void Destroy(Simulator* simulator);
 
     // Constructor/destructor are for internal use only; use the static methods above.
-    explicit Simulator(JSContext* cx);
+    Simulator();
     ~Simulator();
 
     static bool supportsAtomics() { return HasLDSTREXBHD(); }
 
     // The currently executing Simulator instance. Potentially there can be one
     // for each native thread.
     static Simulator* Current();
 
@@ -284,18 +284,30 @@ class Simulator
     inline void disableStop(uint32_t bkpt_code);
     inline void increaseStopCounter(uint32_t bkpt_code);
     void printStopInfo(uint32_t code);
 
     // Handle a wasm interrupt triggered by an async signal handler.
     JS::ProfilingFrameIterator::RegisterState registerState();
 
     // Handle any wasm faults, returning true if the fault was handled.
-    bool handleWasmSegFault(int32_t addr, unsigned numBytes);
-    bool handleWasmIllFault();
+    // This method is rather hot, so we inline the normal (no-wasm) case.
+    bool MOZ_ALWAYS_INLINE handleWasmSegFault(int32_t addr, unsigned numBytes) {
+        if (MOZ_LIKELY(!wasm::CodeExists)) {
+            return false;
+        }
+
+        uint8_t* newPC;
+        if (!wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes, &newPC)) {
+            return false;
+        }
+
+        set_pc(int32_t(newPC));
+        return true;
+    }
 
     // Read and write memory.
     inline uint8_t readBU(int32_t addr);
     inline int8_t readB(int32_t addr);
     inline void writeB(int32_t addr, uint8_t value);
     inline void writeB(int32_t addr, int8_t value);
 
     inline uint8_t readExBU(int32_t addr);
@@ -377,18 +389,16 @@ class Simulator
     template<class ReturnType, int register_size>
     void getFromVFPRegister(int reg_index, ReturnType* out);
 
     template<class InputType, int register_size>
     void setVFPRegister(int reg_index, const InputType& value);
 
     void callInternal(uint8_t* entry);
 
-    JSContext* const cx_;
-
     // Architecture state.
     // Saturating instructions require a Q flag to indicate saturation.
     // There is currently no way to read the CPSR directly, and thus read the Q
     // flag, so this is left unimplemented.
     int32_t registers_[16];
     bool n_flag_;
     bool z_flag_;
     bool c_flag_;
deleted file mode 100644
--- a/js/src/jit/arm64/Disassembler-arm64.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * vim: set ts=8 sts=4 et sw=4 tw=99:
- *
- * Copyright 2018 Mozilla Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "jit/Disassembler.h"
-
-MOZ_COLD uint8_t*
-js::jit::Disassembler::DisassembleHeapAccess(uint8_t*, js::jit::Disassembler::HeapAccess*)
-{
-    MOZ_CRASH("NYI - asm.js not supported yet on this platform");
-}
--- a/js/src/jit/arm64/vixl/Debugger-vixl.cpp
+++ b/js/src/jit/arm64/vixl/Debugger-vixl.cpp
@@ -540,18 +540,18 @@ const char* RegisterToken::kWAliases[kNu
   { "w27", NULL },
   { "w28", NULL },
   { "w29", NULL },
   { "w30", NULL },
   { "wsp", NULL }
 };
 
 
-Debugger::Debugger(JSContext* cx, Decoder* decoder, FILE* stream)
-    : Simulator(cx, decoder, stream),
+Debugger::Debugger(Decoder* decoder, FILE* stream)
+    : Simulator(decoder, stream),
       debug_parameters_(DBG_INACTIVE),
       pending_request_(false),
       steps_(0),
       last_command_(NULL) {
   disasm_ = js_new<PrintDisassembler>(stdout);
   printer_ = js_new<Decoder>();
   printer_->AppendVisitor(disasm_);
 }
--- a/js/src/jit/arm64/vixl/Debugger-vixl.h
+++ b/js/src/jit/arm64/vixl/Debugger-vixl.h
@@ -49,17 +49,17 @@ enum DebugParameters {
 
 // Forward declarations.
 class DebugCommand;
 class Token;
 class FormatToken;
 
 class Debugger : public Simulator {
  public:
-  explicit Debugger(JSContext* cx, Decoder* decoder, FILE* stream = stdout);
+  explicit Debugger(Decoder* decoder, FILE* stream = stdout);
   ~Debugger();
 
   virtual void Run() override;
   virtual void VisitException(const Instruction* instr) override;
 
   int debug_parameters() const { return debug_parameters_; }
   void set_debug_parameters(int parameters) {
     debug_parameters_ = parameters;
--- a/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
+++ b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
@@ -28,32 +28,28 @@
 
 #include "jit/arm64/vixl/Debugger-vixl.h"
 #include "jit/arm64/vixl/Simulator-vixl.h"
 #include "jit/IonTypes.h"
 #include "js/UniquePtr.h"
 #include "js/Utility.h"
 #include "threading/LockGuard.h"
 #include "vm/Runtime.h"
-#include "wasm/WasmInstance.h"
-#include "wasm/WasmProcess.h"
-#include "wasm/WasmSignalHandlers.h"
 
 js::jit::SimulatorProcess* js::jit::SimulatorProcess::singleton_ = nullptr;
 
 namespace vixl {
 
 using mozilla::DebugOnly;
 using js::jit::ABIFunctionType;
 using js::jit::JitActivation;
 using js::jit::SimulatorProcess;
 
-Simulator::Simulator(JSContext* cx, Decoder* decoder, FILE* stream)
-  : cx_(cx)
-  , stream_(nullptr)
+Simulator::Simulator(Decoder* decoder, FILE* stream)
+  : stream_(nullptr)
   , print_disasm_(nullptr)
   , instrumentation_(nullptr)
   , stack_(nullptr)
   , stack_limit_(nullptr)
   , decoder_(nullptr)
   , oom_(false)
 {
     this->init(decoder, stream);
@@ -156,29 +152,29 @@ void Simulator::init(Decoder* decoder, F
 
 Simulator* Simulator::Current() {
   JSContext* cx = js::TlsContext.get();
   MOZ_ASSERT(js::CurrentThreadCanAccessRuntime(cx->runtime()));
   return cx->simulator();
 }
 
 
-Simulator* Simulator::Create(JSContext* cx) {
+Simulator* Simulator::Create() {
   Decoder *decoder = js_new<vixl::Decoder>();
   if (!decoder)
     return nullptr;
 
   // FIXME: This just leaks the Decoder object for now, which is probably OK.
   // FIXME: We should free it at some point.
   // FIXME: Note that it can't be stored in the SimulatorRuntime due to lifetime conflicts.
   js::UniquePtr<Simulator> sim;
   if (getenv("USE_DEBUGGER") != nullptr)
-    sim.reset(js_new<Debugger>(cx, decoder, stdout));
+    sim.reset(js_new<Debugger>(decoder, stdout));
   else
-    sim.reset(js_new<Simulator>(cx, decoder, stdout));
+    sim.reset(js_new<Simulator>(decoder, stdout));
 
   // Check if Simulator:init ran out of memory.
   if (sim && sim->oom())
     return nullptr;
 
   return sim.release();
 }
 
@@ -214,72 +210,27 @@ bool Simulator::overRecursed(uintptr_t n
 
 
 bool Simulator::overRecursedWithExtra(uint32_t extra) const {
   uintptr_t newsp = get_sp() - extra;
   return newsp <= stackLimit();
 }
 
 
-static inline JitActivation*
-GetJitActivation(JSContext* cx)
-{
-    if (!js::wasm::CodeExists)
-        return nullptr;
-    if (!cx->activation() || !cx->activation()->isJit())
-        return nullptr;
-    return cx->activation()->asJit();
-}
-
 JS::ProfilingFrameIterator::RegisterState
 Simulator::registerState()
 {
   JS::ProfilingFrameIterator::RegisterState state;
   state.pc = (uint8_t*) get_pc();
   state.fp = (uint8_t*) get_fp();
   state.lr = (uint8_t*) get_lr();
   state.sp = (uint8_t*) get_sp();
   return state;
 }
 
-bool
-Simulator::handle_wasm_seg_fault(uintptr_t addr, unsigned numBytes)
-{
-    JitActivation* act = GetJitActivation(cx_);
-    if (!act)
-        return false;
-
-    uint8_t* pc = (uint8_t*)get_pc();
-    uint8_t* fp = (uint8_t*)get_fp();
-
-    const js::wasm::CodeSegment* segment = js::wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule())
-        return false;
-    const js::wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    js::wasm::Instance* instance = js::wasm::LookupFaultingInstance(*moduleSegment, pc, fp);
-    if (!instance)
-	return false;
-
-    MOZ_RELEASE_ASSERT(&instance->code() == &moduleSegment->code());
-
-    if (!instance->memoryAccessInGuardRegion((uint8_t*)addr, numBytes))
-        return false;
-
-    js::wasm::Trap trap;
-    js::wasm::BytecodeOffset bytecode;
-    MOZ_ALWAYS_TRUE(moduleSegment->code().lookupTrap(pc, &trap, &bytecode));
-
-    MOZ_RELEASE_ASSERT(trap == js::wasm::Trap::OutOfBounds);
-
-    act->startWasmTrap(js::wasm::Trap::OutOfBounds, bytecode.offset(), registerState());
-    set_pc((Instruction*)moduleSegment->trapCode());
-    return true;
-}
-
 int64_t Simulator::call(uint8_t* entry, int argument_count, ...) {
   va_list parameters;
   va_start(parameters, argument_count);
 
   // First eight arguments passed in registers.
   VIXL_ASSERT(argument_count <= 8);
   // This code should use the type of the called function
   // (with templates, like the callVM machinery), but since the
@@ -413,54 +364,35 @@ class Redirection
 
 
 
 void* Simulator::RedirectNativeFunction(void* nativeFunction, ABIFunctionType type) {
   Redirection* redirection = Redirection::Get(nativeFunction, type);
   return redirection->addressOfSvcInstruction();
 }
 
-bool
-Simulator::handle_wasm_ill_fault()
-{
-    JitActivation* act = GetJitActivation(cx_);
-    if (!act)
-        return false;
-
-    uint8_t* pc = (uint8_t*)get_pc();
-
-    const js::wasm::CodeSegment* segment = js::wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule())
-        return false;
-    const js::wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    js::wasm::Trap trap;
-    js::wasm::BytecodeOffset bytecode;
-    if (!moduleSegment->code().lookupTrap(pc, &trap, &bytecode))
-        return false;
-
-    act->startWasmTrap(trap, bytecode.offset(), registerState());
-    set_pc((Instruction*)moduleSegment->trapCode());
-    return true;
-}
-
 void Simulator::VisitException(const Instruction* instr) {
   switch (instr->Mask(ExceptionMask)) {
     case BRK: {
       int lowbit  = ImmException_offset;
       int highbit = ImmException_offset + ImmException_width - 1;
       HostBreakpoint(instr->Bits(highbit, lowbit));
       break;
     }
     case HLT:
       switch (instr->ImmException()) {
-        case kUnreachableOpcode:
-          if (!handle_wasm_ill_fault())
-              DoUnreachable(instr);
+        case kUnreachableOpcode: {
+          uint8_t* newPC;
+          if (js::wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+            set_pc((Instruction*)newPC);
+            return;
+          }
+          DoUnreachable(instr);
           return;
+        }
         case kTraceOpcode:
           DoTrace(instr);
           return;
         case kLogOpcode:
           DoLog(instr);
           return;
         case kPrintfOpcode:
           DoPrintf(instr);
--- a/js/src/jit/arm64/vixl/Simulator-vixl.h
+++ b/js/src/jit/arm64/vixl/Simulator-vixl.h
@@ -39,16 +39,17 @@
 #include "jit/arm64/vixl/Instructions-vixl.h"
 #include "jit/arm64/vixl/Instrument-vixl.h"
 #include "jit/arm64/vixl/Simulator-Constants-vixl.h"
 #include "jit/arm64/vixl/Utils-vixl.h"
 #include "jit/IonTypes.h"
 #include "js/AllocPolicy.h"
 #include "vm/MutexIDs.h"
 #include "vm/PosixNSPR.h"
+#include "wasm/WasmSignalHandlers.h"
 
 namespace vixl {
 
 // Assemble the specified IEEE-754 components into the target type and apply
 // appropriate rounding.
 //  sign:     0 = positive, 1 = negative
 //  exponent: Unbiased IEEE-754 exponent.
 //  mantissa: The mantissa of the input. The top bit (which is not encoded for
@@ -691,23 +692,23 @@ class SimExclusiveGlobalMonitor {
   const int kPassProbability;
   uint32_t seed_;
 };
 
 class Redirection;
 
 class Simulator : public DecoderVisitor {
  public:
-  explicit Simulator(JSContext* cx, Decoder* decoder, FILE* stream = stdout);
+  explicit Simulator(Decoder* decoder, FILE* stream = stdout);
   ~Simulator();
 
   // Moz changes.
   void init(Decoder* decoder, FILE* stream);
   static Simulator* Current();
-  static Simulator* Create(JSContext* cx);
+  static Simulator* Create();
   static void Destroy(Simulator* sim);
   uintptr_t stackLimit() const;
   uintptr_t* addressOfStackLimit();
   bool overRecursed(uintptr_t newsp = 0) const;
   bool overRecursedWithExtra(uint32_t extra) const;
   int64_t call(uint8_t* entry, int argument_count, ...);
   static void* RedirectNativeFunction(void* nativeFunction, js::jit::ABIFunctionType type);
   void setGPR32Result(int32_t result);
@@ -741,18 +742,31 @@ class Simulator : public DecoderVisitor 
   template <typename T>
   T get_pc_as() const { return reinterpret_cast<T>(const_cast<Instruction*>(pc())); }
 
   void set_pc(const Instruction* new_pc) {
     pc_ = Memory::AddressUntag(new_pc);
     pc_modified_ = true;
   }
 
-  bool handle_wasm_ill_fault();
-  bool handle_wasm_seg_fault(uintptr_t addr, unsigned numBytes);
+  // Handle any wasm faults, returning true if the fault was handled.
+  // This method is rather hot, so we inline the normal (no-wasm) case.
+  bool MOZ_ALWAYS_INLINE handle_wasm_seg_fault(uintptr_t addr, unsigned numBytes) {
+    if (MOZ_LIKELY(!js::wasm::CodeExists)) {
+      return false;
+    }
+
+    uint8_t* newPC;
+    if (!js::wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes, &newPC)) {
+      return false;
+    }
+
+    set_pc((Instruction*)newPC);
+    return true;
+  }
 
   void increment_pc() {
     if (!pc_modified_) {
       pc_ = pc_->NextInstruction();
     }
 
     pc_modified_ = false;
   }
@@ -2513,18 +2527,16 @@ class Simulator : public DecoderVisitor 
 
   bool FPProcessNaNs(const Instruction* instr);
 
   // Pseudo Printf instruction
   void DoPrintf(const Instruction* instr);
 
   // Processor state ---------------------------------------
 
-  JSContext* const cx_;
-
   // Simulated monitors for exclusive access instructions.
   SimExclusiveLocalMonitor local_monitor_;
   SimExclusiveGlobalMonitor global_monitor_;
 
   // Output stream.
   FILE* stream_;
   PrintDisassembler* print_disasm_;
 
--- a/js/src/jit/mips32/Simulator-mips32.cpp
+++ b/js/src/jit/mips32/Simulator-mips32.cpp
@@ -518,17 +518,17 @@ class AutoLockSimulatorCache : public Lo
 
 mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
     SimulatorProcess::ICacheCheckingDisableCount(1); // Checking is disabled by default.
 SimulatorProcess* SimulatorProcess::singleton_ = nullptr;
 
 int Simulator::StopSimAt = -1;
 
 Simulator*
-Simulator::Create(JSContext* cx)
+Simulator::Create()
 {
     auto sim = MakeUnique<Simulator>();
     if (!sim) {
         return nullptr;
     }
 
     if (!sim->init()) {
         return nullptr;
@@ -1634,113 +1634,29 @@ Simulator::registerState()
     wasm::RegisterState state;
     state.pc = (void*) get_pc();
     state.fp = (void*) getRegister(fp);
     state.sp = (void*) getRegister(sp);
     state.lr = (void*) getRegister(ra);
     return state;
 }
 
-// WebAssembly memories contain an extra region of guard pages (see
-// WasmArrayRawBuffer comment). The guard pages catch out-of-bounds accesses
-// using a signal handler that redirects PC to a stub that safely reports an
-// error. However, if the handler is hit by the simulator, the PC is in C++ code
-// and cannot be redirected. Therefore, we must avoid hitting the handler by
-// redirecting in the simulator before the real handler would have been hit.
-bool
-Simulator::handleWasmFault(int32_t addr, unsigned numBytes)
-{
-    if (!wasm::CodeExists) {
-        return false;
-    }
-
-    JSContext* cx = TlsContext.get();
-    if (!cx->activation() || !cx->activation()->isJit()) {
-        return false;
-    }
-    JitActivation* act = cx->activation()->asJit();
-
-    void* pc = reinterpret_cast<void*>(get_pc());
-    uint8_t* fp = reinterpret_cast<uint8_t*>(getRegister(Register::fp));
-
-    const wasm::CodeSegment* segment = wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule()) {
-        return false;
-    }
-    const wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    wasm::Instance* instance = wasm::LookupFaultingInstance(*moduleSegment, pc, fp);
-    if (!instance) {
-        return false;
-    }
-
-    MOZ_RELEASE_ASSERT(&instance->code() == &moduleSegment->code());
-
-    if (!instance->memoryAccessInGuardRegion((uint8_t*)addr, numBytes)) {
-         return false;
-    }
-
-    LLBit_ = false;
-
-    wasm::Trap trap;
-    wasm::BytecodeOffset bytecode;
-    MOZ_ALWAYS_TRUE(moduleSegment->code().lookupTrap(pc, &trap, &bytecode));
-
-    MOZ_RELEASE_ASSERT(trap == wasm::Trap::OutOfBounds);
-
-    act->startWasmTrap(wasm::Trap::OutOfBounds, bytecode.offset(), registerState());
-    set_pc(int32_t(moduleSegment->trapCode()));
-    return true;
-}
-
-bool
-Simulator::handleWasmTrapFault()
-{
-    if (!wasm::CodeExists) {
-        return false;
-    }
-
-    JSContext* cx = TlsContext.get();
-    if (!cx->activation() || !cx->activation()->isJit()) {
-        return false;
-    }
-    JitActivation* act = cx->activation()->asJit();
-
-    void* pc = reinterpret_cast<void*>(get_pc());
-
-    const wasm::CodeSegment* segment = wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule()) {
-        return false;
-    }
-    const wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    wasm::Trap trap;
-    wasm::BytecodeOffset bytecode;
-    if (!moduleSegment->code().lookupTrap(pc, &trap, &bytecode)) {
-        return false;
-    }
-
-    act->startWasmTrap(trap, bytecode.offset(), registerState());
-    set_pc(int32_t(moduleSegment->trapCode()));
-    return true;
-}
-
 // MIPS memory instructions (except lwl/r and swl/r) trap on unaligned memory
 // access enabling the OS to handle them via trap-and-emulate.
 // Note that simulator runs have the runtime system running directly on the host
 // system and only generated code is executed in the simulator.
 // Since the host is typically IA32 it will not trap on unaligned memory access.
 // We assume that executing correct generated code will not produce unaligned
 // memory accesses, so we explicitly check for address alignment and trap.
 // Note that trapping does not occur when executing wasm code, which requires
 // that unaligned memory accesses produce correct results.
 int
 Simulator::readW(uint32_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 4)) {
+    if (handleWasmSegFault(addr, 4)) {
         return -1;
     }
 
     if ((addr & kPointerAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
         return *ptr;
     }
     printf("Unaligned read at 0x%08x, pc=0x%08" PRIxPTR "\n",
@@ -1748,17 +1664,17 @@ Simulator::readW(uint32_t addr, SimInstr
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 void
 Simulator::writeW(uint32_t addr, int value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 4)) {
+    if (handleWasmSegFault(addr, 4)) {
         return;
     }
 
     if ((addr & kPointerAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1767,17 +1683,17 @@ Simulator::writeW(uint32_t addr, int val
            addr,
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 double
 Simulator::readD(uint32_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 8)) {
+    if (handleWasmSegFault(addr, 8)) {
         return NAN;
     }
 
     if ((addr & kDoubleAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         double* ptr = reinterpret_cast<double*>(addr);
         return *ptr;
     }
     printf("Unaligned (double) read at 0x%08x, pc=0x%08" PRIxPTR "\n",
@@ -1785,17 +1701,17 @@ Simulator::readD(uint32_t addr, SimInstr
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 void
 Simulator::writeD(uint32_t addr, double value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 8)) {
+    if (handleWasmSegFault(addr, 8)) {
         return;
     }
 
     if ((addr & kDoubleAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         double* ptr = reinterpret_cast<double*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1804,17 +1720,17 @@ Simulator::writeD(uint32_t addr, double 
            addr,
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 uint16_t
 Simulator::readHU(uint32_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return 0xffff;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
         return *ptr;
     }
     printf("Unaligned unsigned halfword read at 0x%08x, pc=0x%08" PRIxPTR "\n",
@@ -1822,17 +1738,17 @@ Simulator::readHU(uint32_t addr, SimInst
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 int16_t
 Simulator::readH(uint32_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return -1;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         int16_t* ptr = reinterpret_cast<int16_t*>(addr);
         return *ptr;
     }
     printf("Unaligned signed halfword read at 0x%08x, pc=0x%08" PRIxPTR "\n",
@@ -1840,17 +1756,17 @@ Simulator::readH(uint32_t addr, SimInstr
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 void
 Simulator::writeH(uint32_t addr, uint16_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1859,17 +1775,17 @@ Simulator::writeH(uint32_t addr, uint16_
            addr,
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 void
 Simulator::writeH(uint32_t addr, int16_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         int16_t* ptr = reinterpret_cast<int16_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1878,65 +1794,65 @@ Simulator::writeH(uint32_t addr, int16_t
            addr,
            reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 uint32_t
 Simulator::readBU(uint32_t addr)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return 0xff;
     }
 
     uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
     return *ptr;
 }
 
 int32_t
 Simulator::readB(uint32_t addr)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return -1;
     }
 
     int8_t* ptr = reinterpret_cast<int8_t*>(addr);
     return *ptr;
 }
 
 void
 Simulator::writeB(uint32_t addr, uint8_t value)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return;
     }
 
     uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
     LLBit_ = false;
     *ptr = value;
 }
 
 void
 Simulator::writeB(uint32_t addr, int8_t value)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return;
     }
 
     int8_t* ptr = reinterpret_cast<int8_t*>(addr);
     LLBit_ = false;
     *ptr = value;
 }
 
 int
 Simulator::loadLinkedW(uint32_t addr, SimInstruction* instr)
 {
     if ((addr & kPointerAlignmentMask) == 0) {
 
-        if (handleWasmFault(addr, 1)) {
+        if (handleWasmSegFault(addr, 1)) {
             return -1;
         }
 
         volatile int32_t* ptr = reinterpret_cast<volatile int32_t*>(addr);
         int32_t value = *ptr;
         lastLLValue_ = value;
         LLAddr_ = addr;
         // Note that any memory write or "external" interrupt should reset this value to false.
@@ -2341,18 +2257,22 @@ Simulator::softwareInterrupt(SimInstruct
     } else {
         switch (func) {
             case ff_tge:
             case ff_tgeu:
             case ff_tlt:
             case ff_tltu:
             case ff_teq:
             case ff_tne:
-            if (instr->bits(15, 6) == kWasmTrapCode && handleWasmTrapFault()) {
-                return;
+            if (instr->bits(15, 6) == kWasmTrapCode) {
+                uint8_t* newPC;
+                if (wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+                    set_pc(int32_t(newPC));
+                    return;
+                }
             }
         };
         // All remaining break_ codes, and all traps are handled here.
         MipsDebugger dbg(this);
         dbg.debug();
     }
 }
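
[Editor's note, not part of the changeset: the instr->bits(15, 6) test above
isolates the 10-bit code field of a MIPS conditional-trap instruction, which
the wasm compiler is evidently expected to set to kWasmTrapCode at trap sites.
A minimal sketch of the same extraction:]

    // Sketch: pull the trap code (bits 15..6) out of a raw instruction word.
    static uint32_t TrapCode(uint32_t instructionWord)
    {
        return (instructionWord >> 6) & 0x3ff;  // 10 bits
    }
    // A wasm trap site then satisfies TrapCode(word) == kWasmTrapCode.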
 
--- a/js/src/jit/mips32/Simulator-mips32.h
+++ b/js/src/jit/mips32/Simulator-mips32.h
@@ -32,17 +32,17 @@
 #ifdef JS_SIMULATOR_MIPS32
 
 #include "mozilla/Atomics.h"
 
 #include "jit/IonTypes.h"
 #include "js/ProfilingFrameIterator.h"
 #include "threading/Thread.h"
 #include "vm/MutexIDs.h"
-#include "wasm/WasmCode.h"
+#include "wasm/WasmSignalHandlers.h"
 
 namespace js {
 
 namespace jit {
 
 class JitActivation;
 
 class Simulator;
@@ -155,17 +155,17 @@ class Simulator {
         f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11,
         f12, f13, f14, f15,   // f12 and f14 are arguments FPURegisters.
         f16, f17, f18, f19, f20, f21, f22, f23, f24, f25,
         f26, f27, f28, f29, f30, f31,
         kNumFPURegisters
     };
 
     // Returns nullptr on OOM.
-    static Simulator* Create(JSContext* cx);
+    static Simulator* Create();
 
     static void Destroy(Simulator* simulator);
 
     // Constructor/destructor are for internal use only; use the static methods above.
     Simulator();
     ~Simulator();
 
     static bool supportsAtomics() { return true; }
@@ -303,18 +303,31 @@ class Simulator {
     void enableStop(uint32_t code);
     void disableStop(uint32_t code);
     void increaseStopCounter(uint32_t code);
     void printStopInfo(uint32_t code);
 
     JS::ProfilingFrameIterator::RegisterState registerState();
 
     // Handle any wasm faults, returning true if the fault was handled.
-    bool handleWasmFault(int32_t addr, unsigned numBytes);
-    bool handleWasmTrapFault();
+    // This method is rather hot so inline the normal (no-wasm) case.
+    bool MOZ_ALWAYS_INLINE handleWasmSegFault(int32_t addr, unsigned numBytes) {
+        if (MOZ_LIKELY(!js::wasm::CodeExists)) {
+            return false;
+        }
+
+        uint8_t* newPC;
+        if (!js::wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes, &newPC)) {
+            return false;
+        }
+
+        LLBit_ = false;
+        set_pc(int32_t(newPC));
+        return true;
+    }
 
     // Executes one instruction.
     void instructionDecode(SimInstruction* instr);
     // Execute one instruction placed in a branch delay slot.
     void branchDelayInstructionDecode(SimInstruction* instr);
 
   public:
     static int StopSimAt;
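
[Editor's note: a hypothetical accessor showing how the inlined
handleWasmSegFault above is meant to be used — every simulated memory access
consults it first, and a true return means the pc was already redirected to
the wasm trap stub, so the returned value is ignored. Sketch only; the real
callers are the read*/write* methods in Simulator-mips32.cpp:]

    int32_t simulatedRead32(Simulator& sim, uint32_t addr)
    {
        if (sim.handleWasmSegFault(addr, 4)) {
            return -1;  // pc now points at the trap stub; value is discarded
        }
        return *reinterpret_cast<int32_t*>(addr);  // ordinary host access
    }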
--- a/js/src/jit/mips64/Simulator-mips64.cpp
+++ b/js/src/jit/mips64/Simulator-mips64.cpp
@@ -554,17 +554,17 @@ class AutoLockSimulatorCache : public Lo
 
 mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
     SimulatorProcess::ICacheCheckingDisableCount(1);  // Checking is disabled by default.
 SimulatorProcess* SimulatorProcess::singleton_ = nullptr;
 
 int64_t Simulator::StopSimAt = -1;
 
 Simulator *
-Simulator::Create(JSContext* cx)
+Simulator::Create()
 {
     auto sim = MakeUnique<Simulator>();
     if (!sim) {
         return nullptr;
     }
 
     if (!sim->init()) {
         return nullptr;
@@ -1646,192 +1646,108 @@ Simulator::registerState()
     wasm::RegisterState state;
     state.pc = (void*) get_pc();
     state.fp = (void*) getRegister(fp);
     state.sp = (void*) getRegister(sp);
     state.lr = (void*) getRegister(ra);
     return state;
 }
 
-// WebAssembly memories contain an extra region of guard pages (see
-// WasmArrayRawBuffer comment). The guard pages catch out-of-bounds accesses
-// using a signal handler that redirects PC to a stub that safely reports an
-// error. However, if the handler is hit by the simulator, the PC is in C++ code
-// and cannot be redirected. Therefore, we must avoid hitting the handler by
-// redirecting in the simulator before the real handler would have been hit.
-bool
-Simulator::handleWasmFault(uint64_t addr, unsigned numBytes)
-{
-    if (!wasm::CodeExists) {
-        return false;
-    }
-
-    JSContext* cx = TlsContext.get();
-    if (!cx->activation() || !cx->activation()->isJit()) {
-        return false;
-    }
-    JitActivation* act = cx->activation()->asJit();
-
-    void* pc = reinterpret_cast<void*>(get_pc());
-    uint8_t* fp = reinterpret_cast<uint8_t*>(getRegister(Register::fp));
-
-    const wasm::CodeSegment* segment = wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule()) {
-        return false;
-    }
-    const wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    wasm::Instance* instance = wasm::LookupFaultingInstance(*moduleSegment, pc, fp);
-    if (!instance) {
-        return false;
-    }
-
-    MOZ_RELEASE_ASSERT(&instance->code() == &moduleSegment->code());
-
-    if (!instance->memoryAccessInGuardRegion((uint8_t*)addr, numBytes)) {
-         return false;
-    }
-
-    LLBit_ = false;
-
-    wasm::Trap trap;
-    wasm::BytecodeOffset bytecode;
-    MOZ_ALWAYS_TRUE(moduleSegment->code().lookupTrap(pc, &trap, &bytecode));
-
-    MOZ_RELEASE_ASSERT(trap == wasm::Trap::OutOfBounds);
-
-    act->startWasmTrap(wasm::Trap::OutOfBounds, bytecode.offset(), registerState());
-    set_pc(int64_t(moduleSegment->trapCode()));
-    return true;
-}
-
-bool
-Simulator::handleWasmTrapFault()
-{
-    if (!wasm::CodeExists) {
-        return false;
-    }
-
-    JSContext* cx = TlsContext.get();
-    if (!cx->activation() || !cx->activation()->isJit()) {
-        return false;
-    }
-    JitActivation* act = cx->activation()->asJit();
-
-    void* pc = reinterpret_cast<void*>(get_pc());
-
-    const wasm::CodeSegment* segment = wasm::LookupCodeSegment(pc);
-    if (!segment || !segment->isModule()) {
-        return false;
-    }
-    const wasm::ModuleSegment* moduleSegment = segment->asModule();
-
-    wasm::Trap trap;
-    wasm::BytecodeOffset bytecode;
-    if (!moduleSegment->code().lookupTrap(pc, &trap, &bytecode)) {
-        return false;
-    }
-
-    act->startWasmTrap(trap, bytecode.offset(), registerState());
-    set_pc(int64_t(moduleSegment->trapCode()));
-    return true;
-}
-
 // MIPS memory instructions (except lw(d)l/r, sw(d)l/r) trap on unaligned memory
 // accesses, enabling the OS to handle them via trap-and-emulate.
 // Note that simulator runs have the runtime system running directly on the host
 // system and only generated code is executed in the simulator.
 // Since the host is typically IA32, it will not trap on unaligned memory access.
 // We assume that executing correct generated code will not produce unaligned
 // memory access, so we explicitly check for address alignment and trap.
 // Note that trapping does not occur when executing wasm code, which requires
 // that unaligned memory accesses produce correct results.
 
 uint8_t
 Simulator::readBU(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return 0xff;
     }
 
     uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
     return *ptr;
 }
 
 int8_t
 Simulator::readB(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return -1;
     }
 
     int8_t* ptr = reinterpret_cast<int8_t*>(addr);
     return *ptr;
 }
 
 void
 Simulator::writeB(uint64_t addr, uint8_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return;
     }
 
     uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
     *ptr = value;
 }
 
 void
 Simulator::writeB(uint64_t addr, int8_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 1)) {
+    if (handleWasmSegFault(addr, 1)) {
         return;
     }
 
     int8_t* ptr = reinterpret_cast<int8_t*>(addr);
     *ptr = value;
 }
 
 uint16_t
 Simulator::readHU(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return 0xffff;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
         return *ptr;
     }
     printf("Unaligned unsigned halfword read at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 int16_t
 Simulator::readH(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return -1;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         int16_t* ptr = reinterpret_cast<int16_t*>(addr);
         return *ptr;
     }
     printf("Unaligned signed halfword read at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 void
 Simulator::writeH(uint64_t addr, uint16_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1839,17 +1755,17 @@ Simulator::writeH(uint64_t addr, uint16_
     printf("Unaligned unsigned halfword write at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 void
 Simulator::writeH(uint64_t addr, int16_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 2)) {
+    if (handleWasmSegFault(addr, 2)) {
         return;
     }
 
     if ((addr & 1) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         int16_t* ptr = reinterpret_cast<int16_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1857,51 +1773,51 @@ Simulator::writeH(uint64_t addr, int16_t
     printf("Unaligned halfword write at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 uint32_t
 Simulator::readWU(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 4)) {
+    if (handleWasmSegFault(addr, 4)) {
         return -1;
     }
 
     if ((addr & 3) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         uint32_t* ptr = reinterpret_cast<uint32_t*>(addr);
         return *ptr;
     }
     printf("Unaligned read at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 int32_t
 Simulator::readW(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 4)) {
+    if (handleWasmSegFault(addr, 4)) {
         return -1;
     }
 
     if ((addr & 3) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         int32_t* ptr = reinterpret_cast<int32_t*>(addr);
         return *ptr;
     }
     printf("Unaligned read at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 void
 Simulator::writeW(uint64_t addr, uint32_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 4)) {
+    if (handleWasmSegFault(addr, 4)) {
         return;
     }
 
     if ((addr & 3) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         uint32_t* ptr = reinterpret_cast<uint32_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1909,17 +1825,17 @@ Simulator::writeW(uint64_t addr, uint32_
     printf("Unaligned write at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 void
 Simulator::writeW(uint64_t addr, int32_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 4)) {
+    if (handleWasmSegFault(addr, 4)) {
         return;
     }
 
     if ((addr & 3) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         int32_t* ptr = reinterpret_cast<int32_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1927,34 +1843,34 @@ Simulator::writeW(uint64_t addr, int32_t
     printf("Unaligned write at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 int64_t
 Simulator::readDW(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 8)) {
+    if (handleWasmSegFault(addr, 8)) {
         return -1;
     }
 
     if ((addr & kPointerAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
         return *ptr;
     }
     printf("Unaligned read at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 void
 Simulator::writeDW(uint64_t addr, int64_t value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 8)) {
+    if (handleWasmSegFault(addr, 8)) {
         return;
     }
 
     if ((addr & kPointerAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         int64_t* ptr = reinterpret_cast<int64_t*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1962,34 +1878,34 @@ Simulator::writeDW(uint64_t addr, int64_
     printf("Unaligned write at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
 }
 
 double
 Simulator::readD(uint64_t addr, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 8)) {
+    if (handleWasmSegFault(addr, 8)) {
         return NAN;
     }
 
     if ((addr & kDoubleAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         double* ptr = reinterpret_cast<double*>(addr);
         return *ptr;
     }
     printf("Unaligned (double) read at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n",
            addr, reinterpret_cast<intptr_t>(instr));
     MOZ_CRASH();
     return 0;
 }
 
 void
 Simulator::writeD(uint64_t addr, double value, SimInstruction* instr)
 {
-    if (handleWasmFault(addr, 8)) {
+    if (handleWasmSegFault(addr, 8)) {
         return;
     }
 
     if ((addr & kDoubleAlignmentMask) == 0 || wasm::InCompiledCode(reinterpret_cast<void *>(get_pc()))) {
         double* ptr = reinterpret_cast<double*>(addr);
         LLBit_ = false;
         *ptr = value;
         return;
@@ -1999,17 +1915,17 @@ Simulator::writeD(uint64_t addr, double 
     MOZ_CRASH();
 }
 
 int
 Simulator::loadLinkedW(uint64_t addr, SimInstruction* instr)
 {
     if ((addr & 3) == 0) {
 
-        if (handleWasmFault(addr, 4)) {
+        if (handleWasmSegFault(addr, 4)) {
             return -1;
         }
 
         volatile int32_t* ptr = reinterpret_cast<volatile int32_t*>(addr);
         int32_t value = *ptr;
         lastLLValue_ = value;
         LLAddr_ = addr;
         // Note that any memory write or "external" interrupt should reset this value to false.
@@ -2052,17 +1968,17 @@ Simulator::storeConditionalW(uint64_t ad
     return 0;
 }
 
 int64_t
 Simulator::loadLinkedD(uint64_t addr, SimInstruction* instr)
 {
     if ((addr & kPointerAlignmentMask) == 0) {
 
-        if (handleWasmFault(addr, 8)) {
+        if (handleWasmSegFault(addr, 8)) {
             return -1;
         }
 
         volatile int64_t* ptr = reinterpret_cast<volatile int64_t*>(addr);
         int64_t value = *ptr;
         lastLLValue_ = value;
         LLAddr_ = addr;
         // Note that any memory write or "external" interrupt should reset this value to false.
@@ -2437,18 +2353,22 @@ Simulator::softwareInterrupt(SimInstruct
     } else {
         switch (func) {
             case ff_tge:
             case ff_tgeu:
             case ff_tlt:
             case ff_tltu:
             case ff_teq:
             case ff_tne:
-            if (instr->bits(15, 6) == kWasmTrapCode && handleWasmTrapFault()) {
-                return;
+            if (instr->bits(15, 6) == kWasmTrapCode) {
+                uint8_t* newPC;
+                if (wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+                    set_pc(int64_t(newPC));
+                    return;
+                }
             }
         };
         // All remaining break_ codes, and all traps are handled here.
         MipsDebugger dbg(this);
         dbg.debug();
     }
 }
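
[Editor's note: the LLBit_ bookkeeping threaded through the accessors above
models the MIPS load-linked/store-conditional reservation. A toy model of the
assumed state machine, illustrative only:]

    #include <cstdint>

    struct LLState {
        bool     bit = false;  // LLBit_: a reservation is live
        uint64_t addr = 0;     // LLAddr_: the reserved address
    };

    // loadLinked* sets {bit, addr}; every simulated store and every wasm
    // fault redirect clears bit. A conditional store succeeds only if the
    // reservation survived, and always ends it.
    static bool StoreConditional(LLState& ll, uint64_t addr)
    {
        bool ok = ll.bit && ll.addr == addr;
        ll.bit = false;
        return ok;
    }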
 
--- a/js/src/jit/mips64/Simulator-mips64.h
+++ b/js/src/jit/mips64/Simulator-mips64.h
@@ -33,16 +33,17 @@
 #ifdef JS_SIMULATOR_MIPS64
 
 #include "mozilla/Atomics.h"
 
 #include "jit/IonTypes.h"
 #include "js/ProfilingFrameIterator.h"
 #include "threading/Thread.h"
 #include "vm/MutexIDs.h"
+#include "wasm/WasmSignalHandlers.h"
 
 namespace js {
 
 namespace jit {
 
 class JitActivation;
 
 class Simulator;
@@ -155,17 +156,17 @@ class Simulator {
     enum FPURegister {
         f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11,
         f12, f13, f14, f15, f16, f17, f18, f19, f20, f21,
         f22, f23, f24, f25, f26, f27, f28, f29, f30, f31,
         kNumFPURegisters
     };
 
     // Returns nullptr on OOM.
-    static Simulator* Create(JSContext* cx);
+    static Simulator* Create();
 
     static void Destroy(Simulator* simulator);
 
     // Constructor/destructor are for internal use only; use the static methods above.
     Simulator();
     ~Simulator();
 
     static bool supportsAtomics() { return true; }
@@ -312,18 +313,31 @@ class Simulator {
     void enableStop(uint32_t code);
     void disableStop(uint32_t code);
     void increaseStopCounter(uint32_t code);
     void printStopInfo(uint32_t code);
 
     JS::ProfilingFrameIterator::RegisterState registerState();
 
     // Handle any wasm faults, returning true if the fault was handled.
-    bool handleWasmFault(uint64_t addr, unsigned numBytes);
-    bool handleWasmTrapFault();
+    // This method is rather hot so inline the normal (no-wasm) case.
+    bool MOZ_ALWAYS_INLINE handleWasmSegFault(uint64_t addr, unsigned numBytes) {
+        if (MOZ_LIKELY(!js::wasm::CodeExists)) {
+            return false;
+        }
+
+        uint8_t* newPC;
+        if (!js::wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes, &newPC)) {
+            return false;
+        }
+
+        LLBit_ = false;
+        set_pc(int64_t(newPC));
+        return true;
+    }
 
     // Executes one instruction.
     void instructionDecode(SimInstruction* instr);
     // Execute one instruction placed in a branch delay slot.
     void branchDelayInstructionDecode(SimInstruction* instr);
 
   public:
     static int64_t StopSimAt;
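
[Editor's note: Simulator::Create no longer takes a JSContext*, so a creation
site needs no context in hand. A hypothetical call sequence under the new
signature, per the "Returns nullptr on OOM" contract above:]

    static bool MakeAndDiscardSimulator()
    {
        js::jit::Simulator* sim = js::jit::Simulator::Create();
        if (!sim) {
            return false;  // OOM
        }
        js::jit::Simulator::Destroy(sim);
        return true;
    }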
--- a/js/src/jit/shared/CodeGenerator-shared-inl.h
+++ b/js/src/jit/shared/CodeGenerator-shared-inl.h
@@ -3,17 +3,16 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef jit_shared_CodeGenerator_shared_inl_h
 #define jit_shared_CodeGenerator_shared_inl_h
 
 #include "jit/shared/CodeGenerator-shared.h"
-#include "jit/Disassembler.h"
 
 #include "jit/MacroAssembler-inl.h"
 
 namespace js {
 namespace jit {
 
 static inline bool
 IsConstant(const LInt64Allocation& a)
@@ -342,90 +341,16 @@ CodeGeneratorShared::restoreLiveVolatile
 {
     MOZ_ASSERT(!ins->isCall());
     LSafepoint* safepoint = ins->safepoint();
     LiveRegisterSet regs;
     regs.set() = RegisterSet::Intersect(safepoint->liveRegs().set(), RegisterSet::Volatile());
     masm.PopRegsInMask(regs);
 }
 
-void
-CodeGeneratorShared::verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, bool isLoad,
-                                                 Scalar::Type type, Operand mem, LAllocation alloc)
-{
-#ifdef DEBUG
-    using namespace Disassembler;
-
-    Disassembler::HeapAccess::Kind kind = isLoad ? HeapAccess::Load : HeapAccess::Store;
-    switch (type) {
-      case Scalar::Int8:
-      case Scalar::Int16:
-        if (kind == HeapAccess::Load) {
-            kind = HeapAccess::LoadSext32;
-        }
-        break;
-      default:
-        break;
-    }
-
-    OtherOperand op;
-    switch (type) {
-      case Scalar::Int8:
-      case Scalar::Uint8:
-      case Scalar::Int16:
-      case Scalar::Uint16:
-      case Scalar::Int32:
-      case Scalar::Uint32:
-        if (!alloc.isConstant()) {
-            op = OtherOperand(ToRegister(alloc).encoding());
-        } else {
-            // x86 doesn't allow encoding an imm64 to memory move; the value
-            // is wrapped anyways.
-            int32_t i = ToInt32(&alloc);
-
-            // Sign-extend the immediate value out to 32 bits. We do this even
-            // for unsigned element types so that we match what the disassembly
-            // code does, as it doesn't know about signedness of stores.
-            unsigned shift = 32 - TypedArrayElemSize(type) * 8;
-            i = int32_t(uint32_t(i) << shift) >> shift;
-            op = OtherOperand(i);
-        }
-        break;
-      case Scalar::Int64:
-        // Can't encode an imm64-to-memory move.
-        op = OtherOperand(ToRegister(alloc).encoding());
-        break;
-      case Scalar::Float32:
-      case Scalar::Float64:
-        op = OtherOperand(ToFloatRegister(alloc).encoding());
-        break;
-      case Scalar::Uint8Clamped:
-      case Scalar::MaxTypedArrayViewType:
-        MOZ_CRASH("Unexpected array type");
-    }
-
-    HeapAccess access(kind, TypedArrayElemSize(type), ComplexAddress(mem), op);
-    masm.verifyHeapAccessDisassembly(begin, end, access);
-#endif
-}
-
-void
-CodeGeneratorShared::verifyLoadDisassembly(uint32_t begin, uint32_t end, Scalar::Type type,
-                                           Operand mem, LAllocation alloc)
-{
-    verifyHeapAccessDisassembly(begin, end, true, type, mem, alloc);
-}
-
-void
-CodeGeneratorShared::verifyStoreDisassembly(uint32_t begin, uint32_t end, Scalar::Type type,
-                                            Operand mem, LAllocation alloc)
-{
-    verifyHeapAccessDisassembly(begin, end, false, type, mem, alloc);
-}
-
 inline bool
 CodeGeneratorShared::isGlobalObject(JSObject* object)
 {
     // Calling object->is<GlobalObject>() is racy because this relies on
     // checking the group and this can be changed while we are compiling off the
     // main thread. Note that we only check for the script realm's global here.
     return object == gen->realm->maybeGlobal();
 }
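
[Editor's note: the deleted verifyHeapAccessDisassembly helpers were a
DEBUG-only self-check that re-decoded the bytes just emitted by the
MacroAssembler and compared them against the expected HeapAccess. Their former
call pattern, reconstructed verbatim from the code removed from
CodeGenerator-x64.cpp later in this changeset:]

    uint32_t before = masm.size();
    masm.wasmLoad(mir->access(), srcAddr, ToAnyRegister(out));
    uint32_t after = masm.size();
    verifyLoadDisassembly(before, after, accessType, srcAddr, *out->output());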
--- a/js/src/jit/shared/CodeGenerator-shared.h
+++ b/js/src/jit/shared/CodeGenerator-shared.h
@@ -517,26 +517,16 @@ class CodeGeneratorShared : public LElem
     void emitTracelogStartEvent(uint32_t textId) {}
     void emitTracelogStopEvent(uint32_t textId) {}
     void emitTracelogStartEvent(const char* text, TraceLoggerTextId enabledTextId) {}
     void emitTracelogStopEvent(const char* text, TraceLoggerTextId enabledTextId) {}
     void emitTracelogIonStart() {}
     void emitTracelogIonStop() {}
 #endif
 
-  protected:
-    inline void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, bool isLoad,
-                                            Scalar::Type type, Operand mem, LAllocation alloc);
-
-  public:
-    inline void verifyLoadDisassembly(uint32_t begin, uint32_t end, Scalar::Type type,
-                                      Operand mem, LAllocation alloc);
-    inline void verifyStoreDisassembly(uint32_t begin, uint32_t end, Scalar::Type type,
-                                       Operand mem, LAllocation alloc);
-
     bool isGlobalObject(JSObject* object);
 };
 
 // An out-of-line path is generated at the end of the function.
 class OutOfLineCode : public TempObject
 {
     Label entry_;
     Label rejoin_;
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -8731,17 +8731,18 @@ class LWasmStoreI64 : public LInstructio
         return getInt64Operand(ValueIndex);
     }
 };
 
 class LAsmJSLoadHeap : public LInstructionHelper<1, 3, 0>
 {
   public:
     LIR_HEADER(AsmJSLoadHeap);
-    explicit LAsmJSLoadHeap(const LAllocation& ptr, const LAllocation& boundsCheckLimit = LAllocation(),
+    explicit LAsmJSLoadHeap(const LAllocation& ptr,
+                            const LAllocation& boundsCheckLimit,
                             const LAllocation& memoryBase = LAllocation())
       : LInstructionHelper(classOpcode)
     {
         setOperand(0, ptr);
         setOperand(1, boundsCheckLimit);
         setOperand(2, memoryBase);
     }
     MAsmJSLoadHeap* mir() const {
@@ -8757,18 +8758,19 @@ class LAsmJSLoadHeap : public LInstructi
         return getOperand(2);
     }
 };
 
 class LAsmJSStoreHeap : public LInstructionHelper<0, 4, 0>
 {
   public:
     LIR_HEADER(AsmJSStoreHeap);
-    LAsmJSStoreHeap(const LAllocation& ptr, const LAllocation& value,
-                    const LAllocation& boundsCheckLimit = LAllocation(),
+    LAsmJSStoreHeap(const LAllocation& ptr,
+                    const LAllocation& value,
+                    const LAllocation& boundsCheckLimit,
                     const LAllocation& memoryBase = LAllocation())
       : LInstructionHelper(classOpcode)
     {
         setOperand(0, ptr);
         setOperand(1, value);
         setOperand(2, boundsCheckLimit);
         setOperand(3, memoryBase);
     }
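
[Editor's note: dropping the default argument on boundsCheckLimit makes the
operand mandatory at every construction site. A hypothetical lowering call
matching the new four-operand LAsmJSStoreHeap signature:]

    auto* lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, valueAlloc,
                                             limitAlloc, memoryBaseAlloc);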
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -488,60 +488,16 @@ CodeGenerator::visitWasmStore(LWasmStore
 
 void
 CodeGenerator::visitWasmStoreI64(LWasmStoreI64* ins)
 {
     emitWasmStore(ins);
 }
 
 void
-CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins)
-{
-    const MAsmJSLoadHeap* mir = ins->mir();
-    MOZ_ASSERT(mir->offset() < wasm::OffsetGuardLimit);
-
-    const LAllocation* ptr = ins->ptr();
-    const LDefinition* out = ins->output();
-
-    Scalar::Type accessType = mir->access().type();
-
-    Operand srcAddr = ptr->isBogus()
-                      ? Operand(HeapReg, mir->offset())
-                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
-
-    uint32_t before = masm.size();
-    masm.wasmLoad(mir->access(), srcAddr, ToAnyRegister(out));
-    uint32_t after = masm.size();
-    verifyLoadDisassembly(before, after, accessType, srcAddr, *out->output());
-}
-
-void
-CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins)
-{
-    const MAsmJSStoreHeap* mir = ins->mir();
-    MOZ_ASSERT(mir->offset() < wasm::OffsetGuardLimit);
-
-    const LAllocation* ptr = ins->ptr();
-    const LAllocation* value = ins->value();
-
-    Scalar::Type accessType = mir->access().type();
-
-    canonicalizeIfDeterministic(accessType, value);
-
-    Operand dstAddr = ptr->isBogus()
-                      ? Operand(HeapReg, mir->offset())
-                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
-
-    uint32_t before = masm.size();
-    wasmStore(mir->access(), value, dstAddr);
-    uint32_t after = masm.size();
-    verifyStoreDisassembly(before, after, accessType, dstAddr, *value);
-}
-
-void
 CodeGenerator::visitWasmCompareExchangeHeap(LWasmCompareExchangeHeap* ins)
 {
     MWasmCompareExchangeHeap* mir = ins->mir();
 
     Register ptr = ToRegister(ins->ptr());
     Register oldval = ToRegister(ins->oldValue());
     Register newval = ToRegister(ins->newValue());
     MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
--- a/js/src/jit/x64/Lowering-x64.cpp
+++ b/js/src/jit/x64/Lowering-x64.cpp
@@ -247,55 +247,16 @@ LIRGenerator::visitWasmStore(MWasmStore*
     }
 
     LAllocation baseAlloc = useRegisterOrZeroAtStart(base);
     auto* lir = new(alloc()) LWasmStore(baseAlloc, valueAlloc);
     add(lir, ins);
 }
 
 void
-LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins)
-{
-    MDefinition* base = ins->base();
-    MOZ_ASSERT(base->type() == MIRType::Int32);
-
-    define(new(alloc()) LAsmJSLoadHeap(useRegisterOrZeroAtStart(base)), ins);
-}
-
-void
-LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins)
-{
-    MDefinition* base = ins->base();
-    MOZ_ASSERT(base->type() == MIRType::Int32);
-
-    LAsmJSStoreHeap* lir = nullptr;  // initialize to silence GCC warning
-    switch (ins->access().type()) {
-      case Scalar::Int8:
-      case Scalar::Uint8:
-      case Scalar::Int16:
-      case Scalar::Uint16:
-      case Scalar::Int32:
-      case Scalar::Uint32:
-        lir = new(alloc()) LAsmJSStoreHeap(useRegisterOrZeroAtStart(base),
-                                           useRegisterOrConstantAtStart(ins->value()));
-        break;
-      case Scalar::Float32:
-      case Scalar::Float64:
-        lir = new(alloc()) LAsmJSStoreHeap(useRegisterOrZeroAtStart(base),
-                                           useRegisterAtStart(ins->value()));
-        break;
-      case Scalar::Int64:
-      case Scalar::Uint8Clamped:
-      case Scalar::MaxTypedArrayViewType:
-        MOZ_CRASH("unexpected array type");
-    }
-    add(lir, ins);
-}
-
-void
 LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins)
 {
     MDefinition* base = ins->base();
     MOZ_ASSERT(base->type() == MIRType::Int32);
 
     // The output may not be used but will be clobbered regardless, so
     // pin the output to eax.
     //
--- a/js/src/jit/x86-shared/Assembler-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.cpp
@@ -1,16 +1,15 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "gc/Marking.h"
-#include "jit/Disassembler.h"
 #include "jit/JitRealm.h"
 #if defined(JS_CODEGEN_X86)
 # include "jit/x86/MacroAssembler-x86.h"
 #elif defined(JS_CODEGEN_X64)
 # include "jit/x64/MacroAssembler-x64.h"
 #else
 # error "Wrong architecture. Only x86 and x64 should build this file!"
 #endif
@@ -228,29 +227,16 @@ AssemblerX86Shared::InvertCondition(Doub
         return DoubleLessThanOrUnordered;
       case DoubleGreaterThanOrEqualOrUnordered:
         return DoubleLessThan;
       default:
         MOZ_CRASH("unexpected condition");
     }
 }
 
-void
-AssemblerX86Shared::verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
-                                                const Disassembler::HeapAccess& heapAccess)
-{
-#ifdef DEBUG
-    if (masm.oom()) {
-        return;
-    }
-    unsigned char* code = masm.data();
-    Disassembler::VerifyHeapAccess(code + begin, code + end, heapAccess);
-#endif
-}
-
 CPUInfo::SSEVersion CPUInfo::maxSSEVersion = UnknownSSE;
 CPUInfo::SSEVersion CPUInfo::maxEnabledSSEVersion = UnknownSSE;
 bool CPUInfo::avxPresent = false;
 bool CPUInfo::avxEnabled = false;
 bool CPUInfo::popcntPresent = false;
 bool CPUInfo::needAmdBugWorkaround = false;
 
 static uintptr_t
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -395,16 +395,56 @@ CodeGenerator::visitWasmReinterpret(LWas
       case MIRType::Int64:
         MOZ_CRASH("not handled by this LIR opcode");
       default:
         MOZ_CRASH("unexpected WasmReinterpret");
     }
 }
 
 void
+CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins)
+{
+    const MAsmJSLoadHeap* mir = ins->mir();
+    MOZ_ASSERT(mir->access().offset() == 0);
+
+    const LAllocation* ptr = ins->ptr();
+    const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+    AnyRegister out = ToAnyRegister(ins->output());
+
+    Scalar::Type accessType = mir->accessType();
+
+    OutOfLineLoadTypedArrayOutOfBounds* ool = nullptr;
+    if (mir->needsBoundsCheck()) {
+        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(out, accessType);
+        addOutOfLineCode(ool, mir);
+
+        masm.wasmBoundsCheck(Assembler::AboveOrEqual, ToRegister(ptr), ToRegister(boundsCheckLimit),
+                             ool->entry());
+    }
+
+#ifdef JS_CODEGEN_X86
+    const LAllocation* memoryBase = ins->memoryBase();
+    Operand srcAddr = ptr->isBogus()
+                    ? Operand(ToRegister(memoryBase), 0)
+                    : Operand(ToRegister(memoryBase), ToRegister(ptr), TimesOne);
+#else
+    MOZ_ASSERT(!mir->hasMemoryBase());
+    Operand srcAddr = ptr->isBogus()
+                    ? Operand(HeapReg, 0)
+                    : Operand(HeapReg, ToRegister(ptr), TimesOne);
+#endif
+
+    masm.wasmLoad(mir->access(), srcAddr, out);
+
+    if (ool) {
+        masm.bind(ool->rejoin());
+    }
+}
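+// [Editorial sketch, not part of this changeset: in effect the code above
+// emits the following sequence (pseudocode over the masm calls; register
+// names are hypothetical):
+//
+//       cmp32(ptrReg, limitReg)
+//       j(AboveOrEqual, ool->entry())   // OOB: out-of-line path yields 0/NaN
+//       load(memoryBase + ptrReg, out)
+//     rejoin:
+//
+// so an out-of-bounds asm.js load produces a default value instead of
+// faulting into a signal handler.]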
+
+void
 CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool)
 {
     switch (ool->viewType()) {
       case Scalar::Int64:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
       case Scalar::Float32:
         masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu());
@@ -422,16 +462,54 @@ CodeGeneratorX86Shared::visitOutOfLineLo
         Register destReg = ool->dest().gpr();
         masm.mov(ImmWord(0), destReg);
         break;
     }
     masm.jmp(ool->rejoin());
 }
 
 void
+CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins)
+{
+    const MAsmJSStoreHeap* mir = ins->mir();
+    MOZ_ASSERT(mir->offset() == 0);
+
+    const LAllocation* ptr = ins->ptr();
+    const LAllocation* value = ins->value();
+    const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+    Scalar::Type accessType = mir->accessType();
+    canonicalizeIfDeterministic(accessType, value);
+
+    Label rejoin;
+    if (mir->needsBoundsCheck()) {
+        masm.wasmBoundsCheck(Assembler::AboveOrEqual, ToRegister(ptr), ToRegister(boundsCheckLimit),
+                             &rejoin);
+    }
+
+#ifdef JS_CODEGEN_X86
+    const LAllocation* memoryBase = ins->memoryBase();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(ToRegister(memoryBase), 0)
+                      : Operand(ToRegister(memoryBase), ToRegister(ptr), TimesOne);
+#else
+    MOZ_ASSERT(!mir->hasMemoryBase());
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(HeapReg, 0)
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne);
+#endif
+
+    masm.wasmStore(mir->access(), ToAnyRegister(value), dstAddr);
+
+    if (rejoin.used()) {
+        masm.bind(&rejoin);
+    }
+}
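+// [Editorial note, not part of this changeset: the store path needs no
+// out-of-line code because there is no result to materialize; the bounds
+// check simply branches forward over the store (pseudocode, hypothetical
+// register names):
+//
+//       cmp32(ptrReg, limitReg)
+//       j(AboveOrEqual, &rejoin)        // OOB: skip the wasmStore entirely
+//       store(value, memoryBase + ptrReg)
+//     rejoin:
+// ]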
+
+void
 CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir)
 {
     MWasmAddOffset* mir = lir->mir();
     Register base = ToRegister(lir->base());
     Register out = ToRegister(lir->output());
 
     if (base != out) {
         masm.move32(base, out);
deleted file mode 100644
--- a/js/src/jit/x86-shared/Disassembler-x86-shared.cpp
+++ /dev/null
@@ -1,578 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * vim: set ts=8 sts=4 et sw=4 tw=99:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "jit/Disassembler.h"
-
-#include "jit/x86-shared/Encoding-x86-shared.h"
-
-using namespace js;
-using namespace js::jit;
-using namespace js::jit::X86Encoding;
-using namespace js::jit::Disassembler;
-
-MOZ_COLD static bool REX_W(uint8_t rex) { return (rex >> 3) & 0x1; }
-MOZ_COLD static bool REX_R(uint8_t rex) { return (rex >> 2) & 0x1; }
-MOZ_COLD static bool REX_X(uint8_t rex) { return (rex >> 1) & 0x1; }
-MOZ_COLD static bool REX_B(uint8_t rex) { return (rex >> 0) & 0x1; }
-
-MOZ_COLD static uint8_t
-MakeREXFlags(bool w, bool r, bool x, bool b)
-{
-    uint8_t rex = (w << 3) | (r << 2) | (x << 1) | (b << 0);
-    MOZ_RELEASE_ASSERT(REX_W(rex) == w);
-    MOZ_RELEASE_ASSERT(REX_R(rex) == r);
-    MOZ_RELEASE_ASSERT(REX_X(rex) == x);
-    MOZ_RELEASE_ASSERT(REX_B(rex) == b);
-    return rex;
-}
-
-MOZ_COLD static ModRmMode
-ModRM_Mode(uint8_t modrm)
-{
-    return ModRmMode((modrm >> 6) & 0x3);
-}
-
-MOZ_COLD static uint8_t
-ModRM_Reg(uint8_t modrm)
-{
-    return (modrm >> 3) & 0x7;
-}
-
-MOZ_COLD static uint8_t
-ModRM_RM(uint8_t modrm)
-{
-    return (modrm >> 0) & 0x7;
-}
-
-MOZ_COLD static bool
-ModRM_hasSIB(uint8_t modrm)
-{
-    return ModRM_Mode(modrm) != ModRmRegister && ModRM_RM(modrm) == hasSib;
-}
-MOZ_COLD static bool
-ModRM_hasDisp8(uint8_t modrm)
-{
-    return ModRM_Mode(modrm) == ModRmMemoryDisp8;
-}
-MOZ_COLD static bool
-ModRM_hasRIP(uint8_t modrm)
-{
-#ifdef JS_CODEGEN_X64
-    return ModRM_Mode(modrm) == ModRmMemoryNoDisp && ModRM_RM(modrm) == noBase;
-#else
-    return false;
-#endif
-}
-MOZ_COLD static bool
-ModRM_hasDisp32(uint8_t modrm)
-{
-    return ModRM_Mode(modrm) == ModRmMemoryDisp32 ||
-           ModRM_hasRIP(modrm);
-}
-
-MOZ_COLD static uint8_t
-SIB_SS(uint8_t sib)
-{
-    return (sib >> 6) & 0x3;
-}
-
-MOZ_COLD static uint8_t
-SIB_Index(uint8_t sib)
-{
-    return (sib >> 3) & 0x7;
-}
-
-MOZ_COLD static uint8_t
-SIB_Base(uint8_t sib)
-{
-    return (sib >> 0) & 0x7;
-}
-
-MOZ_COLD static bool
-SIB_hasRIP(uint8_t sib)
-{
-    return SIB_Base(sib) == noBase && SIB_Index(sib) == noIndex;
-}
-
-MOZ_COLD static bool
-HasRIP(uint8_t modrm, uint8_t sib, uint8_t rex)
-{
-    return ModRM_hasRIP(modrm) && SIB_hasRIP(sib);
-}
-
-MOZ_COLD static bool
-HasDisp8(uint8_t modrm)
-{
-    return ModRM_hasDisp8(modrm);
-}
-
-MOZ_COLD static bool
-HasDisp32(uint8_t modrm, uint8_t sib)
-{
-    return ModRM_hasDisp32(modrm) ||
-           (SIB_Base(sib) == noBase &&
-            SIB_Index(sib) == noIndex &&
-            ModRM_Mode(modrm) == ModRmMemoryNoDisp);
-}
-
-MOZ_COLD static uint32_t
-Reg(uint8_t modrm, uint8_t sib, uint8_t rex)
-{
-    return ModRM_Reg(modrm) | (REX_R(rex) << 3);
-}
-
-MOZ_COLD static bool
-HasBase(uint8_t modrm, uint8_t sib)
-{
-    return !ModRM_hasSIB(modrm) ||
-           SIB_Base(sib) != noBase ||
-           SIB_Index(sib) != noIndex ||
-           ModRM_Mode(modrm) != ModRmMemoryNoDisp;
-}
-
-MOZ_COLD static RegisterID
-DecodeBase(uint8_t modrm, uint8_t sib, uint8_t rex)
-{
-    return HasBase(modrm, sib)
-           ? RegisterID((ModRM_hasSIB(modrm) ? SIB_Base(sib) : ModRM_RM(modrm)) | (REX_B(rex) << 3))
-           : invalid_reg;
-}
-
-MOZ_COLD static RegisterID
-DecodeIndex(uint8_t modrm, uint8_t sib, uint8_t rex)
-{
-    RegisterID index = RegisterID(SIB_Index(sib) | (REX_X(rex) << 3));
-    return ModRM_hasSIB(modrm) && index != noIndex ? index : invalid_reg;
-}
-
-MOZ_COLD static uint32_t
-DecodeScale(uint8_t modrm, uint8_t sib, uint8_t rex)
-{
-    return ModRM_hasSIB(modrm) ? SIB_SS(sib) : 0;
-}
-
-#define PackOpcode(op0, op1, op2) ((op0) | ((op1) << 8) | ((op2) << 16))
-#define Pack2ByteOpcode(op1) PackOpcode(OP_2BYTE_ESCAPE, op1, 0)
-#define Pack3ByteOpcode(op1, op2) PackOpcode(OP_2BYTE_ESCAPE, op1, op2)
-
-uint8_t*
-js::jit::Disassembler::DisassembleHeapAccess(uint8_t* ptr, HeapAccess* access)
-{
-    VexOperandType type = VEX_PS;
-    uint32_t opcode = OP_NOP_00;
-    uint8_t modrm = 0;
-    uint8_t sib = 0;
-    uint8_t rex = 0;
-    int32_t disp = 0;
-    int32_t imm = 0;
-    bool haveImm = false;
-    int opsize = 4;
-
-    // Legacy prefixes
-    switch (*ptr) {
-      case PRE_LOCK:
-      case PRE_PREDICT_BRANCH_NOT_TAKEN: // (obsolete), aka %cs
-      case 0x3E: // aka predict-branch-taken (obsolete)
-      case 0x36: // %ss
-      case 0x26: // %es
-      case 0x64: // %fs
-      case 0x65: // %gs
-      case 0x67: // address-size override
-        MOZ_CRASH("Unable to disassemble instruction");
-      case PRE_SSE_F2: // aka REPNZ/REPNE
-        type = VEX_SD;
-        ptr++;
-        break;
-      case PRE_SSE_F3: // aka REP/REPE/REPZ
-        type = VEX_SS;
-        ptr++;
-        break;
-      case PRE_SSE_66: // aka PRE_OPERAND_SIZE
-        type = VEX_PD;
-        opsize = 2;
-        ptr++;
-        break;
-      default:
-        break;
-    }
-
-    // REX and VEX prefixes
-    {
-        int x = 0, b = 0, m = 1, w = 0;
-        int r, l, p;
-        switch (*ptr) {
-#ifdef JS_CODEGEN_X64
-          case PRE_REX | 0x0: case PRE_REX | 0x1: case PRE_REX | 0x2: case PRE_REX | 0x3:
-          case PRE_REX | 0x4: case PRE_REX | 0x5: case PRE_REX | 0x6: case PRE_REX | 0x7:
-          case PRE_REX | 0x8: case PRE_REX | 0x9: case PRE_REX | 0xa: case PRE_REX | 0xb:
-          case PRE_REX | 0xc: case PRE_REX | 0xd: case PRE_REX | 0xe: case PRE_REX | 0xf:
-            rex = *ptr++ & 0xf;
-            goto rex_done;
-#endif
-          case PRE_VEX_C4: {
-            if (type != VEX_PS) {
-                MOZ_CRASH("Unable to disassemble instruction");
-            }
-            ++ptr;
-            uint8_t c4a = *ptr++ ^ 0xe0;
-            uint8_t c4b = *ptr++ ^ 0x78;
-            r = (c4a >> 7) & 0x1;
-            x = (c4a >> 6) & 0x1;
-            b = (c4a >> 5) & 0x1;
-            m = (c4a >> 0) & 0x1f;
-            w = (c4b >> 7) & 0x1;
-            l = (c4b >> 2) & 0x1;
-            p = (c4b >> 0) & 0x3;
-            break;
-          }
-          case PRE_VEX_C5: {
-            if (type != VEX_PS) {
-              MOZ_CRASH("Unable to disassemble instruction");
-            }
-            ++ptr;
-            uint8_t c5 = *ptr++ ^ 0xf8;
-            r = (c5 >> 7) & 0x1;
-            l = (c5 >> 2) & 0x1;
-            p = (c5 >> 0) & 0x3;
-            break;
-          }
-          default:
-            goto rex_done;
-        }
-        if (l != 0) { // 256-bit SIMD
-            MOZ_CRASH("Unable to disassemble instruction");
-        }
-        type = VexOperandType(p);
-        rex = MakeREXFlags(w, r, x, b);
-        switch (m) {
-          case 0x1:
-            opcode = Pack2ByteOpcode(*ptr++);
-            goto opcode_done;
-          case 0x2:
-            opcode = Pack3ByteOpcode(ESCAPE_38, *ptr++);
-            goto opcode_done;
-          case 0x3:
-            opcode = Pack3ByteOpcode(ESCAPE_3A, *ptr++);
-            goto opcode_done;
-          default:
-            MOZ_CRASH("Unable to disassemble instruction");
-        }
-    }
-  rex_done:;
-    if (REX_W(rex)) {
-        opsize = 8;
-    }
-
-    // Opcode.
-    opcode = *ptr++;
-    switch (opcode) {
-#ifdef JS_CODEGEN_X64
-      case OP_PUSH_EAX + 0: case OP_PUSH_EAX + 1: case OP_PUSH_EAX + 2: case OP_PUSH_EAX + 3:
-      case OP_PUSH_EAX + 4: case OP_PUSH_EAX + 5: case OP_PUSH_EAX + 6: case OP_PUSH_EAX + 7:
-      case OP_POP_EAX + 0: case OP_POP_EAX + 1: case OP_POP_EAX + 2: case OP_POP_EAX + 3:
-      case OP_POP_EAX + 4: case OP_POP_EAX + 5: case OP_POP_EAX + 6: case OP_POP_EAX + 7:
-      case OP_PUSH_Iz:
-      case OP_PUSH_Ib:
-        opsize = 8;
-        break;
-#endif
-      case OP_2BYTE_ESCAPE:
-        opcode |= *ptr << 8;
-        switch (*ptr++) {
-          case ESCAPE_38:
-          case ESCAPE_3A:
-            opcode |= *ptr++ << 16;
-            break;
-          default:
-            break;
-        }
-        break;
-      default:
-        break;
-    }
-  opcode_done:;
-
-    // ModR/M
-    modrm = *ptr++;
-
-    // SIB
-    if (ModRM_hasSIB(modrm)) {
-        sib = *ptr++;
-    }
-
-    // Address Displacement
-    if (HasDisp8(modrm)) {
-        disp = int8_t(*ptr++);
-    } else if (HasDisp32(modrm, sib)) {
-        memcpy(&disp, ptr, sizeof(int32_t));
-        ptr += sizeof(int32_t);
-    }
-
-    // Immediate operand
-    switch (opcode) {
-      case OP_PUSH_Ib:
-      case OP_IMUL_GvEvIb:
-      case OP_GROUP1_EbIb:
-      case OP_GROUP1_EvIb:
-      case OP_TEST_EAXIb:
-      case OP_GROUP2_EvIb:
-      case OP_GROUP11_EvIb:
-      case OP_GROUP3_EbIb:
-      case Pack2ByteOpcode(OP2_PSHUFD_VdqWdqIb):
-      case Pack2ByteOpcode(OP2_PSLLD_UdqIb): // aka OP2_PSRAD_UdqIb, aka OP2_PSRLD_UdqIb
-      case Pack2ByteOpcode(OP2_PEXTRW_GdUdIb):
-      case Pack2ByteOpcode(OP2_SHUFPS_VpsWpsIb):
-      case Pack3ByteOpcode(ESCAPE_3A, OP3_PEXTRD_EdVdqIb):
-      case Pack3ByteOpcode(ESCAPE_3A, OP3_BLENDPS_VpsWpsIb):
-      case Pack3ByteOpcode(ESCAPE_3A, OP3_PINSRD_VdqEdIb):
-        // 8-bit signed immediate
-        imm = int8_t(*ptr++);
-        haveImm = true;
-        break;
-      case OP_RET_Iz:
-        // 16-bit unsigned immediate
-        memcpy(&imm, ptr, sizeof(int16_t));
-        ptr += sizeof(int16_t);
-        haveImm = true;
-        break;
-      case OP_ADD_EAXIv:
-      case OP_OR_EAXIv:
-      case OP_AND_EAXIv:
-      case OP_SUB_EAXIv:
-      case OP_XOR_EAXIv:
-      case OP_CMP_EAXIv:
-      case OP_PUSH_Iz:
-      case OP_IMUL_GvEvIz:
-      case OP_GROUP1_EvIz:
-      case OP_TEST_EAXIv:
-      case OP_MOV_EAXIv:
-      case OP_GROUP3_EvIz:
-        // 32-bit signed immediate
-        memcpy(&imm, ptr, sizeof(int32_t));
-        ptr += sizeof(int32_t);
-        haveImm = true;
-        break;
-      case OP_GROUP11_EvIz:
-        // opsize-sized signed immediate
-        memcpy(&imm, ptr, opsize);
-        imm = int32_t(uint32_t(imm) << (32 - opsize * 8)) >> (32 - opsize * 8);
-        ptr += opsize;
-        haveImm = true;
-        break;
-      default:
-        break;
-    }
-
-    // Interpret the opcode.
-    if (HasRIP(modrm, sib, rex)) {
-        MOZ_CRASH("Unable to disassemble instruction");
-    }
-
-    size_t memSize = 0;
-    OtherOperand otherOperand(imm);
-    HeapAccess::Kind kind = HeapAccess::Unknown;
-    RegisterID gpr(RegisterID(Reg(modrm, sib, rex)));
-    XMMRegisterID xmm(XMMRegisterID(Reg(modrm, sib, rex)));
-    ComplexAddress addr(disp,
-                        DecodeBase(modrm, sib, rex),
-                        DecodeIndex(modrm, sib, rex),
-                        DecodeScale(modrm, sib, rex));
-    switch (opcode) {
-      case OP_GROUP11_EvIb:
-        if (gpr != RegisterID(GROUP11_MOV)) {
-            MOZ_CRASH("Unable to disassemble instruction");
-        }
-        MOZ_RELEASE_ASSERT(haveImm);
-        memSize = 1;
-        kind = HeapAccess::Store;
-        break;
-      case OP_GROUP11_EvIz:
-        if (gpr != RegisterID(GROUP11_MOV)) {
-            MOZ_CRASH("Unable to disassemble instruction");
-        }
-        MOZ_RELEASE_ASSERT(haveImm);
-        memSize = opsize;
-        kind = HeapAccess::Store;
-        break;
-      case OP_MOV_GvEv:
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(gpr);
-        memSize = opsize;
-        kind = HeapAccess::Load;
-        break;
-      case OP_MOV_GvEb:
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(gpr);
-        memSize = 1;
-        kind = HeapAccess::Load;
-        break;
-      case OP_MOV_EvGv:
-        if (!haveImm) {
-            otherOperand = OtherOperand(gpr);
-        }
-        memSize = opsize;
-        kind = HeapAccess::Store;
-        break;
-      case OP_MOV_EbGv:
-        if (!haveImm) {
-            otherOperand = OtherOperand(gpr);
-        }
-        memSize = 1;
-        kind = HeapAccess::Store;
-        break;
-      case Pack2ByteOpcode(OP2_MOVZX_GvEb):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(gpr);
-        memSize = 1;
-        kind = HeapAccess::Load;
-        break;
-      case Pack2ByteOpcode(OP2_MOVZX_GvEw):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(gpr);
-        memSize = 2;
-        kind = HeapAccess::Load;
-        break;
-      case Pack2ByteOpcode(OP2_MOVSX_GvEb):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(gpr);
-        memSize = 1;
-        kind = opsize == 8 ? HeapAccess::LoadSext64 : HeapAccess::LoadSext32;
-        break;
-      case Pack2ByteOpcode(OP2_MOVSX_GvEw):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(gpr);
-        memSize = 2;
-        kind = opsize == 8 ? HeapAccess::LoadSext64 : HeapAccess::LoadSext32;
-        break;
-#ifdef JS_CODEGEN_X64
-      case OP_MOVSXD_GvEv:
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(gpr);
-        memSize = 4;
-        kind = HeapAccess::LoadSext64;
-        break;
-#endif // JS_CODEGEN_X64
-      case Pack2ByteOpcode(OP2_MOVDQ_VdqWdq): // aka OP2_MOVDQ_VsdWsd
-      case Pack2ByteOpcode(OP2_MOVAPS_VsdWsd):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(xmm);
-        memSize = 16;
-        kind = HeapAccess::Load;
-        break;
-      case Pack2ByteOpcode(OP2_MOVSD_VsdWsd): // aka OP2_MOVPS_VpsWps
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(xmm);
-        switch (type) {
-          case VEX_SS: memSize = 4; break;
-          case VEX_SD: memSize = 8; break;
-          case VEX_PS:
-          case VEX_PD: memSize = 16; break;
-          default: MOZ_CRASH("Unexpected VEX type");
-        }
-        kind = HeapAccess::Load;
-        break;
-      case Pack2ByteOpcode(OP2_MOVDQ_WdqVdq):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(xmm);
-        memSize = 16;
-        kind = HeapAccess::Store;
-        break;
-      case Pack2ByteOpcode(OP2_MOVSD_WsdVsd): // aka OP2_MOVPS_WpsVps
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(xmm);
-        switch (type) {
-          case VEX_SS: memSize = 4; break;
-          case VEX_SD: memSize = 8; break;
-          case VEX_PS:
-          case VEX_PD: memSize = 16; break;
-          default: MOZ_CRASH("Unexpected VEX type");
-        }
-        kind = HeapAccess::Store;
-        break;
-      case Pack2ByteOpcode(OP2_MOVD_VdEd):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(xmm);
-        switch (type) {
-          case VEX_PD: memSize = 4; break;
-          default: MOZ_CRASH("Unexpected VEX type");
-        }
-        kind = HeapAccess::Load;
-        break;
-      case Pack2ByteOpcode(OP2_MOVQ_WdVd):
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(xmm);
-        switch (type) {
-          case VEX_PD: memSize = 8; break;
-          default: MOZ_CRASH("Unexpected VEX type");
-        }
-        kind = HeapAccess::Store;
-        break;
-      case Pack2ByteOpcode(OP2_MOVD_EdVd): // aka OP2_MOVQ_VdWd
-        MOZ_RELEASE_ASSERT(!haveImm);
-        otherOperand = OtherOperand(xmm);
-        switch (type) {
-          case VEX_SS: memSize = 8; kind = HeapAccess::Load; break;
-          case VEX_PD: memSize = 4; kind = HeapAccess::Store; break;
-          default: MOZ_CRASH("Unexpected VEX type");
-        }
-        break;
-      default:
-        MOZ_CRASH("Unable to disassemble instruction");
-    }
-
-    *access = HeapAccess(kind, memSize, addr, otherOperand);
-    return ptr;
-}
-
-#ifdef DEBUG
-void
-js::jit::Disassembler::DumpHeapAccess(const HeapAccess& access)
-{
-    switch (access.kind()) {
-      case HeapAccess::Store:      fprintf(stderr, "store"); break;
-      case HeapAccess::Load:       fprintf(stderr, "load"); break;
-      case HeapAccess::LoadSext32: fprintf(stderr, "loadSext32"); break;
-      case HeapAccess::LoadSext64: fprintf(stderr, "loadSext64"); break;
-      default:                     fprintf(stderr, "unknown"); break;
-    }
-    fprintf(stderr, "%u ", unsigned(access.size()));
-
-    switch (access.otherOperand().kind()) {
-      case OtherOperand::Imm:
-        fprintf(stderr, "imm %d", access.otherOperand().imm());
-        break;
-      case OtherOperand::GPR:
-        fprintf(stderr, "gpr %s", X86Encoding::GPRegName(access.otherOperand().gpr()));
-        break;
-      case OtherOperand::FPR:
-        fprintf(stderr, "fpr %s", X86Encoding::XMMRegName(access.otherOperand().fpr()));
-        break;
-      default: fprintf(stderr, "unknown");
-    }
-
-    fprintf(stderr, " @ ");
-
-    if (access.address().isPCRelative()) {
-        fprintf(stderr, MEM_o32r " ", ADDR_o32r(access.address().disp()));
-    } else if (access.address().hasIndex()) {
-        if (access.address().hasBase()) {
-            fprintf(stderr, MEM_obs " ",
-                    ADDR_obs(access.address().disp(), access.address().base(),
-                             access.address().index(), access.address().scale()));
-        } else {
-            fprintf(stderr, MEM_os " ",
-                    ADDR_os(access.address().disp(),
-                            access.address().index(), access.address().scale()));
-        }
-    } else if (access.address().hasBase()) {
-        fprintf(stderr, MEM_ob " ", ADDR_ob(access.address().disp(), access.address().base()));
-    } else {
-        fprintf(stderr, MEM_o " ", ADDR_o(access.address().disp()));
-    }
-
-    fprintf(stderr, "\n");
-}
-#endif
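
[Editor's note on the deleted disassembler: its field accessors decode the
standard x86 ModRM/SIB/REX encodings. For reference, the bit packing the
removed code assumed:]

    // ModRM byte:  [7:6] mod    [5:3] reg    [2:0] rm
    // SIB byte:    [7:6] scale  [5:3] index  [2:0] base
    // REX prefix:  0100WRXB: W selects 64-bit operands; R, X, B extend
    //              reg, index, and base to 4 bits each.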
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -283,16 +283,94 @@ LIRGenerator::visitWasmNeg(MWasmNeg* ins
         defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
         break;
       default:
         MOZ_CRASH();
     }
 }
 
 void
+LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins)
+{
+    MDefinition* base = ins->base();
+    MOZ_ASSERT(base->type() == MIRType::Int32);
+
+    MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+    MOZ_ASSERT_IF(ins->needsBoundsCheck(), boundsCheckLimit->type() == MIRType::Int32);
+
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants. This should
+    // only happen in rare constant-folding cases, since asm.js sets the
+    // minimum heap size based on the largest constant heap access.
+    LAllocation baseAlloc = ins->needsBoundsCheck()
+                            ? useRegisterAtStart(base)
+                            : useRegisterOrZeroAtStart(base);
+
+    LAllocation limitAlloc = ins->needsBoundsCheck()
+                           ? useRegisterAtStart(boundsCheckLimit)
+                           : LAllocation();
+    LAllocation memoryBaseAlloc = ins->hasMemoryBase()
+                                ? useRegisterAtStart(ins->memoryBase())
+                                : LAllocation();
+
+    auto* lir = new(alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
+    define(lir, ins);
+}
+
+void
+LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins)
+{
+    MDefinition* base = ins->base();
+    MOZ_ASSERT(base->type() == MIRType::Int32);
+
+    MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+    MOZ_ASSERT_IF(ins->needsBoundsCheck(), boundsCheckLimit->type() == MIRType::Int32);
+
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants. This should
+    // only happen in rare constant-folding cases since asm.js sets the minimum
+    // heap size to cover accesses at constant addresses.
+    LAllocation baseAlloc = ins->needsBoundsCheck()
+                            ? useRegisterAtStart(base)
+                            : useRegisterOrZeroAtStart(base);
+
+    LAllocation limitAlloc = ins->needsBoundsCheck()
+                           ? useRegisterAtStart(boundsCheckLimit)
+                           : LAllocation();
+    LAllocation memoryBaseAlloc = ins->hasMemoryBase()
+                                ? useRegisterAtStart(ins->memoryBase())
+                                : LAllocation();
+
+    LAsmJSStoreHeap* lir = nullptr;
+    switch (ins->access().type()) {
+      case Scalar::Int8: case Scalar::Uint8:
+#ifdef JS_CODEGEN_X86
+        // See comment for LIRGeneratorX86::useByteOpRegister.
+        lir = new(alloc()) LAsmJSStoreHeap(baseAlloc, useFixed(ins->value(), eax),
+                                           limitAlloc, memoryBaseAlloc);
+        break;
+#endif
+      case Scalar::Int16: case Scalar::Uint16:
+      case Scalar::Int32: case Scalar::Uint32:
+      case Scalar::Float32: case Scalar::Float64:
+        // For now, don't allow constant values. The immediate operand affects
+        // instruction layout, which affects patching.
+        lir = new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
+                                            limitAlloc, memoryBaseAlloc);
+        break;
+      case Scalar::Int64:
+        MOZ_CRASH("NYI");
+      case Scalar::Uint8Clamped:
+      case Scalar::MaxTypedArrayViewType:
+        MOZ_CRASH("unexpected array type");
+    }
+    add(lir, ins);
+}
+
+void
 LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
 {
     if (div->rhs()->isConstant()) {
         uint32_t rhs = div->rhs()->toConstant()->toInt32();
         int32_t shift = FloorLog2(rhs);
 
         LAllocation lhs = useRegisterAtStart(div->lhs());
         if (rhs != 0 && uint32_t(1) << shift == rhs) {
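
For orientation, the bounds-checked asm.js load lowered by visitAsmJSLoadHeap above ultimately behaves like the following standalone sketch. The function and parameter names are illustrative (this is not the emitted code), and the out-of-bounds path returns the JS-coerced default rather than trapping, as asm.js semantics require:

    #include <cstdint>
    #include <cstring>

    // Sketch (assumed shape): a bounds-checked asm.js uint32 load. 'heap'
    // plays the role of memoryBase and 'limit' the role of boundsCheckLimit.
    static uint32_t asmjsLoadU32(const uint8_t* heap, uint32_t index,
                                 uint32_t limit)
    {
        if (index >= limit) {
            // Out-of-line path: asm.js loads never trap; they yield the
            // coerced default value (0 for integer loads).
            return 0;
        }
        uint32_t value;
        memcpy(&value, heap + index, sizeof(value));
        return value;
    }
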
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
@@ -786,16 +786,36 @@ MacroAssembler::pushFakeReturnAddress(Re
 // WebAssembly
 
 CodeOffset
 MacroAssembler::wasmTrapInstruction()
 {
     return ud2();
 }
 
+void
+MacroAssembler::wasmBoundsCheck(Condition cond, Register index, Register boundsCheckLimit, Label* label)
+{
+    cmp32(index, boundsCheckLimit);
+    j(cond, label);
+    if (JitOptions.spectreIndexMasking) {
+        cmovCCl(cond, Operand(boundsCheckLimit), index);
+    }
+}
+
+void
+MacroAssembler::wasmBoundsCheck(Condition cond, Register index, Address boundsCheckLimit, Label* label)
+{
+    cmp32(index, Operand(boundsCheckLimit));
+    j(cond, label);
+    if (JitOptions.spectreIndexMasking) {
+        cmovCCl(cond, Operand(boundsCheckLimit), index);
+    }
+}
+
 // RAII class that generates the jumps to traps when it's destructed, to
 // prevent some code duplication in the outOfLineWasmTruncateXtoY methods.
 struct MOZ_RAII AutoHandleWasmTruncateToIntErrors
 {
     MacroAssembler& masm;
     Label inputIsNaN;
     Label intOverflow;
     wasm::BytecodeOffset off;
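
The cmovCCl in the two wasmBoundsCheck overloads above is the Spectre index-masking mitigation: architecturally it is a no-op on the in-bounds path, but it clamps the index even when the bounds-check branch is mispredicted. Its effect corresponds to this sketch (illustrative names, not the emitted instruction sequence):

    #include <cstdint>

    // Sketch of wasmBoundsCheck(AboveOrEqual, index, limit, label) with
    // JitOptions.spectreIndexMasking enabled. 'oolEntry' stands for the
    // out-of-line trap label.
    static uint32_t boundsCheckWithMasking(uint32_t index, uint32_t limit,
                                           void (*oolEntry)())
    {
        if (index >= limit) {   // cmp32 + j(AboveOrEqual, label)
            oolEntry();
        }
        if (index >= limit) {   // cmovae: architecturally dead on the
            index = limit;      // in-bounds path, but it bounds the index
        }                       // under branch misspeculation.
        return index;
    }
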
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -339,80 +339,16 @@ CodeGenerator::visitWasmStore(LWasmStore
 
 void
 CodeGenerator::visitWasmStoreI64(LWasmStoreI64* ins)
 {
     emitWasmStore(ins);
 }
 
 void
-CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins)
-{
-    const MAsmJSLoadHeap* mir = ins->mir();
-    MOZ_ASSERT(mir->access().offset() == 0);
-
-    const LAllocation* ptr = ins->ptr();
-    const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
-    const LAllocation* memoryBase = ins->memoryBase();
-    AnyRegister out = ToAnyRegister(ins->output());
-
-    Scalar::Type accessType = mir->accessType();
-
-    OutOfLineLoadTypedArrayOutOfBounds* ool = nullptr;
-    if (mir->needsBoundsCheck()) {
-        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(out, accessType);
-        addOutOfLineCode(ool, mir);
-
-        masm.wasmBoundsCheck(Assembler::AboveOrEqual, ToRegister(ptr), ToRegister(boundsCheckLimit),
-                             ool->entry());
-    }
-
-    Operand srcAddr = ptr->isBogus()
-                      ? Operand(ToRegister(memoryBase), 0)
-                      : Operand(ToRegister(memoryBase), ToRegister(ptr), TimesOne);
-
-    masm.wasmLoad(mir->access(), srcAddr, out);
-
-    if (ool) {
-        masm.bind(ool->rejoin());
-    }
-}
-
-void
-CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins)
-{
-    const MAsmJSStoreHeap* mir = ins->mir();
-    MOZ_ASSERT(mir->offset() == 0);
-
-    const LAllocation* ptr = ins->ptr();
-    const LAllocation* value = ins->value();
-    const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
-    const LAllocation* memoryBase = ins->memoryBase();
-
-    Scalar::Type accessType = mir->accessType();
-    canonicalizeIfDeterministic(accessType, value);
-
-    Operand dstAddr = ptr->isBogus()
-                      ? Operand(ToRegister(memoryBase), 0)
-                      : Operand(ToRegister(memoryBase), ToRegister(ptr), TimesOne);
-
-    Label rejoin;
-    if (mir->needsBoundsCheck()) {
-        masm.wasmBoundsCheck(Assembler::AboveOrEqual, ToRegister(ptr), ToRegister(boundsCheckLimit),
-                             &rejoin);
-    }
-
-    masm.wasmStore(mir->access(), ToAnyRegister(value), dstAddr);
-
-    if (rejoin.used()) {
-        masm.bind(&rejoin);
-    }
-}
-
-void
 CodeGenerator::visitWasmCompareExchangeHeap(LWasmCompareExchangeHeap* ins)
 {
     MWasmCompareExchangeHeap* mir = ins->mir();
 
     Register ptrReg = ToRegister(ins->ptr());
     Register oldval = ToRegister(ins->oldValue());
     Register newval = ToRegister(ins->newValue());
     Register addrTemp = ToRegister(ins->addrTemp());
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -396,86 +396,16 @@ LIRGenerator::visitWasmStore(MWasmStore*
         MOZ_CRASH("unexpected array type");
     }
 
     auto* lir = new(alloc()) LWasmStore(baseAlloc, valueAlloc, useRegisterAtStart(memoryBase));
     add(lir, ins);
 }
 
 void
-LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins)
-{
-    MDefinition* base = ins->base();
-    MOZ_ASSERT(base->type() == MIRType::Int32);
-
-    MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
-    MOZ_ASSERT_IF(ins->needsBoundsCheck(), boundsCheckLimit->type() == MIRType::Int32);
-
-    MDefinition* memoryBase = ins->memoryBase();
-    MOZ_ASSERT(memoryBase->type() == MIRType::Pointer);
-
-    // For simplicity, require a register if we're going to emit a bounds-check
-    // branch, so that we don't have special cases for constants.
-    LAllocation baseAlloc = ins->needsBoundsCheck()
-                            ? useRegisterAtStart(base)
-                            : useRegisterOrZeroAtStart(base);
-    LAllocation limitAlloc = ins->needsBoundsCheck()
-                           ? useRegisterAtStart(boundsCheckLimit)
-                           : LAllocation();
-
-    auto* lir = new(alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, useRegisterAtStart(memoryBase));
-    define(lir, ins);
-}
-
-void
-LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins)
-{
-    MDefinition* base = ins->base();
-    MOZ_ASSERT(base->type() == MIRType::Int32);
-
-    MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
-    MOZ_ASSERT_IF(ins->needsBoundsCheck(), boundsCheckLimit->type() == MIRType::Int32);
-
-    MDefinition* memoryBase = ins->memoryBase();
-    MOZ_ASSERT(memoryBase->type() == MIRType::Pointer);
-
-    // For simplicity, require a register if we're going to emit a bounds-check
-    // branch, so that we don't have special cases for constants.
-    LAllocation baseAlloc = ins->needsBoundsCheck()
-                            ? useRegisterAtStart(base)
-                            : useRegisterOrZeroAtStart(base);
-    LAllocation limitAlloc = ins->needsBoundsCheck()
-                           ? useRegisterAtStart(boundsCheckLimit)
-                           : LAllocation();
-
-    LAsmJSStoreHeap* lir = nullptr;
-    switch (ins->access().type()) {
-      case Scalar::Int8: case Scalar::Uint8:
-        // See comment for LIRGeneratorX86::useByteOpRegister.
-        lir = new(alloc()) LAsmJSStoreHeap(baseAlloc, useFixed(ins->value(), eax),
-                                           limitAlloc, useRegisterAtStart(memoryBase));
-        break;
-      case Scalar::Int16: case Scalar::Uint16:
-      case Scalar::Int32: case Scalar::Uint32:
-      case Scalar::Float32: case Scalar::Float64:
-        // For now, don't allow constant values. The immediate operand affects
-        // instruction layout which affects patching.
-        lir = new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
-                                            limitAlloc, useRegisterAtStart(memoryBase));
-        break;
-      case Scalar::Int64:
-        MOZ_CRASH("NYI");
-      case Scalar::Uint8Clamped:
-      case Scalar::MaxTypedArrayViewType:
-        MOZ_CRASH("unexpected array type");
-    }
-    add(lir, ins);
-}
-
-void
 LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins)
 {
     MDefinition* base = ins->base();
     MOZ_ASSERT(base->type() == MIRType::Int32);
 
     MDefinition* memoryBase = ins->memoryBase();
     MOZ_ASSERT(memoryBase->type() == MIRType::Pointer);
 
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -620,36 +620,16 @@ MacroAssembler::storeUnboxedValue(const 
                                   const Address& dest, MIRType slotType);
 template void
 MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, MIRType valueType,
                                   const BaseObjectElementIndex& dest, MIRType slotType);
 
 // wasm specific methods, used in both the wasm baseline compiler and ion.
 
 void
-MacroAssembler::wasmBoundsCheck(Condition cond, Register index, Register boundsCheckLimit, Label* label)
-{
-    cmp32(index, boundsCheckLimit);
-    j(cond, label);
-    if (JitOptions.spectreIndexMasking) {
-        cmovCCl(cond, Operand(boundsCheckLimit), index);
-    }
-}
-
-void
-MacroAssembler::wasmBoundsCheck(Condition cond, Register index, Address boundsCheckLimit, Label* label)
-{
-    cmp32(index, Operand(boundsCheckLimit));
-    j(cond, label);
-    if (JitOptions.spectreIndexMasking) {
-        cmovCCl(cond, Operand(boundsCheckLimit), index);
-    }
-}
-
-void
 MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access, Operand srcAddr, AnyRegister out)
 {
     MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP || srcAddr.kind() == Operand::MEM_SCALE);
 
     memoryBarrierBefore(access.sync());
 
     append(access, size());
     switch (access.type()) {
--- a/js/src/moz.build
+++ b/js/src/moz.build
@@ -282,17 +282,16 @@ UNIFIED_SOURCES += [
     'jit/BitSet.cpp',
     'jit/BytecodeAnalysis.cpp',
     'jit/C1Spewer.cpp',
     'jit/CacheIR.cpp',
     'jit/CacheIRCompiler.cpp',
     'jit/CacheIRSpewer.cpp',
     'jit/CodeGenerator.cpp',
     'jit/CompileWrappers.cpp',
-    'jit/Disassembler.cpp',
     'jit/EdgeCaseAnalysis.cpp',
     'jit/EffectiveAddressAnalysis.cpp',
     'jit/ExecutableAllocator.cpp',
     'jit/FoldLinearArithConstants.cpp',
     'jit/InstructionReordering.cpp',
     'jit/Ion.cpp',
     'jit/IonAnalysis.cpp',
     'jit/IonBuilder.cpp',
@@ -514,19 +513,16 @@ elif CONFIG['JS_CODEGEN_X86'] or CONFIG[
         'jit/x86-shared/Assembler-x86-shared.cpp',
         'jit/x86-shared/AssemblerBuffer-x86-shared.cpp',
         'jit/x86-shared/CodeGenerator-x86-shared.cpp',
         'jit/x86-shared/Lowering-x86-shared.cpp',
         'jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp',
         'jit/x86-shared/MacroAssembler-x86-shared.cpp',
         'jit/x86-shared/MoveEmitter-x86-shared.cpp',
     ]
-    SOURCES += [
-        'jit/x86-shared/Disassembler-x86-shared.cpp',  # using namespace js::jit::X86Encoding;
-    ]
     if CONFIG['JS_CODEGEN_X64']:
         LOpcodesGenerated.inputs += ['jit/x64/LIR-x64.h']
         UNIFIED_SOURCES += [
             'jit/x64/Assembler-x64.cpp',
             'jit/x64/Bailouts-x64.cpp',
             'jit/x64/CodeGenerator-x64.cpp',
             'jit/x64/Lowering-x64.cpp',
             'jit/x64/MacroAssembler-x64.cpp',
@@ -567,17 +563,16 @@ elif CONFIG['JS_CODEGEN_ARM']:
         ]
 elif CONFIG['JS_CODEGEN_ARM64']:
     LOpcodesGenerated.inputs += ['jit/arm64/LIR-arm64.h']
     UNIFIED_SOURCES += [
         'jit/arm64/Architecture-arm64.cpp',
         'jit/arm64/Assembler-arm64.cpp',
         'jit/arm64/Bailouts-arm64.cpp',
         'jit/arm64/CodeGenerator-arm64.cpp',
-        'jit/arm64/Disassembler-arm64.cpp',
         'jit/arm64/Lowering-arm64.cpp',
         'jit/arm64/MacroAssembler-arm64.cpp',
         'jit/arm64/MoveEmitter-arm64.cpp',
         'jit/arm64/Trampoline-arm64.cpp',
         'jit/arm64/vixl/Assembler-vixl.cpp',
         'jit/arm64/vixl/Cpu-vixl.cpp',
         'jit/arm64/vixl/Decoder-vixl.cpp',
         'jit/arm64/vixl/Disasm-vixl.cpp',
--- a/js/src/old-configure.in
+++ b/js/src/old-configure.in
@@ -688,17 +688,22 @@ case "$target" in
             AR='lib'
             AR_FLAGS='-NOLOGO -OUT:$@'
             ;;
         esac
         AR_EXTRACT=
         RANLIB='echo not_ranlib'
         STRIP='echo not_strip'
         PKG_SKIP_STRIP=1
-        WIN32_SUBSYSTEM_VERSION=6.01
+        # aarch64 doesn't support subsystem versions below 6.02
+        if test "$CPU_ARCH" = "aarch64"; then
+            WIN32_SUBSYSTEM_VERSION=6.02
+        else
+            WIN32_SUBSYSTEM_VERSION=6.01
+        fi
         WIN32_CONSOLE_EXE_LDFLAGS=-SUBSYSTEM:CONSOLE,$WIN32_SUBSYSTEM_VERSION
         WIN32_GUI_EXE_LDFLAGS=-SUBSYSTEM:WINDOWS,$WIN32_SUBSYSTEM_VERSION
         DSO_LDOPTS=-SUBSYSTEM:WINDOWS,$WIN32_SUBSYSTEM_VERSION
         _USE_CPP_INCLUDE_FLAG=1
         _DEFINES_CFLAGS="-FI $jsconfdefs -DMOZILLA_CLIENT"
         _DEFINES_CXXFLAGS="-FI $jsconfdefs -DMOZILLA_CLIENT"
         CFLAGS="$CFLAGS -W3 -Gy -Zc:inline"
         CXXFLAGS="$CXXFLAGS -W3 -Gy -Zc:inline"
--- a/js/src/vm/ArrayBufferObject-inl.h
+++ b/js/src/vm/ArrayBufferObject-inl.h
@@ -42,17 +42,17 @@ AnyArrayBufferByteLength(const ArrayBuff
 {
     if (buf->is<ArrayBufferObject>()) {
         return buf->as<ArrayBufferObject>().byteLength();
     }
     return buf->as<SharedArrayBufferObject>().byteLength();
 }
 
 inline uint32_t
-ArrayBufferObjectMaybeShared::byteLength()
+ArrayBufferObjectMaybeShared::byteLength() const
 {
     return AnyArrayBufferByteLength(this);
 }
 
 inline bool
 AnyArrayBufferIsPreparedForAsmJS(const ArrayBufferObjectMaybeShared* buf)
 {
     if (buf->is<ArrayBufferObject>()) {
--- a/js/src/vm/ArrayBufferObject.cpp
+++ b/js/src/vm/ArrayBufferObject.cpp
@@ -610,19 +610,19 @@ ArrayBufferObject::changeContents(JSCont
  * Wasm Raw Buf Linear Memory Structure
  *
  * The linear heap in Wasm is an mmapped array buffer. Several
  * constants manage its lifetime:
  *
  *  - length - the wasm-visible current length of the buffer. Accesses in the
  *    range [0, length] succeed. May only increase.
  *
- *  - boundsCheckLimit - when !WASM_HUGE_MEMORY, the size against which we
- *    perform bounds checks. It is always a constant offset smaller than
- *    mappedSize. Currently that constant offset is 64k (wasm::GuardSize).
+ *  - boundsCheckLimit - the size against which we perform bounds checks. It is
+ *    always smaller than mappedSize by a constant offset; currently that
+ *    offset is 64k (wasm::GuardSize).
  *
  *  - maxSize - the optional declared limit on how much length can grow.
  *
  *  - mappedSize - the actual mmapped size. Access in the range
  *    [0, mappedSize] will either succeed, or be handled by the wasm signal
  *    handlers.
  *
  * The below diagram shows the layout of the wasm heap. The wasm-visible
@@ -930,59 +930,25 @@ js::CreateWasmBuffer(JSContext* cx, cons
                                                                buffer);
 }
 
 // Note this function can return false with or without an exception pending. The
 // asm.js caller checks cx->isExceptionPending before propagating failure.
 // Returning false without throwing means that asm.js linking will fail, which
 // will cause recompilation as non-asm.js.
 /* static */ bool
-ArrayBufferObject::prepareForAsmJS(JSContext* cx, Handle<ArrayBufferObject*> buffer, bool needGuard)
+ArrayBufferObject::prepareForAsmJS(JSContext* cx, Handle<ArrayBufferObject*> buffer)
 {
-#ifdef WASM_HUGE_MEMORY
-    MOZ_ASSERT(needGuard);
-#endif
     MOZ_ASSERT(buffer->byteLength() % wasm::PageSize == 0);
     MOZ_RELEASE_ASSERT(wasm::HaveSignalHandlers());
 
     if (buffer->forInlineTypedObject()) {
         return false;
     }
 
-    if (needGuard) {
-        if (buffer->isWasm() && buffer->isPreparedForAsmJS()) {
-            return true;
-        }
-
-        // Non-prepared-for-asm.js wasm buffers can be detached at any time.
-        // This error can only be triggered for Atomics on !WASM_HUGE_MEMORY
-        // so this error is only visible in testing.
-        if (buffer->isWasm() || buffer->isPreparedForAsmJS()) {
-            return false;
-        }
-
-        uint32_t length = buffer->byteLength();
-        WasmArrayRawBuffer* wasmBuf = WasmArrayRawBuffer::Allocate(length, Some(length));
-        if (!wasmBuf) {
-            ReportOutOfMemory(cx);
-            return false;
-        }
-
-        void* data = wasmBuf->dataPointer();
-        memcpy(data, buffer->dataPointer(), length);
-
-        // Swap the new elements into the ArrayBufferObject. Mark the
-        // ArrayBufferObject so we don't do this again.
-        buffer->changeContents(cx, BufferContents::create<WASM>(data), OwnsData);
-        buffer->setIsPreparedForAsmJS();
-        MOZ_ASSERT(data == buffer->dataPointer());
-        cx->updateMallocCounter(wasmBuf->mappedSize());
-        return true;
-    }
-
     if (!buffer->isWasm() && buffer->isPreparedForAsmJS()) {
         return true;
     }
 
     // Non-prepared-for-asm.js wasm buffers can be detached at any time.
     if (buffer->isWasm()) {
         return false;
     }
@@ -1206,16 +1172,28 @@ ArrayBufferObject::wasmBoundsCheckLimit(
 uint32_t
 ArrayBufferObjectMaybeShared::wasmBoundsCheckLimit() const
 {
     if (is<ArrayBufferObject>()) {
         return as<ArrayBufferObject>().wasmBoundsCheckLimit();
     }
     return as<SharedArrayBufferObject>().wasmBoundsCheckLimit();
 }
+#else
+uint32_t
+ArrayBufferObject::wasmBoundsCheckLimit() const
+{
+    return byteLength();
+}
+
+uint32_t
+ArrayBufferObjectMaybeShared::wasmBoundsCheckLimit() const
+{
+    return byteLength();
+}
 #endif
 
 uint32_t
 ArrayBufferObject::flags() const
 {
     return uint32_t(getFixedSlot(FLAGS_SLOT).toInt32());
 }
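
The relationships among the constants described in the "Wasm Raw Buf Linear Memory Structure" comment above can be stated as invariants. The sketch below assumes the 64k guard constant mentioned there; the struct and field names are illustrative, not the real ArrayBufferObject layout:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Sketch of the invariants among the wasm heap constants.
    struct WasmHeapSketch {
        uint32_t length;            // wasm-visible length; may only grow
        uint32_t boundsCheckLimit;  // limit used by explicit bounds checks
        size_t mappedSize;          // size of the actual mmapped region

        void checkInvariants() const {
            const size_t GuardSize = 64 * 1024;  // wasm::GuardSize, per above
            // Accesses in [0, length] succeed, so the check limit must cover
            // them; everything up to mappedSize is backed by guard pages.
            assert(length <= boundsCheckLimit);
            assert(boundsCheckLimit + GuardSize <= mappedSize);
        }
    };
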
 
--- a/js/src/vm/ArrayBufferObject.h
+++ b/js/src/vm/ArrayBufferObject.h
@@ -100,33 +100,31 @@ int32_t LiveMappedBufferCount();
 class ArrayBufferObjectMaybeShared;
 
 mozilla::Maybe<uint32_t> WasmArrayBufferMaxSize(const ArrayBufferObjectMaybeShared* buf);
 size_t WasmArrayBufferMappedSize(const ArrayBufferObjectMaybeShared* buf);
 
 class ArrayBufferObjectMaybeShared : public NativeObject
 {
   public:
-    inline uint32_t byteLength();
+    inline uint32_t byteLength() const;
     inline bool isDetached() const;
     inline SharedMem<uint8_t*> dataPointerEither();
 
     // WebAssembly support:
     // Note: the eventual goal is to remove this from ArrayBuffer and have
     // (Shared)ArrayBuffers alias memory owned by some wasm::Memory object.
 
     mozilla::Maybe<uint32_t> wasmMaxSize() const {
         return WasmArrayBufferMaxSize(this);
     }
     size_t wasmMappedSize() const {
         return WasmArrayBufferMappedSize(this);
     }
-#ifndef WASM_HUGE_MEMORY
     uint32_t wasmBoundsCheckLimit() const;
-#endif
 
     inline bool isPreparedForAsmJS() const;
     inline bool isWasm() const;
 };
 
 typedef Rooted<ArrayBufferObjectMaybeShared*> RootedArrayBufferObjectMaybeShared;
 typedef Handle<ArrayBufferObjectMaybeShared*> HandleArrayBufferObjectMaybeShared;
 typedef MutableHandle<ArrayBufferObjectMaybeShared*> MutableHandleArrayBufferObjectMaybeShared;
@@ -382,31 +380,30 @@ class ArrayBufferObject : public ArrayBu
     bool isPlain() const { return bufferKind() == PLAIN; }
     bool isWasm() const { return bufferKind() == WASM; }
     bool isMapped() const { return bufferKind() == MAPPED; }
     bool isExternal() const { return bufferKind() == EXTERNAL; }
     bool isDetached() const { return flags() & DETACHED; }
     bool isPreparedForAsmJS() const { return flags() & FOR_ASMJS; }
 
     // WebAssembly support:
-    static MOZ_MUST_USE bool prepareForAsmJS(JSContext* cx, Handle<ArrayBufferObject*> buffer,
-                                             bool needGuard);
+    static MOZ_MUST_USE bool prepareForAsmJS(JSContext* cx, Handle<ArrayBufferObject*> buffer);
     size_t wasmMappedSize() const;
     mozilla::Maybe<uint32_t> wasmMaxSize() const;
     static MOZ_MUST_USE bool wasmGrowToSizeInPlace(uint32_t newSize,
                                                    Handle<ArrayBufferObject*> oldBuf,
                                                    MutableHandle<ArrayBufferObject*> newBuf,
                                                    JSContext* cx);
 #ifndef WASM_HUGE_MEMORY
     static MOZ_MUST_USE bool wasmMovingGrowToSize(uint32_t newSize,
                                                   Handle<ArrayBufferObject*> oldBuf,
                                                   MutableHandle<ArrayBufferObject*> newBuf,
                                                   JSContext* cx);
+#endif
     uint32_t wasmBoundsCheckLimit() const;
-#endif
 
     static void finalize(FreeOp* fop, JSObject* obj);
 
     static BufferContents createMappedContents(int fd, size_t offset, size_t length);
 
     static size_t offsetOfFlagsSlot() {
         return getFixedSlotOffset(FLAGS_SLOT);
     }
--- a/js/src/vm/JSContext.cpp
+++ b/js/src/vm/JSContext.cpp
@@ -113,17 +113,17 @@ JSContext::init(ContextKind kind)
             return false;
         }
 
         if (!fx.initInstance()) {
             return false;
         }
 
 #ifdef JS_SIMULATOR
-        simulator_ = jit::Simulator::Create(this);
+        simulator_ = jit::Simulator::Create();
         if (!simulator_) {
             return false;
         }
 #endif
 
         if (!wasm::EnsureSignalHandlers(this)) {
             return false;
         }
--- a/js/src/vm/Stack.cpp
+++ b/js/src/vm/Stack.cpp
@@ -1855,17 +1855,17 @@ jit::JitActivation::traceIonRecovery(JST
 void
 jit::JitActivation::startWasmTrap(wasm::Trap trap, uint32_t bytecodeOffset,
                                   const wasm::RegisterState& state)
 {
     MOZ_ASSERT(!isWasmTrapping());
 
     bool unwound;
     wasm::UnwindState unwindState;
-    MOZ_ALWAYS_TRUE(wasm::StartUnwinding(state, &unwindState, &unwound));
+    MOZ_RELEASE_ASSERT(wasm::StartUnwinding(state, &unwindState, &unwound));
     MOZ_ASSERT(unwound == (trap == wasm::Trap::IndirectCallBadSig));
 
     void* pc = unwindState.pc;
     wasm::Frame* fp = unwindState.fp;
 
     const wasm::Code& code = fp->tls->instance->code();
     MOZ_RELEASE_ASSERT(&code == wasm::LookupCode(pc));
 
--- a/js/src/wasm/AsmJS.cpp
+++ b/js/src/wasm/AsmJS.cpp
@@ -6550,27 +6550,18 @@ CheckBuffer(JSContext* cx, const AsmJSMe
                         metadata.minMemoryLength));
         if (!msg) {
             return false;
         }
         return LinkFail(cx, msg.get());
     }
 
     if (buffer->is<ArrayBufferObject>()) {
-        // On 64-bit, bounds checks are statically removed so the huge guard
-        // region is always necessary. On 32-bit, allocating a guard page
-        // requires reallocating the incoming ArrayBuffer which could trigger
-        // OOM. Thus, don't ask for a guard page in this case;
-#ifdef WASM_HUGE_MEMORY
-        bool needGuard = true;
-#else
-        bool needGuard = false;
-#endif
         Rooted<ArrayBufferObject*> arrayBuffer(cx, &buffer->as<ArrayBufferObject>());
-        if (!ArrayBufferObject::prepareForAsmJS(cx, arrayBuffer, needGuard)) {
+        if (!ArrayBufferObject::prepareForAsmJS(cx, arrayBuffer)) {
             return LinkFail(cx, "Unable to prepare ArrayBuffer for asm.js use");
         }
     } else {
         return LinkFail(cx, "Unable to prepare SharedArrayBuffer for asm.js use");
     }
 
     MOZ_ASSERT(buffer->isPreparedForAsmJS());
     return true;
--- a/js/src/wasm/WasmFrameIter.cpp
+++ b/js/src/wasm/WasmFrameIter.cpp
@@ -1462,54 +1462,8 @@ ProfilingFrameIterator::label() const
       case CodeRange::TrapExit:          return trapDescription;
       case CodeRange::DebugTrap:         return debugTrapDescription;
       case CodeRange::FarJumpIsland:     return "interstitial (in wasm)";
       case CodeRange::Throw:             MOZ_CRASH("does not have a frame");
     }
 
     MOZ_CRASH("bad code range kind");
 }
-
-Instance*
-wasm::LookupFaultingInstance(const ModuleSegment& codeSegment, void* pc, void* fp)
-{
-    // Assume bug-caused faults can be raised at any PC and apply the logic of
-    // ProfilingFrameIterator to reject any pc outside the (post-prologue,
-    // pre-epilogue) body of a wasm function. This is exhaustively tested by the
-    // simulators which call this function at every load/store before even
-    // knowing whether there is a fault.
-
-    const CodeRange* codeRange = codeSegment.code().lookupFuncRange(pc);
-    if (!codeRange) {
-        return nullptr;
-    }
-
-    size_t offsetInModule = ((uint8_t*)pc) - codeSegment.base();
-    if ((offsetInModule >= codeRange->funcNormalEntry() &&
-         offsetInModule < codeRange->funcNormalEntry() + SetFP) ||
-        (offsetInModule >= codeRange->ret() - PoppedFP &&
-         offsetInModule <= codeRange->ret()))
-    {
-        return nullptr;
-    }
-
-    Instance* instance = reinterpret_cast<Frame*>(fp)->tls->instance;
-
-    // TODO: In the special case of a cross-instance indirect call bad-signature
-    // fault, fp can point to the caller frame which is in a different
-    // instance/module than pc. This special case should go away when old-style
-    // traps go away and signal handling is reworked.
-    //MOZ_RELEASE_ASSERT(&instance->code() == &codeSegment.code());
-
-    return instance;
-}
-
-bool
-wasm::InCompiledCode(void* pc)
-{
-    if (LookupCodeSegment(pc)) {
-        return true;
-    }
-
-    const CodeRange* codeRange;
-    uint8_t* codeBase;
-    return LookupBuiltinThunk(pc, &codeRange, &codeBase);
-}
--- a/js/src/wasm/WasmFrameIter.h
+++ b/js/src/wasm/WasmFrameIter.h
@@ -40,16 +40,18 @@ class DebugFrame;
 class FuncTypeIdDesc;
 class Instance;
 class ModuleSegment;
 
 struct CallableOffsets;
 struct FuncOffsets;
 struct Frame;
 
+typedef JS::ProfilingFrameIterator::RegisterState RegisterState;
+
 // Iterates over a linear group of wasm frames of a single wasm JitActivation,
 // called synchronously from C++ in the wasm thread. It will stop at the first
 // frame that is not of the same kind, or at the end of an activation.
 //
 // If you want to handle every kind of frames (including JS jit frames), use
 // JitFrameIter.
 
 class WasmFrameIter
@@ -184,17 +186,17 @@ class ProfilingFrameIterator
 
     // Start unwinding at a group of wasm frames after unwinding an inner group
     // of JSJit frames.
     ProfilingFrameIterator(const jit::JitActivation& activation, const Frame* fp);
 
     // Start unwinding at the innermost activation given the register state when
     // the thread was suspended.
     ProfilingFrameIterator(const jit::JitActivation& activation,
-                           const JS::ProfilingFrameIterator::RegisterState& state);
+                           const RegisterState& state);
 
     void operator++();
     bool done() const { return !codeRange_ && exitReason_.isNone(); }
 
     void* stackAddress() const { MOZ_ASSERT(!done()); return stackAddress_; }
     uint8_t* unwoundIonCallerFP() const { MOZ_ASSERT(done()); return unwoundIonCallerFP_; }
     const char* label() const;
 };
@@ -223,40 +225,27 @@ GenerateJitEntryPrologue(jit::MacroAssem
 
 void
 GenerateFunctionPrologue(jit::MacroAssembler& masm, const FuncTypeIdDesc& funcTypeId,
                          const mozilla::Maybe<uint32_t>& tier1FuncIndex,
                          FuncOffsets* offsets);
 void
 GenerateFunctionEpilogue(jit::MacroAssembler& masm, unsigned framePushed, FuncOffsets* offsets);
 
-// Given a fault at pc with register fp, return the faulting instance if there
-// is such a plausible instance, and otherwise null.
-
-Instance*
-LookupFaultingInstance(const ModuleSegment& codeSegment, void* pc, void* fp);
-
-// Return whether the given PC is in wasm code.
-
-bool
-InCompiledCode(void* pc);
-
 // Describes register state and associated code at a given call frame.
 
 struct UnwindState
 {
     Frame* fp;
     void* pc;
     const Code* code;
     const CodeRange* codeRange;
     UnwindState() : fp(nullptr), pc(nullptr), code(nullptr), codeRange(nullptr) {}
 };
 
-typedef JS::ProfilingFrameIterator::RegisterState RegisterState;
-
 // Ensures the register state at a call site is consistent: pc must be in the
 // code range of the code described by fp. This prevents issues when using
 // the values of pc/fp, especially at call sites boundaries, where the state
 // hasn't fully transitioned from the caller's to the callee's.
 //
 // unwoundCaller is set to true if we were in a transitional state and had to
 // rewind to the caller's frame instead of the current frame.
 //
--- a/js/src/wasm/WasmInstance.cpp
+++ b/js/src/wasm/WasmInstance.cpp
@@ -792,19 +792,17 @@ Instance::Instance(JSContext* cx,
 #ifdef DEBUG
     for (auto t : code_->tiers()) {
         MOZ_ASSERT(funcImports.length() == metadata(t).funcImports.length());
     }
 #endif
     MOZ_ASSERT(tables_.length() == metadata().tables.length());
 
     tlsData()->memoryBase = memory ? memory->buffer().dataPointerEither().unwrap() : nullptr;
-#ifndef WASM_HUGE_MEMORY
     tlsData()->boundsCheckLimit = memory ? memory->buffer().wasmBoundsCheckLimit() : 0;
-#endif
     tlsData()->instance = this;
     tlsData()->realm = realm_;
     tlsData()->cx = cx;
     tlsData()->resetInterrupt(cx);
     tlsData()->jumpTable = code_->tieringJumpTable();
     tlsData()->addressOfNeedsIncrementalBarrier =
         (uint8_t*)cx->compartment()->zone()->addressOfNeedsIncrementalBarrier();
 
@@ -982,17 +980,16 @@ Instance::~Instance()
 }
 
 size_t
 Instance::memoryMappedSize() const
 {
     return memory_->buffer().wasmMappedSize();
 }
 
-#ifdef JS_SIMULATOR
 bool
 Instance::memoryAccessInGuardRegion(uint8_t* addr, unsigned numBytes) const
 {
     MOZ_ASSERT(numBytes > 0);
 
     if (!metadata().usesMemory()) {
         return false;
     }
@@ -1000,17 +997,16 @@ Instance::memoryAccessInGuardRegion(uint
     uint8_t* base = memoryBase().unwrap(/* comparison */);
     if (addr < base) {
         return false;
     }
 
     size_t lastByteOffset = addr - base + (numBytes - 1);
     return lastByteOffset >= memory()->volatileMemoryLength() && lastByteOffset < memoryMappedSize();
 }
-#endif
 
 void
 Instance::tracePrivate(JSTracer* trc)
 {
     // This method is only called from WasmInstanceObject so the only reason why
     // TraceEdge is called is so that the pointer can be updated during a moving
     // GC. TraceWeakEdge may sound better, but it is less efficient given that
     // we know object_ is already marked.
@@ -1232,19 +1228,17 @@ Instance::ensureProfilingLabels(bool pro
 void
 Instance::onMovingGrowMemory(uint8_t* prevMemoryBase)
 {
     MOZ_ASSERT(!isAsmJS());
     MOZ_ASSERT(!memory_->isShared());
 
     ArrayBufferObject& buffer = memory_->buffer().as<ArrayBufferObject>();
     tlsData()->memoryBase = buffer.dataPointer();
-#ifndef WASM_HUGE_MEMORY
     tlsData()->boundsCheckLimit = buffer.wasmBoundsCheckLimit();
-#endif
 }
 
 void
 Instance::onMovingGrowTable()
 {
     MOZ_ASSERT(!isAsmJS());
     MOZ_ASSERT(tables_.length() == 1);
     TableTls& table = tableTls(metadata().tables[0]);
--- a/js/src/wasm/WasmInstance.h
+++ b/js/src/wasm/WasmInstance.h
@@ -100,19 +100,17 @@ class Instance
     const MetadataTier& metadata(Tier t) const { return code_->metadata(t); }
     const Metadata& metadata() const { return code_->metadata(); }
     bool isAsmJS() const { return metadata().isAsmJS(); }
     const SharedTableVector& tables() const { return tables_; }
     SharedMem<uint8_t*> memoryBase() const;
     WasmMemoryObject* memory() const;
     size_t memoryMappedSize() const;
     SharedArrayRawBuffer* sharedMemoryBuffer() const; // never null
-#ifdef JS_SIMULATOR
     bool memoryAccessInGuardRegion(uint8_t* addr, unsigned numBytes) const;
-#endif
     const StructTypeVector& structTypes() const { return code_->structTypes(); }
 
     static constexpr size_t offsetOfJSJitArgsRectifier() {
         return offsetof(Instance, jsJitArgsRectifier_);
     }
     static constexpr size_t offsetOfJSJitExceptionHandler() {
         return offsetof(Instance, jsJitExceptionHandler_);
     }
--- a/js/src/wasm/WasmIonCompile.cpp
+++ b/js/src/wasm/WasmIonCompile.cpp
@@ -591,24 +591,26 @@ class FunctionCompiler
         load = MWasmLoadTls::New(alloc(), tlsPointer_, offsetof(wasm::TlsData, memoryBase),
                                  MIRType::Pointer, aliases);
         curBlock_->add(load);
 #endif
         return load;
     }
 
     MWasmLoadTls* maybeLoadBoundsCheckLimit() {
-        MWasmLoadTls* load = nullptr;
-#ifndef WASM_HUGE_MEMORY
+#ifdef WASM_HUGE_MEMORY
+        if (!env_.isAsmJS()) {
+            return nullptr;
+        }
+#endif
         AliasSet aliases = env_.maxMemoryLength.isSome() ? AliasSet::None()
                                                          : AliasSet::Load(AliasSet::WasmHeapMeta);
-        load = MWasmLoadTls::New(alloc(), tlsPointer_, offsetof(wasm::TlsData, boundsCheckLimit),
-                                 MIRType::Int32, aliases);
+        auto load = MWasmLoadTls::New(alloc(), tlsPointer_, offsetof(wasm::TlsData, boundsCheckLimit),
+                                      MIRType::Int32, aliases);
         curBlock_->add(load);
-#endif
         return load;
     }
 
     // Only sets *mustAdd if it also returns true.
     bool needAlignmentCheck(MemoryAccessDesc* access, MDefinition* base, bool* mustAdd) {
         MOZ_ASSERT(!*mustAdd);
 
         // asm.js accesses are always aligned and need no checks.
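
The net effect of the maybeLoadBoundsCheckLimit change above: on WASM_HUGE_MEMORY platforms, wasm proper still relies on guard pages and skips the TLS limit load, while asm.js, which never traps on out-of-bounds, now always loads the limit for its explicit checks. A sketch of the decision, with hugeMemory standing in for the compile-time flag:

    // Sketch only: mirrors the early return in maybeLoadBoundsCheckLimit.
    static bool needsExplicitBoundsCheckLimit(bool hugeMemory, bool isAsmJS)
    {
        // With huge memory, wasm leaves OOB detection to guard pages and the
        // signal handler; only asm.js code must load the TLS limit.
        return !hugeMemory || isAsmJS;
    }
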
--- a/js/src/wasm/WasmProcess.cpp
+++ b/js/src/wasm/WasmProcess.cpp
@@ -273,16 +273,28 @@ const Code*
 wasm::LookupCode(const void* pc, const CodeRange** codeRange /* = nullptr */)
 {
     const CodeSegment* found = LookupCodeSegment(pc, codeRange);
     MOZ_ASSERT_IF(!found && codeRange, !*codeRange);
     return found ? &found->code() : nullptr;
 }
 
 bool
+wasm::InCompiledCode(void* pc)
+{
+    if (LookupCodeSegment(pc)) {
+        return true;
+    }
+
+    const CodeRange* codeRange;
+    uint8_t* codeBase;
+    return LookupBuiltinThunk(pc, &codeRange, &codeBase);
+}
+
+bool
 wasm::Init()
 {
     MOZ_RELEASE_ASSERT(!sProcessCodeSegmentMap);
 
 #ifdef ENABLE_WASM_CRANELIFT
     cranelift_initialize();
 #endif
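
wasm::InCompiledCode now lives beside the process-wide code-segment map it queries, so it stays usable from lock-free contexts. A hypothetical caller, sketched for illustration only (the handler name is invented; the include path assumes the in-tree layout):

    #include <cassert>

    #include "wasm/WasmProcess.h"

    // Hypothetical usage sketch: confirm that a faulting pc belongs to wasm
    // (module code or a builtin thunk) before treating it as a wasm trap.
    static void onPossibleWasmFault(void* pc)
    {
        assert(js::wasm::InCompiledCode(pc));
        // ... proceed with wasm trap handling ...
    }
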
 
--- a/js/src/wasm/WasmProcess.h
+++ b/js/src/wasm/WasmProcess.h
@@ -33,16 +33,21 @@ class CodeSegment;
 // and thus are safe to use in a profiling context.
 
 const CodeSegment*
 LookupCodeSegment(const void* pc, const CodeRange** codeRange = nullptr);
 
 const Code*
 LookupCode(const void* pc, const CodeRange** codeRange = nullptr);
 
+// Return whether the given PC is in any type of wasm code (module or builtin).
+
+bool
+InCompiledCode(void* pc);
+
 // A bool member that can be used as a very fast lookup to know if there is any
 // code segment at all.
 
 extern mozilla::Atomic<bool> CodeExists;
 
 // These methods allow to (un)register CodeSegments so they can be looked up
 // via pc in the methods described above.
 
--- a/js/src/wasm/WasmSignalHandlers.cpp
+++ b/js/src/wasm/WasmSignalHandlers.cpp
@@ -14,27 +14,32 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 #include "wasm/WasmSignalHandlers.h"
 
 #include "mozilla/DebugOnly.h"
-#include "mozilla/PodOperations.h"
 #include "mozilla/ScopeExit.h"
 #include "mozilla/ThreadLocal.h"
 
-#include "jit/AtomicOperations.h"
-#include "jit/Disassembler.h"
 #include "vm/Runtime.h"
-#include "wasm/WasmBuiltins.h"
 #include "wasm/WasmInstance.h"
 
-#include "vm/ArrayBufferObject-inl.h"
+using namespace js;
+using namespace js::wasm;
+
+using mozilla::DebugOnly;
+
+// =============================================================================
+// The following pile of macros and includes defines the ToRegisterState() and
+// the ContextTo{PC,FP,SP,LR}() functions from the (highly) platform-specific
+// CONTEXT struct which is provided to the signal handler.
+// =============================================================================
 
 #if defined(XP_WIN)
 # include "util/Windows.h"
 #else
 # include <signal.h>
 # include <sys/mman.h>
 #endif
 
@@ -46,86 +51,35 @@
 # if defined(__DragonFly__)
 #  include <machine/npx.h> // for union savefpu
 # elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
        defined(__NetBSD__) || defined(__OpenBSD__)
 #  include <machine/fpu.h> // for struct savefpu/fxsave64
 # endif
 #endif
 
-using namespace js;
-using namespace js::jit;
-using namespace js::wasm;
-
-using JS::GenericNaN;
-using mozilla::DebugOnly;
-
-// Crashing inside the signal handler can cause the handler to be recursively
-// invoked, eventually blowing the stack without actually showing a crash
-// report dialog via Breakpad. To guard against this we watch for such
-// recursion and fall through to the next handler immediately rather than
-// trying to handle it.
-
-static MOZ_THREAD_LOCAL(bool) sAlreadyInSignalHandler;
-
-struct AutoSignalHandler
-{
-    explicit AutoSignalHandler()
-    {
-        MOZ_ASSERT(!sAlreadyInSignalHandler.get());
-        sAlreadyInSignalHandler.set(true);
-    }
-
-    ~AutoSignalHandler() {
-        MOZ_ASSERT(sAlreadyInSignalHandler.get());
-        sAlreadyInSignalHandler.set(false);
-    }
-};
-
 #if defined(XP_WIN)
-# define XMM_sig(p,i) ((p)->Xmm##i)
 # define EIP_sig(p) ((p)->Eip)
 # define EBP_sig(p) ((p)->Ebp)
 # define ESP_sig(p) ((p)->Esp)
 # define RIP_sig(p) ((p)->Rip)
-# define RAX_sig(p) ((p)->Rax)
-# define RCX_sig(p) ((p)->Rcx)
-# define RDX_sig(p) ((p)->Rdx)
-# define RBX_sig(p) ((p)->Rbx)
 # define RSP_sig(p) ((p)->Rsp)
 # define RBP_sig(p) ((p)->Rbp)
-# define RSI_sig(p) ((p)->Rsi)
-# define RDI_sig(p) ((p)->Rdi)
-# define R8_sig(p) ((p)->R8)
-# define R9_sig(p) ((p)->R9)
-# define R10_sig(p) ((p)->R10)
 # define R11_sig(p) ((p)->R11)
-# define R12_sig(p) ((p)->R12)
 # define R13_sig(p) ((p)->R13)
 # define R14_sig(p) ((p)->R14)
 # define R15_sig(p) ((p)->R15)
 #elif defined(__OpenBSD__)
-# define XMM_sig(p,i) ((p)->sc_fpstate->fx_xmm[i])
 # define EIP_sig(p) ((p)->sc_eip)
 # define EBP_sig(p) ((p)->sc_ebp)
 # define ESP_sig(p) ((p)->sc_esp)
 # define RIP_sig(p) ((p)->sc_rip)
-# define RAX_sig(p) ((p)->sc_rax)
-# define RCX_sig(p) ((p)->sc_rcx)
-# define RDX_sig(p) ((p)->sc_rdx)
-# define RBX_sig(p) ((p)->sc_rbx)
 # define RSP_sig(p) ((p)->sc_rsp)
 # define RBP_sig(p) ((p)->sc_rbp)
-# define RSI_sig(p) ((p)->sc_rsi)
-# define RDI_sig(p) ((p)->sc_rdi)
-# define R8_sig(p) ((p)->sc_r8)
-# define R9_sig(p) ((p)->sc_r9)
-# define R10_sig(p) ((p)->sc_r10)
 # define R11_sig(p) ((p)->sc_r11)
-# define R12_sig(p) ((p)->sc_r12)
 # if defined(__arm__)
 #  define R13_sig(p) ((p)->sc_usr_sp)
 #  define R14_sig(p) ((p)->sc_usr_lr)
 #  define R15_sig(p) ((p)->sc_pc)
 # else
 #  define R13_sig(p) ((p)->sc_r13)
 #  define R14_sig(p) ((p)->sc_r14)
 #  define R15_sig(p) ((p)->sc_r15)
@@ -137,39 +91,27 @@ struct AutoSignalHandler
 #  define R31_sig(p) ((p)->sc_sp)
 # endif
 # if defined(__mips__)
 #  define EPC_sig(p) ((p)->sc_pc)
 #  define RFP_sig(p) ((p)->sc_regs[30])
 # endif
 #elif defined(__linux__) || defined(__sun)
 # if defined(__linux__)
-#  define XMM_sig(p,i) ((p)->uc_mcontext.fpregs->_xmm[i])
 #  define EIP_sig(p) ((p)->uc_mcontext.gregs[REG_EIP])
 #  define EBP_sig(p) ((p)->uc_mcontext.gregs[REG_EBP])
 #  define ESP_sig(p) ((p)->uc_mcontext.gregs[REG_ESP])
 # else
-#  define XMM_sig(p,i) ((p)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.xmm[i])
 #  define EIP_sig(p) ((p)->uc_mcontext.gregs[REG_PC])
 #  define EBP_sig(p) ((p)->uc_mcontext.gregs[REG_EBP])
 #  define ESP_sig(p) ((p)->uc_mcontext.gregs[REG_ESP])
 # endif
 # define RIP_sig(p) ((p)->uc_mcontext.gregs[REG_RIP])
-# define RAX_sig(p) ((p)->uc_mcontext.gregs[REG_RAX])
-# define RCX_sig(p) ((p)->uc_mcontext.gregs[REG_RCX])
-# define RDX_sig(p) ((p)->uc_mcontext.gregs[REG_RDX])
-# define RBX_sig(p) ((p)->uc_mcontext.gregs[REG_RBX])
 # define RSP_sig(p) ((p)->uc_mcontext.gregs[REG_RSP])
 # define RBP_sig(p) ((p)->uc_mcontext.gregs[REG_RBP])
-# define RSI_sig(p) ((p)->uc_mcontext.gregs[REG_RSI])
-# define RDI_sig(p) ((p)->uc_mcontext.gregs[REG_RDI])
-# define R8_sig(p) ((p)->uc_mcontext.gregs[REG_R8])
-# define R9_sig(p) ((p)->uc_mcontext.gregs[REG_R9])
-# define R10_sig(p) ((p)->uc_mcontext.gregs[REG_R10])
-# define R12_sig(p) ((p)->uc_mcontext.gregs[REG_R12])
 # if defined(__linux__) && defined(__arm__)
 #  define R11_sig(p) ((p)->uc_mcontext.arm_fp)
 #  define R13_sig(p) ((p)->uc_mcontext.arm_sp)
 #  define R14_sig(p) ((p)->uc_mcontext.arm_lr)
 #  define R15_sig(p) ((p)->uc_mcontext.arm_pc)
 # else
 #  define R11_sig(p) ((p)->uc_mcontext.gregs[REG_R11])
 #  define R13_sig(p) ((p)->uc_mcontext.gregs[REG_R13])
@@ -190,75 +132,47 @@ struct AutoSignalHandler
 # endif
 # if defined(__linux__) && (defined(__sparc__) && defined(__arch64__))
 #  define PC_sig(p) ((p)->uc_mcontext.mc_gregs[MC_PC])
 #  define FP_sig(p) ((p)->uc_mcontext.mc_fp)
 #  define SP_sig(p) ((p)->uc_mcontext.mc_i7)
 # endif
 # if defined(__linux__) && \
      (defined(__ppc64__) ||  defined (__PPC64__) || defined(__ppc64le__) || defined (__PPC64LE__))
-// powerpc stack frame pointer (SFP or SP or FP)
 #  define R01_sig(p) ((p)->uc_mcontext.gp_regs[1])
-// powerpc next instruction pointer (NIP or PC)
 #  define R32_sig(p) ((p)->uc_mcontext.gp_regs[32])
 # endif
 #elif defined(__NetBSD__)
-# define XMM_sig(p,i) (((struct fxsave64*)(p)->uc_mcontext.__fpregs)->fx_xmm[i])
 # define EIP_sig(p) ((p)->uc_mcontext.__gregs[_REG_EIP])
 # define EBP_sig(p) ((p)->uc_mcontext.__gregs[_REG_EBP])
 # define ESP_sig(p) ((p)->uc_mcontext.__gregs[_REG_ESP])
 # define RIP_sig(p) ((p)->uc_mcontext.__gregs[_REG_RIP])
-# define RAX_sig(p) ((p)->uc_mcontext.__gregs[_REG_RAX])
-# define RCX_sig(p) ((p)->uc_mcontext.__gregs[_REG_RCX])
-# define RDX_sig(p) ((p)->uc_mcontext.__gregs[_REG_RDX])
-# define RBX_sig(p) ((p)->uc_mcontext.__gregs[_REG_RBX])
 # define RSP_sig(p) ((p)->uc_mcontext.__gregs[_REG_RSP])
 # define RBP_sig(p) ((p)->uc_mcontext.__gregs[_REG_RBP])
-# define RSI_sig(p) ((p)->uc_mcontext.__gregs[_REG_RSI])
-# define RDI_sig(p) ((p)->uc_mcontext.__gregs[_REG_RDI])
-# define R8_sig(p) ((p)->uc_mcontext.__gregs[_REG_R8])
-# define R9_sig(p) ((p)->uc_mcontext.__gregs[_REG_R9])
-# define R10_sig(p) ((p)->uc_mcontext.__gregs[_REG_R10])
 # define R11_sig(p) ((p)->uc_mcontext.__gregs[_REG_R11])
-# define R12_sig(p) ((p)->uc_mcontext.__gregs[_REG_R12])
 # define R13_sig(p) ((p)->uc_mcontext.__gregs[_REG_R13])
 # define R14_sig(p) ((p)->uc_mcontext.__gregs[_REG_R14])
 # define R15_sig(p) ((p)->uc_mcontext.__gregs[_REG_R15])
 # if defined(__aarch64__)
 #  define EPC_sig(p) ((p)->uc_mcontext.__gregs[_REG_PC])
 #  define RFP_sig(p) ((p)->uc_mcontext.__gregs[_REG_X29])
 #  define RLR_sig(p) ((p)->uc_mcontext.__gregs[_REG_X30])
 #  define R31_sig(p) ((p)->uc_mcontext.__gregs[_REG_SP])
 # endif
 # if defined(__mips__)
 #  define EPC_sig(p) ((p)->uc_mcontext.__gregs[_REG_EPC])
 #  define RFP_sig(p) ((p)->uc_mcontext.__gregs[_REG_S8])
 # endif
 #elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-# if defined(__DragonFly__)
-#  define XMM_sig(p,i) (((union savefpu*)(p)->uc_mcontext.mc_fpregs)->sv_xmm.sv_xmm[i])
-# else
-#  define XMM_sig(p,i) (((struct savefpu*)(p)->uc_mcontext.mc_fpstate)->sv_xmm[i])
-# endif
 # define EIP_sig(p) ((p)->uc_mcontext.mc_eip)
 # define EBP_sig(p) ((p)->uc_mcontext.mc_ebp)
 # define ESP_sig(p) ((p)->uc_mcontext.mc_esp)
 # define RIP_sig(p) ((p)->uc_mcontext.mc_rip)
-# define RAX_sig(p) ((p)->uc_mcontext.mc_rax)
-# define RCX_sig(p) ((p)->uc_mcontext.mc_rcx)
-# define RDX_sig(p) ((p)->uc_mcontext.mc_rdx)
-# define RBX_sig(p) ((p)->uc_mcontext.mc_rbx)
 # define RSP_sig(p) ((p)->uc_mcontext.mc_rsp)
 # define RBP_sig(p) ((p)->uc_mcontext.mc_rbp)
-# define RSI_sig(p) ((p)->uc_mcontext.mc_rsi)
-# define RDI_sig(p) ((p)->uc_mcontext.mc_rdi)
-# define R8_sig(p) ((p)->uc_mcontext.mc_r8)
-# define R9_sig(p) ((p)->uc_mcontext.mc_r9)
-# define R10_sig(p) ((p)->uc_mcontext.mc_r10)
-# define R12_sig(p) ((p)->uc_mcontext.mc_r12)
 # if defined(__FreeBSD__) && defined(__arm__)
 #  define R11_sig(p) ((p)->uc_mcontext.__gregs[_REG_R11])
 #  define R13_sig(p) ((p)->uc_mcontext.__gregs[_REG_R13])
 #  define R14_sig(p) ((p)->uc_mcontext.__gregs[_REG_R14])
 #  define R15_sig(p) ((p)->uc_mcontext.__gregs[_REG_R15])
 # else
 #  define R11_sig(p) ((p)->uc_mcontext.mc_r11)
 #  define R13_sig(p) ((p)->uc_mcontext.mc_r13)
@@ -477,508 +391,105 @@ ToRegisterState(CONTEXT* context)
 #if defined(__arm__) || defined(__aarch64__) || defined(__mips__)
     state.lr = ContextToLR(context);
 #else
     state.lr = (void*)UINTPTR_MAX;
 #endif
     return state;
 }
 
-#if defined(WASM_HUGE_MEMORY)
-MOZ_COLD static void
-SetFPRegToNaN(size_t size, void* fp_reg)
-{
-    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
-    memset(fp_reg, 0, Simd128DataSize);
-    switch (size) {
-      case 4: *static_cast<float*>(fp_reg) = GenericNaN(); break;
-      case 8: *static_cast<double*>(fp_reg) = GenericNaN(); break;
-      default:
-        // All SIMD accesses throw on OOB.
-        MOZ_CRASH("unexpected size in SetFPRegToNaN");
-    }
-}
-
-MOZ_COLD static void
-SetGPRegToZero(void* gp_reg)
-{
-    memset(gp_reg, 0, sizeof(intptr_t));
-}
-
-MOZ_COLD static void
-SetFPRegToLoadedValue(SharedMem<void*> addr, size_t size, void* fp_reg)
-{
-    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
-    memset(fp_reg, 0, Simd128DataSize);
-    AtomicOperations::memcpySafeWhenRacy(fp_reg, addr, size);
-}
-
-MOZ_COLD static void
-SetGPRegToLoadedValue(SharedMem<void*> addr, size_t size, void* gp_reg)
-{
-    MOZ_RELEASE_ASSERT(size <= sizeof(void*));
-    memset(gp_reg, 0, sizeof(void*));
-    AtomicOperations::memcpySafeWhenRacy(gp_reg, addr, size);
-}
-
-MOZ_COLD static void
-SetGPRegToLoadedValueSext32(SharedMem<void*> addr, size_t size, void* gp_reg)
-{
-    MOZ_RELEASE_ASSERT(size <= sizeof(int32_t));
-    int8_t msb = AtomicOperations::loadSafeWhenRacy(addr.cast<uint8_t*>() + (size - 1));
-    memset(gp_reg, 0, sizeof(void*));
-    memset(gp_reg, msb >> 7, sizeof(int32_t));
-    AtomicOperations::memcpySafeWhenRacy(gp_reg, addr, size);
-}
+// =============================================================================
+// All signals/exceptions funnel down to this one trap-handling function which
+// tests whether the pc is in a wasm module and, if so, whether there is
+// actually a trap expected at this pc. These tests both prevent real bugs from
+// being silently converted to wasm traps and provide the trapping wasm
+// bytecode offset we need to report in the error.
+//
+// Crashing inside wasm trap handling (due to a bug in the trap handling itself
+// or one exposed by it) must be reported like a normal crash, not cause the
+// crash report to be lost. On Windows and non-Mach Unix, a crash during the
+// handler reenters the handler, possibly repeatedly until the stack is
+// exhausted, so we prevent recursion with the thread-local
+// sAlreadyHandlingTrap. On Mach, the wasm exception handler has its own thread
+// and is installed only on the thread-level debugging ports of JSRuntime
+// threads, so a crash on the exception handler thread will not recurse; it
+// will bubble up to the process-level debugging ports (where Breakpad is
+// installed).
+// =============================================================================
 
-MOZ_COLD static void
-StoreValueFromFPReg(SharedMem<void*> addr, size_t size, const void* fp_reg)
-{
-    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
-    AtomicOperations::memcpySafeWhenRacy(addr, const_cast<void*>(fp_reg), size);
-}
-
-MOZ_COLD static void
-StoreValueFromGPReg(SharedMem<void*> addr, size_t size, const void* gp_reg)
-{
-    MOZ_RELEASE_ASSERT(size <= sizeof(void*));
-    AtomicOperations::memcpySafeWhenRacy(addr, const_cast<void*>(gp_reg), size);
-}
-
-MOZ_COLD static void
-StoreValueFromGPImm(SharedMem<void*> addr, size_t size, int32_t imm)
-{
-    MOZ_RELEASE_ASSERT(size <= sizeof(imm));
-    AtomicOperations::memcpySafeWhenRacy(addr, static_cast<void*>(&imm), size);
-}
+static MOZ_THREAD_LOCAL(bool) sAlreadyHandlingTrap;
 
-#if defined(JS_CODEGEN_X64)
-# if !defined(XP_DARWIN)
-MOZ_COLD static void*
-AddressOfFPRegisterSlot(CONTEXT* context, FloatRegisters::Encoding encoding)
-{
-    switch (encoding) {
-      case X86Encoding::xmm0:  return &XMM_sig(context, 0);
-      case X86Encoding::xmm1:  return &XMM_sig(context, 1);
-      case X86Encoding::xmm2:  return &XMM_sig(context, 2);
-      case X86Encoding::xmm3:  return &XMM_sig(context, 3);
-      case X86Encoding::xmm4:  return &XMM_sig(context, 4);
-      case X86Encoding::xmm5:  return &XMM_sig(context, 5);
-      case X86Encoding::xmm6:  return &XMM_sig(context, 6);
-      case X86Encoding::xmm7:  return &XMM_sig(context, 7);
-      case X86Encoding::xmm8:  return &XMM_sig(context, 8);
-      case X86Encoding::xmm9:  return &XMM_sig(context, 9);
-      case X86Encoding::xmm10: return &XMM_sig(context, 10);
-      case X86Encoding::xmm11: return &XMM_sig(context, 11);
-      case X86Encoding::xmm12: return &XMM_sig(context, 12);
-      case X86Encoding::xmm13: return &XMM_sig(context, 13);
-      case X86Encoding::xmm14: return &XMM_sig(context, 14);
-      case X86Encoding::xmm15: return &XMM_sig(context, 15);
-      default: break;
-    }
-    MOZ_CRASH();
-}
-
-MOZ_COLD static void*
-AddressOfGPRegisterSlot(CONTEXT* context, Registers::Code code)
+struct AutoHandlingTrap
 {
-    switch (code) {
-      case X86Encoding::rax: return &RAX_sig(context);
-      case X86Encoding::rcx: return &RCX_sig(context);
-      case X86Encoding::rdx: return &RDX_sig(context);
-      case X86Encoding::rbx: return &RBX_sig(context);
-      case X86Encoding::rsp: return &RSP_sig(context);
-      case X86Encoding::rbp: return &RBP_sig(context);
-      case X86Encoding::rsi: return &RSI_sig(context);
-      case X86Encoding::rdi: return &RDI_sig(context);
-      case X86Encoding::r8:  return &R8_sig(context);
-      case X86Encoding::r9:  return &R9_sig(context);
-      case X86Encoding::r10: return &R10_sig(context);
-      case X86Encoding::r11: return &R11_sig(context);
-      case X86Encoding::r12: return &R12_sig(context);
-      case X86Encoding::r13: return &R13_sig(context);
-      case X86Encoding::r14: return &R14_sig(context);
-      case X86Encoding::r15: return &R15_sig(context);
-      default: break;
-    }
-    MOZ_CRASH();
-}
-# else
-MOZ_COLD static void*
-AddressOfFPRegisterSlot(CONTEXT* context, FloatRegisters::Encoding encoding)
-{
-    switch (encoding) {
-      case X86Encoding::xmm0:  return &context->float_.__fpu_xmm0;
-      case X86Encoding::xmm1:  return &context->float_.__fpu_xmm1;
-      case X86Encoding::xmm2:  return &context->float_.__fpu_xmm2;
-      case X86Encoding::xmm3:  return &context->float_.__fpu_xmm3;
-      case X86Encoding::xmm4:  return &context->float_.__fpu_xmm4;
-      case X86Encoding::xmm5:  return &context->float_.__fpu_xmm5;
-      case X86Encoding::xmm6:  return &context->float_.__fpu_xmm6;
-      case X86Encoding::xmm7:  return &context->float_.__fpu_xmm7;
-      case X86Encoding::xmm8:  return &context->float_.__fpu_xmm8;
-      case X86Encoding::xmm9:  return &context->float_.__fpu_xmm9;
-      case X86Encoding::xmm10: return &context->float_.__fpu_xmm10;
-      case X86Encoding::xmm11: return &context->float_.__fpu_xmm11;
-      case X86Encoding::xmm12: return &context->float_.__fpu_xmm12;
-      case X86Encoding::xmm13: return &context->float_.__fpu_xmm13;
-      case X86Encoding::xmm14: return &context->float_.__fpu_xmm14;
-      case X86Encoding::xmm15: return &context->float_.__fpu_xmm15;
-      default: break;
-    }
-    MOZ_CRASH();
-}
-
-MOZ_COLD static void*
-AddressOfGPRegisterSlot(CONTEXT* context, Registers::Code code)
-{
-    switch (code) {
-      case X86Encoding::rax: return &context->thread.__rax;
-      case X86Encoding::rcx: return &context->thread.__rcx;
-      case X86Encoding::rdx: return &context->thread.__rdx;
-      case X86Encoding::rbx: return &context->thread.__rbx;
-      case X86Encoding::rsp: return &context->thread.__rsp;
-      case X86Encoding::rbp: return &context->thread.__rbp;
-      case X86Encoding::rsi: return &context->thread.__rsi;
-      case X86Encoding::rdi: return &context->thread.__rdi;
-      case X86Encoding::r8:  return &context->thread.__r8;
-      case X86Encoding::r9:  return &context->thread.__r9;
-      case X86Encoding::r10: return &context->thread.__r10;
-      case X86Encoding::r11: return &context->thread.__r11;
-      case X86Encoding::r12: return &context->thread.__r12;
-      case X86Encoding::r13: return &context->thread.__r13;
-      case X86Encoding::r14: return &context->thread.__r14;
-      case X86Encoding::r15: return &context->thread.__r15;
-      default: break;
-    }
-    MOZ_CRASH();
-}
-# endif  // !XP_DARWIN
-#elif defined(JS_CODEGEN_ARM64)
-MOZ_COLD static void*
-AddressOfFPRegisterSlot(CONTEXT* context, FloatRegisters::Encoding encoding)
-{
-    MOZ_CRASH("NYI - asm.js not supported yet on this platform");
-}
-
-MOZ_COLD static void*
-AddressOfGPRegisterSlot(CONTEXT* context, Registers::Code code)
-{
-    MOZ_CRASH("NYI - asm.js not supported yet on this platform");
-}
-#endif
-
-MOZ_COLD static void
-SetRegisterToCoercedUndefined(CONTEXT* context, size_t size,
-                              const Disassembler::OtherOperand& value)
-{
-    if (value.kind() == Disassembler::OtherOperand::FPR) {
-        SetFPRegToNaN(size, AddressOfFPRegisterSlot(context, value.fpr()));
-    } else {
-        SetGPRegToZero(AddressOfGPRegisterSlot(context, value.gpr()));
-    }
-}
-
-MOZ_COLD static void
-SetRegisterToLoadedValue(CONTEXT* context, SharedMem<void*> addr, size_t size,
-                         const Disassembler::OtherOperand& value)
-{
-    if (value.kind() == Disassembler::OtherOperand::FPR) {
-        SetFPRegToLoadedValue(addr, size, AddressOfFPRegisterSlot(context, value.fpr()));
-    } else {
-        SetGPRegToLoadedValue(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
-    }
-}
-
-MOZ_COLD static void
-SetRegisterToLoadedValueSext32(CONTEXT* context, SharedMem<void*> addr, size_t size,
-                               const Disassembler::OtherOperand& value)
-{
-    SetGPRegToLoadedValueSext32(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
-}
-
-MOZ_COLD static void
-StoreValueFromRegister(CONTEXT* context, SharedMem<void*> addr, size_t size,
-                       const Disassembler::OtherOperand& value)
-{
-    if (value.kind() == Disassembler::OtherOperand::FPR) {
-        StoreValueFromFPReg(addr, size, AddressOfFPRegisterSlot(context, value.fpr()));
-    } else if (value.kind() == Disassembler::OtherOperand::GPR) {
-        StoreValueFromGPReg(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
-    } else {
-        StoreValueFromGPImm(addr, size, value.imm());
-    }
-}
-
-MOZ_COLD static uint8_t*
-ComputeAccessAddress(CONTEXT* context, const Disassembler::ComplexAddress& address)
-{
-    MOZ_RELEASE_ASSERT(!address.isPCRelative(), "PC-relative addresses not supported yet");
-
-    uintptr_t result = address.disp();
-
-    if (address.hasBase()) {
-        uintptr_t base;
-        StoreValueFromGPReg(SharedMem<void*>::unshared(&base), sizeof(uintptr_t),
-                            AddressOfGPRegisterSlot(context, address.base()));
-        result += base;
+    AutoHandlingTrap() {
+        MOZ_ASSERT(!sAlreadyHandlingTrap.get());
+        sAlreadyHandlingTrap.set(true);
     }
 
-    if (address.hasIndex()) {
-        uintptr_t index;
-        StoreValueFromGPReg(SharedMem<void*>::unshared(&index), sizeof(uintptr_t),
-                            AddressOfGPRegisterSlot(context, address.index()));
-        MOZ_ASSERT(address.scale() < 32, "address shift overflow");
-        result += index * (uintptr_t(1) << address.scale());
-    }
-
-    return reinterpret_cast<uint8_t*>(result);
-}
-#endif // WASM_HUGE_MEMORY
-
-MOZ_COLD static MOZ_MUST_USE bool
-HandleOutOfBounds(CONTEXT* context, uint8_t* pc, uint8_t* faultingAddress,
-                  const ModuleSegment* segment, const Instance& instance, JitActivation* activation,
-                  uint8_t** ppc)
-{
-    MOZ_RELEASE_ASSERT(segment->code().containsCodePC(pc));
-
-    Trap trap;
-    BytecodeOffset bytecode;
-    MOZ_ALWAYS_TRUE(segment->code().lookupTrap(pc, &trap, &bytecode));
-
-    if (trap != Trap::OutOfBounds) {
-        return false;
-    }
-
-    if (bytecode.isValid()) {
-        activation->startWasmTrap(Trap::OutOfBounds, bytecode.offset(), ToRegisterState(context));
-        *ppc = segment->trapCode();
-        return true;
+    ~AutoHandlingTrap() {
+        MOZ_ASSERT(sAlreadyHandlingTrap.get());
+        sAlreadyHandlingTrap.set(false);
     }
-
-#ifndef WASM_HUGE_MEMORY
-    return false;
-#else
-    // On WASM_HUGE_MEMORY platforms, asm.js code may fault. asm.js does not
-    // trap on fault and so has no trap out-of-line path. Instead, stores are
-    // silently ignored (by advancing the pc past the store and resuming) and
-    // loads silently succeed with a JS-semantics-determined value.
-    MOZ_RELEASE_ASSERT(instance.isAsmJS());
-
-    // Asm.JS memory cannot grow or shrink - only wasm can grow or shrink it,
-    // and asm.js is not allowed to use wasm memory.  On this Asm.JS-only path
-    // we therefore need not worry about memory growing or shrinking while the
-    // signal handler is executing, and we can read the length without locking
-    // the memory.  Indeed, the buffer's byteLength always holds the correct
-    // value.
-    uint32_t memoryLength = instance.memory()->buffer().byteLength();
-
-    // Disassemble the instruction which caused the trap so that we can extract
-    // information about it and decide what to do.
-    Disassembler::HeapAccess access;
-    uint8_t* end = Disassembler::DisassembleHeapAccess(pc, &access);
-    const Disassembler::ComplexAddress& address = access.address();
-    MOZ_RELEASE_ASSERT(end > pc);
-    MOZ_RELEASE_ASSERT(segment->containsCodePC(end));
-
-    // Check x64 asm.js heap access invariants.
-    MOZ_RELEASE_ASSERT(address.disp() >= 0);
-    MOZ_RELEASE_ASSERT(address.base() == HeapReg.code());
-    MOZ_RELEASE_ASSERT(!address.hasIndex() || address.index() != HeapReg.code());
-    MOZ_RELEASE_ASSERT(address.scale() == 0);
-    if (address.hasBase()) {
-        uintptr_t base;
-        StoreValueFromGPReg(SharedMem<void*>::unshared(&base), sizeof(uintptr_t),
-                            AddressOfGPRegisterSlot(context, address.base()));
-        MOZ_RELEASE_ASSERT(reinterpret_cast<uint8_t*>(base) == instance.memoryBase());
-    }
-    if (address.hasIndex()) {
-        uintptr_t index;
-        StoreValueFromGPReg(SharedMem<void*>::unshared(&index), sizeof(uintptr_t),
-                            AddressOfGPRegisterSlot(context, address.index()));
-        MOZ_RELEASE_ASSERT(uint32_t(index) == index);
-    }
-
-    // Determine the actual effective address of the faulting access. We can't
-    // rely on the faultingAddress given to us by the OS, because we need the
-    // address of the start of the access, and the OS may sometimes give us an
-    // address somewhere in the middle of the heap access.
-    uint8_t* accessAddress = ComputeAccessAddress(context, address);
-    MOZ_RELEASE_ASSERT(size_t(faultingAddress - accessAddress) < access.size(),
-                       "Given faulting address does not appear to be within computed "
-                       "faulting address range");
-    MOZ_RELEASE_ASSERT(accessAddress >= instance.memoryBase(),
-                       "Access begins outside the asm.js heap");
-    MOZ_RELEASE_ASSERT(accessAddress + access.size() <= instance.memoryBase() +
-                       instance.memoryMappedSize(),
-                       "Access extends beyond the asm.js heap guard region");
-    MOZ_RELEASE_ASSERT(accessAddress + access.size() > instance.memoryBase() +
-                       memoryLength,
-                       "Computed access address is not actually out of bounds");
+};
 
-    // The basic sandbox model is that all heap accesses are a heap base
-    // register plus an index, and the index is always computed with 32-bit
-    // operations, so we know it can only be 4 GiB off of the heap base.
-    //
-    // However, we wish to support the optimization of folding immediates
-    // and scaled indices into addresses, and any address arithmetic we fold
-    // gets done at full pointer width, so it doesn't get properly wrapped.
-    // We support this by extending HugeMappedSize to the greatest size that
-    // could be reached by such an unwrapped address, and then when we arrive
-    // here in the signal handler for such an access, we compute the fully
-    // wrapped address, and perform the load or store on it.
-    //
-    // Taking a signal is really slow, but in theory programs really shouldn't
-    // be hitting this anyway.
-    intptr_t unwrappedOffset = accessAddress - instance.memoryBase().unwrap(/* for value */);
-    uint32_t wrappedOffset = uint32_t(unwrappedOffset);
-    size_t size = access.size();
-    MOZ_RELEASE_ASSERT(wrappedOffset + size > wrappedOffset);
-    bool inBounds = wrappedOffset + size < memoryLength;
+static MOZ_MUST_USE bool
+HandleTrap(CONTEXT* context, JSContext* cx)
+{
+    MOZ_ASSERT(sAlreadyHandlingTrap.get());
 
-    if (inBounds) {
-        // We now know that this is an access that is actually in bounds when
-        // properly wrapped. Complete the load or store with the wrapped
-        // address.
-        SharedMem<uint8_t*> wrappedAddress = instance.memoryBase() + wrappedOffset;
-        MOZ_RELEASE_ASSERT(wrappedAddress >= instance.memoryBase());
-        MOZ_RELEASE_ASSERT(wrappedAddress + size > wrappedAddress);
-        MOZ_RELEASE_ASSERT(wrappedAddress + size <= instance.memoryBase() + memoryLength);
-        switch (access.kind()) {
-          case Disassembler::HeapAccess::Load:
-            SetRegisterToLoadedValue(context, wrappedAddress.cast<void*>(), size, access.otherOperand());
-            break;
-          case Disassembler::HeapAccess::LoadSext32:
-            SetRegisterToLoadedValueSext32(context, wrappedAddress.cast<void*>(), size, access.otherOperand());
-            break;
-          case Disassembler::HeapAccess::Store:
-            StoreValueFromRegister(context, wrappedAddress.cast<void*>(), size, access.otherOperand());
-            break;
-          case Disassembler::HeapAccess::LoadSext64:
-            MOZ_CRASH("no int64 accesses in asm.js");
-          case Disassembler::HeapAccess::Unknown:
-            MOZ_CRASH("Failed to disassemble instruction");
-        }
-    } else {
-        // We now know that this is an out-of-bounds access made by an asm.js
-        // load/store that we should handle.
-        switch (access.kind()) {
-          case Disassembler::HeapAccess::Load:
-          case Disassembler::HeapAccess::LoadSext32:
-            // Assign the JS-defined result value to the destination register
-            // (ToInt32(undefined) or ToNumber(undefined), determined by the
-            // type of the destination register). Very conveniently, we can
-            // infer the type from the register class, so the only types using
-            // FP registers are float32 and double.
-            SetRegisterToCoercedUndefined(context, access.size(), access.otherOperand());
-            break;
-          case Disassembler::HeapAccess::Store:
-            // Do nothing.
-            break;
-          case Disassembler::HeapAccess::LoadSext64:
-            MOZ_CRASH("no int64 accesses in asm.js");
-          case Disassembler::HeapAccess::Unknown:
-            MOZ_CRASH("Failed to disassemble instruction");
-        }
-    }
-
-    *ppc = end;
-    return true;
-#endif // WASM_HUGE_MEMORY
-}
-
-MOZ_COLD static bool
-IsHeapAccessAddress(const Instance &instance, uint8_t* faultingAddress)
-{
-    size_t accessLimit = instance.memoryMappedSize();
-
-    return instance.metadata().usesMemory() &&
-           faultingAddress >= instance.memoryBase() &&
-           faultingAddress < instance.memoryBase() + accessLimit;
-}
-
-#if defined(XP_WIN)
-
-static bool
-HandleFault(PEXCEPTION_POINTERS exception)
-{
-    EXCEPTION_RECORD* record = exception->ExceptionRecord;
-    CONTEXT* context = exception->ContextRecord;
-
-    if (record->ExceptionCode != EXCEPTION_ACCESS_VIOLATION &&
-        record->ExceptionCode != EXCEPTION_ILLEGAL_INSTRUCTION)
-    {
-        return false;
-    }
-
-    uint8_t** ppc = ContextToPC(context);
-    uint8_t* pc = *ppc;
-
+    uint8_t* pc = *ContextToPC(context);
     const CodeSegment* codeSegment = LookupCodeSegment(pc);
     if (!codeSegment || !codeSegment->isModule()) {
         return false;
     }
 
-    const ModuleSegment* moduleSegment = codeSegment->asModule();
-
-    JitActivation* activation = TlsContext.get()->activation()->asJit();
-    MOZ_ASSERT(activation);
-
-    const Instance* instance = LookupFaultingInstance(*moduleSegment, pc, ContextToFP(context));
-    if (!instance) {
-        return false;
-    }
+    const ModuleSegment& segment = *codeSegment->asModule();
 
-    if (record->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
-        Trap trap;
-        BytecodeOffset bytecode;
-        if (!moduleSegment->code().lookupTrap(pc, &trap, &bytecode)) {
-            return false;
-        }
-
-        activation->startWasmTrap(trap, bytecode.offset(), ToRegisterState(context));
-        *ppc = moduleSegment->trapCode();
-        return true;
-    }
-
-    MOZ_RELEASE_ASSERT(&instance->code() == &moduleSegment->code());
-
-    if (record->NumberParameters < 2) {
+    Trap trap;
+    BytecodeOffset bytecode;
+    if (!segment.code().lookupTrap(pc, &trap, &bytecode)) {
         return false;
     }
 
-    uint8_t* faultingAddress = reinterpret_cast<uint8_t*>(record->ExceptionInformation[1]);
+    // We have a safe, expected wasm trap. Call startWasmTrap() to store enough
+    // register state at the point of the trap to allow stack unwinding or
+    // resumption, both of which will call finishWasmTrap().
+    jit::JitActivation* activation = cx->activation()->asJit();
+    activation->startWasmTrap(trap, bytecode.offset(), ToRegisterState(context));
+    *ContextToPC(context) = segment.trapCode();
+    return true;
+}
 
-    // This check isn't necessary, but, since we can, check anyway to make
-    // sure we aren't covering up a real bug.
-    if (!IsHeapAccessAddress(*instance, faultingAddress)) {
-        return false;
-    }
+// =============================================================================
+// The following platform-specific signal/exception handlers are installed by
+// wasm::EnsureSignalHandlers() and funnel all potential wasm traps into
+// HandleTrap() above.
+// =============================================================================
 
-    MOZ_ASSERT(activation->compartment() == instance->realm()->compartment());
-
-    return HandleOutOfBounds(context, pc, faultingAddress, moduleSegment, *instance, activation, ppc);
-}
+#if defined(XP_WIN)
 
 static LONG WINAPI
-WasmFaultHandler(LPEXCEPTION_POINTERS exception)
+WasmTrapHandler(LPEXCEPTION_POINTERS exception)
 {
-    // Before anything else, prevent handler recursion.
-    if (sAlreadyInSignalHandler.get()) {
+    if (sAlreadyHandlingTrap.get()) {
         return EXCEPTION_CONTINUE_SEARCH;
     }
-    AutoSignalHandler ash;
+    AutoHandlingTrap aht;
 
-    if (HandleFault(exception)) {
-        return EXCEPTION_CONTINUE_EXECUTION;
+    EXCEPTION_RECORD* record = exception->ExceptionRecord;
+    if (record->ExceptionCode != EXCEPTION_ACCESS_VIOLATION &&
+        record->ExceptionCode != EXCEPTION_ILLEGAL_INSTRUCTION)
+    {
+        return EXCEPTION_CONTINUE_SEARCH;
     }
 
-    // No need to worry about calling other handlers, the OS does this for us.
-    return EXCEPTION_CONTINUE_SEARCH;
+    if (!HandleTrap(exception->ContextRecord, TlsContext.get())) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    return EXCEPTION_CONTINUE_EXECUTION;
 }
 
 #elif defined(XP_DARWIN)
 # include <mach/exc.h>
 
 // This definition was generated by mig (the Mach Interface Generator) for the
 // routine 'exception_raise' (exc.defs).
 #pragma pack(4)
@@ -1036,70 +547,26 @@ HandleMachException(JSContext* cx, const
         return false;
     }
     kret = thread_get_state(cxThread, float_state,
                             (thread_state_t)&context.float_, &float_state_count);
     if (kret != KERN_SUCCESS) {
         return false;
     }
 
-    uint8_t** ppc = ContextToPC(&context);
-    uint8_t* pc = *ppc;
-
     if (request.body.exception != EXC_BAD_ACCESS &&
         request.body.exception != EXC_BAD_INSTRUCTION)
     {
         return false;
     }
 
-    // The faulting thread is suspended so we can access cx fields that can
-    // normally only be accessed by the cx's main thread.
-    AutoNoteSingleThreadedRegion anstr;
-
-    const CodeSegment* codeSegment = LookupCodeSegment(pc);
-    if (!codeSegment || !codeSegment->isModule()) {
-        return false;
-    }
-
-    const ModuleSegment* moduleSegment = codeSegment->asModule();
-
-    const Instance* instance = LookupFaultingInstance(*moduleSegment, pc, ContextToFP(&context));
-    if (!instance) {
-        return false;
-    }
-
-    JitActivation* activation = cx->activation()->asJit();
-    MOZ_ASSERT(activation->compartment() == instance->realm()->compartment());
-
-    if (request.body.exception == EXC_BAD_INSTRUCTION) {
-        Trap trap;
-        BytecodeOffset bytecode;
-        if (!moduleSegment->code().lookupTrap(pc, &trap, &bytecode)) {
-            return false;
-        }
-
-        activation->startWasmTrap(trap, bytecode.offset(), ToRegisterState(&context));
-        *ppc = moduleSegment->trapCode();
-    } else {
-        MOZ_RELEASE_ASSERT(&instance->code() == &moduleSegment->code());
-
-        MOZ_ASSERT(request.body.exception == EXC_BAD_ACCESS);
-        if (request.body.codeCnt != 2) {
-            return false;
-        }
-
-        uint8_t* faultingAddress = reinterpret_cast<uint8_t*>(request.body.code[1]);
-
-        // This check isn't necessary, but, since we can, check anyway to make
-        // sure we aren't covering up a real bug.
-        if (!IsHeapAccessAddress(*instance, faultingAddress)) {
-            return false;
-        }
-
-        if (!HandleOutOfBounds(&context, pc, faultingAddress, moduleSegment, *instance, activation, ppc)) {
+    {
+        AutoNoteSingleThreadedRegion anstr;
+        AutoHandlingTrap aht;
+        if (!HandleTrap(&context, cx)) {
             return false;
         }
     }
 
     // Update the thread state with the new pc and register values.
     kret = thread_set_state(cxThread, float_state, (thread_state_t)&context.float_, float_state_count);
     if (kret != KERN_SUCCESS) {
         return false;
@@ -1264,108 +731,34 @@ MachExceptionHandler::install(JSContext*
     installed_ = true;
     onFailure.release();
     return true;
 }
 
 #else  // If not Windows or Mac, assume Unix
 
 #ifdef __mips__
-    static const uint32_t kWasmTrapSignal = SIGFPE;
+static const uint32_t kWasmTrapSignal = SIGFPE;
 #else
-    static const uint32_t kWasmTrapSignal = SIGILL;
+static const uint32_t kWasmTrapSignal = SIGILL;
 #endif
 
-// Be very cautious and default to not handling; we don't want to accidentally
-// silence real crashes from real bugs.
-static bool
-HandleFault(int signum, siginfo_t* info, void* ctx)
-{
-    // Before anything else, prevent handler recursion.
-    if (sAlreadyInSignalHandler.get()) {
-        return false;
-    }
-    AutoSignalHandler ash;
-
-    MOZ_RELEASE_ASSERT(signum == SIGSEGV || signum == SIGBUS || signum == kWasmTrapSignal);
-
-    CONTEXT* context = (CONTEXT*)ctx;
-    uint8_t** ppc = ContextToPC(context);
-    uint8_t* pc = *ppc;
-
-    const CodeSegment* segment = LookupCodeSegment(pc);
-    if (!segment || !segment->isModule()) {
-        return false;
-    }
-
-    const ModuleSegment* moduleSegment = segment->asModule();
-
-    const Instance* instance = LookupFaultingInstance(*moduleSegment, pc, ContextToFP(context));
-    if (!instance) {
-        return false;
-    }
-
-    JitActivation* activation = TlsContext.get()->activation()->asJit();
-    MOZ_ASSERT(activation->compartment() == instance->realm()->compartment());
-
-    if (signum == kWasmTrapSignal) {
-        // Wasm traps for MIPS raise only integer overflow fp exception.
-#ifdef __mips__
-        if (info->si_code != FPE_INTOVF) {
-            return false;
-        }
-#endif
-        Trap trap;
-        BytecodeOffset bytecode;
-        if (!moduleSegment->code().lookupTrap(pc, &trap, &bytecode)) {
-            return false;
-        }
-
-        activation->startWasmTrap(trap, bytecode.offset(), ToRegisterState(context));
-        *ppc = moduleSegment->trapCode();
-        return true;
-    }
-
-    MOZ_RELEASE_ASSERT(&instance->code() == &moduleSegment->code());
-
-    uint8_t* faultingAddress = reinterpret_cast<uint8_t*>(info->si_addr);
-
-    // Although it's not strictly necessary, to make sure we're not covering up
-    // any real bugs, check that the faulting address is indeed in the
-    // instance's memory.
-    if (!faultingAddress) {
-        // On some Linux systems, the kernel apparently sometimes "gives up" and
-        // passes a null faultingAddress with si_code set to SI_KERNEL.
-        // This is observed on some automation machines for some out-of-bounds
-        // atomic accesses on x86/64.
-#ifdef SI_KERNEL
-        if (info->si_code != SI_KERNEL) {
-            return false;
-        }
-#else
-        return false;
-#endif
-    } else {
-        if (!IsHeapAccessAddress(*instance, faultingAddress)) {
-            return false;
-        }
-    }
-
-    return HandleOutOfBounds(context, pc, faultingAddress, moduleSegment, *instance, activation, ppc);
-}
-
 static struct sigaction sPrevSEGVHandler;
 static struct sigaction sPrevSIGBUSHandler;
 static struct sigaction sPrevWasmTrapHandler;
 
 static void
-WasmFaultHandler(int signum, siginfo_t* info, void* context)
+WasmTrapHandler(int signum, siginfo_t* info, void* context)
 {
-    if (HandleFault(signum, info, context)) {
-        return;
+    if (!sAlreadyHandlingTrap.get()) {
+        AutoHandlingTrap aht;
+        MOZ_RELEASE_ASSERT(signum == SIGSEGV || signum == SIGBUS || signum == kWasmTrapSignal);
+        if (HandleTrap((CONTEXT*)context, TlsContext.get())) {
+            return;
+        }
     }
 
     struct sigaction* previousSignal = nullptr;
     switch (signum) {
       case SIGSEGV: previousSignal = &sPrevSEGVHandler; break;
       case SIGBUS: previousSignal = &sPrevSIGBUSHandler; break;
       case kWasmTrapSignal: previousSignal = &sPrevWasmTrapHandler; break;
     }
@@ -1421,69 +814,75 @@ ProcessHasSignalHandlers()
 
 #if defined(ANDROID) && defined(MOZ_LINKER)
     // Signal handling is broken on some android systems.
     if (IsSignalHandlingBroken()) {
         return false;
     }
 #endif
 
-    // Initalize ThreadLocal flag used by WasmFaultHandler
-    sAlreadyInSignalHandler.infallibleInit();
+    // Initialize the ThreadLocal flag used by WasmTrapHandler.
+    sAlreadyHandlingTrap.infallibleInit();
 
     // Install a SIGSEGV handler to handle safely-out-of-bounds asm.js heap
     // access and/or unaligned accesses.
 #if defined(XP_WIN)
+
+# if defined(_M_ARM64)
+    // The AArch64 Windows build is not ready for this yet.
+    return false;
+# endif
+
 # if defined(MOZ_ASAN)
     // Under ASan we need to let the ASan runtime's ShadowExceptionHandler stay
     // in the first handler position. This requires some coordination with
     // MemoryProtectionExceptionHandler::isDisabled().
     const bool firstHandler = false;
 # else
-    // Otherwise, WasmFaultHandler needs to go first, so that we can recover
+    // Otherwise, WasmTrapHandler needs to go first, so that we can recover
     // from wasm faults and continue execution without triggering handlers
     // such as MemoryProtectionExceptionHandler that assume we are crashing.
     const bool firstHandler = true;
 # endif
-    if (!AddVectoredExceptionHandler(firstHandler, WasmFaultHandler)) {
+    if (!AddVectoredExceptionHandler(firstHandler, WasmTrapHandler)) {
         return false;
     }
 #elif defined(XP_DARWIN)
     // OSX handles seg faults via the Mach exception handler above, so don't
-    // install WasmFaultHandler.
+    // install WasmTrapHandler.
 #else
     // SA_NODEFER allows us to reenter the signal handler if we crash while
     // handling the signal, and fall through to the Breakpad handler by testing
     // handlingSegFault.
 
     // Allow handling OOB with signals on all architectures
     struct sigaction faultHandler;
     faultHandler.sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK;
-    faultHandler.sa_sigaction = WasmFaultHandler;
+    faultHandler.sa_sigaction = WasmTrapHandler;
     sigemptyset(&faultHandler.sa_mask);
     if (sigaction(SIGSEGV, &faultHandler, &sPrevSEGVHandler)) {
         MOZ_CRASH("unable to install segv handler");
     }
 
 # if defined(JS_CODEGEN_ARM)
     // On Arm Handle Unaligned Accesses
     struct sigaction busHandler;
     busHandler.sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK;
-    busHandler.sa_sigaction = WasmFaultHandler;
+    busHandler.sa_sigaction = WasmTrapHandler;
     sigemptyset(&busHandler.sa_mask);
     if (sigaction(SIGBUS, &busHandler, &sPrevSIGBUSHandler)) {
         MOZ_CRASH("unable to install sigbus handler");
     }
 # endif
 
     // Install a handler to handle the instructions that are emitted to implement
     // wasm traps.
     struct sigaction wasmTrapHandler;
     wasmTrapHandler.sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK;
-    wasmTrapHandler.sa_sigaction = WasmFaultHandler;
+    wasmTrapHandler.sa_sigaction = WasmTrapHandler;
     sigemptyset(&wasmTrapHandler.sa_mask);
     if (sigaction(kWasmTrapSignal, &wasmTrapHandler, &sPrevWasmTrapHandler)) {
         MOZ_CRASH("unable to install wasm trap handler");
     }
 #endif
 
     sHaveSignalHandlers = true;
     return true;
@@ -1508,8 +907,59 @@ wasm::EnsureSignalHandlers(JSContext* cx
 }
 
 bool
 wasm::HaveSignalHandlers()
 {
     MOZ_ASSERT(sTriedInstallSignalHandlers);
     return sHaveSignalHandlers;
 }
+
+bool
+wasm::MemoryAccessTraps(const RegisterState& regs, uint8_t* addr, uint32_t numBytes, uint8_t** newPC)
+{
+    const wasm::CodeSegment* codeSegment = wasm::LookupCodeSegment(regs.pc);
+    if (!codeSegment || !codeSegment->isModule()) {
+        return false;
+    }
+
+    const wasm::ModuleSegment& segment = *codeSegment->asModule();
+
+    Trap trap;
+    BytecodeOffset bytecode;
+    if (!segment.code().lookupTrap(regs.pc, &trap, &bytecode) || trap != Trap::OutOfBounds) {
+        return false;
+    }
+
+    Instance& instance = *reinterpret_cast<Frame*>(regs.fp)->tls->instance;
+    MOZ_ASSERT(&instance.code() == &segment.code());
+
+    if (!instance.memoryAccessInGuardRegion((uint8_t*)addr, numBytes)) {
+        return false;
+    }
+
+    jit::JitActivation* activation = TlsContext.get()->activation()->asJit();
+    activation->startWasmTrap(Trap::OutOfBounds, bytecode.offset(), regs);
+    *newPC = segment.trapCode();
+    return true;
+}
+
+bool
+wasm::HandleIllegalInstruction(const RegisterState& regs, uint8_t** newPC)
+{
+    const wasm::CodeSegment* codeSegment = wasm::LookupCodeSegment(regs.pc);
+    if (!codeSegment || !codeSegment->isModule()) {
+        return false;
+    }
+
+    const wasm::ModuleSegment& segment = *codeSegment->asModule();
+
+    Trap trap;
+    BytecodeOffset bytecode;
+    if (!segment.code().lookupTrap(regs.pc, &trap, &bytecode)) {
+        return false;
+    }
+
+    jit::JitActivation* activation = TlsContext.get()->activation()->asJit();
+    activation->startWasmTrap(trap, bytecode.offset(), regs);
+    *newPC = segment.trapCode();
+    return true;
+}
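
A minimal, self-contained C++ sketch of the funneling pattern used by the
rewritten handlers above: each platform-specific entry point checks a
thread-local re-entrancy flag, sets it via an RAII guard, and delegates to one
shared trap handler. All names here are simplified stand-ins rather than the
real SpiderMonkey API, and the real handlers additionally chain to
previously-installed handlers when they decline a fault.

#include <cstdint>

// Thread-local re-entrancy flag, in the spirit of sAlreadyHandlingTrap.
static thread_local bool sHandlingTrapSketch = false;

struct AutoHandlingTrapSketch {
    AutoHandlingTrapSketch()  { sHandlingTrapSketch = true; }
    ~AutoHandlingTrapSketch() { sHandlingTrapSketch = false; }
};

// Stand-in for the OS-specific CONTEXT/ucontext_t/Mach thread state.
struct ContextSketch {
    uint8_t* pc;
    uint8_t* trapStub;
    bool pcIsKnownTrapSite;
};

// Shared logic: if pc is a recorded wasm trap site, redirect pc to the trap
// stub (the real HandleTrap also records register state for unwinding).
static bool HandleTrapSketch(ContextSketch* cx) {
    if (!cx->pcIsKnownTrapSite) {
        return false;
    }
    cx->pc = cx->trapStub;
    return true;
}

// Shape shared by the Windows, Mach, and POSIX entry points.
static bool PlatformEntrySketch(ContextSketch* cx) {
    if (sHandlingTrapSketch) {
        return false;                 // decline: let the next handler act
    }
    AutoHandlingTrapSketch guard;
    return HandleTrapSketch(cx);      // true => resume at the patched pc
}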
--- a/js/src/wasm/WasmSignalHandlers.h
+++ b/js/src/wasm/WasmSignalHandlers.h
@@ -20,34 +20,47 @@
 #define wasm_signal_handlers_h
 
 #include "mozilla/Attributes.h"
 
 #if defined(XP_DARWIN)
 # include <mach/mach.h>
 #endif
 
-#include "js/TypeDecls.h"
+#include "js/ProfilingFrameIterator.h"
 #include "threading/Thread.h"
-#include "wasm/WasmTypes.h"
+#include "wasm/WasmProcess.h"
 
 namespace js {
 namespace wasm {
 
+typedef JS::ProfilingFrameIterator::RegisterState RegisterState;
+
 // Ensure the given JSRuntime is set up to use signals. Failure to enable signal
 // handlers indicates some catastrophic failure and creation of the runtime must
 // fail.
 MOZ_MUST_USE bool
 EnsureSignalHandlers(JSContext* cx);
 
 // Return whether signals can be used in this process for asm.js/wasm
 // out-of-bounds.
 bool
 HaveSignalHandlers();
 
+// Return whether, with the given simulator register state, a memory access to
+// 'addr' of size 'numBytes' needs to trap and, if so, where the simulator
+// should redirect the pc.
+bool
+MemoryAccessTraps(const RegisterState& regs, uint8_t* addr, uint32_t numBytes, uint8_t** newPC);
+
+// Return whether, with the given simulator register state, an illegal
+// instruction fault is expected and, if so, the value of the next PC.
+bool
+HandleIllegalInstruction(const RegisterState& regs, uint8_t** newPC);
+
 #if defined(XP_DARWIN)
 // On OSX we are forced to use the lower-level Mach exception mechanism instead
 // of Unix signals. Mach exceptions are not handled on the victim's stack but
 // rather require an extra thread. For simplicity, we create one such thread
 // per JSContext (upon the first use of wasm in the JSContext). This thread
 // and related resources are owned by AsmJSMachExceptionHandler which is owned
 // by JSContext.
 class MachExceptionHandler
@@ -62,22 +75,12 @@ class MachExceptionHandler
     MachExceptionHandler();
     ~MachExceptionHandler() { uninstall(); }
     mach_port_t port() const { return port_; }
     bool installed() const { return installed_; }
     bool install(JSContext* cx);
 };
 #endif
 
-// On trap, the bytecode offset to be reported in callstacks is saved.
-
-struct TrapData
-{
-    void* resumePC;
-    void* unwoundPC;
-    Trap trap;
-    uint32_t bytecodeOffset;
-};
-
 } // namespace wasm
 } // namespace js
 
 #endif // wasm_signal_handlers_h
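
The MemoryAccessTraps()/HandleIllegalInstruction() declarations above exist for
CPU-simulator builds, which have no hardware fault to intercept. A usage sketch
under stated assumptions: SimulatorSketch and its members are hypothetical, and
only js::wasm::MemoryAccessTraps() and the RegisterState fields it reads
correspond to the real API.

#include <cstdint>

#include "js/ProfilingFrameIterator.h"
#include "wasm/WasmSignalHandlers.h"

// Hypothetical stand-in for a CPU simulator; only the state this sketch needs.
struct SimulatorSketch {
    void* pc;
    void* fp;
    void resumeAt(uint8_t* target) { pc = target; }
};

// Called before emulating a memory store; returns whether the store should be
// performed.
static bool PreStoreSketch(SimulatorSketch& sim, uint8_t* addr, uint32_t numBytes) {
    js::wasm::RegisterState regs;
    regs.pc = sim.pc;
    regs.fp = sim.fp;

    uint8_t* newPC = nullptr;
    if (js::wasm::MemoryAccessTraps(regs, addr, numBytes, &newPC)) {
        sim.resumeAt(newPC);  // out-of-bounds: continue at the trap stub
        return false;         // do not perform the store
    }
    return true;              // in bounds (or not wasm code): store normally
}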
--- a/js/src/wasm/WasmTypes.h
+++ b/js/src/wasm/WasmTypes.h
@@ -1469,16 +1469,34 @@ struct TrapSiteVectorArray : EnumeratedA
     bool empty() const;
     void clear();
     void swap(TrapSiteVectorArray& rhs);
     void podResizeToFit();
 
     WASM_DECLARE_SERIALIZABLE(TrapSiteVectorArray)
 };
 
+// On trap, the bytecode offset to be reported in callstacks is saved.
+
+struct TrapData
+{
+    // The resumePC indicates where, if the trap doesn't throw, the trap stub
+    // should jump to after restoring all register state.
+    void* resumePC;
+
+    // The unwoundPC is the PC after adjustment by wasm::StartUnwinding(), which
+    // basically unwinds partially-constructed wasm::Frames when pc is in the
+    // prologue/epilogue. Stack traces during a trap should use this PC since
+    // it corresponds to the JitActivation::wasmExitFP.
+    void* unwoundPC;
+
+    Trap trap;
+    uint32_t bytecodeOffset;
+};
+
 // The (,Callable,Func)Offsets classes are used to record the offsets of
 // different key points in a CodeRange during compilation.
 
 struct Offsets
 {
     explicit Offsets(uint32_t begin = 0, uint32_t end = 0)
       : begin(begin), end(end)
     {}
@@ -1980,20 +1998,18 @@ typedef Vector<TableDesc, 0, SystemAlloc
 //
 // After the TlsData struct follows the module's declared TLS variables.
 
 struct TlsData
 {
     // Pointer to the base of the default memory (or null if there is none).
     uint8_t* memoryBase;
 
-#ifndef WASM_HUGE_MEMORY
     // Bounds check limit of memory, in bytes (or zero if there is no memory).
     uint32_t boundsCheckLimit;
-#endif
 
     // Pointer to the Instance that contains this TLS data.
     Instance* instance;
 
     // Equal to instance->realm_.
     JS::Realm* realm;
 
     // The containing JSContext.
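
For context on the boundsCheckLimit field above, which this patch makes
unconditional instead of gated on WASM_HUGE_MEMORY: on platforms without a huge
reserved guard region, compiled wasm accesses compare the 32-bit index against
this limit before touching memory. A simplified sketch with hypothetical names;
real JIT code folds the access size into the limit and branches to a trap stub
rather than returning a flag.

#include <cstdint>

// Hypothetical mirror of the two TlsData fields a bounds-checked access reads;
// the real struct carries much more state.
struct TlsSketch {
    uint8_t* memoryBase;        // base of linear memory (or null)
    uint32_t boundsCheckLimit;  // bytes accessible without trapping
};

// Emulates a one-byte wasm load; returns false where compiled code would
// branch to the Trap::OutOfBounds stub.
static bool LoadByteSketch(const TlsSketch& tls, uint32_t index, uint8_t* out) {
    if (index >= tls.boundsCheckLimit) {
        return false;
    }
    *out = tls.memoryBase[index];
    return true;
}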
--- a/layout/reftests/async-scrolling/reftest.list
+++ b/layout/reftests/async-scrolling/reftest.list
@@ -47,17 +47,17 @@ fuzzy(0-1,0-60000) skip-if(!asyncPan) ==
 fuzzy-if(Android,0-1,0-197) skip-if(!asyncPan) == position-sticky-transformed.html position-sticky-transformed-ref.html
 skip-if(!asyncPan) == offscreen-prerendered-active-opacity.html offscreen-prerendered-active-opacity-ref.html
 fuzzy-if(Android,0-6,0-4) fuzzy-if(skiaContent&&!Android,0-1,0-34) skip-if(!asyncPan) == offscreen-clipped-blendmode-1.html offscreen-clipped-blendmode-ref.html
 fuzzy-if(Android,0-6,0-4) skip-if(!asyncPan) == offscreen-clipped-blendmode-2.html offscreen-clipped-blendmode-ref.html
 fuzzy-if(Android,0-6,0-4) skip == offscreen-clipped-blendmode-3.html offscreen-clipped-blendmode-ref.html # bug 1251588 - wrong AGR on mix-blend-mode item
 fuzzy-if(Android,0-6,0-4) skip-if(!asyncPan) == offscreen-clipped-blendmode-4.html offscreen-clipped-blendmode-ref.html
 fuzzy-if(Android,0-7,0-4) skip-if(!asyncPan) == perspective-scrolling-1.html perspective-scrolling-1-ref.html
 fuzzy-if(Android,0-7,0-4) skip-if(!asyncPan) == perspective-scrolling-2.html perspective-scrolling-2-ref.html
-fuzzy-if(Android,0-7,0-4) fails-if(webrender) skip-if(!asyncPan) == perspective-scrolling-3.html perspective-scrolling-3-ref.html # bug 1361720 for webrender
+fuzzy-if(Android,0-7,0-4) skip-if(!asyncPan) == perspective-scrolling-3.html perspective-scrolling-3-ref.html
 fuzzy-if(Android,0-7,0-4) skip-if(!asyncPan) == perspective-scrolling-4.html perspective-scrolling-4-ref.html
 pref(apz.disable_for_scroll_linked_effects,true) skip-if(!asyncPan) == disable-apz-for-sle-pages.html disable-apz-for-sle-pages-ref.html
 fuzzy-if(browserIsRemote&&d2d,0-1,0-20) skip-if(!asyncPan) == background-blend-mode-1.html background-blend-mode-1-ref.html
 skip-if(Android||!asyncPan) != opaque-fractional-displayport-1.html about:blank
 skip-if(Android||!asyncPan) != opaque-fractional-displayport-2.html about:blank
 fuzzy-if(Android,0-6,0-4) skip-if(!asyncPan) == fixed-pos-scrolled-clip-1.html fixed-pos-scrolled-clip-1-ref.html
 fuzzy-if(Android,0-6,0-8) skip-if(!asyncPan) == fixed-pos-scrolled-clip-2.html fixed-pos-scrolled-clip-2-ref.html
 fuzzy-if(Android,0-6,0-8) skip-if(!asyncPan) == fixed-pos-scrolled-clip-3.html fixed-pos-scrolled-clip-3-ref.html
--- a/mobile/android/components/BrowserCLH.js
+++ b/mobile/android/components/BrowserCLH.js
@@ -178,36 +178,54 @@ BrowserCLH.prototype = {
   },
 
   _initLoginManagerEvents: function(aWindow) {
     if (Services.prefs.getBoolPref("reftest.remote", false)) {
       // XXX known incompatibility between reftest harness and form-fill.
       return;
     }
 
+    function shouldIgnoreLoginManagerEvent(event) {
+      // If we have a null principal, prevent any further password manager code
+      // from running, since it would incorrectly use the document `location`.
+      return event.target.nodePrincipal.isNullPrincipal;
+    }
+
     let options = {
       capture: true,
       mozSystemGroup: true,
     };
 
     // NOTE: Much of this logic is duplicated in browser/base/content/content.js
     // for desktop.
     aWindow.addEventListener("DOMFormHasPassword", event => {
+      if (shouldIgnoreLoginManagerEvent(event)) {
+        return;
+      }
       this.LoginManagerContent.onDOMFormHasPassword(event, event.target.ownerGlobal.top);
     }, options);
 
     aWindow.addEventListener("DOMInputPasswordAdded", event => {
+      if (shouldIgnoreLoginManagerEvent(event)) {
+        return;
+      }
       this.LoginManagerContent.onDOMInputPasswordAdded(event, event.target.ownerGlobal.top);
     }, options);
 
     aWindow.addEventListener("DOMAutoComplete", event => {
+      if (shouldIgnoreLoginManagerEvent(event)) {
+        return;
+      }
       this.LoginManagerContent.onUsernameInput(event);
     }, options);
 
     aWindow.addEventListener("blur", event => {
+      if (shouldIgnoreLoginManagerEvent(event)) {
+        return;
+      }
       if (ChromeUtils.getClassName(event.target) === "HTMLInputElement") {
         this.LoginManagerContent.onUsernameInput(event);
       }
     }, options);
 
     aWindow.addEventListener("pageshow", event => {
       // XXXbz what about non-HTML documents??
       if (ChromeUtils.getClassName(event.target) == "HTMLDocument") {
--- a/mobile/android/geckoview/src/androidTest/java/org/mozilla/geckoview/test/SessionLifecycleTest.kt
+++ b/mobile/android/geckoview/src/androidTest/java/org/mozilla/geckoview/test/SessionLifecycleTest.kt
@@ -6,31 +6,42 @@ package org.mozilla.geckoview.test
 
 import org.mozilla.geckoview.GeckoSession
 import org.mozilla.geckoview.GeckoSessionSettings
 import org.mozilla.geckoview.GeckoView
 import org.mozilla.geckoview.test.rule.GeckoSessionTestRule.ClosedSessionAtStart
 import org.mozilla.geckoview.test.rule.GeckoSessionTestRule.NullDelegate
 import org.mozilla.geckoview.test.rule.GeckoSessionTestRule.ReuseSession
 import org.mozilla.geckoview.test.util.Callbacks
+import org.mozilla.geckoview.test.util.UiThreadUtils
 
+import android.os.Debug
 import android.os.Parcelable
+import android.os.SystemClock
 import android.support.test.InstrumentationRegistry
 import android.support.test.filters.MediumTest
 import android.support.test.runner.AndroidJUnit4
+import android.util.Log
 import android.util.SparseArray
 
 import org.hamcrest.Matchers.*
 import org.junit.Test
 import org.junit.runner.RunWith
+import java.io.File
+import java.io.IOException
+import java.lang.ref.ReferenceQueue
+import java.lang.ref.WeakReference
 
 @RunWith(AndroidJUnit4::class)
 @MediumTest
 @ReuseSession(false)
 class SessionLifecycleTest : BaseSessionTest() {
+    companion object {
+        val LOGTAG = "SessionLifecycleTest"
+    }
 
     @Test fun open_interleaved() {
         val session1 = sessionRule.createOpenSession()
         val session2 = sessionRule.createOpenSession()
         session1.close()
         val session3 = sessionRule.createOpenSession()
         session2.close()
         session3.close()
@@ -137,28 +148,46 @@ class SessionLifecycleTest : BaseSession
         sessionRule.session.reload()
         sessionRule.session.waitForPageStop()
     }
 
     @Test fun readFromParcel_closedSessionAfterParceling() {
         val session = sessionRule.createOpenSession()
 
         session.toParcel { parcel ->
+            assertThat("Session is still open", session.isOpen, equalTo(true))
             session.close()
 
             val newSession = sessionRule.createClosedSession()
             newSession.readFromParcel(parcel)
             assertThat("New session should not be open",
                        newSession.isOpen, equalTo(false))
         }
 
         sessionRule.session.reload()
         sessionRule.session.waitForPageStop()
     }
 
+    @Test fun readFromParcel_closedSessionAfterReadParcel() {
+        val session = sessionRule.createOpenSession()
+
+        session.toParcel { parcel ->
+            assertThat("Session is still open", session.isOpen, equalTo(true))
+            val newSession = sessionRule.createClosedSession()
+            newSession.readFromParcel(parcel)
+            assertThat("New session should be open",
+                    newSession.isOpen, equalTo(true))
+            assertThat("Old session should be closed",
+                    session.isOpen, equalTo(false))
+        }
+
+        sessionRule.session.reload()
+        sessionRule.session.waitForPageStop()
+    }
+
     @Test fun readFromParcel_closeOpenAndLoad() {
         val newSession = sessionRule.createClosedSession()
 
         sessionRule.session.toParcel { parcel ->
             newSession.readFromParcel(parcel)
         }
 
         newSession.close()
@@ -347,9 +376,72 @@ class SessionLifecycleTest : BaseSession
 
             newSession.close()
             assertThat("New session can be closed", newSession.isOpen, equalTo(false))
         }
 
         sessionRule.session.reload()
         sessionRule.session.waitForPageStop()
     }
+
+    @Test fun collectClosed() {
+        // We can't use a normal scoped function like `run` because
+        // those are inlined, which leaves a local reference.
+        fun createSession(): QueuedWeakReference<GeckoSession> {
+            return QueuedWeakReference<GeckoSession>(GeckoSession())
+        }
+
+        waitUntilCollected(createSession())
+    }
+
+    @Test fun collectAfterClose() {
+        fun createSession(): QueuedWeakReference<GeckoSession> {
+            val s = GeckoSession()
+            s.open(sessionRule.runtime)
+            s.close()
+            return QueuedWeakReference<GeckoSession>(s)
+        }
+
+        waitUntilCollected(createSession())
+    }
+
+    @Test fun collectOpen() {
+        fun createSession(): QueuedWeakReference<GeckoSession> {
+            val s = GeckoSession()
+            s.open(sessionRule.runtime)
+            return QueuedWeakReference<GeckoSession>(s)
+        }
+
+        waitUntilCollected(createSession())
+    }
+
+    private fun dumpHprof() {
+        try {
+            val dest = File(InstrumentationRegistry.getTargetContext()
+                    .filesDir.parent, "dump.hprof").absolutePath
+            Debug.dumpHprofData(dest)
+            Log.d(LOGTAG, "Dumped hprof to $dest")
+        } catch (e: IOException) {
+            Log.e(LOGTAG, "Failed to dump hprof", e)
+        }
+    }
+
+    private fun waitUntilCollected(ref: QueuedWeakReference<*>) {
+        val start = SystemClock.uptimeMillis()
+        while (ref.queue.poll() == null) {
+            val elapsed = SystemClock.uptimeMillis() - start
+            if (elapsed > sessionRule.timeoutMillis) {
+                dumpHprof()
+                throw UiThreadUtils.TimeoutException("Timed out after " + elapsed + "ms")
+            }
+
+            try {
+                UiThreadUtils.loopUntilIdle(100)
+            } catch (e: UiThreadUtils.TimeoutException) {
+            }
+            Runtime.getRuntime().gc()
+        }
+    }
+
+    class QueuedWeakReference<T> @JvmOverloads constructor(obj: T, var queue: ReferenceQueue<T> =
+            ReferenceQueue()) : WeakReference<T>(obj, queue)
 }
--- a/mobile/android/geckoview/src/main/java/org/mozilla/gecko/EventDispatcher.java
+++ b/mobile/android/geckoview/src/main/java/org/mozilla/gecko/EventDispatcher.java
@@ -71,35 +71,47 @@ public final class EventDispatcher exten
     public EventDispatcher(final NativeQueue queue) {
         mNativeQueue = queue;
     }
 
     private boolean isReadyForDispatchingToGecko() {
         return mNativeQueue.isReady();
     }
 
-    @WrapForJNI(dispatchTo = "gecko") @Override // JNIObject
+    @WrapForJNI @Override // JNIObject
     protected native void disposeNative();
 
     @WrapForJNI private static final int DETACHED = 0;
     @WrapForJNI private static final int ATTACHED = 1;
     @WrapForJNI private static final int REATTACHING = 2;
 
     @WrapForJNI(calledFrom = "gecko")
     private synchronized void setAttachedToGecko(final int state) {
         if (mAttachedToGecko && state == DETACHED) {
-            if (GeckoThread.isRunning()) {
-                disposeNative();
-            } else {
-                GeckoThread.queueNativeCall(this, "disposeNative");
-            }
+            dispose(false);
         }
         mAttachedToGecko = (state == ATTACHED);
     }
 
+    private void dispose(boolean force) {
+        final Handler geckoHandler = ThreadUtils.sGeckoHandler;
+        if (geckoHandler == null) {
+            return;
+        }
+
+        geckoHandler.post(new Runnable() {
+            @Override
+            public void run() {
+                if (force || !mAttachedToGecko) {
+                    disposeNative();
+                }
+            }
+        });
+    }
+
     private <T> void registerListener(final Class<?> listType,
                                       final Map<String, List<T>> listenersMap,
                                       final T listener,
                                       final String[] events) {
         try {
             synchronized (listenersMap) {
                 for (final String event : events) {
                     if (event == null) {
@@ -344,16 +356,21 @@ public final class EventDispatcher exten
                         listener.handleMessage(type, message, wrappedCallback);
                     }
                 });
             }
             return true;
         }
     }
 
+    @Override
+    protected void finalize() throws Throwable {
+        dispose(true);
+    }
+
     private static class NativeCallbackDelegate extends JNIObject implements EventCallback {
         @WrapForJNI(calledFrom = "gecko")
         private NativeCallbackDelegate() {
         }
 
         @Override // JNIObject
         protected void disposeNative() {
             // We dispose in finalize().
--- a/mobile/android/geckoview/src/main/java/org/mozilla/gecko/gfx/LayerSession.java
+++ b/mobile/android/geckoview/src/main/java/org/mozilla/gecko/gfx/LayerSession.java
@@ -67,20 +67,18 @@ public class LayerSession {
         }
 
         @WrapForJNI(calledFrom = "ui")
         private void onCompositorDetached() {
             // Clear out any pending calls on the UI thread.
             LayerSession.this.onCompositorDetached();
         }
 
-        @Override protected void disposeNative() {
-            // Disposal happens in native code.
-            throw new UnsupportedOperationException();
-        }
+        @WrapForJNI(dispatchTo = "gecko")
+        @Override protected native void disposeNative();
 
         @WrapForJNI(calledFrom = "ui", dispatchTo = "gecko")
         public native void attachNPZC(PanZoomController npzc);
 
         @WrapForJNI(calledFrom = "ui", dispatchTo = "gecko")
         public native void onBoundsChanged(int left, int top, int width, int height);
 
         // Gecko thread pauses compositor; blocks UI thread.
@@ -136,16 +134,21 @@ public class LayerSession {
         private void updateOverscrollVelocity(final float x, final float y) {
             LayerSession.this.updateOverscrollVelocity(x, y);
         }
 
         @WrapForJNI(calledFrom = "ui")
         private void updateOverscrollOffset(final float x, final float y) {
             LayerSession.this.updateOverscrollOffset(x, y);
         }
+
+        @Override
+        protected void finalize() throws Throwable {
+            disposeNative();
+        }
     }
 
     protected final Compositor mCompositor = new Compositor();
 
     // All fields are accessed on UI thread only.
     private PanZoomController mNPZC;
     private OverscrollEdgeEffect mOverscroll;
     private DynamicToolbarAnimator mToolbar;
--- a/mobile/android/geckoview/src/main/java/org/mozilla/gecko/gfx/PanZoomController.java
+++ b/mobile/android/geckoview/src/main/java/org/mozilla/gecko/gfx/PanZoomController.java
@@ -200,16 +200,21 @@ public class PanZoomController extends J
         ThreadUtils.assertOnUiThread();
 
         if (event.getToolType(0) == MotionEvent.TOOL_TYPE_MOUSE) {
             return handleMouseEvent(event);
         }
         return handleMotionEvent(event);
     }
 
+    @Override
+    protected void finalize() throws Throwable {
+        setAttached(false);
+    }
+
     /**
      * Process a non-touch motion event through the pan-zoom controller. Currently, hover
      * and scroll events are supported. Pointer coordinates should be relative to the
      * display surface.
      *
      * @param event MotionEvent to process.
      * @return True if the event was handled.
      */
--- a/mobile/android/geckoview/src/main/java/org/mozilla/geckoview/GeckoEditable.java
+++ b/mobile/android/geckoview/src/main/java/org/mozilla/geckoview/GeckoEditable.java
@@ -1,15 +1,16 @@
 /* -*- Mode: Java; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: nil; -*-
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 package org.mozilla.geckoview;
 
+import java.lang.ref.WeakReference;
 import java.lang.reflect.Array;
 import java.lang.reflect.Field;
 import java.lang.reflect.InvocationHandler;
 import java.lang.reflect.Method;
 import java.lang.reflect.Proxy;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -60,17 +61,23 @@ import android.view.inputmethod.EditorIn
                SessionTextInput.EditableClient {
 
     private static final boolean DEBUG = false;
     private static final String LOGTAG = "GeckoEditable";
 
     // Filters to implement Editable's filtering functionality
     private InputFilter[] mFilters;
 
-    /* package */ final GeckoSession mSession;
+    /**
+     * We need a WeakReference here to avoid unnecessary
+     * retention of the GeckoSession. Passing objects around
+     * via JNI seems to confuse the GC into thinking we have
+     * a native GC root.
+     */
+    /* package */ final WeakReference<GeckoSession> mSession;
     private final AsyncText mText;
     private final Editable mProxy;
     private final ConcurrentLinkedQueue<Action> mActions;
     private KeyCharacterMap mKeyMap;
 
     // mIcRunHandler is the Handler that currently runs Gecko-to-IC Runnables
     // mIcPostHandler is the Handler to post Gecko-to-IC Runnables to
     // The two can be different when switching from one handler to another
@@ -635,17 +642,17 @@ import android.view.inputmethod.EditorIn
     }
 
     public GeckoEditable(@NonNull final GeckoSession session) {
         if (DEBUG) {
             // Called by SessionTextInput.
             ThreadUtils.assertOnUiThread();
         }
 
-        mSession = session;
+        mSession = new WeakReference<>(session);
         mText = new AsyncText();
         mActions = new ConcurrentLinkedQueue<Action>();
 
         final Class<?>[] PROXY_INTERFACES = { Editable.class };
         mProxy = (Editable) Proxy.newProxyInstance(Editable.class.getClassLoader(),
                                                    PROXY_INTERFACES, this);
 
         mIcRunHandler = mIcPostHandler = ThreadUtils.getUiHandler();
@@ -1366,17 +1373,21 @@ import android.view.inputmethod.EditorIn
             @Override
             public void run() {
                 if (DEBUG) {
                     Log.d(LOGTAG, "restartInput(" + reason + ", " + toggleSoftInput + ')');
                 }
                 if (toggleSoftInput) {
                     mSoftInputReentrancyGuard.incrementAndGet();
                 }
-                mSession.getTextInput().getDelegate().restartInput(mSession, reason);
+
+                final GeckoSession session = mSession.get();
+                if (session != null) {
+                    session.getTextInput().getDelegate().restartInput(session, reason);
+                }
 
                 if (!toggleSoftInput) {
                     return;
                 }
                 postToInputConnection(new Runnable() {
                     @Override
                     public void run() {
                         int state = mIMEState;
@@ -1506,41 +1517,46 @@ import android.view.inputmethod.EditorIn
                 } else {
                     isReentrant = reentrancyGuard > 0;
                 }
 
                 // When using Find In Page, we can still receive notifyIMEContext calls due to the
                 // selection changing when highlighting. However in this case we don't want to
                 // show/hide the keyboard because the find box has the focus and is taking input from
                 // the keyboard.
-                final View view = mSession.getTextInput().getView();
+                final GeckoSession session = mSession.get();
+                if (session == null) {
+                    return;
+                }
+
+                final View view = session.getTextInput().getView();
                 final boolean isFocused = (view == null) || view.hasFocus();
 
                 final boolean isUserAction = ((flags &
                         SessionTextInput.EditableListener.IME_FLAG_USER_ACTION) != 0);
 
                 if (!force && (isReentrant || !isFocused || !isUserAction)) {
                     if (DEBUG) {
                         Log.d(LOGTAG, "toggleSoftInput: no-op, reentrant=" + isReentrant +
                                 ", focused=" + isFocused + ", user=" + isUserAction);
                     }
                     return;
                 }
                 if (state == SessionTextInput.EditableListener.IME_STATE_DISABLED) {
                     if (DEBUG) {
                         Log.d(LOGTAG, "hideSoftInput");
                     }
-                    mSession.getTextInput().getDelegate().hideSoftInput(mSession);
+                    session.getTextInput().getDelegate().hideSoftInput(session);
                     return;
                 }
                 if (DEBUG) {
                     Log.d(LOGTAG, "showSoftInput");
                 }
-                mSession.getEventDispatcher().dispatch("GeckoView:ZoomToInput", null);
-                mSession.getTextInput().getDelegate().showSoftInput(mSession);
+                session.getEventDispatcher().dispatch("GeckoView:ZoomToInput", null);
+                session.getTextInput().getDelegate().showSoftInput(session);
             }
         });
     }
 
     @Override // IGeckoEditableParent
     public void onSelectionChange(final IBinder token,
                                   final int start, final int end) {
         // On Gecko or binder thread.
--- a/mobile/android/geckoview/src/main/java/org/mozilla/geckoview/GeckoSession.java
+++ b/mobile/android/geckoview/src/main/java/org/mozilla/geckoview/GeckoSession.java
@@ -3,16 +3,17 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 package org.mozilla.geckoview;
 
 import java.lang.annotation.Retention;
 import java.lang.annotation.RetentionPolicy;
+import java.lang.ref.WeakReference;
 import java.net.URLConnection;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.UUID;
 
 import org.mozilla.gecko.annotation.WrapForJNI;
 import org.mozilla.gecko.EventDispatcher;
 import org.mozilla.gecko.GeckoAppShell;
@@ -681,22 +682,25 @@ public class GeckoSession extends LayerS
     }