Merge mozilla-central to mozilla-inbound. a=merge CLOSED TREE
author Ciure Andrei <aciure@mozilla.com>
Sun, 02 Dec 2018 11:55:39 +0200
changeset 508415 adb3cb65badb25f414c3b3eedc4f2f5d8ec7cf29
parent 508414 a17e7caad4470179473f9058e4ad17e72462eb73 (current diff)
parent 508378 8e021c409c6a16d9bab9713d20e30fa05c5b8365 (diff)
child 508416 0c29c0c61a3aed41c295446b2b3dc693bad33155
push id 1905
push user ffxbld-merge
push date Mon, 21 Jan 2019 12:33:13 +0000
treeherder mozilla-release@c2fca1944d8c [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers merge
milestone 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Merge mozilla-central to mozilla-inbound. a=merge CLOSED TREE
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1588,17 +1588,17 @@ dependencies = [
 ]
 
 [[package]]
 name = "mozurl"
 version = "0.0.1"
 dependencies = [
  "nserror 0.1.0",
  "nsstring 0.1.0",
- "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "xpcom 0.1.0",
 ]
 
 [[package]]
 name = "mozversion"
 version = "0.1.3"
 dependencies = [
  "regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2073,17 +2073,17 @@ source = "registry+https://github.com/ru
 dependencies = [
  "arrayref 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "lmdb-rkv 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "ordered-float 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
- "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "uuid 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "ron"
 version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
@@ -2837,17 +2837,17 @@ name = "unreachable"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "url"
-version = "1.7.0"
+version = "1.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "idna 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "percent-encoding 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -2916,17 +2916,17 @@ dependencies = [
  "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 1.0.80 (git+https://github.com/servo/serde?branch=deserialize_from_enums9)",
  "serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
  "tokio 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webidl"
 version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "lalrpop 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -3381,17 +3381,17 @@ dependencies = [
 "checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
 "checksum uluru 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d2606e9192f308ddc4f0b3c5d1bf3400e28a70fff956e9d9f46d23b094746d9f"
 "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
 "checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f"
 "checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
 "checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f"
 "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
 "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
-"checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7"
+"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a"
 "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
 "checksum uuid 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e1436e58182935dcd9ce0add9ea0b558e8a87befe01c1a301e6020aeb0876363"
 "checksum vcpkg 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9e0a7d8bed3178a8fb112199d466eeca9ed09a14ba8ad67718179b4fd5487d0b"
 "checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c"
 "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
 "checksum walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "63636bd0eb3d00ccb8b9036381b526efac53caf112b7783b730ab3f8e44da369"
 "checksum want 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "797464475f30ddb8830cc529aaaae648d581f99e2036a928877dfde027ddf6b3"
 "checksum wasmparser 0.22.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1b4e0f66e314a8e63ff5c3cc5103f7d0a3de9ee98bb61a960adcf7f1d9debd2f"
--- a/browser/base/content/browser-contentblocking.js
+++ b/browser/base/content/browser-contentblocking.js
@@ -2,45 +2,57 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 var TrackingProtection = {
   reportBreakageLabel: "trackingprotection",
   telemetryIdentifier: "tp",
   PREF_ENABLED_GLOBALLY: "privacy.trackingprotection.enabled",
   PREF_ENABLED_IN_PRIVATE_WINDOWS: "privacy.trackingprotection.pbmode.enabled",
-  PREF_UI_ENABLED: "browser.contentblocking.trackingprotection.control-center.ui.enabled",
   PREF_TRACKING_TABLE: "urlclassifier.trackingTable",
   PREF_TRACKING_ANNOTATION_TABLE: "urlclassifier.trackingAnnotationTable",
   enabledGlobally: false,
   enabledInPrivateWindows: false,
 
   get categoryItem() {
     delete this.categoryItem;
     return this.categoryItem =
       document.getElementById("identity-popup-content-blocking-category-tracking-protection");
   },
 
+  get categoryLabel() {
+    delete this.categoryLabel;
+    return this.categoryLabel =
+      document.getElementById("identity-popup-content-blocking-tracking-protection-state-label");
+  },
+
   get subViewList() {
     delete this.subViewList;
     return this.subViewList = document.getElementById("identity-popup-trackersView-list");
   },
 
   get strictInfo() {
     delete this.strictInfo;
     return this.strictInfo = document.getElementById("identity-popup-trackersView-strict-info");
   },
 
+  strings: {
+    get subViewBlocked() {
+      delete this.subViewBlocked;
+      return this.subViewBlocked =
+        gNavigatorBundle.getString("contentBlocking.trackersView.blocked.label");
+    },
+  },
+
   init() {
     this.updateEnabled();
 
     Services.prefs.addObserver(this.PREF_ENABLED_GLOBALLY, this);
     Services.prefs.addObserver(this.PREF_ENABLED_IN_PRIVATE_WINDOWS, this);
 
-    XPCOMUtils.defineLazyPreferenceGetter(this, "visible", this.PREF_UI_ENABLED, false);
     XPCOMUtils.defineLazyPreferenceGetter(this, "trackingTable", this.PREF_TRACKING_TABLE, false);
     XPCOMUtils.defineLazyPreferenceGetter(this, "trackingAnnotationTable", this.PREF_TRACKING_ANNOTATION_TABLE, false);
   },
 
   uninit() {
     Services.prefs.removeObserver(this.PREF_ENABLED_GLOBALLY, this);
     Services.prefs.removeObserver(this.PREF_ENABLED_IN_PRIVATE_WINDOWS, this);
   },
@@ -55,24 +67,39 @@ var TrackingProtection = {
             PrivateBrowsingUtils.isWindowPrivate(window));
   },
 
   updateEnabled() {
     this.enabledGlobally =
       Services.prefs.getBoolPref(this.PREF_ENABLED_GLOBALLY);
     this.enabledInPrivateWindows =
       Services.prefs.getBoolPref(this.PREF_ENABLED_IN_PRIVATE_WINDOWS);
+    this.updateCategoryLabel();
   },
 
-  isBlockerActivated(state) {
-    return state & Ci.nsIWebProgressListener.STATE_BLOCKED_TRACKING_CONTENT;
+  updateCategoryLabel() {
+    let label;
+    if (this.enabled) {
+      label = "contentBlocking.trackers.blocked.label";
+    } else {
+      label = "contentBlocking.trackers.allowed.label";
+    }
+    this.categoryLabel.textContent = gNavigatorBundle.getString(label);
+  },
+
+  isBlocking(state) {
+    return (state & Ci.nsIWebProgressListener.STATE_BLOCKED_TRACKING_CONTENT) != 0;
   },
 
   isAllowing(state) {
-    return state & Ci.nsIWebProgressListener.STATE_LOADED_TRACKING_CONTENT;
+    return (state & Ci.nsIWebProgressListener.STATE_LOADED_TRACKING_CONTENT) != 0;
+  },
+
+  isDetected(state) {
+    return this.isBlocking(state) || this.isAllowing(state);
   },
 
   async updateSubView() {
     let previousURI = gBrowser.currentURI.spec;
     let previousWindow = gBrowser.selectedBrowser.innerWindowID;
 
     let contentBlockingLogJSON = await gBrowser.selectedBrowser.getContentBlockingLog();
     let contentBlockingLog = JSON.parse(contentBlockingLogJSON);
@@ -109,17 +136,17 @@ var TrackingProtection = {
   },
 
   async _createListItem(origin, actions) {
     // Figure out if this list entry was actually detected by TP or something else.
     let isDetected = false;
     let isAllowed = false;
     for (let [state] of actions) {
       isAllowed = isAllowed || this.isAllowing(state);
-      isDetected = isDetected || isAllowed || this.isBlockerActivated(state);
+      isDetected = isDetected || isAllowed || this.isBlocking(state);
     }
 
     if (!isDetected) {
       return null;
     }
 
     let uri = Services.io.newURI(origin);
 
@@ -127,47 +154,84 @@ var TrackingProtection = {
     // need to make sure that this is a tracker that we would actually have blocked
     // before showing it to the user.
     let isTracker = await this._isOnTrackingTable(uri);
     if (!isTracker) {
       return null;
     }
 
     let listItem = document.createXULElement("hbox");
-    listItem.className = "identity-popup-trackersView-list-item";
+    listItem.className = "identity-popup-content-blocking-list-item";
     listItem.classList.toggle("allowed", isAllowed);
+    // Repeat the host in the tooltip in case it's too long
+    // and overflows in our panel.
+    listItem.tooltipText = uri.host;
 
     let image = document.createXULElement("image");
+    image.className = "identity-popup-trackersView-icon";
+    image.classList.toggle("allowed", isAllowed);
     listItem.append(image);
 
     let label = document.createXULElement("label");
     label.value = uri.host;
+    label.className = "identity-popup-content-blocking-list-host-label";
     label.setAttribute("crop", "end");
     listItem.append(label);
 
+    if (!isAllowed) {
+      let stateLabel = document.createXULElement("label");
+      stateLabel.value = this.strings.subViewBlocked;
+      stateLabel.className = "identity-popup-content-blocking-list-state-label";
+      listItem.append(stateLabel);
+    }
+
     return listItem;
   },
 };
 
 var ThirdPartyCookies = {
   telemetryIdentifier: "cr",
   PREF_ENABLED: "network.cookie.cookieBehavior",
   PREF_REPORT_BREAKAGE_ENABLED: "browser.contentblocking.rejecttrackers.reportBreakage.enabled",
   PREF_ENABLED_VALUES: [
     // These values match the ones exposed under the Content Blocking section
     // of the Preferences UI.
     Ci.nsICookieService.BEHAVIOR_REJECT_FOREIGN,  // Block all third-party cookies
     Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER,  // Block third-party cookies from trackers
   ],
-  PREF_UI_ENABLED: "browser.contentblocking.rejecttrackers.control-center.ui.enabled",
 
   get categoryItem() {
     delete this.categoryItem;
     return this.categoryItem =
-      document.getElementById("identity-popup-content-blocking-category-3rdpartycookies");
+      document.getElementById("identity-popup-content-blocking-category-cookies");
+  },
+
+  get categoryLabel() {
+    delete this.categoryLabel;
+    return this.categoryLabel =
+      document.getElementById("identity-popup-content-blocking-cookies-state-label");
+  },
+
+  get subViewList() {
+    delete this.subViewList;
+    return this.subViewList = document.getElementById("identity-popup-cookiesView-list");
+  },
+
+  strings: {
+    get subViewAllowed() {
+      delete this.subViewAllowed;
+      return this.subViewAllowed =
+        gNavigatorBundle.getString("contentBlocking.cookiesView.allowed.label");
+    },
+
+    get subViewBlocked() {
+      delete this.subViewBlocked;
+      return this.subViewBlocked =
+        gNavigatorBundle.getString("contentBlocking.cookiesView.blocked.label");
+    },
   },
 
   get reportBreakageLabel() {
     switch (this.behaviorPref) {
     case Ci.nsICookieService.BEHAVIOR_ACCEPT:
       return "nocookiesblocked";
     case Ci.nsICookieService.BEHAVIOR_REJECT_FOREIGN:
       return "allthirdpartycookiesblocked";
@@ -178,50 +242,246 @@ var ThirdPartyCookies = {
     default:
       Cu.reportError(`Error: Unknown cookieBehavior pref observed: ${this.behaviorPref}`);
       // fall through
     case Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER:
       return "cookierestrictions";
     }
   },
 
-  get categoryLabelDefault() {
-    delete this.categoryLabelDefault;
-    return this.categoryLabelDefault =
-      document.getElementById("identity-popup-content-blocking-category-label-default");
-  },
-
-  get categoryLabelTrackers() {
-    delete this.categoryLabelTrackers;
-    return this.categoryLabelTrackers =
-      document.getElementById("identity-popup-content-blocking-category-label-trackers");
-  },
-
   updateCategoryLabel() {
-    let rejectTrackers = this.behaviorPref == Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER;
-    this.categoryLabelDefault.hidden = rejectTrackers;
-    this.categoryLabelTrackers.hidden = !rejectTrackers;
+    let label;
+    switch (this.behaviorPref) {
+    case Ci.nsICookieService.BEHAVIOR_REJECT_FOREIGN:
+      label = "contentBlocking.cookies.3rdPartyBlocked.label";
+      break;
+    case Ci.nsICookieService.BEHAVIOR_REJECT:
+      label = "contentBlocking.cookies.allBlocked.label";
+      break;
+    case Ci.nsICookieService.BEHAVIOR_LIMIT_FOREIGN:
+      label = "contentBlocking.cookies.unvisitedBlocked.label";
+      break;
+    case Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER:
+      label = "contentBlocking.cookies.trackersBlocked.label";
+      break;
+    default:
+      Cu.reportError(`Error: Unknown cookieBehavior pref observed: ${this.behaviorPref}`);
+      // fall through
+    case Ci.nsICookieService.BEHAVIOR_ACCEPT:
+      label = "contentBlocking.cookies.allowed.label";
+      break;
+    }
+    this.categoryLabel.textContent = gNavigatorBundle.getString(label);
   },
 
   init() {
     XPCOMUtils.defineLazyPreferenceGetter(this, "behaviorPref", this.PREF_ENABLED,
       Ci.nsICookieService.BEHAVIOR_ACCEPT, this.updateCategoryLabel.bind(this));
-    XPCOMUtils.defineLazyPreferenceGetter(this, "visible", this.PREF_UI_ENABLED, false);
     XPCOMUtils.defineLazyPreferenceGetter(this, "reportBreakageEnabled",
       this.PREF_REPORT_BREAKAGE_ENABLED, false);
     this.updateCategoryLabel();
   },
+
   get enabled() {
     return this.PREF_ENABLED_VALUES.includes(this.behaviorPref);
   },
 
-  isBlockerActivated(state) {
+  isBlocking(state) {
     return (state & Ci.nsIWebProgressListener.STATE_COOKIES_BLOCKED_TRACKER) != 0 ||
+           (state & Ci.nsIWebProgressListener.STATE_COOKIES_BLOCKED_ALL) != 0 ||
+           (state & Ci.nsIWebProgressListener.STATE_COOKIES_BLOCKED_BY_PERMISSION) != 0 ||
            (state & Ci.nsIWebProgressListener.STATE_COOKIES_BLOCKED_FOREIGN) != 0;
   },
+
+  isDetected(state) {
+    return (state & Ci.nsIWebProgressListener.STATE_COOKIES_LOADED) != 0;
+  },
+
+  async updateSubView() {
+    let contentBlockingLogJSON = await gBrowser.selectedBrowser.getContentBlockingLog();
+    let contentBlockingLog = JSON.parse(contentBlockingLogJSON);
+
+    let categories = this._processContentBlockingLog(contentBlockingLog);
+
+    this.subViewList.textContent = "";
+
+    for (let category of ["firstParty", "trackers", "thirdParty"]) {
+      if (categories[category].length) {
+        let box = document.createXULElement("vbox");
+        let label = document.createXULElement("label");
+        label.className = "identity-popup-cookiesView-list-header";
+        label.textContent = gNavigatorBundle.getString(`contentBlocking.cookiesView.${category}.label`);
+        box.appendChild(label);
+        for (let info of categories[category]) {
+          box.appendChild(this._createListItem(info));
+        }
+        this.subViewList.appendChild(box);
+      }
+    }
+  },
+
+  _hasException(origin) {
+    for (let perm of Services.perms.getAllForPrincipal(gBrowser.contentPrincipal)) {
+      if (perm.type == "3rdPartyStorage^" + origin || perm.type.startsWith("3rdPartyStorage^" + origin + "^")) {
+        return true;
+      }
+    }
+
+    let principal = Services.scriptSecurityManager.createCodebasePrincipalFromOrigin(origin);
+    // Cookie exceptions get "inherited" from parent- to sub-domain, so we need to
+    // make sure to include parent domains in the permission check for "cookies".
+    return Services.perms.testPermissionFromPrincipal(principal, "cookie") != Services.perms.UNKNOWN_ACTION;
+  },
+
+  _clearException(origin) {
+    for (let perm of Services.perms.getAllForPrincipal(gBrowser.contentPrincipal)) {
+      if (perm.type == "3rdPartyStorage^" + origin || perm.type.startsWith("3rdPartyStorage^" + origin + "^")) {
+        Services.perms.removePermission(perm);
+      }
+    }
+
+    // OAs don't matter here, so we can just use the hostname.
+    let host = Services.io.newURI(origin).host;
+
+    // Cookie exceptions get "inherited" from parent- to sub-domain, so we need to
+    // clear any cookie permissions from parent domains as well.
+    for (let perm of Services.perms.enumerator) {
+      if (perm.type == "cookie" &&
+          Services.eTLD.hasRootDomain(host, perm.principal.URI.host)) {
+        Services.perms.removePermission(perm);
+      }
+    }
+  },
+
+  // Transforms and filters cookie entries in the content blocking log
+  // so that we can categorize and display them in the UI.
+  _processContentBlockingLog(log) {
+    let newLog = {
+      firstParty: [],
+      trackers: [],
+      thirdParty: [],
+    };
+
+    let firstPartyDomain = null;
+    try {
+      firstPartyDomain = Services.eTLD.getBaseDomain(gBrowser.currentURI);
+    } catch (e) {
+      // There are nasty edge cases here where someone is trying to set a cookie
+      // on a public suffix or an IP address. Just categorize those as third party...
+      if (e.result != Cr.NS_ERROR_HOST_IS_IP_ADDRESS &&
+          e.result != Cr.NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
+        throw e;
+      }
+    }
+
+    for (let [origin, actions] of Object.entries(log)) {
+      if (!origin.startsWith("http")) {
+        continue;
+      }
+
+      let info = {origin, isAllowed: true, hasException: this._hasException(origin)};
+      let hasCookie = false;
+      let isTracker = false;
+
+      // Extract information from the states entries in the content blocking log.
+      // Each state will contain a single state flag from nsIWebProgressListener.
+      // Note that we are using the same helper functions that are applied to the
+      // bit map passed to onSecurityChange (which contains multiple states), thus
+      // not checking exact equality, just presence of bits.
+      for (let [state, blocked] of actions) {
+        if (this.isDetected(state)) {
+          hasCookie = true;
+        }
+        if (TrackingProtection.isAllowing(state)) {
+          isTracker = true;
+        }
+        // blocked tells us whether the resource was actually blocked
+        // (which it may not be in case of an exception).
+        if (this.isBlocking(state) && blocked) {
+          info.isAllowed = false;
+        }
+      }
+
+      if (!hasCookie) {
+        continue;
+      }
+
+      let isFirstParty = false;
+      try {
+        let uri = Services.io.newURI(origin);
+        isFirstParty = Services.eTLD.getBaseDomain(uri) == firstPartyDomain;
+      } catch (e) {
+        if (e.result != Cr.NS_ERROR_HOST_IS_IP_ADDRESS &&
+            e.result != Cr.NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
+          throw e;
+        }
+      }
+
+      if (isFirstParty) {
+        newLog.firstParty.push(info);
+      } else if (isTracker) {
+        newLog.trackers.push(info);
+      } else {
+        newLog.thirdParty.push(info);
+      }
+    }
+
+    return newLog;
+  },
+
+  _createListItem({origin, isAllowed, hasException}) {
+    let listItem = document.createXULElement("hbox");
+    listItem.className = "identity-popup-content-blocking-list-item";
+    listItem.classList.toggle("allowed", isAllowed);
+    // Repeat the origin in the tooltip in case it's too long
+    // and overflows in our panel.
+    listItem.tooltipText = origin;
+
+    let image = document.createXULElement("image");
+    image.className = "identity-popup-cookiesView-icon";
+    image.classList.toggle("allowed", isAllowed);
+    listItem.append(image);
+
+    let label = document.createXULElement("label");
+    label.value = origin;
+    label.className = "identity-popup-content-blocking-list-host-label";
+    label.setAttribute("crop", "end");
+    listItem.append(label);
+
+    let stateLabel;
+    if (isAllowed && hasException) {
+      stateLabel = document.createXULElement("label");
+      stateLabel.value = this.strings.subViewAllowed;
+      stateLabel.className = "identity-popup-content-blocking-list-state-label";
+      listItem.append(stateLabel);
+    } else if (!isAllowed) {
+      stateLabel = document.createXULElement("label");
+      stateLabel.value = this.strings.subViewBlocked;
+      stateLabel.className = "identity-popup-content-blocking-list-state-label";
+      listItem.append(stateLabel);
+    }
+
+    if (hasException) {
+      let removeException = document.createXULElement("button");
+      removeException.className = "identity-popup-permission-remove-button";
+      removeException.tooltipText = gNavigatorBundle.getFormattedString(
+        "contentBlocking.cookiesView.removeButton.tooltip", [origin]);
+      removeException.addEventListener("click", () => {
+        this._clearException(origin);
+        // Just flip the display based on what state we had previously.
+        stateLabel.value = isAllowed ? this.strings.subViewBlocked : this.strings.subViewAllowed;
+        listItem.classList.toggle("allowed", !isAllowed);
+        image.classList.toggle("allowed", !isAllowed);
+        removeException.hidden = true;
+      });
+      listItem.append(removeException);
+    }
+
+    return listItem;
+  },
 };
 
 
 var ContentBlocking = {
   // If the user ignores the doorhanger, we stop showing it after some time.
   MAX_INTROS: 20,
   PREF_ANIMATIONS_ENABLED: "toolkit.cosmeticAnimations.enabled",
   PREF_REPORT_BREAKAGE_ENABLED: "browser.contentblocking.reportBreakage.enabled",
@@ -392,21 +652,19 @@ var ContentBlocking = {
     // Leave the ? at the end of the URL to signify that this URL had its query stripped.
     let urlWithoutQuery = this.reportURI.asciiSpec.replace(this.reportURI.query, "");
     let body = `Full URL: ${urlWithoutQuery}\n`;
     body += `userAgent: ${navigator.userAgent}\n`;
 
     body += "\n**Preferences**\n";
     body += `${TrackingProtection.PREF_ENABLED_GLOBALLY}: ${Services.prefs.getBoolPref(TrackingProtection.PREF_ENABLED_GLOBALLY)}\n`;
     body += `${TrackingProtection.PREF_ENABLED_IN_PRIVATE_WINDOWS}: ${Services.prefs.getBoolPref(TrackingProtection.PREF_ENABLED_IN_PRIVATE_WINDOWS)}\n`;
-    body += `${TrackingProtection.PREF_UI_ENABLED}: ${Services.prefs.getBoolPref(TrackingProtection.PREF_UI_ENABLED)}\n`;
     body += `urlclassifier.trackingTable: ${Services.prefs.getStringPref("urlclassifier.trackingTable")}\n`;
     body += `network.http.referer.defaultPolicy: ${Services.prefs.getIntPref("network.http.referer.defaultPolicy")}\n`;
     body += `network.http.referer.defaultPolicy.pbmode: ${Services.prefs.getIntPref("network.http.referer.defaultPolicy.pbmode")}\n`;
-    body += `${ThirdPartyCookies.PREF_UI_ENABLED}: ${Services.prefs.getBoolPref(ThirdPartyCookies.PREF_UI_ENABLED)}\n`;
     body += `${ThirdPartyCookies.PREF_ENABLED}: ${Services.prefs.getIntPref(ThirdPartyCookies.PREF_ENABLED)}\n`;
     body += `network.cookie.lifetimePolicy: ${Services.prefs.getIntPref("network.cookie.lifetimePolicy")}\n`;
     body += `privacy.restrict3rdpartystorage.expiration: ${Services.prefs.getIntPref("privacy.restrict3rdpartystorage.expiration")}\n`;
 
     let comments = document.getElementById("identity-popup-breakageReportView-collection-comments");
     body += "\n**Comments**\n" + comments.value;
 
     formData.set("body", body);
@@ -442,16 +700,21 @@ var ContentBlocking = {
     this.identityPopupMultiView.showSubView("identity-popup-breakageReportView");
   },
 
   async showTrackersSubview() {
     await TrackingProtection.updateSubView();
     this.identityPopupMultiView.showSubView("identity-popup-trackersView");
   },
 
+  async showCookiesSubview() {
+    await ThirdPartyCookies.updateSubView();
+    this.identityPopupMultiView.showSubView("identity-popup-cookiesView");
+  },
+
   shieldHistogramAdd(value) {
     if (PrivateBrowsingUtils.isWindowPrivate(window)) {
       return;
     }
     Services.telemetry.getHistogramById("TRACKING_PROTECTION_SHIELD").add(value);
   },
 
   onSecurityChange(oldState, state, webProgress, isSimulated,
@@ -468,81 +731,80 @@ var ContentBlocking = {
 
     // The user might have navigated before the shield animation
     // finished. In this case, reset the animation to be able to
     // play it in full again and avoid choppiness.
     if (webProgress.isTopLevel) {
       this.iconBox.removeAttribute("animate");
     }
 
-    let anyBlockerActivated = false;
+    let anyDetected = false;
+    let anyBlocking = false;
 
     for (let blocker of this.blockers) {
       // Store data on whether the blocker is activated in the current document for
       // reporting it using the "report breakage" dialog. Under normal circumstances this
       // dialog should only be able to open in the currently selected tab and onSecurityChange
       // runs on tab switch, so we can avoid associating the data with the document directly.
-      blocker.activated = blocker.isBlockerActivated(state);
+      blocker.activated = blocker.isBlocking(state);
       blocker.categoryItem.classList.toggle("blocked", blocker.enabled);
-      blocker.categoryItem.hidden = !blocker.visible;
-      anyBlockerActivated = anyBlockerActivated || blocker.activated;
+      let detected = blocker.isDetected(state);
+      blocker.categoryItem.hidden = !detected;
+      anyDetected = anyDetected || detected;
+      anyBlocking = anyBlocking || blocker.activated;
     }
 
-    // We consider the shield state "active" when some kind of blocking activity
-    // occurs on the page.  Note that merely allowing the loading of content that
-    // we could have blocked does not trigger the appearance of the shield.
-    // This state will be overriden later if there's an exception set for this site.
-    let active = anyBlockerActivated;
-    let isAllowing = state & Ci.nsIWebProgressListener.STATE_LOADED_TRACKING_CONTENT;
-    let detected = anyBlockerActivated || isAllowing;
-
     let isBrowserPrivate = PrivateBrowsingUtils.isBrowserPrivate(gBrowser.selectedBrowser);
 
     // Check whether the user has added an exception for this site.
     let type =  isBrowserPrivate ? "trackingprotection-pb" : "trackingprotection";
     let hasException = Services.perms.testExactPermission(baseURI, type) ==
       Services.perms.ALLOW_ACTION;
 
-    this.content.toggleAttribute("detected", detected);
+    // We consider the shield state "active" when some kind of blocking activity
+    // occurs on the page.  Note that merely allowing the loading of content that
+    // we could have blocked does not trigger the appearance of the shield.
+    // This state will be overridden later if there's an exception set for this site.
+    this.content.toggleAttribute("detected", anyDetected);
+    this.content.toggleAttribute("blocking", anyBlocking);
     this.content.toggleAttribute("hasException", hasException);
-    this.content.toggleAttribute("active", active);
 
-    this.iconBox.toggleAttribute("active", active);
+    this.iconBox.toggleAttribute("active", anyBlocking);
     this.iconBox.toggleAttribute("hasException", hasException);
 
     // For release (due to the large volume) we only want to receive reports
     // for breakage that is directly related to third party cookie blocking.
     if (this.reportBreakageEnabled ||
         (ThirdPartyCookies.reportBreakageEnabled &&
          ThirdPartyCookies.activated &&
          !TrackingProtection.activated)) {
       this.reportBreakageButton.removeAttribute("hidden");
     } else {
       this.reportBreakageButton.setAttribute("hidden", "true");
     }
 
     if (isSimulated) {
       this.iconBox.removeAttribute("animate");
-    } else if (active && webProgress.isTopLevel) {
+    } else if (anyBlocking && webProgress.isTopLevel) {
       this.iconBox.setAttribute("animate", "true");
 
       if (!isBrowserPrivate) {
         let introCount = Services.prefs.getIntPref(this.prefIntroCount);
         if (introCount < this.MAX_INTROS) {
           Services.prefs.setIntPref(this.prefIntroCount, ++introCount);
           Services.prefs.savePrefFile(null);
           this.showIntroPanel();
         }
       }
     }
 
     if (hasException) {
       this.iconBox.setAttribute("tooltiptext", this.disabledTooltipText);
       this.shieldHistogramAdd(1);
-    } else if (active) {
+    } else if (anyBlocking) {
       this.iconBox.setAttribute("tooltiptext", this.activeTooltipText);
       this.shieldHistogramAdd(2);
     } else {
       this.iconBox.removeAttribute("tooltiptext");
       this.shieldHistogramAdd(0);
     }
   },
 
--- a/browser/base/content/test/trackingUI/benignPage.html
+++ b/browser/base/content/test/trackingUI/benignPage.html
@@ -2,11 +2,17 @@
 <!-- This Source Code Form is subject to the terms of the Mozilla Public
    - License, v. 2.0. If a copy of the MPL was not distributed with this
    - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
 <html dir="ltr" xml:lang="en-US" lang="en-US">
   <head>
     <meta charset="utf8">
   </head>
   <body>
-    <iframe src="http://not-tracking.example.com/"></iframe>
+    <!--TODO: We used to have an iframe here, to double-check that benign-->
+    <!--iframes may be included in pages. However, the cookie restrictions-->
+    <!--project introduced a change that declared blockable content to be-->
+    <!--found on any page that embeds iframes, rendering this unusable for-->
+    <!--our purposes. That's not ideal and we intend to restore this iframe.-->
+    <!--(See bug 1511303 for a more detailed technical explanation.)-->
+    <!--<iframe src="http://not-tracking.example.com/"></iframe>-->
   </body>
 </html>
--- a/browser/base/content/test/trackingUI/browser.ini
+++ b/browser/base/content/test/trackingUI/browser.ini
@@ -8,16 +8,17 @@ support-files =
   trackingAPI.js
   trackingPage.html
 
 [browser_trackingUI_3.js]
 [browser_trackingUI_animation.js]
 [browser_trackingUI_animation_2.js]
 [browser_trackingUI_appMenu.js]
 [browser_trackingUI_categories.js]
+[browser_trackingUI_cookies_subview.js]
 [browser_trackingUI_fetch.js]
 support-files =
   file_trackingUI_fetch.html
   file_trackingUI_fetch.js
   file_trackingUI_fetch.js^headers^
 [browser_trackingUI_open_preferences.js]
 [browser_trackingUI_pbmode_exceptions.js]
 [browser_trackingUI_report_breakage.js]
--- a/browser/base/content/test/trackingUI/browser_trackingUI_categories.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_categories.js
@@ -11,19 +11,17 @@ registerCleanupFunction(function() {
   Services.prefs.clearUserPref(TP_PB_PREF);
   Services.prefs.clearUserPref(TPC_PREF);
   Services.prefs.clearUserPref(TT_PREF);
   Services.prefs.clearUserPref(CAT_PREF);
 });
 
 add_task(async function testCategoryLabelsInControlPanel() {
   await BrowserTestUtils.withNewTab("http://www.example.com", async function() {
-    let promisePanelOpen = BrowserTestUtils.waitForEvent(gIdentityHandler._identityPopup, "popupshown");
-    gIdentityHandler._identityBox.click();
-    await promisePanelOpen;
+    await openIdentityPopup();
 
     let preferencesButton = document.getElementById("tracking-protection-preferences-button");
     ok(preferencesButton.label, "The preferencesButton label exists");
 
     Services.prefs.setStringPref(CAT_PREF, "strict");
     await TestUtils.waitForCondition(() => preferencesButton.label ==
       gNavigatorBundle.getString("contentBlocking.category.strict"));
     is(preferencesButton.label, gNavigatorBundle.getString("contentBlocking.category.strict"),
@@ -65,8 +63,48 @@ add_task(async function testCategoryLabe
 
     Services.prefs.setStringPref(CAT_PREF, "custom");
     await TestUtils.waitForCondition(() => appMenuCategoryLabel.value ==
       gNavigatorBundle.getString("contentBlocking.category.custom"));
     is(appMenuCategoryLabel.value, gNavigatorBundle.getString("contentBlocking.category.custom"),
       "The appMenuCategory label has been changed to custom");
   });
 });
+
+// Checks that the tracker and cookie state labels follow the tracking
+// protection and cookie behavior prefs.
+add_task(async function testSubcategoryLabels() {
+  // Waits for |label| to show the gNavigatorBundle string called |stringName|
+  // and then asserts that it does.
+  async function expectLabel(label, stringName) {
+    await TestUtils.waitForCondition(
+      () => label.textContent == gNavigatorBundle.getString(stringName),
+      "The category label has updated correctly");
+    is(label.textContent, gNavigatorBundle.getString(stringName));
+  }
+
+  await BrowserTestUtils.withNewTab("http://www.example.com", async function() {
+    let trackersLabel =
+      document.getElementById("identity-popup-content-blocking-tracking-protection-state-label");
+    let trackerStates = [
+      [true, "contentBlocking.trackers.blocked.label"],
+      [false, "contentBlocking.trackers.allowed.label"],
+    ];
+    for (let [enabled, stringName] of trackerStates) {
+      Services.prefs.setBoolPref(TP_PREF, enabled);
+      await expectLabel(trackersLabel, stringName);
+    }
+
+    let cookiesLabel =
+      document.getElementById("identity-popup-content-blocking-cookies-state-label");
+    let cookieStates = [
+      [Ci.nsICookieService.BEHAVIOR_ACCEPT, "contentBlocking.cookies.allowed.label"],
+      [Ci.nsICookieService.BEHAVIOR_REJECT, "contentBlocking.cookies.allBlocked.label"],
+      [Ci.nsICookieService.BEHAVIOR_REJECT_FOREIGN, "contentBlocking.cookies.3rdPartyBlocked.label"],
+      [Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER, "contentBlocking.cookies.trackersBlocked.label"],
+      [Ci.nsICookieService.BEHAVIOR_LIMIT_FOREIGN, "contentBlocking.cookies.unvisitedBlocked.label"],
+    ];
+    for (let [behavior, stringName] of cookieStates) {
+      Services.prefs.setIntPref(TPC_PREF, behavior);
+      await expectLabel(cookiesLabel, stringName);
+    }
+  });
+});
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_cookies_subview.js
@@ -0,0 +1,211 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+/* eslint-disable mozilla/no-arbitrary-setTimeout */
+
+"use strict";
+
+const COOKIE_PAGE = "http://not-tracking.example.com/browser/browser/base/content/test/trackingUI/cookiePage.html";
+
+const TPC_PREF = "network.cookie.cookieBehavior";
+
+const CATEGORY_ITEM_ID = "identity-popup-content-blocking-category-cookies";
+const COOKIES_VIEW_ID = "identity-popup-cookiesView";
+const LIST_HEADER_SELECTOR = ".identity-popup-cookiesView-list-header";
+const LIST_ITEM_SELECTOR = ".identity-popup-content-blocking-list-item";
+const HOST_LABEL_SELECTOR = ".identity-popup-content-blocking-list-host-label";
+
+// How long to wait before concluding that no onSecurityChange was fired.
+const SECURITY_CHANGE_TIMEOUT_MS = 1000;
+
+add_task(async function setup() {
+  // Avoid the content blocking tour interfering with our tests by popping up.
+  await SpecialPowers.pushPrefEnv({set: [[ContentBlocking.prefIntroCount, ContentBlocking.MAX_INTROS]]});
+  await UrlClassifierTestUtils.addTestTrackers();
+
+  registerCleanupFunction(() => {
+    UrlClassifierTestUtils.cleanupTestTrackers();
+  });
+});
+
+/**
+ * Clicks the cookies category item and waits for the cookies subview to fire
+ * "ViewShown".
+ */
+async function showCookiesView(categoryItem, cookiesView) {
+  let viewShown = BrowserTestUtils.waitForEvent(cookiesView, "ViewShown");
+  categoryItem.click();
+  await viewShown;
+  ok(true, "Cookies view was shown");
+}
+
+/**
+ * Clicks the back button of the cookies subview and waits for the main view to
+ * fire "ViewShown".
+ */
+async function showMainView(cookiesView) {
+  let mainView = document.getElementById("identity-popup-mainView");
+  let viewShown = BrowserTestUtils.waitForEvent(mainView, "ViewShown");
+  cookiesView.querySelector(".subviewbutton-back").click();
+  await viewShown;
+  ok(true, "Main view was shown");
+}
+
+/**
+ * Posts |message| to the content page and checks that no security change is
+ * reported within SECURITY_CHANGE_TIMEOUT_MS.
+ */
+async function postMessageWithoutSecurityChange(browser, message) {
+  // Tag both outcomes so that the check below does not depend on the value
+  // waitForSecurityChange() resolves with.
+  let change = waitForSecurityChange().then(() => "securityChange");
+  let timeout = new Promise(resolve =>
+    setTimeout(() => resolve("timeout"), SECURITY_CHANGE_TIMEOUT_MS));
+
+  await ContentTask.spawn(browser, message, function(msg) {
+    content.postMessage(msg, "*");
+  });
+
+  let result = await Promise.race([change, timeout]);
+  is(result, "timeout", "No securityChange events should be received");
+}
+
+/**
+ * Checks one entry of the cookies list: the origin on its host label, its
+ * visibility, and that it has the "allowed" class exactly when |blocked| is
+ * false.
+ */
+function assertListItem(listItem, origin, originMessage, blocked) {
+  let label = listItem.querySelector(HOST_LABEL_SELECTOR);
+  is(label.value, origin, originMessage);
+  ok(BrowserTestUtils.is_visible(listItem), "List item is visible");
+  is(listItem.classList.contains("allowed"), !blocked,
+    "Indicates whether the cookie was blocked or allowed");
+}
+
+/**
+ * Loads COOKIE_PAGE in a new tab and inspects the cookies subview three times:
+ * after the load, after posting "third-party-cookie" and after posting
+ * "first-party-cookie". Every step is expected to add one header and one item.
+ *
+ * @param trackersBlocked    whether the trackertest.org item should be blocked.
+ * @param thirdPartyBlocked  whether the test1.example.org item should be blocked.
+ * @param firstPartyBlocked  whether the first party item should be blocked.
+ */
+async function assertSitesListed(trackersBlocked, thirdPartyBlocked, firstPartyBlocked) {
+  await BrowserTestUtils.withNewTab(COOKIE_PAGE, async function(browser) {
+    await openIdentityPopup();
+
+    let categoryItem = document.getElementById(CATEGORY_ITEM_ID);
+    ok(BrowserTestUtils.is_visible(categoryItem), "Cookies category item is visible");
+    let cookiesView = document.getElementById(COOKIES_VIEW_ID);
+    await showCookiesView(categoryItem, cookiesView);
+
+    let listHeaders = cookiesView.querySelectorAll(LIST_HEADER_SELECTOR);
+    is(listHeaders.length, 1, "We have 1 list header");
+    is(listHeaders[0].textContent,
+       gNavigatorBundle.getString("contentBlocking.cookiesView.trackers.label"),
+       "The list header is for tracking cookies.");
+
+    let listItems = cookiesView.querySelectorAll(LIST_ITEM_SELECTOR);
+    is(listItems.length, 1, "We have 1 cookie in the list");
+    assertListItem(listItems[0], "http://trackertest.org",
+      "Has an item for trackertest.org", trackersBlocked);
+
+    await showMainView(cookiesView);
+    await postMessageWithoutSecurityChange(browser, "third-party-cookie");
+    await showCookiesView(categoryItem, cookiesView);
+
+    listHeaders = cookiesView.querySelectorAll(LIST_HEADER_SELECTOR);
+    is(listHeaders.length, 2, "We now have 2 list headers");
+    is(listHeaders[1].textContent,
+       gNavigatorBundle.getString("contentBlocking.cookiesView.thirdParty.label"),
+       "The new list header is for third party cookies.");
+
+    listItems = cookiesView.querySelectorAll(LIST_ITEM_SELECTOR);
+    is(listItems.length, 2, "We have 2 cookies in the list");
+    assertListItem(listItems[1], "https://test1.example.org",
+      "Has an item for test1.example.org", thirdPartyBlocked);
+
+    await showMainView(cookiesView);
+    await postMessageWithoutSecurityChange(browser, "first-party-cookie");
+    await showCookiesView(categoryItem, cookiesView);
+
+    listHeaders = cookiesView.querySelectorAll(LIST_HEADER_SELECTOR);
+    is(listHeaders.length, 3, "We now have 3 list headers");
+    is(listHeaders[0].textContent,
+       gNavigatorBundle.getString("contentBlocking.cookiesView.firstParty.label"),
+       "The new list header is for first party cookies.");
+
+    listItems = cookiesView.querySelectorAll(LIST_ITEM_SELECTOR);
+    is(listItems.length, 3, "We have 3 cookies in the list");
+    assertListItem(listItems[0], "http://not-tracking.example.com",
+      "Has an item for the first party", firstPartyBlocked);
+  });
+}
+
+// Runs assertSitesListed() for each cookie behavior the subview should reflect.
+add_task(async function testCookiesSubView() {
+  try {
+    info("Testing cookies subview with reject tracking cookies.");
+    Services.prefs.setIntPref(TPC_PREF, Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER);
+    await assertSitesListed(true, false, false);
+    info("Testing cookies subview with reject third party cookies.");
+    Services.prefs.setIntPref(TPC_PREF, Ci.nsICookieService.BEHAVIOR_REJECT_FOREIGN);
+    await assertSitesListed(true, true, false);
+    info("Testing cookies subview with reject all cookies.");
+    Services.prefs.setIntPref(TPC_PREF, Ci.nsICookieService.BEHAVIOR_REJECT);
+    await assertSitesListed(true, true, true);
+    info("Testing cookies subview with accept all cookies.");
+    Services.prefs.setIntPref(TPC_PREF, Ci.nsICookieService.BEHAVIOR_ACCEPT);
+    await assertSitesListed(false, false, false);
+  } finally {
+    // Reset the pref even when one of the runs above throws.
+    Services.prefs.clearUserPref(TPC_PREF);
+  }
+});
+
+// Checks that a tracker holding an explicit "cookie" allow permission is listed
+// as allowed and that its remove button drops that permission again.
+add_task(async function testCookiesSubViewAllowed() {
+  Services.prefs.setIntPref(TPC_PREF, Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER);
+  let principal = Services.scriptSecurityManager.createCodebasePrincipalFromOrigin("http://trackertest.org/");
+  Services.perms.addFromPrincipal(principal, "cookie", Services.perms.ALLOW_ACTION);
+
+  try {
+    await BrowserTestUtils.withNewTab(COOKIE_PAGE, async function() {
+      await openIdentityPopup();
+
+      let categoryItem = document.getElementById(CATEGORY_ITEM_ID);
+      ok(BrowserTestUtils.is_visible(categoryItem), "Cookies category item is visible");
+      let cookiesView = document.getElementById(COOKIES_VIEW_ID);
+      await showCookiesView(categoryItem, cookiesView);
+
+      let listHeaders = cookiesView.querySelectorAll(LIST_HEADER_SELECTOR);
+      is(listHeaders.length, 1, "We have 1 list header");
+      is(listHeaders[0].textContent,
+         gNavigatorBundle.getString("contentBlocking.cookiesView.trackers.label"),
+         "The list header is for tracking cookies.");
+
+      let listItems = cookiesView.querySelectorAll(LIST_ITEM_SELECTOR);
+      is(listItems.length, 1, "We have 1 cookie in the list");
+
+      let listItem = listItems[0];
+      assertListItem(listItem, "http://trackertest.org",
+        "Has an item for trackertest.org", false);
+
+      let button = listItem.querySelector(".identity-popup-permission-remove-button");
+      ok(BrowserTestUtils.is_visible(button), "Permission remove button is visible");
+      button.click();
+      is(Services.perms.testExactPermissionFromPrincipal(principal, "cookie"),
+         Services.perms.UNKNOWN_ACTION,
+         "Button click should remove the cookie permission.");
+      ok(!listItem.classList.contains("allowed"), "Has removed the allowed class");
+    });
+  } finally {
+    // Don't leak the permission or the pref into later tests when the task
+    // throws before the remove button was clicked.
+    Services.perms.removeFromPrincipal(principal, "cookie");
+    Services.prefs.clearUserPref(TPC_PREF);
+  }
+});
--- a/browser/base/content/test/trackingUI/browser_trackingUI_fetch.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_fetch.js
@@ -1,27 +1,10 @@
 const URL = "http://mochi.test:8888/browser/browser/base/content/test/trackingUI/file_trackingUI_fetch.html";
 
-function waitForSecurityChange(numChanges = 1) {
-  return new Promise(resolve => {
-    let n = 0;
-    let listener = {
-      onSecurityChange() {
-        n = n + 1;
-        info("Received onSecurityChange event " + n + " of " + numChanges);
-        if (n >= numChanges) {
-          gBrowser.removeProgressListener(listener);
-          resolve();
-        }
-      },
-    };
-    gBrowser.addProgressListener(listener);
-  });
-}
-
 add_task(async function test_fetch() {
   await SpecialPowers.pushPrefEnv({ set: [
     ["privacy.trackingprotection.enabled", true],
   ]});
 
   await BrowserTestUtils.withNewTab({ gBrowser, url: URL }, async function(newTabBrowser) {
     let securityChange = waitForSecurityChange();
     await ContentTask.spawn(newTabBrowser, null, async function() {
--- a/browser/base/content/test/trackingUI/browser_trackingUI_open_preferences.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_open_preferences.js
@@ -1,17 +1,15 @@
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
 "use strict";
 
 const TP_PREF = "privacy.trackingprotection.enabled";
 const TPC_PREF = "network.cookie.cookieBehavior";
-const TP_UI_PREF = "browser.contentblocking.trackingprotection.control-center.ui.enabled";
-const RT_UI_PREF = "browser.contentblocking.rejecttrackers.control-center.ui.enabled";
 const TRACKING_PAGE = "http://tracking.example.org/browser/browser/base/content/test/trackingUI/trackingPage.html";
 
 async function waitAndAssertPreferencesShown() {
   await BrowserTestUtils.waitForEvent(gIdentityHandler._identityPopup, "popuphidden");
   await TestUtils.waitForCondition(() => gBrowser.currentURI.spec == "about:preferences#privacy",
     "Should open about:preferences.");
 
   await ContentTask.spawn(gBrowser.selectedBrowser, {}, async function() {
@@ -61,18 +59,16 @@ add_task(async function testOpenPreferen
 });
 
 // Tests that clicking the contentblocking category items "add blocking" labels
 // links to about:preferences
 add_task(async function testOpenPreferencesFromAddBlockingButtons() {
   SpecialPowers.pushPrefEnv({set: [
     [TP_PREF, false],
     [TPC_PREF, Ci.nsICookieService.BEHAVIOR_ACCEPT],
-    [TP_UI_PREF, true],
-    [RT_UI_PREF, true],
   ]});
 
   await BrowserTestUtils.withNewTab(TRACKING_PAGE, async function() {
     let addBlockingButtons = document.querySelectorAll(".identity-popup-content-blocking-category-add-blocking");
     for (let button of addBlockingButtons) {
       let promisePanelOpen = BrowserTestUtils.waitForEvent(gIdentityHandler._identityPopup, "popupshown");
       gIdentityHandler._identityBox.click();
       await promisePanelOpen;
--- a/browser/base/content/test/trackingUI/browser_trackingUI_pbmode_exceptions.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_pbmode_exceptions.js
@@ -52,22 +52,16 @@ function testTrackingPage(window) {
     ok(!hidden("#tracking-action-unblock-private"), "unblockButtonPrivate is visible");
   } else {
     ok(!hidden("#tracking-action-unblock"), "unblockButton is visible");
     ok(hidden("#tracking-action-unblock-private"), "unblockButtonPrivate is hidden");
   }
 
   ok(hidden("#identity-popup-content-blocking-not-detected"), "blocking not detected label is hidden");
   ok(!hidden("#identity-popup-content-blocking-detected"), "blocking detected label is visible");
-
-  ok(!hidden("#identity-popup-content-blocking-category-list"), "category list is visible");
-  ok(hidden("#identity-popup-content-blocking-category-tracking-protection > #identity-popup-content-blocking-tracking-protection-label-allowed"),
-    "TP category item is not showing the allowed label");
-  ok(!hidden("#identity-popup-content-blocking-category-tracking-protection > #identity-popup-content-blocking-tracking-protection-label-blocked"),
-    "TP category item is set to blocked");
 }
 
 function testTrackingPageUnblocked() {
   info("Tracking content must be white-listed and not blocked");
   ok(ContentBlocking.content.hasAttribute("detected"), "trackers are detected");
   ok(ContentBlocking.content.hasAttribute("hasException"), "content shows exception");
 
   ok(!ContentBlocking.iconBox.hasAttribute("active"), "shield is active");
@@ -76,22 +70,16 @@ function testTrackingPageUnblocked() {
      gNavigatorBundle.getString("trackingProtection.icon.disabledTooltip"), "correct tooltip");
 
   ok(BrowserTestUtils.is_visible(ContentBlocking.iconBox), "icon box is visible");
   ok(!hidden("#tracking-action-block"), "blockButton is visible");
   ok(hidden("#tracking-action-unblock"), "unblockButton is hidden");
 
   ok(hidden("#identity-popup-content-blocking-not-detected"), "blocking not detected label is hidden");
   ok(!hidden("#identity-popup-content-blocking-detected"), "blocking detected label is visible");
-
-  ok(!hidden("#identity-popup-content-blocking-category-list"), "category list is visible");
-  ok(!hidden("#identity-popup-content-blocking-category-tracking-protection > #identity-popup-content-blocking-tracking-protection-label-allowed"),
-    "TP category item is showing the allowed label");
-  ok(hidden("#identity-popup-content-blocking-category-tracking-protection > #identity-popup-content-blocking-tracking-protection-label-blocked"),
-    "TP category item is not set to blocked");
 }
 
 add_task(async function testExceptionAddition() {
   await UrlClassifierTestUtils.addTestTrackers();
   let privateWin = await BrowserTestUtils.openNewBrowserWindow({private: true});
   browser = privateWin.gBrowser;
   let tab = await BrowserTestUtils.openNewForegroundTab({ gBrowser: browser, waitForLoad: true, waitForStateStop: true });
 
--- a/browser/base/content/test/trackingUI/browser_trackingUI_report_breakage.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_report_breakage.js
@@ -21,23 +21,16 @@ add_task(async function setup() {
   let oldCanRecord = Services.telemetry.canRecordExtended;
   Services.telemetry.canRecordExtended = true;
 
   registerCleanupFunction(() => {
     Services.telemetry.canRecordExtended = oldCanRecord;
   });
 });
 
-function openIdentityPopup() {
-  let mainView = document.getElementById("identity-popup-mainView");
-  let viewShown = BrowserTestUtils.waitForEvent(mainView, "ViewShown");
-  gIdentityHandler._identityBox.click();
-  return viewShown;
-}
-
 add_task(async function testReportBreakageVisibility() {
   let scenarios = [
     {
       url: TRACKING_PAGE,
       prefs: {
         "privacy.trackingprotection.enabled": true,
         "browser.contentblocking.reportBreakage.enabled": true,
       },
@@ -181,21 +174,19 @@ add_task(async function testReportBreaka
         let body = CommonUtils.readBytesFromInputStream(request.bodyInputStream);
         let boundary = request.getHeader("Content-Type").match(/boundary=-+([^-]*)/i)[1];
         let regex = new RegExp("-+" + boundary + "-*\\s+");
         let sections = body.split(regex);
 
         let prefs = [
           "privacy.trackingprotection.enabled",
           "privacy.trackingprotection.pbmode.enabled",
-          "browser.contentblocking.trackingprotection.control-center.ui.enabled",
           "urlclassifier.trackingTable",
           "network.http.referer.defaultPolicy",
           "network.http.referer.defaultPolicy.pbmode",
-          "browser.contentblocking.rejecttrackers.control-center.ui.enabled",
           "network.cookie.cookieBehavior",
           "network.cookie.lifetimePolicy",
           "privacy.restrict3rdpartystorage.expiration",
         ];
         let prefsBody = "";
 
         for (let pref of prefs) {
           prefsBody += `${pref}: ${Preferences.get(pref)}\r\n`;
--- a/browser/base/content/test/trackingUI/browser_trackingUI_state.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_state.js
@@ -51,49 +51,53 @@ function clickButton(sel) {
   let el = win.document.querySelector(sel);
   el.doCommand();
 }
 
 function testBenignPage() {
   info("Non-tracking content must not be blocked");
   ok(!ContentBlocking.content.hasAttribute("detected"), "no trackers are detected");
   ok(!ContentBlocking.content.hasAttribute("hasException"), "content shows no exception");
-  ok(!ContentBlocking.content.hasAttribute("active"), "content is not active");
 
   ok(!ContentBlocking.iconBox.hasAttribute("active"), "shield is not active");
   ok(!ContentBlocking.iconBox.hasAttribute("hasException"), "icon box shows no exception");
   ok(!ContentBlocking.iconBox.hasAttribute("tooltiptext"), "icon box has no tooltip");
 
   ok(BrowserTestUtils.is_hidden(ContentBlocking.iconBox), "icon box is hidden");
   ok(hidden("#tracking-action-block"), "blockButton is hidden");
   ok(hidden("#tracking-action-unblock"), "unblockButton is hidden");
 
   ok(!hidden("#identity-popup-content-blocking-not-detected"), "blocking not detected label is visible");
   ok(hidden("#identity-popup-content-blocking-detected"), "blocking detected label is hidden");
-  ok(hidden("#identity-popup-content-blocking-category-list"), "category list is hidden");
+  ok(hidden("#identity-popup-content-blocking-category-cookies"),
+    "Not showing cookie restrictions category");
+  ok(hidden("#identity-popup-content-blocking-category-tracking-protection"),
+    "Not showing trackers category");
 }
 
 function testBenignPageWithException() {
   info("Non-tracking content must not be blocked");
   ok(!ContentBlocking.content.hasAttribute("detected"), "no trackers are detected");
   ok(ContentBlocking.content.hasAttribute("hasException"), "content shows exception");
-  ok(!ContentBlocking.content.hasAttribute("active"), "content is not active");
 
   ok(!ContentBlocking.iconBox.hasAttribute("active"), "shield is not active");
   ok(ContentBlocking.iconBox.hasAttribute("hasException"), "shield shows exception");
   is(ContentBlocking.iconBox.getAttribute("tooltiptext"),
      gNavigatorBundle.getString("trackingProtection.icon.disabledTooltip"), "correct tooltip");
 
   ok(!BrowserTestUtils.is_hidden(ContentBlocking.iconBox), "icon box is not hidden");
   ok(!hidden("#tracking-action-block"), "blockButton is visible");
   ok(hidden("#tracking-action-unblock"), "unblockButton is hidden");
 
   ok(!hidden("#identity-popup-content-blocking-not-detected"), "blocking not detected label is visible");
   ok(hidden("#identity-popup-content-blocking-detected"), "blocking detected label is hidden");
-  ok(hidden("#identity-popup-content-blocking-category-list"), "category list is hidden");
+  ok(hidden("#identity-popup-content-blocking-category-cookies"),
+    "Not showing cookie restrictions category");
+  ok(hidden("#identity-popup-content-blocking-category-tracking-protection"),
+    "Not showing trackers category");
 }
 
 function areTrackersBlocked(isPrivateBrowsing) {
   let blockedByTP = Services.prefs.getBoolPref(isPrivateBrowsing ? TP_PB_PREF : TP_PREF);
   let blockedByTPC = Services.prefs.getIntPref(TPC_PREF) == Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER;
   return blockedByTP || blockedByTPC;
 }
 
@@ -101,18 +105,16 @@ function testTrackingPage(window) {
   info("Tracking content must be blocked");
   ok(ContentBlocking.content.hasAttribute("detected"), "trackers are detected");
   ok(!ContentBlocking.content.hasAttribute("hasException"), "content shows no exception");
 
   let isPrivateBrowsing = PrivateBrowsingUtils.isWindowPrivate(window);
   let blockedByTP = areTrackersBlocked(isPrivateBrowsing);
   is(BrowserTestUtils.is_visible(ContentBlocking.iconBox), blockedByTP,
      "icon box is" + (blockedByTP ? "" : " not") + " visible");
-  is(ContentBlocking.content.hasAttribute("active"), blockedByTP,
-      "content is" + (blockedByTP ? "" : " not") + " active");
   is(ContentBlocking.iconBox.hasAttribute("active"), blockedByTP,
       "shield is" + (blockedByTP ? "" : " not") + " active");
   ok(!ContentBlocking.iconBox.hasAttribute("hasException"), "icon box shows no exception");
   is(ContentBlocking.iconBox.getAttribute("tooltiptext"),
      blockedByTP ? gNavigatorBundle.getString("trackingProtection.icon.activeTooltip") : "",
      "correct tooltip");
 
   ok(hidden("#tracking-action-block"), "blockButton is hidden");
@@ -126,79 +128,43 @@ function testTrackingPage(window) {
     ok(hidden("#tracking-action-unblock-private"), "unblockButtonPrivate is hidden");
     is(!hidden("#tracking-action-unblock"), blockedByTP,
        "unblockButton is" + (blockedByTP ? "" : " not") + " visible");
   }
 
   ok(hidden("#identity-popup-content-blocking-not-detected"), "blocking not detected label is hidden");
   ok(!hidden("#identity-popup-content-blocking-detected"), "blocking detected label is visible");
 
-  ok(!hidden("#identity-popup-content-blocking-category-list"), "category list is visible");
-
-  let cookiesBlocked = Services.prefs.getIntPref(TPC_PREF) == Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER;
-  if (cookiesBlocked) {
-    let category = "#identity-popup-content-blocking-category-3rdpartycookies";
-    is(hidden(category + " > .identity-popup-content-blocking-category-add-blocking"), blockedByTP,
-      "Category item is" + (blockedByTP ? " not" : "") + " showing add blocking");
-    is(hidden(category + " > .identity-popup-content-blocking-category-state-label"), !blockedByTP,
-      "Category item is" + (blockedByTP ? "" : " not") + " set to blocked");
-
-    ok(hidden("#identity-popup-content-blocking-category-label-default"),
-      "Not showing default cookie restrictions label.");
-    ok(!hidden("#identity-popup-content-blocking-category-label-trackers"),
-      "Showing trackers cookie restrictions label.");
-  } else {
-    let category = "#identity-popup-content-blocking-category-tracking-protection";
-    is(hidden(category + " > #identity-popup-content-blocking-tracking-protection-label-allowed"), blockedByTP,
-      "Category item is" + (blockedByTP ? " not" : "") + " showing the allowed label");
-    is(!hidden(category + " > #identity-popup-content-blocking-tracking-protection-label-blocked"), blockedByTP,
-      "Category item is" + (blockedByTP ? "" : " not") + " set to blocked");
-
-    ok(hidden("#identity-popup-content-blocking-category-label-trackers"),
-      "Not showing trackers cookie restrictions label.");
-    ok(!hidden("#identity-popup-content-blocking-category-label-default"),
-      "Showing default cookie restrictions label.");
-  }
+  ok(!hidden("#identity-popup-content-blocking-category-tracking-protection"),
+    "Showing trackers category");
+  ok(!hidden("#identity-popup-content-blocking-category-cookies"),
+    "Showing cookie restrictions category");
 }
 
 function testTrackingPageUnblocked(blockedByTP, window) {
   info("Tracking content must be white-listed and not blocked");
   ok(ContentBlocking.content.hasAttribute("detected"), "trackers are detected");
   ok(ContentBlocking.content.hasAttribute("hasException"), "content shows exception");
 
-  ok(!ContentBlocking.content.hasAttribute("active"), "content is not active");
   ok(!ContentBlocking.iconBox.hasAttribute("active"), "shield is not active");
   ok(ContentBlocking.iconBox.hasAttribute("hasException"), "shield shows exception");
   is(ContentBlocking.iconBox.getAttribute("tooltiptext"),
      gNavigatorBundle.getString("trackingProtection.icon.disabledTooltip"), "correct tooltip");
 
   ok(BrowserTestUtils.is_visible(ContentBlocking.iconBox), "icon box is visible");
   ok(!hidden("#tracking-action-block"), "blockButton is visible");
   ok(hidden("#tracking-action-unblock"), "unblockButton is hidden");
 
   ok(hidden("#identity-popup-content-blocking-not-detected"), "blocking not detected label is hidden");
   ok(!hidden("#identity-popup-content-blocking-detected"), "blocking detected label is visible");
 
-  ok(!hidden("#identity-popup-content-blocking-category-list"), "category list is visible");
-
-  let cookiesBlocked = Services.prefs.getIntPref(TPC_PREF) == Ci.nsICookieService.BEHAVIOR_REJECT_TRACKER;
-  if (cookiesBlocked) {
-    let category = "#identity-popup-content-blocking-category-3rdpartycookies";
-    is(hidden(category + " > .identity-popup-content-blocking-category-add-blocking"), blockedByTP,
-      "Category item is" + (blockedByTP ? " not" : "") + " showing add blocking");
-    ok(!hidden("#identity-popup-content-blocking-category-tracking-protection > #identity-popup-content-blocking-tracking-protection-label-allowed"),
-      "TP category item is showing the allowed label");
-  } else {
-    let category = "#identity-popup-content-blocking-category-tracking-protection";
-    // If there's an exception we always show the "Allowed" label.
-    ok(!hidden(category + " > #identity-popup-content-blocking-tracking-protection-label-allowed"),
-      "Category item is showing the allowed label");
-    ok(hidden(category + " > #identity-popup-content-blocking-tracking-protection-label-blocked"),
-      "Category item is not set to blocked");
-  }
+  ok(!hidden("#identity-popup-content-blocking-category-tracking-protection"),
+    "Showing trackers category");
+  ok(!hidden("#identity-popup-content-blocking-category-cookies"),
+    "Showing cookie restrictions category");
 }
 
 async function testContentBlocking(tab) {
   info("Testing with Tracking Protection ENABLED.");
 
   info("Load a test page not containing tracking elements");
   await promiseTabLoadEvent(tab, BENIGN_PAGE);
   testBenignPage();
--- a/browser/base/content/test/trackingUI/browser_trackingUI_telemetry.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_telemetry.js
@@ -45,17 +45,17 @@ add_task(async function setup() {
 add_task(async function testShieldHistogram() {
   Services.prefs.setBoolPref(PREF, true);
   let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser);
 
   // Reset these to make counting easier
   getShieldHistogram().clear();
 
   await promiseTabLoadEvent(tab, BENIGN_PAGE);
-  is(getShieldCounts()[0], 2, "Page loads without tracking");
+  is(getShieldCounts()[0], 1, "Page loads without tracking");
 
   await promiseTabLoadEvent(tab, TRACKING_PAGE);
   // Note that right now the shield histogram is not measuring what
   // you might think.  Since onSecurityChange fires twice for a tracking page,
   // the total page loads count is double counting, and the shield count
   // (which is meant to measure times when the shield wasn't shown) fires even
   // when tracking elements exist on the page.
   todo_is(getShieldCounts()[0], 1, "FIXME: TOTAL PAGE LOADS WITHOUT TRACKING IS DOUBLE COUNTING");
@@ -81,32 +81,27 @@ add_task(async function testShieldHistog
 add_task(async function testIdentityPopupEvents() {
   Services.prefs.setBoolPref(PREF, true);
   let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser);
 
   await promiseTabLoadEvent(tab, BENIGN_PAGE);
 
   Services.telemetry.clearEvents();
 
-  let { gIdentityHandler } = gBrowser.ownerGlobal;
-  let promisePanelOpen = BrowserTestUtils.waitForEvent(gIdentityHandler._identityPopup, "popupshown");
-  gIdentityHandler._identityBox.click();
-  await promisePanelOpen;
+  await openIdentityPopup();
 
   let events = Services.telemetry.snapshotEvents(Ci.nsITelemetry.DATASET_RELEASE_CHANNEL_OPTIN, true).parent;
   let openEvents = events.filter(
     e => e[1] == "security.ui.identitypopup" && e[2] == "open" && e[3] == "identity_popup");
   is(openEvents.length, 1, "recorded telemetry for opening the identity popup");
   is(openEvents[0][4], "shield-hidden", "recorded the shield as hidden");
 
   await promiseTabLoadEvent(tab, TRACKING_PAGE);
 
-  promisePanelOpen = BrowserTestUtils.waitForEvent(gIdentityHandler._identityPopup, "popupshown");
-  gIdentityHandler._identityBox.click();
-  await promisePanelOpen;
+  await openIdentityPopup();
 
   events = Services.telemetry.snapshotEvents(Ci.nsITelemetry.DATASET_RELEASE_CHANNEL_OPTIN, true).parent;
   openEvents = events.filter(
     e => e[1] == "security.ui.identitypopup" && e[2] == "open" && e[3] == "identity_popup");
   is(openEvents.length, 1, "recorded telemetry for opening the identity popup");
   is(openEvents[0][4], "shield-showing", "recorded the shield as showing");
 
   info("Disable TP for the page (which reloads the page)");
--- a/browser/base/content/test/trackingUI/browser_trackingUI_trackers_subview.js
+++ b/browser/base/content/test/trackingUI/browser_trackingUI_trackers_subview.js
@@ -4,61 +4,40 @@
 
 "use strict";
 
 const TRACKING_PAGE = "http://tracking.example.org/browser/browser/base/content/test/trackingUI/trackingPage.html";
 
 const TP_PREF = "privacy.trackingprotection.enabled";
 
 add_task(async function setup() {
+  // Avoid the content blocking tour interfering with our tests by popping up.
+  await SpecialPowers.pushPrefEnv({set: [[ContentBlocking.prefIntroCount, ContentBlocking.MAX_INTROS]]});
   await UrlClassifierTestUtils.addTestTrackers();
-});
-
-function openIdentityPopup() {
-  let mainView = document.getElementById("identity-popup-mainView");
-  let viewShown = BrowserTestUtils.waitForEvent(mainView, "ViewShown");
-  gIdentityHandler._identityBox.click();
-  return viewShown;
-}
 
-function waitForSecurityChange(counter) {
-  return new Promise(resolve => {
-    let webProgressListener = {
-      onStateChange: () => {},
-      onStatusChange: () => {},
-      onLocationChange: () => {},
-      onSecurityChange: (webProgress, request, oldState, state) => {
-        if (--counter == 0) {
-          gBrowser.removeProgressListener(webProgressListener);
-          resolve(counter);
-        }
-      },
-      onProgressChange: () => {},
-      QueryInterface: ChromeUtils.generateQI([Ci.nsIWebProgressListener]),
-    };
-
-    gBrowser.addProgressListener(webProgressListener);
+  registerCleanupFunction(() => {
+    UrlClassifierTestUtils.cleanupTestTrackers();
   });
-}
+});
 
 async function assertSitesListed(blocked) {
   await BrowserTestUtils.withNewTab(TRACKING_PAGE, async function(browser) {
     await openIdentityPopup();
 
     let categoryItem =
       document.getElementById("identity-popup-content-blocking-category-tracking-protection");
     ok(BrowserTestUtils.is_visible(categoryItem), "TP category item is visible");
     let trackersView = document.getElementById("identity-popup-trackersView");
     let viewShown = BrowserTestUtils.waitForEvent(trackersView, "ViewShown");
     categoryItem.click();
     await viewShown;
 
     ok(true, "Trackers view was shown");
 
-    let listItems = document.querySelectorAll(".identity-popup-trackersView-list-item");
+    let listItems = trackersView.querySelectorAll(".identity-popup-content-blocking-list-item");
     is(listItems.length, 1, "We have 1 tracker in the list");
 
     let strictInfo = document.getElementById("identity-popup-trackersView-strict-info");
     is(BrowserTestUtils.is_hidden(strictInfo), Services.prefs.getBoolPref(TP_PREF),
       "Strict info is hidden if TP is enabled.");
 
     let mainView = document.getElementById("identity-popup-mainView");
     viewShown = BrowserTestUtils.waitForEvent(mainView, "ViewShown");
@@ -79,17 +58,17 @@ async function assertSitesListed(blocked
     is(result, undefined, "No securityChange events should be received");
 
     viewShown = BrowserTestUtils.waitForEvent(trackersView, "ViewShown");
     categoryItem.click();
     await viewShown;
 
     ok(true, "Trackers view was shown");
 
-    listItems = Array.from(document.querySelectorAll(".identity-popup-trackersView-list-item"));
+    listItems = Array.from(trackersView.querySelectorAll(".identity-popup-content-blocking-list-item"));
     is(listItems.length, 2, "We have 2 trackers in the list");
 
     let listItem = listItems.find(item => item.querySelector("label").value == "trackertest.org");
     ok(listItem, "Has an item for trackertest.org");
     ok(BrowserTestUtils.is_visible(listItem), "List item is visible");
     is(listItem.classList.contains("allowed"), !blocked,
       "Indicates whether the tracker was blocked or allowed");
 
@@ -97,24 +76,24 @@ async function assertSitesListed(blocked
     ok(listItem, "Has an item for itisatracker.org");
     ok(BrowserTestUtils.is_visible(listItem), "List item is visible");
     is(listItem.classList.contains("allowed"), !blocked,
       "Indicates whether the tracker was blocked or allowed");
   });
 }
 
 add_task(async function testTrackersSubView() {
+  info("Testing trackers subview with TP disabled.");
   Services.prefs.setBoolPref(TP_PREF, false);
   await assertSitesListed(false);
+  info("Testing trackers subview with TP enabled.");
   Services.prefs.setBoolPref(TP_PREF, true);
   await assertSitesListed(true);
+  info("Testing trackers subview with TP enabled and a CB exception.");
   let uri = Services.io.newURI("https://tracking.example.org");
   Services.perms.add(uri, "trackingprotection", Services.perms.ALLOW_ACTION);
   await assertSitesListed(false);
+  info("Testing trackers subview with TP enabled and a CB exception removed.");
   Services.perms.remove(uri, "trackingprotection");
   await assertSitesListed(true);
+
   Services.prefs.clearUserPref(TP_PREF);
 });
-
-add_task(function cleanup() {
-  Services.prefs.clearUserPref(TP_PREF);
-  UrlClassifierTestUtils.cleanupTestTrackers();
-});
--- a/browser/base/content/test/trackingUI/head.js
+++ b/browser/base/content/test/trackingUI/head.js
@@ -27,8 +27,32 @@ function promiseTabLoadEvent(tab, url) {
 
   let loaded = BrowserTestUtils.browserLoaded(tab.linkedBrowser, false, handle);
 
   if (url)
     BrowserTestUtils.loadURI(tab.linkedBrowser, url);
 
   return loaded;
 }
+
+function openIdentityPopup() {
+  let mainView = document.getElementById("identity-popup-mainView");
+  let viewShown = BrowserTestUtils.waitForEvent(mainView, "ViewShown");
+  gIdentityHandler._identityBox.click();
+  return viewShown;
+}
+
+function waitForSecurityChange(numChanges = 1) {
+  return new Promise(resolve => {
+    let n = 0;
+    let listener = {
+      onSecurityChange() {
+        n = n + 1;
+        info("Received onSecurityChange event " + n + " of " + numChanges);
+        if (n >= numChanges) {
+          gBrowser.removeProgressListener(listener);
+          resolve(n);
+        }
+      },
+    };
+    gBrowser.addProgressListener(listener);
+  });
+}
--- a/browser/base/content/test/trackingUI/trackingAPI.js
+++ b/browser/base/content/test/trackingUI/trackingAPI.js
@@ -13,10 +13,25 @@ onmessage = event => {
     }
     break;
   case "cookie": {
       let ifr = document.createElement("iframe");
       ifr.src = "https://trackertest.org/browser/browser/base/content/test/trackingUI/cookieServer.sjs";
       document.body.appendChild(ifr);
     }
     break;
+  case "first-party-cookie": {
+      // Since the content blocking log doesn't seem to get updated for
+      // top-level cookies right now, we just create an iframe with the
+      // first party domain...
+      let ifr = document.createElement("iframe");
+      ifr.src = "http://not-tracking.example.com/browser/browser/base/content/test/trackingUI/cookieServer.sjs";
+      document.body.appendChild(ifr);
+    }
+    break;
+  case "third-party-cookie": {
+      let ifr = document.createElement("iframe");
+      ifr.src = "https://test1.example.org/browser/browser/base/content/test/trackingUI/cookieServer.sjs";
+      document.body.appendChild(ifr);
+    }
+    break;
   }
 };
--- a/browser/components/controlcenter/content/panel.inc.xul
+++ b/browser/components/controlcenter/content/panel.inc.xul
@@ -69,37 +69,30 @@
 
           <description id="identity-popup-content-blocking-detected"
                        crop="end">&contentBlocking.detected;</description>
           <description id="identity-popup-content-blocking-not-detected"
                        crop="end">&contentBlocking.notDetected;</description>
 
           <vbox id="identity-popup-content-blocking-category-list">
             <toolbarbutton id="identity-popup-content-blocking-category-tracking-protection"
-                  onclick="ContentBlocking.showTrackersSubview()"
-                  class="identity-popup-content-blocking-category" align="center">
+                           onclick="ContentBlocking.showTrackersSubview()"
+                           class="identity-popup-content-blocking-category" align="center">
               <image class="identity-popup-content-blocking-category-icon tracking-protection-icon"/>
               <label flex="1" class="identity-popup-content-blocking-category-label">&contentBlocking.trackingProtection3.label;</label>
-              <label flex="1" id="identity-popup-content-blocking-tracking-protection-label-allowed"
-                     class="identity-popup-content-blocking-category-state-label">&contentBlocking.trackingProtection.allowed.label;</label>
-              <label flex="1" id="identity-popup-content-blocking-tracking-protection-label-blocked"
-                     class="identity-popup-content-blocking-category-state-label">&contentBlocking.trackingProtection.blocked.label;</label>
+              <label flex="1" id="identity-popup-content-blocking-tracking-protection-state-label" class="identity-popup-content-blocking-category-state-label"/>
             </toolbarbutton>
-            <hbox id="identity-popup-content-blocking-category-3rdpartycookies"
-                  class="identity-popup-content-blocking-category" align="center" role="group">
+            <toolbarbutton id="identity-popup-content-blocking-category-cookies"
+                           onclick="ContentBlocking.showCookiesSubview()"
+                           class="identity-popup-content-blocking-category" align="center">
               <image class="identity-popup-content-blocking-category-icon thirdpartycookies-icon"/>
               <label flex="1" id="identity-popup-content-blocking-category-label-default"
-                     class="identity-popup-content-blocking-category-label">&contentBlocking.3rdPartyCookies.label;</label>
-              <label flex="1" id="identity-popup-content-blocking-category-label-trackers"
-                     hidden="true" class="identity-popup-content-blocking-category-label">&contentBlocking.3rdPartyCookies.trackers.label;</label>
-              <label flex="1" class="identity-popup-content-blocking-category-state-label">&contentBlocking.3rdPartyCookies.blocking.label;</label>
-              <label flex="1" class="identity-popup-content-blocking-category-add-blocking text-link"
-                     id="identity-popup-3rdpartycookies-add-blocking"
-                     onclick="ContentBlocking.openPreferences('identityPopup-CB-3rdpartycookies'); gIdentityHandler.recordClick('cookies_add_blocking');">&contentBlocking.3rdPartyCookies.add.label;</label>
-            </hbox>
+                     class="identity-popup-content-blocking-category-label">&contentBlocking.cookies.label;</label>
+              <label flex="1" id="identity-popup-content-blocking-cookies-state-label" class="identity-popup-content-blocking-category-state-label"/>
+            </toolbarbutton>
           </vbox>
 
           <button id="tracking-action-unblock"
                   class="panel-button tracking-protection-button"
                   label="&trackingProtection.unblock5.label;"
                   accesskey="&trackingProtection.unblock5.accesskey;"
                   oncommand="ContentBlocking.disableForCurrentPage(); gIdentityHandler.recordClick('unblock');" />
           <button id="tracking-action-unblock-private"
@@ -241,30 +234,45 @@
 
     </panelview>
 
     <!-- Trackers SubView -->
     <panelview id="identity-popup-trackersView"
                role="document"
                title="&contentBlocking.trackersView.label;"
                descriptionheightworkaround="true">
-        <vbox id="identity-popup-trackersView-list">
+        <vbox id="identity-popup-trackersView-list" class="identity-popup-content-blocking-list">
         </vbox>
         <hbox id="identity-popup-trackersView-strict-info">
           <image/>
           <label>&contentBlocking.trackersView.strictInfo.label;</label>
         </hbox>
         <vbox class="identity-popup-footer">
           <button id="identity-popup-trackersView-settings-button"
                   label="&contentBlocking.manageSettings.label;"
                   accesskey="&contentBlocking.manageSettings.accesskey;"
                   oncommand="ContentBlocking.openPreferences();"/>
         </vbox>
     </panelview>
 
+    <!-- Cookies SubView -->
+    <panelview id="identity-popup-cookiesView"
+               role="document"
+               title="&contentBlocking.cookiesView.label;"
+               descriptionheightworkaround="true">
+        <vbox id="identity-popup-cookiesView-list" class="identity-popup-content-blocking-list">
+        </vbox>
+        <vbox class="identity-popup-footer">
+          <button id="identity-popup-cookiesView-settings-button"
+                  label="&contentBlocking.manageSettings.label;"
+                  accesskey="&contentBlocking.manageSettings.accesskey;"
+                  oncommand="ContentBlocking.openPreferences();"/>
+        </vbox>
+    </panelview>
+
     <!-- Report Breakage SubView -->
     <panelview id="identity-popup-breakageReportView"
                title="&contentBlocking.breakageReportView.label;"
                descriptionheightworkaround="true">
         <vbox id="identity-popup-breakageReportView-heading">
           <description>&contentBlocking.breakageReportView2.description;</description>
           <label id="identity-popup-breakageReportView-learn-more"
                  class="text-link">&contentBlocking.breakageReportView.learnMore;</label>
--- a/browser/components/preferences/browserLanguages.js
+++ b/browser/components/preferences/browserLanguages.js
@@ -462,16 +462,18 @@ var gBrowserLanguagesDialog = {
     this._selectedLocales.addItem(item);
     let selectedCount = this._selectedLocales.items.length;
     let availableCount = Services.locale.availableLocales.length;
     if (selectedCount == availableCount) {
       // Remove the installed label, they're all installed.
       this._availableLocales.items.shift();
       this._availableLocales.setItems(this._availableLocales.items);
     }
+    // The label isn't always reset when the selected item is removed, so set it again.
+    this._availableLocales.enableWithMessageId("browser-languages-select-language");
   },
 
   async requestRemoteLanguage(item) {
     this._availableLocales.disableWithMessageId(
       "browser-languages-downloading");
 
     let {url, hash} = this.availableLangpacks.get(item.value);
     let addon;
@@ -506,25 +508,30 @@ var gBrowserLanguagesDialog = {
       await Promise.all(addonInfos.map(
         info => installFromUrl(info.sourceURI.spec)));
     } catch (e) {
       Cu.reportError(e);
     }
   },
 
   showError() {
-    document.querySelectorAll(".warning-message-separator")
-      .forEach(separator => separator.classList.add("thin"));
     document.getElementById("warning-message").hidden = false;
     this._availableLocales.enableWithMessageId("browser-languages-select-language");
+
+    // The height has likely changed, find our SubDialog and tell it to resize.
+    requestAnimationFrame(() => {
+      let dialogs = window.opener.gSubDialog._dialogs;
+      let index = dialogs.findIndex(d => d._frame.contentDocument == document);
+      if (index != -1) {
+        dialogs[index].resizeDialog();
+      }
+    });
   },
 
   hideError() {
-    document.querySelectorAll(".warning-message-separator")
-      .forEach(separator => separator.classList.remove("thin"));
     document.getElementById("warning-message").hidden = true;
   },
 
   getSelectedLocales() {
     return this._selectedLocales.items.map(item => item.value);
   },
 
   async selectedLocaleRemoved(item) {
--- a/browser/components/preferences/browserLanguages.xul
+++ b/browser/components/preferences/browserLanguages.xul
@@ -53,15 +53,13 @@
         </menulist>
         <button id="add"
                 class="add-browser-language action-button"
                 data-l10n-id="languages-customize-add"
                 disabled="true"/>
       </row>
     </rows>
   </grid>
-  <separator class="warning-message-separator"/>
   <hbox id="warning-message" class="message-bar message-bar-warning" hidden="true">
     <image class="message-bar-icon"/>
     <description class="message-bar-description" data-l10n-id="browser-languages-error"/>
   </hbox>
-  <separator class="warning-message-separator"/>
 </dialog>
--- a/browser/components/preferences/in-content/subdialogs.js
+++ b/browser/components/preferences/in-content/subdialogs.js
@@ -274,16 +274,20 @@ SubDialog.prototype = {
     // Some subdialogs may want to perform additional, asynchronous steps during initializations.
     //
     // In that case, we expect them to define a Promise which will delay measuring
     // until the promise is fulfilled.
     if (aEvent.target.contentDocument.mozSubdialogReady) {
       await aEvent.target.contentDocument.mozSubdialogReady;
     }
 
+    await this.resizeDialog();
+  },
+
+  async resizeDialog() {
     // Do this on load to wait for the CSS to load and apply before calculating the size.
     let docEl = this._frame.contentDocument.documentElement;
 
     let titleBarHeight = this._titleBar.clientHeight +
                          parseFloat(getComputedStyle(this._titleBar).borderBottomWidth);
 
     // These are deduced from styles which we don't change, so it's safe to get them now:
     let boxHorizontalBorder = 2 * parseFloat(getComputedStyle(this._box).borderLeftWidth);
--- a/browser/components/urlbar/tests/browser/browser.ini
+++ b/browser/components/urlbar/tests/browser/browser.ini
@@ -4,13 +4,13 @@
 
 [DEFAULT]
 support-files =
   head.js
 
 [browser_UrlbarInput_formatValue.js]
 [browser_UrlbarInput_overflow.js]
 [browser_UrlbarInput_tooltip.js]
-skip-if = asan # Bug 1504985
+skip-if = os == "win" || asan # Bug 1511655 and bug 1504985
 [browser_UrlbarInput_trimURLs.js]
 subsuite = clipboard
 [browser_UrlbarInput_unit.js]
 support-files = empty.xul
--- a/browser/locales/en-US/chrome/browser/browser.dtd
+++ b/browser/locales/en-US/chrome/browser/browser.dtd
@@ -964,54 +964,26 @@ you can use these alternative items. Oth
 <!ENTITY getUserMedia.audioCapture.label "Audio from the tab will be shared.">
 <!ENTITY getUserMedia.allWindowsShared.message "All visible windows on your screen will be shared.">
 
 <!ENTITY contentBlocking.title "Content Blocking">
 <!ENTITY contentBlocking.detected "Blockable content detected on this site.">
 <!ENTITY contentBlocking.notDetected "No blockable content detected on this page.">
 
 <!ENTITY contentBlocking.trackingProtection3.label "Trackers">
-<!-- LOCALIZATION NOTE (contentBlocking.trackingProtection.allowed.label):
-     This label signals that this type of content blocking is turned
-     OFF and is not blocking tracker content, so this is not
-     a positive thing. It forms the end of the (imaginary) sentence
-     "Trackers [are] Allowed"-->
-<!ENTITY contentBlocking.trackingProtection.allowed.label "Allowed">
-<!-- LOCALIZATION NOTE (contentBlocking.trackingProtection.blocked.label):
-     This label signals that this type of content blocking is turned
-     ON and is successfully blocking tracker content, so this is
-     a positive thing. It forms the end of the (imaginary) sentence
-     "Trackers [are] Blocked"-->
-<!ENTITY contentBlocking.trackingProtection.blocked.label "Blocked">
-
-<!ENTITY contentBlocking.3rdPartyCookies.label "Third-Party Cookies">
-<!ENTITY contentBlocking.3rdPartyCookies.trackers.label "Tracking Cookies">
-<!-- LOCALIZATION NOTE (contentBlocking.3rdPartyCookies.blocked.label):
-     This label signals that this type of content blocking is turned
-     ON and is successfully blocking third-party cookies, so this is
-     a positive thing. It forms the end of the (imaginary) sentence
-     "Third-Party Cookies [are] Blocked"-->
-<!ENTITY contentBlocking.3rdPartyCookies.blocked.label "Blocked">
-<!-- LOCALIZATION NOTE (contentBlocking.tranckingProtection.blocking.label):
-     This label signals that this type of content blocking is turned
-     ON, so this is a positive thing. It forms the verb in the (imaginary) sentence
-     "Firefox is blocking Third-Party Cookies"-->
-<!ENTITY contentBlocking.3rdPartyCookies.blocking.label "Blocking">
-<!-- LOCALIZATION NOTE (contentBlocking.3rdPartyCookies.add.label):
-     This is displayed as a link to preferences, where the user can add
-     this specific type of content blocking. When this text is shown
-     the type of content blocking is currently not enabled. -->
-<!ENTITY contentBlocking.3rdPartyCookies.add.label "Add Blocking…">
 
 <!ENTITY contentBlocking.manageSettings.label "Manage Content Blocking">
 <!ENTITY contentBlocking.manageSettings.accesskey "M">
 
 <!ENTITY contentBlocking.trackersView.label "Trackers">
 <!ENTITY contentBlocking.trackersView.strictInfo.label "To block all trackers, set content blocking to “Strict”.">
 
+<!ENTITY contentBlocking.cookies.label "Cookies">
+<!ENTITY contentBlocking.cookiesView.label "Cookies and Site Data">
+
 <!ENTITY contentBlocking.openBreakageReportView2.label "Report a problem">
 <!ENTITY contentBlocking.breakageReportView.label "Report Problems">
 <!ENTITY contentBlocking.breakageReportView2.description "Content blocking can cause problems with some websites. When you report problems, you’ll help make &brandShortName; better for everyone. (This will send a URL as well as information about your browser settings to Mozilla.)">
 <!ENTITY contentBlocking.breakageReportView.learnMore "Learn More">
 <!ENTITY contentBlocking.breakageReportView.collection.url.label "URL">
 <!ENTITY contentBlocking.breakageReportView.collection.comments.label "What problems did you have? (Optional)">
 <!ENTITY contentBlocking.breakageReportView.sendReport.label "Send Report">
 <!ENTITY contentBlocking.breakageReportView.cancel.label "Cancel">
--- a/browser/locales/en-US/chrome/browser/browser.properties
+++ b/browser/locales/en-US/chrome/browser/browser.properties
@@ -484,16 +484,59 @@ contentBlocking.tooltip=Open Content Blo
 # The terminology used to refer to levels of Content Blocking is also used
 # in preferences and should be translated consistently.
 # LOCALIZATION NOTE (contentBlocking.category.standard):
 # "Standard" in this case is an adjective, meaning "default" or "normal"
 contentBlocking.category.standard=Standard
 contentBlocking.category.strict=Strict
 contentBlocking.category.custom=Custom
 
+# LOCALIZATION NOTE (contentBlocking.trackers.allowed.label):
+#   This label signals that this type of content blocking is turned
+#   OFF and is not blocking tracker content, so this is not
+#   a positive thing. It forms the end of the (imaginary) sentence
+#   "Trackers [are] Allowed"
+contentBlocking.trackers.allowed.label=Allowed
+# LOCALIZATION NOTE (contentBlocking.trackers.blocked.label):
+#   This label signals that this type of content blocking is turned
+#   ON and is successfully blocking tracker content, so this is
+#   a positive thing. It forms the end of the (imaginary) sentence
+#   "Trackers [are] Blocked"
+contentBlocking.trackers.blocked.label=Blocked
+
+# LOCALIZATION NOTE (contentBlocking.trackersView.blocked.label):
+#   This label is shown next to a tracker in the trackers subview.
+#   It forms the end of the (imaginary) sentence "www.example.com [was] Blocked"
+contentBlocking.trackersView.blocked.label=Blocked
+
+# LOCALIZATION NOTE (contentBlocking.cookies.allowed.label):
+#   This label signals that this type of content blocking is turned
+#   OFF and is not blocking any cookies, so this is not
+#   a positive thing. It forms the end of the (imaginary) sentence
+#   "Cookies [are] Allowed"
+contentBlocking.cookies.allowed.label=Allowed
+contentBlocking.cookies.trackersBlocked.label=Tracking Cookies Blocked
+contentBlocking.cookies.3rdPartyBlocked.label=Third-Party Cookies Blocked
+contentBlocking.cookies.unvisitedBlocked.label=Unvisited Site Cookies Blocked
+contentBlocking.cookies.allBlocked.label=All Cookies Blocked
+
+contentBlocking.cookiesView.firstParty.label=From This Site
+contentBlocking.cookiesView.trackers.label=Tracking Cookies
+contentBlocking.cookiesView.thirdParty.label=Third-Party Cookies
+# LOCALIZATION NOTE (contentBlocking.cookiesView.allowed.label):
+#   This label is shown next to a cookie origin in the cookies subview.
+#   It forms the end of the (imaginary) sentence "www.example.com [was] Allowed"
+contentBlocking.cookiesView.allowed.label=Allowed
+# LOCALIZATION NOTE (contentBlocking.cookiesView.blocked.label):
+#   This label is shown next to a cookie origin in the cookies subview.
+#   It forms the end of the (imaginary) sentence "www.example.com [was] Blocked"
+contentBlocking.cookiesView.blocked.label=Blocked
+# LOCALIZATION NOTE (contentBlocking.cookiesView.removeButton.tooltip): %S is the domain of the site.
+contentBlocking.cookiesView.removeButton.tooltip=Clear cookie exception for %S
+
 # LOCALIZATION NOTE (contentBlocking.intro.title): %S is brandShortName.
 contentBlocking.intro.title=New in %S: Content Blocking
 # LOCALIZATION NOTE (contentBlocking.intro.v1.description): %S is brandShortName.
 contentBlocking.intro.v1.description=When you see the shield, %S is blocking parts of the page that can slow your browsing or track you online.
 contentBlocking.intro.v2.description=The privacy benefits of Tracking Protection are now just one part of content blocking. When you see the shield, content blocking is on.
 # LOCALIZATION NOTE (trackingProtection.intro.step1of3): Indicates that the intro panel is step one of three in a tour.
 trackingProtection.intro.step1of3=1 of 3
 trackingProtection.intro.nextButton.label=Next
--- a/browser/themes/shared/controlcenter/panel.inc.css
+++ b/browser/themes/shared/controlcenter/panel.inc.css
@@ -160,17 +160,18 @@
 }
 
 /* CONTENT */
 
 #tracking-protection-preferences-button > .toolbarbutton-text,
 .identity-popup-footer,
 .tracking-protection-button,
 #identity-popup-trackersView-strict-info > label,
-.identity-popup-trackersView-list-item > label,
+.identity-popup-cookiesView-list-header,
+.identity-popup-content-blocking-list-item > label,
 #identity-popup-mainView-panel-header > label,
 #identity-popup-trackersView > .panel-header,
 #identity-popup-securityView > .panel-header,
 #identity-popup-breakageReportView > .panel-header,
 #identity-popup-content-blocking-report-breakage,
 .identity-popup-content-blocking-category-label,
 .identity-popup-content-blocking-category-state-label,
 .identity-popup-content-blocking-category-add-blocking,
@@ -398,76 +399,105 @@ description#identity-popup-content-verif
 #identity-popup-breakageReportView-collection-comments {
   height: 120px;
 }
 
 #identity-popup-content-blocking-content {
   background-image: url("chrome://browser/skin/controlcenter/tracking-protection.svg");
 }
 
-#identity-popup-content-blocking-category-tracking-protection {
+.identity-popup-content-blocking-category {
   /* Overwrite toolbarbutton styles */
   -moz-appearance: none;
   margin: 0;
   padding-inline-start: 0;
 }
 
-#identity-popup-content-blocking-category-tracking-protection:-moz-focusring,
-#identity-popup-content-blocking-category-tracking-protection:hover {
+.identity-popup-content-blocking-category:-moz-focusring,
+.identity-popup-content-blocking-category:hover {
   border-radius: 2px;
+  background-color: var(--arrowpanel-dimmed);
+}
+
+.identity-popup-content-blocking-category:hover:active {
   background-color: var(--arrowpanel-dimmed-further);
 }
 
-#identity-popup-content-blocking-category-tracking-protection:hover:active {
-  background-color: var(--arrowpanel-dimmed-even-further);
-}
-
-#identity-popup-content-blocking-category-tracking-protection::after {
+.identity-popup-content-blocking-category::after {
   content: url(chrome://browser/skin/back-12.svg);
   -moz-context-properties: fill, fill-opacity;
   transform: scaleX(-1) translateY(1px);
   float: right;
 }
 
-#identity-popup-content-blocking-category-tracking-protection:-moz-locale-dir(rtl)::after {
+.identity-popup-content-blocking-category:-moz-locale-dir(rtl)::after {
   transform: scaleX(1) translateY(1px);
 }
 
 /* These subviews could get filled with a lot of trackers or cookies, so set a
  * maximum size and allow them to scroll vertically. */
+#identity-popup-cookiesView,
 #identity-popup-trackersView {
   max-height: 600px;
 }
 
-#identity-popup-trackersView-list {
+.identity-popup-cookiesView-list-header {
+  color: var(--panel-disabled-color);
+  margin: 5px 0;
+}
+
+.identity-popup-content-blocking-list {
   padding: 5px 20px;
   -moz-box-flex: 1;
   overflow: auto;
 }
 
-.identity-popup-trackersView-list-item {
+.identity-popup-content-blocking-list-item {
   margin: 5px 0;
   overflow: hidden;
+  -moz-box-align: center;
+}
+
+.identity-popup-content-blocking-list-item:not(.allowed) {
+  color: var(--panel-disabled-color);
 }
 
-.identity-popup-trackersView-list-item > label {
-  /* Limit to full width - container padding - icon width - icon margin */
-  max-width: calc(var(--identity-popup-width) - 40px - 16px - 10px);
+.identity-popup-content-blocking-list-host-label {
+  -moz-box-flex: 1;
 }
 
-.identity-popup-trackersView-list-item > image {
-  list-style-image: url(chrome://browser/skin/controlcenter/trackers-disabled.svg);
+.identity-popup-content-blocking-list-state-label {
+  -moz-box-flex: 1;
+  text-align: end;
+  margin-inline-start: 5px;
+  margin-inline-end: 2px;
+}
+
+.identity-popup-trackersView-icon,
+.identity-popup-cookiesView-icon {
   margin-inline-end: 10px;
   -moz-context-properties: fill, fill-opacity;
 }
 
-.identity-popup-trackersView-list-item.allowed > image {
+.identity-popup-trackersView-icon {
+  list-style-image: url(chrome://browser/skin/controlcenter/trackers-disabled.svg);
+}
+
+.identity-popup-trackersView-icon.allowed {
   list-style-image: url(chrome://browser/skin/controlcenter/trackers.svg);
 }
 
+.identity-popup-cookiesView-icon {
+  list-style-image: url(chrome://browser/skin/controlcenter/3rdpartycookies-disabled.svg);
+}
+
+.identity-popup-cookiesView-icon.allowed {
+  list-style-image: url(chrome://browser/skin/controlcenter/3rdpartycookies.svg);
+}
+
 #identity-popup-trackersView-strict-info {
   min-height: 40px;
   /* Limit to full width - margin */
   max-width: calc(var(--identity-popup-width) - 12px);
   min-width: calc(var(--identity-popup-width) - 12px);
   background-color: #45a1ff80;
   margin: 6px;
   text-align: center;
@@ -492,61 +522,30 @@ description#identity-popup-content-verif
 }
 
 /* Content Blocking categories */
 
 #identity-popup-content-blocking-category-list {
   margin-top: 10px;
 }
 
-/* Don't show the categories when no trackers were detected. */
-#identity-popup-content-blocking-content:not([detected]) > #identity-popup-content-blocking-category-list {
-  display: none;
-}
-
 /* Show the "detected"/"not detected" message depending on the content state. */
 #identity-popup-content-blocking-content:not([detected]) > #identity-popup-content-blocking-detected,
 #identity-popup-content-blocking-content[detected] > #identity-popup-content-blocking-not-detected {
   display: none;
 }
 
-.identity-popup-content-blocking-category-state-label {
-  display: none;
-}
-
-/* TODO: This will be cleaned up by bug 1501992 */
-/* Hide the state label unless we blocked something only for third party cookies */
-#identity-popup-content-blocking-content:not([hasException]) #identity-popup-content-blocking-category-3rdpartycookies.blocked .identity-popup-content-blocking-category-state-label,
-/* For trackers, either show a "blocked" or "allowed" label depending on the state. */
-#identity-popup-content-blocking-content:not([hasException]) #identity-popup-content-blocking-category-tracking-protection.blocked > #identity-popup-content-blocking-tracking-protection-label-blocked,
-#identity-popup-content-blocking-category-tracking-protection:not(.blocked) > #identity-popup-content-blocking-tracking-protection-label-allowed,
-#identity-popup-content-blocking-content[hasException] #identity-popup-content-blocking-tracking-protection-label-allowed {
-  display: -moz-box;
-}
-
-.identity-popup-content-blocking-category.blocked .identity-popup-content-blocking-category-add-blocking {
-  display: none;
-}
-
 .tracking-protection-icon {
   list-style-image: url(chrome://browser/skin/controlcenter/trackers.svg);
 }
 
-#identity-popup-content-blocking-category-tracking-protection.blocked > .tracking-protection-icon {
-  list-style-image: url(chrome://browser/skin/controlcenter/trackers-disabled.svg);
-}
-
 .thirdpartycookies-icon {
   list-style-image: url(chrome://browser/skin/controlcenter/3rdpartycookies.svg);
 }
 
-#identity-popup-content-blocking-category-3rdpartycookies.blocked > .thirdpartycookies-icon {
-  list-style-image: url(chrome://browser/skin/controlcenter/3rdpartycookies-disabled.svg);
-}
-
 /* Content Blocking action button */
 
 .tracking-protection-button {
   list-style-image: url(chrome://browser/skin/tracking-protection.svg);
   -moz-appearance: none;
   margin: 1em 0 0;
   display: none;
   height: 32px;
@@ -593,25 +592,25 @@ description#identity-popup-content-verif
 
 #identity-popup-content-blocking-report-breakage {
   margin-top: 6px;
 }
 
 /* Hide the "report breakage" button if we have not detected any trackers
  * (except if the user added an exception, in which case they might still
  * (especially!) want to report the breakage). */
-#identity-popup-content-blocking-content:not([active]):not([hasException]) #identity-popup-content-blocking-report-breakage {
+#identity-popup-content-blocking-content:not([blocking]):not([hasException]) #identity-popup-content-blocking-report-breakage {
   display: none;
 }
 
 /* Show the right action buttons depending on content state */
 /* Offer to temporarily add an exception in private mode. */
-#main-window:not([privatebrowsingmode]) #identity-popup-content-blocking-content[active]:not([hasException]) > #tracking-action-unblock,
+#main-window:not([privatebrowsingmode]) #identity-popup-content-blocking-content[blocking]:not([hasException]) > #tracking-action-unblock,
 /* Offer to permanently add an exception in normal mode. */
-#main-window[privatebrowsingmode] #identity-popup-content-blocking-content[active]:not([hasException]) > #tracking-action-unblock-private,
+#main-window[privatebrowsingmode] #identity-popup-content-blocking-content[blocking]:not([hasException]) > #tracking-action-unblock-private,
 /* If there's an exception just offer to remove the exception again. */
 #identity-popup-content-blocking-content[hasException] > #tracking-action-block {
   display: -moz-box;
 }
 
 /* PERMISSIONS */
 
 #identity-popup-permissions-content {
@@ -713,16 +712,17 @@ description#identity-popup-content-verif
   margin: 0;
   border-width: 0;
   border-radius: 50%;
   min-width: 0;
   padding: 2px;
   background-color: transparent;
   color: inherit;
   opacity: 0.6;
+  margin-inline-start: 2px;
 }
 
 .identity-popup-permission-remove-button > .button-box {
   padding: 0;
 }
 
 .identity-popup-permission-remove-button > .button-box > .button-icon {
   margin: 0;
--- a/browser/themes/shared/ctrlTab.inc.css
+++ b/browser/themes/shared/ctrlTab.inc.css
@@ -19,16 +19,18 @@
   font-weight: bold;
 %endif
 }
 
 .ctrlTab-preview {
   -moz-appearance: none;
   /* !important overrides the :hover color from button.css on Linux */
   color: inherit !important;
+  /* remove the :-moz-focusring outline from button.css on Windows */
+  outline: none !important;
   margin: 0;
   text-shadow: 0 0 1px hsl(0,0%,12%), 0 0 2px hsl(0,0%,12%);
 }
 
 .ctrlTab-canvas > html|img,
 .ctrlTab-canvas > html|canvas {
   min-width: inherit;
   max-width: inherit;
--- a/browser/themes/shared/incontentprefs/preferences.inc.css
+++ b/browser/themes/shared/incontentprefs/preferences.inc.css
@@ -842,40 +842,38 @@ menulist[indicator=true] > menupopup men
   width: calc(5ch + 22px); /* 5 chars + 11px padding on both sides */
 }
 
 #defaultBrowserLanguage {
   margin-inline-start: 0;
   min-width: 20em;
 }
 
-#LanguagesDialog > .dialog-button-box > .dialog-button[dlgtype="help"],
-#BrowserLanguagesDialog > .dialog-button-box > .dialog-button[dlgtype="help"] {
-  margin-inline-start: 0;
+#selectedLocales {
+  height: 150px;
 }
 
-#LanguagesDialog > .dialog-button-box > .dialog-button[dlgtype="accept"],
-#BrowserLanguagesDialog > .dialog-button-box > .dialog-button[dlgtype="accept"] {
-  margin-inline-end: 0;
-}
-
-#requestedLocales {
-  min-height: 200px;
+#selectedLocales > richlistitem {
+  padding-inline-start: 13px;
 }
 
 #availableLanguages,
 #availableLocales {
   margin: 0;
   margin-inline-end: 4px;
 }
 
 #warning-message > .message-bar-description {
   width: 32em;
 }
 
+#warning-message {
+  margin-top: 8px;
+}
+
 .add-web-language,
 .add-browser-language {
   margin: 0;
   margin-inline-start: 4px;
 }
 
 .action-button {
   margin-inline-end: 0;
--- a/dom/media/platforms/agnostic/DAV1DDecoder.cpp
+++ b/dom/media/platforms/agnostic/DAV1DDecoder.cpp
@@ -13,18 +13,16 @@
 
 namespace mozilla {
 
 DAV1DDecoder::DAV1DDecoder(const CreateDecoderParams& aParams)
     : mInfo(aParams.VideoConfig()),
       mTaskQueue(aParams.mTaskQueue),
       mImageContainer(aParams.mImageContainer) {}
 
-DAV1DDecoder::~DAV1DDecoder() {}
-
 RefPtr<MediaDataDecoder::InitPromise> DAV1DDecoder::Init() {
   Dav1dSettings settings;
   dav1d_default_settings(&settings);
   int decoder_threads = 2;
   if (mInfo.mDisplay.width >= 2048) {
     decoder_threads = 8;
   } else if (mInfo.mDisplay.width >= 1024) {
     decoder_threads = 4;
@@ -72,32 +70,32 @@ void DAV1DDecoder::ReleaseDataBuffer(con
   Unused << rv;
 }
 
 RefPtr<MediaDataDecoder::DecodePromise> DAV1DDecoder::InvokeDecode(
     MediaRawData* aSample) {
   MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn());
   MOZ_ASSERT(aSample);
 
-  // Save the last timing values to use in drain.
-  mLastTimecode = aSample->mTimecode;
-  mLastDuration = aSample->mDuration;
-  mLastOffset = aSample->mOffset;
   // Add the buffer to the hashtable in order to increase
   // the ref counter and keep it alive. When dav1d does not
   // need it any more will call it's release callback. Remove
   // the buffer, in there, to reduce the ref counter and eventually
   // free it. We need a hashtable and not an array because the
   // release callback are not coming in the same order that the
   // buffers have been added in the decoder (threading ordering
   // inside decoder)
   mDecodingBuffers.Put(aSample->Data(), aSample);
   Dav1dData data;
   int res = dav1d_data_wrap(&data, aSample->Data(), aSample->Size(),
                             ReleaseDataBuffer_s, this);
+  data.m.timestamp = aSample->mTimecode.ToMicroseconds();
+  data.m.duration = aSample->mDuration.ToMicroseconds();
+  data.m.offset = aSample->mOffset;
+
   if (res < 0) {
     LOG("Create decoder data error.");
     return DecodePromise::CreateAndReject(
         MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__);
   }
   DecodedData results;
   do {
     res = dav1d_send_data(mContext, &data);
@@ -107,34 +105,33 @@ RefPtr<MediaDataDecoder::DecodePromise> 
           MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR, __func__), __func__);
     }
     // Alway consume the whole buffer on success.
     // At this point only -EAGAIN error is expected.
     MOZ_ASSERT((res == 0 && !data.sz) ||
                (res == -EAGAIN && data.sz == aSample->Size()));
 
     MediaResult rs(NS_OK);
-    res = GetPicture(aSample, results, rs);
+    res = GetPicture(results, rs);
     if (res < 0) {
       if (res == -EAGAIN) {
         // No frames ready to return. This is not an
         // error, in some circumstances, we need to
         // feed it with a certain amount of frames
         // before we get a picture.
         continue;
       }
       return DecodePromise::CreateAndReject(rs, __func__);
     }
   } while (data.sz > 0);
 
   return DecodePromise::CreateAndResolve(std::move(results), __func__);
 }
 
-int DAV1DDecoder::GetPicture(const MediaRawData* aSample, DecodedData& aData,
-                             MediaResult& aResult) {
+int DAV1DDecoder::GetPicture(DecodedData& aData, MediaResult& aResult) {
   class Dav1dPictureWrapper {
    public:
     Dav1dPicture* operator&() { return &p; }
     const Dav1dPicture& operator*() const { return p; }
     ~Dav1dPictureWrapper() { dav1d_picture_unref(&p); }
 
    private:
     Dav1dPicture p = Dav1dPicture();
@@ -147,93 +144,102 @@ int DAV1DDecoder::GetPicture(const Media
     aResult = MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR, __func__);
     return res;
   }
 
   if ((*picture).p.layout == DAV1D_PIXEL_LAYOUT_I400) {
     return 0;
   }
 
-  RefPtr<VideoData> v = ConstructImage(aSample, *picture);
+  RefPtr<VideoData> v = ConstructImage(*picture);
   if (!v) {
     LOG("Image allocation error: %ux%u"
         " display %ux%u picture %ux%u",
         (*picture).p.w, (*picture).p.h, mInfo.mDisplay.width,
         mInfo.mDisplay.height, mInfo.mImage.width, mInfo.mImage.height);
     aResult = MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
     return -1;
   }
   aData.AppendElement(std::move(v));
   return 0;
 }
 
 already_AddRefed<VideoData> DAV1DDecoder::ConstructImage(
-    const MediaRawData* aSample, const Dav1dPicture& picture) {
+    const Dav1dPicture& aPicture) {
   VideoData::YCbCrBuffer b;
-  if (picture.p.bpc == 10) {
+  if (aPicture.p.bpc == 10) {
     b.mColorDepth = ColorDepth::COLOR_10;
-  } else if (picture.p.bpc == 12) {
+  } else if (aPicture.p.bpc == 12) {
     b.mColorDepth = ColorDepth::COLOR_12;
   }
-  b.mPlanes[0].mData = static_cast<uint8_t*>(picture.data[0]);
-  b.mPlanes[0].mStride = picture.stride[0];
-  b.mPlanes[0].mHeight = picture.p.h;
-  b.mPlanes[0].mWidth = picture.p.w;
+
+  // In every other case, use the default (BT601).
+  if (aPicture.seq_hdr->color_description_present) {
+    if (aPicture.seq_hdr->pri == DAV1D_COLOR_PRI_BT709) {
+      b.mYUVColorSpace = YUVColorSpace::BT709;
+    }
+  } else if (aPicture.p.h >= 720) {
+    b.mYUVColorSpace = YUVColorSpace::BT709;
+  }
+
+  b.mPlanes[0].mData = static_cast<uint8_t*>(aPicture.data[0]);
+  b.mPlanes[0].mStride = aPicture.stride[0];
+  b.mPlanes[0].mHeight = aPicture.p.h;
+  b.mPlanes[0].mWidth = aPicture.p.w;
   b.mPlanes[0].mOffset = 0;
   b.mPlanes[0].mSkip = 0;
 
-  b.mPlanes[1].mData = static_cast<uint8_t*>(picture.data[1]);
-  b.mPlanes[1].mStride = picture.stride[1];
+  b.mPlanes[1].mData = static_cast<uint8_t*>(aPicture.data[1]);
+  b.mPlanes[1].mStride = aPicture.stride[1];
   b.mPlanes[1].mOffset = 0;
   b.mPlanes[1].mSkip = 0;
 
-  b.mPlanes[2].mData = static_cast<uint8_t*>(picture.data[2]);
-  b.mPlanes[2].mStride = picture.stride[1];
+  b.mPlanes[2].mData = static_cast<uint8_t*>(aPicture.data[2]);
+  b.mPlanes[2].mStride = aPicture.stride[1];
   b.mPlanes[2].mOffset = 0;
   b.mPlanes[2].mSkip = 0;
 
   // https://code.videolan.org/videolan/dav1d/blob/master/tools/output/yuv.c#L67
-  const int ss_ver = picture.p.layout == DAV1D_PIXEL_LAYOUT_I420;
-  const int ss_hor = picture.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+  const int ss_ver = aPicture.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+  const int ss_hor = aPicture.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 
-  b.mPlanes[1].mHeight = (picture.p.h + ss_ver) >> ss_ver;
-  b.mPlanes[1].mWidth = (picture.p.w + ss_hor) >> ss_hor;
+  b.mPlanes[1].mHeight = (aPicture.p.h + ss_ver) >> ss_ver;
+  b.mPlanes[1].mWidth = (aPicture.p.w + ss_hor) >> ss_hor;
 
-  b.mPlanes[2].mHeight = (picture.p.h + ss_ver) >> ss_ver;
-  b.mPlanes[2].mWidth = (picture.p.w + ss_hor) >> ss_hor;
+  b.mPlanes[2].mHeight = (aPicture.p.h + ss_ver) >> ss_ver;
+  b.mPlanes[2].mWidth = (aPicture.p.w + ss_hor) >> ss_hor;
 
   // Timestamp, duration and offset used here are wrong.
   // We need to take those values from the decoder. Latest
   // dav1d version allows for that.
+  media::TimeUnit timecode =
+      media::TimeUnit::FromMicroseconds(aPicture.m.timestamp);
+  media::TimeUnit duration =
+      media::TimeUnit::FromMicroseconds(aPicture.m.duration);
+  int64_t offset = aPicture.m.offset;
+  bool keyframe = aPicture.frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY;
+
   return VideoData::CreateAndCopyData(
-      mInfo, mImageContainer, aSample->mOffset, aSample->mTime,
-      aSample->mDuration, b, aSample->mKeyframe, aSample->mTimecode,
-      mInfo.ScaledImageRect(picture.p.w, picture.p.h));
+      mInfo, mImageContainer, offset, timecode, duration, b, keyframe, timecode,
+      mInfo.ScaledImageRect(aPicture.p.w, aPicture.p.h));
 }
 
 RefPtr<MediaDataDecoder::DecodePromise> DAV1DDecoder::Drain() {
   RefPtr<DAV1DDecoder> self = this;
   return InvokeAsync(mTaskQueue, __func__, [self, this] {
     int res = 0;
     DecodedData results;
     do {
-      RefPtr<MediaRawData> empty(new MediaRawData());
-      // Update last timecode in case we loop over.
-      empty->mTimecode = empty->mTime = mLastTimecode =
-          mLastTimecode + mLastDuration;
-      empty->mDuration = mLastDuration;
-      empty->mOffset = mLastOffset;
-
       MediaResult rs(NS_OK);
-      res = GetPicture(empty, results, rs);
+      res = GetPicture(results, rs);
       if (res < 0 && res != -EAGAIN) {
         return DecodePromise::CreateAndReject(rs, __func__);
       }
     } while (res != -EAGAIN);
-    return DecodePromise::CreateAndResolve(results, __func__);
+    return DecodePromise::CreateAndResolve(std::move(results), __func__);
   });
 }
 
 RefPtr<MediaDataDecoder::FlushPromise> DAV1DDecoder::Flush() {
   RefPtr<DAV1DDecoder> self = this;
   return InvokeAsync(mTaskQueue, __func__, [self]() {
     dav1d_flush(self->mContext);
     return FlushPromise::CreateAndResolve(true, __func__);
--- a/dom/media/platforms/agnostic/DAV1DDecoder.h
+++ b/dom/media/platforms/agnostic/DAV1DDecoder.h
@@ -28,35 +28,27 @@ class DAV1DDecoder : public MediaDataDec
   RefPtr<ShutdownPromise> Shutdown() override;
   nsCString GetDescriptionName() const override {
     return NS_LITERAL_CSTRING("av1 libdav1d video decoder");
   }
 
   void ReleaseDataBuffer(const uint8_t* buf);
 
  private:
-  ~DAV1DDecoder();
+  ~DAV1DDecoder() = default;
   RefPtr<DecodePromise> InvokeDecode(MediaRawData* aSample);
-  int GetPicture(const MediaRawData* aSample, DecodedData& aData,
-                 MediaResult& aResult);
-  already_AddRefed<VideoData> ConstructImage(const MediaRawData* aSample,
-                                             const Dav1dPicture&);
+  int GetPicture(DecodedData& aData, MediaResult& aResult);
+  already_AddRefed<VideoData> ConstructImage(const Dav1dPicture& aPicture);
 
   Dav1dContext* mContext;
 
   const VideoInfo& mInfo;
   const RefPtr<TaskQueue> mTaskQueue;
   const RefPtr<layers::ImageContainer> mImageContainer;
 
   // Keep the buffers alive until dav1d
   // does not need them any more.
   MediaRawDataHashtable mDecodingBuffers;
-
-  // Store the last timing values to use
-  // them during drain.
-  media::TimeUnit mLastTimecode;
-  media::TimeUnit mLastDuration;
-  int64_t mLastOffset = 0;
 };
 
 }  // namespace mozilla
 
 #endif  // DAV1DDecoder_h_
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-5b26863178f8533eeba2de28c6bdc019ba9ed3e8
+6ca634197cfe26738194f87042020fe838c0047a
--- a/gfx/wr/Cargo.lock
+++ b/gfx/wr/Cargo.lock
@@ -1551,16 +1551,17 @@ dependencies = [
  "mozangle 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "pathfinder_font_renderer 0.5.0 (git+https://github.com/pcwalton/pathfinder?branch=webrender)",
  "pathfinder_gfx_utils 0.2.0 (git+https://github.com/pcwalton/pathfinder?branch=webrender)",
  "pathfinder_partitioner 0.2.0 (git+https://github.com/pcwalton/pathfinder?branch=webrender)",
  "pathfinder_path_utils 0.2.0 (git+https://github.com/pcwalton/pathfinder?branch=webrender)",
  "plane-split 0.13.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "png 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "ron 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_profiler 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
--- a/gfx/wr/appveyor.yml
+++ b/gfx/wr/appveyor.yml
@@ -3,18 +3,18 @@ before_test:
   - ps: Set-ScreenResolution 1920 1080
 
 environment:
   PATH: 'C:\msys64\mingw64\bin;C:\msys64\usr\bin;%PATH%;C:\Rust\bin'
   RUST_BACKTRACE: 1
   TARGET: x86_64-pc-windows-msvc
 
 install:
-  - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-1.27.0-${env:TARGET}.msi"
-  - msiexec /passive /i "rust-1.27.0-%TARGET%.msi" ADDLOCAL=Rustc,Cargo,Std INSTALLDIR=C:\Rust
+  - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-1.30.0-${env:TARGET}.msi"
+  - msiexec /passive /i "rust-1.30.0-%TARGET%.msi" ADDLOCAL=Rustc,Cargo,Std INSTALLDIR=C:\Rust
   - rustc -V
   - cargo -V
 
 build: false
 
 test_script:
   - cd webrender_api
   - cargo test --verbose
--- a/gfx/wr/webrender/Cargo.toml
+++ b/gfx/wr/webrender/Cargo.toml
@@ -63,16 +63,17 @@ optional = true
 
 [dependencies.pathfinder_path_utils]
 git = "https://github.com/pcwalton/pathfinder"
 branch = "webrender"
 optional = true
 
 [dev-dependencies]
 mozangle = "0.1"
+rand = "0.4"
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.4", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.6.2"
 
 [target.'cfg(target_os = "macos")'.dependencies]
--- a/gfx/wr/webrender/src/batch.rs
+++ b/gfx/wr/webrender/src/batch.rs
@@ -146,17 +146,18 @@ impl AlphaBatchList {
     pub fn set_params_and_get_batch(
         &mut self,
         key: BatchKey,
         bounding_rect: &PictureRect,
         z_id: ZBufferId,
     ) -> &mut Vec<PrimitiveInstanceData> {
         if z_id != self.current_z_id ||
            self.current_batch_index == usize::MAX ||
-           !self.batches[self.current_batch_index].key.is_compatible_with(&key) {
+           !self.batches[self.current_batch_index].key.is_compatible_with(&key)
+        {
             let mut selected_batch_index = None;
 
             match key.blend_mode {
                 BlendMode::SubpixelWithBgColor => {
                     'outer_multipass: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
                         // Some subpixel batches are drawn in two passes. Because of this, we need
                         // to check for overlaps with every batch (which is a bit different
                         // than the normal batching below).
--- a/gfx/wr/webrender/src/display_list_flattener.rs
+++ b/gfx/wr/webrender/src/display_list_flattener.rs
@@ -25,18 +25,18 @@ use picture::{Picture3DContext, PictureC
 use prim_store::{PrimitiveInstance, PrimitiveDataInterner, PrimitiveKeyKind, RadialGradientParams};
 use prim_store::{PrimitiveKey, PrimitiveSceneData, PrimitiveInstanceKind, GradientStopKey, NinePatchDescriptor};
 use prim_store::{PrimitiveDataHandle, PrimitiveStore, PrimitiveStoreStats, LineDecorationCacheKey};
 use prim_store::{ScrollNodeAndClipChain, PictureIndex, register_prim_chase_id, get_line_decoration_sizes};
 use render_backend::{DocumentView};
 use resource_cache::{FontInstanceMap, ImageRequest};
 use scene::{Scene, ScenePipeline, StackingContextHelpers};
 use scene_builder::DocumentResources;
-use spatial_node::{StickyFrameInfo, ScrollFrameKind};
-use std::{f32, mem};
+use spatial_node::{StickyFrameInfo, ScrollFrameKind, SpatialNodeType};
+use std::{f32, mem, usize};
 use std::collections::vec_deque::VecDeque;
 use tiling::{CompositeOps};
 use util::{MaxRect, VecHelper};
 
 #[derive(Debug, Copy, Clone)]
 struct ClipNode {
     id: ClipChainId,
     count: usize,
@@ -149,16 +149,27 @@ pub struct DisplayListFlattener<'a> {
 
     /// Reference to the document resources, which contains
     /// shared (interned) data between display lists.
     resources: &'a mut DocumentResources,
 
     /// The root picture index for this flattener. This is the picture
     /// to start the culling phase from.
     pub root_pic_index: PictureIndex,
+
+    /// TODO(gw): This is a complete hack that relies on knowledge of
+    ///           what the Gecko display list looks like. It's used
+    ///           for now to work out which scroll root to use to
+    ///           create the picture cache for the content. It's only
+    ///           ever used if picture caching is enabled in the
+    ///           RendererOptions struct. We will need to work out
+    ///           a better API to avoid this, before we enable it
+    ///           for all users. Another alternative is that this
+    ///           will disappear itself when document splitting is used.
+    picture_cache_scroll_root: Option<SpatialNodeIndex>,
 }
 
 impl<'a> DisplayListFlattener<'a> {
     pub fn create_frame_builder(
         scene: &Scene,
         clip_scroll_tree: &mut ClipScrollTree,
         font_instances: FontInstanceMap,
         view: &DocumentView,
@@ -186,43 +197,208 @@ impl<'a> DisplayListFlattener<'a> {
             hit_testing_runs: Vec::new(),
             pending_shadow_items: VecDeque::new(),
             sc_stack: Vec::new(),
             pipeline_clip_chain_stack: vec![ClipChainId::NONE],
             prim_store: PrimitiveStore::new(&prim_store_stats),
             clip_store: ClipStore::new(),
             resources,
             root_pic_index: PictureIndex(0),
+            picture_cache_scroll_root: None,
         };
 
         flattener.push_root(
             root_pipeline_id,
             &root_pipeline.viewport_size,
             &root_pipeline.content_size,
         );
 
         flattener.flatten_root(
             root_pipeline,
             &root_pipeline.viewport_size,
         );
 
         debug_assert!(flattener.sc_stack.is_empty());
 
+        // If picture caching is enabled, splice up the root
+        // stacking context to enable correct surface caching.
+        flattener.setup_picture_caching(
+            root_pipeline_id,
+        );
+
         new_scene.root_pipeline_id = Some(root_pipeline_id);
         new_scene.pipeline_epochs = scene.pipeline_epochs.clone();
         new_scene.pipelines = scene.pipelines.clone();
 
         FrameBuilder::with_display_list_flattener(
             view.inner_rect,
             background_color,
             view.window_size,
             flattener,
         )
     }
 
+    /// Cut the primitives in the root stacking context based on the picture
+    /// caching scroll root. This is a temporary solution for the initial
+    /// implementation of picture caching. We need to work out the specifics
+    /// of how WR should decide (or Gecko should communicate) where the main
+    /// content frame is that should get surface caching.
+    fn setup_picture_caching(
+        &mut self,
+        root_pipeline_id: PipelineId,
+    ) {
+        if !self.config.enable_picture_caching {
+            return;
+        }
+
+        // This method is basically a hack to set up picture caching in a minimal
+        // way without having to check the public API (yet). The intent is to
+        // work out a good API for this and switch to using it. In the mean
+        // time, this allows basic picture caching to be enabled and used for
+        // ironing out remaining bugs, fixing performance issues and profiling.
+
+        //
+        // We know that the display list will contain something like the following:
+        //  [Some number of primitives attached to root scroll node]
+        //  [IFrame for the content]
+        //  [A scroll root for the content (what we're interested in)]
+        //  [Primitives attached to the scroll root, possibly with sub-scroll roots]
+        //  [Some number of trailing primitives attached to root scroll frame]
+        //
+        // So we want to slice that stacking context up into:
+        //  [root primitives]
+        //  [tile cache picture]
+        //     [primitives attached to cached scroll root]
+        //  [trailing root primitives]
+        //
+        // This step is typically very quick, because there are only
+        // a small number of items in the root stacking context, since
+        // most of the content is embedded in its own picture.
+        //
+
+        // See if we found a scroll root for the cached surface root.
+        if let Some(picture_cache_scroll_root) = self.picture_cache_scroll_root {
+            // Get the list of existing primitives in the main stacking context.
+            let mut old_prim_list = mem::replace(
+                &mut self.prim_store.pictures[self.root_pic_index.0].prim_list,
+                PrimitiveList::empty(),
+            );
+
+            // Find the first primitive which has the desired scroll root.
+            let first_index = old_prim_list.prim_instances.iter().position(|instance| {
+                let scroll_root = self.find_scroll_root(
+                    instance.spatial_node_index,
+                );
+
+                scroll_root == picture_cache_scroll_root
+            }).unwrap_or(old_prim_list.prim_instances.len());
+
+            // Split off the preceding primitives.
+            let mut remaining_prims = old_prim_list.prim_instances.split_off(first_index);
+
+            // Find the last primitive that is not attached to the root scroll node;
+            // everything after it is a trailing root primitive. When no such primitive
+            // exists (including when `remaining_prims` is empty) nothing is split off.
+            // Using `map_or` avoids evaluating `len() - 1`, which underflows on an
+            // empty list.
+            let trailing_start = remaining_prims.iter().rposition(|instance| {
+                self.find_scroll_root(instance.spatial_node_index) != ROOT_SPATIAL_NODE_INDEX
+            }).map_or(remaining_prims.len(), |last_index| last_index + 1);
+
+            let preceding_prims = old_prim_list.prim_instances;
+            let trailing_prims = remaining_prims.split_off(trailing_start);
+
+            let prim_list = PrimitiveList::new(
+                remaining_prims,
+                &self.resources.prim_interner,
+            );
+
+            // Now, create a picture with tile caching enabled that will hold all
+            // of the primitives selected as belonging to the main scroll root.
+            let prim_key = PrimitiveKey::new(
+                true,
+                LayoutRect::zero(),
+                LayoutRect::max_rect(),
+                PrimitiveKeyKind::Unused,
+            );
+
+            let primitive_data_handle = self.resources
+                .prim_interner
+                .intern(&prim_key, || {
+                    PrimitiveSceneData {
+                        culling_rect: LayoutRect::zero(),
+                        is_backface_visible: true,
+                    }
+                }
+            );
+
+            let pic_index = self.prim_store.pictures.alloc().init(PicturePrimitive::new_image(
+                Some(PictureCompositeMode::TileCache { clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0) }),
+                Picture3DContext::Out,
+                root_pipeline_id,
+                None,
+                true,
+                RasterSpace::Screen,
+                prim_list,
+                picture_cache_scroll_root,
+                LayoutRect::max_rect(),
+                &self.clip_store,
+            ));
+
+            let instance = PrimitiveInstance::new(
+                PrimitiveInstanceKind::Picture { pic_index: PictureIndex(pic_index) },
+                primitive_data_handle,
+                ClipChainId::NONE,
+                picture_cache_scroll_root,
+            );
+
+            // This contains the tile caching picture, with preceding and
+            // trailing primitives outside the main scroll root.
+            let mut new_prim_list = preceding_prims;
+            new_prim_list.push(instance);
+            new_prim_list.extend(trailing_prims);
+
+            // Finally, store the sliced primitive list in the root picture.
+            self.prim_store.pictures[self.root_pic_index.0].prim_list = PrimitiveList::new(
+                new_prim_list,
+                &self.resources.prim_interner,
+            );
+        }
+    }
+
+    /// Find the scroll root for a given spatial node: the explicit scroll frame
+    /// closest to the root among the node and its ancestors (root node if none).
+    fn find_scroll_root(
+        &self,
+        spatial_node_index: SpatialNodeIndex,
+    ) -> SpatialNodeIndex {
+        let mut scroll_root = ROOT_SPATIAL_NODE_INDEX;
+        let mut node_index = spatial_node_index;
+
+        while node_index != ROOT_SPATIAL_NODE_INDEX {
+            let node = &self.clip_scroll_tree.spatial_nodes[node_index.0];
+            match node.node_type {
+                SpatialNodeType::ReferenceFrame(..) |
+                SpatialNodeType::StickyFrame(..) => {
+                    // TODO(gw): In future, we may need to consider sticky frames.
+                }
+                SpatialNodeType::ScrollFrame(ref info) => {
+                    // If we found an explicit scroll root, store that and keep
+                    // looking up the tree, so that the outermost one wins.
+                    if let ScrollFrameKind::Explicit = info.frame_kind {
+                        scroll_root = node_index;
+                    }
+                }
+            }
+            node_index = node.parent.expect("unable to find parent node");
+        }
+
+        scroll_root
+    }
+
     fn get_complex_clips(
         &self,
         pipeline_id: PipelineId,
         complex_clips: ItemRange<ComplexClipRegion>,
     ) -> impl 'a + Iterator<Item = ComplexClipRegion> {
         //Note: we could make this a bit more complex to early out
         // on `complex_clips.is_empty()` if it's worth it
         self.scene
@@ -369,26 +545,34 @@ impl<'a> DisplayListFlattener<'a> {
         // positioning offsets.
         let frame_rect = item.clip_rect().translate(reference_frame_relative_offset);
         let content_rect = item.rect().translate(reference_frame_relative_offset);
 
         debug_assert!(info.clip_id != info.scroll_frame_id);
 
         self.add_clip_node(info.clip_id, clip_and_scroll_ids, clip_region);
 
-        self.add_scroll_frame(
+        let node_index = self.add_scroll_frame(
             info.scroll_frame_id,
             info.clip_id,
             info.external_id,
             pipeline_id,
             &frame_rect,
             &content_rect.size,
             info.scroll_sensitivity,
             ScrollFrameKind::Explicit,
         );
+
+        // TODO(gw): See description of picture_cache_scroll_root field for information
+        //           about this temporary hack. What it's trying to identify is the first
+        //           scroll root within the first iframe that we encounter in the display
+        //           list.
+        if self.picture_cache_scroll_root.is_none() && pipeline_id != self.scene.root_pipeline_id.unwrap() {
+            self.picture_cache_scroll_root = Some(node_index);
+        }
     }
 
     fn flatten_reference_frame(
         &mut self,
         traversal: &mut BuiltDisplayListIter<'a>,
         pipeline_id: PipelineId,
         item: &DisplayItemRef,
         reference_frame: &ReferenceFrame,
--- a/gfx/wr/webrender/src/frame_builder.rs
+++ b/gfx/wr/webrender/src/frame_builder.rs
@@ -48,16 +48,17 @@ impl Default for ChasePrimitive {
 #[derive(Clone, Copy)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct FrameBuilderConfig {
     pub default_font_render_mode: FontRenderMode,
     pub dual_source_blending_is_supported: bool,
     pub dual_source_blending_is_enabled: bool,
     pub chase_primitive: ChasePrimitive,
+    pub enable_picture_caching: bool,
 }
 
 /// A builder structure for `tiling::Frame`
 pub struct FrameBuilder {
     screen_rect: DeviceIntRect,
     background_color: Option<ColorF>,
     window_size: DeviceIntSize,
     root_pic_index: PictureIndex,
@@ -149,16 +150,17 @@ impl FrameBuilder {
             background_color: None,
             root_pic_index: PictureIndex(0),
             pending_retained_tiles: FastHashMap::default(),
             config: FrameBuilderConfig {
                 default_font_render_mode: FontRenderMode::Mono,
                 dual_source_blending_is_enabled: true,
                 dual_source_blending_is_supported: false,
                 chase_primitive: ChasePrimitive::Nothing,
+                enable_picture_caching: false,
             },
         }
     }
 
     /// Provide any cached surface tiles from the previous frame builder
     /// to a new frame builder. These will be consumed or dropped the
     /// first time a new frame builder creates a frame.
     pub fn set_retained_tiles(
--- a/gfx/wr/webrender/src/intern.rs
+++ b/gfx/wr/webrender/src/intern.rs
@@ -1,19 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use internal_types::FastHashMap;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::marker::PhantomData;
-use std::mem;
-use std::ops;
-use std::u64;
+use std::{mem, ops, u64};
+use util::VecHelper;
 
 /*
 
  The interning module provides a generic data structure
  interning container. It is similar in concept to a
  traditional string interning container, but it is
  specialized to the WR thread model.
 
@@ -55,17 +54,19 @@ impl Epoch {
 }
 
 /// A list of updates to be applied to the data store,
 /// provided by the interning structure.
 pub struct UpdateList<S> {
     /// The current epoch of the scene builder.
     epoch: Epoch,
     /// The additions and removals to apply.
-    updates: Vec<Update<S>>,
+    updates: Vec<Update>,
+    /// Actual new data to insert.
+    data: Vec<S>,
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
 pub struct ItemUid<T> {
     uid: usize,
     _marker: PhantomData<T>,
@@ -84,27 +85,27 @@ pub struct Handle<T> {
 impl <T> Handle<T> where T: Copy {
     pub fn uid(&self) -> ItemUid<T> {
         self.uid
     }
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum UpdateKind<S> {
-    Insert(S),
+pub enum UpdateKind {
+    Insert,
     Remove,
     UpdateEpoch,
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct Update<S> {
+pub struct Update {
     index: usize,
-    kind: UpdateKind<S>,
+    kind: UpdateKind,
 }
 
 /// The data item is stored with an epoch, for validating
 /// correct access patterns.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct Item<T> {
     epoch: Epoch,
@@ -132,37 +133,34 @@ impl<S, T, M> DataStore<S, T, M> where S
     }
 
     /// Apply any updates from the scene builder thread to
     /// this data store.
     pub fn apply_updates(
         &mut self,
         update_list: UpdateList<S>,
     ) {
+        let mut data_iter = update_list.data.into_iter();
         for update in update_list.updates {
             match update.kind {
-                UpdateKind::Insert(data) => {
-                    let item = Item {
-                        data: T::from(data),
+                UpdateKind::Insert => {
+                    self.items.entry(update.index).set(Item {
+                        data: T::from(data_iter.next().unwrap()),
                         epoch: update_list.epoch,
-                    };
-                    if self.items.len() == update.index {
-                        self.items.push(item)
-                    } else {
-                        self.items[update.index] = item;
-                    }
+                    });
                 }
                 UpdateKind::Remove => {
                     self.items[update.index].epoch = Epoch::INVALID;
                 }
                 UpdateKind::UpdateEpoch => {
                     self.items[update.index].epoch = update_list.epoch;
                 }
             }
         }
+        debug_assert!(data_iter.next().is_none());
     }
 }
 
 /// Retrieve an item from the store via handle
 impl<S, T, M> ops::Index<Handle<M>> for DataStore<S, T, M> {
     type Output = T;
     fn index(&self, handle: Handle<M>) -> &T {
         let item = &self.items[handle.index as usize];
@@ -189,33 +187,36 @@ impl<S, T, M> ops::IndexMut<Handle<M>> f
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct Interner<S : Eq + Hash + Clone + Debug, D, M> {
     /// Uniquely map an interning key to a handle
     map: FastHashMap<S, Handle<M>>,
     /// List of free slots in the data store for re-use.
     free_list: Vec<usize>,
     /// Pending list of updates that need to be applied.
-    updates: Vec<Update<S>>,
+    updates: Vec<Update>,
+    /// Pending new data to insert.
+    update_data: Vec<S>,
     /// The current epoch for the interner.
     current_epoch: Epoch,
     /// Incrementing counter for identifying stable values.
     next_uid: usize,
     /// The information associated with each interned
     /// item that can be accessed by the interner.
     local_data: Vec<Item<D>>,
 }
 
 impl<S, D, M> Interner<S, D, M> where S: Eq + Hash + Clone + Debug, M: Copy + Debug {
     /// Construct a new interner
     pub fn new() -> Self {
         Interner {
             map: FastHashMap::default(),
             free_list: Vec::new(),
             updates: Vec::new(),
+            update_data: Vec::new(),
             current_epoch: Epoch(1),
             next_uid: 0,
             local_data: Vec::new(),
         }
     }
 
     /// Intern a data structure, and return a handle to
     /// that data. The handle can then be stored in the
@@ -254,18 +255,19 @@ impl<S, D, M> Interner<S, D, M> where S:
         let index = match self.free_list.pop() {
             Some(index) => index,
             None => self.local_data.len(),
         };
 
         // Add a pending update to insert the new data.
         self.updates.push(Update {
             index,
-            kind: UpdateKind::Insert(data.clone()),
+            kind: UpdateKind::Insert,
         });
+        self.update_data.alloc().init(data.clone());
 
         // Generate a handle for access via the data store.
         let handle = Handle {
             index: index as u32,
             epoch: self.current_epoch,
             uid: ItemUid {
                 uid: self.next_uid,
                 _marker: PhantomData,
@@ -275,34 +277,31 @@ impl<S, D, M> Interner<S, D, M> where S:
 
         // Store this handle so the next time it is
         // interned, it gets re-used.
         self.map.insert(data.clone(), handle);
         self.next_uid += 1;
 
         // Create the local data for this item that is
         // being interned.
-        let local_item = Item {
+        self.local_data.entry(index).set(Item {
             epoch: self.current_epoch,
             data: f(),
-        };
-        if self.local_data.len() == index {
-            self.local_data.push(local_item);
-        } else {
-            self.local_data[index] = local_item;
-        }
+        });
 
         handle
     }
 
     /// Retrieve the pending list of updates for an interner
     /// that need to be applied to the data store. Also run
     /// a GC step that removes old entries.
     pub fn end_frame_and_get_pending_updates(&mut self) -> UpdateList<S> {
         let mut updates = mem::replace(&mut self.updates, Vec::new());
+        let data = mem::replace(&mut self.update_data, Vec::new());
+
         let free_list = &mut self.free_list;
         let current_epoch = self.current_epoch.0;
 
         // First, run a GC step. Walk through the handles, and
         // if we find any that haven't been used for some time,
         // remove them. If this ever shows up in profiles, we
         // can make the GC step partial (scan only part of the
         // map each frame). It also might make sense in the
@@ -322,16 +321,17 @@ impl<S, D, M> Interner<S, D, M> where S:
                 return false;
             }
 
             true
         });
 
         let updates = UpdateList {
             updates,
+            data,
             epoch: self.current_epoch,
         };
 
         // Begin the next epoch
         self.current_epoch = Epoch(self.current_epoch.0 + 1);
 
         updates
     }
--- a/gfx/wr/webrender/src/lib.rs
+++ b/gfx/wr/webrender/src/lib.rs
@@ -185,16 +185,18 @@ extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 #[cfg(feature = "debugger")]
 extern crate image as image_loader;
 #[cfg(feature = "debugger")]
 extern crate base64;
 #[cfg(all(feature = "capture", feature = "png"))]
 extern crate png;
+#[cfg(test)]
+extern crate rand;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
 pub use device::{build_shader_strings, ReadPixelsFormat, UploadMethod, VertexUsageHint};
 pub use device::{ProgramBinary, ProgramCache, ProgramCacheObserver};
 pub use device::Device;
 pub use frame_builder::ChasePrimitive;
--- a/gfx/wr/webrender/src/picture.rs
+++ b/gfx/wr/webrender/src/picture.rs
@@ -72,17 +72,18 @@ pub struct TileTransformInfo {
     /// Tiles check this to see if the dependencies have changed.
     changed: bool,
 }
 
 #[derive(Debug)]
 pub struct GlobalTransformInfo {
     /// Current (quantized) value of the transform, that is
     /// independent of the value of the spatial node index.
-    key: TransformKey,
+    /// Only calculated on first use.
+    current: Option<TransformKey>,
     /// Tiles check this to see if the dependencies have changed.
     changed: bool,
 }
 
 /// Information about the state of an opacity binding.
 #[derive(Debug)]
 pub struct OpacityBindingInfo {
     /// The current value retrieved from dynamic scene properties.
@@ -126,39 +127,55 @@ pub struct Tile {
     transform_info: Vec<TileTransformInfo>,
     /// Uniquely describes the content of this tile, in a way that can be
     /// (reasonably) efficiently hashed and compared.
     descriptor: TileDescriptor,
 }
 
 impl Tile {
     /// Construct a new, invalid tile.
-    fn new(tile_offset: TileOffset) -> Self {
+    fn new(
+        tile_offset: TileOffset,
+        local_tile_size: SizeKey,
+        raster_transform: TransformKey,
+    ) -> Self {
         Tile {
             opacity_bindings: FastHashSet::default(),
             image_keys: FastHashSet::default(),
             is_valid: false,
             is_visible: false,
             is_cacheable: true,
             in_use: false,
             handle: TextureCacheHandle::invalid(),
-            descriptor: TileDescriptor::new(tile_offset),
+            descriptor: TileDescriptor::new(
+                tile_offset,
+                local_tile_size,
+                raster_transform,
+            ),
             tile_transform_map: FastHashMap::default(),
             transform_info: Vec::new(),
         }
     }
 
     /// Add a (possibly) new transform dependency to this tile.
     fn push_transform_dependency(
         &mut self,
         spatial_node_index: SpatialNodeIndex,
         surface_spatial_node_index: SpatialNodeIndex,
         clip_scroll_tree: &ClipScrollTree,
-        global_transforms: &[GlobalTransformInfo],
+        global_transforms: &mut [GlobalTransformInfo],
     ) {
+        // If the primitive is positioned by the same spatial
+        // node as the surface, we don't care about it since
+        // the primitive can never move to a different position
+        // relative to the surface.
+        if spatial_node_index == surface_spatial_node_index {
+            return;
+        }
+
         let transform_info = &mut self.transform_info;
         let descriptor = &mut self.descriptor;
 
         // Get the mapping from unstable spatial node index to
         // a local transform index within this tile.
         let tile_transform_index = self
             .tile_transform_map
             .entry(spatial_node_index)
@@ -166,18 +183,27 @@ impl Tile {
                 let index = transform_info.len();
 
                 let mapping: CoordinateSpaceMapping<LayoutPixel, PicturePixel> = CoordinateSpaceMapping::new(
                     surface_spatial_node_index,
                     spatial_node_index,
                     clip_scroll_tree,
                 ).expect("todo: handle invalid mappings");
 
+                // See if the transform changed, and cache the current
+                // transform if not set before.
+                let changed = get_global_transform_changed(
+                    global_transforms,
+                    spatial_node_index,
+                    clip_scroll_tree,
+                    surface_spatial_node_index,
+                );
+
                 transform_info.push(TileTransformInfo {
-                    changed: global_transforms[spatial_node_index.0].changed,
+                    changed,
                     spatial_node_index,
                 });
 
                 let key = mapping.into();
 
                 descriptor.transforms.push(key);
 
                 TileTransformIndex(index as u32)
@@ -234,26 +260,30 @@ pub struct TileDescriptor {
 
     /// Identifies the raster configuration of the rasterization
     /// root, to ensure tiles are invalidated if they are drawn in
     /// screen-space with an incompatible transform.
     pub raster_transform: TransformKey,
 }
 
 impl TileDescriptor {
-    fn new(tile_offset: TileOffset) -> Self {
+    fn new(
+        tile_offset: TileOffset,
+        local_tile_size: SizeKey,
+        raster_transform: TransformKey,
+    ) -> Self {
         TileDescriptor {
             prim_uids: Vec::new(),
             clip_uids: Vec::new(),
             transform_ids: Vec::new(),
             opacity_bindings: Vec::new(),
             transforms: Vec::new(),
             tile_offset,
-            raster_transform: TransformKey::Local,
-            local_tile_size: SizeKey::zero(),
+            raster_transform,
+            local_tile_size,
         }
     }
 
     /// Clear the dependency information for a tile, when the dependencies
     /// are being rebuilt.
     fn clear(&mut self) {
         self.prim_uids.clear();
         self.clip_uids.clear();
@@ -295,16 +325,20 @@ pub struct TileCache {
     /// If true, we need to update the prim dependencies, due
     /// to relative transforms changing. The dependencies are
     /// stored in each tile, and are a list of things that
     /// force the tile to re-rasterize if they change (e.g.
     /// images, transforms).
     pub needs_update: bool,
     /// If Some(..) the region that is dirty in this picture.
     pub dirty_region: Option<DirtyRegion>,
+    /// The current transform of the surface itself, to allow
+    /// invalidating tiles if the surface transform changes.
+    /// This is only relevant when raster_space == RasterSpace::Screen.
+    raster_transform: TransformKey,
 }
 
 impl TileCache {
     /// Construct a new tile cache.
     pub fn new() -> Self {
         TileCache {
             tiles: Vec::new(),
             old_tiles: FastHashMap::default(),
@@ -313,16 +347,17 @@ impl TileCache {
             transforms: Vec::new(),
             opacity_bindings: FastHashMap::default(),
             needs_update: true,
             dirty_region: None,
             space_mapper: SpaceMapper::new(
                 ROOT_SPATIAL_NODE_INDEX,
                 PictureRect::zero(),
             ),
+            raster_transform: TransformKey::Local,
         }
     }
 
     /// Update the transforms array for this tile cache from the clip-scroll
     /// tree. This marks each transform as changed for later use during
     /// tile invalidation.
     pub fn update_transforms(
         &mut self,
@@ -364,39 +399,38 @@ impl TileCache {
         self.local_tile_size = local_tile_rect.size;
 
         // Walk the transforms and see if we need to rebuild the primitive
         // dependencies for each tile.
         // TODO(gw): We could be smarter here and only rebuild for the primitives
         //           which are affected by transforms that have changed.
         if self.transforms.len() == frame_context.clip_scroll_tree.spatial_nodes.len() {
             for (i, transform) in self.transforms.iter_mut().enumerate() {
-                let mapping: CoordinateSpaceMapping<LayoutPixel, PicturePixel> = CoordinateSpaceMapping::new(
-                    surface_spatial_node_index,
-                    SpatialNodeIndex(i),
-                    frame_context.clip_scroll_tree,
-                ).expect("todo: handle invalid mappings");
+                // If this relative transform was used on the previous frame,
+                // update it and store whether it changed for use during
+                // tile invalidation later.
+                if let Some(ref mut current) = transform.current {
+                    let mapping: CoordinateSpaceMapping<LayoutPixel, PicturePixel> = CoordinateSpaceMapping::new(
+                        surface_spatial_node_index,
+                        SpatialNodeIndex(i),
+                        frame_context.clip_scroll_tree,
+                    ).expect("todo: handle invalid mappings");
 
-                let key = mapping.into();
-                transform.changed = transform.key != key;
-                transform.key = key;
+                    let key = mapping.into();
+                    transform.changed = key != *current;
+                    *current = key;
+                }
             }
         } else {
             // If the size of the transforms array changed, just invalidate all the transforms for now.
             self.transforms.clear();
 
-            for i in 0 .. frame_context.clip_scroll_tree.spatial_nodes.len() {
-                let mapping: CoordinateSpaceMapping<LayoutPixel, PicturePixel> = CoordinateSpaceMapping::new(
-                    surface_spatial_node_index,
-                    SpatialNodeIndex(i),
-                    frame_context.clip_scroll_tree,
-                ).expect("todo: handle invalid mappings");
-
+            for _ in 0 .. frame_context.clip_scroll_tree.spatial_nodes.len() {
                 self.transforms.push(GlobalTransformInfo {
-                    key: mapping.into(),
+                    current: None,
                     changed: true,
                 });
             }
         };
 
         // Do a hacky diff of opacity binding values from the last frame. This is
         // used later on during tile invalidation tests.
         let current_properties = frame_context.scene_properties.float_properties();
@@ -408,17 +442,17 @@ impl TileCache {
             };
             self.opacity_bindings.insert(*id, OpacityBindingInfo {
                 value: *value,
                 changed,
             });
         }
 
         // Update the state of the transform for compositing this picture.
-        let raster_transform = match raster_space {
+        self.raster_transform = match raster_space {
             RasterSpace::Screen => {
                 // In general cases, if we're rasterizing a picture in screen space, then the
                 // value of the surface spatial node will affect the contents of the picture
                 // itself. However, if the surface and raster spatial nodes are in the same
                 // coordinate system (which is the common case!) then we are effectively drawing
                 // in a local space anyway, so don't care about that transform for the purposes
                 // of validating the surface cache contents.
 
@@ -441,17 +475,17 @@ impl TileCache {
         };
 
         // Walk the transforms and see if we need to rebuild the primitive
         // dependencies for each tile.
         // TODO(gw): We could be smarter here and only rebuild for the primitives
         //           which are affected by transforms that have changed.
         for tile in &mut self.tiles {
             tile.descriptor.local_tile_size = self.local_tile_size.into();
-            tile.descriptor.raster_transform = raster_transform.clone();
+            tile.descriptor.raster_transform = self.raster_transform.clone();
 
             debug_assert_eq!(tile.transform_info.len(), tile.descriptor.transforms.len());
             for (info, transform) in tile.transform_info.iter_mut().zip(tile.descriptor.transforms.iter_mut()) {
                 let mapping: CoordinateSpaceMapping<LayoutPixel, PicturePixel> = CoordinateSpaceMapping::new(
                     surface_spatial_node_index,
                     info.spatial_node_index,
                     frame_context.clip_scroll_tree,
                 ).expect("todo: handle invalid mappings");
@@ -529,20 +563,31 @@ impl TileCache {
                 // the tile address. This saves invalidating existing tiles when we
                 // just resize the picture by adding / removing primitives.
                 let tx = x0 - self.tile_rect.origin.x + x;
                 let ty = y0 - self.tile_rect.origin.y + y;
                 let tile_offset = TileOffset::new(x + x0, y + y0);
 
                 let tile = if tx >= 0 && ty >= 0 && tx < self.tile_rect.size.width && ty < self.tile_rect.size.height {
                     let index = (ty * self.tile_rect.size.width + tx) as usize;
-                    mem::replace(&mut self.tiles[index], Tile::new(tile_offset))
+                    mem::replace(
+                        &mut self.tiles[index],
+                        Tile::new(
+                            tile_offset,
+                            self.local_tile_size.into(),
+                            self.raster_transform.clone(),
+                        )
+                    )
                 } else {
                     self.old_tiles.remove(&tile_offset).unwrap_or_else(|| {
-                        Tile::new(tile_offset)
+                        Tile::new(
+                            tile_offset,
+                            self.local_tile_size.into(),
+                            self.raster_transform.clone(),
+                        )
                     })
                 };
                 new_tiles.push(tile);
             }
         }
 
         self.tiles = new_tiles;
         self.tile_rect.origin = TileOffset::new(x0, y0);
@@ -565,17 +610,25 @@ impl TileCache {
         self.space_mapper.set_target_spatial_node(
             prim_instance.spatial_node_index,
             clip_scroll_tree,
         );
 
         let prim_data = &prim_data_store[prim_instance.prim_data_handle];
 
         // Map the primitive local rect into the picture space.
-        let rect = match self.space_mapper.map(&prim_data.prim_rect) {
+        // TODO(gw): We should maybe store this in the primitive template
+        //           during interning so that we never have to calculate
+        //           it during frame building.
+        let culling_rect = match prim_data.prim_rect.intersection(&prim_data.clip_rect) {
+            Some(rect) => rect,
+            None => return,
+        };
+
+        let rect = match self.space_mapper.map(&culling_rect) {
             Some(rect) => rect,
             None => {
                 return;
             }
         };
 
         // If the rect is invalid, no need to create dependencies.
         // TODO(gw): Need to handle pictures with filters here.
@@ -698,26 +751,26 @@ impl TileCache {
                     tile.image_keys.insert(*image_key);
                 }
 
                 // Include the transform of the primitive itself.
                 tile.push_transform_dependency(
                     prim_instance.spatial_node_index,
                     surface_spatial_node_index,
                     clip_scroll_tree,
-                    &self.transforms,
+                    &mut self.transforms,
                 );
 
                 // Include the transforms of any relevant clip nodes for this primitive.
                 for clip_chain_spatial_node in &clip_chain_spatial_nodes {
                     tile.push_transform_dependency(
                         *clip_chain_spatial_node,
                         surface_spatial_node_index,
                         clip_scroll_tree,
-                        &self.transforms,
+                        &mut self.transforms,
                     );
                 }
 
                 // Include any opacity bindings this primitive depends on.
                 for id in &opacity_bindings {
                     if tile.opacity_bindings.insert(*id) {
                         tile.descriptor.opacity_bindings.push(*id);
                     }
@@ -738,19 +791,17 @@ impl TileCache {
         frame_context: &FrameBuildingContext,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         retained_tiles: &mut FastHashMap<TileDescriptor, TextureCacheHandle>,
     ) {
         self.needs_update = false;
 
         for (_, tile) in self.old_tiles.drain() {
-            if resource_cache.texture_cache.is_allocated(&tile.handle) {
-                resource_cache.texture_cache.mark_unused(&tile.handle);
-            }
+            resource_cache.texture_cache.mark_unused(&tile.handle);
         }
 
         let world_mapper = SpaceMapper::new_with_target(
             ROOT_SPATIAL_NODE_INDEX,
             surface_spatial_node_index,
             frame_context.screen_world_rect,
             frame_context.clip_scroll_tree,
         );
@@ -763,96 +814,98 @@ impl TileCache {
         let mut dirty_rect = PictureRect::zero();
 
         // Step through each tile and invalidate if the dependencies have changed.
         for y in 0 .. self.tile_rect.size.height {
             for x in 0 .. self.tile_rect.size.width {
                 let i = y * self.tile_rect.size.width + x;
                 let tile = &mut self.tiles[i as usize];
 
+                // If this tile is unused (has no primitives on it), we can just
+                // skip any invalidation / dirty region work for it.
+                if !tile.in_use {
+                    continue;
+                }
+
+                let tile_rect = PictureRect::new(
+                    PicturePoint::new(
+                        (self.tile_rect.origin.x + x) as f32 * self.local_tile_size.width,
+                        (self.tile_rect.origin.y + y) as f32 * self.local_tile_size.height,
+                    ),
+                    self.local_tile_size,
+                );
+
+                // Check if this tile is actually visible.
+                let tile_world_rect = world_mapper
+                    .map(&tile_rect)
+                    .expect("bug: unable to map tile to world coords");
+                tile.is_visible = frame_context.screen_world_rect.intersects(&tile_world_rect);
+
                 // Try to reuse cached tiles from the previous scene in this new
                 // scene, if possible.
-                if !resource_cache.texture_cache.is_allocated(&tile.handle) {
+                if tile.is_visible && !resource_cache.texture_cache.is_allocated(&tile.handle) {
                     // See if we have a retained tile from last scene that matches the
                     // exact content of this tile.
-                    if let Some(handle) = retained_tiles.remove(&tile.descriptor) {
+                    if let Some(retained_handle) = retained_tiles.remove(&tile.descriptor) {
                         // Only use if not evicted from texture cache in the meantime.
-                        if resource_cache.texture_cache.is_allocated(&handle) {
+                        if resource_cache.texture_cache.is_allocated(&retained_handle) {
                             // We found a matching tile from the previous scene, so use it!
-                            tile.handle = handle;
+                            tile.handle = retained_handle;
                             tile.is_valid = true;
                             // We know that the hash key of the descriptor validates that
                             // the local transforms in this tile exactly match the value
                             // of the current relative transforms needed for this tile,
                             // so we can mark those transforms as valid to avoid the
                             // retained tile being invalidated below.
                             for info in &mut tile.transform_info {
                                 info.changed = false;
                             }
                         }
                     }
                 }
 
-                let tile_rect = PictureRect::new(
-                    PicturePoint::new(
-                        (self.tile_rect.origin.x + x) as f32 * self.local_tile_size.width,
-                        (self.tile_rect.origin.y + y) as f32 * self.local_tile_size.height,
-                    ),
-                    self.local_tile_size,
-                );
-
                 // Invalidate the tile if not cacheable
                 if !tile.is_cacheable {
                     tile.is_valid = false;
                 }
 
-                if !tile.in_use {
-                    tile.is_valid = false;
-                }
-
                 // Invalidate the tile if any images have changed
                 for image_key in &tile.image_keys {
                     if resource_cache.is_image_dirty(*image_key) {
                         tile.is_valid = false;
                         break;
                     }
                 }
 
-                // Invalidate the tile if any dependent transforms changed
-                for info in &tile.transform_info {
-                    if info.changed {
-                        tile.is_valid = false;
-                        break;
-                    }
-                }
-
                 // Invalidate the tile if any opacity bindings changed.
                 for id in &tile.opacity_bindings {
                     let changed = match self.opacity_bindings.get(id) {
                         Some(info) => info.changed,
                         None => true,
                     };
                     if changed {
                         tile.is_valid = false;
                         break;
                     }
                 }
 
+                // Invalidate the tile if any dependent transforms changed
+                for info in &tile.transform_info {
+                    if info.changed {
+                        tile.is_valid = false;
+                        break;
+                    }
+                }
+
                 // Invalidate the tile if it was evicted by the texture cache.
                 if !resource_cache.texture_cache.is_allocated(&tile.handle) {
                     tile.is_valid = false;
                 }
 
-                // Check if this tile is actually visible.
-                let tile_world_rect = world_mapper
-                    .map(&tile_rect)
-                    .expect("bug: unable to map tile to world coords");
-                tile.is_visible = frame_context.screen_world_rect.intersects(&tile_world_rect);
-
-                if tile.is_visible && tile.in_use {
+                if tile.is_visible {
                     // Ensure we request the texture cache handle for this tile
                     // each frame it will be used so the texture cache doesn't
                     // decide to evict tiles that we currently want to use.
                     resource_cache.texture_cache.request(&tile.handle, gpu_cache);
 
                     // If we have an invalid tile, which is also visible, add it to the
                     // dirty rect we will need to draw.
                     if !tile.is_valid {
@@ -2115,30 +2168,30 @@ impl PicturePrimitive {
                         // Step through each tile and build the dirty rect
                         for y in 0 .. tile_cache.tile_rect.size.height {
                             for x in 0 .. tile_cache.tile_rect.size.width {
                                 let i = y * tile_cache.tile_rect.size.width + x;
                                 let tile = &mut tile_cache.tiles[i as usize];
 
                                 // If tile is invalidated, and on-screen, then we will
                                 // need to rasterize it.
-                                if !tile.is_valid && tile.is_visible {
+                                if !tile.is_valid && tile.is_visible && tile.in_use {
                                     // Notify the texture cache that we want to use this handle
                                     // and make sure it is allocated.
                                     frame_state.resource_cache.texture_cache.update(
                                         &mut tile.handle,
                                         descriptor,
                                         TextureFilter::Linear,
                                         None,
                                         [0.0; 3],
                                         DirtyRect::All,
                                         frame_state.gpu_cache,
                                         None,
                                         UvRectKind::Rect,
-                                        Eviction::Auto,
+                                        Eviction::Eager,
                                     );
 
                                     let cache_item = frame_state
                                         .resource_cache
                                         .get_texture_cache_item(&tile.handle);
 
                                     // Set up the blit command now that we know where the dest
                                     // rect is in the texture cache.
@@ -2615,8 +2668,33 @@ fn create_raster_mappers(
         raster_spatial_node_index,
         surface_spatial_node_index,
         raster_bounds,
         clip_scroll_tree,
     );
 
     (map_raster_to_world, map_pic_to_raster)
 }
+
+// Check whether a relative transform between two spatial nodes has changed
+// since last frame. If that relative transform hasn't been calculated, then
+// do that now and store it for later use.
+fn get_global_transform_changed(
+    global_transforms: &mut [GlobalTransformInfo],
+    spatial_node_index: SpatialNodeIndex,
+    clip_scroll_tree: &ClipScrollTree,
+    surface_spatial_node_index: SpatialNodeIndex,
+) -> bool {
+    let transform = &mut global_transforms[spatial_node_index.0];
+
+    if transform.current.is_none() {
+        let mapping: CoordinateSpaceMapping<LayoutPixel, PicturePixel> = CoordinateSpaceMapping::new(
+            surface_spatial_node_index,
+            spatial_node_index,
+            clip_scroll_tree,
+        ).expect("todo: handle invalid mappings");
+
+        transform.current = Some(mapping.into());
+        transform.changed = true;
+    }
+
+    transform.changed
+}
--- a/gfx/wr/webrender/src/render_backend.rs
+++ b/gfx/wr/webrender/src/render_backend.rs
@@ -98,17 +98,17 @@ impl FrameId {
     }
 
     /// Returns the backing usize for this FrameId.
     pub fn as_usize(&self) -> usize {
         self.0
     }
 
     /// Advances this FrameId to the next frame.
-    fn advance(&mut self) {
+    pub fn advance(&mut self) {
         self.0 += 1;
     }
 
     /// An invalid sentinel FrameId, which will always compare less than
     /// any valid FrameId.
     pub const INVALID: FrameId = FrameId(0);
 }
 
@@ -1106,16 +1106,19 @@ impl RenderBackend {
             let (blob_rasterizer, blob_requests) = self.resource_cache
                 .create_blob_scene_builder_requests(&blobs_to_rasterize);
 
             txn.blob_requests = blob_requests;
             txn.blob_rasterizer = blob_rasterizer;
         }
 
         if !transaction_msg.use_scene_builder_thread && txn.can_skip_scene_builder() {
+            if let Some(rasterizer) = txn.blob_rasterizer.take() {
+                self.resource_cache.set_blob_rasterizer(rasterizer);
+            }
             self.update_document(
                 txn.document_id,
                 replace(&mut txn.resource_updates, Vec::new()),
                 None,
                 replace(&mut txn.frame_ops, Vec::new()),
                 replace(&mut txn.notifications, Vec::new()),
                 txn.render_frame,
                 txn.invalidate_rendered_frame,
--- a/gfx/wr/webrender/src/render_task.rs
+++ b/gfx/wr/webrender/src/render_task.rs
@@ -139,18 +139,22 @@ impl RenderTaskTree {
         }
 
         let pass_index = if task.is_global_cached_task() {
             0
         } else {
             pass_index
         };
 
-        let pass = &mut passes[pass_index];
-        pass.add_render_task(id, task.get_dynamic_size(), task.target_kind(), &task.location);
+        passes[pass_index].add_render_task(
+            id,
+            task.get_dynamic_size(),
+            task.target_kind(),
+            &task.location,
+        );
     }
 
     pub fn prepare_for_render(&mut self) {
         for task in &mut self.tasks {
             task.prepare_for_render();
         }
     }
 
--- a/gfx/wr/webrender/src/renderer.rs
+++ b/gfx/wr/webrender/src/renderer.rs
@@ -1821,16 +1821,17 @@ impl Renderer {
             (false, _) => FontRenderMode::Mono,
         };
 
         let config = FrameBuilderConfig {
             default_font_render_mode,
             dual_source_blending_is_enabled: true,
             dual_source_blending_is_supported: ext_dual_source_blending,
             chase_primitive: options.chase_primitive,
+            enable_picture_caching: options.enable_picture_caching,
         };
 
         let device_pixel_ratio = options.device_pixel_ratio;
         // First set the flags to default and later call set_debug_flags to ensure any
         // potential transition when enabling a flag is run.
         let debug_flags = DebugFlags::default();
         let payload_rx_for_backend = payload_rx.to_mpsc_receiver();
         let recorder = options.recorder;
--- a/gfx/wr/webrender/src/texture_allocator.rs
+++ b/gfx/wr/webrender/src/texture_allocator.rs
@@ -1,249 +1,264 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use std::slice::Iter;
 use util;
 
+//TODO: gather real-world statistics on the bin usage in order to assist the decision
+// on where to place the size thresholds.
+
+/// This is an optimization tweak to enable looking through all the free rectangles in a bin
+/// and choosing the smallest, as opposed to picking the first match.
+const FIND_SMALLEST_AREA: bool = false;
+
+const NUM_BINS: usize = 3;
 /// The minimum number of pixels on each side that we require for rects to be classified as
-/// "medium" within the free list.
-const MINIMUM_MEDIUM_RECT_SIZE: i32 = 16;
+/// particular bin of freelists.
+const MIN_RECT_AXIS_SIZES: [i32; NUM_BINS] = [1, 16, 32];
+
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
+struct FreeListBin(u8);
+
+#[derive(Debug, Clone, Copy)]
+struct FreeListIndex(usize);
 
-/// The minimum number of pixels on each side that we require for rects to be classified as
-/// "large" within the free list.
-const MINIMUM_LARGE_RECT_SIZE: i32 = 32;
+impl FreeListBin {
+    fn for_size(size: &DeviceIntSize) -> Self {
+        MIN_RECT_AXIS_SIZES
+            .iter()
+            .enumerate()
+            .rev()
+            .find(|(_, &min_size)| min_size <= size.width && min_size <= size.height)
+            .map(|(id, _)| FreeListBin(id as u8))
+            .expect("Unable to find a bin!")
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FreeRectSlice(pub u32);
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FreeRect {
+    slice: FreeRectSlice,
+    rect: DeviceIntRect,
+}
 
 /// A texture allocator using the guillotine algorithm with the rectangle merge improvement. See
 /// sections 2.2 and 2.2.5 in "A Thousand Ways to Pack the Bin - A Practical Approach to Two-
 /// Dimensional Rectangle Bin Packing":
 ///
 ///    http://clb.demon.fi/files/RectangleBinPack.pdf
 ///
 /// This approach was chosen because of its simplicity, good performance, and easy support for
 /// dynamic texture deallocation.
+///
+/// Note: the allocations are spread across multiple textures, and also are binned
+/// orthogonally in order to speed up the search.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct GuillotineAllocator {
-    texture_size: DeviceIntSize,
-    free_list: FreeRectList,
-    allocations: u32,
-    dirty: bool,
+pub struct ArrayAllocationTracker {
+    bins: [Vec<FreeRect>; NUM_BINS],
 }
 
-impl GuillotineAllocator {
-    pub fn new(texture_size: DeviceIntSize) -> GuillotineAllocator {
-        let mut page = GuillotineAllocator {
-            texture_size,
-            free_list: FreeRectList::new(),
-            allocations: 0,
-            dirty: false,
-        };
-        page.clear();
-        page
+impl ArrayAllocationTracker {
+    pub fn new() -> Self {
+        ArrayAllocationTracker {
+            bins: [
+                Vec::new(),
+                Vec::new(),
+                Vec::new(),
+            ],
+        }
     }
 
-    fn find_index_of_best_rect_in_bin(
-        &self,
-        bin: FreeListBin,
-        requested_dimensions: &DeviceIntSize,
-    ) -> Option<FreeListIndex> {
-        let mut smallest_index_and_area = None;
-        for (candidate_index, candidate_rect) in self.free_list.iter(bin).enumerate() {
-            if !requested_dimensions.fits_inside(&candidate_rect.size) {
-                continue;
-            }
-
-            let candidate_area = candidate_rect.size.width * candidate_rect.size.height;
-            smallest_index_and_area = Some((candidate_index, candidate_area));
-            break;
-        }
-
-        smallest_index_and_area.map(|(index, _)| FreeListIndex(bin, index))
+    fn push(&mut self, slice: FreeRectSlice, rect: DeviceIntRect) {
+        let id = FreeListBin::for_size(&rect.size).0 as usize;
+        self.bins[id].push(FreeRect {
+            slice,
+            rect,
+        })
     }
 
     /// Find a suitable rect in the free list. We choose the smallest such rect
     /// in terms of area (Best-Area-Fit, BAF).
     fn find_index_of_best_rect(
         &self,
         requested_dimensions: &DeviceIntSize,
-    ) -> Option<FreeListIndex> {
-        let bin = FreeListBin::for_size(requested_dimensions);
-        for &target_bin in &[FreeListBin::Small, FreeListBin::Medium, FreeListBin::Large] {
-            if bin <= target_bin {
-                if let Some(index) =
-                    self.find_index_of_best_rect_in_bin(target_bin, requested_dimensions)
-                {
-                    return Some(index);
+    ) -> Option<(FreeListBin, FreeListIndex)> {
+        let start_bin = FreeListBin::for_size(requested_dimensions);
+        (start_bin.0 .. NUM_BINS as u8)
+            .find_map(|id| if FIND_SMALLEST_AREA {
+                let mut smallest_index_and_area = None;
+                for (candidate_index, candidate) in self.bins[id as usize].iter().enumerate() {
+                    if requested_dimensions.width > candidate.rect.size.width ||
+                        requested_dimensions.height > candidate.rect.size.height
+                    {
+                        continue;
+                    }
+
+                    let candidate_area = candidate.rect.size.area();
+                    match smallest_index_and_area {
+                        Some((_, area)) if candidate_area >= area => continue,
+                        _ => smallest_index_and_area = Some((candidate_index, candidate_area)),
+                    }
                 }
-            }
-        }
-        None
+
+                smallest_index_and_area
+                    .map(|(index, _)| (FreeListBin(id), FreeListIndex(index)))
+            } else {
+                self.bins[id as usize]
+                    .iter()
+                    .position(|candidate| {
+                        requested_dimensions.width <= candidate.rect.size.width &&
+                        requested_dimensions.height <= candidate.rect.size.height
+                    })
+                    .map(|index| (FreeListBin(id), FreeListIndex(index)))
+            })
     }
 
-    pub fn allocate(&mut self, requested_dimensions: &DeviceIntSize) -> Option<DeviceIntPoint> {
-        if requested_dimensions.width == 0 || requested_dimensions.height == 0 {
-            return Some(DeviceIntPoint::new(0, 0));
-        }
-        let index = match self.find_index_of_best_rect(requested_dimensions) {
-            None => return None,
-            Some(index) => index,
-        };
-
-        // Remove the rect from the free list and decide how to guillotine it. We choose the split
-        // that results in the single largest area (Min Area Split Rule, MINAS).
-        let chosen_rect = self.free_list.remove(index);
+    // Split that results in the single largest area (Min Area Split Rule, MINAS).
+    fn split_guillotine(&mut self, chosen: &FreeRect, requested_dimensions: &DeviceIntSize) {
         let candidate_free_rect_to_right = DeviceIntRect::new(
             DeviceIntPoint::new(
-                chosen_rect.origin.x + requested_dimensions.width,
-                chosen_rect.origin.y,
+                chosen.rect.origin.x + requested_dimensions.width,
+                chosen.rect.origin.y,
             ),
             DeviceIntSize::new(
-                chosen_rect.size.width - requested_dimensions.width,
+                chosen.rect.size.width - requested_dimensions.width,
                 requested_dimensions.height,
             ),
         );
         let candidate_free_rect_to_bottom = DeviceIntRect::new(
             DeviceIntPoint::new(
-                chosen_rect.origin.x,
-                chosen_rect.origin.y + requested_dimensions.height,
+                chosen.rect.origin.x,
+                chosen.rect.origin.y + requested_dimensions.height,
             ),
             DeviceIntSize::new(
                 requested_dimensions.width,
-                chosen_rect.size.height - requested_dimensions.height,
+                chosen.rect.size.height - requested_dimensions.height,
             ),
         );
-        let candidate_free_rect_to_right_area =
-            candidate_free_rect_to_right.size.width * candidate_free_rect_to_right.size.height;
-        let candidate_free_rect_to_bottom_area =
-            candidate_free_rect_to_bottom.size.width * candidate_free_rect_to_bottom.size.height;
 
         // Guillotine the rectangle.
         let new_free_rect_to_right;
         let new_free_rect_to_bottom;
-        if candidate_free_rect_to_right_area > candidate_free_rect_to_bottom_area {
+        if candidate_free_rect_to_right.size.area() > candidate_free_rect_to_bottom.size.area() {
             new_free_rect_to_right = DeviceIntRect::new(
                 candidate_free_rect_to_right.origin,
                 DeviceIntSize::new(
                     candidate_free_rect_to_right.size.width,
-                    chosen_rect.size.height,
+                    chosen.rect.size.height,
                 ),
             );
             new_free_rect_to_bottom = candidate_free_rect_to_bottom
         } else {
             new_free_rect_to_right = candidate_free_rect_to_right;
             new_free_rect_to_bottom = DeviceIntRect::new(
                 candidate_free_rect_to_bottom.origin,
                 DeviceIntSize::new(
-                    chosen_rect.size.width,
+                    chosen.rect.size.width,
                     candidate_free_rect_to_bottom.size.height,
                 ),
             )
         }
 
-        // Add the guillotined rects back to the free list. If any changes were made, we're now
-        // dirty since coalescing might be able to defragment.
+        // Add the guillotined rects back to the free list.
         if !util::rect_is_empty(&new_free_rect_to_right) {
-            self.free_list.push(&new_free_rect_to_right);
-            self.dirty = true
+            self.push(chosen.slice, new_free_rect_to_right);
         }
         if !util::rect_is_empty(&new_free_rect_to_bottom) {
-            self.free_list.push(&new_free_rect_to_bottom);
-            self.dirty = true
-        }
-
-        // Bump the allocation counter.
-        self.allocations += 1;
-
-        // Return the result.
-        Some(chosen_rect.origin)
-    }
-
-    fn clear(&mut self) {
-        self.free_list = FreeRectList::new();
-        self.free_list.push(&DeviceIntRect::new(
-            DeviceIntPoint::zero(),
-            self.texture_size,
-        ));
-        self.allocations = 0;
-        self.dirty = false;
-    }
-}
-
-/// A binning free list. Binning is important to avoid sifting through lots of small strips when
-/// allocating many texture items.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct FreeRectList {
-    small: Vec<DeviceIntRect>,
-    medium: Vec<DeviceIntRect>,
-    large: Vec<DeviceIntRect>,
-}
-
-impl FreeRectList {
-    fn new() -> Self {
-        FreeRectList {
-            small: vec![],
-            medium: vec![],
-            large: vec![],
+            self.push(chosen.slice, new_free_rect_to_bottom);
         }
     }
 
-    fn push(&mut self, rect: &DeviceIntRect) {
-        match FreeListBin::for_size(&rect.size) {
-            FreeListBin::Small => self.small.push(*rect),
-            FreeListBin::Medium => self.medium.push(*rect),
-            FreeListBin::Large => self.large.push(*rect),
+    pub fn allocate(
+        &mut self, requested_dimensions: &DeviceIntSize
+    ) -> Option<(FreeRectSlice, DeviceIntPoint)> {
+        if requested_dimensions.width == 0 || requested_dimensions.height == 0 {
+            return Some((FreeRectSlice(0), DeviceIntPoint::new(0, 0)));
         }
+        let (bin, index) = self.find_index_of_best_rect(requested_dimensions)?;
+
+        // Remove the rect from the free list and decide how to guillotine it.
+        let chosen = self.bins[bin.0 as usize].swap_remove(index.0);
+        self.split_guillotine(&chosen, requested_dimensions);
+
+        // Return the result.
+        Some((chosen.slice, chosen.rect.origin))
     }
 
-    fn remove(&mut self, index: FreeListIndex) -> DeviceIntRect {
-        match index.0 {
-            FreeListBin::Small => self.small.swap_remove(index.1),
-            FreeListBin::Medium => self.medium.swap_remove(index.1),
-            FreeListBin::Large => self.large.swap_remove(index.1),
-        }
-    }
-
-    fn iter(&self, bin: FreeListBin) -> Iter<DeviceIntRect> {
-        match bin {
-            FreeListBin::Small => self.small.iter(),
-            FreeListBin::Medium => self.medium.iter(),
-            FreeListBin::Large => self.large.iter(),
-        }
+    /// Add a new slice to the allocator, and immediately allocate a rect from it.
+    pub fn extend(
+        &mut self,
+        slice: FreeRectSlice,
+        total_size: DeviceIntSize,
+        requested_dimensions: DeviceIntSize,
+    ) {
+        self.split_guillotine(
+            &FreeRect { slice, rect: total_size.into() },
+            &requested_dimensions
+        );
     }
 }
 
-#[derive(Debug, Clone, Copy)]
-struct FreeListIndex(FreeListBin, usize);
+#[cfg(test)]
+fn random_fill(count: usize, texture_size: i32) -> f32 {
+    use rand::{thread_rng, Rng};
+
+    let total_rect = DeviceIntRect::new(
+        DeviceIntPoint::zero(),
+        DeviceIntSize::new(texture_size, texture_size),
+    );
+    let mut rng = thread_rng();
+    let mut allocator = ArrayAllocationTracker::new();
+    let mut slices: Vec<Vec<DeviceIntRect>> = Vec::new();
+    let mut requested_area = 0f32;
+    // fill up the allocator
+    for _ in 0 .. count {
+        let size = DeviceIntSize::new(
+            rng.gen_range(1, texture_size),
+            rng.gen_range(1, texture_size),
+        );
+        requested_area += size.area() as f32;
 
-#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
-enum FreeListBin {
-    Small,
-    Medium,
-    Large,
+        match allocator.allocate(&size) {
+            Some((slice, origin)) => {
+                let rect = DeviceIntRect::new(origin, size);
+                assert_eq!(None, slices[slice.0 as usize].iter().find(|r| r.intersects(&rect)));
+                assert!(total_rect.contains_rect(&rect));
+                slices[slice.0 as usize].push(rect);
+            }
+            None => {
+                allocator.extend(FreeRectSlice(slices.len() as u32), total_rect.size, size);
+                let rect = DeviceIntRect::new(DeviceIntPoint::zero(), size);
+                slices.push(vec![rect]);
+            }
+        }
+    }
+    // validate the free rects
+    for (i, free_vecs) in allocator.bins.iter().enumerate() {
+        for fr in free_vecs {
+            assert_eq!(FreeListBin(i as u8), FreeListBin::for_size(&fr.rect.size));
+            assert_eq!(None, slices[fr.slice.0 as usize].iter().find(|r| r.intersects(&fr.rect)));
+            assert!(total_rect.contains_rect(&fr.rect));
+            slices[fr.slice.0 as usize].push(fr.rect);
+        }
+    }
+
+    let allocated_area = slices.len() as f32 * (texture_size * texture_size) as f32;
+    requested_area / allocated_area
 }
 
-impl FreeListBin {
-    fn for_size(size: &DeviceIntSize) -> FreeListBin {
-        if size.width >= MINIMUM_LARGE_RECT_SIZE && size.height >= MINIMUM_LARGE_RECT_SIZE {
-            FreeListBin::Large
-        } else if size.width >= MINIMUM_MEDIUM_RECT_SIZE && size.height >= MINIMUM_MEDIUM_RECT_SIZE
-        {
-            FreeListBin::Medium
-        } else {
-            debug_assert!(size.width > 0 && size.height > 0);
-            FreeListBin::Small
-        }
-    }
+#[test]
+fn test_small() {
+    random_fill(100, 100);
 }
 
-trait FitsInside {
-    fn fits_inside(&self, other: &Self) -> bool;
+#[test]
+fn test_large() {
+    random_fill(1000, 10000);
 }
-
-impl FitsInside for DeviceIntSize {
-    fn fits_inside(&self, other: &DeviceIntSize) -> bool {
-        self.width <= other.width && self.height <= other.height
-    }
-}
--- a/gfx/wr/webrender/src/texture_cache.rs
+++ b/gfx/wr/webrender/src/texture_cache.rs
@@ -848,17 +848,31 @@ impl TextureCache {
         // the frame age threshold. Reverse order avoids iterator invalidation when
         // removing entries.
         for i in (0..self.handles.select(kind).len()).rev() {
             let evict = {
                 let entry = self.entries.get(&self.handles.select(kind)[i]);
                 match entry.eviction {
                     Eviction::Manual => false,
                     Eviction::Auto => threshold.should_evict(entry.last_access),
-                    Eviction::Eager => entry.last_access < self.now,
+                    Eviction::Eager => {
+                        // Texture cache entries can be evicted at the start of
+                        // a frame, or at any time during the frame when a cache
+                        // allocation is occurring. This means that entries tagged
+                        // with eager eviction may get evicted before they have a
+                        // chance to be requested on the current frame. Instead,
+                        // advance the frame id of the entry by one before
+                        // comparison. This ensures that an eager entry will
+                        // not be evicted until it is not used for at least
+                        // one complete frame.
+                        let mut entry_frame_id = entry.last_access.frame_id();
+                        entry_frame_id.advance();
+
+                        entry_frame_id < self.now.frame_id()
+                    }
                 }
             };
             if evict {
                 let handle = self.handles.select(kind).swap_remove(i);
                 let entry = self.entries.free(handle);
                 entry.evict();
                 self.free(entry);
             }
--- a/gfx/wr/webrender/src/tiling.rs
+++ b/gfx/wr/webrender/src/tiling.rs
@@ -20,28 +20,31 @@ use pathfinder_partitioner::mesh::Mesh;
 use picture::SurfaceInfo;
 use prim_store::{PrimitiveStore, DeferredResolve, PrimitiveScratchBuffer};
 use profiler::FrameProfileCounters;
 use render_backend::{FrameId, FrameResources};
 use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind, TileBlit};
 use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree, ScalingTask};
 use resource_cache::ResourceCache;
 use std::{cmp, usize, f32, i32, mem};
-use texture_allocator::GuillotineAllocator;
+use texture_allocator::{ArrayAllocationTracker, FreeRectSlice};
 #[cfg(feature = "pathfinder")]
 use webrender_api::{DevicePixel, FontRenderMode};
 
 const STYLE_SOLID: i32 = ((BorderStyle::Solid as i32) << 8) | ((BorderStyle::Solid as i32) << 16);
 const STYLE_MASK: i32 = 0x00FF_FF00;
 
 /// According to apitrace, textures larger than 2048 break fast clear
 /// optimizations on some intel drivers. We sometimes need to go larger, but
 /// we try to avoid it. This can go away when proper tiling support lands,
 /// since we can then split large primitives across multiple textures.
 const IDEAL_MAX_TEXTURE_DIMENSION: i32 = 2048;
+/// If we ever need a larger texture than the ideal, we better round it up to a
+/// reasonable number in order to have a bit of leeway in placing things inside.
+const TEXTURE_DIMENSION_MASK: i32 = 0xFF;
 
 /// Identifies a given `RenderTarget` in a `RenderTargetList`.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTargetIndex(pub usize);
 
 pub struct RenderTargetContext<'a, 'rc> {
@@ -50,82 +53,32 @@ pub struct RenderTargetContext<'a, 'rc> 
     pub resource_cache: &'rc mut ResourceCache,
     pub use_dual_source_blending: bool,
     pub clip_scroll_tree: &'a ClipScrollTree,
     pub resources: &'a FrameResources,
     pub surfaces: &'a [SurfaceInfo],
     pub scratch: &'a PrimitiveScratchBuffer,
 }
 
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct TextureAllocator {
-    // TODO(gw): Replace this with a simpler allocator for
-    // render target allocation - this use case doesn't need
-    // to deal with coalescing etc that the general texture
-    // cache allocator requires.
-    allocator: GuillotineAllocator,
-
-    // Track the used rect of the render target, so that
-    // we can set a scissor rect and only clear to the
-    // used portion of the target as an optimization.
-    used_rect: DeviceIntRect,
-}
-
-impl TextureAllocator {
-    fn new(size: DeviceIntSize) -> Self {
-        TextureAllocator {
-            allocator: GuillotineAllocator::new(size),
-            used_rect: DeviceIntRect::zero(),
-        }
-    }
-
-    fn allocate(&mut self, size: &DeviceIntSize) -> Option<DeviceIntPoint> {
-        let origin = self.allocator.allocate(size);
-
-        if let Some(origin) = origin {
-            // TODO(gw): We need to make all the device rects
-            //           be consistent in the use of the
-            //           DeviceIntRect and DeviceIntRect types!
-            let origin = DeviceIntPoint::new(origin.x as i32, origin.y as i32);
-            let size = DeviceIntSize::new(size.width as i32, size.height as i32);
-            let rect = DeviceIntRect::new(origin, size);
-            self.used_rect = rect.union(&self.used_rect);
-        }
-
-        origin
-    }
-}
-
 /// Represents a number of rendering operations on a surface.
 ///
 /// In graphics parlance, a "render target" usually means "a surface (texture or
 /// framebuffer) bound to the output of a shader". This trait has a slightly
 /// different meaning, in that it represents the operations on that surface
 /// _before_ it's actually bound and rendered. So a `RenderTarget` is built by
 /// the `RenderBackend` by inserting tasks, and then shipped over to the
 /// `Renderer` where a device surface is resolved and the tasks are transformed
 /// into draw commands on that surface.
 ///
 /// We express this as a trait to generalize over color and alpha surfaces.
 /// a given `RenderTask` will draw to one or the other, depending on its type
 /// and sometimes on its parameters. See `RenderTask::target_kind`.
 pub trait RenderTarget {
     /// Creates a new RenderTarget of the given type.
-    fn new(
-        size: Option<DeviceIntSize>,
-        screen_size: DeviceIntSize,
-    ) -> Self;
-
-    /// Allocates a region of the given size in this target, and returns either
-    /// the offset of that region or `None` if it won't fit.
-    ///
-    /// If a non-`None` result is returned, that value is generally stored in
-    /// a task which is then added to this target via `add_task()`.
-    fn allocate(&mut self, size: DeviceIntSize) -> Option<DeviceIntPoint>;
+    fn new(screen_size: DeviceIntSize) -> Self;
 
     /// Optional hook to provide additional processing for the target at the
     /// end of the build phase.
     fn build(
         &mut self,
         _ctx: &mut RenderTargetContext,
         _gpu_cache: &mut GpuCache,
         _render_tasks: &mut RenderTaskTree,
@@ -150,18 +103,21 @@ pub trait RenderTarget {
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
         transforms: &mut TransformPalette,
         deferred_resolves: &mut Vec<DeferredResolve>,
     );
+
+    fn needs_depth(&self) -> bool;
+
     fn used_rect(&self) -> DeviceIntRect;
-    fn needs_depth(&self) -> bool;
+    fn add_used(&mut self, rect: DeviceIntRect);
 }
 
 /// A tag used to identify the output format of a `RenderTarget`.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTargetKind {
     Color, // RGBA8
@@ -203,29 +159,31 @@ pub struct RenderTargetList<T> {
     ///
     /// We initially create our per-slice allocators with a width and height of
     /// IDEAL_MAX_TEXTURE_DIMENSION. If we encounter a larger primitive, the
     /// allocation will fail, but we'll bump max_dynamic_size, which will cause the
     /// allocator for the next slice to be just large enough to accomodate it.
     pub max_dynamic_size: DeviceIntSize,
     pub targets: Vec<T>,
     pub saved_index: Option<SavedTargetIndex>,
+    pub alloc_tracker: ArrayAllocationTracker,
 }
 
 impl<T: RenderTarget> RenderTargetList<T> {
     fn new(
         screen_size: DeviceIntSize,
         format: ImageFormat,
     ) -> Self {
         RenderTargetList {
             screen_size,
             format,
             max_dynamic_size: DeviceIntSize::new(0, 0),
             targets: Vec::new(),
             saved_index: None,
+            alloc_tracker: ArrayAllocationTracker::new(),
         }
     }
 
     fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
@@ -246,66 +204,54 @@ impl<T: RenderTarget> RenderTargetList<T
                 deferred_resolves,
                 prim_headers,
                 transforms,
                 z_generator,
             );
         }
     }
 
-    fn add_task(
-        &mut self,
-        task_id: RenderTaskId,
-        ctx: &RenderTargetContext,
-        gpu_cache: &mut GpuCache,
-        render_tasks: &mut RenderTaskTree,
-        clip_store: &ClipStore,
-        transforms: &mut TransformPalette,
-        deferred_resolves: &mut Vec<DeferredResolve>,
-    ) {
-        self.targets.last_mut().unwrap().add_task(
-            task_id,
-            ctx,
-            gpu_cache,
-            render_tasks,
-            clip_store,
-            transforms,
-            deferred_resolves,
-        );
-    }
-
     fn allocate(
         &mut self,
         alloc_size: DeviceIntSize,
-    ) -> (DeviceIntPoint, RenderTargetIndex) {
-        let existing_origin = self.targets
-            .last_mut()
-            .and_then(|target| target.allocate(alloc_size));
-
-        let origin = match existing_origin {
-            Some(origin) => origin,
+    ) -> (RenderTargetIndex, DeviceIntPoint) {
+        let (free_rect_slice, origin) = match self.alloc_tracker.allocate(&alloc_size) {
+            Some(allocation) => allocation,
             None => {
                 // Have the allocator restrict slice sizes to our max ideal
                 // dimensions, unless we've already gone bigger on a previous
                 // slice.
+                let rounded_dimensions = DeviceIntSize::new(
+                    (self.max_dynamic_size.width + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
+                    (self.max_dynamic_size.height + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
+                );
                 let allocator_dimensions = DeviceIntSize::new(
-                    cmp::max(IDEAL_MAX_TEXTURE_DIMENSION, self.max_dynamic_size.width),
-                    cmp::max(IDEAL_MAX_TEXTURE_DIMENSION, self.max_dynamic_size.height),
+                    cmp::max(IDEAL_MAX_TEXTURE_DIMENSION, rounded_dimensions.width),
+                    cmp::max(IDEAL_MAX_TEXTURE_DIMENSION, rounded_dimensions.height),
                 );
-                let mut new_target = T::new(Some(allocator_dimensions), self.screen_size);
-                let origin = new_target.allocate(alloc_size).expect(&format!(
-                    "Each render task must allocate <= size of one target! ({})",
-                    alloc_size
-                ));
-                self.targets.push(new_target);
-                origin
+
+                assert!(alloc_size.width <= allocator_dimensions.width &&
+                    alloc_size.height <= allocator_dimensions.height);
+                let slice = FreeRectSlice(self.targets.len() as u32);
+                self.targets.push(T::new(self.screen_size));
+
+                self.alloc_tracker.extend(
+                    slice,
+                    allocator_dimensions,
+                    alloc_size,
+                );
+
+                (slice, DeviceIntPoint::zero())
             }
         };
 
-        (origin, RenderTargetIndex(self.targets.len() - 1))
+        self.targets[free_rect_slice.0 as usize]
+            .add_used(DeviceIntRect::new(origin, alloc_size));
+
+        (RenderTargetIndex(free_rect_slice.0 as usize), origin)
     }
 
     pub fn needs_depth(&self) -> bool {
         self.targets.iter().any(|target| target.needs_depth())
     }
 
     pub fn check_ready(&self, t: &Texture) {
         let dimensions = t.get_dimensions();
@@ -384,46 +330,39 @@ pub struct ColorRenderTarget {
     pub horizontal_blurs: Vec<BlurInstance>,
     pub readbacks: Vec<DeviceIntRect>,
     pub scalings: Vec<ScalingInstance>,
     pub blits: Vec<BlitJob>,
     // List of frame buffer outputs for this render target.
     pub outputs: Vec<FrameOutput>,
     pub tile_blits: Vec<TileBlit>,
     pub color_clears: Vec<RenderTaskId>,
-    allocator: Option<TextureAllocator>,
     alpha_tasks: Vec<RenderTaskId>,
     screen_size: DeviceIntSize,
+    // Track the used rect of the render target, so that
+    // we can set a scissor rect and only clear to the
+    // used portion of the target as an optimization.
+    pub used_rect: DeviceIntRect,
 }
 
 impl RenderTarget for ColorRenderTarget {
-    fn allocate(&mut self, size: DeviceIntSize) -> Option<DeviceIntPoint> {
-        self.allocator
-            .as_mut()
-            .expect("bug: calling allocate on framebuffer")
-            .allocate(&size)
-    }
-
-    fn new(
-        size: Option<DeviceIntSize>,
-        screen_size: DeviceIntSize,
-    ) -> Self {
+    fn new(screen_size: DeviceIntSize) -> Self {
         ColorRenderTarget {
             alpha_batch_containers: Vec::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             readbacks: Vec::new(),
             scalings: Vec::new(),
             blits: Vec::new(),
-            allocator: size.map(TextureAllocator::new),
             outputs: Vec::new(),
             alpha_tasks: Vec::new(),
             color_clears: Vec::new(),
             tile_blits: Vec::new(),
             screen_size,
+            used_rect: DeviceIntRect::zero(),
         }
     }
 
     fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
@@ -595,62 +534,59 @@ impl RenderTarget for ColorRenderTarget 
                     BlitSource::RenderTask { .. } => {
                         panic!("BUG: render task blit jobs to render tasks not supported");
                     }
                 }
             }
         }
     }
 
-    fn used_rect(&self) -> DeviceIntRect {
-        self.allocator
-            .as_ref()
-            .expect("bug: used_rect called on framebuffer")
-            .used_rect
-    }
-
     fn needs_depth(&self) -> bool {
         self.alpha_batch_containers.iter().any(|ab| {
             !ab.opaque_batches.is_empty()
         })
     }
+
+    fn used_rect(&self) -> DeviceIntRect {
+        self.used_rect
+    }
+
+    fn add_used(&mut self, rect: DeviceIntRect) {
+        self.used_rect = self.used_rect.union(&rect);
+    }
 }
 
 /// Contains the work (in the form of instance arrays) needed to fill an alpha
 /// output surface (R8).
 ///
 /// See `RenderTarget`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub scalings: Vec<ScalingInstance>,
     pub zero_clears: Vec<RenderTaskId>,
-    allocator: TextureAllocator,
+    // Track the used rect of the render target, so that
+    // we can set a scissor rect and only clear to the
+    // used portion of the target as an optimization.
+    pub used_rect: DeviceIntRect,
 }
 
 impl RenderTarget for AlphaRenderTarget {
-    fn allocate(&mut self, size: DeviceIntSize) -> Option<DeviceIntPoint> {
-        self.allocator.allocate(&size)
-    }
-
-    fn new(
-        size: Option<DeviceIntSize>,
-        _: DeviceIntSize,
-    ) -> Self {
+    fn new(_screen_size: DeviceIntSize) -> Self {
         AlphaRenderTarget {
             clip_batcher: ClipBatcher::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             scalings: Vec::new(),
             zero_clears: Vec::new(),
-            allocator: TextureAllocator::new(size.expect("bug: alpha targets need size")),
+            used_rect: DeviceIntRect::zero(),
         }
     }
 
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
@@ -723,22 +659,26 @@ impl RenderTarget for AlphaRenderTarget 
                     &mut self.scalings,
                     render_tasks.get_task_address(task_id),
                     render_tasks.get_task_address(task.children[0]),
                 );
             }
         }
     }
 
-    fn used_rect(&self) -> DeviceIntRect {
-        self.allocator.used_rect
+    fn needs_depth(&self) -> bool {
+        false
     }
 
-    fn needs_depth(&self) -> bool {
-        false
+    fn used_rect(&self) -> DeviceIntRect {
+        self.used_rect
+    }
+
+    fn add_used(&mut self, rect: DeviceIntRect) {
+        self.used_rect = self.used_rect.union(&rect);
     }
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct TextureCacheRenderTarget {
     pub target_kind: RenderTargetKind,
     pub horizontal_blurs: Vec<BlurInstance>,
@@ -890,17 +830,17 @@ pub struct RenderPass {
     /// `RenderTaskTree`.
     tasks: Vec<RenderTaskId>,
 }
 
 impl RenderPass {
     /// Creates a pass for the main framebuffer. There is only one of these, and
     /// it is always the last pass.
     pub fn new_main_framebuffer(screen_size: DeviceIntSize) -> Self {
-        let target = ColorRenderTarget::new(None, screen_size);
+        let target = ColorRenderTarget::new(screen_size);
         RenderPass {
             kind: RenderPassKind::MainFramebuffer(target),
             tasks: vec![],
         }
     }
 
     /// Creates an intermediate off-screen pass.
     pub fn new_off_screen(screen_size: DeviceIntSize) -> Self {
@@ -997,76 +937,76 @@ impl RenderPass {
                 }) {
                     Some(render_tasks.save_target())
                 } else {
                     None
                 };
 
                 // Step through each task, adding to batches as appropriate.
                 for &task_id in &self.tasks {
-                    let (target_kind, texture_target) = {
+                    let (target_kind, texture_target, layer) = {
                         let task = &mut render_tasks[task_id];
                         let target_kind = task.target_kind();
 
                         // Find a target to assign this task to, or create a new
                         // one if required.
-                        let texture_target = match task.location {
+                        let (texture_target, layer) = match task.location {
                             RenderTaskLocation::TextureCache { texture, layer, .. } => {
-                                Some((texture, layer))
+                                (Some(texture), layer)
                             }
                             RenderTaskLocation::Fixed(..) => {
-                                None
+                                (None, 0)
                             }
                             RenderTaskLocation::Dynamic(ref mut origin, size) => {
-                                let (alloc_origin, target_index) =  match target_kind {
+                                let (target_index, alloc_origin) =  match target_kind {
                                     RenderTargetKind::Color => color.allocate(size),
                                     RenderTargetKind::Alpha => alpha.allocate(size),
                                 };
                                 *origin = Some((alloc_origin, target_index));
-                                None
+                                (None, target_index.0)
                             }
                         };
 
                         // Replace the pending saved index with a real one
                         if let Some(index) = task.saved_index {
                             assert_eq!(index, SavedTargetIndex::PENDING);
                             task.saved_index = match target_kind {
                                 RenderTargetKind::Color => saved_color,
                                 RenderTargetKind::Alpha => saved_alpha,
                             };
                         }
 
                         // Give the render task an opportunity to add any
                         // information to the GPU cache, if appropriate.
                         task.write_gpu_blocks(gpu_cache);
 
-                        (target_kind, texture_target)
+                        (target_kind, texture_target, layer)
                     };
 
                     match texture_target {
                         Some(texture_target) => {
                             let texture = texture_cache
-                                .entry(texture_target)
+                                .entry((texture_target, layer))
                                 .or_insert(
                                     TextureCacheRenderTarget::new(target_kind)
                                 );
                             texture.add_task(task_id, render_tasks);
                         }
                         None => {
                             match target_kind {
-                                RenderTargetKind::Color => color.add_task(
+                                RenderTargetKind::Color => color.targets[layer].add_task(
                                     task_id,
                                     ctx,
                                     gpu_cache,
                                     render_tasks,
                                     clip_store,
                                     transforms,
                                     deferred_resolves,
                                 ),
-                                RenderTargetKind::Alpha => alpha.add_task(
+                                RenderTargetKind::Alpha => alpha.targets[layer].add_task(
                                     task_id,
                                     ctx,
                                     gpu_cache,
                                     render_tasks,
                                     clip_store,
                                     transforms,
                                     deferred_resolves,
                                 ),
--- a/gfx/wr/webrender/src/util.rs
+++ b/gfx/wr/webrender/src/util.rs
@@ -12,17 +12,16 @@ use std::{i32, f32, fmt, ptr};
 use std::borrow::Cow;
 
 
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 /// A typesafe helper that separates new value construction from
 /// vector growing, allowing LLVM to ideally construct the element in place.
-#[must_use]
 pub struct Allocation<'a, T: 'a> {
     vec: &'a mut Vec<T>,
     index: usize,
 }
 
 impl<'a, T> Allocation<'a, T> {
     // writing is safe because alloc() ensured enough capacity
     // and `Allocation` holds a mutable borrow to prevent anyone else
@@ -32,31 +31,62 @@ impl<'a, T> Allocation<'a, T> {
         unsafe {
             ptr::write(self.vec.as_mut_ptr().add(self.index), value);
             self.vec.set_len(self.index + 1);
         }
         self.index
     }
 }
 
+/// An entry into a vector, similar to `std::collections::hash_map::Entry`.
+pub enum VecEntry<'a, T: 'a> {
+    Vacant(Allocation<'a, T>),
+    Occupied(&'a mut T),
+}
+
+impl<'a, T> VecEntry<'a, T> {
+    #[inline(always)]
+    pub fn set(self, value: T) {
+        match self {
+            VecEntry::Vacant(alloc) => { alloc.init(value); }
+            VecEntry::Occupied(slot) => { *slot = value; }
+        }
+    }
+}
+
 pub trait VecHelper<T> {
+    /// Grows the vector by a single entry, returning the allocation.
     fn alloc(&mut self) -> Allocation<T>;
+    /// Either returns an existing element, or grows the vector by one.
+    /// Doesn't expect indices to be higher than the current length.
+    fn entry(&mut self, index: usize) -> VecEntry<T>;
 }
 
 impl<T> VecHelper<T> for Vec<T> {
     fn alloc(&mut self) -> Allocation<T> {
         let index = self.len();
         if self.capacity() == index {
             self.reserve(1);
         }
         Allocation {
             vec: self,
             index,
         }
     }
+
+    fn entry(&mut self, index: usize) -> VecEntry<T> {
+        if index < self.len() {
+            VecEntry::Occupied(unsafe {
+                self.get_unchecked_mut(index)
+            })
+        } else {
+            assert_eq!(index, self.len());
+            VecEntry::Vacant(self.alloc())
+        }
+    }
 }
 
 
 // Represents an optimized transform where there is only
 // a scale and translation (which are guaranteed to maintain
 // an axis align rectangle under transformation). The
 // scaling is applied first, followed by the translation.
 // TODO(gw): We should try and incorporate F <-> T units here,
--- a/media/libdav1d/README_MOZILLA
+++ b/media/libdav1d/README_MOZILLA
@@ -16,9 +16,9 @@ To update to a specific upstream git tag
 The upstream git repository is https://aomedia.googlesource.com/aom
 
 To update to a fork, use
 
   ./mach vendor dav1d --repo <repository url> [-r <commit>]
 
 The last update was pulled from https://code.videolan.org/videolan/dav1d
 
-The git commit ID used was 46e2a2d0cc451e1d6bb929f80088f8a7b8940dd0 (2018-10-25T16:51:31.000Z).
+The git commit ID used was 197a19ad702d5e7472852efcde98feeb07f373e0 (2018-11-26T12:15:41.000Z).
--- a/media/libdav1d/moz.build
+++ b/media/libdav1d/moz.build
@@ -5,16 +5,17 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 Library('dav1d')
 
 LOCAL_INCLUDES += [
     '/third_party/dav1d',
     '/third_party/dav1d/include',
     '/third_party/dav1d/include/dav1d',
+    '/third_party/dav1d/src',
 ]
 
 if CONFIG['CPU_ARCH'] == 'x86':
     if CONFIG['OS_TARGET'] == 'Android':
         LOCAL_INCLUDES += ['/media/libdav1d/config/x86_32/android/']
         EXPORTS.dav1d += ['config/x86_32/android/config.h']
     else:
         LOCAL_INCLUDES += ['/media/libdav1d/config/x86_32/']
@@ -68,16 +69,17 @@ SOURCES += [
 # includes src
 EXPORTS.dav1d.src += [
     '../../third_party/dav1d/src/cdf.h',
     '../../third_party/dav1d/src/cpu.h',
     '../../third_party/dav1d/src/ctx.h',
     '../../third_party/dav1d/src/data.h',
     '../../third_party/dav1d/src/decode.h',
     '../../third_party/dav1d/src/dequant_tables.h',
+    '../../third_party/dav1d/src/film_grain.h',
     '../../third_party/dav1d/src/getbits.h',
     '../../third_party/dav1d/src/intra_edge.h',
     '../../third_party/dav1d/src/lf_mask.h',
     '../../third_party/dav1d/src/msac.h',
     '../../third_party/dav1d/src/obu.h',
     '../../third_party/dav1d/src/picture.h',
     '../../third_party/dav1d/src/qm.h',
     '../../third_party/dav1d/src/ref.h',
@@ -89,16 +91,17 @@ EXPORTS.dav1d.src += [
     '../../third_party/dav1d/src/wedge.h',
 ]
 
 # common BITDEPTH 8, 10
 relative_path = '../../third_party/dav1d/src/'
 bitdepth_basenames = [
     'cdef_apply_tmpl.c',
     'cdef_tmpl.c',
+    'film_grain_tmpl.c',
     'ipred_prepare_tmpl.c',
     'ipred_tmpl.c',
     'itx_tmpl.c',
     'lf_apply_tmpl.c',
     'loopfilter_tmpl.c',
     'looprestoration_tmpl.c',
     'lr_apply_tmpl.c',
     'mc_tmpl.c',
@@ -153,16 +156,17 @@ EXPORTS.dav1d += [
     '../../third_party/dav1d/include/common/validate.h',
 ]
 
 # include/dav1d
 EXPORTS.dav1d += [
    '../../third_party/dav1d/include/dav1d/common.h',
    '../../third_party/dav1d/include/dav1d/data.h',
    '../../third_party/dav1d/include/dav1d/dav1d.h',
+   '../../third_party/dav1d/include/dav1d/headers.h',
    '../../third_party/dav1d/include/dav1d/picture.h',
 ]
 
 if CONFIG['OS_TARGET'] == 'WINNT':
     RCFILE = 'dav1d.rc'
     SOURCES += [
         '../../third_party/dav1d/src/win32/thread.c'
     ]
@@ -174,8 +178,11 @@ if CONFIG['CC_TYPE'] == 'msvc':
 if CONFIG['CC_TYPE'] == 'gcc':
     LOCAL_INCLUDES += ['../../third_party/dav1d/include/compat/gcc/']
     EXPORTS.dav1d += ['../../third_party/dav1d/include/compat/gcc/stdatomic.h']
 
 FINAL_LIBRARY = 'gkmedias'
 
 # We allow warnings for third-party code that can be updated from upstream.
 AllowCompilerWarnings()
+
+# And furthermore, don't show warnings.
+DisableCompilerWarnings()
--- a/media/libdav1d/moz.yaml
+++ b/media/libdav1d/moz.yaml
@@ -15,15 +15,15 @@ origin:
   description: dav1d, a fast AV1 decoder
 
   # Full URL for the package's homepage/etc
   # Usually different from repository url
   url: https://code.videolan.org/videolan/dav1d
 
   # Human-readable identifier for this version/release
   # Generally "version NNN", "tag SSS", "bookmark SSS"
-  release: commit 46e2a2d0cc451e1d6bb929f80088f8a7b8940dd0
+  release: commit 36b807afe75040d9953bf63f68b67e6cd2fe4fc0
 
   # The package's license, where possible using the mnemonic from
   # https://spdx.org/licenses/
   # Multiple licenses can be specified (as a YAML list)
   # A "LICENSE" file must exist containing the full license text
   license: BSD-2-Clause
--- a/netwerk/base/mozurl/Cargo.toml
+++ b/netwerk/base/mozurl/Cargo.toml
@@ -1,10 +1,10 @@
 [package]
 name = "mozurl"
 version = "0.0.1"
 authors = ["Nika Layzell <nika@thelayzells.com>"]
 
 [dependencies]
-url = "1.5.1"
+url = "1.7.2"
 nsstring = { path = "../../../servo/support/gecko/nsstring" }
 nserror = { path = "../../../xpcom/rust/nserror" }
 xpcom = { path = "../../../xpcom/rust/xpcom" }
--- a/taskcluster/ci/test/test-platforms.yml
+++ b/taskcluster/ci/test/test-platforms.yml
@@ -194,16 +194,17 @@ windows10-64/opt:
         - windows-tests
         - mochitest-headless
         - raptor-firefox
 
 windows10-64-ux/opt:
     build-platform: win64-nightly/opt
     test-sets:
         - raptor-firefox
+        - talos-ux
 
 windows10-64-pgo/opt:
     build-platform: win64-pgo/opt
     test-sets:
         - awsy
         - desktop-screenshot-capture
         - marionette-gpu-tests
         - windows-talos
--- a/taskcluster/ci/test/test-sets.yml
+++ b/taskcluster/ci/test/test-sets.yml
@@ -71,16 +71,19 @@ talos:
     - talos-tp5o
     - talos-perf-reftest
     - talos-perf-reftest-singletons
     - talos-tp6-stylo-threads
     - talos-tps
     # - talos-h1 Bug 1487031 - Disabled for not finding actionable regressions
     # - talos-h2 Bug 1487031 - Disabled for not finding actionable regressions
 
+talos-ux:
+    - talos-g4
+
 raptor-firefox:
     - raptor-tp6-1-firefox
     - raptor-tp6-2-firefox
     - raptor-tp6-3-firefox
     - raptor-tp6-4-firefox
     - raptor-tp6-5-firefox
     - raptor-tp6-6-firefox
     - raptor-speedometer-firefox
--- a/third_party/dav1d/.gitlab-ci.yml
+++ b/third_party/dav1d/.gitlab-ci.yml
@@ -4,109 +4,110 @@ stages:
 
 build-debian:
     image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
     stage: build
     tags:
         - debian
         - amd64
     script:
-        - env CFLAGS='-Werror' meson build --buildtype release
+        - meson build --buildtype release --werror
         - ninja -C build
         - cd build && meson test -v
 
 build-debian-static:
     image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
     stage: build
     tags:
         - debian
         - amd64
     script:
-        - env CFLAGS='-Werror' meson build --buildtype release --default-library static
+        - meson build --buildtype release --default-library static --werror
         - ninja -C build
         - cd build && meson test -v
 
 build-win32:
     image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
     stage: build
     tags:
         - win32
     script:
-        - env CFLAGS='-Werror'
-            meson build --buildtype release
-                        --libdir lib
-                        --prefix "$(pwd)/build/dav1d_install"
-                        --cross-file /opt/crossfiles/i686-w64-mingw32.meson
-                        -Ddefault_library=both
+        - meson build --buildtype release
+                      --werror
+                      --libdir lib
+                      --prefix "$(pwd)/build/dav1d_install"
+                      --cross-file /opt/crossfiles/i686-w64-mingw32.meson
+                      -Ddefault_library=both
         - ninja -C build
         - ninja -C build install
     artifacts:
         name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
         paths:
             - build/dav1d_install/
         expire_in: 1 week
 
 build-win64:
     image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
     stage: build
     tags:
         - win64
     script:
-        - env CFLAGS='-Werror'
-            meson build --buildtype release
-                        --libdir lib
-                        --prefix "$(pwd)/build/dav1d_install"
-                        --cross-file /opt/crossfiles/x86_64-w64-mingw32.meson
-                        -Ddefault_library=both
+        - meson build --buildtype release
+                      --werror
+                      --libdir lib
+                      --prefix "$(pwd)/build/dav1d_install"
+                      --cross-file /opt/crossfiles/x86_64-w64-mingw32.meson
+                      -Ddefault_library=both
         - ninja -C build
         - ninja -C build install
     artifacts:
         name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
         paths:
             - build/dav1d_install/
         expire_in: 1 week
 
 build-debian-aarch64:
     stage: build
+    image: registry.videolan.org:5000/dav1d-debian-unstable-aarch64:20181122182457
     tags:
         - aarch64
         - debian
     script:
-        - env CFLAGS='-Werror' meson build --buildtype release
+        - meson build --buildtype release --werror
         - ninja -C build
         - cd build && meson test -v
 
 build-debian-aarch64-clang-5:
     stage: build
+    image: registry.videolan.org:5000/dav1d-debian-unstable-aarch64:20181122182457
     tags:
         - aarch64
-        - clang5
         - debian
     script:
         - env CC=clang-5.0 CFLAGS='-integrated-as' meson build --buildtype release
         - ninja -C build
         - cd build && meson test -v
 
 build-macos:
     stage: build
     tags:
         - macos
     script:
-        - env CFLAGS='-Werror' meson build --buildtype release -Ddefault_library=both
+        - meson build --buildtype release -Ddefault_library=both --werror
         - ninja -C build
         - cd build && meson test -v
 
 build-debian-werror:
-    image: dav1d-debian-aarch64:201810240631
+    image: registry.videolan.org:5000/dav1d-debian-unstable-aarch64:20181122182457
     stage: build
     tags:
         - aarch64
         - debian
     script:
-        - env CC='clang-7' CFLAGS='-Werror' meson build -Dbuild_tests=false
+        - env CC='clang-7' meson build --buildtype debug --werror
         - ninja -C build
 
 test-debian:
     image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
     stage: test
     tags:
         - debian
         - amd64
@@ -117,8 +118,71 @@ test-debian:
     script:
         - test -d cache || mkdir cache
         - test -d cache/dav1d-test-data.git && GIT_DIR=cache/dav1d-test-data.git git fetch --refmap=refs/heads/master:refs/heads/master origin master
         - test -d cache/dav1d-test-data.git || git clone --bare https://code.videolan.org/videolan/dav1d-test-data.git cache/dav1d-test-data.git
         - git clone cache/dav1d-test-data.git tests/dav1d-test-data
         - meson build --buildtype release -Dtestdata_tests=true
         - ninja -C build
         - cd build && time meson test -v
+
+test-debian-asan:
+    image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
+    stage: test
+    tags:
+        - debian
+        - amd64
+    cache:
+        key: testdata.git
+        paths:
+            - cache/dav1d-test-data.git/
+    variables:
+        ASAN_OPTIONS: 'detect_leaks=0'
+    script:
+        - test -d cache || mkdir cache
+        - test -d cache/dav1d-test-data.git && GIT_DIR=cache/dav1d-test-data.git git fetch --refmap=refs/heads/master:refs/heads/master origin master
+        - test -d cache/dav1d-test-data.git || git clone --bare https://code.videolan.org/videolan/dav1d-test-data.git cache/dav1d-test-data.git
+        - git clone cache/dav1d-test-data.git tests/dav1d-test-data
+        - meson build --buildtype debugoptimized -Dtestdata_tests=true -Db_sanitize=address -Dbuild_asm=false
+        - ninja -C build
+        - cd build && time meson test -v --setup=sanitizer
+
+test-debian-msan:
+    image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
+    stage: test
+    tags:
+        - debian
+        - amd64
+    cache:
+        key: testdata.git
+        paths:
+            - cache/dav1d-test-data.git/
+    variables:
+        MSAN_OPTIONS: 'exitcode=1'
+    script:
+        - test -d cache || mkdir cache
+        - test -d cache/dav1d-test-data.git && GIT_DIR=cache/dav1d-test-data.git git fetch --refmap=refs/heads/master:refs/heads/master origin master
+        - test -d cache/dav1d-test-data.git || git clone --bare https://code.videolan.org/videolan/dav1d-test-data.git cache/dav1d-test-data.git
+        - git clone cache/dav1d-test-data.git tests/dav1d-test-data
+        - env CC=clang meson build --buildtype debugoptimized -Dtestdata_tests=true -Db_sanitize=memory -Db_lundef=false -Dbuild_asm=false
+        - ninja -C build
+        - cd build && time meson test -v --setup=sanitizer
+
+test-debian-ubsan:
+    image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132
+    stage: test
+    tags:
+        - debian
+        - amd64
+    cache:
+        key: testdata.git
+        paths:
+            - cache/dav1d-test-data.git/
+    variables:
+        UBSAN_OPTIONS: 'print_stacktrace=1:halt_on_error=1'
+    script:
+        - test -d cache || mkdir cache
+        - test -d cache/dav1d-test-data.git && GIT_DIR=cache/dav1d-test-data.git git fetch --refmap=refs/heads/master:refs/heads/master origin master
+        - test -d cache/dav1d-test-data.git || git clone --bare https://code.videolan.org/videolan/dav1d-test-data.git cache/dav1d-test-data.git
+        - git clone cache/dav1d-test-data.git tests/dav1d-test-data
+        - env CC=clang meson build --buildtype debugoptimized -Dtestdata_tests=true -Db_sanitize=undefined -Db_lundef=false -Dbuild_asm=false
+        - ninja -C build
+        - cd build && time meson test -v --setup=sanitizer
new file mode 100644
--- /dev/null
+++ b/third_party/dav1d/doc/Doxyfile.in
@@ -0,0 +1,19 @@
+PROJECT_NAME            = dav1d
+OUTPUT_DIRECTORY        = @DOXYGEN_OUTPUT@
+STRIP_FROM_PATH         = @DOXYGEN_STRIP@
+OUTPUT_LANGUAGE         = English
+TAB_SIZE                = 4
+EXTRACT_ALL             = YES
+OPTIMIZE_OUTPUT_FOR_C   = YES
+DOXYFILE_ENCODING       = UTF-8
+TYPEDEF_HIDES_STRUCT    = YES
+
+QUIET                   = YES
+WARNINGS                = YES
+WARN_IF_UNDOCUMENTED    = YES
+
+INPUT                   = @DOXYGEN_INPUT@
+FILE_PATTERNS           = *.h
+
+GENERATE_HTML           = YES
+GENERATE_LATEX          = NO
new file mode 100644
--- /dev/null
+++ b/third_party/dav1d/doc/meson.build
@@ -0,0 +1,42 @@
+# Copyright © 2018, VideoLAN and dav1d authors
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+doxygen = find_program('doxygen', required: false)
+
+if doxygen.found()
+    conf_data = configuration_data()
+    conf_data.set('DOXYGEN_INPUT', join_paths(meson.source_root(), 'include/dav1d'))
+    conf_data.set('DOXYGEN_STRIP', join_paths(meson.source_root(), 'include'))
+    conf_data.set('DOXYGEN_OUTPUT', meson.current_build_dir())
+    doxyfile = configure_file(input: 'Doxyfile.in',
+                              output: 'Doxyfile',
+                              configuration: conf_data)
+
+    custom_target('doc',
+                  build_by_default: false,
+                  command: [doxygen, doxyfile],
+                  output: ['html']
+    )
+endif
+
--- a/third_party/dav1d/include/dav1d/common.h
+++ b/third_party/dav1d/include/dav1d/common.h
@@ -20,24 +20,42 @@
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __COMMON_H__
-#define __COMMON_H__
+#ifndef __DAV1D_COMMON_H__
+#define __DAV1D_COMMON_H__
+
+#include <stddef.h>
+#include <stdint.h>
 
 #ifndef DAV1D_API
     #if defined _WIN32
       #define DAV1D_API __declspec(dllexport)
     #else
       #if __GNUC__ >= 4
         #define DAV1D_API __attribute__ ((visibility ("default")))
       #else
         #define DAV1D_API
       #endif
     #endif
 #endif
 
-#endif // __COMMON_H__
+/**
+ * Input packet metadata which is copied from the input data used to
+ * decode each image into the matching structure of the output image
+ * returned to the user. Since these are metadata fields, they can be
+ * used for purposes other than the documented ones; they will still
+ * be passed from input data to output picture without being used
+ * internally.
+ */
+typedef struct Dav1dDataProps {
+    int64_t timestamp; ///< container timestamp of input data, INT64_MIN if unknown (default)
+    int64_t duration; ///< container duration of input data, 0 if unknown (default)
+    int64_t offset; ///< stream offset of input data, -1 if unknown (default)
+    size_t size; ///< packet size, default Dav1dData.sz
+} Dav1dDataProps;
+
+#endif // __DAV1D_COMMON_H__
--- a/third_party/dav1d/include/dav1d/data.h
+++ b/third_party/dav1d/include/dav1d/data.h
@@ -32,25 +32,26 @@
 #include <stdint.h>
 
 #include "common.h"
 
 typedef struct Dav1dData {
     const uint8_t *data; ///< data pointer
     size_t sz; ///< data size
     struct Dav1dRef *ref; ///< allocation origin
+    Dav1dDataProps m;
 } Dav1dData;
 
 /**
  * Allocate data.
  *
  * @param data Input context.
  * @param   sz Size of the data that should be allocated.
  *
- * @return Pointer to the allocated bufferon success. NULL on error.
+ * @return Pointer to the allocated buffer on success. NULL on error.
  */
 DAV1D_API uint8_t * dav1d_data_create(Dav1dData *data, size_t sz);
 
 /**
  * Wrap an existing data array.
  *
  * @param          data Input context.
  * @param           buf The data to be wrapped.
--- a/third_party/dav1d/include/dav1d/dav1d.h
+++ b/third_party/dav1d/include/dav1d/dav1d.h
@@ -36,20 +36,26 @@ extern "C" {
 
 #include "common.h"
 #include "picture.h"
 #include "data.h"
 
 typedef struct Dav1dContext Dav1dContext;
 typedef struct Dav1dRef Dav1dRef;
 
+#define DAV1D_MAX_FRAME_THREADS 256
+#define DAV1D_MAX_TILE_THREADS 64
+
 typedef struct Dav1dSettings {
     int n_frame_threads;
     int n_tile_threads;
     Dav1dPicAllocator allocator;
+    int apply_grain;
+    int operating_point; ///< select an operating point for scalable AV1 bitstreams (0 - 31)
+    int all_layers; ///< output all spatial layers of a scalable AV1 bitstream
 } Dav1dSettings;
 
 /**
  * Get library version.
  */
 DAV1D_API const char *dav1d_version(void);
 
 /**
@@ -69,16 +75,32 @@ DAV1D_API void dav1d_default_settings(Da
  * @note The context must be freed using dav1d_close() when decoding is
  *       finished.
  *
  * @return 0 on success, or < 0 (a negative errno code) on error.
  */
 DAV1D_API int dav1d_open(Dav1dContext **c_out, const Dav1dSettings *s);
 
 /**
+ * Parse a Sequence Header OBU from bitstream data.
+ *
+ * @param out Output Sequence Header.
+ * @param buf The data to be parsed.
+ * @param sz  Size of the data.
+ *
+ * @return 0 on success, or < 0 (a negative errno code) on error.
+ *
+ * @note It is safe to feed this function data containing other OBUs than a
+ *       Sequence Header, as they will simply be ignored. If there is more than
+ *       one Sequence Header OBU present, only the last will be returned.
+ */
+DAV1D_API int dav1d_parse_sequence_header(Dav1dSequenceHeader *out,
+                                          const uint8_t *buf, const size_t sz);
+
+/**
  * Feed bitstream data to the decoder.
  *
  * @param   c Input decoder instance.
  * @param  in Input bitstream data. On success, ownership of the reference is
  *            passed to the library.
  *
  * @return
  *         0: Success, and the data was consumed.
@@ -101,30 +123,68 @@ DAV1D_API int dav1d_send_data(Dav1dConte
  *         0: Success, and a frame is returned.
  *   -EAGAIN: Not enough data to output a frame. dav1d_send_data() should be
  *            called with new input.
  *   other negative errno codes: Error during decoding or because of invalid
  *                               passed-in arguments.
  *
  * @note To drain buffered frames from the decoder (i.e. on end of stream),
  *       call this function until it returns -EAGAIN.
+ *
+ * @code{.c}
+ *  Dav1dData data = { 0 };
+ *  Dav1dPicture p = { 0 };
+ *  int res;
+ *
+ *  read_data(&data);
+ *  do {
+ *      res = dav1d_send_data(c, &data);
+ *      // Keep going even if the function can't consume the current data
+ *      // packet. It eventually will after one or more frames have been
+ *      // returned in this loop.
+ *      if (res < 0 && res != -EAGAIN)
+ *          free_and_abort();
+ *      res = dav1d_get_picture(c, &p);
+ *      if (res < 0) {
+ *          if (res != -EAGAIN)
+ *              free_and_abort();
+ *      } else
+ *          output_and_unref_picture(&p);
+ *  // Stay in the loop as long as there's data to consume.
+ *  } while (data.sz || read_data(&data) == SUCCESS);
+ *
+ *  // Handle EOS by draining all buffered frames.
+ *  do {
+ *      res = dav1d_get_picture(c, &p);
+ *      if (res < 0) {
+ *          if (res != -EAGAIN)
+ *              free_and_abort();
+ *      } else
+ *          output_and_unref_picture(&p);
+ *  } while (res == 0);
+ * @endcode
  */
 DAV1D_API int dav1d_get_picture(Dav1dContext *c, Dav1dPicture *out);
 
 /**
  * Close a decoder instance and free all associated memory.
  *
  * @param c_out The decoder instance to close. *c_out will be set to NULL.
  */
 DAV1D_API void dav1d_close(Dav1dContext **c_out);
 
 /**
- * Flush all delayed frames in decoder, to be used when seeking.
+ * Flush all delayed frames in decoder and clear internal decoder state,
+ * to be used when seeking.
  *
  * @param c Input decoder instance.
+ *
+ * @note Decoding will start only after a valid sequence header OBU is
+ *       delivered to dav1d_send_data().
+ *
  */
 DAV1D_API void dav1d_flush(Dav1dContext *c);
 
 # ifdef __cplusplus
 }
 # endif
 
 #endif /* __DAV1D_H__ */
new file mode 100644
--- /dev/null
+++ b/third_party/dav1d/include/dav1d/headers.h
@@ -0,0 +1,385 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_HEADERS_H__
+#define __DAV1D_HEADERS_H__
+
+// Constants from Section 3. "Symbols and abbreviated terms"
+#define DAV1D_MAX_CDEF_STRENGTHS 8
+#define DAV1D_MAX_OPERATING_POINTS 32
+#define DAV1D_MAX_TILE_COLS 64
+#define DAV1D_MAX_TILE_ROWS 64
+#define DAV1D_MAX_SEGMENTS 8
+#define DAV1D_NUM_REF_FRAMES 8
+#define DAV1D_PRIMARY_REF_NONE 7
+#define DAV1D_REFS_PER_FRAME 7
+#define DAV1D_TOTAL_REFS_PER_FRAME (DAV1D_REFS_PER_FRAME + 1)
+
+enum Dav1dTxfmMode {
+    DAV1D_TX_4X4_ONLY,
+    DAV1D_TX_LARGEST,
+    DAV1D_TX_SWITCHABLE,
+    DAV1D_N_TX_MODES,
+};
+
+enum Dav1dFilterMode {
+    DAV1D_FILTER_8TAP_REGULAR,
+    DAV1D_FILTER_8TAP_SMOOTH,
+    DAV1D_FILTER_8TAP_SHARP,
+    DAV1D_N_SWITCHABLE_FILTERS,
+    DAV1D_FILTER_BILINEAR = DAV1D_N_SWITCHABLE_FILTERS,
+    DAV1D_N_FILTERS,
+    DAV1D_FILTER_SWITCHABLE = DAV1D_N_FILTERS,
+};
+
+enum Dav1dAdaptiveBoolean {
+    DAV1D_OFF = 0,
+    DAV1D_ON = 1,
+    DAV1D_ADAPTIVE = 2,
+};
+
+enum Dav1dRestorationType {
+    DAV1D_RESTORATION_NONE,
+    DAV1D_RESTORATION_SWITCHABLE,
+    DAV1D_RESTORATION_WIENER,
+    DAV1D_RESTORATION_SGRPROJ,
+};
+
+enum Dav1dWarpedMotionType {
+    DAV1D_WM_TYPE_IDENTITY,
+    DAV1D_WM_TYPE_TRANSLATION,
+    DAV1D_WM_TYPE_ROT_ZOOM,
+    DAV1D_WM_TYPE_AFFINE,
+};
+
+typedef struct Dav1dWarpedMotionParams {
+    enum Dav1dWarpedMotionType type;
+    int32_t matrix[6];
+    union {
+        struct {
+            int16_t alpha, beta, gamma, delta;
+        };
+        int16_t abcd[4];
+    };
+} Dav1dWarpedMotionParams;
+
+enum Dav1dPixelLayout {
+    DAV1D_PIXEL_LAYOUT_I400, ///< monochrome
+    DAV1D_PIXEL_LAYOUT_I420, ///< 4:2:0 planar
+    DAV1D_PIXEL_LAYOUT_I422, ///< 4:2:2 planar
+    DAV1D_PIXEL_LAYOUT_I444, ///< 4:4:4 planar
+};
+
+enum Dav1dFrameType {
+    DAV1D_FRAME_TYPE_KEY = 0,    ///< Key Intra frame
+    DAV1D_FRAME_TYPE_INTER = 1,  ///< Inter frame
+    DAV1D_FRAME_TYPE_INTRA = 2,  ///< Non key Intra frame
+    DAV1D_FRAME_TYPE_SWITCH = 3, ///< Switch Inter frame
+};
+
+enum Dav1dColorPrimaries {
+    DAV1D_COLOR_PRI_BT709 = 1,
+    DAV1D_COLOR_PRI_UNKNOWN = 2,
+    DAV1D_COLOR_PRI_BT470M = 4,
+    DAV1D_COLOR_PRI_BT470BG = 5,
+    DAV1D_COLOR_PRI_BT601 = 6,
+    DAV1D_COLOR_PRI_SMPTE240 = 7,
+    DAV1D_COLOR_PRI_FILM = 8,
+    DAV1D_COLOR_PRI_BT2020 = 9,
+    DAV1D_COLOR_PRI_XYZ = 10,
+    DAV1D_COLOR_PRI_SMPTE431 = 11,
+    DAV1D_COLOR_PRI_SMPTE432 = 12,
+    DAV1D_COLOR_PRI_EBU3213 = 22,
+};
+
+enum Dav1dTransferCharacteristics {
+    DAV1D_TRC_BT709 = 1,
+    DAV1D_TRC_UNKNOWN = 2,
+    DAV1D_TRC_BT470M = 4,
+    DAV1D_TRC_BT470BG = 5,
+    DAV1D_TRC_BT601 = 6,
+    DAV1D_TRC_SMPTE240 = 7,
+    DAV1D_TRC_LINEAR = 8,
+    DAV1D_TRC_LOG100 = 9,         ///< logarithmic (100:1 range)
+    DAV1D_TRC_LOG100_SQRT10 = 10, ///< logarithmic (100*sqrt(10):1 range)
+    DAV1D_TRC_IEC61966 = 11,
+    DAV1D_TRC_BT1361 = 12,
+    DAV1D_TRC_SRGB = 13,
+    DAV1D_TRC_BT2020_10BIT = 14,
+    DAV1D_TRC_BT2020_12BIT = 15,
+    DAV1D_TRC_SMPTE2084 = 16,     ///< PQ
+    DAV1D_TRC_SMPTE428 = 17,
+    DAV1D_TRC_HLG = 18,           ///< hybrid log/gamma (BT.2100 / ARIB STD-B67)
+};
+
+enum Dav1dMatrixCoefficients {
+    DAV1D_MC_IDENTITY = 0,
+    DAV1D_MC_BT709 = 1,
+    DAV1D_MC_UNKNOWN = 2,
+    DAV1D_MC_FCC = 4,
+    DAV1D_MC_BT470BG = 5,
+    DAV1D_MC_BT601 = 6,
+    DAV1D_MC_SMPTE240 = 7,
+    DAV1D_MC_SMPTE_YCGCO = 8,
+    DAV1D_MC_BT2020_NCL = 9,
+    DAV1D_MC_BT2020_CL = 10,
+    DAV1D_MC_SMPTE2085 = 11,
+    DAV1D_MC_CHROMAT_NCL = 12, ///< Chromaticity-derived
+    DAV1D_MC_CHROMAT_CL = 13,
+    DAV1D_MC_ICTCP = 14,
+};
+
+enum Dav1dChromaSamplePosition {
+    DAV1D_CHR_UNKNOWN = 0,
+    DAV1D_CHR_VERTICAL = 1,  ///< Horizontally co-located with luma(0, 0)
+                           ///< sample, between two vertical samples
+    DAV1D_CHR_COLOCATED = 2, ///< Co-located with luma(0, 0) sample
+};
+
+typedef struct Dav1dSequenceHeader {
+    /**
+     * Stream profile, 0 for 8-10 bits/component 4:2:0 or monochrome;
+     * 1 for 8-10 bits/component 4:4:4; 2 for 4:2:2 at any bits/component,
+     * or 12 bits/component at any chroma subsampling.
+     */
+    int profile;
+    /**
+     * Maximum dimensions for this stream. In non-scalable streams, these
+     * are often the actual dimensions of the stream, although that is not
+     * a normative requirement.
+     */
+    int max_width, max_height;
+    enum Dav1dPixelLayout layout; ///< format of the picture
+    enum Dav1dColorPrimaries pri; ///< color primaries (av1)
+    enum Dav1dTransferCharacteristics trc; ///< transfer characteristics (av1)
+    enum Dav1dMatrixCoefficients mtrx; ///< matrix coefficients (av1)
+    enum Dav1dChromaSamplePosition chr; ///< chroma sample position (av1)
+    /**
+     * Pixel data uses JPEG pixel range ([0,255] for 8bits) instead of
+     * MPEG pixel range ([16,235] for 8bits luma, [16,240] for 8bits chroma).
+     */
+    int color_range;
+
+    int num_operating_points;
+    struct Dav1dSequenceHeaderOperatingPoint {
+        int major_level, minor_level;
+        int initial_display_delay;
+        int idc;
+        int tier;
+        int decoder_model_param_present;
+        int decoder_buffer_delay;
+        int encoder_buffer_delay;
+        int low_delay_mode;
+        int display_model_param_present;
+    } operating_points[DAV1D_MAX_OPERATING_POINTS];
+
+    int still_picture;
+    int reduced_still_picture_header;
+    int timing_info_present;
+    int num_units_in_tick;
+    int time_scale;
+    int equal_picture_interval;
+    int num_ticks_per_picture;
+    int decoder_model_info_present;
+    int encoder_decoder_buffer_delay_length;
+    int num_units_in_decoding_tick;
+    int buffer_removal_delay_length;
+    int frame_presentation_delay_length;
+    int display_model_info_present;
+    int width_n_bits, height_n_bits;
+    int frame_id_numbers_present;
+    int delta_frame_id_n_bits;
+    int frame_id_n_bits;
+    int sb128;
+    int filter_intra;
+    int intra_edge_filter;
+    int inter_intra;
+    int masked_compound;
+    int warped_motion;
+    int dual_filter;
+    int order_hint;
+    int jnt_comp;
+    int ref_frame_mvs;
+    enum Dav1dAdaptiveBoolean screen_content_tools;
+    enum Dav1dAdaptiveBoolean force_integer_mv;
+    int order_hint_n_bits;
+    int super_res;
+    int cdef;
+    int restoration;
+    /**
+     * 0, 1 and 2 mean 8, 10 or 12 bits/component, respectively. This is not
+     * exactly the same as 'hbd' from the spec; the spec's hbd distinguishes
+     * between 8 (0) and 10-12 (1) bits/component, and uses another element
+     * (twelve_bit) to distinguish between 10 and 12 bits/component. To get
+     * the spec's hbd, use !!hbd, and to get twelve_bit, use hbd == 2.
+     */
+    int hbd;
+    int ss_hor, ss_ver, monochrome;
+    int color_description_present;
+    int separate_uv_delta_q;
+    int film_grain_present;
+} Dav1dSequenceHeader;
+
+typedef struct Dav1dSegmentationData {
+    int delta_q;
+    int delta_lf_y_v, delta_lf_y_h, delta_lf_u, delta_lf_v;
+    int ref;
+    int skip;
+    int globalmv;
+} Dav1dSegmentationData;
+
+typedef struct Dav1dSegmentationDataSet {
+    Dav1dSegmentationData d[DAV1D_MAX_SEGMENTS];
+    int preskip;
+    int last_active_segid;
+} Dav1dSegmentationDataSet;
+
+typedef struct Dav1dLoopfilterModeRefDeltas {
+    int mode_delta[2 /* is_zeromv */];
+    int ref_delta[DAV1D_TOTAL_REFS_PER_FRAME];
+} Dav1dLoopfilterModeRefDeltas;
+
+typedef struct Dav1dFilmGrainData {
+    uint16_t seed;
+    int num_y_points;
+    uint8_t y_points[14][2 /* value, scaling */];
+    int chroma_scaling_from_luma;
+    int num_uv_points[2];
+    uint8_t uv_points[2][10][2 /* value, scaling */];
+    int scaling_shift;
+    int ar_coeff_lag;
+    int8_t ar_coeffs_y[24];
+    int8_t ar_coeffs_uv[2][25];
+    int ar_coeff_shift;
+    int grain_scale_shift;
+    int uv_mult[2];
+    int uv_luma_mult[2];
+    int uv_offset[2];
+    int overlap_flag;
+    int clip_to_restricted_range;
+} Dav1dFilmGrainData;
+
+typedef struct Dav1dFrameHeader {
+    enum Dav1dFrameType frame_type; ///< type of the picture
+    int width[2 /* { coded_width, superresolution_upscaled_width } */], height;
+    int frame_offset; ///< frame number
+    struct {
+        int present, update;
+        Dav1dFilmGrainData data;
+    } film_grain; ///< film grain parameters
+    int temporal_id, spatial_id; ///< temporal and spatial id of the frame for SVC
+
+    int show_existing_frame;
+    int existing_frame_idx;
+    int frame_id;
+    int frame_presentation_delay;
+    int show_frame;
+    int showable_frame;
+    int error_resilient_mode;
+    int disable_cdf_update;
+    int allow_screen_content_tools;
+    int force_integer_mv;
+    int frame_size_override;
+    int primary_ref_frame;
+    int buffer_removal_time_present;
+    struct Dav1dFrameHeaderOperatingPoint {
+        int buffer_removal_time;
+    } operating_points[DAV1D_MAX_OPERATING_POINTS];
+    int refresh_frame_flags;
+    int render_width, render_height;
+    struct {
+        int width_scale_denominator;
+        int enabled;
+    } super_res;
+    int have_render_size;
+    int allow_intrabc;
+    int frame_ref_short_signaling;
+    int refidx[DAV1D_REFS_PER_FRAME];
+    int hp;
+    enum Dav1dFilterMode subpel_filter_mode;
+    int switchable_motion_mode;
+    int use_ref_frame_mvs;
+    int refresh_context;
+    struct {
+        int uniform;
+        unsigned n_bytes;
+        int min_log2_cols, max_log2_cols, log2_cols, cols;
+        int min_log2_rows, max_log2_rows, log2_rows, rows;
+        uint16_t col_start_sb[DAV1D_MAX_TILE_COLS + 1];
+        uint16_t row_start_sb[DAV1D_MAX_TILE_ROWS + 1];
+        int update;
+    } tiling;
+    struct {
+        int yac;
+        int ydc_delta;
+        int udc_delta, uac_delta, vdc_delta, vac_delta;
+        int qm, qm_y, qm_u, qm_v;
+    } quant;
+    struct {
+        int enabled, update_map, temporal, update_data;
+        Dav1dSegmentationDataSet seg_data;
+        int lossless[DAV1D_MAX_SEGMENTS], qidx[DAV1D_MAX_SEGMENTS];
+    } segmentation;
+    struct {
+        struct {
+            int present;
+            int res_log2;
+        } q;
+        struct {
+            int present;
+            int res_log2;
+            int multi;
+        } lf;
+    } delta;
+    int all_lossless;
+    struct {
+        int level_y[2 /* dir */];
+        int level_u, level_v;
+        int mode_ref_delta_enabled;
+        int mode_ref_delta_update;
+        Dav1dLoopfilterModeRefDeltas mode_ref_deltas;
+        int sharpness;
+    } loopfilter;
+    struct {
+        int damping;
+        int n_bits;
+        int y_strength[DAV1D_MAX_CDEF_STRENGTHS];
+        int uv_strength[DAV1D_MAX_CDEF_STRENGTHS];
+    } cdef;
+    struct {
+        enum Dav1dRestorationType type[3 /* plane */];
+        int unit_size[2 /* y, uv */];
+    } restoration;
+    enum Dav1dTxfmMode txfm_mode;
+    int switchable_comp_refs;
+    int skip_mode_allowed, skip_mode_enabled, skip_mode_refs[2];
+    int warp_motion;
+    int reduced_txtp_set;
+    Dav1dWarpedMotionParams gmv[DAV1D_REFS_PER_FRAME];
+} Dav1dFrameHeader;
+
+#endif /* __DAV1D_HEADERS_H__ */
--- a/third_party/dav1d/include/dav1d/picture.h
+++ b/third_party/dav1d/include/dav1d/picture.h
@@ -27,137 +27,56 @@
 
 #ifndef __DAV1D_PICTURE_H__
 #define __DAV1D_PICTURE_H__
 
 #include <stddef.h>
 #include <stdint.h>
 
 #include "common.h"
-
-enum Dav1dPixelLayout {
-    DAV1D_PIXEL_LAYOUT_I400, ///< monochrome
-    DAV1D_PIXEL_LAYOUT_I420, ///< 4:2:0 planar
-    DAV1D_PIXEL_LAYOUT_I422, ///< 4:2:2 planar
-    DAV1D_PIXEL_LAYOUT_I444, ///< 4:4:4 planar
-};
-
-enum Dav1dFrameType {
-    DAV1D_FRAME_TYPE_KEY = 0,    ///< Key Intra frame
-    DAV1D_FRAME_TYPE_INTER = 1,  ///< Inter frame
-    DAV1D_FRAME_TYPE_INTRA = 2,  ///< Non key Intra frame
-    DAV1D_FRAME_TYPE_SWITCH = 3, ///< Switch Inter frame
-};
-
-enum Dav1dColorPrimaries {
-    DAV1D_COLOR_PRI_BT709 = 1,
-    DAV1D_COLOR_PRI_UNKNOWN = 2,
-    DAV1D_COLOR_PRI_BT470M = 4,
-    DAV1D_COLOR_PRI_BT470BG = 5,
-    DAV1D_COLOR_PRI_BT601 = 6,
-    DAV1D_COLOR_PRI_SMPTE240 = 7,
-    DAV1D_COLOR_PRI_FILM = 8,
-    DAV1D_COLOR_PRI_BT2020 = 9,
-    DAV1D_COLOR_PRI_XYZ = 10,
-    DAV1D_COLOR_PRI_SMPTE431 = 11,
-    DAV1D_COLOR_PRI_SMPTE432 = 12,
-    DAV1D_COLOR_PRI_EBU3213 = 22,
-};
-
-enum Dav1dTransferCharacteristics {
-    DAV1D_TRC_BT709 = 1,
-    DAV1D_TRC_UNKNOWN = 2,
-    DAV1D_TRC_BT470M = 4,
-    DAV1D_TRC_BT470BG = 5,
-    DAV1D_TRC_BT601 = 6,
-    DAV1D_TRC_SMPTE240 = 7,
-    DAV1D_TRC_LINEAR = 8,
-    DAV1D_TRC_LOG100 = 9,         ///< logarithmic (100:1 range)
-    DAV1D_TRC_LOG100_SQRT10 = 10, ///< lograithmic (100*sqrt(10):1 range)
-    DAV1D_TRC_IEC61966 = 11,
-    DAV1D_TRC_BT1361 = 12,
-    DAV1D_TRC_SRGB = 13,
-    DAV1D_TRC_BT2020_10BIT = 14,
-    DAV1D_TRC_BT2020_12BIT = 15,
-    DAV1D_TRC_SMPTE2084 = 16,     ///< PQ
-    DAV1D_TRC_SMPTE428 = 17,
-    DAV1D_TRC_HLG = 18,           ///< hybrid log/gamma (BT.2100 / ARIB STD-B67)
-};
-
-enum Dav1dMatrixCoefficients {
-    DAV1D_MC_IDENTITY = 0,
-    DAV1D_MC_BT709 = 1,
-    DAV1D_MC_UNKNOWN = 2,
-    DAV1D_MC_FCC = 4,
-    DAV1D_MC_BT470BG = 5,
-    DAV1D_MC_BT601 = 6,
-    DAV1D_MC_SMPTE240 = 7,
-    DAV1D_MC_SMPTE_YCGCO = 8,
-    DAV1D_MC_BT2020_NCL = 9,
-    DAV1D_MC_BT2020_CL = 10,
-    DAV1D_MC_SMPTE2085 = 11,
-    DAV1D_MC_CHROMAT_NCL = 12, ///< Chromaticity-derived
-    DAV1D_MC_CHROMAT_CL = 13,
-    DAV1D_MC_ICTCP = 14,
-};
-
-enum Dav1dChromaSamplePosition {
-    DAV1D_CHR_UNKNOWN = 0,
-    DAV1D_CHR_VERTICAL = 1,  ///< Horizontally co-located with luma(0, 0)
-                           ///< sample, between two vertical samples
-    DAV1D_CHR_COLOCATED = 2, ///< Co-located with luma(0, 0) sample
-};
+#include "headers.h"
 
 typedef struct Dav1dPictureParameters {
     int w; ///< width (in pixels)
     int h; ///< height (in pixels)
     enum Dav1dPixelLayout layout; ///< format of the picture
-    enum Dav1dFrameType type; ///< type of the picture
     int bpc; ///< bits per pixel component (8 or 10)
-
-    enum Dav1dColorPrimaries pri; ///< color primaries (av1)
-    enum Dav1dTransferCharacteristics trc; ///< transfer characteristics (av1)
-    enum Dav1dMatrixCoefficients mtrx; ///< matrix coefficients (av1)
-    enum Dav1dChromaSamplePosition chr; ///< chroma sample position (av1)
-    /**
-     * Pixel data uses JPEG pixel range ([0,255] for 8bits) instead of
-     * MPEG pixel range ([16,235] for 8bits luma, [16,240] for 8bits chroma).
-     */
-    int fullrange;
 } Dav1dPictureParameters;
 
 typedef struct Dav1dPicture {
-    int poc; ///< frame number
+    Dav1dSequenceHeader *seq_hdr;
+    Dav1dFrameHeader *frame_hdr;
 
     /**
      * Pointers to planar image data (Y is [0], U is [1], V is [2]). The data
      * should be bytes (for 8 bpc) or words (for 10 bpc). In case of words
      * containing 10 bpc image data, the pixels should be located in the LSB
      * bits, so that values range between [0, 1023]; the upper bits should be
      * zero'ed out.
      */
     void *data[3];
-    struct Dav1dRef *ref; ///< allocation origin
 
     /**
      * Number of bytes between 2 lines in data[] for luma [0] or chroma [1].
      */
     ptrdiff_t stride[2];
 
     Dav1dPictureParameters p;
+    Dav1dDataProps m;
+    struct Dav1dRef *frame_hdr_ref, *seq_hdr_ref, *ref; ///< allocation origins
 
     void *allocator_data; ///< pointer managed by the allocator
 } Dav1dPicture;
 
 typedef struct Dav1dPicAllocator {
     void *cookie; ///< custom data to pass to the allocator callbacks.
     /**
      * Allocate the picture buffer based on the Dav1dPictureParameters.
      *
-     * The data[0], data[1] and data[2] must be 32 bits aligned and with a
+     * The data[0], data[1] and data[2] must be 32 byte aligned and with a
      * pixel width/height multiple of 128 pixels.
      * data[1] and data[2] must share the same stride[1].
      *
      * @param  pic The picture to allocate the buffer for. The callback needs to
      *             fill the picture data[0], data[1], data[2], stride[0] and
      *             stride[1].
      *             The allocator can fill the pic allocator_data pointer with
      *             a custom pointer that will be passed to
@@ -165,24 +84,20 @@ typedef struct Dav1dPicAllocator {
      * @param cookie Custom pointer passed to all calls.
     *
     * @return 0 on success. A negative errno value on error.
      */
     int (*alloc_picture_callback)(Dav1dPicture *pic, void *cookie);
     /**
      * Release the picture buffer.
      *
-     * @param buf           The buffer that was returned by 
-     *                                   alloc_picture_callback().
-     * @param allocator_tag The Dav1dPicture.allocator_data that was filled by
-     *                      alloc_picture_callback()
-     * @param cookie        Custom pointer passed to all calls.
+     * @param pic    The picture that was filled by alloc_picture_callback().
+     * @param cookie Custom pointer passed to all calls.
      */
-    void (*release_picture_callback)(uint8_t *buf, void *allocator_data,
-                                     void *cookie);
+    void (*release_picture_callback)(Dav1dPicture *pic, void *cookie);
 } Dav1dPicAllocator;
 
 /**
  * Release reference to a picture.
  */
 DAV1D_API void dav1d_picture_unref(Dav1dPicture *p);
 
 #endif /* __DAV1D_PICTURE_H__ */
--- a/third_party/dav1d/meson.build
+++ b/third_party/dav1d/meson.build
@@ -319,13 +319,15 @@ endif
 
 
 #
 # Include subdir meson.build files
 # The order is important!
 
 subdir('include')
 
+subdir('doc')
+
 subdir('src')
 
 subdir('tools')
 
 subdir('tests')
new file mode 100644
--- /dev/null
+++ b/third_party/dav1d/src/arm/64/looprestoration.S
@@ -0,0 +1,627 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Martin Storsjo
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/arm/asm.S"
+
+// void dav1d_wiener_filter_h_neon(int16_t *dst, const pixel (*left)[4],
+//                                 const pixel *src, ptrdiff_t stride,
+//                                 const int16_t fh[7], const intptr_t w,
+//                                 int h, enum LrEdgeFlags edges);
+function wiener_filter_h_neon, export=1 // Horizontal 7-tap pass: filters two src rows per iteration into 16-bit rows at x0/x12
+        mov             w8,  w5 // Save the width; w5 is counted down per row pair and reloaded from w8
+        ld1             {v0.8h},  [x4] // Load 8 halfwords of coefficients from fh (lane 7 is cleared below)
+        mov             w9,  #(1 << 14) - (1 << 2)
+        dup             v30.8h,  w9 // v30: constant subtracted from the (input << 7) term in the filter
+        movi            v31.8h,  #8, lsl #8 // v31: 8 << 8 = 2048 in every lane, added after the >> 3
+        // Calculate mid_stride
+        add             w10, w5,  #7
+        bic             w10, w10, #7 // Round the width up to a multiple of 8
+        lsl             w10, w10, #1 // Two bytes per 16-bit output element
+
+        // Clear the last unused element of v0, to allow filtering a single
+        // pixel with one plain mul+addv.
+        ins             v0.h[7], wzr
+
+        // Set up pointers for reading/writing alternate rows
+        add             x12, x0,  x10 // x12 = output pointer for the second row of the pair
+        lsl             w10, w10, #1 // Output pointers advance two rows at a time
+        add             x13, x2,  x3 // x13 = source pointer for the second row of the pair
+        lsl             x3,  x3,  #1 // Source pointers advance two rows at a time
+
+        // Subtract the width (w * 2 bytes) from the doubled mid_stride
+        sub             x10, x10, w5, uxtw #1
+
+        // For w >= 8, we read (w+5)&~7+8 pixels, for w < 8 we read 16 pixels.
+        cmp             w5,  #8
+        add             w11, w5,  #13
+        bic             w11, w11, #7 // (w+13)&~7 == ((w+5)&~7)+8
+        b.ge            1f
+        mov             w11, #16
+1:
+        sub             x3,  x3,  w11, uxtw // x3 = 2*stride - number of pixels read per row
+
+        // Set up the src pointers to include the left edge, for LR_HAVE_LEFT, left == NULL
+        tst             w7,  #1 // LR_HAVE_LEFT
+        b.eq            2f
+        // LR_HAVE_LEFT
+        cbnz            x1,  0f
+        // left == NULL
+        sub             x2,  x2,  #3 // Start reading 3 pixels to the left of src
+        sub             x13, x13, #3
+        b               1f
+0:      // LR_HAVE_LEFT, left != NULL
+2:      // !LR_HAVE_LEFT, increase the stride.
+        // For this case we don't read the left 3 pixels from the src pointer,
+        // but shift it as if we had done that.
+        add             x3,  x3,  #3
+
+
+1:      // Loop vertically
+        ld1             {v3.16b},  [x2],  #16 // First 16 source bytes of the first row
+        ld1             {v5.16b},  [x13], #16 // First 16 source bytes of the second row
+
+        tst             w7,  #1 // LR_HAVE_LEFT
+        b.eq            0f
+        cbz             x1,  2f // left == NULL: the left pixels were already read via x2/x13
+        // LR_HAVE_LEFT, left != NULL
+        ld1             {v2.s}[3],  [x1], #4 // 4 bytes from left[] into the top lane of v2
+        // Move x2/x13 back to account for the last 3 bytes we loaded earlier,
+        // which we'll shift out.
+        sub             x2,  x2,  #3
+        sub             x13, x13, #3
+        ld1             {v4.s}[3],  [x1], #4 // Next 4 bytes from left[], for the second row
+        ext             v3.16b, v2.16b, v3.16b, #13 // Last 3 bytes of v2 followed by the first 13 of v3
+        ext             v5.16b, v4.16b, v5.16b, #13
+        b               2f
+0:
+        // !LR_HAVE_LEFT, fill v2 with the leftmost byte
+        // and shift v3 to have 3x the first byte at the front.
+        dup             v2.16b, v3.b[0]
+        dup             v4.16b, v5.b[0]
+        // Move x2 back to account for the last 3 bytes we loaded before,
+        // which we shifted out.
+        sub             x2,  x2,  #3
+        sub             x13, x13, #3
+        ext             v3.16b, v2.16b, v3.16b, #13
+        ext             v5.16b, v4.16b, v5.16b, #13
+
+2:
+        uxtl            v2.8h,  v3.8b // Widen the 16 bytes of each row to 16 bit: v2/v3 and v4/v5
+        uxtl2           v3.8h,  v3.16b
+        uxtl            v4.8h,  v5.8b
+        uxtl2           v5.8h,  v5.16b
+
+        tst             w7,  #2 // LR_HAVE_RIGHT
+        b.ne            4f
+        // If we'll need to pad the right edge, load that byte to pad with
+        // here since we can find it pretty easily from here.
+        sub             w9,  w5, #14
+        ldr             b28, [x2,  w9, sxtw] // w9 can be negative, hence the sign extension
+        ldr             b29, [x13, w9, sxtw]
+        // Fill v28/v29 with the right padding pixel
+        dup             v28.8b,  v28.b[0]
+        dup             v29.8b,  v29.b[0]
+        uxtl            v28.8h,  v28.8b
+        uxtl            v29.8h,  v29.8b
+3:      // !LR_HAVE_RIGHT
+        // If we'll have to pad the right edge we need to quit early here.
+        cmp             w5,  #11
+        b.ge            4f   // If w >= 11, all used input pixels are valid
+        cmp             w5,  #7
+        b.ge            5f   // If w >= 7, we can filter 4 pixels
+        b               6f
+
+4:      // Loop horizontally
+.macro filter wd
+        // Interleaving the mul/mla chains actually hurts performance
+        // significantly on Cortex A53, thus keeping mul/mla tightly
+        // chained like this.
+        ext             v16.16b, v2.16b,  v3.16b, #2 // v16-v21: first row shifted by 1..6 elements
+        ext             v17.16b, v2.16b,  v3.16b, #4
+        ext             v18.16b, v2.16b,  v3.16b, #6
+        ext             v19.16b, v2.16b,  v3.16b, #8
+        ext             v20.16b, v2.16b,  v3.16b, #10
+        ext             v21.16b, v2.16b,  v3.16b, #12
+        mul             v6\wd,   v2\wd,   v0.h[0] // v6 = sum over the 7 taps for the first row
+        mla             v6\wd,   v16\wd,  v0.h[1]
+        mla             v6\wd,   v17\wd,  v0.h[2]
+        mla             v6\wd,   v18\wd,  v0.h[3]
+        mla             v6\wd,   v19\wd,  v0.h[4]
+        mla             v6\wd,   v20\wd,  v0.h[5]
+        mla             v6\wd,   v21\wd,  v0.h[6]
+        ext             v22.16b, v4.16b,  v5.16b, #2 // v22-v27: second row shifted by 1..6 elements
+        ext             v23.16b, v4.16b,  v5.16b, #4
+        ext             v24.16b, v4.16b,  v5.16b, #6
+        ext             v25.16b, v4.16b,  v5.16b, #8
+        ext             v26.16b, v4.16b,  v5.16b, #10
+        ext             v27.16b, v4.16b,  v5.16b, #12
+        mul             v7\wd,   v4\wd,   v0.h[0] // v7 = sum over the 7 taps for the second row
+        mla             v7\wd,   v22\wd,  v0.h[1]
+        mla             v7\wd,   v23\wd,  v0.h[2]
+        mla             v7\wd,   v24\wd,  v0.h[3]
+        mla             v7\wd,   v25\wd,  v0.h[4]
+        mla             v7\wd,   v26\wd,  v0.h[5]
+        mla             v7\wd,   v27\wd,  v0.h[6]
+
+        shl             v18\wd,  v18\wd,  #7 // v18/v24 hold the inputs that were multiplied by tap 3
+        shl             v24\wd,  v24\wd,  #7
+        sub             v18\wd,  v18\wd,  v30\wd
+        sub             v24\wd,  v24\wd,  v30\wd
+        sqadd           v6\wd,   v6\wd,   v18\wd // Saturating add of ((input << 7) - v30) to the tap sum
+        sqadd           v7\wd,   v7\wd,   v24\wd
+        sshr            v6\wd,   v6\wd,   #3
+        sshr            v7\wd,   v7\wd,   #3
+        add             v6\wd,   v6\wd,   v31\wd // Add the 2048 offset held in v31
+        add             v7\wd,   v7\wd,   v31\wd
+.endm
+        filter          .8h
+        st1             {v6.8h},  [x0],  #16
+        st1             {v7.8h},  [x12], #16
+
+        subs            w5,  w5,  #8
+        b.le            9f
+        tst             w7,  #2 // LR_HAVE_RIGHT; these flags are consumed by the b.ne below
+        mov             v2.16b,  v3.16b // Slide the window: the upper 8 pixels become the lower 8
+        mov             v4.16b,  v5.16b
+        ld1             {v3.8b},  [x2],  #8 // Load and widen the next 8 pixels of each row
+        ld1             {v5.8b},  [x13], #8
+        uxtl            v3.8h,   v3.8b
+        uxtl            v5.8h,   v5.8b
+        b.ne            4b // If we don't need to pad, just keep filtering.
+        b               3b // If we need to pad, check how many pixels we have left.
+
+5:      // Filter 4 pixels, 7 <= w < 11
+        filter          .4h
+        st1             {v6.4h},  [x0],  #8
+        st1             {v7.4h},  [x12], #8
+
+        subs            w5,  w5,  #4 // 3 <= w < 7
+        ext             v2.16b,  v2.16b,  v3.16b, #8 // Shift the v2-v3 and v4-v5 windows down by 4 pixels
+        ext             v3.16b,  v3.16b,  v3.16b, #8
+        ext             v4.16b,  v4.16b,  v5.16b, #8
+        ext             v5.16b,  v5.16b,  v5.16b, #8
+
+6:      // Pad the right edge and filter the last few pixels.
+        // w < 7, w+3 pixels valid in v2-v3
+        cmp             w5,  #5
+        b.lt            7f
+        b.gt            8f
+        // w == 5, 8 pixels valid in v2, v3 invalid
+        mov             v3.16b,  v28.16b
+        mov             v5.16b,  v29.16b
+        b               88f
+
+7:      // 1 <= w < 5, 4-7 pixels valid in v2
+        sub             w9,  w5,  #1
+        // w9 = (pixels valid - 4)
+        adr             x11, L(variable_shift_tbl)
+        ldrh            w9,  [x11, w9, uxtw #1] // Table entry = distance from the table back to the target label
+        sub             x11, x11, w9, uxth
+        mov             v3.16b,  v28.16b
+        mov             v5.16b,  v29.16b
+        br              x11 // Jump to 44/55/66/77 below
+        // Shift v2 right, shifting out invalid pixels,
+        // shift v2 left to the original offset, shifting in padding pixels.
+44:     // 4 pixels valid
+        ext             v2.16b,  v2.16b,  v2.16b,  #8
+        ext             v2.16b,  v2.16b,  v3.16b,  #8
+        ext             v4.16b,  v4.16b,  v4.16b,  #8
+        ext             v4.16b,  v4.16b,  v5.16b,  #8
+        b               88f
+55:     // 5 pixels valid
+        ext             v2.16b,  v2.16b,  v2.16b,  #10
+        ext             v2.16b,  v2.16b,  v3.16b,  #6
+        ext             v4.16b,  v4.16b,  v4.16b,  #10
+        ext             v4.16b,  v4.16b,  v5.16b,  #6
+        b               88f
+66:     // 6 pixels valid
+        ext             v2.16b,  v2.16b,  v2.16b,  #12
+        ext             v2.16b,  v2.16b,  v3.16b,  #4
+        ext             v4.16b,  v4.16b,  v4.16b,  #12
+        ext             v4.16b,  v4.16b,  v5.16b,  #4
+        b               88f
+77:     // 7 pixels valid
+        ext             v2.16b,  v2.16b,  v2.16b,  #14
+        ext             v2.16b,  v2.16b,  v3.16b,  #2
+        ext             v4.16b,  v4.16b,  v4.16b,  #14
+        ext             v4.16b,  v4.16b,  v5.16b,  #2
+        b               88f
+
+L(variable_shift_tbl):
+        .hword L(variable_shift_tbl) - 44b
+        .hword L(variable_shift_tbl) - 55b
+        .hword L(variable_shift_tbl) - 66b
+        .hword L(variable_shift_tbl) - 77b
+
+8:      // w > 5, w == 6, 9 pixels valid in v2-v3, 1 pixel valid in v3
+        ins             v28.h[0],  v3.h[0] // Keep the one valid pixel, the other lanes stay padding
+        ins             v29.h[0],  v5.h[0]
+        mov             v3.16b,  v28.16b
+        mov             v5.16b,  v29.16b
+
+88:
+        // w < 7, v2-v3 padded properly
+        cmp             w5,  #4
+        b.lt            888f
+
+        // w >= 4, filter 4 pixels
+        filter          .4h
+        st1             {v6.4h},  [x0],  #8
+        st1             {v7.4h},  [x12], #8
+        subs            w5,  w5,  #4 // 0 <= w < 4
+        ext             v2.16b,  v2.16b,  v3.16b, #8
+        ext             v4.16b,  v4.16b,  v5.16b, #8
+        b.eq            9f
+888:    // 1 <= w < 4, filter 1 pixel at a time
+        mul             v6.8h,   v2.8h,   v0.8h // Lane-wise products; lane 7 of v0 was cleared at the top
+        mul             v7.8h,   v4.8h,   v0.8h
+        addv            h6,      v6.8h // Horizontal sum = filter output for one pixel
+        addv            h7,      v7.8h
+        dup             v16.4h,  v2.h[3] // The input at tap 3, for the (input << 7) - v30 term
+        dup             v17.4h,  v4.h[3]
+        shl             v16.4h,  v16.4h,  #7
+        shl             v17.4h,  v17.4h,  #7
+        sub             v16.4h,  v16.4h,  v30.4h
+        sub             v17.4h,  v17.4h,  v30.4h
+        sqadd           v6.4h,   v6.4h,   v16.4h
+        sqadd           v7.4h,   v7.4h,   v17.4h
+        sshr            v6.4h,   v6.4h,   #3
+        sshr            v7.4h,   v7.4h,   #3
+        add             v6.4h,   v6.4h,   v31.4h
+        add             v7.4h,   v7.4h,   v31.4h
+        st1             {v6.h}[0], [x0],  #2 // Only lane 0 holds the result
+        st1             {v7.h}[0], [x12], #2
+        subs            w5,  w5,  #1
+        ext             v2.16b,  v2.16b,  v3.16b,  #2 // Advance the window by one pixel
+        ext             v4.16b,  v4.16b,  v5.16b,  #2
+        b.gt            888b
+
+9:
+        subs            w6,  w6,  #2 // Two rows were handled in this iteration
+        b.le            0f
+        // Jump to the next row and loop horizontally
+        add             x0,  x0,  x10
+        add             x12, x12, x10
+        add             x2,  x2,  x3
+        add             x13, x13, x3
+        mov             w5,  w8 // Restore the full width for the next row pair
+        b               1b
+0:
+        ret
+.purgem filter
+endfunc
+
+// void dav1d_wiener_filter_v_neon(pixel *dst, ptrdiff_t stride,
+//                                 const int16_t *mid, int w, int h,
+//                                 const int16_t fv[7], enum LrEdgeFlags edges,
+//                                 ptrdiff_t mid_stride);
+function wiener_filter_v_neon, export=1 // Vertical 7-tap pass: filters an 8 column wide slice of 16-bit rows from x2 into 8-bit pixels at x0
+        mov             w8,  w4 // Save h; w4 is counted down per slice and reloaded from w8
+        ld1             {v0.8h},  [x5] // Load 8 halfwords of coefficients from fv
+        mov             w9,  #128
+        dup             v1.8h, w9
+        add             v1.8h,  v1.8h,  v0.8h // v1 = coefficients + 128; only v1.h[3] is used by the filter
+
+        // Calculate the number of rows to move back when looping vertically
+        mov             w11, w4
+        tst             w6,  #4 // LR_HAVE_TOP
+        b.eq            0f
+        sub             x2,  x2,  x7,  lsl #1 // Start two rows above mid
+        add             w11, w11, #2
+0:
+        tst             w6,  #8 // LR_HAVE_BOTTOM
+        b.eq            1f
+        add             w11, w11, #2
+
+1:      // Start of horizontal loop; start one vertical filter slice.
+        // Load rows into v16-v19 and pad properly.
+        tst             w6,  #4 // LR_HAVE_TOP
+        ld1             {v16.8h}, [x2], x7 // ld1 leaves the flags from tst intact for the b.eq
+        b.eq            2f
+        // LR_HAVE_TOP
+        ld1             {v18.8h}, [x2], x7
+        mov             v17.16b, v16.16b // Only two rows above are read; the topmost one is duplicated
+        ld1             {v19.8h}, [x2], x7
+        b               3f
+2:      // !LR_HAVE_TOP
+        mov             v17.16b, v16.16b // Replicate the first row into all of v17-v19
+        mov             v18.16b, v16.16b
+        mov             v19.16b, v16.16b
+
+3:
+        cmp             w4,  #4
+        b.lt            5f
+        // Start filtering normally; fill in v20-v22 with unique rows.
+        ld1             {v20.8h}, [x2], x7
+        ld1             {v21.8h}, [x2], x7
+        ld1             {v22.8h}, [x2], x7
+
+4:
+.macro filter compare
+        subs            w4,  w4,  #1 // Flags are used by the b.le below when compare == 0
+        // Interleaving the mul/mla chains actually hurts performance
+        // significantly on Cortex A53, thus keeping mul/mla tightly
+        // chained like this.
+        smull           v2.4s,  v16.4h,  v0.h[0] // v2 = 32-bit sums for the low 4 columns
+        smlal           v2.4s,  v17.4h,  v0.h[1]
+        smlal           v2.4s,  v18.4h,  v0.h[2]
+        smlal           v2.4s,  v19.4h,  v1.h[3] // Tap 3 uses the coefficient + 128 from v1
+        smlal           v2.4s,  v20.4h,  v0.h[4]
+        smlal           v2.4s,  v21.4h,  v0.h[5]
+        smlal           v2.4s,  v22.4h,  v0.h[6]
+        smull2          v3.4s,  v16.8h,  v0.h[0] // v3 = 32-bit sums for the high 4 columns
+        smlal2          v3.4s,  v17.8h,  v0.h[1]
+        smlal2          v3.4s,  v18.8h,  v0.h[2]
+        smlal2          v3.4s,  v19.8h,  v1.h[3]
+        smlal2          v3.4s,  v20.8h,  v0.h[4]
+        smlal2          v3.4s,  v21.8h,  v0.h[5]
+        smlal2          v3.4s,  v22.8h,  v0.h[6]
+        sqrshrun        v2.4h,  v2.4s,   #11 // Rounding shift by 11 with unsigned saturation to 16 bit
+        sqrshrun2       v2.8h,  v3.4s,   #11
+        sqxtun          v2.8b,  v2.8h // Saturate to 8-bit pixels
+        st1             {v2.8b}, [x0], x1 // Store 8 pixels and step to the next output row
+.if \compare
+        cmp             w4,  #4 // Sets the flags for the b.lt that follows the macro invocation
+.else
+        b.le            9f
+.endif
+        mov             v16.16b,  v17.16b // Shift the row window up by one
+        mov             v17.16b,  v18.16b
+        mov             v18.16b,  v19.16b
+        mov             v19.16b,  v20.16b
+        mov             v20.16b,  v21.16b
+        mov             v21.16b,  v22.16b
+.endm
+        filter          1
+        b.lt            7f
+        ld1             {v22.8h}, [x2], x7
+        b               4b
+
+5:      // Less than 4 rows in total; not all of v20-v21 are filled yet.
+        tst             w6,  #8 // LR_HAVE_BOTTOM
+        b.eq            6f
+        // LR_HAVE_BOTTOM
+        cmp             w4,  #2
+        // We load at least 2 rows in all cases.
+        ld1             {v20.8h}, [x2], x7
+        ld1             {v21.8h}, [x2], x7
+        b.gt            53f // 3 rows in total
+        b.eq            52f // 2 rows in total
+51:     // 1 row in total, v19 already loaded, load edge into v20-v22.
+        mov             v22.16b,  v21.16b
+        b               8f
+52:     // 2 rows in total, v19 already loaded, load v20 with content data
+        // and 2 rows of edge.
+        ld1             {v22.8h}, [x2], x7
+        mov             v23.16b,  v22.16b
+        b               8f
+53:
+        // 3 rows in total, v19 already loaded, load v20 and v21 with content
+        // and 2 rows of edge.
+        ld1             {v22.8h}, [x2], x7
+        ld1             {v23.8h}, [x2], x7
+        mov             v24.16b,  v23.16b
+        b               8f
+
+6:
+        // !LR_HAVE_BOTTOM
+        cmp             w4,  #2
+        b.gt            63f // 3 rows in total
+        b.eq            62f // 2 rows in total
+61:     // 1 row in total, v19 already loaded, pad that into v20-v22.
+        mov             v20.16b,  v19.16b
+        mov             v21.16b,  v19.16b
+        mov             v22.16b,  v19.16b
+        b               8f
+62:     // 2 rows in total, v19 already loaded, load v20 and pad that into v20-v23.
+        ld1             {v20.8h}, [x2], x7
+        mov             v21.16b,  v20.16b
+        mov             v22.16b,  v20.16b
+        mov             v23.16b,  v20.16b
+        b               8f
+63:
+        // 3 rows in total, v19 already loaded, load v20 and v21 and pad v21 into v22-v24.
+        ld1             {v20.8h}, [x2], x7
+        ld1             {v21.8h}, [x2], x7
+        mov             v22.16b,  v21.16b
+        mov             v23.16b,  v21.16b
+        mov             v24.16b,  v21.16b
+        b               8f
+
+7:
+        // All registers up to v21 are filled already, 3 valid rows left.
+        // < 4 valid rows left; fill in padding and filter the last
+        // few rows.
+        tst             w6,  #8 // LR_HAVE_BOTTOM
+        b.eq            71f
+        // LR_HAVE_BOTTOM; load 2 rows of edge.
+        ld1             {v22.8h}, [x2], x7
+        ld1             {v23.8h}, [x2], x7
+        mov             v24.16b,  v23.16b
+        b               8f
+71:
+        // !LR_HAVE_BOTTOM, pad 3 rows
+        mov             v22.16b,  v21.16b
+        mov             v23.16b,  v21.16b
+        mov             v24.16b,  v21.16b
+
+8:      // At this point, all registers up to v22-v24 are loaded with
+        // edge/padding (depending on how many rows are left).
+        filter          0 // This branches to 9f when done
+        mov             v22.16b,  v23.16b // Feed the pre-loaded edge/padding rows into the window
+        mov             v23.16b,  v24.16b
+        b               8b
+
+9:      // End of one vertical slice.
+        subs            w3,  w3,  #8 // One slice covers 8 columns
+        b.le            0f
+        // Move pointers back up to the top and loop horizontally.
+        msub            x0,  x1,  x8,  x0 // x0 -= stride * h
+        msub            x2,  x7,  x11, x2 // x2 -= mid_stride * (number of rows computed in w11)
+        add             x0,  x0,  #8 // 8 output pixels to the right
+        add             x2,  x2,  #16 // 8 16-bit input elements to the right
+        mov             w4,  w8 // Restore the row count
+        b               1b
+
+0:
+        ret
+.purgem filter
+endfunc
+
+// void dav1d_copy_narrow_neon(pixel *dst, ptrdiff_t stride,
+//                             const pixel *src, int w, int h);
+function copy_narrow_neon, export=1 // Copies h rows of w (1-7) pixels from a packed src (rows w bytes apart) to dst
+        adr             x5,  L(copy_narrow_tbl)
+        ldrh            w6,  [x5, w3, uxtw #1] // Table entry = distance from the table back to the handler for w
+        sub             x5,  x5,  w6, uxth
+        br              x5
+10:     // w == 1
+        add             x7,  x0,  x1 // x7 = pointer to the odd rows
+        lsl             x1,  x1,  #1 // x0/x7 each step two rows at a time
+18:
+        cmp             w4,  #8
+        b.lt            110f
+        subs            w4,  w4,  #8
+        ld1             {v0.8b}, [x2], #8 // 8 bytes of src = one pixel for each of 8 rows
+        st1             {v0.b}[0], [x0], x1
+        st1             {v0.b}[1], [x7], x1
+        st1             {v0.b}[2], [x0], x1
+        st1             {v0.b}[3], [x7], x1
+        st1             {v0.b}[4], [x0], x1
+        st1             {v0.b}[5], [x7], x1
+        st1             {v0.b}[6], [x0], x1
+        st1             {v0.b}[7], [x7], x1
+        b.le            0f
+        b               18b
+110:    // Fewer than 8 rows left; copy them one at a time
+        asr             x1,  x1,  #1 // Back to the single row stride
+11:
+        subs            w4,  w4,  #1
+        ld1             {v0.b}[0], [x2], #1
+        st1             {v0.b}[0], [x0], x1
+        b.gt            11b // Loop only while rows remain, like the other loops here
+0:
+        ret
+
+20:     // w == 2
+        add             x7,  x0,  x1 // x7 = pointer to the odd rows
+        lsl             x1,  x1,  #1
+24:
+        cmp             w4,  #4
+        b.lt            210f
+        subs            w4,  w4,  #4
+        ld1             {v0.4h}, [x2], #8 // 8 bytes of src = two pixels for each of 4 rows
+        st1             {v0.h}[0], [x0], x1
+        st1             {v0.h}[1], [x7], x1
+        st1             {v0.h}[2], [x0], x1
+        st1             {v0.h}[3], [x7], x1
+        b.le            0f
+        b               24b
+210:    // Fewer than 4 rows left; copy them one at a time
+        asr             x1,  x1,  #1 // Back to the single row stride
+22:
+        subs            w4,  w4,  #1
+        ld1             {v0.h}[0], [x2], #2
+        st1             {v0.h}[0], [x0], x1
+        b.gt            22b // Loop only while rows remain, like the other loops here
+0:
+        ret
+
+30:     // w == 3: one halfword plus one byte per row
+        ldrh            w5,  [x2]
+        ldrb            w6,  [x2, #2]
+        add             x2,  x2,  #3
+        subs            w4,  w4,  #1
+        strh            w5,  [x0]
+        strb            w6,  [x0, #2]
+        add             x0,  x0,  x1
+        b.gt            30b
+        ret
+
+40:     // w == 4
+        add             x7,  x0,  x1 // x7 = pointer to the odd rows
+        lsl             x1,  x1,  #1
+42:
+        cmp             w4,  #2
+        b.lt            41f
+        subs            w4,  w4,  #2
+        ld1             {v0.2s}, [x2], #8 // 8 bytes of src = four pixels for each of 2 rows
+        st1             {v0.s}[0], [x0], x1
+        st1             {v0.s}[1], [x7], x1
+        b.le            0f
+        b               42b
+41:     // One row left
+        ld1             {v0.s}[0], [x2]
+        st1             {v0.s}[0], [x0]
+0:
+        ret
+
+50:     // w == 5: one word plus one byte per row
+        ldr             w5,  [x2]
+        ldrb            w6,  [x2, #4]
+        add             x2,  x2,  #5
+        subs            w4,  w4,  #1
+        str             w5,  [x0]
+        strb            w6,  [x0, #4]
+        add             x0,  x0,  x1
+        b.gt            50b
+        ret
+
+60:     // w == 6: one word plus one halfword per row
+        ldr             w5,  [x2]
+        ldrh            w6,  [x2, #4]
+        add             x2,  x2,  #6
+        subs            w4,  w4,  #1
+        str             w5,  [x0]
+        strh            w6,  [x0, #4]
+        add             x0,  x0,  x1
+        b.gt            60b
+        ret
+
+70:     // w == 7: one word, one halfword and one byte per row
+        ldr             w5,  [x2]
+        ldrh            w6,  [x2, #4]
+        ldrb            w7,  [x2, #6]
+        add             x2,  x2,  #7
+        subs            w4,  w4,  #1
+        str             w5,  [x0]
+        strh            w6,  [x0, #4]
+        strb            w7,  [x0, #6]
+        add             x0,  x0,  x1
+        b.gt            70b
+        ret
+
+L(copy_narrow_tbl):
+        .hword 0
+        .hword L(copy_narrow_tbl) - 10b
+        .hword L(copy_narrow_tbl) - 20b
+        .hword L(copy_narrow_tbl) - 30b
+        .hword L(copy_narrow_tbl) - 40b
+        .hword L(copy_narrow_tbl) - 50b
+        .hword L(copy_narrow_tbl) - 60b
+        .hword L(copy_narrow_tbl) - 70b
+endfunc
--- a/third_party/dav1d/src/arm/64/mc.S
+++ b/third_party/dav1d/src/arm/64/mc.S
@@ -1,11 +1,12 @@
 /*
  * Copyright © 2018, VideoLAN and dav1d authors
  * Copyright © 2018, Janne Grunau
+ * Copyright © 2018, Martin Storsjo
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice, this
  *    list of conditions and the following disclaimer.
  *
@@ -21,16 +22,17 @@
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "src/arm/asm.S"
+#include "src/arm/64/util.S"
 
 .macro avg dst, t0, t1
         ld1             {\t0\().8h},   [x2],  16
         ld1             {\t1\().8h},   [x3],  16
         add             \t0\().8h,   \t0\().8h,   \t1\().8h
         sqrshrun        \dst\().8b,  \t0\().8h,   #5
 .endm
 
@@ -225,8 +227,2107 @@ L(\type\()_tbl):
         .hword L(\type\()_tbl) -    8b
         .hword L(\type\()_tbl) -    4b
 endfunc
 .endm
 
 bidir_fn avg
 bidir_fn w_avg
 bidir_fn mask
+
+
+// This has got the same signature as the put_8tap functions,
+// and assumes that x8 is set to (clz(w)-24).
+function put // Plain copy of w5 rows from x2 (stride x3) to x0 (stride x1); x8 indexes the jump table below
+        adr             x9,  L(put_tbl)
+        ldrh            w8,  [x9, x8, lsl #1] // Table entry = distance from the table back to the handler
+        sub             x9,  x9,  w8, uxtw
+        br              x9
+
+2:      // 2 bytes per row, two rows per iteration
+        ld1             {v0.h}[0], [x2], x3
+        ld1             {v1.h}[0], [x2], x3
+        subs            w5,  w5,  #2
+        st1             {v0.h}[0], [x0], x1
+        st1             {v1.h}[0], [x0], x1
+        b.gt            2b
+        ret
+4:      // 4 bytes per row, two rows per iteration
+        ld1             {v0.s}[0], [x2], x3
+        ld1             {v1.s}[0], [x2], x3
+        subs            w5,  w5,  #2
+        st1             {v0.s}[0], [x0], x1
+        st1             {v1.s}[0], [x0], x1
+        b.gt            4b
+        ret
+8:      // 8 bytes per row, two rows per iteration
+        ld1             {v0.8b}, [x2], x3
+        ld1             {v1.8b}, [x2], x3
+        subs            w5,  w5,  #2
+        st1             {v0.8b}, [x0], x1
+        st1             {v1.8b}, [x0], x1
+        b.gt            8b
+        ret
+160:    // 16 bytes per row: set up second-row pointers x8/x9 and doubled strides
+        add             x8,  x0,  x1
+        lsl             x1,  x1,  #1
+        add             x9,  x2,  x3
+        lsl             x3,  x3,  #1
+16:
+        ld1             {v0.16b}, [x2], x3
+        ld1             {v1.16b}, [x9], x3
+        subs            w5,  w5,  #2
+        st1             {v0.16b}, [x0], x1
+        st1             {v1.16b}, [x8], x1
+        b.gt            16b
+        ret
+32:     // 32 bytes per row via general purpose register pairs, one row per iteration
+        ldp             x6,  x7,  [x2]
+        ldp             x8,  x9,  [x2, #16]
+        stp             x6,  x7,  [x0]
+        subs            w5,  w5,  #1
+        stp             x8,  x9,  [x0, #16]
+        add             x2,  x2,  x3
+        add             x0,  x0,  x1
+        b.gt            32b
+        ret
+64:     // 64 bytes per row via general purpose register pairs, one row per iteration
+        ldp             x6,  x7,  [x2]
+        ldp             x8,  x9,  [x2, #16]
+        stp             x6,  x7,  [x0]
+        ldp             x10, x11, [x2, #32]
+        stp             x8,  x9,  [x0, #16]
+        subs            w5,  w5,  #1
+        ldp             x12, x13, [x2, #48]
+        stp             x10, x11, [x0, #32]
+        stp             x12, x13, [x0, #48]
+        add             x2,  x2,  x3
+        add             x0,  x0,  x1
+        b.gt            64b
+        ret
+128:    // 128 bytes per row via q register pairs, one row per iteration
+        ldp             q0,  q1,  [x2]
+        ldp             q2,  q3,  [x2, #32]
+        stp             q0,  q1,  [x0]
+        ldp             q4,  q5,  [x2, #64]
+        stp             q2,  q3,  [x0, #32]
+        ldp             q6,  q7,  [x2, #96]
+        subs            w5,  w5,  #1
+        stp             q4,  q5,  [x0, #64]
+        stp             q6,  q7,  [x0, #96]
+        add             x2,  x2,  x3
+        add             x0,  x0,  x1
+        b.gt            128b
+        ret
+
+L(put_tbl):
+        .hword L(put_tbl) - 128b
+        .hword L(put_tbl) -  64b
+        .hword L(put_tbl) -  32b
+        .hword L(put_tbl) - 160b
+        .hword L(put_tbl) -   8b
+        .hword L(put_tbl) -   4b
+        .hword L(put_tbl) -   2b
+endfunc
+
+
+// This has got the same signature as the prep_8tap functions,
+// and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
+function prep // Widens w4 rows of 8-bit pixels from x1 (stride x2) to 16 bit, shifted left by 4, and stores them at x0
+        adr             x9,  L(prep_tbl)
+        ldrh            w8,  [x9, x8, lsl #1] // Table entry = distance from the table back to the handler
+        sub             x9,  x9,  w8, uxtw
+        br              x9
+
+4:      // 4 pixels per row, two rows per iteration
+        ld1             {v0.s}[0], [x1], x2
+        ld1             {v1.s}[0], [x1], x2
+        subs            w4,  w4,  #2
+        ushll           v0.8h, v0.8b, #4 // Widen to 16 bit and multiply by 16
+        ushll           v1.8h, v1.8b, #4
+        st1             {v0.4h, v1.4h}, [x0], #16 // The two output rows are stored back to back
+        b.gt            4b
+        ret
+8:      // 8 pixels per row, two rows per iteration
+        ld1             {v0.8b}, [x1], x2
+        ld1             {v1.8b}, [x1], x2
+        subs            w4,  w4,  #2
+        ushll           v0.8h, v0.8b, #4
+        ushll           v1.8h, v1.8b, #4
+        st1             {v0.8h, v1.8h}, [x0], #32
+        b.gt            8b
+        ret
+160:    // 16 pixels per row: x9 points at the second source row, stride doubled
+        add             x9,  x1,  x2
+        lsl             x2,  x2,  #1
+16:
+        ld1             {v0.16b}, [x1], x2
+        ld1             {v1.16b}, [x9], x2
+        subs            w4,  w4,  #2
+        ushll           v4.8h, v0.8b,  #4
+        ushll2          v5.8h, v0.16b, #4
+        ushll           v6.8h, v1.8b,  #4
+        ushll2          v7.8h, v1.16b, #4
+        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64
+        b.gt            16b
+        ret
+320:    // 32 pixels per row, two rows per iteration
+        add             x8,  x0,  w3, uxtw // x8 = x0 + w3; presumably w3 is w, making x8 the second half of the output row
+32:
+        ld1             {v0.16b, v1.16b},  [x1], x2
+        subs            w4,  w4,  #2
+        ushll           v4.8h,  v0.8b,  #4
+        ushll2          v5.8h,  v0.16b, #4
+        ld1             {v2.16b, v3.16b},  [x1], x2
+        ushll           v6.8h,  v1.8b,  #4
+        ushll2          v7.8h,  v1.16b, #4
+        ushll           v16.8h, v2.8b,  #4
+        st1             {v4.8h,  v5.8h},  [x0], x7 // x0/x8 each advance by x7 bytes per store
+        ushll2          v17.8h, v2.16b, #4
+        st1             {v6.8h,  v7.8h},  [x8], x7
+        ushll           v18.8h, v3.8b,  #4
+        st1             {v16.8h, v17.8h}, [x0], x7
+        ushll2          v19.8h, v3.16b, #4
+        st1             {v18.8h, v19.8h}, [x8], x7
+        b.gt            32b
+        ret
+640:    // 64 pixels per row, one row per iteration; x0/x8 interleave 32-byte stores
+        add             x8,  x0,  #32
+        mov             x6,  #64
+64:
+        ldp             q0,  q1,  [x1]
+        subs            w4,  w4,  #1
+        ushll           v4.8h,  v0.8b,  #4
+        ushll2          v5.8h,  v0.16b, #4
+        ldp             q2,  q3,  [x1, #32]
+        ushll           v6.8h,  v1.8b,  #4
+        ushll2          v7.8h,  v1.16b, #4
+        add             x1,  x1,  x2
+        ushll           v16.8h, v2.8b,  #4
+        st1             {v4.8h,  v5.8h},  [x0], x6
+        ushll2          v17.8h, v2.16b, #4
+        ushll           v18.8h, v3.8b,  #4
+        st1             {v6.8h,  v7.8h},  [x8], x6
+        ushll2          v19.8h, v3.16b, #4
+        st1             {v16.8h, v17.8h}, [x0], x6
+        st1             {v18.8h, v19.8h}, [x8], x6
+        b.gt            64b
+        ret
+1280:   // 128 pixels per row, one row per iteration; x0/x8 interleave 64-byte stores
+        add             x8,  x0,  #64
+        mov             x6,  #128
+128:
+        ldp             q0,  q1,  [x1]
+        ldp             q2,  q3,  [x1, #32]
+        ushll           v16.8h,  v0.8b,  #4
+        ushll2          v17.8h,  v0.16b, #4
+        ushll           v18.8h,  v1.8b,  #4
+        ushll2          v19.8h,  v1.16b, #4
+        ushll           v20.8h,  v2.8b,  #4
+        ushll2          v21.8h,  v2.16b, #4
+        ldp             q4,  q5,  [x1, #64]
+        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], x6
+        ushll           v22.8h,  v3.8b,  #4
+        ushll2          v23.8h,  v3.16b, #4
+        ushll           v24.8h,  v4.8b,  #4
+        ushll2          v25.8h,  v4.16b, #4
+        ushll           v26.8h,  v5.8b,  #4
+        ushll2          v27.8h,  v5.16b, #4
+        ldp             q6,  q7,  [x1, #96]
+        st1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x8], x6
+        ushll           v28.8h,  v6.8b,  #4
+        ushll2          v29.8h,  v6.16b, #4
+        ushll           v30.8h,  v7.8b,  #4
+        ushll2          v31.8h,  v7.16b, #4
+        subs            w4,  w4,  #1
+        add             x1,  x1,  x2
+        st1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x0], x6
+        st1             {v28.8h, v29.8h, v30.8h, v31.8h}, [x8], x6
+        b.gt            128b
+        ret
+
+L(prep_tbl):
+        .hword L(prep_tbl) - 1280b
+        .hword L(prep_tbl) -  640b
+        .hword L(prep_tbl) -  320b
+        .hword L(prep_tbl) -  160b
+        .hword L(prep_tbl) -    8b
+        .hword L(prep_tbl) -    4b
+endfunc
+
+
+// Load a single element into lane 0 of each destination register.
+//   s0, s1  source pointers, used alternately (s0 first)
+//   strd    post-increment applied to the pointer after each load
+//   wd      lane size suffix (callers in this file pass .h or .s)
+//   d0-d6   destination vector registers
+// Load order is d0<-s0, d1<-s1, d2<-s0, d3<-s1, d4<-s0, d5<-s1, d6<-s0.
+// d0 and d1 are mandatory, d2/d3 come as a pair, d4, d5 and d6 are each
+// optional on their own.
+.macro load_slice s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
+        ld1             {\d0\wd}[0], [\s0], \strd
+        ld1             {\d1\wd}[0], [\s1], \strd
+// d3 is loaded whenever d2 is supplied; it is not tested separately.
+.ifnb \d2
+        ld1             {\d2\wd}[0], [\s0], \strd
+        ld1             {\d3\wd}[0], [\s1], \strd
+.endif
+.ifnb \d4
+        ld1             {\d4\wd}[0], [\s0], \strd
+.endif
+.ifnb \d5
+        ld1             {\d5\wd}[0], [\s1], \strd
+.endif
+.ifnb \d6
+        ld1             {\d6\wd}[0], [\s0], \strd
+.endif
+.endm
+// Whole-register counterpart of load_slice: each ld1 fills the complete
+// arrangement given by wd (callers in this file pass .8b or .16b).
+//   s0, s1  source pointers, used alternately (s0 first)
+//   strd    post-increment applied to the pointer after each load
+//   d0-d6   destination vector registers
+// Load order is d0<-s0, d1<-s1, d2<-s0, d3<-s1, d4<-s0, d5<-s1, d6<-s0.
+// d0 and d1 are mandatory, d2/d3 come as a pair, d4, d5 and d6 are each
+// optional on their own.
+.macro load_reg s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
+        ld1             {\d0\wd}, [\s0], \strd
+        ld1             {\d1\wd}, [\s1], \strd
+// d3 is loaded whenever d2 is supplied; it is not tested separately.
+.ifnb \d2
+        ld1             {\d2\wd}, [\s0], \strd
+        ld1             {\d3\wd}, [\s1], \strd
+.endif
+.ifnb \d4
+        ld1             {\d4\wd}, [\s0], \strd
+.endif
+.ifnb \d5
+        ld1             {\d5\wd}, [\s1], \strd
+.endif
+.ifnb \d6
+        ld1             {\d6\wd}, [\s0], \strd
+.endif
+.endm
+// load_slice with a halfword lane: loads 2 bytes into lane 0 of each of
+// d0-d6, alternating between s0 and s1.
+.macro load_h s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
+        load_slice      \s0, \s1, \strd, .h, \d0, \d1, \d2, \d3, \d4, \d5, \d6
+.endm
+// load_slice with a word lane: loads 4 bytes into lane 0 of each of
+// d0-d6, alternating between s0 and s1.
+.macro load_s s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
+        load_slice      \s0, \s1, \strd, .s, \d0, \d1, \d2, \d3, \d4, \d5, \d6
+.endm
+// load_reg with the .8b arrangement: loads 8 bytes into each of d0-d6,
+// alternating between s0 and s1.
+.macro load_8b s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
+        load_reg        \s0, \s1, \strd, .8b, \d0, \d1, \d2, \d3, \d4, \d5, \d6
+.endm
+// load_reg with the .16b arrangement: loads 16 bytes into each of d0-d6,
+// alternating between s0 and s1.
+.macro load_16b s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
+        load_reg        \s0, \s1, \strd, .16b, \d0, \d1, \d2, \d3, \d4, \d5, \d6
+.endm
+// Pair every register with its successor using trn1 on arrangement wd:
+// after the macro, element 0 of rN is unchanged and element 1 of rN holds
+// what was element 0 of rN+1. The registers are processed in ascending
+// order, so each trn1 still reads the unmodified successor.
+// r0-r2 are mandatory. When r3 is supplied r4 must be supplied too; the
+// last register named (r2 or r4) is only read, never written.
+.macro interleave_1 wd, r0, r1, r2, r3, r4
+        trn1            \r0\wd, \r0\wd, \r1\wd
+        trn1            \r1\wd, \r1\wd, \r2\wd
+.ifnb \r3
+        trn1            \r2\wd, \r2\wd, \r3\wd
+        trn1            \r3\wd, \r3\wd, \r4\wd
+.endif
+.endm
+// interleave_1 on halfword elements (.4h arrangement).
+.macro interleave_1_h r0, r1, r2, r3, r4
+        interleave_1    .4h, \r0, \r1, \r2, \r3, \r4
+.endm
+// interleave_1 on word elements (.2s arrangement).
+.macro interleave_1_s r0, r1, r2, r3, r4
+        interleave_1    .2s, \r0, \r1, \r2, \r3, \r4
+.endm
+// Like interleave_1 but with a distance of two registers: element 1 of rN
+// receives what was element 0 of rN+2 (trn1 on arrangement wd).
+// r0-r3 are rewritten; r4 and r5 are only read. All six are mandatory.
+.macro interleave_2 wd, r0, r1, r2, r3, r4, r5
+        trn1            \r0\wd,  \r0\wd, \r2\wd
+        trn1            \r1\wd,  \r1\wd, \r3\wd
+        trn1            \r2\wd,  \r2\wd, \r4\wd
+        trn1            \r3\wd,  \r3\wd, \r5\wd
+.endm
+// interleave_2 on word elements (.2s arrangement).
+.macro interleave_2_s r0, r1, r2, r3, r4, r5
+        interleave_2    .2s, \r0, \r1, \r2, \r3, \r4, \r5
+.endm
+// Zero-extend, in place, the low 8 bytes of each listed register to 8
+// halfwords (uxtl .8b -> .8h).
+// r0 and r1 are mandatory, r2/r3 come as a pair, r4, r5 and r6 are each
+// optional on their own.
+.macro uxtl_b r0, r1, r2, r3, r4, r5, r6
+        uxtl            \r0\().8h, \r0\().8b
+        uxtl            \r1\().8h, \r1\().8b
+// r3 is widened whenever r2 is supplied; it is not tested separately.
+.ifnb \r2
+        uxtl            \r2\().8h, \r2\().8b
+        uxtl            \r3\().8h, \r3\().8b
+.endif
+.ifnb \r4
+        uxtl            \r4\().8h, \r4\().8b
+.endif
+.ifnb \r5
+        uxtl            \r5\().8h, \r5\().8b
+.endif
+.ifnb \r6
+        uxtl            \r6\().8h, \r6\().8b
+.endif
+.endm
+// 4-tap multiply-accumulate on halfword elements:
+//   d = s0*v0.h[0] + s1*v0.h[1] + s2*v0.h[2] + s3*v0.h[3]
+// wd is the arrangement suffix applied to d and s0-s3 (callers in this
+// file pass .4h or .8h). The coefficients are always taken from v0.
+// d is overwritten by the first instruction, so it must not name the same
+// register as s1, s2 or s3.
+.macro mul_mla_4 d, s0, s1, s2, s3, wd
+        mul             \d\wd,  \s0\wd,  v0.h[0]
+        mla             \d\wd,  \s1\wd,  v0.h[1]
+        mla             \d\wd,  \s2\wd,  v0.h[2]
+        mla             \d\wd,  \s3\wd,  v0.h[3]
+.endm
+// Two 8-tap multiply-accumulates on .8h vectors with the input window
+// advanced by one register between them:
+//   d0 = sum over i=0..7 of s(i)   * v0.h[i]
+//   d1 = sum over i=0..7 of s(i+1) * v0.h[i]
+// The coefficients are the eight halfword lanes of v0. The two chains are
+// interleaved instruction by instruction; d0 and d1 are written from the
+// first two instructions on, so they must not alias any source that is
+// read afterwards.
+.macro mul_mla_8_1 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+        mul             \d0\().8h, \s0\().8h, v0.h[0]
+        mul             \d1\().8h, \s1\().8h, v0.h[0]
+        mla             \d0\().8h, \s1\().8h, v0.h[1]
+        mla             \d1\().8h, \s2\().8h, v0.h[1]
+        mla             \d0\().8h, \s2\().8h, v0.h[2]
+        mla             \d1\().8h, \s3\().8h, v0.h[2]
+        mla             \d0\().8h, \s3\().8h, v0.h[3]
+        mla             \d1\().8h, \s4\().8h, v0.h[3]
+        mla             \d0\().8h, \s4\().8h, v0.h[4]
+        mla             \d1\().8h, \s5\().8h, v0.h[4]
+        mla             \d0\().8h, \s5\().8h, v0.h[5]
+        mla             \d1\().8h, \s6\().8h, v0.h[5]
+        mla             \d0\().8h, \s6\().8h, v0.h[6]
+        mla             \d1\().8h, \s7\().8h, v0.h[6]
+        mla             \d0\().8h, \s7\().8h, v0.h[7]
+        mla             \d1\().8h, \s8\().8h, v0.h[7]
+.endm
+// Two 8-tap multiply-accumulates on .8h vectors with the input window
+// advanced by two registers between them:
+//   d0 = sum over i=0..7 of s(i)   * v0.h[i]
+//   d1 = sum over i=0..7 of s(i+2) * v0.h[i]
+// The coefficients are the eight halfword lanes of v0. d0 and d1 are
+// written from the first two instructions on, so they must not alias any
+// source that is read afterwards.
+.macro mul_mla_8_2 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9
+        mul             \d0\().8h, \s0\().8h, v0.h[0]
+        mul             \d1\().8h, \s2\().8h, v0.h[0]
+        mla             \d0\().8h, \s1\().8h, v0.h[1]
+        mla             \d1\().8h, \s3\().8h, v0.h[1]
+        mla             \d0\().8h, \s2\().8h, v0.h[2]
+        mla             \d1\().8h, \s4\().8h, v0.h[2]
+        mla             \d0\().8h, \s3\().8h, v0.h[3]
+        mla             \d1\().8h, \s5\().8h, v0.h[3]
+        mla             \d0\().8h, \s4\().8h, v0.h[4]
+        mla             \d1\().8h, \s6\().8h, v0.h[4]
+        mla             \d0\().8h, \s5\().8h, v0.h[5]
+        mla             \d1\().8h, \s7\().8h, v0.h[5]
+        mla             \d0\().8h, \s6\().8h, v0.h[6]
+        mla             \d1\().8h, \s8\().8h, v0.h[6]
+        mla             \d0\().8h, \s7\().8h, v0.h[7]
+        mla             \d1\().8h, \s9\().8h, v0.h[7]
+.endm
+// Two 8-tap multiply-accumulates on .8h vectors with the input window
+// advanced by four registers between them:
+//   d0 = sum over i=0..7 of s(i)   * v0.h[i]
+//   d1 = sum over i=0..7 of s(i+4) * v0.h[i]
+// The coefficients are the eight halfword lanes of v0. d0 and d1 are
+// written from the first two instructions on, so they must not alias any
+// source that is read afterwards.
+.macro mul_mla_8_4 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11
+        mul             \d0\().8h, \s0\().8h,  v0.h[0]
+        mul             \d1\().8h, \s4\().8h,  v0.h[0]
+        mla             \d0\().8h, \s1\().8h,  v0.h[1]
+        mla             \d1\().8h, \s5\().8h,  v0.h[1]
+        mla             \d0\().8h, \s2\().8h,  v0.h[2]
+        mla             \d1\().8h, \s6\().8h,  v0.h[2]
+        mla             \d0\().8h, \s3\().8h,  v0.h[3]
+        mla             \d1\().8h, \s7\().8h,  v0.h[3]
+        mla             \d0\().8h, \s4\().8h,  v0.h[4]
+        mla             \d1\().8h, \s8\().8h,  v0.h[4]
+        mla             \d0\().8h, \s5\().8h,  v0.h[5]
+        mla             \d1\().8h, \s9\().8h,  v0.h[5]
+        mla             \d0\().8h, \s6\().8h,  v0.h[6]
+        mla             \d1\().8h, \s10\().8h, v0.h[6]
+        mla             \d0\().8h, \s7\().8h,  v0.h[7]
+        mla             \d1\().8h, \s11\().8h, v0.h[7]
+.endm
+// Narrow each listed register in place from 8 signed halfwords to 8
+// unsigned bytes with sqrshrun (signed saturating rounded shift right,
+// unsigned narrow) by the immediate shift. The result lands in the .8b
+// half of the same register.
+// r0 is mandatory, r1 is optional, r2/r3 come as a pair.
+.macro sqrshrun_b shift, r0, r1, r2, r3
+        sqrshrun        \r0\().8b, \r0\().8h,  #\shift
+.ifnb \r1
+        sqrshrun        \r1\().8b, \r1\().8h,  #\shift
+.endif
+// r3 is narrowed whenever r2 is supplied; it is not tested separately.
+.ifnb \r2
+        sqrshrun        \r2\().8b, \r2\().8h,  #\shift
+        sqrshrun        \r3\().8b, \r3\().8h,  #\shift
+.endif
+.endm
+// Apply srshr (signed rounded shift right) by the immediate shift to the
+// 8 halfwords of each listed register, in place.
+// r0 is mandatory, r1 is optional, r2/r3 come as a pair.
+.macro srshr_h shift, r0, r1, r2, r3
+        srshr           \r0\().8h, \r0\().8h,  #\shift
+.ifnb \r1
+        srshr           \r1\().8h, \r1\().8h,  #\shift
+.endif
+// r3 is shifted whenever r2 is supplied; it is not tested separately.
+.ifnb \r2
+        srshr           \r2\().8h, \r2\().8h,  #\shift
+        srshr           \r3\().8h, \r3\().8h,  #\shift
+.endif
+.endm
+// Store halfword lanes of reg alternately through the two fixed pointers
+// x0 and x8, post-incrementing the pointer used by strd after each store:
+// lane 0 -> [x0], lane 1 -> [x8], and when lanes > 2 also
+// lane 2 -> [x0], lane 3 -> [x8].
+// lanes must be an assemble-time constant (callers here pass 2 or 4).
+.macro st_h strd, reg, lanes
+        st1             {\reg\().h}[0], [x0], \strd
+        st1             {\reg\().h}[1], [x8], \strd
+.if \lanes > 2
+        st1             {\reg\().h}[2], [x0], \strd
+        st1             {\reg\().h}[3], [x8], \strd
+.endif
+.endm
+// Store word lanes 0 and 1 of r0 (and of r1 when supplied) alternately
+// through the fixed pointers x0 and x8: lane 0 -> [x0], lane 1 -> [x8],
+// post-incrementing the pointer used by strd after each store.
+// r2 and r3 are accepted so that callers can forward a four-register
+// argument list, but they are not referenced in the body.
+.macro st_s strd, r0, r1, r2, r3
+        st1             {\r0\().s}[0], [x0], \strd
+        st1             {\r0\().s}[1], [x8], \strd
+.ifnb \r1
+        st1             {\r1\().s}[0], [x0], \strd
+        st1             {\r1\().s}[1], [x8], \strd
+.endif
+.endm
+// Store doubleword lanes 0 and 1 of r0 (and of r1 when supplied)
+// alternately through the fixed pointers x0 and x8: lane 0 -> [x0],
+// lane 1 -> [x8], post-incrementing the pointer used by strd after each
+// store.
+// r2 and r3 are accepted so that callers can forward a four-register
+// argument list, but they are not referenced in the body.
+.macro st_d strd, r0, r1, r2, r3
+        st1             {\r0\().d}[0], [x0], \strd
+        st1             {\r0\().d}[1], [x8], \strd
+.ifnb \r1
+        st1             {\r1\().d}[0], [x0], \strd
+        st1             {\r1\().d}[1], [x8], \strd
+.endif
+.endm
+// Final rounding and store for results where each register holds two
+// output rows (one per 64-bit half before narrowing).
+//   type == put : sqrshrun by 6 down to bytes, then st_s stores one word
+//                 lane per row, alternating [x0] / [x8].
+//   otherwise   : srshr by 2, values stay halfwords, then st_d stores one
+//                 doubleword lane per row, alternating [x0] / [x8].
+// All of r0-r3 are forwarded to the shift helper; the store helpers only
+// use r0 and r1.
+.macro shift_store_4 type, strd, r0, r1, r2, r3
+.ifc \type, put
+        sqrshrun_b      6,     \r0, \r1, \r2, \r3
+        st_s            \strd, \r0, \r1, \r2, \r3
+.else
+        srshr_h         2,     \r0, \r1, \r2, \r3
+        st_d            \strd, \r0, \r1, \r2, \r3
+.endif
+.endm
+// Store whole registers (arrangement wd) alternately through the fixed
+// pointers x0 and x8, post-incrementing the pointer used by strd after
+// each store: r0 -> [x0], r1 -> [x8], r2 -> [x0], r3 -> [x8], ...
+// r0/r1 are mandatory, r2/r3 come as a pair, and r4-r7 come as a group of
+// four that is selected by r4 being present.
+.macro st_reg strd, wd, r0, r1, r2, r3, r4, r5, r6, r7
+        st1             {\r0\wd}, [x0], \strd
+        st1             {\r1\wd}, [x8], \strd
+.ifnb \r2
+        st1             {\r2\wd}, [x0], \strd
+        st1             {\r3\wd}, [x8], \strd
+.endif
+.ifnb \r4
+        st1             {\r4\wd}, [x0], \strd
+        st1             {\r5\wd}, [x8], \strd
+        st1             {\r6\wd}, [x0], \strd
+        st1             {\r7\wd}, [x8], \strd
+.endif
+.endm
+// st_reg with the .8b arrangement: 8 bytes per store.
+.macro st_8b strd, r0, r1, r2, r3, r4, r5, r6, r7
+        st_reg          \strd, .8b,  \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
+.endm
+// st_reg with the .16b arrangement: 16 bytes (a full register) per store.
+.macro st_16b strd, r0, r1, r2, r3, r4, r5, r6, r7
+        st_reg          \strd, .16b, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
+.endm
+// Final rounding and store for results where each register holds one
+// output row of 8 halfwords.
+//   type == put : sqrshrun by 6 down to bytes, then st_8b stores 8 bytes
+//                 per register, alternating [x0] / [x8].
+//   otherwise   : srshr by 2, values stay halfwords, then st_16b stores
+//                 the full 16-byte register, alternating [x0] / [x8].
+// r0 is mandatory; r1 and the r2/r3 pair follow the optional-argument
+// rules of the helpers they are forwarded to.
+.macro shift_store_8 type, strd, r0, r1, r2, r3
+.ifc \type, put
+        sqrshrun_b      6,     \r0, \r1, \r2, \r3
+        st_8b           \strd, \r0, \r1, \r2, \r3
+.else
+        srshr_h         2,     \r0, \r1, \r2, \r3
+        st_16b          \strd, \r0, \r1, \r2, \r3
+.endif
+.endm
+// Final rounding and store for two rows of 16 results each; r0/r1 hold the
+// first row (low and high 8 halfwords), r2/r3 the second row. All four
+// registers are required.
+//   type == put : each pair is narrowed by 6 into one 16-byte register
+//                 (sqrshrun fills the low half of r0 / r2, sqrshrun2 the
+//                 high half from r1 / r3), then r0 -> [x0], r2 -> [x8].
+//   otherwise   : srshr by 2, then {r0, r1} -> [x0] and {r2, r3} -> [x8]
+//                 as two-register stores of 32 bytes each.
+// NOTE(review): the two-register st1 form needs r0/r1 and r2/r3 to be
+// consecutively numbered registers; the callers visible in this file
+// satisfy that.
+.macro shift_store_16 type, strd, r0, r1, r2, r3
+.ifc \type, put
+        sqrshrun        \r0\().8b,  \r0\().8h, #6
+        sqrshrun2       \r0\().16b, \r1\().8h, #6
+        sqrshrun        \r2\().8b,  \r2\().8h, #6
+        sqrshrun2       \r2\().16b, \r3\().8h, #6
+        st_16b          \strd, \r0, \r2
+.else
+        srshr_h         2,     \r0, \r1, \r2, \r3
+        st1             {\r0\().8h, \r1\().8h}, [x0], \strd
+        st1             {\r2\().8h, \r3\().8h}, [x8], \strd
+.endif
+.endm
+
+// Emit one exported entry point named <op>_8tap_<type>_8bpc_neon.
+//   op      prefix of the shared implementation to branch to (<op>_8tap)
+//   type    filter-combination name used in the symbol
+//   type_h  selector constant placed in x8 (horizontal filter type)
+//   type_v  selector constant placed in x9 (vertical filter type)
+// The stub only sets x8/x9 and tail-branches, so all other argument
+// registers reach <op>_8tap untouched.
+.macro make_8tap_fn op, type, type_h, type_v
+function \op\()_8tap_\type\()_8bpc_neon, export=1
+        mov             x8,  \type_h
+        mov             x9,  \type_v
+        b               \op\()_8tap
+endfunc
+.endm
+
+// Filter-type selectors passed to the shared 8tap code in x8 / x9.
+// Each value packs two 7-bit fields, both multiples of 15:
+//   bits 7-13  offset used when the block dimension is larger than 4
+//   bits 0-6   offset used when the block dimension is 4 or less
+// The 8tap code adds the selector to mx (or my) replicated into the same
+// fields and extracts one field as an index of 8-byte rows in the
+// mc_subpel_filters table.
+// In the low field REGULAR and SHARP use the same value (3*15), SMOOTH
+// uses 4*15.
+// No spaces in these expressions, due to gas-preprocessor.
+#define REGULAR ((0*15<<7)|3*15)
+#define SMOOTH  ((1*15<<7)|4*15)
+#define SHARP   ((2*15<<7)|3*15)
+
+.macro filter_fn type, dst, d_strd, src, s_strd, w, h, mx, xmx, my, xmy, ds2, sr2, shift_hv
+make_8tap_fn \type, regular,        REGULAR, REGULAR
+make_8tap_fn \type, regular_smooth, REGULAR, SMOOTH
+make_8tap_fn \type, regular_sharp,  REGULAR, SHARP
+make_8tap_fn \type, smooth,         SMOOTH,  SMOOTH
+make_8tap_fn \type, smooth_regular, SMOOTH,  REGULAR
+make_8tap_fn \type, smooth_sharp,   SMOOTH,  SHARP
+make_8tap_fn \type, sharp,          SHARP,   SHARP
+make_8tap_fn \type, sharp_regular,  SHARP,   REGULAR
+make_8tap_fn \type, sharp_smooth,   SHARP,   SMOOTH
+
+function \type\()_8tap
+        mov             w10,  #0x4081  // (1 << 14) | (1 << 7) | (1 << 0)
+        mul             \mx,  \mx, w10
+        mul             \my,  \my, w10
+        add             \mx,  \mx, w8 // mx, 8tap_h, 4tap_h
+        add             \my,  \my, w9 // my, 8tap_v, 4tap_v
+.ifc \type, prep
+        uxtw            \d_strd, \w
+        lsl             \d_strd, \d_strd, #1
+.endif
+
+        clz             w8,  \w
+        tst             \mx, #(0x7f << 14)
+        sub             w8,  w8,  #24
+        movrel          x10, X(mc_subpel_filters), -8
+        b.ne            L(\type\()_8tap_h)
+        tst             \my, #(0x7f << 14)
+        b.ne            L(\type\()_8tap_v)
+        b               \type
+
+L(\type\()_8tap_h):
+        cmp             \w,  #4
+        ubfm            w9,  \mx, #7, #13
+        and             \mx, \mx, #0x7f
+        b.le            4f
+        mov             \mx,  w9
+4:
+        tst             \my,  #(0x7f << 14)
+        add             \xmx, x10, \mx, uxtw #3
+        b.ne            L(\type\()_8tap_hv)
+
+        adr             x9,  L(\type\()_8tap_h_tbl)
+        ldrh            w8,  [x9, x8, lsl #1]
+        sub             x9,  x9,  w8, uxtw
+        br              x9
+
+20:     // 2xN h
+.ifc \type, put
+        add             \xmx,  \xmx,  #2
+        ld1             {v0.s}[0], [\xmx]
+        sub             \src,  \src,  #1
+        add             \ds2,  \dst,  \d_strd
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd,  \d_strd,  #1
+        lsl             \s_strd,  \s_strd,  #1
+        sxtl            v0.8h,  v0.8b
+2:
+        ld1             {v4.8b},  [\src], \s_strd
+        ld1             {v6.8b},  [\sr2], \s_strd
+        uxtl            v4.8h,  v4.8b
+        uxtl            v6.8h,  v6.8b
+        ext             v5.16b, v4.16b, v4.16b, #2
+        ext             v7.16b, v6.16b, v6.16b, #2
+        subs            \h,  \h,  #2
+        trn1            v3.2s,  v4.2s,  v6.2s
+        trn2            v6.2s,  v4.2s,  v6.2s
+        trn1            v4.2s,  v5.2s,  v7.2s
+        trn2            v7.2s,  v5.2s,  v7.2s
+        mul             v3.4h,  v3.4h,  v0.h[0]
+        mla             v3.4h,  v4.4h,  v0.h[1]
+        mla             v3.4h,  v6.4h,  v0.h[2]
+        mla             v3.4h,  v7.4h,  v0.h[3]
+        srshr           v3.4h,  v3.4h,  #2
+        sqrshrun        v3.8b,  v3.8h,  #4
+        st1             {v3.h}[0], [\dst], \d_strd
+        st1             {v3.h}[1], [\ds2], \d_strd
+        b.gt            2b
+        ret
+.endif
+
+40:     // 4xN h
+        add             \xmx,  \xmx,  #2
+        ld1             {v0.s}[0], [\xmx]
+        sub             \src,  \src,  #1
+        add             \ds2,  \dst,  \d_strd
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd,  \d_strd,  #1
+        lsl             \s_strd,  \s_strd,  #1
+        sxtl            v0.8h,  v0.8b
+4:
+        ld1             {v16.8b}, [\src], \s_strd
+        ld1             {v20.8b}, [\sr2], \s_strd
+        uxtl            v16.8h,  v16.8b
+        uxtl            v20.8h,  v20.8b
+        ext             v17.16b, v16.16b, v16.16b, #2
+        ext             v18.16b, v16.16b, v16.16b, #4
+        ext             v19.16b, v16.16b, v16.16b, #6
+        ext             v21.16b, v20.16b, v20.16b, #2
+        ext             v22.16b, v20.16b, v20.16b, #4
+        ext             v23.16b, v20.16b, v20.16b, #6
+        subs            \h,  \h,  #2
+        mul             v16.4h,  v16.4h,  v0.h[0]
+        mla             v16.4h,  v17.4h,  v0.h[1]
+        mla             v16.4h,  v18.4h,  v0.h[2]
+        mla             v16.4h,  v19.4h,  v0.h[3]
+        mul             v20.4h,  v20.4h,  v0.h[0]
+        mla             v20.4h,  v21.4h,  v0.h[1]
+        mla             v20.4h,  v22.4h,  v0.h[2]
+        mla             v20.4h,  v23.4h,  v0.h[3]
+        srshr           v16.4h,  v16.4h,  #2
+        srshr           v20.4h,  v20.4h,  #2
+.ifc \type, put
+        sqrshrun        v16.8b,  v16.8h,  #4
+        sqrshrun        v20.8b,  v20.8h,  #4
+        st1             {v16.s}[0], [\dst], \d_strd
+        st1             {v20.s}[0], [\ds2], \d_strd
+.else
+        st1             {v16.4h}, [\dst], \d_strd
+        st1             {v20.4h}, [\ds2], \d_strd
+.endif
+        b.gt            4b
+        ret
+
+80:     // 8xN h
+        ld1             {v0.8b}, [\xmx]
+        sub             \src,  \src,  #3
+        add             \ds2,  \dst,  \d_strd
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd,  \d_strd,  #1
+        lsl             \s_strd,  \s_strd,  #1
+        sxtl            v0.8h, v0.8b
+8:
+        ld1             {v16.8b, v17.8b},  [\src], \s_strd
+        ld1             {v20.8b, v21.8b},  [\sr2], \s_strd
+        uxtl            v16.8h,  v16.8b
+        uxtl            v17.8h,  v17.8b
+        uxtl            v20.8h,  v20.8b
+        uxtl            v21.8h,  v21.8b
+
+        mul             v18.8h,  v16.8h,  v0.h[0]
+        mul             v22.8h,  v20.8h,  v0.h[0]
+.irpc i, 1234567
+        ext             v19.16b, v16.16b, v17.16b, #(2*\i)
+        ext             v23.16b, v20.16b, v21.16b, #(2*\i)
+        mla             v18.8h,  v19.8h,  v0.h[\i]
+        mla             v22.8h,  v23.8h,  v0.h[\i]
+.endr
+        subs            \h,  \h,  #2
+        srshr           v18.8h,  v18.8h, #2
+        srshr           v22.8h,  v22.8h, #2
+.ifc \type, put
+        sqrshrun        v18.8b,  v18.8h, #4
+        sqrshrun        v22.8b,  v22.8h, #4
+        st1             {v18.8b}, [\dst], \d_strd
+        st1             {v22.8b}, [\ds2], \d_strd
+.else
+        st1             {v18.8h}, [\dst], \d_strd
+        st1             {v22.8h}, [\ds2], \d_strd
+.endif
+        b.gt            8b
+        ret
+160:
+320:
+640:
+1280:   // 16xN, 32xN, ... h
+        ld1             {v0.8b}, [\xmx]
+        sub             \src,  \src,  #3
+        add             \ds2,  \dst,  \d_strd
+        add             \sr2,  \src,  \s_strd
+        lsl             \s_strd,  \s_strd,  #1
+        sxtl            v0.8h, v0.8b
+
+        sub             \s_strd,  \s_strd,  \w, uxtw
+        sub             \s_strd,  \s_strd,  #8
+.ifc \type, put
+        lsl             \d_strd,  \d_strd,  #1
+        sub             \d_strd,  \d_strd,  \w, uxtw
+.endif
+161:
+        ld1             {v16.8b, v17.8b, v18.8b},  [\src], #24
+        ld1             {v20.8b, v21.8b, v22.8b},  [\sr2], #24
+        mov             \mx, \w
+        uxtl            v16.8h,  v16.8b
+        uxtl            v17.8h,  v17.8b
+        uxtl            v18.8h,  v18.8b
+        uxtl            v20.8h,  v20.8b
+        uxtl            v21.8h,  v21.8b
+        uxtl            v22.8h,  v22.8b
+
+16:
+        mul             v24.8h,  v16.8h,  v0.h[0]
+        mul             v25.8h,  v17.8h,  v0.h[0]
+        mul             v26.8h,  v20.8h,  v0.h[0]
+        mul             v27.8h,  v21.8h,  v0.h[0]
+.irpc i, 1234567
+        ext             v28.16b, v16.16b, v17.16b, #(2*\i)
+        ext             v29.16b, v17.16b, v18.16b, #(2*\i)
+        ext             v30.16b, v20.16b, v21.16b, #(2*\i)
+        ext             v31.16b, v21.16b, v22.16b, #(2*\i)
+        mla             v24.8h,  v28.8h,  v0.h[\i]
+        mla             v25.8h,  v29.8h,  v0.h[\i]
+        mla             v26.8h,  v30.8h,  v0.h[\i]
+        mla             v27.8h,  v31.8h,  v0.h[\i]
+.endr
+        srshr           v24.8h,  v24.8h, #2
+        srshr           v25.8h,  v25.8h, #2
+        srshr           v26.8h,  v26.8h, #2
+        srshr           v27.8h,  v27.8h, #2
+        subs            \mx, \mx, #16
+.ifc \type, put
+        sqrshrun        v24.8b,  v24.8h, #4
+        sqrshrun2       v24.16b, v25.8h, #4
+        sqrshrun        v26.8b,  v26.8h, #4
+        sqrshrun2       v26.16b, v27.8h, #4
+        st1             {v24.16b}, [\dst], #16
+        st1             {v26.16b}, [\ds2], #16
+.else
+        st1             {v24.8h, v25.8h}, [\dst], #32
+        st1             {v26.8h, v27.8h}, [\ds2], #32
+.endif
+        b.le            9f
+
+        mov             v16.16b, v18.16b
+        mov             v20.16b, v22.16b
+        ld1             {v17.8b, v18.8b}, [\src], #16
+        ld1             {v21.8b, v22.8b}, [\sr2], #16
+        uxtl            v17.8h,  v17.8b
+        uxtl            v18.8h,  v18.8b
+        uxtl            v21.8h,  v21.8b
+        uxtl            v22.8h,  v22.8b
+        b               16b
+
+9:
+        add             \dst,  \dst,  \d_strd
+        add             \ds2,  \ds2,  \d_strd
+        add             \src,  \src,  \s_strd
+        add             \sr2,  \sr2,  \s_strd
+
+        subs            \h,  \h,  #2
+        b.gt            161b
+        ret
+
+L(\type\()_8tap_h_tbl):
+        .hword L(\type\()_8tap_h_tbl) - 1280b
+        .hword L(\type\()_8tap_h_tbl) -  640b
+        .hword L(\type\()_8tap_h_tbl) -  320b
+        .hword L(\type\()_8tap_h_tbl) -  160b
+        .hword L(\type\()_8tap_h_tbl) -   80b
+        .hword L(\type\()_8tap_h_tbl) -   40b
+        .hword L(\type\()_8tap_h_tbl) -   20b
+        .hword 0
+
+
+L(\type\()_8tap_v):
+        cmp             \h,  #4
+        ubfm            w9,  \my, #7, #13
+        and             \my, \my, #0x7f
+        b.le            4f
+        mov             \my, w9
+4:
+        add             \xmy, x10, \my, uxtw #3
+
+        adr             x9,  L(\type\()_8tap_v_tbl)
+        ldrh            w8,  [x9, x8, lsl #1]
+        sub             x9,  x9,  w8, uxtw
+        br              x9
+
+20:     // 2xN v
+.ifc \type, put
+        b.gt            28f
+
+        cmp             \h,  #2
+        add             \xmy, \xmy, #2
+        ld1             {v0.s}[0], [\xmy]
+        sub             \src,  \src,  \s_strd
+        add             \ds2,  \dst,  \d_strd
+        add             \sr2,  \src,  \s_strd
+        lsl             \s_strd,  \s_strd,  #1
+        lsl             \d_strd,  \d_strd,  #1
+        sxtl            v0.8h, v0.8b
+
+        // 2x2 v
+        load_h          \src, \sr2, \s_strd, v1, v2, v3, v4, v5
+        interleave_1_h  v1, v2, v3, v4, v5
+        b.gt            24f
+        uxtl_b          v1, v2, v3, v4
+        mul_mla_4       v6, v1, v2, v3, v4, .4h
+        sqrshrun_b      6,  v6
+        st_h            \d_strd, v6, 2
+        ret
+
+24:     // 2x4 v
+        load_h          \sr2, \src, \s_strd, v6, v7
+        interleave_1_h  v5, v6, v7
+        interleave_2_s  v1, v2, v3, v4, v5, v6
+        uxtl_b          v1, v2, v3, v4
+        mul_mla_4       v6, v1, v2, v3, v4, .8h
+        sqrshrun_b      6,  v6
+        st_h            \d_strd, v6, 4
+        ret
+
+28:     // 2x8, 2x16 v
+        ld1             {v0.8b}, [\xmy]
+        sub             \sr2,  \src,  \s_strd, lsl #1
+        add             \ds2,  \dst,  \d_strd
+        sub             \src,  \sr2,  \s_strd
+        lsl             \d_strd,  \d_strd,  #1
+        lsl             \s_strd,  \s_strd,  #1
+        sxtl            v0.8h, v0.8b
+
+        load_h          \src, \sr2, \s_strd, v1,  v2,  v3,  v4, v5, v6, v7
+        interleave_1_h  v1,  v2,  v3,  v4,  v5
+        interleave_1_h  v5,  v6,  v7
+        interleave_2_s  v1,  v2,  v3,  v4,  v5,  v6
+        uxtl_b          v1,  v2,  v3,  v4
+216:
+        subs            \h,  \h,  #8
+        load_h          \sr2, \src, \s_strd, v16, v17, v18, v19
+        load_h          \sr2, \src, \s_strd, v20, v21, v22, v23
+        interleave_1_h  v7,  v16, v17, v18, v19
+        interleave_1_h  v19, v20, v21, v22, v23
+        interleave_2_s  v5,  v6,  v7,  v16, v17, v18
+        interleave_2_s  v17, v18, v19, v20, v21, v22
+        uxtl_b          v5,  v6,  v7,  v16
+        uxtl_b          v17, v18, v19, v20
+        mul_mla_8_4     v30, v31, v1,  v2,  v3,  v4,  v5,  v6,  v7,  v16, v17, v18, v19, v20
+        sqrshrun_b      6,   v30, v31
+        st_h            \d_strd, v30, 4
+        st_h            \d_strd, v31, 4
+        b.le            0f
+        mov             v1.16b,  v17.16b
+        mov             v2.16b,  v18.16b
+        mov             v3.16b,  v19.16b
+        mov             v4.16b,  v20.16b
+        mov             v5.16b,  v21.16b
+        mov             v6.16b,  v22.16b
+        mov             v7.16b,  v23.16b
+        b               216b
+0:
+        ret
+.endif
+
+40:
+        b.gt            480f
+
+        // 4x2, 4x4 v
+        cmp             \h,  #2
+        add             \xmy, \xmy, #2
+        ld1             {v0.s}[0], [\xmy]
+        sub             \src, \src, \s_strd
+        add             \ds2, \dst, \d_strd
+        add             \sr2, \src, \s_strd
+        lsl             \s_strd, \s_strd, #1
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h, v0.8b
+
+        load_s          \src, \sr2, \s_strd, v1, v2, v3, v4, v5
+        interleave_1_s  v1, v2, v3, v4, v5
+        uxtl_b          v1, v2, v3, v4
+        mul_mla_4       v6, v1, v2, v3, v4, .8h
+        shift_store_4   \type, \d_strd, v6
+        b.le            0f
+        load_s          \sr2, \src, \s_strd, v6, v7
+        interleave_1_s  v5, v6, v7
+        uxtl_b          v5, v6
+        mul_mla_4       v7, v3, v4, v5, v6, .8h
+        shift_store_4   \type, \d_strd, v7
+0:
+        ret
+
+480:    // 4x8, 4x16 v
+        ld1             {v0.8b}, [\xmy]
+        sub             \sr2, \src, \s_strd, lsl #1
+        add             \ds2, \dst, \d_strd
+        sub             \src, \sr2, \s_strd
+        lsl             \s_strd, \s_strd, #1
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h, v0.8b
+
+        load_s          \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22
+        interleave_1_s  v16, v17, v18
+        interleave_1_s  v18, v19, v20, v21, v22
+        uxtl_b          v16, v17
+        uxtl_b          v18, v19, v20, v21
+
+48:
+        subs            \h,  \h,  #4
+        load_s          \sr2, \src, \s_strd, v23, v24, v25, v26
+        interleave_1_s  v22, v23, v24, v25, v26
+        uxtl_b          v22, v23, v24, v25
+        mul_mla_8_2     v1,  v2,  v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
+        shift_store_4   \type, \d_strd, v1, v2
+        b.le            0f
+        subs            \h,  \h,  #4
+        load_s          \sr2,  \src, \s_strd, v27, v16, v17, v18
+        interleave_1_s  v26, v27, v16, v17, v18
+        uxtl_b          v26, v27, v16, v17
+        mul_mla_8_2     v1,  v2,  v20, v21, v22, v23, v24, v25, v26, v27, v16, v17
+        shift_store_4   \type, \d_strd, v1, v2
+        b.le            0f
+        subs            \h,  \h,  #4
+        load_s          \sr2, \src, \s_strd, v19, v20, v21, v22
+        interleave_1_s  v18, v19, v20, v21, v22
+        uxtl_b          v18, v19, v20, v21
+        mul_mla_8_2     v1,  v2,  v24, v25, v26, v27, v16, v17, v18, v19, v20, v21
+        shift_store_4   \type, \d_strd, v1, v2
+        b               48b
+0:
+        ret
+
+80:
+        b.gt            880f
+
+        // 8x2, 8x4 v
+        cmp             \h,  #2
+        add             \xmy, \xmy, #2
+        ld1             {v0.s}[0], [\xmy]
+        sub             \src, \src, \s_strd
+        add             \ds2, \dst, \d_strd
+        add             \sr2, \src, \s_strd
+        lsl             \s_strd, \s_strd, #1
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h, v0.8b
+
+        load_8b         \src, \sr2, \s_strd, v1, v2, v3, v4, v5
+        uxtl_b          v1, v2, v3, v4, v5
+        mul_mla_4       v6, v1, v2, v3, v4, .8h
+        mul_mla_4       v7, v2, v3, v4, v5, .8h
+        shift_store_8   \type, \d_strd, v6, v7
+        b.le            0f
+        load_8b         \sr2, \src, \s_strd, v6, v7
+        uxtl_b          v6, v7
+        mul_mla_4       v1, v3, v4, v5, v6, .8h
+        mul_mla_4       v2, v4, v5, v6, v7, .8h
+        shift_store_8   \type, \d_strd, v1, v2
+0:
+        ret
+
+880:    // 8x8, 8x16, 8x32 v
+1680:   // 16x8, 16x16, ...
+320:    // 32x8, 32x16, ...
+640:
+1280:
+        ld1             {v0.8b}, [\xmy]
+        sub             \src, \src, \s_strd
+        sub             \src, \src, \s_strd, lsl #1
+        sxtl            v0.8h, v0.8b
+        mov             \my,  \h
+168:
+        add             \ds2, \dst, \d_strd
+        add             \sr2, \src, \s_strd
+        lsl             \s_strd, \s_strd, #1
+        lsl             \d_strd, \d_strd, #1
+
+        load_8b         \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22
+        uxtl_b          v16, v17, v18, v19, v20, v21, v22
+
+88:
+        subs            \h,  \h,  #2
+        load_8b         \sr2, \src, \s_strd, v23, v24
+        uxtl_b          v23, v24
+        mul_mla_8_1     v1,  v2,  v16, v17, v18, v19, v20, v21, v22, v23, v24
+        shift_store_8   \type, \d_strd, v1, v2
+        b.le            9f
+        subs            \h,  \h,  #2
+        load_8b         \sr2, \src, \s_strd, v25, v26
+        uxtl_b          v25, v26
+        mul_mla_8_1     v3,  v4,  v18, v19, v20, v21, v22, v23, v24, v25, v26
+        shift_store_8   \type, \d_strd, v3, v4
+        b.le            9f
+        subs            \h,  \h,  #4
+        load_8b         \sr2, \src, \s_strd, v27, v16, v17, v18
+        uxtl_b          v27, v16, v17, v18
+        mul_mla_8_1     v1,  v2,  v20, v21, v22, v23, v24, v25, v26, v27, v16
+        mul_mla_8_1     v3,  v4,  v22, v23, v24, v25, v26, v27, v16, v17, v18
+        shift_store_8   \type, \d_strd, v1, v2, v3, v4
+        b.le            9f
+        subs            \h,  \h,  #4
+        load_8b         \sr2, \src, \s_strd, v19, v20, v21, v22
+        uxtl_b          v19, v20, v21, v22
+        mul_mla_8_1     v1,  v2,  v24, v25, v26, v27, v16, v17, v18, v19, v20
+        mul_mla_8_1     v3,  v4,  v26, v27, v16, v17, v18, v19, v20, v21, v22
+        shift_store_8   \type, \d_strd, v1, v2, v3, v4
+        b.gt            88b
+9:
+        subs            \w,  \w,  #8
+        b.le            0f
+        asr             \s_strd, \s_strd, #1
+        asr             \d_strd, \d_strd, #1
+        msub            \src, \s_strd, \xmy, \src
+        msub            \dst, \d_strd, \xmy, \dst
+        sub             \src, \src, \s_strd, lsl #3
+        mov             \h,  \my
+        add             \src, \src, #8
+.ifc \type, put
+        add             \dst, \dst, #8
+.else
+        add             \dst, \dst, #16
+.endif
+        b               168b
+0:
+        ret
+
+160:
+        b.gt            1680b
+
+        // 16x4 v
+        add             \xmy, \xmy, #2
+        ld1             {v0.s}[0], [\xmy]
+        sub             \src, \src, \s_strd
+        add             \ds2, \dst, \d_strd
+        add             \sr2, \src, \s_strd
+        lsl             \s_strd, \s_strd, #1
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h, v0.8b
+
+        cmp             \h,  #2
+        load_16b        \src, \sr2, \s_strd, v1,  v2,  v3,  v4,  v5
+        uxtl            v16.8h, v1.8b
+        uxtl            v17.8h, v2.8b
+        uxtl            v18.8h, v3.8b
+        uxtl            v19.8h, v4.8b
+        uxtl            v20.8h, v5.8b
+        uxtl2           v23.8h, v1.16b
+        uxtl2           v24.8h, v2.16b
+        uxtl2           v25.8h, v3.16b
+        uxtl2           v26.8h, v4.16b
+        uxtl2           v27.8h, v5.16b
+        mul_mla_4       v1,  v16, v17, v18, v19, .8h
+        mul_mla_4       v16, v17, v18, v19, v20, .8h
+        mul_mla_4       v2,  v23, v24, v25, v26, .8h
+        mul_mla_4       v17, v24, v25, v26, v27, .8h
+        shift_store_16  \type, \d_strd, v1, v2, v16, v17
+        b.le            0f
+        load_16b        \sr2, \src, \s_strd, v6,  v7
+        uxtl            v21.8h, v6.8b
+        uxtl            v22.8h, v7.8b
+        uxtl2           v28.8h, v6.16b
+        uxtl2           v29.8h, v7.16b
+        mul_mla_4       v1,  v18, v19, v20, v21, .8h
+        mul_mla_4       v3,  v19, v20, v21, v22, .8h
+        mul_mla_4       v2,  v25, v26, v27, v28, .8h
+        mul_mla_4       v4,  v26, v27, v28, v29, .8h
+        shift_store_16  \type, \d_strd, v1, v2, v3, v4
+0:
+        ret
+
+L(\type\()_8tap_v_tbl):  // jump table: 16-bit distances from this table back to each v label
+        .hword L(\type\()_8tap_v_tbl) - 1280b  // entry 0
+        .hword L(\type\()_8tap_v_tbl) -  640b  // entry 1
+        .hword L(\type\()_8tap_v_tbl) -  320b  // entry 2
+        .hword L(\type\()_8tap_v_tbl) -  160b  // entry 3
+        .hword L(\type\()_8tap_v_tbl) -   80b  // entry 4
+        .hword L(\type\()_8tap_v_tbl) -   40b  // entry 5
+        .hword L(\type\()_8tap_v_tbl) -   20b  // entry 6
+        .hword 0  // unused trailing entry
+
+L(\type\()_8tap_hv):  // combined horizontal + vertical filter: pick vertical coefficients, then dispatch on x8
+        cmp             \h,  #4  // these flags stay live across the dispatch; the hv labels branch on them
+        ubfm            w9,  \my, #7, #13  // w9 = bits 7..13 of my
+        and             \my, \my, #0x7f  // my = bits 0..6 of my
+        b.le            4f  // h <= 4: keep the low-bits index
+        mov             \my,  w9  // h > 4: use the bits 7..13 index instead
+4:
+        add             \xmy,  x10, \my, uxtw #3  // xmy = x10 + my * 8 (x10 presumably is the coefficient table base - set outside this view)
+
+        adr             x9,  L(\type\()_8tap_hv_tbl)
+        ldrh            w8,  [x9, x8, lsl #1]  // table entry x8 = distance from the table back to the label
+        sub             x9,  x9,  w8, uxtw
+        br              x9
+
+20:  // 2xN hv, only assembled for put
+.ifc \type, put
+        add             \xmx,  \xmx,  #2  // skip the first two horizontal coefficient bytes
+        ld1             {v0.s}[0],  [\xmx]  // four 8-bit horizontal coefficients
+        b.gt            280f  // h > 4 (flags from the cmp at the hv entry)
+        add             \xmy,  \xmy,  #2  // same for the vertical coefficients
+        ld1             {v1.s}[0],  [\xmy]
+
+        // 2x2, 2x4 hv
+        sub             \sr2, \src, #1  // sr2 = one pixel to the left
+        sub             \src, \sr2, \s_strd  // src = the row above sr2
+        add             \ds2, \dst, \d_strd
+        lsl             \s_strd, \s_strd, #1  // both strides doubled: each pointer steps two rows
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h,  v0.8b  // sign-extend coefficients to 16 bit
+        sxtl            v1.8h,  v1.8b
+        mov             x15, x30  // keep the return address; bl below overwrites x30
+
+        ld1             {v28.8b}, [\src], \s_strd  // first row, filtered on its own
+        uxtl            v28.8h,  v28.8b
+        ext             v29.16b, v28.16b, v28.16b, #2  // same row shifted by one pixel
+        mul             v28.4h,  v28.4h,  v0.4h
+        mul             v29.4h,  v29.4h,  v0.4h
+        addv            h28, v28.4h  // sum of the four products = output pixel 0
+        addv            h29, v29.4h  // output pixel 1
+        trn1            v16.4h, v28.4h, v29.4h  // v16.h[0..1] = the two pixels
+        srshr           v16.4h, v16.4h, #2  // rounding shift right by 2
+        bl              L(\type\()_8tap_filter_2)
+
+        trn1            v16.2s, v16.2s, v28.2s  // v16 = first row : next row
+        trn1            v17.2s, v28.2s, v30.2s  // v17 = the two rows just filtered
+        mov             v18.8b, v30.8b  // v18 low half = newest row
+
+2:
+        bl              L(\type\()_8tap_filter_2)
+
+        trn1            v18.2s, v18.2s, v28.2s
+        trn1            v19.2s, v28.2s, v30.2s
+        smull           v2.4s,  v16.4h, v1.h[0]  // vertical filter over v16..v19, two output rows at once
+        smlal           v2.4s,  v17.4h, v1.h[1]
+        smlal           v2.4s,  v18.4h, v1.h[2]
+        smlal           v2.4s,  v19.4h, v1.h[3]
+
+        sqrshrn         v2.4h,  v2.4s,  #\shift_hv  // saturating rounding narrow to 16 bit
+        sqxtun          v2.8b,  v2.8h  // saturate to unsigned 8 bit
+        subs            \h,  \h,  #2
+        st1             {v2.h}[0], [\dst], \d_strd  // two bytes per row
+        st1             {v2.h}[1], [\ds2], \d_strd
+        b.le            0f
+        mov             v16.8b, v18.8b  // slide the row window down by two rows
+        mov             v17.8b, v19.8b
+        mov             v18.8b, v30.8b
+        b               2b
+
+280:    // 2x8, 2x16, 2x32 hv  (eight vertical coefficients)
+        ld1             {v1.8b},  [\xmy]  // all eight vertical coefficient bytes
+        sub             \src, \src, #1  // one pixel to the left
+        sub             \sr2, \src, \s_strd, lsl #1  // sr2 = two rows up
+        sub             \src, \sr2, \s_strd  // src = three rows up
+        add             \ds2, \dst, \d_strd
+        lsl             \s_strd, \s_strd, #1
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h,  v0.8b
+        sxtl            v1.8h,  v1.8b
+        mov             x15, x30  // keep the return address; bl below overwrites x30
+
+        ld1             {v28.8b}, [\src], \s_strd  // first row, filtered on its own
+        uxtl            v28.8h,  v28.8b
+        ext             v29.16b, v28.16b, v28.16b, #2
+        mul             v28.4h,  v28.4h,  v0.4h
+        mul             v29.4h,  v29.4h,  v0.4h
+        addv            h28, v28.4h
+        addv            h29, v29.4h
+        trn1            v16.4h, v28.4h, v29.4h
+        srshr           v16.4h, v16.4h, #2
+
+        bl              L(\type\()_8tap_filter_2)  // three calls prefill the row window v16..v22
+        trn1            v16.2s, v16.2s, v28.2s
+        trn1            v17.2s, v28.2s, v30.2s
+        mov             v18.8b, v30.8b
+        bl              L(\type\()_8tap_filter_2)
+        trn1            v18.2s, v18.2s, v28.2s
+        trn1            v19.2s, v28.2s, v30.2s
+        mov             v20.8b, v30.8b
+        bl              L(\type\()_8tap_filter_2)
+        trn1            v20.2s, v20.2s, v28.2s
+        trn1            v21.2s, v28.2s, v30.2s
+        mov             v22.8b, v30.8b
+
+28:
+        bl              L(\type\()_8tap_filter_2)
+        trn1            v22.2s, v22.2s, v28.2s
+        trn1            v23.2s, v28.2s, v30.2s
+        smull           v2.4s,  v16.4h, v1.h[0]  // vertical filter over v16..v23, two output rows at once
+        smlal           v2.4s,  v17.4h, v1.h[1]
+        smlal           v2.4s,  v18.4h, v1.h[2]
+        smlal           v2.4s,  v19.4h, v1.h[3]
+        smlal           v2.4s,  v20.4h, v1.h[4]
+        smlal           v2.4s,  v21.4h, v1.h[5]
+        smlal           v2.4s,  v22.4h, v1.h[6]
+        smlal           v2.4s,  v23.4h, v1.h[7]
+
+        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
+        sqxtun          v2.8b,  v2.8h
+        subs            \h,  \h,  #2
+        st1             {v2.h}[0], [\dst], \d_strd
+        st1             {v2.h}[1], [\ds2], \d_strd
+        b.le            0f
+        mov             v16.8b, v18.8b  // slide the row window down by two rows
+        mov             v17.8b, v19.8b
+        mov             v18.8b, v20.8b
+        mov             v19.8b, v21.8b
+        mov             v20.8b, v22.8b
+        mov             v21.8b, v23.8b
+        mov             v22.8b, v30.8b
+        b               28b
+
+0:
+        br              x15  // return through the address saved in x15
+
+L(\type\()_8tap_filter_2):  // helper: horizontally filter one row from sr2 and one from src, two pixels each
+        ld1             {v28.8b},  [\sr2], \s_strd
+        ld1             {v30.8b},  [\src], \s_strd
+        uxtl            v28.8h,  v28.8b
+        uxtl            v30.8h,  v30.8b
+        ext             v29.16b, v28.16b, v28.16b, #2  // each row shifted by one pixel
+        ext             v31.16b, v30.16b, v30.16b, #2
+        trn1            v27.2s,  v28.2s,  v30.2s  // regroup so lanes 0-1 belong to the sr2 row and lanes 2-3 to the src row
+        trn2            v30.2s,  v28.2s,  v30.2s
+        trn1            v28.2s,  v29.2s,  v31.2s
+        trn2            v31.2s,  v29.2s,  v31.2s
+        mul             v27.4h,  v27.4h,  v0.h[0]
+        mla             v27.4h,  v28.4h,  v0.h[1]
+        mla             v27.4h,  v30.4h,  v0.h[2]
+        mla             v27.4h,  v31.4h,  v0.h[3]
+        srshr           v28.4h,  v27.4h,  #2  // result: v28.s[0] = sr2 row, v28.s[1] = src row
+        trn2            v30.2s,  v28.2s,  v28.2s  // v30.s[0] = copy of the src row result
+        ret
+.endif
+
+40:  // 4xN hv
+        add             \xmx, \xmx, #2  // skip the first two horizontal coefficient bytes
+        ld1             {v0.s}[0],  [\xmx]  // four 8-bit horizontal coefficients
+        b.gt            480f  // h > 4 (flags from the cmp at the hv entry)
+        add             \xmy, \xmy,  #2  // same for the vertical coefficients
+        ld1             {v1.s}[0],  [\xmy]
+        sub             \sr2, \src, #1  // sr2 = one pixel to the left
+        sub             \src, \sr2, \s_strd  // src = the row above sr2
+        add             \ds2, \dst, \d_strd
+        lsl             \s_strd, \s_strd, #1  // both strides doubled: each pointer steps two rows
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h,  v0.8b  // sign-extend coefficients to 16 bit
+        sxtl            v1.8h,  v1.8b
+        mov             x15, x30  // keep the return address; bl below overwrites x30
+
+        // 4x2, 4x4 hv
+        ld1             {v26.8b}, [\src], \s_strd  // first row, filtered on its own
+        uxtl            v26.8h,  v26.8b
+        ext             v28.16b, v26.16b, v26.16b, #2  // row shifted by 1, 2 and 3 pixels
+        ext             v29.16b, v26.16b, v26.16b, #4
+        ext             v30.16b, v26.16b, v26.16b, #6
+        mul             v31.4h,  v26.4h,  v0.h[0]
+        mla             v31.4h,  v28.4h,  v0.h[1]
+        mla             v31.4h,  v29.4h,  v0.h[2]
+        mla             v31.4h,  v30.4h,  v0.h[3]
+        srshr           v16.4h,  v31.4h,  #2  // rounding shift right by 2
+
+        bl              L(\type\()_8tap_filter_4)
+        mov             v17.8b, v28.8b  // v16..v18 = first three filtered rows
+        mov             v18.8b, v29.8b
+
+4:
+        smull           v2.4s,  v16.4h, v1.h[0]  // v2 accumulates the first output row, v3 the second
+        bl              L(\type\()_8tap_filter_4)
+        smull           v3.4s,  v17.4h, v1.h[0]
+        smlal           v2.4s,  v17.4h, v1.h[1]
+        smlal           v3.4s,  v18.4h, v1.h[1]
+        smlal           v2.4s,  v18.4h, v1.h[2]
+        smlal           v3.4s,  v28.4h, v1.h[2]
+        smlal           v2.4s,  v28.4h, v1.h[3]
+        smlal           v3.4s,  v29.4h, v1.h[3]
+        sqrshrn         v2.4h,  v2.4s,  #\shift_hv  // saturating rounding narrow to 16 bit
+        sqrshrn         v3.4h,  v3.4s,  #\shift_hv
+        subs            \h,  \h,  #2
+.ifc \type, put
+        sqxtun          v2.8b,  v2.8h  // put: saturate to unsigned 8 bit, store 4 bytes per row
+        sqxtun          v3.8b,  v3.8h
+        st1             {v2.s}[0], [\dst], \d_strd
+        st1             {v3.s}[0], [\ds2], \d_strd
+.else
+        st1             {v2.4h}, [\dst], \d_strd  // prep: store the 16-bit values
+        st1             {v3.4h}, [\ds2], \d_strd
+.endif
+        b.le            0f
+        mov             v16.16b, v18.16b  // slide the row window down by two rows
+        mov             v17.16b, v28.16b
+        mov             v18.16b, v29.16b
+        b               4b
+
+480:    // 4x8, 4x16, 4x32 hv  (eight vertical coefficients)
+        ld1             {v1.8b},  [\xmy]  // all eight vertical coefficient bytes
+        sub             \src, \src, #1  // one pixel to the left
+        sub             \sr2, \src, \s_strd, lsl #1  // sr2 = two rows up
+        sub             \src, \sr2, \s_strd  // src = three rows up
+        add             \ds2, \dst, \d_strd
+        lsl             \s_strd, \s_strd, #1
+        lsl             \d_strd, \d_strd, #1
+        sxtl            v0.8h,  v0.8b
+        sxtl            v1.8h,  v1.8b
+        mov             x15, x30  // keep the return address; bl below overwrites x30
+
+        ld1             {v26.8b}, [\src], \s_strd  // first row, filtered on its own
+        uxtl            v26.8h,  v26.8b
+        ext             v28.16b, v26.16b, v26.16b, #2
+        ext             v29.16b, v26.16b, v26.16b, #4
+        ext             v30.16b, v26.16b, v26.16b, #6
+        mul             v31.4h,  v26.4h,  v0.h[0]
+        mla             v31.4h,  v28.4h,  v0.h[1]
+        mla             v31.4h,  v29.4h,  v0.h[2]
+        mla             v31.4h,  v30.4h,  v0.h[3]
+        srshr           v16.4h,  v31.4h,  #2
+
+        bl              L(\type\()_8tap_filter_4)  // three calls prefill the row window v16..v22
+        mov             v17.8b, v28.8b
+        mov             v18.8b, v29.8b
+        bl              L(\type\()_8tap_filter_4)
+        mov             v19.8b, v28.8b
+        mov             v20.8b, v29.8b
+        bl              L(\type\()_8tap_filter_4)
+        mov             v21.8b, v28.8b
+        mov             v22.8b, v29.8b
+
+48:
+        smull           v2.4s,  v16.4h, v1.h[0]  // v2 accumulates the first output row, v3 the second
+        bl              L(\type\()_8tap_filter_4)
+        smull           v3.4s,  v17.4h, v1.h[0]
+        smlal           v2.4s,  v17.4h, v1.h[1]
+        smlal           v3.4s,  v18.4h, v1.h[1]
+        smlal           v2.4s,  v18.4h, v1.h[2]
+        smlal           v3.4s,  v19.4h, v1.h[2]
+        smlal           v2.4s,  v19.4h, v1.h[3]
+        smlal           v3.4s,  v20.4h, v1.h[3]
+        smlal           v2.4s,  v20.4h, v1.h[4]
+        smlal           v3.4s,  v21.4h, v1.h[4]
+        smlal           v2.4s,  v21.4h, v1.h[5]
+        smlal           v3.4s,  v22.4h, v1.h[5]
+        smlal           v2.4s,  v22.4h, v1.h[6]
+        smlal           v3.4s,  v28.4h, v1.h[6]
+        smlal           v2.4s,  v28.4h, v1.h[7]
+        smlal           v3.4s,  v29.4h, v1.h[7]
+        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
+        sqrshrn         v3.4h,  v3.4s,  #\shift_hv
+        subs            \h,  \h,  #2
+.ifc \type, put
+        sqxtun          v2.8b,  v2.8h
+        sqxtun          v3.8b,  v3.8h
+        st1             {v2.s}[0], [\dst], \d_strd
+        st1             {v3.s}[0], [\ds2], \d_strd
+.else
+        st1             {v2.4h}, [\dst], \d_strd
+        st1             {v3.4h}, [\ds2], \d_strd
+.endif
+        b.le            0f
+        mov             v16.8b,  v18.8b  // slide the row window down by two rows
+        mov             v17.8b,  v19.8b
+        mov             v18.8b,  v20.8b
+        mov             v19.8b,  v21.8b
+        mov             v20.8b,  v22.8b
+        mov             v21.8b,  v28.8b
+        mov             v22.8b,  v29.8b
+        b               48b
+0:
+        br              x15  // return through the address saved in x15
+
+L(\type\()_8tap_filter_4):  // helper: horizontally filter one row from sr2 (result v28) and one from src (result v29), four pixels each
+        ld1             {v26.8b}, [\sr2], \s_strd
+        ld1             {v27.8b}, [\src], \s_strd
+        uxtl            v26.8h,  v26.8b
+        uxtl            v27.8h,  v27.8b
+        ext             v28.16b, v26.16b, v26.16b, #2  // sr2 row shifted by 1, 2 and 3 pixels
+        ext             v29.16b, v26.16b, v26.16b, #4
+        ext             v30.16b, v26.16b, v26.16b, #6
+        mul             v31.4h,  v26.4h,  v0.h[0]
+        mla             v31.4h,  v28.4h,  v0.h[1]
+        mla             v31.4h,  v29.4h,  v0.h[2]
+        mla             v31.4h,  v30.4h,  v0.h[3]
+        ext             v28.16b, v27.16b, v27.16b, #2  // src row shifted by 1, 2 and 3 pixels
+        ext             v29.16b, v27.16b, v27.16b, #4
+        ext             v30.16b, v27.16b, v27.16b, #6
+        mul             v27.4h,  v27.4h,  v0.h[0]
+        mla             v27.4h,  v28.4h,  v0.h[1]
+        mla             v27.4h,  v29.4h,  v0.h[2]
+        mla             v27.4h,  v30.4h,  v0.h[3]
+        srshr           v28.4h,  v31.4h,  #2  // rounding shift right by 2
+        srshr           v29.4h,  v27.4h,  #2
+        ret
+
+80:  // 8, 16 and 32 wide hv share this code; the block is processed in 8-pixel-wide column strips
+160:
+320:
+        b.gt            880f  // h > 4 (flags from the cmp at the hv entry)
+        add             \xmy,  \xmy,  #2  // skip the first two vertical coefficient bytes
+        ld1             {v0.8b},  [\xmx]  // all eight horizontal coefficient bytes
+        ld1             {v1.s}[0],  [\xmy]  // four vertical coefficient bytes
+        sub             \src,  \src,  #3  // three pixels to the left
+        sub             \src,  \src,  \s_strd  // one row up
+        sxtl            v0.8h,  v0.8b  // sign-extend coefficients to 16 bit
+        sxtl            v1.8h,  v1.8b
+        mov             x15, x30  // keep the return address; bl below overwrites x30
+        mov             \my,  \h  // remember the height for the following column strips
+
+164:    // 8x2, 8x4, 16x2, 16x4, 32x2, 32x4 hv
+        add             \ds2,  \dst,  \d_strd
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd, \d_strd, #1  // both strides doubled: each pointer steps two rows
+        lsl             \s_strd, \s_strd, #1
+
+        ld1             {v28.8b, v29.8b},  [\src], \s_strd  // first row, filtered on its own
+        uxtl            v28.8h,  v28.8b
+        uxtl            v29.8h,  v29.8b
+        mul             v24.8h,  v28.8h,  v0.h[0]
+.irpc i, 1234567
+        ext             v26.16b, v28.16b, v29.16b, #(2*\i)  // row shifted by i pixels
+        mla             v24.8h,  v26.8h,  v0.h[\i]
+.endr
+        srshr           v16.8h,  v24.8h, #2  // rounding shift right by 2
+
+        bl              L(\type\()_8tap_filter_8)
+        mov             v17.16b, v24.16b  // v16..v18 = first three filtered rows
+        mov             v18.16b, v25.16b
+
+8:
+        smull           v2.4s,  v16.4h, v1.h[0]  // v2:v3 accumulate the first output row, v4:v5 the second
+        smull2          v3.4s,  v16.8h, v1.h[0]
+        bl              L(\type\()_8tap_filter_8)
+        smull           v4.4s,  v17.4h, v1.h[0]
+        smull2          v5.4s,  v17.8h, v1.h[0]
+        smlal           v2.4s,  v17.4h, v1.h[1]
+        smlal2          v3.4s,  v17.8h, v1.h[1]
+        smlal           v4.4s,  v18.4h, v1.h[1]
+        smlal2          v5.4s,  v18.8h, v1.h[1]
+        smlal           v2.4s,  v18.4h, v1.h[2]
+        smlal2          v3.4s,  v18.8h, v1.h[2]
+        smlal           v4.4s,  v24.4h, v1.h[2]
+        smlal2          v5.4s,  v24.8h, v1.h[2]
+        smlal           v2.4s,  v24.4h, v1.h[3]
+        smlal2          v3.4s,  v24.8h, v1.h[3]
+        smlal           v4.4s,  v25.4h, v1.h[3]
+        smlal2          v5.4s,  v25.8h, v1.h[3]
+        sqrshrn         v2.4h,  v2.4s,  #\shift_hv  // saturating rounding narrow to 16 bit
+        sqrshrn2        v2.8h,  v3.4s,  #\shift_hv
+        sqrshrn         v4.4h,  v4.4s,  #\shift_hv
+        sqrshrn2        v4.8h,  v5.4s,  #\shift_hv
+        subs            \h,  \h,  #2
+.ifc \type, put
+        sqxtun          v2.8b,  v2.8h  // put: saturate to unsigned 8 bit, store 8 bytes per row
+        sqxtun          v4.8b,  v4.8h
+        st1             {v2.8b}, [\dst], \d_strd
+        st1             {v4.8b}, [\ds2], \d_strd
+.else
+        st1             {v2.8h}, [\dst], \d_strd  // prep: store the 16-bit values
+        st1             {v4.8h}, [\ds2], \d_strd
+.endif
+        b.le            9f
+        mov             v16.16b, v18.16b  // slide the row window down by two rows
+        mov             v17.16b, v24.16b
+        mov             v18.16b, v25.16b
+        b               8b
+9:
+        subs            \w,  \w,  #8  // one 8-pixel column strip done
+        b.le            0f
+        asr             \s_strd,  \s_strd,  #1  // undo the stride doubling
+        asr             \d_strd,  \d_strd,  #1
+        msub            \src,  \s_strd,  \xmy,  \src  // rewind by the saved height (xmy presumably is the 64-bit view of my)
+        msub            \dst,  \d_strd,  \xmy,  \dst
+        sub             \src,  \src,  \s_strd,  lsl #2  // src also advanced four rows beyond the height
+        mov             \h,  \my  // restore the row counter
+        add             \src,  \src,  #8  // next 8 source pixels
+.ifc \type, put
+        add             \dst,  \dst,  #8  // 8 output bytes per strip row
+.else
+        add             \dst,  \dst,  #16  // 8 16-bit outputs per strip row
+.endif
+        b               164b
+
+880:    // 8x8, 8x16, ..., 16x8, ..., 32x8, ... hv
+640:
+1280:
+        ld1             {v0.8b},  [\xmx]  // all eight horizontal coefficient bytes
+        ld1             {v1.8b},  [\xmy]  // all eight vertical coefficient bytes
+        sub             \src,  \src,  #3  // three pixels to the left
+        sub             \src,  \src,  \s_strd  // three rows up in total
+        sub             \src,  \src,  \s_strd, lsl #1
+        sxtl            v0.8h,  v0.8b
+        sxtl            v1.8h,  v1.8b
+        mov             x15, x30  // keep the return address; bl below overwrites x30
+        mov             \my,  \h  // remember the height for the following column strips
+
+168:  // start of one 8-pixel-wide column strip
+        add             \ds2,  \dst,  \d_strd
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd, \d_strd, #1
+        lsl             \s_strd, \s_strd, #1
+
+        ld1             {v28.8b, v29.8b},  [\src], \s_strd  // first row, filtered on its own
+        uxtl            v28.8h,  v28.8b
+        uxtl            v29.8h,  v29.8b
+        mul             v24.8h,  v28.8h,  v0.h[0]
+.irpc i, 1234567
+        ext             v26.16b, v28.16b, v29.16b, #(2*\i)
+        mla             v24.8h,  v26.8h,  v0.h[\i]
+.endr
+        srshr           v16.8h,  v24.8h, #2
+
+        bl              L(\type\()_8tap_filter_8)  // three calls prefill the row window v16..v22
+        mov             v17.16b, v24.16b
+        mov             v18.16b, v25.16b
+        bl              L(\type\()_8tap_filter_8)
+        mov             v19.16b, v24.16b
+        mov             v20.16b, v25.16b
+        bl              L(\type\()_8tap_filter_8)
+        mov             v21.16b, v24.16b
+        mov             v22.16b, v25.16b
+
+88:
+        smull           v2.4s,  v16.4h, v1.h[0]  // v2:v3 accumulate the first output row, v4:v5 the second
+        smull2          v3.4s,  v16.8h, v1.h[0]
+        bl              L(\type\()_8tap_filter_8)
+        smull           v4.4s,  v17.4h, v1.h[0]
+        smull2          v5.4s,  v17.8h, v1.h[0]
+        smlal           v2.4s,  v17.4h, v1.h[1]
+        smlal2          v3.4s,  v17.8h, v1.h[1]
+        smlal           v4.4s,  v18.4h, v1.h[1]
+        smlal2          v5.4s,  v18.8h, v1.h[1]
+        smlal           v2.4s,  v18.4h, v1.h[2]
+        smlal2          v3.4s,  v18.8h, v1.h[2]
+        smlal           v4.4s,  v19.4h, v1.h[2]
+        smlal2          v5.4s,  v19.8h, v1.h[2]
+        smlal           v2.4s,  v19.4h, v1.h[3]
+        smlal2          v3.4s,  v19.8h, v1.h[3]
+        smlal           v4.4s,  v20.4h, v1.h[3]
+        smlal2          v5.4s,  v20.8h, v1.h[3]
+        smlal           v2.4s,  v20.4h, v1.h[4]
+        smlal2          v3.4s,  v20.8h, v1.h[4]
+        smlal           v4.4s,  v21.4h, v1.h[4]
+        smlal2          v5.4s,  v21.8h, v1.h[4]
+        smlal           v2.4s,  v21.4h, v1.h[5]
+        smlal2          v3.4s,  v21.8h, v1.h[5]
+        smlal           v4.4s,  v22.4h, v1.h[5]
+        smlal2          v5.4s,  v22.8h, v1.h[5]
+        smlal           v2.4s,  v22.4h, v1.h[6]
+        smlal2          v3.4s,  v22.8h, v1.h[6]
+        smlal           v4.4s,  v24.4h, v1.h[6]
+        smlal2          v5.4s,  v24.8h, v1.h[6]
+        smlal           v2.4s,  v24.4h, v1.h[7]
+        smlal2          v3.4s,  v24.8h, v1.h[7]
+        smlal           v4.4s,  v25.4h, v1.h[7]
+        smlal2          v5.4s,  v25.8h, v1.h[7]
+        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
+        sqrshrn2        v2.8h,  v3.4s,  #\shift_hv
+        sqrshrn         v4.4h,  v4.4s,  #\shift_hv
+        sqrshrn2        v4.8h,  v5.4s,  #\shift_hv
+        subs            \h,  \h,  #2
+.ifc \type, put
+        sqxtun          v2.8b,  v2.8h
+        sqxtun          v4.8b,  v4.8h
+        st1             {v2.8b}, [\dst], \d_strd
+        st1             {v4.8b}, [\ds2], \d_strd
+.else
+        st1             {v2.8h}, [\dst], \d_strd
+        st1             {v4.8h}, [\ds2], \d_strd
+.endif
+        b.le            9f
+        mov             v16.16b, v18.16b  // slide the row window down by two rows
+        mov             v17.16b, v19.16b
+        mov             v18.16b, v20.16b
+        mov             v19.16b, v21.16b
+        mov             v20.16b, v22.16b
+        mov             v21.16b, v24.16b
+        mov             v22.16b, v25.16b
+        b               88b
+9:
+        subs            \w,  \w,  #8  // one 8-pixel column strip done
+        b.le            0f
+        asr             \s_strd,  \s_strd,  #1  // undo the stride doubling
+        asr             \d_strd,  \d_strd,  #1
+        msub            \src,  \s_strd,  \xmy,  \src  // rewind by the saved height (xmy presumably is the 64-bit view of my)
+        msub            \dst,  \d_strd,  \xmy,  \dst
+        sub             \src,  \src,  \s_strd,  lsl #3  // src also advanced eight rows beyond the height
+        mov             \h,  \my  // restore the row counter
+        add             \src,  \src,  #8
+.ifc \type, put
+        add             \dst,  \dst,  #8
+.else
+        add             \dst,  \dst,  #16
+.endif
+        b               168b
+0:
+        br              x15  // return through the address saved in x15
+
+L(\type\()_8tap_filter_8):  // helper: horizontally filter one row from sr2 (result v24) and one from src (result v25), eight pixels each
+        ld1             {v28.8b, v29.8b},  [\sr2], \s_strd
+        ld1             {v30.8b, v31.8b},  [\src], \s_strd
+        uxtl            v28.8h,  v28.8b
+        uxtl            v29.8h,  v29.8b
+        uxtl            v30.8h,  v30.8b
+        uxtl            v31.8h,  v31.8b
+        mul             v24.8h,  v28.8h,  v0.h[0]
+        mul             v25.8h,  v30.8h,  v0.h[0]
+.irpc i, 1234567
+        ext             v26.16b, v28.16b, v29.16b, #(2*\i)  // both rows shifted by i pixels
+        ext             v27.16b, v30.16b, v31.16b, #(2*\i)
+        mla             v24.8h,  v26.8h,  v0.h[\i]
+        mla             v25.8h,  v27.8h,  v0.h[\i]
+.endr
+        srshr           v24.8h,  v24.8h, #2  // rounding shift right by 2
+        srshr           v25.8h,  v25.8h, #2
+        ret
+
+L(\type\()_8tap_hv_tbl):  // jump table read by the hv dispatch: 16-bit distances from this table back to each hv label
+        .hword L(\type\()_8tap_hv_tbl) - 1280b  // entry 0
+        .hword L(\type\()_8tap_hv_tbl) -  640b  // entry 1
+        .hword L(\type\()_8tap_hv_tbl) -  320b  // entry 2
+        .hword L(\type\()_8tap_hv_tbl) -  160b  // entry 3
+        .hword L(\type\()_8tap_hv_tbl) -   80b  // entry 4
+        .hword L(\type\()_8tap_hv_tbl) -   40b  // entry 5
+        .hword L(\type\()_8tap_hv_tbl) -   20b  // entry 6
+        .hword 0  // unused trailing entry
+endfunc
+
+
+function \type\()_bilin_8bpc_neon, export=1
+        dup             v1.16b, \mx  // bilinear entry point: v1 = mx in every byte
+        dup             v3.16b, \my  // v3 = my in every byte
+        mov             w9,  #16
+        sub             w8, w9, \mx
+        sub             w9, w9, \my
+        dup             v0.16b, w8  // v0 = 16 - mx in every byte
+        dup             v2.16b, w9  // v2 = 16 - my in every byte
+.ifc \type, prep
+        uxtw            \d_strd, \w  // prep: destination stride = w * 2 bytes
+        lsl             \d_strd, \d_strd, #1
+.endif
+
+        clz             w8,  \w
+        sub             w8,  w8,  #24  // jump table index: 0 for w = 128 up to 6 for w = 2
+        cbnz            \mx, L(\type\()_bilin_h)
+        cbnz            \my, L(\type\()_bilin_v)
+        b               \type  // mx and my both zero: branch to the label named by type (defined outside this view)
+
+L(\type\()_bilin_h):  // mx is nonzero
+        cbnz            \my, L(\type\()_bilin_hv)  // my nonzero as well: combined path
+
+        adr             x9,  L(\type\()_bilin_h_tbl)  // horizontal only: dispatch on the width index in x8
+        ldrh            w8,  [x9, x8, lsl #1]  // table entry = distance from the table back to the label
+        sub             x9,  x9,  w8, uxtw
+        br              x9
+
+20:     // 2xN h  (only assembled for put), two rows per iteration
+.ifc \type, put
+        add             \ds2,  \dst,  \d_strd  // second-row pointers
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd,  \d_strd,  #1  // both strides doubled: each pointer steps two rows
+        lsl             \s_strd,  \s_strd,  #1
+2:
+        ld1             {v4.s}[0],  [\src], \s_strd  // four bytes from each of the two rows
+        ld1             {v6.s}[0],  [\sr2], \s_strd
+        ext             v5.8b,  v4.8b,  v4.8b, #1  // same rows shifted by one pixel
+        ext             v7.8b,  v6.8b,  v6.8b, #1
+        trn1            v4.4h,  v4.4h,  v6.4h  // h[0] = first row pair, h[1] = second row pair
+        trn1            v5.4h,  v5.4h,  v7.4h
+        subs            \h,  \h,  #2
+        umull           v4.8h,  v4.8b,  v0.8b  // pixel * (16 - mx)
+        umlal           v4.8h,  v5.8b,  v1.8b  // + next pixel * mx
+        uqrshrn         v4.8b,  v4.8h,  #4  // rounding shift right by 4, saturate to 8 bit
+        st1             {v4.h}[0], [\dst], \d_strd  // two bytes per row
+        st1             {v4.h}[1], [\ds2], \d_strd
+        b.gt            2b
+        ret
+.endif
+
+40:     // 4xN h, two rows per iteration
+        add             \ds2,  \dst,  \d_strd  // second-row pointers
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd,  \d_strd,  #1  // both strides doubled: each pointer steps two rows
+        lsl             \s_strd,  \s_strd,  #1
+4:
+        ld1             {v4.8b}, [\src], \s_strd  // eight bytes from each of the two rows
+        ld1             {v6.8b}, [\sr2], \s_strd
+        ext             v5.8b,  v4.8b,  v4.8b, #1  // same rows shifted by one pixel
+        ext             v7.8b,  v6.8b,  v6.8b, #1
+        trn1            v4.2s,  v4.2s,  v6.2s  // s[0] = first row, s[1] = second row
+        trn1            v5.2s,  v5.2s,  v7.2s
+        subs            \h,  \h,  #2
+        umull           v4.8h,  v4.8b,  v0.8b  // pixel * (16 - mx)
+        umlal           v4.8h,  v5.8b,  v1.8b  // + next pixel * mx
+.ifc \type, put
+        uqrshrn         v4.8b,  v4.8h,  #4  // put: rounding shift right by 4, saturate to 8 bit
+        st1             {v4.s}[0], [\dst], \d_strd
+        st1             {v4.s}[1], [\ds2], \d_strd
+.else
+        st1             {v4.d}[0], [\dst], \d_strd  // prep: store the unshifted 16-bit sums
+        st1             {v4.d}[1], [\ds2], \d_strd
+.endif
+        b.gt            4b
+        ret
+
+80:     // 8xN h, two rows per iteration
+        add             \ds2,  \dst,  \d_strd  // second-row pointers
+        add             \sr2,  \src,  \s_strd
+        lsl             \d_strd,  \d_strd,  #1  // both strides doubled: each pointer steps two rows
+        lsl             \s_strd,  \s_strd,  #1
+8:
+        ld1             {v4.16b}, [\src], \s_strd  // 16 bytes are loaded per row; only the low 8 of each vector feed the multiplies
+        ld1             {v6.16b}, [\sr2], \s_strd
+        ext             v5.16b, v4.16b, v4.16b, #1  // same rows shifted by one pixel
+        ext             v7.16b, v6.16b, v6.16b, #1
+        subs            \h,  \h,  #2
+        umull           v4.8h,  v4.8b,  v0.8b  // pixel * (16 - mx)
+        umull           v6.8h,  v6.8b,  v0.8b
+        umlal           v4.8h,  v5.8b,  v1.8b  // + next pixel * mx
+        umlal           v6.8h,  v7.8b,  v1.8b
+.ifc \type, put
+        uqrshrn         v4.8b,  v4.8h,  #4  // put: rounding shift right by 4, saturate to 8 bit
+        uqrshrn         v6.8b,  v6.8h,  #4
+        st1             {v4.8b}, [\dst], \d_strd
+        st1             {v6.8b}, [\ds2], \d_strd
+.else
+        st1             {v4.8h}, [\dst], \d_strd  // prep: store the unshifted 16-bit sums
+        st1             {v6.8h}, [\ds2], \d_strd
+.endif
+        b.gt            8b
+        ret
+160:  // all widths from 16 up share this code: two rows per outer iteration, 16 pixels per inner iteration
+320:
+640:
+1280:   // 16xN, 32xN, ... h
+        add             \ds2,  \dst,  \d_strd  // second-row pointers
+        add             \sr2,  \src,  \s_strd
+        lsl             \s_strd,  \s_strd,  #1
+
+        sub             \s_strd,  \s_strd,  \w, uxtw  // the loads below post-increment by 8 + w bytes per row,
+        sub             \s_strd,  \s_strd,  #8  // so only the remainder of two rows is added at label 9
+.ifc \type, put
+        lsl             \d_strd,  \d_strd,  #1  // put: same adjustment for the w bytes stored per row
+        sub             \d_strd,  \d_strd,  \w, uxtw
+.endif
+161:
+        ld1             {v16.d}[1],  [\src], #8  // first 8 pixels of each row go into the upper half
+        ld1             {v20.d}[1],  [\sr2], #8
+        mov             \mx, \w  // mx is reused as the column counter (its value already lives in v1)
+
+16:
+        ld1             {v18.16b},  [\src], #16  // next 16 bytes of each row
+        ld1             {v22.16b},  [\sr2], #16
+        ext             v17.16b, v16.16b, v18.16b, #8  // the 16 current pixels
+        ext             v19.16b, v16.16b, v18.16b, #9  // the same pixels shifted by one
+        ext             v21.16b, v20.16b, v22.16b, #8
+        ext             v23.16b, v20.16b, v22.16b, #9
+        umull           v16.8h,  v17.8b,  v0.8b  // pixel * (16 - mx)
+        umull2          v17.8h,  v17.16b, v0.16b
+        umull           v20.8h,  v21.8b,  v0.8b
+        umull2          v21.8h,  v21.16b, v0.16b
+        umlal           v16.8h,  v19.8b,  v1.8b  // + next pixel * mx
+        umlal2          v17.8h,  v19.16b, v1.16b
+        umlal           v20.8h,  v23.8b,  v1.8b
+        umlal2          v21.8h,  v23.16b, v1.16b
+        subs            \mx, \mx, #16
+.ifc \type, put
+        uqrshrn         v16.8b,  v16.8h, #4  // put: rounding shift right by 4, saturate to 8 bit
+        uqrshrn2        v16.16b, v17.8h, #4
+        uqrshrn         v20.8b,  v20.8h, #4
+        uqrshrn2        v20.16b, v21.8h, #4
+        st1             {v16.16b}, [\dst], #16
+        st1             {v20.16b}, [\ds2], #16
+.else
+        st1             {v16.8h, v17.8h}, [\dst], #32  // prep: store the unshifted 16-bit sums
+        st1             {v20.8h, v21.8h}, [\ds2], #32
+.endif
+        b.le            9f
+
+        mov             v16.16b, v18.16b  // carry the last loaded bytes over as the start of the next 16 pixels
+        mov             v20.16b, v22.16b
+        b               16b
+
+9:
+        add             \dst,  \dst,  \d_strd  // move all four pointers on to the next pair of rows
+        add             \ds2,  \ds2,  \d_strd
+        add             \src,  \src,  \s_strd
+        add             \sr2,  \sr2,  \s_strd
+
+        subs            \h,  \h,  #2
+        b.gt            161b
+        ret
+
+L(\type\()_bilin_h_tbl):  // jump table read by the bilin h dispatch: 16-bit distances from this table back to each h label
+        .hword L(\type\()_bilin_h_tbl) - 1280b  // index 0
+        .hword L(\type\()_bilin_h_tbl) -  640b  // index 1
+        .hword L(\type\()_bilin_h_tbl) -  320b  // index 2
+        .hword L(\type\()_bilin_h_tbl) -  160b  // index 3
+        .hword L(\type\()_bilin_h_tbl) -   80b  // index 4
+        .hword L(\type\()_bilin_h_tbl) -   40b  // index 5
+        .hword L(\type\()_bilin_h_tbl) -   20b  // index 6
+        .hword 0  // unused trailing entry
+
+
+L(\type\()_bilin_v):  // vertical-only bilinear: dispatch on the width index in x8
+        cmp             \h,  #4  // NOTE(review): each v path below sets the flags again before branching on them, so this result looks unused
+        adr             x9,  L(\type\()_bilin_v_tbl)
+        ldrh            w8,  [x9, x8, lsl #1]  // table entry = distance from the table back to the label
+        sub             x9,  x9,  w8, uxtw
+        br              x9
+
+20:     // 2xN v  (only assembled for put)
+.ifc \type, put
+        cmp             \h,  #2  // consumed by the b.gt 24f below
+        add             \ds2,  \dst,  \d_strd  // second-row pointers
+        add             \sr2,  \src,  \s_strd
+        lsl             \s_strd,  \s_strd,  #1  // both strides doubled: each pointer steps two rows
+        lsl             \d_strd,  \d_strd,  #1
+
+        // 2x2 v
+        ld1             {v16.h}[0], [\src], \s_strd  // row 0 (two pixels)
+        b.gt            24f  // h > 2
+        ld1             {v17.h}[0], [\sr2], \s_strd  // row 1
+        ld1             {v18.h}[0], [\src], \s_strd  // row 2
+        trn1            v16.4h, v16.4h, v17.4h  // v16 = rows 0,1
+        trn1            v17.4h, v17.4h, v18.4h  // v17 = rows 1,2
+        umull           v4.8h,  v16.8b,  v2.8b  // row * (16 - my)
+        umlal           v4.8h,  v17.8b,  v3.8b  // + row below * my
+        uqrshrn         v4.8b,  v4.8h,  #4  // rounding shift right by 4, saturate to 8 bit
+        st1             {v4.h}[0], [\dst]
+        st1             {v4.h}[1], [\ds2]
+        ret
+24:     // 2x4, 2x8, ... v  (four output rows per iteration)
+        ld1             {v17.h}[0], [\sr2], \s_strd  // next four rows
+        ld1             {v18.h}[0], [\src], \s_strd
+        ld1             {v19.h}[0], [\sr2], \s_strd
+        ld1             {v20.h}[0], [\src], \s_strd
+        trn1            v16.4h, v16.4h, v17.4h  // pair every row with the row below it
+        trn1            v17.4h, v17.4h, v18.4h
+        trn1            v18.4h, v18.4h, v19.4h
+        trn1            v19.4h, v19.4h, v20.4h
+        trn1            v16.2s, v16.2s, v18.2s  // v16 = rows 0..3
+        trn1            v17.2s, v17.2s, v19.2s  // v17 = rows 1..4
+        umull           v4.8h,  v16.8b,  v2.8b
+        umlal           v4.8h,  v17.8b,  v3.8b
+        subs            \h,  \h,  #4
+        uqrshrn         v4.8b,  v4.8h,  #4
+        st1             {v4.h}[0], [\dst], \d_strd  // output rows alternate between dst and ds2
+        st1             {v4.h}[1], [\ds2], \d_strd
+        st1             {v4.h}[2], [\dst], \d_strd
+        st1             {v4.h}[3], [\ds2], \d_strd
+        b.le            0f
+        mov             v16.8b, v20.8b  // last loaded row becomes row 0 of the next iteration
+        b               24b
+0:
+        ret
+.endif
+
+40:     // 4xN v, two output rows per iteration
+        add             \ds2,  \dst,  \d_strd  // second-row pointers
+        add             \sr2,  \src,  \s_strd
+        lsl             \s_strd,  \s_strd,  #1  // both strides doubled: each pointer steps two rows
+        lsl             \d_strd,  \d_strd,  #1
+        ld1             {v16.s}[0], [\src], \s_strd  // first row (four pixels)
+4:
+        ld1             {v17.s}[0], [\sr2], \s_strd  // next two rows
+        ld1             {v18.s}[0], [\src], \s_strd
+        trn1            v16.2s, v16.2s, v17.2s  // v16 = two consecutive rows
+        trn1            v17.2s, v17.2s, v18.2s  // v17 = the rows directly below them
+        umull           v4.8h,  v16.8b,  v2.8b  // row * (16 - my)
+        umlal           v4.8h,  v17.8b,  v3.8b  // + row below * my
+        subs            \h,  \h,  #2
+.ifc \type, put
+        uqrshrn         v4.8b,  v4.8h,  #4  // put: rounding shift right by 4, saturate to 8 bit
+        st1             {v4.s}[0], [\dst], \d_strd
+        st1             {v4.s}[1], [\ds2], \d_strd
+.else
+        st1             {v4.d}[0], [\dst], \d_strd  // prep: store the unshifted 16-bit sums
+        st1             {v4.d}[1], [\ds2], \d_strd
+.endif
+        b.le            0f
+        mov             v16.8b, v18.8b  // last loaded row becomes the first row of the next iteration
+        b               4b
+0:
+        ret
+
+80:     // 8xN v, two output rows per iteration
+        add             \ds2,  \dst,  \d_strd  // second-row pointers
+        add             \sr2,  \src,  \s_strd
+        lsl             \s_strd,  \s_strd,  #1  // both strides doubled: each pointer steps two rows
+        lsl             \d_strd,  \d_strd,  #1
+        ld1             {v16.8b}, [\src], \s_strd  // first row (eight pixels)
+8:
+        ld1             {v17.8b}, [\sr2], \s_strd  // next two rows
+        ld1             {v18.8b}, [\src], \s_strd
+        umull           v4.8h,  v16.8b,  v2.8b  // row * (16 - my)
+        umull           v5.8h,  v17.8b,  v2.8b
+        umlal           v4.8h,  v17.8b,  v3.8b  // + row below * my
+        umlal           v5.8h,  v18.8b,  v3.8b
+        subs            \h,  \h,  #2
+.ifc \type, put
+        uqrshrn         v4.8b,  v4.8h,  #4  // put: rounding shift right by 4, saturate to 8 bit
+        uqrshrn         v5.8b,  v5.8h,  #4
+        st1             {v4.8b}, [\dst], \d_strd
+        st1             {v5.8b}, [\ds2], \d_strd
+.else
+        st1             {v4.8h}, [\dst], \d_strd  // prep: store the unshifted 16-bit sums
+        st1             {v5.8h}, [\ds2], \d_strd
+.endif
+        b.le            0f
+        mov             v16.8b, v18.8b  // last loaded row becomes the first row of the next iteration
+        b               8b
+0:
+        ret
+
+160:    // 16xN, 32xN, ...  v, processed in 16-pixel-wide column strips
+320:
+640:
+1280:
+        mov             \my,  \h  // my is reused to remember the height (its value already lives in v3)
+1:
+        add             \ds2, \dst, \d_strd  // second-row pointers for this strip
+        add             \sr2, \src, \s_strd
+        lsl             \s_strd, \s_strd, #1  // both strides doubled: each pointer steps two rows
+        lsl             \d_strd, \d_strd, #1
+
+        ld1             {v16.16b}, [\src], \s_strd  // first row of the strip
+2:
+        ld1             {v17.16b}, [\sr2], \s_strd  // next two rows
+        ld1             {v18.16b}, [\src], \s_strd
+        umull           v4.8h,  v16.8b,  v2.8b  // row * (16 - my)
+        umull2          v5.8h,  v16.16b, v2.16b
+        umull           v6.8h,  v17.8b,  v2.8b
+        umull2          v7.8h,  v17.16b, v2.16b
+        umlal           v4.8h,  v17.8b,  v3.8b  // + row below * my
+        umlal2          v5.8h,  v17.16b, v3.16b
+        umlal           v6.8h,  v18.8b,  v3.8b
+        umlal2          v7.8h,  v18.16b, v3.16b
+        subs            \h,  \h,  #2
+.ifc \type, put
+        uqrshrn         v4.8b,  v4.8h,  #4  // put: rounding shift right by 4, saturate to 8 bit
+        uqrshrn2        v4.16b, v5.8h,  #4
+        uqrshrn         v6.8b,  v6.8h,  #4
+        uqrshrn2        v6.16b, v7.8h,  #4
+        st1             {v4.16b}, [\dst], \d_strd
+        st1             {v6.16b}, [\ds2], \d_strd
+.else
+        st1             {v4.8h, v5.8h}, [\dst], \d_strd  // prep: store the unshifted 16-bit sums
+        st1             {v6.8h, v7.8h}, [\ds2], \d_strd
+.endif
+        b.le            9f
+        mov             v16.16b, v18.16b  // last loaded row becomes the first row of the next iteration
+        b               2b
+9:
+        subs            \w,  \w,  #16  // one 16-pixel column strip done
+        b.le            0f
+        asr             \s_strd, \s_strd, #1  // undo the stride doubling
+        asr             \d_strd, \d_strd, #1
+        msub            \src, \s_strd, \xmy, \src  // rewind by the saved height (xmy presumably is the 64-bit view of my)
+        msub            \dst, \d_strd, \xmy, \dst
+        sub             \src, \src, \s_strd, lsl #1  // src also advanced two rows beyond the height
+        mov             \h,  \my  // restore the row counter
+        add             \src, \src, #16  // next 16 source pixels
+.ifc \type, put
+        add             \dst, \dst, #16  // 16 output bytes per strip row
+.else
+        add             \dst, \dst, #32  // 16 16-bit outputs per strip row
+.endif
+        b               1b
+0:
+        ret
+
+L(\type\()_bilin_v_tbl): // table of 16-bit values, each stored as (table address - handler label)
+        .hword L(\type\()_bilin_v_tbl) - 1280b
+        .hword L(\type\()_bilin_v_tbl) -  640b
+        .hword L(\type\()_bilin_v_tbl) -  320b
+        .hword L(\type\()_bilin_v_tbl) -  160b
+        .hword L(\type\()_bilin_v_tbl) -   80b
+        .hword L(\type\()_bilin_v_tbl) -   40b
+        .hword L(\type\()_bilin_v_tbl) -   20b
+        .hword 0 // NOTE(review): no handler behind this entry; presumably padding — confirm
+
+L(\type\()_bilin_hv): // entry of the hv paths: widen v2/v3, then jump through the hv table
+        uxtl            v2.8h, v2.8b // zero-extend v2/v3 to 16-bit lanes; the hv handlers
+        uxtl            v3.8h, v3.8b // use them as operands of 16-bit mul/mla
+        adr             x9,  L(\type\()_bilin_hv_tbl) // x9 = address of the jump table
+        ldrh            w8,  [x9, x8, lsl #1] // w8 = 16-bit table entry number x8 (x8 is set up before this point)
+        sub             x9,  x9,  w8, uxtw // entries hold (table - handler), so this yields the handler address
+        br              x9
+
+20:     // 2xN hv: only assembled for the put variant; v0/v1 are set up before this point (presumably the horizontal weights)
+.ifc \type, put
+        add             \sr2, \src, \s_strd // sr2 -> input row 1
+        add             \ds2, \dst, \d_strd // ds2 -> output row 1
+        lsl             \s_strd, \s_strd, #1 // double both strides: two rows per post-increment
+        lsl             \d_strd, \d_strd, #1
+
+        ld1             {v28.s}[0],  [\src], \s_strd // 4 bytes of input row 0
+        ext             v29.8b, v28.8b, v28.8b, #1 // same row shifted by one pixel
+        umull           v16.8h, v28.8b, v0.8b // v16 = row 0 after the horizontal pass:
+        umlal           v16.8h, v29.8b, v1.8b // px[x] * v0 + px[x+1] * v1
+
+2:
+        ld1             {v28.s}[0],  [\sr2], \s_strd // next odd input row
+        ld1             {v30.s}[0],  [\src], \s_strd // next even input row
+        ext             v29.8b, v28.8b, v28.8b, #1
+        ext             v31.8b, v30.8b, v30.8b, #1
+        trn1            v28.4h, v28.4h, v30.4h // pack the first 2 pixels of both rows side by side
+        trn1            v29.4h, v29.4h, v31.4h // so that one umull/umlal pair filters both rows
+        umull           v17.8h, v28.8b, v0.8b // v17 lanes 0-1: odd row, lanes 2-3: even row
+        umlal           v17.8h, v29.8b, v1.8b
+
+        trn1            v16.2s, v16.2s, v17.2s // v16 = { previous row, odd row }
+
+        mul             v4.4h,  v16.4h, v2.4h // vertical pass: upper row * v2
+        mla             v4.4h,  v17.4h, v3.4h // + lower row * v3
+        uqrshrn         v4.8b,  v4.8h,  #8 // rounding shift right by 8, saturating narrow to 8 bit
+        subs            \h,  \h,  #2 // two rows done; flags are consumed by b.le below
+        st1             {v4.h}[0], [\dst], \d_strd // 2 pixels of the first output row
+        st1             {v4.h}[1], [\ds2], \d_strd // 2 pixels of the second output row
+        b.le            0f
+        trn2            v16.2s, v17.2s, v17.2s // carry the filtered even row into the next iteration
+        b               2b
+0:
+        ret
+.endif
+
+40:     // 4xN hv: horizontal then vertical pass, two output rows per loop iteration
+        add             \sr2, \src, \s_strd // sr2 -> input row 1
+        add             \ds2, \dst, \d_strd // ds2 -> output row 1
+        lsl             \s_strd, \s_strd, #1 // double both strides: two rows per post-increment
+        lsl             \d_strd, \d_strd, #1
+
+        ld1             {v28.8b},  [\src], \s_strd // 8 bytes of input row 0
+        ext             v29.8b, v28.8b, v28.8b, #1 // same row shifted by one pixel
+        umull           v16.8h, v28.8b, v0.8b // v16 = row 0 after the horizontal pass:
+        umlal           v16.8h, v29.8b, v1.8b // px[x] * v0 + px[x+1] * v1
+
+4:
+        ld1             {v28.8b},  [\sr2], \s_strd // next odd input row
+        ld1             {v30.8b},  [\src], \s_strd // next even input row
+        ext             v29.8b, v28.8b, v28.8b, #1
+        ext             v31.8b, v30.8b, v30.8b, #1
+        trn1            v28.2s, v28.2s, v30.2s // pack the first 4 pixels of both rows side by side
+        trn1            v29.2s, v29.2s, v31.2s // so that one umull/umlal pair filters both rows
+        umull           v17.8h, v28.8b, v0.8b // v17 low half: odd row, high half: even row
+        umlal           v17.8h, v29.8b, v1.8b
+
+        trn1            v16.2d, v16.2d, v17.2d // v16 = { previous row, odd row }
+
+        mul             v4.8h,  v16.8h, v2.8h // vertical pass: upper row * v2
+        mla             v4.8h,  v17.8h, v3.8h // + lower row * v3
+        subs            \h,  \h,  #2 // two rows done; flags are consumed by b.le below
+.ifc \type, put
+        uqrshrn         v4.8b,  v4.8h,  #8 // rounding shift right by 8, saturating narrow to 8 bit
+        st1             {v4.s}[0], [\dst], \d_strd // 4 pixels of the first output row
+        st1             {v4.s}[1], [\ds2], \d_strd // 4 pixels of the second output row
+.else
+        urshr           v4.8h,  v4.8h,  #4 // non-put variant: rounding shift right by 4, result stays 16 bit
+        st1             {v4.d}[0], [\dst], \d_strd
+        st1             {v4.d}[1], [\ds2], \d_strd
+.endif
+        b.le            0f
+        trn2            v16.2d, v17.2d, v17.2d // carry the filtered even row into the next iteration
+        b               4b
+0:
+        ret
+
+80:     // 8xN, 16xN, ... hv: horizontal then vertical pass, processed as 8-pixel-wide column strips
+160:
+320:
+640:
+1280:
+        mov             \my,  \h // keep the height so it can be restored for each further strip
+
+1:
+        add             \sr2, \src, \s_strd // sr2 -> input row 1
+        add             \ds2, \dst, \d_strd // ds2 -> output row 1
+        lsl             \s_strd, \s_strd, #1 // double both strides: two rows per post-increment
+        lsl             \d_strd, \d_strd, #1
+
+        ld1             {v28.16b},  [\src], \s_strd // 16 bytes are loaded; pixels 0..8 are the ones used below
+        ext             v29.16b, v28.16b, v28.16b, #1 // same row shifted by one pixel
+        umull           v16.8h, v28.8b, v0.8b // v16 = row 0 after the horizontal pass:
+        umlal           v16.8h, v29.8b, v1.8b // px[x] * v0 + px[x+1] * v1
+
+2:
+        ld1             {v28.16b},  [\sr2], \s_strd // next odd input row
+        ld1             {v30.16b},  [\src], \s_strd // next even input row
+        ext             v29.16b, v28.16b, v28.16b, #1
+        ext             v31.16b, v30.16b, v30.16b, #1
+        umull           v17.8h, v28.8b, v0.8b // v17 = odd row after the horizontal pass
+        umlal           v17.8h, v29.8b, v1.8b
+        umull           v18.8h, v30.8b, v0.8b // v18 = even row after the horizontal pass
+        umlal           v18.8h, v31.8b, v1.8b
+
+        mul             v4.8h,  v16.8h, v2.8h // first output row: v16 * v2 + v17 * v3
+        mla             v4.8h,  v17.8h, v3.8h
+        mul             v5.8h,  v17.8h, v2.8h // second output row: v17 * v2 + v18 * v3
+        mla             v5.8h,  v18.8h, v3.8h
+        subs            \h,  \h,  #2 // two rows done; flags are consumed by b.le below
+.ifc \type, put
+        uqrshrn         v4.8b,  v4.8h,  #8 // rounding shift right by 8, saturating narrow to 8 bit
+        uqrshrn         v5.8b,  v5.8h,  #8
+        st1             {v4.8b}, [\dst], \d_strd
+        st1             {v5.8b}, [\ds2], \d_strd
+.else
+        urshr           v4.8h,  v4.8h,  #4 // non-put variant: rounding shift right by 4, result stays 16 bit
+        urshr           v5.8h,  v5.8h,  #4
+        st1             {v4.8h}, [\dst], \d_strd
+        st1             {v5.8h}, [\ds2], \d_strd
+.endif
+        b.le            9f // strip finished
+        mov             v16.16b, v18.16b // filtered even row becomes the top row of the next pair
+        b               2b
+9:
+        subs            \w,  \w,  #8 // one 8-pixel strip completed
+        b.le            0f // no columns left
+        asr             \s_strd,  \s_strd,  #1 // undo the stride doubling
+        asr             \d_strd,  \d_strd,  #1
+        msub            \src,  \s_strd,  \xmy,  \src // src -= s_strd * xmy; assumes \xmy is the 64-bit view of \my (the saved height) — TODO confirm
+        msub            \dst,  \d_strd,  \xmy,  \dst // dst -= d_strd * xmy
+        sub             \src,  \src,  \s_strd,  lsl #1 // the initial row load moved src two rows further than dst; rewind those as well
+        mov             \h,  \my // restore the row counter
+        add             \src,  \src,  #8 // next strip: 8 input bytes to the right
+.ifc \type, put
+        add             \dst,  \dst,  #8 // 8 output bytes per strip for put
+.else
+        add             \dst,  \dst,  #16 // 16 output bytes per strip when 16-bit values are stored
+.endif
+        b               1b
+0:
+        ret
+
+L(\type\()_bilin_hv_tbl): // table of 16-bit values, each stored as (table address - handler label)
+        .hword L(\type\()_bilin_hv_tbl) - 1280b
+        .hword L(\type\()_bilin_hv_tbl) -  640b
+        .hword L(\type\()_bilin_hv_tbl) -  320b
+        .hword L(\type\()_bilin_hv_tbl) -  160b
+        .hword L(\type\()_bilin_hv_tbl) -   80b
+        .hword L(\type\()_bilin_hv_tbl) -   40b
+        .hword L(\type\()_bilin_hv_tbl) -   20b
+        .hword 0 // NOTE(review): no handler behind this entry; presumably padding — confirm
+endfunc
+.endm
+
+filter_fn put,  x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, x8, x9, 10 // NOTE(review): macro header is not visible here; the first argument appears to be the \type tested by the .ifc blocks
+filter_fn prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6 // second instantiation with type "prep" and a different register assignment
--- a/third_party/dav1d/src/arm/asm.S
+++ b/third_party/dav1d/src/arm/asm.S
@@ -49,16 +49,24 @@
 #define A @
 #define T
 #else
 #define A
 #define T @
 #endif
 #endif
 
+#if !defined(PIC) /* PIC not supplied by the build: mirror the compiler's own macro */
+#if defined(__PIC__)
+#define PIC __PIC__
+#elif defined(__pic__)
+#define PIC __pic__
+#endif /* neither __PIC__ nor __pic__ defined: PIC stays undefined */
+#endif
+
 #ifndef PRIVATE_PREFIX
 #define PRIVATE_PREFIX dav1d_
 #endif
 
 #define PASTE(a,b) a ## b
 #define CONCAT(a,b) PASTE(a,b)
 
 #ifdef PREFIX
@@ -116,9 +124,11 @@ EXTERN\name:
 .endm
 
 #ifdef __APPLE__
 #define L(x) L ## x
 #else
 #define L(x) .L ## x
 #endif
 
+#define X(x) CONCAT(EXTERN, x) /* token-pastes the expansion of EXTERN in front of x */
+
 #endif /* __DAV1D_SRC_ARM_ASM_S__ */
new file mode 100644
--- /dev/null
+++ b/third_party/dav1d/src/arm/looprestoration_init_tmpl.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/looprestoration.h"
+
+#include "common/attributes.h"
+#include "common/intops.h"
+#include "src/tables.h"
+
+#if BITDEPTH == 8 && ARCH_AARCH64
+// This calculates things slightly differently than the reference C version.
+// This version calculates roughly this:
+// int16_t sum = 0;
+// for (int i = 0; i < 7; i++)
+//     sum += src[idx] * fh[i];
+// int16_t sum2 = (src[x] << 7) - (1 << (BITDEPTH + 6)) + rounding_off_h;
+// sum = iclip(sum + sum2, INT16_MIN, INT16_MAX) >> round_bits_h;
+// sum += 2048;
+void dav1d_wiener_filter_h_neon(int16_t *dst, const pixel (*left)[4],
+                                const pixel *src, ptrdiff_t stride,
+                                const int16_t fh[7], const intptr_t w,
+                                int h, enum LrEdgeFlags edges);
+// This calculates things slightly differently than the reference C version.
+// This version calculates roughly this:
+// fv[3] += 128;
+// int32_t sum = 0;
+// for (int i = 0; i < 7; i++)
+//     sum += mid[idx] * fv[i];
+// sum = (sum + rounding_off_v) >> round_bits_v;
+// This function assumes that the width is a multiple of 8.
+void dav1d_wiener_filter_v_neon(pixel *dst, ptrdiff_t stride,
+                                const int16_t *mid, int w, int h,
+                                const int16_t fv[7], enum LrEdgeFlags edges,
+                                ptrdiff_t mid_stride);
+void dav1d_copy_narrow_neon(pixel *dst, ptrdiff_t stride,
+                            const pixel *src, int w, int h);
+
+static void wiener_filter_neon(pixel *const dst, const ptrdiff_t dst_stride,
+                               const pixel (*const left)[4],
+                               const pixel *lpf, const ptrdiff_t lpf_stride,
+                               const int w, const int h, const int16_t fh[7],
+                               const int16_t fv[7], const enum LrEdgeFlags edges)
+{ // Filters dst in place in two passes: horizontal from dst/lpf into mid, then vertical from mid back into dst.
+    ALIGN_STK_32(int16_t, mid, 68 * 384,); // NOTE(review): h + 4 rows of mid_stride are used below; presumably h <= 64 and w <= 384 — confirm with callers
+    int mid_stride = (w + 7) & ~7; // row pitch of mid in elements: w rounded up to a multiple of 8
+
+    // Horizontal filter: the h block rows land in mid rows 2..h+1, leaving two rows free above and below
+    dav1d_wiener_filter_h_neon(&mid[2 * mid_stride], left, dst, dst_stride,
+                               fh, w, h, edges);
+    if (edges & LR_HAVE_TOP) // two rows read from lpf fill mid rows 0..1
+        dav1d_wiener_filter_h_neon(mid, NULL, lpf, lpf_stride,
+                                   fh, w, 2, edges);
+    if (edges & LR_HAVE_BOTTOM) // two rows read from 6 rows into lpf fill mid rows h+2..h+3
+        dav1d_wiener_filter_h_neon(&mid[(2 + h) * mid_stride], NULL,
+                                   lpf + 6 * PXSTRIDE(lpf_stride), lpf_stride,
+                                   fh, w, 2, edges);
+
+    // Vertical filter
+    if (w >= 8) // the leading multiple-of-8 columns are written straight to dst
+        dav1d_wiener_filter_v_neon(dst, dst_stride, &mid[2*mid_stride],
+                                   w & ~7, h, fv, edges, mid_stride * sizeof(*mid)); // mid stride is passed in bytes
+    if (w & 7) {
+        // For uneven widths, do a full 8 pixel wide filtering into a temp
+        // buffer and copy out the narrow slice of pixels separately into dest.
+        ALIGN_STK_16(pixel, tmp, 64 * 8,);
+        dav1d_wiener_filter_v_neon(tmp, w & 7, &mid[2*mid_stride + (w & ~7)], // tmp is written with a stride of w & 7
+                                   w & 7, h, fv, edges, mid_stride * sizeof(*mid));
+        dav1d_copy_narrow_neon(dst + (w & ~7), dst_stride, tmp, w & 7, h); // copy the w & 7 leftover columns into dst
+    }
+}
+#endif
+
+void bitfn(dav1d_loop_restoration_dsp_init_arm)(Dav1dLoopRestorationDSPContext *const c) { // installs the NEON loop-restoration entry points into c
+    const unsigned flags = dav1d_get_cpu_flags();
+
+    if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return; // without the NEON flag, c is left untouched
+
+#if BITDEPTH == 8 && ARCH_AARCH64
+    c->wiener = wiener_filter_neon; // only hooked up for the 8-bit AArch64 build
+#endif
+}
--- a/third_party/dav1d/src/arm/mc_init_tmpl.c
+++ b/third_party/dav1d/src/arm/mc_init_tmpl.c
@@ -25,23 +25,73 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "config.h"
 
 #include "src/mc.h"
 #include "src/cpu.h"
 
+decl_mc_fn(dav1d_put_8tap_regular_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_regular_smooth_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_regular_sharp_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_smooth_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_smooth_regular_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_smooth_sharp_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_sharp_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_sharp_regular_8bpc_neon);
+decl_mc_fn(dav1d_put_8tap_sharp_smooth_8bpc_neon);
+decl_mc_fn(dav1d_put_bilin_8bpc_neon);
+
+decl_mct_fn(dav1d_prep_8tap_regular_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_regular_smooth_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_regular_sharp_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_smooth_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_smooth_regular_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_smooth_sharp_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_sharp_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_sharp_regular_8bpc_neon);
+decl_mct_fn(dav1d_prep_8tap_sharp_smooth_8bpc_neon);
+decl_mct_fn(dav1d_prep_bilin_8bpc_neon);
+
 decl_avg_fn(dav1d_avg_8bpc_neon);
 decl_w_avg_fn(dav1d_w_avg_8bpc_neon);
 decl_mask_fn(dav1d_mask_8bpc_neon);
 
 void bitfn(dav1d_mc_dsp_init_arm)(Dav1dMCDSPContext *const c) {
+#define init_mc_fn(type, name, suffix) \
+    c->mc[type] = dav1d_put_##name##_8bpc_##suffix /* c->mc[type] = dav1d_put_<name>_8bpc_<suffix> */
+#define init_mct_fn(type, name, suffix) \
+    c->mct[type] = dav1d_prep_##name##_8bpc_##suffix /* c->mct[type] = dav1d_prep_<name>_8bpc_<suffix> */
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
 
 #if BITDEPTH == 8
+#if ARCH_AARCH64 // the put/prep 8tap and bilin entry points are only installed on AArch64
+    init_mc_fn (FILTER_2D_8TAP_REGULAR,        8tap_regular,        neon);
+    init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
+    init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  neon);
+    init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
+    init_mc_fn (FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         neon);
+    init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   neon);
+    init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  neon);
+    init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   neon);
+    init_mc_fn (FILTER_2D_8TAP_SHARP,          8tap_sharp,          neon);
+    init_mc_fn (FILTER_2D_BILINEAR,            bilin,               neon);
+
+    init_mct_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        neon);
+    init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
+    init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  neon);
+    init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
+    init_mct_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         neon);
+    init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   neon);
+    init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  neon);
+    init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   neon);
+    init_mct_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          neon);
+    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               neon);
+#endif // ARCH_AARCH64
+
     c->avg = dav1d_avg_8bpc_neon;
     c->w_avg = dav1d_w_avg_8bpc_neon;
     c->mask = dav1d_mask_8bpc_neon;
 #endif
 }
--- a/third_party/dav1d/src/cdef_apply_tmpl.c
+++ b/third_party/dav1d/src/cdef_apply_tmpl.c
@@ -83,56 +83,56 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameC
                              const Av1Filter *const lflvl,
                              const int by_start, const int by_end)
 {
     const Dav1dDSPContext *const dsp = f->dsp;
     enum CdefEdgeFlags edges = HAVE_BOTTOM | (by_start > 0 ? HAVE_TOP : 0);
     pixel *ptrs[3] = { p[0], p[1], p[2] };
     const int sbsz = 16;
     const int sb64w = f->sb128w << 1;
-    const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8;
-    const enum Dav1dPixelLayout layout = f->cur.p.p.layout;
+    const int damping = f->frame_hdr->cdef.damping + BITDEPTH - 8;
+    const enum Dav1dPixelLayout layout = f->cur.p.layout;
     const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
     const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
     const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
     const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
 
     // FIXME a design improvement that could be made here is to keep a set of
     // flags for each block position on whether the block was filtered; if not,
     // the backup of pre-filter data is empty, and the restore is therefore
     // unnecessary as well.
 
     for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= HAVE_TOP) {
         const int tf = f->lf.top_pre_cdef_toggle;
         if (by + 2 >= f->bh) edges &= ~HAVE_BOTTOM;
 
         if (edges & HAVE_BOTTOM) {
             // backup pre-filter data for next iteration
-            backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.p.stride,
+            backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.stride,
                          8, f->bw * 4, layout);
         }
 
         pixel lr_bak[2 /* idx */][3 /* plane */][8 /* y */][2 /* x */];
         pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
         edges &= ~HAVE_LEFT;
         edges |= HAVE_RIGHT;
         for (int sbx = 0, last_skip = 1; sbx < sb64w; sbx++, edges |= HAVE_LEFT) {
             const int sb128x = sbx >>1;
             const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
             const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
             if (cdef_idx == -1 ||
-                (!f->frame_hdr.cdef.y_strength[cdef_idx] &&
-                 !f->frame_hdr.cdef.uv_strength[cdef_idx]))
+                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
+                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
             {
                 last_skip = 1;
                 goto next_sb;
             }
 
-            const int y_lvl = f->frame_hdr.cdef.y_strength[cdef_idx];
-            const int uv_lvl = f->frame_hdr.cdef.uv_strength[cdef_idx];
+            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
+            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
             pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
             for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
                  bx += 2, edges |= HAVE_LEFT)
             {
                 if (bx + 2 >= f->bw) edges &= ~HAVE_RIGHT;
 
                 // check if this 8x8 block had any coded coefficients; if not,
                 // go to the next block
@@ -143,51 +143,51 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameC
                 {
                     last_skip = 1;
                     goto next_b;
                 }
 
                 if (last_skip && edges & HAVE_LEFT) {
                     // we didn't backup the prefilter data because it wasn't
                     // there, so do it here instead
-                    backup2x8(lr_bak[bit], bptrs, f->cur.p.stride, 0, layout);
+                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout);
                 }
                 if (edges & HAVE_RIGHT) {
                     // backup pre-filter data for next iteration
-                    backup2x8(lr_bak[!bit], bptrs, f->cur.p.stride, 8, layout);
+                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout);
                 }
 
                 // the actual filter
                 const int y_pri_lvl = (y_lvl >> 2) << (BITDEPTH - 8);
                 int y_sec_lvl = y_lvl & 3;
                 y_sec_lvl += y_sec_lvl == 3;
                 y_sec_lvl <<= BITDEPTH - 8;
                 const int uv_pri_lvl = (uv_lvl >> 2) << (BITDEPTH - 8);
                 int uv_sec_lvl = uv_lvl & 3;
                 uv_sec_lvl += uv_sec_lvl == 3;
                 uv_sec_lvl <<= BITDEPTH - 8;
                 unsigned variance;
-                const int dir = dsp->cdef.dir(bptrs[0], f->cur.p.stride[0],
+                const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
                                               &variance);
                 if (y_lvl) {
-                    dsp->cdef.fb[0](bptrs[0], f->cur.p.stride[0], lr_bak[bit][0],
+                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
                                     (pixel *const [2]) {
                                         &f->lf.cdef_line_ptr[tf][0][0][bx * 4],
                                         &f->lf.cdef_line_ptr[tf][0][1][bx * 4],
                                     },
                                     adjust_strength(y_pri_lvl, variance),
                                     y_sec_lvl, y_pri_lvl ? dir : 0,
                                     damping, edges);
                 }
                 if (uv_lvl && has_chroma) {
                     const int uvdir =
-                        f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
+                        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
                         ((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir];
                     for (int pl = 1; pl <= 2; pl++) {
-                        dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.p.stride[1],
+                        dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
                                              lr_bak[bit][pl],
                                              (pixel *const [2]) {
                                                  &f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor],
                                                  &f->lf.cdef_line_ptr[tf][pl][1][bx * 4 >> ss_hor],
                                              },
                                              uv_pri_lvl, uv_sec_lvl,
                                              uv_pri_lvl ? uvdir : 0,
                                              damping - 1, edges);
@@ -204,14 +204,14 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameC
             }
 
         next_sb:
             iptrs[0] += sbsz * 4;
             iptrs[1] += sbsz * 4 >> ss_hor;
             iptrs[2] += sbsz * 4 >> ss_hor;
         }
 
-        ptrs[0] += 8 * PXSTRIDE(f->cur.p.stride[0]);
-        ptrs[1] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
-        ptrs[2] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
+        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
+        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
+        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
         f->lf.top_pre_cdef_toggle ^= 1;
     }
 }
--- a/third_party/dav1d/src/cdf.c
+++ b/third_party/dav1d/src/cdf.c
@@ -4067,17 +4067,17 @@ void dav1d_init_states(CdfThreadContext 
     memcpy(cdf_init[qcat].cdf->kfym, default_kf_y_mode_cdf,
            sizeof(default_kf_y_mode_cdf));
     cdf_init[qcat].cdf->coef = av1_default_coef_cdf[qcat];
     cdf_init[qcat].cdf->mv = default_mv_cdf;
     cdf_init[qcat].cdf->dmv = default_mv_cdf;
     dav1d_cdf_thread_ref(cdf, &cdf_init[qcat]);
 }
 
-void dav1d_update_tile_cdf(const Av1FrameHeader *const hdr,
+void dav1d_update_tile_cdf(const Dav1dFrameHeader *const hdr,
                            CdfContext *const dst,
                            const CdfContext *const src)
 {
     int i, j, k, l;
 
 #define update_cdf_1d(n1d, name) \
     do { \
         memcpy(dst->name, src->name, sizeof(*dst->name) * n1d); \
@@ -4133,17 +4133,17 @@ void dav1d_update_tile_cdf(const Av1Fram
     update_cdf_3d(2, 2, 9, coef.eob_bin_256);
     update_cdf_3d(2, 2, 10, coef.eob_bin_512);
     update_cdf_3d(2, 2, 11, coef.eob_bin_1024);
     update_bit_3d(N_TX_SIZES, 2, 11 /*22*/, coef.eob_hi_bit);
     update_cdf_4d(N_TX_SIZES, 2, 4, 3, coef.eob_base_tok);
     update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 4, coef.base_tok);
     update_bit_2d(2, 3, coef.dc_sign);
     update_cdf_4d(4, 2, 21, 4, coef.br_tok);
-    update_cdf_2d(3, NUM_SEGMENTS, m.seg_id);
+    update_cdf_2d(3, DAV1D_MAX_SEGMENTS, m.seg_id);
     update_cdf_1d(8, m.cfl_sign);
     update_cdf_2d(6, 16, m.cfl_alpha);
     update_bit_0d(m.restore_wiener);
     update_bit_0d(m.restore_sgrproj);
     update_cdf_1d(3, m.restore_switchable);
     update_cdf_1d(4, m.delta_q);
     update_cdf_2d(5, 4, m.delta_lf);
     update_bit_2d(7, 3, m.pal_y);
@@ -4166,17 +4166,17 @@ void dav1d_update_tile_cdf(const Av1Fram
             update_bit_1d(10, dmv.comp[k].classN);
             update_bit_0d(dmv.comp[k].sign);
         }
         return;
     }
 
     update_bit_1d(3, m.skip_mode);
     update_cdf_2d(4, N_INTRA_PRED_MODES, m.y_mode);
-    update_cdf_3d(2, 8, N_SWITCHABLE_FILTERS, m.filter);
+    update_cdf_3d(2, 8, DAV1D_N_SWITCHABLE_FILTERS, m.filter);
     update_bit_1d(6, m.newmv_mode);
     update_bit_1d(2, m.globalmv_mode);
     update_bit_1d(6, m.refmv_mode);
     update_bit_1d(3, m.drl_bit);
     update_cdf_2d(8, N_COMP_INTER_PRED_MODES, m.comp_inter_mode);
     update_bit_1d(4, m.intra);
     update_bit_1d(5, m.comp);
     update_bit_1d(5, m.comp_dir);
--- a/third_party/dav1d/src/cdf.h
+++ b/third_party/dav1d/src/cdf.h
@@ -35,17 +35,17 @@
 #include "src/thread_data.h"
 
 typedef struct CdfModeContext {
     uint16_t y_mode[4][N_INTRA_PRED_MODES + 1];
     uint16_t use_filter_intra[N_BS_SIZES][2];
     uint16_t filter_intra[5 + 1];
     uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 1];
     uint16_t angle_delta[8][8];
-    uint16_t filter[2][8][N_SWITCHABLE_FILTERS + 1];
+    uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1];
     uint16_t newmv_mode[6][2];
     uint16_t globalmv_mode[2][2];
     uint16_t refmv_mode[6][2];
     uint16_t drl_bit[3][2];
     uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES + 1];
     uint16_t intra[4][2];
     uint16_t comp[5][2];
     uint16_t comp_dir[5][2];
@@ -63,17 +63,17 @@ typedef struct CdfModeContext {
     uint16_t txsz[N_TX_SIZES - 1][3][4];
     uint16_t txpart[7][3][2];
     uint16_t txtp_inter[4][N_TX_SIZES][N_TX_TYPES + 1];
     uint16_t txtp_intra[3][N_TX_SIZES][N_INTRA_PRED_MODES][N_TX_TYPES + 1];
     uint16_t skip[3][2];
     uint16_t skip_mode[3][2];
     uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 1];
     uint16_t seg_pred[3][2];
-    uint16_t seg_id[3][NUM_SEGMENTS + 1];
+    uint16_t seg_id[3][DAV1D_MAX_SEGMENTS + 1];
     uint16_t cfl_sign[8 + 1];
     uint16_t cfl_alpha[6][16 + 1];
     uint16_t restore_wiener[2];
     uint16_t restore_sgrproj[2];
     uint16_t restore_switchable[3 + 1];
     uint16_t delta_q[4 + 1];
     uint16_t delta_lf[5][4 + 1];
     uint16_t obmc[N_BS_SIZES][2];
@@ -127,17 +127,17 @@ typedef struct CdfContext {
 typedef struct CdfThreadContext {
     CdfContext *cdf;
     Dav1dRef *ref; ///< allocation origin
     struct thread_data *t;
     atomic_uint *progress;
 } CdfThreadContext;
 
 void dav1d_init_states(CdfThreadContext *cdf, int qidx);
-void dav1d_update_tile_cdf(const Av1FrameHeader *hdr, CdfContext *dst,
+void dav1d_update_tile_cdf(const Dav1dFrameHeader *hdr, CdfContext *dst,
                          const CdfContext *src);
 
 void dav1d_cdf_thread_alloc(CdfThreadContext *cdf, struct thread_data *t);
 void dav1d_cdf_thread_ref(CdfThreadContext *dst, CdfThreadContext *src);
 void dav1d_cdf_thread_unref(CdfThreadContext *cdf);
 
 /*
  * These are binary signals (so a signal is either "done" or "not done").
--- a/third_party/dav1d/src/cpu.h
+++ b/third_party/dav1d/src/cpu.h
@@ -25,18 +25,20 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef __DAV1D_SRC_CPU_H__
 #define __DAV1D_SRC_CPU_H__
 
 #include "config.h"
 
+#include "dav1d/common.h"
+
 #if ARCH_AARCH64 || ARCH_ARM
 #include "src/arm/cpu.h"
 #elif ARCH_X86
 #include "src/x86/cpu.h"
 #endif
 
 unsigned dav1d_get_cpu_flags(void);
-void dav1d_set_cpu_flags_mask(const unsigned mask);
+DAV1D_API void dav1d_set_cpu_flags_mask(const unsigned mask);
 
 #endif /* __DAV1D_SRC_CPU_H__ */
--- a/third_party/dav1d/src/data.c
+++ b/third_party/dav1d/src/data.c
@@ -23,49 +23,56 @@
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "config.h"
 
 #include <errno.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "dav1d/data.h"
 
 #include "common/validate.h"
 
 #include "src/data.h"
 #include "src/ref.h"
 
 uint8_t * dav1d_data_create(Dav1dData *const buf, const size_t sz) {
     validate_input_or_ret(buf != NULL, NULL);
 
     buf->ref = dav1d_ref_create(sz);
     if (!buf->ref) return NULL;
     buf->data = buf->ref->const_data;
-    buf->sz = sz;
+    buf->sz = buf->m.size = sz; // the size is recorded both in buf->sz and in buf->m.size
+    buf->m.timestamp = INT64_MIN; // NOTE(review): INT64_MIN here and -1 for offset look like "not set" sentinels — confirm against the Dav1dData declaration
+    buf->m.duration = 0;
+    buf->m.offset = -1;
 
     return buf->ref->data;
 }
 
 int dav1d_data_wrap(Dav1dData *const buf, const uint8_t *const ptr, const size_t sz,
                     void (*free_callback)(const uint8_t *data, void *user_data),
                     void *user_data)
 {
     validate_input_or_ret(buf != NULL, -EINVAL);
     validate_input_or_ret(ptr != NULL, -EINVAL);
     validate_input_or_ret(free_callback != NULL, -EINVAL);
 
     buf->ref = dav1d_ref_wrap(ptr, free_callback, user_data);
     if (!buf->ref) return -ENOMEM;
     buf->data = ptr;
-    buf->sz = sz;
+    buf->sz = buf->m.size = sz; // the size is recorded both in buf->sz and in buf->m.size
+    buf->m.timestamp = INT64_MIN; // NOTE(review): INT64_MIN here and -1 for offset look like "not set" sentinels — confirm against the Dav1dData declaration
+    buf->m.duration = 0;
+    buf->m.offset = -1;
 
     return 0;
 }
 
 void dav1d_data_move_ref(Dav1dData *const dst, Dav1dData *const src) {
     validate_input(dst != NULL);
     validate_input(dst->data == NULL);
     validate_input(src != NULL);
--- a/third_party/dav1d/src/decode.c
+++ b/third_party/dav1d/src/decode.c
@@ -44,45 +44,45 @@
 #include "src/env.h"
 #include "src/qm.h"
 #include "src/recon.h"
 #include "src/ref.h"
 #include "src/tables.h"
 #include "src/thread_task.h"
 #include "src/warpmv.h"
 
-static void init_quant_tables(const Av1SequenceHeader *const seq_hdr,
-                              const Av1FrameHeader *const frame_hdr,
+static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
+                              const Dav1dFrameHeader *const frame_hdr,
                               const int qidx, uint16_t (*dq)[3][2])
 {
     for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
         const int yac = frame_hdr->segmentation.enabled ?
             iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
         const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
         const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
         const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
         const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
         const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);
 
-        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->bpc > 8][ydc][0];
-        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->bpc > 8][yac][1];
-        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->bpc > 8][udc][0];
-        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->bpc > 8][uac][1];
-        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->bpc > 8][vdc][0];
-        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->bpc > 8][vac][1];
+        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
+        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
+        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
+        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
+        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
+        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
     }
 }
 
 static int read_mv_component_diff(Dav1dTileContext *const t,
                                   CdfMvComponent *const mv_comp,
                                   const int have_fp)
 {
     Dav1dTileState *const ts = t->ts;
     const Dav1dFrameContext *const f = t->f;
-    const int have_hp = f->frame_hdr.hp;
+    const int have_hp = f->frame_hdr->hp;
     const int sign = msac_decode_bool_adapt(&ts->msac, mv_comp->sign);
     const int cl = msac_decode_symbol_adapt(&ts->msac, mv_comp->classes, 11);
     int up, fp, hp;
 
     if (!cl) {
         up = msac_decode_bool_adapt(&ts->msac, mv_comp->class0);
         if (have_fp) {
             fp = msac_decode_symbol_adapt(&ts->msac, mv_comp->class0_fp[up], 4);
@@ -278,17 +278,17 @@ static void find_matching_ref(const Dav1
         masks[0] |= 1ULL << 32;
     }
 #undef matches
 }
 
 static void derive_warpmv(const Dav1dTileContext *const t,
                           const int bw4, const int bh4,
                           const uint64_t masks[2], const struct mv mv,
-                          WarpedMotionParams *const wmp)
+                          Dav1dWarpedMotionParams *const wmp)
 {
     int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
     const Dav1dFrameContext *const f = t->f;
     const ptrdiff_t b4_stride = f->b4_stride;
     const refmvs *const r = &f->mvs[t->by * b4_stride + t->bx];
 
 #define add_sample(dx, dy, sx, sy, rp) do { \
     pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
@@ -347,19 +347,19 @@ static void derive_warpmv(const Dav1dTil
         // replace the discarded samples;
         mvd[i] = mvd[j];
         memcpy(pts[i], pts[j], sizeof(*pts));
     }
 
     if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
         !dav1d_get_shear_params(wmp))
     {
-        wmp->type = WM_TYPE_AFFINE;
+        wmp->type = DAV1D_WM_TYPE_AFFINE;
     } else
-        wmp->type = WM_TYPE_IDENTITY;
+        wmp->type = DAV1D_WM_TYPE_IDENTITY;
 }
 
 static inline int findoddzero(const uint8_t *buf, int len) {
     for (int n = 0; n < len; n++)
         if (!buf[n * 2]) return 1;
     return 0;
 }
 
@@ -417,21 +417,21 @@ static void read_pal_plane(Dav1dTileCont
             used_cache[i++] = cache[n];
     const int n_used_cache = i;
 
     // parse new entries
     uint16_t *const pal = f->frame_thread.pass ?
         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
                             ((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl];
     if (i < pal_sz) {
-        int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
+        int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
 
         if (i < pal_sz) {
-            int bits = f->cur.p.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);
-            const int max = (1 << f->cur.p.p.bpc) - 1;
+            int bits = f->cur.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);
+            const int max = (1 << f->cur.p.bpc) - 1;
 
             do {
                 const int delta = msac_decode_bools(&ts->msac, bits);
                 prev = pal[i++] = imin(prev + delta + !pl, max);
                 if (prev + !pl >= max) {
                     for (; i < pal_sz; i++)
                         pal[i] = max;
                     break;
@@ -473,27 +473,27 @@ static void read_pal_uv(Dav1dTileContext
 
     // V pal coding
     Dav1dTileState *const ts = t->ts;
     const Dav1dFrameContext *const f = t->f;
     uint16_t *const pal = f->frame_thread.pass ?
         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
                             ((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];
     if (msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) {
-        const int bits = f->cur.p.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);
-        int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
-        const int max = (1 << f->cur.p.p.bpc) - 1;
+        const int bits = f->cur.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);
+        int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
+        const int max = (1 << f->cur.p.bpc) - 1;
         for (int i = 1; i < b->pal_sz[1]; i++) {
             int delta = msac_decode_bools(&ts->msac, bits);
             if (delta && msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) delta = -delta;
             prev = pal[i] = (prev + delta) & max;
         }
     } else {
         for (int i = 0; i < b->pal_sz[1]; i++)
-            pal[i] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
+            pal[i] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
     }
     if (DEBUG_BLOCK_INFO) {
         printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
         for (int n = 0; n < b->pal_sz[1]; n++)
             printf("%c%02x", n ? ' ' : '[', pal[n]);
         printf("]\n");
     }
 }
@@ -608,38 +608,38 @@ static void read_vartx_tree(Dav1dTileCon
 {
     const Dav1dFrameContext *const f = t->f;
     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
     const int bw4 = b_dim[0], bh4 = b_dim[1];
 
     // var-tx tree coding
     b->tx_split[0] = b->tx_split[1] = 0;
     b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
-    if (f->frame_hdr.segmentation.lossless[b->seg_id] ||
+    if (f->frame_hdr->segmentation.lossless[b->seg_id] ||
         b->max_ytx == TX_4X4)
     {
         b->max_ytx = b->uvtx = TX_4X4;
-        if (f->frame_hdr.txfm_mode == TX_SWITCHABLE) {
+        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
             rep_macro(type, t->dir tx, off, TX_4X4)
             case_set(bh4, l., 1, by4);
             case_set(bw4, a->, 0, bx4);
 #undef set_ctx
         }
-    } else if (f->frame_hdr.txfm_mode != TX_SWITCHABLE || b->skip) {
-        if (f->frame_hdr.txfm_mode == TX_SWITCHABLE) {
+    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
+        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
             rep_macro(type, t->dir tx, off, mul * b_dim[2 + diridx])
             case_set(bh4, l., 1, by4);
             case_set(bw4, a->, 0, bx4);
 #undef set_ctx
         } else {
-            assert(f->frame_hdr.txfm_mode == TX_LARGEST);
+            assert(f->frame_hdr->txfm_mode == DAV1D_TX_LARGEST);
         }
-        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
     } else {
         assert(imin(bw4, bh4) <= 16 || b->max_ytx == TX_64X64);
         int y, x, y_off, x_off;
         const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
         for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
             for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
                 read_tx_tree(t, b->max_ytx, 0, b->tx_split, x_off, y_off);
                 // contexts are updated inside read_tx_tree()
@@ -647,30 +647,30 @@ static void read_vartx_tree(Dav1dTileCon
             }
             t->bx -= x;
             t->by += ytx->h;
         }
         t->by -= y;
         if (DEBUG_BLOCK_INFO)
             printf("Post-vartxtree[%x/%x]: r=%d\n",
                    b->tx_split[0], b->tx_split[1], t->ts->msac.rng);
-        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
     }
 }
 
 static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
                                             const int by, const int bx,
                                             const int w4, int h4,
                                             const uint8_t *ref_seg_map,
                                             const ptrdiff_t stride)
 {
     unsigned seg_id = 8;
 
-    assert(f->frame_hdr.primary_ref_frame != PRIMARY_REF_NONE);
-    if (dav1d_thread_picture_wait(&f->refp[f->frame_hdr.primary_ref_frame],
+    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
+    if (dav1d_thread_picture_wait(&f->refp[f->frame_hdr->primary_ref_frame],
                                   (by + h4) * 4, PLANE_TYPE_BLOCK))
     {
         return 8;
     }
 
     ref_seg_map += by * stride + bx;
     do {
         for (int x = 0; x < w4; x++)
@@ -689,25 +689,25 @@ static int decode_b(Dav1dTileContext *co
                     const enum EdgeFlags intra_edge_flags)
 {
     Dav1dTileState *const ts = t->ts;
     const Dav1dFrameContext *const f = t->f;
     Av1Block b_mem, *const b = f->frame_thread.pass ?
         &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
     const int bx4 = t->bx & 31, by4 = t->by & 31;
-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
-    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
     const int bw4 = b_dim[0], bh4 = b_dim[1];
     const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
     const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
     const int have_left = t->bx > ts->tiling.col_start;
     const int have_top = t->by > ts->tiling.row_start;
-    const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
+    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
                            (bw4 > ss_hor || t->bx & 1) &&
                            (bh4 > ss_ver || t->by & 1);
 
     if (f->frame_thread.pass == 2) {
         if (b->intra) {
             f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
 
             const enum IntraPredMode y_mode_nofilt =
@@ -722,17 +722,17 @@ static int decode_b(Dav1dTileContext *co
             if (has_chroma) {
 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
                 rep_macro(type, t->dir uvmode, off, mul * b->uv_mode)
                 case_set(cbh4, l., 1, cby4);
                 case_set(cbw4, a->, 0, cbx4);
 #undef set_ctx
             }
         } else {
-            if (f->frame_hdr.frame_type & 1 /* not intrabc */ &&
+            if (f->frame_hdr->frame_type & 1 /* not intrabc */ &&
                 b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
             {
                 uint64_t mask[2] = { 0, 0 };
                 find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
                                   have_left, have_top, b->ref[0], mask);
                 derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
             }
             if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
@@ -758,34 +758,34 @@ static int decode_b(Dav1dTileContext *co
     }
 
     const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 
     b->bl = bl;
     b->bp = bp;
     b->bs = bs;
 
-    const Av1SegmentationData *seg = NULL;
+    const Dav1dSegmentationData *seg = NULL;
 
     // segment_id (if seg_feature for skip/ref/gmv is enabled)
     int seg_pred = 0;
-    if (f->frame_hdr.segmentation.enabled) {
-        if (!f->frame_hdr.segmentation.update_map) {
+    if (f->frame_hdr->segmentation.enabled) {
+        if (!f->frame_hdr->segmentation.update_map) {
             if (f->prev_segmap) {
                 unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
                                                        f->prev_segmap,
                                                        f->b4_stride);
                 if (seg_id >= 8) return -1;
                 b->seg_id = seg_id;
             } else {
                 b->seg_id = 0;
             }
-            seg = &f->frame_hdr.segmentation.seg_data.d[b->seg_id];
-        } else if (f->frame_hdr.segmentation.seg_data.preskip) {
-            if (f->frame_hdr.segmentation.temporal &&
+            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
+        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
+            if (f->frame_hdr->segmentation.temporal &&
                 (seg_pred = msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                                                           t->l.seg_pred[by4]])))
             {
                 // temporal predicted seg_id
                 if (f->prev_segmap) {
                     unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
                                                            w4, h4,
@@ -798,38 +798,38 @@ static int decode_b(Dav1dTileContext *co
                 }
             } else {
                 int seg_ctx;
                 const unsigned pred_seg_id =
                     get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                         &seg_ctx, f->cur_segmap, f->b4_stride);
                 const unsigned diff = msac_decode_symbol_adapt(&ts->msac,
                                                    ts->cdf.m.seg_id[seg_ctx],
-                                                   NUM_SEGMENTS);
+                                                   DAV1D_MAX_SEGMENTS);
                 const unsigned last_active_seg_id =
-                    f->frame_hdr.segmentation.seg_data.last_active_segid;
+                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                 b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                              last_active_seg_id + 1);
                 if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
-                if (b->seg_id >= NUM_SEGMENTS) b->seg_id = 0; // error?
+                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
             }
 
             if (DEBUG_BLOCK_INFO)
                 printf("Post-segid[preskip;%d]: r=%d\n",
                        b->seg_id, ts->msac.rng);
 
-            seg = &f->frame_hdr.segmentation.seg_data.d[b->seg_id];
+            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
         }
     } else {
         b->seg_id = 0;
     }
 
     // skip_mode
     if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
-        f->frame_hdr.skip_mode_enabled && imin(bw4, bh4) > 1)
+        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
     {
         const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
         b->skip_mode = msac_decode_bool_adapt(&ts->msac,
                                               ts->cdf.m.skip_mode[smctx]);
         if (DEBUG_BLOCK_INFO)
             printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
     } else {
         b->skip_mode = 0;
@@ -841,21 +841,21 @@ static int decode_b(Dav1dTileContext *co
     } else {
         const int sctx = t->a->skip[bx4] + t->l.skip[by4];
         b->skip = msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
         if (DEBUG_BLOCK_INFO)
             printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
     }
 
     // segment_id
-    if (f->frame_hdr.segmentation.enabled &&
-        f->frame_hdr.segmentation.update_map &&
-        !f->frame_hdr.segmentation.seg_data.preskip)
+    if (f->frame_hdr->segmentation.enabled &&
+        f->frame_hdr->segmentation.update_map &&
+        !f->frame_hdr->segmentation.seg_data.preskip)
     {
-        if (!b->skip && f->frame_hdr.segmentation.temporal &&
+        if (!b->skip && f->frame_hdr->segmentation.temporal &&
             (seg_pred = msac_decode_bool_adapt(&ts->msac,
                                    ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                                                       t->l.seg_pred[by4]])))
         {
             // temporal predicted seg_id
             if (f->prev_segmap) {
                 unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
                                                        f->prev_segmap,
@@ -870,143 +870,143 @@ static int decode_b(Dav1dTileContext *co
             const unsigned pred_seg_id =
                 get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                     &seg_ctx, f->cur_segmap, f->b4_stride);
             if (b->skip) {
                 b->seg_id = pred_seg_id;
             } else {
                 const unsigned diff = msac_decode_symbol_adapt(&ts->msac,
                                                    ts->cdf.m.seg_id[seg_ctx],
-                                                   NUM_SEGMENTS);
+                                                   DAV1D_MAX_SEGMENTS);
                 const unsigned last_active_seg_id =
-                    f->frame_hdr.segmentation.seg_data.last_active_segid;
+                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                 b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                              last_active_seg_id + 1);
                 if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
             }
-            if (b->seg_id >= NUM_SEGMENTS) b->seg_id = 0; // error?
+            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
         }
 
-        seg = &f->frame_hdr.segmentation.seg_data.d[b->seg_id];
+        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
 
         if (DEBUG_BLOCK_INFO)
             printf("Post-segid[postskip;%d]: r=%d\n",
                    b->seg_id, ts->msac.rng);
     }
 
     // cdef index
     if (!b->skip) {
-        const int idx = f->seq_hdr.sb128 ? ((t->bx & 16) >> 4) +
+        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
                                            ((t->by & 16) >> 3) : 0;
         if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
-            const int v = msac_decode_bools(&ts->msac, f->frame_hdr.cdef.n_bits);
+            const int v = msac_decode_bools(&ts->msac, f->frame_hdr->cdef.n_bits);
             t->cur_sb_cdef_idx_ptr[idx] = v;
             if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
             if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
             if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
 
             if (DEBUG_BLOCK_INFO)
                 printf("Post-cdef_idx[%d]: r=%d\n",
                         *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
         }
     }
 
     // delta-q/lf
-    if