Merge graphics to m-c. a=merge
authorRyan VanderMeulen <ryanvm@gmail.com>
Tue, 30 May 2017 12:13:40 -0400
changeset 409366 c3cd8c58fa87d571874f2bcc34ec0864ac7acd5f
parent 409340 e15fb840661215dc3928c3bf1fea0ca0379511f9 (current diff)
parent 409365 5235f24cdc6bb4add5bfeeb8f7551aa355f3f9fb (diff)
child 409367 ae492e1619671ca2b5f55369b70d97c7da67dd72
push id7391
push usermtabara@mozilla.com
push dateMon, 12 Jun 2017 13:08:53 +0000
treeherdermozilla-beta@2191d7f87e2e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmerge
milestone55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Merge graphics to m-c. a=merge
--- a/dom/base/nsDOMWindowUtils.cpp
+++ b/dom/base/nsDOMWindowUtils.cpp
@@ -109,16 +109,18 @@
 #include "HTMLImageElement.h"
 #include "mozilla/css/ImageLoader.h"
 #include "mozilla/layers/APZCTreeManager.h" // for layers::ZoomToRectBehavior
 #include "mozilla/dom/Promise.h"
 #include "mozilla/StyleSheetInlines.h"
 #include "mozilla/gfx/GPUProcessManager.h"
 #include "mozilla/dom/TimeoutManager.h"
 #include "mozilla/PreloadedStyleSheet.h"
+#include "mozilla/layers/WebRenderBridgeChild.h"
+#include "mozilla/layers/WebRenderLayerManager.h"
 
 #ifdef XP_WIN
 #undef GetClassName
 #endif
 
 using namespace mozilla;
 using namespace mozilla::dom;
 using namespace mozilla::ipc;
@@ -2517,16 +2519,24 @@ nsDOMWindowUtils::SetAsyncScrollOffset(n
   nsIWidget* widget = GetWidget();
   if (!widget) {
     return NS_ERROR_FAILURE;
   }
   LayerManager* manager = widget->GetLayerManager();
   if (!manager) {
     return NS_ERROR_FAILURE;
   }
+  if (WebRenderLayerManager* wrlm = manager->AsWebRenderLayerManager()) {
+    WebRenderBridgeChild* wrbc = wrlm->WrBridge();
+    if (!wrbc) {
+      return NS_ERROR_UNEXPECTED;
+    }
+    wrbc->SendSetAsyncScrollOffset(viewId, aX, aY);
+    return NS_OK;
+  }
   ShadowLayerForwarder* forwarder = manager->AsShadowForwarder();
   if (!forwarder || !forwarder->HasShadowManager()) {
     return NS_ERROR_UNEXPECTED;
   }
   forwarder->GetShadowManager()->SendSetAsyncScrollOffset(viewId, aX, aY);
   return NS_OK;
 }
 
@@ -2544,16 +2554,24 @@ nsDOMWindowUtils::SetAsyncZoom(nsIDOMNod
   nsIWidget* widget = GetWidget();
   if (!widget) {
     return NS_ERROR_FAILURE;
   }
   LayerManager* manager = widget->GetLayerManager();
   if (!manager) {
     return NS_ERROR_FAILURE;
   }
+  if (WebRenderLayerManager* wrlm = manager->AsWebRenderLayerManager()) {
+    WebRenderBridgeChild* wrbc = wrlm->WrBridge();
+    if (!wrbc) {
+      return NS_ERROR_UNEXPECTED;
+    }
+    wrbc->SendSetAsyncZoom(viewId, aValue);
+    return NS_OK;
+  }
   ShadowLayerForwarder* forwarder = manager->AsShadowForwarder();
   if (!forwarder || !forwarder->HasShadowManager()) {
     return NS_ERROR_UNEXPECTED;
   }
   forwarder->GetShadowManager()->SendSetAsyncZoom(viewId, aValue);
   return NS_OK;
 }
 
@@ -2570,16 +2588,24 @@ nsDOMWindowUtils::FlushApzRepaints(bool*
     *aOutResult = false;
     return NS_OK;
   }
   LayerManager* manager = widget->GetLayerManager();
   if (!manager) {
     *aOutResult = false;
     return NS_OK;
   }
+  if (WebRenderLayerManager* wrlm = manager->AsWebRenderLayerManager()) {
+    WebRenderBridgeChild* wrbc = wrlm->WrBridge();
+    if (!wrbc) {
+      return NS_ERROR_UNEXPECTED;
+    }
+    wrbc->SendFlushApzRepaints();
+    return NS_OK;
+  }
   ShadowLayerForwarder* forwarder = manager->AsShadowForwarder();
   if (!forwarder || !forwarder->HasShadowManager()) {
     *aOutResult = false;
     return NS_OK;
   }
   forwarder->GetShadowManager()->SendFlushApzRepaints();
   *aOutResult = true;
   return NS_OK;
--- a/dom/ipc/ContentParent.cpp
+++ b/dom/ipc/ContentParent.cpp
@@ -5211,28 +5211,16 @@ ContentParent::RecvClassifyLocal(const U
   if (!uriClassifier) {
     return IPC_FAIL_NO_REASON(this);
   }
   *aRv = uriClassifier->ClassifyLocalWithTables(uri, aTables, *aResults);
   return IPC_OK();
 }
 
 mozilla::ipc::IPCResult
-ContentParent::RecvAllocPipelineId(RefPtr<AllocPipelineIdPromise>&& aPromise)
-{
-  GPUProcessManager* pm = GPUProcessManager::Get();
-  if (!pm) {
-    aPromise->Reject(PromiseRejectReason::HandlerRejected, __func__);
-    return IPC_OK();
-  }
-  aPromise->Resolve(wr::AsPipelineId(pm->AllocateLayerTreeId()), __func__);
-  return IPC_OK();
-}
-
-mozilla::ipc::IPCResult
 ContentParent::RecvFileCreationRequest(const nsID& aID,
                                        const nsString& aFullPath,
                                        const nsString& aType,
                                        const nsString& aName,
                                        const bool& aLastModifiedPassed,
                                        const int64_t& aLastModified,
                                        const bool& aExistenceCheck,
                                        const bool& aIsFromNsIFile)
--- a/dom/ipc/ContentParent.h
+++ b/dom/ipc/ContentParent.h
@@ -638,19 +638,16 @@ public:
   DeallocPURLClassifierParent(PURLClassifierParent* aActor) override;
 
   virtual mozilla::ipc::IPCResult
   RecvClassifyLocal(const URIParams& aURI,
                     const nsCString& aTables,
                     nsresult* aRv,
                     nsTArray<nsCString>* aResults) override;
 
-  virtual mozilla::ipc::IPCResult
-  RecvAllocPipelineId(RefPtr<AllocPipelineIdPromise>&& aPromise) override;
-
   // Use the PHangMonitor channel to ask the child to repaint a tab.
   void ForceTabPaint(TabParent* aTabParent, uint64_t aLayerObserverEpoch);
 
   // This function is called when we are about to load a document from an
   // HTTP(S), FTP or wyciwyg channel for a content process.  It is a useful
   // place to start to kick off work as early as possible in response to such
   // document loads.
   nsresult AboutToLoadHttpFtpWyciwygDocumentForChild(nsIChannel* aChannel);
--- a/dom/ipc/PContent.ipdl
+++ b/dom/ipc/PContent.ipdl
@@ -58,17 +58,16 @@ include ServiceWorkerConfiguration;
 include GraphicsMessages;
 include ProfilerTypes;
 include MemoryReportTypes;
 
 // Workaround to prevent error if PContentChild.cpp & PContentBridgeParent.cpp
 // are put into different UnifiedProtocolsXX.cpp files.
 // XXX Remove this once bug 1069073 is fixed
 include "mozilla/dom/PContentBridgeParent.h";
-include "mozilla/layers/WebRenderMessageUtils.h";
 
 using GeoPosition from "nsGeoPositionIPCSerialiser.h";
 using AlertNotificationType from "mozilla/AlertNotificationIPCSerializer.h";
 
 using struct ChromePackage from "mozilla/chrome/RegistryMessageUtils.h";
 using struct SubstitutionMapping from "mozilla/chrome/RegistryMessageUtils.h";
 using struct OverrideMapping from "mozilla/chrome/RegistryMessageUtils.h";
 using base::ChildPrivileges from "base/process_util.h";
@@ -91,17 +90,16 @@ using mozilla::OriginAttributes from "mo
 using struct mozilla::layers::TextureFactoryIdentifier from "mozilla/layers/CompositorTypes.h";
 using mozilla::layers::CompositorOptions from "mozilla/layers/CompositorOptions.h";
 using struct mozilla::dom::FlyWebPublishOptions from "mozilla/dom/FlyWebPublishOptionsIPCSerializer.h";
 using mozilla::Telemetry::Accumulation from "mozilla/TelemetryComms.h";
 using mozilla::Telemetry::KeyedAccumulation from "mozilla/TelemetryComms.h";
 using mozilla::Telemetry::ScalarAction from "mozilla/TelemetryComms.h";
 using mozilla::Telemetry::KeyedScalarAction from "mozilla/TelemetryComms.h";
 using mozilla::Telemetry::ChildEventData from "mozilla/TelemetryComms.h";
-using mozilla::wr::PipelineId from "mozilla/webrender/WebRenderTypes.h";
 
 union ChromeRegistryItem
 {
     ChromePackage;
     OverrideMapping;
     SubstitutionMapping;
 };
 
@@ -1099,18 +1097,16 @@ parent:
 
     sync GetA11yContentId() returns (uint32_t aContentId);
     async A11yHandlerControl(uint32_t aPid,
                              IHandlerControlHolder aHandlerControl);
 
     async AddMemoryReport(MemoryReport aReport);
     async FinishMemoryReport(uint32_t aGeneration);
 
-    async AllocPipelineId() returns (PipelineId pipelineId);
-
 both:
      async AsyncMessage(nsString aMessage, CpowEntry[] aCpows,
                         Principal aPrincipal, ClonedMessageData aData);
 
     /**
      * Notify `push-subscription-modified` observers in the parent and child.
      */
     async NotifyPushSubscriptionModifiedObservers(nsCString scope,
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -74,9 +74,9 @@ there is another crate in m-c called moz
 the same folder to store its rust dependencies. If one of the libraries that is
 required by both mozjs_sys and webrender is updated without updating the other
 project's Cargo.lock file, that results in build bustage.
 This means that any time you do this sort of manual update of packages, you need
 to make sure that mozjs_sys also has its Cargo.lock file updated if needed, hence
 the need to run the cargo update command in js/src as well. Hopefully this will
 be resolved soon.
 
-Latest Commit: 102603520d52f335f152ab74b6bcfdae061b6bc8
+Latest Commit: 76a3213080ca5c2e2a612c3023c50c81a111fd55
--- a/gfx/ipc/GPUProcessManager.cpp
+++ b/gfx/ipc/GPUProcessManager.cpp
@@ -76,25 +76,27 @@ GPUProcessManager::Initialize()
 void
 GPUProcessManager::Shutdown()
 {
   sSingleton = nullptr;
 }
 
 GPUProcessManager::GPUProcessManager()
  : mTaskFactory(this),
-   mNextLayerTreeId(0),
    mNextNamespace(0),
+   mIdNamespace(0),
+   mResourceId(0),
    mNumProcessAttempts(0),
    mDeviceResetCount(0),
    mProcess(nullptr),
    mGPUChild(nullptr)
 {
   MOZ_COUNT_CTOR(GPUProcessManager);
 
+  mIdNamespace = AllocateNamespace();
   mObserver = new Observer(this);
   nsContentUtils::RegisterShutdownObserver(mObserver);
 
   mDeviceResetLastTime = TimeStamp::Now();
 
   LayerTreeOwnerTracker::Initialize();
 }
 
@@ -883,18 +885,30 @@ bool
 GPUProcessManager::IsLayerTreeIdMapped(uint64_t aLayersId, base::ProcessId aRequestingId)
 {
   return LayerTreeOwnerTracker::Get()->IsMapped(aLayersId, aRequestingId);
 }
 
 uint64_t
 GPUProcessManager::AllocateLayerTreeId()
 {
+  // Allocate tree id by using id namespace.
+  // By it, tree id does not conflict with external image id and
+  // async image pipeline id.
   MOZ_ASSERT(NS_IsMainThread());
-  return ++mNextLayerTreeId;
+  ++mResourceId;
+  if (mResourceId == UINT32_MAX) {
+    // Move to next id namespace.
+    mIdNamespace = AllocateNamespace();
+    mResourceId = 1;
+  }
+
+  uint64_t layerTreeId = mIdNamespace;
+  layerTreeId = (layerTreeId << 32) | mResourceId;
+  return layerTreeId;
 }
 
 uint32_t
 GPUProcessManager::AllocateNamespace()
 {
   MOZ_ASSERT(NS_IsMainThread());
   return ++mNextNamespace;
 }
--- a/gfx/ipc/GPUProcessManager.h
+++ b/gfx/ipc/GPUProcessManager.h
@@ -239,18 +239,19 @@ private:
   friend class Observer;
 
 private:
   bool mDecodeVideoOnGpuProcess = true;
 
   RefPtr<Observer> mObserver;
   ipc::TaskFactory<GPUProcessManager> mTaskFactory;
   RefPtr<VsyncIOThreadHolder> mVsyncIOThread;
-  uint64_t mNextLayerTreeId;
   uint32_t mNextNamespace;
+  uint32_t mIdNamespace;
+  uint32_t mResourceId;
   uint32_t mNumProcessAttempts;
 
   nsTArray<RefPtr<RemoteCompositorSession>> mRemoteSessions;
   nsTArray<GPUProcessListener*> mListeners;
 
   uint32_t mDeviceResetCount;
   TimeStamp mDeviceResetLastTime;
 
--- a/gfx/layers/LayersLogging.cpp
+++ b/gfx/layers/LayersLogging.cpp
@@ -6,18 +6,18 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "LayersLogging.h"
 #include <stdint.h>                     // for uint8_t
 #include "ImageTypes.h"                 // for ImageFormat
 #include "mozilla/gfx/Matrix.h"         // for Matrix4x4, Matrix
 #include "mozilla/gfx/Point.h"          // for IntSize
 #include "nsDebug.h"                    // for NS_ERROR
-#include "nsPoint.h"                    // for nsIntPoint
-#include "nsRect.h"                     // for mozilla::gfx::IntRect
+#include "nsPoint.h"                    // for nsPoint
+#include "nsRect.h"                     // for nsRect
 #include "base/basictypes.h"
 
 using namespace mozilla::gfx;
 
 namespace mozilla {
 namespace layers {
 
 void
@@ -65,36 +65,49 @@ AppendToString(std::stringstream& aStrea
   aStream << pfx;
   aStream << nsPrintfCString(
     "(x=%d, y=%d, w=%d, h=%d)",
     r.x, r.y, r.width, r.height).get();
   aStream << sfx;
 }
 
 void
-AppendToString(std::stringstream& aStream, const nsIntPoint& p,
+AppendToString(std::stringstream& aStream, const WrColor& c,
                const char* pfx, const char* sfx)
 {
   aStream << pfx;
-  aStream << nsPrintfCString("(x=%d, y=%d)", p.x, p.y).get();
+  aStream << nsPrintfCString(
+    "rgba(%d, %d, %d, %f)",
+    uint8_t(c.r*255.f), uint8_t(c.g*255.f), uint8_t(c.b*255.f), c.a).get();
   aStream << sfx;
 }
 
 void
-AppendToString(std::stringstream& aStream, const IntRect& r,
+AppendToString(std::stringstream& aStream, const WrRect& r,
                const char* pfx, const char* sfx)
 {
   aStream << pfx;
   aStream << nsPrintfCString(
-    "(x=%d, y=%d, w=%d, h=%d)",
+    "(x=%f, y=%f, w=%f, h=%f)",
     r.x, r.y, r.width, r.height).get();
   aStream << sfx;
 }
 
 void
+AppendToString(std::stringstream& aStream, const WrSize& s,
+               const char* pfx, const char* sfx)
+{
+  aStream << pfx;
+  aStream << nsPrintfCString(
+    "(w=%f, h=%f)",
+    s.width, s.height).get();
+  aStream << sfx;
+}
+
+void
 AppendToString(std::stringstream& aStream, const nsRegion& r,
                const char* pfx, const char* sfx)
 {
   aStream << pfx;
 
   aStream << "< ";
   for (auto iter = r.RectIter(); !iter.Done(); iter.Next()) {
     AppendToString(aStream, iter.Get());
--- a/gfx/layers/LayersLogging.h
+++ b/gfx/layers/LayersLogging.h
@@ -13,16 +13,20 @@
 #include "mozilla/gfx/Types.h"          // for SamplingFilter, SurfaceFormat
 #include "mozilla/layers/CompositorTypes.h"  // for TextureFlags
 #include "mozilla/layers/WebRenderLayersLogging.h"
 #include "nsAString.h"
 #include "nsPrintfCString.h"            // for nsPrintfCString
 #include "nsRegion.h"                   // for nsRegion, nsIntRegion
 #include "nscore.h"                     // for nsACString, etc
 
+struct WrColor;
+struct WrRect;
+struct WrSize;
+
 namespace mozilla {
 namespace gfx {
 template <class units, class F> struct RectTyped;
 } // namespace gfx
 
 enum class ImageFormat;
 
 namespace layers {
@@ -83,16 +87,28 @@ AppendToString(std::stringstream& aStrea
   aStream << pfx;
   aStream << nsPrintfCString(
     "(x=%d, y=%d, w=%d, h=%d)",
     r.x, r.y, r.width, r.height).get();
   aStream << sfx;
 }
 
 void
+AppendToString(std::stringstream& aStream, const WrColor& c,
+               const char* pfx="", const char* sfx="");
+
+void
+AppendToString(std::stringstream& aStream, const WrRect& r,
+               const char* pfx="", const char* sfx="");
+
+void
+AppendToString(std::stringstream& aStream, const WrSize& s,
+               const char* pfx="", const char* sfx="");
+
+void
 AppendToString(std::stringstream& aStream, const nsRegion& r,
                const char* pfx="", const char* sfx="");
 
 void
 AppendToString(std::stringstream& aStream, const nsIntRegion& r,
                const char* pfx="", const char* sfx="");
 
 template <typename units>
--- a/gfx/layers/apz/src/APZCTreeManager.cpp
+++ b/gfx/layers/apz/src/APZCTreeManager.cpp
@@ -1799,16 +1799,35 @@ APZCTreeManager::GetTargetAPZC(const Scr
 {
   MutexAutoLock lock(mTreeLock);
   RefPtr<HitTestingTreeNode> node = GetTargetNode(aGuid, nullptr);
   MOZ_ASSERT(!node || node->GetApzc()); // any node returned must have an APZC
   RefPtr<AsyncPanZoomController> apzc = node ? node->GetApzc() : nullptr;
   return apzc.forget();
 }
 
+static bool
+GuidComparatorIgnoringPresShell(const ScrollableLayerGuid& aOne, const ScrollableLayerGuid& aTwo)
+{
+  return aOne.mLayersId == aTwo.mLayersId
+      && aOne.mScrollId == aTwo.mScrollId;
+}
+
+already_AddRefed<AsyncPanZoomController>
+APZCTreeManager::GetTargetAPZC(const uint64_t& aLayersId,
+                               const FrameMetrics::ViewID& aScrollId)
+{
+  MutexAutoLock lock(mTreeLock);
+  ScrollableLayerGuid guid(aLayersId, 0, aScrollId);
+  RefPtr<HitTestingTreeNode> node = GetTargetNode(guid, &GuidComparatorIgnoringPresShell);
+  MOZ_ASSERT(!node || node->GetApzc()); // any node returned must have an APZC
+  RefPtr<AsyncPanZoomController> apzc = node ? node->GetApzc() : nullptr;
+  return apzc.forget();
+}
+
 already_AddRefed<HitTestingTreeNode>
 APZCTreeManager::GetTargetNode(const ScrollableLayerGuid& aGuid,
                                GuidComparator aComparator) const
 {
   mTreeLock.AssertCurrentThreadOwns();
   RefPtr<HitTestingTreeNode> target = DepthFirstSearchPostOrder<ReverseIterator>(mRootNode.get(),
       [&aGuid, &aComparator](HitTestingTreeNode* node)
       {
@@ -1839,23 +1858,16 @@ APZCTreeManager::GetTargetAPZC(const Scr
       &hitResult, aOutHitScrollbar);
 
   if (aOutHitResult) {
     *aOutHitResult = hitResult;
   }
   return target.forget();
 }
 
-static bool
-GuidComparatorIgnoringPresShell(const ScrollableLayerGuid& aOne, const ScrollableLayerGuid& aTwo)
-{
-  return aOne.mLayersId == aTwo.mLayersId
-      && aOne.mScrollId == aTwo.mScrollId;
-}
-
 RefPtr<const OverscrollHandoffChain>
 APZCTreeManager::BuildOverscrollHandoffChain(const RefPtr<AsyncPanZoomController>& aInitialTarget)
 {
   // Scroll grabbing is a mechanism that allows content to specify that
   // the initial target of a pan should be not the innermost scrollable
   // frame at the touch point (which is what GetTargetAPZC finds), but
   // something higher up in the tree.
   // It's not sufficient to just find the initial target, however, as
--- a/gfx/layers/apz/src/APZCTreeManager.h
+++ b/gfx/layers/apz/src/APZCTreeManager.h
@@ -455,16 +455,18 @@ public:
      pointer to it. This allows caller code to just use the target APZC without worrying
      about it going away. These are public for testing code and generally should not be
      used by other production code.
   */
   RefPtr<HitTestingTreeNode> GetRootNode() const;
   already_AddRefed<AsyncPanZoomController> GetTargetAPZC(const ScreenPoint& aPoint,
                                                          HitTestResult* aOutHitResult,
                                                          HitTestingTreeNode** aOutScrollbarNode = nullptr);
+  already_AddRefed<AsyncPanZoomController> GetTargetAPZC(const uint64_t& aLayersId,
+                                                         const FrameMetrics::ViewID& aScrollId);
   ScreenToParentLayerMatrix4x4 GetScreenToApzcTransform(const AsyncPanZoomController *aApzc) const;
   ParentLayerToScreenMatrix4x4 GetApzcToGeckoTransform(const AsyncPanZoomController *aApzc) const;
 
   /**
    * Process touch velocity.
    * Sometimes the touch move event will have a velocity even though no scrolling
    * is occurring such as when the toolbar is being hidden/shown in Fennec.
    * This function can be called to have the y axis' velocity queue updated.
--- a/gfx/layers/apz/util/APZCCallbackHelper.cpp
+++ b/gfx/layers/apz/util/APZCCallbackHelper.cpp
@@ -9,16 +9,18 @@
 #include "gfxPlatform.h" // For gfxPlatform::UseTiling
 #include "gfxPrefs.h"
 #include "LayersLogging.h"  // For Stringify
 #include "mozilla/dom/Element.h"
 #include "mozilla/dom/TabParent.h"
 #include "mozilla/IntegerPrintfMacros.h"
 #include "mozilla/layers/LayerTransactionChild.h"
 #include "mozilla/layers/ShadowLayers.h"
+#include "mozilla/layers/WebRenderLayerManager.h"
+#include "mozilla/layers/WebRenderBridgeChild.h"
 #include "mozilla/TouchEvents.h"
 #include "nsContentUtils.h"
 #include "nsContainerFrame.h"
 #include "nsIScrollableFrame.h"
 #include "nsLayoutUtils.h"
 #include "nsIInterfaceRequestorUtils.h"
 #include "nsIContent.h"
 #include "nsIDocument.h"
@@ -698,16 +700,23 @@ static void
 SendLayersDependentApzcTargetConfirmation(nsIPresShell* aShell, uint64_t aInputBlockId,
                                           const nsTArray<ScrollableLayerGuid>& aTargets)
 {
   LayerManager* lm = aShell->GetLayerManager();
   if (!lm) {
     return;
   }
 
+  if (WebRenderLayerManager* wrlm = lm->AsWebRenderLayerManager()) {
+    if (WebRenderBridgeChild* wrbc = wrlm->WrBridge()) {
+      wrbc->SendSetConfirmedTargetAPZC(aInputBlockId, aTargets);
+    }
+    return;
+  }
+
   LayerTransactionChild* shadow = lm->AsShadowForwarder()->GetShadowManager();
   if (!shadow) {
     return;
   }
 
   shadow->SendSetConfirmedTargetAPZC(aInputBlockId, aTargets);
 }
 
--- a/gfx/layers/ipc/CompositorBridgeChild.cpp
+++ b/gfx/layers/ipc/CompositorBridgeChild.cpp
@@ -74,24 +74,25 @@ static void ShmemAllocated(CompositorBri
 }
 
 static StaticRefPtr<CompositorBridgeChild> sCompositorBridge;
 
 Atomic<int32_t> KnowsCompositor::sSerialCounter(0);
 
 CompositorBridgeChild::CompositorBridgeChild(LayerManager *aLayerManager, uint32_t aNamespace)
   : mLayerManager(aLayerManager)
-  , mNamespace(aNamespace)
+  , mIdNamespace(aNamespace)
+  , mResourceId(0)
   , mCanSend(false)
   , mFwdTransactionId(0)
   , mDeviceResetSequenceNumber(0)
   , mMessageLoop(MessageLoop::current())
   , mSectionAllocator(nullptr)
 {
-  MOZ_ASSERT(mNamespace);
+  MOZ_ASSERT(mIdNamespace);
   MOZ_ASSERT(NS_IsMainThread());
 }
 
 CompositorBridgeChild::~CompositorBridgeChild()
 {
   if (mCanSend) {
     gfxCriticalError() << "CompositorBridgeChild was not deinitialized";
   }
@@ -1164,23 +1165,35 @@ CompositorBridgeChild::AllocPWebRenderBr
 bool
 CompositorBridgeChild::DeallocPWebRenderBridgeChild(PWebRenderBridgeChild* aActor)
 {
   WebRenderBridgeChild* child = static_cast<WebRenderBridgeChild*>(aActor);
   child->ReleaseIPDLReference();
   return true;
 }
 
+uint64_t
+CompositorBridgeChild::GetNextResourceId()
+{
+  ++mResourceId;
+  MOZ_RELEASE_ASSERT(mResourceId != UINT32_MAX);
+
+  uint64_t id = mIdNamespace;
+  id = (id << 32) | mResourceId;
+
+  return id;
+}
+
 wr::MaybeExternalImageId
 CompositorBridgeChild::GetNextExternalImageId()
 {
-  static uint32_t sNextID = 1;
-  ++sNextID;
-  MOZ_RELEASE_ASSERT(sNextID != UINT32_MAX);
+  return Some(wr::ToExternalImageId(GetNextResourceId()));
+}
 
-  uint64_t imageId = mNamespace;
-  imageId = (imageId << 32) | sNextID;
-  return Some(wr::ToExternalImageId(imageId));
+wr::PipelineId
+CompositorBridgeChild::GetNextPipelineId()
+{
+  return wr::AsPipelineId(GetNextResourceId());
 }
 
 } // namespace layers
 } // namespace mozilla
 
--- a/gfx/layers/ipc/CompositorBridgeChild.h
+++ b/gfx/layers/ipc/CompositorBridgeChild.h
@@ -230,16 +230,18 @@ public:
   bool DeallocPWebRenderBridgeChild(PWebRenderBridgeChild* aActor) override;
 
   uint64_t DeviceResetSequenceNumber() const {
     return mDeviceResetSequenceNumber;
   }
 
   wr::MaybeExternalImageId GetNextExternalImageId() override;
 
+  wr::PipelineId GetNextPipelineId();
+
 private:
   // Private destructor, to discourage deletion outside of Release():
   virtual ~CompositorBridgeChild();
 
   void InitIPDL();
   void DeallocPCompositorBridgeChild() override;
 
   virtual PLayerTransactionChild*
@@ -263,16 +265,18 @@ private:
 
   mozilla::ipc::IPCResult RecvObserveLayerUpdate(const uint64_t& aLayersId,
                                                  const uint64_t& aEpoch,
                                                  const bool& aActive) override;
 
   already_AddRefed<nsIEventTarget>
   GetSpecificMessageEventTarget(const Message& aMsg) override;
 
+  uint64_t GetNextResourceId();
+
   // Class used to store the shared FrameMetrics, mutex, and APZCId  in a hash table
   class SharedFrameMetricsData {
   public:
     SharedFrameMetricsData(
         const mozilla::ipc::SharedMemoryBasic::Handle& metrics,
         const CrossProcessMutexHandle& handle,
         const uint64_t& aLayersId,
         const uint32_t& aAPZCId);
@@ -291,17 +295,18 @@ private:
     CrossProcessMutex* mMutex;
     uint64_t mLayersId;
     // Unique ID of the APZC that is sharing the FrameMetrics
     uint32_t mAPZCId;
   };
 
   RefPtr<LayerManager> mLayerManager;
 
-  uint32_t mNamespace;
+  uint32_t mIdNamespace;
+  uint32_t mResourceId;
 
   // When not multi-process, hold a reference to the CompositorBridgeParent to keep it
   // alive. This reference should be null in multi-process.
   RefPtr<CompositorBridgeParent> mCompositorBridgeParent;
 
   // The ViewID of the FrameMetrics is used as the key for this hash table.
   // While this should be safe to use since the ViewID is unique
   nsClassHashtable<nsUint64HashKey, SharedFrameMetricsData> mFrameMetricsTable;
--- a/gfx/layers/ipc/CompositorBridgeParent.cpp
+++ b/gfx/layers/ipc/CompositorBridgeParent.cpp
@@ -1303,20 +1303,20 @@ CompositorBridgeParent::GetAnimationStor
 mozilla::ipc::IPCResult
 CompositorBridgeParent::RecvGetFrameUniformity(FrameUniformityData* aOutData)
 {
   mCompositionManager->GetFrameUniformity(aOutData);
   return IPC_OK();
 }
 
 void
-CompositorBridgeParent::FlushApzRepaints(const LayerTransactionParent* aLayerTree)
+CompositorBridgeParent::FlushApzRepaints(const uint64_t& aLayersId)
 {
   MOZ_ASSERT(mApzcTreeManager);
-  uint64_t layersId = aLayerTree->GetId();
+  uint64_t layersId = aLayersId;
   if (layersId == 0) {
     // The request is coming from the parent-process layer tree, so we should
     // use the compositor's root layer tree id.
     layersId = mRootLayerTreeID;
   }
   RefPtr<CompositorBridgeParent> self = this;
   APZThreadUtils::RunOnControllerThread(NS_NewRunnableFunction([=] () {
     self->mApzcTreeManager->FlushApzRepaints(layersId);
@@ -1327,32 +1327,31 @@ void
 CompositorBridgeParent::GetAPZTestData(const LayerTransactionParent* aLayerTree,
                                        APZTestData* aOutData)
 {
   MonitorAutoLock lock(*sIndirectLayerTreesLock);
   *aOutData = sIndirectLayerTrees[mRootLayerTreeID].mApzTestData;
 }
 
 void
-CompositorBridgeParent::SetConfirmedTargetAPZC(const LayerTransactionParent* aLayerTree,
-                                         const uint64_t& aInputBlockId,
-                                         const nsTArray<ScrollableLayerGuid>& aTargets)
+CompositorBridgeParent::SetConfirmedTargetAPZC(const uint64_t& aLayersId,
+                                               const uint64_t& aInputBlockId,
+                                               const nsTArray<ScrollableLayerGuid>& aTargets)
 {
   if (!mApzcTreeManager) {
     return;
   }
   // Need to specifically bind this since it's overloaded.
   void (APZCTreeManager::*setTargetApzcFunc)
         (uint64_t, const nsTArray<ScrollableLayerGuid>&) =
         &APZCTreeManager::SetTargetAPZC;
   RefPtr<Runnable> task = NewRunnableMethod
         <uint64_t, StoreCopyPassByConstLRef<nsTArray<ScrollableLayerGuid>>>
         (mApzcTreeManager.get(), setTargetApzcFunc, aInputBlockId, aTargets);
   APZThreadUtils::RunOnControllerThread(task.forget());
-
 }
 
 void
 CompositorBridgeParent::InitializeLayerManager(const nsTArray<LayersBackend>& aBackendHints)
 {
   NS_ASSERTION(!mLayerManager, "Already initialised mLayerManager");
   NS_ASSERTION(!mCompositor,   "Already initialised mCompositor");
 
--- a/gfx/layers/ipc/CompositorBridgeParent.h
+++ b/gfx/layers/ipc/CompositorBridgeParent.h
@@ -103,20 +103,20 @@ public:
   virtual void NotifyClearCachedResources(LayerTransactionParent* aLayerTree) { }
 
   virtual void ForceComposite(LayerTransactionParent* aLayerTree) { }
   virtual bool SetTestSampleTime(LayerTransactionParent* aLayerTree,
                                  const TimeStamp& aTime) { return true; }
   virtual void LeaveTestMode(LayerTransactionParent* aLayerTree) { }
   virtual void ApplyAsyncProperties(LayerTransactionParent* aLayerTree) = 0;
   virtual CompositorAnimationStorage* GetAnimationStorage(const uint64_t& aId) { return nullptr; }
-  virtual void FlushApzRepaints(const LayerTransactionParent* aLayerTree) = 0;
+  virtual void FlushApzRepaints(const uint64_t& aLayersId) = 0;
   virtual void GetAPZTestData(const LayerTransactionParent* aLayerTree,
                               APZTestData* aOutData) { }
-  virtual void SetConfirmedTargetAPZC(const LayerTransactionParent* aLayerTree,
+  virtual void SetConfirmedTargetAPZC(const uint64_t& aLayersId,
                                       const uint64_t& aInputBlockId,
                                       const nsTArray<ScrollableLayerGuid>& aTargets) = 0;
   virtual void UpdatePaintTime(LayerTransactionParent* aLayerTree, const TimeDuration& aPaintTime) {}
 
   virtual ShmemAllocator* AsShmemAllocator() override { return this; }
 
   virtual CompositorBridgeParentBase* AsCompositorBridgeParentBase() override { return this; }
 
@@ -226,20 +226,20 @@ public:
                                    bool aHitTestUpdate) override;
   virtual void ForceComposite(LayerTransactionParent* aLayerTree) override;
   virtual bool SetTestSampleTime(LayerTransactionParent* aLayerTree,
                                  const TimeStamp& aTime) override;
   virtual void LeaveTestMode(LayerTransactionParent* aLayerTree) override;
   virtual void ApplyAsyncProperties(LayerTransactionParent* aLayerTree)
                override;
   virtual CompositorAnimationStorage* GetAnimationStorage(const uint64_t& aId) override;
-  virtual void FlushApzRepaints(const LayerTransactionParent* aLayerTree) override;
+  virtual void FlushApzRepaints(const uint64_t& aLayersId) override;
   virtual void GetAPZTestData(const LayerTransactionParent* aLayerTree,
                               APZTestData* aOutData) override;
-  virtual void SetConfirmedTargetAPZC(const LayerTransactionParent* aLayerTree,
+  virtual void SetConfirmedTargetAPZC(const uint64_t& aLayersId,
                                       const uint64_t& aInputBlockId,
                                       const nsTArray<ScrollableLayerGuid>& aTargets) override;
   virtual AsyncCompositionManager* GetCompositionManager(LayerTransactionParent* aLayerTree) override { return mCompositionManager; }
 
   virtual PTextureParent* AllocPTextureParent(const SurfaceDescriptor& aSharedData,
                                               const LayersBackend& aLayersBackend,
                                               const TextureFlags& aFlags,
                                               const uint64_t& aId,
--- a/gfx/layers/ipc/CrossProcessCompositorBridgeParent.cpp
+++ b/gfx/layers/ipc/CrossProcessCompositorBridgeParent.cpp
@@ -417,56 +417,54 @@ CrossProcessCompositorBridgeParent::GetA
   }
 
   MOZ_ASSERT(state->mParent);
   // GetAnimationStorage in CompositorBridgeParent expects id as 0
   return state->mParent->GetAnimationStorage(0);
 }
 
 void
-CrossProcessCompositorBridgeParent::FlushApzRepaints(const LayerTransactionParent* aLayerTree)
+CrossProcessCompositorBridgeParent::FlushApzRepaints(const uint64_t& aLayersId)
 {
-  uint64_t id = aLayerTree->GetId();
-  MOZ_ASSERT(id != 0);
+  MOZ_ASSERT(aLayersId != 0);
   const CompositorBridgeParent::LayerTreeState* state =
-    CompositorBridgeParent::GetIndirectShadowTree(id);
+    CompositorBridgeParent::GetIndirectShadowTree(aLayersId);
   if (!state) {
     return;
   }
 
   MOZ_ASSERT(state->mParent);
-  state->mParent->FlushApzRepaints(aLayerTree);
+  state->mParent->FlushApzRepaints(aLayersId);
 }
 
 void
 CrossProcessCompositorBridgeParent::GetAPZTestData(
   const LayerTransactionParent* aLayerTree,
   APZTestData* aOutData)
 {
   uint64_t id = aLayerTree->GetId();
   MOZ_ASSERT(id != 0);
   MonitorAutoLock lock(*sIndirectLayerTreesLock);
   *aOutData = sIndirectLayerTrees[id].mApzTestData;
 }
 
 void
 CrossProcessCompositorBridgeParent::SetConfirmedTargetAPZC(
-  const LayerTransactionParent* aLayerTree,
+  const uint64_t& aLayersId,
   const uint64_t& aInputBlockId,
   const nsTArray<ScrollableLayerGuid>& aTargets)
 {
-  uint64_t id = aLayerTree->GetId();
-  MOZ_ASSERT(id != 0);
+  MOZ_ASSERT(aLayersId != 0);
   const CompositorBridgeParent::LayerTreeState* state =
-    CompositorBridgeParent::GetIndirectShadowTree(id);
+    CompositorBridgeParent::GetIndirectShadowTree(aLayersId);
   if (!state || !state->mParent) {
     return;
   }
 
-  state->mParent->SetConfirmedTargetAPZC(aLayerTree, aInputBlockId, aTargets);
+  state->mParent->SetConfirmedTargetAPZC(aLayersId, aInputBlockId, aTargets);
 }
 
 AsyncCompositionManager*
 CrossProcessCompositorBridgeParent::GetCompositionManager(LayerTransactionParent* aLayerTree)
 {
   uint64_t id = aLayerTree->GetId();
   const CompositorBridgeParent::LayerTreeState* state =
     CompositorBridgeParent::GetIndirectShadowTree(id);
--- a/gfx/layers/ipc/CrossProcessCompositorBridgeParent.h
+++ b/gfx/layers/ipc/CrossProcessCompositorBridgeParent.h
@@ -99,20 +99,20 @@ public:
   virtual void NotifyClearCachedResources(LayerTransactionParent* aLayerTree) override;
   virtual bool SetTestSampleTime(LayerTransactionParent* aLayerTree,
                                  const TimeStamp& aTime) override;
   virtual void LeaveTestMode(LayerTransactionParent* aLayerTree) override;
   virtual void ApplyAsyncProperties(LayerTransactionParent* aLayerTree)
                override;
   virtual CompositorAnimationStorage*
     GetAnimationStorage(const uint64_t& aId) override;
-  virtual void FlushApzRepaints(const LayerTransactionParent* aLayerTree) override;
+  virtual void FlushApzRepaints(const uint64_t& aLayersId) override;
   virtual void GetAPZTestData(const LayerTransactionParent* aLayerTree,
                               APZTestData* aOutData) override;
-  virtual void SetConfirmedTargetAPZC(const LayerTransactionParent* aLayerTree,
+  virtual void SetConfirmedTargetAPZC(const uint64_t& aLayersId,
                                       const uint64_t& aInputBlockId,
                                       const nsTArray<ScrollableLayerGuid>& aTargets) override;
 
   virtual AsyncCompositionManager* GetCompositionManager(LayerTransactionParent* aParent) override;
   virtual mozilla::ipc::IPCResult RecvRemotePluginsReady()  override { return IPC_FAIL_NO_REASON(this); }
 
   void DidComposite(uint64_t aId,
                     TimeStamp& aCompositeStart,
--- a/gfx/layers/ipc/LayerTransactionParent.cpp
+++ b/gfx/layers/ipc/LayerTransactionParent.cpp
@@ -839,17 +839,17 @@ LayerTransactionParent::RecvSetAsyncZoom
   }
   controller->SetTestAsyncZoom(LayerToParentLayerScale(aValue));
   return IPC_OK();
 }
 
 mozilla::ipc::IPCResult
 LayerTransactionParent::RecvFlushApzRepaints()
 {
-  mCompositorBridge->FlushApzRepaints(this);
+  mCompositorBridge->FlushApzRepaints(GetId());
   return IPC_OK();
 }
 
 mozilla::ipc::IPCResult
 LayerTransactionParent::RecvGetAPZTestData(APZTestData* aOutData)
 {
   mCompositorBridge->GetAPZTestData(this, aOutData);
   return IPC_OK();
@@ -861,17 +861,17 @@ LayerTransactionParent::RecvRequestPrope
   *aValue = -1;
   return IPC_OK();
 }
 
 mozilla::ipc::IPCResult
 LayerTransactionParent::RecvSetConfirmedTargetAPZC(const uint64_t& aBlockId,
                                                    nsTArray<ScrollableLayerGuid>&& aTargets)
 {
-  mCompositorBridge->SetConfirmedTargetAPZC(this, aBlockId, aTargets);
+  mCompositorBridge->SetConfirmedTargetAPZC(GetId(), aBlockId, aTargets);
   return IPC_OK();
 }
 
 bool
 LayerTransactionParent::Attach(Layer* aLayer,
                                CompositableHost* aCompositable,
                                bool aIsAsync)
 {
--- a/gfx/layers/ipc/PLayerTransaction.ipdl
+++ b/gfx/layers/ipc/PLayerTransaction.ipdl
@@ -66,16 +66,20 @@ parent:
 
   // Create a new Compositable.
   async NewCompositable(CompositableHandle handle, TextureInfo info);
 
   // Release an object that is no longer in use.
   async ReleaseLayer(LayerHandle layer);
   async ReleaseCompositable(CompositableHandle compositable);
 
+  // Tell the compositor to notify APZ that a layer has been confirmed for an
+  // input event.
+  async SetConfirmedTargetAPZC(uint64_t aInputBlockId, ScrollableLayerGuid[] aTargets);
+
   // Testing APIs
 
   // Enter test mode, set the sample time to sampleTime, and resample
   // animations. sampleTime must not be null.
   sync SetTestSampleTime(TimeStamp sampleTime);
   // Leave test mode and resume normal compositing
   sync LeaveTestMode();
 
@@ -117,20 +121,16 @@ parent:
   sync GetAPZTestData() returns (APZTestData data);
 
   // Query a named property from the last frame
   sync RequestProperty(nsString property) returns (float value);
 
   // Return the TextureFactoryIdentifier for this compositor.
   sync GetTextureFactoryIdentifier() returns (TextureFactoryIdentifier aIdentifier);
 
-  // Tell the compositor to notify APZ that a layer has been confirmed for an
-  // input event.
-  async SetConfirmedTargetAPZC(uint64_t aInputBlockId, ScrollableLayerGuid[] aTargets);
-
   async RecordPaintTimes(PaintTiming timing);
 
   async Shutdown();
   sync ShutdownSync();
 
 child:
   async __delete__();
 };
--- a/gfx/layers/ipc/PWebRenderBridge.ipdl
+++ b/gfx/layers/ipc/PWebRenderBridge.ipdl
@@ -9,16 +9,17 @@ include LayersSurfaces;
 include LayersMessages;
 include "mozilla/GfxMessageUtils.h";
 include "mozilla/layers/WebRenderMessageUtils.h";
 
 include WebRenderMessages;
 include protocol PCompositorBridge;
 include protocol PTexture;
 
+using struct mozilla::layers::ScrollableLayerGuid from "FrameMetrics.h";
 using struct mozilla::layers::TextureInfo from "mozilla/layers/CompositorTypes.h";
 using mozilla::layers::CompositableHandle from "mozilla/layers/LayersTypes.h";
 using mozilla::wr::ByteBuffer from "mozilla/webrender/WebRenderTypes.h";
 using mozilla::wr::ExternalImageId from "mozilla/webrender/WebRenderTypes.h";
 using mozilla::wr::ImageKey from "mozilla/webrender/WebRenderTypes.h";
 using mozilla::wr::FontKey from "mozilla/webrender/WebRenderTypes.h";
 using WrBuiltDisplayListDescriptor from "mozilla/webrender/webrender_ffi.h";
 using WrSize from "mozilla/webrender/webrender_ffi.h";
@@ -62,15 +63,23 @@ parent:
   async AddExternalImageId(ExternalImageId aImageId, CompositableHandle aHandle);
   async AddExternalImageIdForCompositable(ExternalImageId aImageId, CompositableHandle aHandle);
   async RemoveExternalImageId(ExternalImageId aImageId);
   async SetLayerObserverEpoch(uint64_t layerObserverEpoch);
   async ClearCachedResources();
   // Schedule a composite if one isn't already scheduled.
   async ForceComposite();
 
+  // These correspond exactly to the equivalent APIs in PLayerTransaction -
+  // see those for documentation.
+  async SetConfirmedTargetAPZC(uint64_t aInputBlockId, ScrollableLayerGuid[] aTargets);
+  // More copied from PLayerTransaction, but these are only used for testing.
+  sync SetAsyncScrollOffset(ViewID scrollId, float x, float y);
+  sync SetAsyncZoom(ViewID scrollId, float zoom);
+  async FlushApzRepaints();
+
   async Shutdown();
 child:
   async __delete__();
 };
 
 } // layers
 } // mozilla
--- a/gfx/layers/wr/ScrollingLayersHelper.cpp
+++ b/gfx/layers/wr/ScrollingLayersHelper.cpp
@@ -27,22 +27,31 @@ ScrollingLayersHelper::ScrollingLayersHe
   }
 
   Layer* layer = mLayer->GetLayer();
   for (uint32_t i = layer->GetScrollMetadataCount(); i > 0; i--) {
     const FrameMetrics& fm = layer->GetFrameMetrics(i - 1);
     if (!fm.IsScrollable()) {
       return;
     }
-    LayoutDeviceRect contentRect = fm.GetExpandedScrollableRect()
-        * fm.GetDevPixelsPerCSSPixel();
+    LayerRect contentRect = ViewAs<LayerPixel>(
+        fm.GetExpandedScrollableRect() * fm.GetDevPixelsPerCSSPixel(),
+        PixelCastJustification::WebRenderHasUnitResolution);
     // TODO: check coordinate systems are sane here
     LayerRect clipBounds = ViewAs<LayerPixel>(
         fm.GetCompositionBounds(),
         PixelCastJustification::MovingDownToChildren);
+    // The content rect that we hand to PushScrollLayer should be relative to
+    // the same origin as the clipBounds that we hand to PushScrollLayer - that
+    // is, both of them should be relative to the stacking context `aStackingContext`.
+    // However, when we get the scrollable rect from the FrameMetrics, it has
+    // a nominal top-left of 0,0 (maybe different for RTL pages?) and so to
+    // get it in the same coordinate space we're going to shift it by the
+    // composition bounds top-left.
+    contentRect.MoveBy(clipBounds.TopLeft());
     mBuilder->PushScrollLayer(fm.GetScrollId(),
         aStackingContext.ToRelativeWrRect(contentRect),
         aStackingContext.ToRelativeWrRect(clipBounds));
   }
 }
 
 ScrollingLayersHelper::~ScrollingLayersHelper()
 {
--- a/gfx/layers/wr/WebRenderBridgeParent.cpp
+++ b/gfx/layers/wr/WebRenderBridgeParent.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set sw=4 ts=8 et tw=80 : */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "mozilla/layers/WebRenderBridgeParent.h"
 
+#include "apz/src/AsyncPanZoomController.h"
 #include "CompositableHost.h"
 #include "gfxPrefs.h"
 #include "GLContext.h"
 #include "GLContextProvider.h"
 #include "mozilla/Range.h"
 #include "mozilla/layers/AnimationHelper.h"
 #include "mozilla/layers/APZCTreeManager.h"
 #include "mozilla/layers/Compositor.h"
@@ -270,17 +271,17 @@ WebRenderBridgeParent::RecvDeleteImage(c
 
 mozilla::ipc::IPCResult
 WebRenderBridgeParent::RecvDeleteCompositorAnimations(InfallibleTArray<uint64_t>&& aIds)
 {
   if (mDestroyed) {
     return IPC_OK();
   }
 
-  uint64_t storageId = mWidget ? 0 : mPipelineId.mHandle;
+  uint64_t storageId = mWidget ? 0 : GetLayersId();
   CompositorAnimationStorage* storage =
     mCompositorBridge->GetAnimationStorage(storageId);
   MOZ_ASSERT(storage);
 
   for (uint32_t i = 0; i < aIds.Length(); i++) {
     storage->ClearById(aIds[i]);
   }
 
@@ -341,19 +342,18 @@ WebRenderBridgeParent::GetRootCompositor
     // This WebRenderBridgeParent is attached to the root
     // CompositorBridgeParent.
     return static_cast<CompositorBridgeParent*>(mCompositorBridge);
   }
 
   // Otherwise, this WebRenderBridgeParent is attached to a
   // CrossProcessCompositorBridgeParent so we have an extra level of
   // indirection to unravel.
-  uint64_t layersId = wr::AsUint64(mPipelineId);
   CompositorBridgeParent::LayerTreeState* lts =
-      CompositorBridgeParent::GetIndirectShadowTree(layersId);
+      CompositorBridgeParent::GetIndirectShadowTree(GetLayersId());
   MOZ_ASSERT(lts);
   return lts->mParent;
 }
 
 void
 WebRenderBridgeParent::UpdateAPZ()
 {
   CompositorBridgeParent* cbp = GetRootCompositorBridgeParent();
@@ -362,17 +362,17 @@ WebRenderBridgeParent::UpdateAPZ()
   }
   uint64_t rootLayersId = cbp->RootLayerTreeId();
   RefPtr<WebRenderBridgeParent> rootWrbp = cbp->GetWebRenderBridgeParent();
   if (!rootWrbp) {
     return;
   }
   if (RefPtr<APZCTreeManager> apzc = cbp->GetAPZCTreeManager()) {
     apzc->UpdateHitTestingTree(rootLayersId, rootWrbp->GetScrollData(),
-        mScrollData.IsFirstPaint(), wr::AsUint64(mPipelineId),
+        mScrollData.IsFirstPaint(), GetLayersId(),
         /* TODO: propagate paint sequence number */ 0);
   }
 }
 
 bool
 WebRenderBridgeParent::PushAPZStateToWR(nsTArray<WrTransformProperty>& aTransformArray)
 {
   CompositorBridgeParent* cbp = GetRootCompositorBridgeParent();
@@ -533,17 +533,17 @@ WebRenderBridgeParent::ProcessWebRenderC
           NS_ERROR("ReceiveCompositableUpdate failed");
         }
         break;
       }
       case WebRenderParentCommand::TOpAddCompositorAnimations: {
         const OpAddCompositorAnimations& op = cmd.get_OpAddCompositorAnimations();
         CompositorAnimations data(Move(op.data()));
         if (data.animations().Length()) {
-          uint64_t id = mWidget ? 0 : wr::AsUint64(mPipelineId);
+          uint64_t id = mWidget ? 0 : GetLayersId();
           CompositorAnimationStorage* storage =
             mCompositorBridge->GetAnimationStorage(id);
           if (storage) {
             storage->SetAnimations(data.id(), data.animations());
             // Store the default opacity
             if (op.opacity().type() == OptionalOpacity::Tfloat) {
               storage->SetAnimatedValue(data.id(), op.opacity().get_float());
             }
@@ -569,17 +569,17 @@ WebRenderBridgeParent::ProcessWebRenderC
   mApi->SetRootDisplayList(gfx::Color(0.3f, 0.f, 0.f, 1.f), aEpoch, LayerSize(aSize.width, aSize.height),
                            mPipelineId, aContentSize,
                            dlDesc, dl.mData, dl.mLength);
 
   ScheduleComposition();
   DeleteOldImages();
 
   if (ShouldParentObserveEpoch()) {
-    mCompositorBridge->ObserveLayerUpdate(wr::AsUint64(mPipelineId), GetChildLayerObserverEpoch(), true);
+    mCompositorBridge->ObserveLayerUpdate(GetLayersId(), GetChildLayerObserverEpoch(), true);
   }
 }
 
 mozilla::ipc::IPCResult
 WebRenderBridgeParent::RecvDPGetSnapshot(PTextureParent* aTexture)
 {
   if (mDestroyed) {
     return IPC_OK();
@@ -707,41 +707,93 @@ WebRenderBridgeParent::RecvSetLayerObser
 {
   mChildLayerObserverEpoch = aLayerObserverEpoch;
   return IPC_OK();
 }
 
 mozilla::ipc::IPCResult
 WebRenderBridgeParent::RecvClearCachedResources()
 {
-  mCompositorBridge->ObserveLayerUpdate(wr::AsUint64(mPipelineId), GetChildLayerObserverEpoch(), false);
+  mCompositorBridge->ObserveLayerUpdate(GetLayersId(), GetChildLayerObserverEpoch(), false);
   return IPC_OK();
 }
 
 mozilla::ipc::IPCResult
 WebRenderBridgeParent::RecvForceComposite()
 {
   if (mDestroyed) {
     return IPC_OK();
   }
   ScheduleComposition();
   return IPC_OK();
 }
 
+already_AddRefed<AsyncPanZoomController>
+WebRenderBridgeParent::GetTargetAPZC(const FrameMetrics::ViewID& aScrollId)
+{
+  RefPtr<AsyncPanZoomController> apzc;
+  if (CompositorBridgeParent* cbp = GetRootCompositorBridgeParent()) {
+    if (RefPtr<APZCTreeManager> apzctm = cbp->GetAPZCTreeManager()) {
+      apzc = apzctm->GetTargetAPZC(GetLayersId(), aScrollId);
+    }
+  }
+  return apzc.forget();
+}
+
+mozilla::ipc::IPCResult
+WebRenderBridgeParent::RecvSetConfirmedTargetAPZC(const uint64_t& aBlockId,
+                                                  nsTArray<ScrollableLayerGuid>&& aTargets)
+{
+  mCompositorBridge->SetConfirmedTargetAPZC(GetLayersId(), aBlockId, aTargets);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult
+WebRenderBridgeParent::RecvSetAsyncScrollOffset(const FrameMetrics::ViewID& aScrollId,
+                                                const float& aX,
+                                                const float& aY)
+{
+  RefPtr<AsyncPanZoomController> apzc = GetTargetAPZC(aScrollId);
+  if (!apzc) {
+    return IPC_FAIL_NO_REASON(this);
+  }
+  apzc->SetTestAsyncScrollOffset(CSSPoint(aX, aY));
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult
+WebRenderBridgeParent::RecvSetAsyncZoom(const FrameMetrics::ViewID& aScrollId,
+                                        const float& aZoom)
+{
+  RefPtr<AsyncPanZoomController> apzc = GetTargetAPZC(aScrollId);
+  if (!apzc) {
+    return IPC_FAIL_NO_REASON(this);
+  }
+  apzc->SetTestAsyncZoom(LayerToParentLayerScale(aZoom));
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult
+WebRenderBridgeParent::RecvFlushApzRepaints()
+{
+  mCompositorBridge->FlushApzRepaints(GetLayersId());
+  return IPC_OK();
+}
+
 void
 WebRenderBridgeParent::ActorDestroy(ActorDestroyReason aWhy)
 {
   Destroy();
 }
 
 void
 WebRenderBridgeParent::SampleAnimations(nsTArray<WrOpacityProperty>& aOpacityArray,
                                         nsTArray<WrTransformProperty>& aTransformArray)
 {
-  uint64_t id = mWidget ? 0 : wr::AsUint64(mPipelineId);
+  uint64_t id = mWidget ? 0 : GetLayersId();
   CompositorAnimationStorage* storage =
     mCompositorBridge->GetAnimationStorage(id);
 
   AnimationHelper::SampleAnimations(storage,
                                     mCompositorScheduler->
                                       GetLastComposeTime());
 
   // return the animated data if has
@@ -857,16 +909,22 @@ WebRenderBridgeParent::FlushTransactionI
   }
   return id;
 }
 
 WebRenderBridgeParent::~WebRenderBridgeParent()
 {
 }
 
+uint64_t
+WebRenderBridgeParent::GetLayersId() const
+{
+  return wr::AsUint64(mPipelineId);
+}
+
 void
 WebRenderBridgeParent::DeleteOldImages()
 {
   for (wr::ImageKey key : mKeysToDelete) {
     mApi->DeleteImage(key);
   }
   mKeysToDelete.clear();
 }
--- a/gfx/layers/wr/WebRenderBridgeParent.h
+++ b/gfx/layers/wr/WebRenderBridgeParent.h
@@ -111,16 +111,25 @@ public:
   mozilla::ipc::IPCResult RecvAddExternalImageIdForCompositable(const ExternalImageId& aImageId,
                                                                 const CompositableHandle& aHandle) override;
   mozilla::ipc::IPCResult RecvRemoveExternalImageId(const ExternalImageId& aImageId) override;
   mozilla::ipc::IPCResult RecvSetLayerObserverEpoch(const uint64_t& aLayerObserverEpoch) override;
 
   mozilla::ipc::IPCResult RecvClearCachedResources() override;
   mozilla::ipc::IPCResult RecvForceComposite() override;
 
+  mozilla::ipc::IPCResult RecvSetConfirmedTargetAPZC(const uint64_t& aBlockId,
+                                                     nsTArray<ScrollableLayerGuid>&& aTargets) override;
+  mozilla::ipc::IPCResult RecvSetAsyncScrollOffset(const FrameMetrics::ViewID& aScrollId,
+                                                   const float& aX,
+                                                   const float& aY) override;
+  mozilla::ipc::IPCResult RecvSetAsyncZoom(const FrameMetrics::ViewID& aScrollId,
+                                           const float& aZoom) override;
+  mozilla::ipc::IPCResult RecvFlushApzRepaints() override;
+
   void ActorDestroy(ActorDestroyReason aWhy) override;
   void SetWebRenderProfilerEnabled(bool aEnabled);
 
   void Pause();
   bool Resume();
 
   void Destroy();
 
@@ -166,16 +175,17 @@ public:
 
   static uint32_t AllocIdNameSpace() {
     return ++sIdNameSpace;
   }
 
 private:
   virtual ~WebRenderBridgeParent();
 
+  uint64_t GetLayersId() const;
   void DeleteOldImages();
   void ProcessWebRenderCommands(const gfx::IntSize &aSize,
                                 InfallibleTArray<WebRenderParentCommand>& commands,
                                 const wr::Epoch& aEpoch,
                                 const WrSize& aContentSize,
                                 const ByteBuffer& dl,
                                 const WrBuiltDisplayListDescriptor& dlDesc);
   void ScheduleComposition();
@@ -198,16 +208,20 @@ private:
   CompositorBridgeParent* GetRootCompositorBridgeParent() const;
 
   // Have APZ push the async scroll state to WR. Returns true if an APZ
   // animation is in effect and we need to schedule another composition.
   // If scrollbars need their transforms updated, the provided aTransformArray
   // is populated with the property update details.
   bool PushAPZStateToWR(nsTArray<WrTransformProperty>& aTransformArray);
 
+  // Helper method to get an APZC reference from a scroll id. Uses the layers
+  // id of this bridge, and may return null if the APZC wasn't found.
+  already_AddRefed<AsyncPanZoomController> GetTargetAPZC(const FrameMetrics::ViewID& aId);
+
 private:
   struct PendingTransactionId {
     PendingTransactionId(wr::Epoch aEpoch, uint64_t aId)
       : mEpoch(aEpoch)
       , mId(aId)
     {}
     wr::Epoch mEpoch;
     uint64_t mId;
--- a/gfx/layers/wr/WebRenderImageLayer.cpp
+++ b/gfx/layers/wr/WebRenderImageLayer.cpp
@@ -3,16 +3,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "WebRenderImageLayer.h"
 
 #include "gfxPrefs.h"
 #include "LayersLogging.h"
 #include "mozilla/gfx/gfxVars.h"
+#include "mozilla/layers/CompositorBridgeChild.h"
 #include "mozilla/layers/ImageClient.h"
 #include "mozilla/layers/ScrollingLayersHelper.h"
 #include "mozilla/layers/StackingContextHelper.h"
 #include "mozilla/layers/TextureClientRecycleAllocator.h"
 #include "mozilla/layers/TextureWrapperImage.h"
 #include "mozilla/layers/WebRenderBridgeChild.h"
 #include "mozilla/webrender/WebRenderTypes.h"
 
@@ -26,17 +27,16 @@ WebRenderImageLayer::WebRenderImageLayer
   , mImageClientTypeContainer(CompositableType::UNKNOWN)
 {
   MOZ_COUNT_CTOR(WebRenderImageLayer);
 }
 
 WebRenderImageLayer::~WebRenderImageLayer()
 {
   MOZ_COUNT_DTOR(WebRenderImageLayer);
-  mPipelineIdRequest.DisconnectIfExists();
 
   for (auto key : mVideoKeys) {
     WrManager()->AddImageKeyForDiscard(key);
   }
   if (mKey.isSome()) {
     WrManager()->AddImageKeyForDiscard(mKey.value());
   }
 
@@ -111,48 +111,32 @@ WebRenderImageLayer::RenderLayer(wr::Dis
 
   CompositableType type = GetImageClientType();
   if (type == CompositableType::UNKNOWN) {
     return;
   }
 
   MOZ_ASSERT(GetImageClientType() != CompositableType::UNKNOWN);
 
-  // Allocate PipelineId if necessary
-  if (GetImageClientType() == CompositableType::IMAGE_BRIDGE &&
-      mPipelineId.isNothing() && !mPipelineIdRequest.Exists()) {
-    // Use Holder to pass this pointer to lambda.
-    // Static anaysis tool does not permit to pass refcounted variable to lambda.
-    // And we do not want to use RefPtr<WebRenderImageLayer> here.
-    Holder holder(this);
-    WrManager()->AllocPipelineId()
-      ->Then(AbstractThread::MainThread(), __func__,
-      [holder] (const wr::PipelineId& aPipelineId) {
-        holder->mPipelineIdRequest.Complete();
-        holder->mPipelineId = Some(aPipelineId);
-      },
-      [holder] (const ipc::PromiseRejectReason &aReason) {
-        holder->mPipelineIdRequest.Complete();
-      })->Track(mPipelineIdRequest);
-  }
-
   if (GetImageClientType() == CompositableType::IMAGE && !mImageClient) {
     mImageClient = ImageClient::CreateImageClient(CompositableType::IMAGE,
                                                   WrBridge(),
                                                   TextureFlags::DEFAULT);
     if (!mImageClient) {
       return;
     }
     mImageClient->Connect();
   }
 
   if (mExternalImageId.isNothing()) {
     if (GetImageClientType() == CompositableType::IMAGE_BRIDGE) {
       MOZ_ASSERT(!mImageClient);
       mExternalImageId = Some(WrBridge()->AllocExternalImageId(mContainer->GetAsyncContainerHandle()));
+      // Alloc async image pipeline id.
+      mPipelineId = Some(WrBridge()->GetCompositorBridgeChild()->GetNextPipelineId());
     } else {
       // Handle CompositableType::IMAGE case
       MOZ_ASSERT(mImageClient);
       mExternalImageId = Some(WrBridge()->AllocExternalImageIdForCompositable(mImageClient));
     }
   }
   MOZ_ASSERT(mExternalImageId.isSome());
 
--- a/gfx/layers/wr/WebRenderImageLayer.h
+++ b/gfx/layers/wr/WebRenderImageLayer.h
@@ -33,33 +33,22 @@ public:
                    const StackingContextHelper& aSc) override;
   Maybe<WrImageMask> RenderMaskLayer(const gfx::Matrix4x4& aTransform) override;
 
 protected:
   CompositableType GetImageClientType();
 
   void AddWRVideoImage(size_t aChannelNumber);
 
-  class Holder {
-  public:
-    explicit Holder(WebRenderImageLayer* aLayer)
-      : mLayer(aLayer)
-    {}
-    WebRenderImageLayer* operator ->() const { return mLayer; }
-  private:
-    WebRenderImageLayer* mLayer;
-  };
-
   wr::MaybeExternalImageId mExternalImageId;
   // Some video image format contains multiple channel data.
   nsTArray<wr::ImageKey> mVideoKeys;
   // The regular single channel image.
   Maybe<wr::ImageKey> mKey;
   RefPtr<ImageClient> mImageClient;
   CompositableType mImageClientTypeContainer;
   Maybe<wr::PipelineId> mPipelineId;
-  MozPromiseRequestHolder<PipelineIdPromise> mPipelineIdRequest;
 };
 
 } // namespace layers
 } // namespace mozilla
 
 #endif // GFX_WEBRENDERIMAGELAYER_H
--- a/gfx/layers/wr/WebRenderLayerManager.cpp
+++ b/gfx/layers/wr/WebRenderLayerManager.cpp
@@ -2,18 +2,16 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "WebRenderLayerManager.h"
 
 #include "gfxPrefs.h"
 #include "LayersLogging.h"
-#include "mozilla/dom/ContentChild.h"
-#include "mozilla/gfx/GPUProcessManager.h"
 #include "mozilla/layers/CompositorBridgeChild.h"
 #include "mozilla/layers/StackingContextHelper.h"
 #include "mozilla/layers/TextureClient.h"
 #include "mozilla/layers/WebRenderBridgeChild.h"
 #include "WebRenderCanvasLayer.h"
 #include "WebRenderColorLayer.h"
 #include "WebRenderContainerLayer.h"
 #include "WebRenderImageLayer.h"
@@ -488,35 +486,16 @@ WebRenderLayerManager::SendInvalidRegion
 }
 
 void
 WebRenderLayerManager::Composite()
 {
   WrBridge()->SendForceComposite();
 }
 
-RefPtr<PipelineIdPromise>
-WebRenderLayerManager::AllocPipelineId()
-{
-  if (XRE_IsParentProcess()) {
-    GPUProcessManager* pm = GPUProcessManager::Get();
-    if (!pm) {
-      return PipelineIdPromise::CreateAndReject(ipc::PromiseRejectReason::HandlerRejected, __func__);
-    }
-    return PipelineIdPromise::CreateAndResolve(wr::AsPipelineId(pm->AllocateLayerTreeId()), __func__);;
-  }
-
-  MOZ_ASSERT(XRE_IsContentProcess());
-  RefPtr<dom::ContentChild> contentChild = dom::ContentChild::GetSingleton();
-  if (!contentChild) {
-    return PipelineIdPromise::CreateAndReject(ipc::PromiseRejectReason::HandlerRejected, __func__);
-  }
-  return contentChild->SendAllocPipelineId();
-}
-
 void
 WebRenderLayerManager::SetRoot(Layer* aLayer)
 {
   mRoot = aLayer;
 }
 
 already_AddRefed<PaintedLayer>
 WebRenderLayerManager::CreatePaintedLayer()
--- a/gfx/layers/wr/WebRenderLayerManager.h
+++ b/gfx/layers/wr/WebRenderLayerManager.h
@@ -2,33 +2,30 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef GFX_WEBRENDERLAYERMANAGER_H
 #define GFX_WEBRENDERLAYERMANAGER_H
 
 #include "Layers.h"
-#include "mozilla/ipc/MessageChannel.h"
 #include "mozilla/MozPromise.h"
 #include "mozilla/layers/TransactionIdAllocator.h"
 #include "mozilla/webrender/WebRenderTypes.h"
 
 class nsIWidget;
 
 namespace mozilla {
 namespace layers {
 
 class CompositorBridgeChild;
 class KnowsCompositor;
 class PCompositorBridgeChild;
 class WebRenderBridgeChild;
 
-typedef MozPromise<mozilla::wr::PipelineId, mozilla::ipc::PromiseRejectReason, false> PipelineIdPromise;
-
 class WebRenderLayerManager final : public LayerManager
 {
   typedef nsTArray<RefPtr<Layer> > LayerRefArray;
 
 public:
   explicit WebRenderLayerManager(nsIWidget* aWidget);
   void Initialize(PCompositorBridgeChild* aCBChild, wr::PipelineId aLayersId, TextureFactoryIdentifier* aTextureFactoryIdentifier);
 
@@ -124,18 +121,16 @@ public:
 
   virtual void Mutated(Layer* aLayer) override;
   virtual void MutatedSimple(Layer* aLayer) override;
 
   void Hold(Layer* aLayer);
   void SetTransactionIncomplete() { mTransactionIncomplete = true; }
   bool IsMutatedLayer(Layer* aLayer);
 
-  RefPtr<PipelineIdPromise> AllocPipelineId();
-
 private:
   /**
    * Take a snapshot of the parent context, and copy
    * it into mTarget.
    */
   void MakeSnapshotIfRequired(LayoutDeviceIntSize aSize);
 
   void ClearLayer(Layer* aLayer);
--- a/gfx/thebes/gfxQuartzNativeDrawing.cpp
+++ b/gfx/thebes/gfxQuartzNativeDrawing.cpp
@@ -20,17 +20,17 @@ gfxQuartzNativeDrawing::gfxQuartzNativeD
 
 CGContextRef
 gfxQuartzNativeDrawing::BeginNativeDrawing()
 {
   NS_ASSERTION(!mCGContext, "BeginNativeDrawing called when drawing already in progress");
 
   DrawTarget *dt = mDrawTarget;
   if (dt->IsDualDrawTarget() || dt->IsTiledDrawTarget() ||
-      dt->GetBackendType() != BackendType::SKIA) {
+      dt->GetBackendType() != BackendType::SKIA || dt->IsRecording()) {
     // We need a DrawTarget that we can get a CGContextRef from:
     Matrix transform = dt->GetTransform();
 
     mNativeRect = transform.TransformBounds(mNativeRect);
     mNativeRect.RoundOut();
     if (mNativeRect.IsEmpty()) {
       return nullptr;
     }
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -29,16 +29,17 @@ rayon = {version = "0.7", features = ["u
 webrender_traits = {path = "../webrender_traits"}
 bitflags = "0.7"
 gamma-lut = "0.2"
 thread_profiler = "0.1.1"
 plane-split = "0.3"
 
 [dev-dependencies]
 angle = {git = "https://github.com/servo/angle", branch = "servo"}
+rand = "0.3"                # for the benchmarks
 servo-glutin = "0.10.1"     # for the example apps
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.2", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.3"
 
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/benches/coalesce.rs
@@ -0,0 +1,23 @@
+#![feature(test)]
+
+extern crate rand;
+extern crate test;
+extern crate webrender;
+extern crate webrender_traits;
+
+use rand::Rng;
+use test::Bencher;
+use webrender::TexturePage;
+use webrender_traits::{DeviceUintSize as Size};
+
+#[bench]
+fn bench_coalesce(b: &mut Bencher) {
+    let mut rng = rand::thread_rng();
+    let mut page = TexturePage::new_dummy(Size::new(10000, 10000));
+    let mut test_page = TexturePage::new_dummy(Size::new(10000, 10000));
+    while page.allocate(&Size::new(rng.gen_range(1, 100), rng.gen_range(1, 100))).is_some() {}
+    b.iter(|| {
+        test_page.fill_from(&page);
+        test_page.coalesce();
+    });
+}
--- a/gfx/webrender/res/cs_box_shadow.vs.glsl
+++ b/gfx/webrender/res/cs_box_shadow.vs.glsl
@@ -1,17 +1,17 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
-    CachePrimitiveInstance cpi = fetch_cache_instance();
-    RenderTaskData task = fetch_render_task(cpi.render_task_index);
-    BoxShadow bs = fetch_boxshadow(cpi.specific_prim_index);
+    PrimitiveInstance pi = fetch_prim_instance();
+    RenderTaskData task = fetch_render_task(pi.render_task_index);
+    BoxShadow bs = fetch_boxshadow(pi.specific_prim_address);
 
     vec2 p0 = task.data0.xy;
     vec2 p1 = p0 + task.data0.zw;
 
     vec2 pos = mix(p0, p1, aPosition.xy);
 
     vBorderRadii = bs.border_radius_edge_size_blur_radius_inverted.xx;
     vBlurRadius = bs.border_radius_edge_size_blur_radius_inverted.z;
--- a/gfx/webrender/res/cs_text_run.vs.glsl
+++ b/gfx/webrender/res/cs_text_run.vs.glsl
@@ -3,22 +3,22 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Draw a text run to a cache target. These are always
 // drawn un-transformed. These are used for effects such
 // as text-shadow.
 
 void main(void) {
-    CachePrimitiveInstance cpi = fetch_cache_instance();
-    RenderTaskData task = fetch_render_task(cpi.render_task_index);
-    TextRun text = fetch_text_run(cpi.specific_prim_index);
-    Glyph glyph = fetch_glyph(cpi.sub_index);
-    PrimitiveGeometry pg = fetch_prim_geometry(cpi.global_prim_index);
-    ResourceRect res = fetch_resource_rect(cpi.user_data.x);
+    PrimitiveInstance pi = fetch_prim_instance();
+    RenderTaskData task = fetch_render_task(pi.render_task_index);
+    TextRun text = fetch_text_run(pi.specific_prim_address);
+    Glyph glyph = fetch_glyph(pi.user_data0);
+    PrimitiveGeometry pg = fetch_prim_geometry(pi.global_prim_index);
+    ResourceRect res = fetch_resource_rect(pi.user_data1);
 
     // Glyphs size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
     vec2 size = res.uv_rect.zw - res.uv_rect.xy;
     vec2 origin = task.data0.xy + uDevicePixelRatio * (glyph.offset.xy - pg.local_rect.p0);
     vec4 local_rect = vec4(origin, size);
 
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -112,40 +112,34 @@ float distance_to_line(vec2 p0, vec2 per
 flat varying vec4 vClipMaskUvBounds;
 varying vec3 vClipMaskUv;
 #ifdef WR_FEATURE_TRANSFORM
     flat varying vec4 vLocalBounds;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 
-#define VECS_PER_LAYER             13
+#define VECS_PER_LAYER              9
 #define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_GEOM          2
 #define VECS_PER_SPLIT_GEOM         3
 
 uniform sampler2D sLayers;
 uniform sampler2D sRenderTasks;
 uniform sampler2D sPrimGeometry;
 
 uniform sampler2D sData16;
 uniform sampler2D sData32;
 uniform sampler2D sData64;
 uniform sampler2D sData128;
 uniform sampler2D sResourceRects;
 
 // Instanced attributes
-in int aGlobalPrimId;
-in int aPrimitiveAddress;
-in int aTaskIndex;
-in int aClipTaskIndex;
-in int aLayerIndex;
-in int aElementIndex;
-in ivec2 aUserData;
-in int aZIndex;
+in ivec4 aData0;
+in ivec4 aData1;
 
 // get_fetch_uv is a macro to work around a macOS Intel driver parsing bug.
 // TODO: convert back to a function once the driver issues are resolved, if ever.
 // https://github.com/servo/webrender/pull/623
 // https://github.com/servo/servo/issues/13953
 #define get_fetch_uv(i, vpi)  ivec2(vpi * (i % (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi)), i / (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi))
 
 vec4 fetch_data_1(int index) {
@@ -185,17 +179,16 @@ vec4[8] fetch_data_8(int index) {
     );
 }
 
 
 struct Layer {
     mat4 transform;
     mat4 inv_transform;
     RectWithSize local_clip_rect;
-    vec4 screen_vertices[4];
 };
 
 Layer fetch_layer(int index) {
     Layer layer;
 
     // Create a UV base coord for each 8 texels.
     // This is required because trying to use an offset
     // of more than 8 texels doesn't work on some versions
@@ -212,21 +205,16 @@ Layer fetch_layer(int index) {
     layer.inv_transform[0] = texelFetchOffset(sLayers, uv0, 0, ivec2(4, 0));
     layer.inv_transform[1] = texelFetchOffset(sLayers, uv0, 0, ivec2(5, 0));
     layer.inv_transform[2] = texelFetchOffset(sLayers, uv0, 0, ivec2(6, 0));
     layer.inv_transform[3] = texelFetchOffset(sLayers, uv0, 0, ivec2(7, 0));
 
     vec4 clip_rect = texelFetchOffset(sLayers, uv1, 0, ivec2(0, 0));
     layer.local_clip_rect = RectWithSize(clip_rect.xy, clip_rect.zw);
 
-    layer.screen_vertices[0] = texelFetchOffset(sLayers, uv1, 0, ivec2(1, 0));
-    layer.screen_vertices[1] = texelFetchOffset(sLayers, uv1, 0, ivec2(2, 0));
-    layer.screen_vertices[2] = texelFetchOffset(sLayers, uv1, 0, ivec2(3, 0));
-    layer.screen_vertices[3] = texelFetchOffset(sLayers, uv1, 0, ivec2(4, 0));
-
     return layer;
 }
 
 struct RenderTaskData {
     vec4 data0;
     vec4 data1;
     vec4 data2;
 };
@@ -442,90 +430,89 @@ PrimitiveGeometry fetch_prim_geometry(in
     vec4 local_clip_rect = texelFetchOffset(sPrimGeometry, uv, 0, ivec2(1, 0));
     pg.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
 
     return pg;
 }
 
 struct PrimitiveInstance {
     int global_prim_index;
-    int specific_prim_index;
+    int specific_prim_address;
     int render_task_index;
     int clip_task_index;
     int layer_index;
-    int sub_index;
     int z;
-    ivec2 user_data;
+    int user_data0;
+    int user_data1;
 };
 
 PrimitiveInstance fetch_prim_instance() {
     PrimitiveInstance pi;
 
-    pi.global_prim_index = aGlobalPrimId;
-    pi.specific_prim_index = aPrimitiveAddress;
-    pi.render_task_index = aTaskIndex;
-    pi.clip_task_index = aClipTaskIndex;
-    pi.layer_index = aLayerIndex;
-    pi.sub_index = aElementIndex;
-    pi.user_data = aUserData;
-    pi.z = aZIndex;
+    pi.global_prim_index = aData0.x;
+    pi.specific_prim_address = aData0.y;
+    pi.render_task_index = aData0.z;
+    pi.clip_task_index = aData0.w;
+    pi.layer_index = aData1.x;
+    pi.z = aData1.y;
+    pi.user_data0 = aData1.z;
+    pi.user_data1 = aData1.w;
 
     return pi;
 }
 
-struct CachePrimitiveInstance {
-    int global_prim_index;
-    int specific_prim_index;
+struct CompositeInstance {
     int render_task_index;
-    int sub_index;
-    ivec2 user_data;
+    int src_task_index;
+    int backdrop_task_index;
+    int user_data0;
+    int user_data1;
+    float z;
 };
 
-CachePrimitiveInstance fetch_cache_instance() {
-    CachePrimitiveInstance cpi;
-
-    PrimitiveInstance pi = fetch_prim_instance();
+CompositeInstance fetch_composite_instance() {
+    CompositeInstance ci;
 
-    cpi.global_prim_index = pi.global_prim_index;
-    cpi.specific_prim_index = pi.specific_prim_index;
-    cpi.render_task_index = pi.render_task_index;
-    cpi.sub_index = pi.sub_index;
-    cpi.user_data = pi.user_data;
+    ci.render_task_index = aData0.x;
+    ci.src_task_index = aData0.y;
+    ci.backdrop_task_index = aData0.z;
+    ci.z = float(aData0.w);
 
-    return cpi;
+    ci.user_data0 = aData1.x;
+    ci.user_data1 = aData1.y;
+
+    return ci;
 }
 
 struct Primitive {
     Layer layer;
     ClipArea clip_area;
     AlphaBatchTask task;
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
     int prim_index;
-    // when sending multiple primitives of the same type (e.g. border segments)
-    // this index allows the vertex shader to recognize the difference
-    int sub_index;
-    ivec2 user_data;
+    int user_data0;
+    int user_data1;
     float z;
 };
 
 Primitive load_primitive_custom(PrimitiveInstance pi) {
     Primitive prim;
 
     prim.layer = fetch_layer(pi.layer_index);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
     prim.task = fetch_alpha_batch_task(pi.render_task_index);
 
     PrimitiveGeometry pg = fetch_prim_geometry(pi.global_prim_index);
     prim.local_rect = pg.local_rect;
     prim.local_clip_rect = pg.local_clip_rect;
 
-    prim.prim_index = pi.specific_prim_index;
-    prim.sub_index = pi.sub_index;
-    prim.user_data = pi.user_data;
+    prim.prim_index = pi.specific_prim_address;
+    prim.user_data0 = pi.user_data0;
+    prim.user_data1 = pi.user_data1;
     prim.z = float(pi.z);
 
     return prim;
 }
 
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
@@ -534,17 +521,17 @@ Primitive load_primitive() {
 
 
 // Return the intersection of the plane (set up by "normal" and "point")
 // with the ray (set up by "ray_origin" and "ray_dir"),
 // writing the resulting scaler into "t".
 bool ray_plane(vec3 normal, vec3 point, vec3 ray_origin, vec3 ray_dir, out float t)
 {
     float denom = dot(normal, ray_dir);
-    if (denom > 1e-6) {
+    if (abs(denom) > 1e-6) {
         vec3 d = point - ray_origin;
         t = dot(d, normal) / denom;
         return t >= 0.0;
     }
 
     return false;
 }
 
@@ -563,22 +550,21 @@ vec4 untransform(vec2 ref, vec3 n, vec3 
     float z = p.z + d.z * t; // Z of the visible point on the layer
 
     vec4 r = inv_transform * vec4(ref, z, 1.0);
     return r;
 }
 
 // Given a CSS space position, transform it back into the layer space.
 vec4 get_layer_pos(vec2 pos, Layer layer) {
-    // get 3 of the layer corners in CSS space
-    vec3 a = layer.screen_vertices[0].xyz / layer.screen_vertices[0].w;
-    vec3 b = layer.screen_vertices[3].xyz / layer.screen_vertices[3].w;
-    vec3 c = layer.screen_vertices[2].xyz / layer.screen_vertices[2].w;
+    // get a point on the layer plane
+    vec4 ah = layer.transform * vec4(0.0, 0.0, 0.0, 1.0);
+    vec3 a = ah.xyz / ah.w;
     // get the normal to the layer plane
-    vec3 n = normalize(cross(b-a, c-a));
+    vec3 n = transpose(mat3(layer.inv_transform)) * vec3(0.0, 0.0, 1.0);
     return untransform(pos, n, a, layer.inv_transform);
 }
 
 struct VertexInfo {
     vec2 local_pos;
     vec2 screen_pos;
 };
 
--- a/gfx/webrender/res/ps_angle_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.vs.glsl
@@ -22,16 +22,16 @@ void main(void) {
 
     vStartPoint = start_point;
     vScaledDir = dir / dot(dir, dir);
 
     vTileSize = gradient.tile_size_repeat.xy;
     vTileRepeat = gradient.tile_size_repeat.zw;
 
     // V coordinate of gradient row in lookup texture.
-    vGradientIndex = float(prim.sub_index);
+    vGradientIndex = float(prim.user_data0);
 
     // The texture size of the lookup texture
     vGradientTextureSize = vec2(textureSize(sGradients, 0));
 
     // Whether to repeat the gradient instead of clamping.
     vGradientRepeat = float(int(gradient.extend_mode.x) == EXTEND_MODE_REPEAT);
 }
--- a/gfx/webrender/res/ps_blend.vs.glsl
+++ b/gfx/webrender/res/ps_blend.vs.glsl
@@ -1,17 +1,17 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
-    PrimitiveInstance pi = fetch_prim_instance();
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(pi.render_task_index);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(pi.user_data.x);
+    CompositeInstance ci = fetch_composite_instance();
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
 
     vec2 dest_origin = dest_task.render_target_origin -
                        dest_task.screen_space_origin +
                        src_task.screen_space_origin;
 
     vec2 local_pos = mix(dest_origin,
                          dest_origin + src_task.size,
                          aPosition.xy);
@@ -19,13 +19,13 @@ void main(void) {
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vec2 st0 = src_task.render_target_origin;
     vec2 st1 = src_task.render_target_origin + src_task.size;
 
     vec2 uv = src_task.render_target_origin + aPosition.xy * src_task.size;
     vUv = vec3(uv / texture_size, src_task.render_target_layer_index);
     vUvBounds = vec4(st0 + 0.5, st1 - 0.5) / texture_size.xyxy;
 
-    vOp = pi.sub_index;
-    vAmount = float(pi.user_data.y) / 65535.0;
+    vOp = ci.user_data0;
+    vAmount = float(ci.user_data1) / 65535.0;
 
-    gl_Position = uTransform * vec4(local_pos, pi.z, 1.0);
+    gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
 }
--- a/gfx/webrender/res/ps_border_corner.vs.glsl
+++ b/gfx/webrender/res/ps_border_corner.vs.glsl
@@ -86,38 +86,40 @@ void write_color(vec4 color0, vec4 color
     vColor11 = vec4(color1.rgb * modulate.w, color1.a);
 }
 
 int select_style(int color_select, vec2 fstyle) {
     ivec2 style = ivec2(fstyle);
 
     switch (color_select) {
         case SIDE_BOTH:
+        {
             // TODO(gw): A temporary hack! While we don't support
             //           border corners that have dots or dashes
             //           with another style, pretend they are solid
             //           border corners.
             bool has_dots = style.x == BORDER_STYLE_DOTTED ||
                             style.y == BORDER_STYLE_DOTTED;
             bool has_dashes = style.x == BORDER_STYLE_DASHED ||
                               style.y == BORDER_STYLE_DASHED;
             if (style.x != style.y && (has_dots || has_dashes))
                 return BORDER_STYLE_SOLID;
             return style.x;
+        }
         case SIDE_FIRST:
             return style.x;
         case SIDE_SECOND:
             return style.y;
     }
 }
 
 void main(void) {
     Primitive prim = load_primitive();
     Border border = fetch_border(prim.prim_index);
-    int sub_part = prim.sub_index;
+    int sub_part = prim.user_data0;
     BorderCorners corners = get_border_corners(border, prim.local_rect);
 
     vec2 p0, p1;
 
     // TODO(gw): We'll need to pass through multiple styles
     //           once we support style transitions per corner.
     int style;
     vec4 edge_distances;
@@ -130,17 +132,17 @@ void main(void) {
     switch (sub_part) {
         case 0: {
             p0 = corners.tl_outer;
             p1 = corners.tl_inner;
             color0 = border.colors[0];
             color1 = border.colors[1];
             vClipCenter = corners.tl_outer + border.radii[0].xy;
             vClipSign = vec2(1.0);
-            style = select_style(prim.user_data.x, border.style.yx);
+            style = select_style(prim.user_data1, border.style.yx);
             vec4 adjusted_widths = get_effective_border_widths(border, style);
             vec4 inv_adjusted_widths = border.widths - adjusted_widths;
             set_radii(style,
                       border.radii[0].xy,
                       border.widths.xy,
                       adjusted_widths.xy);
             set_edge_line(border.widths.xy,
                           corners.tl_outer,
@@ -152,17 +154,17 @@ void main(void) {
         }
         case 1: {
             p0 = vec2(corners.tr_inner.x, corners.tr_outer.y);
             p1 = vec2(corners.tr_outer.x, corners.tr_inner.y);
             color0 = border.colors[1];
             color1 = border.colors[2];
             vClipCenter = corners.tr_outer + vec2(-border.radii[0].z, border.radii[0].w);
             vClipSign = vec2(-1.0, 1.0);
-            style = select_style(prim.user_data.x, border.style.zy);
+            style = select_style(prim.user_data1, border.style.zy);
             vec4 adjusted_widths = get_effective_border_widths(border, style);
             vec4 inv_adjusted_widths = border.widths - adjusted_widths;
             set_radii(style,
                       border.radii[0].zw,
                       border.widths.zy,
                       adjusted_widths.zy);
             set_edge_line(border.widths.zy,
                           corners.tr_outer,
@@ -176,17 +178,17 @@ void main(void) {
         }
         case 2: {
             p0 = corners.br_inner;
             p1 = corners.br_outer;
             color0 = border.colors[2];
             color1 = border.colors[3];
             vClipCenter = corners.br_outer - border.radii[1].xy;
             vClipSign = vec2(-1.0, -1.0);
-            style = select_style(prim.user_data.x, border.style.wz);
+            style = select_style(prim.user_data1, border.style.wz);
             vec4 adjusted_widths = get_effective_border_widths(border, style);
             vec4 inv_adjusted_widths = border.widths - adjusted_widths;
             set_radii(style,
                       border.radii[1].xy,
                       border.widths.zw,
                       adjusted_widths.zw);
             set_edge_line(border.widths.zw,
                           corners.br_outer,
@@ -200,17 +202,17 @@ void main(void) {
         }
         case 3: {
             p0 = vec2(corners.bl_outer.x, corners.bl_inner.y);
             p1 = vec2(corners.bl_inner.x, corners.bl_outer.y);
             color0 = border.colors[3];
             color1 = border.colors[0];
             vClipCenter = corners.bl_outer + vec2(border.radii[1].z, -border.radii[1].w);
             vClipSign = vec2(1.0, -1.0);
-            style = select_style(prim.user_data.x, border.style.xw);
+            style = select_style(prim.user_data1, border.style.xw);
             vec4 adjusted_widths = get_effective_border_widths(border, style);
             vec4 inv_adjusted_widths = border.widths - adjusted_widths;
             set_radii(style,
                       border.radii[1].zw,
                       border.widths.xw,
                       adjusted_widths.xw);
             set_edge_line(border.widths.xw,
                           corners.bl_outer,
@@ -248,17 +250,17 @@ void main(void) {
         default: {
             vEdgeDistance = vec4(0.0);
             vAlphaSelect = 1.0;
             vSDFSelect = 0.0;
             break;
         }
     }
 
-    write_color(color0, color1, style, color_delta, prim.user_data.x);
+    write_color(color0, color1, style, color_delta, prim.user_data1);
 
     RectWithSize segment_rect;
     segment_rect.p0 = p0;
     segment_rect.size = p1 - p0;
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
--- a/gfx/webrender/res/ps_border_edge.vs.glsl
+++ b/gfx/webrender/res/ps_border_edge.vs.glsl
@@ -37,21 +37,25 @@ void write_alpha_select(float style) {
     }
 }
 
 void write_color(vec4 color, float style, bool flip) {
     vec2 modulate;
 
     switch (int(style)) {
         case BORDER_STYLE_GROOVE:
+        {
             modulate = flip ? vec2(1.3, 0.7) : vec2(0.7, 1.3);
             break;
+        }
         case BORDER_STYLE_RIDGE:
+        {
             modulate = flip ? vec2(0.7, 1.3) : vec2(1.3, 0.7);
             break;
+        }
         default:
             modulate = vec2(1.0);
             break;
     }
 
     vColor0 = vec4(color.rgb * modulate.x, color.a);
     vColor1 = vec4(color.rgb * modulate.y, color.a);
 }
@@ -96,17 +100,17 @@ void write_clip_params(float style,
             vClipSelect = 0.0;
             break;
     }
 }
 
 void main(void) {
     Primitive prim = load_primitive();
     Border border = fetch_border(prim.prim_index);
-    int sub_part = prim.sub_index;
+    int sub_part = prim.user_data0;
     BorderCorners corners = get_border_corners(border, prim.local_rect);
     vec4 color = border.colors[sub_part];
 
     // TODO(gw): Now that all border styles are supported, the switch
     //           statement below can be tidied up quite a bit.
 
     float style;
     bool color_flip;
--- a/gfx/webrender/res/ps_box_shadow.vs.glsl
+++ b/gfx/webrender/res/ps_box_shadow.vs.glsl
@@ -1,26 +1,26 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     BoxShadow bs = fetch_boxshadow(prim.prim_index);
-    RectWithSize segment_rect = fetch_instance_geometry(prim.sub_index);
+    RectWithSize segment_rect = fetch_instance_geometry(prim.user_data0);
 
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect.p0);
 
-    RenderTaskData child_task = fetch_render_task(prim.user_data.x);
+    RenderTaskData child_task = fetch_render_task(prim.user_data1);
     vUv.z = child_task.data1.x;
 
     // Constant offsets to inset from bilinear filtering border.
     vec2 patch_origin = child_task.data0.xy + vec2(1.0);
     vec2 patch_size_device_pixels = child_task.data0.zw - vec2(2.0);
     vec2 patch_size = patch_size_device_pixels / uDevicePixelRatio;
 
     vUv.xy = (vi.local_pos - prim.local_rect.p0) / patch_size;
--- a/gfx/webrender/res/ps_cache_image.vs.glsl
+++ b/gfx/webrender/res/ps_cache_image.vs.glsl
@@ -11,17 +11,17 @@ void main(void) {
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect.p0);
 
-    RenderTaskData child_task = fetch_render_task(prim.user_data.x);
+    RenderTaskData child_task = fetch_render_task(prim.user_data1);
     vUv.z = child_task.data1.x;
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vec2 uv0 = child_task.data0.xy / texture_size;
     vec2 uv1 = (child_task.data0.xy + child_task.data0.zw) / texture_size;
 
     vec2 f = (vi.local_pos - prim.local_rect.p0) / prim.local_rect.size;
 
--- a/gfx/webrender/res/ps_composite.vs.glsl
+++ b/gfx/webrender/res/ps_composite.vs.glsl
@@ -1,18 +1,18 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
-    PrimitiveInstance pi = fetch_prim_instance();
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(pi.render_task_index);
-    ReadbackTask backdrop_task = fetch_readback_task(pi.user_data.x);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(pi.user_data.y);
+    CompositeInstance ci = fetch_composite_instance();
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
+    ReadbackTask backdrop_task = fetch_readback_task(ci.backdrop_task_index);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
 
     vec2 dest_origin = dest_task.render_target_origin -
                        dest_task.screen_space_origin +
                        src_task.screen_space_origin;
 
     vec2 local_pos = mix(dest_origin,
                          dest_origin + src_task.size,
                          aPosition.xy);
@@ -22,13 +22,12 @@ void main(void) {
     vec2 st0 = backdrop_task.render_target_origin / texture_size;
     vec2 st1 = (backdrop_task.render_target_origin + backdrop_task.size) / texture_size;
     vUv0 = vec3(mix(st0, st1, aPosition.xy), backdrop_task.render_target_layer_index);
 
     st0 = src_task.render_target_origin / texture_size;
     st1 = (src_task.render_target_origin + src_task.size) / texture_size;
     vUv1 = vec3(mix(st0, st1, aPosition.xy), src_task.render_target_layer_index);
 
-    vOp = pi.sub_index;
+    vOp = ci.user_data0;
 
-    gl_Position = uTransform * vec4(local_pos, pi.z, 1.0);
-
+    gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
 }
--- a/gfx/webrender/res/ps_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_gradient.vs.glsl
@@ -4,18 +4,18 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.prim_index);
 
     vec4 abs_start_end_point = gradient.start_end_point + prim.local_rect.p0.xyxy;
 
-    GradientStop g0 = fetch_gradient_stop(prim.sub_index + 0);
-    GradientStop g1 = fetch_gradient_stop(prim.sub_index + 1);
+    GradientStop g0 = fetch_gradient_stop(prim.user_data0 + 0);
+    GradientStop g1 = fetch_gradient_stop(prim.user_data0 + 1);
 
     RectWithSize segment_rect;
     vec2 axis;
     vec4 adjusted_color_g0 = g0.color;
     vec4 adjusted_color_g1 = g1.color;
     if (abs_start_end_point.y == abs_start_end_point.w) {
         // Calculate the x coord of the gradient stops
         vec2 g01_x = mix(abs_start_end_point.xx, abs_start_end_point.zz,
--- a/gfx/webrender/res/ps_hardware_composite.vs.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.vs.glsl
@@ -1,25 +1,25 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
-    PrimitiveInstance pi = fetch_prim_instance();
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(pi.render_task_index);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(pi.user_data.x);
+    CompositeInstance ci = fetch_composite_instance();
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
 
     vec2 dest_origin = dest_task.render_target_origin -
                        dest_task.screen_space_origin +
                        src_task.screen_space_origin;
 
     vec2 local_pos = mix(dest_origin,
                          dest_origin + src_task.size,
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vec2 st0 = src_task.render_target_origin / texture_size;
     vec2 st1 = (src_task.render_target_origin + src_task.size) / texture_size;
     vUv = vec3(mix(st0, st1, aPosition.xy), src_task.render_target_layer_index);
 
-    gl_Position = uTransform * vec4(local_pos, pi.z, 1.0);
+    gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
 }
--- a/gfx/webrender/res/ps_image.vs.glsl
+++ b/gfx/webrender/res/ps_image.vs.glsl
@@ -1,17 +1,17 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Image image = fetch_image(prim.prim_index);
-    ResourceRect res = fetch_resource_rect(prim.user_data.x);
+    ResourceRect res = fetch_resource_rect(prim.user_data0);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
                                                     prim.local_rect.p0);
--- a/gfx/webrender/res/ps_radial_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.vs.glsl
@@ -30,16 +30,16 @@ void main(void) {
     float ratio_xy = gradient.start_end_radius_ratio_xy_extend_mode.z;
     vPos.y *= ratio_xy;
     vStartCenter.y *= ratio_xy;
     vEndCenter.y *= ratio_xy;
     vTileSize.y *= ratio_xy;
     vTileRepeat.y *= ratio_xy;
 
     // V coordinate of gradient row in lookup texture.
-    vGradientIndex = float(prim.sub_index);
+    vGradientIndex = float(prim.user_data0);
 
     // The texture size of the lookup texture
     vGradientTextureSize = vec2(textureSize(sGradients, 0));
 
     // Whether to repeat the gradient instead of clamping.
     vGradientRepeat = float(int(gradient.start_end_radius_ratio_xy_extend_mode.w) == EXTEND_MODE_REPEAT);
 }
--- a/gfx/webrender/res/ps_split_composite.vs.glsl
+++ b/gfx/webrender/res/ps_split_composite.vs.glsl
@@ -26,24 +26,24 @@ SplitGeometry fetch_split_geometry(int i
 
 vec3 bilerp(vec3 a, vec3 b, vec3 c, vec3 d, float s, float t) {
     vec3 x = mix(a, b, t);
     vec3 y = mix(c, d, t);
     return mix(x, y, s);
 }
 
 void main(void) {
-    PrimitiveInstance pi = fetch_prim_instance();
-    SplitGeometry geometry = fetch_split_geometry(pi.specific_prim_index);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(pi.user_data.x);
+    CompositeInstance ci = fetch_composite_instance();
+    SplitGeometry geometry = fetch_split_geometry(ci.user_data0);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
 
     vec3 world_pos = bilerp(geometry.points[0], geometry.points[1],
                             geometry.points[3], geometry.points[2],
                             aPosition.y, aPosition.x);
-    vec4 final_pos = vec4(world_pos.xy * uDevicePixelRatio, pi.z, 1.0);
+    vec4 final_pos = vec4(world_pos.xy * uDevicePixelRatio, ci.z, 1.0);
 
     gl_Position = uTransform * final_pos;
 
     vec2 uv_origin = src_task.render_target_origin;
     vec2 uv_pos = uv_origin + world_pos.xy - src_task.screen_space_origin;
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vUv = vec3(uv_pos / texture_size, src_task.render_target_layer_index);
     vUvTaskBounds = vec4(uv_origin, uv_origin + src_task.size) / texture_size.xyxy;
--- a/gfx/webrender/res/ps_text_run.vs.glsl
+++ b/gfx/webrender/res/ps_text_run.vs.glsl
@@ -1,18 +1,18 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.prim_index);
-    Glyph glyph = fetch_glyph(prim.sub_index);
-    ResourceRect res = fetch_resource_rect(prim.user_data.x);
+    Glyph glyph = fetch_glyph(prim.user_data0);
+    ResourceRect res = fetch_resource_rect(prim.user_data1);
 
     RectWithSize local_rect = RectWithSize(glyph.offset.xy,
                                            (res.uv_rect.zw - res.uv_rect.xy) / uDevicePixelRatio);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
--- a/gfx/webrender/res/ps_yuv_image.vs.glsl
+++ b/gfx/webrender/res/ps_yuv_image.vs.glsl
@@ -20,21 +20,21 @@ void main(void) {
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect.p0);
     vLocalPos = vi.local_pos - prim.local_rect.p0;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
-    ResourceRect y_rect = fetch_resource_rect(prim.user_data.x);
+    ResourceRect y_rect = fetch_resource_rect(prim.user_data0);
 #ifndef WR_FEATURE_INTERLEAVED_Y_CB_CR  // only 1 channel
-    ResourceRect u_rect = fetch_resource_rect(prim.user_data.x + 1);
+    ResourceRect u_rect = fetch_resource_rect(prim.user_data0 + 1);
 #ifndef WR_FEATURE_NV12 // 2 channel
-    ResourceRect v_rect = fetch_resource_rect(prim.user_data.x + 2);
+    ResourceRect v_rect = fetch_resource_rect(prim.user_data0 + 2);
 #endif
 #endif
 
     // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
     // non-normalized texture coordinates.
 #ifdef WR_FEATURE_TEXTURE_RECT
     vec2 y_texture_size_normalization_factor = vec2(1, 1);
 #else
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use euclid::Matrix4D;
 use fnv::FnvHasher;
 use gleam::gl;
 use internal_types::{PackedVertex, RenderTargetMode, TextureSampler, DEFAULT_TEXTURE};
-use internal_types::{BlurAttribute, ClearAttribute, ClipAttribute, VertexAttribute};
+use internal_types::{BlurAttribute, ClipAttribute, VertexAttribute};
 use internal_types::{DebugFontVertex, DebugColorVertex};
 //use notify::{self, Watcher};
 use super::shader_source;
 use std::collections::HashMap;
 use std::fs::File;
 use std::hash::BuildHasherDefault;
 use std::io::Read;
 use std::iter::repeat;
@@ -71,30 +71,28 @@ impl TextureTarget {
 pub enum TextureFilter {
     Nearest,
     Linear,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum VertexFormat {
     Triangles,
-    Rectangles,
     DebugFont,
     DebugColor,
-    Clear,
     Blur,
     Clip,
 }
 
 enum FBOTarget {
     Read,
     Draw,
 }
 
-fn get_gl_format_bgra(gl: &gl::Gl) -> gl::GLuint {
+pub fn get_gl_format_bgra(gl: &gl::Gl) -> gl::GLuint {
     match gl.get_type() {
         gl::GlType::Gl => {
             GL_FORMAT_BGRA_GL
         }
         gl::GlType::Gles => {
             GL_FORMAT_BGRA_GLES
         }
     }
@@ -185,79 +183,43 @@ impl VertexFormat {
                                           0 + vertex_stride * offset);
                 gl.vertex_attrib_pointer(VertexAttribute::Color as gl::GLuint,
                                           4,
                                           gl::UNSIGNED_BYTE,
                                           true,
                                           vertex_stride as gl::GLint,
                                           8 + vertex_stride * offset);
             }
-            VertexFormat::Rectangles |
             VertexFormat::Triangles => {
                 let vertex_stride = mem::size_of::<PackedVertex>() as gl::GLuint;
                 gl.enable_vertex_attrib_array(VertexAttribute::Position as gl::GLuint);
                 gl.vertex_attrib_divisor(VertexAttribute::Position as gl::GLuint, 0);
 
                 gl.vertex_attrib_pointer(VertexAttribute::Position as gl::GLuint,
                                           2,
                                           gl::FLOAT,
                                           false,
                                           vertex_stride as gl::GLint,
                                           0);
 
                 instance.bind(gl);
                 let mut offset = 0;
 
-                for &attrib in [VertexAttribute::GlobalPrimId,
-                                VertexAttribute::PrimitiveAddress,
-                                VertexAttribute::TaskIndex,
-                                VertexAttribute::ClipTaskIndex,
-                                VertexAttribute::LayerIndex,
-                                VertexAttribute::ElementIndex,
-                                VertexAttribute::ZIndex,
+                for &attrib in [VertexAttribute::Data0,
+                                VertexAttribute::Data1,
                                ].into_iter() {
                     gl.enable_vertex_attrib_array(attrib as gl::GLuint);
                     gl.vertex_attrib_divisor(attrib as gl::GLuint, 1);
                     gl.vertex_attrib_i_pointer(attrib as gl::GLuint,
-                                                1,
+                                                4,
                                                 gl::INT,
                                                 instance_stride,
                                                 offset);
-                    offset += 4;
+                    offset += 16;
                 }
-
-                gl.enable_vertex_attrib_array(VertexAttribute::UserData as gl::GLuint);
-                gl.vertex_attrib_divisor(VertexAttribute::UserData as gl::GLuint, 1);
-                gl.vertex_attrib_i_pointer(VertexAttribute::UserData as gl::GLuint,
-                                            2,
-                                            gl::INT,
-                                            instance_stride,
-                                            offset);
-            }
-            VertexFormat::Clear => {
-                let vertex_stride = mem::size_of::<PackedVertex>() as gl::GLuint;
-                gl.enable_vertex_attrib_array(ClearAttribute::Position as gl::GLuint);
-                gl.vertex_attrib_divisor(ClearAttribute::Position as gl::GLuint, 0);
-
-                gl.vertex_attrib_pointer(ClearAttribute::Position as gl::GLuint,
-                                          2,
-                                          gl::FLOAT,
-                                          false,
-                                          vertex_stride as gl::GLint,
-                                          0);
-
-                instance.bind(gl);
-
-                gl.enable_vertex_attrib_array(ClearAttribute::Rectangle as gl::GLuint);
-                gl.vertex_attrib_divisor(ClearAttribute::Rectangle as gl::GLuint, 1);
-                gl.vertex_attrib_i_pointer(ClearAttribute::Rectangle as gl::GLuint,
-                                            4,
-                                            gl::INT,
-                                            instance_stride,
-                                            0);
             }
             VertexFormat::Blur => {
                 let vertex_stride = mem::size_of::<PackedVertex>() as gl::GLuint;
                 gl.enable_vertex_attrib_array(BlurAttribute::Position as gl::GLuint);
                 gl.vertex_attrib_divisor(BlurAttribute::Position as gl::GLuint, 0);
 
                 gl.vertex_attrib_pointer(BlurAttribute::Position as gl::GLuint,
                                           2,
@@ -407,34 +369,25 @@ impl Program {
     fn attach_and_bind_shaders(&mut self,
                                vs_id: gl::GLuint,
                                fs_id: gl::GLuint,
                                vertex_format: VertexFormat) -> Result<(), ShaderError> {
         self.gl.attach_shader(self.id, vs_id);
         self.gl.attach_shader(self.id, fs_id);
 
         match vertex_format {
-            VertexFormat::Triangles | VertexFormat::Rectangles |
-            VertexFormat::DebugFont |  VertexFormat::DebugColor => {
+            VertexFormat::Triangles |
+            VertexFormat::DebugFont |
+            VertexFormat::DebugColor => {
                 self.gl.bind_attrib_location(self.id, VertexAttribute::Position as gl::GLuint, "aPosition");
                 self.gl.bind_attrib_location(self.id, VertexAttribute::Color as gl::GLuint, "aColor");
                 self.gl.bind_attrib_location(self.id, VertexAttribute::ColorTexCoord as gl::GLuint, "aColorTexCoord");
 
-                self.gl.bind_attrib_location(self.id, VertexAttribute::GlobalPrimId as gl::GLuint, "aGlobalPrimId");
-                self.gl.bind_attrib_location(self.id, VertexAttribute::PrimitiveAddress as gl::GLuint, "aPrimitiveAddress");
-                self.gl.bind_attrib_location(self.id, VertexAttribute::TaskIndex as gl::GLuint, "aTaskIndex");
-                self.gl.bind_attrib_location(self.id, VertexAttribute::ClipTaskIndex as gl::GLuint, "aClipTaskIndex");
-                self.gl.bind_attrib_location(self.id, VertexAttribute::LayerIndex as gl::GLuint, "aLayerIndex");
-                self.gl.bind_attrib_location(self.id, VertexAttribute::ElementIndex as gl::GLuint, "aElementIndex");
-                self.gl.bind_attrib_location(self.id, VertexAttribute::UserData as gl::GLuint, "aUserData");
-                self.gl.bind_attrib_location(self.id, VertexAttribute::ZIndex as gl::GLuint, "aZIndex");
-            }
-            VertexFormat::Clear => {
-                self.gl.bind_attrib_location(self.id, ClearAttribute::Position as gl::GLuint, "aPosition");
-                self.gl.bind_attrib_location(self.id, ClearAttribute::Rectangle as gl::GLuint, "aClearRectangle");
+                self.gl.bind_attrib_location(self.id, VertexAttribute::Data0 as gl::GLuint, "aData0");
+                self.gl.bind_attrib_location(self.id, VertexAttribute::Data1 as gl::GLuint, "aData1");
             }
             VertexFormat::Blur => {
                 self.gl.bind_attrib_location(self.id, BlurAttribute::Position as gl::GLuint, "aPosition");
                 self.gl.bind_attrib_location(self.id, BlurAttribute::RenderTaskIndex as gl::GLuint, "aBlurRenderTaskIndex");
                 self.gl.bind_attrib_location(self.id, BlurAttribute::SourceTaskIndex as gl::GLuint, "aBlurSourceTaskIndex");
                 self.gl.bind_attrib_location(self.id, BlurAttribute::Direction as gl::GLuint, "aBlurDirection");
             }
             VertexFormat::Clip => {
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -113,32 +113,18 @@ pub const DEFAULT_TEXTURE: TextureSample
 
 #[derive(Clone, Copy, Debug)]
 pub enum VertexAttribute {
     // vertex-frequency basic attributes
     Position,
     Color,
     ColorTexCoord,
     // instance-frequency primitive attributes
-    GlobalPrimId,
-    PrimitiveAddress,
-    TaskIndex,
-    ClipTaskIndex,
-    LayerIndex,
-    ElementIndex,
-    UserData,
-    ZIndex,
-}
-
-#[derive(Clone, Copy, Debug)]
-pub enum ClearAttribute {
-    // vertex frequency
-    Position,
-    // instance frequency
-    Rectangle,
+    Data0,
+    Data1,
 }
 
 #[derive(Clone, Copy, Debug)]
 pub enum BlurAttribute {
     // vertex frequency
     Position,
     // instance frequency
     RenderTaskIndex,
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -68,16 +68,19 @@ mod render_backend;
 mod render_task;
 mod resource_cache;
 mod scene;
 mod spring;
 mod texture_cache;
 mod tiling;
 mod util;
 
+#[doc(hidden)] // for benchmarks
+pub use texture_cache::TexturePage;
+
 #[cfg(feature = "webgl")]
 mod webgl_types;
 
 #[cfg(not(feature = "webgl"))]
 #[path = "webgl_stubs.rs"]
 mod webgl_types;
 
 mod shader_source {
@@ -135,9 +138,9 @@ extern crate offscreen_gl_context;
 extern crate byteorder;
 extern crate rayon;
 extern crate plane_split;
 
 #[cfg(any(target_os="macos", target_os="windows"))]
 extern crate gamma_lut;
 
 pub use renderer::{ExternalImage, ExternalImageSource, ExternalImageHandler};
-pub use renderer::{Renderer, RendererOptions};
+pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -8,16 +8,17 @@
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use debug_colors;
 use debug_render::DebugRenderer;
 use device::{DepthFunction, Device, FrameId, ProgramId, TextureId, VertexFormat, GpuMarker, GpuProfiler};
 use device::{GpuSample, TextureFilter, VAOId, VertexUsageHint, FileWatcherHandler, TextureTarget, ShaderError};
+use device::get_gl_format_bgra;
 use euclid::Matrix4D;
 use fnv::FnvHasher;
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use gpu_store::{GpuStore, GpuStoreLayout};
 use internal_types::{CacheTextureId, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{TextureUpdateList, PackedVertex, RenderTargetMode};
 use internal_types::{ORTHO_NEAR_PLANE, ORTHO_FAR_PLANE, SourceTexture};
@@ -38,25 +39,25 @@ use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use rayon::ThreadPool;
 use rayon::Configuration as ThreadPoolConfig;
-use tiling::{AlphaBatchKind, BlurCommand, Frame, PrimitiveBatch, RenderTarget};
+use tiling::{AlphaBatchKind, BlurCommand, CompositePrimitiveInstance, Frame, PrimitiveBatch, RenderTarget};
 use tiling::{AlphaRenderTarget, CacheClipInstance, PrimitiveInstance, ColorRenderTarget, RenderTargetKind};
 use time::precise_time_ns;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use util::TransformedRectKind;
 use webgl_types::GLContextHandleWrapper;
 use webrender_traits::{ColorF, Epoch, PipelineId, RenderNotifier, RenderDispatcher};
 use webrender_traits::{ExternalImageId, ExternalImageType, ImageData, ImageFormat, RenderApiSender};
-use webrender_traits::{DeviceIntRect, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
+use webrender_traits::{DeviceIntRect, DeviceUintRect, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
 use webrender_traits::{ImageDescriptor, BlobImageRenderer};
 use webrender_traits::{channel, FontRenderMode};
 use webrender_traits::VRCompositorHandler;
 use webrender_traits::{YuvColorSpace, YuvFormat};
 use webrender_traits::{YUV_COLOR_SPACES, YUV_FORMATS};
 
 pub const GPU_DATA_TEXTURE_POOL: usize = 5;
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
@@ -78,16 +79,28 @@ const GPU_TAG_PRIM_GRADIENT: GpuProfileT
 const GPU_TAG_PRIM_ANGLE_GRADIENT: GpuProfileTag = GpuProfileTag { label: "AngleGradient", color: debug_colors::POWDERBLUE };
 const GPU_TAG_PRIM_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag { label: "RadialGradient", color: debug_colors::LIGHTPINK };
 const GPU_TAG_PRIM_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "BoxShadow", color: debug_colors::CYAN };
 const GPU_TAG_PRIM_BORDER_CORNER: GpuProfileTag = GpuProfileTag { label: "BorderCorner", color: debug_colors::DARKSLATEGREY };
 const GPU_TAG_PRIM_BORDER_EDGE: GpuProfileTag = GpuProfileTag { label: "BorderEdge", color: debug_colors::LAVENDER };
 const GPU_TAG_PRIM_CACHE_IMAGE: GpuProfileTag = GpuProfileTag { label: "CacheImage", color: debug_colors::SILVER };
 const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag { label: "Blur", color: debug_colors::VIOLET };
 
+#[derive(Clone, Debug, PartialEq)]
+pub enum GraphicsApi {
+    OpenGL,
+}
+
+#[derive(Clone, Debug)]
+pub struct GraphicsApiInfo {
+    pub kind: GraphicsApi,
+    pub renderer: String,
+    pub version: String,
+}
+
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum ImageBufferKind {
     Texture2D = 0,
     TextureRect = 1,
     TextureExternal = 2,
 }
 pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 3] = [
     ImageBufferKind::Texture2D,
@@ -497,16 +510,22 @@ impl GpuDataTextures {
         device.bind_texture(TextureSampler::Data64, self.data64_texture.id);
         device.bind_texture(TextureSampler::Data128, self.data128_texture.id);
         device.bind_texture(TextureSampler::ResourceRects, self.resource_rects_texture.id);
         device.bind_texture(TextureSampler::Gradients, self.gradient_data_texture.id);
         device.bind_texture(TextureSampler::SplitGeometry, self.split_geometry_texture.id);
     }
 }
 
+#[derive(Clone, Debug, PartialEq)]
+pub enum ReadPixelsFormat {
+    Rgba8,
+    Bgra8,
+}
+
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     device: Device,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     current_frame: Option<RendererFrame>,
@@ -1131,24 +1150,28 @@ impl Renderer {
             cpu_profiles: VecDeque::new(),
             gpu_profiles: VecDeque::new(),
         };
 
         let sender = RenderApiSender::new(api_tx, payload_tx);
         Ok((renderer, sender))
     }
 
+    pub fn get_graphics_api_info(&self) -> GraphicsApiInfo {
+        GraphicsApiInfo {
+            kind: GraphicsApi::OpenGL,
+            version: self.device.gl().get_string(gl::VERSION),
+            renderer: self.device.gl().get_string(gl::RENDERER),
+        }
+    }
+
     fn get_yuv_shader_index(buffer_kind: ImageBufferKind, format: YuvFormat, color_space: YuvColorSpace) -> usize {
         ((buffer_kind as usize) * YUV_FORMATS.len() + (format as usize)) * YUV_COLOR_SPACES.len() + (color_space as usize)
     }
 
-    pub fn gl(&self) -> &gl::Gl {
-        self.device.gl()
-    }
-
     /// Sets the new RenderNotifier.
     ///
     /// The RenderNotifier will be called when processing e.g. of a (scrolling) frame is done,
     /// and therefore the screen should be updated.
     pub fn set_render_notifier(&self, notifier: Box<RenderNotifier>) {
         let mut notifier_arc = self.notifier.lock().unwrap();
         *notifier_arc = Some(notifier);
     }
@@ -1500,18 +1523,22 @@ impl Renderer {
                     projection: &Matrix4D<f32>,
                     render_task_data: &[RenderTaskData],
                     cache_texture: TextureId,
                     render_target: Option<(TextureId, i32)>,
                     target_dimensions: DeviceUintSize) {
         let transform_kind = batch.key.flags.transform_kind();
         let needs_clipping = batch.key.flags.needs_clipping();
         debug_assert!(!needs_clipping ||
-                      batch.key.blend_mode == BlendMode::Alpha ||
-                      batch.key.blend_mode == BlendMode::PremultipliedAlpha);
+                      match batch.key.blend_mode {
+                          BlendMode::Alpha |
+                          BlendMode::PremultipliedAlpha |
+                          BlendMode::Subpixel(..) => true,
+                          BlendMode::None => false,
+                      });
 
         let (marker, shader) = match batch.key.kind {
             AlphaBatchKind::Composite => {
                 let shader = self.ps_composite.get(&mut self.device);
                 (GPU_TAG_PRIM_COMPOSITE, shader)
             }
             AlphaBatchKind::HardwareComposite => {
                 let shader = self.ps_hw_composite.get(&mut self.device);
@@ -1581,32 +1608,32 @@ impl Renderer {
             }
         };
 
         // Handle special case readback for composites.
         if batch.key.kind == AlphaBatchKind::Composite {
             // composites can't be grouped together because
             // they may overlap and affect each other.
             debug_assert!(batch.instances.len() == 1);
-            let instance = &batch.instances[0];
+            let instance = CompositePrimitiveInstance::from(&batch.instances[0]);
 
             // TODO(gw): This code branch is all a bit hacky. We rely
             // on pulling specific values from the render target data
             // and also cloning the single primitive instance to be
             // able to pass to draw_instanced_batch(). We should
             // think about a cleaner way to achieve this!
 
             // Before submitting the composite batch, do the
             // framebuffer readbacks that are needed for each
             // composite operation in this batch.
             let cache_texture_dimensions = self.device.get_texture_dimensions(cache_texture);
 
-            let backdrop = &render_task_data[instance.task_index as usize];
-            let readback = &render_task_data[instance.user_data[0] as usize];
-            let source = &render_task_data[instance.user_data[1] as usize];
+            let backdrop = &render_task_data[instance.task_index.0 as usize];
+            let readback = &render_task_data[instance.backdrop_task_index.0 as usize];
+            let source = &render_task_data[instance.src_task_index.0 as usize];
 
             // Bind the FBO to blit the backdrop to.
             // Called per-instance in case the layer (and therefore FBO)
             // changes. The device will skip the GL call if the requested
             // target is already bound.
             let cache_draw_target = (cache_texture, readback.data[4] as i32);
             self.device.bind_draw_target(Some(cache_draw_target), Some(cache_texture_dimensions));
 
@@ -2139,16 +2166,41 @@ impl Renderer {
                                                    dest_rect);
 
                     current_target += 1;
                 }
             }
         }
     }
 
+    pub fn read_pixels_rgba8(&self, rect: DeviceUintRect) -> Vec<u8> {
+        let mut pixels = vec![0u8; (4 * rect.size.width * rect.size.height) as usize];
+        self.read_pixels_into(rect, ReadPixelsFormat::Rgba8, &mut pixels);
+        pixels
+    }
+
+    pub fn read_pixels_into(&self,
+                            rect: DeviceUintRect,
+                            format: ReadPixelsFormat,
+                            output: &mut [u8]) {
+        let (gl_format, gl_type, size) = match format {
+            ReadPixelsFormat::Rgba8 => (gl::RGBA, gl::UNSIGNED_BYTE, 4),
+            ReadPixelsFormat::Bgra8 => (get_gl_format_bgra(self.device.gl()), gl::UNSIGNED_BYTE, 4),
+        };
+        assert_eq!(output.len(), (size * rect.size.width * rect.size.height) as usize);
+        self.device.gl().flush();
+        self.device.gl().read_pixels_into_buffer(rect.origin.x as gl::GLint,
+                                                 rect.origin.y as gl::GLint,
+                                                 rect.size.width as gl::GLsizei,
+                                                 rect.size.height as gl::GLsizei,
+                                                 gl_format,
+                                                 gl_type,
+                                                 output);
+    }
+
     // De-initialize the Renderer safely, assuming the GL is still alive and active.
     pub fn deinit(mut self) {
         //Note: this is a fake frame, only needed because texture deletion is require to happen inside a frame
         self.device.begin_frame(1.0);
         self.device.deinit_texture(self.dummy_cache_texture_id);
         self.device.end_frame();
     }
 }
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use device::TextureFilter;
 use fnv::FnvHasher;
 use freelist::{FreeList, FreeListItem, FreeListItemId};
 use internal_types::{TextureUpdate, TextureUpdateOp};
 use internal_types::{CacheTextureId, RenderTargetMode, TextureUpdateList, RectUv};
 use profiler::TextureCacheProfileCounters;
-use std::cmp::{self, Ordering};
+use std::cmp;
 use std::collections::HashMap;
 use std::collections::hash_map::Entry;
 use std::hash::BuildHasherDefault;
 use std::mem;
 use std::slice::Iter;
 use time;
 use util;
 use webrender_traits::{ExternalImageType, ImageData, ImageFormat, DevicePixel, DeviceIntPoint};
@@ -45,38 +45,46 @@ const MINIMUM_LARGE_RECT_SIZE: u32 = 32;
 const COALESCING_TIMEOUT: u64 = 100;
 
 /// The number of items that we process in the coalescing work list before checking whether we hit
 /// the timeout.
 const COALESCING_TIMEOUT_CHECKING_INTERVAL: usize = 256;
 
 pub type TextureCacheItemId = FreeListItemId;
 
+enum CoalescingStatus {
+    Changed,
+    Unchanged,
+    Timeout,
+}
+
 /// A texture allocator using the guillotine algorithm with the rectangle merge improvement. See
 /// sections 2.2 and 2.2.5 in "A Thousand Ways to Pack the Bin - A Practical Approach to Two-
 /// Dimensional Rectangle Bin Packing":
 ///
 ///    http://clb.demon.fi/files/RectangleBinPack.pdf
 ///
 /// This approach was chosen because of its simplicity, good performance, and easy support for
 /// dynamic texture deallocation.
 pub struct TexturePage {
     texture_id: CacheTextureId,
     texture_size: DeviceUintSize,
     free_list: FreeRectList,
+    coalesce_vec: Vec<DeviceUintRect>,
     allocations: u32,
     dirty: bool,
 }
 
 impl TexturePage {
     pub fn new(texture_id: CacheTextureId, texture_size: DeviceUintSize) -> TexturePage {
         let mut page = TexturePage {
             texture_id: texture_id,
             texture_size: texture_size,
             free_list: FreeRectList::new(),
+            coalesce_vec: Vec::new(),
             allocations: 0,
             dirty: false,
         };
         page.clear();
         page
     }
 
     fn find_index_of_best_rect_in_bin(&self, bin: FreeListBin, requested_dimensions: &DeviceUintSize)
@@ -112,16 +120,19 @@ impl TexturePage {
     }
 
     pub fn can_allocate(&self, requested_dimensions: &DeviceUintSize) -> bool {
         self.find_index_of_best_rect(requested_dimensions).is_some()
     }
 
     pub fn allocate(&mut self,
                     requested_dimensions: &DeviceUintSize) -> Option<DeviceUintPoint> {
+        if requested_dimensions.width == 0 || requested_dimensions.height == 0 {
+            return Some(DeviceUintPoint::new(0, 0))
+        }
         let index = match self.find_index_of_best_rect(requested_dimensions) {
             None => return None,
             Some(index) => index,
         };
 
         // Remove the rect from the free list and decide how to guillotine it. We choose the split
         // that results in the single largest area (Min Area Split Rule, MINAS).
         let chosen_rect = self.free_list.remove(index);
@@ -168,116 +179,127 @@ impl TexturePage {
 
         // Bump the allocation counter.
         self.allocations += 1;
 
         // Return the result.
         Some(chosen_rect.origin)
     }
 
-    #[inline(never)]
+    fn coalesce_impl<F, U>(rects: &mut [DeviceUintRect], deadline: u64, fun_key: F, fun_union: U)
+                    -> CoalescingStatus where
+        F: Fn(&DeviceUintRect) -> (u32, u32),
+        U: Fn(&mut DeviceUintRect, &mut DeviceUintRect) -> usize,
+    {
+        let mut num_changed = 0;
+        rects.sort_by_key(&fun_key);
+
+        for work_index in 0..rects.len() {
+            if work_index % COALESCING_TIMEOUT_CHECKING_INTERVAL == 0 &&
+                    time::precise_time_ns() >= deadline {
+                return CoalescingStatus::Timeout
+            }
+
+            let (left, candidates) = rects.split_at_mut(work_index + 1);
+            let mut item = left.last_mut().unwrap();
+            if util::rect_is_empty(item) {
+                continue
+            }
+
+            let key = fun_key(item);
+            for candidate in candidates.iter_mut()
+                                       .take_while(|r| key == fun_key(r)) {
+                num_changed += fun_union(item, candidate);
+            }
+        }
+
+        if num_changed > 0 {
+            CoalescingStatus::Changed
+        } else {
+            CoalescingStatus::Unchanged
+        }
+    }
+
+    /// Combine rects that have the same width and are adjacent.
+    fn coalesce_horisontal(rects: &mut [DeviceUintRect], deadline: u64) -> CoalescingStatus {
+        Self::coalesce_impl(rects, deadline,
+                            |item| (item.size.width, item.origin.x),
+                            |item, candidate| {
+            if item.origin.y == candidate.max_y() || item.max_y() == candidate.origin.y {
+                *item = item.union(candidate);
+                candidate.size.width = 0;
+                1
+            } else { 0 }
+        })
+    }
+
+    /// Combine rects that have the same height and are adjacent.
+    fn coalesce_vertical(rects: &mut [DeviceUintRect], deadline: u64) -> CoalescingStatus {
+        Self::coalesce_impl(rects, deadline,
+                            |item| (item.size.height, item.origin.y),
+                            |item, candidate| {
+            if item.origin.x == candidate.max_x() || item.max_x() == candidate.origin.x {
+                *item = item.union(candidate);
+                candidate.size.height = 0;
+                1
+            } else { 0 }
+        })
+    }
+
     pub fn coalesce(&mut self) -> bool {
         if !self.dirty {
             return false
         }
 
         // Iterate to a fixed point or until a timeout is reached.
         let deadline = time::precise_time_ns() + COALESCING_TIMEOUT;
-        let mut free_list = mem::replace(&mut self.free_list, FreeRectList::new()).into_vec();
+        self.free_list.copy_to_vec(&mut self.coalesce_vec);
         let mut changed = false;
 
-        // Combine rects that have the same width and are adjacent.
-        let mut new_free_list = Vec::new();
-        free_list.sort_by(|a, b| {
-            match a.size.width.cmp(&b.size.width) {
-                Ordering::Equal => a.origin.x.cmp(&b.origin.x),
-                ordering => ordering,
-            }
-        });
-        for work_index in 0..free_list.len() {
-            if work_index % COALESCING_TIMEOUT_CHECKING_INTERVAL == 0 &&
-                    time::precise_time_ns() >= deadline {
-                self.free_list = FreeRectList::from_slice(&free_list[..]);
-                self.dirty = true;
+        //Note: we might want to consider try to use the last sorted order first
+        // but the elements get shuffled around a bit anyway during the bin placement
+
+        match Self::coalesce_horisontal(&mut self.coalesce_vec, deadline) {
+            CoalescingStatus::Changed => changed = true,
+            CoalescingStatus::Unchanged => (),
+            CoalescingStatus::Timeout => {
+                self.free_list.init_from_slice(&self.coalesce_vec);
                 return true
             }
+        }
 
-            if free_list[work_index].size.width == 0 {
-                continue
-            }
-            for candidate_index in (work_index + 1)..free_list.len() {
-                if free_list[work_index].size.width != free_list[candidate_index].size.width ||
-                        free_list[work_index].origin.x != free_list[candidate_index].origin.x {
-                    break
-                }
-                if free_list[work_index].origin.y == free_list[candidate_index].max_y() ||
-                        free_list[work_index].max_y() == free_list[candidate_index].origin.y {
-                    changed = true;
-                    free_list[work_index] =
-                        free_list[work_index].union(&free_list[candidate_index]);
-                    free_list[candidate_index].size.width = 0
-                }
-                new_free_list.push(free_list[work_index])
-            }
-            new_free_list.push(free_list[work_index])
-        }
-        free_list = new_free_list;
-
-        // Combine rects that have the same height and are adjacent.
-        let mut new_free_list = Vec::new();
-        free_list.sort_by(|a, b| {
-            match a.size.height.cmp(&b.size.height) {
-                Ordering::Equal => a.origin.y.cmp(&b.origin.y),
-                ordering => ordering,
-            }
-        });
-        for work_index in 0..free_list.len() {
-            if work_index % COALESCING_TIMEOUT_CHECKING_INTERVAL == 0 &&
-                    time::precise_time_ns() >= deadline {
-                self.free_list = FreeRectList::from_slice(&free_list[..]);
-                self.dirty = true;
+        match Self::coalesce_vertical(&mut self.coalesce_vec, deadline) {
+            CoalescingStatus::Changed => changed = true,
+            CoalescingStatus::Unchanged => (),
+            CoalescingStatus::Timeout => {
+                self.free_list.init_from_slice(&self.coalesce_vec);
                 return true
             }
+        }
 
-            if free_list[work_index].size.height == 0 {
-                continue
-            }
-            for candidate_index in (work_index + 1)..free_list.len() {
-                if free_list[work_index].size.height !=
-                        free_list[candidate_index].size.height ||
-                        free_list[work_index].origin.y != free_list[candidate_index].origin.y {
-                    break
-                }
-                if free_list[work_index].origin.x == free_list[candidate_index].max_x() ||
-                        free_list[work_index].max_x() == free_list[candidate_index].origin.x {
-                    changed = true;
-                    free_list[work_index] =
-                        free_list[work_index].union(&free_list[candidate_index]);
-                    free_list[candidate_index].size.height = 0
-                }
-            }
-            new_free_list.push(free_list[work_index])
+        if changed {
+            self.free_list.init_from_slice(&self.coalesce_vec);
         }
-        free_list = new_free_list;
-
-        self.free_list = FreeRectList::from_slice(&free_list[..]);
         self.dirty = changed;
         changed
     }
 
     pub fn clear(&mut self) {
         self.free_list = FreeRectList::new();
         self.free_list.push(&DeviceUintRect::new(
             DeviceUintPoint::zero(),
             self.texture_size));
         self.allocations = 0;
         self.dirty = false;
     }
 
     fn free(&mut self, rect: &DeviceUintRect) {
+        if util::rect_is_empty(rect) {
+            return
+        }
         debug_assert!(self.allocations > 0);
         self.allocations -= 1;
         if self.allocations == 0 {
             self.clear();
             return
         }
 
         self.free_list.push(rect);
@@ -307,16 +329,35 @@ impl TexturePage {
         self.texture_size = new_texture_size
     }
 
     fn can_grow(&self, max_size: u32) -> bool {
         self.texture_size.width < max_size || self.texture_size.height < max_size
     }
 }
 
+// testing functionality
+impl TexturePage {
+    #[doc(hidden)]
+    pub fn new_dummy(size: DeviceUintSize) -> TexturePage {
+        Self::new(CacheTextureId(0), size)
+    }
+
+    #[doc(hidden)]
+    pub fn fill_from(&mut self, other: &TexturePage) {
+        self.dirty = true;
+        self.free_list.small.clear();
+        self.free_list.small.extend_from_slice(&other.free_list.small);
+        self.free_list.medium.clear();
+        self.free_list.medium.extend_from_slice(&other.free_list.medium);
+        self.free_list.large.clear();
+        self.free_list.large.extend_from_slice(&other.free_list.large);
+    }
+}
+
 /// A binning free list. Binning is important to avoid sifting through lots of small strips when
 /// allocating many texture items.
 struct FreeRectList {
     small: Vec<DeviceUintRect>,
     medium: Vec<DeviceUintRect>,
     large: Vec<DeviceUintRect>,
 }
 
@@ -324,22 +365,25 @@ impl FreeRectList {
     fn new() -> FreeRectList {
         FreeRectList {
             small: vec![],
             medium: vec![],
             large: vec![],
         }
     }
 
-    fn from_slice(vector: &[DeviceUintRect]) -> FreeRectList {
-        let mut free_list = FreeRectList::new();
-        for rect in vector {
-            free_list.push(rect)
+    fn init_from_slice(&mut self, rects: &[DeviceUintRect]) {
+        self.small.clear();
+        self.medium.clear();
+        self.large.clear();
+        for rect in rects {
+            if !util::rect_is_empty(rect) {
+                self.push(rect)
+            }
         }
-        free_list
     }
 
     fn push(&mut self, rect: &DeviceUintRect) {
         match FreeListBin::for_size(&rect.size) {
             FreeListBin::Small => self.small.push(*rect),
             FreeListBin::Medium => self.medium.push(*rect),
             FreeListBin::Large => self.large.push(*rect),
         }
@@ -356,41 +400,43 @@ impl FreeRectList {
     fn iter(&self, bin: FreeListBin) -> Iter<DeviceUintRect> {
         match bin {
             FreeListBin::Small => self.small.iter(),
             FreeListBin::Medium => self.medium.iter(),
             FreeListBin::Large => self.large.iter(),
         }
     }
 
-    fn into_vec(mut self) -> Vec<DeviceUintRect> {
-        self.small.extend(self.medium.drain(..));
-        self.small.extend(self.large.drain(..));
-        self.small
+    fn copy_to_vec(&self, rects: &mut Vec<DeviceUintRect>) {
+        rects.clear();
+        rects.extend_from_slice(&self.small);
+        rects.extend_from_slice(&self.medium);
+        rects.extend_from_slice(&self.large);
     }
 }
 
 #[derive(Debug, Clone, Copy)]
 struct FreeListIndex(FreeListBin, usize);
 
 #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
 enum FreeListBin {
     Small,
     Medium,
     Large,
 }
 
 impl FreeListBin {
-    pub fn for_size(size: &DeviceUintSize) -> FreeListBin {
+    fn for_size(size: &DeviceUintSize) -> FreeListBin {
         if size.width >= MINIMUM_LARGE_RECT_SIZE && size.height >= MINIMUM_LARGE_RECT_SIZE {
             FreeListBin::Large
         } else if size.width >= MINIMUM_MEDIUM_RECT_SIZE &&
                 size.height >= MINIMUM_MEDIUM_RECT_SIZE {
             FreeListBin::Medium
         } else {
+            debug_assert!(size.width > 0 && size.height > 0);
             FreeListBin::Small
         }
     }
 }
 
 #[derive(Debug, Clone)]
 pub struct TextureCacheItem {
     // Identifies the texture and array slice
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -309,80 +309,75 @@ impl AlphaRenderItem {
                     LowLevelFilterOp::Sepia(amount) => (6, amount.to_f32_px()),
                     LowLevelFilterOp::Brightness(amount) => (7, amount.to_f32_px()),
                     LowLevelFilterOp::Opacity(amount) => (8, amount.to_f32_px()),
                 };
 
                 let amount = (amount * 65535.0).round() as i32;
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
 
-                batch.add_instance(PrimitiveInstance {
-                    global_prim_id: -1,
-                    prim_address: GpuStoreAddress(0),
-                    task_index: task_index.0 as i32,
-                    clip_task_index: -1,
-                    layer_index: -1,
-                    sub_index: filter_mode,
-                    user_data: [src_task_index.0 as i32, amount],
-                    z_sort_index: z,
-                });
+                let instance = CompositePrimitiveInstance::new(task_index,
+                                                               src_task_index,
+                                                               RenderTaskIndex(0),
+                                                               filter_mode,
+                                                               amount,
+                                                               z);
+
+                batch.add_instance(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::HardwareComposite(stacking_context_index, src_id, composite_op, z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let src_task_index = render_tasks.get_static_task_index(&src_id);
                 let key = AlphaBatchKey::new(AlphaBatchKind::HardwareComposite,
                                              AlphaBatchKeyFlags::empty(),
                                              composite_op.to_blend_mode(),
                                              BatchTextures::no_texture());
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
-                batch.add_instance(PrimitiveInstance {
-                    global_prim_id: -1,
-                    prim_address: GpuStoreAddress(0),
-                    task_index: task_index.0 as i32,
-                    clip_task_index: -1,
-                    layer_index: -1,
-                    sub_index: -1,
-                    user_data: [src_task_index.0 as i32, 0],
-                    z_sort_index: z,
-                });
+
+                let instance = CompositePrimitiveInstance::new(task_index,
+                                                               src_task_index,
+                                                               RenderTaskIndex(0),
+                                                               0,
+                                                               0,
+                                                               z);
+
+                batch.add_instance(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Composite(stacking_context_index,
                                        backdrop_id,
                                        src_id,
                                        mode,
                                        z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let key = AlphaBatchKey::new(AlphaBatchKind::Composite,
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::Alpha,
                                              BatchTextures::no_texture());
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
                 let backdrop_task = render_tasks.get_task_index(&backdrop_id, child_pass_index);
                 let src_task_index = render_tasks.get_static_task_index(&src_id);
-                batch.add_instance(PrimitiveInstance {
-                    global_prim_id: -1,
-                    prim_address: GpuStoreAddress(0),
-                    task_index: task_index.0 as i32,
-                    clip_task_index: -1,
-                    layer_index: -1,
-                    sub_index: mode as u32 as i32,
-                    user_data: [ backdrop_task.0 as i32,
-                                 src_task_index.0 as i32 ],
-                    z_sort_index: z,
-                });
+
+                let instance = CompositePrimitiveInstance::new(task_index,
+                                                               src_task_index,
+                                                               backdrop_task,
+                                                               mode as u32 as i32,
+                                                               0,
+                                                               z);
+
+                batch.add_instance(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Primitive(clip_scroll_group_index_opt, prim_index, z) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                 let (transform_kind, packed_layer_index) = match clip_scroll_group_index_opt {
                     Some(group_index) => {
                         let group = &ctx.clip_scroll_group_store[group_index.0];
                         let bounding_rect = group.screen_bounding_rect.as_ref().unwrap();
-                        (bounding_rect.0, group.packed_layer_index.0 as i32)
+                        (bounding_rect.0, group.packed_layer_index)
                     },
-                    None => (TransformedRectKind::AxisAligned, 0),
+                    None => (TransformedRectKind::AxisAligned, PackedLayerIndex(0)),
                 };
                 let needs_clipping = prim_metadata.needs_clipping();
                 let mut flags = AlphaBatchKeyFlags::empty();
                 if needs_clipping {
                     flags |= NEEDS_CLIPPING;
                 }
                 if transform_kind == TransformedRectKind::AxisAligned {
                     flags |= AXIS_ALIGNED;
@@ -393,73 +388,64 @@ impl AlphaRenderItem {
                 let item_bounding_rect = ctx.prim_store.cpu_bounding_rects[prim_index.0].as_ref().unwrap();
                 let clip_task_index = match prim_metadata.clip_task {
                     Some(ref clip_task) => {
                         render_tasks.get_task_index(&clip_task.id, child_pass_index)
                     }
                     None => {
                         OPAQUE_TASK_INDEX
                     }
-                }.0 as i32;
-                let global_prim_id = prim_index.0 as i32;
-                let prim_address = prim_metadata.gpu_prim_index;
-                let task_index = task_index.0 as i32;
+                };
                 let needs_blending = !prim_metadata.is_opaque ||
                                      needs_clipping ||
                                      transform_kind == TransformedRectKind::Complex;
                 let blend_mode = ctx.prim_store.get_blend_mode(needs_blending, prim_metadata);
-                let base_instance = PrimitiveInstance {
-                    task_index: task_index,
-                    clip_task_index: clip_task_index,
-                    layer_index: packed_layer_index,
-                    global_prim_id: global_prim_id,
-                    prim_address: prim_address,
-                    sub_index: 0,
-                    user_data: [0, 0],
-                    z_sort_index: z,
-                };
+
+                let base_instance = SimplePrimitiveInstance::new(prim_index,
+                                                                 prim_metadata.gpu_prim_index,
+                                                                 task_index,
+                                                                 clip_task_index,
+                                                                 packed_layer_index,
+                                                                 z);
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Border => {
                         let border_cpu = &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
                         // TODO(gw): Select correct blend mode for edges and corners!!
                         let corner_key = AlphaBatchKey::new(AlphaBatchKind::BorderCorner, flags, blend_mode, textures);
                         let edge_key = AlphaBatchKey::new(AlphaBatchKind::BorderEdge, flags, blend_mode, textures);
 
                         batch_list.with_suitable_batch(&corner_key, item_bounding_rect, |batch| {
                             for (i, instance_kind) in border_cpu.corner_instances.iter().enumerate() {
                                 let sub_index = i as i32;
                                 match *instance_kind {
                                     BorderCornerInstance::Single => {
                                         batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::Both as i32,
-                                                                               0));
+                                                                               BorderCornerSide::Both as i32,));
                                     }
                                     BorderCornerInstance::Double => {
                                         batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::First as i32,
-                                                                               0));
+                                                                               BorderCornerSide::First as i32));
                                         batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::Second as i32,
-                                                                               0));
+                                                                               BorderCornerSide::Second as i32));
                                     }
                                 }
                             }
                         });
 
                         batch_list.with_suitable_batch(&edge_key, item_bounding_rect, |batch| {
                             for border_segment in 0..4 {
-                                batch.add_instance(base_instance.build(border_segment, 0, 0));
+                                batch.add_instance(base_instance.build(border_segment, 0));
                             }
                         });
                     }
                     PrimitiveKind::Rectangle => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::Rectangle, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance);
+                        batch.add_instance(base_instance.build(0, 0));
                     }
                     PrimitiveKind::Image => {
                         let image_cpu = &ctx.prim_store.cpu_images[prim_metadata.cpu_prim_index.0];
 
                         let batch_kind = match image_cpu.color_texture_id {
                             SourceTexture::External(ext_image) => {
                                 match ext_image.image_type {
                                     ExternalImageType::Texture2DHandle => AlphaBatchKind::Image(ImageBufferKind::Texture2D),
@@ -474,17 +460,17 @@ impl AlphaRenderItem {
                             }
                             _ => {
                                 AlphaBatchKind::Image(ImageBufferKind::Texture2D)
                             }
                         };
 
                         let key = AlphaBatchKey::new(batch_kind, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, image_cpu.resource_address.0, 0));
+                        batch.add_instance(base_instance.build(image_cpu.resource_address.0, 0));
                     }
                     PrimitiveKind::TextRun => {
                         let text_cpu = &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
                         let batch_kind = if text_cpu.blur_radius == 0.0 {
                             AlphaBatchKind::TextRun
                         } else {
                             // Select a generic primitive shader that can blit the
                             // results of the cached text blur to the framebuffer,
@@ -499,46 +485,43 @@ impl AlphaRenderItem {
                                 let cache_task_id = task.id;
                                 render_tasks.get_task_index(&cache_task_id,
                                                             child_pass_index).0 as i32
                             }
                             None => 0,
                         };
 
                         for glyph_index in 0..prim_metadata.gpu_data_count {
-                            let user_data0 = match batch_kind {
+                            let user_data1 = match batch_kind {
                                 AlphaBatchKind::TextRun => text_cpu.resource_address.0 + glyph_index,
                                 AlphaBatchKind::CacheImage => cache_task_index,
                                 _ => unreachable!(),
                             };
 
                             batch.add_instance(base_instance.build(prim_metadata.gpu_data_address.0 + glyph_index,
-                                                                   user_data0,
-                                                                   0));
+                                                                   user_data1));
                         }
                     }
                     PrimitiveKind::AlignedGradient => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::AlignedGradient, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
                         for part_index in 0..(prim_metadata.gpu_data_count - 1) {
-                            batch.add_instance(base_instance.build(prim_metadata.gpu_data_address.0 + part_index, 0, 0));
+                            batch.add_instance(base_instance.build(prim_metadata.gpu_data_address.0 + part_index, 0));
                         }
                     }
                     PrimitiveKind::AngleGradient => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::AngleGradient, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
                         batch.add_instance(base_instance.build(prim_metadata.gpu_data_address.0,
-                                                               prim_metadata.gpu_data_count,
                                                                0));
                     }
                     PrimitiveKind::RadialGradient => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::RadialGradient, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
                         batch.add_instance(base_instance.build(prim_metadata.gpu_data_address.0,
-                                                               prim_metadata.gpu_data_count,
                                                                0));
                     }
                     PrimitiveKind::YuvImage => {
                         let image_yuv_cpu = &ctx.prim_store.cpu_yuv_images[prim_metadata.cpu_prim_index.0];
 
                         let get_buffer_kind = |texture: SourceTexture| {
                             match texture {
                                 SourceTexture::External(ext_image) => {
@@ -566,54 +549,51 @@ impl AlphaRenderItem {
                         ));
 
                         let key = AlphaBatchKey::new(AlphaBatchKind::YuvImage(buffer_kind, image_yuv_cpu.format, image_yuv_cpu.color_space),
                                                      flags,
                                                      blend_mode,
                                                      textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
-                        batch.add_instance(base_instance.build(0,
-                                                               image_yuv_cpu.yuv_resource_address.0,
+                        batch.add_instance(base_instance.build(image_yuv_cpu.yuv_resource_address.0,
                                                                0));
                     }
                     PrimitiveKind::BoxShadow => {
                         let cache_task_id = &prim_metadata.render_task.as_ref().unwrap().id;
                         let cache_task_index = render_tasks.get_task_index(cache_task_id,
                                                                            child_pass_index);
 
                         let key = AlphaBatchKey::new(AlphaBatchKind::BoxShadow, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
                         for rect_index in 0..prim_metadata.gpu_data_count {
                             batch.add_instance(base_instance.build(prim_metadata.gpu_data_address.0 + rect_index,
-                                                                   cache_task_index.0 as i32,
-                                                                   0));
+                                                                   cache_task_index.0 as i32));
                         }
                     }
                 }
             }
             AlphaRenderItem::SplitComposite(sc_index, task_id, gpu_address, z) => {
                 let key = AlphaBatchKey::new(AlphaBatchKind::SplitComposite,
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::PremultipliedAlpha,
                                              BatchTextures::no_texture());
                 let stacking_context = &ctx.stacking_context_store[sc_index.0];
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
                 let source_task = render_tasks.get_task_index(&task_id, child_pass_index);
-                batch.add_instance(PrimitiveInstance {
-                    global_prim_id: -1,
-                    prim_address: gpu_address,
-                    task_index: task_index.0 as i32,
-                    clip_task_index: -1,
-                    layer_index: -1, // not be used
-                    sub_index: 0,
-                    user_data: [ source_task.0 as i32, 0 ],
-                    z_sort_index: z,
-                });
+
+                let instance = CompositePrimitiveInstance::new(task_index,
+                                                               source_task,
+                                                               RenderTaskIndex(0),
+                                                               gpu_address.0,
+                                                               0,
+                                                               z);
+
+                batch.add_instance(PrimitiveInstance::from(instance));
             }
         }
     }
 }
 
 impl AlphaBatcher {
     fn new() -> AlphaBatcher {
         AlphaBatcher {
@@ -965,26 +945,23 @@ impl RenderTarget for ColorRenderTarget 
                     padding: 0,
                 });
             }
             RenderTaskKind::CachePrimitive(prim_index) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::BoxShadow => {
-                        self.box_shadow_cache_prims.push(PrimitiveInstance {
-                            global_prim_id: prim_index.0 as i32,
-                            prim_address: prim_metadata.gpu_prim_index,
-                            task_index: render_tasks.get_task_index(&task.id, pass_index).0 as i32,
-                            clip_task_index: 0,
-                            layer_index: 0,
-                            sub_index: 0,
-                            user_data: [0; 2],
-                            z_sort_index: 0,        // z is disabled for rendering cache primitives
-                        });
+                        let instance = SimplePrimitiveInstance::new(prim_index,
+                                                                    prim_metadata.gpu_prim_index,
+                                                                    render_tasks.get_task_index(&task.id, pass_index),
+                                                                    RenderTaskIndex(0),
+                                                                    PackedLayerIndex(0),
+                                                                    0);     // z is disabled for rendering cache primitives
+                        self.box_shadow_cache_prims.push(instance.build(0, 0));
                     }
                     PrimitiveKind::TextRun => {
                         let text = &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
                         // We only cache text runs with a text-shadow (for now).
                         debug_assert!(text.blur_radius != 0.0);
 
                         // TODO(gw): This should always be fine for now, since the texture
                         // atlas grows to 4k. However, it won't be a problem soon, once
@@ -993,27 +970,26 @@ impl RenderTarget for ColorRenderTarget 
                             colors: ctx.prim_store.get_color_textures(prim_metadata),
                         };
 
                         debug_assert!(textures.colors[0] != SourceTexture::Invalid);
                         debug_assert!(self.text_run_textures.colors[0] == SourceTexture::Invalid ||
                                       self.text_run_textures.colors[0] == textures.colors[0]);
                         self.text_run_textures = textures;
 
+                        let instance = SimplePrimitiveInstance::new(prim_index,
+                                                                    prim_metadata.gpu_prim_index,
+                                                                    render_tasks.get_task_index(&task.id, pass_index),
+                                                                    RenderTaskIndex(0),
+                                                                    PackedLayerIndex(0),
+                                                                    0);     // z is disabled for rendering cache primitives
+
                         for glyph_index in 0..prim_metadata.gpu_data_count {
-                            self.text_run_cache_prims.push(PrimitiveInstance {
-                                global_prim_id: prim_index.0 as i32,
-                                prim_address: prim_metadata.gpu_prim_index,
-                                task_index: render_tasks.get_task_index(&task.id, pass_index).0 as i32,
-                                clip_task_index: 0,
-                                layer_index: 0,
-                                sub_index: prim_metadata.gpu_data_address.0 + glyph_index,
-                                user_data: [ text.resource_address.0 + glyph_index, 0],
-                                z_sort_index: 0,        // z is disabled for rendering cache primitives
-                            });
+                            self.text_run_cache_prims.push(instance.build(prim_metadata.gpu_data_address.0 + glyph_index,
+                                                                          text.resource_address.0 + glyph_index));
                         }
                     }
                     _ => {
                         // No other primitives make use of primitive caching yet!
                         unreachable!()
                     }
                 }
             }
@@ -1284,37 +1260,117 @@ pub struct BlurCommand {
 #[derive(Clone, Copy, Debug)]
 pub struct CacheClipInstance {
     task_id: i32,
     layer_index: i32,
     address: GpuStoreAddress,
     segment: i32,
 }
 
+// 32 bytes per instance should be enough for anyone!
 #[derive(Debug, Clone)]
 pub struct PrimitiveInstance {
-    global_prim_id: i32,
-    prim_address: GpuStoreAddress,
+    data: [i32; 8],
+}
+
+struct SimplePrimitiveInstance {
+    pub global_prim_index: i32,
+    pub specific_prim_address: i32,
     pub task_index: i32,
-    clip_task_index: i32,
-    layer_index: i32,
-    sub_index: i32,
-    z_sort_index: i32,
-    pub user_data: [i32; 2],
+    pub clip_task_index: i32,
+    pub layer_index: i32,
+    pub z_sort_index: i32,
+}
+
+impl SimplePrimitiveInstance {
+    fn new(prim_index: PrimitiveIndex,
+           specific_prim_address: GpuStoreAddress,
+           task_index: RenderTaskIndex,
+           clip_task_index: RenderTaskIndex,
+           layer_index: PackedLayerIndex,
+           z_sort_index: i32) -> SimplePrimitiveInstance {
+        SimplePrimitiveInstance {
+            global_prim_index: prim_index.0 as i32,
+            specific_prim_address: specific_prim_address.0 as i32,
+            task_index: task_index.0 as i32,
+            clip_task_index: clip_task_index.0 as i32,
+            layer_index: layer_index.0 as i32,
+            z_sort_index: z_sort_index,
+        }
+    }
+
+    fn build(&self, data0: i32, data1: i32) -> PrimitiveInstance {
+        PrimitiveInstance {
+            data: [
+                self.global_prim_index,
+                self.specific_prim_address,
+                self.task_index,
+                self.clip_task_index,
+                self.layer_index,
+                self.z_sort_index,
+                data0,
+                data1,
+            ]
+        }
+    }
 }
 
-impl PrimitiveInstance {
-    pub fn build(&self,
-                 sub_index: i32,
-                 user_data0: i32,
-                 user_data1: i32) -> PrimitiveInstance {
+pub struct CompositePrimitiveInstance {
+    pub task_index: RenderTaskIndex,
+    pub src_task_index: RenderTaskIndex,
+    pub backdrop_task_index: RenderTaskIndex,
+    pub data0: i32,
+    pub data1: i32,
+    pub z: i32,
+}
+
+impl CompositePrimitiveInstance {
+    fn new(task_index: RenderTaskIndex,
+           src_task_index: RenderTaskIndex,
+           backdrop_task_index: RenderTaskIndex,
+           data0: i32,
+           data1: i32,
+           z: i32) -> CompositePrimitiveInstance {
+        CompositePrimitiveInstance {
+            task_index: task_index,
+            src_task_index: src_task_index,
+            backdrop_task_index: backdrop_task_index,
+            data0: data0,
+            data1: data1,
+            z: z,
+        }
+    }
+}
+
+impl From<CompositePrimitiveInstance> for PrimitiveInstance {
+    fn from(instance: CompositePrimitiveInstance) -> PrimitiveInstance {
         PrimitiveInstance {
-            sub_index: sub_index,
-            user_data: [user_data0, user_data1],
-            ..*self
+            data: [
+                instance.task_index.0 as i32,
+                instance.src_task_index.0 as i32,
+                instance.backdrop_task_index.0 as i32,
+                instance.z,
+                instance.data0,
+                instance.data1,
+                0,
+                0,
+            ]
+        }
+    }
+}
+
+impl<'a> From<&'a PrimitiveInstance> for CompositePrimitiveInstance {
+    fn from(instance: &'a PrimitiveInstance) -> CompositePrimitiveInstance {
+        CompositePrimitiveInstance {
+            task_index: RenderTaskIndex(instance.data[0] as usize),
+            src_task_index: RenderTaskIndex(instance.data[1] as usize),
+            backdrop_task_index: RenderTaskIndex(instance.data[2] as usize),
+            z: instance.data[3],
+            data0: instance.data[4],
+            data1: instance.data[5],
         }
     }
 }
 
 #[derive(Debug)]
 pub struct PrimitiveBatch {
     pub key: AlphaBatchKey,
     pub instances: Vec<PrimitiveInstance>,
@@ -1452,26 +1508,24 @@ impl ClipScrollGroup {
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct PackedLayer {
     pub transform: LayerToWorldTransform,
     pub inv_transform: WorldToLayerTransform,
     pub local_clip_rect: LayerRect,
-    pub screen_vertices: [WorldPoint4D; 4],
 }
 
 impl Default for PackedLayer {
     fn default() -> PackedLayer {
         PackedLayer {
             transform: LayerToWorldTransform::identity(),
             inv_transform: WorldToLayerTransform::identity(),
             local_clip_rect: LayerRect::zero(),
-            screen_vertices: [WorldPoint4D::zero(); 4],
         }
     }
 }
 
 impl PackedLayer {
     pub fn empty() -> PackedLayer {
         Default::default()
     }
@@ -1483,17 +1537,16 @@ impl PackedLayer {
 
     pub fn set_rect(&mut self,
                     local_rect: &LayerRect,
                     screen_rect: &DeviceIntRect,
                     device_pixel_ratio: f32)
                     -> Option<(TransformedRectKind, DeviceIntRect)> {
         let xf_rect = TransformedRect::new(&local_rect, &self.transform, device_pixel_ratio);
         xf_rect.bounding_rect.intersection(screen_rect).map(|rect| {
-            self.screen_vertices = xf_rect.vertices.clone();
             self.local_clip_rect = *local_rect;
             (xf_rect.kind, rect)
         })
     }
 }
 
 #[derive(Debug, Clone)]
 pub struct CompositeOps {
--- a/gfx/webrender_bindings/WebRenderAPI.cpp
+++ b/gfx/webrender_bindings/WebRenderAPI.cpp
@@ -1,24 +1,30 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "WebRenderAPI.h"
+#include "LayersLogging.h"
 #include "mozilla/webrender/RendererOGL.h"
 #include "mozilla/gfx/gfxVars.h"
 #include "mozilla/layers/CompositorThread.h"
 #include "mozilla/widget/CompositorWidget.h"
 #include "mozilla/widget/CompositorWidget.h"
 #include "mozilla/layers/SynchronousTask.h"
 
+#define WRDL_LOG(...)
+//#define WRDL_LOG(...) printf_stderr("WRDL: " __VA_ARGS__)
+
 namespace mozilla {
 namespace wr {
 
+using layers::Stringify;
+
 class NewRenderer : public RendererEvent
 {
 public:
   NewRenderer(WrAPI** aApi, layers::CompositorBridgeParentBase* aBridge,
               GLint* aMaxTextureSize,
               bool* aUseANGLE,
               RefPtr<widget::CompositorWidget>&& aWidget,
               layers::SynchronousTask* aTask,
@@ -551,16 +557,18 @@ DisplayListBuilder::PushStackingContext(
                                         const gfx::Matrix4x4* aTransform,
                                         const WrMixBlendMode& aMixBlendMode)
 {
   WrMatrix matrix;
   if (aTransform) {
     matrix = ToWrMatrix(*aTransform);
   }
   const WrMatrix* maybeTransform = aTransform ? &matrix : nullptr;
+  WRDL_LOG("PushStackingContext b=%s t=%s\n", Stringify(aBounds).c_str(),
+      aTransform ? Stringify(*aTransform).c_str() : "none");
   wr_dp_push_stacking_context(mWrState, aBounds, aAnimationId, aOpacity,
                               maybeTransform, aMixBlendMode);
 }
 
 void
 DisplayListBuilder::PushStackingContext(const WrRect& aBounds,
                                         const float aOpacity,
                                         const gfx::Matrix4x4& aTransform,
@@ -568,59 +576,68 @@ DisplayListBuilder::PushStackingContext(
 {
   PushStackingContext(aBounds, 0, &aOpacity,
                       &aTransform, aMixBlendMode);
 }
 
 void
 DisplayListBuilder::PopStackingContext()
 {
+  WRDL_LOG("PopStackingContext\n");
   wr_dp_pop_stacking_context(mWrState);
 }
 
 void
 DisplayListBuilder::PushClip(const WrRect& aClipRect,
                              const WrImageMask* aMask)
 {
+  WRDL_LOG("PushClip r=%s m=%p\n", Stringify(aClipRect).c_str(), aMask);
   wr_dp_push_clip(mWrState, aClipRect, aMask);
 }
 
 void
 DisplayListBuilder::PopClip()
 {
+  WRDL_LOG("PopClip\n");
   wr_dp_pop_clip(mWrState);
 }
 
 void
 DisplayListBuilder::PushBuiltDisplayList(BuiltDisplayList dl)
 {
   wr_dp_push_built_display_list(mWrState,
                                 dl.dl_desc,
                                 dl.dl.Extract());
 }
 
 void
 DisplayListBuilder::PushScrollLayer(const layers::FrameMetrics::ViewID& aScrollId,
                                     const WrRect& aContentRect,
                                     const WrRect& aClipRect)
 {
+  WRDL_LOG("PushScrollLayer id=%" PRIu64 " co=%s cl=%s\n",
+      aScrollId, Stringify(aContentRect).c_str(), Stringify(aClipRect).c_str());
   wr_dp_push_scroll_layer(mWrState, aScrollId, aContentRect, aClipRect);
 }
 
 void
 DisplayListBuilder::PopScrollLayer()
 {
+  WRDL_LOG("PopScrollLayer\n");
   wr_dp_pop_scroll_layer(mWrState);
 }
 
 void
 DisplayListBuilder::PushRect(const WrRect& aBounds,
                              const WrClipRegionToken aClip,
                              const WrColor& aColor)
 {
+  WRDL_LOG("PushRect b=%s c=%s\n",
+      Stringify(aBounds).c_str(),
+      Stringify(aColor).c_str());
   wr_dp_push_rect(mWrState, aBounds, aClip, aColor);
 }
 
 void
 DisplayListBuilder::PushLinearGradient(const WrRect& aBounds,
                                        const WrClipRegionToken aClip,
                                        const WrPoint& aStartPoint,
                                        const WrPoint& aEndPoint,
@@ -670,16 +687,18 @@ DisplayListBuilder::PushImage(const WrRe
 void
 DisplayListBuilder::PushImage(const WrRect& aBounds,
                               const WrClipRegionToken aClip,
                               const WrSize& aStretchSize,
                               const WrSize& aTileSpacing,
                               wr::ImageRendering aFilter,
                               wr::ImageKey aImage)
 {
+  WRDL_LOG("PushImage b=%s s=%s t=%s\n", Stringify(aBounds).c_str(),
+      Stringify(aStretchSize).c_str(), Stringify(aTileSpacing).c_str());
   wr_dp_push_image(mWrState, aBounds, aClip, aStretchSize, aTileSpacing, aFilter, aImage);
 }
 
 void
 DisplayListBuilder::PushYCbCrPlanarImage(const WrRect& aBounds,
                                          const WrClipRegionToken aClip,
                                          wr::ImageKey aImageChannel0,
                                          wr::ImageKey aImageChannel1,
@@ -823,27 +842,30 @@ DisplayListBuilder::PushBoxShadow(const 
                         aBlurRadius, aSpreadRadius, aBorderRadius,
                         aClipMode);
 }
 
 WrClipRegionToken
 DisplayListBuilder::PushClipRegion(const WrRect& aMain,
                                    const WrImageMask* aMask)
 {
+  WRDL_LOG("PushClipRegion r=%s m=%p\n", Stringify(aMain).c_str(), aMask);
   return wr_dp_push_clip_region(mWrState,
                                 aMain,
                                 nullptr, 0,
                                 aMask);
 }
 
 WrClipRegionToken
 DisplayListBuilder::PushClipRegion(const WrRect& aMain,
                                    const nsTArray<WrComplexClipRegion>& aComplex,
                                    const WrImageMask* aMask)
 {
+  WRDL_LOG("PushClipRegion r=%s cl=%d m=%p\n", Stringify(aMain).c_str(),
+      (int)aComplex.Length(), aMask);
   return wr_dp_push_clip_region(mWrState,
                                 aMain,
                                 aComplex.Elements(), aComplex.Length(),
                                 aMask);
 }
 
 } // namespace wr
 } // namespace mozilla
--- a/gfx/webrender_bindings/src/bindings.rs
+++ b/gfx/webrender_bindings/src/bindings.rs
@@ -2,17 +2,17 @@ use std::collections::HashSet;
 use std::ffi::CString;
 use std::{mem, slice};
 use std::path::PathBuf;
 use std::os::raw::{c_void, c_char};
 use std::collections::HashMap;
 use gleam::gl;
 
 use webrender_traits::*;
-use webrender::renderer::{Renderer, RendererOptions};
+use webrender::renderer::{ReadPixelsFormat, Renderer, RendererOptions};
 use webrender::renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
 use webrender::{ApiRecordingReceiver, BinaryRecorder};
 use app_units::Au;
 use euclid::{TypedPoint2D, TypedSize2D, TypedRect, TypedMatrix4D, SideOffsets2D};
 
 extern crate webrender_traits;
 
 // Enables binary recording that can be used with `wrench replay`
@@ -65,26 +65,16 @@ impl Into<ExternalImageId> for WrExterna
     }
 }
 impl Into<WrExternalImageId> for ExternalImageId {
     fn into(self) -> WrExternalImageId {
         WrExternalImageId(self.0)
     }
 }
 
-const GL_FORMAT_BGRA_GL: gl::GLuint = gl::BGRA;
-const GL_FORMAT_BGRA_GLES: gl::GLuint = gl::BGRA_EXT;
-
-fn get_gl_format_bgra(gl: &gl::Gl) -> gl::GLuint {
-    match gl.get_type() {
-        gl::GlType::Gl => GL_FORMAT_BGRA_GL,
-        gl::GlType::Gles => GL_FORMAT_BGRA_GLES,
-    }
-}
-
 fn make_slice<'a, T>(ptr: *const T, len: usize) -> &'a [T] {
     if ptr.is_null() {
         &[]
     } else {
         unsafe { slice::from_raw_parts(ptr, len) }
     }
 }
 
@@ -785,27 +775,22 @@ pub extern "C" fn wr_renderer_render(ren
 #[no_mangle]
 pub unsafe extern "C" fn wr_renderer_readback(renderer: &mut WrRenderer,
                                               width: u32,
                                               height: u32,
                                               dst_buffer: *mut u8,
                                               buffer_size: usize) {
     assert!(is_in_render_thread());
 
-    renderer.gl().flush();
-
     let mut slice = make_slice_mut(dst_buffer, buffer_size);
-    renderer.gl()
-            .read_pixels_into_buffer(0,
-                                     0,
-                                     width as gl::GLsizei,
-                                     height as gl::GLsizei,
-                                     get_gl_format_bgra(renderer.gl()),
-                                     gl::UNSIGNED_BYTE,
-                                     slice);
+    renderer.read_pixels_into(DeviceUintRect::new(
+                                DeviceUintPoint::new(0, 0),
+                                DeviceUintSize::new(width, height)),
+                              ReadPixelsFormat::Bgra8,
+                              &mut slice);
 }
 
 #[no_mangle]
 pub extern "C" fn wr_renderer_set_profiler_enabled(renderer: &mut WrRenderer,
                                                    enabled: bool) {
     renderer.set_profiler_enabled(enabled);
 }
 
--- a/ipc/ipdl/sync-messages.ini
+++ b/ipc/ipdl/sync-messages.ini
@@ -1041,16 +1041,20 @@ description =
 [PWebRenderBridge::UpdateImage]
 description =
 [PWebRenderBridge::DeleteImage]
 description =
 [PWebRenderBridge::DPSyncEnd]
 description =
 [PWebRenderBridge::DPGetSnapshot]
 description =
+[PWebRenderBridge::SetAsyncScrollOffset]
+description = test only
+[PWebRenderBridge::SetAsyncZoom]
+description = test only
 [PVRManager::GetSensorState]
 description =
 [PHal::GetCurrentBatteryInformation]
 description =
 [PHal::GetCurrentNetworkInformation]
 description =
 [PHal::GetScreenEnabled]
 description =
--- a/layout/painting/nsCSSRenderingGradients.cpp
+++ b/layout/painting/nsCSSRenderingGradients.cpp
@@ -1008,21 +1008,20 @@ nsCSSGradientRenderer::BuildWebRenderPar
                                                 LayoutDevicePoint& aLineStart,
                                                 LayoutDevicePoint& aLineEnd,
                                                 LayoutDeviceSize& aGradientRadius)
 {
   aMode = mGradient->mRepeating ? WrGradientExtendMode::Repeat : WrGradientExtendMode::Clamp;
 
   aStops.SetLength(mStops.Length());
   for(uint32_t i = 0; i < mStops.Length(); i++) {
-    Float alpha = mStops[i].mColor.a * aOpacity;
-    aStops[i].color.r = mStops[i].mColor.r * alpha;
-    aStops[i].color.g = mStops[i].mColor.g * alpha;
-    aStops[i].color.b = mStops[i].mColor.b * alpha;
-    aStops[i].color.a = alpha;
+    aStops[i].color.r = mStops[i].mColor.r;
+    aStops[i].color.g = mStops[i].mColor.g;
+    aStops[i].color.b = mStops[i].mColor.b;
+    aStops[i].color.a = mStops[i].mColor.a * aOpacity;
     aStops[i].offset = mStops[i].mPosition;
   }
 
   aLineStart = LayoutDevicePoint(mLineStart.x, mLineStart.y);
   aLineEnd = LayoutDevicePoint(mLineEnd.x, mLineEnd.y);
   aGradientRadius = LayoutDeviceSize(mRadiusX, mRadiusY);
 }
 
--- a/layout/painting/nsDisplayList.cpp
+++ b/layout/painting/nsDisplayList.cpp
@@ -5357,28 +5357,16 @@ nsDisplayBoxShadowInner::CanCreateWebRen
   }
 
   nsCSSShadowArray *shadows = aFrame->StyleEffects()->mBoxShadow;
   if (!shadows) {
     // Means we don't have to paint anything
     return true;
   }
 
-  for (uint32_t i = shadows->Length(); i > 0; --i) {
-    nsCSSShadowItem *shadowItem = shadows->ShadowAt(i - 1);
-    if (!shadowItem->mInset) {
-      continue;
-    }
-
-    if (shadowItem->mXOffset <= 0 || shadowItem->mYOffset <= 0) {
-      // Need to wait for WR to support clip out.
-      return false;
-    }
-  }
-
   return true;
 }
 
 LayerState
 nsDisplayBoxShadowInner::GetLayerState(nsDisplayListBuilder* aBuilder,
                                        LayerManager* aManager,
                                        const ContainerLayerParameters& aParameters)
 {
--- a/layout/reftests/box-shadow/reftest.list
+++ b/layout/reftests/box-shadow/reftest.list
@@ -6,32 +6,32 @@ random != boxshadow-blur-2.html boxshado
 == boxshadow-multiple.html boxshadow-multiple-ref.html
 == boxshadow-spread.html boxshadow-spread-ref.html
 == tableboxshadow-basic.html tableboxshadow-basic-ref.html
 == tableboxshadow-trshadow.html tableboxshadow-trshadow-ref.html
 == tableboxshadow-tdshadow.html tableboxshadow-tdshadow-ref.html
 == boxshadow-rounding.html boxshadow-rounding-ref.html
 # One uses old path, one uses WR box shadow.
 fails-if(Android) fuzzy-if(webrender,50,3310) == boxshadow-button.html boxshadow-button-ref.html
-fuzzy-if(OSX==1010,1,24) fuzzy-if(d2d,16,908) fuzzy-if(webrender,19,1680) == boxshadow-large-border-radius.html boxshadow-large-border-radius-ref.html # Bug 1209649
+fuzzy-if(OSX==1010,1,24) fuzzy-if(d2d,16,908) fuzzy-if(webrender,48,2040) == boxshadow-large-border-radius.html boxshadow-large-border-radius-ref.html # Bug 1209649
 
 fails-if(Android) == boxshadow-fileupload.html boxshadow-fileupload-ref.html
 fuzzy-if(skiaContent,13,28) == boxshadow-inner-basic.html boxshadow-inner-basic-ref.svg
 random-if(layersGPUAccelerated) == boxshadow-mixed.html boxshadow-mixed-ref.html
 random-if(d2d) fuzzy-if(skiaContent,1,100)  == boxshadow-rounded-spread.html boxshadow-rounded-spread-ref.html
 fuzzy-if(skiaContent,1,50) HTTP(..) == boxshadow-dynamic.xul boxshadow-dynamic-ref.xul
 random-if(d2d) == boxshadow-onecorner.html boxshadow-onecorner-ref.html
 random-if(d2d) == boxshadow-twocorners.html boxshadow-twocorners-ref.html
 random-if(d2d) == boxshadow-threecorners.html boxshadow-threecorners-ref.html
 fuzzy(2,440) fuzzy-if(webrender,25,1300) == boxshadow-skiprect.html boxshadow-skiprect-ref.html
 == boxshadow-opacity.html boxshadow-opacity-ref.html
 == boxshadow-color-rounding.html boxshadow-color-rounding-ref.html
 == boxshadow-color-rounding-middle.html boxshadow-color-rounding-middle-ref.html
-fuzzy(3,500) fuzzy-if(d2d,2,1080) == boxshadow-border-radius-int.html boxshadow-border-radius-int-ref.html
-== boxshadow-inset-neg-spread.html about:blank
+fuzzy(3,500) fuzzy-if(d2d,2,1080) fuzzy-if(webrender,12,1500) == boxshadow-border-radius-int.html boxshadow-border-radius-int-ref.html
+fuzzy-if(webrender,1,4) == boxshadow-inset-neg-spread.html about:blank
 == boxshadow-inset-neg-spread2.html boxshadow-inset-neg-spread2-ref.html
 fuzzy(26,3610) fuzzy-if(d2d,26,5910) fuzzy-if(webrender,43,200) == boxshadow-rotated.html boxshadow-rotated-ref.html # Bug 1211264
 == boxshadow-inset-large-border-radius.html boxshadow-inset-large-border-radius-ref.html
 
 # fuzzy due to blur going inside, but as long as it's essentially black instead of a light gray its ok.
 fuzzy(13,9445) fuzzy-if(d2d,13,10926) fuzzy-if(webrender,14,14307) == boxshadow-inset-large-offset.html boxshadow-inset-large-offset-ref.html
 
 == overflow-not-scrollable-1.html overflow-not-scrollable-1-ref.html
--- a/layout/reftests/css-break/reftest.list
+++ b/layout/reftests/css-break/reftest.list
@@ -1,12 +1,12 @@
 default-preferences pref(layout.css.box-decoration-break.enabled,true)
 
 == box-decoration-break-1.html box-decoration-break-1-ref.html
-fuzzy(1,20) fuzzy-if(skiaContent,1,700) == box-decoration-break-with-inset-box-shadow-1.html box-decoration-break-with-inset-box-shadow-1-ref.html
+fuzzy(1,20) fuzzy-if(skiaContent,1,700) fuzzy-if(webrender,5,450) == box-decoration-break-with-inset-box-shadow-1.html box-decoration-break-with-inset-box-shadow-1-ref.html
 fuzzy(16,460) fuzzy-if(Android,10,3673) fuzzy-if(skiaContent,57,374) == box-decoration-break-with-outset-box-shadow-1.html box-decoration-break-with-outset-box-shadow-1-ref.html
 random-if(!gtkWidget) HTTP(..) == box-decoration-break-border-image.html box-decoration-break-border-image-ref.html
 == box-decoration-break-block-border-padding.html box-decoration-break-block-border-padding-ref.html
 == box-decoration-break-block-margin.html box-decoration-break-block-margin-ref.html
 fuzzy-if(!Android,1,62) fuzzy-if(Android,8,6627) == box-decoration-break-first-letter.html box-decoration-break-first-letter-ref.html #Bug 1313773
 == box-decoration-break-with-bidi.html box-decoration-break-with-bidi-ref.html
 == box-decoration-break-bug-1235152.html box-decoration-break-bug-1235152-ref.html
 == box-decoration-break-bug-1249913.html box-decoration-break-bug-1249913-ref.html