merge mozilla-inbound to mozilla-central a=merge FIREFOX_AURORA_38_BASE
author Carsten "Tomcat" Book <cbook@mozilla.com>
Mon, 23 Feb 2015 13:30:28 +0100
changeset 259051 98086da94ccdc88f6de86774ce3d1fa258dc7c44
parent 259034 627e0703a68ae745d60fb5fa4e18ec4bd5186dfb (current diff)
parent 259050 f7db65ae3c38a4ca3e2050745dd1b4e28b714e76 (diff)
child 259052 ace95bf5bd7bc2e14daf9d46679f14d38d313467
child 260586 f4d73ea8f2264f8ffd03415960603313352ac384
child 260593 0ae0c8b511b552fd06db3e4ec144a0824743ce76
child 260631 a69b59f490dbe58a975a2fd796d6c0a63e9516a2
push id 721
push user jlund@mozilla.com
push date Tue, 21 Apr 2015 23:03:33 +0000
treeherder mozilla-release@d27c9211ebb3 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers merge
milestone 38.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
merge mozilla-inbound to mozilla-central a=merge
media/libvpx/vp8_rtcd_x86-win32-vs8.h
media/libvpx/vp8_rtcd_x86_64-win64-vs8.h
media/libvpx/vp9_rtcd_x86-win32-vs8.h
media/libvpx/vp9_rtcd_x86_64-win64-vs8.h
media/libvpx/vpx_config_x86-win32-vs8.asm
media/libvpx/vpx_config_x86-win32-vs8.h
media/libvpx/vpx_config_x86_64-win64-vs8.asm
media/libvpx/vpx_config_x86_64-win64-vs8.h
media/libvpx/vpx_scale_rtcd_x86-win32-vs8.h
media/libvpx/vpx_scale_rtcd_x86_64-win64-vs8.h
--- a/gfx/layers/d3d11/TextureD3D11.cpp
+++ b/gfx/layers/d3d11/TextureD3D11.cpp
@@ -820,17 +820,17 @@ SyncObjectD3D11::FinalizeFrame()
       gfxCriticalError() << "Failed to get KeyedMutex: " << hexa(hr);
       MOZ_CRASH();
     }
   }
 
   if (mD3D10SyncedTextures.size()) {
     RefPtr<IDXGIKeyedMutex> mutex;
     hr = mD3D10Texture->QueryInterface((IDXGIKeyedMutex**)byRef(mutex));
-    hr = mutex->AcquireSync(0, 10000);
+    hr = mutex->AcquireSync(0, 20000);
 
     if (hr == WAIT_TIMEOUT) {
       MOZ_CRASH();
     }
 
     D3D10_BOX box;
     box.front = box.top = box.left = 0;
     box.back = box.bottom = box.right = 1;
--- a/gfx/thebes/gfxPlatform.h
+++ b/gfx/thebes/gfxPlatform.h
@@ -154,16 +154,26 @@ GetBackendName(mozilla::gfx::BackendType
       case mozilla::gfx::BackendType::DIRECT2D1_1:
         return "direct2d 1.1";
       case mozilla::gfx::BackendType::NONE:
         return "none";
   }
   MOZ_CRASH("Incomplete switch");
 }
 
+enum class DeviceResetReason
+{
+  OK = 0,
+  HUNG,
+  REMOVED,
+  RESET,
+  DRIVER_ERROR,
+  INVALID_CALL
+};
+
 class gfxPlatform {
     friend class SRGBOverrideObserver;
 
 public:
     typedef mozilla::gfx::Color Color;
     typedef mozilla::gfx::DataSourceSurface DataSourceSurface;
     typedef mozilla::gfx::DrawTarget DrawTarget;
     typedef mozilla::gfx::IntSize IntSize;
@@ -430,17 +440,17 @@ public:
      * Whether to use the SIL Graphite rendering engine
      * (for fonts that include Graphite tables)
      */
     bool UseGraphiteShaping();
 
     // check whether format is supported on a platform or not (if unclear, returns true)
     virtual bool IsFontFormatSupported(nsIURI *aFontURI, uint32_t aFormatFlags) { return false; }
 
-    virtual bool DidRenderingDeviceReset() { return false; }
+    virtual bool DidRenderingDeviceReset(DeviceResetReason* aResetReason = nullptr) { return false; }
 
     void GetPrefFonts(nsIAtom *aLanguage, nsString& array, bool aAppendUnicode = true);
 
     // in some situations, need to make decisions about ambiguous characters, may need to look at multiple pref langs
     void GetLangPrefs(eFontPrefLang aPrefLangs[], uint32_t &aLen, eFontPrefLang aCharLang, eFontPrefLang aPageLang);
     
     /**
      * Iterate over pref fonts given a list of lang groups.  For a single lang
--- a/gfx/thebes/gfxTextRun.h
+++ b/gfx/thebes/gfxTextRun.h
@@ -98,48 +98,51 @@ public:
     }
 
     virtual ~gfxTextRun();
 
     typedef gfxFont::RunMetrics Metrics;
 
     // Public textrun API for general use
 
-    bool IsClusterStart(uint32_t aPos) {
+    bool IsClusterStart(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].IsClusterStart();
     }
-    bool IsLigatureGroupStart(uint32_t aPos) {
+    bool IsLigatureGroupStart(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].IsLigatureGroupStart();
     }
-    bool CanBreakLineBefore(uint32_t aPos) {
-        NS_ASSERTION(aPos < GetLength(), "aPos out of range");
-        return mCharacterGlyphs[aPos].CanBreakBefore() ==
-            CompressedGlyph::FLAG_BREAK_TYPE_NORMAL;
+    bool CanBreakLineBefore(uint32_t aPos) const {
+        return CanBreakBefore(aPos) == CompressedGlyph::FLAG_BREAK_TYPE_NORMAL;
     }
-    bool CanHyphenateBefore(uint32_t aPos) {
-        NS_ASSERTION(aPos < GetLength(), "aPos out of range");
-        return mCharacterGlyphs[aPos].CanBreakBefore() ==
-            CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
+    bool CanHyphenateBefore(uint32_t aPos) const {
+        return CanBreakBefore(aPos) == CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
     }
 
-    bool CharIsSpace(uint32_t aPos) {
+    // Returns a gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_* value
+    // as defined in gfxFont.h (may be NONE, NORMAL or HYPHEN).
+    uint8_t CanBreakBefore(uint32_t aPos) const {
+        NS_ASSERTION(aPos < GetLength(), "aPos out of range");
+        return mCharacterGlyphs[aPos].CanBreakBefore();
+    }
+
+    bool CharIsSpace(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsSpace();
     }
-    bool CharIsTab(uint32_t aPos) {
+    bool CharIsTab(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsTab();
     }
-    bool CharIsNewline(uint32_t aPos) {
+    bool CharIsNewline(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsNewline();
     }
-    bool CharIsLowSurrogate(uint32_t aPos) {
+    bool CharIsLowSurrogate(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsLowSurrogate();
     }
 
     // All uint32_t aStart, uint32_t aLength ranges below are restricted to
     // grapheme cluster boundaries! All offsets are in terms of the string
     // passed into MakeTextRun.
     
--- a/gfx/thebes/gfxWindowsPlatform.cpp
+++ b/gfx/thebes/gfxWindowsPlatform.cpp
@@ -13,16 +13,17 @@
 #include "gfxWindowsSurface.h"
 
 #include "nsUnicharUtils.h"
 
 #include "mozilla/Preferences.h"
 #include "mozilla/WindowsVersion.h"
 #include "nsServiceManagerUtils.h"
 #include "nsTArray.h"
+#include "mozilla/Telemetry.h"
 
 #include "nsIWindowsRegKey.h"
 #include "nsIFile.h"
 #include "plbase64.h"
 #include "nsIXULRuntime.h"
 #include "imgLoader.h"
 
 #include "nsIGfxInfo.h"
@@ -469,17 +470,19 @@ gfxWindowsPlatform::GetDPIScale()
 
 void
 gfxWindowsPlatform::UpdateRenderMode()
 {
 /* Pick the default render mode for
  * desktop.
  */
     bool didReset = false;
-    if (DidRenderingDeviceReset()) {
+    DeviceResetReason resetReason = DeviceResetReason::OK;
+    if (DidRenderingDeviceReset(&resetReason)) {
+      Telemetry::Accumulate(Telemetry::DEVICE_RESET_REASON, uint32_t(resetReason));
       mD3D11DeviceInitialized = false;
       mD3D11Device = nullptr;
       mD3D11ContentDevice = nullptr;
       mAdapter = nullptr;
 
       imgLoader::Singleton()->ClearCache(true);
       imgLoader::Singleton()->ClearCache(false);
       Factory::SetDirect3D11Device(nullptr);
@@ -1135,20 +1138,45 @@ gfxWindowsPlatform::IsFontFormatSupporte
         return false;
     }
 
     // no format hint set, need to look at data
     return true;
 }
 
 bool
-gfxWindowsPlatform::DidRenderingDeviceReset()
+gfxWindowsPlatform::DidRenderingDeviceReset(DeviceResetReason* aResetReason)
 {
+  if (aResetReason) {
+    *aResetReason = DeviceResetReason::OK;
+  }
+
   if (mD3D11Device) {
-    if (mD3D11Device->GetDeviceRemovedReason() != S_OK) {
+    HRESULT hr = mD3D11Device->GetDeviceRemovedReason();
+    if (hr != S_OK) {
+      if (aResetReason) {
+        switch (hr) {
+        case DXGI_ERROR_DEVICE_HUNG:
+          *aResetReason = DeviceResetReason::HUNG;
+          break;
+        case DXGI_ERROR_DEVICE_REMOVED:
+          *aResetReason = DeviceResetReason::REMOVED;
+          break;
+        case DXGI_ERROR_DEVICE_RESET:
+          *aResetReason = DeviceResetReason::RESET;
+          break;
+        case DXGI_ERROR_DRIVER_INTERNAL_ERROR:
+          *aResetReason = DeviceResetReason::DRIVER_ERROR;
+          break;
+        case DXGI_ERROR_INVALID_CALL:
+          *aResetReason = DeviceResetReason::INVALID_CALL;
+          break; // a mapped reason must not fall into the assert below
+        default: MOZ_ASSERT(false); // HRESULT with no DeviceResetReason mapping
+        }
+      }
       return true;
     }
   }
   if (mD3D11ContentDevice) {
     if (mD3D11ContentDevice->GetDeviceRemovedReason() != S_OK) {
       return true;
     }
   }
--- a/gfx/thebes/gfxWindowsPlatform.h
+++ b/gfx/thebes/gfxWindowsPlatform.h
@@ -205,17 +205,17 @@ public:
                                            const uint8_t* aFontData,
                                            uint32_t aLength);
 
     /**
      * Check whether format is supported on a platform or not (if unclear, returns true)
      */
     virtual bool IsFontFormatSupported(nsIURI *aFontURI, uint32_t aFormatFlags);
 
-    virtual bool DidRenderingDeviceReset();
+    virtual bool DidRenderingDeviceReset(DeviceResetReason* aResetReason = nullptr);
 
     // ClearType is not always enabled even when available (e.g. Windows XP)
     // if either of these prefs are enabled and apply, use ClearType rendering
     bool UseClearTypeForDownloadableFonts();
     bool UseClearTypeAlways();
 
     static void GetDLLVersion(char16ptr_t aDLLPath, nsAString& aVersion);
 
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -7801,27 +7801,28 @@ CodeGenerator::visitGetNameCache(LGetNam
     TypedOrValueRegister output(GetValueOutput(ins));
     bool isTypeOf = ins->mir()->accessKind() != MGetNameCache::NAME;
 
     NameIC cache(liveRegs, isTypeOf, scopeChain, ins->mir()->name(), output);
     cache.setProfilerLeavePC(ins->mir()->profilerLeavePc());
     addCache(ins, allocateCache(cache));
 }
 
-typedef bool (*NameICFn)(JSContext *, size_t, HandleObject, MutableHandleValue);
+typedef bool (*NameICFn)(JSContext *, HandleScript, size_t, HandleObject, MutableHandleValue);
 const VMFunction NameIC::UpdateInfo = FunctionInfo<NameICFn>(NameIC::update);
 
 void
 CodeGenerator::visitNameIC(OutOfLineUpdateCache *ool, DataPtr<NameIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->scopeChainReg());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(NameIC::UpdateInfo, lir);
     StoreValueTo(ic->outputReg()).generate(this);
     restoreLiveIgnore(lir, StoreValueTo(ic->outputReg()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 void
@@ -7878,19 +7879,19 @@ CodeGenerator::visitGetPropertyCacheT(LG
     PropertyName *name = ins->mir()->name();
     bool monitoredResult = ins->mir()->monitoredResult();
     TypedOrValueRegister output(ins->mir()->type(), ToAnyRegister(ins->getDef(0)));
 
     addGetPropertyCache(ins, liveRegs, objReg, name, output, monitoredResult,
                         ins->mir()->profilerLeavePc());
 }
 
-typedef bool (*GetPropertyICFn)(JSContext *, size_t, HandleObject, MutableHandleValue);
-const VMFunction GetPropertyIC::UpdateInfo =
-    FunctionInfo<GetPropertyICFn>(GetPropertyIC::update);
+typedef bool (*GetPropertyICFn)(JSContext *, HandleScript, size_t, HandleObject,
+                                MutableHandleValue);
+const VMFunction GetPropertyIC::UpdateInfo = FunctionInfo<GetPropertyICFn>(GetPropertyIC::update);
 
 void
 CodeGenerator::visitGetPropertyIC(OutOfLineUpdateCache *ool, DataPtr<GetPropertyIC> &ic)
 {
     LInstruction *lir = ool->lir();
 
     if (ic->idempotent()) {
         size_t numLocs;
@@ -7898,16 +7899,17 @@ CodeGenerator::visitGetPropertyIC(OutOfL
         size_t locationBase = addCacheLocations(cacheLocs, &numLocs);
         ic->setLocationInfo(locationBase, numLocs);
     }
 
     saveLive(lir);
 
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(GetPropertyIC::UpdateInfo, lir);
     StoreValueTo(ic->output()).generate(this);
     restoreLiveIgnore(lir, StoreValueTo(ic->output()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 void
@@ -7940,29 +7942,30 @@ CodeGenerator::visitGetElementCacheT(LGe
     ConstantOrRegister index = TypedOrValueRegister(MIRType_Int32, ToAnyRegister(ins->index()));
     TypedOrValueRegister output(ins->mir()->type(), ToAnyRegister(ins->output()));
     const MGetElementCache *mir = ins->mir();
 
     addGetElementCache(ins, obj, index, output, mir->monitoredResult(),
                        mir->allowDoubleResult(), mir->profilerLeavePc());
 }
 
-typedef bool (*GetElementICFn)(JSContext *, size_t, HandleObject, HandleValue, MutableHandleValue);
-const VMFunction GetElementIC::UpdateInfo =
-    FunctionInfo<GetElementICFn>(GetElementIC::update);
+typedef bool (*GetElementICFn)(JSContext *, HandleScript, size_t, HandleObject, HandleValue,
+                               MutableHandleValue);
+const VMFunction GetElementIC::UpdateInfo = FunctionInfo<GetElementICFn>(GetElementIC::update);
 
 void
 CodeGenerator::visitGetElementIC(OutOfLineUpdateCache *ool, DataPtr<GetElementIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->index());
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(GetElementIC::UpdateInfo, lir);
     StoreValueTo(ic->output()).generate(this);
     restoreLiveIgnore(lir, StoreValueTo(ic->output()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 void
@@ -7997,30 +8000,31 @@ CodeGenerator::visitSetElementCacheT(LSe
     else
         value = TypedOrValueRegister(ins->mir()->value()->type(), ToAnyRegister(tmp));
 
     addSetElementCache(ins, obj, unboxIndex, temp, tempDouble, tempFloat32, index, value,
                        ins->mir()->strict(), ins->mir()->guardHoles(),
                        ins->mir()->profilerLeavePc());
 }
 
-typedef bool (*SetElementICFn)(JSContext *, size_t, HandleObject, HandleValue, HandleValue);
-const VMFunction SetElementIC::UpdateInfo =
-    FunctionInfo<SetElementICFn>(SetElementIC::update);
+typedef bool (*SetElementICFn)(JSContext *, HandleScript, size_t, HandleObject, HandleValue,
+                               HandleValue);
+const VMFunction SetElementIC::UpdateInfo = FunctionInfo<SetElementICFn>(SetElementIC::update);
 
 void
 CodeGenerator::visitSetElementIC(OutOfLineUpdateCache *ool, DataPtr<SetElementIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->value());
     pushArg(ic->index());
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(SetElementIC::UpdateInfo, lir);
     restoreLive(lir);
 
     masm.jump(ool->rejoin());
 }
 
 void
 CodeGenerator::visitBindNameCache(LBindNameCache *ins)
@@ -8028,28 +8032,28 @@ CodeGenerator::visitBindNameCache(LBindN
     Register scopeChain = ToRegister(ins->scopeChain());
     Register output = ToRegister(ins->output());
     BindNameIC cache(scopeChain, ins->mir()->name(), output);
     cache.setProfilerLeavePC(ins->mir()->profilerLeavePc());
 
     addCache(ins, allocateCache(cache));
 }
 
-typedef JSObject *(*BindNameICFn)(JSContext *, size_t, HandleObject);
-const VMFunction BindNameIC::UpdateInfo =
-    FunctionInfo<BindNameICFn>(BindNameIC::update);
+typedef JSObject *(*BindNameICFn)(JSContext *, HandleScript, size_t, HandleObject);
+const VMFunction BindNameIC::UpdateInfo = FunctionInfo<BindNameICFn>(BindNameIC::update);
 
 void
 CodeGenerator::visitBindNameIC(OutOfLineUpdateCache *ool, DataPtr<BindNameIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->scopeChainReg());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(BindNameIC::UpdateInfo, lir);
     StoreRegisterTo(ic->outputReg()).generate(this);
     restoreLiveIgnore(lir, StoreRegisterTo(ic->outputReg()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 typedef bool (*SetPropertyFn)(JSContext *, HandleObject,
@@ -8133,29 +8137,29 @@ CodeGenerator::visitSetPropertyCacheT(LS
     else
         value = TypedOrValueRegister(ins->valueType(), ToAnyRegister(ins->getOperand(1)));
 
     addSetPropertyCache(ins, liveRegs, objReg, ins->mir()->name(), value,
                         ins->mir()->strict(), ins->mir()->needsTypeBarrier(),
                         ins->mir()->profilerLeavePc());
 }
 
-typedef bool (*SetPropertyICFn)(JSContext *, size_t, HandleObject, HandleValue);
-const VMFunction SetPropertyIC::UpdateInfo =
-    FunctionInfo<SetPropertyICFn>(SetPropertyIC::update);
+typedef bool (*SetPropertyICFn)(JSContext *, HandleScript, size_t, HandleObject, HandleValue);
+const VMFunction SetPropertyIC::UpdateInfo = FunctionInfo<SetPropertyICFn>(SetPropertyIC::update);
 
 void
 CodeGenerator::visitSetPropertyIC(OutOfLineUpdateCache *ool, DataPtr<SetPropertyIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->value());
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(SetPropertyIC::UpdateInfo, lir);
     restoreLive(lir);
 
     masm.jump(ool->rejoin());
 }
 
 typedef bool (*ThrowFn)(JSContext *, HandleValue);
 static const VMFunction ThrowInfoCodeGen = FunctionInfo<ThrowFn>(js::Throw);
--- a/js/src/jit/IonCaches.cpp
+++ b/js/src/jit/IonCaches.cpp
@@ -466,16 +466,30 @@ IonCache::updateBaseAddress(JitCode *cod
     fallbackLabel_.repoint(code, &masm);
 }
 
 void
 IonCache::initializeAddCacheState(LInstruction *ins, AddCacheState *addState)
 {
 }
 
+static void *
+GetReturnAddressToIonCode(JSContext *cx)
+{
+    JitFrameIterator iter(cx);
+    MOZ_ASSERT(iter.type() == JitFrame_Exit);
+
+    void *returnAddr = iter.returnAddress();
+#ifdef DEBUG
+    ++iter;
+    MOZ_ASSERT(iter.isIonJS());
+#endif
+    return returnAddr;
+}
+
 static void
 GeneratePrototypeGuards(JSContext *cx, IonScript *ion, MacroAssembler &masm, JSObject *obj,
                         JSObject *holder, Register objectReg, Register scratchReg,
                         Label *failures)
 {
     /*
      * The guards here protect against the effects of JSObject::swap(). If the prototype chain
      * is directly altered, then TI will toss the jitcode, so we don't have to worry about
@@ -1238,17 +1252,17 @@ IsCacheableArrayLength(JSContext *cx, Ha
         return false;
     }
 
     return true;
 }
 
 template <class GetPropCache>
 static GetPropertyIC::NativeGetPropCacheability
-CanAttachNativeGetProp(typename GetPropCache::Context cx, const GetPropCache &cache,
+CanAttachNativeGetProp(JSContext *cx, const GetPropCache &cache,
                        HandleObject obj, HandlePropertyName name,
                        MutableHandleNativeObject holder, MutableHandleShape shape,
                        bool skipArrayLen = false)
 {
     if (!obj)
         return GetPropertyIC::CanAttachNone;
 
     // The lookup needs to be universally pure, otherwise we risk calling hooks out
@@ -1306,17 +1320,17 @@ CanAttachNativeGetProp(typename GetPropC
         // effectful. This is handled by allowGetters()
         return GetPropertyIC::CanAttachCallGetter;
     }
 
     return GetPropertyIC::CanAttachNone;
 }
 
 bool
-GetPropertyIC::allowArrayLength(Context cx, HandleObject obj) const
+GetPropertyIC::allowArrayLength(JSContext *cx, HandleObject obj) const
 {
     if (!idempotent())
         return true;
 
     uint32_t locationIndex, numLocations;
     getLocationInfo(&locationIndex, &numLocations);
 
     IonScript *ion = GetTopJitJSScript(cx)->ionScript();
@@ -1809,27 +1823,28 @@ GetPropertyIC::tryAttachArgumentsLength(
 
     MOZ_ASSERT(!hasNormalArgumentsLengthStub_);
     hasNormalArgumentsLengthStub_ = true;
     return linkAndAttachStub(cx, masm, attacher, ion, "ArgsObj length (normal)");
 }
 
 bool
 GetPropertyIC::tryAttachStub(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                             HandleObject obj, HandlePropertyName name,
-                             void *returnAddr, bool *emitted)
+                             HandleObject obj, HandlePropertyName name, bool *emitted)
 {
     MOZ_ASSERT(!*emitted);
 
     if (!canAttachStub())
         return true;
 
     if (!*emitted && !tryAttachArgumentsLength(cx, outerScript, ion, obj, name, emitted))
         return false;
 
+    void *returnAddr = GetReturnAddressToIonCode(cx);
+
     if (!*emitted && !tryAttachProxy(cx, outerScript, ion, obj, name, returnAddr, emitted))
         return false;
 
     if (!*emitted && !tryAttachNative(cx, outerScript, ion, obj, name, returnAddr, emitted))
         return false;
 
     if (!*emitted && !tryAttachUnboxed(cx, outerScript, ion, obj, name, returnAddr, emitted))
         return false;
@@ -1839,38 +1854,36 @@ GetPropertyIC::tryAttachStub(JSContext *
 
     if (!*emitted)
         JitSpew(JitSpew_IonIC, "Failed to attach GETPROP cache");
 
     return true;
 }
 
 /* static */ bool
-GetPropertyIC::update(JSContext *cx, size_t cacheIndex,
+GetPropertyIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
                       HandleObject obj, MutableHandleValue vp)
 {
-    void *returnAddr;
-    RootedScript outerScript(cx, GetTopJitJSScript(cx, &returnAddr));
     IonScript *ion = outerScript->ionScript();
 
     GetPropertyIC &cache = ion->getCache(cacheIndex).toGetProperty();
     RootedPropertyName name(cx, cache.name());
 
     // Override the return value if we are invalidated (bug 728188).
     AutoDetectInvalidation adi(cx, vp, ion);
 
     // If the cache is idempotent, we will redo the op in the interpreter.
     if (cache.idempotent())
         adi.disable();
 
     // For now, just stop generating new stubs once we hit the stub count
     // limit. Once we can make calls from within generated stubs, a new call
     // stub will be generated instead and the previous stubs unlinked.
     bool emitted = false;
-    if (!cache.tryAttachStub(cx, outerScript, ion, obj, name, returnAddr, &emitted))
+    if (!cache.tryAttachStub(cx, outerScript, ion, obj, name, &emitted))
         return false;
 
     if (cache.idempotent() && !emitted) {
         // Invalidate the cache if the property was not found, or was found on
         // a non-native object. This ensures:
         // 1) The property read has no observable side-effects.
         // 2) There's no need to dynamically monitor the return type. This would
         //    be complicated since (due to GVN) there can be multiple pc's
@@ -2928,88 +2941,88 @@ CanAttachSetUnboxed(JSContext *cx, Handl
         *unboxedType = property->type;
         return true;
     }
 
     return false;
 }
 
 bool
-SetPropertyIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj,
+SetPropertyIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
                       HandleValue value)
 {
-    void *returnAddr;
-    RootedScript script(cx, GetTopJitJSScript(cx, &returnAddr));
-    IonScript *ion = script->ionScript();
+    IonScript *ion = outerScript->ionScript();
     SetPropertyIC &cache = ion->getCache(cacheIndex).toSetProperty();
     RootedPropertyName name(cx, cache.name());
     RootedId id(cx, AtomToId(name));
 
     RootedObjectGroup oldGroup(cx, obj->getGroup(cx));
     if (!oldGroup)
         return false;
 
     // Stop generating new stubs once we hit the stub count limit, see
     // GetPropertyCache.
     NativeSetPropCacheability canCache = CanAttachNone;
     bool addedSetterStub = false;
     if (cache.canAttachStub() && !obj->watched()) {
         if (!addedSetterStub && obj->is<ProxyObject>()) {
+            void *returnAddr = GetReturnAddressToIonCode(cx);
             if (IsCacheableDOMProxy(obj)) {
                 DOMProxyShadowsResult shadows = GetDOMProxyShadowsCheck()(cx, obj, id);
                 if (shadows == ShadowCheckFailed)
                     return false;
                 if (shadows == Shadows) {
-                    if (!cache.attachDOMProxyShadowed(cx, script, ion, obj, returnAddr))
+                    if (!cache.attachDOMProxyShadowed(cx, outerScript, ion, obj, returnAddr))
                         return false;
                     addedSetterStub = true;
                 } else {
                     MOZ_ASSERT(shadows == DoesntShadow || shadows == DoesntShadowUnique);
                     if (shadows == DoesntShadowUnique)
                         cache.reset();
-                    if (!cache.attachDOMProxyUnshadowed(cx, script, ion, obj, returnAddr))
+                    if (!cache.attachDOMProxyUnshadowed(cx, outerScript, ion, obj, returnAddr))
                         return false;
                     addedSetterStub = true;
                 }
             }
 
             if (!addedSetterStub && !cache.hasGenericProxyStub()) {
-                if (!cache.attachGenericProxy(cx, script, ion, returnAddr))
+                if (!cache.attachGenericProxy(cx, outerScript, ion, returnAddr))
                     return false;
                 addedSetterStub = true;
             }
         }
 
         RootedShape shape(cx);
         RootedObject holder(cx);
         bool checkTypeset;
         canCache = CanAttachNativeSetProp(cx, obj, id, cache.value(), cache.needsTypeBarrier(),
                                           &holder, &shape, &checkTypeset);
 
         if (!addedSetterStub && canCache == CanAttachSetSlot) {
             RootedNativeObject nobj(cx, &obj->as<NativeObject>());
-            if (!cache.attachSetSlot(cx, script, ion, nobj, shape, checkTypeset))
+            if (!cache.attachSetSlot(cx, outerScript, ion, nobj, shape, checkTypeset))
                 return false;
             addedSetterStub = true;
         }
 
         if (!addedSetterStub && canCache == CanAttachCallSetter) {
-            if (!cache.attachCallSetter(cx, script, ion, obj, holder, shape, returnAddr))
+            void *returnAddr = GetReturnAddressToIonCode(cx);
+            if (!cache.attachCallSetter(cx, outerScript, ion, obj, holder, shape, returnAddr))
                 return false;
             addedSetterStub = true;
         }
 
         checkTypeset = false;
         uint32_t unboxedOffset;
         JSValueType unboxedType;
         if (!addedSetterStub && CanAttachSetUnboxed(cx, obj, id, cache.value(),
                                                     cache.needsTypeBarrier(),
                                                     &checkTypeset, &unboxedOffset, &unboxedType))
         {
-            if (!cache.attachSetUnboxed(cx, script, ion, obj, id, unboxedOffset, unboxedType,
+            if (!cache.attachSetUnboxed(cx, outerScript, ion, obj, id, unboxedOffset, unboxedType,
                                         checkTypeset))
             {
                 return false;
             }
             addedSetterStub = true;
         }
     }
 
@@ -3023,17 +3036,17 @@ SetPropertyIC::update(JSContext *cx, siz
     // The property did not exist before, now we can try to inline the property add.
     bool checkTypeset;
     if (!addedSetterStub && canCache == MaybeCanAttachAddSlot &&
         IsPropertyAddInlineable(&obj->as<NativeObject>(), id,
                                 cache.value(), oldSlots, oldShape, cache.needsTypeBarrier(),
                                 &checkTypeset))
     {
         RootedNativeObject nobj(cx, &obj->as<NativeObject>());
-        if (!cache.attachAddSlot(cx, script, ion, nobj, oldShape, oldGroup, checkTypeset))
+        if (!cache.attachAddSlot(cx, outerScript, ion, nobj, oldShape, oldGroup, checkTypeset))
             return false;
         addedSetterStub = true;
     }
 
     if (!addedSetterStub)
         JitSpew(JitSpew_IonIC, "Failed to attach SETPROP cache");
 
     return true;
@@ -3069,18 +3082,17 @@ EqualStringsHelper(JSString *str1, JSStr
     if (!str2Linear)
         return false;
 
     return EqualChars(&str1->asLinear(), str2Linear);
 }
 
 bool
 GetElementIC::attachGetProp(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                            HandleObject obj, const Value &idval, HandlePropertyName name,
-                            void *returnAddr)
+                            HandleObject obj, const Value &idval, HandlePropertyName name)
 {
     MOZ_ASSERT(index().reg().hasValue());
 
     RootedNativeObject holder(cx);
     RootedShape shape(cx);
 
     GetPropertyIC::NativeGetPropCacheability canCache =
         CanAttachNativeGetProp(cx, *this, obj, name, &holder, &shape,
@@ -3150,17 +3162,19 @@ GetElementIC::attachGetProp(JSContext *c
     masm.bind(&equal);
 
     RepatchStubAppender attacher(*this);
     if (canCache == GetPropertyIC::CanAttachReadSlot) {
         GenerateReadSlot(cx, ion, masm, attacher, obj, holder, shape, object(), output(),
                          &failures);
     } else {
         MOZ_ASSERT(canCache == GetPropertyIC::CanAttachCallGetter);
+
         // Set the frame for bailout safety of the OOL call.
+        void *returnAddr = GetReturnAddressToIonCode(cx);
         if (!GenerateCallGetter(cx, ion, masm, attacher, obj, name, holder, shape, liveRegs_,
                                 object(), output(), returnAddr, &failures))
         {
             return false;
         }
     }
 
     return linkAndAttachStub(cx, masm, attacher, ion, "property");
@@ -3501,21 +3515,19 @@ GetElementIC::attachArgumentsElement(JSC
     }
 
     MOZ_ASSERT(!hasNormalArgumentsStub_);
     hasNormalArgumentsStub_ = true;
     return linkAndAttachStub(cx, masm, attacher, ion, "ArgsObj element (normal)");
 }
 
 bool
-GetElementIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj,
+GetElementIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
                      HandleValue idval, MutableHandleValue res)
 {
-    void *returnAddr;
-    RootedScript outerScript(cx, GetTopJitJSScript(cx, &returnAddr));
     IonScript *ion = outerScript->ionScript();
     GetElementIC &cache = ion->getCache(cacheIndex).toGetElement();
     RootedScript script(cx);
     jsbytecode *pc;
     cache.getScriptedLocation(&script, &pc);
 
     // Override the return value when the script is invalidated (bug 728188).
     AutoDetectInvalidation adi(cx, res, ion);
@@ -3542,17 +3554,17 @@ GetElementIC::update(JSContext *cx, size
             (cache.output().hasValue() || !cache.output().typedReg().isFloat()))
         {
             if (!cache.attachArgumentsElement(cx, outerScript, ion, obj))
                 return false;
             attachedStub = true;
         }
         if (!attachedStub && cache.monitoredResult() && canAttachGetProp(obj, idval, id)) {
             RootedPropertyName name(cx, JSID_TO_ATOM(id)->asPropertyName());
-            if (!cache.attachGetProp(cx, outerScript, ion, obj, idval, name, returnAddr))
+            if (!cache.attachGetProp(cx, outerScript, ion, obj, idval, name))
                 return false;
             attachedStub = true;
         }
         if (!attachedStub && !cache.hasDenseStub() && canAttachDenseElement(obj, idval)) {
             if (!cache.attachDenseElement(cx, outerScript, ion, obj, idval))
                 return false;
             attachedStub = true;
         }
@@ -3892,20 +3904,19 @@ SetElementIC::attachTypedArrayElement(JS
     {
         return false;
     }
 
     return linkAndAttachStub(cx, masm, attacher, ion, "typed array");
 }
 
 bool
-SetElementIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj,
+SetElementIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
                      HandleValue idval, HandleValue value)
 {
-    RootedScript outerScript(cx, GetTopJitJSScript(cx));
     IonScript *ion = outerScript->ionScript();
     SetElementIC &cache = ion->getCache(cacheIndex).toSetElement();
 
     bool attachedStub = false;
     if (cache.canAttachStub()) {
         if (!cache.hasDenseStub() && IsDenseElementSetInlineable(obj, idval)) {
             if (!cache.attachDenseElement(cx, outerScript, ion, obj, idval))
                 return false;
@@ -4061,19 +4072,19 @@ IsCacheableNonGlobalScopeChain(JSObject 
             return false;
         }
     }
 
     MOZ_CRASH("Invalid scope chain");
 }
 
 JSObject *
-BindNameIC::update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain)
+BindNameIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
+                   HandleObject scopeChain)
 {
-    RootedScript outerScript(cx, GetTopJitJSScript(cx));
     IonScript *ion = outerScript->ionScript();
     BindNameIC &cache = ion->getCache(cacheIndex).toBindName();
     HandlePropertyName name = cache.name();
 
     RootedObject holder(cx);
     if (scopeChain->is<GlobalObject>()) {
         holder = scopeChain;
     } else {
@@ -4211,21 +4222,19 @@ IsCacheableNameCallGetter(HandleObject s
         return false;
 
     return IsCacheableGetPropCallNative(obj, holder, shape) ||
         IsCacheableGetPropCallPropertyOp(obj, holder, shape) ||
         IsCacheableGetPropCallScripted(obj, holder, shape);
 }
 
 bool
-NameIC::update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain,
+NameIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject scopeChain,
                MutableHandleValue vp)
 {
-    void *returnAddr;
-    RootedScript outerScript(cx, GetTopJitJSScript(cx, &returnAddr));
     IonScript *ion = outerScript->ionScript();
 
     NameIC &cache = ion->getCache(cacheIndex).toName();
     RootedPropertyName name(cx, cache.name());
 
     RootedScript script(cx);
     jsbytecode *pc;
     cache.getScriptedLocation(&script, &pc);
@@ -4248,18 +4257,22 @@ NameIC::update(JSContext *cx, size_t cac
     if (cache.canAttachStub()) {
         if (IsCacheableNameReadSlot(scopeChain, obj, holder, shape, pc, cache.outputReg())) {
             if (!cache.attachReadSlot(cx, outerScript, ion, scopeChain, obj,
                                       holder.as<NativeObject>(), shape))
             {
                 return false;
             }
         } else if (IsCacheableNameCallGetter(scopeChain, obj, holder, shape)) {
-            if (!cache.attachCallGetter(cx, outerScript, ion, scopeChain, obj, holder, shape, returnAddr))
+            void *returnAddr = GetReturnAddressToIonCode(cx);
+            if (!cache.attachCallGetter(cx, outerScript, ion, scopeChain, obj, holder, shape,
+                                        returnAddr))
+            {
                 return false;
+            }
         }
     }
 
     // Monitor changes to cache entry.
     TypeScript::Monitor(cx, script, pc, vp);
 
     return true;
 }
--- a/js/src/jit/IonCaches.h
+++ b/js/src/jit/IonCaches.h
@@ -633,26 +633,24 @@ class GetPropertyIC : public RepatchIonC
     enum NativeGetPropCacheability {
         CanAttachNone,
         CanAttachReadSlot,
         CanAttachArrayLength,
         CanAttachCallGetter
     };
 
     // Helpers for CanAttachNativeGetProp
-    typedef JSContext * Context;
-    bool allowArrayLength(Context cx, HandleObject obj) const;
+    bool allowArrayLength(JSContext *cx, HandleObject obj) const;
     bool allowGetters() const {
         return monitoredResult() && !idempotent();
     }
 
     // Attach the proper stub, if possible
     bool tryAttachStub(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                       HandleObject obj, HandlePropertyName name,
-                       void *returnAddr, bool *emitted);
+                       HandleObject obj, HandlePropertyName name, bool *emitted);
 
     bool tryAttachProxy(JSContext *cx, HandleScript outerScript, IonScript *ion,
                         HandleObject obj, HandlePropertyName name,
                         void *returnAddr, bool *emitted);
 
     bool tryAttachGenericProxy(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                HandleObject obj, HandlePropertyName name,
                                void *returnAddr, bool *emitted);
@@ -673,17 +671,18 @@ class GetPropertyIC : public RepatchIonC
                           void *returnAddr, bool *emitted);
 
     bool tryAttachTypedArrayLength(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                    HandleObject obj, HandlePropertyName name, bool *emitted);
 
     bool tryAttachArgumentsLength(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                   HandleObject obj, HandlePropertyName name, bool *emitted);
 
-    static bool update(JSContext *cx, size_t cacheIndex, HandleObject obj, MutableHandleValue vp);
+    static bool update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
+                       HandleObject obj, MutableHandleValue vp);
 };
 
 class SetPropertyIC : public RepatchIonCache
 {
   protected:
     // Registers live after the cache, excluding output registers. The initial
     // value of these registers must be preserved by the cache.
     RegisterSet liveRegs_;
@@ -759,17 +758,18 @@ class SetPropertyIC : public RepatchIonC
                             void *returnAddr);
 
     bool attachDOMProxyShadowed(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                 HandleObject obj, void *returnAddr);
 
     bool attachDOMProxyUnshadowed(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                   HandleObject obj, void *returnAddr);
 
-    static bool update(JSContext *cx, size_t cacheIndex, HandleObject obj, HandleValue value);
+    static bool update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
+                       HandleObject obj, HandleValue value);
 };
 
 class GetElementIC : public RepatchIonCache
 {
   protected:
     RegisterSet liveRegs_;
 
     Register object_;
@@ -841,31 +841,30 @@ class GetElementIC : public RepatchIonCa
     }
 
     static bool canAttachGetProp(JSObject *obj, const Value &idval, jsid id);
     static bool canAttachDenseElement(JSObject *obj, const Value &idval);
     static bool canAttachTypedArrayElement(JSObject *obj, const Value &idval,
                                            TypedOrValueRegister output);
 
     bool attachGetProp(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                       HandleObject obj, const Value &idval, HandlePropertyName name,
-                       void *returnAddr);
+                       HandleObject obj, const Value &idval, HandlePropertyName name);
 
     bool attachDenseElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                             HandleObject obj, const Value &idval);
 
     bool attachTypedArrayElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                  HandleObject tarr, const Value &idval);
 
     bool attachArgumentsElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                 HandleObject obj);
 
     static bool
-    update(JSContext *cx, size_t cacheIndex, HandleObject obj, HandleValue idval,
-           MutableHandleValue vp);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
+           HandleValue idval, MutableHandleValue vp);
 
     void incFailedUpdates() {
         failedUpdates_++;
     }
     void resetFailedUpdates() {
         failedUpdates_ = 0;
     }
     bool shouldDisable() const {
@@ -949,18 +948,18 @@ class SetElementIC : public RepatchIonCa
 
     bool attachDenseElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                             HandleObject obj, const Value &idval);
 
     bool attachTypedArrayElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                  HandleObject tarr);
 
     static bool
-    update(JSContext *cx, size_t cacheIndex, HandleObject obj, HandleValue idval,
-           HandleValue value);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
+           HandleValue idval, HandleValue value);
 };
 
 class BindNameIC : public RepatchIonCache
 {
   protected:
     Register scopeChain_;
     PropertyName *name_;
     Register output_;
@@ -987,17 +986,17 @@ class BindNameIC : public RepatchIonCach
 
     bool attachGlobal(JSContext *cx, HandleScript outerScript, IonScript *ion,
                       HandleObject scopeChain);
 
     bool attachNonGlobal(JSContext *cx, HandleScript outerScript, IonScript *ion,
                          HandleObject scopeChain, HandleObject holder);
 
     static JSObject *
-    update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject scopeChain);
 };
 
 class NameIC : public RepatchIonCache
 {
   protected:
     // Registers live after the cache, excluding output registers. The initial
     // value of these registers must be preserved by the cache.
     RegisterSet liveRegs_;
@@ -1038,17 +1037,18 @@ class NameIC : public RepatchIonCache
                         HandleObject scopeChain, HandleObject holderBase,
                         HandleNativeObject holder, HandleShape shape);
 
     bool attachCallGetter(JSContext *cx, HandleScript outerScript, IonScript *ion,
                           HandleObject scopeChain, HandleObject obj, HandleObject holder,
                           HandleShape shape, void *returnAddr);
 
     static bool
-    update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain, MutableHandleValue vp);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject scopeChain,
+           MutableHandleValue vp);
 };
 
 #undef CACHE_HEADER
 
 // Implement cache casts now that the compiler can see the inheritance.
 #define CACHE_CASTS(ickind)                                             \
     ickind##IC &IonCache::to##ickind()                                  \
     {                                                                   \
--- a/js/src/jit/JitFrames.h
+++ b/js/src/jit/JitFrames.h
@@ -284,26 +284,22 @@ void UpdateJitActivationsForMinorGC(JSRu
 static inline uint32_t
 MakeFrameDescriptor(uint32_t frameSize, FrameType type)
 {
     return (frameSize << FRAMESIZE_SHIFT) | type;
 }
 
 // Returns the JSScript associated with the topmost JIT frame.
 inline JSScript *
-GetTopJitJSScript(JSContext *cx, void **returnAddrOut = nullptr)
+GetTopJitJSScript(JSContext *cx)
 {
     JitFrameIterator iter(cx);
     MOZ_ASSERT(iter.type() == JitFrame_Exit);
     ++iter;
 
-    MOZ_ASSERT(iter.returnAddressToFp() != nullptr);
-    if (returnAddrOut)
-        *returnAddrOut = (void *) iter.returnAddressToFp();
-
     if (iter.isBaselineStub()) {
         ++iter;
         MOZ_ASSERT(iter.isBaselineJS());
     }
 
     MOZ_ASSERT(iter.isScripted());
     return iter.script();
 }
--- a/js/src/jit/VMFunctions.cpp
+++ b/js/src/jit/VMFunctions.cpp
@@ -31,20 +31,19 @@ using namespace js::jit;
 
 namespace js {
 namespace jit {
 
 // Don't explicitly initialize, it's not guaranteed that this initializer will
 // run before the constructors for static VMFunctions.
 /* static */ VMFunction *VMFunction::functions;
 
-AutoDetectInvalidation::AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval,
-                                               IonScript *ionScript)
+AutoDetectInvalidation::AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval)
   : cx_(cx),
-    ionScript_(ionScript ? ionScript : GetTopJitJSScript(cx)->ionScript()),
+    ionScript_(GetTopJitJSScript(cx)->ionScript()),
     rval_(rval),
     disabled_(false)
 { }
 
 void
 VMFunction::addToFunctions()
 {
     static bool initialized = false;
--- a/js/src/jit/VMFunctions.h
+++ b/js/src/jit/VMFunctions.h
@@ -612,17 +612,23 @@ class AutoDetectInvalidation
     JSContext *cx_;
     IonScript *ionScript_;
     MutableHandleValue rval_;
     bool disabled_;
 
     void setReturnOverride();
 
   public:
-    AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval, IonScript *ionScript = nullptr);
+    AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval, IonScript *ionScript)
+      : cx_(cx), ionScript_(ionScript), rval_(rval), disabled_(false)
+    {
+        MOZ_ASSERT(ionScript);
+    }
+
+    AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval);
 
     void disable() {
         MOZ_ASSERT(!disabled_);
         disabled_ = true;
     }
 
     ~AutoDetectInvalidation() {
         if (!disabled_ && ionScript_->invalidated())
--- a/layout/forms/nsFieldSetFrame.cpp
+++ b/layout/forms/nsFieldSetFrame.cpp
@@ -593,17 +593,26 @@ nsFieldSetFrame::Reflow(nsPresContext*  
       contentRect.ISize(wm) = mLegendRect.ISize(wm) +
         aReflowState.ComputedLogicalPadding().IStartEnd(wm);
     }
 
     // place the legend
     LogicalRect actualLegendRect = mLegendRect;
     actualLegendRect.Deflate(wm, legendMargin);
     LogicalPoint actualLegendPos(actualLegendRect.Origin(wm));
-    legendReflowState->ApplyRelativePositioning(&actualLegendPos, containerWidth);
+
+    // Note that legend's writing mode may be different from the fieldset's,
+    // so we need to convert offsets before applying them to it (bug 1134534).
+    LogicalMargin offsets =
+      legendReflowState->ComputedLogicalOffsets().
+        ConvertTo(wm, legendReflowState->GetWritingMode());
+    nsHTMLReflowState::ApplyRelativePositioning(legend, wm, offsets,
+                                                &actualLegendPos,
+                                                containerWidth);
+
     legend->SetPosition(wm, actualLegendPos, containerWidth);
     nsContainerFrame::PositionFrameView(legend);
     nsContainerFrame::PositionChildViews(legend);
   }
 
   // Return our size and our result.
   LogicalSize finalSize(wm, contentRect.ISize(wm) + border.IStartEnd(wm),
                         mLegendSpace + border.BStartEnd(wm) +
--- a/layout/generic/nsTextFrame.cpp
+++ b/layout/generic/nsTextFrame.cpp
@@ -8519,16 +8519,17 @@ nsTextFrame::ReflowText(nsLineLayout& aL
     aLineLayout.NotifyOptionalBreakPosition(this, length, fits,
                                             gfxBreakPriority::eNormalBreak);
   }
   bool breakAfter = forceBreakAfter;
   // length == 0 means either the text is empty or it's all collapsed away
   bool emptyTextAtStartOfLine = atStartOfLine && length == 0;
   if (!breakAfter && charsFit == length && !emptyTextAtStartOfLine &&
       transformedOffset + transformedLength == mTextRun->GetLength() &&
+      !StyleContext()->IsInlineDescendantOfRuby() &&
       (mTextRun->GetFlags() & nsTextFrameUtils::TEXT_HAS_TRAILING_BREAK)) {
     // We placed all the text in the textrun and we have a break opportunity at
     // the end of the textrun. We need to record it because the following
     // content may not care about nsLineBreaker.
 
     // Note that because we didn't break, we can be sure that (thanks to the
     // code up above) textMetrics.mAdvanceWidth includes the width of any
     // trailing whitespace. So we need to subtract trimmableWidth here
--- a/layout/generic/nsTextRunTransformations.cpp
+++ b/layout/generic/nsTextRunTransformations.cpp
@@ -566,18 +566,19 @@ nsCaseTransformTextRunFactory::Transform
     if (ch == uint32_t(-1)) {
       aDeletedCharsArray.AppendElement(true);
       mergeNeeded = true;
     } else {
       aDeletedCharsArray.AppendElement(false);
       aCharsToMergeArray.AppendElement(false);
       if (aTextRun) {
         aStyleArray->AppendElement(charStyle);
-        aCanBreakBeforeArray->AppendElement(inhibitBreakBefore ? false :
-                                            aTextRun->CanBreakLineBefore(i));
+        aCanBreakBeforeArray->AppendElement(
+          inhibitBreakBefore ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
+                             : aTextRun->CanBreakBefore(i));
       }
 
       if (IS_IN_BMP(ch)) {
         aConvertedString.Append(ch);
       } else {
         aConvertedString.Append(H_SURROGATE(ch));
         aConvertedString.Append(L_SURROGATE(ch));
         ++i;
@@ -586,17 +587,18 @@ nsCaseTransformTextRunFactory::Transform
         ++extraChars;
       }
 
       while (extraChars-- > 0) {
         mergeNeeded = true;
         aCharsToMergeArray.AppendElement(true);
         if (aTextRun) {
           aStyleArray->AppendElement(charStyle);
-          aCanBreakBeforeArray->AppendElement(false);
+          aCanBreakBeforeArray->AppendElement(
+            gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
         }
       }
     }
   }
 
   return mergeNeeded;
 }
 
new file mode 100644
--- /dev/null
+++ b/layout/reftests/forms/fieldset/legend-rtl-ref.html
@@ -0,0 +1,5 @@
+<!DOCTYPE html>
+<html>
+<fieldset><legend style="position:relative;left:20px;">Legend</legend></fieldset>
+<fieldset dir="rtl"><legend style="position:relative;right:20px;">Legend</legend></fieldset>
+</html>
new file mode 100644
--- /dev/null
+++ b/layout/reftests/forms/fieldset/legend-rtl.html
@@ -0,0 +1,5 @@
+<!DOCTYPE html>
+<html>
+<fieldset><legend dir="rtl" style="position:relative;left:20px;">Legend</legend></fieldset>
+<fieldset dir="rtl"><legend dir="ltr" style="position:relative;right:20px;">Legend</legend></fieldset>
+</html>
--- a/layout/reftests/forms/fieldset/reftest.list
+++ b/layout/reftests/forms/fieldset/reftest.list
@@ -6,8 +6,9 @@
 == fieldset-scrolled-1.html fieldset-scrolled-1-ref.html
 random-if(B2G) == fieldset-overflow-auto-1.html fieldset-overflow-auto-1-ref.html
 fuzzy-if(winWidget&&!layersGPUAccelerated,102,205) == positioned-container-1.html positioned-container-1-ref.html
 == relpos-legend-1.html relpos-legend-1-ref.html
 == relpos-legend-2.html relpos-legend-2-ref.html
 test-pref(layout.css.sticky.enabled,true) skip-if(B2G&&browserIsRemote) == sticky-legend-1.html sticky-legend-1-ref.html
 == abs-pos-child-sizing.html abs-pos-child-sizing-ref.html
 == overflow-hidden.html overflow-hidden-ref.html
+== legend-rtl.html legend-rtl-ref.html
new file mode 100644
--- /dev/null
+++ b/layout/reftests/text/auto-hyphenation-transformed-1-ref.html
@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html lang="de">
+<head>
+<meta charset="utf-8">
+<title>Test for auto hyphenation with text-transform</title>
+</head>
+<body>
+<div style="width:0px;">
+mas&shy;se<br>ma&shy;ße
+</div>
+<div style="width:0px;">
+MAS&shy;SE<br>MA&shy;SSE
+</div>
+<div style="width:0px;">
+Mas&shy;se<br>Ma&shy;ße
+</div>
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/layout/reftests/text/auto-hyphenation-transformed-1.html
@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html lang="de">
+<head>
+<meta charset="utf-8">
+<title>Test for auto hyphenation with text-transform</title>
+</head>
+<body>
+<div style="width:0px; -moz-hyphens:auto">
+masse<br>maße
+</div>
+<div style="width:0px; -moz-hyphens:auto; text-transform:uppercase">
+masse<br>maße
+</div>
+<div style="width:0px; -moz-hyphens:auto; text-transform:capitalize">
+masse<br>maße
+</div>
+</body>
+</html>
--- a/layout/reftests/text/reftest.list
+++ b/layout/reftests/text/reftest.list
@@ -288,16 +288,18 @@ pref(gfx.font_rendering.graphite.enabled
 == auto-hyphenation-sh-1.html auto-hyphenation-sh-1-ref.html
 == auto-hyphenation-sl-1.html auto-hyphenation-sl-1-ref.html
 == auto-hyphenation-sr-1.html auto-hyphenation-sr-1-ref.html
 == auto-hyphenation-sv-1.html auto-hyphenation-sv-1-ref.html # test swedish patterns
 != auto-hyphenation-sv-1.html auto-hyphenation-sv-1-notref.html # verify swedish != english
 == auto-hyphenation-tr-1.html auto-hyphenation-tr-1-ref.html
 == auto-hyphenation-uk-1.html auto-hyphenation-uk-1-ref.html
 
+== auto-hyphenation-transformed-1.html auto-hyphenation-transformed-1-ref.html
+
 # osx-font-smoothing - with and without subpixel AA, only under OSX
 fails-if(!cocoaWidget||OSX==1006||OSX==1007) != osx-font-smoothing.html osx-font-smoothing-ref.html
 fails-if(!cocoaWidget||OSX==1006||OSX==1007) != osx-font-smoothing-2.html osx-font-smoothing-2-notref.html
 == osx-font-smoothing-2.html osx-font-smoothing-2-ref.html
 
 pref(layout.css.text-align-true-value.enabled,true) == text-align-true.html text-align-true-ref.html
 
 # stray control chars should not be invisible, bug 909344
--- a/media/libvpx/moz.build
+++ b/media/libvpx/moz.build
@@ -37,18 +37,18 @@ if CONFIG['VPX_X86_ASM'] and CONFIG['OS_
 
 if CONFIG['VPX_X86_ASM']:
     SOURCES += files['X86_ASM']
 
     if '64' in CONFIG['OS_TEST']:
         SOURCES += files['X86-64_ASM']
 
     # AVX2 only supported on
-    # Darwin and mingw toolchains right now
-    if CONFIG['OS_TARGET'] == 'Darwin' or (CONFIG['OS_TARGET'] == 'WINNT' and CONFIG['GNU_CC']):
+    # Darwin and Windows toolchains right now
+    if CONFIG['OS_TARGET'] in ('Darwin', 'WINNT'):
         SOURCES += files['AVX2']
 
     #postproc is only enabled on x86 with asm
     SOURCES += files['VP8_POSTPROC']
 
 arm_asm_files = []
 if CONFIG['VPX_ARM_ASM']:
     arm_asm_files += files['ARM_ASM']
--- a/media/libvpx/update.py
+++ b/media/libvpx/update.py
@@ -7,18 +7,18 @@ import os
 import re
 import shutil
 import sys
 import subprocess
 from pprint import pprint
 from StringIO import StringIO
 
 PLATFORMS= [
-  'x86-win32-vs8',
-  'x86_64-win64-vs8',
+  'x86-win32-vs12',
+  'x86_64-win64-vs12',
   'x86-linux-gcc',
   'x86_64-linux-gcc',
   'generic-gnu',
   'x86-darwin9-gcc',
   'x86_64-darwin9-gcc',
   'armv7-android-gcc',
   'x86-win32-gcc',
   'x86_64-win64-gcc',
--- a/media/libvpx/vp8_rtcd.h
+++ b/media/libvpx/vp8_rtcd.h
@@ -4,25 +4,25 @@
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.
  */
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vp8_rtcd_x86_64-win64-vs8.h"
+#include "vp8_rtcd_x86_64-win64-vs12.h"
 #else
 #include "vp8_rtcd_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vp8_rtcd_x86-win32-vs8.h"
+#include "vp8_rtcd_x86-win32-vs12.h"
 #else
 #include "vp8_rtcd_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vp8_rtcd_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vp8_rtcd_x86-win32-vs8.h
rename to media/libvpx/vp8_rtcd_x86-win32-vs12.h
rename from media/libvpx/vp8_rtcd_x86_64-win64-vs8.h
rename to media/libvpx/vp8_rtcd_x86_64-win64-vs12.h
--- a/media/libvpx/vp9_rtcd.h
+++ b/media/libvpx/vp9_rtcd.h
@@ -4,25 +4,25 @@
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.
  */
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vp9_rtcd_x86_64-win64-vs8.h"
+#include "vp9_rtcd_x86_64-win64-vs12.h"
 #else
 #include "vp9_rtcd_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vp9_rtcd_x86-win32-vs8.h"
+#include "vp9_rtcd_x86-win32-vs12.h"
 #else
 #include "vp9_rtcd_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vp9_rtcd_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vp9_rtcd_x86-win32-vs8.h
rename to media/libvpx/vp9_rtcd_x86-win32-vs12.h
--- a/media/libvpx/vp9_rtcd_x86-win32-vs8.h
+++ b/media/libvpx/vp9_rtcd_x86-win32-vs12.h
@@ -26,21 +26,23 @@ union int_mv;
 struct yv12_buffer_config;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
+int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 
 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -51,21 +53,23 @@ RTCD_EXTERN void (*vp9_convolve8_avg_hor
 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -218,24 +222,26 @@ void vp9_fdct16x16_sse2(const int16_t *i
 RTCD_EXTERN void (*vp9_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct16x16_1)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride);
+void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct32x32_1)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride);
+void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct4x4)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride);
@@ -270,16 +276,17 @@ int vp9_full_search_sadx8(const struct m
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 
 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_sse2(const int16_t *);
@@ -362,16 +369,17 @@ RTCD_EXTERN void (*vp9_iht8x8_64_add)(co
 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_16_add vp9_iwht4x4_16_add_c
 
 void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_1_add vp9_iwht4x4_1_add_c
 
 void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 RTCD_EXTERN void (*vp9_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
@@ -406,16 +414,17 @@ void vp9_lpf_vertical_8_sse2(uint8_t *s,
 RTCD_EXTERN void (*vp9_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vp9_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 
 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 
 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 
 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
@@ -512,16 +521,17 @@ unsigned int vp9_sad32x32_avg_c(const ui
 unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 
 void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad32x32x3 vp9_sad32x32x3_c
 
 void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad32x32x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad32x32x8 vp9_sad32x32x8_c
 
 unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -588,16 +598,17 @@ unsigned int vp9_sad64x64_avg_c(const ui
 unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 
 void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad64x64x3 vp9_sad64x64x3_c
 
 void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad64x64x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad64x64x8 vp9_sad64x64x8_c
 
 unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 RTCD_EXTERN unsigned int (*vp9_sad8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
@@ -669,16 +680,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -694,16 +706,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -734,16 +747,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -759,16 +773,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -817,52 +832,57 @@ void vp9_v_predictor_4x4_sse(uint8_t *ds
 RTCD_EXTERN void (*vp9_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 
 void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 
 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -879,34 +899,38 @@ void vp9_rtcd(void);
 static void setup_rtcd_internal(void)
 {
     int flags = x86_simd_caps();
 
     (void)flags;
 
     vp9_block_error = vp9_block_error_c;
     if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2;
+    if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2;
     vp9_convolve8 = vp9_convolve8_c;
     if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2;
     vp9_convolve8_avg = vp9_convolve8_avg_c;
     if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3;
     vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c;
     if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3;
     vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c;
     if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3;
     vp9_convolve8_horiz = vp9_convolve8_horiz_c;
     if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2;
     vp9_convolve8_vert = vp9_convolve8_vert_c;
     if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2;
     vp9_convolve_avg = vp9_convolve_avg_c;
     if (flags & HAS_SSE2) vp9_convolve_avg = vp9_convolve_avg_sse2;
     vp9_convolve_copy = vp9_convolve_copy_c;
     if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2;
     vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
     vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
@@ -945,20 +969,22 @@ static void setup_rtcd_internal(void)
     vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c;
     if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse;
     vp9_fdct16x16 = vp9_fdct16x16_c;
     if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2;
     vp9_fdct16x16_1 = vp9_fdct16x16_1_c;
     if (flags & HAS_SSE2) vp9_fdct16x16_1 = vp9_fdct16x16_1_sse2;
     vp9_fdct32x32 = vp9_fdct32x32_c;
     if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2;
     vp9_fdct32x32_1 = vp9_fdct32x32_1_c;
     if (flags & HAS_SSE2) vp9_fdct32x32_1 = vp9_fdct32x32_1_sse2;
     vp9_fdct32x32_rd = vp9_fdct32x32_rd_c;
     if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2;
     vp9_fdct4x4 = vp9_fdct4x4_c;
     if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2;
     vp9_fdct4x4_1 = vp9_fdct4x4_1_c;
     if (flags & HAS_SSE2) vp9_fdct4x4_1 = vp9_fdct4x4_1_sse2;
     vp9_fdct8x8 = vp9_fdct8x8_c;
     if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2;
     vp9_fdct8x8_1 = vp9_fdct8x8_1_c;
     if (flags & HAS_SSE2) vp9_fdct8x8_1 = vp9_fdct8x8_1_sse2;
@@ -970,16 +996,17 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2;
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
     vp9_fwht4x4 = vp9_fwht4x4_c;
     if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx;
     vp9_get16x16var = vp9_get16x16var_c;
     if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2;
+    if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
     vp9_get8x8var = vp9_get8x8var_c;
     if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2;
     vp9_get_mb_ss = vp9_get_mb_ss_c;
     if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2;
     vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
     vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3;
@@ -1014,16 +1041,17 @@ static void setup_rtcd_internal(void)
     vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
     if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
     vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
     if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2;
     vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
     if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2;
     vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2;
+    if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2;
     vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c;
     if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx;
     vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2;
     vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2;
     vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2;
@@ -1036,16 +1064,17 @@ static void setup_rtcd_internal(void)
     vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2;
     vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c;
     if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2;
     vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2;
     vp9_mse16x16 = vp9_mse16x16_c;
     if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2;
+    if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
     vp9_mse16x8 = vp9_mse16x8_c;
     if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2;
     vp9_mse8x16 = vp9_mse8x16_c;
     if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2;
     vp9_mse8x8 = vp9_mse8x8_c;
     if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2;
     vp9_sad16x16 = vp9_sad16x16_c;
     if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2;
@@ -1078,16 +1107,17 @@ static void setup_rtcd_internal(void)
     vp9_sad32x16x4d = vp9_sad32x16x4d_c;
     if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2;
     vp9_sad32x32 = vp9_sad32x32_c;
     if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2;
     vp9_sad32x32_avg = vp9_sad32x32_avg_c;
     if (flags & HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2;
     vp9_sad32x32x4d = vp9_sad32x32x4d_c;
     if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2;
     vp9_sad32x64 = vp9_sad32x64_c;
     if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2;
     vp9_sad32x64_avg = vp9_sad32x64_avg_c;
     if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2;
     vp9_sad32x64x4d = vp9_sad32x64x4d_c;
     if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2;
     vp9_sad4x4 = vp9_sad4x4_c;
     if (flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse;
@@ -1110,16 +1140,17 @@ static void setup_rtcd_internal(void)
     vp9_sad64x32x4d = vp9_sad64x32x4d_c;
     if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2;
     vp9_sad64x64 = vp9_sad64x64_c;
     if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2;
     vp9_sad64x64_avg = vp9_sad64x64_avg_c;
     if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2;
     vp9_sad64x64x4d = vp9_sad64x64x4d_c;
     if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2;
     vp9_sad8x16 = vp9_sad8x16_c;
     if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2;
     vp9_sad8x16_avg = vp9_sad8x16_avg_c;
     if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2;
     vp9_sad8x16x3 = vp9_sad8x16x3_c;
     if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3;
     vp9_sad8x16x4d = vp9_sad8x16x4d_c;
     if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2;
@@ -1147,31 +1178,33 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3;
     vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3;
     vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2;
     vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3;
     vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_c;
     if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3;
     vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_c;
     if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3;
     vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3;
     vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2;
     vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3;
     vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3;
     vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2;
@@ -1186,31 +1219,33 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3;
     vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3;
     vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2;
     vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3;
     vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_c;
     if (flags & HAS_SSE) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3;
     vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_c;
     if (flags & HAS_SSE) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3;
     vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3;
     vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2;
     vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3;
     vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3;
     vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2;
@@ -1230,34 +1265,39 @@ static void setup_rtcd_internal(void)
     vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c;
     if (flags & HAS_SSE2) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_sse2;
     vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c;
     if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse;
     vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c;
     if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse;
     vp9_variance16x16 = vp9_variance16x16_c;
     if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
     vp9_variance16x32 = vp9_variance16x32_c;
     if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2;
     vp9_variance16x8 = vp9_variance16x8_c;
     if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2;
     vp9_variance32x16 = vp9_variance32x16_c;
     if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
     vp9_variance32x32 = vp9_variance32x32_c;
     if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
     vp9_variance32x64 = vp9_variance32x64_c;
     if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2;
     vp9_variance4x4 = vp9_variance4x4_c;
     if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2;
     vp9_variance4x8 = vp9_variance4x8_c;
     if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2;
     vp9_variance64x32 = vp9_variance64x32_c;
     if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
     vp9_variance64x64 = vp9_variance64x64_c;
     if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
     vp9_variance8x16 = vp9_variance8x16_c;
     if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2;
     vp9_variance8x4 = vp9_variance8x4_c;
     if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2;
     vp9_variance8x8 = vp9_variance8x8_c;
     if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2;
 }
 #endif
rename from media/libvpx/vp9_rtcd_x86_64-win64-vs8.h
rename to media/libvpx/vp9_rtcd_x86_64-win64-vs12.h
--- a/media/libvpx/vp9_rtcd_x86_64-win64-vs8.h
+++ b/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h
@@ -26,21 +26,23 @@ union int_mv;
 struct yv12_buffer_config;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
-#define vp9_block_error vp9_block_error_sse2
+int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
+RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 
 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -51,21 +53,23 @@ RTCD_EXTERN void (*vp9_convolve8_avg_hor
 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vp9_convolve_avg vp9_convolve_avg_sse2
 
 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -218,25 +222,27 @@ void vp9_fdct16x16_sse2(const int16_t *i
 #define vp9_fdct16x16 vp9_fdct16x16_sse2
 
 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct16x16_1 vp9_fdct16x16_1_sse2
 
 void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride);
-#define vp9_fdct32x32 vp9_fdct32x32_sse2
+void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride);
+RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct32x32_1 vp9_fdct32x32_1_sse2
 
 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride);
-#define vp9_fdct32x32_rd vp9_fdct32x32_rd_sse2
+void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride);
+RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct4x4 vp9_fdct4x4_sse2
 
 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct4x4_1 vp9_fdct4x4_1_sse2
@@ -271,17 +277,18 @@ int vp9_full_search_sadx8(const struct m
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fwht4x4 vp9_fwht4x4_mmx
 
 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_sse2
+void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 
 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 #define vp9_get8x8var vp9_get8x8var_sse2
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_sse2(const int16_t *);
 #define vp9_get_mb_ss vp9_get_mb_ss_sse2
@@ -365,17 +372,18 @@ void vp9_iht8x8_64_add_sse2(const tran_l
 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_16_add vp9_iwht4x4_16_add_c
 
 void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_1_add vp9_iwht4x4_1_add_c
 
 void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
-#define vp9_lpf_horizontal_16 vp9_lpf_horizontal_16_sse2
+void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 #define vp9_lpf_horizontal_4 vp9_lpf_horizontal_4_mmx
 
 void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vp9_lpf_horizontal_4_dual vp9_lpf_horizontal_4_dual_sse2
@@ -409,17 +417,18 @@ void vp9_lpf_vertical_8_sse2(uint8_t *s,
 #define vp9_lpf_vertical_8 vp9_lpf_vertical_8_sse2
 
 void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_sse2
 
 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_sse2
+unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 
 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 #define vp9_mse16x8 vp9_mse16x8_sse2
 
 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 #define vp9_mse8x16 vp9_mse8x16_sse2
@@ -519,17 +528,18 @@ unsigned int vp9_sad32x32_avg_c(const ui
 unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 #define vp9_sad32x32_avg vp9_sad32x32_avg_sse2
 
 void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad32x32x3 vp9_sad32x32x3_c
 
 void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
-#define vp9_sad32x32x4d vp9_sad32x32x4d_sse2
+void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad32x32x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad32x32x8 vp9_sad32x32x8_c
 
 unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 #define vp9_sad32x64 vp9_sad32x64_sse2
 
@@ -595,17 +605,18 @@ unsigned int vp9_sad64x64_avg_c(const ui
 unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 #define vp9_sad64x64_avg vp9_sad64x64_avg_sse2
 
 void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad64x64x3 vp9_sad64x64x3_c
 
 void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
-#define vp9_sad64x64x4d vp9_sad64x64x4d_sse2
+void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad64x64x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad64x64x8 vp9_sad64x64x8_c
 
 unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 #define vp9_sad8x16 vp9_sad8x16_sse2
 
@@ -676,16 +687,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -701,16 +713,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -741,16 +754,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -766,16 +780,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -825,53 +840,58 @@ void vp9_v_predictor_4x4_sse(uint8_t *ds
 #define vp9_v_predictor_4x4 vp9_v_predictor_4x4_sse
 
 void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse
 
 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_sse2
+unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance16x32 vp9_variance16x32_sse2
 
 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance16x8 vp9_variance16x8_sse2
 
 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_sse2
+unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_sse2
+unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance32x64 vp9_variance32x64_sse2
 
 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance4x4 vp9_variance4x4_sse2
 
 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance4x8 vp9_variance4x8_sse2
 
 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_sse2
+unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_sse2
+unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x16 vp9_variance8x16_sse2
 
 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x4 vp9_variance8x4_sse2
@@ -885,28 +905,33 @@ void vp9_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/x86.h"
 static void setup_rtcd_internal(void)
 {
     int flags = x86_simd_caps();
 
     (void)flags;
 
+    vp9_block_error = vp9_block_error_sse2;
+    if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2;
     vp9_convolve8 = vp9_convolve8_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2;
     vp9_convolve8_avg = vp9_convolve8_avg_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3;
     vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3;
     vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3;
     vp9_convolve8_horiz = vp9_convolve8_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2;
     vp9_convolve8_vert = vp9_convolve8_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2;
     vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
     vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
     vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3;
     vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3;
@@ -927,109 +952,137 @@ static void setup_rtcd_internal(void)
     vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3;
     vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3;
     vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3;
     vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3;
+    vp9_fdct32x32 = vp9_fdct32x32_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2;
+    vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2;
     vp9_fdct8x8 = vp9_fdct8x8_sse2;
     if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3;
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
+    vp9_get16x16var = vp9_get16x16var_sse2;
+    if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
     vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
     vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3;
     vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3;
     vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3;
     vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3;
     vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3;
     vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
+    vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2;
+    if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2;
+    vp9_mse16x16 = vp9_mse16x16_sse2;
+    if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
     vp9_quantize_b = vp9_quantize_b_c;
     if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
     vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
     if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3;
     vp9_quantize_fp = vp9_quantize_fp_c;
     if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3;
     vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c;
     if (flags & HAS_SSSE3) vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3;
     vp9_sad16x16x3 = vp9_sad16x16x3_c;
     if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3;
     if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3;
     vp9_sad16x8x3 = vp9_sad16x8x3_c;
     if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3;
     if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3;
+    vp9_sad32x32x4d = vp9_sad32x32x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2;
     vp9_sad4x4x3 = vp9_sad4x4x3_c;
     if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3;
+    vp9_sad64x64x4d = vp9_sad64x64x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2;
     vp9_sad8x16x3 = vp9_sad8x16x3_c;
     if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3;
     vp9_sad8x8x3 = vp9_sad8x8x3_c;
     if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3;
     vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3;
     vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3;
     vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3;
     vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3;
     vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2;
     vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3;
     vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3;
     vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3;
     vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3;
     vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2;
     vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3;
     vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3;
     vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3;
     vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3;
     vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3;
     vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3;
     vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3;
     vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2;
     vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3;
     vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3;
     vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3;
     vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3;
     vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2;
     vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3;
     vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3;
     vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3;
+    vp9_variance16x16 = vp9_variance16x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
+    vp9_variance32x16 = vp9_variance32x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
+    vp9_variance32x32 = vp9_variance32x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
+    vp9_variance64x32 = vp9_variance64x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
+    vp9_variance64x64 = vp9_variance64x64_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
 }
 #endif
 
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 
 #endif
--- a/media/libvpx/vpx_config.asm
+++ b/media/libvpx/vpx_config.asm
@@ -2,12 +2,12 @@
 %include "vpx_config_x86-linux-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,elf64
 %include "vpx_config_x86_64-linux-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,macho32
 %include "vpx_config_x86-darwin9-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,macho64
 %include "vpx_config_x86_64-darwin9-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,win32
-%include "vpx_config_x86-win32-vs8.asm"
+%include "vpx_config_x86-win32-vs12.asm"
 %elifidn __OUTPUT_FORMAT__,x64
-%include "vpx_config_x86_64-win64-vs8.asm"
+%include "vpx_config_x86_64-win64-vs12.asm"
 %endif
--- a/media/libvpx/vpx_config.h
+++ b/media/libvpx/vpx_config.h
@@ -1,22 +1,22 @@
 #if defined(VPX_X86_ASM)
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vpx_config_x86_64-win64-vs8.h"
+#include "vpx_config_x86_64-win64-vs12.h"
 #else
 #include "vpx_config_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vpx_config_x86-win32-vs8.h"
+#include "vpx_config_x86-win32-vs12.h"
 #else
 #include "vpx_config_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vpx_config_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vpx_config_x86-win32-vs8.asm
rename to media/libvpx/vpx_config_x86-win32-vs12.asm
--- a/media/libvpx/vpx_config_x86-win32-vs8.asm
+++ b/media/libvpx/vpx_config_x86-win32-vs12.asm
@@ -12,18 +12,18 @@ HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MIPS64 equ 0
 HAVE_MMX equ 1
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
 HAVE_SSSE3 equ 1
 HAVE_SSE4_1 equ 1
-HAVE_AVX equ 0
-HAVE_AVX2 equ 0
+HAVE_AVX equ 1
+HAVE_AVX2 equ 1
 HAVE_ALTIVEC equ 0
 HAVE_VPX_PORTS equ 1
 HAVE_STDINT_H equ 0
 HAVE_ALT_TREE_LAYOUT equ 0
 HAVE_PTHREAD_H equ 0
 HAVE_SYS_MMAN_H equ 0
 HAVE_UNISTD_H equ 0
 CONFIG_EXTERNAL_BUILD equ 1
rename from media/libvpx/vpx_config_x86-win32-vs8.h
rename to media/libvpx/vpx_config_x86-win32-vs12.h
--- a/media/libvpx/vpx_config_x86-win32-vs8.h
+++ b/media/libvpx/vpx_config_x86-win32-vs12.h
@@ -24,18 +24,18 @@
 #define HAVE_DSPR2 0
 #define HAVE_MIPS64 0
 #define HAVE_MMX 1
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
 #define HAVE_SSSE3 1
 #define HAVE_SSE4_1 1
-#define HAVE_AVX 0
-#define HAVE_AVX2 0
+#define HAVE_AVX 1
+#define HAVE_AVX2 1
 #define HAVE_ALTIVEC 0
 #define HAVE_VPX_PORTS 1
 #define HAVE_STDINT_H 0
 #define HAVE_ALT_TREE_LAYOUT 0
 #define HAVE_PTHREAD_H 0
 #define HAVE_SYS_MMAN_H 0
 #define HAVE_UNISTD_H 0
 #define CONFIG_EXTERNAL_BUILD 1
rename from media/libvpx/vpx_config_x86_64-win64-vs8.asm
rename to media/libvpx/vpx_config_x86_64-win64-vs12.asm
--- a/media/libvpx/vpx_config_x86_64-win64-vs8.asm
+++ b/media/libvpx/vpx_config_x86_64-win64-vs12.asm
@@ -12,18 +12,18 @@ HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MIPS64 equ 0
 HAVE_MMX equ 1
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
 HAVE_SSSE3 equ 1
 HAVE_SSE4_1 equ 1
-HAVE_AVX equ 0
-HAVE_AVX2 equ 0
+HAVE_AVX equ 1
+HAVE_AVX2 equ 1
 HAVE_ALTIVEC equ 0
 HAVE_VPX_PORTS equ 1
 HAVE_STDINT_H equ 0
 HAVE_ALT_TREE_LAYOUT equ 0
 HAVE_PTHREAD_H equ 0
 HAVE_SYS_MMAN_H equ 0
 HAVE_UNISTD_H equ 0
 CONFIG_EXTERNAL_BUILD equ 1
rename from media/libvpx/vpx_config_x86_64-win64-vs8.h
rename to media/libvpx/vpx_config_x86_64-win64-vs12.h
--- a/media/libvpx/vpx_config_x86_64-win64-vs8.h
+++ b/media/libvpx/vpx_config_x86_64-win64-vs12.h
@@ -24,18 +24,18 @@
 #define HAVE_DSPR2 0
 #define HAVE_MIPS64 0
 #define HAVE_MMX 1
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
 #define HAVE_SSSE3 1
 #define HAVE_SSE4_1 1
-#define HAVE_AVX 0
-#define HAVE_AVX2 0
+#define HAVE_AVX 1
+#define HAVE_AVX2 1
 #define HAVE_ALTIVEC 0
 #define HAVE_VPX_PORTS 1
 #define HAVE_STDINT_H 0
 #define HAVE_ALT_TREE_LAYOUT 0
 #define HAVE_PTHREAD_H 0
 #define HAVE_SYS_MMAN_H 0
 #define HAVE_UNISTD_H 0
 #define CONFIG_EXTERNAL_BUILD 1
--- a/media/libvpx/vpx_scale_rtcd.h
+++ b/media/libvpx/vpx_scale_rtcd.h
@@ -4,25 +4,25 @@
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.
  */
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vpx_scale_rtcd_x86_64-win64-vs8.h"
+#include "vpx_scale_rtcd_x86_64-win64-vs12.h"
 #else
 #include "vpx_scale_rtcd_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vpx_scale_rtcd_x86-win32-vs8.h"
+#include "vpx_scale_rtcd_x86-win32-vs12.h"
 #else
 #include "vpx_scale_rtcd_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vpx_scale_rtcd_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vpx_scale_rtcd_x86-win32-vs8.h
rename to media/libvpx/vpx_scale_rtcd_x86-win32-vs12.h
rename from media/libvpx/vpx_scale_rtcd_x86_64-win64-vs8.h
rename to media/libvpx/vpx_scale_rtcd_x86_64-win64-vs12.h
--- a/media/webrtc/signaling/src/media-conduit/VideoConduit.cpp
+++ b/media/webrtc/signaling/src/media-conduit/VideoConduit.cpp
@@ -1102,17 +1102,18 @@ WebrtcVideoConduit::SendVideoFrame(unsig
   CSFLogDebug(logTag, "%s Inserted a frame", __FUNCTION__);
   return kMediaConduitNoError;
 }
 
 // Transport Layer Callbacks
 MediaConduitErrorCode
 WebrtcVideoConduit::ReceivedRTPPacket(const void *data, int len)
 {
-  CSFLogDebug(logTag, "%s: Channel %d, Len %d ", __FUNCTION__, mChannel, len);
+  CSFLogDebug(logTag, "%s: seq# %u, Channel %d, Len %d ", __FUNCTION__,
+              (uint16_t) ntohs(((uint16_t*) data)[1]), mChannel, len);
 
   // Media Engine should be receiving already.
   if(mEngineReceiving)
   {
     // let the engine know of a RTP packet to decode
     // XXX we need to get passed the time the packet was received
     if(mPtrViENetwork->ReceivedRTPPacket(mChannel, data, len, webrtc::PacketTime()) == -1)
     {
--- a/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
+++ b/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
@@ -13,24 +13,26 @@
 #include "webrtc/modules/interface/module_common_types.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_utility.h"
 #include "webrtc/system_wrappers/interface/trace.h"
 
 namespace webrtc {
 namespace {
 
-enum Nalu {
-  kSlice = 1,
-  kIdr = 5,
-  kSei = 6,
-  kSps = 7,
-  kPps = 8,
-  kStapA = 24,
-  kFuA = 28
+enum Nalu { // NAL unit types: 0-23 from H.264, 24-31 from RFC 6184
+  kSlice = 1, // I/P/B slice
+  kIdr = 5, // IDR slice
+  kSei = 6, // Supplemental Enhancement Information
+  kSeiRecPt = 6, // Recovery Point SEI payload type, not a NAL unit type; ParseSingleNalu compares it with the byte after the SEI NAL header
+  kSps = 7, // Sequence Parameter Set
+  kPps = 8, // Picture Parameter Set
+  kPrefix = 14, // Prefix
+  kStapA = 24, // Single-Time Aggregation Packet Type A
+  kFuA = 28 // Fragmentation Unit Type A
+};
 
 static const size_t kNalHeaderSize = 1;
 static const size_t kFuAHeaderSize = 2;
 static const size_t kLengthFieldSize = 2;
 
 // Bit masks for FU (A and B) indicators.
 enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
@@ -46,24 +48,48 @@ void ParseSingleNalu(RtpDepacketizer::Pa
   parsed_payload->type.Video.codec = kRtpVideoH264;
   parsed_payload->type.Video.isFirstPacket = true;
   RTPVideoHeaderH264* h264_header =
       &parsed_payload->type.Video.codecHeader.H264;
   h264_header->single_nalu = true;
   h264_header->stap_a = false;
 
   uint8_t nal_type = payload_data[0] & kTypeMask;
+  size_t offset = 0;
   if (nal_type == kStapA) {
-    nal_type = payload_data[3] & kTypeMask;
+    offset = 3;
+    if (offset >= payload_data_length) {
+      return; // XXX malformed
+    }
+    nal_type = payload_data[offset] & kTypeMask;
     h264_header->stap_a = true;
   }
 
+  // key frames start with SPS, PPS, IDR, or Recovery Point SEI
+  // Recovery Point SEI's are used in AIR and GDR refreshes, which don't
+  // send large iframes, and instead use forms of incremental/continuous refresh.
   switch (nal_type) {
+    case kSei: // check if it is a Recovery Point SEI (aka GDR)
+      if (offset+1 >= payload_data_length) {
+        return; // XXX malformed
+      }
+      if (payload_data[offset+1] != kSeiRecPt) {
+        parsed_payload->frame_type = kVideoFrameDelta;
+        break; // some other form of SEI - not a keyframe
+      }
+      // else fall through since GDR is like IDR
     case kSps:
     case kPps:
+      // These are always combined with other packets with the same timestamp...
+      // XXX To support 'solitary' SPS/PPS/etc, either fix the jitter buffer to
+      // accept multiple sessions with the same timestamp, or pass marker info
+      // down into here (SPS/PPS as a pair without a kIdr NALU would still be
+      // painful, but might work).
+      h264_header->single_nalu = false;
+      // fall through...
     case kIdr:
       parsed_payload->frame_type = kVideoFrameKey;
       break;
     default:
       parsed_payload->frame_type = kVideoFrameDelta;
       break;
   }
 }
--- a/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
@@ -459,22 +459,33 @@ int VCMSessionInfo::InsertPacket(const V
       break;
 
   // Check for duplicate packets.
   if (rit != packets_.rend() &&
       (*rit).seqNum == packet.seqNum && (*rit).sizeBytes > 0)
     return -2;
 
   if (packet.codec == kVideoCodecH264) {
-    frame_type_ = packet.frameType;
+    // H.264 can have leading or trailing non-VCL (Video Coding Layer)
+    // NALUs, such as SPS/PPS/SEI and others.  Also, the RTP marker bit is
+    // not reliable for the last packet of a frame (RFC 6184 5.1 - "Decoders
+    // [] MUST NOT rely on this property"), so allow out-of-order packets to
+    // update the first and last seq# range.  Also mark as a key frame if
+    // any packet is of that type.
+    if (frame_type_ != kVideoFrameKey) {
+      frame_type_ = packet.frameType;
+    }
     if (packet.isFirstPacket &&
         (first_packet_seq_num_ == -1 ||
          IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) {
       first_packet_seq_num_ = packet.seqNum;
     }
+    // Note: the code does *not* currently handle the Marker bit being totally
+    // absent from a frame.  It does not, however, depend on it being on the last
+    // packet of the 'frame'/'session'.
     if (packet.markerBit &&
         (last_packet_seq_num_ == -1 ||
          IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) {
       last_packet_seq_num_ = packet.seqNum;
     }
   } else {
     // Only insert media packets between first and last packets (when
     // available).
@@ -508,16 +519,19 @@ int VCMSessionInfo::InsertPacket(const V
     }
   }
 
   // The insert operation invalidates the iterator |rit|.
   PacketIterator packet_list_it = packets_.insert(rit.base(), packet);
 
   int returnLength = InsertBuffer(frame_buffer, packet_list_it);
   UpdateCompleteSession();
+  // We call MakeDecodable() before decoding, which removes packets after a loss
+  // (and which means h.264 mode 1 frames with a loss in the first packet will be
+  // totally removed)
   if (decode_error_mode == kWithErrors)
     decodable_ = true;
   else if (decode_error_mode == kSelectiveErrors)
     UpdateDecodableSession(frame_data);
   return returnLength;
 }
 
 void VCMSessionInfo::InformOfEmptyPacket(uint16_t seq_num) {
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -205,16 +205,22 @@
   },
   "CYCLE_COLLECTOR_ASYNC_SNOW_WHITE_FREEING": {
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "10000",
     "n_buckets": 50,
     "description": "Time spent on one asynchronous SnowWhite freeing (ms)"
   },
+  "DEVICE_RESET_REASON": {
+    "expires_in_version": "never",
+    "kind": "enumerated",
+    "n_values": 10,
+    "description": "GPU Device Reset Reason (ok, hung, removed, reset, internal error, invalid call)"
+  },
   "FORGET_SKIPPABLE_MAX": {
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "10000",
     "n_buckets": 50,
     "description": "Max time spent on one forget skippable (ms)"
   },
   "GC_REASON_2": {
--- a/widget/nsBaseWidget.cpp
+++ b/widget/nsBaseWidget.cpp
@@ -176,46 +176,48 @@ WidgetShutdownObserver::Observe(nsISuppo
 
 void
 nsBaseWidget::Shutdown()
 {
   DestroyCompositor();
   mShutdownObserver = nullptr;
 }
 
-static void DeferredDestroyCompositor(CompositorParent* aCompositorParent,
-                              CompositorChild* aCompositorChild)
+static void DeferredDestroyCompositor(nsRefPtr<CompositorParent> aCompositorParent,
+                                      nsRefPtr<CompositorChild> aCompositorChild)
 {
     // Bug 848949 needs to be fixed before
     // we can close the channel properly
     //aCompositorChild->Close();
-    aCompositorParent->Release();
-    aCompositorChild->Release();
 }
 
 void nsBaseWidget::DestroyCompositor()
 {
   if (mCompositorChild) {
-    mCompositorChild->SendWillStop();
-    mCompositorChild->Destroy();
+    nsRefPtr<CompositorChild> compositorChild = mCompositorChild.forget();
+    nsRefPtr<CompositorParent> compositorParent = mCompositorParent.forget();
+
+    compositorChild->SendWillStop();
+    // New LayerManager, CompositorParent and CompositorChild might be created
+    // as a result of internal GetLayerManager() call.
+    compositorChild->Destroy();
 
     // The call just made to SendWillStop can result in IPC from the
     // CompositorParent to the CompositorChild (e.g. caused by the destruction
     // of shared memory). We need to ensure this gets processed by the
     // CompositorChild before it gets destroyed. It suffices to ensure that
     // events already in the MessageLoop get processed before the
     // CompositorChild is destroyed, so we add a task to the MessageLoop to
     // handle compositor desctruction.
+
+    // The DeferredDestroyCompositor task takes ownership of compositorParent
+    // and compositorChild and will release them when it runs.
     MessageLoop::current()->PostTask(FROM_HERE,
-               NewRunnableFunction(DeferredDestroyCompositor, mCompositorParent,
-                                   mCompositorChild));
-    // The DestroyCompositor task we just added to the MessageLoop will handle
-    // releasing mCompositorParent and mCompositorChild.
-    unused << mCompositorParent.forget();
-    unused << mCompositorChild.forget();
+               NewRunnableFunction(DeferredDestroyCompositor, compositorParent,
+                                   compositorChild));
   }
 }
 
 //-------------------------------------------------------------------------
 //
 // nsBaseWidget destructor
 //
 //-------------------------------------------------------------------------
--- a/widget/tests/test_platform_colors.xul
+++ b/widget/tests/test_platform_colors.xul
@@ -50,17 +50,17 @@ var colors = {
   "threedshadow": ["rgb(224, 224, 224)"],
   "window": ["rgb(255, 255, 255)"],
   "windowframe": ["rgb(204, 204, 204)"],
   "windowtext": ["rgb(0, 0, 0)"],
   "-moz-activehyperlinktext": ["rgb(238, 0, 0)"],
   "-moz-buttondefault": ["rgb(220, 220, 220)"],
   "-moz-buttonhoverface": ["rgb(240, 240, 240)"],
   "-moz-buttonhovertext": ["rgb(0, 0, 0)"],
-  "-moz-cellhighlight": ["rgb(212, 212, 212)"],
+  "-moz-cellhighlight": ["rgb(212, 212, 212)", "rgb(220, 220, 220)"],
   "-moz-cellhighlighttext": ["rgb(0, 0, 0)"],
   "-moz-eventreerow": ["rgb(255, 255, 255)"],
   "-moz-field": ["rgb(255, 255, 255)"],
   "-moz-fieldtext": ["rgb(0, 0, 0)"],
   "-moz-dialog": ["rgb(232, 232, 232)"],
   "-moz-dialogtext": ["rgb(0, 0, 0)"],
   "-moz-dragtargetzone": ["rgb(199, 208, 218)", "rgb(198, 198, 198)", "rgb(180, 213, 255)", "rgb(250, 236, 115)", "rgb(255, 176, 139)", "rgb(255, 209, 129)", "rgb(194, 249, 144)", "rgb(232, 184, 255)"],
   "-moz-hyperlinktext": ["rgb(0, 0, 238)"],