merge mozilla-inbound to mozilla-central a=merge FIREFOX_AURORA_38_BASE
authorCarsten "Tomcat" Book <cbook@mozilla.com>
Mon, 23 Feb 2015 13:30:28 +0100
changeset 230283 98086da94ccdc88f6de86774ce3d1fa258dc7c44
parent 230266 627e0703a68ae745d60fb5fa4e18ec4bd5186dfb (current diff)
parent 230282 f7db65ae3c38a4ca3e2050745dd1b4e28b714e76 (diff)
child 230284 0ae0c8b511b552fd06db3e4ec144a0824743ce76
child 230293 ace95bf5bd7bc2e14daf9d46679f14d38d313467
child 230299 f4d73ea8f2264f8ffd03415960603313352ac384
child 230340 a69b59f490dbe58a975a2fd796d6c0a63e9516a2
push id14025
push usercbook@mozilla.com
push dateMon, 23 Feb 2015 12:47:32 +0000
treeherderb2g-inbound@0ae0c8b511b5 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmerge
milestone38.0a1
merge mozilla-inbound to mozilla-central a=merge
media/libvpx/vp8_rtcd_x86-win32-vs8.h
media/libvpx/vp8_rtcd_x86_64-win64-vs8.h
media/libvpx/vp9_rtcd_x86-win32-vs8.h
media/libvpx/vp9_rtcd_x86_64-win64-vs8.h
media/libvpx/vpx_config_x86-win32-vs8.asm
media/libvpx/vpx_config_x86-win32-vs8.h
media/libvpx/vpx_config_x86_64-win64-vs8.asm
media/libvpx/vpx_config_x86_64-win64-vs8.h
media/libvpx/vpx_scale_rtcd_x86-win32-vs8.h
media/libvpx/vpx_scale_rtcd_x86_64-win64-vs8.h
--- a/gfx/layers/d3d11/TextureD3D11.cpp
+++ b/gfx/layers/d3d11/TextureD3D11.cpp
@@ -820,17 +820,17 @@ SyncObjectD3D11::FinalizeFrame()
       gfxCriticalError() << "Failed to get KeyedMutex: " << hexa(hr);
       MOZ_CRASH();
     }
   }
 
   if (mD3D10SyncedTextures.size()) {
     RefPtr<IDXGIKeyedMutex> mutex;
     hr = mD3D10Texture->QueryInterface((IDXGIKeyedMutex**)byRef(mutex));
-    hr = mutex->AcquireSync(0, 10000);
+    hr = mutex->AcquireSync(0, 20000);
 
     if (hr == WAIT_TIMEOUT) {
       MOZ_CRASH();
     }
 
     D3D10_BOX box;
     box.front = box.top = box.left = 0;
     box.back = box.bottom = box.right = 1;
--- a/gfx/thebes/gfxPlatform.h
+++ b/gfx/thebes/gfxPlatform.h
@@ -154,16 +154,26 @@ GetBackendName(mozilla::gfx::BackendType
       case mozilla::gfx::BackendType::DIRECT2D1_1:
         return "direct2d 1.1";
       case mozilla::gfx::BackendType::NONE:
         return "none";
   }
   MOZ_CRASH("Incomplete switch");
 }
 
+enum class DeviceResetReason
+{
+  OK = 0,
+  HUNG,
+  REMOVED,
+  RESET,
+  DRIVER_ERROR,
+  INVALID_CALL
+};
+
 class gfxPlatform {
     friend class SRGBOverrideObserver;
 
 public:
     typedef mozilla::gfx::Color Color;
     typedef mozilla::gfx::DataSourceSurface DataSourceSurface;
     typedef mozilla::gfx::DrawTarget DrawTarget;
     typedef mozilla::gfx::IntSize IntSize;
@@ -430,17 +440,17 @@ public:
      * Whether to use the SIL Graphite rendering engine
      * (for fonts that include Graphite tables)
      */
     bool UseGraphiteShaping();
 
     // check whether format is supported on a platform or not (if unclear, returns true)
     virtual bool IsFontFormatSupported(nsIURI *aFontURI, uint32_t aFormatFlags) { return false; }
 
-    virtual bool DidRenderingDeviceReset() { return false; }
+    virtual bool DidRenderingDeviceReset(DeviceResetReason* aResetReason = nullptr) { return false; }
 
     void GetPrefFonts(nsIAtom *aLanguage, nsString& array, bool aAppendUnicode = true);
 
     // in some situations, need to make decisions about ambiguous characters, may need to look at multiple pref langs
     void GetLangPrefs(eFontPrefLang aPrefLangs[], uint32_t &aLen, eFontPrefLang aCharLang, eFontPrefLang aPageLang);
     
     /**
      * Iterate over pref fonts given a list of lang groups.  For a single lang
--- a/gfx/thebes/gfxTextRun.h
+++ b/gfx/thebes/gfxTextRun.h
@@ -98,48 +98,51 @@ public:
     }
 
     virtual ~gfxTextRun();
 
     typedef gfxFont::RunMetrics Metrics;
 
     // Public textrun API for general use
 
-    bool IsClusterStart(uint32_t aPos) {
+    bool IsClusterStart(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].IsClusterStart();
     }
-    bool IsLigatureGroupStart(uint32_t aPos) {
+    bool IsLigatureGroupStart(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].IsLigatureGroupStart();
     }
-    bool CanBreakLineBefore(uint32_t aPos) {
-        NS_ASSERTION(aPos < GetLength(), "aPos out of range");
-        return mCharacterGlyphs[aPos].CanBreakBefore() ==
-            CompressedGlyph::FLAG_BREAK_TYPE_NORMAL;
+    bool CanBreakLineBefore(uint32_t aPos) const {
+        return CanBreakBefore(aPos) == CompressedGlyph::FLAG_BREAK_TYPE_NORMAL;
     }
-    bool CanHyphenateBefore(uint32_t aPos) {
-        NS_ASSERTION(aPos < GetLength(), "aPos out of range");
-        return mCharacterGlyphs[aPos].CanBreakBefore() ==
-            CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
+    bool CanHyphenateBefore(uint32_t aPos) const {
+        return CanBreakBefore(aPos) == CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
     }
 
-    bool CharIsSpace(uint32_t aPos) {
+    // Returns a gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_* value
+    // as defined in gfxFont.h (may be NONE, NORMAL or HYPHEN).
+    uint8_t CanBreakBefore(uint32_t aPos) const {
+        NS_ASSERTION(aPos < GetLength(), "aPos out of range");
+        return mCharacterGlyphs[aPos].CanBreakBefore();
+    }
+
+    bool CharIsSpace(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsSpace();
     }
-    bool CharIsTab(uint32_t aPos) {
+    bool CharIsTab(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsTab();
     }
-    bool CharIsNewline(uint32_t aPos) {
+    bool CharIsNewline(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsNewline();
     }
-    bool CharIsLowSurrogate(uint32_t aPos) {
+    bool CharIsLowSurrogate(uint32_t aPos) const {
         NS_ASSERTION(aPos < GetLength(), "aPos out of range");
         return mCharacterGlyphs[aPos].CharIsLowSurrogate();
     }
 
     // All uint32_t aStart, uint32_t aLength ranges below are restricted to
     // grapheme cluster boundaries! All offsets are in terms of the string
     // passed into MakeTextRun.
     
--- a/gfx/thebes/gfxWindowsPlatform.cpp
+++ b/gfx/thebes/gfxWindowsPlatform.cpp
@@ -13,16 +13,17 @@
 #include "gfxWindowsSurface.h"
 
 #include "nsUnicharUtils.h"
 
 #include "mozilla/Preferences.h"
 #include "mozilla/WindowsVersion.h"
 #include "nsServiceManagerUtils.h"
 #include "nsTArray.h"
+#include "mozilla/Telemetry.h"
 
 #include "nsIWindowsRegKey.h"
 #include "nsIFile.h"
 #include "plbase64.h"
 #include "nsIXULRuntime.h"
 #include "imgLoader.h"
 
 #include "nsIGfxInfo.h"
@@ -469,17 +470,19 @@ gfxWindowsPlatform::GetDPIScale()
 
 void
 gfxWindowsPlatform::UpdateRenderMode()
 {
 /* Pick the default render mode for
  * desktop.
  */
     bool didReset = false;
-    if (DidRenderingDeviceReset()) {
+    DeviceResetReason resetReason = DeviceResetReason::OK;
+    if (DidRenderingDeviceReset(&resetReason)) {
+      Telemetry::Accumulate(Telemetry::DEVICE_RESET_REASON, uint32_t(resetReason));
       mD3D11DeviceInitialized = false;
       mD3D11Device = nullptr;
       mD3D11ContentDevice = nullptr;
       mAdapter = nullptr;
 
       imgLoader::Singleton()->ClearCache(true);
       imgLoader::Singleton()->ClearCache(false);
       Factory::SetDirect3D11Device(nullptr);
@@ -1135,20 +1138,45 @@ gfxWindowsPlatform::IsFontFormatSupporte
         return false;
     }
 
     // no format hint set, need to look at data
     return true;
 }
 
 bool
-gfxWindowsPlatform::DidRenderingDeviceReset()
+gfxWindowsPlatform::DidRenderingDeviceReset(DeviceResetReason* aResetReason)
 {
+  if (aResetReason) {
+    *aResetReason = DeviceResetReason::OK;
+  }
+
   if (mD3D11Device) {
-    if (mD3D11Device->GetDeviceRemovedReason() != S_OK) {
+    HRESULT hr = mD3D11Device->GetDeviceRemovedReason();
+    if (hr != S_OK) {
+      if (aResetReason) {
+        switch (hr) {
+        case DXGI_ERROR_DEVICE_HUNG:
+          *aResetReason = DeviceResetReason::HUNG;
+          break;
+        case DXGI_ERROR_DEVICE_REMOVED:
+          *aResetReason = DeviceResetReason::REMOVED;
+          break;
+        case DXGI_ERROR_DEVICE_RESET:
+          *aResetReason = DeviceResetReason::RESET;
+          break;
+        case DXGI_ERROR_DRIVER_INTERNAL_ERROR:
+          *aResetReason = DeviceResetReason::DRIVER_ERROR;
+          break;
+        case DXGI_ERROR_INVALID_CALL:
+          *aResetReason = DeviceResetReason::INVALID_CALL;
+        default:
+          MOZ_ASSERT(false);
+        }
+      }
       return true;
     }
   }
   if (mD3D11ContentDevice) {
     if (mD3D11ContentDevice->GetDeviceRemovedReason() != S_OK) {
       return true;
     }
   }
--- a/gfx/thebes/gfxWindowsPlatform.h
+++ b/gfx/thebes/gfxWindowsPlatform.h
@@ -205,17 +205,17 @@ public:
                                            const uint8_t* aFontData,
                                            uint32_t aLength);
 
     /**
      * Check whether format is supported on a platform or not (if unclear, returns true)
      */
     virtual bool IsFontFormatSupported(nsIURI *aFontURI, uint32_t aFormatFlags);
 
-    virtual bool DidRenderingDeviceReset();
+    virtual bool DidRenderingDeviceReset(DeviceResetReason* aResetReason = nullptr);
 
     // ClearType is not always enabled even when available (e.g. Windows XP)
     // if either of these prefs are enabled and apply, use ClearType rendering
     bool UseClearTypeForDownloadableFonts();
     bool UseClearTypeAlways();
 
     static void GetDLLVersion(char16ptr_t aDLLPath, nsAString& aVersion);
 
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -7801,27 +7801,28 @@ CodeGenerator::visitGetNameCache(LGetNam
     TypedOrValueRegister output(GetValueOutput(ins));
     bool isTypeOf = ins->mir()->accessKind() != MGetNameCache::NAME;
 
     NameIC cache(liveRegs, isTypeOf, scopeChain, ins->mir()->name(), output);
     cache.setProfilerLeavePC(ins->mir()->profilerLeavePc());
     addCache(ins, allocateCache(cache));
 }
 
-typedef bool (*NameICFn)(JSContext *, size_t, HandleObject, MutableHandleValue);
+typedef bool (*NameICFn)(JSContext *, HandleScript, size_t, HandleObject, MutableHandleValue);
 const VMFunction NameIC::UpdateInfo = FunctionInfo<NameICFn>(NameIC::update);
 
 void
 CodeGenerator::visitNameIC(OutOfLineUpdateCache *ool, DataPtr<NameIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->scopeChainReg());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(NameIC::UpdateInfo, lir);
     StoreValueTo(ic->outputReg()).generate(this);
     restoreLiveIgnore(lir, StoreValueTo(ic->outputReg()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 void
@@ -7878,19 +7879,19 @@ CodeGenerator::visitGetPropertyCacheT(LG
     PropertyName *name = ins->mir()->name();
     bool monitoredResult = ins->mir()->monitoredResult();
     TypedOrValueRegister output(ins->mir()->type(), ToAnyRegister(ins->getDef(0)));
 
     addGetPropertyCache(ins, liveRegs, objReg, name, output, monitoredResult,
                         ins->mir()->profilerLeavePc());
 }
 
-typedef bool (*GetPropertyICFn)(JSContext *, size_t, HandleObject, MutableHandleValue);
-const VMFunction GetPropertyIC::UpdateInfo =
-    FunctionInfo<GetPropertyICFn>(GetPropertyIC::update);
+typedef bool (*GetPropertyICFn)(JSContext *, HandleScript, size_t, HandleObject,
+                                MutableHandleValue);
+const VMFunction GetPropertyIC::UpdateInfo = FunctionInfo<GetPropertyICFn>(GetPropertyIC::update);
 
 void
 CodeGenerator::visitGetPropertyIC(OutOfLineUpdateCache *ool, DataPtr<GetPropertyIC> &ic)
 {
     LInstruction *lir = ool->lir();
 
     if (ic->idempotent()) {
         size_t numLocs;
@@ -7898,16 +7899,17 @@ CodeGenerator::visitGetPropertyIC(OutOfL
         size_t locationBase = addCacheLocations(cacheLocs, &numLocs);
         ic->setLocationInfo(locationBase, numLocs);
     }
 
     saveLive(lir);
 
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(GetPropertyIC::UpdateInfo, lir);
     StoreValueTo(ic->output()).generate(this);
     restoreLiveIgnore(lir, StoreValueTo(ic->output()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 void
@@ -7940,29 +7942,30 @@ CodeGenerator::visitGetElementCacheT(LGe
     ConstantOrRegister index = TypedOrValueRegister(MIRType_Int32, ToAnyRegister(ins->index()));
     TypedOrValueRegister output(ins->mir()->type(), ToAnyRegister(ins->output()));
     const MGetElementCache *mir = ins->mir();
 
     addGetElementCache(ins, obj, index, output, mir->monitoredResult(),
                        mir->allowDoubleResult(), mir->profilerLeavePc());
 }
 
-typedef bool (*GetElementICFn)(JSContext *, size_t, HandleObject, HandleValue, MutableHandleValue);
-const VMFunction GetElementIC::UpdateInfo =
-    FunctionInfo<GetElementICFn>(GetElementIC::update);
+typedef bool (*GetElementICFn)(JSContext *, HandleScript, size_t, HandleObject, HandleValue,
+                               MutableHandleValue);
+const VMFunction GetElementIC::UpdateInfo = FunctionInfo<GetElementICFn>(GetElementIC::update);
 
 void
 CodeGenerator::visitGetElementIC(OutOfLineUpdateCache *ool, DataPtr<GetElementIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->index());
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(GetElementIC::UpdateInfo, lir);
     StoreValueTo(ic->output()).generate(this);
     restoreLiveIgnore(lir, StoreValueTo(ic->output()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 void
@@ -7997,30 +8000,31 @@ CodeGenerator::visitSetElementCacheT(LSe
     else
         value = TypedOrValueRegister(ins->mir()->value()->type(), ToAnyRegister(tmp));
 
     addSetElementCache(ins, obj, unboxIndex, temp, tempDouble, tempFloat32, index, value,
                        ins->mir()->strict(), ins->mir()->guardHoles(),
                        ins->mir()->profilerLeavePc());
 }
 
-typedef bool (*SetElementICFn)(JSContext *, size_t, HandleObject, HandleValue, HandleValue);
-const VMFunction SetElementIC::UpdateInfo =
-    FunctionInfo<SetElementICFn>(SetElementIC::update);
+typedef bool (*SetElementICFn)(JSContext *, HandleScript, size_t, HandleObject, HandleValue,
+                               HandleValue);
+const VMFunction SetElementIC::UpdateInfo = FunctionInfo<SetElementICFn>(SetElementIC::update);
 
 void
 CodeGenerator::visitSetElementIC(OutOfLineUpdateCache *ool, DataPtr<SetElementIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->value());
     pushArg(ic->index());
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(SetElementIC::UpdateInfo, lir);
     restoreLive(lir);
 
     masm.jump(ool->rejoin());
 }
 
 void
 CodeGenerator::visitBindNameCache(LBindNameCache *ins)
@@ -8028,28 +8032,28 @@ CodeGenerator::visitBindNameCache(LBindN
     Register scopeChain = ToRegister(ins->scopeChain());
     Register output = ToRegister(ins->output());
     BindNameIC cache(scopeChain, ins->mir()->name(), output);
     cache.setProfilerLeavePC(ins->mir()->profilerLeavePc());
 
     addCache(ins, allocateCache(cache));
 }
 
-typedef JSObject *(*BindNameICFn)(JSContext *, size_t, HandleObject);
-const VMFunction BindNameIC::UpdateInfo =
-    FunctionInfo<BindNameICFn>(BindNameIC::update);
+typedef JSObject *(*BindNameICFn)(JSContext *, HandleScript, size_t, HandleObject);
+const VMFunction BindNameIC::UpdateInfo = FunctionInfo<BindNameICFn>(BindNameIC::update);
 
 void
 CodeGenerator::visitBindNameIC(OutOfLineUpdateCache *ool, DataPtr<BindNameIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->scopeChainReg());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(BindNameIC::UpdateInfo, lir);
     StoreRegisterTo(ic->outputReg()).generate(this);
     restoreLiveIgnore(lir, StoreRegisterTo(ic->outputReg()).clobbered());
 
     masm.jump(ool->rejoin());
 }
 
 typedef bool (*SetPropertyFn)(JSContext *, HandleObject,
@@ -8133,29 +8137,29 @@ CodeGenerator::visitSetPropertyCacheT(LS
     else
         value = TypedOrValueRegister(ins->valueType(), ToAnyRegister(ins->getOperand(1)));
 
     addSetPropertyCache(ins, liveRegs, objReg, ins->mir()->name(), value,
                         ins->mir()->strict(), ins->mir()->needsTypeBarrier(),
                         ins->mir()->profilerLeavePc());
 }
 
-typedef bool (*SetPropertyICFn)(JSContext *, size_t, HandleObject, HandleValue);
-const VMFunction SetPropertyIC::UpdateInfo =
-    FunctionInfo<SetPropertyICFn>(SetPropertyIC::update);
+typedef bool (*SetPropertyICFn)(JSContext *, HandleScript, size_t, HandleObject, HandleValue);
+const VMFunction SetPropertyIC::UpdateInfo = FunctionInfo<SetPropertyICFn>(SetPropertyIC::update);
 
 void
 CodeGenerator::visitSetPropertyIC(OutOfLineUpdateCache *ool, DataPtr<SetPropertyIC> &ic)
 {
     LInstruction *lir = ool->lir();
     saveLive(lir);
 
     pushArg(ic->value());
     pushArg(ic->object());
     pushArg(Imm32(ool->getCacheIndex()));
+    pushArg(ImmGCPtr(gen->info().script()));
     callVM(SetPropertyIC::UpdateInfo, lir);
     restoreLive(lir);
 
     masm.jump(ool->rejoin());
 }
 
 typedef bool (*ThrowFn)(JSContext *, HandleValue);
 static const VMFunction ThrowInfoCodeGen = FunctionInfo<ThrowFn>(js::Throw);
--- a/js/src/jit/IonCaches.cpp
+++ b/js/src/jit/IonCaches.cpp
@@ -466,16 +466,30 @@ IonCache::updateBaseAddress(JitCode *cod
     fallbackLabel_.repoint(code, &masm);
 }
 
 void
 IonCache::initializeAddCacheState(LInstruction *ins, AddCacheState *addState)
 {
 }
 
+static void *
+GetReturnAddressToIonCode(JSContext *cx)
+{
+    JitFrameIterator iter(cx);
+    MOZ_ASSERT(iter.type() == JitFrame_Exit);
+
+    void *returnAddr = iter.returnAddress();
+#ifdef DEBUG
+    ++iter;
+    MOZ_ASSERT(iter.isIonJS());
+#endif
+    return returnAddr;
+}
+
 static void
 GeneratePrototypeGuards(JSContext *cx, IonScript *ion, MacroAssembler &masm, JSObject *obj,
                         JSObject *holder, Register objectReg, Register scratchReg,
                         Label *failures)
 {
     /*
      * The guards here protect against the effects of JSObject::swap(). If the prototype chain
      * is directly altered, then TI will toss the jitcode, so we don't have to worry about
@@ -1238,17 +1252,17 @@ IsCacheableArrayLength(JSContext *cx, Ha
         return false;
     }
 
     return true;
 }
 
 template <class GetPropCache>
 static GetPropertyIC::NativeGetPropCacheability
-CanAttachNativeGetProp(typename GetPropCache::Context cx, const GetPropCache &cache,
+CanAttachNativeGetProp(JSContext *cx, const GetPropCache &cache,
                        HandleObject obj, HandlePropertyName name,
                        MutableHandleNativeObject holder, MutableHandleShape shape,
                        bool skipArrayLen = false)
 {
     if (!obj)
         return GetPropertyIC::CanAttachNone;
 
     // The lookup needs to be universally pure, otherwise we risk calling hooks out
@@ -1306,17 +1320,17 @@ CanAttachNativeGetProp(typename GetPropC
         // effectful. This is handled by allowGetters()
         return GetPropertyIC::CanAttachCallGetter;
     }
 
     return GetPropertyIC::CanAttachNone;
 }
 
 bool
-GetPropertyIC::allowArrayLength(Context cx, HandleObject obj) const
+GetPropertyIC::allowArrayLength(JSContext *cx, HandleObject obj) const
 {
     if (!idempotent())
         return true;
 
     uint32_t locationIndex, numLocations;
     getLocationInfo(&locationIndex, &numLocations);
 
     IonScript *ion = GetTopJitJSScript(cx)->ionScript();
@@ -1809,27 +1823,28 @@ GetPropertyIC::tryAttachArgumentsLength(
 
     MOZ_ASSERT(!hasNormalArgumentsLengthStub_);
     hasNormalArgumentsLengthStub_ = true;
     return linkAndAttachStub(cx, masm, attacher, ion, "ArgsObj length (normal)");
 }
 
 bool
 GetPropertyIC::tryAttachStub(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                             HandleObject obj, HandlePropertyName name,
-                             void *returnAddr, bool *emitted)
+                             HandleObject obj, HandlePropertyName name, bool *emitted)
 {
     MOZ_ASSERT(!*emitted);
 
     if (!canAttachStub())
         return true;
 
     if (!*emitted && !tryAttachArgumentsLength(cx, outerScript, ion, obj, name, emitted))
         return false;
 
+    void *returnAddr = GetReturnAddressToIonCode(cx);
+
     if (!*emitted && !tryAttachProxy(cx, outerScript, ion, obj, name, returnAddr, emitted))
         return false;
 
     if (!*emitted && !tryAttachNative(cx, outerScript, ion, obj, name, returnAddr, emitted))
         return false;
 
     if (!*emitted && !tryAttachUnboxed(cx, outerScript, ion, obj, name, returnAddr, emitted))
         return false;
@@ -1839,38 +1854,36 @@ GetPropertyIC::tryAttachStub(JSContext *
 
     if (!*emitted)
         JitSpew(JitSpew_IonIC, "Failed to attach GETPROP cache");
 
     return true;
 }
 
 /* static */ bool
-GetPropertyIC::update(JSContext *cx, size_t cacheIndex,
+GetPropertyIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
                       HandleObject obj, MutableHandleValue vp)
 {
-    void *returnAddr;
-    RootedScript outerScript(cx, GetTopJitJSScript(cx, &returnAddr));
     IonScript *ion = outerScript->ionScript();
 
     GetPropertyIC &cache = ion->getCache(cacheIndex).toGetProperty();
     RootedPropertyName name(cx, cache.name());
 
     // Override the return value if we are invalidated (bug 728188).
     AutoDetectInvalidation adi(cx, vp, ion);
 
     // If the cache is idempotent, we will redo the op in the interpreter.
     if (cache.idempotent())
         adi.disable();
 
     // For now, just stop generating new stubs once we hit the stub count
     // limit. Once we can make calls from within generated stubs, a new call
     // stub will be generated instead and the previous stubs unlinked.
     bool emitted = false;
-    if (!cache.tryAttachStub(cx, outerScript, ion, obj, name, returnAddr, &emitted))
+    if (!cache.tryAttachStub(cx, outerScript, ion, obj, name, &emitted))
         return false;
 
     if (cache.idempotent() && !emitted) {
         // Invalidate the cache if the property was not found, or was found on
         // a non-native object. This ensures:
         // 1) The property read has no observable side-effects.
         // 2) There's no need to dynamically monitor the return type. This would
         //    be complicated since (due to GVN) there can be multiple pc's
@@ -2928,88 +2941,88 @@ CanAttachSetUnboxed(JSContext *cx, Handl
         *unboxedType = property->type;
         return true;
     }
 
     return false;
 }
 
 bool
-SetPropertyIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj,
+SetPropertyIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
                       HandleValue value)
 {
-    void *returnAddr;
-    RootedScript script(cx, GetTopJitJSScript(cx, &returnAddr));
-    IonScript *ion = script->ionScript();
+    IonScript *ion = outerScript->ionScript();
     SetPropertyIC &cache = ion->getCache(cacheIndex).toSetProperty();
     RootedPropertyName name(cx, cache.name());
     RootedId id(cx, AtomToId(name));
 
     RootedObjectGroup oldGroup(cx, obj->getGroup(cx));
     if (!oldGroup)
         return false;
 
     // Stop generating new stubs once we hit the stub count limit, see
     // GetPropertyCache.
     NativeSetPropCacheability canCache = CanAttachNone;
     bool addedSetterStub = false;
     if (cache.canAttachStub() && !obj->watched()) {
         if (!addedSetterStub && obj->is<ProxyObject>()) {
+            void *returnAddr = GetReturnAddressToIonCode(cx);
             if (IsCacheableDOMProxy(obj)) {
                 DOMProxyShadowsResult shadows = GetDOMProxyShadowsCheck()(cx, obj, id);
                 if (shadows == ShadowCheckFailed)
                     return false;
                 if (shadows == Shadows) {
-                    if (!cache.attachDOMProxyShadowed(cx, script, ion, obj, returnAddr))
+                    if (!cache.attachDOMProxyShadowed(cx, outerScript, ion, obj, returnAddr))
                         return false;
                     addedSetterStub = true;
                 } else {
                     MOZ_ASSERT(shadows == DoesntShadow || shadows == DoesntShadowUnique);
                     if (shadows == DoesntShadowUnique)
                         cache.reset();
-                    if (!cache.attachDOMProxyUnshadowed(cx, script, ion, obj, returnAddr))
+                    if (!cache.attachDOMProxyUnshadowed(cx, outerScript, ion, obj, returnAddr))
                         return false;
                     addedSetterStub = true;
                 }
             }
 
             if (!addedSetterStub && !cache.hasGenericProxyStub()) {
-                if (!cache.attachGenericProxy(cx, script, ion, returnAddr))
+                if (!cache.attachGenericProxy(cx, outerScript, ion, returnAddr))
                     return false;
                 addedSetterStub = true;
             }
         }
 
         RootedShape shape(cx);
         RootedObject holder(cx);
         bool checkTypeset;
         canCache = CanAttachNativeSetProp(cx, obj, id, cache.value(), cache.needsTypeBarrier(),
                                           &holder, &shape, &checkTypeset);
 
         if (!addedSetterStub && canCache == CanAttachSetSlot) {
             RootedNativeObject nobj(cx, &obj->as<NativeObject>());
-            if (!cache.attachSetSlot(cx, script, ion, nobj, shape, checkTypeset))
+            if (!cache.attachSetSlot(cx, outerScript, ion, nobj, shape, checkTypeset))
                 return false;
             addedSetterStub = true;
         }
 
         if (!addedSetterStub && canCache == CanAttachCallSetter) {
-            if (!cache.attachCallSetter(cx, script, ion, obj, holder, shape, returnAddr))
+            void *returnAddr = GetReturnAddressToIonCode(cx);
+            if (!cache.attachCallSetter(cx, outerScript, ion, obj, holder, shape, returnAddr))
                 return false;
             addedSetterStub = true;
         }
 
         checkTypeset = false;
         uint32_t unboxedOffset;
         JSValueType unboxedType;
         if (!addedSetterStub && CanAttachSetUnboxed(cx, obj, id, cache.value(),
                                                     cache.needsTypeBarrier(),
                                                     &checkTypeset, &unboxedOffset, &unboxedType))
         {
-            if (!cache.attachSetUnboxed(cx, script, ion, obj, id, unboxedOffset, unboxedType,
+            if (!cache.attachSetUnboxed(cx, outerScript, ion, obj, id, unboxedOffset, unboxedType,
                                         checkTypeset))
             {
                 return false;
             }
             addedSetterStub = true;
         }
     }
 
@@ -3023,17 +3036,17 @@ SetPropertyIC::update(JSContext *cx, siz
     // The property did not exist before, now we can try to inline the property add.
     bool checkTypeset;
     if (!addedSetterStub && canCache == MaybeCanAttachAddSlot &&
         IsPropertyAddInlineable(&obj->as<NativeObject>(), id,
                                 cache.value(), oldSlots, oldShape, cache.needsTypeBarrier(),
                                 &checkTypeset))
     {
         RootedNativeObject nobj(cx, &obj->as<NativeObject>());
-        if (!cache.attachAddSlot(cx, script, ion, nobj, oldShape, oldGroup, checkTypeset))
+        if (!cache.attachAddSlot(cx, outerScript, ion, nobj, oldShape, oldGroup, checkTypeset))
             return false;
         addedSetterStub = true;
     }
 
     if (!addedSetterStub)
         JitSpew(JitSpew_IonIC, "Failed to attach SETPROP cache");
 
     return true;
@@ -3069,18 +3082,17 @@ EqualStringsHelper(JSString *str1, JSStr
     if (!str2Linear)
         return false;
 
     return EqualChars(&str1->asLinear(), str2Linear);
 }
 
 bool
 GetElementIC::attachGetProp(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                            HandleObject obj, const Value &idval, HandlePropertyName name,
-                            void *returnAddr)
+                            HandleObject obj, const Value &idval, HandlePropertyName name)
 {
     MOZ_ASSERT(index().reg().hasValue());
 
     RootedNativeObject holder(cx);
     RootedShape shape(cx);
 
     GetPropertyIC::NativeGetPropCacheability canCache =
         CanAttachNativeGetProp(cx, *this, obj, name, &holder, &shape,
@@ -3150,17 +3162,19 @@ GetElementIC::attachGetProp(JSContext *c
     masm.bind(&equal);
 
     RepatchStubAppender attacher(*this);
     if (canCache == GetPropertyIC::CanAttachReadSlot) {
         GenerateReadSlot(cx, ion, masm, attacher, obj, holder, shape, object(), output(),
                          &failures);
     } else {
         MOZ_ASSERT(canCache == GetPropertyIC::CanAttachCallGetter);
+
         // Set the frame for bailout safety of the OOL call.
+        void *returnAddr = GetReturnAddressToIonCode(cx);
         if (!GenerateCallGetter(cx, ion, masm, attacher, obj, name, holder, shape, liveRegs_,
                                 object(), output(), returnAddr, &failures))
         {
             return false;
         }
     }
 
     return linkAndAttachStub(cx, masm, attacher, ion, "property");
@@ -3501,21 +3515,19 @@ GetElementIC::attachArgumentsElement(JSC
     }
 
     MOZ_ASSERT(!hasNormalArgumentsStub_);
     hasNormalArgumentsStub_ = true;
     return linkAndAttachStub(cx, masm, attacher, ion, "ArgsObj element (normal)");
 }
 
 bool
-GetElementIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj,
+GetElementIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
                      HandleValue idval, MutableHandleValue res)
 {
-    void *returnAddr;
-    RootedScript outerScript(cx, GetTopJitJSScript(cx, &returnAddr));
     IonScript *ion = outerScript->ionScript();
     GetElementIC &cache = ion->getCache(cacheIndex).toGetElement();
     RootedScript script(cx);
     jsbytecode *pc;
     cache.getScriptedLocation(&script, &pc);
 
     // Override the return value when the script is invalidated (bug 728188).
     AutoDetectInvalidation adi(cx, res, ion);
@@ -3542,17 +3554,17 @@ GetElementIC::update(JSContext *cx, size
             (cache.output().hasValue() || !cache.output().typedReg().isFloat()))
         {
             if (!cache.attachArgumentsElement(cx, outerScript, ion, obj))
                 return false;
             attachedStub = true;
         }
         if (!attachedStub && cache.monitoredResult() && canAttachGetProp(obj, idval, id)) {
             RootedPropertyName name(cx, JSID_TO_ATOM(id)->asPropertyName());
-            if (!cache.attachGetProp(cx, outerScript, ion, obj, idval, name, returnAddr))
+            if (!cache.attachGetProp(cx, outerScript, ion, obj, idval, name))
                 return false;
             attachedStub = true;
         }
         if (!attachedStub && !cache.hasDenseStub() && canAttachDenseElement(obj, idval)) {
             if (!cache.attachDenseElement(cx, outerScript, ion, obj, idval))
                 return false;
             attachedStub = true;
         }
@@ -3892,20 +3904,19 @@ SetElementIC::attachTypedArrayElement(JS
     {
         return false;
     }
 
     return linkAndAttachStub(cx, masm, attacher, ion, "typed array");
 }
 
 bool
-SetElementIC::update(JSContext *cx, size_t cacheIndex, HandleObject obj,
+SetElementIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
                      HandleValue idval, HandleValue value)
 {
-    RootedScript outerScript(cx, GetTopJitJSScript(cx));
     IonScript *ion = outerScript->ionScript();
     SetElementIC &cache = ion->getCache(cacheIndex).toSetElement();
 
     bool attachedStub = false;
     if (cache.canAttachStub()) {
         if (!cache.hasDenseStub() && IsDenseElementSetInlineable(obj, idval)) {
             if (!cache.attachDenseElement(cx, outerScript, ion, obj, idval))
                 return false;
@@ -4061,19 +4072,19 @@ IsCacheableNonGlobalScopeChain(JSObject 
             return false;
         }
     }
 
     MOZ_CRASH("Invalid scope chain");
 }
 
 JSObject *
-BindNameIC::update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain)
+BindNameIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
+                   HandleObject scopeChain)
 {
-    RootedScript outerScript(cx, GetTopJitJSScript(cx));
     IonScript *ion = outerScript->ionScript();
     BindNameIC &cache = ion->getCache(cacheIndex).toBindName();
     HandlePropertyName name = cache.name();
 
     RootedObject holder(cx);
     if (scopeChain->is<GlobalObject>()) {
         holder = scopeChain;
     } else {
@@ -4211,21 +4222,19 @@ IsCacheableNameCallGetter(HandleObject s
         return false;
 
     return IsCacheableGetPropCallNative(obj, holder, shape) ||
         IsCacheableGetPropCallPropertyOp(obj, holder, shape) ||
         IsCacheableGetPropCallScripted(obj, holder, shape);
 }
 
 bool
-NameIC::update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain,
+NameIC::update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject scopeChain,
                MutableHandleValue vp)
 {
-    void *returnAddr;
-    RootedScript outerScript(cx, GetTopJitJSScript(cx, &returnAddr));
     IonScript *ion = outerScript->ionScript();
 
     NameIC &cache = ion->getCache(cacheIndex).toName();
     RootedPropertyName name(cx, cache.name());
 
     RootedScript script(cx);
     jsbytecode *pc;
     cache.getScriptedLocation(&script, &pc);
@@ -4248,18 +4257,22 @@ NameIC::update(JSContext *cx, size_t cac
     if (cache.canAttachStub()) {
         if (IsCacheableNameReadSlot(scopeChain, obj, holder, shape, pc, cache.outputReg())) {
             if (!cache.attachReadSlot(cx, outerScript, ion, scopeChain, obj,
                                       holder.as<NativeObject>(), shape))
             {
                 return false;
             }
         } else if (IsCacheableNameCallGetter(scopeChain, obj, holder, shape)) {
-            if (!cache.attachCallGetter(cx, outerScript, ion, scopeChain, obj, holder, shape, returnAddr))
+            void *returnAddr = GetReturnAddressToIonCode(cx);
+            if (!cache.attachCallGetter(cx, outerScript, ion, scopeChain, obj, holder, shape,
+                                        returnAddr))
+            {
                 return false;
+            }
         }
     }
 
     // Monitor changes to cache entry.
     TypeScript::Monitor(cx, script, pc, vp);
 
     return true;
 }
--- a/js/src/jit/IonCaches.h
+++ b/js/src/jit/IonCaches.h
@@ -633,26 +633,24 @@ class GetPropertyIC : public RepatchIonC
     enum NativeGetPropCacheability {
         CanAttachNone,
         CanAttachReadSlot,
         CanAttachArrayLength,
         CanAttachCallGetter
     };
 
     // Helpers for CanAttachNativeGetProp
-    typedef JSContext * Context;
-    bool allowArrayLength(Context cx, HandleObject obj) const;
+    bool allowArrayLength(JSContext *cx, HandleObject obj) const;
     bool allowGetters() const {
         return monitoredResult() && !idempotent();
     }
 
     // Attach the proper stub, if possible
     bool tryAttachStub(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                       HandleObject obj, HandlePropertyName name,
-                       void *returnAddr, bool *emitted);
+                       HandleObject obj, HandlePropertyName name, bool *emitted);
 
     bool tryAttachProxy(JSContext *cx, HandleScript outerScript, IonScript *ion,
                         HandleObject obj, HandlePropertyName name,
                         void *returnAddr, bool *emitted);
 
     bool tryAttachGenericProxy(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                HandleObject obj, HandlePropertyName name,
                                void *returnAddr, bool *emitted);
@@ -673,17 +671,18 @@ class GetPropertyIC : public RepatchIonC
                           void *returnAddr, bool *emitted);
 
     bool tryAttachTypedArrayLength(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                    HandleObject obj, HandlePropertyName name, bool *emitted);
 
     bool tryAttachArgumentsLength(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                   HandleObject obj, HandlePropertyName name, bool *emitted);
 
-    static bool update(JSContext *cx, size_t cacheIndex, HandleObject obj, MutableHandleValue vp);
+    static bool update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
+                       HandleObject obj, MutableHandleValue vp);
 };
 
 class SetPropertyIC : public RepatchIonCache
 {
   protected:
     // Registers live after the cache, excluding output registers. The initial
     // value of these registers must be preserved by the cache.
     RegisterSet liveRegs_;
@@ -759,17 +758,18 @@ class SetPropertyIC : public RepatchIonC
                             void *returnAddr);
 
     bool attachDOMProxyShadowed(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                 HandleObject obj, void *returnAddr);
 
     bool attachDOMProxyUnshadowed(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                   HandleObject obj, void *returnAddr);
 
-    static bool update(JSContext *cx, size_t cacheIndex, HandleObject obj, HandleValue value);
+    static bool update(JSContext *cx, HandleScript outerScript, size_t cacheIndex,
+                       HandleObject obj, HandleValue value);
 };
 
 class GetElementIC : public RepatchIonCache
 {
   protected:
     RegisterSet liveRegs_;
 
     Register object_;
@@ -841,31 +841,30 @@ class GetElementIC : public RepatchIonCa
     }
 
     static bool canAttachGetProp(JSObject *obj, const Value &idval, jsid id);
     static bool canAttachDenseElement(JSObject *obj, const Value &idval);
     static bool canAttachTypedArrayElement(JSObject *obj, const Value &idval,
                                            TypedOrValueRegister output);
 
     bool attachGetProp(JSContext *cx, HandleScript outerScript, IonScript *ion,
-                       HandleObject obj, const Value &idval, HandlePropertyName name,
-                       void *returnAddr);
+                       HandleObject obj, const Value &idval, HandlePropertyName name);
 
     bool attachDenseElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                             HandleObject obj, const Value &idval);
 
     bool attachTypedArrayElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                  HandleObject tarr, const Value &idval);
 
     bool attachArgumentsElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                 HandleObject obj);
 
     static bool
-    update(JSContext *cx, size_t cacheIndex, HandleObject obj, HandleValue idval,
-           MutableHandleValue vp);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
+           HandleValue idval, MutableHandleValue vp);
 
     void incFailedUpdates() {
         failedUpdates_++;
     }
     void resetFailedUpdates() {
         failedUpdates_ = 0;
     }
     bool shouldDisable() const {
@@ -949,18 +948,18 @@ class SetElementIC : public RepatchIonCa
 
     bool attachDenseElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                             HandleObject obj, const Value &idval);
 
     bool attachTypedArrayElement(JSContext *cx, HandleScript outerScript, IonScript *ion,
                                  HandleObject tarr);
 
     static bool
-    update(JSContext *cx, size_t cacheIndex, HandleObject obj, HandleValue idval,
-           HandleValue value);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject obj,
+           HandleValue idval, HandleValue value);
 };
 
 class BindNameIC : public RepatchIonCache
 {
   protected:
     Register scopeChain_;
     PropertyName *name_;
     Register output_;
@@ -987,17 +986,17 @@ class BindNameIC : public RepatchIonCach
 
     bool attachGlobal(JSContext *cx, HandleScript outerScript, IonScript *ion,
                       HandleObject scopeChain);
 
     bool attachNonGlobal(JSContext *cx, HandleScript outerScript, IonScript *ion,
                          HandleObject scopeChain, HandleObject holder);
 
     static JSObject *
-    update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject scopeChain);
 };
 
 class NameIC : public RepatchIonCache
 {
   protected:
     // Registers live after the cache, excluding output registers. The initial
     // value of these registers must be preserved by the cache.
     RegisterSet liveRegs_;
@@ -1038,17 +1037,18 @@ class NameIC : public RepatchIonCache
                         HandleObject scopeChain, HandleObject holderBase,
                         HandleNativeObject holder, HandleShape shape);
 
     bool attachCallGetter(JSContext *cx, HandleScript outerScript, IonScript *ion,
                           HandleObject scopeChain, HandleObject obj, HandleObject holder,
                           HandleShape shape, void *returnAddr);
 
     static bool
-    update(JSContext *cx, size_t cacheIndex, HandleObject scopeChain, MutableHandleValue vp);
+    update(JSContext *cx, HandleScript outerScript, size_t cacheIndex, HandleObject scopeChain,
+           MutableHandleValue vp);
 };
 
 #undef CACHE_HEADER
 
 // Implement cache casts now that the compiler can see the inheritance.
 #define CACHE_CASTS(ickind)                                             \
     ickind##IC &IonCache::to##ickind()                                  \
     {                                                                   \
--- a/js/src/jit/JitFrames.h
+++ b/js/src/jit/JitFrames.h
@@ -284,26 +284,22 @@ void UpdateJitActivationsForMinorGC(JSRu
 static inline uint32_t
 MakeFrameDescriptor(uint32_t frameSize, FrameType type)
 {
     return (frameSize << FRAMESIZE_SHIFT) | type;
 }
 
 // Returns the JSScript associated with the topmost JIT frame.
 inline JSScript *
-GetTopJitJSScript(JSContext *cx, void **returnAddrOut = nullptr)
+GetTopJitJSScript(JSContext *cx)
 {
     JitFrameIterator iter(cx);
     MOZ_ASSERT(iter.type() == JitFrame_Exit);
     ++iter;
 
-    MOZ_ASSERT(iter.returnAddressToFp() != nullptr);
-    if (returnAddrOut)
-        *returnAddrOut = (void *) iter.returnAddressToFp();
-
     if (iter.isBaselineStub()) {
         ++iter;
         MOZ_ASSERT(iter.isBaselineJS());
     }
 
     MOZ_ASSERT(iter.isScripted());
     return iter.script();
 }
--- a/js/src/jit/VMFunctions.cpp
+++ b/js/src/jit/VMFunctions.cpp
@@ -31,20 +31,19 @@ using namespace js::jit;
 
 namespace js {
 namespace jit {
 
 // Don't explicitly initialize, it's not guaranteed that this initializer will
 // run before the constructors for static VMFunctions.
 /* static */ VMFunction *VMFunction::functions;
 
-AutoDetectInvalidation::AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval,
-                                               IonScript *ionScript)
+AutoDetectInvalidation::AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval)
   : cx_(cx),
-    ionScript_(ionScript ? ionScript : GetTopJitJSScript(cx)->ionScript()),
+    ionScript_(GetTopJitJSScript(cx)->ionScript()),
     rval_(rval),
     disabled_(false)
 { }
 
 void
 VMFunction::addToFunctions()
 {
     static bool initialized = false;
--- a/js/src/jit/VMFunctions.h
+++ b/js/src/jit/VMFunctions.h
@@ -612,17 +612,23 @@ class AutoDetectInvalidation
     JSContext *cx_;
     IonScript *ionScript_;
     MutableHandleValue rval_;
     bool disabled_;
 
     void setReturnOverride();
 
   public:
-    AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval, IonScript *ionScript = nullptr);
+    AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval, IonScript *ionScript)
+      : cx_(cx), ionScript_(ionScript), rval_(rval), disabled_(false)
+    {
+        MOZ_ASSERT(ionScript);
+    }
+
+    AutoDetectInvalidation(JSContext *cx, MutableHandleValue rval);
 
     void disable() {
         MOZ_ASSERT(!disabled_);
         disabled_ = true;
     }
 
     ~AutoDetectInvalidation() {
         if (!disabled_ && ionScript_->invalidated())
--- a/layout/forms/nsFieldSetFrame.cpp
+++ b/layout/forms/nsFieldSetFrame.cpp
@@ -593,17 +593,26 @@ nsFieldSetFrame::Reflow(nsPresContext*  
       contentRect.ISize(wm) = mLegendRect.ISize(wm) +
         aReflowState.ComputedLogicalPadding().IStartEnd(wm);
     }
 
     // place the legend
     LogicalRect actualLegendRect = mLegendRect;
     actualLegendRect.Deflate(wm, legendMargin);
     LogicalPoint actualLegendPos(actualLegendRect.Origin(wm));
-    legendReflowState->ApplyRelativePositioning(&actualLegendPos, containerWidth);
+
+    // Note that legend's writing mode may be different from the fieldset's,
+    // so we need to convert offsets before applying them to it (bug 1134534).
+    LogicalMargin offsets =
+      legendReflowState->ComputedLogicalOffsets().
+        ConvertTo(wm, legendReflowState->GetWritingMode());
+    nsHTMLReflowState::ApplyRelativePositioning(legend, wm, offsets,
+                                                &actualLegendPos,
+                                                containerWidth);
+
     legend->SetPosition(wm, actualLegendPos, containerWidth);
     nsContainerFrame::PositionFrameView(legend);
     nsContainerFrame::PositionChildViews(legend);
   }
 
   // Return our size and our result.
   LogicalSize finalSize(wm, contentRect.ISize(wm) + border.IStartEnd(wm),
                         mLegendSpace + border.BStartEnd(wm) +
--- a/layout/generic/nsTextFrame.cpp
+++ b/layout/generic/nsTextFrame.cpp
@@ -8519,16 +8519,17 @@ nsTextFrame::ReflowText(nsLineLayout& aL
     aLineLayout.NotifyOptionalBreakPosition(this, length, fits,
                                             gfxBreakPriority::eNormalBreak);
   }
   bool breakAfter = forceBreakAfter;
   // length == 0 means either the text is empty or it's all collapsed away
   bool emptyTextAtStartOfLine = atStartOfLine && length == 0;
   if (!breakAfter && charsFit == length && !emptyTextAtStartOfLine &&
       transformedOffset + transformedLength == mTextRun->GetLength() &&
+      !StyleContext()->IsInlineDescendantOfRuby() &&
       (mTextRun->GetFlags() & nsTextFrameUtils::TEXT_HAS_TRAILING_BREAK)) {
     // We placed all the text in the textrun and we have a break opportunity at
     // the end of the textrun. We need to record it because the following
     // content may not care about nsLineBreaker.
 
     // Note that because we didn't break, we can be sure that (thanks to the
     // code up above) textMetrics.mAdvanceWidth includes the width of any
     // trailing whitespace. So we need to subtract trimmableWidth here
--- a/layout/generic/nsTextRunTransformations.cpp
+++ b/layout/generic/nsTextRunTransformations.cpp
@@ -566,18 +566,19 @@ nsCaseTransformTextRunFactory::Transform
     if (ch == uint32_t(-1)) {
       aDeletedCharsArray.AppendElement(true);
       mergeNeeded = true;
     } else {
       aDeletedCharsArray.AppendElement(false);
       aCharsToMergeArray.AppendElement(false);
       if (aTextRun) {
         aStyleArray->AppendElement(charStyle);
-        aCanBreakBeforeArray->AppendElement(inhibitBreakBefore ? false :
-                                            aTextRun->CanBreakLineBefore(i));
+        aCanBreakBeforeArray->AppendElement(
+          inhibitBreakBefore ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
+                             : aTextRun->CanBreakBefore(i));
       }
 
       if (IS_IN_BMP(ch)) {
         aConvertedString.Append(ch);
       } else {
         aConvertedString.Append(H_SURROGATE(ch));
         aConvertedString.Append(L_SURROGATE(ch));
         ++i;
@@ -586,17 +587,18 @@ nsCaseTransformTextRunFactory::Transform
         ++extraChars;
       }
 
       while (extraChars-- > 0) {
         mergeNeeded = true;
         aCharsToMergeArray.AppendElement(true);
         if (aTextRun) {
           aStyleArray->AppendElement(charStyle);
-          aCanBreakBeforeArray->AppendElement(false);
+          aCanBreakBeforeArray->AppendElement(
+            gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
         }
       }
     }
   }
 
   return mergeNeeded;
 }
 
new file mode 100644
--- /dev/null
+++ b/layout/reftests/forms/fieldset/legend-rtl-ref.html
@@ -0,0 +1,5 @@
+<!DOCTYPE html>
+<html>
+<fieldset><legend style="position:relative;left:20px;">Legend</legend></fieldset>
+<fieldset dir="rtl"><legend style="position:relative;right:20px;">Legend</legend></fieldset>
+</html>
new file mode 100644
--- /dev/null
+++ b/layout/reftests/forms/fieldset/legend-rtl.html
@@ -0,0 +1,5 @@
+<!DOCTYPE html>
+<html>
+<fieldset><legend dir="rtl" style="position:relative;left:20px;">Legend</legend></fieldset>
+<fieldset dir="rtl"><legend dir="ltr" style="position:relative;right:20px;">Legend</legend></fieldset>
+</html>
--- a/layout/reftests/forms/fieldset/reftest.list
+++ b/layout/reftests/forms/fieldset/reftest.list
@@ -6,8 +6,9 @@
 == fieldset-scrolled-1.html fieldset-scrolled-1-ref.html
 random-if(B2G) == fieldset-overflow-auto-1.html fieldset-overflow-auto-1-ref.html
 fuzzy-if(winWidget&&!layersGPUAccelerated,102,205) == positioned-container-1.html positioned-container-1-ref.html
 == relpos-legend-1.html relpos-legend-1-ref.html
 == relpos-legend-2.html relpos-legend-2-ref.html
 test-pref(layout.css.sticky.enabled,true) skip-if(B2G&&browserIsRemote) == sticky-legend-1.html sticky-legend-1-ref.html
 == abs-pos-child-sizing.html abs-pos-child-sizing-ref.html
 == overflow-hidden.html overflow-hidden-ref.html
+== legend-rtl.html legend-rtl-ref.html
new file mode 100644
--- /dev/null
+++ b/layout/reftests/text/auto-hyphenation-transformed-1-ref.html
@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html lang="de">
+<head>
+<meta charset="utf-8">
+<title>Test for auto hyphenation with text-transform</title>
+</head>
+<body>
+<div style="width:0px;">
+mas&shy;se<br>ma&shy;ße
+</div>
+<div style="width:0px;">
+MAS&shy;SE<br>MA&shy;SSE
+</div>
+<div style="width:0px;">
+Mas&shy;se<br>Ma&shy;ße
+</div>
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/layout/reftests/text/auto-hyphenation-transformed-1.html
@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html lang="de">
+<head>
+<meta charset="utf-8">
+<title>Test for auto hyphenation with text-transform</title>
+</head>
+<body>
+<div style="width:0px; -moz-hyphens:auto">
+masse<br>maße
+</div>
+<div style="width:0px; -moz-hyphens:auto; text-transform:uppercase">
+masse<br>maße
+</div>
+<div style="width:0px; -moz-hyphens:auto; text-transform:capitalize">
+masse<br>maße
+</div>
+</body>
+</html>
--- a/layout/reftests/text/reftest.list
+++ b/layout/reftests/text/reftest.list
@@ -288,16 +288,18 @@ pref(gfx.font_rendering.graphite.enabled
 == auto-hyphenation-sh-1.html auto-hyphenation-sh-1-ref.html
 == auto-hyphenation-sl-1.html auto-hyphenation-sl-1-ref.html
 == auto-hyphenation-sr-1.html auto-hyphenation-sr-1-ref.html
 == auto-hyphenation-sv-1.html auto-hyphenation-sv-1-ref.html # test swedish patterns
 != auto-hyphenation-sv-1.html auto-hyphenation-sv-1-notref.html # verify swedish != english
 == auto-hyphenation-tr-1.html auto-hyphenation-tr-1-ref.html
 == auto-hyphenation-uk-1.html auto-hyphenation-uk-1-ref.html
 
+== auto-hyphenation-transformed-1.html auto-hyphenation-transformed-1-ref.html
+
 # osx-font-smoothing - with and without subpixel AA, only under OSX
 fails-if(!cocoaWidget||OSX==1006||OSX==1007) != osx-font-smoothing.html osx-font-smoothing-ref.html
 fails-if(!cocoaWidget||OSX==1006||OSX==1007) != osx-font-smoothing-2.html osx-font-smoothing-2-notref.html
 == osx-font-smoothing-2.html osx-font-smoothing-2-ref.html
 
 pref(layout.css.text-align-true-value.enabled,true) == text-align-true.html text-align-true-ref.html
 
 # stray control chars should not be invisible, bug 909344
--- a/media/libvpx/moz.build
+++ b/media/libvpx/moz.build
@@ -37,18 +37,18 @@ if CONFIG['VPX_X86_ASM'] and CONFIG['OS_
 
 if CONFIG['VPX_X86_ASM']:
     SOURCES += files['X86_ASM']
 
     if '64' in CONFIG['OS_TEST']:
         SOURCES += files['X86-64_ASM']
 
     # AVX2 only supported on
-    # Darwin and mingw toolchains right now
-    if CONFIG['OS_TARGET'] == 'Darwin' or (CONFIG['OS_TARGET'] == 'WINNT' and CONFIG['GNU_CC']):
+    # Darwin and Windows toolchains right now
+    if CONFIG['OS_TARGET'] in ('Darwin', 'WINNT'):
         SOURCES += files['AVX2']
 
     #postproc is only enabled on x86 with asm
     SOURCES += files['VP8_POSTPROC']
 
 arm_asm_files = []
 if CONFIG['VPX_ARM_ASM']:
     arm_asm_files += files['ARM_ASM']
--- a/media/libvpx/update.py
+++ b/media/libvpx/update.py
@@ -7,18 +7,18 @@ import os
 import re
 import shutil
 import sys
 import subprocess
 from pprint import pprint
 from StringIO import StringIO
 
 PLATFORMS= [
-  'x86-win32-vs8',
-  'x86_64-win64-vs8',
+  'x86-win32-vs12',
+  'x86_64-win64-vs12',
   'x86-linux-gcc',
   'x86_64-linux-gcc',
   'generic-gnu',
   'x86-darwin9-gcc',
   'x86_64-darwin9-gcc',
   'armv7-android-gcc',
   'x86-win32-gcc',
   'x86_64-win64-gcc',
--- a/media/libvpx/vp8_rtcd.h
+++ b/media/libvpx/vp8_rtcd.h
@@ -4,25 +4,25 @@
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.
  */
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vp8_rtcd_x86_64-win64-vs8.h"
+#include "vp8_rtcd_x86_64-win64-vs12.h"
 #else
 #include "vp8_rtcd_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vp8_rtcd_x86-win32-vs8.h"
+#include "vp8_rtcd_x86-win32-vs12.h"
 #else
 #include "vp8_rtcd_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vp8_rtcd_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vp8_rtcd_x86-win32-vs8.h
rename to media/libvpx/vp8_rtcd_x86-win32-vs12.h
rename from media/libvpx/vp8_rtcd_x86_64-win64-vs8.h
rename to media/libvpx/vp8_rtcd_x86_64-win64-vs12.h
--- a/media/libvpx/vp9_rtcd.h
+++ b/media/libvpx/vp9_rtcd.h
@@ -4,25 +4,25 @@
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.
  */
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vp9_rtcd_x86_64-win64-vs8.h"
+#include "vp9_rtcd_x86_64-win64-vs12.h"
 #else
 #include "vp9_rtcd_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vp9_rtcd_x86-win32-vs8.h"
+#include "vp9_rtcd_x86-win32-vs12.h"
 #else
 #include "vp9_rtcd_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vp9_rtcd_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vp9_rtcd_x86-win32-vs8.h
rename to media/libvpx/vp9_rtcd_x86-win32-vs12.h
--- a/media/libvpx/vp9_rtcd_x86-win32-vs8.h
+++ b/media/libvpx/vp9_rtcd_x86-win32-vs12.h
@@ -26,21 +26,23 @@ union int_mv;
 struct yv12_buffer_config;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
+int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 
 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -51,21 +53,23 @@ RTCD_EXTERN void (*vp9_convolve8_avg_hor
 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -218,24 +222,26 @@ void vp9_fdct16x16_sse2(const int16_t *i
 RTCD_EXTERN void (*vp9_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct16x16_1)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride);
+void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct32x32_1)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride);
+void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fdct4x4)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride);
@@ -270,16 +276,17 @@ int vp9_full_search_sadx8(const struct m
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
 RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 
 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_sse2(const int16_t *);
@@ -362,16 +369,17 @@ RTCD_EXTERN void (*vp9_iht8x8_64_add)(co
 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_16_add vp9_iwht4x4_16_add_c
 
 void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_1_add vp9_iwht4x4_1_add_c
 
 void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 RTCD_EXTERN void (*vp9_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
@@ -406,16 +414,17 @@ void vp9_lpf_vertical_8_sse2(uint8_t *s,
 RTCD_EXTERN void (*vp9_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vp9_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 
 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 
 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 
 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
@@ -512,16 +521,17 @@ unsigned int vp9_sad32x32_avg_c(const ui
 unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 
 void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad32x32x3 vp9_sad32x32x3_c
 
 void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad32x32x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad32x32x8 vp9_sad32x32x8_c
 
 unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -588,16 +598,17 @@ unsigned int vp9_sad64x64_avg_c(const ui
 unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 
 void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad64x64x3 vp9_sad64x64x3_c
 
 void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad64x64x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad64x64x8 vp9_sad64x64x8_c
 
 unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 RTCD_EXTERN unsigned int (*vp9_sad8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
@@ -669,16 +680,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -694,16 +706,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -734,16 +747,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -759,16 +773,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -817,52 +832,57 @@ void vp9_v_predictor_4x4_sse(uint8_t *ds
 RTCD_EXTERN void (*vp9_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 
 void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 
 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -879,34 +899,38 @@ void vp9_rtcd(void);
 static void setup_rtcd_internal(void)
 {
     int flags = x86_simd_caps();
 
     (void)flags;
 
     vp9_block_error = vp9_block_error_c;
     if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2;
+    if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2;
     vp9_convolve8 = vp9_convolve8_c;
     if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2;
     vp9_convolve8_avg = vp9_convolve8_avg_c;
     if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3;
     vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c;
     if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3;
     vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c;
     if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3;
     vp9_convolve8_horiz = vp9_convolve8_horiz_c;
     if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2;
     vp9_convolve8_vert = vp9_convolve8_vert_c;
     if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2;
     vp9_convolve_avg = vp9_convolve_avg_c;
     if (flags & HAS_SSE2) vp9_convolve_avg = vp9_convolve_avg_sse2;
     vp9_convolve_copy = vp9_convolve_copy_c;
     if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2;
     vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
     vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
@@ -945,20 +969,22 @@ static void setup_rtcd_internal(void)
     vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c;
     if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse;
     vp9_fdct16x16 = vp9_fdct16x16_c;
     if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2;
     vp9_fdct16x16_1 = vp9_fdct16x16_1_c;
     if (flags & HAS_SSE2) vp9_fdct16x16_1 = vp9_fdct16x16_1_sse2;
     vp9_fdct32x32 = vp9_fdct32x32_c;
     if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2;
     vp9_fdct32x32_1 = vp9_fdct32x32_1_c;
     if (flags & HAS_SSE2) vp9_fdct32x32_1 = vp9_fdct32x32_1_sse2;
     vp9_fdct32x32_rd = vp9_fdct32x32_rd_c;
     if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2;
     vp9_fdct4x4 = vp9_fdct4x4_c;
     if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2;
     vp9_fdct4x4_1 = vp9_fdct4x4_1_c;
     if (flags & HAS_SSE2) vp9_fdct4x4_1 = vp9_fdct4x4_1_sse2;
     vp9_fdct8x8 = vp9_fdct8x8_c;
     if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2;
     vp9_fdct8x8_1 = vp9_fdct8x8_1_c;
     if (flags & HAS_SSE2) vp9_fdct8x8_1 = vp9_fdct8x8_1_sse2;
@@ -970,16 +996,17 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2;
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
     vp9_fwht4x4 = vp9_fwht4x4_c;
     if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx;
     vp9_get16x16var = vp9_get16x16var_c;
     if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2;
+    if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
     vp9_get8x8var = vp9_get8x8var_c;
     if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2;
     vp9_get_mb_ss = vp9_get_mb_ss_c;
     if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2;
     vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
     vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3;
@@ -1014,16 +1041,17 @@ static void setup_rtcd_internal(void)
     vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
     if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
     vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
     if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2;
     vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
     if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2;
     vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2;
+    if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2;
     vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c;
     if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx;
     vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2;
     vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2;
     vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2;
@@ -1036,16 +1064,17 @@ static void setup_rtcd_internal(void)
     vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2;
     vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c;
     if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2;
     vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c;
     if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2;
     vp9_mse16x16 = vp9_mse16x16_c;
     if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2;
+    if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
     vp9_mse16x8 = vp9_mse16x8_c;
     if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2;
     vp9_mse8x16 = vp9_mse8x16_c;
     if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2;
     vp9_mse8x8 = vp9_mse8x8_c;
     if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2;
     vp9_sad16x16 = vp9_sad16x16_c;
     if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2;
@@ -1078,16 +1107,17 @@ static void setup_rtcd_internal(void)
     vp9_sad32x16x4d = vp9_sad32x16x4d_c;
     if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2;
     vp9_sad32x32 = vp9_sad32x32_c;
     if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2;
     vp9_sad32x32_avg = vp9_sad32x32_avg_c;
     if (flags & HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2;
     vp9_sad32x32x4d = vp9_sad32x32x4d_c;
     if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2;
     vp9_sad32x64 = vp9_sad32x64_c;
     if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2;
     vp9_sad32x64_avg = vp9_sad32x64_avg_c;
     if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2;
     vp9_sad32x64x4d = vp9_sad32x64x4d_c;
     if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2;
     vp9_sad4x4 = vp9_sad4x4_c;
     if (flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse;
@@ -1110,16 +1140,17 @@ static void setup_rtcd_internal(void)
     vp9_sad64x32x4d = vp9_sad64x32x4d_c;
     if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2;
     vp9_sad64x64 = vp9_sad64x64_c;
     if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2;
     vp9_sad64x64_avg = vp9_sad64x64_avg_c;
     if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2;
     vp9_sad64x64x4d = vp9_sad64x64x4d_c;
     if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2;
     vp9_sad8x16 = vp9_sad8x16_c;
     if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2;
     vp9_sad8x16_avg = vp9_sad8x16_avg_c;
     if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2;
     vp9_sad8x16x3 = vp9_sad8x16x3_c;
     if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3;
     vp9_sad8x16x4d = vp9_sad8x16x4d_c;
     if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2;
@@ -1147,31 +1178,33 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3;
     vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3;
     vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2;
     vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3;
     vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_c;
     if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3;
     vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_c;
     if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3;
     vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3;
     vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2;
     vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3;
     vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3;
     vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2;
@@ -1186,31 +1219,33 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3;
     vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3;
     vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2;
     vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3;
     vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_c;
     if (flags & HAS_SSE) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3;
     vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_c;
     if (flags & HAS_SSE) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3;
     vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3;
     vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2;
     vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3;
     vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3;
     vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_c;
     if (flags & HAS_SSE2) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2;
@@ -1230,34 +1265,39 @@ static void setup_rtcd_internal(void)
     vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c;
     if (flags & HAS_SSE2) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_sse2;
     vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c;
     if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse;
     vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c;
     if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse;
     vp9_variance16x16 = vp9_variance16x16_c;
     if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
     vp9_variance16x32 = vp9_variance16x32_c;
     if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2;
     vp9_variance16x8 = vp9_variance16x8_c;
     if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2;
     vp9_variance32x16 = vp9_variance32x16_c;
     if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
     vp9_variance32x32 = vp9_variance32x32_c;
     if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
     vp9_variance32x64 = vp9_variance32x64_c;
     if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2;
     vp9_variance4x4 = vp9_variance4x4_c;
     if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2;
     vp9_variance4x8 = vp9_variance4x8_c;
     if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2;
     vp9_variance64x32 = vp9_variance64x32_c;
     if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
     vp9_variance64x64 = vp9_variance64x64_c;
     if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
     vp9_variance8x16 = vp9_variance8x16_c;
     if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2;
     vp9_variance8x4 = vp9_variance8x4_c;
     if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2;
     vp9_variance8x8 = vp9_variance8x8_c;
     if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2;
 }
 #endif
rename from media/libvpx/vp9_rtcd_x86_64-win64-vs8.h
rename to media/libvpx/vp9_rtcd_x86_64-win64-vs12.h
--- a/media/libvpx/vp9_rtcd_x86_64-win64-vs8.h
+++ b/media/libvpx/vp9_rtcd_x86_64-win64-vs12.h
@@ -26,21 +26,23 @@ union int_mv;
 struct yv12_buffer_config;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
-#define vp9_block_error vp9_block_error_sse2
+int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
+RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
 
 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -51,21 +53,23 @@ RTCD_EXTERN void (*vp9_convolve8_avg_hor
 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 
 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vp9_convolve_avg vp9_convolve_avg_sse2
 
 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vp9_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
@@ -218,25 +222,27 @@ void vp9_fdct16x16_sse2(const int16_t *i
 #define vp9_fdct16x16 vp9_fdct16x16_sse2
 
 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct16x16_1 vp9_fdct16x16_1_sse2
 
 void vp9_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride);
-#define vp9_fdct32x32 vp9_fdct32x32_sse2
+void vp9_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride);
+RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct32x32_1 vp9_fdct32x32_1_sse2
 
 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride);
-#define vp9_fdct32x32_rd vp9_fdct32x32_rd_sse2
+void vp9_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride);
+RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride);
 
 void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct4x4 vp9_fdct4x4_sse2
 
 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fdct4x4_1 vp9_fdct4x4_1_sse2
@@ -271,17 +277,18 @@ int vp9_full_search_sadx8(const struct m
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
 void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
 #define vp9_fwht4x4 vp9_fwht4x4_mmx
 
 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_sse2
+void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 
 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
 #define vp9_get8x8var vp9_get8x8var_sse2
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_sse2(const int16_t *);
 #define vp9_get_mb_ss vp9_get_mb_ss_sse2
@@ -365,17 +372,18 @@ void vp9_iht8x8_64_add_sse2(const tran_l
 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_16_add vp9_iwht4x4_16_add_c
 
 void vp9_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vp9_iwht4x4_1_add vp9_iwht4x4_1_add_c
 
 void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
-#define vp9_lpf_horizontal_16 vp9_lpf_horizontal_16_sse2
+void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 
 void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 void vp9_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
 #define vp9_lpf_horizontal_4 vp9_lpf_horizontal_4_mmx
 
 void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vp9_lpf_horizontal_4_dual vp9_lpf_horizontal_4_dual_sse2
@@ -409,17 +417,18 @@ void vp9_lpf_vertical_8_sse2(uint8_t *s,
 #define vp9_lpf_vertical_8 vp9_lpf_vertical_8_sse2
 
 void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_sse2
 
 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_sse2
+unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 
 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 #define vp9_mse16x8 vp9_mse16x8_sse2
 
 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
 #define vp9_mse8x16 vp9_mse8x16_sse2
@@ -519,17 +528,18 @@ unsigned int vp9_sad32x32_avg_c(const ui
 unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 #define vp9_sad32x32_avg vp9_sad32x32_avg_sse2
 
 void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad32x32x3 vp9_sad32x32x3_c
 
 void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
-#define vp9_sad32x32x4d vp9_sad32x32x4d_sse2
+void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad32x32x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad32x32x8 vp9_sad32x32x8_c
 
 unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride);
 #define vp9_sad32x64 vp9_sad32x64_sse2
 
@@ -595,17 +605,18 @@ unsigned int vp9_sad64x64_avg_c(const ui
 unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred);
 #define vp9_sad64x64_avg vp9_sad64x64_avg_sse2
 
 void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
 #define vp9_sad64x64x3 vp9_sad64x64x3_c
 
 void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
-#define vp9_sad64x64x4d vp9_sad64x64x4d_sse2
+void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
 
 void vp9_sad64x64x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
 #define vp9_sad64x64x8 vp9_sad64x64x8_c
 
 unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride);
 #define vp9_sad8x16 vp9_sad8x16_sse2
 
@@ -676,16 +687,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -701,16 +713,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 unsigned int vp9_sub_pixel_avg_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
@@ -741,16 +754,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -766,16 +780,17 @@ RTCD_EXTERN unsigned int (*vp9_sub_pixel
 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -825,53 +840,58 @@ void vp9_v_predictor_4x4_sse(uint8_t *ds
 #define vp9_v_predictor_4x4 vp9_v_predictor_4x4_sse
 
 void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse
 
 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_sse2
+unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance16x32 vp9_variance16x32_sse2
 
 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance16x8 vp9_variance16x8_sse2
 
 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_sse2
+unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_sse2
+unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance32x64 vp9_variance32x64_sse2
 
 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance4x4 vp9_variance4x4_sse2
 
 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance4x8 vp9_variance4x8_sse2
 
 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_sse2
+unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_sse2
+unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x16 vp9_variance8x16_sse2
 
 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x4 vp9_variance8x4_sse2
@@ -885,28 +905,33 @@ void vp9_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/x86.h"
 static void setup_rtcd_internal(void)
 {
     int flags = x86_simd_caps();
 
     (void)flags;
 
+    vp9_block_error = vp9_block_error_sse2;
+    if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2;
     vp9_convolve8 = vp9_convolve8_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2;
     vp9_convolve8_avg = vp9_convolve8_avg_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3;
     vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3;
     vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3;
     vp9_convolve8_horiz = vp9_convolve8_horiz_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2;
     vp9_convolve8_vert = vp9_convolve8_vert_sse2;
     if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3;
+    if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2;
     vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
     vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
     vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3;
     vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3;
@@ -927,109 +952,137 @@ static void setup_rtcd_internal(void)
     vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3;
     vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3;
     vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3;
     vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3;
+    vp9_fdct32x32 = vp9_fdct32x32_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2;
+    vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2;
+    if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2;
     vp9_fdct8x8 = vp9_fdct8x8_sse2;
     if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3;
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
+    vp9_get16x16var = vp9_get16x16var_sse2;
+    if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
     vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
     vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3;
     vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3;
     vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3;
     vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3;
     vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3;
     vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
+    vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2;
+    if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2;
+    vp9_mse16x16 = vp9_mse16x16_sse2;
+    if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
     vp9_quantize_b = vp9_quantize_b_c;
     if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
     vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
     if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3;
     vp9_quantize_fp = vp9_quantize_fp_c;
     if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3;
     vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c;
     if (flags & HAS_SSSE3) vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3;
     vp9_sad16x16x3 = vp9_sad16x16x3_c;
     if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3;
     if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3;
     vp9_sad16x8x3 = vp9_sad16x8x3_c;
     if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3;
     if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3;
+    vp9_sad32x32x4d = vp9_sad32x32x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2;
     vp9_sad4x4x3 = vp9_sad4x4x3_c;
     if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3;
+    vp9_sad64x64x4d = vp9_sad64x64x4d_sse2;
+    if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2;
     vp9_sad8x16x3 = vp9_sad8x16x3_c;
     if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3;
     vp9_sad8x8x3 = vp9_sad8x8x3_c;
     if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3;
     vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3;
     vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3;
     vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3;
     vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3;
     vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2;
     vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3;
     vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3;
     vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3;
     vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3;
     vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2;
     vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3;
     vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3;
     vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3;
     vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3;
     vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3;
     vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3;
     vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3;
     vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2;
     vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3;
     vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3;
     vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3;
     vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3;
     vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3;
+    if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2;
     vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3;
     vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3;
     vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2;
     if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3;
+    vp9_variance16x16 = vp9_variance16x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
+    vp9_variance32x16 = vp9_variance32x16_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
+    vp9_variance32x32 = vp9_variance32x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
+    vp9_variance64x32 = vp9_variance64x32_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
+    vp9_variance64x64 = vp9_variance64x64_sse2;
+    if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
 }
 #endif
 
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 
 #endif
--- a/media/libvpx/vpx_config.asm
+++ b/media/libvpx/vpx_config.asm
@@ -2,12 +2,12 @@
 %include "vpx_config_x86-linux-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,elf64
 %include "vpx_config_x86_64-linux-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,macho32
 %include "vpx_config_x86-darwin9-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,macho64
 %include "vpx_config_x86_64-darwin9-gcc.asm"
 %elifidn __OUTPUT_FORMAT__,win32
-%include "vpx_config_x86-win32-vs8.asm"
+%include "vpx_config_x86-win32-vs12.asm"
 %elifidn __OUTPUT_FORMAT__,x64
-%include "vpx_config_x86_64-win64-vs8.asm"
+%include "vpx_config_x86_64-win64-vs12.asm"
 %endif
--- a/media/libvpx/vpx_config.h
+++ b/media/libvpx/vpx_config.h
@@ -1,22 +1,22 @@
 #if defined(VPX_X86_ASM)
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vpx_config_x86_64-win64-vs8.h"
+#include "vpx_config_x86_64-win64-vs12.h"
 #else
 #include "vpx_config_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vpx_config_x86-win32-vs8.h"
+#include "vpx_config_x86-win32-vs12.h"
 #else
 #include "vpx_config_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vpx_config_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vpx_config_x86-win32-vs8.asm
rename to media/libvpx/vpx_config_x86-win32-vs12.asm
--- a/media/libvpx/vpx_config_x86-win32-vs8.asm
+++ b/media/libvpx/vpx_config_x86-win32-vs12.asm
@@ -12,18 +12,18 @@ HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MIPS64 equ 0
 HAVE_MMX equ 1
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
 HAVE_SSSE3 equ 1
 HAVE_SSE4_1 equ 1
-HAVE_AVX equ 0
-HAVE_AVX2 equ 0
+HAVE_AVX equ 1
+HAVE_AVX2 equ 1
 HAVE_ALTIVEC equ 0
 HAVE_VPX_PORTS equ 1
 HAVE_STDINT_H equ 0
 HAVE_ALT_TREE_LAYOUT equ 0
 HAVE_PTHREAD_H equ 0
 HAVE_SYS_MMAN_H equ 0
 HAVE_UNISTD_H equ 0
 CONFIG_EXTERNAL_BUILD equ 1
rename from media/libvpx/vpx_config_x86-win32-vs8.h
rename to media/libvpx/vpx_config_x86-win32-vs12.h
--- a/media/libvpx/vpx_config_x86-win32-vs8.h
+++ b/media/libvpx/vpx_config_x86-win32-vs12.h
@@ -24,18 +24,18 @@
 #define HAVE_DSPR2 0
 #define HAVE_MIPS64 0
 #define HAVE_MMX 1
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
 #define HAVE_SSSE3 1
 #define HAVE_SSE4_1 1
-#define HAVE_AVX 0
-#define HAVE_AVX2 0
+#define HAVE_AVX 1
+#define HAVE_AVX2 1
 #define HAVE_ALTIVEC 0
 #define HAVE_VPX_PORTS 1
 #define HAVE_STDINT_H 0
 #define HAVE_ALT_TREE_LAYOUT 0
 #define HAVE_PTHREAD_H 0
 #define HAVE_SYS_MMAN_H 0
 #define HAVE_UNISTD_H 0
 #define CONFIG_EXTERNAL_BUILD 1
rename from media/libvpx/vpx_config_x86_64-win64-vs8.asm
rename to media/libvpx/vpx_config_x86_64-win64-vs12.asm
--- a/media/libvpx/vpx_config_x86_64-win64-vs8.asm
+++ b/media/libvpx/vpx_config_x86_64-win64-vs12.asm
@@ -12,18 +12,18 @@ HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MIPS64 equ 0
 HAVE_MMX equ 1
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
 HAVE_SSSE3 equ 1
 HAVE_SSE4_1 equ 1
-HAVE_AVX equ 0
-HAVE_AVX2 equ 0
+HAVE_AVX equ 1
+HAVE_AVX2 equ 1
 HAVE_ALTIVEC equ 0
 HAVE_VPX_PORTS equ 1
 HAVE_STDINT_H equ 0
 HAVE_ALT_TREE_LAYOUT equ 0
 HAVE_PTHREAD_H equ 0
 HAVE_SYS_MMAN_H equ 0
 HAVE_UNISTD_H equ 0
 CONFIG_EXTERNAL_BUILD equ 1
rename from media/libvpx/vpx_config_x86_64-win64-vs8.h
rename to media/libvpx/vpx_config_x86_64-win64-vs12.h
--- a/media/libvpx/vpx_config_x86_64-win64-vs8.h
+++ b/media/libvpx/vpx_config_x86_64-win64-vs12.h
@@ -24,18 +24,18 @@
 #define HAVE_DSPR2 0
 #define HAVE_MIPS64 0
 #define HAVE_MMX 1
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
 #define HAVE_SSSE3 1
 #define HAVE_SSE4_1 1
-#define HAVE_AVX 0
-#define HAVE_AVX2 0
+#define HAVE_AVX 1
+#define HAVE_AVX2 1
 #define HAVE_ALTIVEC 0
 #define HAVE_VPX_PORTS 1
 #define HAVE_STDINT_H 0
 #define HAVE_ALT_TREE_LAYOUT 0
 #define HAVE_PTHREAD_H 0
 #define HAVE_SYS_MMAN_H 0
 #define HAVE_UNISTD_H 0
 #define CONFIG_EXTERNAL_BUILD 1
--- a/media/libvpx/vpx_scale_rtcd.h
+++ b/media/libvpx/vpx_scale_rtcd.h
@@ -4,25 +4,25 @@
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.
  */
 
 #if defined(_WIN64)
 /* 64 bit Windows */
 #ifdef _MSC_VER
-#include "vpx_scale_rtcd_x86_64-win64-vs8.h"
+#include "vpx_scale_rtcd_x86_64-win64-vs12.h"
 #else
 #include "vpx_scale_rtcd_x86_64-win64-gcc.h"
 #endif
 
 #elif defined(_WIN32)
 /* 32 bit Windows, MSVC. */
 #ifdef _MSC_VER
-#include "vpx_scale_rtcd_x86-win32-vs8.h"
+#include "vpx_scale_rtcd_x86-win32-vs12.h"
 #else
 #include "vpx_scale_rtcd_x86-win32-gcc.h"
 #endif
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 /* 64 bit MacOS. */
 #include "vpx_scale_rtcd_x86_64-darwin9-gcc.h"
 
rename from media/libvpx/vpx_scale_rtcd_x86-win32-vs8.h
rename to media/libvpx/vpx_scale_rtcd_x86-win32-vs12.h
rename from media/libvpx/vpx_scale_rtcd_x86_64-win64-vs8.h
rename to media/libvpx/vpx_scale_rtcd_x86_64-win64-vs12.h
--- a/media/webrtc/signaling/src/media-conduit/VideoConduit.cpp
+++ b/media/webrtc/signaling/src/media-conduit/VideoConduit.cpp
@@ -1102,17 +1102,18 @@ WebrtcVideoConduit::SendVideoFrame(unsig
   CSFLogDebug(logTag, "%s Inserted a frame", __FUNCTION__);
   return kMediaConduitNoError;
 }
 
 // Transport Layer Callbacks
 MediaConduitErrorCode
 WebrtcVideoConduit::ReceivedRTPPacket(const void *data, int len)
 {
-  CSFLogDebug(logTag, "%s: Channel %d, Len %d ", __FUNCTION__, mChannel, len);
+  CSFLogDebug(logTag, "%s: seq# %u, Channel %d, Len %d ", __FUNCTION__,
+              (uint16_t) ntohs(((uint16_t*) data)[1]), mChannel, len);
 
   // Media Engine should be receiving already.
   if(mEngineReceiving)
   {
     // let the engine know of a RTP packet to decode
     // XXX we need to get passed the time the packet was received
     if(mPtrViENetwork->ReceivedRTPPacket(mChannel, data, len, webrtc::PacketTime()) == -1)
     {
--- a/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
+++ b/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
@@ -13,24 +13,26 @@
 #include "webrtc/modules/interface/module_common_types.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_utility.h"
 #include "webrtc/system_wrappers/interface/trace.h"
 
 namespace webrtc {
 namespace {
 
-enum Nalu {
-  kSlice = 1,
-  kIdr = 5,
-  kSei = 6,
-  kSps = 7,
-  kPps = 8,
-  kStapA = 24,
-  kFuA = 28
+enum Nalu { // 0-23 from H.264, 24-31 from RFC 6184
+  kSlice = 1, // I/P/B slice
+  kIdr = 5, // IDR slice
+  kSei = 6, // Supplementary Enhancement Info
+  kSeiRecPt = 6, // Recovery Point SEI Payload
+  kSps = 7, // Sequence Parameter Set
+  kPps = 8, // Picture Parameter Set
+  kPrefix = 14, // Prefix
+  kStapA = 24, // Single-Time Aggregation Packet Type A
+  kFuA = 28 // Fragmentation Unit Type A
 };
 
 static const size_t kNalHeaderSize = 1;
 static const size_t kFuAHeaderSize = 2;
 static const size_t kLengthFieldSize = 2;
 
 // Bit masks for FU (A and B) indicators.
 enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
@@ -46,24 +48,48 @@ void ParseSingleNalu(RtpDepacketizer::Pa
   parsed_payload->type.Video.codec = kRtpVideoH264;
   parsed_payload->type.Video.isFirstPacket = true;
   RTPVideoHeaderH264* h264_header =
       &parsed_payload->type.Video.codecHeader.H264;
   h264_header->single_nalu = true;
   h264_header->stap_a = false;
 
   uint8_t nal_type = payload_data[0] & kTypeMask;
+  size_t offset = 0;
   if (nal_type == kStapA) {
-    nal_type = payload_data[3] & kTypeMask;
+    offset = 3;
+    if (offset >= payload_data_length) {
+      return; // XXX malformed
+    }
+    nal_type = payload_data[offset] & kTypeMask;
     h264_header->stap_a = true;
   }
 
+  // key frames start with SPS, PPS, IDR, or Recovery Point SEI
+  // Recovery Point SEI's are used in AIR and GDR refreshes, which don't
+  // send large iframes, and instead use forms of incremental/continuous refresh.
   switch (nal_type) {
+    case kSei: // check if it is a Recovery Point SEI (aka GDR)
+      if (offset+1 >= payload_data_length) {
+        return; // XXX malformed
+      }
+      if (payload_data[offset+1] != kSeiRecPt) {
+        parsed_payload->frame_type = kVideoFrameDelta;
+        break; // some other form of SEI - not a keyframe
+      }
+      // else fall through since GDR is like IDR
     case kSps:
     case kPps:
+      // These are always combined with other packets with the same timestamp...
+      // XXX To support 'solitary' SPS/PPS/etc, either fix the jitter buffer to
+      // accept multiple sessions with the same timestamp, or pass marker info
+      // down into here (SPS/PPS as a pair without an kIdr NALU would still be
+      // painful, but might work).
+      h264_header->single_nalu = false;
+      // fall through...
     case kIdr:
       parsed_payload->frame_type = kVideoFrameKey;
       break;
     default:
       parsed_payload->frame_type = kVideoFrameDelta;
       break;
   }
 }
--- a/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
@@ -459,22 +459,33 @@ int VCMSessionInfo::InsertPacket(const V
       break;
 
   // Check for duplicate packets.
   if (rit != packets_.rend() &&
       (*rit).seqNum == packet.seqNum && (*rit).sizeBytes > 0)
     return -2;
 
   if (packet.codec == kVideoCodecH264) {
-    frame_type_ = packet.frameType;
+    // H.264 can have leading or trailing non-VCL (Video Coding Layer)
+    // NALUs, such as SPS/PPS/SEI and others.  Also, the RTP marker bit is
+    // not reliable for the last packet of a frame (RFC 6184 5.1 - "Decoders
+    // [] MUST NOT rely on this property"), so allow out-of-order packets to
+    // update the first and last seq# range.  Also mark as a key frame if
+    // any packet is of that type.
+    if (frame_type_ != kVideoFrameKey) {
+      frame_type_ = packet.frameType;
+    }
     if (packet.isFirstPacket &&
         (first_packet_seq_num_ == -1 ||
          IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) {
       first_packet_seq_num_ = packet.seqNum;
     }
+    // Note: the code does *not* currently handle the Marker bit being totally
+    // absent from a frame.  It does not, however, depend on it being on the last
+    // packet of the 'frame'/'session'.
     if (packet.markerBit &&
         (last_packet_seq_num_ == -1 ||
          IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) {
       last_packet_seq_num_ = packet.seqNum;
     }
   } else {
     // Only insert media packets between first and last packets (when
     // available).
@@ -508,16 +519,19 @@ int VCMSessionInfo::InsertPacket(const V
     }
   }
 
   // The insert operation invalidates the iterator |rit|.
   PacketIterator packet_list_it = packets_.insert(rit.base(), packet);
 
   int returnLength = InsertBuffer(frame_buffer, packet_list_it);
   UpdateCompleteSession();
+  // We call MakeDecodable() before decoding, which removes packets after a loss
+  // (and which means h.264 mode 1 frames with a loss in the first packet will be
+  // totally removed)
   if (decode_error_mode == kWithErrors)
     decodable_ = true;
   else if (decode_error_mode == kSelectiveErrors)
     UpdateDecodableSession(frame_data);
   return returnLength;
 }
 
 void VCMSessionInfo::InformOfEmptyPacket(uint16_t seq_num) {
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -205,16 +205,22 @@
   },
   "CYCLE_COLLECTOR_ASYNC_SNOW_WHITE_FREEING": {
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "10000",
     "n_buckets": 50,
     "description": "Time spent on one asynchronous SnowWhite freeing (ms)"
   },
+  "DEVICE_RESET_REASON": {
+    "expires_in_version": "never",
+    "kind": "enumerated",
+    "n_values": 10,
+    "description": "GPU Device Reset Reason (ok, hung, removed, reset, internal error, invalid call)"
+  },
   "FORGET_SKIPPABLE_MAX": {
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "10000",
     "n_buckets": 50,
     "description": "Max time spent on one forget skippable (ms)"
   },
   "GC_REASON_2": {
--- a/widget/nsBaseWidget.cpp
+++ b/widget/nsBaseWidget.cpp
@@ -176,46 +176,48 @@ WidgetShutdownObserver::Observe(nsISuppo
 
 void
 nsBaseWidget::Shutdown()
 {
   DestroyCompositor();
   mShutdownObserver = nullptr;
 }
 
-static void DeferredDestroyCompositor(CompositorParent* aCompositorParent,
-                              CompositorChild* aCompositorChild)
+static void DeferredDestroyCompositor(nsRefPtr<CompositorParent> aCompositorParent,
+                                      nsRefPtr<CompositorChild> aCompositorChild)
 {
     // Bug 848949 needs to be fixed before
     // we can close the channel properly
     //aCompositorChild->Close();
-    aCompositorParent->Release();
-    aCompositorChild->Release();
 }
 
 void nsBaseWidget::DestroyCompositor()
 {
   if (mCompositorChild) {
-    mCompositorChild->SendWillStop();
-    mCompositorChild->Destroy();
+    nsRefPtr<CompositorChild> compositorChild = mCompositorChild.forget();
+    nsRefPtr<CompositorParent> compositorParent = mCompositorParent.forget();
+
+    compositorChild->SendWillStop();
+    // New LayerManager, CompositorParent and CompositorChild might be created
+    // as a result of internal GetLayerManager() call.
+    compositorChild->Destroy();
 
     // The call just made to SendWillStop can result in IPC from the
     // CompositorParent to the CompositorChild (e.g. caused by the destruction
     // of shared memory). We need to ensure this gets processed by the
     // CompositorChild before it gets destroyed. It suffices to ensure that
     // events already in the MessageLoop get processed before the
     // CompositorChild is destroyed, so we add a task to the MessageLoop to
     // handle compositor desctruction.
+
+    // The DefferedDestroyCompositor task takes ownership of compositorParent and
+    // will release them when it runs.
     MessageLoop::current()->PostTask(FROM_HERE,
-               NewRunnableFunction(DeferredDestroyCompositor, mCompositorParent,
-                                   mCompositorChild));
-    // The DestroyCompositor task we just added to the MessageLoop will handle
-    // releasing mCompositorParent and mCompositorChild.
-    unused << mCompositorParent.forget();
-    unused << mCompositorChild.forget();
+               NewRunnableFunction(DeferredDestroyCompositor, compositorParent,
+                                   compositorChild));
   }
 }
 
 //-------------------------------------------------------------------------
 //
 // nsBaseWidget destructor
 //
 //-------------------------------------------------------------------------
--- a/widget/tests/test_platform_colors.xul
+++ b/widget/tests/test_platform_colors.xul
@@ -50,17 +50,17 @@ var colors = {
   "threedshadow": ["rgb(224, 224, 224)"],
   "window": ["rgb(255, 255, 255)"],
   "windowframe": ["rgb(204, 204, 204)"],
   "windowtext": ["rgb(0, 0, 0)"],
   "-moz-activehyperlinktext": ["rgb(238, 0, 0)"],
   "-moz-buttondefault": ["rgb(220, 220, 220)"],
   "-moz-buttonhoverface": ["rgb(240, 240, 240)"],
   "-moz-buttonhovertext": ["rgb(0, 0, 0)"],
-  "-moz-cellhighlight": ["rgb(212, 212, 212)"],
+  "-moz-cellhighlight": ["rgb(212, 212, 212)", "rgb(220, 220, 220)"],
   "-moz-cellhighlighttext": ["rgb(0, 0, 0)"],
   "-moz-eventreerow": ["rgb(255, 255, 255)"],
   "-moz-field": ["rgb(255, 255, 255)"],
   "-moz-fieldtext": ["rgb(0, 0, 0)"],
   "-moz-dialog": ["rgb(232, 232, 232)"],
   "-moz-dialogtext": ["rgb(0, 0, 0)"],
   "-moz-dragtargetzone": ["rgb(199, 208, 218)", "rgb(198, 198, 198)", "rgb(180, 213, 255)", "rgb(250, 236, 115)", "rgb(255, 176, 139)", "rgb(255, 209, 129)", "rgb(194, 249, 144)", "rgb(232, 184, 255)"],
   "-moz-hyperlinktext": ["rgb(0, 0, 238)"],