Bug 730907 - Update libvpx to v1.0.0, r=cpearce,khuey
author: Timothy B. Terriberry <tterribe@vt.edu>
date: Sun, 29 Apr 2012 20:51:44 -0700
changeset: 96755 731c4bc9dd37ac8c1f9794d33cbcfa562affd4e8
parent: 96754 7e14eb34fba7d166b07372eb39d7f6827c210527
child: 96756 4a432c2d1b4185d7136f38accae7f27257d3c77e
push id: unknown
push user: unknown
push date: unknown
reviewers: cpearce, khuey
bugs: 730907
milestone: 15.0a1
Bug 730907 - Update libvpx to v1.0.0, r=cpearce,khuey
configure.in
media/libvpx/I1bad27ea.patch
media/libvpx/I256a37c6.patch
media/libvpx/I3915d597.patch
media/libvpx/I42ab00e3.patch
media/libvpx/I6f2b218d.patch
media/libvpx/I8a35831e.patch
media/libvpx/I9713c9f0.patch
media/libvpx/LICENSE
media/libvpx/Makefile.in
media/libvpx/build/make/ads2gas.pl
media/libvpx/compile_errors.patch
media/libvpx/solaris.patch
media/libvpx/textrels.patch
media/libvpx/update.sh
media/libvpx/variance-invoke.patch
media/libvpx/vp8/common/alloccommon.c
media/libvpx/vp8/common/arm/arm_systemdependent.c
media/libvpx/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
media/libvpx/vp8/common/arm/armv6/dequant_idct_v6.asm
media/libvpx/vp8/common/arm/armv6/dequantize_v6.asm
media/libvpx/vp8/common/arm/armv6/idct_blk_v6.c
media/libvpx/vp8/common/arm/armv6/idct_v6.asm
media/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm
media/libvpx/vp8/common/arm/armv6/iwalsh_v6.asm
media/libvpx/vp8/common/arm/armv6/recon_v6.asm
media/libvpx/vp8/common/arm/armv6/vp8_mse16x16_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
media/libvpx/vp8/common/arm/dequantize_arm.c
media/libvpx/vp8/common/arm/dequantize_arm.h
media/libvpx/vp8/common/arm/filter_arm.c
media/libvpx/vp8/common/arm/idct_arm.h
media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.asm
media/libvpx/vp8/common/arm/neon/dequant_idct_neon.asm
media/libvpx/vp8/common/arm/neon/dequantizeb_neon.asm
media/libvpx/vp8/common/arm/neon/idct_blk_neon.c
media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
media/libvpx/vp8/common/arm/neon/iwalsh_neon.asm
media/libvpx/vp8/common/arm/neon/recon16x16mb_neon.asm
media/libvpx/vp8/common/arm/neon/recon2b_neon.asm
media/libvpx/vp8/common/arm/neon/recon4b_neon.asm
media/libvpx/vp8/common/arm/neon/recon_neon.c
media/libvpx/vp8/common/arm/neon/reconb_neon.asm
media/libvpx/vp8/common/arm/neon/sad16_neon.asm
media/libvpx/vp8/common/arm/neon/sad8_neon.asm
media/libvpx/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
media/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.asm
media/libvpx/vp8/common/arm/neon/variance_neon.asm
media/libvpx/vp8/common/arm/neon/vp8_mse16x16_neon.asm
media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
media/libvpx/vp8/common/arm/recon_arm.h
media/libvpx/vp8/common/arm/reconintra_arm.c
media/libvpx/vp8/common/arm/variance_arm.c
media/libvpx/vp8/common/arm/variance_arm.h
media/libvpx/vp8/common/asm_com_offsets.c
media/libvpx/vp8/common/blockd.h
media/libvpx/vp8/common/common.h
media/libvpx/vp8/common/common_types.h
media/libvpx/vp8/common/default_coef_probs.h
media/libvpx/vp8/common/defaultcoefcounts.c
media/libvpx/vp8/common/defaultcoefcounts.h
media/libvpx/vp8/common/dequantize.c
media/libvpx/vp8/common/dequantize.h
media/libvpx/vp8/common/entropy.c
media/libvpx/vp8/common/entropymode.c
media/libvpx/vp8/common/entropymode.h
media/libvpx/vp8/common/extend.c
media/libvpx/vp8/common/extend.h
media/libvpx/vp8/common/findnearmv.c
media/libvpx/vp8/common/findnearmv.h
media/libvpx/vp8/common/g_common.h
media/libvpx/vp8/common/generic/systemdependent.c
media/libvpx/vp8/common/idct.h
media/libvpx/vp8/common/idct_blk.c
media/libvpx/vp8/common/idctllm.c
media/libvpx/vp8/common/invtrans.c
media/libvpx/vp8/common/invtrans.h
media/libvpx/vp8/common/loopfilter.c
media/libvpx/vp8/common/loopfilter.h
media/libvpx/vp8/common/loopfilter_filters.c
media/libvpx/vp8/common/mbpitch.c
media/libvpx/vp8/common/onyx.h
media/libvpx/vp8/common/onyxc_int.h
media/libvpx/vp8/common/onyxd.h
media/libvpx/vp8/common/postproc.c
media/libvpx/vp8/common/postproc.h
media/libvpx/vp8/common/ppflags.h
media/libvpx/vp8/common/recon.c
media/libvpx/vp8/common/recon.h
media/libvpx/vp8/common/reconinter.c
media/libvpx/vp8/common/reconinter.h
media/libvpx/vp8/common/reconintra.c
media/libvpx/vp8/common/reconintra4x4.c
media/libvpx/vp8/common/sad_c.c
media/libvpx/vp8/common/systemdependent.h
media/libvpx/vp8/common/textblit.c
media/libvpx/vp8/common/threading.h
media/libvpx/vp8/common/type_aliases.h
media/libvpx/vp8/common/variance.h
media/libvpx/vp8/common/variance_c.c
media/libvpx/vp8/common/x86/dequantize_mmx.asm
media/libvpx/vp8/common/x86/dequantize_x86.h
media/libvpx/vp8/common/x86/filter_x86.c
media/libvpx/vp8/common/x86/filter_x86.h
media/libvpx/vp8/common/x86/idct_blk_mmx.c
media/libvpx/vp8/common/x86/idct_blk_sse2.c
media/libvpx/vp8/common/x86/idct_x86.h
media/libvpx/vp8/common/x86/idctllm_mmx.asm
media/libvpx/vp8/common/x86/idctllm_sse2.asm
media/libvpx/vp8/common/x86/iwalsh_mmx.asm
media/libvpx/vp8/common/x86/iwalsh_sse2.asm
media/libvpx/vp8/common/x86/loopfilter_block_sse2.asm
media/libvpx/vp8/common/x86/loopfilter_mmx.asm
media/libvpx/vp8/common/x86/loopfilter_sse2.asm
media/libvpx/vp8/common/x86/loopfilter_x86.c
media/libvpx/vp8/common/x86/postproc_mmx.asm
media/libvpx/vp8/common/x86/postproc_sse2.asm
media/libvpx/vp8/common/x86/postproc_x86.c
media/libvpx/vp8/common/x86/recon_mmx.asm
media/libvpx/vp8/common/x86/recon_sse2.asm
media/libvpx/vp8/common/x86/recon_wrapper_sse2.c
media/libvpx/vp8/common/x86/recon_x86.h
media/libvpx/vp8/common/x86/sad_mmx.asm
media/libvpx/vp8/common/x86/sad_sse2.asm
media/libvpx/vp8/common/x86/sad_sse3.asm
media/libvpx/vp8/common/x86/sad_sse4.asm
media/libvpx/vp8/common/x86/sad_ssse3.asm
media/libvpx/vp8/common/x86/subpixel_mmx.asm
media/libvpx/vp8/common/x86/subpixel_sse2.asm
media/libvpx/vp8/common/x86/subpixel_ssse3.asm
media/libvpx/vp8/common/x86/subpixel_x86.h
media/libvpx/vp8/common/x86/variance_impl_mmx.asm
media/libvpx/vp8/common/x86/variance_impl_sse2.asm
media/libvpx/vp8/common/x86/variance_impl_ssse3.asm
media/libvpx/vp8/common/x86/variance_mmx.c
media/libvpx/vp8/common/x86/variance_sse2.c
media/libvpx/vp8/common/x86/variance_ssse3.c
media/libvpx/vp8/common/x86/variance_x86.h
media/libvpx/vp8/common/x86/vp8_asm_stubs.c
media/libvpx/vp8/common/x86/x86_systemdependent.c
media/libvpx/vp8/decoder/arm/arm_dsystemdependent.c
media/libvpx/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
media/libvpx/vp8/decoder/arm/armv6/dequant_idct_v6.asm
media/libvpx/vp8/decoder/arm/armv6/dequantize_v6.asm
media/libvpx/vp8/decoder/arm/armv6/idct_blk_v6.c
media/libvpx/vp8/decoder/arm/dequantize_arm.c
media/libvpx/vp8/decoder/arm/dequantize_arm.h
media/libvpx/vp8/decoder/arm/neon/dequant_idct_neon.asm
media/libvpx/vp8/decoder/arm/neon/dequantizeb_neon.asm
media/libvpx/vp8/decoder/arm/neon/idct_blk_neon.c
media/libvpx/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
media/libvpx/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
media/libvpx/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
media/libvpx/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
media/libvpx/vp8/decoder/asm_dec_offsets.c
media/libvpx/vp8/decoder/dboolhuff.h
media/libvpx/vp8/decoder/decodemv.c
media/libvpx/vp8/decoder/decodframe.c
media/libvpx/vp8/decoder/dequantize.c
media/libvpx/vp8/decoder/dequantize.h
media/libvpx/vp8/decoder/detokenize.c
media/libvpx/vp8/decoder/error_concealment.c
media/libvpx/vp8/decoder/generic/dsystemdependent.c
media/libvpx/vp8/decoder/idct_blk.c
media/libvpx/vp8/decoder/onyxd_if.c
media/libvpx/vp8/decoder/onyxd_int.h
media/libvpx/vp8/decoder/reconintra_mt.c
media/libvpx/vp8/decoder/reconintra_mt.h
media/libvpx/vp8/decoder/threading.c
media/libvpx/vp8/decoder/x86/dequantize_mmx.asm
media/libvpx/vp8/decoder/x86/dequantize_x86.h
media/libvpx/vp8/decoder/x86/idct_blk_mmx.c
media/libvpx/vp8/decoder/x86/idct_blk_sse2.c
media/libvpx/vp8/decoder/x86/x86_dsystemdependent.c
media/libvpx/vp8/encoder/arm/arm_csystemdependent.c
media/libvpx/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
media/libvpx/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
media/libvpx/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
media/libvpx/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_fast_fdct4x4_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
media/libvpx/vp8/encoder/arm/armv6/walsh_v6.asm
media/libvpx/vp8/encoder/arm/boolhuff_arm.c
media/libvpx/vp8/encoder/arm/dct_arm.c
media/libvpx/vp8/encoder/arm/dct_arm.h
media/libvpx/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
media/libvpx/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm
media/libvpx/vp8/encoder/arm/neon/picklpf_arm.c
media/libvpx/vp8/encoder/arm/neon/sad16_neon.asm
media/libvpx/vp8/encoder/arm/neon/sad8_neon.asm
media/libvpx/vp8/encoder/arm/neon/shortfdct_neon.asm
media/libvpx/vp8/encoder/arm/neon/subtract_neon.asm
media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
media/libvpx/vp8/encoder/arm/picklpf_arm.c
media/libvpx/vp8/encoder/arm/quantize_arm.c
media/libvpx/vp8/encoder/arm/variance_arm.c
media/libvpx/vp8/encoder/arm/variance_arm.h
media/libvpx/vp8/encoder/asm_enc_offsets.c
media/libvpx/vp8/encoder/bitstream.c
media/libvpx/vp8/encoder/bitstream.h
media/libvpx/vp8/encoder/block.h
media/libvpx/vp8/encoder/boolhuff.c
media/libvpx/vp8/encoder/boolhuff.h
media/libvpx/vp8/encoder/defaultcoefcounts.h
media/libvpx/vp8/encoder/encodeframe.c
media/libvpx/vp8/encoder/encodeintra.c
media/libvpx/vp8/encoder/encodemb.c
media/libvpx/vp8/encoder/encodemb.h
media/libvpx/vp8/encoder/encodemv.c
media/libvpx/vp8/encoder/ethreading.c
media/libvpx/vp8/encoder/firstpass.c
media/libvpx/vp8/encoder/generic/csystemdependent.c
media/libvpx/vp8/encoder/lookahead.c
media/libvpx/vp8/encoder/lookahead.h
media/libvpx/vp8/encoder/mcomp.c
media/libvpx/vp8/encoder/mcomp.h
media/libvpx/vp8/encoder/mr_dissim.c
media/libvpx/vp8/encoder/mr_dissim.h
media/libvpx/vp8/encoder/onyx_if.c
media/libvpx/vp8/encoder/onyx_int.h
media/libvpx/vp8/encoder/pickinter.c
media/libvpx/vp8/encoder/pickinter.h
media/libvpx/vp8/encoder/picklpf.c
media/libvpx/vp8/encoder/quantize.c
media/libvpx/vp8/encoder/quantize.h
media/libvpx/vp8/encoder/ratectrl.c
media/libvpx/vp8/encoder/rdopt.c
media/libvpx/vp8/encoder/rdopt.h
media/libvpx/vp8/encoder/sad_c.c
media/libvpx/vp8/encoder/temporal_filter.c
media/libvpx/vp8/encoder/tokenize.c
media/libvpx/vp8/encoder/treewriter.h
media/libvpx/vp8/encoder/variance.h
media/libvpx/vp8/encoder/variance_c.c
media/libvpx/vp8/encoder/x86/encodeopt.asm
media/libvpx/vp8/encoder/x86/quantize_sse2.asm
media/libvpx/vp8/encoder/x86/quantize_sse4.asm
media/libvpx/vp8/encoder/x86/quantize_ssse3.asm
media/libvpx/vp8/encoder/x86/sad_mmx.asm
media/libvpx/vp8/encoder/x86/sad_sse2.asm
media/libvpx/vp8/encoder/x86/sad_sse3.asm
media/libvpx/vp8/encoder/x86/sad_sse4.asm
media/libvpx/vp8/encoder/x86/sad_ssse3.asm
media/libvpx/vp8/encoder/x86/subtract_mmx.asm
media/libvpx/vp8/encoder/x86/subtract_sse2.asm
media/libvpx/vp8/encoder/x86/temporal_filter_apply_sse2.asm
media/libvpx/vp8/encoder/x86/variance_impl_mmx.asm
media/libvpx/vp8/encoder/x86/variance_impl_sse2.asm
media/libvpx/vp8/encoder/x86/variance_impl_ssse3.asm
media/libvpx/vp8/encoder/x86/variance_mmx.c
media/libvpx/vp8/encoder/x86/variance_sse2.c
media/libvpx/vp8/encoder/x86/variance_ssse3.c
media/libvpx/vp8/encoder/x86/variance_x86.h
media/libvpx/vp8/encoder/x86/x86_csystemdependent.c
media/libvpx/vp8/vp8_cx_iface.c
media/libvpx/vp8/vp8_dx_iface.c
media/libvpx/vpx/internal/vpx_codec_internal.h
media/libvpx/vpx/src/vpx_decoder.c
media/libvpx/vpx/src/vpx_encoder.c
media/libvpx/vpx/src/vpx_image.c
media/libvpx/vpx/vp8.h
media/libvpx/vpx/vp8cx.h
media/libvpx/vpx/vp8dx.h
media/libvpx/vpx/vpx_decoder.h
media/libvpx/vpx/vpx_encoder.h
media/libvpx/vpx/vpx_image.h
media/libvpx/vpx/vpx_integer.h
media/libvpx/vpx_config.h
media/libvpx/vpx_config_arm-linux-gcc.c
media/libvpx/vpx_config_arm-linux-gcc.h
media/libvpx/vpx_config_generic-gnu.c
media/libvpx/vpx_config_generic-gnu.h
media/libvpx/vpx_config_x86-darwin9-gcc.asm
media/libvpx/vpx_config_x86-darwin9-gcc.c
media/libvpx/vpx_config_x86-darwin9-gcc.h
media/libvpx/vpx_config_x86-linux-gcc.asm
media/libvpx/vpx_config_x86-linux-gcc.c
media/libvpx/vpx_config_x86-linux-gcc.h
media/libvpx/vpx_config_x86-win32-vs8.asm
media/libvpx/vpx_config_x86-win32-vs8.c
media/libvpx/vpx_config_x86-win32-vs8.h
media/libvpx/vpx_config_x86_64-darwin9-gcc.asm
media/libvpx/vpx_config_x86_64-darwin9-gcc.c
media/libvpx/vpx_config_x86_64-darwin9-gcc.h
media/libvpx/vpx_config_x86_64-linux-gcc.asm
media/libvpx/vpx_config_x86_64-linux-gcc.c
media/libvpx/vpx_config_x86_64-linux-gcc.h
media/libvpx/vpx_config_x86_64-win64-vs8.asm
media/libvpx/vpx_config_x86_64-win64-vs8.c
media/libvpx/vpx_config_x86_64-win64-vs8.h
media/libvpx/vpx_mem/include/vpx_mem_intrnl.h
media/libvpx/vpx_ports/arm.h
media/libvpx/vpx_ports/arm_cpudetect.c
media/libvpx/vpx_ports/config.h
media/libvpx/vpx_ports/x86.h
media/libvpx/vpx_scale/arm/arm_scalesystemdependent.c
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
media/libvpx/vpx_scale/arm/neon/yv12extend_arm.c
media/libvpx/vpx_scale/arm/yv12extend_arm.h
media/libvpx/vpx_scale/generic/scalesystemdependent.c
media/libvpx/vpx_scale/generic/vpxscale.c
media/libvpx/vpx_scale/generic/yv12extend.c
media/libvpx/vpx_scale/generic/yv12extend_generic.h
media/libvpx/vpx_scale/vpxscale.h
media/libvpx/vpx_scale/yv12extend.h
media/libvpx/vpx_version.h
--- a/configure.in
+++ b/configure.in
@@ -305,16 +305,17 @@ MOZ_ARG_WITH_STRING(gonk-toolchain-prefi
 [  --with-gonk-toolchain-prefix=DIR
                           prefix to gonk toolchain commands],
     gonk_toolchain_prefix=$withval)
 
 if test -n "$gonkdir" ; then
     kernel_name=`uname -s | tr "[[:upper:]]" "[[:lower:]]"`
     android_source="$gonkdir"
     ANDROID_SOURCE="$android_source"
+    ANDROID_NDK="${ANDROID_SOURCE}/ndk"
 
     dnl set up compilers
     AS="$gonk_toolchain_prefix"as
     CC="$gonk_toolchain_prefix"gcc
     CXX="$gonk_toolchain_prefix"g++
     CPP="$gonk_toolchain_prefix"cpp
     LD="$gonk_toolchain_prefix"ld
     AR="$gonk_toolchain_prefix"ar
new file mode 100644
--- /dev/null
+++ b/media/libvpx/I1bad27ea.patch
@@ -0,0 +1,624 @@
+# HG changeset patch
+# Parent 5a1a0398f8503451582602525c3e7b35def5d0b9
+# User Timothy B. Terriberry <tterribe@vt.edu>
+Fix variance overflow
+
+Upstream Change-Id: I1bad27ea0720067def6d71a6da5f789508cec265
+
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
++++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+@@ -139,16 +139,16 @@ loop
+     subs    r12, r12, #1
+ 
+     bne     loop
+ 
+     ; return stuff
+     ldr     r6, [sp, #40]       ; get address of sse
+     mul     r0, r8, r8          ; sum * sum
+     str     r11, [r6]           ; store sse
+-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
++    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
+ 
+     ldmfd   sp!, {r4-r12, pc}
+ 
+     ENDP
+ 
+     END
+ 
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
++++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+@@ -164,17 +164,17 @@ loop
+     subs    r12, r12, #1
+ 
+     bne     loop
+ 
+     ; return stuff
+     ldr     r6, [sp, #40]       ; get address of sse
+     mul     r0, r8, r8          ; sum * sum
+     str     r11, [r6]           ; store sse
+-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
++    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
+ 
+     ldmfd   sp!, {r4-r12, pc}
+ 
+     ENDP
+ 
+ c80808080
+     DCD     0x80808080
+ 
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
++++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+@@ -205,17 +205,17 @@ loop
+     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
+ 
+     bne     loop
+ 
+     ; return stuff
+     ldr     r6, [sp, #40]       ; get address of sse
+     mul     r0, r8, r8          ; sum * sum
+     str     r11, [r6]           ; store sse
+-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
++    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
+ 
+     ldmfd   sp!, {r4-r12, pc}
+ 
+     ENDP
+ 
+ c80808080
+     DCD     0x80808080
+ 
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
++++ b/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+@@ -166,17 +166,17 @@ loop
+     subs    r12, r12, #1
+ 
+     bne     loop
+ 
+     ; return stuff
+     ldr     r6, [sp, #40]       ; get address of sse
+     mul     r0, r8, r8          ; sum * sum
+     str     r11, [r6]           ; store sse
+-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
++    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
+ 
+     ldmfd   sp!, {r4-r12, pc}
+ 
+     ENDP
+ 
+ c80808080
+     DCD     0x80808080
+ 
+diff --git a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
+--- a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
++++ b/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
+@@ -72,24 +72,24 @@ variance16x16_neon_loop
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     ;vmov.32        r0, d0[0]                   ;this instruction costs a lot
+     ;vmov.32        r1, d1[0]
+     ;mul            r0, r0, r0
+     ;str            r1, [r12]
+-    ;sub            r0, r1, r0, asr #8
++    ;sub            r0, r1, r0, lsr #8
+ 
+-    ;sum is in [-255x256, 255x256]. sumxsum is 32-bit. Shift to right should
+-    ;have sign-bit exension, which is vshr.s. Have to use s32 to make it right.
++    ; while sum is signed, sum * sum is always positive and must be treated as
++    ; unsigned to avoid propagating the sign bit.
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [r12]              ;store sse
+-    vshr.s32        d10, d10, #8
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #8
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     bx              lr
+ 
+     ENDP
+ 
+ ;================================
+ ;unsigned int vp8_variance16x8_c(
+@@ -140,18 +140,18 @@ variance16x8_neon_loop
+     ldr             r12, [sp]                   ;load *sse from stack
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [r12]              ;store sse
+-    vshr.s32        d10, d10, #7
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #7
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     bx              lr
+ 
+     ENDP
+ 
+ ;=================================
+ ;unsigned int vp8_variance8x16_c(
+@@ -195,18 +195,18 @@ variance8x16_neon_loop
+     ldr             r12, [sp]                   ;load *sse from stack
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [r12]              ;store sse
+-    vshr.s32        d10, d10, #7
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #7
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     bx              lr
+ 
+     ENDP
+ 
+ ;==================================
+ ; r0    unsigned char *src_ptr
+@@ -260,17 +260,17 @@ variance8x8_neon_loop
+     ldr             r12, [sp]                   ;load *sse from stack
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [r12]              ;store sse
+-    vshr.s32        d10, d10, #6
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #6
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     bx              lr
+ 
+     ENDP
+ 
+     END
+diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
++++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+@@ -400,18 +400,18 @@ sub_pixel_variance16x16_neon_loop
+     vpaddl.s32      q0, q8                      ;accumulate sum
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [r6]               ;store sse
+-    vshr.s32        d10, d10, #8
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #8
++    vsub.u32        d0, d1, d10
+ 
+     add             sp, sp, #528
+     vmov.32         r0, d0[0]                   ;return
+ 
+     pop             {r4-r6,pc}
+ 
+     ENDP
+ 
+diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
++++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+@@ -107,18 +107,18 @@ vp8_filt_fpo16x16s_4_0_loop_neon
+     vpaddl.s32      q0, q8                      ;accumulate sum
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [lr]               ;store sse
+-    vshr.s32        d10, d10, #8
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #8
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     pop             {pc}
+     ENDP
+ 
+ ;================================================
+ ;unsigned int vp8_variance_halfpixvar16x16_v_neon
+ ;(
+@@ -203,18 +203,18 @@ vp8_filt_spo16x16s_0_4_loop_neon
+     vpaddl.s32      q0, q8                      ;accumulate sum
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [lr]               ;store sse
+-    vshr.s32        d10, d10, #8
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #8
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     pop             {pc}
+     ENDP
+ 
+ ;================================================
+ ;unsigned int vp8_variance_halfpixvar16x16_hv_neon
+ ;(
+@@ -322,18 +322,18 @@ vp8_filt16x16s_4_4_loop_neon
+     vpaddl.s32      q0, q13                      ;accumulate sum
+ 
+     vpaddl.u32      q1, q15
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [lr]               ;store sse
+-    vshr.s32        d10, d10, #8
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #8
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     pop             {pc}
+     ENDP
+ 
+ ;==============================
+ ; r0    unsigned char  *src_ptr,
+ ; r1    int  src_pixels_per_line,
+@@ -555,18 +555,18 @@ sub_pixel_variance16x16s_neon_loop
+     vpaddl.s32      q0, q8                      ;accumulate sum
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [lr]               ;store sse
+-    vshr.s32        d10, d10, #8
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #8
++    vsub.u32        d0, d1, d10
+ 
+     add             sp, sp, #256
+     vmov.32         r0, d0[0]                   ;return
+ 
+     pop             {r4, pc}
+     ENDP
+ 
+     END
+diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+--- a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
++++ b/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+@@ -201,18 +201,18 @@ sub_pixel_variance8x8_neon_loop
+     vpaddl.s32      q0, q8                      ;accumulate sum
+ 
+     vpaddl.u32      q1, q10
+     vadd.s64        d0, d0, d1
+     vadd.u64        d1, d2, d3
+ 
+     vmull.s32       q5, d0, d0
+     vst1.32         {d1[0]}, [lr]               ;store sse
+-    vshr.s32        d10, d10, #6
+-    vsub.s32        d0, d1, d10
++    vshr.u32        d10, d10, #6
++    vsub.u32        d0, d1, d10
+ 
+     vmov.32         r0, d0[0]                   ;return
+     pop             {r4-r5, pc}
+ 
+     ENDP
+ 
+ ;-----------------
+ 
+diff --git a/media/libvpx/vp8/encoder/variance_c.c b/media/libvpx/vp8/encoder/variance_c.c
+--- a/media/libvpx/vp8/encoder/variance_c.c
++++ b/media/libvpx/vp8/encoder/variance_c.c
+@@ -70,82 +70,82 @@ unsigned int vp8_variance16x16_c(
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+ 
+     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
+     *sse = var;
+-    return (var - ((avg * avg) >> 8));
++    return (var - ((unsigned int)(avg * avg) >> 8));
+ }
+ 
+ unsigned int vp8_variance8x16_c(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+ 
+     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
+     *sse = var;
+-    return (var - ((avg * avg) >> 7));
++    return (var - ((unsigned int)(avg * avg) >> 7));
+ }
+ 
+ unsigned int vp8_variance16x8_c(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+ 
+     variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
+     *sse = var;
+-    return (var - ((avg * avg) >> 7));
++    return (var - ((unsigned int)(avg * avg) >> 7));
+ }
+ 
+ 
+ unsigned int vp8_variance8x8_c(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+ 
+     variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
+     *sse = var;
+-    return (var - ((avg * avg) >> 6));
++    return (var - ((unsigned int)(avg * avg) >> 6));
+ }
+ 
+ unsigned int vp8_variance4x4_c(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+ 
+     variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
+     *sse = var;
+-    return (var - ((avg * avg) >> 4));
++    return (var - ((unsigned int)(avg * avg) >> 4));
+ }
+ 
+ 
+ unsigned int vp8_mse16x16_c(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+diff --git a/media/libvpx/vp8/encoder/x86/variance_mmx.c b/media/libvpx/vp8/encoder/x86/variance_mmx.c
+--- a/media/libvpx/vp8/encoder/x86/variance_mmx.c
++++ b/media/libvpx/vp8/encoder/x86/variance_mmx.c
+@@ -86,34 +86,34 @@ unsigned int vp8_variance4x4_mmx(
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
+     *sse = var;
+-    return (var - ((avg * avg) >> 4));
++    return (var - ((unsigned int)(avg * avg) >> 4));
+ 
+ }
+ 
+ unsigned int vp8_variance8x8_mmx(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
+     *sse = var;
+ 
+-    return (var - ((avg * avg) >> 6));
++    return (var - ((unsigned int)(avg * avg) >> 6));
+ 
+ }
+ 
+ unsigned int vp8_mse16x16_mmx(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+@@ -148,17 +148,17 @@ unsigned int vp8_variance16x16_mmx(
+     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
+     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
+     vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
+ 
+     var = sse0 + sse1 + sse2 + sse3;
+     avg = sum0 + sum1 + sum2 + sum3;
+     *sse = var;
+-    return (var - ((avg * avg) >> 8));
++    return (var - ((unsigned int)(avg * avg) >> 8));
+ }
+ 
+ unsigned int vp8_variance16x8_mmx(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+     unsigned int *sse)
+@@ -167,17 +167,17 @@ unsigned int vp8_variance16x8_mmx(
+     int sum0, sum1, avg;
+ 
+     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
+     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+ 
+     var = sse0 + sse1;
+     avg = sum0 + sum1;
+     *sse = var;
+-    return (var - ((avg * avg) >> 7));
++    return (var - ((unsigned int)(avg * avg) >> 7));
+ 
+ }
+ 
+ 
+ unsigned int vp8_variance8x16_mmx(
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+@@ -189,17 +189,17 @@ unsigned int vp8_variance8x16_mmx(
+ 
+     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
+     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
+ 
+     var = sse0 + sse1;
+     avg = sum0 + sum1;
+     *sse = var;
+ 
+-    return (var - ((avg * avg) >> 7));
++    return (var - ((unsigned int)(avg * avg) >> 7));
+ 
+ }
+ 
+ 
+ unsigned int vp8_sub_pixel_variance4x4_mmx
+ (
+     const unsigned char  *src_ptr,
+     int  src_pixels_per_line,
+diff --git a/media/libvpx/vp8/encoder/x86/variance_sse2.c b/media/libvpx/vp8/encoder/x86/variance_sse2.c
+--- a/media/libvpx/vp8/encoder/x86/variance_sse2.c
++++ b/media/libvpx/vp8/encoder/x86/variance_sse2.c
+@@ -143,34 +143,34 @@ unsigned int vp8_variance4x4_wmt(
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
+     *sse = var;
+-    return (var - ((avg * avg) >> 4));
++    return (var - ((unsigned int)(avg * avg) >> 4));
+ 
+ }
+ 
+ unsigned int vp8_variance8x8_wmt
+ (
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+     int  recon_stride,
+     unsigned int *sse)
+ {
+     unsigned int var;
+     int avg;
+ 
+     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
+     *sse = var;
+-    return (var - ((avg * avg) >> 6));
++    return (var - ((unsigned int)(avg * avg) >> 6));
+ 
+ }
+ 
+ 
+ unsigned int vp8_variance16x16_wmt
+ (
+     const unsigned char *src_ptr,
+     int  source_stride,
+@@ -215,17 +215,17 @@ unsigned int vp8_variance16x8_wmt
+     int sum0, sum1, avg;
+ 
+     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
+     vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+ 
+     var = sse0 + sse1;
+     avg = sum0 + sum1;
+     *sse = var;
+-    return (var - ((avg * avg) >> 7));
++    return (var - ((unsigned int)(avg * avg) >> 7));
+ 
+ }
+ 
+ unsigned int vp8_variance8x16_wmt
+ (
+     const unsigned char *src_ptr,
+     int  source_stride,
+     const unsigned char *ref_ptr,
+@@ -236,17 +236,17 @@ unsigned int vp8_variance8x16_wmt
+     int sum0, sum1, avg;
+ 
+     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
+     vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
+ 
+     var = sse0 + sse1;
+     avg = sum0 + sum1;
+     *sse = var;
+-    return (var - ((avg * avg) >> 7));
++    return (var - ((unsigned int)(avg * avg) >> 7));
+ 
+ }
+ 
+ unsigned int vp8_sub_pixel_variance4x4_wmt
+ (
+     const unsigned char  *src_ptr,
+     int  src_pixels_per_line,
+     int  xoffset,
+diff --git a/media/libvpx/vp8/encoder/x86/variance_ssse3.c b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
+--- a/media/libvpx/vp8/encoder/x86/variance_ssse3.c
++++ b/media/libvpx/vp8/encoder/x86/variance_ssse3.c
+@@ -107,17 +107,17 @@ unsigned int vp8_sub_pixel_variance16x16
+         vp8_filter_block2d_bil_var_ssse3(
+             src_ptr, src_pixels_per_line,
+             dst_ptr, dst_pixels_per_line, 16,
+             xoffset, yoffset,
+             &xsum0, &xxsum0);
+     }
+ 
+     *sse = xxsum0;
+-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
++    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8));
+ }
+ 
+ unsigned int vp8_sub_pixel_variance16x8_ssse3
+ (
+     const unsigned char  *src_ptr,
+     int  src_pixels_per_line,
+     int  xoffset,
+     int  yoffset,
+@@ -156,10 +156,10 @@ unsigned int vp8_sub_pixel_variance16x8_
+         vp8_filter_block2d_bil_var_ssse3(
+             src_ptr, src_pixels_per_line,
+             dst_ptr, dst_pixels_per_line, 8,
+             xoffset, yoffset,
+             &xsum0, &xxsum0);
+     }
+ 
+     *sse = xxsum0;
+-    return (xxsum0 - ((xsum0 * xsum0) >> 7));
++    return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7));
+ }
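
The asr-to-lsr and (unsigned int) cast changes in I1bad27ea.patch above all target the same failure: for a 16x16 block the sum of differences can reach +/-65280, so its square can exceed INT_MAX while still fitting in 32 unsigned bits, and an arithmetic right shift then propagates the overflowed high bit into the variance. The following self-contained C sketch of the 16x16 case is illustrative only and not part of the changeset; the asr8 helper and the numeric inputs are hypothetical.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch only -- not part of the changeset.  It models the
 * 16x16 case fixed by I1bad27ea.patch: the variance is computed as
 * sse - ((sum * sum) >> 8), but sum*sum can exceed INT_MAX (up to ~4.26e9)
 * while still fitting in an unsigned 32-bit value.  Shifting that product
 * arithmetically (asr, or a signed >> in C) smears the high bit; the patch
 * switches to a logical shift (lsr) and an unsigned int cast.
 * The numbers below are hypothetical.
 */

/* Portable model of the ARM "asr #8" applied to a 32-bit value. */
static uint32_t asr8(uint32_t x)
{
    return (x >> 8) | ((x & 0x80000000u) ? 0xFF000000u : 0u);
}

int main(void)
{
    uint32_t sse  = 16000000u;  /* hypothetical sum of squared differences */
    int32_t  sum  = 60000;      /* hypothetical sum of differences         */
    uint32_t prod = (uint32_t)sum * (uint32_t)sum;   /* 3,600,000,000      */

    uint32_t old_result = sse - asr8(prod);      /* sub r0, r11, r0, asr #8 */
    uint32_t new_result = sse - (prod >> 8);     /* sub r0, r11, r0, lsr #8 */

    printf("arithmetic shift (old): %u\n", old_result); /* 18714716, wrong  */
    printf("logical shift    (new): %u\n", new_result); /* 1937500, correct */
    return 0;
}

The C functions in variance_c.c and the MMX/SSE2/SSSE3 wrappers get the same effect by casting (avg * avg) to unsigned int before the shift, as shown in the hunks above.
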
new file mode 100644
--- /dev/null
+++ b/media/libvpx/I256a37c6.patch
@@ -0,0 +1,1932 @@
+# HG changeset patch
+# Parent 677ab41568f1a8427e3e43a6ce9b0d7c822b1f7e
+# User Timothy B. Terriberry <tterribe@vt.edu>
+Move SAD and variance functions to common
+
+Upstream Change-Id: I256a37c6de079fe92ce744b1f11e16526d06b50a
+
+This patch contains substantial differences compared to the upstream
+one, as it still uses the old RTCD framework and does not include
+the extra short-circuiting work done in upstream change
+I05ce5b2d34e6d45fb3ec2a450aa99c4f3343bf3a.
+
+diff --git a/media/libvpx/vp8/common/arm/arm_systemdependent.c b/media/libvpx/vp8/common/arm/arm_systemdependent.c
+--- a/media/libvpx/vp8/common/arm/arm_systemdependent.c
++++ b/media/libvpx/vp8/common/arm/arm_systemdependent.c
+@@ -11,16 +11,17 @@
+ 
+ #include "vpx_config.h"
+ #include "vpx_ports/arm.h"
+ #include "vp8/common/pragmas.h"
+ #include "vp8/common/subpixel.h"
+ #include "vp8/common/loopfilter.h"
+ #include "vp8/common/recon.h"
+ #include "vp8/common/idct.h"
++#include "vp8/common/variance.h"
+ #include "vp8/common/onyxc_int.h"
+ 
+ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+ {
+ #if CONFIG_RUNTIME_CPU_DETECT
+     VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+     int flags = arm_cpu_caps();
+     rtcd->flags = flags;
+@@ -63,16 +64,41 @@ void vp8_arch_arm_common_init(VP8_COMMON
+         rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
+         rtcd->recon.intra4x4_predict = vp8_intra4x4_predict_armv6;
+ 
+         rtcd->dequant.block               = vp8_dequantize_b_v6;
+         rtcd->dequant.idct_add            = vp8_dequant_idct_add_v6;
+         rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
+         rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
+ 
++        rtcd->variance.sad16x16              = vp8_sad16x16_armv6;
++        /*rtcd->variance.sad16x8               = vp8_sad16x8_c;
++        rtcd->variance.sad8x16               = vp8_sad8x16_c;
++        rtcd->variance.sad8x8                = vp8_sad8x8_c;
++        rtcd->variance.sad4x4                = vp8_sad4x4_c;*/
++
++        /*rtcd->variance.var4x4                = vp8_variance4x4_c;*/
++        rtcd->variance.var8x8                = vp8_variance8x8_armv6;
++        /*rtcd->variance.var8x16               = vp8_variance8x16_c;
++        rtcd->variance.var16x8               = vp8_variance16x8_c;*/
++        rtcd->variance.var16x16              = vp8_variance16x16_armv6;
++
++        /*rtcd->variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
++        rtcd->variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_armv6;
++        /*rtcd->variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
++        rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
++        rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_armv6;
++        rtcd->variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_armv6;
++        rtcd->variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_armv6;
++        rtcd->variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_armv6;
++
++        rtcd->variance.mse16x16              = vp8_mse16x16_armv6;
++        /*rtcd->variance.getmbss               = vp8_get_mb_ss_c;*/
++
++        /*rtcd->variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/
+     }
+ #endif
+ 
+ #if HAVE_ARMV7
+     if (flags & HAS_NEON)
+     {
+         rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
+         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
+@@ -103,13 +129,38 @@ void vp8_arch_arm_common_init(VP8_COMMON
+         rtcd->recon.build_intra_predictors_mby_s =
+             vp8_build_intra_predictors_mby_s_neon;
+ 
+         rtcd->dequant.block               = vp8_dequantize_b_neon;
+         rtcd->dequant.idct_add            = vp8_dequant_idct_add_neon;
+         rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
+         rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
+ 
++        rtcd->variance.sad16x16              = vp8_sad16x16_neon;
++        rtcd->variance.sad16x8               = vp8_sad16x8_neon;
++        rtcd->variance.sad8x16               = vp8_sad8x16_neon;
++        rtcd->variance.sad8x8                = vp8_sad8x8_neon;
++        rtcd->variance.sad4x4                = vp8_sad4x4_neon;
++
++        /*rtcd->variance.var4x4                = vp8_variance4x4_c;*/
++        rtcd->variance.var8x8                = vp8_variance8x8_neon;
++        rtcd->variance.var8x16               = vp8_variance8x16_neon;
++        rtcd->variance.var16x8               = vp8_variance16x8_neon;
++        rtcd->variance.var16x16              = vp8_variance16x16_neon;
++
++        /*rtcd->variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
++        rtcd->variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
++        /*rtcd->variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
++        rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
++        rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
++        rtcd->variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_neon;
++        rtcd->variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_neon;
++        rtcd->variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_neon;
++
++        rtcd->variance.mse16x16              = vp8_mse16x16_neon;
++        /*rtcd->variance.getmbss               = vp8_get_mb_ss_c;*/
++
++        rtcd->variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
+     }
+ #endif
+ 
+ #endif
+ }
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_mse16x16_armv6.asm
+rename from media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
+rename to media/libvpx/vp8/common/arm/armv6/vp8_mse16x16_armv6.asm
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm
+rename from media/libvpx/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
+rename to media/libvpx/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
+rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+rename to media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
+rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
+rename to media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+rename to media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+rename to media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+diff --git a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+rename to media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+diff --git a/media/libvpx/vp8/encoder/arm/neon/sad16_neon.asm b/media/libvpx/vp8/common/arm/neon/sad16_neon.asm
+rename from media/libvpx/vp8/encoder/arm/neon/sad16_neon.asm
+rename to media/libvpx/vp8/common/arm/neon/sad16_neon.asm
+diff --git a/media/libvpx/vp8/encoder/arm/neon/sad8_neon.asm b/media/libvpx/vp8/common/arm/neon/sad8_neon.asm
+rename from media/libvpx/vp8/encoder/arm/neon/sad8_neon.asm
+rename to media/libvpx/vp8/common/arm/neon/sad8_neon.asm
+diff --git a/media/libvpx/vp8/encoder/arm/neon/variance_neon.asm b/media/libvpx/vp8/common/arm/neon/variance_neon.asm
+rename from media/libvpx/vp8/encoder/arm/neon/variance_neon.asm
+rename to media/libvpx/vp8/common/arm/neon/variance_neon.asm
+diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_mse16x16_neon.asm
+rename from media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+rename to media/libvpx/vp8/common/arm/neon/vp8_mse16x16_neon.asm
+diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+rename from media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+rename to media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+rename from media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+rename to media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+diff --git a/media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+rename from media/libvpx/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+rename to media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+diff --git a/media/libvpx/vp8/encoder/arm/variance_arm.c b/media/libvpx/vp8/common/arm/variance_arm.c
+rename from media/libvpx/vp8/encoder/arm/variance_arm.c
+rename to media/libvpx/vp8/common/arm/variance_arm.c
+--- a/media/libvpx/vp8/encoder/arm/variance_arm.c
++++ b/media/libvpx/vp8/common/arm/variance_arm.c
+@@ -4,17 +4,17 @@
+  *  Use of this source code is governed by a BSD-style license
+  *  that can be found in the LICENSE file in the root of the source
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ #include "vpx_config.h"
+-#include "vp8/encoder/variance.h"
++#include "vp8/common/variance.h"
+ #include "vp8/common/filter.h"
+ 
+ #if HAVE_ARMV6
+ #include "vp8/common/arm/bilinearfilter_arm.h"
+ 
+ unsigned int vp8_sub_pixel_variance8x8_armv6
+ (
+     const unsigned char  *src_ptr,
+diff --git a/media/libvpx/vp8/encoder/arm/variance_arm.h b/media/libvpx/vp8/common/arm/variance_arm.h
+rename from media/libvpx/vp8/encoder/arm/variance_arm.h
+rename to media/libvpx/vp8/common/arm/variance_arm.h
+diff --git a/media/libvpx/vp8/common/generic/systemdependent.c b/media/libvpx/vp8/common/generic/systemdependent.c
+--- a/media/libvpx/vp8/common/generic/systemdependent.c
++++ b/media/libvpx/vp8/common/generic/systemdependent.c
+@@ -9,16 +9,17 @@
+  */
+ 
+ 
+ #include "vpx_config.h"
+ #include "vp8/common/subpixel.h"
+ #include "vp8/common/loopfilter.h"
+ #include "vp8/common/recon.h"
+ #include "vp8/common/idct.h"
++#include "vp8/common/variance.h"
+ #include "vp8/common/onyxc_int.h"
+ 
+ #if CONFIG_MULTITHREAD
+ #if HAVE_UNISTD_H
+ #include <unistd.h>
+ #elif defined(_WIN32)
+ #include <windows.h>
+ typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
+@@ -110,16 +111,67 @@ void vp8_machine_specific_config(VP8_COM
+     rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_c;
+     rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
+     rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_c;
+     rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_c;
+     rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_c;
+     rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_c;
+     rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
+ 
++    rtcd->variance.sad16x16              = vp8_sad16x16_c;
++    rtcd->variance.sad16x8               = vp8_sad16x8_c;
++    rtcd->variance.sad8x16               = vp8_sad8x16_c;
++    rtcd->variance.sad8x8                = vp8_sad8x8_c;
++    rtcd->variance.sad4x4                = vp8_sad4x4_c;
++
++    rtcd->variance.sad16x16x3            = vp8_sad16x16x3_c;
++    rtcd->variance.sad16x8x3             = vp8_sad16x8x3_c;
++    rtcd->variance.sad8x16x3             = vp8_sad8x16x3_c;
++    rtcd->variance.sad8x8x3              = vp8_sad8x8x3_c;
++    rtcd->variance.sad4x4x3              = vp8_sad4x4x3_c;
++
++    rtcd->variance.sad16x16x8            = vp8_sad16x16x8_c;
++    rtcd->variance.sad16x8x8             = vp8_sad16x8x8_c;
++    rtcd->variance.sad8x16x8             = vp8_sad8x16x8_c;
++    rtcd->variance.sad8x8x8              = vp8_sad8x8x8_c;
++    rtcd->variance.sad4x4x8              = vp8_sad4x4x8_c;
++
++    rtcd->variance.sad16x16x4d           = vp8_sad16x16x4d_c;
++    rtcd->variance.sad16x8x4d            = vp8_sad16x8x4d_c;
++    rtcd->variance.sad8x16x4d            = vp8_sad8x16x4d_c;
++    rtcd->variance.sad8x8x4d             = vp8_sad8x8x4d_c;
++    rtcd->variance.sad4x4x4d             = vp8_sad4x4x4d_c;
++#if ARCH_X86 || ARCH_X86_64
++    rtcd->variance.copy32xn              = vp8_copy32xn_c;
++#endif
++    rtcd->variance.var4x4                = vp8_variance4x4_c;
++    rtcd->variance.var8x8                = vp8_variance8x8_c;
++    rtcd->variance.var8x16               = vp8_variance8x16_c;
++    rtcd->variance.var16x8               = vp8_variance16x8_c;
++    rtcd->variance.var16x16              = vp8_variance16x16_c;
++
++    rtcd->variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
++    rtcd->variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
++    rtcd->variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
++    rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
++    rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
++    rtcd->variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_c;
++    rtcd->variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_c;
++    rtcd->variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_c;
++    rtcd->variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_c;
++
++    rtcd->variance.mse16x16              = vp8_mse16x16_c;
++    rtcd->variance.getmbss               = vp8_get_mb_ss_c;
++
++    rtcd->variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
++#if CONFIG_INTERNAL_STATS
++    rtcd->variance.ssimpf_8x8            = vp8_ssim_parms_8x8_c;
++    rtcd->variance.ssimpf_16x16          = vp8_ssim_parms_16x16_c;
++#endif
++
+ #if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_INTERNAL_STATS)
+     rtcd->postproc.down             = vp8_mbpost_proc_down_c;
+     rtcd->postproc.across           = vp8_mbpost_proc_across_ip_c;
+     rtcd->postproc.downacross       = vp8_post_proc_down_and_across_c;
+     rtcd->postproc.addnoise         = vp8_plane_add_noise_c;
+     rtcd->postproc.blend_mb_inner   = vp8_blend_mb_inner_c;
+     rtcd->postproc.blend_mb_outer   = vp8_blend_mb_outer_c;
+     rtcd->postproc.blend_b          = vp8_blend_b_c;
+diff --git a/media/libvpx/vp8/common/onyxc_int.h b/media/libvpx/vp8/common/onyxc_int.h
+--- a/media/libvpx/vp8/common/onyxc_int.h
++++ b/media/libvpx/vp8/common/onyxc_int.h
+@@ -14,16 +14,17 @@
+ 
+ #include "vpx_config.h"
+ #include "vpx/internal/vpx_codec_internal.h"
+ #include "loopfilter.h"
+ #include "entropymv.h"
+ #include "entropy.h"
+ #include "idct.h"
+ #include "recon.h"
++#include "variance.h"
+ #if CONFIG_POSTPROC
+ #include "postproc.h"
+ #endif
+ #include "dequantize.h"
+ 
+ /*#ifdef PACKET_TESTING*/
+ #include "header.h"
+ /*#endif*/
+@@ -74,16 +75,17 @@ typedef enum
+ typedef struct VP8_COMMON_RTCD
+ {
+ #if CONFIG_RUNTIME_CPU_DETECT
+     vp8_dequant_rtcd_vtable_t        dequant;
+     vp8_idct_rtcd_vtable_t        idct;
+     vp8_recon_rtcd_vtable_t       recon;
+     vp8_subpix_rtcd_vtable_t      subpix;
+     vp8_loopfilter_rtcd_vtable_t  loopfilter;
++    vp8_variance_rtcd_vtable_t    variance;
+ #if CONFIG_POSTPROC
+     vp8_postproc_rtcd_vtable_t    postproc;
+ #endif
+     int                           flags;
+ #else
+     int unused;
+ #endif
+ } VP8_COMMON_RTCD;
+diff --git a/media/libvpx/vp8/common/postproc.c b/media/libvpx/vp8/common/postproc.c
+--- a/media/libvpx/vp8/common/postproc.c
++++ b/media/libvpx/vp8/common/postproc.c
+@@ -12,17 +12,17 @@
+ #include "vpx_config.h"
+ #include "vpx_scale/yv12config.h"
+ #include "postproc.h"
+ #include "common.h"
+ #include "recon.h"
+ #include "vpx_scale/yv12extend.h"
+ #include "vpx_scale/vpxscale.h"
+ #include "systemdependent.h"
+-#include "../encoder/variance.h"
++#include "variance.h"
+ 
+ #include <math.h>
+ #include <stdlib.h>
+ #include <stdio.h>
+ 
+ #define RGB_TO_YUV(t)                                                                       \
+     ( (0.257*(float)(t>>16)) + (0.504*(float)(t>>8&0xff)) + (0.098*(float)(t&0xff)) + 16),  \
+     (-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
+diff --git a/media/libvpx/vp8/encoder/sad_c.c b/media/libvpx/vp8/common/sad_c.c
+rename from media/libvpx/vp8/encoder/sad_c.c
+rename to media/libvpx/vp8/common/sad_c.c
+diff --git a/media/libvpx/vp8/encoder/variance.h b/media/libvpx/vp8/common/variance.h
+rename from media/libvpx/vp8/encoder/variance.h
+rename to media/libvpx/vp8/common/variance.h
+--- a/media/libvpx/vp8/encoder/variance.h
++++ b/media/libvpx/vp8/common/variance.h
+@@ -78,31 +78,31 @@
+     ( \
+       const unsigned char  *src_ptr, \
+       int  source_stride, \
+       int  xoffset, \
+       int  yoffset, \
+       const unsigned char *ref_ptr, \
+       int Refstride, \
+       unsigned int *sse \
+-    );
++    )
+ 
+ #define prototype_ssimpf(sym) \
+     void (sym) \
+       ( \
+         unsigned char *s, \
+         int sp, \
+         unsigned char *r, \
+         int rp, \
+         unsigned long *sum_s, \
+         unsigned long *sum_r, \
+         unsigned long *sum_sq_s, \
+         unsigned long *sum_sq_r, \
+         unsigned long *sum_sxr \
+-      );
++      )
+ 
+ #define prototype_getmbss(sym) unsigned int (sym)(const short *)
+ 
+ #define prototype_get16x16prederror(sym)\
+     unsigned int (sym)\
+     (\
+      const unsigned char *src_ptr, \
+      int source_stride, \
+@@ -318,22 +318,22 @@ extern prototype_variance(vp8_variance_m
+ #ifndef vp8_variance_get4x4sse_cs
+ #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_c
+ #endif
+ extern prototype_get16x16prederror(vp8_variance_get4x4sse_cs);
+ 
+ #ifndef vp8_ssimpf_8x8
+ #define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c
+ #endif
+-extern prototype_ssimpf(vp8_ssimpf_8x8)
++extern prototype_ssimpf(vp8_ssimpf_8x8);
+ 
+ #ifndef vp8_ssimpf_16x16
+ #define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c
+ #endif
+-extern prototype_ssimpf(vp8_ssimpf_16x16)
++extern prototype_ssimpf(vp8_ssimpf_16x16);
+ 
+ typedef prototype_sad(*vp8_sad_fn_t);
+ typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
+ typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
+ typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
+ typedef prototype_variance(*vp8_variance_fn_t);
+ typedef prototype_variance2(*vp8_variance2_fn_t);
+ typedef prototype_subpixvariance(*vp8_subpixvariance_fn_t);
+diff --git a/media/libvpx/vp8/encoder/variance_c.c b/media/libvpx/vp8/common/variance_c.c
+rename from media/libvpx/vp8/encoder/variance_c.c
+rename to media/libvpx/vp8/common/variance_c.c
+--- a/media/libvpx/vp8/encoder/variance_c.c
++++ b/media/libvpx/vp8/common/variance_c.c
+@@ -5,17 +5,17 @@
+  *  that can be found in the LICENSE file in the root of the source
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ 
+ #include "variance.h"
+-#include "vp8/common/filter.h"
++#include "filter.h"
+ 
+ 
+ unsigned int vp8_get_mb_ss_c
+ (
+     const short *src_ptr
+ )
+ {
+     unsigned int i = 0, sum = 0;
+@@ -451,8 +451,34 @@ unsigned int vp8_sub_pixel_variance8x16_
+     VFilter = vp8_bilinear_filters[yoffset];
+ 
+ 
+     var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
+     var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
+ 
+     return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
+ }
++
++unsigned int vp8_get4x4sse_cs_c
++(
++    const unsigned char *src_ptr,
++    int  source_stride,
++    const unsigned char *ref_ptr,
++    int  recon_stride
++)
++{
++    int distortion = 0;
++    int r, c;
++
++    for (r = 0; r < 4; r++)
++    {
++        for (c = 0; c < 4; c++)
++        {
++            int diff = src_ptr[c] - ref_ptr[c];
++            distortion += diff * diff;
++        }
++
++        src_ptr += source_stride;
++        ref_ptr += recon_stride;
++    }
++
++    return distortion;
++}
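Note (editorial sketch, not part of the patch): the hunk above moves the plain-C 4x4 sum-of-squared-errors helper into the common-side variance_c.c. The standalone program below mirrors that logic to make the metric concrete; the buffers and main() are hypothetical test data, not libvpx code.

    #include <stdio.h>

    /* Sum of squared differences over a 4x4 block, with independent
     * row strides for the source and reference planes, mirroring
     * vp8_get4x4sse_cs_c above. */
    static unsigned int sse_4x4(const unsigned char *src, int src_stride,
                                const unsigned char *ref, int ref_stride)
    {
        unsigned int distortion = 0;
        int r, c;

        for (r = 0; r < 4; r++)
        {
            for (c = 0; c < 4; c++)
            {
                int diff = src[c] - ref[c];
                distortion += (unsigned int)(diff * diff);
            }
            src += src_stride;
            ref += ref_stride;
        }
        return distortion;
    }

    int main(void)
    {
        /* Two hypothetical 4x4 blocks stored in 8-pixel-wide rows. */
        unsigned char src[32] = { 10, 12, 14, 16 };
        unsigned char ref[32] = { 11, 12, 13, 16 };
        printf("sse = %u\n", sse_4x4(src, 8, ref, 8));  /* prints 2 */
        return 0;
    }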
+diff --git a/media/libvpx/vp8/encoder/x86/sad_mmx.asm b/media/libvpx/vp8/common/x86/sad_mmx.asm
+rename from media/libvpx/vp8/encoder/x86/sad_mmx.asm
+rename to media/libvpx/vp8/common/x86/sad_mmx.asm
+diff --git a/media/libvpx/vp8/encoder/x86/sad_sse2.asm b/media/libvpx/vp8/common/x86/sad_sse2.asm
+rename from media/libvpx/vp8/encoder/x86/sad_sse2.asm
+rename to media/libvpx/vp8/common/x86/sad_sse2.asm
+diff --git a/media/libvpx/vp8/encoder/x86/sad_sse3.asm b/media/libvpx/vp8/common/x86/sad_sse3.asm
+rename from media/libvpx/vp8/encoder/x86/sad_sse3.asm
+rename to media/libvpx/vp8/common/x86/sad_sse3.asm
+diff --git a/media/libvpx/vp8/encoder/x86/sad_sse4.asm b/media/libvpx/vp8/common/x86/sad_sse4.asm
+rename from media/libvpx/vp8/encoder/x86/sad_sse4.asm
+rename to media/libvpx/vp8/common/x86/sad_sse4.asm
+diff --git a/media/libvpx/vp8/encoder/x86/sad_ssse3.asm b/media/libvpx/vp8/common/x86/sad_ssse3.asm
+rename from media/libvpx/vp8/encoder/x86/sad_ssse3.asm
+rename to media/libvpx/vp8/common/x86/sad_ssse3.asm
+diff --git a/media/libvpx/vp8/encoder/x86/variance_impl_mmx.asm b/media/libvpx/vp8/common/x86/variance_impl_mmx.asm
+rename from media/libvpx/vp8/encoder/x86/variance_impl_mmx.asm
+rename to media/libvpx/vp8/common/x86/variance_impl_mmx.asm
+diff --git a/media/libvpx/vp8/encoder/x86/variance_impl_sse2.asm b/media/libvpx/vp8/common/x86/variance_impl_sse2.asm
+rename from media/libvpx/vp8/encoder/x86/variance_impl_sse2.asm
+rename to media/libvpx/vp8/common/x86/variance_impl_sse2.asm
+diff --git a/media/libvpx/vp8/encoder/x86/variance_impl_ssse3.asm b/media/libvpx/vp8/common/x86/variance_impl_ssse3.asm
+rename from media/libvpx/vp8/encoder/x86/variance_impl_ssse3.asm
+rename to media/libvpx/vp8/common/x86/variance_impl_ssse3.asm
+diff --git a/media/libvpx/vp8/encoder/x86/variance_mmx.c b/media/libvpx/vp8/common/x86/variance_mmx.c
+rename from media/libvpx/vp8/encoder/x86/variance_mmx.c
+rename to media/libvpx/vp8/common/x86/variance_mmx.c
+--- a/media/libvpx/vp8/encoder/x86/variance_mmx.c
++++ b/media/libvpx/vp8/common/x86/variance_mmx.c
+@@ -4,17 +4,17 @@
+  *  Use of this source code is governed by a BSD-style license
+  *  that can be found in the LICENSE file in the root of the source
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ #include "vpx_config.h"
+-#include "vp8/encoder/variance.h"
++#include "vp8/common/variance.h"
+ #include "vp8/common/pragmas.h"
+ #include "vpx_ports/mem.h"
+ #include "vp8/common/x86/filter_x86.h"
+ 
+ extern void filter_block1d_h6_mmx
+ (
+     const unsigned char *src_ptr,
+     unsigned short *output_ptr,
+diff --git a/media/libvpx/vp8/encoder/x86/variance_sse2.c b/media/libvpx/vp8/common/x86/variance_sse2.c
+rename from media/libvpx/vp8/encoder/x86/variance_sse2.c
+rename to media/libvpx/vp8/common/x86/variance_sse2.c
+--- a/media/libvpx/vp8/encoder/x86/variance_sse2.c
++++ b/media/libvpx/vp8/common/x86/variance_sse2.c
+@@ -4,17 +4,17 @@
+  *  Use of this source code is governed by a BSD-style license
+  *  that can be found in the LICENSE file in the root of the source
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ #include "vpx_config.h"
+-#include "vp8/encoder/variance.h"
++#include "vp8/common/variance.h"
+ #include "vp8/common/pragmas.h"
+ #include "vpx_ports/mem.h"
+ #include "vp8/common/x86/filter_x86.h"
+ 
+ extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+ extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+ extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+ extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+diff --git a/media/libvpx/vp8/encoder/x86/variance_ssse3.c b/media/libvpx/vp8/common/x86/variance_ssse3.c
+rename from media/libvpx/vp8/encoder/x86/variance_ssse3.c
+rename to media/libvpx/vp8/common/x86/variance_ssse3.c
+--- a/media/libvpx/vp8/encoder/x86/variance_ssse3.c
++++ b/media/libvpx/vp8/common/x86/variance_ssse3.c
+@@ -4,17 +4,17 @@
+  *  Use of this source code is governed by a BSD-style license
+  *  that can be found in the LICENSE file in the root of the source
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ #include "vpx_config.h"
+-#include "vp8/encoder/variance.h"
++#include "vp8/common/variance.h"
+ #include "vp8/common/pragmas.h"
+ #include "vpx_ports/mem.h"
+ 
+ extern unsigned int vp8_get16x16var_sse2
+ (
+     const unsigned char *src_ptr,
+     int source_stride,
+     const unsigned char *ref_ptr,
+diff --git a/media/libvpx/vp8/encoder/x86/variance_x86.h b/media/libvpx/vp8/common/x86/variance_x86.h
+rename from media/libvpx/vp8/encoder/x86/variance_x86.h
+rename to media/libvpx/vp8/common/x86/variance_x86.h
+--- a/media/libvpx/vp8/encoder/x86/variance_x86.h
++++ b/media/libvpx/vp8/common/x86/variance_x86.h
+@@ -135,18 +135,18 @@ extern prototype_subpixvariance(vp8_sub_
+ extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt);
+ extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt);
+ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
+ extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
+ extern prototype_getmbss(vp8_get_mb_ss_sse2);
+ extern prototype_variance(vp8_mse16x16_wmt);
+ extern prototype_variance2(vp8_get8x8var_sse2);
+ extern prototype_variance2(vp8_get16x16var_sse2);
+-extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2)
+-extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2)
++extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2);
++extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2);
+ 
+ #if !CONFIG_RUNTIME_CPU_DETECT
+ #undef  vp8_variance_sad4x4
+ #define vp8_variance_sad4x4 vp8_sad4x4_wmt
+ 
+ #undef  vp8_variance_sad8x8
+ #define vp8_variance_sad8x8 vp8_sad8x8_wmt
+ 
+diff --git a/media/libvpx/vp8/common/x86/x86_systemdependent.c b/media/libvpx/vp8/common/x86/x86_systemdependent.c
+--- a/media/libvpx/vp8/common/x86/x86_systemdependent.c
++++ b/media/libvpx/vp8/common/x86/x86_systemdependent.c
+@@ -10,16 +10,17 @@
+ 
+ 
+ #include "vpx_config.h"
+ #include "vpx_ports/x86.h"
+ #include "vp8/common/subpixel.h"
+ #include "vp8/common/loopfilter.h"
+ #include "vp8/common/recon.h"
+ #include "vp8/common/idct.h"
++#include "vp8/common/variance.h"
+ #include "vp8/common/pragmas.h"
+ #include "vp8/common/onyxc_int.h"
+ 
+ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
+ {
+ #if CONFIG_RUNTIME_CPU_DETECT
+     VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+     int flags = x86_simd_caps();
+@@ -62,16 +63,43 @@ void vp8_arch_x86_common_init(VP8_COMMON
+         rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_mmx;
+         rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
+         rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_mmx;
+         rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
+         rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_mmx;
+         rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
+         rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_mmx;
+ 
++        rtcd->variance.sad16x16              = vp8_sad16x16_mmx;
++        rtcd->variance.sad16x8               = vp8_sad16x8_mmx;
++        rtcd->variance.sad8x16               = vp8_sad8x16_mmx;
++        rtcd->variance.sad8x8                = vp8_sad8x8_mmx;
++        rtcd->variance.sad4x4                = vp8_sad4x4_mmx;
++
++        rtcd->variance.var4x4                = vp8_variance4x4_mmx;
++        rtcd->variance.var8x8                = vp8_variance8x8_mmx;
++        rtcd->variance.var8x16               = vp8_variance8x16_mmx;
++        rtcd->variance.var16x8               = vp8_variance16x8_mmx;
++        rtcd->variance.var16x16              = vp8_variance16x16_mmx;
++
++        rtcd->variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_mmx;
++        rtcd->variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_mmx;
++        rtcd->variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_mmx;
++        rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_mmx;
++        rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_mmx;
++        rtcd->variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_mmx;
++        rtcd->variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_mmx;
++        rtcd->variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_mmx;
++        rtcd->variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_mmx;
++
++        rtcd->variance.mse16x16              = vp8_mse16x16_mmx;
++        rtcd->variance.getmbss               = vp8_get_mb_ss_mmx;
++
++        rtcd->variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;
++
+ #if CONFIG_POSTPROC
+         rtcd->postproc.down        = vp8_mbpost_proc_down_mmx;
+         /*rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;*/
+         rtcd->postproc.downacross  = vp8_post_proc_down_and_across_mmx;
+         rtcd->postproc.addnoise    = vp8_plane_add_noise_mmx;
+ #endif
+     }
+ 
+@@ -105,26 +133,81 @@ void vp8_arch_x86_common_init(VP8_COMMON
+         rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_sse2;
+         rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
+         rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_sse2;
+         rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
+         rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_sse2;
+         rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
+         rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_sse2;
+ 
++        rtcd->variance.sad16x16              = vp8_sad16x16_wmt;
++        rtcd->variance.sad16x8               = vp8_sad16x8_wmt;
++        rtcd->variance.sad8x16               = vp8_sad8x16_wmt;
++        rtcd->variance.sad8x8                = vp8_sad8x8_wmt;
++        rtcd->variance.sad4x4                = vp8_sad4x4_wmt;
++        rtcd->variance.copy32xn              = vp8_copy32xn_sse2;
++
++        rtcd->variance.var4x4                = vp8_variance4x4_wmt;
++        rtcd->variance.var8x8                = vp8_variance8x8_wmt;
++        rtcd->variance.var8x16               = vp8_variance8x16_wmt;
++        rtcd->variance.var16x8               = vp8_variance16x8_wmt;
++        rtcd->variance.var16x16              = vp8_variance16x16_wmt;
++
++        rtcd->variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_wmt;
++        rtcd->variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_wmt;
++        rtcd->variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_wmt;
++        rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_wmt;
++        rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_wmt;
++        rtcd->variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_wmt;
++        rtcd->variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_wmt;
++        rtcd->variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_wmt;
++        rtcd->variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_wmt;
++
++        rtcd->variance.mse16x16              = vp8_mse16x16_wmt;
++        rtcd->variance.getmbss               = vp8_get_mb_ss_sse2;
++
++        /* rtcd->variance.get4x4sse_cs  not implemented for wmt */;
++
++#if CONFIG_INTERNAL_STATS
++#if ARCH_X86_64
++        rtcd->variance.ssimpf_8x8            = vp8_ssim_parms_8x8_sse2;
++        rtcd->variance.ssimpf_16x16          = vp8_ssim_parms_16x16_sse2;
++#endif
++#endif
++
+ #if CONFIG_POSTPROC
+         rtcd->postproc.down        = vp8_mbpost_proc_down_xmm;
+         rtcd->postproc.across      = vp8_mbpost_proc_across_ip_xmm;
+         rtcd->postproc.downacross  = vp8_post_proc_down_and_across_xmm;
+         rtcd->postproc.addnoise    = vp8_plane_add_noise_wmt;
+ #endif
+     }
+ 
+ #endif
+ 
++#if HAVE_SSE3
++
++    if (flags & HAS_SSE3)
++    {
++        rtcd->variance.sad16x16              = vp8_sad16x16_sse3;
++        rtcd->variance.sad16x16x3            = vp8_sad16x16x3_sse3;
++        rtcd->variance.sad16x8x3             = vp8_sad16x8x3_sse3;
++        rtcd->variance.sad8x16x3             = vp8_sad8x16x3_sse3;
++        rtcd->variance.sad8x8x3              = vp8_sad8x8x3_sse3;
++        rtcd->variance.sad4x4x3              = vp8_sad4x4x3_sse3;
++        rtcd->variance.sad16x16x4d           = vp8_sad16x16x4d_sse3;
++        rtcd->variance.sad16x8x4d            = vp8_sad16x8x4d_sse3;
++        rtcd->variance.sad8x16x4d            = vp8_sad8x16x4d_sse3;
++        rtcd->variance.sad8x8x4d             = vp8_sad8x8x4d_sse3;
++        rtcd->variance.sad4x4x4d             = vp8_sad4x4x4d_sse3;
++        rtcd->variance.copy32xn              = vp8_copy32xn_sse3;
++
++    }
++#endif
++
+ #if HAVE_SSSE3
+ 
+     if (flags & HAS_SSSE3)
+     {
+         rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
+         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
+         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
+         rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
+@@ -134,13 +217,30 @@ void vp8_arch_x86_common_init(VP8_COMMON
+         rtcd->recon.build_intra_predictors_mbuv =
+             vp8_build_intra_predictors_mbuv_ssse3;
+         rtcd->recon.build_intra_predictors_mbuv_s =
+             vp8_build_intra_predictors_mbuv_s_ssse3;
+         rtcd->recon.build_intra_predictors_mby =
+             vp8_build_intra_predictors_mby_ssse3;
+         rtcd->recon.build_intra_predictors_mby_s =
+             vp8_build_intra_predictors_mby_s_ssse3;
++
++        rtcd->variance.sad16x16x3            = vp8_sad16x16x3_ssse3;
++        rtcd->variance.sad16x8x3             = vp8_sad16x8x3_ssse3;
++
++        rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3;
++        rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3;
++    }
++#endif
++
++#if HAVE_SSE4_1
++    if (flags & HAS_SSE4_1)
++    {
++        rtcd->variance.sad16x16x8            = vp8_sad16x16x8_sse4;
++        rtcd->variance.sad16x8x8             = vp8_sad16x8x8_sse4;
++        rtcd->variance.sad8x16x8             = vp8_sad8x16x8_sse4;
++        rtcd->variance.sad8x8x8              = vp8_sad8x8x8_sse4;
++        rtcd->variance.sad4x4x8              = vp8_sad4x4x8_sse4;
+     }
+ #endif
+ 
+ #endif
+ }
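Note (editorial sketch, not part of the patch): the x86_systemdependent.c hunks above fill the common RTCD variance slots only when the matching capability flag (HAS_MMX, HAS_SSE2, HAS_SSE3, HAS_SSSE3, HAS_SSE4_1) is reported by x86_simd_caps(). The sketch below shows that general pattern — start from portable C defaults, then overwrite table entries with the best available variant — using simplified stand-in names, not the real libvpx definitions.

    #include <stdio.h>

    typedef unsigned int (*sad_fn_t)(const unsigned char *a,
                                     const unsigned char *b, int n);

    /* Portable reference implementation. */
    static unsigned int sad_c(const unsigned char *a, const unsigned char *b, int n)
    {
        unsigned int s = 0;
        int i;
        for (i = 0; i < n; i++)
            s += (a[i] > b[i]) ? (unsigned int)(a[i] - b[i])
                               : (unsigned int)(b[i] - a[i]);
        return s;
    }

    /* Stand-in for an optimized variant (the _mmx/_wmt/_sse3 symbols in
     * the real table); here it just reuses the C version. */
    static unsigned int sad_simd(const unsigned char *a, const unsigned char *b, int n)
    {
        return sad_c(a, b, n);
    }

    #define HAS_FAKE_SIMD 0x1   /* stand-in for HAS_MMX / HAS_SSE2 / ... */

    struct variance_vtable { sad_fn_t sad16x16; };

    static void arch_init(struct variance_vtable *v, int flags)
    {
        v->sad16x16 = sad_c;            /* portable default first */
        if (flags & HAS_FAKE_SIMD)
            v->sad16x16 = sad_simd;     /* best detected variant wins */
    }

    int main(void)
    {
        struct variance_vtable v;
        unsigned char a[16] = {0}, b[16] = {3};
        arch_init(&v, HAS_FAKE_SIMD);
        printf("%u\n", v.sad16x16(a, b, 16));  /* call goes through the table */
        return 0;
    }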
+diff --git a/media/libvpx/vp8/encoder/arm/arm_csystemdependent.c b/media/libvpx/vp8/encoder/arm/arm_csystemdependent.c
+--- a/media/libvpx/vp8/encoder/arm/arm_csystemdependent.c
++++ b/media/libvpx/vp8/encoder/arm/arm_csystemdependent.c
+@@ -6,17 +6,16 @@
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ 
+ #include "vpx_config.h"
+ #include "vpx_ports/arm.h"
+-#include "vp8/encoder/variance.h"
+ #include "vp8/encoder/onyx_int.h"
+ 
+ extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+ extern void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+ 
+ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
+ {
+@@ -27,42 +26,16 @@ void vp8_arch_arm_encoder_init(VP8_COMP 
+     if (flags & HAS_EDSP)
+     {
+     }
+ #endif
+ 
+ #if HAVE_ARMV6
+     if (flags & HAS_MEDIA)
+     {
+-        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_armv6;
+-        /*cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
+-        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
+-        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
+-        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;*/
+-
+-        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;*/
+-        cpi->rtcd.variance.var8x8                = vp8_variance8x8_armv6;
+-        /*cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
+-        cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;*/
+-        cpi->rtcd.variance.var16x16              = vp8_variance16x16_armv6;
+-
+-        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+-        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_armv6;
+-        /*cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+-        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+-        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_armv6;
+-        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_armv6;
+-        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_armv6;
+-        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_armv6;
+-
+-        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_armv6;
+-        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+-
+-        /*cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/
+-
+         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_armv6;
+         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_armv6;
+         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_armv6;
+         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_armv6;
+         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
+ 
+         /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+@@ -74,42 +47,16 @@ void vp8_arch_arm_encoder_init(VP8_COMP 
+         /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;*/
+         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_armv6;
+     }
+ #endif
+ 
+ #if HAVE_ARMV7
+     if (flags & HAS_NEON)
+     {
+-        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
+-        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
+-        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
+-        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
+-        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
+-
+-        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;*/
+-        cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
+-        cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
+-        cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
+-        cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
+-
+-        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+-        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
+-        /*cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+-        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+-        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
+-        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_neon;
+-        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_neon;
+-        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_neon;
+-
+-        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
+-        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+-
+-        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
+-
+         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
+         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
+         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_neon;
+         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_neon;
+         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
+ 
+         /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+diff --git a/media/libvpx/vp8/encoder/encodeframe.c b/media/libvpx/vp8/encoder/encodeframe.c
+--- a/media/libvpx/vp8/encoder/encodeframe.c
++++ b/media/libvpx/vp8/encoder/encodeframe.c
+@@ -93,17 +93,17 @@ static unsigned int tt_activity_measure(
+     unsigned int sse;
+     /* TODO: This could also be done over smaller areas (8x8), but that would
+      *  require extensive changes elsewhere, as lambda is assumed to be fixed
+      *  over an entire MB in most of the code.
+      * Another option is to compute four 8x8 variances, and pick a single
+      *  lambda using a non-linear combination (e.g., the smallest, or second
+      *  smallest, etc.).
+      */
+-    act =     VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer,
++    act =     VARIANCE_INVOKE(&cpi->common.rtcd.variance, var16x16)(x->src.y_buffer,
+                     x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
+     act = act<<4;
+ 
+     /* If the region is flat, lower the activity some more. */
+     if (act < 8<<12)
+         act = act < 5<<12 ? act : 5<<12;
+ 
+     return act;
+diff --git a/media/libvpx/vp8/encoder/encodeintra.c b/media/libvpx/vp8/encoder/encodeintra.c
+--- a/media/libvpx/vp8/encoder/encodeintra.c
++++ b/media/libvpx/vp8/encoder/encodeintra.c
+@@ -50,17 +50,17 @@ int vp8_encode_intra(VP8_COMP *cpi, MACR
+     {
+         for (i = 0; i < 16; i++)
+         {
+             x->e_mbd.block[i].bmi.as_mode = B_DC_PRED;
+             vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
+         }
+     }
+ 
+-    intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
++    intra_pred_var = VARIANCE_INVOKE(&cpi->common.rtcd.variance, getmbss)(x->src_diff);
+ 
+     return intra_pred_var;
+ }
+ 
+ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
+                               MACROBLOCK *x, int ib)
+ {
+     BLOCKD *b = &x->e_mbd.block[ib];
+diff --git a/media/libvpx/vp8/encoder/firstpass.c b/media/libvpx/vp8/encoder/firstpass.c
+--- a/media/libvpx/vp8/encoder/firstpass.c
++++ b/media/libvpx/vp8/encoder/firstpass.c
+@@ -7,17 +7,17 @@
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ #include "math.h"
+ #include "limits.h"
+ #include "block.h"
+ #include "onyx_int.h"
+-#include "variance.h"
++#include "vp8/common/variance.h"
+ #include "encodeintra.h"
+ #include "vp8/common/setupintrarecon.h"
+ #include "mcomp.h"
+ #include "firstpass.h"
+ #include "vpx_scale/vpxscale.h"
+ #include "encodemb.h"
+ #include "vp8/common/extend.h"
+ #include "vp8/common/systemdependent.h"
+@@ -404,17 +404,17 @@ static void zz_motion_search( VP8_COMP *
+     unsigned char *ref_ptr;
+     int ref_stride=d->pre_stride;
+ 
+     // Set up pointers for this macro block recon buffer
+     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
+ 
+     ref_ptr = (unsigned char *)(*(d->base_pre) + d->pre );
+ 
+-    VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
++    VARIANCE_INVOKE(IF_RTCD(&cpi->common.rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
+ }
+ 
+ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
+                                      int_mv *ref_mv, MV *best_mv,
+                                      YV12_BUFFER_CONFIG *recon_buffer,
+                                      int *best_motion_err, int recon_yoffset )
+ {
+     MACROBLOCKD *const xd = & x->e_mbd;
+@@ -428,17 +428,17 @@ static void first_pass_motion_search(VP8
+     int tmp_err;
+     int step_param = 3;                                       //3;          // Dont search over full range for first pass
+     int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3;
+     int n;
+     vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
+     int new_mv_mode_penalty = 256;
+ 
+     // override the default variance function to use MSE
+-    v_fn_ptr.vf    = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);
++    v_fn_ptr.vf    = VARIANCE_INVOKE(IF_RTCD(&cpi->common.rtcd.variance), mse16x16);
+ 
+     // Set up pointers for this macro block recon buffer
+     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
+ 
+     // Initial step/diamond search centred on best mv
+     tmp_mv.as_int = 0;
+     ref_mv_full.as_mv.col = ref_mv->as_mv.col>>3;
+     ref_mv_full.as_mv.row = ref_mv->as_mv.row>>3;
+diff --git a/media/libvpx/vp8/encoder/generic/csystemdependent.c b/media/libvpx/vp8/encoder/generic/csystemdependent.c
+--- a/media/libvpx/vp8/encoder/generic/csystemdependent.c
++++ b/media/libvpx/vp8/encoder/generic/csystemdependent.c
+@@ -5,78 +5,31 @@
+  *  that can be found in the LICENSE file in the root of the source
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ 
+ #include "vpx_config.h"
+-#include "vp8/encoder/variance.h"
+ #include "vp8/encoder/onyx_int.h"
+ 
+ 
+ void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
+ void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
+ 
+ void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
+                                         YV12_BUFFER_CONFIG *dst_ybc);
+ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
+                                         YV12_BUFFER_CONFIG *dst_ybc);
+ 
+ void vp8_cmachine_specific_config(VP8_COMP *cpi)
+ {
+ #if CONFIG_RUNTIME_CPU_DETECT
+     cpi->rtcd.common                    = &cpi->common.rtcd;
+-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
+-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
+-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
+-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
+-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
+-
+-    cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_c;
+-    cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_c;
+-    cpi->rtcd.variance.sad8x16x3             = vp8_sad8x16x3_c;
+-    cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_c;
+-    cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_c;
+-
+-    cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_c;
+-    cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_c;
+-    cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_c;
+-    cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_c;
+-    cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_c;
+-
+-    cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_c;
+-    cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_c;
+-    cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_c;
+-    cpi->rtcd.variance.sad8x8x4d             = vp8_sad8x8x4d_c;
+-    cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_c;
+-#if ARCH_X86 || ARCH_X86_64
+-    cpi->rtcd.variance.copy32xn              = vp8_copy32xn_c;
+-#endif
+-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
+-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
+-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
+-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
+-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
+-
+-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
+-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
+-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
+-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
+-    cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_c;
+-    cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_c;
+-    cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_c;
+-    cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_c;
+-
+-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
+-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
+-
+-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
+ 
+     cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
+     cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
+     cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_c;
+     cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_c;
+     cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
+ 
+     cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+@@ -91,20 +44,16 @@ void vp8_cmachine_specific_config(VP8_CO
+     cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
+     cpi->rtcd.quantize.fastquantb_pair       = vp8_fast_quantize_b_pair_c;
+     cpi->rtcd.search.full_search             = vp8_full_search_sad;
+     cpi->rtcd.search.refining_search         = vp8_refining_search_sad;
+     cpi->rtcd.search.diamond_search          = vp8_diamond_search_sad;
+ #if !(CONFIG_REALTIME_ONLY)
+     cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_c;
+ #endif
+-#if CONFIG_INTERNAL_STATS
+-    cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_c;
+-    cpi->rtcd.variance.ssimpf_16x16          = vp8_ssim_parms_16x16_c;
+-#endif
+ #endif
+ 
+     // Pure C:
+     vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
+ 
+ #if ARCH_X86 || ARCH_X86_64
+     vp8_arch_x86_encoder_init(cpi);
+ #endif
+diff --git a/media/libvpx/vp8/encoder/mcomp.h b/media/libvpx/vp8/encoder/mcomp.h
+--- a/media/libvpx/vp8/encoder/mcomp.h
++++ b/media/libvpx/vp8/encoder/mcomp.h
+@@ -8,17 +8,17 @@
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ 
+ #ifndef __INC_MCOMP_H
+ #define __INC_MCOMP_H
+ 
+ #include "block.h"
+-#include "variance.h"
++#include "vp8/common/variance.h"
+ 
+ #ifdef ENTROPY_STATS
+ extern void init_mv_ref_counts();
+ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
+ #endif
+ 
+ 
+ #define MAX_MVSEARCH_STEPS 8                                    // The maximum number of steps in a step search given the largest allowed initial step
+diff --git a/media/libvpx/vp8/encoder/onyx_if.c b/media/libvpx/vp8/encoder/onyx_if.c
+--- a/media/libvpx/vp8/encoder/onyx_if.c
++++ b/media/libvpx/vp8/encoder/onyx_if.c
+@@ -1948,72 +1948,72 @@ struct VP8_COMP* vp8_create_compressor(V
+ #ifdef ENTROPY_STATS
+     init_mv_ref_counts();
+ #endif
+ 
+ #if CONFIG_MULTITHREAD
+     vp8cx_create_encoder_threads(cpi);
+ #endif
+ 
+-    cpi->fn_ptr[BLOCK_16X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
+-    cpi->fn_ptr[BLOCK_16X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
+-    cpi->fn_ptr[BLOCK_16X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
+-    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);
+-    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
+-    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
+-    cpi->fn_ptr[BLOCK_16X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
+-    cpi->fn_ptr[BLOCK_16X16].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
+-    cpi->fn_ptr[BLOCK_16X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+-
+-    cpi->fn_ptr[BLOCK_16X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
+-    cpi->fn_ptr[BLOCK_16X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
+-    cpi->fn_ptr[BLOCK_16X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
++    cpi->fn_ptr[BLOCK_16X16].sdf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x16);
++    cpi->fn_ptr[BLOCK_16X16].vf             = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var16x16);
++    cpi->fn_ptr[BLOCK_16X16].svf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, subpixvar16x16);
++    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = VARIANCE_INVOKE(&cpi->common.rtcd.variance, halfpixvar16x16_h);
++    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = VARIANCE_INVOKE(&cpi->common.rtcd.variance, halfpixvar16x16_v);
++    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->common.rtcd.variance, halfpixvar16x16_hv);
++    cpi->fn_ptr[BLOCK_16X16].sdx3f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x16x3);
++    cpi->fn_ptr[BLOCK_16X16].sdx8f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x16x8);
++    cpi->fn_ptr[BLOCK_16X16].sdx4df         = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x16x4d);
++
++    cpi->fn_ptr[BLOCK_16X8].sdf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x8);
++    cpi->fn_ptr[BLOCK_16X8].vf             = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var16x8);
++    cpi->fn_ptr[BLOCK_16X8].svf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, subpixvar16x8);
+     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h  = NULL;
+     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;
+     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
+-    cpi->fn_ptr[BLOCK_16X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
+-    cpi->fn_ptr[BLOCK_16X8].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
+-    cpi->fn_ptr[BLOCK_16X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
+-
+-    cpi->fn_ptr[BLOCK_8X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
+-    cpi->fn_ptr[BLOCK_8X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
+-    cpi->fn_ptr[BLOCK_8X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
++    cpi->fn_ptr[BLOCK_16X8].sdx3f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x8x3);
++    cpi->fn_ptr[BLOCK_16X8].sdx8f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x8x8);
++    cpi->fn_ptr[BLOCK_16X8].sdx4df         = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad16x8x4d);
++
++    cpi->fn_ptr[BLOCK_8X16].sdf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x16);
++    cpi->fn_ptr[BLOCK_8X16].vf             = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var8x16);
++    cpi->fn_ptr[BLOCK_8X16].svf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, subpixvar8x16);
+     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h  = NULL;
+     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v  = NULL;
+     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
+-    cpi->fn_ptr[BLOCK_8X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
+-    cpi->fn_ptr[BLOCK_8X16].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
+-    cpi->fn_ptr[BLOCK_8X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
+-
+-    cpi->fn_ptr[BLOCK_8X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
+-    cpi->fn_ptr[BLOCK_8X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
+-    cpi->fn_ptr[BLOCK_8X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
++    cpi->fn_ptr[BLOCK_8X16].sdx3f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x16x3);
++    cpi->fn_ptr[BLOCK_8X16].sdx8f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x16x8);
++    cpi->fn_ptr[BLOCK_8X16].sdx4df         = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x16x4d);
++
++    cpi->fn_ptr[BLOCK_8X8].sdf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x8);
++    cpi->fn_ptr[BLOCK_8X8].vf             = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var8x8);
++    cpi->fn_ptr[BLOCK_8X8].svf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, subpixvar8x8);
+     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h  = NULL;
+     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v  = NULL;
+     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
+-    cpi->fn_ptr[BLOCK_8X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
+-    cpi->fn_ptr[BLOCK_8X8].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
+-    cpi->fn_ptr[BLOCK_8X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
+-
+-    cpi->fn_ptr[BLOCK_4X4].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
+-    cpi->fn_ptr[BLOCK_4X4].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
+-    cpi->fn_ptr[BLOCK_4X4].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
++    cpi->fn_ptr[BLOCK_8X8].sdx3f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x8x3);
++    cpi->fn_ptr[BLOCK_8X8].sdx8f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x8x8);
++    cpi->fn_ptr[BLOCK_8X8].sdx4df         = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad8x8x4d);
++
++    cpi->fn_ptr[BLOCK_4X4].sdf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad4x4);
++    cpi->fn_ptr[BLOCK_4X4].vf             = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var4x4);
++    cpi->fn_ptr[BLOCK_4X4].svf            = VARIANCE_INVOKE(&cpi->common.rtcd.variance, subpixvar4x4);
+     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h  = NULL;
+     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v  = NULL;
+     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
+-    cpi->fn_ptr[BLOCK_4X4].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
+-    cpi->fn_ptr[BLOCK_4X4].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
+-    cpi->fn_ptr[BLOCK_4X4].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
++    cpi->fn_ptr[BLOCK_4X4].sdx3f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad4x4x3);
++    cpi->fn_ptr[BLOCK_4X4].sdx8f          = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad4x4x8);
++    cpi->fn_ptr[BLOCK_4X4].sdx4df         = VARIANCE_INVOKE(&cpi->common.rtcd.variance, sad4x4x4d);
+ 
+ #if ARCH_X86 || ARCH_X86_64
+-    cpi->fn_ptr[BLOCK_16X16].copymem        = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
+-    cpi->fn_ptr[BLOCK_16X8].copymem        = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
+-    cpi->fn_ptr[BLOCK_8X16].copymem        = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
+-    cpi->fn_ptr[BLOCK_8X8].copymem        = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
+-    cpi->fn_ptr[BLOCK_4X4].copymem        = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn);
++    cpi->fn_ptr[BLOCK_16X16].copymem        = VARIANCE_INVOKE(&cpi->common.rtcd.variance, copy32xn);
++    cpi->fn_ptr[BLOCK_16X8].copymem        = VARIANCE_INVOKE(&cpi->common.rtcd.variance, copy32xn);
++    cpi->fn_ptr[BLOCK_8X16].copymem        = VARIANCE_INVOKE(&cpi->common.rtcd.variance, copy32xn);
++    cpi->fn_ptr[BLOCK_8X8].copymem        = VARIANCE_INVOKE(&cpi->common.rtcd.variance, copy32xn);
++    cpi->fn_ptr[BLOCK_4X4].copymem        = VARIANCE_INVOKE(&cpi->common.rtcd.variance, copy32xn);
+ #endif
+ 
+     cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
+     cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search);
+     cpi->refining_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, refining_search);
+ 
+     // make sure frame 1 is okay
+     cpi->error_bins[0] = cpi->common.MBs;
+@@ -2410,38 +2410,38 @@ static void generate_psnr_packet(VP8_COM
+     int                      i;
+     unsigned int             width = cpi->common.Width;
+     unsigned int             height = cpi->common.Height;
+ 
+     pkt.kind = VPX_CODEC_PSNR_PKT;
+     sse = calc_plane_error(orig->y_buffer, orig->y_stride,
+                            recon->y_buffer, recon->y_stride,
+                            width, height,
+-                           IF_RTCD(&cpi->rtcd.variance));
++                           IF_RTCD(&cpi->common.rtcd.variance));
+     pkt.data.psnr.sse[0] = sse;
+     pkt.data.psnr.sse[1] = sse;
+     pkt.data.psnr.samples[0] = width * height;
+     pkt.data.psnr.samples[1] = width * height;
+ 
+     width = (width + 1) / 2;
+     height = (height + 1) / 2;
+ 
+     sse = calc_plane_error(orig->u_buffer, orig->uv_stride,
+                            recon->u_buffer, recon->uv_stride,
+                            width, height,
+-                           IF_RTCD(&cpi->rtcd.variance));
++                           IF_RTCD(&cpi->common.rtcd.variance));
+     pkt.data.psnr.sse[0] += sse;
+     pkt.data.psnr.sse[2] = sse;
+     pkt.data.psnr.samples[0] += width * height;
+     pkt.data.psnr.samples[2] = width * height;
+ 
+     sse = calc_plane_error(orig->v_buffer, orig->uv_stride,
+                            recon->v_buffer, recon->uv_stride,
+                            width, height,
+-                           IF_RTCD(&cpi->rtcd.variance));
++                           IF_RTCD(&cpi->common.rtcd.variance));
+     pkt.data.psnr.sse[0] += sse;
+     pkt.data.psnr.sse[3] = sse;
+     pkt.data.psnr.samples[0] += width * height;
+     pkt.data.psnr.samples[3] = width * height;
+ 
+     for (i = 0; i < 4; i++)
+         pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0,
+                                              pkt.data.psnr.sse[i]);
+@@ -3821,17 +3821,17 @@ static void encode_frame_to_data_rate
+ 
+ #if !(CONFIG_REALTIME_ONLY)
+         // Special case handling for forced key frames
+         if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced )
+         {
+             int last_q = Q;
+             int kf_err = vp8_calc_ss_err(cpi->Source,
+                                          &cm->yv12_fb[cm->new_fb_idx],
+-                                         IF_RTCD(&cpi->rtcd.variance));
++                                         IF_RTCD(&cpi->common.rtcd.variance));
+ 
+             // The key frame is not good enough
+             if ( kf_err > ((cpi->ambient_err * 7) >> 3) )
+             {
+                 // Lower q_high
+                 q_high = (Q > q_low) ? (Q - 1) : q_low;
+ 
+                 // Adjust Q
+@@ -4018,17 +4018,17 @@ static void encode_frame_to_data_rate
+ 
+     // Special case code to reduce pulsing when key frames are forced at a
+     // fixed interval. Note the reconstruction error if it is the frame before
+     // the force key frame
+     if ( cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0) )
+     {
+         cpi->ambient_err = vp8_calc_ss_err(cpi->Source,
+                                            &cm->yv12_fb[cm->new_fb_idx],
+-                                           IF_RTCD(&cpi->rtcd.variance));
++                                           IF_RTCD(&cpi->common.rtcd.variance));
+     }
+ 
+     /* This frame's MVs are saved and will be used in next frame's MV predictor.
+      * Last frame has one more line(add to bottom) and one more column(add to
+      * right) than cm->mip. The edge elements are initialized to 0.
+      */
+ #if CONFIG_MULTI_RES_ENCODING
+     if(!cpi->oxcf.mr_encoder_id && cm->show_frame)
+@@ -4963,25 +4963,25 @@ int vp8_get_compressed_data(VP8_COMP *cp
+                 YV12_BUFFER_CONFIG      *pp = &cm->post_proc_buffer;
+                 int y_samples = orig->y_height * orig->y_width ;
+                 int uv_samples = orig->uv_height * orig->uv_width ;
+                 int t_samples = y_samples + 2 * uv_samples;
+                 int64_t sq_error, sq_error2;
+ 
+                 ye = calc_plane_error(orig->y_buffer, orig->y_stride,
+                   recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height,
+-                  IF_RTCD(&cpi->rtcd.variance));
++                  IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                 ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
+                   recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
+-                  IF_RTCD(&cpi->rtcd.variance));
++                  IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                 ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
+                   recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
+-                  IF_RTCD(&cpi->rtcd.variance));
++                  IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                 sq_error = ye + ue + ve;
+ 
+                 frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error);
+ 
+                 cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye);
+                 cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue);
+                 cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve);
+@@ -4991,39 +4991,39 @@ int vp8_get_compressed_data(VP8_COMP *cp
+                     double frame_psnr2, frame_ssim2 = 0;
+                     double weight = 0;
+ 
+                     vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0, IF_RTCD(&cm->rtcd.postproc));
+                     vp8_clear_system_state();
+ 
+                     ye = calc_plane_error(orig->y_buffer, orig->y_stride,
+                       pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height,
+-                      IF_RTCD(&cpi->rtcd.variance));
++                      IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                     ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
+                       pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
+-                      IF_RTCD(&cpi->rtcd.variance));
++                      IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                     ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
+                       pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
+-                      IF_RTCD(&cpi->rtcd.variance));
++                      IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                     sq_error2 = ye + ue + ve;
+ 
+                     frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2);
+ 
+                     cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye);
+                     cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue);
+                     cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve);
+                     cpi->total_sq_error2 += sq_error2;
+                     cpi->totalp  += frame_psnr2;
+ 
+                     frame_ssim2 = vp8_calc_ssim(cpi->Source,
+                       &cm->post_proc_buffer, 1, &weight,
+-                      IF_RTCD(&cpi->rtcd.variance));
++                      IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                     cpi->summed_quality += frame_ssim2 * weight;
+                     cpi->summed_weights += weight;
+ 
+                     if (cpi->oxcf.number_of_layers > 1)
+                     {
+                          int i;
+ 
+@@ -5043,17 +5043,17 @@ int vp8_get_compressed_data(VP8_COMP *cp
+                     }
+                 }
+             }
+ 
+             if (cpi->b_calculate_ssimg)
+             {
+                 double y, u, v, frame_all;
+                 frame_all =  vp8_calc_ssimg(cpi->Source, cm->frame_to_show,
+-                    &y, &u, &v, IF_RTCD(&cpi->rtcd.variance));
++                    &y, &u, &v, IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                 if (cpi->oxcf.number_of_layers > 1)
+                 {
+                     int i;
+ 
+                     for (i=cpi->current_layer;
+                          i<cpi->oxcf.number_of_layers; i++)
+                     {
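Note (editorial sketch, not part of the patch): the onyx_if.c hunks above repoint every VARIANCE_INVOKE / IF_RTCD call site from the encoder-private table to cpi->common.rtcd.variance. The sketch below shows, with simplified stand-in macro names, how such an INVOKE/IF_RTCD pair collapses to either a function-pointer lookup or a direct call to the _c symbol depending on a compile-time switch; the exact libvpx expansions may differ in detail.

    #include <stdio.h>

    typedef int (*metric_fn_t)(int);

    static int metric_c(int x) { return x * x; }

    struct variance_table { metric_fn_t metric; };

    #define MY_RUNTIME_CPU_DETECT 1

    #if MY_RUNTIME_CPU_DETECT
    /* With runtime detection the macro reads the function-pointer table. */
    #define MY_INVOKE(ctx, fn) (ctx)->fn
    #define MY_IF_RTCD(x) (x)
    #else
    /* Without it the call collapses to the C symbol at compile time. */
    #define MY_INVOKE(ctx, fn) fn##_c
    #define MY_IF_RTCD(x) NULL
    #endif

    int main(void)
    {
        struct variance_table t = { metric_c };
        struct variance_table *rtcd = MY_IF_RTCD(&t);
        printf("%d\n", MY_INVOKE(rtcd, metric)(7));  /* prints 49 either way */
        return 0;
    }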
+diff --git a/media/libvpx/vp8/encoder/onyx_int.h b/media/libvpx/vp8/encoder/onyx_int.h
+--- a/media/libvpx/vp8/encoder/onyx_int.h
++++ b/media/libvpx/vp8/encoder/onyx_int.h
+@@ -13,17 +13,17 @@
+ #define __INC_VP8_INT_H
+ 
+ #include <stdio.h>
+ #include "vpx_config.h"
+ #include "vp8/common/onyx.h"
+ #include "treewriter.h"
+ #include "tokenize.h"
+ #include "vp8/common/onyxc_int.h"
+-#include "variance.h"
++#include "vp8/common/variance.h"
+ #include "dct.h"
+ #include "encodemb.h"
+ #include "quantize.h"
+ #include "vp8/common/entropy.h"
+ #include "vp8/common/threading.h"
+ #include "vpx_ports/mem.h"
+ #include "vpx/internal/vpx_codec_internal.h"
+ #include "mcomp.h"
+@@ -220,17 +220,16 @@ typedef struct
+     int ithread;
+     void *ptr1;
+ } LPFTHREAD_DATA;
+ 
+ 
+ typedef struct VP8_ENCODER_RTCD
+ {
+     VP8_COMMON_RTCD            *common;
+-    vp8_variance_rtcd_vtable_t  variance;
+     vp8_fdct_rtcd_vtable_t      fdct;
+     vp8_encodemb_rtcd_vtable_t  encodemb;
+     vp8_quantize_rtcd_vtable_t  quantize;
+     vp8_search_rtcd_vtable_t    search;
+     vp8_temporal_rtcd_vtable_t  temporal;
+ } VP8_ENCODER_RTCD;
+ 
+ enum
+diff --git a/media/libvpx/vp8/encoder/pickinter.c b/media/libvpx/vp8/encoder/pickinter.c
+--- a/media/libvpx/vp8/encoder/pickinter.c
++++ b/media/libvpx/vp8/encoder/pickinter.c
+@@ -16,17 +16,17 @@
+ #include "encodeintra.h"
+ #include "vp8/common/entropymode.h"
+ #include "pickinter.h"
+ #include "vp8/common/findnearmv.h"
+ #include "encodemb.h"
+ #include "vp8/common/reconinter.h"
+ #include "vp8/common/reconintra.h"
+ #include "vp8/common/reconintra4x4.h"
+-#include "variance.h"
++#include "vp8/common/variance.h"
+ #include "mcomp.h"
+ #include "rdopt.h"
+ #include "vpx_mem/vpx_mem.h"
+ 
+ #if CONFIG_RUNTIME_CPU_DETECT
+ #define IF_RTCD(x) (x)
+ #else
+ #define IF_RTCD(x)  NULL
+@@ -90,42 +90,16 @@ static int get_inter_mbpred_error(MACROB
+     else
+     {
+         return vfp->vf(what, what_stride, in_what, in_what_stride, sse);
+     }
+ 
+ }
+ 
+ 
+-unsigned int vp8_get4x4sse_cs_c
+-(
+-    const unsigned char *src_ptr,
+-    int  source_stride,
+-    const unsigned char *ref_ptr,
+-    int  recon_stride
+-)
+-{
+-    int distortion = 0;
+-    int r, c;
+-
+-    for (r = 0; r < 4; r++)
+-    {
+-        for (c = 0; c < 4; c++)
+-        {
+-            int diff = src_ptr[c] - ref_ptr[c];
+-            distortion += diff * diff;
+-        }
+-
+-        src_ptr += source_stride;
+-        ref_ptr += recon_stride;
+-    }
+-
+-    return distortion;
+-}
+-
+ static int get_prediction_error(BLOCK *be, BLOCKD *b, const vp8_variance_rtcd_vtable_t *rtcd)
+ {
+     unsigned char *sptr;
+     unsigned char *dptr;
+     sptr = (*(be->base_src) + be->src);
+     dptr = b->predictor;
+ 
+     return VARIANCE_INVOKE(rtcd, get4x4sse_cs)(sptr, be->src_stride, dptr, 16);
+@@ -153,17 +127,17 @@ static int pick_intra4x4block(
+     for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++)
+     {
+         int this_rd;
+ 
+         rate = mode_costs[mode];
+         RECON_INVOKE(&rtcd->common->recon, intra4x4_predict)
+                      (*(b->base_dst) + b->dst, b->dst_stride,
+                       mode, b->predictor, 16);
+-        distortion = get_prediction_error(be, b, &rtcd->variance);
++        distortion = get_prediction_error(be, b, &rtcd->common->variance);
+         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ 
+         if (this_rd < best_rd)
+         {
+             *bestrate = rate;
+             *bestdistortion = distortion;
+             best_rd = this_rd;
+             *best_mode = mode;
+@@ -671,17 +645,17 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, 
+             if (distortion2 == INT_MAX)
+             {
+                 this_rd = INT_MAX;
+             }
+             else
+             {
+                 rate2 += rate;
+                 distortion2 = VARIANCE_INVOKE
+-                                (&cpi->rtcd.variance, var16x16)(
++                                (&cpi->common.rtcd.variance, var16x16)(
+                                     *(b->base_src), b->src_stride,
+                                     x->e_mbd.predictor, 16, &sse);
+                 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ 
+                 if (this_rd < best_intra_rd)
+                 {
+                     best_intra_rd = this_rd;
+                     *returnintra = distortion2;
+@@ -696,17 +670,17 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, 
+             break;
+ 
+         case DC_PRED:
+         case V_PRED:
+         case H_PRED:
+         case TM_PRED:
+             RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
+                 (&x->e_mbd);
+-            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
++            distortion2 = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var16x16)
+                                           (*(b->base_src), b->src_stride,
+                                           x->e_mbd.predictor, 16, &sse);
+             rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
+             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ 
+             if (this_rd < best_intra_rd)
+             {
+                 best_intra_rd = this_rd;
+@@ -933,17 +907,17 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, 
+ 
+             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ 
+             if (sse < x->encode_breakout)
+             {
+                 // Check u and v to make sure skip is ok
+                 int sse2 = 0;
+ 
+-                sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
++                sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->common.rtcd.variance));
+ 
+                 if (sse2 * 2 < x->encode_breakout)
+                     x->skip = 1;
+                 else
+                     x->skip = 0;
+             }
+ 
+             break;
+@@ -1067,17 +1041,17 @@ void vp8_pick_intra_mode(VP8_COMP *cpi, 
+ 
+     pick_intra_mbuv_mode(x);
+ 
+     for (mode = DC_PRED; mode <= TM_PRED; mode ++)
+     {
+         x->e_mbd.mode_info_context->mbmi.mode = mode;
+         RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
+             (&x->e_mbd);
+-        distortion = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
++        distortion = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var16x16)
+             (*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse);
+         rate = x->mbmode_cost[x->e_mbd.frame_type][mode];
+         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ 
+         if (error16x16 > this_rd)
+         {
+             error16x16 = this_rd;
+             best_mode = mode;
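Note (editorial sketch, not part of the patch): pick_intra4x4block and vp8_pick_intra_mode above follow the same shape — compute a rate and a distortion for each candidate mode, fold them into one cost, and keep the cheapest mode. The sketch below uses a generic lambda-weighted cost as a stand-in; it is not the exact RDCOST macro, and the rate/distortion numbers are hypothetical.

    #include <limits.h>
    #include <stdio.h>

    #define NUM_MODES 4

    int main(void)
    {
        int rate[NUM_MODES]       = { 40, 55, 30, 70 };     /* hypothetical bit costs */
        int distortion[NUM_MODES] = { 900, 640, 1100, 500 };
        int lambda = 8;                     /* rate/distortion trade-off weight */
        int best_mode = 0, best_rd = INT_MAX, mode;

        for (mode = 0; mode < NUM_MODES; mode++)
        {
            /* Generic stand-in for this_rd = RDCOST(rdmult, rddiv, rate, dist). */
            int this_rd = rate[mode] * lambda + distortion[mode];
            if (this_rd < best_rd)
            {
                best_rd = this_rd;
                best_mode = mode;
            }
        }
        printf("best mode %d, cost %d\n", best_mode, best_rd);
        return 0;
    }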
+diff --git a/media/libvpx/vp8/encoder/picklpf.c b/media/libvpx/vp8/encoder/picklpf.c
+--- a/media/libvpx/vp8/encoder/picklpf.c
++++ b/media/libvpx/vp8/encoder/picklpf.c
+@@ -179,30 +179,30 @@ void vp8cx_pick_filter_level_fast(YV12_B
+ 
+     // Get the err using the previous frame's filter value.
+ 
+     /* Copy the unfiltered / processed recon buffer to the new buffer */
+     vp8_yv12_copy_partial_frame_ptr(saved_frame, cm->frame_to_show);
+     vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
+ 
+     best_err = calc_partial_ssl_err(sd, cm->frame_to_show,
+-                                    IF_RTCD(&cpi->rtcd.variance));
++                                    IF_RTCD(&cpi->common.rtcd.variance));
+ 
+     filt_val -= 1 + (filt_val > 10);
+ 
+     // Search lower filter levels
+     while (filt_val >= min_filter_level)
+     {
+         // Apply the loop filter
+         vp8_yv12_copy_partial_frame_ptr(saved_frame, cm->frame_to_show);
+         vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
+ 
+         // Get the err for filtered frame
+         filt_err = calc_partial_ssl_err(sd, cm->frame_to_show,
+-                                        IF_RTCD(&cpi->rtcd.variance));
++                                        IF_RTCD(&cpi->common.rtcd.variance));
+ 
+         // Update the best case record or exit loop.
+         if (filt_err < best_err)
+         {
+             best_err = filt_err;
+             best_filt_val = filt_val;
+         }
+         else
+@@ -224,17 +224,17 @@ void vp8cx_pick_filter_level_fast(YV12_B
+         {
+             // Apply the loop filter
+             vp8_yv12_copy_partial_frame_ptr(saved_frame, cm->frame_to_show);
+ 
+             vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
+ 
+             // Get the err for filtered frame
+             filt_err = calc_partial_ssl_err(sd, cm->frame_to_show,
+-                                            IF_RTCD(&cpi->rtcd.variance));
++                                            IF_RTCD(&cpi->common.rtcd.variance));
+ 
+             // Update the best case record or exit loop.
+             if (filt_err < best_err)
+             {
+                 // Do not raise filter level if improvement is < 1 part in 4096
+                 best_err = filt_err - (filt_err >> 10);
+ 
+                 best_filt_val = filt_val;
+@@ -318,17 +318,17 @@ void vp8cx_pick_filter_level(YV12_BUFFER
+ 
+     /* Copy the unfiltered / processed recon buffer to the new buffer */
+     vp8_yv12_copy_y_ptr(saved_frame, cm->frame_to_show);
+ 
+     vp8cx_set_alt_lf_level(cpi, filt_mid);
+     vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
+ 
+     best_err = vp8_calc_ss_err(sd, cm->frame_to_show,
+-                               IF_RTCD(&cpi->rtcd.variance));
++                               IF_RTCD(&cpi->common.rtcd.variance));
+ 
+     ss_err[filt_mid] = best_err;
+ 
+     filt_best = filt_mid;
+ 
+     while (filter_step > 0)
+     {
+         Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; //PGW change 12/12/06 for small images
+@@ -345,17 +345,17 @@ void vp8cx_pick_filter_level(YV12_BUFFER
+             if(ss_err[filt_low] == 0)
+             {
+                 // Get Low filter error score
+                 vp8_yv12_copy_y_ptr(saved_frame, cm->frame_to_show);
+                 vp8cx_set_alt_lf_level(cpi, filt_low);
+                 vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
+ 
+                 filt_err = vp8_calc_ss_err(sd, cm->frame_to_show,
+-                                           IF_RTCD(&cpi->rtcd.variance));
++                                           IF_RTCD(&cpi->common.rtcd.variance));
+                 ss_err[filt_low] = filt_err;
+             }
+             else
+                 filt_err = ss_err[filt_low];
+ 
+             // If value is close to the best so far then bias towards a lower loop filter value.
+             if ((filt_err - Bias) < best_err)
+             {
+@@ -372,17 +372,17 @@ void vp8cx_pick_filter_level(YV12_BUFFER
+         {
+             if(ss_err[filt_high] == 0)
+             {
+                 vp8_yv12_copy_y_ptr(saved_frame, cm->frame_to_show);
+                 vp8cx_set_alt_lf_level(cpi, filt_high);
+                 vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
+ 
+                 filt_err = vp8_calc_ss_err(sd, cm->frame_to_show,
+-                                           IF_RTCD(&cpi->rtcd.variance));
++                                           IF_RTCD(&cpi->common.rtcd.variance));
+                 ss_err[filt_high] = filt_err;
+             }
+             else
+                 filt_err = ss_err[filt_high];
+ 
+             // Was it better than the previous best?
+             if (filt_err < (best_err - Bias))
+             {
+diff --git a/media/libvpx/vp8/encoder/rdopt.c b/media/libvpx/vp8/encoder/rdopt.c
+--- a/media/libvpx/vp8/encoder/rdopt.c
++++ b/media/libvpx/vp8/encoder/rdopt.c
+@@ -23,17 +23,17 @@
+ #include "vp8/common/entropymode.h"
+ #include "vp8/common/reconinter.h"
+ #include "vp8/common/reconintra.h"
+ #include "vp8/common/reconintra4x4.h"
+ #include "vp8/common/findnearmv.h"
+ #include "encodemb.h"
+ #include "quantize.h"
+ #include "vp8/common/idct.h"
+-#include "variance.h"
++#include "vp8/common/variance.h"
+ #include "mcomp.h"
+ #include "rdopt.h"
+ #include "vpx_mem/vpx_mem.h"
+ #include "dct.h"
+ #include "vp8/common/systemdependent.h"
+ 
+ #if CONFIG_RUNTIME_CPU_DETECT
+ #define IF_RTCD(x)  (x)
+@@ -2132,30 +2132,30 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cp
+                 unsigned int sse;
+                 unsigned int var;
+                 int threshold = (xd->block[0].dequant[1]
+                             * xd->block[0].dequant[1] >>4);
+ 
+                 if(threshold < x->encode_breakout)
+                     threshold = x->encode_breakout;
+ 
+-                var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
++                var = VARIANCE_INVOKE(&cpi->common.rtcd.variance, var16x16)
+                         (*(b->base_src), b->src_stride,
+                         x->e_mbd.predictor, 16, &sse);
+ 
+                 if (sse < threshold)
+                 {
+                      unsigned int q2dc = xd->block[24].dequant[0];
+                     /* If theres is no codeable 2nd order dc
+                        or a very small uniform pixel change change */
+                     if ((sse - var < q2dc * q2dc >>4) ||
+                         (sse /2 > var && sse-var < 64))
+                     {
+                         // Check u and v to make sure skip is ok
+-                        int sse2=  VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
++                        int sse2=  VP8_UVSSE(x, IF_RTCD(&cpi->common.rtcd.variance));
+                         if (sse2 * 2 < threshold)
+                         {
+                             x->skip = 1;
+                             distortion2 = sse + sse2;
+                             rate2 = 500;
+ 
+                             /* for best_yrd calculation */
+                             rate_uv = 0;
+diff --git a/media/libvpx/vp8/encoder/x86/x86_csystemdependent.c b/media/libvpx/vp8/encoder/x86/x86_csystemdependent.c
+--- a/media/libvpx/vp8/encoder/x86/x86_csystemdependent.c
++++ b/media/libvpx/vp8/encoder/x86/x86_csystemdependent.c
+@@ -6,17 +6,16 @@
+  *  tree. An additional intellectual property rights grant can be found
+  *  in the file PATENTS.  All contributing project authors may
+  *  be found in the AUTHORS file in the root of the source tree.
+  */
+ 
+ 
+ #include "vpx_config.h"
+ #include "vpx_ports/x86.h"
+-#include "vp8/encoder/variance.h"
+ #include "vp8/encoder/onyx_int.h"
+ 
+ 
+ #if HAVE_MMX
+ void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
+ {
+     vp8_short_fdct4x4_mmx(input,   output,    pitch);
+     vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
+@@ -122,43 +121,16 @@ void vp8_arch_x86_encoder_init(VP8_COMP 
+      * you modify any of the function mappings present in this file, be sure
+      * to also update them in static mapings (<arch>/filename_<arch>.h)
+      */
+ 
+     /* Override default functions with fastest ones for this CPU. */
+ #if HAVE_MMX
+     if (flags & HAS_MMX)
+     {
+-        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_mmx;
+-        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_mmx;
+-        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_mmx;
+-        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_mmx;
+-        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_mmx;
+-
+-        cpi->rtcd.variance.var4x4                = vp8_variance4x4_mmx;
+-        cpi->rtcd.variance.var8x8                = vp8_variance8x8_mmx;
+-        cpi->rtcd.variance.var8x16               = vp8_variance8x16_mmx;
+-        cpi->rtcd.variance.var16x8               = vp8_variance16x8_mmx;
+-        cpi->rtcd.variance.var16x16              = vp8_variance16x16_mmx;
+-
+-        cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_mmx;
+-        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_mmx;
+-        cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_mmx;
+-        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_mmx;
+-        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_mmx;
+-        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_mmx;
+-        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_mmx;
+-        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_mmx;
+-        cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_mmx;
+-
+-        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_mmx;
+-        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_mmx;
+-
+-        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;
+-
+         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
+         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
+         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
+         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;
+ 
+         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
+ 
+         cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx;
+@@ -170,44 +142,16 @@ void vp8_arch_x86_encoder_init(VP8_COMP 
+ 
+         /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
+     }
+ #endif
+ 
+ #if HAVE_SSE2
+     if (flags & HAS_SSE2)
+     {
+-        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_wmt;
+-        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_wmt;
+-        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_wmt;
+-        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_wmt;
+-        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_wmt;
+-        cpi->rtcd.variance.copy32xn              = vp8_copy32xn_sse2;
+-
+-        cpi->rtcd.variance.var4x4                = vp8_variance4x4_wmt;
+-        cpi->rtcd.variance.var8x8                = vp8_variance8x8_wmt;
+-        cpi->rtcd.variance.var8x16               = vp8_variance8x16_wmt;
+-        cpi->rtcd.variance.var16x8               = vp8_variance16x8_wmt;
+-        cpi->rtcd.variance.var16x16              = vp8_variance16x16_wmt;
+-
+-        cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_wmt;
+-        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_wmt;
+-        cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_wmt;
+-        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_wmt;
+-        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_wmt;
+-        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_wmt;
+-        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_wmt;
+-        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_wmt;
+-        cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_wmt;
+-
+-        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_wmt;
+-        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_sse2;
+-
+-        /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */;
+-
+         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2;
+         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_sse2;
+         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
+         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;
+ 
+         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ;
+ 
+         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
+@@ -219,69 +163,40 @@ void vp8_arch_x86_encoder_init(VP8_COMP 
+ 
+         cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse2;
+         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
+ 
+ #if !(CONFIG_REALTIME_ONLY)
+         cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2;
+ #endif
+ 
+-#if CONFIG_INTERNAL_STATS
+-#if ARCH_X86_64
+-        cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_sse2;
+-        cpi->rtcd.variance.ssimpf_16x16          = vp8_ssim_parms_16x16_sse2;
+-#endif
+-#endif
+     }
+ #endif
+ 
+ #if HAVE_SSE3
+     if (flags & HAS_SSE3)
+     {
+-        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_sse3;
+-        cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_sse3;
+-        cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_sse3;
+-        cpi->rtcd.variance.sad8x16x3             = vp8_sad8x16x3_sse3;
+-        cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_sse3;
+-        cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_sse3;
+         cpi->rtcd.search.full_search             = vp8_full_search_sadx3;
+-        cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_sse3;
+-        cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_sse3;
+-        cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_sse3;
+-        cpi->rtcd.variance.sad8x8x4d             = vp8_sad8x8x4d_sse3;
+-        cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_sse3;
+-        cpi->rtcd.variance.copy32xn              = vp8_copy32xn_sse3;
+         cpi->rtcd.search.diamond_search          = vp8_diamond_search_sadx4;
+         cpi->rtcd.search.refining_search         = vp8_refining_search_sadx4;
+     }
+ #endif
+ 
+ #if HAVE_SSSE3
+     if (flags & HAS_SSSE3)
+     {
+-        cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3;
+-        cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3;
+-
+-        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3;
+-        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3;
+-
+         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3;
+     }
+ #endif
+ 
+ 
+ 
+ #if HAVE_SSE4_1
+     if (flags & HAS_SSE4_1)
+     {
+-        cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_sse4;
+-        cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_sse4;
+-        cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_sse4;
+-        cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_sse4;
+-        cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_sse4;
+         cpi->rtcd.search.full_search             = vp8_full_search_sadx8;
+ 
+         cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse4;
+     }
+ #endif
+ 
+ #endif
+ }
new file mode 100644
--- /dev/null
+++ b/media/libvpx/I3915d597.patch
@@ -0,0 +1,41 @@
+# HG changeset patch
+# Parent 0d104ba08fe98d5024014fbf45af06100ef7b6f9
+# User Timothy B. Terriberry <tterribe@vt.edu>
+Align internal mfqe framebuffer dimensions
+
+Upstream Change-Id: I3915d597cd66886a24f4ef39752751ebe6425066
+
+diff --git a/media/libvpx/vp8/common/postproc.c b/media/libvpx/vp8/common/postproc.c
+--- a/media/libvpx/vp8/common/postproc.c
++++ b/media/libvpx/vp8/common/postproc.c
+@@ -938,20 +938,26 @@ int vp8_post_proc_frame(VP8_COMMON *oci,
+         return 0;
+     }
+ 
+     /* Allocate post_proc_buffer_int if needed */
+     if ((flags & VP8D_MFQE) && !oci->post_proc_buffer_int_used)
+     {
+         if ((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK))
+         {
+-            if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer_int, oci->Width, oci->Height, VP8BORDERINPIXELS) >= 0)
+-            {
+-                oci->post_proc_buffer_int_used = 1;
+-            }
++            int width = (oci->Width + 15) & ~15;
++            int height = (oci->Height + 15) & ~15;
++
++            if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer_int,
++                                            width, height, VP8BORDERINPIXELS))
++                vpx_internal_error(&oci->error, VPX_CODEC_MEM_ERROR,
++                                   "Failed to allocate MFQE framebuffer");
++
++            oci->post_proc_buffer_int_used = 1;
++
+             // insure that postproc is set to all 0's so that post proc
+             // doesn't pull random data in from edge
+             vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,126,(&oci->post_proc_buffer)->frame_size);
+ 
+         }
+     }
+ 
+ #if ARCH_X86||ARCH_X86_64
new file mode 100644
--- /dev/null
+++ b/media/libvpx/I42ab00e3.patch
@@ -0,0 +1,33 @@
+# HG changeset patch
+# Parent 5668c50552abba043eff64ffc5d8abde43d7964a
+# User Timothy B. Terriberry <tterribe@vt.edu>
+Support Android x86 NDK build
+
+Upstream Change-Id: I42ab00e3255208ba95d7f9b9a8a3605ff58da8e1
+
+diff --git a/media/libvpx/vp8/common/x86/postproc_x86.c b/media/libvpx/vp8/common/x86/postproc_x86.c
+new file mode 100644
+--- /dev/null
++++ b/media/libvpx/vp8/common/x86/postproc_x86.c
+@@ -0,0 +1,21 @@
++/*
++ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
++ *
++ *  Use of this source code is governed by a BSD-style license
++ *  that can be found in the LICENSE file in the root of the source
++ *  tree. An additional intellectual property rights grant can be found
++ *  in the file PATENTS.  All contributing project authors may
++ *  be found in the AUTHORS file in the root of the source tree.
++ */
++
++/* On Android NDK, rand is inlined function, but postproc needs rand symbol */
++#if defined(__ANDROID__)
++#define rand __rand
++#include <stdlib.h>
++#undef rand
++
++extern int rand(void)
++{
++  return __rand();
++}
++#endif
new file mode 100644
--- /dev/null
+++ b/media/libvpx/I6f2b218d.patch
@@ -0,0 +1,348 @@
+# HG changeset patch
+# Parent d02aa133060bcdfd44634038c8c28654c718c2ff
+# User Timothy B. Terriberry <tterribe@vt.edu>
+remove __inline for compiler compatibility
+
+Upstream Change-Id: I6f2b218dfc808b73212bbb90c69e2b6cc1fa90ce
+
+diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/common/loopfilter_filters.c
+--- a/media/libvpx/vp8/common/loopfilter_filters.c
++++ b/media/libvpx/vp8/common/loopfilter_filters.c
+@@ -10,50 +10,50 @@
+ 
+ 
+ #include <stdlib.h>
+ #include "loopfilter.h"
+ #include "onyxc_int.h"
+ 
+ typedef unsigned char uc;
+ 
+-static __inline signed char vp8_signed_char_clamp(int t)
++static signed char vp8_signed_char_clamp(int t)
+ {
+     t = (t < -128 ? -128 : t);
+     t = (t > 127 ? 127 : t);
+     return (signed char) t;
+ }
+ 
+ 
+ /* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+-static __inline signed char vp8_filter_mask(uc limit, uc blimit,
+-                                     uc p3, uc p2, uc p1, uc p0,
+-                                     uc q0, uc q1, uc q2, uc q3)
++static signed char vp8_filter_mask(uc limit, uc blimit,
++                            uc p3, uc p2, uc p1, uc p0,
++                            uc q0, uc q1, uc q2, uc q3)
+ {
+     signed char mask = 0;
+     mask |= (abs(p3 - p2) > limit);
+     mask |= (abs(p2 - p1) > limit);
+     mask |= (abs(p1 - p0) > limit);
+     mask |= (abs(q1 - q0) > limit);
+     mask |= (abs(q2 - q1) > limit);
+     mask |= (abs(q3 - q2) > limit);
+     mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > blimit);
+     return mask - 1;
+ }
+ 
+ /* is there high variance internal edge ( 11111111 yes, 00000000 no) */
+-static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
++static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
+ {
+     signed char hev = 0;
+     hev  |= (abs(p1 - p0) > thresh) * -1;
+     hev  |= (abs(q1 - q0) > thresh) * -1;
+     return hev;
+ }
+ 
+-static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
++static void vp8_filter(signed char mask, uc hev, uc *op1,
+         uc *op0, uc *oq0, uc *oq1)
+ 
+ {
+     signed char ps0, qs0;
+     signed char ps1, qs1;
+     signed char vp8_filter, Filter1, Filter2;
+     signed char u;
+ 
+@@ -153,17 +153,17 @@ void vp8_loop_filter_vertical_edge_c
+ 
+         vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
+ 
+         s += p;
+     }
+     while (++i < count * 8);
+ }
+ 
+-static __inline void vp8_mbfilter(signed char mask, uc hev,
++static void vp8_mbfilter(signed char mask, uc hev,
+                            uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
+ {
+     signed char s, u;
+     signed char vp8_filter, Filter1, Filter2;
+     signed char ps2 = (signed char) * op2 ^ 0x80;
+     signed char ps1 = (signed char) * op1 ^ 0x80;
+     signed char ps0 = (signed char) * op0 ^ 0x80;
+     signed char qs0 = (signed char) * oq0 ^ 0x80;
+@@ -274,27 +274,27 @@ void vp8_mbloop_filter_vertical_edge_c
+ 
+         s += p;
+     }
+     while (++i < count * 8);
+ 
+ }
+ 
+ /* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+-static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
++static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
+ {
+ /* Why does this cause problems for win32?
+  * error C2143: syntax error : missing ';' before 'type'
+  *  (void) limit;
+  */
+     signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= blimit) * -1;
+     return mask;
+ }
+ 
+-static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
++static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
+ {
+     signed char vp8_filter, Filter1, Filter2;
+     signed char p1 = (signed char) * op1 ^ 0x80;
+     signed char p0 = (signed char) * op0 ^ 0x80;
+     signed char q0 = (signed char) * oq0 ^ 0x80;
+     signed char q1 = (signed char) * oq1 ^ 0x80;
+     signed char u;
+ 
+diff --git a/media/libvpx/vp8/common/onyx.h b/media/libvpx/vp8/common/onyx.h
+--- a/media/libvpx/vp8/common/onyx.h
++++ b/media/libvpx/vp8/common/onyx.h
+@@ -67,17 +67,17 @@ extern "C"
+     {
+         FRAMEFLAGS_KEY    = 1,
+         FRAMEFLAGS_GOLDEN = 2,
+         FRAMEFLAGS_ALTREF = 4,
+     } FRAMETYPE_FLAGS;
+ 
+ 
+ #include <assert.h>
+-    static __inline void Scale2Ratio(int mode, int *hr, int *hs)
++    static void Scale2Ratio(int mode, int *hr, int *hs)
+     {
+         switch (mode)
+         {
+         case    NORMAL:
+             *hr = 1;
+             *hs = 1;
+             break;
+         case    FOURFIVE:
+diff --git a/media/libvpx/vp8/encoder/encodemv.c b/media/libvpx/vp8/encoder/encodemv.c
+--- a/media/libvpx/vp8/encoder/encodemv.c
++++ b/media/libvpx/vp8/encoder/encodemv.c
+@@ -181,17 +181,17 @@ void vp8_build_component_cost_table(int 
+ 
+ // Motion vector probability table update depends on benefit.
+ // Small correction allows for the fact that an update to an MV probability
+ // may have benefit in subsequent frames as well as the current one.
+ 
+ #define MV_PROB_UPDATE_CORRECTION   -1
+ 
+ 
+-__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2])
++static void calc_prob(vp8_prob *p, const unsigned int ct[2])
+ {
+     const unsigned int tot = ct[0] + ct[1];
+ 
+     if (tot)
+     {
+         const vp8_prob x = ((ct[0] * 255) / tot) & -2;
+         *p = x ? x : 1;
+     }
+diff --git a/media/libvpx/vp8/encoder/rdopt.c b/media/libvpx/vp8/encoder/rdopt.c
+--- a/media/libvpx/vp8/encoder/rdopt.c
++++ b/media/libvpx/vp8/encoder/rdopt.c
+@@ -1326,17 +1326,17 @@ static void rd_check_segment(VP8_COMP *c
+         {
+             bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
+             bsi->modes[i] = x->partition_info->bmi[i].mode;
+             bsi->eobs[i] = x->e_mbd.eobs[i];
+         }
+     }
+ }
+ 
+-static __inline
++static
+ void vp8_cal_step_param(int sr, int *sp)
+ {
+     int step = 0;
+ 
+     if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
+     else if (sr < 1) sr = 1;
+ 
+     while (sr>>=1)
+diff --git a/media/libvpx/vp8/encoder/sad_c.c b/media/libvpx/vp8/encoder/sad_c.c
+--- a/media/libvpx/vp8/encoder/sad_c.c
++++ b/media/libvpx/vp8/encoder/sad_c.c
+@@ -34,17 +34,17 @@ unsigned int vp8_sad16x16_c(
+         src_ptr += src_stride;
+         ref_ptr += ref_stride;
+     }
+ 
+     return sad;
+ }
+ 
+ 
+-static __inline
++static
+ unsigned int sad_mx_n_c(
+     const unsigned char *src_ptr,
+     int  src_stride,
+     const unsigned char *ref_ptr,
+     int  ref_stride,
+     int m,
+     int n)
+ {
+diff --git a/media/libvpx/vp8/encoder/tokenize.c b/media/libvpx/vp8/encoder/tokenize.c
+--- a/media/libvpx/vp8/encoder/tokenize.c
++++ b/media/libvpx/vp8/encoder/tokenize.c
+@@ -480,17 +480,17 @@ void print_context_counters()
+ 
+ 
+ void vp8_tokenize_initialize()
+ {
+     fill_value_tokens();
+ }
+ 
+ 
+-static __inline void stuff2nd_order_b
++static void stuff2nd_order_b
+ (
+     TOKENEXTRA **tp,
+     ENTROPY_CONTEXT *a,
+     ENTROPY_CONTEXT *l,
+     VP8_COMP *cpi
+ )
+ {
+     int pt; /* near block/prev token context index */
+@@ -504,17 +504,17 @@ static __inline void stuff2nd_order_b
+     ++t;
+ 
+     *tp = t;
+     pt = 0;
+     *a = *l = pt;
+ 
+ }
+ 
+-static __inline void stuff1st_order_b
++static void stuff1st_order_b
+ (
+     TOKENEXTRA **tp,
+     ENTROPY_CONTEXT *a,
+     ENTROPY_CONTEXT *l,
+     int type,
+     VP8_COMP *cpi
+ )
+ {
+@@ -528,17 +528,17 @@ static __inline void stuff1st_order_b
+     t->skip_eob_node = 0;
+     ++cpi->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];
+     ++t;
+     *tp = t;
+     pt = 0; /* 0 <-> all coeff data is zero */
+     *a = *l = pt;
+ 
+ }
+-static __inline
++static
+ void stuff1st_order_buv
+ (
+     TOKENEXTRA **tp,
+     ENTROPY_CONTEXT *a,
+     ENTROPY_CONTEXT *l,
+     VP8_COMP *cpi
+ )
+ {
+diff --git a/media/libvpx/vp8/encoder/treewriter.h b/media/libvpx/vp8/encoder/treewriter.h
+--- a/media/libvpx/vp8/encoder/treewriter.h
++++ b/media/libvpx/vp8/encoder/treewriter.h
+@@ -37,28 +37,28 @@ typedef BOOL_CODER vp8_writer;
+ 
+ #define vp8_cost_bit( x, b) vp8_cost_zero( (b)?  vp8_complement(x) : (x) )
+ 
+ /* VP8BC version is scaled by 2^20 rather than 2^8; see bool_coder.h */
+ 
+ 
+ /* Both of these return bits, not scaled bits. */
+ 
+-static __inline unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p)
++static unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p)
+ {
+     /* Imitate existing calculation */
+ 
+     return ((ct[0] * vp8_cost_zero(p))
+             + (ct[1] * vp8_cost_one(p))) >> 8;
+ }
+ 
+ /* Small functions to write explicit values and tokens, as well as
+    estimate their lengths. */
+ 
+-static __inline void vp8_treed_write
++static void vp8_treed_write
+ (
+     vp8_writer *const w,
+     vp8_tree t,
+     const vp8_prob *const p,
+     int v,
+     int n               /* number of bits in v, assumed nonzero */
+ )
+ {
+@@ -67,28 +67,28 @@ static __inline void vp8_treed_write
+     do
+     {
+         const int b = (v >> --n) & 1;
+         vp8_write(w, b, p[i>>1]);
+         i = t[i+b];
+     }
+     while (n);
+ }
+-static __inline void vp8_write_token
++static void vp8_write_token
+ (
+     vp8_writer *const w,
+     vp8_tree t,
+     const vp8_prob *const p,
+     vp8_token *const x
+ )
+ {
+     vp8_treed_write(w, t, p, x->value, x->Len);
+ }
+ 
+-static __inline int vp8_treed_cost(
++static int vp8_treed_cost(
+     vp8_tree t,
+     const vp8_prob *const p,
+     int v,
+     int n               /* number of bits in v, assumed nonzero */
+ )
+ {
+     int c = 0;
+     vp8_tree_index i = 0;
+@@ -98,17 +98,17 @@ static __inline int vp8_treed_cost(
+         const int b = (v >> --n) & 1;
+         c += vp8_cost_bit(p[i>>1], b);
+         i = t[i+b];
+     }
+     while (n);
+ 
+     return c;
+ }
+-static __inline int vp8_cost_token
++static int vp8_cost_token
+ (
+     vp8_tree t,
+     const vp8_prob *const p,
+     vp8_token *const x
+ )
+ {
+     return vp8_treed_cost(t, p, x->value, x->Len);
+ }
new file mode 100644
--- /dev/null
+++ b/media/libvpx/I8a35831e.patch
@@ -0,0 +1,29 @@
+# HG changeset patch
+# Parent 919fe109d4a2e1768470d78c59d5b8ecc1a34a53
+# User Timothy B. Terriberry <tterribe@vt.edu>
+fix potential use of uninitialized rate_y
+
+Upstream Change-Id: I8a35831e8f08b549806d0c2c6900d42af883f78f
+
+diff --git a/media/libvpx/vp8/encoder/rdopt.c b/media/libvpx/vp8/encoder/rdopt.c
+--- a/media/libvpx/vp8/encoder/rdopt.c
++++ b/media/libvpx/vp8/encoder/rdopt.c
+@@ -742,17 +742,17 @@ static int rd_pick_intra4x4mby_modes(VP8
+         if(total_rd >= (int64_t)best_rd)
+             break;
+     }
+ 
+     if(total_rd >= (int64_t)best_rd)
+         return INT_MAX;
+ 
+     *Rate = cost;
+-    *rate_y += tot_rate_y;
++    *rate_y = tot_rate_y;
+     *Distortion = distortion;
+ 
+     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
+ }
+ 
+ 
+ static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
+                                       MACROBLOCK *x,
new file mode 100644
--- /dev/null
+++ b/media/libvpx/I9713c9f0.patch
@@ -0,0 +1,33 @@
+# HG changeset patch
+# Parent d0bb669a2e26f138d416333b21f55151503dcdf9
+# User Timothy B. Terriberry <tterribe@vt.edu>
+decoder: reset segmentation map on keyframes
+
+Upstream Change-Id: I9713c9f070eb37b31b3b029d9ef96be9b6ea2def
+
+diff --git a/media/libvpx/vp8/decoder/decodframe.c b/media/libvpx/vp8/decoder/decodframe.c
+--- a/media/libvpx/vp8/decoder/decodframe.c
++++ b/media/libvpx/vp8/decoder/decodframe.c
+@@ -830,16 +830,22 @@ int vp8_decode_frame(VP8D_COMP *pbi)
+             for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
+             {
+                 /* If not explicitly set value is defaulted to 255 by memset above */
+                 if (vp8_read_bit(bc))
+                     xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8);
+             }
+         }
+     }
++    else
++    {
++        /* No segmentation updates on this frame */
++        xd->update_mb_segmentation_map = 0;
++        xd->update_mb_segmentation_data = 0;
++    }
+ 
+     /* Read the loop filter level and type */
+     pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc);
+     pc->filter_level = vp8_read_literal(bc, 6);
+     pc->sharpness_level = vp8_read_literal(bc, 3);
+ 
+     /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */
+     xd->mode_ref_lf_delta_update = 0;
--- a/media/libvpx/LICENSE
+++ b/media/libvpx/LICENSE
@@ -1,25 +1,26 @@
-Copyright (c) 2010, Google Inc. All rights reserved.
+Copyright (c) 2010, The WebM Project authors. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
 met:
 
   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
 
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.
 
-  * Neither the name of Google nor the names of its contributors may
-    be used to endorse or promote products derived from this software
-    without specific prior written permission.
+  * Neither the name of Google, nor the WebM Project, nor the names
+    of its contributors may be used to endorse or promote products
+    derived from this software without specific prior written
+    permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
--- a/media/libvpx/Makefile.in
+++ b/media/libvpx/Makefile.in
@@ -72,16 +72,17 @@ VPATH += \
   $(srcdir)/build/make \
   $(srcdir)/vpx \
   $(srcdir)/vpx/src \
   $(srcdir)/vpx_mem \
   $(srcdir)/vpx_mem/include \
   $(srcdir)/vpx_ports \
   $(srcdir)/vpx_scale \
   $(srcdir)/vpx_scale/arm \
+  $(srcdir)/vpx_scale/arm/neon \
   $(srcdir)/vpx_scale/generic \
   $(srcdir)/vp8 \
   $(srcdir)/vp8/common \
   $(srcdir)/vp8/common/arm \
   $(srcdir)/vp8/common/arm/armv6 \
   $(srcdir)/vp8/common/arm/neon \
   $(srcdir)/vp8/common/generic \
   $(srcdir)/vp8/common/x86 \
@@ -110,69 +111,66 @@ EXPORTS_vpx = \
   vpx_codec_impl_bottom.h \
   vpx_codec_impl_top.h \
   vpx_decoder.h \
   vpx_decoder_compat.h \
   vpx_encoder.h\
   vpx_image.h \
   vpx_mem_intrnl.h \
   vpx_mem.h \
-  config.h \
   mem.h \
   vpx_integer.h \
   vpx_timer.h \
   arm.h \
   x86.h \
   scale_mode.h \
   vpxscale.h \
   yv12config.h \
   yv12extend.h \
   $(NULL)
 
 CSRCS += \
   vpx_config_c.c \
-  systemdependent.c \
+  vp8_dx_iface.c \
   alloccommon.c \
   blockd.c \
   debugmodes.c \
-  defaultcoefcounts.c \
-  dsystemdependent.c \
+  dequantize.c \
   entropy.c \
   entropymode.c \
   entropymv.c \
   extend.c \
   filter.c \
   findnearmv.c \
+  idct_blk.c \
   idctllm.c \
-  invtrans.c \
   loopfilter.c \
   loopfilter_filters.c \
   mbpitch.c \
   modecont.c \
   modecontext.c \
   postproc.c \
   quant_common.c \
-  recon.c \
   reconinter.c \
   reconintra.c \
   reconintra4x4.c \
+  sad_c.c \
   setupintrarecon.c \
   swapyv12buffer.c \
-  textblit.c \
   treecoder.c \
+  variance_c.c \
+  systemdependent.c \
   dboolhuff.c \
   decodemv.c \
   decodframe.c \
-  dequantize.c \
   detokenize.c \
   reconintra_mt.c \
-  idct_blk.c \
   onyxd_if.c \
   threading.c \
-  vp8_dx_iface.c \
+  dsystemdependent.c \
   vpx_codec.c \
   vpx_decoder.c \
   vpx_decoder_compat.c \
   vpx_encoder.c \
   vpx_image.c \
   vpx_mem.c \
   gen_scalers.c \
   vpxscale.c \
@@ -198,338 +196,352 @@ VPATH += \
   $(srcdir)/vp8/encoder/x86 \
   $(srcdir)/vpx_scale/arm \
   $(srcdir)/vpx_scale/arm/neon \
   $(NULL)
 
 CSRCS += \
   vp8_cx_iface.c \
   bitstream.c \
-  boolhuff.c \
   dct.c \
   encodeframe.c \
   encodeintra.c \
   encodemb.c \
   encodemv.c \
   ethreading.c \
   firstpass.c \
   lookahead.c \
   mcomp.c \
   modecosts.c \
+  mr_dissim.c \
   onyx_if.c \
+  pickinter.c \
   picklpf.c \
-  pickinter.c \
   psnr.c \
   quantize.c \
   ratectrl.c \
   rdopt.c \
-  sad_c.c \
   segmentation.c \
   temporal_filter.c \
   tokenize.c \
   treewriter.c \
-  variance_c.c \
   csystemdependent.c \
   $(NULL)
 endif
 
 ifdef VPX_X86_ASM
 # Building on an x86 platform with a supported assembler, include
 # the optimized assembly in the build.
 
 CSRCS += \
+  filter_x86.c \
   idct_blk_mmx.c \
   idct_blk_sse2.c \
   loopfilter_x86.c \
+  postproc_x86.c \
   recon_wrapper_sse2.c \
+  variance_mmx.c \
+  variance_sse2.c \
+  variance_ssse3.c \
   vp8_asm_stubs.c \
   x86_systemdependent.c \
   x86_dsystemdependent.c \
   $(NULL)
 
 ASFILES += \
+  dequantize_mmx.asm \
   idctllm_mmx.asm \
   idctllm_sse2.asm \
   iwalsh_mmx.asm \
   iwalsh_sse2.asm \
   loopfilter_mmx.asm \
   loopfilter_sse2.asm \
   postproc_mmx.asm \
   postproc_sse2.asm \
   recon_mmx.asm \
   recon_sse2.asm \
+  sad_mmx.asm \
+  sad_sse2.asm \
+  sad_sse3.asm \
+  sad_sse4.asm \
+  sad_ssse3.asm \
   subpixel_mmx.asm \
   subpixel_sse2.asm \
   subpixel_ssse3.asm \
-  dequantize_mmx.asm \
+  variance_impl_mmx.asm \
+  variance_impl_sse2.asm \
+  variance_impl_ssse3.asm \
   emms.asm \
   $(NULL)
 
+ifeq (64,$(findstring 64,$(OS_TEST)))
+ASFILES += loopfilter_block_sse2.asm
+endif
+
 ifdef MOZ_VP8_ENCODER
 
 CSRCS += \
-  variance_mmx.c \
-  variance_sse2.c \
-  variance_ssse3.c \
   x86_csystemdependent.c \
   $(NULL)
 
+# Files which depend on asm_enc_offsets.asm
+VPX_ASM_ENC_OFFSETS_SRCS = \
+  quantize_sse2.asm \
+  quantize_sse4.asm \
+  quantize_ssse3.asm \
+  $(NULL)
+
 ASFILES += \
   dct_mmx.asm \
   dct_sse2.asm \
   encodeopt.asm \
   fwalsh_sse2.asm \
   quantize_mmx.asm \
-  quantize_sse2.asm \
-  quantize_ssse3.asm \
-  quantize_sse4.asm \
-  sad_mmx.asm \
-  sad_sse2.asm \
-  sad_sse3.asm \
-  sad_ssse3.asm \
-  sad_sse4.asm \
   subtract_mmx.asm \
   subtract_sse2.asm \
   temporal_filter_apply_sse2.asm \
-  variance_impl_mmx.asm \
-  variance_impl_sse2.asm \
-  variance_impl_ssse3.asm \
-  $(NULL)
-
-# Files which depend on asm_enc_offsets.asm
-VPX_ASM_ENC_OFFSETS_SRCS = \
-  quantize_sse2.asm \
-  quantize_ssse3.asm \
-  quantize_sse4.asm \
+  $(VPX_ASM_ENC_OFFSETS_SRCS) \
   $(NULL)
 
 endif
 
 endif
 
 ifdef VPX_ARM_ASM
 # Building on an ARM platform with a supported assembler, include
 # the optimized assembly in the build.
 
-# The Android NDK doesn't pre-define anything to indicate the OS it's on, so
-# do it for them.
 ifeq ($(OS_TARGET),Android)
+
+# Older versions of the Android NDK don't pre-define anything to indicate the
+# OS they're on, so do it for them.
 DEFINES += -D__linux__
+
+# For cpu-features.h
+LOCAL_INCLUDES += -I$(ANDROID_NDK)/sources/android/cpufeatures
+# For cpu-features.c
+VPATH += $(ANDROID_NDK)/sources/android/cpufeatures
+
+CSRCS += cpu-features.c
+
 endif
 
 CSRCS += \
   arm_cpudetect.c \
   arm_systemdependent.c \
   bilinearfilter_arm.c \
+  dequantize_arm.c \
   filter_arm.c \
   loopfilter_arm.c \
   reconintra_arm.c \
-  arm_dsystemdependent.c \
-  dequantize_arm.c \
+  variance_arm.c \
   idct_blk_v6.c \
   idct_blk_neon.c \
-  recon_neon.c \
+  arm_dsystemdependent.c \
+  arm_scalesystemdependent.c \
+  yv12extend_arm.c \
+  $(NULL)
+
+# Files which depend on asm_com_offsets.asm
+VPX_ASM_COM_OFFSETS_SRCS = \
+  vp8_vpxyv12_copy_y_neon.asm \
+  vp8_vpxyv12_copyframe_func_neon.asm \
+  vp8_vpxyv12_extendframeborders_neon.asm \
   $(NULL)
 
 VPX_ASFILES = \
   bilinearfilter_v6.asm \
+  copymem16x16_v6.asm \
   copymem8x4_v6.asm \
   copymem8x8_v6.asm \
-  copymem16x16_v6.asm \
   dc_only_idct_add_v6.asm \
-  iwalsh_v6.asm \
+  dequant_idct_v6.asm \
+  dequantize_v6.asm \
   filter_v6.asm \
   idct_v6.asm \
+  intra4x4_predict_v6.asm \
+  iwalsh_v6.asm \
   loopfilter_v6.asm \
-  recon_v6.asm \
   simpleloopfilter_v6.asm \
   sixtappredict8x4_v6.asm \
+  bilinearpredict16x16_neon.asm \
   bilinearpredict4x4_neon.asm \
   bilinearpredict8x4_neon.asm \
   bilinearpredict8x8_neon.asm \
-  bilinearpredict16x16_neon.asm \
+  buildintrapredictorsmby_neon.asm \
+  copymem16x16_neon.asm \
   copymem8x4_neon.asm \
   copymem8x8_neon.asm \
-  copymem16x16_neon.asm \
   dc_only_idct_add_neon.asm \
+  dequant_idct_neon.asm \
+  dequantizeb_neon.asm \
+  idct_dequant_0_2x_neon.asm \
+  idct_dequant_full_2x_neon.asm \
   iwalsh_neon.asm \
   loopfilter_neon.asm \
   loopfiltersimplehorizontaledge_neon.asm \
   loopfiltersimpleverticaledge_neon.asm \
   mbloopfilter_neon.asm \
-  recon2b_neon.asm \
-  recon4b_neon.asm \
-  reconb_neon.asm \
-  shortidct4x4llm_1_neon.asm \
+  save_neon_reg.asm \
+  vp8_mse16x16_armv6.asm \
+  vp8_sad16x16_armv6.asm \
+  vp8_variance16x16_armv6.asm \
+  vp8_variance8x8_armv6.asm \
+  vp8_variance_halfpixvar16x16_h_armv6.asm \
+  vp8_variance_halfpixvar16x16_hv_armv6.asm \
+  vp8_variance_halfpixvar16x16_v_armv6.asm \
   shortidct4x4llm_neon.asm \
+  sad16_neon.asm \
+  sad8_neon.asm \
+  sixtappredict16x16_neon.asm \
   sixtappredict4x4_neon.asm \
   sixtappredict8x4_neon.asm \
   sixtappredict8x8_neon.asm \
-  sixtappredict16x16_neon.asm \
-  recon16x16mb_neon.asm \
-  buildintrapredictorsmby_neon.asm \
-  save_neon_reg.asm \
-  dequant_dc_idct_v6.asm \
-  dequant_idct_v6.asm \
-  dequantize_v6.asm \
-  idct_dequant_dc_full_2x_neon.asm \
-  idct_dequant_dc_0_2x_neon.asm \
-  dequant_idct_neon.asm \
-  idct_dequant_full_2x_neon.asm \
-  idct_dequant_0_2x_neon.asm \
-  dequantizeb_neon.asm \
+  variance_neon.asm \
+  vp8_mse16x16_neon.asm \
+  vp8_subpixelvariance8x8_neon.asm \
+  vp8_subpixelvariance16x16_neon.asm \
+  vp8_subpixelvariance16x16s_neon.asm \
+  $(VPX_ASM_COM_OFFSETS_SRCS) \
   $(NULL)
 
 ifdef MOZ_VP8_ENCODER
 CSRCS += \
   arm_csystemdependent.c \
+  boolhuff_arm.c \
   dct_arm.c \
   picklpf_arm.c \
-  variance_arm.c \
-  $(NULL)
-
-VPX_ASFILES += \
-  vp8_packtokens_armv5.asm \
-  vp8_packtokens_mbrow_armv5.asm \
-  vp8_packtokens_partitions_armv5.asm \
-  vp8_fast_fdct4x4_armv6.asm \
-  vp8_fast_quantize_b_armv6.asm \
-  vp8_mse16x16_armv6.asm \
-  vp8_sad16x16_armv6.asm \
-  vp8_subtract_armv6.asm \
-  vp8_variance16x16_armv6.asm \
-  vp8_variance8x8_armv6.asm \
-  vp8_variance_halfpixvar16x16_h_armv6.asm \
-  vp8_variance_halfpixvar16x16_hv_armv6.asm \
-  vp8_variance_halfpixvar16x16_v_armv6.asm \
-  walsh_v6.asm \
-  fastfdct4x4_neon.asm \
-  fastfdct8x4_neon.asm \
-  fastquantizeb_neon.asm \
-  sad16_neon.asm \
-  sad8_neon.asm \
-  shortfdct_neon.asm \
-  subtract_neon.asm \
-  variance_neon.asm \
-  vp8_memcpy_neon.asm \
-  vp8_mse16x16_neon.asm \
-  vp8_shortwalsh4x4_neon.asm \
-  vp8_subpixelvariance8x8_neon.asm \
-  vp8_subpixelvariance16x16_neon.asm \
-  vp8_subpixelvariance16x16s_neon.asm \
-  vp8_vpxyv12_copyframeyonly_neon.asm \
-  $(NULL)
-
-# Files which depend on asm_com_offsets.asm
-VPX_ASM_COM_OFFSETS_SRCS = \
-  vp8_vpxyv12_copyframeyonly_neon.asm \
   $(NULL)
 
 # Files which depend on asm_enc_offsets.asm
 VPX_ASM_ENC_OFFSETS_SRCS = \
+  boolhuff_armv5te.asm \
   vp8_packtokens_armv5.asm \
   vp8_packtokens_mbrow_armv5.asm \
   vp8_packtokens_partitions_armv5.asm \
   vp8_fast_quantize_b_armv6.asm \
   vp8_subtract_armv6.asm \
   fastquantizeb_neon.asm \
   subtract_neon.asm \
   $(NULL)
 
+VPX_ASFILES += \
+  vp8_short_fdct4x4_armv6.asm \
+  walsh_v6.asm \
+  shortfdct_neon.asm \
+  vp8_memcpy_neon.asm \
+  vp8_shortwalsh4x4_neon.asm \
+  $(VPX_ASM_ENC_OFFSETS_SRCS) \
+  $(NULL)
+
 endif
 
 ifdef VPX_AS_CONVERSION
 # The ARM asm is written in ARM RVCT syntax, but we actually build it with
 # gas using GNU syntax. Add some rules to perform the conversion.
 VPX_CONVERTED_ASFILES = $(addsuffix .$(ASM_SUFFIX), $(VPX_ASFILES))
 
 ASFILES += $(VPX_CONVERTED_ASFILES)
 GARBAGE += $(VPX_CONVERTED_ASFILES)
 
 %.asm.$(ASM_SUFFIX): %.asm
 	$(VPX_AS_CONVERSION) < $< > $@
 else
 ASFILES += $(VPX_ASFILES)
 endif
 
+else
+ifdef MOZ_VP8_ENCODER
+# boolhuff_armv5te.asm defines the same functions as boolhuff.c instead of
+# using RTCD, so we have to make sure we only add one of the two.
+CSRCS += boolhuff.c
+endif
 endif
 
-ifdef MOZ_VP8_ENCODER
 ifdef VPX_NEED_OBJ_INT_EXTRACT
 
 # We don't have a compiler that supports a compatible inline asm syntax, so we
 # have to resort to extracting asm offsets from a compiled object. This only
 # works if we have the appropriate system headers obj_int_extract needs to
 # parse that format, and so only has limited support for cross-compilation.
 
 ifdef VPX_ARM_ASM
 VPX_OIE_FORMAT := rvds
 else
 VPX_OIE_FORMAT := gas
 endif
 
 HOST_CSRCS = obj_int_extract.c
 HOST_PROGRAM = host_obj_int_extract$(HOST_BIN_SUFFIX)
 
-GARBAGE += \
-  asm_com_offsets.$(OBJ_SUFFIX) asm_com_offsets.asm \
-  asm_enc_offsets.$(OBJ_SUFFIX) asm_enc_offsets.asm \
-  $(NULL)
+GARBAGE += asm_com_offsets.$(OBJ_SUFFIX) asm_com_offsets.asm
+
+ifdef MOZ_VP8_ENCODER
+GARBAGE += asm_enc_offsets.$(OBJ_SUFFIX) asm_enc_offsets.asm
+endif
 
 else
 
 # We can extract the asm offsets directly from generated assembly using inline
 # asm. This is the preferred method.
 
 asm_com_offsets.s: CFLAGS += -DINLINE_ASM
-asm_enc_offsets.s: CFLAGS += -DINLINE_ASM
+
+OFFSET_PATTERN := '^[a-zA-Z0-9_]* EQU'
 
 asm_com_offsets.asm: asm_com_offsets.s
-	grep \\\<EQU\\\> $< | sed -e 's/[$$\#]//g' \
+	grep $(OFFSET_PATTERN) $< | sed -e 's/[$$\#]//g' \
 	    $(if $(VPX_AS_CONVERSION),| $(VPX_AS_CONVERSION)) > $@
 
+GARBAGE += asm_com_offsets.s asm_com_offsets.asm
+
+ifdef MOZ_VP8_ENCODER
+
+asm_enc_offsets.s: CFLAGS += -DINLINE_ASM
+
 asm_enc_offsets.asm: asm_enc_offsets.s
-	grep \\\<EQU\\\> $< | sed -e 's/[$$\#]//g' \
+	grep $(OFFSET_PATTERN) $< | sed -e 's/[$$\#]//g' \
 	    $(if $(VPX_AS_CONVERSION),| $(VPX_AS_CONVERSION)) > $@
 
-GARBAGE += \
-  asm_com_offsets.s asm_com_offsets.asm \
-  asm_enc_offsets.s asm_enc_offsets.asm \
-  $(NULL)
+GARBAGE += asm_enc_offsets.s asm_enc_offsets.asm
 
 endif
+
 endif
 
 include $(topsrcdir)/config/rules.mk
 
 # This must be after rules.mk in order to use $(OBJ_SUFFIX) outside a
 # recursively-expanded variable.
-ifdef MOZ_VP8_ENCODER
-
 ifdef VPX_NEED_OBJ_INT_EXTRACT
 
 asm_com_offsets.asm: asm_com_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
 	./$(HOST_PROGRAM) $(VPX_OIE_FORMAT) $< \
 	    $(if $(VPX_AS_CONVERSION),| $(VPX_AS_CONVERSION)) > $@
 
+ifdef MOZ_VP8_ENCODER
+
 asm_enc_offsets.asm: asm_enc_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
 	./$(HOST_PROGRAM) $(VPX_OIE_FORMAT) $< \
 	    $(if $(VPX_AS_CONVERSION),| $(VPX_AS_CONVERSION)) > $@
 
 endif
+endif
 
 # These dependencies are not generated automatically, so do it manually.
 ifdef VPX_AS_CONVERSION
 $(addsuffix .$(OBJ_SUFFIX), $(VPX_ASM_COM_OFFSETS_SRCS)): asm_com_offsets.asm
+ifdef MOZ_VP8_ENCODER
 $(addsuffix .$(OBJ_SUFFIX), $(VPX_ASM_ENC_OFFSETS_SRCS)): asm_enc_offsets.asm
+endif
 else
 $(patsubst %.$(ASM_SUFFIX),%.$(OBJ_SUFFIX), $(VPX_ASM_COM_OFFSETS_SRCS)): asm_com_offsets.asm
+ifdef MOZ_VP8_ENCODER
 $(patsubst %.$(ASM_SUFFIX),%.$(OBJ_SUFFIX), $(VPX_ASM_ENC_OFFSETS_SRCS)): asm_enc_offsets.asm
 endif
-
 endif
 
 # Workaround a bug of Sun Studio (CR 6963410)
 ifdef SOLARIS_SUNPRO_CC
 ifeq (86,$(findstring 86,$(OS_TEST)))
 filter.o: filter.c Makefile.in
 	$(REPORT_BUILD)
 	@$(MAKE_DEPS_AUTO_CC)
old mode 100644
new mode 100755
--- a/media/libvpx/build/make/ads2gas.pl
+++ b/media/libvpx/build/make/ads2gas.pl
@@ -121,25 +121,27 @@ while (<STDIN>)
     # Labels need trailing colon
 #   s/^(\w+)/$1:/ if !/EQU/;
     # put the colon at the end of the line in the macro
     s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/;
 
     # ALIGN directive
     s/ALIGN/.balign/g;
 
-    # Strip ARM
-    s/\sARM/@ ARM/g;
+    # ARM code
+    s/\sARM/.arm/g;
+
+    # The numerical equivalents of these eabi_attributes can be found in
+    # the "ARM IHI 0045C" document.
 
-    # Strip REQUIRE8
-    #s/\sREQUIRE8/@ REQUIRE8/g;
-    s/\sREQUIRE8/@ /g;      #EQU cause problem
+    # REQUIRE8 Stack is required to be 8-byte aligned
+    s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g;
 
-    # Strip PRESERVE8
-    s/\sPRESERVE8/@ PRESERVE8/g;
+    # PRESERVE8 Stack 8-byte align is preserved
+    s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g;
 
     # Use PROC and ENDP to give the symbols a .size directive.
     # This makes them show up properly in debugging tools like gdb and valgrind.
     if (/\bPROC\b/)
     {
         my $proc;
         /^_([\.0-9A-Z_a-z]\w+)\b/;
         $proc = $1;
--- a/media/libvpx/compile_errors.patch
+++ b/media/libvpx/compile_errors.patch
@@ -1,12 +1,69 @@
+# HG changeset patch
+# Parent f73b7e96c0735448035ec4786f37dcf7d7644e00
+# User Timothy B. Terriberry <tterribe@vt.edu>
+Patch to fix errors when including C headers from C++
+
+diff --git a/media/libvpx/vp8/common/onyx.h b/media/libvpx/vp8/common/onyx.h
+--- a/media/libvpx/vp8/common/onyx.h
++++ b/media/libvpx/vp8/common/onyx.h
+@@ -55,24 +55,24 @@ extern "C"
+ 
+     typedef enum
+     {
+         MODE_REALTIME       = 0x0,
+         MODE_GOODQUALITY    = 0x1,
+         MODE_BESTQUALITY    = 0x2,
+         MODE_FIRSTPASS      = 0x3,
+         MODE_SECONDPASS     = 0x4,
+-        MODE_SECONDPASS_BEST = 0x5,
++        MODE_SECONDPASS_BEST = 0x5
+     } MODE;
+ 
+     typedef enum
+     {
+         FRAMEFLAGS_KEY    = 1,
+         FRAMEFLAGS_GOLDEN = 2,
+-        FRAMEFLAGS_ALTREF = 4,
++        FRAMEFLAGS_ALTREF = 4
+     } FRAMETYPE_FLAGS;
+ 
+ 
+ #include <assert.h>
+     static void Scale2Ratio(int mode, int *hr, int *hs)
+     {
+         switch (mode)
+         {
+diff --git a/media/libvpx/vp8/encoder/onyx_int.h b/media/libvpx/vp8/encoder/onyx_int.h
+--- a/media/libvpx/vp8/encoder/onyx_int.h
++++ b/media/libvpx/vp8/encoder/onyx_int.h
+@@ -158,17 +158,17 @@ typedef enum
+     THR_NEW1           = 13,
+     THR_NEW2           = 14,
+     THR_NEW3           = 15,
+ 
+     THR_SPLIT1         = 16,
+     THR_SPLIT2         = 17,
+     THR_SPLIT3         = 18,
+ 
+-    THR_B_PRED         = 19,
++    THR_B_PRED         = 19
+ }
+ THR_MODES;
+ 
+ typedef enum
+ {
+     DIAMOND = 0,
+     NSTEP = 1,
+     HEX = 2
 diff --git a/media/libvpx/vpx/vp8.h b/media/libvpx/vpx/vp8.h
 --- a/media/libvpx/vpx/vp8.h
 +++ b/media/libvpx/vpx/vp8.h
-@@ -41,33 +41,33 @@ enum vp8_com_control_id
+@@ -41,34 +41,34 @@ enum vp8_com_control_id
      VP8_SET_REFERENCE           = 1,    /**< pass in an external frame into decoder to be used as reference frame */
      VP8_COPY_REFERENCE          = 2,    /**< get a copy of reference frame from the decoder */
      VP8_SET_POSTPROC            = 3,    /**< set the decoder's post processing settings  */
      VP8_SET_DBG_COLOR_REF_FRAME = 4,    /**< set the reference frames to color for each macroblock */
      VP8_SET_DBG_COLOR_MB_MODES  = 5,    /**< set which macro block modes to color */
      VP8_SET_DBG_COLOR_B_MODES   = 6,    /**< set which blocks modes to color */
      VP8_SET_DBG_DISPLAY_MV      = 7,    /**< set which motion vector modes to draw */
      VP8_COMMON_CTRL_ID_MAX,
@@ -22,18 +79,60 @@ diff --git a/media/libvpx/vpx/vp8.h b/me
  {
      VP8_NOFILTERING             = 0,
      VP8_DEBLOCK                 = 1<<0,
      VP8_DEMACROBLOCK            = 1<<1,
      VP8_ADDNOISE                = 1<<2,
      VP8_DEBUG_TXT_FRAME_INFO    = 1<<3, /**< print frame information */
      VP8_DEBUG_TXT_MBLK_MODES    = 1<<4, /**< print macro block modes over each macro block */
      VP8_DEBUG_TXT_DC_DIFF       = 1<<5, /**< print dc diff for each macro block */
--    VP8_DEBUG_TXT_RATE_INFO     = 1<<6, /**< print video rate info (encoder only) */
-+    VP8_DEBUG_TXT_RATE_INFO     = 1<<6  /**< print video rate info (encoder only) */
+     VP8_DEBUG_TXT_RATE_INFO     = 1<<6, /**< print video rate info (encoder only) */
+-    VP8_MFQE                    = 1<<10,
++    VP8_MFQE                    = 1<<10
  };
  
  /*!\brief post process flags
   *
   * This define a structure that describe the post processing settings. For
   * the best objective measure (using the PSNR metric) set post_proc_flag
   * to VP8_DEBLOCK and deblocking_level to 1.
   */
+diff --git a/media/libvpx/vpx/vp8cx.h b/media/libvpx/vpx/vp8cx.h
+--- a/media/libvpx/vpx/vp8cx.h
++++ b/media/libvpx/vpx/vp8cx.h
+@@ -172,17 +172,17 @@ enum vp8e_enc_control_id
+      * per-frame bitrate, with the special (and default) value 0 meaning
+      * unlimited, or no additional clamping beyond the codec's built-in
+      * algorithm.
+      *
+      * For example, to allocate no more than 4.5 frames worth of bitrate
+      * to a keyframe, set this to 450.
+      *
+      */
+-    VP8E_SET_MAX_INTRA_BITRATE_PCT,
++    VP8E_SET_MAX_INTRA_BITRATE_PCT
+ };
+ 
+ /*!\brief vpx 1-D scaling mode
+  *
+  * This set of constants define 1-D vpx scaling modes
+  */
+ typedef enum vpx_scaling_mode_1d
+ {
+@@ -253,17 +253,17 @@ typedef enum
+  *
+  */
+ 
+ typedef enum
+ {
+     VP8_ONE_TOKENPARTITION   = 0,
+     VP8_TWO_TOKENPARTITION   = 1,
+     VP8_FOUR_TOKENPARTITION  = 2,
+-    VP8_EIGHT_TOKENPARTITION = 3,
++    VP8_EIGHT_TOKENPARTITION = 3
+ } vp8e_token_partitions;
+ 
+ 
+ /*!\brief VP8 model tuning parameters
+  *
+  * Changes the encoder to tune for certain types of input material.
+  *
+  */
--- a/media/libvpx/solaris.patch
+++ b/media/libvpx/solaris.patch
@@ -1,26 +1,8 @@
-diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/common/loopfilter_filters.c
---- a/media/libvpx/vp8/common/loopfilter_filters.c
-+++ b/media/libvpx/vp8/common/loopfilter_filters.c
-@@ -11,10 +11,14 @@
- 
- #include <stdlib.h>
- #include "loopfilter.h"
- #include "onyxc_int.h"
- 
-+#ifdef __SUNPRO_C
-+#define __inline inline
-+#endif
-+
- typedef unsigned char uc;
- 
- static __inline signed char vp8_signed_char_clamp(int t)
- {
-     t = (t < -128 ? -128 : t);
 diff --git a/media/libvpx/vpx_ports/mem.h b/media/libvpx/vpx_ports/mem.h
 --- a/media/libvpx/vpx_ports/mem.h
 +++ b/media/libvpx/vpx_ports/mem.h
 @@ -9,17 +9,17 @@
   */
  
  
  #ifndef VPX_PORTS_MEM_H
@@ -36,17 +18,17 @@ diff --git a/media/libvpx/vpx_ports/mem.
  #else
  #warning No alignment directives known for this compiler.
  #define DECLARE_ALIGNED(n,typ,val)  typ val
  #endif
  #endif
 diff --git a/media/libvpx/vpx_ports/x86.h b/media/libvpx/vpx_ports/x86.h
 --- a/media/libvpx/vpx_ports/x86.h
 +++ b/media/libvpx/vpx_ports/x86.h
-@@ -45,16 +45,36 @@
+@@ -45,16 +45,36 @@ typedef enum
  #define cpuid(func,ax,bx,cx,dx)\
      __asm__ __volatile__ (\
                            "mov %%ebx, %%edi   \n\t" \
                            "cpuid              \n\t" \
                            "xchg %%edi, %%ebx  \n\t" \
                            : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                            : "a" (func));
  #endif
@@ -73,17 +55,17 @@ diff --git a/media/libvpx/vpx_ports/x86.
  #else
  #if ARCH_X86_64
  void __cpuid(int CPUInfo[4], int info_type);
  #pragma intrinsic(__cpuid)
  #define cpuid(func,a,b,c,d) do{\
          int regs[4];\
          __cpuid(regs,func); a=regs[0];  b=regs[1];  c=regs[2];  d=regs[3];\
      } while(0)
-@@ -108,29 +128,36 @@ unsigned __int64 __rdtsc(void);
+@@ -131,29 +151,36 @@ unsigned __int64 __rdtsc(void);
  #endif
  static unsigned int
  x86_readtsc(void)
  {
  #if defined(__GNUC__) && __GNUC__
      unsigned int tsc;
      __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
      return tsc;
@@ -110,17 +92,17 @@ diff --git a/media/libvpx/vpx_ports/x86.
  #else
  #if ARCH_X86_64
  #define x86_pause_hint()\
      _mm_pause();
  #else
  #define x86_pause_hint()\
      __asm pause
  #endif
-@@ -144,16 +171,29 @@ x87_set_control_word(unsigned short mode
+@@ -167,16 +194,29 @@ x87_set_control_word(unsigned short mode
  }
  static unsigned short
  x87_get_control_word(void)
  {
      unsigned short mode;
      __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
      return mode;
  }
new file mode 100644
--- /dev/null
+++ b/media/libvpx/textrels.patch
@@ -0,0 +1,164 @@
+# HG changeset patch
+# Parent f7a8c8a419870421138438970a0514e79353ae34
+# User Timothy B. Terriberry <tterribe@vt.edu>
+Bug 730903 - Fix text relocations in libvpx variance functions.
+
+diff --git a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+--- a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
++++ b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+@@ -4,16 +4,21 @@
+ ;  Use of this source code is governed by a BSD-style license
+ ;  that can be found in the LICENSE file in the root of the source
+ ;  tree. An additional intellectual property rights grant can be found
+ ;  in the file PATENTS.  All contributing project authors may
+ ;  be found in the AUTHORS file in the root of the source tree.
+ ;
+ 
+ 
++bilinear_taps_coeff
++    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
++
++;-----------------
++
+     EXPORT  |vp8_sub_pixel_variance16x16_neon_func|
+     ARM
+     REQUIRE8
+     PRESERVE8
+ 
+     AREA ||.text||, CODE, READONLY, ALIGN=2
+ ; r0    unsigned char  *src_ptr,
+ ; r1    int  src_pixels_per_line,
+@@ -22,17 +27,17 @@
+ ; stack(r4) unsigned char *dst_ptr,
+ ; stack(r5) int dst_pixels_per_line,
+ ; stack(r6) unsigned int *sse
+ ;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon.
+ 
+ |vp8_sub_pixel_variance16x16_neon_func| PROC
+     push            {r4-r6, lr}
+ 
+-    ldr             r12, _BilinearTaps_coeff_
++    adr             r12, bilinear_taps_coeff
+     ldr             r4, [sp, #16]           ;load *dst_ptr from stack
+     ldr             r5, [sp, #20]           ;load dst_pixels_per_line from stack
+     ldr             r6, [sp, #24]           ;load *sse from stack
+ 
+     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
+     beq             secondpass_bfilter16x16_only
+ 
+     add             r2, r12, r2, lsl #3     ;calculate filter location
+@@ -410,16 +415,9 @@ sub_pixel_variance16x16_neon_loop
+ 
+     add             sp, sp, #528
+     vmov.32         r0, d0[0]                   ;return
+ 
+     pop             {r4-r6,pc}
+ 
+     ENDP
+ 
+-;-----------------
+-
+-_BilinearTaps_coeff_
+-    DCD     bilinear_taps_coeff
+-bilinear_taps_coeff
+-    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
+-
+     END
+diff --git a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+--- a/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
++++ b/media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+@@ -22,17 +22,17 @@
+ ; stack(r4) unsigned char *dst_ptr,
+ ; stack(r5) int dst_pixels_per_line,
+ ; stack(r6) unsigned int *sse
+ ;note: most of the code is copied from bilinear_predict8x8_neon and vp8_variance8x8_neon.
+ 
+ |vp8_sub_pixel_variance8x8_neon| PROC
+     push            {r4-r5, lr}
+ 
+-    ldr             r12, _BilinearTaps_coeff_
++    adr             r12, bilinear_taps_coeff
+     ldr             r4, [sp, #12]           ;load *dst_ptr from stack
+     ldr             r5, [sp, #16]           ;load dst_pixels_per_line from stack
+     ldr             lr, [sp, #20]           ;load *sse from stack
+ 
+     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
+     beq             skip_firstpass_filter
+ 
+ ;First pass: output_height lines x output_width columns (9x8)
+@@ -211,14 +211,12 @@ sub_pixel_variance8x8_neon_loop
+ 
+     vmov.32         r0, d0[0]                   ;return
+     pop             {r4-r5, pc}
+ 
+     ENDP
+ 
+ ;-----------------
+ 
+-_BilinearTaps_coeff_
+-    DCD     bilinear_taps_coeff
+ bilinear_taps_coeff
+     DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
+ 
+     END
+diff --git a/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+--- a/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm
++++ b/media/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+@@ -93,17 +93,17 @@
+     vsub.s16        q10, q12            ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement)
+     vsub.s16        q11, q13
+ 
+     ldr             r6, [r3, #vp8_blockd_qcoeff]
+ 
+     vmul.s16        q2, q6, q4          ; x * Dequant
+     vmul.s16        q3, q7, q5
+ 
+-    ldr             r0, _inv_zig_zag_   ; load ptr of inverse zigzag table
++    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table
+ 
+     vceq.s16        q8, q8              ; set q8 to all 1
+ 
+     vst1.s16        {q10, q11}, [r6]    ; store: qcoeff = x2
+ 
+     vmul.s16        q12, q6, q10        ; x2 * Dequant
+     vmul.s16        q13, q7, q11
+ 
+@@ -176,17 +176,17 @@
+     vshr.s16        q3, q1, #15
+ 
+     vld1.s16        {q14, q15}, [r5@128]; load round_ptr [0-15]
+     vld1.s16        {q8, q9}, [r4@128]  ; load quant_ptr [0-15]
+ 
+     vadd.s16        q12, q14            ; x + Round
+     vadd.s16        q13, q15
+ 
+-    ldr             r0, _inv_zig_zag_   ; load ptr of inverse zigzag table
++    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table
+ 
+     vqdmulh.s16     q12, q8             ; y = ((Round+abs(z)) * Quant) >> 16
+     vqdmulh.s16     q13, q9
+ 
+     vld1.16         {q10, q11}, [r0@128]; load inverse scan order
+ 
+     vceq.s16        q8, q8              ; set q8 to all 1
+ 
+@@ -242,19 +242,16 @@ zero_output
+     vst1.s16        {q0, q1}, [r7@128]  ; dqcoeff = 0
+ 
+     ldmfd           sp!, {r4-r7}
+     bx              lr
+ 
+     ENDP
+ 
+ ; default inverse zigzag table is defined in vp8/common/entropy.c
+-_inv_zig_zag_
+-    DCD inv_zig_zag
+-
+     ALIGN 16    ; enable use of @128 bit aligned loads
+ inv_zig_zag
+     DCW 0x0001, 0x0002, 0x0006, 0x0007
+     DCW 0x0003, 0x0005, 0x0008, 0x000d
+     DCW 0x0004, 0x0009, 0x000c, 0x000e
+     DCW 0x000a, 0x000b, 0x000f, 0x0010
+ 
+     END
--- a/media/libvpx/update.sh
+++ b/media/libvpx/update.sh
@@ -38,200 +38,202 @@
 if [ $# -lt 1 ]; then
   echo Usage: update.sh /path/to/libvpx/
   echo The libvpx dir must contain a directory "objdir" with the following directories configured in it:
   echo   * objdir/x86-win32-vs8
   echo   * objdir/x86-linux-gcc
   echo   * objdir/generic-gnu
   echo   * objdir/x86-darwin9-gcc
   echo   * objdir/x86_64-darwin9-gcc
+  echo   * objdir/armv7-linux-gcc
   echo You can configure these from objdir/$target with the following command:
-  echo $ ..configure --target=$target --disable-vp8-encoder --disable-examples --disable-install-docs
+  echo $ ../../configure --target=$target --disable-vp8-encoder --disable-examples --disable-install-docs
   echo On Mac, you also need --enable-pic
   exit -1
 fi
 
 # These are relative to SDK source dir.
 commonFiles=(
   vp8/vp8_cx_iface.c
   vp8/vp8_dx_iface.c
   vp8/common/alloccommon.c
   vp8/common/asm_com_offsets.c
   vp8/common/blockd.c
   vp8/common/debugmodes.c
-  vp8/common/defaultcoefcounts.c
+  vp8/common/dequantize.c
   vp8/common/entropy.c
   vp8/common/entropymode.c
   vp8/common/entropymv.c
   vp8/common/extend.c
   vp8/common/filter.c
   vp8/common/findnearmv.c
+  vp8/common/idct_blk.c
   vp8/common/idctllm.c
-  vp8/common/invtrans.c
   vp8/common/loopfilter.c
   vp8/common/loopfilter_filters.c
   vp8/common/mbpitch.c
   vp8/common/modecont.c
   vp8/common/modecontext.c
   vp8/common/postproc.c
   vp8/common/quant_common.c
-  vp8/common/recon.c
   vp8/common/reconinter.c
+  vp8/common/reconintra.c
   vp8/common/reconintra4x4.c
-  vp8/common/reconintra.c
   vp8/common/setupintrarecon.c
   vp8/common/swapyv12buffer.c
-  vp8/common/textblit.c
   vp8/common/treecoder.c
   vp8/common/arm/arm_systemdependent.c
   vp8/common/arm/bilinearfilter_arm.c
+  vp8/common/arm/dequantize_arm.c
   vp8/common/arm/filter_arm.c
   vp8/common/arm/loopfilter_arm.c
   vp8/common/arm/reconintra_arm.c
-  vp8/common/arm/neon/recon_neon.c
+  vp8/common/arm/armv6/idct_blk_v6.c
+  vp8/common/arm/neon/idct_blk_neon.c
   vp8/common/generic/systemdependent.c
+  vp8/common/x86/filter_x86.c
+  vp8/common/x86/idct_blk_mmx.c
+  vp8/common/x86/idct_blk_sse2.c
   vp8/common/x86/loopfilter_x86.c
   vp8/common/x86/recon_wrapper_sse2.c
   vp8/common/x86/vp8_asm_stubs.c
   vp8/common/x86/x86_systemdependent.c
+  vp8/decoder/asm_dec_offsets.c
   vp8/decoder/dboolhuff.c
   vp8/decoder/decodemv.c
   vp8/decoder/decodframe.c
-  vp8/decoder/dequantize.c
   vp8/decoder/detokenize.c
   vp8/decoder/error_concealment.c
-  vp8/decoder/idct_blk.c
   vp8/decoder/onyxd_if.c
   vp8/decoder/reconintra_mt.c
   vp8/decoder/threading.c
   vp8/decoder/arm/arm_dsystemdependent.c
-  vp8/decoder/arm/dequantize_arm.c
-  vp8/decoder/arm/armv6/idct_blk_v6.c
-  vp8/decoder/arm/neon/idct_blk_neon.c
   vp8/decoder/generic/dsystemdependent.c
-  vp8/decoder/x86/idct_blk_mmx.c
-  vp8/decoder/x86/idct_blk_sse2.c
   vp8/decoder/x86/x86_dsystemdependent.c
   vp8/encoder/asm_enc_offsets.c
   vp8/encoder/bitstream.c
   vp8/encoder/boolhuff.c
   vp8/encoder/dct.c
   vp8/encoder/encodeframe.c
   vp8/encoder/encodeintra.c
   vp8/encoder/encodemb.c
   vp8/encoder/encodemv.c
   vp8/encoder/ethreading.c
   vp8/encoder/firstpass.c
   vp8/encoder/lookahead.c
   vp8/encoder/mcomp.c
   vp8/encoder/modecosts.c
+  vp8/encoder/mr_dissim.c
   vp8/encoder/onyx_if.c
+  vp8/encoder/pickinter.c
   vp8/encoder/picklpf.c
-  vp8/encoder/pickinter.c
   vp8/encoder/psnr.c
   vp8/encoder/quantize.c
   vp8/encoder/ratectrl.c
   vp8/encoder/rdopt.c
   vp8/encoder/sad_c.c
   vp8/encoder/segmentation.c
   vp8/encoder/temporal_filter.c
   vp8/encoder/tokenize.c
   vp8/encoder/treewriter.c
   vp8/encoder/variance_c.c
   vp8/encoder/arm/arm_csystemdependent.c
+  vp8/encoder/arm/boolhuff_arm.c
   vp8/encoder/arm/dct_arm.c
-  vp8/encoder/arm/picklpf_arm.c
+  vp8/encoder/arm/quantize_arm.c
   vp8/encoder/arm/variance_arm.c
+  vp8/encoder/arm/neon/picklpf_arm.c
   vp8/encoder/generic/csystemdependent.c
   vp8/encoder/x86/variance_mmx.c
   vp8/encoder/x86/variance_sse2.c
   vp8/encoder/x86/variance_ssse3.c
   vp8/encoder/x86/x86_csystemdependent.c
   vpx/src/vpx_codec.c
   vpx/src/vpx_decoder.c
   vpx/src/vpx_decoder_compat.c
   vpx/src/vpx_encoder.c
   vpx/src/vpx_image.c
   vpx_mem/vpx_mem.c
+  vpx_ports/arm_cpudetect.c
+  vpx_scale/arm/neon/yv12extend_arm.c
   vpx_scale/generic/gen_scalers.c
   vpx_scale/generic/scalesystemdependent.c
   vpx_scale/generic/vpxscale.c
   vpx_scale/generic/yv12config.c
   vpx_scale/generic/yv12extend.c
   vp8/common/alloccommon.h
   vp8/common/blockd.h
   vp8/common/coefupdateprobs.h
   vp8/common/common.h
-  vp8/common/common_types.h
-  vp8/common/defaultcoefcounts.h
+  vp8/common/default_coef_probs.h
+  vp8/common/dequantize.h
   vp8/common/entropy.h
   vp8/common/entropymode.h
   vp8/common/entropymv.h
   vp8/common/extend.h
   vp8/common/filter.h
   vp8/common/findnearmv.h
-  vp8/common/g_common.h
   vp8/common/header.h
   vp8/common/idct.h
   vp8/common/invtrans.h
   vp8/common/loopfilter.h
   vp8/common/modecont.h
   vp8/common/mv.h
+  vp8/common/onyx.h
   vp8/common/onyxc_int.h
   vp8/common/onyxd.h
-  vp8/common/onyx.h
   vp8/common/postproc.h
   vp8/common/ppflags.h
   vp8/common/pragmas.h
   vp8/common/quant_common.h
   vp8/common/recon.h
   vp8/common/reconinter.h
+  vp8/common/reconintra.h
   vp8/common/reconintra4x4.h
-  vp8/common/reconintra.h
   vp8/common/setupintrarecon.h
   vp8/common/subpixel.h
   vp8/common/swapyv12buffer.h
   vp8/common/systemdependent.h
   vp8/common/threading.h
   vp8/common/treecoder.h
-  vp8/common/type_aliases.h
   vp8/common/arm/bilinearfilter_arm.h
+  vp8/common/arm/dequantize_arm.h
   vp8/common/arm/idct_arm.h
   vp8/common/arm/loopfilter_arm.h
   vp8/common/arm/recon_arm.h
   vp8/common/arm/subpixel_arm.h
+  vp8/common/x86/dequantize_x86.h
+  vp8/common/x86/filter_x86.h
   vp8/common/x86/idct_x86.h
   vp8/common/x86/loopfilter_x86.h
   vp8/common/x86/postproc_x86.h
   vp8/common/x86/recon_x86.h
   vp8/common/x86/subpixel_x86.h
   vp8/decoder/dboolhuff.h
   vp8/decoder/decodemv.h
   vp8/decoder/decoderthreading.h
-  vp8/decoder/dequantize.h
   vp8/decoder/detokenize.h
   vp8/decoder/ec_types.h
   vp8/decoder/error_concealment.h
   vp8/decoder/onyxd_int.h
   vp8/decoder/reconintra_mt.h
   vp8/decoder/treereader.h
-  vp8/decoder/arm/dequantize_arm.h
-  vp8/decoder/x86/dequantize_x86.h
-  vp8/encoder/asm_enc_offsets.h
   vp8/encoder/bitstream.h
+  vp8/encoder/block.h
   vp8/encoder/boolhuff.h
-  vp8/encoder/block.h
   vp8/encoder/dct.h
+  vp8/encoder/defaultcoefcounts.h
   vp8/encoder/encodeintra.h
   vp8/encoder/encodemb.h
   vp8/encoder/encodemv.h
   vp8/encoder/firstpass.h
   vp8/encoder/lookahead.h
   vp8/encoder/mcomp.h
   vp8/encoder/modecosts.h
+  vp8/encoder/mr_dissim.h
   vp8/encoder/onyx_int.h
   vp8/encoder/pickinter.h
   vp8/encoder/psnr.h
   vp8/encoder/quantize.h
   vp8/encoder/ratectrl.h
   vp8/encoder/rdopt.h
   vp8/encoder/segmentation.h
   vp8/encoder/temporal_filter.h
@@ -258,115 +260,108 @@ commonFiles=(
   vpx/vpx_codec_impl_top.h
   vpx/vpx_decoder_compat.h
   vpx/vpx_decoder.h
   vpx/vpx_encoder.h
   vpx/vpx_image.h
   vpx/vpx_integer.h
   vpx_mem/include/vpx_mem_intrnl.h
   vpx_mem/vpx_mem.h
-  vpx_ports/arm_cpudetect.c
+  vpx_ports/arm.h
   vpx_ports/asm_offsets.h
-  vpx_ports/config.h
   vpx_ports/mem.h
   vpx_ports/vpx_timer.h
-  vpx_ports/arm.h
   vpx_ports/x86.h
   vpx_scale/scale_mode.h
   vpx_scale/vpxscale.h
   vpx_scale/yv12config.h
   vpx_scale/yv12extend.h
+  vpx_scale/arm/yv12extend_arm.h
+  vpx_scale/generic/yv12extend_generic.h
   vp8/common/arm/armv6/bilinearfilter_v6.asm
+  vp8/common/arm/armv6/copymem16x16_v6.asm
   vp8/common/arm/armv6/copymem8x4_v6.asm
   vp8/common/arm/armv6/copymem8x8_v6.asm
-  vp8/common/arm/armv6/copymem16x16_v6.asm
   vp8/common/arm/armv6/dc_only_idct_add_v6.asm
+  vp8/common/arm/armv6/dequant_idct_v6.asm
+  vp8/common/arm/armv6/dequantize_v6.asm
   vp8/common/arm/armv6/iwalsh_v6.asm
   vp8/common/arm/armv6/filter_v6.asm
   vp8/common/arm/armv6/idct_v6.asm
+  vp8/common/arm/armv6/intra4x4_predict_v6.asm
+  vp8/common/arm/armv6/iwalsh_v6.asm
   vp8/common/arm/armv6/loopfilter_v6.asm
-  vp8/common/arm/armv6/recon_v6.asm
   vp8/common/arm/armv6/simpleloopfilter_v6.asm
   vp8/common/arm/armv6/sixtappredict8x4_v6.asm
+  vp8/common/arm/neon/bilinearpredict16x16_neon.asm
   vp8/common/arm/neon/bilinearpredict4x4_neon.asm
   vp8/common/arm/neon/bilinearpredict8x4_neon.asm
   vp8/common/arm/neon/bilinearpredict8x8_neon.asm
-  vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+  vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+  vp8/common/arm/neon/copymem16x16_neon.asm
   vp8/common/arm/neon/copymem8x4_neon.asm
   vp8/common/arm/neon/copymem8x8_neon.asm
-  vp8/common/arm/neon/copymem16x16_neon.asm
   vp8/common/arm/neon/dc_only_idct_add_neon.asm
+  vp8/common/arm/neon/dequant_idct_neon.asm
+  vp8/common/arm/neon/dequantizeb_neon.asm
+  vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
+  vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
   vp8/common/arm/neon/iwalsh_neon.asm
   vp8/common/arm/neon/loopfilter_neon.asm
   vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
   vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
   vp8/common/arm/neon/mbloopfilter_neon.asm
-  vp8/common/arm/neon/recon2b_neon.asm
-  vp8/common/arm/neon/recon4b_neon.asm
-  vp8/common/arm/neon/reconb_neon.asm
-  vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
+  vp8/common/arm/neon/save_neon_reg.asm
   vp8/common/arm/neon/shortidct4x4llm_neon.asm
+  vp8/common/arm/neon/sixtappredict16x16_neon.asm
   vp8/common/arm/neon/sixtappredict4x4_neon.asm
   vp8/common/arm/neon/sixtappredict8x4_neon.asm
   vp8/common/arm/neon/sixtappredict8x8_neon.asm
-  vp8/common/arm/neon/sixtappredict16x16_neon.asm
-  vp8/common/arm/neon/recon16x16mb_neon.asm
-  vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
-  vp8/common/arm/neon/save_neon_reg.asm
-  vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
-  vp8/decoder/arm/armv6/dequant_idct_v6.asm
-  vp8/decoder/arm/armv6/dequantize_v6.asm
-  vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
-  vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
-  vp8/decoder/arm/neon/dequant_idct_neon.asm
-  vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
-  vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
-  vp8/decoder/arm/neon/dequantizeb_neon.asm
+  vp8/common/x86/dequantize_mmx.asm
   vp8/common/x86/idctllm_mmx.asm
   vp8/common/x86/idctllm_sse2.asm
   vp8/common/x86/iwalsh_mmx.asm
   vp8/common/x86/iwalsh_sse2.asm
+  vp8/common/x86/loopfilter_block_sse2.asm
   vp8/common/x86/loopfilter_mmx.asm
   vp8/common/x86/loopfilter_sse2.asm
   vp8/common/x86/postproc_mmx.asm
   vp8/common/x86/postproc_sse2.asm
   vp8/common/x86/recon_mmx.asm
   vp8/common/x86/recon_sse2.asm
   vp8/common/x86/subpixel_mmx.asm
   vp8/common/x86/subpixel_sse2.asm
   vp8/common/x86/subpixel_ssse3.asm
-  vp8/decoder/x86/dequantize_mmx.asm
+  vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
   vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
   vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
   vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
-  vp8/encoder/arm/armv6/vp8_fast_fdct4x4_armv6.asm
   vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
   vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
   vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
+  vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm
   vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
   vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
   vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
   vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
   vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
   vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
   vp8/encoder/arm/armv6/walsh_v6.asm
-  vp8/encoder/arm/neon/fastfdct4x4_neon.asm
-  vp8/encoder/arm/neon/fastfdct8x4_neon.asm
   vp8/encoder/arm/neon/fastquantizeb_neon.asm
   vp8/encoder/arm/neon/sad16_neon.asm
   vp8/encoder/arm/neon/sad8_neon.asm
   vp8/encoder/arm/neon/shortfdct_neon.asm
   vp8/encoder/arm/neon/subtract_neon.asm
   vp8/encoder/arm/neon/variance_neon.asm
   vp8/encoder/arm/neon/vp8_memcpy_neon.asm
   vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
   vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
-  vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
   vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
   vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+  vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
   vp8/encoder/x86/dct_mmx.asm
   vp8/encoder/x86/dct_sse2.asm
   vp8/encoder/x86/encodeopt.asm
   vp8/encoder/x86/fwalsh_sse2.asm
   vp8/encoder/x86/quantize_mmx.asm
   vp8/encoder/x86/quantize_sse2.asm
   vp8/encoder/x86/quantize_ssse3.asm
   vp8/encoder/x86/quantize_sse4.asm
@@ -378,17 +373,19 @@ commonFiles=(
   vp8/encoder/x86/subtract_mmx.asm
   vp8/encoder/x86/subtract_sse2.asm
   vp8/encoder/x86/temporal_filter_apply_sse2.asm
   vp8/encoder/x86/variance_impl_mmx.asm
   vp8/encoder/x86/variance_impl_sse2.asm
   vp8/encoder/x86/variance_impl_ssse3.asm
   vpx_ports/emms.asm
   vpx_ports/x86_abi_support.asm
-  vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
+  vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
+  vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+  vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
   build/make/ads2gas.pl
   build/make/obj_int_extract.c
   LICENSE
   PATENTS
 )
 
 # configure files specific to x86-win32-vs8
 cp $1/objdir/x86-win32-vs8/vpx_config.c vpx_config_x86-win32-vs8.c
@@ -428,19 +425,91 @@ cp $1/objdir/generic-gnu/vpx_config.h vp
 
 # Copy common source files into mozilla tree.
 for f in ${commonFiles[@]}
 do
   mkdir -p -v `dirname $f`
   cp -v $1/$f $f
 done
 
+# This has to be renamed because there's already a scalesystemdependent.c in
+# vpx_scale/generic/
+cp -v $1/vpx_scale/arm/scalesystemdependent.c \
+         vpx_scale/arm/arm_scalesystemdependent.c
+
+# Upstream patch to fix variance overflow.
+patch -p3 < I1bad27ea.patch
+
+# Upstream patch to remove __inline for compiler compatibility.
+patch -p3 < I6f2b218d.patch
+
+# Patch to move SAD and variance functions to common (based on an upstream
+# patch).
+patch -p3 < I256a37c6.patch
+
+# These get moved by I256a37c6.patch above, but patch won't do the actual move
+# for us.
+encoderMovedFiles=(
+  vp8/encoder/sad_c.c
+  vp8/encoder/variance_c.c
+  vp8/encoder/arm/variance_arm.c
+  vp8/encoder/x86/variance_mmx.c
+  vp8/encoder/x86/variance_sse2.c
+  vp8/encoder/x86/variance_ssse3.c
+  vp8/encoder/variance.h
+  vp8/encoder/arm/variance_arm.h
+  vp8/encoder/x86/variance_x86.h
+  vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
+  vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
+  vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+  vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
+  vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+  vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+  vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+  vp8/encoder/arm/neon/sad16_neon.asm
+  vp8/encoder/arm/neon/sad8_neon.asm
+  vp8/encoder/arm/neon/variance_neon.asm
+  vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+  vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+  vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+  vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+  vp8/encoder/x86/sad_mmx.asm
+  vp8/encoder/x86/sad_sse2.asm
+  vp8/encoder/x86/sad_sse3.asm
+  vp8/encoder/x86/sad_ssse3.asm
+  vp8/encoder/x86/sad_sse4.asm
+  vp8/encoder/x86/variance_impl_mmx.asm
+  vp8/encoder/x86/variance_impl_sse2.asm
+  vp8/encoder/x86/variance_impl_ssse3.asm
+)
+
+# Move encoder source files into the common tree.
+for f in ${encoderMovedFiles[@]}
+do
+  mv -v $f ${f/encoder/common}
+done
+
+# Patch to fix text relocations in the variance functions.
+patch -p3 < textrels.patch
+
+# Patch to use VARIANCE_INVOKE in multiframe_quality_enhance_block().
+patch -p3 < variance-invoke.patch
+
+# Upstream patch to fix potential use of uninitialized rate_y.
+patch -p3 < I8a35831e.patch
+
+# Upstream patch to reset segmentation map on keyframes.
+patch -p3 < I9713c9f0.patch
+
+# Upstream patch to support Android x86 NDK build.
+patch -p3 < I42ab00e3.patch
+
+# Upstream patch to align internal mfqe framebuffer dimensions.
+patch -p3 < I3915d597.patch
+
 # Patch to compile with Sun Studio on Solaris
 patch -p3 < solaris.patch
 
 # Patch to fix errors including C headers in C++
 patch -p3 < compile_errors.patch
 
-# Patch to fix MV clamping in the v0.9.7-p1 release.
-patch -p3 < bug696390.patch
-
 # Patch to permit vpx users to specify their own <stdint.h> types.
 patch -p3 < stdint.patch
new file mode 100644
--- /dev/null
+++ b/media/libvpx/variance-invoke.patch
@@ -0,0 +1,101 @@
+# HG changeset patch
+# Parent f294d64ffb01bf6d8190377708405bf5f4fe30fa
+# User Timothy B. Terriberry <tterribe@vt.edu>
+Bug 730907 - Use VARIANCE_INVOKE in multiframe_quality_enhance_block()
+
+diff --git a/media/libvpx/vp8/common/postproc.c b/media/libvpx/vp8/common/postproc.c
+--- a/media/libvpx/vp8/common/postproc.c
++++ b/media/libvpx/vp8/common/postproc.c
+@@ -706,17 +706,18 @@ static void multiframe_quality_enhance_b
+     unsigned char *u,
+     unsigned char *v,
+     int y_stride,
+     int uv_stride,
+     unsigned char *yd,
+     unsigned char *ud,
+     unsigned char *vd,
+     int yd_stride,
+-    int uvd_stride
++    int uvd_stride,
++    vp8_variance_rtcd_vtable_t *rtcd
+ )
+ {
+     static const unsigned char VP8_ZEROS[16]=
+     {
+          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+     };
+     int blksizeby2 = blksize >> 1;
+     int qdiff = qcurr - qprev;
+@@ -727,28 +728,28 @@ static void multiframe_quality_enhance_b
+     unsigned char *up;
+     unsigned char *udp;
+     unsigned char *vp;
+     unsigned char *vdp;
+ 
+     unsigned int act, sse, sad, thr;
+     if (blksize == 16)
+     {
+-        act = (vp8_variance_var16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
+-        sad = (vp8_variance_sad16x16(y, y_stride, yd, yd_stride, 0)+128)>>8;
++        act = (VARIANCE_INVOKE(rtcd, var16x16)(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
++        sad = (VARIANCE_INVOKE(rtcd, sad16x16)(y, y_stride, yd, yd_stride, 0)+128)>>8;
+     }
+     else if (blksize == 8)
+     {
+-        act = (vp8_variance_var8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
+-        sad = (vp8_variance_sad8x8(y, y_stride, yd, yd_stride, 0)+32)>>6;
++        act = (VARIANCE_INVOKE(rtcd, var8x8)(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
++        sad = (VARIANCE_INVOKE(rtcd, sad8x8)(y, y_stride, yd, yd_stride, 0)+32)>>6;
+     }
+     else
+     {
+-        act = (vp8_variance_var4x4(yd, yd_stride, VP8_ZEROS, 0, &sse)+8)>>4;
+-        sad = (vp8_variance_sad4x4(y, y_stride, yd, yd_stride, 0)+8)>>4;
++        act = (VARIANCE_INVOKE(rtcd, var4x4)(yd, yd_stride, VP8_ZEROS, 0, &sse)+8)>>4;
++        sad = (VARIANCE_INVOKE(rtcd, sad4x4)(y, y_stride, yd, yd_stride, 0)+8)>>4;
+     }
+     /* thr = qdiff/8 + log2(act) + log4(qprev) */
+     thr = (qdiff>>3);
+     while (act>>=1) thr++;
+     while (qprev>>=2) thr++;
+     if (sad < thr)
+     {
+         static const int roundoff = (1 << (MFQE_PRECISION - 1));
+@@ -859,33 +860,35 @@ void vp8_multiframe_quality_enhance
+                                                              u_ptr + 4*(i*show->uv_stride+j),
+                                                              v_ptr + 4*(i*show->uv_stride+j),
+                                                              show->y_stride,
+                                                              show->uv_stride,
+                                                              yd_ptr + 8*(i*dest->y_stride+j),
+                                                              ud_ptr + 4*(i*dest->uv_stride+j),
+                                                              vd_ptr + 4*(i*dest->uv_stride+j),
+                                                              dest->y_stride,
+-                                                             dest->uv_stride);
++                                                             dest->uv_stride,
++                                                             &cm->rtcd.variance);
+                 }
+                 else
+                 {
+                     multiframe_quality_enhance_block(16,
+                                                      qcurr,
+                                                      qprev,
+                                                      y_ptr,
+                                                      u_ptr,
+                                                      v_ptr,
+                                                      show->y_stride,
+                                                      show->uv_stride,
+                                                      yd_ptr,
+                                                      ud_ptr,
+                                                      vd_ptr,
+                                                      dest->y_stride,
+-                                                     dest->uv_stride);
++                                                     dest->uv_stride,
++                                                     &cm->rtcd.variance);
+ 
+                 }
+             }
+             else
+             {
+                 vp8_recon_copy16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
+                 vp8_recon_copy8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
+                 vp8_recon_copy8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
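For context on the mechanism this patch switches to: VARIANCE_INVOKE is one of the vp8 *_INVOKE macros. When runtime CPU detection is enabled it routes the call through the per-CPU vtable (vp8_variance_rtcd_vtable_t, reached here via &cm->rtcd.variance); in a static build it collapses to a direct call to the default implementation. The sketch below shows the shape of that pattern in C; the vtable is trimmed to two members and the signatures are illustrative rather than copied from variance.h.

    /* Minimal sketch of the RTCD dispatch assumed above (abbreviated). */
    typedef unsigned int (*vp8_sad_fn_t)(const unsigned char *src_ptr, int src_stride,
                                         const unsigned char *ref_ptr, int ref_stride,
                                         unsigned int max_sad);

    typedef struct
    {
        vp8_sad_fn_t sad16x16;
        vp8_sad_fn_t sad8x8;
        /* ... var16x16, var8x8, var4x4, mse16x16, ... */
    } vp8_variance_rtcd_vtable_t;

    #if CONFIG_RUNTIME_CPU_DETECT
    /* Call whichever implementation the arch init installed for this CPU. */
    #define VARIANCE_INVOKE(ctx, fn) (ctx)->fn
    #else
    /* Static configuration: resolve directly to the compile-time default. */
    #define VARIANCE_INVOKE(ctx, fn) vp8_variance_##fn
    #endif

    /* Usage, as in the patched multiframe_quality_enhance_block():
     *   sad = VARIANCE_INVOKE(rtcd, sad16x16)(y, y_stride, yd, yd_stride, 0);
     */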
--- a/media/libvpx/vp8/common/alloccommon.c
+++ b/media/libvpx/vp8/common/alloccommon.c
@@ -4,17 +4,17 @@
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
 #include "blockd.h"
 #include "vpx_mem/vpx_mem.h"
 #include "onyxc_int.h"
 #include "findnearmv.h"
 #include "entropymode.h"
 #include "systemdependent.h"
 
 
@@ -38,16 +38,18 @@ void vp8_de_alloc_frame_buffers(VP8_COMM
 {
     int i;
 
     for (i = 0; i < NUM_YV12_BUFFERS; i++)
         vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
 
     vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
     vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
+    if (oci->post_proc_buffer_int_used)
+        vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
 
     vpx_free(oci->above_context);
     vpx_free(oci->mip);
     vpx_free(oci->prev_mip);
 
     oci->above_context = 0;
     oci->mip = 0;
     oci->prev_mip = 0;
@@ -96,16 +98,18 @@ int vp8_alloc_frame_buffers(VP8_COMMON *
     }
 
     if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
     {
         vp8_de_alloc_frame_buffers(oci);
         return 1;
     }
 
+    oci->post_proc_buffer_int_used = 0;
+
     oci->mb_rows = height >> 4;
     oci->mb_cols = width >> 4;
     oci->MBs = oci->mb_rows * oci->mb_cols;
     oci->mode_info_stride = oci->mb_cols + 1;
     oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
 
     if (!oci->mip)
     {
@@ -181,17 +185,17 @@ void vp8_setup_version(VP8_COMMON *cm)
         cm->use_bilinear_mc_filter = 0;
         cm->full_pixel = 0;
         break;
     }
 }
 void vp8_create_common(VP8_COMMON *oci)
 {
     vp8_machine_specific_config(oci);
-    vp8_default_coef_probs(oci);
+
     vp8_init_mbmode_probs(oci);
     vp8_default_bmode_probs(oci->fc.bmode_prob);
 
     oci->mb_no_coeff_skip = 1;
     oci->no_lpf = 0;
     oci->filter_type = NORMAL_LOOPFILTER;
     oci->use_bilinear_mc_filter = 0;
     oci->full_pixel = 0;
--- a/media/libvpx/vp8/common/arm/arm_systemdependent.c
+++ b/media/libvpx/vp8/common/arm/arm_systemdependent.c
@@ -4,24 +4,24 @@
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
 #include "vpx_ports/arm.h"
-#include "vp8/common/g_common.h"
 #include "vp8/common/pragmas.h"
 #include "vp8/common/subpixel.h"
 #include "vp8/common/loopfilter.h"
 #include "vp8/common/recon.h"
 #include "vp8/common/idct.h"
+#include "vp8/common/variance.h"
 #include "vp8/common/onyxc_int.h"
 
 void vp8_arch_arm_common_init(VP8_COMMON *ctx)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
     VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
     int flags = arm_cpu_caps();
     rtcd->flags = flags;
@@ -40,75 +40,127 @@ void vp8_arch_arm_common_init(VP8_COMMON
         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
         rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
         rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
         rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
         rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
         rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
 
-        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
         rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_v6;
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_v6;
 
         rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
         rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
         rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
         rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
         rtcd->loopfilter.simple_mb_v =
                 vp8_loop_filter_simple_vertical_edge_armv6;
         rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
         rtcd->loopfilter.simple_mb_h =
                 vp8_loop_filter_simple_horizontal_edge_armv6;
         rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
 
         rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
         rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
         rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
-        rtcd->recon.recon       = vp8_recon_b_armv6;
-        rtcd->recon.recon2      = vp8_recon2b_armv6;
-        rtcd->recon.recon4      = vp8_recon4b_armv6;
+        rtcd->recon.intra4x4_predict = vp8_intra4x4_predict_armv6;
+
+        rtcd->dequant.block               = vp8_dequantize_b_v6;
+        rtcd->dequant.idct_add            = vp8_dequant_idct_add_v6;
+        rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
+        rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
+
+        rtcd->variance.sad16x16              = vp8_sad16x16_armv6;
+        /*rtcd->variance.sad16x8               = vp8_sad16x8_c;
+        rtcd->variance.sad8x16               = vp8_sad8x16_c;
+        rtcd->variance.sad8x8                = vp8_sad8x8_c;
+        rtcd->variance.sad4x4                = vp8_sad4x4_c;*/
+
+        /*rtcd->variance.var4x4                = vp8_variance4x4_c;*/
+        rtcd->variance.var8x8                = vp8_variance8x8_armv6;
+        /*rtcd->variance.var8x16               = vp8_variance8x16_c;
+        rtcd->variance.var16x8               = vp8_variance16x8_c;*/
+        rtcd->variance.var16x16              = vp8_variance16x16_armv6;
+
+        /*rtcd->variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+        rtcd->variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_armv6;
+        /*rtcd->variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+        rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_armv6;
+        rtcd->variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_armv6;
+        rtcd->variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_armv6;
+        rtcd->variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_armv6;
+
+        rtcd->variance.mse16x16              = vp8_mse16x16_armv6;
+        /*rtcd->variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        /*rtcd->variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/
     }
 #endif
 
 #if HAVE_ARMV7
     if (flags & HAS_NEON)
     {
         rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
         rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
         rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
         rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
         rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
         rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
 
-        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
         rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
 
         rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
         rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
         rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
         rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
         rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
         rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
         rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
         rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
 
         rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
         rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
         rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
-        rtcd->recon.recon       = vp8_recon_b_neon;
-        rtcd->recon.recon2      = vp8_recon2b_neon;
-        rtcd->recon.recon4      = vp8_recon4b_neon;
-        rtcd->recon.recon_mb    = vp8_recon_mb_neon;
         rtcd->recon.build_intra_predictors_mby =
             vp8_build_intra_predictors_mby_neon;
         rtcd->recon.build_intra_predictors_mby_s =
             vp8_build_intra_predictors_mby_s_neon;
+
+        rtcd->dequant.block               = vp8_dequantize_b_neon;
+        rtcd->dequant.idct_add            = vp8_dequant_idct_add_neon;
+        rtcd->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
+        rtcd->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
+
+        rtcd->variance.sad16x16              = vp8_sad16x16_neon;
+        rtcd->variance.sad16x8               = vp8_sad16x8_neon;
+        rtcd->variance.sad8x16               = vp8_sad8x16_neon;
+        rtcd->variance.sad8x8                = vp8_sad8x8_neon;
+        rtcd->variance.sad4x4                = vp8_sad4x4_neon;
+
+        /*rtcd->variance.var4x4                = vp8_variance4x4_c;*/
+        rtcd->variance.var8x8                = vp8_variance8x8_neon;
+        rtcd->variance.var8x16               = vp8_variance8x16_neon;
+        rtcd->variance.var16x8               = vp8_variance16x8_neon;
+        rtcd->variance.var16x16              = vp8_variance16x16_neon;
+
+        /*rtcd->variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+        rtcd->variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
+        /*rtcd->variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        rtcd->variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+        rtcd->variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
+        rtcd->variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_neon;
+        rtcd->variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_neon;
+        rtcd->variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_neon;
+
+        rtcd->variance.mse16x16              = vp8_mse16x16_neon;
+        /*rtcd->variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        rtcd->variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
     }
 #endif
 
 #endif
 }
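A note on the commented-out assignments above (for example rtcd->variance.sad16x8): vp8_machine_specific_config(), called from vp8_create_common() in alloccommon.c, first points every rtcd slot at the portable C routine and only then hands off to the per-architecture init, so leaving a line commented out simply keeps the C default for that slot. A rough sketch of that flow, where set_c_defaults() is a hypothetical stand-in for the long run of default assignments in vp8/common/generic/systemdependent.c:

    /* Two-stage rtcd setup (sketch; VP8_COMMON left opaque here). */
    typedef struct VP8Common VP8_COMMON;

    extern void set_c_defaults(VP8_COMMON *ctx);            /* hypothetical helper  */
    extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);  /* defined in this file */

    void vp8_machine_specific_config(VP8_COMMON *ctx)
    {
        /* 1. Every dispatch slot starts out pointing at the generic C routine. */
        set_c_defaults(ctx);
        /* 2. The architecture init, gated on the arm_cpu_caps() flags, overrides
         *    only the slots it has optimized replacements for. */
        vp8_arch_arm_common_init(ctx);
    }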
--- a/media/libvpx/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
@@ -6,62 +6,65 @@
 ;  tree. All contributing project authors may be found in the AUTHORS
 ;  file in the root of the source tree.
 ;
 
     EXPORT  |vp8_dc_only_idct_add_v6|
 
     AREA    |.text|, CODE, READONLY
 
-;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
-;                             unsigned char *dst_ptr, int pitch, int stride)
+;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
+;                            int pred_stride, unsigned char *dst_ptr,
+;                            int dst_stride)
 ; r0  input_dc
 ; r1  pred_ptr
-; r2  dest_ptr
-; r3  pitch
-; sp  stride
+; r2  pred_stride
+; r3  dst_ptr
+; sp  dst_stride
 
 |vp8_dc_only_idct_add_v6| PROC
-    stmdb       sp!, {r4 - r7, lr}
+    stmdb       sp!, {r4 - r7}
 
     add         r0, r0, #4                ; input_dc += 4
     ldr         r12, c0x0000FFFF
-    ldr         r4, [r1], r3
-    ldr         r6, [r1], r3
+    ldr         r4, [r1], r2
     and         r0, r12, r0, asr #3       ; input_dc >> 3 + mask
-    ldr         lr, [sp, #20]
+    ldr         r6, [r1], r2
     orr         r0, r0, r0, lsl #16       ; a1 | a1
 
+    ldr         r12, [sp, #16]            ; dst stride
+
     uxtab16     r5, r0, r4                ; a1+2 | a1+0
     uxtab16     r4, r0, r4, ror #8        ; a1+3 | a1+1
     uxtab16     r7, r0, r6
     uxtab16     r6, r0, r6, ror #8
     usat16      r5, #8, r5
     usat16      r4, #8, r4
     usat16      r7, #8, r7
     usat16      r6, #8, r6
     orr         r5, r5, r4, lsl #8
     orr         r7, r7, r6, lsl #8
-    ldr         r4, [r1], r3
+    ldr         r4, [r1], r2
+    str         r5, [r3], r12
     ldr         r6, [r1]
-    str         r5, [r2], lr
-    str         r7, [r2], lr
+    str         r7, [r3], r12
 
     uxtab16     r5, r0, r4
     uxtab16     r4, r0, r4, ror #8
     uxtab16     r7, r0, r6
     uxtab16     r6, r0, r6, ror #8
     usat16      r5, #8, r5
     usat16      r4, #8, r4
     usat16      r7, #8, r7
     usat16      r6, #8, r6
     orr         r5, r5, r4, lsl #8
     orr         r7, r7, r6, lsl #8
-    str         r5, [r2], lr
-    str         r7, [r2]
+    str         r5, [r3], r12
+    str         r7, [r3]
 
-    ldmia       sp!, {r4 - r7, pc}
+    ldmia       sp!, {r4 - r7}
+    bx          lr
 
     ENDP  ; |vp8_dc_only_idct_add_v6|
 
 ; Constant Pool
 c0x0000FFFF DCD 0x0000FFFF
     END
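As an aid to reading the rescheduled assembly above, the operation it implements, per the new prototype in the header comment, is a DC-only inverse transform followed by a reconstruction add: one rounded DC value is added to every pixel of the 4x4 prediction and the result is clamped to 8 bits. A scalar sketch of the same computation (for readability only, not a drop-in replacement):

    void dc_only_idct_add(short input_dc,
                          const unsigned char *pred_ptr, int pred_stride,
                          unsigned char *dst_ptr, int dst_stride)
    {
        int a1 = (input_dc + 4) >> 3;   /* the "input_dc += 4" and "asr #3" above */
        int r, c;

        for (r = 0; r < 4; r++)
        {
            for (c = 0; c < 4; c++)
            {
                int x = pred_ptr[c] + a1;                     /* uxtab16 pairs */
                dst_ptr[c] = x < 0 ? 0 : x > 255 ? 255 : x;   /* usat16 clamp  */
            }
            pred_ptr += pred_stride;
            dst_ptr  += dst_stride;
        }
    }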
rename from media/libvpx/vp8/decoder/arm/armv6/dequant_idct_v6.asm
rename to media/libvpx/vp8/common/arm/armv6/dequant_idct_v6.asm
--- a/media/libvpx/vp8/decoder/arm/armv6/dequant_idct_v6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/dequant_idct_v6.asm
@@ -5,25 +5,22 @@
 ;  grant that can be found in the LICENSE file in the root of the source
 ;  tree. All contributing project authors may be found in the AUTHORS
 ;  file in the root of the source tree.
 ;
 
     EXPORT |vp8_dequant_idct_add_v6|
 
     AREA |.text|, CODE, READONLY
-;void vp8_dequant_idct_v6(short *input, short *dq, unsigned char *pred,
-; unsigned char *dest, int pitch, int stride)
-; r0 = input
+;void vp8_dequant_idct_v6(short *input, short *dq,
+;                         unsigned char *dest, int stride)
+; r0 = q
 ; r1 = dq
-; r2 = pred
-; r3 = dest
-; sp + 36 = pitch  ; +4 = 40
-; sp + 40 = stride  ; +4 = 44
-
+; r2 = dst
+; r3 = stride
 
 |vp8_dequant_idct_add_v6| PROC
     stmdb   sp!, {r4-r11, lr}
 
     ldr     r4, [r0]                ;input
     ldr     r5, [r1], #4            ;dq
 
     sub     sp, sp, #4
@@ -122,58 +119,55 @@ vp8_dequant_idct_loop2_v6
     smulwt  r11, r4, r8
     smulwb  r9, r4, r8
     pkhbt   r1, r7, r1, lsl #16
     uadd16  r8, r1, r8
     pkhbt   r11, r9, r11, lsl #16
     usub16  r1, r12, r8
     uadd16  r8, r11, r6
     ldr     r9, c0x00040004
-    ldr     r12, [sp, #40]
+    ldr     r12, [sp]               ; get stride from stack
     uadd16  r6, r10, r8
     usub16  r7, r10, r8
     uadd16  r7, r7, r9
     uadd16  r6, r6, r9
     uadd16  r10, r14, r1
     usub16  r1, r14, r1
     uadd16  r10, r10, r9
     uadd16  r1, r1, r9
-    ldr     r11, [r2], r12
+    ldr     r11, [r2]               ; load input from dst
     mov     r8, r7, asr #3
     pkhtb   r9, r8, r10, asr #19
     mov     r8, r1, asr #3
     pkhtb   r8, r8, r6, asr #19
     uxtb16  lr, r11, ror #8
     qadd16  r9, r9, lr
     uxtb16  lr, r11
     qadd16  r8, r8, lr
     usat16  r9, #8, r9
     usat16  r8, #8, r8
     orr     r9, r8, r9, lsl #8
-    ldr     r11, [r2], r12
-    ldr     lr, [sp]
-    ldr     r12, [sp, #44]
+    ldr     r11, [r2, r12]          ; load input from dst
     mov     r7, r7, lsl #16
     mov     r1, r1, lsl #16
     mov     r10, r10, lsl #16
     mov     r6, r6, lsl #16
     mov     r7, r7, asr #3
     pkhtb   r7, r7, r10, asr #19
     mov     r1, r1, asr #3
     pkhtb   r1, r1, r6, asr #19
     uxtb16  r8, r11, ror #8
     qadd16  r7, r7, r8
     uxtb16  r8, r11
     qadd16  r1, r1, r8
     usat16  r7, #8, r7
     usat16  r1, #8, r1
     orr     r1, r1, r7, lsl #8
-    str     r9, [lr], r12
-    str     r1, [lr], r12
-    str     lr, [sp]
+    str     r9, [r2], r12           ; store output to dst
+    str     r1, [r2], r12           ; store output to dst
     bne     vp8_dequant_idct_loop2_v6
 
 ; vpx_memset
     sub     r0, r0, #32
     add     sp, sp, #4
 
     mov     r12, #0
     str     r12, [r0]
rename from media/libvpx/vp8/decoder/arm/armv6/dequantize_v6.asm
rename to media/libvpx/vp8/common/arm/armv6/dequantize_v6.asm
rename from media/libvpx/vp8/decoder/arm/armv6/idct_blk_v6.c
rename to media/libvpx/vp8/common/arm/armv6/idct_blk_v6.c
--- a/media/libvpx/vp8/decoder/arm/armv6/idct_blk_v6.c
+++ b/media/libvpx/vp8/common/arm/armv6/idct_blk_v6.c
@@ -3,149 +3,114 @@
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
 #include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
-
-void vp8_dequant_dc_idct_add_y_block_v6
-            (short *q, short *dq, unsigned char *pre,
-             unsigned char *dst, int stride, char *eobs, short *dc)
-{
-    int i;
-
-    for (i = 0; i < 4; i++)
-    {
-        if (eobs[0] > 1)
-            vp8_dequant_dc_idct_add_v6 (q, dq, pre, dst, 16, stride, dc[0]);
-        else
-            vp8_dc_only_idct_add_v6 (dc[0], pre, dst, 16, stride);
-
-        if (eobs[1] > 1)
-            vp8_dequant_dc_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
-        else
-            vp8_dc_only_idct_add_v6 (dc[1], pre+4, dst+4, 16, stride);
+#include "vp8/common/dequantize.h"
 
-        if (eobs[2] > 1)
-            vp8_dequant_dc_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
-        else
-            vp8_dc_only_idct_add_v6 (dc[2], pre+8, dst+8, 16, stride);
 
-        if (eobs[3] > 1)
-            vp8_dequant_dc_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
-        else
-            vp8_dc_only_idct_add_v6 (dc[3], pre+12, dst+12, 16, stride);
-
-        q    += 64;
-        dc   += 4;
-        pre  += 64;
-        dst  += 4*stride;
-        eobs += 4;
-    }
-}
-
-void vp8_dequant_idct_add_y_block_v6
-            (short *q, short *dq, unsigned char *pre,
-             unsigned char *dst, int stride, char *eobs)
+void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
+                                     unsigned char *dst,
+                                     int stride, char *eobs)
 {
     int i;
 
     for (i = 0; i < 4; i++)
     {
         if (eobs[0] > 1)
-            vp8_dequant_idct_add_v6 (q, dq, pre, dst, 16, stride);
-        else
+            vp8_dequant_idct_add_v6 (q, dq, dst, stride);
+        else if (eobs[0] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dst, 16, stride);
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], dst, stride, dst, stride);
             ((int *)q)[0] = 0;
         }
 
         if (eobs[1] > 1)
-            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride);
-        else
+            vp8_dequant_idct_add_v6 (q+16, dq, dst+4, stride);
+        else if (eobs[1] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dst+4, 16, stride);
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], dst+4, stride, dst+4, stride);
             ((int *)(q+16))[0] = 0;
         }
 
         if (eobs[2] > 1)
-            vp8_dequant_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride);
-        else
+            vp8_dequant_idct_add_v6 (q+32, dq, dst+8, stride);
+        else if (eobs[2] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[32]*dq[0], pre+8, dst+8, 16, stride);
+            vp8_dc_only_idct_add_v6 (q[32]*dq[0], dst+8, stride, dst+8, stride);
             ((int *)(q+32))[0] = 0;
         }
 
         if (eobs[3] > 1)
-            vp8_dequant_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride);
-        else
+            vp8_dequant_idct_add_v6 (q+48, dq, dst+12, stride);
+        else if (eobs[3] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[48]*dq[0], pre+12, dst+12, 16, stride);
+            vp8_dc_only_idct_add_v6 (q[48]*dq[0], dst+12, stride,dst+12,stride);
             ((int *)(q+48))[0] = 0;
         }
 
         q    += 64;
-        pre  += 64;
         dst  += 4*stride;
         eobs += 4;
     }
 }
 
-void vp8_dequant_idct_add_uv_block_v6
-            (short *q, short *dq, unsigned char *pre,
-             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq,
+                                      unsigned char *dstu,
+                                      unsigned char *dstv,
+                                      int stride, char *eobs)
 {
     int i;
 
     for (i = 0; i < 2; i++)
     {
         if (eobs[0] > 1)
-            vp8_dequant_idct_add_v6 (q, dq, pre, dstu, 8, stride);
-        else
+            vp8_dequant_idct_add_v6 (q, dq, dstu, stride);
+        else if (eobs[0] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstu, 8, stride);
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstu, stride, dstu, stride);
             ((int *)q)[0] = 0;
         }
 
         if (eobs[1] > 1)
-            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstu+4, 8, stride);
-        else
+            vp8_dequant_idct_add_v6 (q+16, dq, dstu+4, stride);
+        else if (eobs[1] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstu+4, stride,
+                                                  dstu+4, stride);
             ((int *)(q+16))[0] = 0;
         }
 
         q    += 32;
-        pre  += 32;
         dstu += 4*stride;
         eobs += 2;
     }
 
     for (i = 0; i < 2; i++)
     {
         if (eobs[0] > 1)
-            vp8_dequant_idct_add_v6 (q, dq, pre, dstv, 8, stride);
-        else
+            vp8_dequant_idct_add_v6 (q, dq, dstv, stride);
+        else if (eobs[0] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstv, 8, stride);
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstv, stride, dstv, stride);
             ((int *)q)[0] = 0;
         }
 
         if (eobs[1] > 1)
-            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstv+4, 8, stride);
-        else
+            vp8_dequant_idct_add_v6 (q+16, dq, dstv+4, stride);
+        else if (eobs[1] == 1)
         {
-            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstv+4, stride,
+                                                  dstv+4, stride);
             ((int *)(q+16))[0] = 0;
         }
 
         q    += 32;
-        pre  += 32;
         dstv += 4*stride;
         eobs += 2;
     }
 }
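All of the rewritten loops above share the same per-4x4-block dispatch on the end-of-block value, which is also where the change from a plain else to else if (eobs[i] == 1) comes from. A distilled sketch (the real loops unroll four blocks per iteration and step the pointers accordingly):

    extern void vp8_dequant_idct_add_v6(short *input, short *dq,
                                        unsigned char *dst, int stride);
    extern void vp8_dc_only_idct_add_v6(short input_dc,
                                        unsigned char *pred_ptr, int pred_stride,
                                        unsigned char *dst_ptr, int dst_stride);

    /* eob tells how far the non-zero quantized coefficients extend in scan
     * order; dst holds the prediction on entry and the reconstruction on exit,
     * since this update drops the separate "pre" buffer. */
    void dequant_idct_add_block(short *q, short *dq,
                                unsigned char *dst, int stride, char eob)
    {
        if (eob > 1)
        {
            /* AC coefficients present: full dequantize + 4x4 inverse DCT + add.
             * This path also clears q internally. */
            vp8_dequant_idct_add_v6(q, dq, dst, stride);
        }
        else if (eob == 1)
        {
            /* Only the DC coefficient survived: the cheap DC-only add is enough,
             * with dst passed as both predictor and destination. */
            vp8_dc_only_idct_add_v6(q[0] * dq[0], dst, stride, dst, stride);
            ((int *)q)[0] = 0;   /* clear the coefficients by hand on this path */
        }
        /* eob == 0: nothing was coded.  Previously the DC-only call still had to
         * copy the prediction into dst; with in-place reconstruction dst already
         * holds the final pixels, so no work is needed. */
    }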
--- a/media/libvpx/vp8/common/arm/armv6/idct_v6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/idct_v6.asm
@@ -4,342 +4,199 @@
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
 ;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
-;                   r0  r1  r2  r3  r4  r5  r6  r7  r8  r9  r10 r11 r12     r14
-    EXPORT  |vp8_short_idct4x4llm_1_v6|
-    EXPORT  |vp8_short_idct4x4llm_v6|
-    EXPORT  |vp8_short_idct4x4llm_v6_scott|
     EXPORT  |vp8_short_idct4x4llm_v6_dual|
 
     AREA    |.text|, CODE, READONLY
 
-;********************************************************************************
-;*  void short_idct4x4llm_1_v6(INT16 * input, INT16 * output, INT32 pitch)
-;*      r0  INT16 * input
-;*      r1  INT16 * output
-;*      r2  INT32 pitch
-;*  bench:  3/5
-;********************************************************************************
+
+; void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
+;                             unsigned char *dst, int stride)
+; r0    short* input
+; r1    unsigned char* pred
+; r2    int pitch
+; r3    unsigned char* dst
+; sp    int stride
 
-|vp8_short_idct4x4llm_1_v6| PROC         ;   cycles  in  out pit
-            ;
-    ldrsh   r0, [r0]    ; load input[0] 1, r0 un 2
-    add r0, r0, #4  ;   1   +4
-    stmdb   sp!, {r4, r5, lr}   ; make room for wide writes 1                   backup
-    mov r0, r0, asr #3  ; (input[0] + 4) >> 3   1, r0 req`d ^1  >> 3
-    pkhbt   r4, r0, r0, lsl #16 ; pack r0 into r4   1, r0 req`d ^1                  pack
-    mov r5, r4  ; expand                        expand
+|vp8_short_idct4x4llm_v6_dual| PROC
+    stmdb   sp!, {r4-r11, lr}
+
+    sub     sp, sp, #4
+
+    mov     r4, #0x00008A00         ; sin
+    orr     r4, r4, #0x0000008C     ; sinpi8sqrt2
+
+    mov     r5, #0x00004E00         ; cos
+    orr     r5, r5, #0x0000007B     ; cospi8sqrt2minus1
+    orr     r5, r5, #1<<31          ; loop counter on top bit
 
-    strd    r4, [r1], r2    ; *output = r0, post inc    1
-    strd    r4, [r1], r2    ;   1
-    strd    r4, [r1], r2    ;   1
-    strd    r4, [r1]    ;   1
-            ;
-    ldmia   sp!, {r4, r5, pc}   ; replace vars, return                      restore
-    ENDP        ; |vp8_short_idct4x4llm_1_v6|
-;********************************************************************************
-;********************************************************************************
-;********************************************************************************
+loop1_dual
+    ldr     r6, [r0, #(4*2)]        ; i5 | i4
+    ldr     r12, [r0, #(12*2)]      ; i13|i12
+    ldr     r14, [r0, #(8*2)]       ; i9 | i8
+
+    smulbt  r9, r5, r6              ; (ip[5] * cospi8sqrt2minus1) >> 16
+    smulbb  r7, r5, r6              ; (ip[4] * cospi8sqrt2minus1) >> 16
+    smulwt  r10, r4, r6             ; (ip[5] * sinpi8sqrt2) >> 16
+    smulwb  r8, r4, r6              ; (ip[4] * sinpi8sqrt2) >> 16
 
-;********************************************************************************
-;*  void short_idct4x4llm_v6(INT16 * input, INT16 * output, INT32 pitch)
-;*      r0  INT16 * input
-;*      r1  INT16 * output
-;*      r2  INT32 pitch
-;*  bench:
-;********************************************************************************
+    smulbt  r11, r5, r12            ; (ip[13] * cospi8sqrt2minus1) >> 16
+    pkhtb   r7, r9, r7, asr #16     ; 5c | 4c
+    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
+    uadd16  r6, r6, r7              ; 5c+5 | 4c+4
+
+    smulwt  r7, r4, r12             ; (ip[13] * sinpi8sqrt2) >> 16
+    smulbb  r9, r5, r12             ; (ip[12] * cospi8sqrt2minus1) >> 16
+    smulwb  r10, r4, r12            ; (ip[12] * sinpi8sqrt2) >> 16
+
+    subs    r5, r5, #1<<31          ; i--
 
-|vp8_short_idct4x4llm_v6| PROC           ;   cycles  in  out pit
-            ;
-    stmdb   sp!, {r4-r11, lr}   ; backup registers  1                   backup
-            ;
-    mov r4, #0x00004E00 ;   1                   cst
-    orr r4, r4, #0x0000007B ; cospi8sqrt2minus1
-    mov r5, #0x00008A00 ;   1                       cst
-    orr r5, r5, #0x0000008C ; sinpi8sqrt2
-            ;
-    mov r6, #4  ; i=4   1                           i
-loop1           ;
-    ldrsh   r12, [r0, #8]   ; input[4]  1, r12 unavail 2                                                    [4]
-    ldrsh   r3, [r0, #24]   ; input[12] 1, r3 unavail 2             [12]
-    ldrsh   r8, [r0, #16]   ; input[8]  1, r8 unavail 2                                 [8]
-    ldrsh   r7, [r0], #0x2  ; input[0]  1, r7 unavail 2 ++                          [0]
-    smulwb  r10, r5, r12    ; ([4] * sinpi8sqrt2) >> 16 1, r10 un 2, r12/r5 ^1                                          t1
-    smulwb  r11, r4, r3 ; ([12] * cospi8sqrt2minus1) >> 16  1, r11 un 2, r3/r4 ^1                                               t2
-    add r9, r7, r8  ; a1 = [0] + [8]    1                                       a1
-    sub r7, r7, r8  ; b1 = [0] - [8]    1                               b1
-    add r11, r3, r11    ; temp2 1
-    rsb r11, r11, r10   ; c1 = temp1 - temp2    1                                               c1
-    smulwb  r3, r5, r3  ; ([12] * sinpi8sqrt2) >> 16    1, r3 un 2, r3/r5 ^ 1               t2
-    smulwb  r10, r4, r12    ; ([4] * cospi8sqrt2minus1) >> 16   1, r10 un 2, r12/r4 ^1                                          t1
-    add r8, r7, r11 ; b1 + c1   1                                   b+c
-    strh    r8, [r1, r2]    ; out[pitch] = b1+c1    1
-    sub r7, r7, r11 ; b1 - c1   1                               b-c
-    add r10, r12, r10   ; temp1 1
-    add r3, r10, r3 ; d1 = temp1 + temp2    1               d1
-    add r10, r9, r3 ; a1 + d1   1                                           a+d
-    sub r3, r9, r3  ; a1 - d1   1               a-d
-    add r8, r2, r2  ; pitch * 2 1                                   p*2
-    strh    r7, [r1, r8]    ; out[pitch*2] = b1-c1  1
-    add r7, r2, r2, lsl #1  ; pitch * 3 1                               p*3
-    strh    r3, [r1, r7]    ; out[pitch*3] = a1-d1  1
-    subs    r6, r6, #1  ; i--   1                           --
-    strh    r10, [r1], #0x2 ; out[0] = a1+d1    1       ++
-    bne loop1   ; if i>0, continue
-            ;
-    sub r1, r1, #8  ; set up out for next loop  1       -4
-            ; for this iteration, input=prev output
-    mov r6, #4  ; i=4   1                           i
-;   b   returnfull
-loop2           ;
-    ldrsh   r11, [r1, #2]   ; input[1]  1, r11 un 2                                             [1]
-    ldrsh   r8, [r1, #6]    ; input[3]  1, r8 un 2                                  [3]
-    ldrsh   r3, [r1, #4]    ; input[2]  1, r3 un 2              [2]
-    ldrsh   r0, [r1]    ; input[0]  1, r0 un 2  [0]
-    smulwb  r9, r5, r11 ; ([1] * sinpi8sqrt2) >> 16 1, r9 un 2, r5/r11 ^1                                       t1
-    smulwb  r10, r4, r8 ; ([3] * cospi8sqrt2minus1) >> 16   1, r10 un 2, r4/r8 ^1                                           t2
-    add r7, r0, r3  ; a1 = [0] + [2]    1                               a1
-    sub r0, r0, r3  ; b1 = [0] - [2]    1   b1
-    add r10, r8, r10    ; temp2 1
-    rsb r9, r10, r9 ; c1 = temp1 - temp2    1                                       c1
-    smulwb  r8, r5, r8  ; ([3] * sinpi8sqrt2) >> 16 1, r8 un 2, r5/r8 ^1                                    t2
-    smulwb  r10, r4, r11    ; ([1] * cospi8sqrt2minus1) >> 16   1, r10 un 2, r4/r11 ^1                                          t1
-    add r3, r0, r9  ; b1+c1 1               b+c
-    add r3, r3, #4  ; b1+c1+4   1               +4
-    add r10, r11, r10   ; temp1 1
-    mov r3, r3, asr #3  ; b1+c1+4 >> 3  1, r3 ^1                >>3
-    strh    r3, [r1, #2]    ; out[1] = b1+c1    1
-    add r10, r10, r8    ; d1 = temp1 + temp2    1                                           d1
-    add r3, r7, r10 ; a1+d1 1               a+d
-    add r3, r3, #4  ; a1+d1+4   1               +4
-    sub r7, r7, r10 ; a1-d1 1                               a-d
-    add r7, r7, #4  ; a1-d1+4   1                               +4
-    mov r3, r3, asr #3  ; a1+d1+4 >> 3  1, r3 ^1                >>3
-    mov r7, r7, asr #3  ; a1-d1+4 >> 3  1, r7 ^1                                >>3
-    strh    r7, [r1, #6]    ; out[3] = a1-d1    1
-    sub r0, r0, r9  ; b1-c1 1   b-c
-    add r0, r0, #4  ; b1-c1+4   1   +4
-    subs    r6, r6, #1  ; i--   1                           --
-    mov r0, r0, asr #3  ; b1-c1+4 >> 3  1, r0 ^1    >>3
-    strh    r0, [r1, #4]    ; out[2] = b1-c1    1
-    strh    r3, [r1], r2    ; out[0] = a1+d1    1
-;   add r1, r1, r2  ; out += pitch  1       ++
-    bne loop2   ; if i>0, continue
-returnfull          ;
-    ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
-    ENDP
+    pkhtb   r9, r11, r9, asr #16    ; 13c | 12c
+    ldr     r11, [r0]               ; i1 | i0
+    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
+    uadd16  r7, r12, r9             ; 13c+13 | 12c+12
+
+    usub16  r7, r8, r7              ; c
+    uadd16  r6, r6, r10             ; d
+    uadd16  r10, r11, r14           ; a
+    usub16  r8, r11, r14            ; b
+
+    uadd16  r9, r10, r6             ; a+d
+    usub16  r10, r10, r6            ; a-d
+    uadd16  r6, r8, r7              ; b+c
+    usub16  r7, r8, r7              ; b-c
+
+    ; use input buffer to store intermediate results
+    str      r6, [r0, #(4*2)]       ; o5 | o4
+    str      r7, [r0, #(8*2)]       ; o9 | o8
+    str      r10,[r0, #(12*2)]      ; o13|o12
+    str      r9, [r0], #4           ; o1 | o0
+
+    bcs loop1_dual
 
-;********************************************************************************
-;********************************************************************************
-;********************************************************************************
+    sub     r0, r0, #8              ; reset input/output
+    str     r0, [sp]
+
+loop2_dual
+
+    ldr     r6, [r0, #(4*2)]        ; i5 | i4
+    ldr     r12,[r0, #(2*2)]        ; i3 | i2
+    ldr     r14,[r0, #(6*2)]        ; i7 | i6
+    ldr     r0, [r0, #(0*2)]        ; i1 | i0
 
-;********************************************************************************
-;*  void short_idct4x4llm_v6_scott(INT16 * input, INT16 * output, INT32 pitch)
-;*      r0  INT16 * input
-;*      r1  INT16 * output
-;*      r2  INT32 pitch
-;*  bench:
-;********************************************************************************
+    smulbt  r9, r5, r6              ; (ip[5] * cospi8sqrt2minus1) >> 16
+    smulbt  r7, r5, r0              ; (ip[1] * cospi8sqrt2minus1) >> 16
+    smulwt  r10, r4, r6             ; (ip[5] * sinpi8sqrt2) >> 16
+    smulwt  r8, r4, r0              ; (ip[1] * sinpi8sqrt2) >> 16
+
+    pkhbt   r11, r6, r0, lsl #16    ; i0 | i4
+    pkhtb   r7, r7, r9, asr #16     ; 1c | 5c
+    pkhtb   r0, r0, r6, asr #16     ; i1 | i5
+    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1
+
+    uadd16  r0, r7, r0              ; 1c+1 | 5c+5 = temp2
+    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6
+    uadd16  r10, r11, r9            ; a
+    usub16  r9, r11, r9             ; b
+    pkhtb   r6, r12, r14, asr #16   ; i3 | i7
 
-|vp8_short_idct4x4llm_v6_scott| PROC         ;   cycles  in  out pit
-;   mov r0, #0  ;
-;   ldr r0, [r0]    ;
-    stmdb   sp!, {r4 - r11, lr} ; backup registers  1                   backup
-            ;
-    mov r3, #0x00004E00 ;                   cos
-    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
-    mov r4, #0x00008A00 ;                       sin
-    orr r4, r4, #0x0000008C ; sinpi8sqrt2
-            ;
-    mov r5, #0x2    ; i                         i
-            ;
-short_idct4x4llm_v6_scott_loop1          ;
-    ldr r10, [r0, #(4*2)]   ; i5 | i4                                               5,4
-    ldr r11, [r0, #(12*2)]  ; i13 | i12                                                 13,12
-            ;
-    smulwb  r6, r4, r10 ; ((ip[4] * sinpi8sqrt2) >> 16)                             lt1
-    smulwb  r7, r3, r11 ; ((ip[12] * cospi8sqrt2minus1) >> 16)                                  lt2
-            ;
-    smulwb  r12, r3, r10    ; ((ip[4] * cospi8sqrt2misu1) >> 16)                                                        l2t2
-    smulwb  r14, r4, r11    ; ((ip[12] * sinpi8sqrt2) >> 16)                                                                l2t1
-            ;
-    add r6, r6, r7  ; partial c1                                lt1-lt2
-    add r12, r12, r14   ; partial d1                                                        l2t2+l2t1
-            ;
-    smulwt  r14, r4, r10    ; ((ip[5] * sinpi8sqrt2) >> 16)                                                             ht1
-    smulwt  r7, r3, r11 ; ((ip[13] * cospi8sqrt2minus1) >> 16)                                  ht2
-            ;
-    smulwt  r8, r3, r10 ; ((ip[5] * cospi8sqrt2minus1) >> 16)                                       h2t1
-    smulwt  r9, r4, r11 ; ((ip[13] * sinpi8sqrt2) >> 16)                                            h2t2
-            ;
-    add r7, r14, r7 ; partial c1_2                                  ht1+ht2
-    sub r8, r8, r9  ; partial d1_2                                      h2t1-h2t2
-            ;
-    pkhbt   r6, r6, r7, lsl #16 ; partial c1_2 | partial c1_1                               pack
-    pkhbt   r12, r12, r8, lsl #16   ; partial d1_2 | partial d1_1                                                       pack
-            ;
-    usub16  r6, r6, r10 ; c1_2 | c1_1                               c
-    uadd16  r12, r12, r11   ; d1_2 | d1_1                                                       d
-            ;
-    ldr r10, [r0, #0]   ; i1 | i0                                               1,0
-    ldr r11, [r0, #(8*2)]   ; i9 | i10                                                  9,10
-            ;
-;;;;;;  add r0, r0, #0x4    ;       +4
-;;;;;;  add r1, r1, #0x4    ;           +4
-            ;
-    uadd16  r8, r10, r11    ; i1 + i9 | i0 + i8 aka a1                                      a
-    usub16  r9, r10, r11    ; i1 - i9 | i0 - i8 aka b1                                          b
-            ;
-    uadd16  r7, r8, r12 ; a1 + d1 pair                                  a+d
-    usub16  r14, r8, r12    ; a1 - d1 pair                                                              a-d
-            ;
-    str r7, [r1]    ; op[0] = a1 + d1
-    str r14, [r1, r2]   ; op[pitch*3] = a1 - d1
-            ;
-    add r0, r0, #0x4    ; op[pitch] = b1 + c1       ++
-    add r1, r1, #0x4    ; op[pitch*2] = b1 - c1         ++
-            ;
-    subs    r5, r5, #0x1    ;                           --
-    bne short_idct4x4llm_v6_scott_loop1  ;
-            ;
-    sub r1, r1, #16 ; reset output ptr
-    mov r5, #0x4    ;
-    mov r0, r1  ; input = output
-            ;
-short_idct4x4llm_v6_scott_loop2          ;
-            ;
-    subs    r5, r5, #0x1    ;
-    bne short_idct4x4llm_v6_scott_loop2  ;
-            ;
-    ldmia   sp!, {r4 - r11, pc} ;
-    ENDP        ;
-            ;
-;********************************************************************************
-;********************************************************************************
-;********************************************************************************
+    subs    r5, r5, #1<<31          ; i--
+
+    smulbt  r7, r5, r6              ; (ip[3] * cospi8sqrt2minus1) >> 16
+    smulwt  r11, r4, r6             ; (ip[3] * sinpi8sqrt2) >> 16
+    smulbb  r12, r5, r6             ; (ip[7] * cospi8sqrt2minus1) >> 16
+    smulwb  r14, r4, r6             ; (ip[7] * sinpi8sqrt2) >> 16
+
+    pkhtb   r7, r7, r12, asr #16    ; 3c | 7c
+    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1
+
+    uadd16  r6, r7, r6              ; 3c+3 | 7c+7 = temp2
+    usub16  r12, r8, r6             ; c (o1 | o5)
+    uadd16  r6, r11, r0             ; d (o3 | o7)
+    uadd16  r7, r10, r6             ; a+d
+
+    mov     r8, #4                  ; set up 4's
+    orr     r8, r8, #0x40000        ; 4|4
 
-;********************************************************************************
-;*  void short_idct4x4llm_v6_dual(INT16 * input, INT16 * output, INT32 pitch)
-;*      r0  INT16 * input
-;*      r1  INT16 * output
-;*      r2  INT32 pitch
-;*  bench:
-;********************************************************************************
+    usub16  r6, r10, r6             ; a-d
+    uadd16  r6, r6, r8              ; a-d+4, 3|7
+    uadd16  r7, r7, r8              ; a+d+4, 0|4
+    uadd16  r10, r9, r12            ; b+c
+    usub16  r0, r9, r12             ; b-c
+    uadd16  r10, r10, r8            ; b+c+4, 1|5
+    uadd16  r8, r0, r8              ; b-c+4, 2|6
+
+    ldr     lr, [sp, #40]           ; dst stride
+
+    ldrb    r0, [r1]                ; pred p0
+    ldrb    r11, [r1, #1]           ; pred p1
+    ldrb    r12, [r1, #2]           ; pred p2
 
-|vp8_short_idct4x4llm_v6_dual| PROC          ;   cycles  in  out pit
-            ;
-    stmdb   sp!, {r4-r11, lr}   ; backup registers  1                   backup
-    mov r3, #0x00004E00 ;                   cos
-    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
-    mov r4, #0x00008A00 ;                       sin
-    orr r4, r4, #0x0000008C ; sinpi8sqrt2
-    mov r5, #0x2    ; i=2                           i
-loop1_dual
-    ldr r6, [r0, #(4*2)]    ; i5 | i4                               5|4
-    ldr r12, [r0, #(12*2)]  ; i13 | i12                                                     13|12
-    ldr r14, [r0, #(8*2)]   ; i9 | i8                                                               9|8
+    add     r0, r0, r7, asr #19     ; p0 + o0
+    add     r11, r11, r10, asr #19  ; p1 + o1
+    add     r12, r12, r8, asr #19   ; p2 + o2
+
+    usat    r0, #8, r0              ; d0 = clip8(p0 + o0)
+    usat    r11, #8, r11            ; d1 = clip8(p1 + o1)
+    usat    r12, #8, r12            ; d2 = clip8(p2 + o2)
+
+    add     r0, r0, r11, lsl #8     ; |--|--|d1|d0|
+
+    ldrb    r11, [r1, #3]           ; pred p3
+
+    add     r0, r0, r12, lsl #16    ; |--|d2|d1|d0|
 
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwb  r7, r3, r6  ; (ip[4] * cospi8sqrt2minus1) >> 16                                 4c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwb  r8, r4, r6  ; (ip[4] * sinpi8sqrt2) >> 16                                       4s
-    pkhbt   r7, r7, r9, lsl #16 ; 5c | 4c
-    smulwt  r11, r3, r12    ; (ip[13] * cospi8sqrt2minus1) >> 16                                                    13c
-    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
-    uadd16  r6, r6, r7  ; 5c+5 | 4c+4
-    smulwt  r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16                                  13s
-    smulwb  r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16                                            12c
-    smulwb  r10, r4, r12    ; (ip[12] * sinpi8sqrt2) >> 16                                              12s
-    subs    r5, r5, #0x1    ; i--                           --
-    pkhbt   r9, r9, r11, lsl #16    ; 13c | 12c
-    ldr r11, [r0], #0x4 ; i1 | i0       ++                                          1|0
-    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
-    uadd16  r7, r12, r9 ; 13c+13 | 12c+12
-    usub16  r7, r8, r7  ; c                                 c
-    uadd16  r6, r6, r10 ; d                             d
-    uadd16  r10, r11, r14   ; a                                             a
-    usub16  r8, r11, r14    ; b                                     b
-    uadd16  r9, r10, r6 ; a+d                                           a+d
-    usub16  r10, r10, r6    ; a-d                                               a-d
-    uadd16  r6, r8, r7  ; b+c                               b+c
-    usub16  r7, r8, r7  ; b-c                                   b-c
-    str r6, [r1, r2]    ; o5 | o4
-    add r6, r2, r2  ; pitch * 2                             p2
-    str r7, [r1, r6]    ; o9 | o8
-    add r6,  r6, r2 ; pitch * 3                             p3
-    str r10, [r1, r6]   ; o13 | o12
-    str r9, [r1], #0x4  ; o1 | o0           ++
-    bne loop1_dual  ;
-    mov r5, #0x2    ; i=2                           i
-    sub r0, r1, #8  ; reset input/output        i/o
-loop2_dual
-    ldr r6, [r0, r2]    ; i5 | i4                               5|4
-    ldr r1, [r0]    ; i1 | i0           1|0
-    ldr r12, [r0, #0x4] ; i3 | i2                                                       3|2
-    add r14, r2, #0x4   ; pitch + 2                                                             p+2
-    ldr r14, [r0, r14]  ; i7 | i6                                                               7|6
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwt  r7, r3, r1  ; (ip[1] * cospi8sqrt2minus1) >> 16                                 1c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwt  r8, r4, r1  ; (ip[1] * sinpi8sqrt2) >> 16                                       1s
-    pkhbt   r11, r6, r1, lsl #16    ; i0 | i4                                                   0|4
-    pkhbt   r7, r9, r7, lsl #16 ; 1c | 5c
-    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1                                      tc1
-    pkhtb   r1, r1, r6, asr #16 ; i1 | i5           1|5
-    uadd16  r1, r7, r1  ; 1c+1 | 5c+5 = temp2 (d)           td2
-    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6                                           2|6
-    uadd16  r10, r11, r9    ; a                                             a
-    usub16  r9, r11, r9 ; b                                         b
-    pkhtb   r6, r12, r14, asr #16   ; i3 | i7                               3|7
-    subs    r5, r5, #0x1    ; i--                           --
-    smulwt  r7, r3, r6  ; (ip[3] * cospi8sqrt2minus1) >> 16                                 3c
-    smulwt  r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16                                                   3s
-    smulwb  r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16                                                     7c
-    smulwb  r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16                                                               7s
+    add     r11, r11, r6, asr #19   ; p3 + o3
+
+    sxth    r7, r7                  ;
+    sxth    r10, r10                ;
+
+    usat    r11, #8, r11            ; d3 = clip8(p3 + o3)
+
+    sxth    r8, r8                  ;
+    sxth    r6, r6                  ;
+
+    add     r0, r0, r11, lsl #24    ; |d3|d2|d1|d0|
+
+    ldrb    r12, [r1, r2]!          ; pred p4
+    str     r0, [r3], lr
+    ldrb    r11, [r1, #1]           ; pred p5
+
+    add     r12, r12, r7, asr #3    ; p4 + o4
+    add     r11, r11, r10, asr #3   ; p5 + o5
+
+    usat    r12, #8, r12            ; d4 = clip8(p4 + o4)
+    usat    r11, #8, r11            ; d5 = clip8(p5 + o5)
 
-    pkhbt   r7, r12, r7, lsl #16    ; 3c | 7c
-    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1 (d)                                                   td1
-    uadd16  r6, r7, r6  ; 3c+3 | 7c+7 = temp2  (c)                              tc2
-    usub16  r12, r8, r6 ; c (o1 | o5)                                                       c
-    uadd16  r6, r11, r1 ; d (o3 | o7)                               d
-    uadd16  r7, r10, r6 ; a+d                                   a+d
-    mov r8, #0x4    ; set up 4's                                        4
-    orr r8, r8, #0x40000    ;                                       4|4
-    usub16  r6, r10, r6 ; a-d                               a-d
-    uadd16  r6, r6, r8  ; a-d+4                             3|7
-    uadd16  r7, r7, r8  ; a+d+4                                 0|4
-    uadd16  r10, r9, r12    ; b+c                                               b+c
-    usub16  r1, r9, r12 ; b-c           b-c
-    uadd16  r10, r10, r8    ; b+c+4                                             1|5
-    uadd16  r1, r1, r8  ; b-c+4         2|6
-    mov r8, r10, asr #19    ; o1 >> 3
-    strh    r8, [r0, #2]    ; o1
-    mov r8, r1, asr #19 ; o2 >> 3
-    strh    r8, [r0, #4]    ; o2
-    mov r8, r6, asr #19 ; o3 >> 3
-    strh    r8, [r0, #6]    ; o3
-    mov r8, r7, asr #19 ; o0 >> 3
-    strh    r8, [r0], r2    ; o0        +p
-    sxth    r10, r10    ;
-    mov r8, r10, asr #3 ; o5 >> 3
-    strh    r8, [r0, #2]    ; o5
-    sxth    r1, r1  ;
-    mov r8, r1, asr #3  ; o6 >> 3
-    strh    r8, [r0, #4]    ; o6
-    sxth    r6, r6  ;
-    mov r8, r6, asr #3  ; o7 >> 3
-    strh    r8, [r0, #6]    ; o7
-    sxth    r7, r7  ;
-    mov r8, r7, asr #3  ; o4 >> 3
-    strh    r8, [r0], r2    ; o4        +p
-;;;;;   subs    r5, r5, #0x1    ; i--                           --
-    bne loop2_dual  ;
-            ;
-    ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
+    ldrb    r7, [r1, #2]            ; pred p6
+    ldrb    r10, [r1, #3]           ; pred p7
+
+    add     r12, r12, r11, lsl #8   ; |--|--|d5|d4|
+
+    add     r7, r7, r8, asr #3      ; p6 + o6
+    add     r10, r10, r6, asr #3    ; p7 + o7
+
+    ldr     r0, [sp]                ; load input pointer
+
+    usat    r7, #8, r7              ; d6 = clip8(p6 + o6)
+    usat    r10, #8, r10            ; d7 = clip8(p7 + o7)
+
+    add     r12, r12, r7, lsl #16   ; |--|d6|d5|d4|
+    add     r12, r12, r10, lsl #24  ; |d7|d6|d5|d4|
+
+    str     r12, [r3], lr
+    add     r0, r0, #16
+    add     r1, r1, r2              ; pred + pitch
+
+    bcs loop2_dual
+
+    add     sp, sp, #4              ; idct_output buffer
+    ldmia   sp!, {r4 - r11, pc}
+
     ENDP
 
     END
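For reference, the row pass that loop2_dual implements boils down to the following plain-C arithmetic — an illustrative sketch (names are mine, not the library's), using the 0x8A8C (sinpi8sqrt2) and 0x4E7B (cospi8sqrt2minus1) Q16 constants seen in this file. The column pass uses the same butterflies without the +4/>>3 rounding, and the final step adds the predictor bytes and saturates to 8 bits, as the usat instructions above do.

static int clip8(int v) { return v < 0 ? 0 : (v > 255 ? 255 : v); }

/* One row of the 4x4 inverse DCT, plus the add-and-saturate reconstruction. */
static void idct_row_add_sketch(const short *ip, const unsigned char *pred,
                                unsigned char *dst)
{
    const int sinpi8sqrt2       = 35468;   /* 0x8A8C, Q16 */
    const int cospi8sqrt2minus1 = 20091;   /* 0x4E7B, Q16 */
    int a1, b1, c1, d1, t1, t2;

    a1 = ip[0] + ip[2];
    b1 = ip[0] - ip[2];

    t1 = (ip[1] * sinpi8sqrt2) >> 16;
    t2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
    c1 = t1 - t2;

    t1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
    t2 = (ip[3] * sinpi8sqrt2) >> 16;
    d1 = t1 + t2;

    dst[0] = (unsigned char)clip8(pred[0] + ((a1 + d1 + 4) >> 3));
    dst[1] = (unsigned char)clip8(pred[1] + ((b1 + c1 + 4) >> 3));
    dst[2] = (unsigned char)clip8(pred[2] + ((b1 - c1 + 4) >> 3));
    dst[3] = (unsigned char)clip8(pred[3] + ((a1 - d1 + 4) >> 3));
}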
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm
@@ -0,0 +1,606 @@
+;
+;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_intra4x4_predict_armv6|
+
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+
+;void vp8_intra4x4_predict(unsigned char *src, int src_stride, int b_mode,
+;                          unsigned char *dst, int dst_stride)
+
+|vp8_intra4x4_predict_armv6| PROC
+    push        {r4-r12, lr}
+
+
+    cmp         r2, #10
+    addlt       pc, pc, r2, lsl #2       ; position independent switch
+    pop         {r4-r12, pc}             ; default
+    b           b_dc_pred
+    b           b_tm_pred
+    b           b_ve_pred
+    b           b_he_pred
+    b           b_ld_pred
+    b           b_rd_pred
+    b           b_vr_pred
+    b           b_vl_pred
+    b           b_hd_pred
+    b           b_hu_pred
+
+b_dc_pred
+    ; load values
+    ldr         r8, [r0, -r1]            ; Above
+    ldrb        r4, [r0, #-1]!           ; Left[0]
+    mov         r9, #0
+    ldrb        r5, [r0, r1]             ; Left[1]
+    ldrb        r6, [r0, r1, lsl #1]!    ; Left[2]
+    usad8       r12, r8, r9
+    ldrb        r7, [r0, r1]             ; Left[3]
+
+    ; calculate dc
+    add         r4, r4, r5
+    add         r4, r4, r6
+    add         r4, r4, r7
+    add         r4, r4, r12
+    add         r4, r4, #4
+    ldr         r0, [sp, #40]           ; load stride
+    mov         r12, r4, asr #3         ; (expected_dc + 4) >> 3
+
+    add         r12, r12, r12, lsl #8
+    add         r3, r3, r0
+    add         r12, r12, r12, lsl #16
+
+    ; store values
+    str         r12, [r3, -r0]
+    str         r12, [r3]
+    str         r12, [r3, r0]
+    str         r12, [r3, r0, lsl #1]
+
+    pop        {r4-r12, pc}
+
+b_tm_pred
+    sub         r10, r0, #1             ; Left
+    ldr         r8, [r0, -r1]           ; Above
+    ldrb        r9, [r10, -r1]          ; top_left
+    ldrb        r4, [r0, #-1]!          ; Left[0]
+    ldrb        r5, [r10, r1]!          ; Left[1]
+    ldrb        r6, [r0, r1, lsl #1]    ; Left[2]
+    ldrb        r7, [r10, r1, lsl #1]   ; Left[3]
+    ldr         r0, [sp, #40]           ; load stride
+
+
+    add         r9, r9, r9, lsl #16     ; [tl|tl]
+    uxtb16      r10, r8                 ; a[2|0]
+    uxtb16      r11, r8, ror #8         ; a[3|1]
+    ssub16      r10, r10, r9            ; a[2|0] - [tl|tl]
+    ssub16      r11, r11, r9            ; a[3|1] - [tl|tl]
+
+    add         r4, r4, r4, lsl #16     ; l[0|0]
+    add         r5, r5, r5, lsl #16     ; l[1|1]
+    add         r6, r6, r6, lsl #16     ; l[2|2]
+    add         r7, r7, r7, lsl #16     ; l[3|3]
+
+    sadd16      r1, r4, r10             ; l[0|0] + a[2|0] - [tl|tl]
+    sadd16      r2, r4, r11             ; l[0|0] + a[3|1] - [tl|tl]
+    usat16      r1, #8, r1
+    usat16      r2, #8, r2
+
+    sadd16      r4, r5, r10             ; l[1|1] + a[2|0] - [tl|tl]
+    sadd16      r5, r5, r11             ; l[1|1] + a[3|1] - [tl|tl]
+
+    add         r12, r1, r2, lsl #8     ; [3|2|1|0]
+    str         r12, [r3], r0
+
+    usat16      r4, #8, r4
+    usat16      r5, #8, r5
+
+    sadd16      r1, r6, r10             ; l[2|2] + a[2|0] - [tl|tl]
+    sadd16      r2, r6, r11             ; l[2|2] + a[3|1] - [tl|tl]
+
+    add         r12, r4, r5, lsl #8     ; [3|2|1|0]
+    str         r12, [r3], r0
+
+    usat16      r1, #8, r1
+    usat16      r2, #8, r2
+
+    sadd16      r4, r7, r10             ; l[3|3] + a[2|0] - [tl|tl]
+    sadd16      r5, r7, r11             ; l[3|3] + a[3|1] - [tl|tl]
+
+    add         r12, r1, r2, lsl #8     ; [3|2|1|0]
+
+    usat16      r4, #8, r4
+    usat16      r5, #8, r5
+
+    str         r12, [r3], r0
+
+    add         r12, r4, r5, lsl #8     ; [3|2|1|0]
+    str         r12, [r3], r0
+
+    pop        {r4-r12, pc}
+
+b_ve_pred
+    ldr         r8, [r0, -r1]!          ; a[3|2|1|0]
+    ldr         r11, c00FF00FF
+    ldrb        r9, [r0, #-1]           ; top_left
+    ldrb        r10, [r0, #4]           ; a[4]
+
+    ldr         r0, c00020002
+
+    uxtb16      r4, r8                  ; a[2|0]
+    uxtb16      r5, r8, ror #8          ; a[3|1]
+    ldr         r2, [sp, #40]           ; stride
+    pkhbt       r9, r9, r5, lsl #16     ; a[1|-1]
+
+    add         r9, r9, r4, lsl #1      ;[a[1]+2*a[2]       | tl+2*a[0]       ]
+    uxtab16     r9, r9, r5              ;[a[1]+2*a[2]+a[3]  | tl+2*a[0]+a[1]  ]
+    uxtab16     r9, r9, r0              ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]
+
+    add         r0, r0, r10, lsl #16    ;[a[4]+2            |                 2]
+    add         r0, r0, r4, asr #16     ;[a[4]+2            |            a[2]+2]
+    add         r0, r0, r5, lsl #1      ;[a[4]+2*a[3]+2     |     a[2]+2*a[1]+2]
+    uadd16      r4, r4, r0              ;[a[4]+2*a[3]+a[2]+2|a[2]+2*a[1]+a[0]+2]
+
+    and         r9, r11, r9, asr #2
+    and         r4, r11, r4, asr #2
+    add         r3, r3, r2              ; dst + dst_stride
+    add         r9, r9, r4, lsl #8
+
+    ; store values
+    str         r9, [r3, -r2]
+    str         r9, [r3]
+    str         r9, [r3, r2]
+    str         r9, [r3, r2, lsl #1]
+
+    pop        {r4-r12, pc}
+
+
+b_he_pred
+    sub         r10, r0, #1             ; Left
+    ldrb        r4, [r0, #-1]!          ; Left[0]
+    ldrb        r8, [r10, -r1]          ; top_left
+    ldrb        r5, [r10, r1]!          ; Left[1]
+    ldrb        r6, [r0, r1, lsl #1]    ; Left[2]
+    ldrb        r7, [r10, r1, lsl #1]   ; Left[3]
+
+    add         r8, r8, r4              ; tl   + l[0]
+    add         r9, r4, r5              ; l[0] + l[1]
+    add         r10, r5, r6             ; l[1] + l[2]
+    add         r11, r6, r7             ; l[2] + l[3]
+
+    mov         r0, #2<<14
+
+    add         r8, r8, r9              ; tl + 2*l[0] + l[1]
+    add         r4, r9, r10             ; l[0] + 2*l[1] + l[2]
+    add         r5, r10, r11            ; l[1] + 2*l[2] + l[3]
+    add         r6, r11, r7, lsl #1     ; l[2] + 2*l[3] + l[3]
+
+
+    add         r8, r0, r8, lsl #14     ; (tl + 2*l[0] + l[1])>>2 in top half
+    add         r9, r0, r4, lsl #14     ; (l[0] + 2*l[1] + l[2])>>2 in top half
+    add         r10,r0, r5, lsl #14     ; (l[1] + 2*l[2] + l[3])>>2 in top half
+    add         r11,r0, r6, lsl #14     ; (l[2] + 2*l[3] + l[3])>>2 in top half
+
+    pkhtb       r8, r8, r8, asr #16     ; l[-|0|-|0]
+    pkhtb       r9, r9, r9, asr #16     ; l[-|1|-|1]
+    pkhtb       r10, r10, r10, asr #16  ; l[-|2|-|2]
+    pkhtb       r11, r11, r11, asr #16  ; l[-|3|-|3]
+
+    ldr         r0, [sp, #40]           ; stride
+
+    add         r8, r8, r8, lsl #8      ; l[0|0|0|0]
+    add         r9, r9, r9, lsl #8      ; l[1|1|1|1]
+    add         r10, r10, r10, lsl #8   ; l[2|2|2|2]
+    add         r11, r11, r11, lsl #8   ; l[3|3|3|3]
+
+    ; store values
+    str         r8, [r3], r0
+    str         r9, [r3]
+    str         r10, [r3, r0]
+    str         r11, [r3, r0, lsl #1]
+
+    pop        {r4-r12, pc}
+
+b_ld_pred
+    ldr         r4, [r0, -r1]!          ; Above
+    ldr         r12, c00020002
+    ldr         r5, [r0, #4]
+    ldr         lr,  c00FF00FF
+
+    uxtb16      r6, r4                  ; a[2|0]
+    uxtb16      r7, r4, ror #8          ; a[3|1]
+    uxtb16      r8, r5                  ; a[6|4]
+    uxtb16      r9, r5, ror #8          ; a[7|5]
+    pkhtb       r10, r6, r8             ; a[2|4]
+    pkhtb       r11, r7, r9             ; a[3|5]
+
+
+    add         r4, r6, r7, lsl #1      ; [a2+2*a3      |      a0+2*a1]
+    add         r4, r4, r10, ror #16    ; [a2+2*a3+a4   |   a0+2*a1+a2]
+    uxtab16     r4, r4, r12             ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]
+
+    add         r5, r7, r10, ror #15    ; [a3+2*a4      |      a1+2*a2]
+    add         r5, r5, r11, ror #16    ; [a3+2*a4+a5   |   a1+2*a2+a3]
+    uxtab16     r5, r5, r12             ; [a3+2*a4+a5+2 | a1+2*a2+a3+2]
+
+    pkhtb       r7, r9, r8, asr #16
+    add         r6, r8, r9, lsl #1      ; [a6+2*a7      |      a4+2*a5]
+    uadd16      r6, r6, r7              ; [a6+2*a7+a7   |   a4+2*a5+a6]
+    uxtab16     r6, r6, r12             ; [a6+2*a7+a7+2 | a4+2*a5+a6+2]
+
+    uxth        r7, r9                  ; [                         a5]
+    add         r7, r7, r8, asr #15     ; [                    a5+2*a6]
+    add         r7, r7, r9, asr #16     ; [                 a5+2*a6+a7]
+    uxtah       r7, r7, r12             ; [               a5+2*a6+a7+2]
+
+    ldr         r0, [sp, #40]           ; stride
+
+    ; scale down
+    and         r4, lr, r4, asr #2
+    and         r5, lr, r5, asr #2
+    and         r6, lr, r6, asr #2
+    mov         r7, r7, asr #2
+
+    add         r8, r4, r5, lsl #8      ; [3|2|1|0]
+    str         r8, [r3], r0
+
+    mov         r9, r8, lsr #8
+    add         r9, r9, r6, lsl #24     ; [4|3|2|1]
+    str         r9, [r3], r0
+
+    mov         r10, r9, lsr #8
+    add         r10, r10, r7, lsl #24   ; [5|4|3|2]
+    str         r10, [r3], r0
+
+    mov         r6, r6, lsr #16
+    mov         r11, r10, lsr #8
+    add         r11, r11, r6, lsl #24   ; [6|5|4|3]
+    str         r11, [r3], r0
+
+    pop        {r4-r12, pc}
+
+b_rd_pred
+    sub         r12, r0, r1             ; Above = src - src_stride
+    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
+    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
+    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
+    ldrb        r6, [r12, r1, lsl #1]   ; l[1] = pp[2]
+    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
+    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+
+
+    uxtb16      r9, lr                  ; p[7|5]
+    uxtb16      r10, lr, ror #8         ; p[8|6]
+    add         r4, r4, r6, lsl #16     ; p[2|0]
+    add         r5, r5, r7, lsl #16     ; p[3|1]
+    add         r6, r6, r8, lsl #16     ; p[4|2]
+    pkhbt       r7, r7, r9, lsl #16     ; p[5|3]
+    pkhbt       r8, r8, r10, lsl #16    ; p[6|4]
+
+    ldr         r12, c00020002
+    ldr         lr,  c00FF00FF
+
+    add         r4, r4, r5, lsl #1      ; [p2+2*p3      |      p0+2*p1]
+    add         r4, r4, r6              ; [p2+2*p3+p4   |   p0+2*p1+p2]
+    uxtab16     r4, r4, r12             ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
+
+    add         r5, r5, r6, lsl #1      ; [p3+2*p4      |      p1+2*p2]
+    add         r5, r5, r7              ; [p3+2*p4+p5   |   p1+2*p2+p3]
+    uxtab16     r5, r5, r12             ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
+
+    add         r6, r7, r8, lsl #1      ; [p5+2*p6      |      p3+2*p4]
+    add         r6, r6, r9              ; [p5+2*p6+p7   |   p3+2*p4+p5]
+    uxtab16     r6, r6, r12             ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
+
+    add         r7, r8, r9, lsl #1      ; [p6+2*p7      |      p4+2*p5]
+    add         r7, r7, r10             ; [p6+2*p7+p8   |   p4+2*p5+p6]
+    uxtab16     r7, r7, r12             ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
+
+    ldr         r0, [sp, #40]           ; stride
+
+    ; scale down
+    and         r7, lr, r7, asr #2
+    and         r6, lr, r6, asr #2
+    and         r5, lr, r5, asr #2
+    and         r4, lr, r4, asr #2
+
+    add         r8, r6, r7, lsl #8      ; [6|5|4|3]
+    str         r8, [r3], r0
+
+    mov         r9, r8, lsl #8          ; [5|4|3|-]
+    uxtab       r9, r9, r4, ror #16     ; [5|4|3|2]
+    str         r9, [r3], r0
+
+    mov         r10, r9, lsl #8         ; [4|3|2|-]
+    uxtab       r10, r10, r5            ; [4|3|2|1]
+    str         r10, [r3], r0
+
+    mov         r11, r10, lsl #8        ; [3|2|1|-]
+    uxtab       r11, r11, r4            ; [3|2|1|0]
+    str         r11, [r3], r0
+
+    pop        {r4-r12, pc}
+
+b_vr_pred
+    sub         r12, r0, r1             ; Above = src - src_stride
+    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
+    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
+    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
+    ldrb        r6, [r12, r1, lsl #1]   ; l[1] = pp[2]
+    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
+    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+
+    add         r5, r5, r7, lsl #16     ; p[3|1]
+    add         r6, r6, r8, lsl #16     ; p[4|2]
+    uxtb16      r9, lr                  ; p[7|5]
+    uxtb16      r10, lr, ror #8         ; p[8|6]
+    pkhbt       r7, r7, r9, lsl #16     ; p[5|3]
+    pkhbt       r8, r8, r10, lsl #16    ; p[6|4]
+
+    ldr         r4,  c00010001
+    ldr         r12, c00020002
+    ldr         lr,  c00FF00FF
+
+    add         r5, r5, r6, lsl #1      ; [p3+2*p4      |      p1+2*p2]
+    add         r5, r5, r7              ; [p3+2*p4+p5   |   p1+2*p2+p3]
+    uxtab16     r5, r5, r12             ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
+
+    add         r6, r6, r7, lsl #1      ; [p4+2*p5      |      p2+2*p3]
+    add         r6, r6, r8              ; [p4+2*p5+p6   |   p2+2*p3+p4]
+    uxtab16     r6, r6, r12             ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
+
+    uadd16      r11, r8, r9             ; [p6+p7        |        p4+p5]
+    uhadd16     r11, r11, r4            ; [(p6+p7+1)>>1 | (p4+p5+1)>>1]
+                                        ; [F|E]
+
+    add         r7, r7, r8, lsl #1      ; [p5+2*p6      |      p3+2*p4]
+    add         r7, r7, r9              ; [p5+2*p6+p7   |   p3+2*p4+p5]
+    uxtab16     r7, r7, r12             ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
+
+    uadd16      r2, r9, r10             ; [p7+p8        |        p5+p6]
+    uhadd16     r2, r2, r4              ; [(p7+p8+1)>>1 | (p5+p6+1)>>1]
+                                        ; [J|I]
+
+    add         r8, r8, r9, lsl #1      ; [p6+2*p7      |      p4+2*p5]
+    add         r8, r8, r10             ; [p6+2*p7+p8   |   p4+2*p5+p6]
+    uxtab16     r8, r8, r12             ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
+
+    ldr         r0, [sp, #40]           ; stride
+
+    ; scale down
+    and         r5, lr, r5, asr #2      ; [B|A]
+    and         r6, lr, r6, asr #2      ; [D|C]
+    and         r7, lr, r7, asr #2      ; [H|G]
+    and         r8, lr, r8, asr #2      ; [L|K]
+
+    add         r12, r11, r2, lsl #8    ; [J|F|I|E]
+    str         r12, [r3], r0
+
+    add         r12, r7, r8, lsl #8     ; [L|H|K|G]
+    str         r12, [r3], r0
+
+    pkhbt       r2, r6, r2, lsl #16     ; [-|I|-|C]
+    add         r2, r2, r11, lsl #8     ; [F|I|E|C]
+
+    pkhtb       r12, r6, r5             ; [-|D|-|A]
+    pkhtb       r10, r7, r5, asr #16    ; [-|H|-|B]
+    str         r2, [r3], r0
+    add         r12, r12, r10, lsl #8   ; [H|D|B|A]
+    str         r12, [r3], r0
+
+    pop        {r4-r12, pc}
+
+b_vl_pred
+    ldr         r4, [r0, -r1]!          ; [3|2|1|0]
+    ldr         r12, c00020002
+    ldr         r5, [r0, #4]            ; [7|6|5|4]
+    ldr         lr,  c00FF00FF
+    ldr         r2,  c00010001
+
+    mov         r0, r4, lsr #16         ; [-|-|3|2]
+    add         r0, r0, r5, lsl #16     ; [5|4|3|2]
+    uxtb16      r6, r4                  ; [2|0]
+    uxtb16      r7, r4, ror #8          ; [3|1]
+    uxtb16      r8, r0                  ; [4|2]
+    uxtb16      r9, r0, ror #8          ; [5|3]
+    uxtb16      r10, r5                 ; [6|4]
+    uxtb16      r11, r5, ror #8         ; [7|5]
+
+    uadd16      r4, r6, r7              ; [p2+p3        |        p0+p1]
+    uhadd16     r4, r4, r2              ; [(p2+p3+1)>>1 | (p0+p1+1)>>1]
+                                        ; [B|A]
+
+    add         r5, r6, r7, lsl #1      ; [p2+2*p3      |      p0+2*p1]
+    add         r5, r5, r8              ; [p2+2*p3+p4   |   p0+2*p1+p2]
+    uxtab16     r5, r5, r12             ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
+
+    uadd16      r6, r7, r8              ; [p3+p4        |        p1+p2]
+    uhadd16     r6, r6, r2              ; [(p3+p4+1)>>1 | (p1+p2+1)>>1]
+                                        ; [F|E]
+
+    add         r7, r7, r8, lsl #1      ; [p3+2*p4      |      p1+2*p2]
+    add         r7, r7, r9              ; [p3+2*p4+p5   |   p1+2*p2+p3]
+    uxtab16     r7, r7, r12             ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
+
+    add         r8, r8, r9, lsl #1      ; [p4+2*p5      |      p2+2*p3]
+    add         r8, r8, r10             ; [p4+2*p5+p6   |   p2+2*p3+p4]
+    uxtab16     r8, r8, r12             ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
+
+    add         r9, r9, r10, lsl #1     ; [p5+2*p6      |      p3+2*p4]
+    add         r9, r9, r11             ; [p5+2*p6+p7   |   p3+2*p4+p5]
+    uxtab16     r9, r9, r12             ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
+
+    ldr         r0, [sp, #40]           ; stride
+
+    ; scale down
+    and         r5, lr, r5, asr #2      ; [D|C]
+    and         r7, lr, r7, asr #2      ; [H|G]
+    and         r8, lr, r8, asr #2      ; [I|D]
+    and         r9, lr, r9, asr #2      ; [J|H]
+
+
+    add         r10, r4, r6, lsl #8     ; [F|B|E|A]
+    str         r10, [r3], r0
+
+    add         r5, r5, r7, lsl #8      ; [H|C|G|D]
+    str         r5, [r3], r0
+
+    pkhtb       r12, r8, r4, asr #16    ; [-|I|-|B]
+    pkhtb       r10, r9, r8             ; [-|J|-|D]
+
+    add         r12, r6, r12, lsl #8    ; [I|F|B|E]
+    str         r12, [r3], r0
+
+    add         r10, r7, r10, lsl #8    ; [J|H|D|G]
+    str         r10, [r3], r0
+
+    pop        {r4-r12, pc}
+
+b_hd_pred
+    sub         r12, r0, r1             ; Above = src - src_stride
+    ldrb        r7, [r0, #-1]!          ; l[0] = pp[3]
+    ldr         lr, [r12]               ; Above = pp[8|7|6|5]
+    ldrb        r8, [r12, #-1]!         ; tl   = pp[4]
+    ldrb        r6, [r0, r1]            ; l[1] = pp[2]
+    ldrb        r5, [r0, r1, lsl #1]    ; l[2] = pp[1]
+    ldrb        r4, [r12, r1, lsl #2]   ; l[3] = pp[0]
+
+    uxtb16      r9, lr                  ; p[7|5]
+    uxtb16      r10, lr, ror #8         ; p[8|6]
+
+    add         r4, r4, r5, lsl #16     ; p[1|0]
+    add         r5, r5, r6, lsl #16     ; p[2|1]
+    add         r6, r6, r7, lsl #16     ; p[3|2]
+    add         r7, r7, r8, lsl #16     ; p[4|3]
+
+    ldr         r12, c00020002
+    ldr         lr,  c00FF00FF
+    ldr         r2,  c00010001
+
+    pkhtb       r8, r7, r9              ; p[4|5]
+    pkhtb       r1, r9, r10             ; p[7|6]
+    pkhbt       r10, r8, r10, lsl #16   ; p[6|5]
+
+
+    uadd16      r11, r4, r5             ; [p1+p2        |        p0+p1]
+    uhadd16     r11, r11, r2            ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
+                                        ; [B|A]
+
+    add         r4, r4, r5, lsl #1      ; [p1+2*p2      |      p0+2*p1]
+    add         r4, r4, r6              ; [p1+2*p2+p3   |   p0+2*p1+p2]
+    uxtab16     r4, r4, r12             ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
+
+    uadd16      r0, r6, r7              ; [p3+p4        |        p2+p3]
+    uhadd16     r0, r0, r2              ; [(p3+p4+1)>>1 | (p2+p3+1)>>1]
+                                        ; [F|E]
+
+    add         r5, r6, r7, lsl #1      ; [p3+2*p4      |      p2+2*p3]
+    add         r5, r5, r8, ror #16     ; [p3+2*p4+p5   |   p2+2*p3+p4]
+    uxtab16     r5, r5, r12             ; [p3+2*p4+p5+2 | p2+2*p3+p4+2]
+
+    add         r6, r12, r8, ror #16    ; [p5+2         |         p4+2]
+    add         r6, r6, r10, lsl #1     ; [p5+2+2*p6    |    p4+2+2*p5]
+    uxtab16     r6, r6, r1              ; [p5+2+2*p6+p7 | p4+2+2*p5+p6]
+
+    ; scale down
+    and         r4, lr, r4, asr #2      ; [D|C]
+    and         r5, lr, r5, asr #2      ; [H|G]
+    and         r6, lr, r6, asr #2      ; [J|I]
+
+    ldr         lr, [sp, #40]           ; stride
+
+    pkhtb       r2, r0, r6              ; [-|F|-|I]
+    pkhtb       r12, r6, r5, asr #16    ; [-|J|-|H]
+    add         r12, r12, r2, lsl #8    ; [F|J|I|H]
+    add         r2, r0, r5, lsl #8      ; [H|F|G|E]
+    mov         r12, r12, ror #24       ; [J|I|H|F]
+    str         r12, [r3], lr
+
+
+    mov         r7, r11, asr #16        ; [-|-|-|B]
+    str         r2, [r3], lr
+    add         r7, r7, r0, lsl #16     ; [-|E|-|B]
+    add         r7, r7, r4, asr #8      ; [-|E|D|B]
+    add         r7, r7, r5, lsl #24     ; [G|E|D|B]
+    str         r7, [r3], lr
+
+    add         r5, r11, r4, lsl #8     ; [D|B|C|A]
+    str         r5, [r3], lr
+
+    pop        {r4-r12, pc}
+
+
+
+b_hu_pred
+    ldrb        r4, [r0, #-1]!          ; Left[0]
+    ldr         r12, c00020002
+    ldrb        r5, [r0, r1]!           ; Left[1]
+    ldr         lr,  c00FF00FF
+    ldrb        r6, [r0, r1]!           ; Left[2]
+    ldr         r2,  c00010001
+    ldrb        r7, [r0, r1]            ; Left[3]
+
+
+    add         r4, r4, r5, lsl #16     ; [1|0]
+    add         r5, r5, r6, lsl #16     ; [2|1]
+    add         r9, r6, r7, lsl #16     ; [3|2]
+
+    uadd16      r8, r4, r5              ; [p1+p2        |        p0+p1]
+    uhadd16     r8, r8, r2              ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
+                                        ; [B|A]
+
+    add         r4, r4, r5, lsl #1      ; [p1+2*p2      |      p0+2*p1]
+    add         r4, r4, r9              ; [p1+2*p2+p3   |   p0+2*p1+p2]
+    uxtab16     r4, r4, r12             ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
+    ldr         r2, [sp, #40]           ; stride
+    and         r4, lr, r4, asr #2      ; [D|C]
+
+    add         r10, r6, r7             ; [p2+p3]
+    add         r11, r10, r7, lsl #1    ; [p2+3*p3]
+    add         r10, r10, #1
+    add         r11, r11, #2
+    mov         r10, r10, asr #1        ; [E]
+    mov         r11, r11, asr #2        ; [F]
+
+    add         r9, r7, r9, asr #8      ; [-|-|G|G]
+    add         r0, r8, r4, lsl #8      ; [D|B|C|A]
+    add         r7, r9, r9, lsl #16     ; [G|G|G|G]
+
+    str         r0, [r3], r2
+
+    mov         r1, r8, asr #16         ; [-|-|-|B]
+    add         r1, r1, r4, asr #8      ; [-|-|D|B]
+    add         r1, r1, r10, lsl #16    ; [-|E|D|B]
+    add         r1, r1, r11, lsl #24    ; [F|E|D|B]
+    str         r1, [r3], r2
+
+    add         r10, r11, lsl #8        ; [-|-|F|E]
+    add         r10, r10, r9, lsl #16   ; [G|G|F|E]
+    str         r10, [r3]
+
+    str         r7, [r3, r2]
+
+    pop        {r4-r12, pc}
+
+    ENDP
+
+; constants
+c00010001
+    DCD         0x00010001
+c00020002
+    DCD         0x00020002
+c00FF00FF
+    DCD         0x00FF00FF
+
+    END
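For reference, the b_dc_pred case at the top of this file averages the four above and four left pixels with rounding and fills the 4x4 destination with the result; an illustrative C equivalent (the name is mine):

static void b_dc_pred_sketch(const unsigned char *above,
                             const unsigned char *left,
                             unsigned char *dst, int dst_stride)
{
    int sum = 4;                 /* rounding term ("add r4, r4, #4") */
    int r, c;
    unsigned char dc;

    for (c = 0; c < 4; c++)
        sum += above[c];         /* what the usad8 against zero computes */
    for (r = 0; r < 4; r++)
        sum += left[r];

    dc = (unsigned char)(sum >> 3);   /* (expected_dc + 4) >> 3 */

    for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
            dst[r * dst_stride + c] = dc;
}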
--- a/media/libvpx/vp8/common/arm/armv6/iwalsh_v6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/iwalsh_v6.asm
@@ -4,37 +4,36 @@
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
 ;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
     EXPORT |vp8_short_inv_walsh4x4_v6|
-    EXPORT |vp8_short_inv_walsh4x4_1_v6|
 
     ARM
     REQUIRE8
     PRESERVE8
 
     AREA    |.text|, CODE, READONLY  ; name this block of code
 
-;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
+;short vp8_short_inv_walsh4x4_v6(short *input, short *mb_dqcoeff)
 |vp8_short_inv_walsh4x4_v6| PROC
 
-    stmdb       sp!, {r4 - r11, lr}
+    stmdb       sp!, {r4 - r12, lr}
 
-    ldr         r2, [r0], #4         ; [1  |  0]
-    ldr         r3, [r0], #4         ; [3  |  2]
-    ldr         r4, [r0], #4         ; [5  |  4]
-    ldr         r5, [r0], #4         ; [7  |  6]
-    ldr         r6, [r0], #4         ; [9  |  8]
-    ldr         r7, [r0], #4         ; [11 | 10]
-    ldr         r8, [r0], #4         ; [13 | 12]
-    ldr         r9, [r0]             ; [15 | 14]
+    ldr         r2, [r0, #0]         ; [1  |  0]
+    ldr         r3, [r0, #4]         ; [3  |  2]
+    ldr         r4, [r0, #8]         ; [5  |  4]
+    ldr         r5, [r0, #12]        ; [7  |  6]
+    ldr         r6, [r0, #16]        ; [9  |  8]
+    ldr         r7, [r0, #20]        ; [11 | 10]
+    ldr         r8, [r0, #24]        ; [13 | 12]
+    ldr         r9, [r0, #28]        ; [15 | 14]
 
     qadd16      r10, r2, r8          ; a1 [1+13  |  0+12]
     qadd16      r11, r4, r6          ; b1 [5+9   |  4+8]
     qsub16      r12, r4, r6          ; c1 [5-9   |  4-8]
     qsub16      lr, r2, r8           ; d1 [1-13  |  0-12]
 
     qadd16      r2, r10, r11         ; a1 + b1 [1  |  0]
     qadd16      r4, r12, lr          ; c1 + d1 [5  |  4]
@@ -64,89 +63,74 @@
     qaddsubx    r4, r12, lr          ; [b2|c2] [c1+d1 | a1-b1]
     qaddsubx    r5, lr, r12          ; [a2|d2] [b1+a1 | d1-c1]
 
     qadd16      r2, r2, r10          ; [b2+3|c2+3]
     qadd16      r3, r3, r10          ; [a2+3|d2+3]
     qadd16      r4, r4, r10          ; [b2+3|c2+3]
     qadd16      r5, r5, r10          ; [a2+3|d2+3]
 
-    asr         r12, r2, #3          ; [1  |  x]
-    pkhtb       r12, r12, r3, asr #19; [1  |  0]
-    lsl         lr, r3, #16          ; [~3 |  x]
-    lsl         r2, r2, #16          ; [~2 |  x]
-    asr         lr, lr, #3           ; [3  |  x]
-    pkhtb       lr, lr, r2, asr #19  ; [3  |  2]
+    asr         r12, r3, #19         ; [0]
+    strh        r12, [r1], #32
+    asr         lr, r2, #19          ; [1]
+    strh        lr, [r1], #32
+    sxth        r2, r2
+    sxth        r3, r3
+    asr         r2, r2, #3           ; [2]
+    strh        r2, [r1], #32
+    asr         r3, r3, #3           ; [3]
+    strh        r3, [r1], #32
 
-    asr         r2, r4, #3           ; [5  |  x]
-    pkhtb       r2, r2, r5, asr #19  ; [5  |  4]
-    lsl         r3, r5, #16          ; [~7 |  x]
-    lsl         r4, r4, #16          ; [~6 |  x]
-    asr         r3, r3, #3           ; [7  |  x]
-    pkhtb       r3, r3, r4, asr #19  ; [7  |  6]
-
-    str         r12, [r1], #4
-    str         lr, [r1], #4
-    str         r2, [r1], #4
-    str         r3, [r1], #4
+    asr         r12, r5, #19         ; [4]
+    strh        r12, [r1], #32
+    asr         lr, r4, #19          ; [5]
+    strh        lr, [r1], #32
+    sxth        r4, r4
+    sxth        r5, r5
+    asr         r4, r4, #3           ; [6]
+    strh        r4, [r1], #32
+    asr         r5, r5, #3           ; [7]
+    strh        r5, [r1], #32
 
     qsubaddx    r2, r6, r7           ; [c1|a1] [9-10  |  8+11]
     qaddsubx    r3, r6, r7           ; [b1|d1] [9+10  |  8-11]
     qsubaddx    r4, r8, r9           ; [c1|a1] [13-14 | 12+15]
     qaddsubx    r5, r8, r9           ; [b1|d1] [13+14 | 12-15]
 
     qaddsubx    r6, r2, r3           ; [b2|c2] [c1+d1 | a1-b1]
     qaddsubx    r7, r3, r2           ; [a2|d2] [b1+a1 | d1-c1]
     qaddsubx    r8, r4, r5           ; [b2|c2] [c1+d1 | a1-b1]
     qaddsubx    r9, r5, r4           ; [a2|d2] [b1+a1 | d1-c1]
 
     qadd16      r6, r6, r10          ; [b2+3|c2+3]
     qadd16      r7, r7, r10          ; [a2+3|d2+3]
     qadd16      r8, r8, r10          ; [b2+3|c2+3]
     qadd16      r9, r9, r10          ; [a2+3|d2+3]
 
-    asr         r2, r6, #3           ; [9  |  x]
-    pkhtb       r2, r2, r7, asr #19  ; [9  |  8]
-    lsl         r3, r7, #16          ; [~11|  x]
-    lsl         r4, r6, #16          ; [~10|  x]
-    asr         r3, r3, #3           ; [11 |  x]
-    pkhtb       r3, r3, r4, asr #19  ; [11 | 10]
+    asr         r12, r7, #19         ; [8]
+    strh        r12, [r1], #32
+    asr         lr, r6, #19          ; [9]
+    strh        lr, [r1], #32
+    sxth        r6, r6
+    sxth        r7, r7
+    asr         r6, r6, #3           ; [10]
+    strh        r6, [r1], #32
+    asr         r7, r7, #3           ; [11]
+    strh        r7, [r1], #32
 
-    asr         r4, r8, #3           ; [13 |  x]
-    pkhtb       r4, r4, r9, asr #19  ; [13 | 12]
-    lsl         r5, r9, #16          ; [~15|  x]
-    lsl         r6, r8, #16          ; [~14|  x]
-    asr         r5, r5, #3           ; [15 |  x]
-    pkhtb       r5, r5, r6, asr #19  ; [15 | 14]
+    asr         r12, r9, #19         ; [12]
+    strh        r12, [r1], #32
+    asr         lr, r8, #19          ; [13]
+    strh        lr, [r1], #32
+    sxth        r8, r8
+    sxth        r9, r9
+    asr         r8, r8, #3           ; [14]
+    strh        r8, [r1], #32
+    asr         r9, r9, #3           ; [15]
+    strh        r9, [r1], #32
 
-    str         r2, [r1], #4
-    str         r3, [r1], #4
-    str         r4, [r1], #4
-    str         r5, [r1]
-
-    ldmia       sp!, {r4 - r11, pc}
+    ldmia       sp!, {r4 - r12, pc}
     ENDP        ; |vp8_short_inv_walsh4x4_v6|
 
 
-;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
-|vp8_short_inv_walsh4x4_1_v6| PROC
-
-    ldrsh       r2, [r0]             ; [0]
-    add         r2, r2, #3           ; [0] + 3
-    asr         r2, r2, #3           ; a1 ([0]+3) >> 3
-    lsl         r2, r2, #16          ; [a1 |  x]
-    orr         r2, r2, r2, lsr #16  ; [a1 | a1]
-
-    str         r2, [r1], #4
-    str         r2, [r1], #4
-    str         r2, [r1], #4
-    str         r2, [r1], #4
-    str         r2, [r1], #4
-    str         r2, [r1], #4
-    str         r2, [r1], #4
-    str         r2, [r1]
-
-    bx          lr
-    ENDP        ; |vp8_short_inv_walsh4x4_1_v6|
-
 ; Constant Pool
 c0x00030003 DCD 0x00030003
     END
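A note on the rewritten output path above: each inverse-WHT result is now stored with a 32-byte post-increment (strh ..., #32), which scatters the 16 values into the DC slot of each 4x4 block's dqcoeff array — hence the parameter rename to mb_dqcoeff. A minimal C sketch of that layout, with an illustrative name:

static void scatter_wht_output_sketch(const short out[16], short *mb_dqcoeff)
{
    int i;

    for (i = 0; i < 16; i++)
        mb_dqcoeff[i * 16] = out[i];   /* 16 shorts == 32 bytes per block */
}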
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/armv6/recon_v6.asm
+++ /dev/null
@@ -1,281 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_recon_b_armv6|
-    EXPORT  |vp8_recon2b_armv6|
-    EXPORT  |vp8_recon4b_armv6|
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-prd     RN  r0
-dif     RN  r1
-dst     RN  r2
-stride      RN  r3
-
-;void recon_b(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride)
-; R0 char* pred_ptr
-; R1 short * dif_ptr
-; R2 char * dst_ptr
-; R3 int stride
-
-; Description:
-; Loop through the block adding the Pred and Diff together.  Clamp and then
-; store back into the Dst.
-
-; Restrictions :
-; all buffers are expected to be 4 byte aligned coming in and
-; going out.
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-;
-;
-;
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_recon_b_armv6| PROC
-    stmdb   sp!, {r4 - r9, lr}
-
-    ;0, 1, 2, 3
-    ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0
-    ldr     r6, [dif, #0]           ;     1 |     0
-    ldr     r7, [dif, #4]           ;     3 |     2
-
-    pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
-    pkhtb   r9, r7, r6, asr #16     ;     3 |     1
-
-    uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 2 | 0
-    uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
-
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    add     dif, dif, #32
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst], stride
-
-    ;0, 1, 2, 3
-    ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0
-;;  ldr     r6, [dif, #8]           ;     1 |     0
-;;  ldr     r7, [dif, #12]          ;     3 |     2
-    ldr     r6, [dif, #0]           ;     1 |     0
-    ldr     r7, [dif, #4]           ;     3 |     2
-
-    pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
-    pkhtb   r9, r7, r6, asr #16     ;     3 |     1
-
-    uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 2 | 0
-    uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
-
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    add     dif, dif, #32
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst], stride
-
-    ;0, 1, 2, 3
-    ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0
-;;  ldr     r6, [dif, #16]          ;     1 |     0
-;;  ldr     r7, [dif, #20]          ;     3 |     2
-    ldr     r6, [dif, #0]           ;     1 |     0
-    ldr     r7, [dif, #4]           ;     3 |     2
-
-    pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
-    pkhtb   r9, r7, r6, asr #16     ;     3 |     1
-
-    uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 2 | 0
-    uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
-
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    add     dif, dif, #32
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst], stride
-
-    ;0, 1, 2, 3
-    ldr     r4, [prd], #16          ; 3 | 2 | 1 | 0
-;;  ldr     r6, [dif, #24]          ;     1 |     0
-;;  ldr     r7, [dif, #28]          ;     3 |     2
-    ldr     r6, [dif, #0]           ;     1 |     0
-    ldr     r7, [dif, #4]           ;     3 |     2
-
-    pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
-    pkhtb   r9, r7, r6, asr #16     ;     3 |     1
-
-    uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 2 | 0
-    uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
-
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst], stride
-
-    ldmia   sp!, {r4 - r9, pc}
-
-    ENDP    ; |recon_b|
-
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-;
-;
-;
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-; R0 char  *pred_ptr
-; R1 short *dif_ptr
-; R2 char  *dst_ptr
-; R3 int stride
-|vp8_recon4b_armv6| PROC
-    stmdb   sp!, {r4 - r9, lr}
-
-    mov     lr, #4
-
-recon4b_loop
-    ;0, 1, 2, 3
-    ldr     r4, [prd], #4           ; 3 | 2 | 1 | 0
-    ldr     r6, [dif, #0]           ;     1 |     0
-    ldr     r7, [dif, #4]           ;     3 |     2
-
-    pkhbt   r8, r6, r7, lsl #16     ;     2 |     0
-    pkhtb   r9, r7, r6, asr #16     ;     3 |     1
-
-    uxtab16 r8, r8, r4              ;     2 |     0  +  3 | 2 | 2 | 0
-    uxtab16 r9, r9, r4, ror #8      ;     3 |     1  +  0 | 3 | 2 | 1
-
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst]
-
-    ;4, 5, 6, 7
-    ldr     r4, [prd], #4
-;;  ldr     r6, [dif, #32]
-;;  ldr     r7, [dif, #36]
-    ldr     r6, [dif, #8]
-    ldr     r7, [dif, #12]
-
-    pkhbt   r8, r6, r7, lsl #16
-    pkhtb   r9, r7, r6, asr #16
-
-    uxtab16 r8, r8, r4
-    uxtab16 r9, r9, r4, ror #8
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst, #4]
-
-    ;8, 9, 10, 11
-    ldr     r4, [prd], #4
-;;  ldr     r6, [dif, #64]
-;;  ldr     r7, [dif, #68]
-    ldr     r6, [dif, #16]
-    ldr     r7, [dif, #20]
-
-    pkhbt   r8, r6, r7, lsl #16
-    pkhtb   r9, r7, r6, asr #16
-
-    uxtab16 r8, r8, r4
-    uxtab16 r9, r9, r4, ror #8
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst, #8]
-
-    ;12, 13, 14, 15
-    ldr     r4, [prd], #4
-;;  ldr     r6, [dif, #96]
-;;  ldr     r7, [dif, #100]
-    ldr     r6, [dif, #24]
-    ldr     r7, [dif, #28]
-
-    pkhbt   r8, r6, r7, lsl #16
-    pkhtb   r9, r7, r6, asr #16
-
-    uxtab16 r8, r8, r4
-    uxtab16 r9, r9, r4, ror #8
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst, #12]
-
-    add     dst, dst, stride
-;;  add     dif, dif, #8
-    add     dif, dif, #32
-
-    subs    lr, lr, #1
-    bne     recon4b_loop
-
-    ldmia   sp!, {r4 - r9, pc}
-
-    ENDP    ; |Recon4B|
-
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-;
-;
-;
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-; R0 char  *pred_ptr
-; R1 short *dif_ptr
-; R2 char  *dst_ptr
-; R3 int stride
-|vp8_recon2b_armv6| PROC
-    stmdb   sp!, {r4 - r9, lr}
-
-    mov     lr, #4
-
-recon2b_loop
-    ;0, 1, 2, 3
-    ldr     r4, [prd], #4
-    ldr     r6, [dif, #0]
-    ldr     r7, [dif, #4]
-
-    pkhbt   r8, r6, r7, lsl #16
-    pkhtb   r9, r7, r6, asr #16
-
-    uxtab16 r8, r8, r4
-    uxtab16 r9, r9, r4, ror #8
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst]
-
-    ;4, 5, 6, 7
-    ldr     r4, [prd], #4
-;;  ldr     r6, [dif, #32]
-;;  ldr     r7, [dif, #36]
-    ldr     r6, [dif, #8]
-    ldr     r7, [dif, #12]
-
-    pkhbt   r8, r6, r7, lsl #16
-    pkhtb   r9, r7, r6, asr #16
-
-    uxtab16 r8, r8, r4
-    uxtab16 r9, r9, r4, ror #8
-    usat16  r8, #8, r8
-    usat16  r9, #8, r9
-    orr     r8, r8, r9, lsl #8
-
-    str     r8, [dst, #4]
-
-    add     dst, dst, stride
-;;  add     dif, dif, #8
-    add     dif, dif, #16
-
-    subs    lr, lr, #1
-    bne     recon2b_loop
-
-    ldmia   sp!, {r4 - r9, pc}
-
-    ENDP    ; |Recon2B|
-
-    END
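The three deleted recon helpers all performed the same per-pixel operation — add the 16-bit residual to the 8-bit predictor and saturate to [0, 255] — which this update folds into the dequant/idct add paths (see the dual idct code earlier in the patch). An illustrative C equivalent of one four-pixel row, with a hypothetical name:

static void recon_row_sketch(const unsigned char *pred, const short *diff,
                             unsigned char *dst)
{
    int i;

    for (i = 0; i < 4; i++)
    {
        int v = pred[i] + diff[i];
        dst[i] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
}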
rename from media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
rename to media/libvpx/vp8/common/arm/armv6/vp8_mse16x16_armv6.asm
rename from media/libvpx/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
rename to media/libvpx/vp8/common/arm/armv6/vp8_sad16x16_armv6.asm
rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
rename to media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
@@ -139,16 +139,16 @@ loop
     subs    r12, r12, #1
 
     bne     loop
 
     ; return stuff
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
     ENDP
 
     END
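The asr-to-lsr change above (repeated in the three halfpixvar files below) matters because sum*sum for a 16x16 block can exceed 2^31, so the >> 8 must be a logical (unsigned) shift rather than an arithmetic one. An illustrative C rendering of the intended return value:

static unsigned int variance16x16_return_sketch(unsigned int sse, int sum)
{
    /* 256 pixels, hence the >> 8; the multiply wraps mod 2^32 and the
       shift must not replicate a sign bit. */
    return sse - (((unsigned int)sum * (unsigned int)sum) >> 8);
}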
 
rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
rename to media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
rename to media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -164,17 +164,17 @@ loop
     subs    r12, r12, #1
 
     bne     loop
 
     ; return stuff
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
     ENDP
 
 c80808080
     DCD     0x80808080
 
rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
rename to media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -205,17 +205,17 @@ loop
     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
 
     bne     loop
 
     ; return stuff
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
     ENDP
 
 c80808080
     DCD     0x80808080
 
rename from media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
rename to media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
--- a/media/libvpx/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -166,17 +166,17 @@ loop
     subs    r12, r12, #1
 
     bne     loop
 
     ; return stuff
     ldr     r6, [sp, #40]       ; get address of sse
     mul     r0, r8, r8          ; sum * sum
     str     r11, [r6]           ; store sse
-    sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8))
+    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
 
     ldmfd   sp!, {r4-r12, pc}
 
     ENDP
 
 c80808080
     DCD     0x80808080
 
rename from media/libvpx/vp8/decoder/arm/dequantize_arm.c
rename to media/libvpx/vp8/common/arm/dequantize_arm.c
--- a/media/libvpx/vp8/decoder/arm/dequantize_arm.c
+++ b/media/libvpx/vp8/common/arm/dequantize_arm.c
@@ -4,43 +4,40 @@
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
-#include "vpx_ports/config.h"
-#include "vp8/decoder/dequantize.h"
+#include "vpx_config.h"
+#include "vp8/common/dequantize.h"
 #include "vp8/common/idct.h"
-#include "vpx_mem/vpx_mem.h"
 
 #if HAVE_ARMV7
 extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
 #endif
 
 #if HAVE_ARMV6
 extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
 #endif
 
 #if HAVE_ARMV7
 
-void vp8_dequantize_b_neon(BLOCKD *d)
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
 {
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = d->dequant;
 
     vp8_dequantize_b_loop_neon(Q, DQC, DQ);
 }
 #endif
 
 #if HAVE_ARMV6
-void vp8_dequantize_b_v6(BLOCKD *d)
+void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
 {
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = d->dequant;
 
     vp8_dequantize_b_loop_v6(Q, DQC, DQ);
 }
 #endif
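(Note: the hunk above changes vp8_dequantize_b_neon/_v6 to take the dequantization table DQC as an explicit argument instead of always reading d->dequant, so callers can supply a per-macroblock table. The underlying vp8_dequantize_b_loop_* assembly multiplies each quantized coefficient by its dequant factor; a scalar sketch of that operation, with a hypothetical reference name, is:

    /* Scalar equivalent of the vp8_dequantize_b_loop_* kernels:
     * 16 coefficients of a 4x4 block, DQ[i] = Q[i] * DQC[i]. */
    static void dequantize_b_loop_ref(const short *Q, const short *DQC, short *DQ)
    {
        int i;
        for (i = 0; i < 16; i++)
            DQ[i] = (short)(Q[i] * DQC[i]);
    }
)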
rename from media/libvpx/vp8/decoder/arm/dequantize_arm.h
rename to media/libvpx/vp8/common/arm/dequantize_arm.h
--- a/media/libvpx/vp8/decoder/arm/dequantize_arm.h
+++ b/media/libvpx/vp8/common/arm/dequantize_arm.h
@@ -10,64 +10,50 @@
 
 
 #ifndef DEQUANTIZE_ARM_H
 #define DEQUANTIZE_ARM_H
 
 #if HAVE_ARMV6
 extern prototype_dequant_block(vp8_dequantize_b_v6);
 extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
-extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
-extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6);
 extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
 
-#undef vp8_dequant_idct_add
+#undef  vp8_dequant_idct_add
 #define vp8_dequant_idct_add vp8_dequant_idct_add_v6
 
-#undef vp8_dequant_dc_idct_add
-#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
-
-#undef vp8_dequant_dc_idct_add_y_block
-#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_v6
-
-#undef vp8_dequant_idct_add_y_block
+#undef  vp8_dequant_idct_add_y_block
 #define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
 
-#undef vp8_dequant_idct_add_uv_block
+#undef  vp8_dequant_idct_add_uv_block
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
 #endif
 #endif
 
 #if HAVE_ARMV7
 extern prototype_dequant_block(vp8_dequantize_b_neon);
 extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
-extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
-extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neon);
 extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
 
+
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #defin