Bug 1063356 - Update libvpx source. r=kinetik
authorRalph Giles <giles@mozilla.com>
Tue, 07 Oct 2014 09:49:40 -0700
changeset 211092 3093663c5c228510cea59ef68bdefe8ec286cbe0
parent 211091 7bca0cbbecf16ebe4afd887cd38a62f7977bdd06
child 211093 3363b89ef1b760a6243feef1457bd1b1e63d120b
push id11501
push usercbook@mozilla.com
push dateMon, 20 Oct 2014 13:53:11 +0000
treeherderb2g-inbound@d46a0089b8a6 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskinetik
bugs1063356
milestone36.0a1
Bug 1063356 - Update libvpx source. r=kinetik Results of running ./update.py --ndk ~/android/android-ndk-r9 --commit c731d6a4f19eea861ceb2ff31399420b2452eb74
media/libvpx/PATENTS
media/libvpx/README_MOZILLA
media/libvpx/build/make/ads2gas.pl
media/libvpx/build/make/obj_int_extract.c
media/libvpx/build/make/thumb.pm
media/libvpx/sources.mozbuild
media/libvpx/third_party/x86inc/x86inc.asm
media/libvpx/vp8/common/alloccommon.h
media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
media/libvpx/vp8/common/arm/bilinearfilter_arm.h
media/libvpx/vp8/common/arm/dequantize_arm.c
media/libvpx/vp8/common/arm/loopfilter_arm.c
media/libvpx/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
media/libvpx/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
media/libvpx/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
media/libvpx/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
media/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c
media/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
media/libvpx/vp8/common/arm/neon/copymem16x16_neon.asm
media/libvpx/vp8/common/arm/neon/copymem8x4_neon.asm
media/libvpx/vp8/common/arm/neon/copymem8x8_neon.asm
media/libvpx/vp8/common/arm/neon/copymem_neon.c
media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.asm
media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c
media/libvpx/vp8/common/arm/neon/dequant_idct_neon.asm
media/libvpx/vp8/common/arm/neon/dequant_idct_neon.c
media/libvpx/vp8/common/arm/neon/dequantizeb_neon.asm
media/libvpx/vp8/common/arm/neon/dequantizeb_neon.c
media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c
media/libvpx/vp8/common/arm/neon/iwalsh_neon.asm
media/libvpx/vp8/common/arm/neon/iwalsh_neon.c
media/libvpx/vp8/common/arm/neon/loopfilter_neon.asm
media/libvpx/vp8/common/arm/neon/loopfilter_neon.c
media/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
media/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
media/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
media/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c
media/libvpx/vp8/common/arm/neon/mbloopfilter_neon.asm
media/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c
media/libvpx/vp8/common/arm/neon/reconintra_neon.c
media/libvpx/vp8/common/arm/neon/sad16_neon.asm
media/libvpx/vp8/common/arm/neon/sad8_neon.asm
media/libvpx/vp8/common/arm/neon/sad_neon.c
media/libvpx/vp8/common/arm/neon/save_reg_neon.asm
media/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.asm
media/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c
media/libvpx/vp8/common/arm/neon/sixtappredict16x16_neon.asm
media/libvpx/vp8/common/arm/neon/sixtappredict4x4_neon.asm
media/libvpx/vp8/common/arm/neon/sixtappredict8x4_neon.asm
media/libvpx/vp8/common/arm/neon/sixtappredict8x8_neon.asm
media/libvpx/vp8/common/arm/neon/sixtappredict_neon.c
media/libvpx/vp8/common/arm/neon/variance_neon.asm
media/libvpx/vp8/common/arm/neon/variance_neon.c
media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
media/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c
media/libvpx/vp8/common/arm/reconintra_arm.c
media/libvpx/vp8/common/blockd.h
media/libvpx/vp8/common/coefupdateprobs.h
media/libvpx/vp8/common/common.h
media/libvpx/vp8/common/default_coef_probs.h
media/libvpx/vp8/common/entropy.h
media/libvpx/vp8/common/entropymode.h
media/libvpx/vp8/common/entropymv.h
media/libvpx/vp8/common/extend.h
media/libvpx/vp8/common/filter.h
media/libvpx/vp8/common/findnearmv.h
media/libvpx/vp8/common/header.h
media/libvpx/vp8/common/invtrans.h
media/libvpx/vp8/common/loopfilter.c
media/libvpx/vp8/common/loopfilter.h
media/libvpx/vp8/common/modecont.h
media/libvpx/vp8/common/mv.h
media/libvpx/vp8/common/onyx.h
media/libvpx/vp8/common/onyxc_int.h
media/libvpx/vp8/common/onyxd.h
media/libvpx/vp8/common/postproc.c
media/libvpx/vp8/common/postproc.h
media/libvpx/vp8/common/ppflags.h
media/libvpx/vp8/common/pragmas.h
media/libvpx/vp8/common/quant_common.h
media/libvpx/vp8/common/reconinter.h
media/libvpx/vp8/common/reconintra4x4.h
media/libvpx/vp8/common/setupintrarecon.h
media/libvpx/vp8/common/swapyv12buffer.h
media/libvpx/vp8/common/systemdependent.h
media/libvpx/vp8/common/threading.h
media/libvpx/vp8/common/treecoder.h
media/libvpx/vp8/common/variance.h
media/libvpx/vp8/common/vp8_entropymodedata.h
media/libvpx/vp8/common/x86/filter_x86.h
media/libvpx/vp8/common/x86/loopfilter_block_sse2.asm
media/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm
media/libvpx/vp8/common/x86/loopfilter_mmx.asm
media/libvpx/vp8/common/x86/loopfilter_sse2.asm
media/libvpx/vp8/common/x86/postproc_mmx.asm
media/libvpx/vp8/common/x86/postproc_sse2.asm
media/libvpx/vp8/common/x86/postproc_x86.c
media/libvpx/vp8/common/x86/recon_sse2.asm
media/libvpx/vp8/common/x86/variance_impl_mmx.asm
media/libvpx/vp8/common/x86/variance_mmx.c
media/libvpx/vp8/common/x86/variance_sse2.c
media/libvpx/vp8/common/x86/variance_ssse3.c
media/libvpx/vp8/decoder/dboolhuff.c
media/libvpx/vp8/decoder/dboolhuff.h
media/libvpx/vp8/decoder/decodeframe.c
media/libvpx/vp8/decoder/decodemv.h
media/libvpx/vp8/decoder/decoderthreading.h
media/libvpx/vp8/decoder/decodframe.c
media/libvpx/vp8/decoder/detokenize.h
media/libvpx/vp8/decoder/ec_types.h
media/libvpx/vp8/decoder/error_concealment.c
media/libvpx/vp8/decoder/error_concealment.h
media/libvpx/vp8/decoder/onyxd_if.c
media/libvpx/vp8/decoder/onyxd_int.h
media/libvpx/vp8/decoder/treereader.h
media/libvpx/vp8/encoder/arm/neon/denoising_neon.c
media/libvpx/vp8/encoder/arm/neon/picklpf_arm.c
media/libvpx/vp8/encoder/arm/neon/shortfdct_neon.asm
media/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c
media/libvpx/vp8/encoder/arm/neon/subtract_neon.asm
media/libvpx/vp8/encoder/arm/neon/subtract_neon.c
media/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
media/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c
media/libvpx/vp8/encoder/bitstream.c
media/libvpx/vp8/encoder/bitstream.h
media/libvpx/vp8/encoder/block.h
media/libvpx/vp8/encoder/boolhuff.h
media/libvpx/vp8/encoder/dct_value_cost.h
media/libvpx/vp8/encoder/dct_value_tokens.h
media/libvpx/vp8/encoder/defaultcoefcounts.h
media/libvpx/vp8/encoder/denoising.c
media/libvpx/vp8/encoder/denoising.h
media/libvpx/vp8/encoder/encodeframe.c
media/libvpx/vp8/encoder/encodeframe.h
media/libvpx/vp8/encoder/encodeintra.h
media/libvpx/vp8/encoder/encodemb.h
media/libvpx/vp8/encoder/encodemv.h
media/libvpx/vp8/encoder/ethreading.c
media/libvpx/vp8/encoder/firstpass.c
media/libvpx/vp8/encoder/firstpass.h
media/libvpx/vp8/encoder/lookahead.h
media/libvpx/vp8/encoder/mcomp.c
media/libvpx/vp8/encoder/mcomp.h
media/libvpx/vp8/encoder/modecosts.h
media/libvpx/vp8/encoder/mr_dissim.c
media/libvpx/vp8/encoder/mr_dissim.h
media/libvpx/vp8/encoder/onyx_if.c
media/libvpx/vp8/encoder/onyx_int.h
media/libvpx/vp8/encoder/pickinter.c
media/libvpx/vp8/encoder/pickinter.h
media/libvpx/vp8/encoder/picklpf.c
media/libvpx/vp8/encoder/psnr.c
media/libvpx/vp8/encoder/psnr.h
media/libvpx/vp8/encoder/quantize.h
media/libvpx/vp8/encoder/ratectrl.c
media/libvpx/vp8/encoder/ratectrl.h
media/libvpx/vp8/encoder/rdopt.c
media/libvpx/vp8/encoder/rdopt.h
media/libvpx/vp8/encoder/segmentation.h
media/libvpx/vp8/encoder/temporal_filter.c
media/libvpx/vp8/encoder/tokenize.c
media/libvpx/vp8/encoder/tokenize.h
media/libvpx/vp8/encoder/treewriter.h
media/libvpx/vp8/encoder/x86/denoising_sse2.c
media/libvpx/vp8/encoder/x86/quantize_sse2.c
media/libvpx/vp8/encoder/x86/quantize_sse4.asm
media/libvpx/vp8/encoder/x86/quantize_sse4.c
media/libvpx/vp8/encoder/x86/quantize_ssse3.asm
media/libvpx/vp8/encoder/x86/quantize_ssse3.c
media/libvpx/vp8/encoder/x86/ssim_opt_x86_64.asm
media/libvpx/vp8/vp8_cx_iface.c
media/libvpx/vp8/vp8_dx_iface.c
media/libvpx/vp8_rtcd_armv7-android-gcc.h
media/libvpx/vp8_rtcd_generic-gnu.h
media/libvpx/vp8_rtcd_x86-darwin9-gcc.h
media/libvpx/vp8_rtcd_x86-linux-gcc.h
media/libvpx/vp8_rtcd_x86-win32-gcc.h
media/libvpx/vp8_rtcd_x86-win32-vs8.h
media/libvpx/vp8_rtcd_x86_64-darwin9-gcc.h
media/libvpx/vp8_rtcd_x86_64-linux-gcc.h
media/libvpx/vp8_rtcd_x86_64-win64-gcc.h
media/libvpx/vp8_rtcd_x86_64-win64-vs8.h
media/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c
media/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_idct16x16_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_idct4x4_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
media/libvpx/vp9/common/arm/neon/vp9_loopfilter_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
media/libvpx/vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
media/libvpx/vp9/common/generic/vp9_systemdependent.c
media/libvpx/vp9/common/vp9_alloccommon.c
media/libvpx/vp9/common/vp9_alloccommon.h
media/libvpx/vp9/common/vp9_blockd.c
media/libvpx/vp9/common/vp9_blockd.h
media/libvpx/vp9/common/vp9_common.h
media/libvpx/vp9/common/vp9_common_data.c
media/libvpx/vp9/common/vp9_common_data.h
media/libvpx/vp9/common/vp9_convolve.c
media/libvpx/vp9/common/vp9_convolve.h
media/libvpx/vp9/common/vp9_debugmodes.c
media/libvpx/vp9/common/vp9_default_coef_probs.h
media/libvpx/vp9/common/vp9_entropy.c
media/libvpx/vp9/common/vp9_entropy.h
media/libvpx/vp9/common/vp9_entropymode.c
media/libvpx/vp9/common/vp9_entropymode.h
media/libvpx/vp9/common/vp9_entropymv.c
media/libvpx/vp9/common/vp9_entropymv.h
media/libvpx/vp9/common/vp9_enums.h
media/libvpx/vp9/common/vp9_extend.c
media/libvpx/vp9/common/vp9_extend.h
media/libvpx/vp9/common/vp9_filter.c
media/libvpx/vp9/common/vp9_filter.h
media/libvpx/vp9/common/vp9_findnearmv.c
media/libvpx/vp9/common/vp9_findnearmv.h
media/libvpx/vp9/common/vp9_frame_buffers.c
media/libvpx/vp9/common/vp9_frame_buffers.h
media/libvpx/vp9/common/vp9_idct.c
media/libvpx/vp9/common/vp9_idct.h
media/libvpx/vp9/common/vp9_loopfilter.c
media/libvpx/vp9/common/vp9_loopfilter.h
media/libvpx/vp9/common/vp9_loopfilter_filters.c
media/libvpx/vp9/common/vp9_mv.h
media/libvpx/vp9/common/vp9_mvref_common.c
media/libvpx/vp9/common/vp9_mvref_common.h
media/libvpx/vp9/common/vp9_onyx.h
media/libvpx/vp9/common/vp9_onyxc_int.h
media/libvpx/vp9/common/vp9_postproc.c
media/libvpx/vp9/common/vp9_postproc.h
media/libvpx/vp9/common/vp9_ppflags.h
media/libvpx/vp9/common/vp9_pragmas.h
media/libvpx/vp9/common/vp9_pred_common.c
media/libvpx/vp9/common/vp9_pred_common.h
media/libvpx/vp9/common/vp9_prob.c
media/libvpx/vp9/common/vp9_prob.h
media/libvpx/vp9/common/vp9_quant_common.c
media/libvpx/vp9/common/vp9_quant_common.h
media/libvpx/vp9/common/vp9_reconinter.c
media/libvpx/vp9/common/vp9_reconinter.h
media/libvpx/vp9/common/vp9_reconintra.c
media/libvpx/vp9/common/vp9_reconintra.h
media/libvpx/vp9/common/vp9_sadmxn.h
media/libvpx/vp9/common/vp9_scale.c
media/libvpx/vp9/common/vp9_scale.h
media/libvpx/vp9/common/vp9_scan.c
media/libvpx/vp9/common/vp9_scan.h
media/libvpx/vp9/common/vp9_seg_common.c
media/libvpx/vp9/common/vp9_seg_common.h
media/libvpx/vp9/common/vp9_systemdependent.h
media/libvpx/vp9/common/vp9_thread.c
media/libvpx/vp9/common/vp9_thread.h
media/libvpx/vp9/common/vp9_tile_common.c
media/libvpx/vp9/common/vp9_tile_common.h
media/libvpx/vp9/common/vp9_treecoder.c
media/libvpx/vp9/common/vp9_treecoder.h
media/libvpx/vp9/common/x86/vp9_asm_stubs.c
media/libvpx/vp9/common/x86/vp9_copy_sse2.asm
media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
media/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h
media/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c
media/libvpx/vp9/common/x86/vp9_idct_ssse3_x86_64.asm
media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
media/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
media/libvpx/vp9/common/x86/vp9_loopfilter_mmx.asm
media/libvpx/vp9/common/x86/vp9_postproc_mmx.asm
media/libvpx/vp9/common/x86/vp9_postproc_sse2.asm
media/libvpx/vp9/common/x86/vp9_postproc_x86.h
media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
media/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
media/libvpx/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm
media/libvpx/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
media/libvpx/vp9/decoder/vp9_dboolhuff.c
media/libvpx/vp9/decoder/vp9_dboolhuff.h
media/libvpx/vp9/decoder/vp9_decodeframe.c
media/libvpx/vp9/decoder/vp9_decodeframe.h
media/libvpx/vp9/decoder/vp9_decodemv.c
media/libvpx/vp9/decoder/vp9_decodemv.h
media/libvpx/vp9/decoder/vp9_decoder.c
media/libvpx/vp9/decoder/vp9_decoder.h
media/libvpx/vp9/decoder/vp9_decodframe.c
media/libvpx/vp9/decoder/vp9_decodframe.h
media/libvpx/vp9/decoder/vp9_detokenize.c
media/libvpx/vp9/decoder/vp9_detokenize.h
media/libvpx/vp9/decoder/vp9_dsubexp.c
media/libvpx/vp9/decoder/vp9_dsubexp.h
media/libvpx/vp9/decoder/vp9_dthread.c
media/libvpx/vp9/decoder/vp9_dthread.h
media/libvpx/vp9/decoder/vp9_onyxd.h
media/libvpx/vp9/decoder/vp9_onyxd_if.c
media/libvpx/vp9/decoder/vp9_onyxd_int.h
media/libvpx/vp9/decoder/vp9_read_bit_buffer.c
media/libvpx/vp9/decoder/vp9_read_bit_buffer.h
media/libvpx/vp9/decoder/vp9_reader.c
media/libvpx/vp9/decoder/vp9_reader.h
media/libvpx/vp9/decoder/vp9_thread.c
media/libvpx/vp9/decoder/vp9_thread.h
media/libvpx/vp9/decoder/vp9_treereader.h
media/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c
media/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
media/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c
media/libvpx/vp9/encoder/arm/neon/vp9_subtract_neon.c
media/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
media/libvpx/vp9/encoder/vp9_aq_complexity.c
media/libvpx/vp9/encoder/vp9_aq_complexity.h
media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
media/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
media/libvpx/vp9/encoder/vp9_aq_variance.c
media/libvpx/vp9/encoder/vp9_aq_variance.h
media/libvpx/vp9/encoder/vp9_bitstream.c
media/libvpx/vp9/encoder/vp9_bitstream.h
media/libvpx/vp9/encoder/vp9_block.h
media/libvpx/vp9/encoder/vp9_boolhuff.c
media/libvpx/vp9/encoder/vp9_boolhuff.h
media/libvpx/vp9/encoder/vp9_context_tree.c
media/libvpx/vp9/encoder/vp9_context_tree.h
media/libvpx/vp9/encoder/vp9_cost.c
media/libvpx/vp9/encoder/vp9_cost.h
media/libvpx/vp9/encoder/vp9_dct.c
media/libvpx/vp9/encoder/vp9_dct.h
media/libvpx/vp9/encoder/vp9_denoiser.c
media/libvpx/vp9/encoder/vp9_denoiser.h
media/libvpx/vp9/encoder/vp9_encodeframe.c
media/libvpx/vp9/encoder/vp9_encodeframe.h
media/libvpx/vp9/encoder/vp9_encodeintra.c
media/libvpx/vp9/encoder/vp9_encodeintra.h
media/libvpx/vp9/encoder/vp9_encodemb.c
media/libvpx/vp9/encoder/vp9_encodemb.h
media/libvpx/vp9/encoder/vp9_encodemv.c
media/libvpx/vp9/encoder/vp9_encodemv.h
media/libvpx/vp9/encoder/vp9_encoder.c
media/libvpx/vp9/encoder/vp9_encoder.h
media/libvpx/vp9/encoder/vp9_extend.c
media/libvpx/vp9/encoder/vp9_extend.h
media/libvpx/vp9/encoder/vp9_firstpass.c
media/libvpx/vp9/encoder/vp9_firstpass.h
media/libvpx/vp9/encoder/vp9_lookahead.c
media/libvpx/vp9/encoder/vp9_lookahead.h
media/libvpx/vp9/encoder/vp9_mbgraph.c
media/libvpx/vp9/encoder/vp9_mbgraph.h
media/libvpx/vp9/encoder/vp9_mcomp.c
media/libvpx/vp9/encoder/vp9_mcomp.h
media/libvpx/vp9/encoder/vp9_modecosts.c
media/libvpx/vp9/encoder/vp9_modecosts.h
media/libvpx/vp9/encoder/vp9_onyx_if.c
media/libvpx/vp9/encoder/vp9_onyx_int.h
media/libvpx/vp9/encoder/vp9_picklpf.c
media/libvpx/vp9/encoder/vp9_picklpf.h
media/libvpx/vp9/encoder/vp9_pickmode.c
media/libvpx/vp9/encoder/vp9_pickmode.h
media/libvpx/vp9/encoder/vp9_psnr.c
media/libvpx/vp9/encoder/vp9_psnr.h
media/libvpx/vp9/encoder/vp9_quantize.c
media/libvpx/vp9/encoder/vp9_quantize.h
media/libvpx/vp9/encoder/vp9_ratectrl.c
media/libvpx/vp9/encoder/vp9_ratectrl.h
media/libvpx/vp9/encoder/vp9_rd.c
media/libvpx/vp9/encoder/vp9_rd.h
media/libvpx/vp9/encoder/vp9_rdopt.c
media/libvpx/vp9/encoder/vp9_rdopt.h
media/libvpx/vp9/encoder/vp9_resize.c
media/libvpx/vp9/encoder/vp9_resize.h
media/libvpx/vp9/encoder/vp9_sad.c
media/libvpx/vp9/encoder/vp9_sad_c.c
media/libvpx/vp9/encoder/vp9_segmentation.c
media/libvpx/vp9/encoder/vp9_segmentation.h
media/libvpx/vp9/encoder/vp9_speed_features.c
media/libvpx/vp9/encoder/vp9_speed_features.h
media/libvpx/vp9/encoder/vp9_ssim.h
media/libvpx/vp9/encoder/vp9_subexp.c
media/libvpx/vp9/encoder/vp9_subexp.h
media/libvpx/vp9/encoder/vp9_svc_layercontext.c
media/libvpx/vp9/encoder/vp9_svc_layercontext.h
media/libvpx/vp9/encoder/vp9_temporal_filter.c
media/libvpx/vp9/encoder/vp9_temporal_filter.h
media/libvpx/vp9/encoder/vp9_tokenize.c
media/libvpx/vp9/encoder/vp9_tokenize.h
media/libvpx/vp9/encoder/vp9_treewriter.c
media/libvpx/vp9/encoder/vp9_treewriter.h
media/libvpx/vp9/encoder/vp9_vaq.c
media/libvpx/vp9/encoder/vp9_vaq.h
media/libvpx/vp9/encoder/vp9_variance.c
media/libvpx/vp9/encoder/vp9_variance.h
media/libvpx/vp9/encoder/vp9_variance_c.c
media/libvpx/vp9/encoder/vp9_write_bit_buffer.c
media/libvpx/vp9/encoder/vp9_write_bit_buffer.h
media/libvpx/vp9/encoder/vp9_writer.c
media/libvpx/vp9/encoder/vp9_writer.h
media/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c
media/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c
media/libvpx/vp9/encoder/x86/vp9_dct_avx2.c
media/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm
media/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
media/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
media/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c
media/libvpx/vp9/encoder/x86/vp9_mcomp_x86.h
media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm
media/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
media/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
media/libvpx/vp9/encoder/x86/vp9_sad_mmx.asm
media/libvpx/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
media/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
media/libvpx/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
media/libvpx/vp9/encoder/x86/vp9_variance_avx2.c
media/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c
media/libvpx/vp9/encoder/x86/vp9_variance_impl_mmx.asm
media/libvpx/vp9/encoder/x86/vp9_variance_impl_sse2.asm
media/libvpx/vp9/encoder/x86/vp9_variance_mmx.c
media/libvpx/vp9/encoder/x86/vp9_variance_sse2.c
media/libvpx/vp9/vp9_cx_iface.c
media/libvpx/vp9/vp9_dx_iface.c
media/libvpx/vp9/vp9_iface_common.h
media/libvpx/vp9_rtcd_armv7-android-gcc.h
media/libvpx/vp9_rtcd_generic-gnu.h
media/libvpx/vp9_rtcd_x86-darwin9-gcc.h
media/libvpx/vp9_rtcd_x86-linux-gcc.h
media/libvpx/vp9_rtcd_x86-win32-gcc.h
media/libvpx/vp9_rtcd_x86-win32-vs8.h
media/libvpx/vp9_rtcd_x86_64-darwin9-gcc.h
media/libvpx/vp9_rtcd_x86_64-linux-gcc.h
media/libvpx/vp9_rtcd_x86_64-win64-gcc.h
media/libvpx/vp9_rtcd_x86_64-win64-vs8.h
media/libvpx/vpx/internal/vpx_codec_internal.h
media/libvpx/vpx/internal/vpx_psnr.h
media/libvpx/vpx/src/svc_encodeframe.c
media/libvpx/vpx/src/vpx_codec.c
media/libvpx/vpx/src/vpx_decoder.c
media/libvpx/vpx/src/vpx_encoder.c
media/libvpx/vpx/src/vpx_image.c
media/libvpx/vpx/src/vpx_psnr.c
media/libvpx/vpx/svc_context.h
media/libvpx/vpx/vp8.h
media/libvpx/vpx/vp8cx.h
media/libvpx/vpx/vp8dx.h
media/libvpx/vpx/vpx_codec.h
media/libvpx/vpx/vpx_decoder.h
media/libvpx/vpx/vpx_encoder.h
media/libvpx/vpx/vpx_frame_buffer.h
media/libvpx/vpx/vpx_image.h
media/libvpx/vpx/vpx_integer.h
media/libvpx/vpx_config_armv7-android-gcc.asm
media/libvpx/vpx_config_armv7-android-gcc.h
media/libvpx/vpx_config_generic-gnu.asm
media/libvpx/vpx_config_generic-gnu.h
media/libvpx/vpx_config_x86-darwin9-gcc.asm
media/libvpx/vpx_config_x86-darwin9-gcc.h
media/libvpx/vpx_config_x86-linux-gcc.asm
media/libvpx/vpx_config_x86-linux-gcc.h
media/libvpx/vpx_config_x86-win32-gcc.asm
media/libvpx/vpx_config_x86-win32-gcc.h
media/libvpx/vpx_config_x86-win32-vs8.asm
media/libvpx/vpx_config_x86-win32-vs8.h
media/libvpx/vpx_config_x86_64-darwin9-gcc.asm
media/libvpx/vpx_config_x86_64-darwin9-gcc.h
media/libvpx/vpx_config_x86_64-linux-gcc.asm
media/libvpx/vpx_config_x86_64-linux-gcc.h
media/libvpx/vpx_config_x86_64-win64-gcc.asm
media/libvpx/vpx_config_x86_64-win64-gcc.h
media/libvpx/vpx_config_x86_64-win64-vs8.asm
media/libvpx/vpx_config_x86_64-win64-vs8.h
media/libvpx/vpx_mem/include/vpx_mem_intrnl.h
media/libvpx/vpx_mem/include/vpx_mem_tracker.h
media/libvpx/vpx_mem/vpx_mem.c
media/libvpx/vpx_mem/vpx_mem.h
media/libvpx/vpx_ports/arm.h
media/libvpx/vpx_ports/arm_cpudetect.c
media/libvpx/vpx_ports/asm_offsets.h
media/libvpx/vpx_ports/config.h
media/libvpx/vpx_ports/emmintrin_compat.h
media/libvpx/vpx_ports/mem.h
media/libvpx/vpx_ports/mem_ops.h
media/libvpx/vpx_ports/mem_ops_aligned.h
media/libvpx/vpx_ports/vpx_once.h
media/libvpx/vpx_ports/vpx_timer.h
media/libvpx/vpx_ports/x86.h
media/libvpx/vpx_ports/x86_abi_support.asm
media/libvpx/vpx_ports/x86_cpuid.c
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
media/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
media/libvpx/vpx_scale/arm/neon/yv12extend_arm.c
media/libvpx/vpx_scale/generic/yv12config.c
media/libvpx/vpx_scale/generic/yv12extend.c
media/libvpx/vpx_scale/vpx_scale.h
media/libvpx/vpx_scale/yv12config.h
media/libvpx/vpx_scale_rtcd_armv7-android-gcc.h
media/libvpx/vpx_scale_rtcd_generic-gnu.h
media/libvpx/vpx_scale_rtcd_x86-darwin9-gcc.h
media/libvpx/vpx_scale_rtcd_x86-linux-gcc.h
media/libvpx/vpx_scale_rtcd_x86-win32-gcc.h
media/libvpx/vpx_scale_rtcd_x86-win32-vs8.h
media/libvpx/vpx_scale_rtcd_x86_64-darwin9-gcc.h
media/libvpx/vpx_scale_rtcd_x86_64-linux-gcc.h
media/libvpx/vpx_scale_rtcd_x86_64-win64-gcc.h
media/libvpx/vpx_scale_rtcd_x86_64-win64-vs8.h
media/libvpx/vpx_version.h
--- a/media/libvpx/PATENTS
+++ b/media/libvpx/PATENTS
@@ -1,22 +1,23 @@
 Additional IP Rights Grant (Patents)
+------------------------------------
 
-"This implementation" means the copyrightable works distributed by
-Google as part of the WebM Project.
+"These implementations" means the copyrightable works that implement the WebM
+codecs distributed by Google as part of the WebM Project.
 
-Google hereby grants to you a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable (except as stated in this section)
-patent license to make, have made, use, offer to sell, sell, import,
-transfer, and otherwise run, modify and propagate the contents of this
-implementation of VP8, where such license applies only to those patent
-claims, both currently owned by Google and acquired in the future,
-licensable by Google that are necessarily infringed by this
-implementation of VP8. This grant does not include claims that would be
-infringed only as a consequence of further modification of this
-implementation. If you or your agent or exclusive licensee institute or
-order or agree to the institution of patent litigation against any
-entity (including a cross-claim or counterclaim in a lawsuit) alleging
-that this implementation of VP8 or any code incorporated within this
-implementation of VP8 constitutes direct or contributory patent
-infringement, or inducement of patent infringement, then any patent
-rights granted to you under this License for this implementation of VP8
-shall terminate as of the date such litigation is filed.
+Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
+royalty-free, irrevocable (except as stated in this section) patent license to
+make, have made, use, offer to sell, sell, import, transfer, and otherwise
+run, modify and propagate the contents of these implementations of WebM, where
+such license applies only to those patent claims, both currently owned by
+Google and acquired in the future, licensable by Google that are necessarily
+infringed by these implementations of WebM. This grant does not include claims
+that would be infringed only as a consequence of further modification of these
+implementations. If you or your agent or exclusive licensee institute or order
+or agree to the institution of patent litigation or any other patent
+enforcement activity against any entity (including a cross-claim or
+counterclaim in a lawsuit) alleging that any of these implementations of WebM
+or any code incorporated within any of these implementations of WebM
+constitutes direct or contributory patent infringement, or inducement of
+patent infringement, then any patent rights granted to you under this License
+for these implementations of WebM shall terminate as of the date such
+litigation is filed.
--- a/media/libvpx/README_MOZILLA
+++ b/media/libvpx/README_MOZILLA
@@ -3,9 +3,9 @@ git repository using the update.py scrip
 made were those applied by update.py and the addition of
 moz.build and Makefile.in build files for the
 Mozilla build system.
 
 The libvpx git repository is:
 
     https://gerrit.chromium.org/gerrit/webm/libvpx
 
-The git commit ID used was 2e88f2f2ec777259bda1714e72f1ecd2519bceb5
+The git commit ID used was c731d6a4f19eea861ceb2ff31399420b2452eb74
--- a/media/libvpx/build/make/ads2gas.pl
+++ b/media/libvpx/build/make/ads2gas.pl
@@ -1,9 +1,9 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 ##
 ##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
 ##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
--- a/media/libvpx/build/make/obj_int_extract.c
+++ b/media/libvpx/build/make/obj_int_extract.c
@@ -16,41 +16,57 @@
 
 #include "vpx_config.h"
 #include "vpx/vpx_integer.h"
 
 typedef enum {
   OUTPUT_FMT_PLAIN,
   OUTPUT_FMT_RVDS,
   OUTPUT_FMT_GAS,
+  OUTPUT_FMT_C_HEADER,
 } output_fmt_t;
 
 int log_msg(const char *fmt, ...) {
   int res;
   va_list ap;
   va_start(ap, fmt);
   res = vfprintf(stderr, fmt, ap);
   va_end(ap);
   return res;
 }
 
 #if defined(__GNUC__) && __GNUC__
+
+#if defined(FORCE_PARSE_ELF)
+
+#if defined(__MACH__)
+#undef __MACH__
+#endif
+
+#if !defined(__ELF__)
+#define __ELF__
+#endif
+#endif
+
 #if defined(__MACH__)
 
 #include <mach-o/loader.h>
 #include <mach-o/nlist.h>
 
 int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
   switch (mode) {
     case OUTPUT_FMT_RVDS:
       printf("%-40s EQU %5d\n", name, val);
       return 0;
-    case  OUTPUT_FMT_GAS:
+    case OUTPUT_FMT_GAS:
       printf(".set %-40s, %5d\n", name, val);
       return 0;
+    case OUTPUT_FMT_C_HEADER:
+      printf("#define %-40s %5d\n", name, val);
+      return 0;
     default:
       log_msg("Unsupported mode: %d", mode);
       return 1;
   }
 }
 
 int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
   int i, j;
@@ -316,17 +332,17 @@ int parse_elf_section(elf_obj_t *elf, in
     ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
   }
 
   return 0;
 bail:
   return 1;
 }
 
-char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
+const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
   if (elf->bits == 32) {
     Elf32_Shdr shdr;
 
     if (parse_elf_section(elf, s_idx, &shdr, NULL)) {
       log_msg("Failed to parse ELF string table: section %d, index %d\n",
               s_idx, idx);
       return "";
     }
@@ -486,16 +502,23 @@ int parse_elf(uint8_t *buf, size_t sz, o
                 break;
               case OUTPUT_FMT_GAS:
                 printf(".equ %-40s, %5d\n",
                        parse_elf_string_table(&elf,
                                               shdr.sh_link,
                                               sym.st_name),
                        val);
                 break;
+              case OUTPUT_FMT_C_HEADER:
+                printf("#define %-40s %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
               default:
                 printf("%s = %d\n",
                        parse_elf_string_table(&elf,
                                               shdr.sh_link,
                                               sym.st_name),
                        val);
             }
           }
@@ -650,17 +673,21 @@ int parse_coff(uint8_t *buf, size_t sz) 
     sectionlist[i] = malloc(strlen(sectionname) + 1);
 
     if (sectionlist[i] == NULL) {
       log_msg("Allocating storage for %s failed\n", sectionname);
       goto bail;
     }
     strcpy(sectionlist[i], sectionname);
 
-    if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20);
+    // check if it's .rdata and is not a COMDAT section.
+    if (!strcmp(sectionname, ".rdata") &&
+        (get_le32(ptr + 36) & 0x1000) == 0) {
+      sectionrawdata_ptr = get_le32(ptr + 20);
+    }
 
     ptr += 40;
   }
 
   // log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
   // log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr);
 
   /*  The compiler puts the data with non-zero offset in .rdata section, but puts the data with
@@ -757,25 +784,28 @@ int main(int argc, char **argv) {
   long int file_size;
 
   if (argc < 2 || argc > 3) {
     fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
     fprintf(stderr, "  <obj file>\tobject file to parse\n");
     fprintf(stderr, "Output Formats:\n");
     fprintf(stderr, "  gas  - compatible with GNU assembler\n");
     fprintf(stderr, "  rvds - compatible with armasm\n");
+    fprintf(stderr, "  cheader - c/c++ header file\n");
     goto bail;
   }
 
   f = argv[2];
 
   if (!strcmp(argv[1], "rvds"))
     mode = OUTPUT_FMT_RVDS;
   else if (!strcmp(argv[1], "gas"))
     mode = OUTPUT_FMT_GAS;
+  else if (!strcmp(argv[1], "cheader"))
+    mode = OUTPUT_FMT_C_HEADER;
   else
     f = argv[1];
 
   fp = fopen(f, "rb");
 
   if (!fp) {
     perror("Unable to open file");
     goto bail;
--- a/media/libvpx/build/make/thumb.pm
+++ b/media/libvpx/build/make/thumb.pm
@@ -1,9 +1,9 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 ##
 ##  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
 ##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
@@ -19,17 +19,17 @@ sub FixThumbInstructions($$)
     # Write additions with shifts, such as "add r10, r11, lsl #8",
     # in three operand form, "add r10, r10, r11, lsl #8".
     s/(add\s+)(r\d+),\s*(r\d+),\s*(lsl #\d+)/$1$2, $2, $3, $4/g;
 
     # Convert additions with a non-constant shift into a sequence
     # with left shift, addition and a right shift (to restore the
     # register to the original value). Currently the right shift
     # isn't necessary in the code base since the values in these
-    # registers aren't used, but doing the shift for consitency.
+    # registers aren't used, but doing the shift for consistency.
     # This converts instructions such as "add r12, r12, r5, lsl r4"
     # into the sequence "lsl r5, r4", "add r12, r12, r5", "lsr r5, r4".
     s/^(\s*)(add)(\s+)(r\d+),\s*(r\d+),\s*(r\d+),\s*lsl (r\d+)/$1lsl$3$6, $7\n$1$2$3$4, $5, $6\n$1lsr$3$6, $7/g;
 
     # Convert loads with right shifts in the indexing into a
     # sequence of an add, load and sub. This converts
     # "ldrb r4, [r9, lr, asr #1]" into "add r9, r9, lr, asr #1",
     # "ldrb r9, [r9]", "sub r9, r9, lr, asr #1".
@@ -46,17 +46,17 @@ sub FixThumbInstructions($$)
     # "addne src, src, pstep, lsl #1". In a couple of cases where
     # this is used, it's used for two subsequent load instructions,
     # where a hand-written version of it could merge two subsequent
     # add and sub instructions.
     s/^(\s*)((ldr|str|pld)(ne)?)(\s+)(r\d+,\s*)?\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6\[$7\]\n$1add$4$5$7, $7, $8/g;
 
     # Convert register post indexing to a separate add instruction.
     # This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]",
-    # "add r0, r2".
+    # "addne r0, r0, r2".
     s/^(\s*)((ldr|str)(ne)?[bhd]?)(\s+)(\w+),(\s*\w+,)?\s*\[(\w+)\],\s*(\w+)/$1$2$5$6,$7 [$8]\n$1add$4$5$8, $8, $9/g;
 
     # Convert a conditional addition to the pc register into a series of
     # instructions. This converts "addlt pc, pc, r3, lsl #2" into
     # "itttt lt", "movlt.n r12, pc", "addlt.w r12, #12",
     # "addlt.w r12, r12, r3, lsl #2", "movlt.n pc, r12".
     # This assumes that r12 is free at this point.
     s/^(\s*)addlt(\s+)pc,\s*pc,\s*(\w+),\s*lsl\s*#(\d+)/$1itttt$2lt\n$1movlt.n$2r12, pc\n$1addlt.w$2r12, #12\n$1addlt.w$2r12, r12, $3, lsl #($4-$branch_shift_offset)\n$1movlt.n$2pc, r12/g;
--- a/media/libvpx/sources.mozbuild
+++ b/media/libvpx/sources.mozbuild
@@ -1,341 +1,342 @@
 files = {
- 'ARM_ASM': [
-    'vp8/common/arm/armv6/bilinearfilter_v6.asm',
-    'vp8/common/arm/armv6/copymem16x16_v6.asm',
-    'vp8/common/arm/armv6/copymem8x4_v6.asm',
-    'vp8/common/arm/armv6/copymem8x8_v6.asm',
-    'vp8/common/arm/armv6/dc_only_idct_add_v6.asm',
-    'vp8/common/arm/armv6/dequant_idct_v6.asm',
-    'vp8/common/arm/armv6/dequantize_v6.asm',
-    'vp8/common/arm/armv6/filter_v6.asm',
-    'vp8/common/arm/armv6/idct_blk_v6.c',
-    'vp8/common/arm/armv6/idct_v6.asm',
-    'vp8/common/arm/armv6/intra4x4_predict_v6.asm',
-    'vp8/common/arm/armv6/iwalsh_v6.asm',
-    'vp8/common/arm/armv6/loopfilter_v6.asm',
-    'vp8/common/arm/armv6/simpleloopfilter_v6.asm',
-    'vp8/common/arm/armv6/sixtappredict8x4_v6.asm',
-    'vp8/common/arm/armv6/vp8_sad16x16_armv6.asm',
-    'vp8/common/arm/armv6/vp8_variance16x16_armv6.asm',
-    'vp8/common/arm/armv6/vp8_variance8x8_armv6.asm',
-    'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm',
-    'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm',
-    'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm',
-    'vp8/common/arm/bilinearfilter_arm.c',
-    'vp8/common/arm/dequantize_arm.c',
-    'vp8/common/arm/filter_arm.c',
-    'vp8/common/arm/loopfilter_arm.c',
-    'vp8/common/arm/neon/bilinearpredict16x16_neon.asm',
-    'vp8/common/arm/neon/bilinearpredict4x4_neon.asm',
-    'vp8/common/arm/neon/bilinearpredict8x4_neon.asm',
-    'vp8/common/arm/neon/bilinearpredict8x8_neon.asm',
-    'vp8/common/arm/neon/buildintrapredictorsmby_neon.asm',
-    'vp8/common/arm/neon/copymem16x16_neon.asm',
-    'vp8/common/arm/neon/copymem8x4_neon.asm',
-    'vp8/common/arm/neon/copymem8x8_neon.asm',
-    'vp8/common/arm/neon/dc_only_idct_add_neon.asm',
-    'vp8/common/arm/neon/dequant_idct_neon.asm',
-    'vp8/common/arm/neon/dequantizeb_neon.asm',
-    'vp8/common/arm/neon/idct_blk_neon.c',
-    'vp8/common/arm/neon/idct_dequant_0_2x_neon.asm',
-    'vp8/common/arm/neon/idct_dequant_full_2x_neon.asm',
-    'vp8/common/arm/neon/iwalsh_neon.asm',
-    'vp8/common/arm/neon/loopfilter_neon.asm',
-    'vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm',
-    'vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm',
-    'vp8/common/arm/neon/mbloopfilter_neon.asm',
-    'vp8/common/arm/neon/sad16_neon.asm',
-    'vp8/common/arm/neon/sad8_neon.asm',
-    'vp8/common/arm/neon/save_reg_neon.asm',
-    'vp8/common/arm/neon/shortidct4x4llm_neon.asm',
-    'vp8/common/arm/neon/sixtappredict16x16_neon.asm',
-    'vp8/common/arm/neon/sixtappredict4x4_neon.asm',
-    'vp8/common/arm/neon/sixtappredict8x4_neon.asm',
-    'vp8/common/arm/neon/sixtappredict8x8_neon.asm',
-    'vp8/common/arm/neon/variance_neon.asm',
-    'vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm',
-    'vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm',
-    'vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm',
-    'vp8/common/arm/reconintra_arm.c',
-    'vp8/common/arm/variance_arm.c',
-    'vp8/encoder/arm/armv5te/boolhuff_armv5te.asm',
-    'vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm',
-    'vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm',
-    'vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm',
-    'vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm',
-    'vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
-    'vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
-    'vp8/encoder/arm/armv6/vp8_subtract_armv6.asm',
-    'vp8/encoder/arm/armv6/walsh_v6.asm',
-    'vp8/encoder/arm/boolhuff_arm.c',
-    'vp8/encoder/arm/dct_arm.c',
-    'vp8/encoder/arm/neon/fastquantizeb_neon.asm',
-    'vp8/encoder/arm/neon/picklpf_arm.c',
-    'vp8/encoder/arm/neon/shortfdct_neon.asm',
-    'vp8/encoder/arm/neon/subtract_neon.asm',
-    'vp8/encoder/arm/neon/vp8_memcpy_neon.asm',
-    'vp8/encoder/arm/neon/vp8_mse16x16_neon.asm',
-    'vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm',
-    'vp8/encoder/arm/quantize_arm.c',
-    'vp9/common/arm/neon/vp9_avg_neon.asm',
-    'vp9/common/arm/neon/vp9_convolve8_avg_neon.asm',
-    'vp9/common/arm/neon/vp9_convolve8_neon.asm',
-    'vp9/common/arm/neon/vp9_convolve_neon.c',
-    'vp9/common/arm/neon/vp9_copy_neon.asm',
-    'vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm',
-    'vp9/common/arm/neon/vp9_idct16x16_neon.c',
-    'vp9/common/arm/neon/vp9_loopfilter_neon.asm',
-    'vp9/common/arm/neon/vp9_mb_lpf_neon.asm',
-    'vp9/common/arm/neon/vp9_save_reg_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm',
-    'vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm',
-    'vpx_ports/arm_cpudetect.c',
-    'vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm',
-    'vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm',
-    'vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm',
-    'vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm',
-    'vpx_scale/arm/neon/yv12extend_arm.c'
- ],
- 'AVX2': ['vp9/common/x86/vp9_loopfilter_intrin_avx2.c'],
- 'UNIFIED_SOURCES': [
-    'vp8/common/alloccommon.c',
-    'vp8/common/blockd.c',
-    'vp8/common/debugmodes.c',
-    'vp8/common/dequantize.c',
-    'vp8/common/entropy.c',
-    'vp8/common/entropymode.c',
-    'vp8/common/entropymv.c',
-    'vp8/common/extend.c',
-    'vp8/common/filter.c',
-    'vp8/common/findnearmv.c',
-    'vp8/common/generic/systemdependent.c',
-    'vp8/common/idct_blk.c',
-    'vp8/common/idctllm.c',
-    'vp8/common/loopfilter.c',
-    'vp8/common/loopfilter_filters.c',
-    'vp8/common/mbpitch.c',
-    'vp8/common/modecont.c',
-    'vp8/common/quant_common.c',
-    'vp8/common/reconinter.c',
-    'vp8/common/reconintra.c',
-    'vp8/common/reconintra4x4.c',
-    'vp8/common/setupintrarecon.c',
-    'vp8/common/swapyv12buffer.c',
-    'vp8/common/treecoder.c',
-    'vp8/common/variance_c.c',
-    'vp8/decoder/dboolhuff.c',
-    'vp8/decoder/decodemv.c',
-    'vp8/decoder/decodframe.c',
-    'vp8/decoder/detokenize.c',
-    'vp8/decoder/onyxd_if.c',
-    'vp8/decoder/threading.c',
-    'vp8/encoder/bitstream.c',
-    'vp8/encoder/dct.c',
-    'vp8/encoder/denoising.c',
-    'vp8/encoder/encodeframe.c',
-    'vp8/encoder/encodeintra.c',
-    'vp8/encoder/encodemb.c',
-    'vp8/encoder/encodemv.c',
-    'vp8/encoder/ethreading.c',
-    'vp8/encoder/firstpass.c',
-    'vp8/encoder/lookahead.c',
-    'vp8/encoder/mcomp.c',
-    'vp8/encoder/modecosts.c',
-    'vp8/encoder/mr_dissim.c',
-    'vp8/encoder/onyx_if.c',
-    'vp8/encoder/pickinter.c',
-    'vp8/encoder/picklpf.c',
-    'vp8/encoder/psnr.c',
-    'vp8/encoder/quantize.c',
-    'vp8/encoder/ratectrl.c',
-    'vp8/encoder/rdopt.c',
-    'vp8/encoder/segmentation.c',
-    'vp8/encoder/temporal_filter.c',
-    'vp8/encoder/tokenize.c',
-    'vp8/encoder/treewriter.c',
-    'vp8/vp8_cx_iface.c',
-    'vp9/common/generic/vp9_systemdependent.c',
-    'vp9/common/vp9_alloccommon.c',
-    'vp9/common/vp9_common_data.c',
-    'vp9/common/vp9_convolve.c',
-    'vp9/common/vp9_debugmodes.c',
-    'vp9/common/vp9_entropy.c',
-    'vp9/common/vp9_entropymode.c',
-    'vp9/common/vp9_extend.c',
-    'vp9/common/vp9_filter.c',
-    'vp9/common/vp9_findnearmv.c',
-    'vp9/common/vp9_idct.c',
-    'vp9/common/vp9_loopfilter.c',
-    'vp9/common/vp9_loopfilter_filters.c',
-    'vp9/common/vp9_mvref_common.c',
-    'vp9/common/vp9_pred_common.c',
-    'vp9/common/vp9_quant_common.c',
-    'vp9/common/vp9_reconinter.c',
-    'vp9/common/vp9_reconintra.c',
-    'vp9/common/vp9_scale.c',
-    'vp9/common/vp9_scan.c',
-    'vp9/common/vp9_seg_common.c',
-    'vp9/common/vp9_tile_common.c',
-    'vp9/common/vp9_treecoder.c',
-    'vp9/decoder/vp9_dboolhuff.c',
-    'vp9/decoder/vp9_decodemv.c',
-    'vp9/decoder/vp9_decodframe.c',
-    'vp9/decoder/vp9_detokenize.c',
-    'vp9/decoder/vp9_dsubexp.c',
-    'vp9/decoder/vp9_onyxd_if.c',
-    'vp9/decoder/vp9_thread.c',
-    'vp9/encoder/vp9_boolhuff.c',
-    'vp9/encoder/vp9_dct.c',
-    'vp9/encoder/vp9_encodeframe.c',
-    'vp9/encoder/vp9_encodeintra.c',
-    'vp9/encoder/vp9_encodemb.c',
-    'vp9/encoder/vp9_encodemv.c',
-    'vp9/encoder/vp9_firstpass.c',
-    'vp9/encoder/vp9_lookahead.c',
-    'vp9/encoder/vp9_mbgraph.c',
-    'vp9/encoder/vp9_mcomp.c',
-    'vp9/encoder/vp9_modecosts.c',
-    'vp9/encoder/vp9_onyx_if.c',
-    'vp9/encoder/vp9_picklpf.c',
-    'vp9/encoder/vp9_psnr.c',
-    'vp9/encoder/vp9_quantize.c',
-    'vp9/encoder/vp9_ratectrl.c',
-    'vp9/encoder/vp9_rdopt.c',
-    'vp9/encoder/vp9_sad_c.c',
-    'vp9/encoder/vp9_segmentation.c',
-    'vp9/encoder/vp9_subexp.c',
-    'vp9/encoder/vp9_temporal_filter.c',
-    'vp9/encoder/vp9_tokenize.c',
-    'vp9/encoder/vp9_treewriter.c',
-    'vp9/encoder/vp9_vaq.c',
-    'vp9/encoder/vp9_variance_c.c',
-    'vp9/vp9_cx_iface.c',
-    'vp9/vp9_dx_iface.c',
-    'vpx/src/vpx_codec.c',
-    'vpx/src/vpx_decoder.c',
-    'vpx/src/vpx_encoder.c',
-    'vpx/src/vpx_image.c',
-    'vpx_scale/generic/gen_scalers.c',
-    'vpx_scale/generic/vpx_scale.c',
-    'vpx_scale/generic/yv12config.c',
-    'vpx_scale/generic/yv12extend.c',
-    'vpx_scale/vpx_scale_rtcd.c'
-    ],
- 'SOURCES': [
-    'vp8/common/rtcd.c',
-    'vp8/common/sad_c.c',
-    'vp8/vp8_dx_iface.c',
-    'vp9/common/vp9_entropymv.c',
-    'vp9/common/vp9_rtcd.c',
-    'vp9/encoder/vp9_bitstream.c',
-    'vpx/src/svc_encodeframe.c',
-    'vpx_mem/vpx_mem.c',
-    ],
+ 'ARM_ASM': ['vp8/common/arm/armv6/bilinearfilter_v6.asm',
+             'vp8/common/arm/armv6/copymem16x16_v6.asm',
+             'vp8/common/arm/armv6/copymem8x4_v6.asm',
+             'vp8/common/arm/armv6/copymem8x8_v6.asm',
+             'vp8/common/arm/armv6/dc_only_idct_add_v6.asm',
+             'vp8/common/arm/armv6/dequant_idct_v6.asm',
+             'vp8/common/arm/armv6/dequantize_v6.asm',
+             'vp8/common/arm/armv6/filter_v6.asm',
+             'vp8/common/arm/armv6/idct_blk_v6.c',
+             'vp8/common/arm/armv6/idct_v6.asm',
+             'vp8/common/arm/armv6/intra4x4_predict_v6.asm',
+             'vp8/common/arm/armv6/iwalsh_v6.asm',
+             'vp8/common/arm/armv6/loopfilter_v6.asm',
+             'vp8/common/arm/armv6/simpleloopfilter_v6.asm',
+             'vp8/common/arm/armv6/sixtappredict8x4_v6.asm',
+             'vp8/common/arm/armv6/vp8_sad16x16_armv6.asm',
+             'vp8/common/arm/armv6/vp8_variance16x16_armv6.asm',
+             'vp8/common/arm/armv6/vp8_variance8x8_armv6.asm',
+             'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm',
+             'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm',
+             'vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm',
+             'vp8/common/arm/bilinearfilter_arm.c',
+             'vp8/common/arm/dequantize_arm.c',
+             'vp8/common/arm/filter_arm.c',
+             'vp8/common/arm/loopfilter_arm.c',
+             'vp8/common/arm/neon/bilinearpredict_neon.c',
+             'vp8/common/arm/neon/copymem_neon.c',
+             'vp8/common/arm/neon/dc_only_idct_add_neon.c',
+             'vp8/common/arm/neon/dequant_idct_neon.c',
+             'vp8/common/arm/neon/dequantizeb_neon.c',
+             'vp8/common/arm/neon/idct_blk_neon.c',
+             'vp8/common/arm/neon/idct_dequant_0_2x_neon.c',
+             'vp8/common/arm/neon/idct_dequant_full_2x_neon.c',
+             'vp8/common/arm/neon/iwalsh_neon.c',
+             'vp8/common/arm/neon/loopfilter_neon.c',
+             'vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c',
+             'vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c',
+             'vp8/common/arm/neon/mbloopfilter_neon.c',
+             'vp8/common/arm/neon/reconintra_neon.c',
+             'vp8/common/arm/neon/sad_neon.c',
+             'vp8/common/arm/neon/shortidct4x4llm_neon.c',
+             'vp8/common/arm/neon/sixtappredict_neon.c',
+             'vp8/common/arm/neon/variance_neon.c',
+             'vp8/common/arm/neon/vp8_subpixelvariance_neon.c',
+             'vp8/common/arm/variance_arm.c',
+             'vp8/encoder/arm/armv5te/boolhuff_armv5te.asm',
+             'vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm',
+             'vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm',
+             'vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm',
+             'vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm',
+             'vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
+             'vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
+             'vp8/encoder/arm/armv6/vp8_subtract_armv6.asm',
+             'vp8/encoder/arm/armv6/walsh_v6.asm',
+             'vp8/encoder/arm/boolhuff_arm.c',
+             'vp8/encoder/arm/dct_arm.c',
+             'vp8/encoder/arm/neon/denoising_neon.c',
+             'vp8/encoder/arm/neon/fastquantizeb_neon.asm',
+             'vp8/encoder/arm/neon/shortfdct_neon.c',
+             'vp8/encoder/arm/neon/subtract_neon.c',
+             'vp8/encoder/arm/neon/vp8_mse16x16_neon.asm',
+             'vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c',
+             'vp8/encoder/arm/quantize_arm.c',
+             'vp9/common/arm/neon/vp9_avg_neon.asm',
+             'vp9/common/arm/neon/vp9_convolve8_avg_neon.asm',
+             'vp9/common/arm/neon/vp9_convolve8_neon.asm',
+             'vp9/common/arm/neon/vp9_convolve_neon.c',
+             'vp9/common/arm/neon/vp9_copy_neon.asm',
+             'vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct16x16_1_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct16x16_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct16x16_neon.c',
+             'vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct32x32_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct4x4_1_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct4x4_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct8x8_1_add_neon.asm',
+             'vp9/common/arm/neon/vp9_idct8x8_add_neon.asm',
+             'vp9/common/arm/neon/vp9_iht4x4_add_neon.asm',
+             'vp9/common/arm/neon/vp9_iht8x8_add_neon.asm',
+             'vp9/common/arm/neon/vp9_loopfilter_16_neon.asm',
+             'vp9/common/arm/neon/vp9_loopfilter_16_neon.c',
+             'vp9/common/arm/neon/vp9_loopfilter_neon.asm',
+             'vp9/common/arm/neon/vp9_mb_lpf_neon.asm',
+             'vp9/common/arm/neon/vp9_reconintra_neon.asm',
+             'vp9/common/arm/neon/vp9_save_reg_neon.asm',
+             'vp9/encoder/arm/neon/vp9_dct_neon.c',
+             'vp9/encoder/arm/neon/vp9_quantize_neon.c',
+             'vp9/encoder/arm/neon/vp9_sad_neon.c',
+             'vp9/encoder/arm/neon/vp9_subtract_neon.c',
+             'vp9/encoder/arm/neon/vp9_variance_neon.c',
+             'vpx_ports/arm_cpudetect.c'],
+ 'AVX2': ['vp9/common/x86/vp9_loopfilter_intrin_avx2.c',
+          'vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c',
+          'vp9/encoder/x86/vp9_dct32x32_avx2.c',
+          'vp9/encoder/x86/vp9_dct_avx2.c',
+          'vp9/encoder/x86/vp9_error_intrin_avx2.c',
+          'vp9/encoder/x86/vp9_sad4d_intrin_avx2.c',
+          'vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c',
+          'vp9/encoder/x86/vp9_variance_avx2.c',
+          'vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c'],
  'ERROR_CONCEALMENT': ['vp8/decoder/error_concealment.c'],
  'EXPORTS': ['vpx/vp8.h',
              'vpx/vp8cx.h',
              'vpx/vp8dx.h',
              'vpx/vpx_codec.h',
              'vpx/vpx_decoder.h',
              'vpx/vpx_encoder.h',
+             'vpx/vpx_frame_buffer.h',
              'vpx/vpx_image.h',
              'vpx/vpx_integer.h',
              'vpx_mem/include/vpx_mem_intrnl.h',
              'vpx_mem/vpx_mem.h',
              'vpx_ports/arm.h',
              'vpx_ports/mem.h',
              'vpx_ports/vpx_timer.h',
              'vpx_ports/x86.h',
              'vpx_scale/vpx_scale.h',
              'vpx_scale/yv12config.h'],
+ 'SOURCES': ['vp8/common/rtcd.c',
+             'vp8/common/sad_c.c',
+             'vp8/encoder/bitstream.c',
+             'vp8/encoder/onyx_if.c',
+             'vp8/vp8_dx_iface.c',
+             'vp9/common/vp9_alloccommon.c',
+             'vp9/common/vp9_blockd.c',
+             'vp9/common/vp9_common_data.c',
+             'vp9/common/vp9_convolve.c',
+             'vp9/common/vp9_debugmodes.c',
+             'vp9/common/vp9_entropy.c',
+             'vp9/common/vp9_entropymode.c',
+             'vp9/common/vp9_entropymv.c',
+             'vp9/common/vp9_filter.c',
+             'vp9/common/vp9_frame_buffers.c',
+             'vp9/common/vp9_idct.c',
+             'vp9/common/vp9_loopfilter.c',
+             'vp9/common/vp9_loopfilter_filters.c',
+             'vp9/common/vp9_mvref_common.c',
+             'vp9/common/vp9_pred_common.c',
+             'vp9/common/vp9_prob.c',
+             'vp9/common/vp9_quant_common.c',
+             'vp9/common/vp9_reconinter.c',
+             'vp9/common/vp9_reconintra.c',
+             'vp9/common/vp9_rtcd.c',
+             'vp9/common/vp9_scale.c',
+             'vp9/common/vp9_scan.c',
+             'vp9/common/vp9_seg_common.c',
+             'vp9/common/vp9_thread.c',
+             'vp9/common/vp9_tile_common.c',
+             'vp9/decoder/vp9_decodeframe.c',
+             'vp9/decoder/vp9_decodemv.c',
+             'vp9/decoder/vp9_decoder.c',
+             'vp9/decoder/vp9_detokenize.c',
+             'vp9/decoder/vp9_dsubexp.c',
+             'vp9/decoder/vp9_dthread.c',
+             'vp9/decoder/vp9_reader.c',
+             'vp9/encoder/vp9_aq_complexity.c',
+             'vp9/encoder/vp9_aq_cyclicrefresh.c',
+             'vp9/encoder/vp9_aq_variance.c',
+             'vp9/encoder/vp9_bitstream.c',
+             'vp9/encoder/vp9_context_tree.c',
+             'vp9/encoder/vp9_cost.c',
+             'vp9/encoder/vp9_dct.c',
+             'vp9/encoder/vp9_encodeframe.c',
+             'vp9/encoder/vp9_encodemb.c',
+             'vp9/encoder/vp9_encodemv.c',
+             'vp9/encoder/vp9_encoder.c',
+             'vp9/encoder/vp9_extend.c',
+             'vp9/encoder/vp9_firstpass.c',
+             'vp9/encoder/vp9_lookahead.c',
+             'vp9/encoder/vp9_mbgraph.c',
+             'vp9/encoder/vp9_mcomp.c',
+             'vp9/encoder/vp9_picklpf.c',
+             'vp9/encoder/vp9_pickmode.c',
+             'vp9/encoder/vp9_quantize.c',
+             'vp9/encoder/vp9_ratectrl.c',
+             'vp9/encoder/vp9_rd.c',
+             'vp9/encoder/vp9_rdopt.c',
+             'vp9/encoder/vp9_resize.c',
+             'vp9/encoder/vp9_sad.c',
+             'vp9/encoder/vp9_segmentation.c',
+             'vp9/encoder/vp9_speed_features.c',
+             'vp9/encoder/vp9_subexp.c',
+             'vp9/encoder/vp9_svc_layercontext.c',
+             'vp9/encoder/vp9_temporal_filter.c',
+             'vp9/encoder/vp9_tokenize.c',
+             'vp9/encoder/vp9_treewriter.c',
+             'vp9/encoder/vp9_variance.c',
+             'vp9/encoder/vp9_write_bit_buffer.c',
+             'vp9/encoder/vp9_writer.c',
+             'vp9/vp9_cx_iface.c',
+             'vp9/vp9_dx_iface.c',
+             'vpx/src/svc_encodeframe.c',
+             'vpx/src/vpx_encoder.c',
+             'vpx_mem/vpx_mem.c',
+             'vpx_scale/generic/yv12config.c',
+             'vpx_scale/generic/yv12extend.c',
+             'vpx_scale/vpx_scale_rtcd.c'],
+ 'UNIFIED_SOURCES': ['vp8/common/alloccommon.c',
+                     'vp8/common/blockd.c',
+                     'vp8/common/debugmodes.c',
+                     'vp8/common/dequantize.c',
+                     'vp8/common/entropy.c',
+                     'vp8/common/entropymode.c',
+                     'vp8/common/entropymv.c',
+                     'vp8/common/extend.c',
+                     'vp8/common/filter.c',
+                     'vp8/common/findnearmv.c',
+                     'vp8/common/generic/systemdependent.c',
+                     'vp8/common/idct_blk.c',
+                     'vp8/common/idctllm.c',
+                     'vp8/common/loopfilter.c',
+                     'vp8/common/loopfilter_filters.c',
+                     'vp8/common/mbpitch.c',
+                     'vp8/common/modecont.c',
+                     'vp8/common/quant_common.c',
+                     'vp8/common/reconinter.c',
+                     'vp8/common/reconintra.c',
+                     'vp8/common/reconintra4x4.c',
+                     'vp8/common/setupintrarecon.c',
+                     'vp8/common/swapyv12buffer.c',
+                     'vp8/common/treecoder.c',
+                     'vp8/common/variance_c.c',
+                     'vp8/decoder/dboolhuff.c',
+                     'vp8/decoder/decodeframe.c',
+                     'vp8/decoder/decodemv.c',
+                     'vp8/decoder/detokenize.c',
+                     'vp8/decoder/onyxd_if.c',
+                     'vp8/decoder/threading.c',
+                     'vp8/encoder/dct.c',
+                     'vp8/encoder/denoising.c',
+                     'vp8/encoder/encodeframe.c',
+                     'vp8/encoder/encodeintra.c',
+                     'vp8/encoder/encodemb.c',
+                     'vp8/encoder/encodemv.c',
+                     'vp8/encoder/ethreading.c',
+                     'vp8/encoder/firstpass.c',
+                     'vp8/encoder/lookahead.c',
+                     'vp8/encoder/mcomp.c',
+                     'vp8/encoder/modecosts.c',
+                     'vp8/encoder/mr_dissim.c',
+                     'vp8/encoder/pickinter.c',
+                     'vp8/encoder/picklpf.c',
+                     'vp8/encoder/quantize.c',
+                     'vp8/encoder/ratectrl.c',
+                     'vp8/encoder/rdopt.c',
+                     'vp8/encoder/segmentation.c',
+                     'vp8/encoder/temporal_filter.c',
+                     'vp8/encoder/tokenize.c',
+                     'vp8/encoder/treewriter.c',
+                     'vp8/vp8_cx_iface.c',
+                     'vp9/decoder/vp9_read_bit_buffer.c',
+                     'vpx/src/vpx_codec.c',
+                     'vpx/src/vpx_decoder.c',
+                     'vpx/src/vpx_image.c',
+                     'vpx/src/vpx_psnr.c',
+                     'vpx_scale/generic/gen_scalers.c',
+                     'vpx_scale/generic/vpx_scale.c'],
  'VP8_POSTPROC': ['vp8/common/mfqe.c', 'vp8/common/postproc.c'],
  'VP9_POSTPROC': ['vp9/common/vp9_postproc.c'],
  'X86-64_ASM': ['third_party/x86inc/x86inc.asm',
-                'vp8/common/x86/loopfilter_block_sse2.asm',
-                'vp9/encoder/x86/vp9_quantize_ssse3.asm'],
- 'X86_ASM': [
-    'vp8/common/x86/dequantize_mmx.asm',
-    'vp8/common/x86/filter_x86.c',
-    'vp8/common/x86/idct_blk_mmx.c',
-    'vp8/common/x86/idct_blk_sse2.c',
-    'vp8/common/x86/idctllm_mmx.asm',
-    'vp8/common/x86/idctllm_sse2.asm',
-    'vp8/common/x86/iwalsh_mmx.asm',
-    'vp8/common/x86/iwalsh_sse2.asm',
-    'vp8/common/x86/loopfilter_mmx.asm',
-    'vp8/common/x86/loopfilter_sse2.asm',
-    'vp8/common/x86/loopfilter_x86.c',
-    'vp8/common/x86/mfqe_sse2.asm',
-    'vp8/common/x86/postproc_mmx.asm',
-    'vp8/common/x86/postproc_sse2.asm',
-    'vp8/common/x86/postproc_x86.c',
-    'vp8/common/x86/recon_mmx.asm',
-    'vp8/common/x86/recon_sse2.asm',
-    'vp8/common/x86/recon_wrapper_sse2.c',
-    'vp8/common/x86/sad_mmx.asm',
-    'vp8/common/x86/sad_sse2.asm',
-    'vp8/common/x86/sad_sse3.asm',
-    'vp8/common/x86/sad_sse4.asm',
-    'vp8/common/x86/sad_ssse3.asm',
-    'vp8/common/x86/subpixel_mmx.asm',
-    'vp8/common/x86/subpixel_sse2.asm',
-    'vp8/common/x86/subpixel_ssse3.asm',
-    'vp8/common/x86/variance_impl_mmx.asm',
-    'vp8/common/x86/variance_impl_sse2.asm',
-    'vp8/common/x86/variance_impl_ssse3.asm',
-    'vp8/common/x86/variance_mmx.c',
-    'vp8/common/x86/variance_sse2.c',
-    'vp8/common/x86/variance_ssse3.c',
-    'vp8/common/x86/vp8_asm_stubs.c',
-    'vp8/encoder/x86/dct_mmx.asm',
-    'vp8/encoder/x86/dct_sse2.asm',
-    'vp8/encoder/x86/denoising_sse2.c',
-    'vp8/encoder/x86/encodeopt.asm',
-    'vp8/encoder/x86/fwalsh_sse2.asm',
-    'vp8/encoder/x86/quantize_mmx.asm',
-    'vp8/encoder/x86/quantize_sse2.c',
-    'vp8/encoder/x86/quantize_sse4.asm',
-    'vp8/encoder/x86/quantize_ssse3.asm',
-    'vp8/encoder/x86/subtract_mmx.asm',
-    'vp8/encoder/x86/subtract_sse2.asm',
-    'vp8/encoder/x86/temporal_filter_apply_sse2.asm',
-    'vp8/encoder/x86/vp8_enc_stubs_mmx.c',
-    'vp8/encoder/x86/vp8_enc_stubs_sse2.c',
-    'vp9/common/x86/vp9_asm_stubs.c',
-    'vp9/common/x86/vp9_copy_sse2.asm',
-    'vp9/common/x86/vp9_idct_intrin_sse2.c',
-    'vp9/common/x86/vp9_intrapred_sse2.asm',
-    'vp9/common/x86/vp9_intrapred_ssse3.asm',
-    'vp9/common/x86/vp9_loopfilter_intrin_sse2.c',
-    'vp9/common/x86/vp9_loopfilter_mmx.asm',
-    'vp9/common/x86/vp9_subpixel_8t_sse2.asm',
-    'vp9/common/x86/vp9_subpixel_8t_ssse3.asm',
-    'vp9/encoder/x86/vp9_dct32x32_sse2.c',
-    'vp9/encoder/x86/vp9_dct_sse2.c',
-    'vp9/encoder/x86/vp9_error_sse2.asm',
-    'vp9/encoder/x86/vp9_sad4d_sse2.asm',
-    'vp9/encoder/x86/vp9_sad_mmx.asm',
-    'vp9/encoder/x86/vp9_sad_sse2.asm',
-    'vp9/encoder/x86/vp9_sad_sse3.asm',
-    'vp9/encoder/x86/vp9_sad_sse4.asm',
-    'vp9/encoder/x86/vp9_sad_ssse3.asm',
-    'vp9/encoder/x86/vp9_subpel_variance.asm',
-    'vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm',
-    'vp9/encoder/x86/vp9_subtract_sse2.asm',
-    'vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm',
-    'vp9/encoder/x86/vp9_variance_impl_mmx.asm',
-    'vp9/encoder/x86/vp9_variance_impl_sse2.asm',
-    'vp9/encoder/x86/vp9_variance_mmx.c',
-    'vp9/encoder/x86/vp9_variance_sse2.c',
-    'vpx_ports/emms.asm',
-    'vpx_ports/x86_cpuid.c',
-    ]
+                'vp8/common/x86/loopfilter_block_sse2_x86_64.asm',
+                'vp8/encoder/x86/ssim_opt_x86_64.asm',
+                'vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm',
+                'vp9/encoder/x86/vp9_ssim_opt_x86_64.asm'],
+ 'X86_ASM': ['vp8/common/x86/dequantize_mmx.asm',
+             'vp8/common/x86/filter_x86.c',
+             'vp8/common/x86/idct_blk_mmx.c',
+             'vp8/common/x86/idct_blk_sse2.c',
+             'vp8/common/x86/idctllm_mmx.asm',
+             'vp8/common/x86/idctllm_sse2.asm',
+             'vp8/common/x86/iwalsh_mmx.asm',
+             'vp8/common/x86/iwalsh_sse2.asm',
+             'vp8/common/x86/loopfilter_mmx.asm',
+             'vp8/common/x86/loopfilter_sse2.asm',
+             'vp8/common/x86/loopfilter_x86.c',
+             'vp8/common/x86/mfqe_sse2.asm',
+             'vp8/common/x86/postproc_mmx.asm',
+             'vp8/common/x86/postproc_sse2.asm',
+             'vp8/common/x86/recon_mmx.asm',
+             'vp8/common/x86/recon_sse2.asm',
+             'vp8/common/x86/recon_wrapper_sse2.c',
+             'vp8/common/x86/sad_mmx.asm',
+             'vp8/common/x86/sad_sse2.asm',
+             'vp8/common/x86/sad_sse3.asm',
+             'vp8/common/x86/sad_sse4.asm',
+             'vp8/common/x86/sad_ssse3.asm',
+             'vp8/common/x86/subpixel_mmx.asm',
+             'vp8/common/x86/subpixel_sse2.asm',
+             'vp8/common/x86/subpixel_ssse3.asm',
+             'vp8/common/x86/variance_impl_mmx.asm',
+             'vp8/common/x86/variance_impl_sse2.asm',
+             'vp8/common/x86/variance_impl_ssse3.asm',
+             'vp8/common/x86/variance_mmx.c',
+             'vp8/common/x86/variance_sse2.c',
+             'vp8/common/x86/variance_ssse3.c',
+             'vp8/common/x86/vp8_asm_stubs.c',
+             'vp8/encoder/x86/dct_mmx.asm',
+             'vp8/encoder/x86/dct_sse2.asm',
+             'vp8/encoder/x86/denoising_sse2.c',
+             'vp8/encoder/x86/encodeopt.asm',
+             'vp8/encoder/x86/fwalsh_sse2.asm',
+             'vp8/encoder/x86/quantize_mmx.asm',
+             'vp8/encoder/x86/quantize_sse2.c',
+             'vp8/encoder/x86/quantize_sse4.c',
+             'vp8/encoder/x86/quantize_ssse3.c',
+             'vp8/encoder/x86/subtract_mmx.asm',
+             'vp8/encoder/x86/subtract_sse2.asm',
+             'vp8/encoder/x86/temporal_filter_apply_sse2.asm',
+             'vp8/encoder/x86/vp8_enc_stubs_mmx.c',
+             'vp8/encoder/x86/vp8_enc_stubs_sse2.c',
+             'vp9/common/x86/vp9_asm_stubs.c',
+             'vp9/common/x86/vp9_copy_sse2.asm',
+             'vp9/common/x86/vp9_idct_intrin_sse2.c',
+             'vp9/common/x86/vp9_idct_intrin_ssse3.c',
+             'vp9/common/x86/vp9_idct_ssse3_x86_64.asm',
+             'vp9/common/x86/vp9_intrapred_sse2.asm',
+             'vp9/common/x86/vp9_intrapred_ssse3.asm',
+             'vp9/common/x86/vp9_loopfilter_intrin_sse2.c',
+             'vp9/common/x86/vp9_loopfilter_mmx.asm',
+             'vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c',
+             'vp9/common/x86/vp9_subpixel_8t_sse2.asm',
+             'vp9/common/x86/vp9_subpixel_8t_ssse3.asm',
+             'vp9/common/x86/vp9_subpixel_bilinear_sse2.asm',
+             'vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm',
+             'vp9/encoder/x86/vp9_dct32x32_sse2.c',
+             'vp9/encoder/x86/vp9_dct_mmx.asm',
+             'vp9/encoder/x86/vp9_dct_sse2.c',
+             'vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm',
+             'vp9/encoder/x86/vp9_error_sse2.asm',
+             'vp9/encoder/x86/vp9_sad4d_sse2.asm',
+             'vp9/encoder/x86/vp9_sad_sse2.asm',
+             'vp9/encoder/x86/vp9_sad_sse3.asm',
+             'vp9/encoder/x86/vp9_sad_sse4.asm',
+             'vp9/encoder/x86/vp9_sad_ssse3.asm',
+             'vp9/encoder/x86/vp9_subpel_variance.asm',
+             'vp9/encoder/x86/vp9_subtract_sse2.asm',
+             'vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm',
+             'vp9/encoder/x86/vp9_variance_sse2.c',
+             'vpx_ports/emms.asm']
 }
--- a/media/libvpx/third_party/x86inc/x86inc.asm
+++ b/media/libvpx/third_party/x86inc/x86inc.asm
@@ -229,20 +229,20 @@ ALIGNMODE k7
     %define r%1d %3
     %define r%1w %4
     %define r%1b %5
     %if %0 == 5
         %define r%1m  %3
         %define r%1mp %2
     %elif ARCH_X86_64 ; memory
         %define r%1m [rsp + stack_offset + %6]
-        %define r%1mp qword r %+ %1m
+        %define r%1mp qword r %+ %1 %+ m
     %else
         %define r%1m [esp + stack_offset + %6]
-        %define r%1mp dword r %+ %1m
+        %define r%1mp dword r %+ %1 %+ m
     %endif
     %define r%1  %2
 %endmacro
 
 %macro DECLARE_REG_SIZE 2
     %define r%1q r%1
     %define e%1q r%1
     %define r%1d e%1
@@ -390,16 +390,33 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
         CAT_XDEFINE arg_name, %%i, %1
         %assign %%i %%i+1
         %rotate 1
     %endrep
     %xdefine stack_offset %%stack_offset
     %assign n_arg_names %0
 %endmacro
 
+%if ARCH_X86_64
+%macro ALLOC_STACK 2  ; stack_size, num_regs
+  %assign %%stack_aligment ((mmsize + 15) & ~15)
+  %assign stack_size_padded %1
+
+  %assign %%reg_num (%2 - 1)
+  %xdefine rsp_tmp r %+ %%reg_num
+  mov  rsp_tmp, rsp
+  sub  rsp, stack_size_padded
+  and  rsp, ~(%%stack_aligment - 1)
+%endmacro
+
+%macro RESTORE_STACK 0  ; reset rsp register
+  mov  rsp, rsp_tmp
+%endmacro
+%endif
+
 %if WIN64 ; Windows x64 ;=================================================
 
 DECLARE_REG 0,  rcx, ecx,  cx,   cl
 DECLARE_REG 1,  rdx, edx,  dx,   dl
 DECLARE_REG 2,  R8,  R8D,  R8W,  R8B
 DECLARE_REG 3,  R9,  R9D,  R9W,  R9B
 DECLARE_REG 4,  R10, R10D, R10W, R10B, 40
 DECLARE_REG 5,  R11, R11D, R11W, R11B, 48
@@ -587,26 +604,30 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endmacro
 %macro cglobal_internal 1-2+
     %ifndef cglobaled_%1
         %xdefine %1 mangle(program_name %+ _ %+ %1)
         %xdefine %1.skip_prologue %1 %+ .skip_prologue
         CAT_XDEFINE cglobaled_, %1, 1
     %endif
     %xdefine current_function %1
-    %ifidn __OUTPUT_FORMAT__,elf
-        global %1:function hidden
-    %elifidn __OUTPUT_FORMAT__,elf32
-        global %1:function hidden
-    %elifidn __OUTPUT_FORMAT__,elf64
-        global %1:function hidden
-    %elifidn __OUTPUT_FORMAT__,macho32
-        global %1:private_extern
-    %elifidn __OUTPUT_FORMAT__,macho64
-        global %1:private_extern
+    %ifdef CHROMIUM
+        %ifidn __OUTPUT_FORMAT__,elf
+            global %1:function hidden
+        %elifidn __OUTPUT_FORMAT__,elf32
+            global %1:function hidden
+        %elifidn __OUTPUT_FORMAT__,elf64
+            global %1:function hidden
+        %elifidn __OUTPUT_FORMAT__,macho32
+            global %1:private_extern
+        %elifidn __OUTPUT_FORMAT__,macho64
+            global %1:private_extern
+        %else
+            global %1
+        %endif
     %else
         global %1
     %endif
     align function_align
     %1:
     RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
     %assign stack_offset 0
     %if %0 > 1
--- a/media/libvpx/vp8/common/alloccommon.h
+++ b/media/libvpx/vp8/common/alloccommon.h
@@ -4,20 +4,28 @@
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
-#ifndef __INC_ALLOCCOMMON_H
-#define __INC_ALLOCCOMMON_H
+#ifndef VP8_COMMON_ALLOCCOMMON_H_
+#define VP8_COMMON_ALLOCCOMMON_H_
 
 #include "onyxc_int.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void vp8_create_common(VP8_COMMON *oci);
 void vp8_remove_common(VP8_COMMON *oci);
 void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
 int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
 void vp8_setup_version(VP8_COMMON *oci);
 
+#ifdef __cplusplus
+}  // extern "C"
 #endif
+
+#endif  // VP8_COMMON_ALLOCCOMMON_H_
--- a/media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
@@ -48,17 +48,17 @@ loop
     sel     r6, r9, lr          ; select bytes with negative difference
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
     ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 2nd 4 pixels
     ldr     r4, [r0, #4]        ; load 4 src pixels
@@ -72,17 +72,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 3rd 4 pixels
     ldr     r4, [r0, #8]        ; load 4 src pixels
@@ -96,17 +96,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 4th 4 pixels
     ldr     r4, [r0, #12]       ; load 4 src pixels
@@ -122,17 +122,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)
 
 
--- a/media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
@@ -46,17 +46,17 @@ loop
     sel     r8, r9, lr          ; select bytes with negative difference
 
     ; calculate partial sums
     usad8   r6, r10, lr         ; calculate sum of positive differences
     usad8   r7, r8, lr          ; calculate sum of negative differences
     orr     r8, r8, r10         ; differences of all 4 pixels
     ; calculate total sum
     add    r4, r4, r6           ; add positive differences to sum
-    sub    r4, r4, r7           ; substract negative differences from sum
+    sub    r4, r4, r7           ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r7, r8              ; byte (two pixels) to halfwords
     uxtb16  r10, r8, ror #8     ; another two pixels to halfwords
     smlad   r5, r7, r7, r5      ; dual signed multiply, add and accumulate (1)
 
     ; 2nd 4 pixels
     ldr     r6, [r0, #0x4]      ; load 4 src pixels
@@ -72,17 +72,17 @@ loop
 
     ; calculate partial sums
     usad8   r6, r10, lr         ; calculate sum of positive differences
     usad8   r7, r8, lr          ; calculate sum of negative differences
     orr     r8, r8, r10         ; differences of all 4 pixels
 
     ; calculate total sum
     add     r4, r4, r6          ; add positive differences to sum
-    sub     r4, r4, r7          ; substract negative differences from sum
+    sub     r4, r4, r7          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r7, r8              ; byte (two pixels) to halfwords
     uxtb16  r10, r8, ror #8     ; another two pixels to halfwords
     smlad   r5, r7, r7, r5      ; dual signed multiply, add and accumulate (1)
     subs    r12, r12, #1        ; next row
     smlad   r5, r10, r10, r5    ; dual signed multiply, add and accumulate (2)
 
--- a/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -53,17 +53,17 @@ loop
     sel     r6, r6, lr          ; select bytes with negative difference
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
     ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 2nd 4 pixels
     ldr     r4, [r0, #4]        ; load 4 src pixels
@@ -84,17 +84,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 3rd 4 pixels
     ldr     r4, [r0, #8]        ; load 4 src pixels
@@ -115,17 +115,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 4th 4 pixels
     ldr     r4, [r0, #12]       ; load 4 src pixels
@@ -148,17 +148,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
 
     subs    r12, r12, #1
--- a/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -64,17 +64,17 @@ loop
     sel     r6, r6, lr          ; select bytes with negative difference
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
     ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 2nd 4 pixels
     ldr     r4, [r0, #4]        ; load source pixels a, row N
@@ -106,17 +106,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 3rd 4 pixels
     ldr     r4, [r0, #8]        ; load source pixels a, row N
@@ -148,17 +148,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 4th 4 pixels
     ldr     r4, [r0, #12]       ; load source pixels a, row N
@@ -190,17 +190,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     subs    r12, r12, #1
     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
 
--- a/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/media/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -54,17 +54,17 @@ loop
     sel     r6, r6, lr          ; select bytes with negative difference
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
     ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 2nd 4 pixels
     ldr     r4, [r0, #4]        ; load 4 src pixels
@@ -85,17 +85,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 3rd 4 pixels
     ldr     r4, [r0, #8]        ; load 4 src pixels
@@ -116,17 +116,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
 
     ; 4th 4 pixels
     ldr     r4, [r0, #12]       ; load 4 src pixels
@@ -149,17 +149,17 @@ loop
 
     ; calculate partial sums
     usad8   r4, r7, lr          ; calculate sum of positive differences
     usad8   r5, r6, lr          ; calculate sum of negative differences
     orr     r6, r6, r7          ; differences of all 4 pixels
 
     ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum
 
     ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
     uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
     smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
     smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
 
 
--- a/media/libvpx/vp8/common/arm/bilinearfilter_arm.h
+++ b/media/libvpx/vp8/common/arm/bilinearfilter_arm.h
@@ -4,18 +4,22 @@
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
-#ifndef BILINEARFILTER_ARM_H
-#define BILINEARFILTER_ARM_H
+#ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
+#define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 extern void vp8_filter_block2d_bil_first_pass_armv6
 (
     const unsigned char  *src_ptr,
     unsigned short       *dst_ptr,
     unsigned int          src_pitch,
     unsigned int          height,
     unsigned int          width,
@@ -27,9 +31,13 @@ extern void vp8_filter_block2d_bil_secon
     const unsigned short *src_ptr,
     unsigned char        *dst_ptr,
     int                   dst_pitch,
     unsigned int          height,
     unsigned int          width,
     const short         *vp8_filter
 );
 
-#endif /* BILINEARFILTER_ARM_H */
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
--- a/media/libvpx/vp8/common/arm/dequantize_arm.c
+++ b/media/libvpx/vp8/common/arm/dequantize_arm.c
@@ -7,36 +7,19 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #include "vpx_config.h"
 #include "vp8/common/blockd.h"
 
-#if HAVE_NEON
-extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
-#endif
-
 #if HAVE_MEDIA
 extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
-#endif
 
-#if HAVE_NEON
-
-void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
-{
-    short *DQ  = d->dqcoeff;
-    short *Q   = d->qcoeff;
-
-    vp8_dequantize_b_loop_neon(Q, DQC, DQ);
-}
-#endif
-
-#if HAVE_MEDIA
 void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
 {
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
 
     vp8_dequantize_b_loop_v6(Q, DQC, DQ);
 }
 #endif
--- a/media/libvpx/vp8/common/arm/loopfilter_arm.c
+++ b/media/libvpx/vp8/common/arm/loopfilter_arm.c
@@ -29,21 +29,21 @@ extern prototype_loopfilter(vp8_mbloop_f
 typedef void loopfilter_y_neon(unsigned char *src, int pitch,
         unsigned char blimit, unsigned char limit, unsigned char thresh);
 typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
         unsigned char blimit, unsigned char limit, unsigned char thresh,
         unsigned char *v);
 
 extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
 extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
+extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
+extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
+
 extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
 extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
-
-extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
-extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
 extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
 extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
 #endif
 
 #if HAVE_MEDIA
 /* ARMV6/MEDIA loopfilter functions*/
 /* Horizontal MB filtering */
 void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+++ /dev/null
@@ -1,357 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_bilinear_predict16x16_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; r4    unsigned char *dst_ptr,
-; stack(r5) int  dst_pitch
-
-|vp8_bilinear_predict16x16_neon| PROC
-    push            {r4-r5, lr}
-
-    adr             r12, bifilter16_coeff
-    ldr             r4, [sp, #12]           ;load parameters from stack
-    ldr             r5, [sp, #16]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             secondpass_bfilter16x16_only
-
-    add             r2, r12, r2, lsl #3     ;calculate filter location
-
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-
-    vld1.s32        {d31}, [r2]             ;load first_pass filter
-
-    beq             firstpass_bfilter16x16_only
-
-    sub             sp, sp, #272            ;reserve space on stack for temporary storage
-    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
-    mov             lr, sp
-    vld1.u8         {d5, d6, d7}, [r0], r1
-
-    mov             r2, #3                  ;loop counter
-    vld1.u8         {d8, d9, d10}, [r0], r1
-
-    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
-    vld1.u8         {d11, d12, d13}, [r0], r1
-
-    vdup.8          d1, d31[4]
-
-;First Pass: output_height lines x output_width columns (17x16)
-filt_blk2d_fp16x16_loop_neon
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q8, d3, d0
-    vmull.u8        q9, d5, d0
-    vmull.u8        q10, d6, d0
-    vmull.u8        q11, d8, d0
-    vmull.u8        q12, d9, d0
-    vmull.u8        q13, d11, d0
-    vmull.u8        q14, d12, d0
-
-    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
-    vext.8          d5, d5, d6, #1
-    vext.8          d8, d8, d9, #1
-    vext.8          d11, d11, d12, #1
-
-    vmlal.u8        q7, d2, d1              ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q9, d5, d1
-    vmlal.u8        q11, d8, d1
-    vmlal.u8        q13, d11, d1
-
-    vext.8          d3, d3, d4, #1
-    vext.8          d6, d6, d7, #1
-    vext.8          d9, d9, d10, #1
-    vext.8          d12, d12, d13, #1
-
-    vmlal.u8        q8, d3, d1              ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q10, d6, d1
-    vmlal.u8        q12, d9, d1
-    vmlal.u8        q14, d12, d1
-
-    subs            r2, r2, #1
-
-    vqrshrn.u16    d14, q7, #7              ;shift/round/saturate to u8
-    vqrshrn.u16    d15, q8, #7
-    vqrshrn.u16    d16, q9, #7
-    vqrshrn.u16    d17, q10, #7
-    vqrshrn.u16    d18, q11, #7
-    vqrshrn.u16    d19, q12, #7
-    vqrshrn.u16    d20, q13, #7
-
-    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
-    vqrshrn.u16    d21, q14, #7
-    vld1.u8         {d5, d6, d7}, [r0], r1
-
-    vst1.u8         {d14, d15, d16, d17}, [lr]!     ;store result
-    vld1.u8         {d8, d9, d10}, [r0], r1
-    vst1.u8         {d18, d19, d20, d21}, [lr]!
-    vld1.u8         {d11, d12, d13}, [r0], r1
-
-    bne             filt_blk2d_fp16x16_loop_neon
-
-;First-pass filtering for rest 5 lines
-    vld1.u8         {d14, d15, d16}, [r0], r1
-
-    vmull.u8        q9, d2, d0              ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q10, d3, d0
-    vmull.u8        q11, d5, d0
-    vmull.u8        q12, d6, d0
-    vmull.u8        q13, d8, d0
-    vmull.u8        q14, d9, d0
-
-    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
-    vext.8          d5, d5, d6, #1
-    vext.8          d8, d8, d9, #1
-
-    vmlal.u8        q9, d2, d1              ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q11, d5, d1
-    vmlal.u8        q13, d8, d1
-
-    vext.8          d3, d3, d4, #1
-    vext.8          d6, d6, d7, #1
-    vext.8          d9, d9, d10, #1
-
-    vmlal.u8        q10, d3, d1             ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q12, d6, d1
-    vmlal.u8        q14, d9, d1
-
-    vmull.u8        q1, d11, d0
-    vmull.u8        q2, d12, d0
-    vmull.u8        q3, d14, d0
-    vmull.u8        q4, d15, d0
-
-    vext.8          d11, d11, d12, #1       ;construct src_ptr[1]
-    vext.8          d14, d14, d15, #1
-
-    vmlal.u8        q1, d11, d1             ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q3, d14, d1
-
-    vext.8          d12, d12, d13, #1
-    vext.8          d15, d15, d16, #1
-
-    vmlal.u8        q2, d12, d1             ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q4, d15, d1
-
-    vqrshrn.u16    d10, q9, #7              ;shift/round/saturate to u8
-    vqrshrn.u16    d11, q10, #7
-    vqrshrn.u16    d12, q11, #7
-    vqrshrn.u16    d13, q12, #7
-    vqrshrn.u16    d14, q13, #7
-    vqrshrn.u16    d15, q14, #7
-    vqrshrn.u16    d16, q1, #7
-    vqrshrn.u16    d17, q2, #7
-    vqrshrn.u16    d18, q3, #7
-    vqrshrn.u16    d19, q4, #7
-
-    vst1.u8         {d10, d11, d12, d13}, [lr]!         ;store result
-    vst1.u8         {d14, d15, d16, d17}, [lr]!
-    vst1.u8         {d18, d19}, [lr]!
-
-;Second pass: 16x16
-;secondpass_filter
-    add             r3, r12, r3, lsl #3
-    sub             lr, lr, #272
-
-    vld1.u32        {d31}, [r3]             ;load second_pass filter
-
-    vld1.u8         {d22, d23}, [lr]!       ;load src data
-
-    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
-    vdup.8          d1, d31[4]
-    mov             r12, #4                 ;loop counter
-
-filt_blk2d_sp16x16_loop_neon
-    vld1.u8         {d24, d25}, [lr]!
-    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
-    vld1.u8         {d26, d27}, [lr]!
-    vmull.u8        q2, d23, d0
-    vld1.u8         {d28, d29}, [lr]!
-    vmull.u8        q3, d24, d0
-    vld1.u8         {d30, d31}, [lr]!
-
-    vmull.u8        q4, d25, d0
-    vmull.u8        q5, d26, d0
-    vmull.u8        q6, d27, d0
-    vmull.u8        q7, d28, d0
-    vmull.u8        q8, d29, d0
-
-    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
-    vmlal.u8        q2, d25, d1
-    vmlal.u8        q3, d26, d1
-    vmlal.u8        q4, d27, d1
-    vmlal.u8        q5, d28, d1
-    vmlal.u8        q6, d29, d1
-    vmlal.u8        q7, d30, d1
-    vmlal.u8        q8, d31, d1
-
-    subs            r12, r12, #1
-
-    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
-    vqrshrn.u16    d3, q2, #7
-    vqrshrn.u16    d4, q3, #7
-    vqrshrn.u16    d5, q4, #7
-    vqrshrn.u16    d6, q5, #7
-    vqrshrn.u16    d7, q6, #7
-    vqrshrn.u16    d8, q7, #7
-    vqrshrn.u16    d9, q8, #7
-
-    vst1.u8         {d2, d3}, [r4], r5      ;store result
-    vst1.u8         {d4, d5}, [r4], r5
-    vst1.u8         {d6, d7}, [r4], r5
-    vmov            q11, q15
-    vst1.u8         {d8, d9}, [r4], r5
-
-    bne             filt_blk2d_sp16x16_loop_neon
-
-    add             sp, sp, #272
-
-    pop             {r4-r5,pc}
-
-;--------------------
-firstpass_bfilter16x16_only
-    mov             r2, #4                      ;loop counter
-    vdup.8          d0, d31[0]                  ;first_pass filter (d0 d1)
-    vdup.8          d1, d31[4]
-
-;First Pass: output_height lines x output_width columns (16x16)
-filt_blk2d_fpo16x16_loop_neon
-    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
-    vld1.u8         {d5, d6, d7}, [r0], r1
-    vld1.u8         {d8, d9, d10}, [r0], r1
-    vld1.u8         {d11, d12, d13}, [r0], r1
-
-    pld             [r0]
-    pld             [r0, r1]
-    pld             [r0, r1, lsl #1]
-
-    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q8, d3, d0
-    vmull.u8        q9, d5, d0
-    vmull.u8        q10, d6, d0
-    vmull.u8        q11, d8, d0
-    vmull.u8        q12, d9, d0
-    vmull.u8        q13, d11, d0
-    vmull.u8        q14, d12, d0
-
-    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
-    vext.8          d5, d5, d6, #1
-    vext.8          d8, d8, d9, #1
-    vext.8          d11, d11, d12, #1
-
-    vmlal.u8        q7, d2, d1              ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q9, d5, d1
-    vmlal.u8        q11, d8, d1
-    vmlal.u8        q13, d11, d1
-
-    vext.8          d3, d3, d4, #1
-    vext.8          d6, d6, d7, #1
-    vext.8          d9, d9, d10, #1
-    vext.8          d12, d12, d13, #1
-
-    vmlal.u8        q8, d3, d1              ;(src_ptr[0] * vp8_filter[1])
-    vmlal.u8        q10, d6, d1
-    vmlal.u8        q12, d9, d1
-    vmlal.u8        q14, d12, d1
-
-    subs            r2, r2, #1
-
-    vqrshrn.u16    d14, q7, #7              ;shift/round/saturate to u8
-    vqrshrn.u16    d15, q8, #7
-    vqrshrn.u16    d16, q9, #7
-    vqrshrn.u16    d17, q10, #7
-    vqrshrn.u16    d18, q11, #7
-    vqrshrn.u16    d19, q12, #7
-    vqrshrn.u16    d20, q13, #7
-    vst1.u8         {d14, d15}, [r4], r5        ;store result
-    vqrshrn.u16    d21, q14, #7
-
-    vst1.u8         {d16, d17}, [r4], r5
-    vst1.u8         {d18, d19}, [r4], r5
-    vst1.u8         {d20, d21}, [r4], r5
-
-    bne             filt_blk2d_fpo16x16_loop_neon
-    pop             {r4-r5,pc}
-
-;---------------------
-secondpass_bfilter16x16_only
-;Second pass: 16x16
-;secondpass_filter
-    add             r3, r12, r3, lsl #3
-    mov             r12, #4                     ;loop counter
-    vld1.u32        {d31}, [r3]                 ;load second_pass filter
-    vld1.u8         {d22, d23}, [r0], r1        ;load src data
-
-    vdup.8          d0, d31[0]                  ;second_pass filter parameters (d0 d1)
-    vdup.8          d1, d31[4]
-
-filt_blk2d_spo16x16_loop_neon
-    vld1.u8         {d24, d25}, [r0], r1
-    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
-    vld1.u8         {d26, d27}, [r0], r1
-    vmull.u8        q2, d23, d0
-    vld1.u8         {d28, d29}, [r0], r1
-    vmull.u8        q3, d24, d0
-    vld1.u8         {d30, d31}, [r0], r1
-
-    vmull.u8        q4, d25, d0
-    vmull.u8        q5, d26, d0
-    vmull.u8        q6, d27, d0
-    vmull.u8        q7, d28, d0
-    vmull.u8        q8, d29, d0
-
-    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
-    vmlal.u8        q2, d25, d1
-    vmlal.u8        q3, d26, d1
-    vmlal.u8        q4, d27, d1
-    vmlal.u8        q5, d28, d1
-    vmlal.u8        q6, d29, d1
-    vmlal.u8        q7, d30, d1
-    vmlal.u8        q8, d31, d1
-
-    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
-    vqrshrn.u16    d3, q2, #7
-    vqrshrn.u16    d4, q3, #7
-    vqrshrn.u16    d5, q4, #7
-    vqrshrn.u16    d6, q5, #7
-    vqrshrn.u16    d7, q6, #7
-    vqrshrn.u16    d8, q7, #7
-    vqrshrn.u16    d9, q8, #7
-
-    vst1.u8         {d2, d3}, [r4], r5      ;store result
-    subs            r12, r12, #1
-    vst1.u8         {d4, d5}, [r4], r5
-    vmov            q11, q15
-    vst1.u8         {d6, d7}, [r4], r5
-    vst1.u8         {d8, d9}, [r4], r5
-
-    bne             filt_blk2d_spo16x16_loop_neon
-    pop             {r4-r5,pc}
-
-    ENDP
-
-;-----------------
-
-bifilter16_coeff
-    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
-    END
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
+++ /dev/null
@@ -1,130 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_bilinear_predict4x4_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; r4    unsigned char *dst_ptr,
-; stack(lr) int  dst_pitch
-
-|vp8_bilinear_predict4x4_neon| PROC
-    push            {r4, lr}
-
-    adr             r12, bifilter4_coeff
-    ldr             r4, [sp, #8]            ;load parameters from stack
-    ldr             lr, [sp, #12]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             skip_firstpass_filter
-
-;First pass: output_height lines x output_width columns (5x4)
-    vld1.u8         {d2}, [r0], r1          ;load src data
-    add             r2, r12, r2, lsl #3     ;calculate Hfilter location (2coeffsx4bytes=8bytes)
-
-    vld1.u8         {d3}, [r0], r1
-    vld1.u32        {d31}, [r2]             ;first_pass filter
-
-    vld1.u8         {d4}, [r0], r1
-    vdup.8          d0, d31[0]              ;first_pass filter (d0-d1)
-    vld1.u8         {d5}, [r0], r1
-    vdup.8          d1, d31[4]
-    vld1.u8         {d6}, [r0], r1
-
-    vshr.u64        q4, q1, #8              ;construct src_ptr[1]
-    vshr.u64        q5, q2, #8
-    vshr.u64        d12, d6, #8
-
-    vzip.32         d2, d3                  ;put 2-line data in 1 register (src_ptr[0])
-    vzip.32         d4, d5
-    vzip.32         d8, d9                  ;put 2-line data in 1 register (src_ptr[1])
-    vzip.32         d10, d11
-
-    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q8, d4, d0
-    vmull.u8        q9, d6, d0
-
-    vmlal.u8        q7, d8, d1              ;(src_ptr[1] * vp8_filter[1])
-    vmlal.u8        q8, d10, d1
-    vmlal.u8        q9, d12, d1
-
-    vqrshrn.u16    d28, q7, #7              ;shift/round/saturate to u8
-    vqrshrn.u16    d29, q8, #7
-    vqrshrn.u16    d30, q9, #7
-
-;Second pass: 4x4
-secondpass_filter
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-    beq             skip_secondpass_filter
-
-    add             r3, r12, r3, lsl #3 ;calculate Vfilter location
-    vld1.u32        {d31}, [r3]         ;load second_pass filter
-
-    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0-d5)
-    vdup.8          d1, d31[4]
-
-    vmull.u8        q1, d28, d0
-    vmull.u8        q2, d29, d0
-
-    vext.8          d26, d28, d29, #4       ;construct src_ptr[pixel_step]
-    vext.8          d27, d29, d30, #4
-
-    vmlal.u8        q1, d26, d1
-    vmlal.u8        q2, d27, d1
-
-    add             r0, r4, lr
-    add             r1, r0, lr
-    add             r2, r1, lr
-
-    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
-    vqrshrn.u16    d3, q2, #7
-
-    vst1.32         {d2[0]}, [r4]           ;store result
-    vst1.32         {d2[1]}, [r0]
-    vst1.32         {d3[0]}, [r1]
-    vst1.32         {d3[1]}, [r2]
-
-    pop             {r4, pc}
-
-;--------------------
-skip_firstpass_filter
-
-    vld1.32         {d28[0]}, [r0], r1      ;load src data
-    vld1.32         {d28[1]}, [r0], r1
-    vld1.32         {d29[0]}, [r0], r1
-    vld1.32         {d29[1]}, [r0], r1
-    vld1.32         {d30[0]}, [r0], r1
-
-    b               secondpass_filter
-
-;---------------------
-skip_secondpass_filter
-    vst1.32         {d28[0]}, [r4], lr      ;store result
-    vst1.32         {d28[1]}, [r4], lr
-    vst1.32         {d29[0]}, [r4], lr
-    vst1.32         {d29[1]}, [r4], lr
-
-    pop             {r4, pc}
-
-    ENDP
-
-;-----------------
-
-bifilter4_coeff
-    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
-    END
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
+++ /dev/null
@@ -1,135 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_bilinear_predict8x4_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; r4    unsigned char *dst_ptr,
-; stack(lr) int  dst_pitch
-
-|vp8_bilinear_predict8x4_neon| PROC
-    push            {r4, lr}
-
-    adr             r12, bifilter8x4_coeff
-    ldr             r4, [sp, #8]            ;load parameters from stack
-    ldr             lr, [sp, #12]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             skip_firstpass_filter
-
-;First pass: output_height lines x output_width columns (5x8)
-    add             r2, r12, r2, lsl #3     ;calculate filter location
-
-    vld1.u8         {q1}, [r0], r1          ;load src data
-    vld1.u32        {d31}, [r2]             ;load first_pass filter
-    vld1.u8         {q2}, [r0], r1
-    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
-    vld1.u8         {q3}, [r0], r1
-    vdup.8          d1, d31[4]
-    vld1.u8         {q4}, [r0], r1
-
-    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
-    vld1.u8         {q5}, [r0], r1
-    vmull.u8        q7, d4, d0
-    vmull.u8        q8, d6, d0
-    vmull.u8        q9, d8, d0
-    vmull.u8        q10, d10, d0
-
-    vext.8          d3, d2, d3, #1          ;construct src_ptr[-1]
-    vext.8          d5, d4, d5, #1
-    vext.8          d7, d6, d7, #1
-    vext.8          d9, d8, d9, #1
-    vext.8          d11, d10, d11, #1
-
-    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
-    vmlal.u8        q7, d5, d1
-    vmlal.u8        q8, d7, d1
-    vmlal.u8        q9, d9, d1
-    vmlal.u8        q10, d11, d1
-
-    vqrshrn.u16    d22, q6, #7              ;shift/round/saturate to u8
-    vqrshrn.u16    d23, q7, #7
-    vqrshrn.u16    d24, q8, #7
-    vqrshrn.u16    d25, q9, #7
-    vqrshrn.u16    d26, q10, #7
-
-;Second pass: 4x8
-secondpass_filter
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-    beq             skip_secondpass_filter
-
-    add             r3, r12, r3, lsl #3
-    add             r0, r4, lr
-
-    vld1.u32        {d31}, [r3]             ;load second_pass filter
-    add             r1, r0, lr
-
-    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
-    vdup.8          d1, d31[4]
-
-    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q2, d23, d0
-    vmull.u8        q3, d24, d0
-    vmull.u8        q4, d25, d0
-
-    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
-    vmlal.u8        q2, d24, d1
-    vmlal.u8        q3, d25, d1
-    vmlal.u8        q4, d26, d1
-
-    add             r2, r1, lr
-
-    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
-    vqrshrn.u16    d3, q2, #7
-    vqrshrn.u16    d4, q3, #7
-    vqrshrn.u16    d5, q4, #7
-
-    vst1.u8         {d2}, [r4]              ;store result
-    vst1.u8         {d3}, [r0]
-    vst1.u8         {d4}, [r1]
-    vst1.u8         {d5}, [r2]
-
-    pop             {r4, pc}
-
-;--------------------
-skip_firstpass_filter
-    vld1.u8         {d22}, [r0], r1         ;load src data
-    vld1.u8         {d23}, [r0], r1
-    vld1.u8         {d24}, [r0], r1
-    vld1.u8         {d25}, [r0], r1
-    vld1.u8         {d26}, [r0], r1
-
-    b               secondpass_filter
-
-;---------------------
-skip_secondpass_filter
-    vst1.u8         {d22}, [r4], lr         ;store result
-    vst1.u8         {d23}, [r4], lr
-    vst1.u8         {d24}, [r4], lr
-    vst1.u8         {d25}, [r4], lr
-
-    pop             {r4, pc}
-
-    ENDP
-
-;-----------------
-
-bifilter8x4_coeff
-    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
-    END
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_bilinear_predict8x8_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0    unsigned char  *src_ptr,
-; r1    int  src_pixels_per_line,
-; r2    int  xoffset,
-; r3    int  yoffset,
-; r4    unsigned char *dst_ptr,
-; stack(lr) int  dst_pitch
-
-|vp8_bilinear_predict8x8_neon| PROC
-    push            {r4, lr}
-
-    adr             r12, bifilter8_coeff
-    ldr             r4, [sp, #8]            ;load parameters from stack
-    ldr             lr, [sp, #12]           ;load parameters from stack
-
-    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
-    beq             skip_firstpass_filter
-
-;First pass: output_height lines x output_width columns (9x8)
-    add             r2, r12, r2, lsl #3     ;calculate filter location
-
-    vld1.u8         {q1}, [r0], r1          ;load src data
-    vld1.u32        {d31}, [r2]             ;load first_pass filter
-    vld1.u8         {q2}, [r0], r1
-    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
-    vld1.u8         {q3}, [r0], r1
-    vdup.8          d1, d31[4]
-    vld1.u8         {q4}, [r0], r1
-
-    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q7, d4, d0
-    vmull.u8        q8, d6, d0
-    vmull.u8        q9, d8, d0
-
-    vext.8          d3, d2, d3, #1          ;construct src_ptr[-1]
-    vext.8          d5, d4, d5, #1
-    vext.8          d7, d6, d7, #1
-    vext.8          d9, d8, d9, #1
-
-    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
-    vmlal.u8        q7, d5, d1
-    vmlal.u8        q8, d7, d1
-    vmlal.u8        q9, d9, d1
-
-    vld1.u8         {q1}, [r0], r1          ;load src data
-    vqrshrn.u16    d22, q6, #7              ;shift/round/saturate to u8
-    vld1.u8         {q2}, [r0], r1
-    vqrshrn.u16    d23, q7, #7
-    vld1.u8         {q3}, [r0], r1
-    vqrshrn.u16    d24, q8, #7
-    vld1.u8         {q4}, [r0], r1
-    vqrshrn.u16    d25, q9, #7
-
-    ;first_pass filtering on the rest 5-line data
-    vld1.u8         {q5}, [r0], r1
-
-    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q7, d4, d0
-    vmull.u8        q8, d6, d0
-    vmull.u8        q9, d8, d0
-    vmull.u8        q10, d10, d0
-
-    vext.8          d3, d2, d3, #1          ;construct src_ptr[-1]
-    vext.8          d5, d4, d5, #1
-    vext.8          d7, d6, d7, #1
-    vext.8          d9, d8, d9, #1
-    vext.8          d11, d10, d11, #1
-
-    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
-    vmlal.u8        q7, d5, d1
-    vmlal.u8        q8, d7, d1
-    vmlal.u8        q9, d9, d1
-    vmlal.u8        q10, d11, d1
-
-    vqrshrn.u16    d26, q6, #7              ;shift/round/saturate to u8
-    vqrshrn.u16    d27, q7, #7
-    vqrshrn.u16    d28, q8, #7
-    vqrshrn.u16    d29, q9, #7
-    vqrshrn.u16    d30, q10, #7
-
-;Second pass: 8x8
-secondpass_filter
-    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
-    beq             skip_secondpass_filter
-
-    add             r3, r12, r3, lsl #3
-    add             r0, r4, lr
-
-    vld1.u32        {d31}, [r3]             ;load second_pass filter
-    add             r1, r0, lr
-
-    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
-    vdup.8          d1, d31[4]
-
-    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
-    vmull.u8        q2, d23, d0
-    vmull.u8        q3, d24, d0
-    vmull.u8        q4, d25, d0
-    vmull.u8        q5, d26, d0
-    vmull.u8        q6, d27, d0
-    vmull.u8        q7, d28, d0
-    vmull.u8        q8, d29, d0
-
-    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
-    vmlal.u8        q2, d24, d1
-    vmlal.u8        q3, d25, d1
-    vmlal.u8        q4, d26, d1
-    vmlal.u8        q5, d27, d1
-    vmlal.u8        q6, d28, d1
-    vmlal.u8        q7, d29, d1
-    vmlal.u8        q8, d30, d1
-
-    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
-    vqrshrn.u16    d3, q2, #7
-    vqrshrn.u16    d4, q3, #7
-    vqrshrn.u16    d5, q4, #7
-    vqrshrn.u16    d6, q5, #7
-    vqrshrn.u16    d7, q6, #7
-    vqrshrn.u16    d8, q7, #7
-    vqrshrn.u16    d9, q8, #7
-
-    vst1.u8         {d2}, [r4]              ;store result
-    vst1.u8         {d3}, [r0]
-    vst1.u8         {d4}, [r1], lr
-    vst1.u8         {d5}, [r1], lr
-    vst1.u8         {d6}, [r1], lr
-    vst1.u8         {d7}, [r1], lr
-    vst1.u8         {d8}, [r1], lr
-    vst1.u8         {d9}, [r1], lr
-
-    pop             {r4, pc}
-
-;--------------------
-skip_firstpass_filter
-    vld1.u8         {d22}, [r0], r1         ;load src data
-    vld1.u8         {d23}, [r0], r1
-    vld1.u8         {d24}, [r0], r1
-    vld1.u8         {d25}, [r0], r1
-    vld1.u8         {d26}, [r0], r1
-    vld1.u8         {d27}, [r0], r1
-    vld1.u8         {d28}, [r0], r1
-    vld1.u8         {d29}, [r0], r1
-    vld1.u8         {d30}, [r0], r1
-
-    b               secondpass_filter
-
-;---------------------
-skip_secondpass_filter
-    vst1.u8         {d22}, [r4], lr         ;store result
-    vst1.u8         {d23}, [r4], lr
-    vst1.u8         {d24}, [r4], lr
-    vst1.u8         {d25}, [r4], lr
-    vst1.u8         {d26}, [r4], lr
-    vst1.u8         {d27}, [r4], lr
-    vst1.u8         {d28}, [r4], lr
-    vst1.u8         {d29}, [r4], lr
-
-    pop             {r4, pc}
-
-    ENDP
-
-;-----------------
-
-bifilter8_coeff
-    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c
@@ -0,0 +1,699 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const uint8_t bifilter4_coeff[8][2] = {
+    {128,   0},
+    {112,  16},
+    { 96,  32},
+    { 80,  48},
+    { 64,  64},
+    { 48,  80},
+    { 32,  96},
+    { 16, 112}
+};
+
+void vp8_bilinear_predict4x4_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8;
+    uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8;
+    uint8x16_t q1u8, q2u8;
+    uint16x8_t q1u16, q2u16;
+    uint16x8_t q7u16, q8u16, q9u16;
+    uint64x2_t q4u64, q5u64;
+    uint64x1_t d12u64;
+    uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2;
+
+    if (xoffset == 0) {  // skip_1stpass_filter
+        uint32x2_t d28u32 = vdup_n_u32(0);
+        uint32x2_t d29u32 = vdup_n_u32(0);
+        uint32x2_t d30u32 = vdup_n_u32(0);
+
+        d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0);
+        src_ptr += src_pixels_per_line;
+        d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1);
+        src_ptr += src_pixels_per_line;
+        d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0);
+        src_ptr += src_pixels_per_line;
+        d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1);
+        src_ptr += src_pixels_per_line;
+        d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0);
+        d28u8 = vreinterpret_u8_u32(d28u32);
+        d29u8 = vreinterpret_u8_u32(d29u32);
+        d30u8 = vreinterpret_u8_u32(d30u32);
+    } else {
+        d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d6u8 = vld1_u8(src_ptr);
+
+        q1u8 = vcombine_u8(d2u8, d3u8);
+        q2u8 = vcombine_u8(d4u8, d5u8);
+
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+        q4u64  = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
+        q5u64  = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
+        d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
+
+        d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
+                           vreinterpret_u32_u8(vget_high_u8(q1u8)));
+        d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
+                           vreinterpret_u32_u8(vget_high_u8(q2u8)));
+        d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
+                           vreinterpret_u32_u64(vget_high_u64(q4u64)));
+        d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),
+                           vreinterpret_u32_u64(vget_high_u64(q5u64)));
+
+        q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
+        q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
+        q9u16 = vmull_u8(d6u8, d0u8);
+
+        q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8);
+        q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8);
+        q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8);
+
+        d28u8 = vqrshrn_n_u16(q7u16, 7);
+        d29u8 = vqrshrn_n_u16(q8u16, 7);
+        d30u8 = vqrshrn_n_u16(q9u16, 7);
+    }
+
+    // secondpass_filter
+    if (yoffset == 0) {  // skip_2ndpass_filter
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1);
+    } else {
+        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+        q1u16 = vmull_u8(d28u8, d0u8);
+        q2u16 = vmull_u8(d29u8, d0u8);
+
+        d26u8 = vext_u8(d28u8, d29u8, 4);
+        d27u8 = vext_u8(d29u8, d30u8, 4);
+
+        q1u16 = vmlal_u8(q1u16, d26u8, d1u8);
+        q2u16 = vmlal_u8(q2u16, d27u8, d1u8);
+
+        d2u8 = vqrshrn_n_u16(q1u16, 7);
+        d3u8 = vqrshrn_n_u16(q2u16, 7);
+
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
+        dst_ptr += dst_pitch;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
+    }
+    return;
+}
+
+void vp8_bilinear_predict8x4_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8;
+    uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8;
+    uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
+    uint16x8_t q1u16, q2u16, q3u16, q4u16;
+    uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
+
+    if (xoffset == 0) {  // skip_1stpass_filter
+        d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d26u8 = vld1_u8(src_ptr);
+    } else {
+        q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q5u8 = vld1q_u8(src_ptr);
+
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+        q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+        q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+
+        d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+        d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+        d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+        d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+
+        q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+        q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+        q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
+
+        d22u8 = vqrshrn_n_u16(q6u16, 7);
+        d23u8 = vqrshrn_n_u16(q7u16, 7);
+        d24u8 = vqrshrn_n_u16(q8u16, 7);
+        d25u8 = vqrshrn_n_u16(q9u16, 7);
+        d26u8 = vqrshrn_n_u16(q10u16, 7);
+    }
+
+    // secondpass_filter
+    if (yoffset == 0) {  // skip_2ndpass_filter
+        vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d25u8);
+    } else {
+        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+        q1u16 = vmull_u8(d22u8, d0u8);
+        q2u16 = vmull_u8(d23u8, d0u8);
+        q3u16 = vmull_u8(d24u8, d0u8);
+        q4u16 = vmull_u8(d25u8, d0u8);
+
+        q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
+        q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
+        q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
+        q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
+
+        d2u8 = vqrshrn_n_u16(q1u16, 7);
+        d3u8 = vqrshrn_n_u16(q2u16, 7);
+        d4u8 = vqrshrn_n_u16(q3u16, 7);
+        d5u8 = vqrshrn_n_u16(q4u16, 7);
+
+        vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d5u8);
+    }
+    return;
+}
+
+void vp8_bilinear_predict8x8_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8;
+    uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8;
+    uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
+    uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16;
+    uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
+
+    if (xoffset == 0) {  // skip_1stpass_filter
+        d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d30u8 = vld1_u8(src_ptr);
+    } else {
+        q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+        q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+
+        d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+        d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+        d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+
+        q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+        q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+
+        d22u8 = vqrshrn_n_u16(q6u16, 7);
+        d23u8 = vqrshrn_n_u16(q7u16, 7);
+        d24u8 = vqrshrn_n_u16(q8u16, 7);
+        d25u8 = vqrshrn_n_u16(q9u16, 7);
+
+        // first_pass filtering on the rest 5-line data
+        q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q5u8 = vld1q_u8(src_ptr);
+
+        q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+        q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+
+        d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+        d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+        d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+        d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+
+        q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+        q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+        q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
+
+        d26u8 = vqrshrn_n_u16(q6u16, 7);
+        d27u8 = vqrshrn_n_u16(q7u16, 7);
+        d28u8 = vqrshrn_n_u16(q8u16, 7);
+        d29u8 = vqrshrn_n_u16(q9u16, 7);
+        d30u8 = vqrshrn_n_u16(q10u16, 7);
+    }
+
+    // secondpass_filter
+    if (yoffset == 0) {  // skip_2ndpass_filter
+        vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d29u8);
+    } else {
+        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+        q1u16 = vmull_u8(d22u8, d0u8);
+        q2u16 = vmull_u8(d23u8, d0u8);
+        q3u16 = vmull_u8(d24u8, d0u8);
+        q4u16 = vmull_u8(d25u8, d0u8);
+        q5u16 = vmull_u8(d26u8, d0u8);
+        q6u16 = vmull_u8(d27u8, d0u8);
+        q7u16 = vmull_u8(d28u8, d0u8);
+        q8u16 = vmull_u8(d29u8, d0u8);
+
+        q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
+        q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
+        q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
+        q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
+        q5u16 = vmlal_u8(q5u16, d27u8, d1u8);
+        q6u16 = vmlal_u8(q6u16, d28u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d29u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d30u8, d1u8);
+
+        d2u8 = vqrshrn_n_u16(q1u16, 7);
+        d3u8 = vqrshrn_n_u16(q2u16, 7);
+        d4u8 = vqrshrn_n_u16(q3u16, 7);
+        d5u8 = vqrshrn_n_u16(q4u16, 7);
+        d6u8 = vqrshrn_n_u16(q5u16, 7);
+        d7u8 = vqrshrn_n_u16(q6u16, 7);
+        d8u8 = vqrshrn_n_u16(q7u16, 7);
+        d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+        vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d9u8);
+    }
+    return;
+}
+
+void vp8_bilinear_predict16x16_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    int i;
+    unsigned char tmp[272];
+    unsigned char *tmpp;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
+    uint8x8_t d19u8, d20u8, d21u8;
+    uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
+    uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8;
+    uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16;
+    uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16;
+
+    if (xoffset == 0) {  // secondpass_bfilter16x16_only
+        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+        q11u8 = vld1q_u8(src_ptr);
+        src_ptr += src_pixels_per_line;
+        for (i = 4; i > 0; i--) {
+            q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+            q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+            q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+            q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+
+            q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
+            q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
+            q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
+            q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
+            q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
+            q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
+            q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
+            q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
+
+            q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
+            q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
+            q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
+            q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
+            q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
+            q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
+            q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
+            q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
+
+            d2u8 = vqrshrn_n_u16(q1u16, 7);
+            d3u8 = vqrshrn_n_u16(q2u16, 7);
+            d4u8 = vqrshrn_n_u16(q3u16, 7);
+            d5u8 = vqrshrn_n_u16(q4u16, 7);
+            d6u8 = vqrshrn_n_u16(q5u16, 7);
+            d7u8 = vqrshrn_n_u16(q6u16, 7);
+            d8u8 = vqrshrn_n_u16(q7u16, 7);
+            d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+            q1u8 = vcombine_u8(d2u8, d3u8);
+            q2u8 = vcombine_u8(d4u8, d5u8);
+            q3u8 = vcombine_u8(d6u8, d7u8);
+            q4u8 = vcombine_u8(d8u8, d9u8);
+
+            q11u8 = q15u8;
+
+            vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
+        }
+        return;
+    }
+
+    if (yoffset == 0) {  // firstpass_bfilter16x16_only
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+        for (i = 4; i > 0 ; i--) {
+            d2u8 = vld1_u8(src_ptr);
+            d3u8 = vld1_u8(src_ptr + 8);
+            d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+            d5u8 = vld1_u8(src_ptr);
+            d6u8 = vld1_u8(src_ptr + 8);
+            d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+            d8u8 = vld1_u8(src_ptr);
+            d9u8 = vld1_u8(src_ptr + 8);
+            d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+            d11u8 = vld1_u8(src_ptr);
+            d12u8 = vld1_u8(src_ptr + 8);
+            d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+            q7u16  = vmull_u8(d2u8, d0u8);
+            q8u16  = vmull_u8(d3u8, d0u8);
+            q9u16  = vmull_u8(d5u8, d0u8);
+            q10u16 = vmull_u8(d6u8, d0u8);
+            q11u16 = vmull_u8(d8u8, d0u8);
+            q12u16 = vmull_u8(d9u8, d0u8);
+            q13u16 = vmull_u8(d11u8, d0u8);
+            q14u16 = vmull_u8(d12u8, d0u8);
+
+            d2u8  = vext_u8(d2u8, d3u8, 1);
+            d5u8  = vext_u8(d5u8, d6u8, 1);
+            d8u8  = vext_u8(d8u8, d9u8, 1);
+            d11u8 = vext_u8(d11u8, d12u8, 1);
+
+            q7u16  = vmlal_u8(q7u16, d2u8, d1u8);
+            q9u16  = vmlal_u8(q9u16, d5u8, d1u8);
+            q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
+            q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
+
+            d3u8  = vext_u8(d3u8, d4u8, 1);
+            d6u8  = vext_u8(d6u8, d7u8, 1);
+            d9u8  = vext_u8(d9u8, d10u8, 1);
+            d12u8 = vext_u8(d12u8, d13u8, 1);
+
+            q8u16  = vmlal_u8(q8u16,  d3u8, d1u8);
+            q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
+            q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
+            q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
+
+            d14u8 = vqrshrn_n_u16(q7u16, 7);
+            d15u8 = vqrshrn_n_u16(q8u16, 7);
+            d16u8 = vqrshrn_n_u16(q9u16, 7);
+            d17u8 = vqrshrn_n_u16(q10u16, 7);
+            d18u8 = vqrshrn_n_u16(q11u16, 7);
+            d19u8 = vqrshrn_n_u16(q12u16, 7);
+            d20u8 = vqrshrn_n_u16(q13u16, 7);
+            d21u8 = vqrshrn_n_u16(q14u16, 7);
+
+            q7u8 = vcombine_u8(d14u8, d15u8);
+            q8u8 = vcombine_u8(d16u8, d17u8);
+            q9u8 = vcombine_u8(d18u8, d19u8);
+            q10u8 =vcombine_u8(d20u8, d21u8);
+
+            vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch;
+        }
+        return;
+    }
+
+    d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+    d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+    d2u8 = vld1_u8(src_ptr);
+    d3u8 = vld1_u8(src_ptr + 8);
+    d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+    d5u8 = vld1_u8(src_ptr);
+    d6u8 = vld1_u8(src_ptr + 8);
+    d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+    d8u8 = vld1_u8(src_ptr);
+    d9u8 = vld1_u8(src_ptr + 8);
+    d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+    d11u8 = vld1_u8(src_ptr);
+    d12u8 = vld1_u8(src_ptr + 8);
+    d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+    // First Pass: output_height lines x output_width columns (17x16)
+    tmpp = tmp;
+    for (i = 3; i > 0; i--) {
+        q7u16  = vmull_u8(d2u8, d0u8);
+        q8u16  = vmull_u8(d3u8, d0u8);
+        q9u16  = vmull_u8(d5u8, d0u8);
+        q10u16 = vmull_u8(d6u8, d0u8);
+        q11u16 = vmull_u8(d8u8, d0u8);
+        q12u16 = vmull_u8(d9u8, d0u8);
+        q13u16 = vmull_u8(d11u8, d0u8);
+        q14u16 = vmull_u8(d12u8, d0u8);
+
+        d2u8  = vext_u8(d2u8, d3u8, 1);
+        d5u8  = vext_u8(d5u8, d6u8, 1);
+        d8u8  = vext_u8(d8u8, d9u8, 1);
+        d11u8 = vext_u8(d11u8, d12u8, 1);
+
+        q7u16  = vmlal_u8(q7u16, d2u8, d1u8);
+        q9u16  = vmlal_u8(q9u16, d5u8, d1u8);
+        q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
+        q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
+
+        d3u8  = vext_u8(d3u8, d4u8, 1);
+        d6u8  = vext_u8(d6u8, d7u8, 1);
+        d9u8  = vext_u8(d9u8, d10u8, 1);
+        d12u8 = vext_u8(d12u8, d13u8, 1);
+
+        q8u16  = vmlal_u8(q8u16,  d3u8, d1u8);
+        q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
+        q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
+        q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
+
+        d14u8 = vqrshrn_n_u16(q7u16, 7);
+        d15u8 = vqrshrn_n_u16(q8u16, 7);
+        d16u8 = vqrshrn_n_u16(q9u16, 7);
+        d17u8 = vqrshrn_n_u16(q10u16, 7);
+        d18u8 = vqrshrn_n_u16(q11u16, 7);
+        d19u8 = vqrshrn_n_u16(q12u16, 7);
+        d20u8 = vqrshrn_n_u16(q13u16, 7);
+        d21u8 = vqrshrn_n_u16(q14u16, 7);
+
+        d2u8 = vld1_u8(src_ptr);
+        d3u8 = vld1_u8(src_ptr + 8);
+        d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+        d5u8 = vld1_u8(src_ptr);
+        d6u8 = vld1_u8(src_ptr + 8);
+        d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+        d8u8 = vld1_u8(src_ptr);
+        d9u8 = vld1_u8(src_ptr + 8);
+        d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+        d11u8 = vld1_u8(src_ptr);
+        d12u8 = vld1_u8(src_ptr + 8);
+        d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+        q7u8 = vcombine_u8(d14u8, d15u8);
+        q8u8 = vcombine_u8(d16u8, d17u8);
+        q9u8 = vcombine_u8(d18u8, d19u8);
+        q10u8 = vcombine_u8(d20u8, d21u8);
+
+        vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
+        vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
+        vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16;
+        vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16;
+    }
+
+    // First-pass filtering for rest 5 lines
+    d14u8 = vld1_u8(src_ptr);
+    d15u8 = vld1_u8(src_ptr + 8);
+    d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+    q9u16  = vmull_u8(d2u8, d0u8);
+    q10u16 = vmull_u8(d3u8, d0u8);
+    q11u16 = vmull_u8(d5u8, d0u8);
+    q12u16 = vmull_u8(d6u8, d0u8);
+    q13u16 = vmull_u8(d8u8, d0u8);
+    q14u16 = vmull_u8(d9u8, d0u8);
+
+    d2u8  = vext_u8(d2u8, d3u8, 1);
+    d5u8  = vext_u8(d5u8, d6u8, 1);
+    d8u8  = vext_u8(d8u8, d9u8, 1);
+
+    q9u16  = vmlal_u8(q9u16, d2u8, d1u8);
+    q11u16 = vmlal_u8(q11u16, d5u8, d1u8);
+    q13u16 = vmlal_u8(q13u16, d8u8, d1u8);
+
+    d3u8  = vext_u8(d3u8, d4u8, 1);
+    d6u8  = vext_u8(d6u8, d7u8, 1);
+    d9u8  = vext_u8(d9u8, d10u8, 1);
+
+    q10u16 = vmlal_u8(q10u16, d3u8, d1u8);
+    q12u16 = vmlal_u8(q12u16, d6u8, d1u8);
+    q14u16 = vmlal_u8(q14u16, d9u8, d1u8);
+
+    q1u16 = vmull_u8(d11u8, d0u8);
+    q2u16 = vmull_u8(d12u8, d0u8);
+    q3u16 = vmull_u8(d14u8, d0u8);
+    q4u16 = vmull_u8(d15u8, d0u8);
+
+    d11u8 = vext_u8(d11u8, d12u8, 1);
+    d14u8 = vext_u8(d14u8, d15u8, 1);
+
+    q1u16 = vmlal_u8(q1u16, d11u8, d1u8);
+    q3u16 = vmlal_u8(q3u16, d14u8, d1u8);
+
+    d12u8 = vext_u8(d12u8, d13u8, 1);
+    d15u8 = vext_u8(d15u8, d16u8, 1);
+
+    q2u16 = vmlal_u8(q2u16, d12u8, d1u8);
+    q4u16 = vmlal_u8(q4u16, d15u8, d1u8);
+
+    d10u8 = vqrshrn_n_u16(q9u16, 7);
+    d11u8 = vqrshrn_n_u16(q10u16, 7);
+    d12u8 = vqrshrn_n_u16(q11u16, 7);
+    d13u8 = vqrshrn_n_u16(q12u16, 7);
+    d14u8 = vqrshrn_n_u16(q13u16, 7);
+    d15u8 = vqrshrn_n_u16(q14u16, 7);
+    d16u8 = vqrshrn_n_u16(q1u16, 7);
+    d17u8 = vqrshrn_n_u16(q2u16, 7);
+    d18u8 = vqrshrn_n_u16(q3u16, 7);
+    d19u8 = vqrshrn_n_u16(q4u16, 7);
+
+    q5u8 = vcombine_u8(d10u8, d11u8);
+    q6u8 = vcombine_u8(d12u8, d13u8);
+    q7u8 = vcombine_u8(d14u8, d15u8);
+    q8u8 = vcombine_u8(d16u8, d17u8);
+    q9u8 = vcombine_u8(d18u8, d19u8);
+
+    vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q9u8);
+
+    // secondpass_filter
+    d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+    d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+    tmpp = tmp;
+    q11u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    for (i = 4; i > 0; i--) {
+        q12u8 = vld1q_u8(tmpp); tmpp += 16;
+        q13u8 = vld1q_u8(tmpp); tmpp += 16;
+        q14u8 = vld1q_u8(tmpp); tmpp += 16;
+        q15u8 = vld1q_u8(tmpp); tmpp += 16;
+
+        q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
+        q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
+        q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
+        q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
+        q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
+        q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
+        q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
+
+        q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
+        q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
+        q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
+        q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
+        q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
+        q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
+        q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
+        q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
+
+        d2u8 = vqrshrn_n_u16(q1u16, 7);
+        d3u8 = vqrshrn_n_u16(q2u16, 7);
+        d4u8 = vqrshrn_n_u16(q3u16, 7);
+        d5u8 = vqrshrn_n_u16(q4u16, 7);
+        d6u8 = vqrshrn_n_u16(q5u16, 7);
+        d7u8 = vqrshrn_n_u16(q6u16, 7);
+        d8u8 = vqrshrn_n_u16(q7u16, 7);
+        d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+        q1u8 = vcombine_u8(d2u8, d3u8);
+        q2u8 = vcombine_u8(d4u8, d5u8);
+        q3u8 = vcombine_u8(d6u8, d7u8);
+        q4u8 = vcombine_u8(d8u8, d9u8);
+
+        q11u8 = q15u8;
+
+        vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
+        vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
+        vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
+        vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
+    }
+    return;
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ /dev/null
@@ -1,584 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_build_intra_predictors_mby_neon_func|
-    EXPORT  |vp8_build_intra_predictors_mby_s_neon_func|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0    unsigned char *y_buffer
-; r1    unsigned char *ypred_ptr
-; r2    int y_stride
-; r3    int mode
-; stack int Up
-; stack int Left
-
-|vp8_build_intra_predictors_mby_neon_func| PROC
-    push            {r4-r8, lr}
-
-    cmp             r3, #0
-    beq             case_dc_pred
-    cmp             r3, #1
-    beq             case_v_pred
-    cmp             r3, #2
-    beq             case_h_pred
-    cmp             r3, #3
-    beq             case_tm_pred
-
-case_dc_pred
-    ldr             r4, [sp, #24]       ; Up
-    ldr             r5, [sp, #28]       ; Left
-
-    ; Default the DC average to 128
-    mov             r12, #128
-    vdup.u8         q0, r12
-
-    ; Zero out running sum
-    mov             r12, #0
-
-    ; compute shift and jump
-    adds            r7, r4, r5
-    beq             skip_dc_pred_up_left
-
-    ; Load above row, if it exists
-    cmp             r4, #0
-    beq             skip_dc_pred_up
-
-    sub             r6, r0, r2
-    vld1.8          {q1}, [r6]
-    vpaddl.u8       q2, q1
-    vpaddl.u16      q3, q2
-    vpaddl.u32      q4, q3
-
-    vmov.32         r4, d8[0]
-    vmov.32         r6, d9[0]
-
-    add             r12, r4, r6
-
-    ; Move back to interger registers
-
-skip_dc_pred_up
-
-    cmp             r5, #0
-    beq             skip_dc_pred_left
-
-    sub             r0, r0, #1
-
-    ; Load left row, if it exists
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0]
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-skip_dc_pred_left
-    add             r7, r7, #3          ; Shift
-    sub             r4, r7, #1
-    mov             r5, #1
-    add             r12, r12, r5, lsl r4
-    mov             r5, r12, lsr r7     ; expected_dc
-
-    vdup.u8         q0, r5
-
-skip_dc_pred_up_left
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-
-    pop             {r4-r8,pc}
-case_v_pred
-    ; Copy down above row
-    sub             r6, r0, r2
-    vld1.8          {q0}, [r6]
-
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q0}, [r1]!
-    pop             {r4-r8,pc}
-
-case_h_pred
-    ; Load 4x yleft_col
-    sub             r0, r0, #1
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q1}, [r1]!
-    vst1.u8         {q2}, [r1]!
-    vst1.u8         {q3}, [r1]!
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q1}, [r1]!
-    vst1.u8         {q2}, [r1]!
-    vst1.u8         {q3}, [r1]!
-
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q1}, [r1]!
-    vst1.u8         {q2}, [r1]!
-    vst1.u8         {q3}, [r1]!
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q1}, [r1]!
-    vst1.u8         {q2}, [r1]!
-    vst1.u8         {q3}, [r1]!
-
-    pop             {r4-r8,pc}
-
-case_tm_pred
-    ; Load yabove_row
-    sub             r3, r0, r2
-    vld1.8          {q8}, [r3]
-
-    ; Load ytop_left
-    sub             r3, r3, #1
-    ldrb            r7, [r3]
-
-    vdup.u16        q7, r7
-
-    ; Compute yabove_row - ytop_left
-    mov             r3, #1
-    vdup.u8         q0, r3
-
-    vmull.u8        q4, d16, d0
-    vmull.u8        q5, d17, d0
-
-    vsub.s16        q4, q4, q7
-    vsub.s16        q5, q5, q7
-
-    ; Load 4x yleft_col
-    sub             r0, r0, #1
-    mov             r12, #4
-
-case_tm_pred_loop
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u16        q0, r3
-    vdup.u16        q1, r4
-    vdup.u16        q2, r5
-    vdup.u16        q3, r6
-
-    vqadd.s16       q8, q0, q4
-    vqadd.s16       q9, q0, q5
-
-    vqadd.s16       q10, q1, q4
-    vqadd.s16       q11, q1, q5
-
-    vqadd.s16       q12, q2, q4
-    vqadd.s16       q13, q2, q5
-
-    vqadd.s16       q14, q3, q4
-    vqadd.s16       q15, q3, q5
-
-    vqshrun.s16     d0, q8, #0
-    vqshrun.s16     d1, q9, #0
-
-    vqshrun.s16     d2, q10, #0
-    vqshrun.s16     d3, q11, #0
-
-    vqshrun.s16     d4, q12, #0
-    vqshrun.s16     d5, q13, #0
-
-    vqshrun.s16     d6, q14, #0
-    vqshrun.s16     d7, q15, #0
-
-    vst1.u8         {q0}, [r1]!
-    vst1.u8         {q1}, [r1]!
-    vst1.u8         {q2}, [r1]!
-    vst1.u8         {q3}, [r1]!
-
-    subs            r12, r12, #1
-    bne             case_tm_pred_loop
-
-    pop             {r4-r8,pc}
-
-    ENDP
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; r0    unsigned char *y_buffer
-; r1    unsigned char *ypred_ptr
-; r2    int y_stride
-; r3    int mode
-; stack int Up
-; stack int Left
-
-|vp8_build_intra_predictors_mby_s_neon_func| PROC
-    push            {r4-r8, lr}
-
-    mov             r1, r0      ;   unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor;
-
-    cmp             r3, #0
-    beq             case_dc_pred_s
-    cmp             r3, #1
-    beq             case_v_pred_s
-    cmp             r3, #2
-    beq             case_h_pred_s
-    cmp             r3, #3
-    beq             case_tm_pred_s
-
-case_dc_pred_s
-    ldr             r4, [sp, #24]       ; Up
-    ldr             r5, [sp, #28]       ; Left
-
-    ; Default the DC average to 128
-    mov             r12, #128
-    vdup.u8         q0, r12
-
-    ; Zero out running sum
-    mov             r12, #0
-
-    ; compute shift and jump
-    adds            r7, r4, r5
-    beq             skip_dc_pred_up_left_s
-
-    ; Load above row, if it exists
-    cmp             r4, #0
-    beq             skip_dc_pred_up_s
-
-    sub             r6, r0, r2
-    vld1.8          {q1}, [r6]
-    vpaddl.u8       q2, q1
-    vpaddl.u16      q3, q2
-    vpaddl.u32      q4, q3
-
-    vmov.32         r4, d8[0]
-    vmov.32         r6, d9[0]
-
-    add             r12, r4, r6
-
-    ; Move back to interger registers
-
-skip_dc_pred_up_s
-
-    cmp             r5, #0
-    beq             skip_dc_pred_left_s
-
-    sub             r0, r0, #1
-
-    ; Load left row, if it exists
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0]
-
-    add             r12, r12, r3
-    add             r12, r12, r4
-    add             r12, r12, r5
-    add             r12, r12, r6
-
-skip_dc_pred_left_s
-    add             r7, r7, #3          ; Shift
-    sub             r4, r7, #1
-    mov             r5, #1
-    add             r12, r12, r5, lsl r4
-    mov             r5, r12, lsr r7     ; expected_dc
-
-    vdup.u8         q0, r5
-
-skip_dc_pred_up_left_s
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-
-    pop             {r4-r8,pc}
-case_v_pred_s
-    ; Copy down above row
-    sub             r6, r0, r2
-    vld1.8          {q0}, [r6]
-
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q0}, [r1], r2
-    pop             {r4-r8,pc}
-
-case_h_pred_s
-    ; Load 4x yleft_col
-    sub             r0, r0, #1
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q1}, [r1], r2
-    vst1.u8         {q2}, [r1], r2
-    vst1.u8         {q3}, [r1], r2
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q1}, [r1], r2
-    vst1.u8         {q2}, [r1], r2
-    vst1.u8         {q3}, [r1], r2
-
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q1}, [r1], r2
-    vst1.u8         {q2}, [r1], r2
-    vst1.u8         {q3}, [r1], r2
-
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u8         q0, r3
-    vdup.u8         q1, r4
-    vdup.u8         q2, r5
-    vdup.u8         q3, r6
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q1}, [r1], r2
-    vst1.u8         {q2}, [r1], r2
-    vst1.u8         {q3}, [r1], r2
-
-    pop             {r4-r8,pc}
-
-case_tm_pred_s
-    ; Load yabove_row
-    sub             r3, r0, r2
-    vld1.8          {q8}, [r3]
-
-    ; Load ytop_left
-    sub             r3, r3, #1
-    ldrb            r7, [r3]
-
-    vdup.u16        q7, r7
-
-    ; Compute yabove_row - ytop_left
-    mov             r3, #1
-    vdup.u8         q0, r3
-
-    vmull.u8        q4, d16, d0
-    vmull.u8        q5, d17, d0
-
-    vsub.s16        q4, q4, q7
-    vsub.s16        q5, q5, q7
-
-    ; Load 4x yleft_col
-    sub             r0, r0, #1
-    mov             r12, #4
-
-case_tm_pred_loop_s
-    ldrb            r3, [r0], r2
-    ldrb            r4, [r0], r2
-    ldrb            r5, [r0], r2
-    ldrb            r6, [r0], r2
-    vdup.u16        q0, r3
-    vdup.u16        q1, r4
-    vdup.u16        q2, r5
-    vdup.u16        q3, r6
-
-    vqadd.s16       q8, q0, q4
-    vqadd.s16       q9, q0, q5
-
-    vqadd.s16       q10, q1, q4
-    vqadd.s16       q11, q1, q5
-
-    vqadd.s16       q12, q2, q4
-    vqadd.s16       q13, q2, q5
-
-    vqadd.s16       q14, q3, q4
-    vqadd.s16       q15, q3, q5
-
-    vqshrun.s16     d0, q8, #0
-    vqshrun.s16     d1, q9, #0
-
-    vqshrun.s16     d2, q10, #0
-    vqshrun.s16     d3, q11, #0
-
-    vqshrun.s16     d4, q12, #0
-    vqshrun.s16     d5, q13, #0
-
-    vqshrun.s16     d6, q14, #0
-    vqshrun.s16     d7, q15, #0
-
-    vst1.u8         {q0}, [r1], r2
-    vst1.u8         {q1}, [r1], r2
-    vst1.u8         {q2}, [r1], r2
-    vst1.u8         {q3}, [r1], r2
-
-    subs            r12, r12, #1
-    bne             case_tm_pred_loop_s
-
-    pop             {r4-r8,pc}
-
-    ENDP
-
-
-    END
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/copymem16x16_neon.asm
+++ /dev/null
@@ -1,59 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_copy_mem16x16_neon|
-    ; ARM
-    ; REQUIRE8
-    ; PRESERVE8
-
-    AREA    Block, CODE, READONLY ; name this block of code
-;void copy_mem16x16_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem16x16_neon| PROC
-
-    vld1.u8     {q0}, [r0], r1
-    vld1.u8     {q1}, [r0], r1
-    vld1.u8     {q2}, [r0], r1
-    vst1.u8     {q0}, [r2], r3
-    vld1.u8     {q3}, [r0], r1
-    vst1.u8     {q1}, [r2], r3
-    vld1.u8     {q4}, [r0], r1
-    vst1.u8     {q2}, [r2], r3
-    vld1.u8     {q5}, [r0], r1
-    vst1.u8     {q3}, [r2], r3
-    vld1.u8     {q6}, [r0], r1
-    vst1.u8     {q4}, [r2], r3
-    vld1.u8     {q7}, [r0], r1
-    vst1.u8     {q5}, [r2], r3
-    vld1.u8     {q8}, [r0], r1
-    vst1.u8     {q6}, [r2], r3
-    vld1.u8     {q9}, [r0], r1
-    vst1.u8     {q7}, [r2], r3
-    vld1.u8     {q10}, [r0], r1
-    vst1.u8     {q8}, [r2], r3
-    vld1.u8     {q11}, [r0], r1
-    vst1.u8     {q9}, [r2], r3
-    vld1.u8     {q12}, [r0], r1
-    vst1.u8     {q10}, [r2], r3
-    vld1.u8     {q13}, [r0], r1
-    vst1.u8     {q11}, [r2], r3
-    vld1.u8     {q14}, [r0], r1
-    vst1.u8     {q12}, [r2], r3
-    vld1.u8     {q15}, [r0], r1
-    vst1.u8     {q13}, [r2], r3
-    vst1.u8     {q14}, [r2], r3
-    vst1.u8     {q15}, [r2], r3
-
-    mov     pc, lr
-
-    ENDP  ; |vp8_copy_mem16x16_neon|
-
-    END
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/copymem8x4_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_copy_mem8x4_neon|
-    ; ARM
-    ; REQUIRE8
-    ; PRESERVE8
-
-    AREA    Block, CODE, READONLY ; name this block of code
-;void copy_mem8x4_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem8x4_neon| PROC
-    vld1.u8     {d0}, [r0], r1
-    vld1.u8     {d1}, [r0], r1
-    vst1.u8     {d0}, [r2], r3
-    vld1.u8     {d2}, [r0], r1
-    vst1.u8     {d1}, [r2], r3
-    vld1.u8     {d3}, [r0], r1
-    vst1.u8     {d2}, [r2], r3
-    vst1.u8     {d3}, [r2], r3
-
-    mov     pc, lr
-
-    ENDP  ; |vp8_copy_mem8x4_neon|
-
-    END
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/copymem8x8_neon.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_copy_mem8x8_neon|
-    ; ARM
-    ; REQUIRE8
-    ; PRESERVE8
-
-    AREA    Block, CODE, READONLY ; name this block of code
-;void copy_mem8x8_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem8x8_neon| PROC
-
-    vld1.u8     {d0}, [r0], r1
-    vld1.u8     {d1}, [r0], r1
-    vst1.u8     {d0}, [r2], r3
-    vld1.u8     {d2}, [r0], r1
-    vst1.u8     {d1}, [r2], r3
-    vld1.u8     {d3}, [r0], r1
-    vst1.u8     {d2}, [r2], r3
-    vld1.u8     {d4}, [r0], r1
-    vst1.u8     {d3}, [r2], r3
-    vld1.u8     {d5}, [r0], r1
-    vst1.u8     {d4}, [r2], r3
-    vld1.u8     {d6}, [r0], r1
-    vst1.u8     {d5}, [r2], r3
-    vld1.u8     {d7}, [r0], r1
-    vst1.u8     {d6}, [r2], r3
-    vst1.u8     {d7}, [r2], r3
-
-    mov     pc, lr
-
-    ENDP  ; |vp8_copy_mem8x8_neon|
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/copymem_neon.c
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_copy_mem8x4_neon(
+        unsigned char *src,
+        int src_stride,
+        unsigned char *dst,
+        int dst_stride) {
+    uint8x8_t vtmp;
+    int r;
+
+    for (r = 0; r < 4; r++) {
+        vtmp = vld1_u8(src);
+        vst1_u8(dst, vtmp);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
+
+void vp8_copy_mem8x8_neon(
+        unsigned char *src,
+        int src_stride,
+        unsigned char *dst,
+        int dst_stride) {
+    uint8x8_t vtmp;
+    int r;
+
+    for (r = 0; r < 8; r++) {
+        vtmp = vld1_u8(src);
+        vst1_u8(dst, vtmp);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
+
+void vp8_copy_mem16x16_neon(
+        unsigned char *src,
+        int src_stride,
+        unsigned char *dst,
+        int dst_stride) {
+    int r;
+    uint8x16_t qtmp;
+
+    for (r = 0; r < 16; r++) {
+        qtmp = vld1q_u8(src);
+        vst1q_u8(dst, qtmp);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.asm
+++ /dev/null
@@ -1,54 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dc_only_idct_add_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
-;                            int pred_stride, unsigned char *dst_ptr,
-;                            int dst_stride)
-
-; r0  input_dc
-; r1  pred_ptr
-; r2  pred_stride
-; r3  dst_ptr
-; sp  dst_stride
-
-|vp8_dc_only_idct_add_neon| PROC
-    add             r0, r0, #4
-    asr             r0, r0, #3
-    ldr             r12, [sp]
-    vdup.16         q0, r0
-
-    vld1.32         {d2[0]}, [r1], r2
-    vld1.32         {d2[1]}, [r1], r2
-    vld1.32         {d4[0]}, [r1], r2
-    vld1.32         {d4[1]}, [r1]
-
-    vaddw.u8        q1, q0, d2
-    vaddw.u8        q2, q0, d4
-
-    vqmovun.s16     d2, q1
-    vqmovun.s16     d4, q2
-
-    vst1.32         {d2[0]}, [r3], r12
-    vst1.32         {d2[1]}, [r3], r12
-    vst1.32         {d4[0]}, [r3], r12
-    vst1.32         {d4[1]}, [r3]
-
-    bx              lr
-
-    ENDP
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dc_only_idct_add_neon(
+        int16_t input_dc,
+        unsigned char *pred_ptr,
+        int pred_stride,
+        unsigned char *dst_ptr,
+        int dst_stride) {
+    int i;
+    uint16_t a1 = ((input_dc + 4) >> 3);
+    uint32x2_t d2u32 = vdup_n_u32(0);
+    uint8x8_t d2u8;
+    uint16x8_t q1u16;
+    uint16x8_t qAdd;
+
+    qAdd = vdupq_n_u16(a1);
+
+    for (i = 0; i < 2; i++) {
+        d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
+        pred_ptr += pred_stride;
+        d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
+        pred_ptr += pred_stride;
+
+        q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
+        d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
+        dst_ptr += dst_stride;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
+        dst_ptr += dst_stride;
+    }
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/dequant_idct_neon.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_idct_add_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_add_neon(short *input, short *dq,
-;                           unsigned char *dest, int stride)
-; r0    short *input,
-; r1    short *dq,
-; r2    unsigned char *dest
-; r3    int stride
-
-|vp8_dequant_idct_add_neon| PROC
-    vld1.16         {q3, q4}, [r0]
-    vld1.16         {q5, q6}, [r1]
-
-    add             r1, r2, r3              ; r1 = dest + stride
-    lsl             r3, #1                  ; 2x stride
-
-    vld1.32         {d14[0]}, [r2], r3
-    vld1.32         {d14[1]}, [r1], r3
-    vld1.32         {d15[0]}, [r2]
-    vld1.32         {d15[1]}, [r1]
-
-    adr             r12, cospi8sqrt2minus1  ; pointer to the first constant
-
-    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
-    vmul.i16        q2, q4, q6
-
-;|short_idct4x4llm_neon| PROC
-    vld1.16         {d0}, [r12]
-    vswp            d3, d4                  ;q2(vp[4] vp[12])
-
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2
-    vqadd.s16       q4, q4, q2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-; memset(input, 0, 32) -- 32bytes
-    vmov.i16        q14, #0
-
-    vswp            d3, d4
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vmov            q15, q14
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2
-    vqadd.s16       q4, q4, q2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vst1.16         {q14, q15}, [r0]
-
-    vrshr.s16       d2, d2, #3
-    vrshr.s16       d3, d3, #3
-    vrshr.s16       d4, d4, #3
-    vrshr.s16       d5, d5, #3
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-    vaddw.u8        q1, q1, d14
-    vaddw.u8        q2, q2, d15
-
-    sub             r2, r2, r3
-    sub             r1, r1, r3
-
-    vqmovun.s16     d0, q1
-    vqmovun.s16     d1, q2
-
-    vst1.32         {d0[0]}, [r2], r3
-    vst1.32         {d0[1]}, [r1], r3
-    vst1.32         {d1[0]}, [r2]
-    vst1.32         {d1[1]}, [r1]
-
-    bx             lr
-
-    ENDP           ; |vp8_dequant_idct_add_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2       DCD 0x8a8c8a8c
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/dequant_idct_neon.c
@@ -0,0 +1,142 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2       = 35468;
+
+void vp8_dequant_idct_add_neon(
+        int16_t *input,
+        int16_t *dq,
+        unsigned char *dst,
+        int stride) {
+    unsigned char *dst0;
+    int32x2_t d14, d15;
+    int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+    int16x8_t q1, q2, q3, q4, q5, q6;
+    int16x8_t qEmpty = vdupq_n_s16(0);
+    int32x2x2_t d2tmp0, d2tmp1;
+    int16x4x2_t d2tmp2, d2tmp3;
+
+    d14 = d15 = vdup_n_s32(0);
+
+    // load input
+    q3 = vld1q_s16(input);
+    vst1q_s16(input, qEmpty);
+    input += 8;
+    q4 = vld1q_s16(input);
+    vst1q_s16(input, qEmpty);
+
+    // load dq
+    q5 = vld1q_s16(dq);
+    dq += 8;
+    q6 = vld1q_s16(dq);
+
+    // load src from dst
+    dst0 = dst;
+    d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
+    dst0 += stride;
+    d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
+    dst0 += stride;
+    d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
+    dst0 += stride;
+    d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
+
+    q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
+                                         vreinterpretq_u16_s16(q5)));
+    q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
+                                         vreinterpretq_u16_s16(q6)));
+
+    d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
+    d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
+
+    q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
+
+    q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+    q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+    q3 = vshrq_n_s16(q3, 1);
+    q4 = vshrq_n_s16(q4, 1);
+
+    q3 = vqaddq_s16(q3, q2);
+    q4 = vqaddq_s16(q4, q2);
+
+    d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+    d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);
+
+    d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+                      vreinterpret_s16_s32(d2tmp1.val[0]));
+    d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+                      vreinterpret_s16_s32(d2tmp1.val[1]));
+
+    // loop 2
+    q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
+
+    q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+    q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+    d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
+    d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
+
+    q3 = vshrq_n_s16(q3, 1);
+    q4 = vshrq_n_s16(q4, 1);
+
+    q3 = vqaddq_s16(q3, q2);
+    q4 = vqaddq_s16(q4, q2);
+
+    d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+    d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);
+
+    d2 = vrshr_n_s16(d2, 3);
+    d3 = vrshr_n_s16(d3, 3);
+    d4 = vrshr_n_s16(d4, 3);
+    d5 = vrshr_n_s16(d5, 3);
+
+    d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+                      vreinterpret_s16_s32(d2tmp1.val[0]));
+    d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+                      vreinterpret_s16_s32(d2tmp1.val[1]));
+
+    q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
+    q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
+
+    q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
+                                        vreinterpret_u8_s32(d14)));
+    q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
+                                        vreinterpret_u8_s32(d15)));
+
+    d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
+    d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
+
+    dst0 = dst;
+    vst1_lane_s32((int32_t *)dst0, d14, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst0, d14, 1);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst0, d15, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst0, d15, 1);
+    return;
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/dequantizeb_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequantize_b_loop_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0    short *Q,
-; r1    short *DQC
-; r2    short *DQ
-|vp8_dequantize_b_loop_neon| PROC
-    vld1.16         {q0, q1}, [r0]
-    vld1.16         {q2, q3}, [r1]
-
-    vmul.i16        q4, q0, q2
-    vmul.i16        q5, q1, q3
-
-    vst1.16         {q4, q5}, [r2]
-
-    bx             lr
-
-    ENDP
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/dequantizeb_neon.c
@@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "vp8/common/blockd.h"
+
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
+    int16x8x2_t qQ, qDQC, qDQ;
+
+    qQ   = vld2q_s16(d->qcoeff);
+    qDQC = vld2q_s16(DQC);
+
+    qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
+    qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
+
+    vst2q_s16(d->dqcoeff, qDQ);
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |idct_dequant_0_2x_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;void idct_dequant_0_2x_neon(short *q, short dq,
-;                            unsigned char *dst, int stride);
-; r0   *q
-; r1   dq
-; r2   *dst
-; r3   stride
-|idct_dequant_0_2x_neon| PROC
-    push            {r4, r5}
-
-    add             r12, r2, #4
-    vld1.32         {d2[0]}, [r2], r3
-    vld1.32         {d8[0]}, [r12], r3
-    vld1.32         {d2[1]}, [r2], r3
-    vld1.32         {d8[1]}, [r12], r3
-    vld1.32         {d4[0]}, [r2], r3
-    vld1.32         {d10[0]}, [r12], r3
-    vld1.32         {d4[1]}, [r2], r3
-    vld1.32         {d10[1]}, [r12], r3
-
-    ldrh            r12, [r0]               ; lo q
-    ldrh            r4, [r0, #32]           ; hi q
-    mov             r5, #0
-    strh            r5, [r0]
-    strh            r5, [r0, #32]
-
-    sxth            r12, r12                ; lo
-    mul             r0, r12, r1
-    add             r0, r0, #4
-    asr             r0, r0, #3
-    vdup.16         q0, r0
-    sxth            r4, r4                  ; hi
-    mul             r0, r4, r1
-    add             r0, r0, #4
-    asr             r0, r0, #3
-    vdup.16         q3, r0
-
-    vaddw.u8        q1, q0, d2              ; lo
-    vaddw.u8        q2, q0, d4
-    vaddw.u8        q4, q3, d8              ; hi
-    vaddw.u8        q5, q3, d10
-
-    sub             r2, r2, r3, lsl #2      ; dst - 4*stride
-    add             r0, r2, #4
-
-    vqmovun.s16     d2, q1                  ; lo
-    vqmovun.s16     d4, q2
-    vqmovun.s16     d8, q4                  ; hi
-    vqmovun.s16     d10, q5
-
-    vst1.32         {d2[0]}, [r2], r3       ; lo
-    vst1.32         {d8[0]}, [r0], r3       ; hi
-    vst1.32         {d2[1]}, [r2], r3
-    vst1.32         {d8[1]}, [r0], r3
-    vst1.32         {d4[0]}, [r2], r3
-    vst1.32         {d10[0]}, [r0], r3
-    vst1.32         {d4[1]}, [r2]
-    vst1.32         {d10[1]}, [r0]
-
-    pop             {r4, r5}
-    bx              lr
-
-    ENDP            ; |idct_dequant_0_2x_neon|
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
@@ -0,0 +1,62 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void idct_dequant_0_2x_neon(
+        int16_t *q,
+        int16_t dq,
+        unsigned char *dst,
+        int stride) {
+    unsigned char *dst0;
+    int i, a0, a1;
+    int16x8x2_t q2Add;
+    int32x2_t d2s32, d4s32;
+    uint8x8_t d2u8, d4u8;
+    uint16x8_t q1u16, q2u16;
+
+    a0 = ((q[0] * dq) + 4) >> 3;
+    a1 = ((q[16] * dq) + 4) >> 3;
+    q[0] = q[16] = 0;
+    q2Add.val[0] = vdupq_n_s16((int16_t)a0);
+    q2Add.val[1] = vdupq_n_s16((int16_t)a1);
+
+    for (i = 0; i < 2; i++, dst += 4) {
+        dst0 = dst;
+        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
+        dst0 += stride;
+        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
+        dst0 += stride;
+        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
+        dst0 += stride;
+        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
+
+        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+                         vreinterpret_u8_s32(d2s32));
+        q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+                         vreinterpret_u8_s32(d4s32));
+
+        d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+        d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
+
+        d2s32 = vreinterpret_s32_u8(d2u8);
+        d4s32 = vreinterpret_s32_u8(d4u8);
+
+        dst0 = dst;
+        vst1_lane_s32((int32_t *)dst0, d2s32, 0);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d2s32, 1);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d4s32, 0);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d4s32, 1);
+    }
+    return;
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
+++ /dev/null
@@ -1,196 +0,0 @@
-;
-;  Copyright (c) 2010 The Webm project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |idct_dequant_full_2x_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;void idct_dequant_full_2x_neon(short *q, short *dq,
-;                               unsigned char *dst, int stride);
-; r0    *q,
-; r1    *dq,
-; r2    *dst
-; r3    stride
-|idct_dequant_full_2x_neon| PROC
-    vld1.16         {q0, q1}, [r1]          ; dq (same l/r)
-    vld1.16         {q2, q3}, [r0]          ; l q
-    add             r0, r0, #32
-    vld1.16         {q4, q5}, [r0]          ; r q
-    add             r12, r2, #4
-
-    ; interleave the predictors
-    vld1.32         {d28[0]}, [r2],  r3     ; l pre
-    vld1.32         {d28[1]}, [r12], r3     ; r pre
-    vld1.32         {d29[0]}, [r2],  r3
-    vld1.32         {d29[1]}, [r12], r3
-    vld1.32         {d30[0]}, [r2],  r3
-    vld1.32         {d30[1]}, [r12], r3
-    vld1.32         {d31[0]}, [r2],  r3
-    vld1.32         {d31[1]}, [r12]
-
-    adr             r1, cospi8sqrt2minus1   ; pointer to the first constant
-
-    ; dequant: q[i] = q[i] * dq[i]
-    vmul.i16        q2, q2, q0
-    vmul.i16        q3, q3, q1
-    vmul.i16        q4, q4, q0
-    vmul.i16        q5, q5, q1
-
-    vld1.16         {d0}, [r1]
-
-    ; q2: l0r0  q3: l8r8
-    ; q4: l4r4  q5: l12r12
-    vswp            d5, d8
-    vswp            d7, d10
-
-    ; _CONSTANTS_ * 4,12 >> 16
-    ; q6:  4 * sinpi : c1/temp1
-    ; q7: 12 * sinpi : d1/temp2
-    ; q8:  4 * cospi
-    ; q9: 12 * cospi
-    vqdmulh.s16     q6, q4, d0[2]           ; sinpi8sqrt2
-    vqdmulh.s16     q7, q5, d0[2]
-    vqdmulh.s16     q8, q4, d0[0]           ; cospi8sqrt2minus1
-    vqdmulh.s16     q9, q5, d0[0]
-
-    vqadd.s16       q10, q2, q3             ; a1 = 0 + 8
-    vqsub.s16       q11, q2, q3             ; b1 = 0 - 8
-
-    ; vqdmulh only accepts signed values. this was a problem because
-    ; our constant had the high bit set, and was treated as a negative value.
-    ; vqdmulh also doubles the value before it shifts by 16. we need to
-    ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
-    ; so we can shift the constant without losing precision. this avoids
-    ; shift again afterward, but also avoids the sign issue. win win!
-    ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
-    ; pre-shift it
-    vshr.s16        q8, q8, #1
-    vshr.s16        q9, q9, #1
-
-    ; q4:  4 +  4 * cospi : d1/temp1
-    ; q5: 12 + 12 * cospi : c1/temp2
-    vqadd.s16       q4, q4, q8
-    vqadd.s16       q5, q5, q9
-
-    ; c1 = temp1 - temp2
-    ; d1 = temp1 + temp2
-    vqsub.s16       q2, q6, q5
-    vqadd.s16       q3, q4, q7
-
-    ; [0]: a1+d1
-    ; [1]: b1+c1
-    ; [2]: b1-c1
-    ; [3]: a1-d1
-    vqadd.s16       q4, q10, q3
-    vqadd.s16       q5, q11, q2
-    vqsub.s16       q6, q11, q2
-    vqsub.s16       q7, q10, q3
-
-    ; rotate
-    vtrn.32         q4, q6
-    vtrn.32         q5, q7
-    vtrn.16         q4, q5
-    vtrn.16         q6, q7
-    ; idct loop 2
-    ; q4: l 0, 4, 8,12 r 0, 4, 8,12
-    ; q5: l 1, 5, 9,13 r 1, 5, 9,13
-    ; q6: l 2, 6,10,14 r 2, 6,10,14
-    ; q7: l 3, 7,11,15 r 3, 7,11,15
-
-    ; q8:  1 * sinpi : c1/temp1
-    ; q9:  3 * sinpi : d1/temp2
-    ; q10: 1 * cospi
-    ; q11: 3 * cospi
-    vqdmulh.s16     q8, q5, d0[2]           ; sinpi8sqrt2
-    vqdmulh.s16     q9, q7, d0[2]
-    vqdmulh.s16     q10, q5, d0[0]          ; cospi8sqrt2minus1
-    vqdmulh.s16     q11, q7, d0[0]
-
-    vqadd.s16       q2, q4, q6             ; a1 = 0 + 2
-    vqsub.s16       q3, q4, q6             ; b1 = 0 - 2
-
-    ; see note on shifting above
-    vshr.s16        q10, q10, #1
-    vshr.s16        q11, q11, #1
-
-    ; q10: 1 + 1 * cospi : d1/temp1
-    ; q11: 3 + 3 * cospi : c1/temp2
-    vqadd.s16       q10, q5, q10
-    vqadd.s16       q11, q7, q11
-
-    ; q8: c1 = temp1 - temp2
-    ; q9: d1 = temp1 + temp2
-    vqsub.s16       q8, q8, q11
-    vqadd.s16       q9, q10, q9
-
-    ; a1+d1
-    ; b1+c1
-    ; b1-c1
-    ; a1-d1
-    vqadd.s16       q4, q2, q9
-    vqadd.s16       q5, q3, q8
-    vqsub.s16       q6, q3, q8
-    vqsub.s16       q7, q2, q9
-
-    ; +4 >> 3 (rounding)
-    vrshr.s16       q4, q4, #3              ; lo
-    vrshr.s16       q5, q5, #3
-    vrshr.s16       q6, q6, #3              ; hi
-    vrshr.s16       q7, q7, #3
-
-    vtrn.32         q4, q6
-    vtrn.32         q5, q7
-    vtrn.16         q4, q5
-    vtrn.16         q6, q7
-
-    ; adding pre
-    ; input is still packed. pre was read interleaved
-    vaddw.u8        q4, q4, d28
-    vaddw.u8        q5, q5, d29
-    vaddw.u8        q6, q6, d30
-    vaddw.u8        q7, q7, d31
-
-    vmov.i16        q14, #0
-    vmov            q15, q14
-    vst1.16         {q14, q15}, [r0]        ; write over high input
-    sub             r0, r0, #32
-    vst1.16         {q14, q15}, [r0]        ; write over low input
-
-    sub             r2, r2, r3, lsl #2      ; dst - 4*stride
-    add             r1, r2, #4              ; hi
-
-    ;saturate and narrow
-    vqmovun.s16     d0, q4                  ; lo
-    vqmovun.s16     d1, q5
-    vqmovun.s16     d2, q6                  ; hi
-    vqmovun.s16     d3, q7
-
-    vst1.32         {d0[0]}, [r2], r3       ; lo
-    vst1.32         {d0[1]}, [r1], r3       ; hi
-    vst1.32         {d1[0]}, [r2], r3
-    vst1.32         {d1[1]}, [r1], r3
-    vst1.32         {d2[0]}, [r2], r3
-    vst1.32         {d2[1]}, [r1], r3
-    vst1.32         {d3[0]}, [r2]
-    vst1.32         {d3[1]}, [r1]
-
-    bx             lr
-
-    ENDP           ; |idct_dequant_full_2x_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b
-; because the lowest bit in 0x8a8c is 0, we can pre-shift this
-sinpi8sqrt2       DCD 0x4546
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c
@@ -0,0 +1,185 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2       = 17734;
+// because the lowest bit in 0x8a8c is 0, we can pre-shift this
+
+void idct_dequant_full_2x_neon(
+        int16_t *q,
+        int16_t *dq,
+        unsigned char *dst,
+        int stride) {
+    unsigned char *dst0, *dst1;
+    int32x2_t d28, d29, d30, d31;
+    int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
+    int16x8_t qEmpty = vdupq_n_s16(0);
+    int32x4x2_t q2tmp0, q2tmp1;
+    int16x8x2_t q2tmp2, q2tmp3;
+    int16x4_t dLow0, dLow1, dHigh0, dHigh1;
+
+    d28 = d29 = d30 = d31 = vdup_n_s32(0);
+
+    // load dq
+    q0 = vld1q_s16(dq);
+    dq += 8;
+    q1 = vld1q_s16(dq);
+
+    // load q
+    q2 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+    q += 8;
+    q3 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+    q += 8;
+    q4 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+    q += 8;
+    q5 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+
+    // load src from dst
+    dst0 = dst;
+    dst1 = dst + 4;
+    d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
+    dst0 += stride;
+    d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
+    dst1 += stride;
+    d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
+    dst0 += stride;
+    d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
+    dst1 += stride;
+
+    d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
+    dst0 += stride;
+    d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
+    dst1 += stride;
+    d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
+    d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
+
+    q2 = vmulq_s16(q2, q0);
+    q3 = vmulq_s16(q3, q1);
+    q4 = vmulq_s16(q4, q0);
+    q5 = vmulq_s16(q5, q1);
+
+    // vswp
+    dLow0 = vget_low_s16(q2);
+    dHigh0 = vget_high_s16(q2);
+    dLow1 = vget_low_s16(q4);
+    dHigh1 = vget_high_s16(q4);
+    q2 = vcombine_s16(dLow0, dLow1);
+    q4 = vcombine_s16(dHigh0, dHigh1);
+
+    dLow0 = vget_low_s16(q3);
+    dHigh0 = vget_high_s16(q3);
+    dLow1 = vget_low_s16(q5);
+    dHigh1 = vget_high_s16(q5);
+    q3 = vcombine_s16(dLow0, dLow1);
+    q5 = vcombine_s16(dHigh0, dHigh1);
+
+    q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
+    q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
+    q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
+    q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
+
+    q10 = vqaddq_s16(q2, q3);
+    q11 = vqsubq_s16(q2, q3);
+
+    q8 = vshrq_n_s16(q8, 1);
+    q9 = vshrq_n_s16(q9, 1);
+
+    q4 = vqaddq_s16(q4, q8);
+    q5 = vqaddq_s16(q5, q9);
+
+    q2 = vqsubq_s16(q6, q5);
+    q3 = vqaddq_s16(q7, q4);
+
+    q4 = vqaddq_s16(q10, q3);
+    q5 = vqaddq_s16(q11, q2);
+    q6 = vqsubq_s16(q11, q2);
+    q7 = vqsubq_s16(q10, q3);
+
+    q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
+    q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
+    q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
+                       vreinterpretq_s16_s32(q2tmp1.val[0]));
+    q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
+                       vreinterpretq_s16_s32(q2tmp1.val[1]));
+
+    // loop 2
+    q8  = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
+    q9  = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
+    q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
+    q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
+
+    q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
+    q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
+
+    q10 = vshrq_n_s16(q10, 1);
+    q11 = vshrq_n_s16(q11, 1);
+
+    q10 = vqaddq_s16(q2tmp2.val[1], q10);
+    q11 = vqaddq_s16(q2tmp3.val[1], q11);
+
+    q8 = vqsubq_s16(q8, q11);
+    q9 = vqaddq_s16(q9, q10);
+
+    q4 = vqaddq_s16(q2, q9);
+    q5 = vqaddq_s16(q3, q8);
+    q6 = vqsubq_s16(q3, q8);
+    q7 = vqsubq_s16(q2, q9);
+
+    q4 = vrshrq_n_s16(q4, 3);
+    q5 = vrshrq_n_s16(q5, 3);
+    q6 = vrshrq_n_s16(q6, 3);
+    q7 = vrshrq_n_s16(q7, 3);
+
+    q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
+    q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
+    q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
+                       vreinterpretq_s16_s32(q2tmp1.val[0]));
+    q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
+                       vreinterpretq_s16_s32(q2tmp1.val[1]));
+
+    q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
+                                          vreinterpret_u8_s32(d28)));
+    q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
+                                          vreinterpret_u8_s32(d29)));
+    q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
+                                          vreinterpret_u8_s32(d30)));
+    q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
+                                          vreinterpret_u8_s32(d31)));
+
+    d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
+    d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
+    d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
+    d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
+
+    dst0 = dst;
+    dst1 = dst + 4;
+    vst1_lane_s32((int32_t *)dst0, d28, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst1, d28, 1);
+    dst1 += stride;
+    vst1_lane_s32((int32_t *)dst0, d29, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst1, d29, 1);
+    dst1 += stride;
+
+    vst1_lane_s32((int32_t *)dst0, d30, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst1, d30, 1);
+    dst1 += stride;
+    vst1_lane_s32((int32_t *)dst0, d31, 0);
+    vst1_lane_s32((int32_t *)dst1, d31, 1);
+    return;
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/iwalsh_neon.asm
+++ /dev/null
@@ -1,87 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-    EXPORT  |vp8_short_inv_walsh4x4_neon|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-
-;short vp8_short_inv_walsh4x4_neon(short *input, short *mb_dqcoeff)
-|vp8_short_inv_walsh4x4_neon| PROC
-
-    ; read in all four lines of values: d0->d3
-    vld1.i16 {q0-q1}, [r0@128]
-
-    ; first for loop
-    vadd.s16 d4, d0, d3 ;a = [0] + [12]
-    vadd.s16 d6, d1, d2 ;b = [4] + [8]
-    vsub.s16 d5, d0, d3 ;d = [0] - [12]
-    vsub.s16 d7, d1, d2 ;c = [4] - [8]
-
-    vadd.s16 q0, q2, q3 ; a+b d+c
-    vsub.s16 q1, q2, q3 ; a-b d-c
-
-    vtrn.32 d0, d2 ;d0:  0  1  8  9
-                   ;d2:  2  3 10 11
-    vtrn.32 d1, d3 ;d1:  4  5 12 13
-                   ;d3:  6  7 14 15
-
-    vtrn.16 d0, d1 ;d0:  0  4  8 12
-                   ;d1:  1  5  9 13
-    vtrn.16 d2, d3 ;d2:  2  6 10 14
-                   ;d3:  3  7 11 15
-
-    ; second for loop
-
-    vadd.s16 d4, d0, d3 ;a = [0] + [3]
-    vadd.s16 d6, d1, d2 ;b = [1] + [2]
-    vsub.s16 d5, d0, d3 ;d = [0] - [3]
-    vsub.s16 d7, d1, d2 ;c = [1] - [2]
-
-    vmov.i16 q8, #3
-
-    vadd.s16 q0, q2, q3 ; a+b d+c
-    vsub.s16 q1, q2, q3 ; a-b d-c
-
-    vadd.i16 q0, q0, q8 ;e/f += 3
-    vadd.i16 q1, q1, q8 ;g/h += 3
-
-    vshr.s16 q0, q0, #3 ;e/f >> 3
-    vshr.s16 q1, q1, #3 ;g/h >> 3
-
-    mov      r2, #64
-    add      r3, r1, #32
-
-    vst1.i16 d0[0], [r1],r2
-    vst1.i16 d1[0], [r3],r2
-    vst1.i16 d2[0], [r1],r2
-    vst1.i16 d3[0], [r3],r2
-
-    vst1.i16 d0[1], [r1],r2
-    vst1.i16 d1[1], [r3],r2
-    vst1.i16 d2[1], [r1],r2
-    vst1.i16 d3[1], [r3],r2
-
-    vst1.i16 d0[2], [r1],r2
-    vst1.i16 d1[2], [r3],r2
-    vst1.i16 d2[2], [r1],r2
-    vst1.i16 d3[2], [r3],r2
-
-    vst1.i16 d0[3], [r1],r2
-    vst1.i16 d1[3], [r3],r2
-    vst1.i16 d2[3], [r1]
-    vst1.i16 d3[3], [r3]
-
-    bx lr
-    ENDP    ; |vp8_short_inv_walsh4x4_neon|
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/iwalsh_neon.c
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_short_inv_walsh4x4_neon(
+        int16_t *input,
+        int16_t *mb_dqcoeff) {
+    int16x8_t q0s16, q1s16, q2s16, q3s16;
+    int16x4_t d4s16, d5s16, d6s16, d7s16;
+    int16x4x2_t v2tmp0, v2tmp1;
+    int32x2x2_t v2tmp2, v2tmp3;
+    int16x8_t qAdd3;
+
+    q0s16 = vld1q_s16(input);
+    q1s16 = vld1q_s16(input + 8);
+
+    // 1st for loop
+    d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
+    d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
+    d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
+    d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
+
+    q2s16 = vcombine_s16(d4s16, d5s16);
+    q3s16 = vcombine_s16(d6s16, d7s16);
+
+    q0s16 = vaddq_s16(q2s16, q3s16);
+    q1s16 = vsubq_s16(q2s16, q3s16);
+
+    v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
+                      vreinterpret_s32_s16(vget_low_s16(q1s16)));
+    v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
+                      vreinterpret_s32_s16(vget_high_s16(q1s16)));
+    v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
+                      vreinterpret_s16_s32(v2tmp3.val[0]));
+    v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
+                      vreinterpret_s16_s32(v2tmp3.val[1]));
+
+    // 2nd for loop
+    d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
+    d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
+    d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
+    d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
+    q2s16 = vcombine_s16(d4s16, d5s16);
+    q3s16 = vcombine_s16(d6s16, d7s16);
+
+    qAdd3 = vdupq_n_s16(3);
+
+    q0s16 = vaddq_s16(q2s16, q3s16);
+    q1s16 = vsubq_s16(q2s16, q3s16);
+
+    q0s16 = vaddq_s16(q0s16, qAdd3);
+    q1s16 = vaddq_s16(q1s16, qAdd3);
+
+    q0s16 = vshrq_n_s16(q0s16, 3);
+    q1s16 = vshrq_n_s16(q1s16, 3);
+
+    // store
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
+    mb_dqcoeff += 16;
+    return;
+}
deleted file mode 100644
--- a/media/libvpx/vp8/common/arm/neon/loopfilter_neon.asm
+++ /dev/null
@@ -1,397 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_horizontal_edge_y_neon|
-    EXPORT  |vp8_loop_filter_horizontal_edge_uv_neon|
-    EXPORT  |vp8_loop_filter_vertical_edge_y_neon|
-    EXPORT  |vp8_loop_filter_vertical_edge_uv_neon|
-    ARM
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0    unsigned char *src
-; r1    int pitch
-; r2    unsigned char blimit
-; r3    unsigned char limit
-; sp    unsigned char thresh,
-|vp8_loop_filter_horizontal_edge_y_neon| PROC
-    push        {lr}
-    vdup.u8     q0, r2                     ; duplicate blimit
-    vdup.u8     q1, r3                     ; duplicate limit
-    sub         r2, r0, r1, lsl #2         ; move src pointer down by 4 lines
-    ldr         r3, [sp, #4]               ; load thresh
-    add         r12, r2, r1
-    add         r1, r1, r1
-
-    vdup.u8     q2, r3                     ; duplicate thresh
-
-    vld1.u8     {q3}, [r2@128], r1              ; p3
-    vld1.u8     {q4}, [r12@128], r1             ; p2
-    vld1.u8     {q5}, [r2@128], r1              ; p1
-    vld1.u8     {q6}, [r12@128], r1             ; p0
-    vld1.u8     {q7}, [r2@128], r1              ; q0
-    vld1.u8     {q8}, [r12@128], r1             ; q1
-    vld1.u8     {q9}, [r2@128]                  ; q2
-    vld1.u8     {q10}, [r12@128]                ; q3
-
-    sub         r2, r2, r1, lsl #1
-    sub         r12, r12, r1, lsl #1
-
-    bl          vp8_loop_filter_neon
-
-    vst1.u8     {q5}, [r2@128], r1              ; store op1
-    vst1.u8     {q6}, [r12@128], r1             ; store op0
-    vst1.u8     {q7}, [r2@128], r1              ; store oq0
-    vst1.u8     {q8}, [r12@128], r1             ; store oq1
-
-    pop         {pc}
-    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
-
-
-; r0    unsigned char *u,
-; r1    int pitch,
-; r2    unsigned char blimit
-; r3    unsigned char limit
-; sp    unsigned char thresh,
-; sp+4  unsigned char *v
-|vp8_loop_filter_horizontal_edge_uv_neon| PROC
-    push        {lr}
-    vdup.u8     q0, r2                      ; duplicate blimit
-    vdup.u8     q1, r3                      ; duplicate limit
-    ldr         r12, [sp, #4]               ; load thresh
-    ldr         r2, [sp, #8]                ; load v ptr
-    vdup.u8     q2, r12                     ; duplicate thresh
-
-    sub         r3, r0, r1, lsl #2          ; move u pointer down by 4 lines
-    sub         r12, r2, r1, lsl #2         ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r3@64], r1              ; p3
-    vld1.u8     {d7}, [r12@64], r1             ; p3
-    vld1.u8     {d8}, [r3@64], r1              ; p2
-    vld1.u8     {d9}, [r12@64], r1             ; p2
-    vld1.u8     {d10}, [r3@64], r1             ; p1
-    vld1.u8     {d11}, [r12@64], r1            ; p1
-    vld1.u8     {d12}, [r3@64], r1             ; p0
-    vld1.u8     {d13}, [r12@64], r1            ; p0
-    vld1.u8     {d14}, [r3@64], r1             ; q0
-    vld1.u8     {d15}, [r12@64], r1            ; q0
-    vld1.u8     {d16}, [r3@64], r1             ; q1
-    vld1.u8     {d17}, [r12@64], r1            ; q1
-    vld1.u8     {d18}, [r3@64], r1             ; q2
-    vld1.u8     {d19}, [r12@64], r1            ; q2
-    vld1.u8     {d20}, [r3@64]                 ; q3
-    vld1.u8     {d21}, [r12@64]                ; q3
-
-    bl          vp8_loop_filter_neon
-
-    sub         r0, r0, r1, lsl #1
-    sub         r2, r2, r1, lsl #1
-
-    vst1.u8     {d10}, [r0@64], r1             ; store u op1
-    vst1.u8     {d11}, [r2@64], r1             ; store v op1
-    vst1.u8     {d12}, [r0@64], r1             ; store u op0
-    vst1.u8     {d13}, [r2@64], r1             ; store v op0
-    vst1.u8     {d14}, [r0@64], r1             ; store u oq0
-    vst1.u8     {d15}, [r2@64], r1             ; store v oq0
-    vst1.u8     {d16}, [r0@64]                 ; store u oq1
-    vst1.u8     {d17}, [r2@64]                 ; store v oq1
-
-    pop         {pc}
-    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
-
-; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
-;                                           const signed char *flimit,
-;                                           const signed char *limit,
-;                                           const signed char *thresh,
-;                                           int count)
-; r0    unsigned char *src
-; r1    int pitch
-; r2    unsigned char blimit
-; r3    unsigned char limit
-; sp    unsigned char thresh,
-
-|vp8_loop_filter_vertical_edge_y_neon| PROC
-    push        {lr}
-    vdup.u8     q0, r2                     ; duplicate blimit
-    vdup.u8     q1, r3                     ; duplicate limit
-    sub         r2, r0, #4                 ; src ptr down by 4 columns
-    add         r1, r1, r1
-    ldr         r3, [sp, #4]               ; load thresh
-    add         r12, r2, r1, asr #1
-
-    vld1.u8     {d6}, [r2], r1
-    vld1.u8     {d8}, [r12], r1
-    vld1.u8     {d10}, [r2], r1
-    vld1.u8     {d12}, [r12], r1
-    vld1.u8     {d14}, [r2], r1
-    vld1.u8     {d16}, [r12], r1
-    vld1.u8     {d18}, [r2], r1
-    vld1.u8     {d20}, [r12], r1
-
-    vld1.u8     {d7}, [r2], r1              ; load second 8-line src data
-    vld1.u8     {d9}, [r12], r1
-    vld1.u8     {d11}, [r2], r1
-    vld1.u8     {d13}, [r12], r1
-    vld1.u8     {d15}, [r2], r1
-    vld1.u8     {d17}, [r12], r1
-    vld1.u8     {d19}, [r2]
-    vld1.u8     {d21}, [r12]
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vdup.u8     q2, r3                     ; duplicate thresh
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    bl          vp8_loop_filter_neon
-
-    vswp        d12, d11
-    vswp        d16, d13
-
-    sub         r0, r0, #2                 ; dst ptr
-
-    vswp        d14, d12
-    vswp        d16, d15
-
-    add         r12, r0, r1, asr #1
-
-    ;store op1, op0, oq0, oq1
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r12], r1
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r12], r1
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r12], r1
-
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r12], r1
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r12], r1
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0]
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r12]
-
-    pop         {pc}
-    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
-
-; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
-;                                            const signed char *flimit,
-;                                            const signed char *limit,
-;                                            const signed char *thresh,
-;                                            unsigned char *v)
-; r0    unsigned char *u,
-; r1    int pitch,
-; r2    unsigned char blimit
-; r3    unsigned char limit
-; sp    unsigned char thresh,
-; sp+4  unsigned char *v
-|vp8_loop_filter_vertical_edge_uv_neon| PROC
-    push        {lr}
-    vdup.u8     q0, r2                      ; duplicate blimit
-    sub         r12, r0, #4                 ; move u pointer down by 4 columns
-    ldr         r2, [sp, #8]                ; load v ptr
-    vdup.u8     q1, r3                      ; duplicate limit
-    sub         r3, r2, #4                  ; move v pointer down by 4 columns
-
-    vld1.u8     {d6}, [r12], r1             ;load u data
-    vld1.u8     {d7}, [r3], r1              ;load v data
-    vld1.u8     {d8}, [r12], r1
-    vld1.u8     {d9}, [r3], r1
-    vld1.u8     {d10}, [r12], r1
-    vld1.u8     {d11}, [r3], r1
-    vld1.u8     {d12}, [r12], r1
-    vld1.u8     {d13}, [r3], r1
-    vld1.u8     {d14}, [r12], r1
-    vld1.u8     {d15}, [r3], r1
-    vld1.u8     {d16}, [r12], r1
-    vld1.u8     {d17}, [r3], r1
-    vld1.u8     {d18}, [r12], r1
-    vld1.u8     {d19}, [r3], r1
-    vld1.u8     {d20}, [r12]
-    vld1.u8     {d21}, [r3]
-
-    ldr        r12, [sp, #4]               ; load thresh
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vdup.u8     q2, r12                     ; duplicate thresh
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    bl          vp8_loop_filter_neon
-
-    vswp        d12, d11
-    vswp        d16, d13
-    vswp        d14, d12
-    vswp        d16, d15
-
-    sub         r0, r0, #2
-    sub         r2, r2, #2
-
-    ;store op1, op0, oq0, oq1
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0]
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
-
-    pop         {pc}
-    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
-
-; void vp8_loop_filter_neon();
-; This is a helper function for the loopfilters. The invidual functions do the
-; necessary load, transpose (if necessary) and store.
-
-; r0-r3 PRESERVE
-; q0    flimit
-; q1    limit
-; q2    thresh
-; q3    p3
-; q4    p2
-; q5    p1
-; q6    p0
-; q7    q0
-; q8    q1
-; q9    q2
-; q10   q3
-|vp8_loop_filter_neon| PROC
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vmov.u8     q10, #0x80                   ; 0x80
-
-    vabd.u8     q2, q5, q8                  ; a = abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; b = abs(p0 - q0) * 2
-
-    vcge.u8     q15, q1, q15
-
-    ; vp8_filter() function
-    ; convert to signed
-    veor        q7, q7, q10                 ; qs0
-    vshr.u8     q2, q2, #1                  ; a = a / 2
-    veor        q6, q6, q10                 ; ps0
-
-    veor        q5, q5, q10                 ; ps1
-    vqadd.u8    q9, q9, q2                  ; a = b + a
-
-    veor        q8, q8, q10                 ; qs1
-
-    vmov.u8     q10, #3                     ; #3
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vcge.u8     q9, q0, q9                  ; (a > flimit * 2 + limit) * -1
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vmov.u8     q9, #4                      ; #4
-
-    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d2, q2
-    vqmovn.s16  d3, q11
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vqadd.s8    q2, q1, q10                 ; Filter2 = clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-
-
-    vqadd.s8    q11, q6, q2                 ; u = clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = clamp(qs0 - Filter1)
-
-    ; outer tap adjustments: ++vp8_filter >> 1
-    vrshr.s8    q1, q1, #1
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-    vmov.u8     q0, #0x80                   ; 0x80
-    vqadd.s8    q13, q5, q1                 ; u = clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = clamp(qs1 - vp8_filter)
-
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
-
-;-----------------
-
-    END
new file mode 100644
--- /dev/null
+++ b/media/libvpx/vp8/common/arm/neon/loopfilter_neon.c
@@ -0,0 +1,549 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+
+static INLINE void vp8_loop_filter_neon(
+        uint8x16_t qblimit,  // flimit
+        uint8x16_t qlimit,   // limit
+        uint8x16_t qthresh,  // thresh
+        uint8x16_t q3,       // p3
+        uint8x16_t q4,       // p2
+        uint8x16_t q5,       // p1
+        uint8x16_t q6,       // p0
+        uint8x16_t q7,       // q0
+        uint8x16_t q8,       // q1
+        uint8x16_t q9,       // q2
+        uint8x16_t q10,      // q3
+        uint8x16_t *q5r,     // p1
+        uint8x16_t *q6r,     // p0
+        uint8x16_t *q7r,     // q0
+        uint8x16_t *q8r) {   // q1
+    uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
+    int16x8_t q2s16, q11s16;
+    uint16x8_t q4u16;
+    int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
+    int8x8_t d2s8, d3s8;
+
+    q11u8 = vabdq_u8(q3, q4);
+    q12u8 = vabdq_u8(q4, q5);
+    q13u8 = vabdq_u8(q5, q6);
+    q14u8 = vabdq_u8(q8, q7);
+    q3    = vabdq_u8(q9, q8);
+    q4    = vabdq_u8(q10, q9);
+
+    q11u8 = vmaxq_u8(q11u8, q12u8);
+    q12u8 = vmaxq_u8(q13u8, q14u8);
+    q3    = vmaxq_u8(q3, q4);
+    q15u8 = vmaxq_u8(q11u8, q12u8);
+
+    q9 = vabdq_u8(q6, q7);
+
+    // vp8_hevmask
+    q13u8 = vcgtq_u8(q13u8, qthresh);
+    q14u8 = vcgtq_u8(q14u8, qthresh);
+    q15u8 = vmaxq_u8(q15u8, q3);
+
+    q2u8 = vabdq_u8(q5, q8);
+    q9 = vqaddq_u8(q9, q9);
+