Bug 671818 - Use unaligned loads for chroma in ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, r=regression, a=jpr
author: Timothy B. Terriberry <tterribe@vt.edu>
date: Wed, 03 Aug 2011 15:50:04 -0700
changeset: 72774 454007c65ceae9fc4d290cef6fe7010c3a8b106e
parent: 72773 0bf3da55f665f8af56c42c1e16f8016d7e895b5a
child: 72775 25b41e0dcd7d872131f2cc8a1c81449c9d15db3d
push id: 302
push user: tterriberry@mozilla.com
push date: Wed, 10 Aug 2011 16:10:39 +0000
treeherder: mozilla-aurora@454007c65cea [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: regression, jpr
bugs: 671818
milestone: 7.0a2
Bug 671818 - Use unaligned loads for chroma in ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, r=regression, a=jpr
gfx/ycbcr/yuv_row_arm.s
--- a/gfx/ycbcr/yuv_row_arm.s
+++ b/gfx/ycbcr/yuv_row_arm.s
@@ -214,22 +214,22 @@ s42xbily_neon_loop:
     VRSHRN.S16  D9, Q5, #8
     MOV         r12,r12,ASR #17
     VADD.I8     Q8, Q10,Q4     @ Q8 = a+((b-a)*xweight+128>>8)
     @ Start extracting the chroma x coordinates, and load Cb and Cr.
     AND         r12,r12,#~15   @ Read 16-byte aligned blocks
     VDUP.I32    Q9, r9         @ Q9 = source_uv_xoffs_q16 x 4
     ADD         r14,r2, r12
     VADD.I32    Q10,Q0, Q9
-    VLD1.64     {D8, D9, D10,D11},[r14,:128]   @ Load Cb
+    VLD1.64     {D8, D9, D10,D11},[r14]        @ Load Cb
     PLD         [r14,#64]
     VADD.I32    Q11,Q1, Q9
     ADD         r14,r3, r12
     VADD.I32    Q12,Q2, Q9
-    VLD1.64     {D12,D13,D14,D15},[r14,:128]   @ Load Cr
+    VLD1.64     {D12,D13,D14,D15},[r14]        @ Load Cr
     PLD         [r14,#64]
     VADD.I32    Q13,Q3, Q9
     VRSHRN.S32  D20,Q10,#9     @ Q10 = <xEwExCwCxAwAx8w8x6w6x4w4x2w2x0w0>
     VRSHRN.S32  D21,Q11,#9
     VDUP.I8     Q9, r12
     VRSHRN.S32  D22,Q12,#9     @ Q11 = <xFwFxDwDxBwBx9w9x7w7x5w5x3w3x1w1>
     VRSHRN.S32  D23,Q13,#9
     @ We don't actually need the x weights, but we get them for free.