Bug 926838 - [Part 3] Fix GCC compiler errors, text relocation and build errors on emulators. r=ehsan
author JW Wang <jwwang@mozilla.com>
Wed, 13 Nov 2013 11:07:29 +0800
changeset 274126 a5fc6164d6271f5d14439030428dbeb1e527627d
parent 274125 2605d73cc96631d7b1d61a8cb4b5d585635144fb
child 274127 af920a0895a7c461de2648464601e7e9dddb3d19
push id 863
push user raliiev@mozilla.com
push date Mon, 03 Aug 2015 13:22:43 +0000
treeherder mozilla-release@f6321b14228d [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ehsan
bugs 926838
milestone 40.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 926838 - [Part 3] Fix GCC compiler errors, text relocation and build errors on emulators. r=ehsan
media/openmax_dl/dl/api/armCOMM_s.h
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S
--- a/media/openmax_dl/dl/api/armCOMM_s.h
+++ b/media/openmax_dl/dl/api/armCOMM_s.h
@@ -165,16 +165,19 @@
         @ a compliant function.
 	.macro	M_START name, rreg, dreg
 	.set	_Workspace, 0
 
 	@ Define the function and make it external.
 	.global	\name
 	.func	\name
 	.section	.text.\name,"ax",%progbits
+	.arch armv7-a
+	.fpu neon
+	.object_arch armv4
 	.align	2
 \name :		
 .fnstart
 	@ Save specified R registers
 	_M_GETRREGLIST	\rreg
 	_M_PUSH_RREG
 
 	@ Save specified D registers
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S
@@ -88,23 +88,23 @@
         RSB      dstStep,outPointStep,#16
 
 
         @// Loop on 2 grps at a time for the last stage
 
 radix2lsGrpLoop\name :
         @ dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
         @ dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
-        VLD2    {dWr,dWi},[pTwiddle :64]!
+        VLD2    {dWr,dWi},[pTwiddle, :64]!
 
         @ dXr0 = [pSrc[0].Re, pSrc[2].Re]
         @ dXi0 = [pSrc[0].Im, pSrc[2].Im]
         @ dXr1 = [pSrc[1].Re, pSrc[3].Re]
         @ dXi1 = [pSrc[1].Im, pSrc[3].Im]
-        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
+        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc, :128]!
         SUBS    grpCount,grpCount,#4                   @// grpCount is multiplied by 2
 
         .ifeqs  "\inverse", "TRUE"
             VMUL   qT0,dWr,dXr1
             VMLA   qT0,dWi,dXi1                       @// real part
             VMUL   qT1,dWr,dXi1
             VMLS   qT1,dWi,dXr1                       @// imag part
 
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
@@ -113,37 +113,37 @@
         @// pT0+1 increments pT0 by 8 bytes
         @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
         @// Note: outPointStep = pointStep for firststage
 
         MOV     pointStep,subFFTNum,LSL #1
 
 
         @// Update pSubFFTSize and pSubFFTNum regs
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0]
         @// subFFTSize = 1 for the first stage
         MOV     subFFTSize,#4
 
         @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
         LSR     grpSize,subFFTNum,#2
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
         MOV     subFFTNum,grpSize
 
 
         @// Calculate the step of input data for the next set
         @//MOV     setStep,pointStep,LSL #1
         MOV     setStep,grpSize,LSL #4
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
         @// setStep = 3*pointStep
         ADD     setStep,setStep,pointStep
         @// setStep = - 3*pointStep+16
         RSB     setStep,setStep,#16
 
         @//  data[3] & update pSrc for the next set
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep
+        VLD2    {dXr3,dXi3},[pSrc, :128],setStep
         @// step1 = 2*pointStep
         MOV     step1,pointStep,LSL #1
 
         VADD    qY0,qX0,qX2
 
         @// step3 = -pointStep
         RSB     step3,pointStep,#0
 
@@ -158,76 +158,76 @@ radix4fsGrpZeroSetLoop\name :
         SUBS    setCount,setCount,#2
 
 
         @// finish first stage of 4 point FFT
 
 
         VSUB    qY2,qX0,qX2
 
-        VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],step1          @//  data[0]
         VADD    qY1,qX1,qX3
-        VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
+        VLD2    {dXr2,dXi2},[pSrc, :128],step3          @//  data[2]
         VSUB    qY3,qX1,qX3
 
 
         @// finish second stage of 4 point FFT
 
         .ifeqs "\inverse", "TRUE"
 
-            VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
             VADD    qZ0,qY0,qY1
 
             @//  data[3] & update pSrc for the next set, but not if it's the
             @//  last iteration so that we don't read past the end of the 
             @//  input array.
             BEQ     radix4SkipLastUpdateInv\name
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
+            VLD2    {dXr3,dXi3},[pSrc, :128],setStep
 radix4SkipLastUpdateInv\name:
             VSUB    dZr3,dYr2,dYi3
 
-            VST2    {dZr0,dZi0},[pDst :128],outPointStep
+            VST2    {dZr0,dZi0},[pDst, :128],outPointStep
             VADD    dZi3,dYi2,dYr3
 
             VSUB    qZ1,qY0,qY1
-            VST2    {dZr3,dZi3},[pDst :128],outPointStep
+            VST2    {dZr3,dZi3},[pDst, :128],outPointStep
 
             VADD    dZr2,dYr2,dYi3
-            VST2    {dZr1,dZi1},[pDst :128],outPointStep
+            VST2    {dZr1,dZi1},[pDst, :128],outPointStep
             VSUB    dZi2,dYi2,dYr3
 
             VADD    qY0,qX0,qX2                     @// u0 for next iteration
-            VST2    {dZr2,dZi2},[pDst :128],setStep
+            VST2    {dZr2,dZi2},[pDst, :128],setStep
 
 
         .else
 
-            VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
             VADD    qZ0,qY0,qY1
 
             @//  data[3] & update pSrc for the next set, but not if it's the
             @//  last iteration so that we don't read past the end of the 
             @//  input array.
             BEQ     radix4SkipLastUpdateFwd\name
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
+            VLD2    {dXr3,dXi3},[pSrc, :128],setStep
 radix4SkipLastUpdateFwd\name:
             VADD    dZr2,dYr2,dYi3
 
-            VST2    {dZr0,dZi0},[pDst :128],outPointStep
+            VST2    {dZr0,dZi0},[pDst, :128],outPointStep
             VSUB    dZi2,dYi2,dYr3
 
             VSUB    qZ1,qY0,qY1
-            VST2    {dZr2,dZi2},[pDst :128],outPointStep
+            VST2    {dZr2,dZi2},[pDst, :128],outPointStep
 
             VSUB    dZr3,dYr2,dYi3
-            VST2    {dZr1,dZi1},[pDst :128],outPointStep
+            VST2    {dZr1,dZi1},[pDst, :128],outPointStep
             VADD    dZi3,dYi2,dYr3
 
             VADD    qY0,qX0,qX2                     @// u0 for next iteration
-            VST2    {dZr3,dZi3},[pDst :128],setStep
+            VST2    {dZr3,dZi3},[pDst, :128],setStep
 
         .endif
 
         BGT     radix4fsGrpZeroSetLoop\name
 
         @// reset pSrc to pDst for the next stage
         SUB     pSrc,pDst,pointStep                     @// pDst -= 2*grpSize
         MOV     pDst,pPingPongBuf
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S
@@ -134,42 +134,42 @@
 
 
         @// pOut0+1 increments pOut0 by 8 bytes
         @// pOut0+outPointStep == increment of 8*outPointStep bytes
         MOV     outPointStep,subFFTSize,LSL #3
 
         @// Update grpCount and grpSize rightaway
 
-        VLD2    {dW1r,dW1i},[pTwiddle :128]             @// [wi|wr]
+        VLD2    {dW1r,dW1i},[pTwiddle, :128]             @// [wi|wr]
         MOV     step16,#16
         LSL     grpCount,subFFTSize,#2
 
-        VLD1    dW2r,[pTwiddle :64]                     @// [wi|wr]
+        VLD1    dW2r,[pTwiddle, :64]                     @// [wi|wr]
         MOV     subFFTNum,#1                            @//after the last stage
 
-        VLD1    dW3r,[pTwiddle :64],step16              @// [wi|wr]
+        VLD1    dW3r,[pTwiddle, :64],step16              @// [wi|wr]
         MOV     stepTwiddle,#0
 
-        VLD1    dW2i,[pTwiddle :64]!                    @// [wi|wr]
+        VLD1    dW2i,[pTwiddle, :64]!                    @// [wi|wr]
         SUB     grpTwStep,stepTwiddle,#8                @// grpTwStep = -8 to start with
 
         @// update subFFTSize for the next stage
         MOV     subFFTSize,grpCount
-        VLD1    dW3i,[pTwiddle :64],grpTwStep           @// [wi|wr]
+        VLD1    dW3i,[pTwiddle, :64],grpTwStep           @// [wi|wr]
         MOV     dstStep,outPointStep,LSL #1
 
         @// AC.r AC.i BD.r BD.i
-        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]!
+        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc, :256]!
         ADD     dstStep,dstStep,outPointStep            @// dstStep = 3*outPointStep
         RSB     dstStep,dstStep,#16                     @// dstStep = - 3*outPointStep+16
         MOV     step24,#24
 
         @// AC.r AC.i BD.r BD.i
-        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]!
+        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc, :256]!
 
 
         @// Process two groups at a time
 
 radix4lsGrpLoop\name :
 
         VZIP    dW2r,dW2i
         ADD     stepTwiddle,stepTwiddle,#16
@@ -199,69 +199,69 @@ radix4lsGrpLoop\name :
 
             VMUL   dZr1,dW1r,dXr1
             VMLS   dZr1,dW1i,dXi1                       @// real part
             VMUL   dZi1,dW1r,dXi1
             VMLA   dZi1,dW1i,dXr1                       @// imag part
 
         .endif
 
-        VLD2    {dW1r,dW1i},[pTwiddle :128],stepTwiddle      @// [wi|wr]
+        VLD2    {dW1r,dW1i},[pTwiddle, :128],stepTwiddle      @// [wi|wr]
 
         .ifeqs  "\inverse", "TRUE"
             VMUL   dZr2,dW2r,dXr2
             VMLA   dZr2,dW2i,dXi2                       @// real part
             VMUL   dZi2,dW2r,dXi2
-            VLD1   dW2r,[pTwiddle :64],step16           @// [wi|wr]
+            VLD1   dW2r,[pTwiddle, :64],step16           @// [wi|wr]
             VMLS   dZi2,dW2i,dXr2                       @// imag part
 
         .else
 
             VMUL   dZr2,dW2r,dXr2
             VMLS   dZr2,dW2i,dXi2                       @// real part
             VMUL   dZi2,dW2r,dXi2
-            VLD1    dW2r,[pTwiddle :64],step16          @// [wi|wr]
+            VLD1    dW2r,[pTwiddle, :64],step16          @// [wi|wr]
             VMLA   dZi2,dW2i,dXr2                       @// imag part
 
         .endif
 
 
-        VLD1    dW2i,[pTwiddle :64],twStep              @// [wi|wr]
+        VLD1    dW2i,[pTwiddle, :64],twStep              @// [wi|wr]
 
         @// move qX0 so as to load for the next iteration
         VMOV     qZ0,qX0
 
         .ifeqs  "\inverse", "TRUE"
             VMUL   dZr3,dW3r,dXr3
             VMLA   dZr3,dW3i,dXi3                       @// real part
             VMUL   dZi3,dW3r,dXi3
-            VLD1    dW3r,[pTwiddle :64],step24
+            VLD1    dW3r,[pTwiddle, :64],step24
             VMLS   dZi3,dW3i,dXr3                       @// imag part
 
         .else
 
             VMUL   dZr3,dW3r,dXr3
             VMLS   dZr3,dW3i,dXi3                       @// real part
             VMUL   dZi3,dW3r,dXi3
-            VLD1    dW3r,[pTwiddle :64],step24
+            VLD1    dW3r,[pTwiddle, :64],step24
             VMLA   dZi3,dW3i,dXr3                       @// imag part
 
         .endif
 
-        VLD1    dW3i,[pTwiddle :64],grpTwStep           @// [wi|wr]
+        VLD1    dW3i,[pTwiddle, :64],grpTwStep           @// [wi|wr]
 
         @// Don't do the load on the last iteration so we don't read past the end
         @// of pSrc.
         addeq   pSrc, pSrc, #64
         beq     radix4lsSkipRead\name
         @// AC.r AC.i BD.r BD.i
-        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]!
+        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc, :256]!
 
         @// AC.r AC.i BD.r BD.i
-        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]!
+        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc, :256]!
 radix4lsSkipRead\name:
 
         @// finish first stage of 4 point FFT
 
         VADD    qY0,qZ0,qZ2
         VSUB    qY2,qZ0,qZ2
         VADD    qY1,qZ1,qZ3
         VSUB    qY3,qZ1,qZ3
@@ -269,47 +269,47 @@ radix4lsSkipRead\name:
 
         @// finish second stage of 4 point FFT
 
         .ifeqs  "\inverse", "TRUE"
 
             VSUB    qZ0,qY2,qY1
 
             VADD    dZr3,dYr0,dYi3
-            VST2    {dZr0,dZi0},[pDst :128],outPointStep
+            VST2    {dZr0,dZi0},[pDst, :128],outPointStep
             VSUB    dZi3,dYi0,dYr3
 
             VADD    qZ2,qY2,qY1
-            VST2    {dZr3,dZi3},[pDst :128],outPointStep
+            VST2    {dZr3,dZi3},[pDst, :128],outPointStep
 
             VSUB    dZr1,dYr0,dYi3
-            VST2    {dZr2,dZi2},[pDst :128],outPointStep
+            VST2    {dZr2,dZi2},[pDst, :128],outPointStep
             VADD    dZi1,dYi0,dYr3
 
             @// dstStep = -outPointStep + 16
-            VST2    {dZr1,dZi1},[pDst :128],dstStep
+            VST2    {dZr1,dZi1},[pDst, :128],dstStep
 
 
         .else
 
             VSUB    qZ0,qY2,qY1
 
             VSUB    dZr1,dYr0,dYi3
-            VST2    {dZr0,dZi0},[pDst :128],outPointStep
+            VST2    {dZr0,dZi0},[pDst, :128],outPointStep
             VADD    dZi1,dYi0,dYr3
 
             VADD    qZ2,qY2,qY1
-            VST2    {dZr1,dZi1},[pDst :128],outPointStep
+            VST2    {dZr1,dZi1},[pDst, :128],outPointStep
 
             VADD    dZr3,dYr0,dYi3
-            VST2    {dZr2,dZi2},[pDst :128],outPointStep
+            VST2    {dZr2,dZi2},[pDst, :128],outPointStep
             VSUB    dZi3,dYi0,dYr3
 
             @// dstStep = -outPointStep + 16
-            VST2    {dZr3,dZi3},[pDst :128],dstStep
+            VST2    {dZr3,dZi3},[pDst, :128],dstStep
 
 
         .endif
 
         BGT     radix4lsGrpLoop\name
 
 
         @// Reset and Swap pSrc and pDst for the next stage
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
@@ -245,71 +245,71 @@ radix4SetLoop\name :
 radix4SkipRead\name:
         SUBS    setCount,setCount,#2
 
         @// finish first stage of 4 point FFT
         VADD    qY0,qX0,qZ2
         VSUB    qY2,qX0,qZ2
 
         @//  data[0] for next iteration
-        VLD2    {dXr0,dXi0},[pSrc :128]!
+        VLD2    {dXr0,dXi0},[pSrc, :128]!
         VADD    qY1,qZ1,qZ3
         VSUB    qY3,qZ1,qZ3
 
         @// finish second stage of 4 point FFT
 
         VSUB    qZ0,qY2,qY1
 
 
         .ifeqs  "\inverse", "TRUE"
 
             VADD    dZr3,dYr0,dYi3
-            VST2    {dZr0,dZi0},[pDst :128],outPointStep
+            VST2    {dZr0,dZi0},[pDst, :128],outPointStep
             VSUB    dZi3,dYi0,dYr3
 
             VADD    qZ2,qY2,qY1
-            VST2    {dZr3,dZi3},[pDst :128],outPointStep
+            VST2    {dZr3,dZi3},[pDst, :128],outPointStep
 
             VSUB    dZr1,dYr0,dYi3
-            VST2    {dZr2,dZi2},[pDst :128],outPointStep
+            VST2    {dZr2,dZi2},[pDst, :128],outPointStep
             VADD    dZi1,dYi0,dYr3
 
-            VST2    {dZr1,dZi1},[pDst :128],dstStep
+            VST2    {dZr1,dZi1},[pDst, :128],dstStep
 
 
         .else
 
             VSUB    dZr1,dYr0,dYi3
-            VST2    {dZr0,dZi0},[pDst :128],outPointStep
+            VST2    {dZr0,dZi0},[pDst, :128],outPointStep
             VADD    dZi1,dYi0,dYr3
 
             VADD    qZ2,qY2,qY1
-            VST2    {dZr1,dZi1},[pDst :128],outPointStep
+            VST2    {dZr1,dZi1},[pDst, :128],outPointStep
 
             VADD    dZr3,dYr0,dYi3
-            VST2    {dZr2,dZi2},[pDst :128],outPointStep
+            VST2    {dZr2,dZi2},[pDst, :128],outPointStep
             VSUB    dZi3,dYi0,dYr3
 
-            VST2    {dZr3,dZi3},[pDst :128],dstStep
+            VST2    {dZr3,dZi3},[pDst, :128],dstStep
 
 
         .endif
 
         @// increment to data[1] of the next set
         ADD     pSrc,pSrc,pointStep
         BGT     radix4SetLoop\name
 
 
-        VLD1     dW1,[pTwiddle :64],stepTwiddle    @//[wi | wr]
+        VLD1     dW1,[pTwiddle, :64],stepTwiddle    @//[wi | wr]
         @// subtract 4 since grpCount multiplied by 4
         SUBS    grpCount,grpCount,#4
-        VLD1     dW2,[pTwiddle :64],stepTwiddle    @//[wi | wr]
+        VLD1     dW2,[pTwiddle, :64],stepTwiddle    @//[wi | wr]
         @// increment pSrc for the next grp
         ADD     pSrc,pSrc,srcStep
-        VLD1     dW3,[pTwiddle :64],twStep         @//[wi | wr]
+        VLD1     dW3,[pTwiddle, :64],twStep         @//[wi | wr]
         BGT     radix4GrpLoop\name
 
 
         @// Reset and Swap pSrc and pDst for the next stage
         MOV     t1,pDst
         @// pDst -= 2*size; pSrc -= 8*size bytes
         SUB     pDst,pSrc,outPointStep,LSL #2
         SUB     pSrc,t1,outPointStep
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
@@ -168,61 +168,57 @@
 #define qY4     Q10.F32
 #define qY5     Q14.F32
 #define qY6     Q11.F32
 #define qY7     Q15.F32
 
 #define dT0     D14.F32
 #define dT1     D15.F32
 
-@// Define constants
-        @ sqrt(1/2)
-ONEBYSQRT2:     .float  0.7071067811865476e0
-
 
         .MACRO FFTSTAGE scaled, inverse, name
 
         @// Define stack arguments
 
         @// Update pSubFFTSize and pSubFFTNum regs
         @// subFFTSize = 1 for the first stage
         MOV     subFFTSize,#8
-        LDR     t0,=ONEBYSQRT2
+        ADR     t0,ONEBYSQRT2\name
 
         @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
         LSR     grpSize,subFFTNum,#3
         MOV     subFFTNum,grpSize
 
 
         @// pT0+1 increments pT0 by 8 bytes
         @// pT0+pointStep = increment of 8*pointStep bytes = grpSize bytes
         @// Note: outPointStep = pointStep for firststage
 
         MOV     pointStep,grpSize,LSL #3
 
 
         @// Calculate the step of input data for the next set
         @//MOV     step1,pointStep,LSL #1             @// step1 = 2*pointStep
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep     @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep     @//  data[0]
         MOV     step1,grpSize,LSL #4
 
         MOV     step2,pointStep,LSL #3
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep     @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep     @//  data[1]
         SUB     step2,step2,pointStep                 @// step2 = 7*pointStep
         @// setStep = - 7*pointStep+16
         RSB     setStep,step2,#16
 
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep     @//  data[2]
-        VLD2    {dXr3,dXi3},[pSrc :128],pointStep     @//  data[3]
-        VLD2    {dXr4,dXi4},[pSrc :128],pointStep     @//  data[4]
-        VLD2    {dXr5,dXi5},[pSrc :128],pointStep     @//  data[5]
-        VLD2    {dXr6,dXi6},[pSrc :128],pointStep     @//  data[6]
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep     @//  data[2]
+        VLD2    {dXr3,dXi3},[pSrc, :128],pointStep     @//  data[3]
+        VLD2    {dXr4,dXi4},[pSrc, :128],pointStep     @//  data[4]
+        VLD2    {dXr5,dXi5},[pSrc, :128],pointStep     @//  data[5]
+        VLD2    {dXr6,dXi6},[pSrc, :128],pointStep     @//  data[6]
         @//  data[7] & update pSrc for the next set
         @//  setStep = -7*pointStep + 16
-        VLD2    {dXr7,dXi7},[pSrc :128],setStep
+        VLD2    {dXr7,dXi7},[pSrc, :128],setStep
         @// grp = 0 a special case since all the twiddle factors are 1
         @// Loop on the sets
 
 radix8fsGrpZeroSetLoop\name :
 
         @// Decrement setcount
         SUBS    setCount,setCount,#2
 
@@ -240,162 +236,162 @@ radix8fsGrpZeroSetLoop\name :
         VSUB    qV2,qU0,qU4
         VADD    qV4,qU2,qU6
         VSUB    qV6,qU2,qU6
 
         @// finish third stage of 8 point FFT
 
         VADD    qY0,qV0,qV4
         VSUB    qY4,qV0,qV4
-        VST2    {dYr0,dYi0},[pDst :128],step1         @// store y0
+        VST2    {dYr0,dYi0},[pDst, :128],step1         @// store y0
 
         .ifeqs  "\inverse", "TRUE"
 
             VSUB    dYr2,dVr2,dVi6
             VADD    dYi2,dVi2,dVr6
 
             VADD    dYr6,dVr2,dVi6
-            VST2    {dYr2,dYi2},[pDst :128],step1     @// store y2
+            VST2    {dYr2,dYi2},[pDst, :128],step1     @// store y2
             VSUB    dYi6,dVi2,dVr6
 
             VSUB    qU1,qX0,qX4
-            VST2    {dYr4,dYi4},[pDst :128],step1     @// store y4
+            VST2    {dYr4,dYi4},[pDst, :128],step1     @// store y4
 
             VSUB    qU3,qX1,qX5
             VSUB    qU5,qX2,qX6
-            VST2    {dYr6,dYi6},[pDst :128],step1     @// store y6
+            VST2    {dYr6,dYi6},[pDst, :128],step1     @// store y6
 
         .ELSE
 
             VADD    dYr6,dVr2,dVi6
             VSUB    dYi6,dVi2,dVr6
 
             VSUB    dYr2,dVr2,dVi6
-            VST2    {dYr6,dYi6},[pDst :128],step1     @// store y2
+            VST2    {dYr6,dYi6},[pDst, :128],step1     @// store y2
             VADD    dYi2,dVi2,dVr6
 
 
             VSUB    qU1,qX0,qX4
-            VST2    {dYr4,dYi4},[pDst :128],step1     @// store y4
+            VST2    {dYr4,dYi4},[pDst, :128],step1     @// store y4
             VSUB    qU3,qX1,qX5
             VSUB    qU5,qX2,qX6
-            VST2    {dYr2,dYi2},[pDst :128],step1     @// store y6
+            VST2    {dYr2,dYi2},[pDst, :128],step1     @// store y6
 
 
         .ENDIF
 
         @// finish first stage of 8 point FFT
 
         VSUB    qU7,qX3,qX7
         VLD1    dT0[0], [t0]
 
         @// finish second stage of 8 point FFT
 
         VSUB    dVr1,dUr1,dUi5
         @//  data[0] for next iteration
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep
         VADD    dVi1,dUi1,dUr5
         VADD    dVr3,dUr1,dUi5
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep     @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep     @//  data[1]
         VSUB    dVi3,dUi1,dUr5
 
         VSUB    dVr5,dUr3,dUi7
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep     @//  data[2]
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep     @//  data[2]
         VADD    dVi5,dUi3,dUr7
         VADD    dVr7,dUr3,dUi7
-        VLD2    {dXr3,dXi3},[pSrc :128],pointStep     @//  data[3]
+        VLD2    {dXr3,dXi3},[pSrc, :128],pointStep     @//  data[3]
         VSUB    dVi7,dUi3,dUr7
 
         @// finish third stage of 8 point FFT
 
         .ifeqs  "\inverse", "TRUE"
 
             @// calculate a*v5
             VMUL    dT1,dVr5,dT0[0]                   @// use dVi0 for dT1
 
-            VLD2    {dXr4,dXi4},[pSrc :128],pointStep @//  data[4]
+            VLD2    {dXr4,dXi4},[pSrc, :128],pointStep @//  data[4]
             VMUL    dVi5,dVi5,dT0[0]
 
-            VLD2    {dXr5,dXi5},[pSrc :128],pointStep @//  data[5]
+            VLD2    {dXr5,dXi5},[pSrc, :128],pointStep @//  data[5]
             VSUB    dVr5,dT1,dVi5                     @// a * V5
             VADD    dVi5,dT1,dVi5
 
-            VLD2    {dXr6,dXi6},[pSrc :128],pointStep @//  data[6]
+            VLD2    {dXr6,dXi6},[pSrc, :128],pointStep @//  data[6]
 
             @// calculate  b*v7
             VMUL    dT1,dVr7,dT0[0]
             VMUL    dVi7,dVi7,dT0[0]
 
             VADD    qY1,qV1,qV5
             VSUB    qY5,qV1,qV5
 
 
             VADD    dVr7,dT1,dVi7                     @// b * V7
             VSUB    dVi7,dVi7,dT1
             SUB     pDst, pDst, step2                 @// set pDst to y1
 
             @// On the last iteration,  this will read past the end of pSrc, 
             @// so skip this read.
             BEQ     radix8SkipLastUpdateInv\name
-            VLD2    {dXr7,dXi7},[pSrc :128],setStep   @//  data[7]
+            VLD2    {dXr7,dXi7},[pSrc, :128],setStep   @//  data[7]
 radix8SkipLastUpdateInv\name:
 
             VSUB    dYr3,dVr3,dVr7
             VSUB    dYi3,dVi3,dVi7
-            VST2    {dYr1,dYi1},[pDst :128],step1     @// store y1
+            VST2    {dYr1,dYi1},[pDst, :128],step1     @// store y1
             VADD    dYr7,dVr3,dVr7
             VADD    dYi7,dVi3,dVi7
 
 
-            VST2    {dYr3,dYi3},[pDst :128],step1     @// store y3
-            VST2    {dYr5,dYi5},[pDst :128],step1     @// store y5
-            VST2    {dYr7,dYi7},[pDst :128]           @// store y7
+            VST2    {dYr3,dYi3},[pDst, :128],step1     @// store y3
+            VST2    {dYr5,dYi5},[pDst, :128],step1     @// store y5
+            VST2    {dYr7,dYi7},[pDst, :128]           @// store y7
             ADD pDst, pDst, #16
 
         .ELSE
 
             @// calculate  b*v7
             VMUL    dT1,dVr7,dT0[0]
-            VLD2    {dXr4,dXi4},[pSrc :128],pointStep @//  data[4]
+            VLD2    {dXr4,dXi4},[pSrc, :128],pointStep @//  data[4]
             VMUL    dVi7,dVi7,dT0[0]
 
-            VLD2    {dXr5,dXi5},[pSrc :128],pointStep @//  data[5]
+            VLD2    {dXr5,dXi5},[pSrc, :128],pointStep @//  data[5]
             VADD    dVr7,dT1,dVi7                     @// b * V7
             VSUB    dVi7,dVi7,dT1
 
-            VLD2    {dXr6,dXi6},[pSrc :128],pointStep @//  data[6]
+            VLD2    {dXr6,dXi6},[pSrc, :128],pointStep @//  data[6]
 
             @// calculate a*v5
             VMUL    dT1,dVr5,dT0[0]                   @// use dVi0 for dT1
             VMUL    dVi5,dVi5,dT0[0]
 
             VADD    dYr7,dVr3,dVr7
             VADD    dYi7,dVi3,dVi7
             SUB     pDst, pDst, step2                 @// set pDst to y1
 
             VSUB    dVr5,dT1,dVi5                     @// a * V5
             VADD    dVi5,dT1,dVi5
 
             @// On the last iteration,  this will read past the end of pSrc, 
             @// so skip this read.
             BEQ     radix8SkipLastUpdateFwd\name
-            VLD2    {dXr7,dXi7},[pSrc :128],setStep   @//  data[7]
+            VLD2    {dXr7,dXi7},[pSrc, :128],setStep   @//  data[7]
 radix8SkipLastUpdateFwd\name:
 
             VSUB    qY5,qV1,qV5
 
             VSUB    dYr3,dVr3,dVr7
-            VST2    {dYr7,dYi7},[pDst :128],step1     @// store y1
+            VST2    {dYr7,dYi7},[pDst, :128],step1     @// store y1
             VSUB    dYi3,dVi3,dVi7
             VADD    qY1,qV1,qV5
 
 
-            VST2    {dYr5,dYi5},[pDst :128],step1     @// store y3
-            VST2    {dYr3,dYi3},[pDst :128],step1     @// store y5
-            VST2    {dYr1,dYi1},[pDst :128]!          @// store y7
+            VST2    {dYr5,dYi5},[pDst, :128],step1     @// store y3
+            VST2    {dYr3,dYi3},[pDst, :128],step1     @// store y5
+            VST2    {dYr1,dYi1},[pDst, :128]!          @// store y7
 
         .ENDIF
 
 
         @// update pDst for the next set
         SUB     pDst, pDst, step2
         BGT     radix8fsGrpZeroSetLoop\name
 
@@ -410,17 +406,17 @@ radix8SkipLastUpdateFwd\name:
 
 
         @// Allocate stack memory required by the function
 
 
         M_START armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe,r4
             FFTSTAGE "FALSE","FALSE",FWD
         M_END
-
+ONEBYSQRT2FWD:     .float  0.7071067811865476e0
 
         M_START armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe,r4
             FFTSTAGE "FALSE","TRUE",INV
         M_END
-
+ONEBYSQRT2INV:     .float  0.7071067811865476e0
 
 
         .end
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
@@ -116,38 +116,38 @@
         .MACRO FFTSTAGE scaled, inverse, name
 
         @// Define stack arguments
 
         MOV     pointStep,subFFTNum
         @// Update pSubFFTSize and pSubFFTNum regs
 
 
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0]
         @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
         LSR     grpSize,subFFTNum,#2
         MOV     subFFTNum,grpSize
 
 
         @// pT0+1 increments pT0 by 4 bytes
         @// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
         @// Note: outPointStep = pointStep for firststage
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
 
 
         @// Calculate the step of input data for the next set
         @//MOV     setStep,pointStep,LSL #1
         MOV     setStep,grpSize,LSL #3
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
         MOV     step1,setStep
         ADD     setStep,setStep,pointStep             @// setStep = 3*pointStep
         RSB     setStep,setStep,#16                   @// setStep = - 3*pointStep+16
 
 
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3]
+        VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3]
         MOV     subFFTSize,#4                         @// subFFTSize = 1 for the first stage
 
 
         .ifeqs  "\scaled", "TRUE"
             VHADD    qY0,qX0,qX2             @// u0
         .ELSE
             VADD   qY0,qX0,qX2               @// u0
         .ENDIF
@@ -161,119 +161,119 @@ grpZeroSetLoop\name:
 
         .ifeqs "\scaled", "TRUE"
 
             @// finish first stage of 4 point FFT
 
             VHSUB    qY2,qX0,qX2             @// u1
             SUBS    setCount,setCount,#4                    @// decrement the set loop counter
 
-            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
+            VLD2    {dXr0,dXi0},[pSrc, :128],step1          @//  data[0]
             VHADD    qY1,qX1,qX3             @// u2
-            VLD2    {dXr2,dXi2},[pSrc :128],step3
+            VLD2    {dXr2,dXi2},[pSrc, :128],step3
             VHSUB    qY3,qX1,qX3             @// u3
 
 
 
             @// finish second stage of 4 point FFT
 
-            VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
             VHADD    qZ0,qY0,qY1             @// y0
 
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
+            VLD2    {dXr3,dXi3},[pSrc, :128],setStep
 
 
             .ifeqs  "\inverse", "TRUE"
 
                 VHSUB    dZr3,dYr2,dYi3                  @// y3
                 VHADD    dZi3,dYi2,dYr3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
 
                 VHSUB    qZ1,qY0,qY1                     @// y2
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
 
                 VHADD    dZr2,dYr2,dYi3                  @// y1
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VHSUB    dZi2,dYi2,dYr3
 
                 VHADD    qY0,qX0,qX2                     @// u0 (next loop)
-                VST2    {dZr2,dZi2},[pDst :128],setStep
+                VST2    {dZr2,dZi2},[pDst, :128],setStep
 
 
             .ELSE
 
                 VHADD    dZr2,dYr2,dYi3                  @// y1
                 VHSUB    dZi2,dYi2,dYr3
 
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHSUB    qZ1,qY0,qY1                     @// y2
 
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VHSUB    dZr3,dYr2,dYi3                  @// y3
                 VHADD    dZi3,dYi2,dYr3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VHADD    qY0,qX0,qX2                     @// u0 (next loop)
-                VST2    {dZr3,dZi3},[pDst :128],setStep
+                VST2    {dZr3,dZi3},[pDst, :128],setStep
 
             .ENDIF
 
 
         .ELSE
 
             @// finish first stage of 4 point FFT
 
             VSUB    qY2,qX0,qX2             @// u1
             SUBS    setCount,setCount,#4                    @// decrement the set loop counter
 
-            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
+            VLD2    {dXr0,dXi0},[pSrc, :128],step1          @//  data[0]
             VADD    qY1,qX1,qX3             @// u2
-            VLD2    {dXr2,dXi2},[pSrc :128],step3
+            VLD2    {dXr2,dXi2},[pSrc, :128],step3
             VSUB    qY3,qX1,qX3             @// u3
 
 
 
             @// finish second stage of 4 point FFT
 
-            VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
             VADD    qZ0,qY0,qY1             @// y0
 
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
+            VLD2    {dXr3,dXi3},[pSrc, :128],setStep
 
 
             .ifeqs  "\inverse", "TRUE"
 
                 VSUB    dZr3,dYr2,dYi3                  @// y3
                 VADD    dZi3,dYi2,dYr3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
 
                 VSUB    qZ1,qY0,qY1                     @// y2
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
 
                 VADD    dZr2,dYr2,dYi3                  @// y1
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VSUB    dZi2,dYi2,dYr3
 
                 VADD    qY0,qX0,qX2                     @// u0 (next loop)
-                VST2    {dZr2,dZi2},[pDst :128],setStep
+                VST2    {dZr2,dZi2},[pDst, :128],setStep
 
 
             .ELSE
 
                 VADD    dZr2,dYr2,dYi3                  @// y1
                 VSUB    dZi2,dYi2,dYr3
 
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VSUB    qZ1,qY0,qY1                     @// y2
 
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VSUB    dZr3,dYr2,dYi3                  @// y3
                 VADD    dZi3,dYi2,dYr3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VADD    qY0,qX0,qX2                     @// u0 (next loop)
-                VST2    {dZr3,dZi3},[pDst :128],setStep
+                VST2    {dZr3,dZi3},[pDst, :128],setStep
 
             .ENDIF
 
 
         .ENDIF
 
         BGT     grpZeroSetLoop\name
 
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
@@ -158,44 +158,44 @@
 #define qZ3                             Q14.S16
 
 
         .MACRO FFTSTAGE scaled, inverse , name
 
         @// Define stack arguments
 
         MOV     pw2,pTwiddle
-        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
+        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2, :256]!
 
         MOV     pw3,pTwiddle
         MOV     pw1,pTwiddle
         @// pOut0+1 increments pOut0 by 8 bytes
         @// pOut0+outPointStep == increment of 4*outPointStep bytes
         MOV     outPointStep,subFFTSize,LSL #2
 
-        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
+        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3, :64]!
         MOV     subFFTNum,#1                            @//after the last stage
         LSL     grpCount,subFFTSize,#2
 
 
         @// Update grpCount and grpSize rightaway
-        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
+        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3, :64]!
 
         @// update subFFTSize for the next stage
         MOV     subFFTSize,grpCount
         MOV     dstStep,outPointStep,LSL #1
 
-        VLD2 {dW1r,dW1i}, [pw1 :128]!
+        VLD2 {dW1r,dW1i}, [pw1, :128]!
 
 
         ADD     dstStep,dstStep,outPointStep                @// dstStep = 3*outPointStep
         RSB     dstStep,dstStep,#16                         @// dstStep = - 3*outPointStep+16
 
-        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
-        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
 
         @// Process 4 groups at a time
 
 grpLoop\name:
 
 
         @// Rearrange the third twiddle
         VUZP    dW3r,dW3i
@@ -220,17 +220,17 @@ grpLoop\name:
             VMULL   qT1,dXi1,dW1r
             VMLAL   qT1,dXr1,dW1i                       @// imag part
 
         .ENDIF
 
         @// Load the first twiddle for 4 groups : w^1
         @// w^1 twiddle (i+0,i+1,i+2,i+3)       for group 0,1,2,3
 
-        VLD2 {dW1r,dW1i}, [pw1 :128]!
+        VLD2 {dW1r,dW1i}, [pw1, :128]!
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT2,dXr2,dW2r
             VMLAL   qT2,dXi2,dW2i                       @// real part
             VMULL   qT3,dXi2,dW2r
             VMLSL   qT3,dXr2,dW2i                       @// imag part
 
         .ELSE
@@ -257,117 +257,117 @@ grpLoop\name:
             VMLSL   qT0,dXi3,dW3i                       @// real part
             VMULL   qT1,dXi3,dW3r
             VMLAL   qT1,dXr3,dW3i                       @// imag part
 
         .ENDIF
 
         @// Load the second twiddle for 4 groups : w^2
         @// w^2 twiddle (2i+0,2i+2,2i+4,2i+6)   for group 0,1,2,3
-        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
+        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2, :256]!
 
 
         VRSHRN  dZr2,qT2,#15
         VRSHRN  dZi2,qT3,#15
 
         @// Load the third twiddle for 4 groups : w^3
         @// w^3 twiddle (3i+0,3i+3,3i+6,3i+9)   for group 0,1,2,3
 
-        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
+        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3, :64]!
 
         VRSHRN  dZr3,qT0,#15
         VRSHRN  dZi3,qT1,#15
 
-        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
+        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3, :64]!
 
         .ifeqs "\scaled", "TRUE"
 
             @// finish first stage of 4 point FFT
 
             VHADD    qY0,qX0,qZ2
             VHSUB    qY2,qX0,qZ2
             VHADD    qY1,qZ1,qZ3
-            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
 
             VHSUB    qY3,qZ1,qZ3
 
             @// finish second stage of 4 point FFT
 
             VHSUB    qZ0,qY2,qY1
             VHADD    qZ2,qY2,qY1
-            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
 
 
             .ifeqs "\inverse", "TRUE"
 
                 VHADD    dZr3,dYr0,dYi3                          @// y3 = u0-ju3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHSUB    dZi3,dYi0,dYr3
 
                 VHSUB    dZr1,dYr0,dYi3                          @// y1 = u0+ju3
                 VHADD    dZi1,dYi0,dYr3
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
-                VST2    {dZr1,dZi1},[pDst :128],dstStep              @// dstStep = -3*outPointStep + 16
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
 
             .ELSE
 
                 VHSUB    dZr1,dYr0,dYi3                          @// y1 = u0+ju3
                 VHADD    dZi1,dYi0,dYr3
 
                 VHADD    dZr3,dYr0,dYi3                          @// y3 = u0-ju3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHSUB    dZi3,dYi0,dYr3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
-                VST2    {dZr3,dZi3},[pDst :128],dstStep              @// dstStep = -3*outPointStep + 16
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
 
             .ENDIF
 
         .ELSE
 
             @// finish first stage of 4 point FFT
 
             VADD    qY0,qX0,qZ2
             VSUB    qY2,qX0,qZ2
             VADD    qY1,qZ1,qZ3
-            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
 
             VSUB    qY3,qZ1,qZ3
 
             @// finish second stage of 4 point FFT
 
             VSUB    qZ0,qY2,qY1
             VADD    qZ2,qY2,qY1
-            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
 
 
             .ifeqs "\inverse", "TRUE"
 
                 VADD    dZr3,dYr0,dYi3                          @// y3 = u0-ju3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VSUB    dZi3,dYi0,dYr3
 
                 VSUB    dZr1,dYr0,dYi3                          @// y1 = u0+ju3
                 VADD    dZi1,dYi0,dYr3
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
-                VST2    {dZr1,dZi1},[pDst :128],dstStep              @// dstStep = -3*outPointStep + 16
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
 
             .ELSE
 
                 VSUB    dZr1,dYr0,dYi3                          @// y1 = u0+ju3
                 VADD    dZi1,dYi0,dYr3
 
                 VADD    dZr3,dYr0,dYi3                          @// y3 = u0-ju3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VSUB    dZi3,dYi0,dYr3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
-                VST2    {dZr3,dZi3},[pDst :128],dstStep              @// dstStep = -3*outPointStep + 16
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
 
             .ENDIF
 
 
 
 
         .ENDIF
 
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
@@ -145,40 +145,40 @@
         MOV     stepTwiddle,#0
         SMULBB  outPointStep,grpCount,subFFTNum
 
         @// pT0+1 increments pT0 by 4 bytes
         @// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
 
         LSL     pointStep,subFFTNum,#2                      @// 2*grpSize
 
-        VLD1     dW1,[pTwiddle :64]                             @//[wi | wr]
+        VLD1     dW1,[pTwiddle, :64]                             @//[wi | wr]
         MOV     srcStep,pointStep,LSL #1                    @// srcStep = 2*pointStep
-        VLD1     dW2,[pTwiddle :64]                             @//[wi | wr]
+        VLD1     dW2,[pTwiddle, :64]                             @//[wi | wr]
         ADD     setStep,srcStep,pointStep                   @// setStep = 3*pointStep
         SUB     srcStep,srcStep,#16                         @// srcStep = 2*pointStep-16
-        VLD1     dW3,[pTwiddle :64]
+        VLD1     dW3,[pTwiddle, :64]
         @//RSB     setStep,setStep,#16                      @// setStep = - 3*pointStep+16
         RSB     setStep,setStep,#0                          @// setStep = - 3*pointStep
 
         MOV     dstStep,outPointStep,LSL #1
         ADD     dstStep,dstStep,outPointStep                @// dstStep = 3*outPointStep
         RSB     dstStep,dstStep,#16                         @// dstStep = - 3*outPointStep+16
 
 
 
 grpLoop\name:
 
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0]
         ADD      stepTwiddle,stepTwiddle,pointStep
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
         ADD      pTwiddle,pTwiddle,stepTwiddle               @// set pTwiddle to the first point
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
         MOV      twStep,stepTwiddle,LSL #2
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & reset pSrc
+        VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3] & reset pSrc
 
         SUB      twStep,stepTwiddle,twStep                   @// twStep = -3*stepTwiddle
 
 
         MOV      setCount,pointStep,LSR #2
         ADD     pSrc,pSrc,#16                         @// set pSrc to data[0] of the next set
         ADD     pSrc,pSrc,pointStep                   @// increment to data[1] of the next set
 
@@ -197,17 +197,17 @@ setLoop\name:
         .ELSE
             VMULL   qT0,dXr1,dW1[0]
             VMLSL   qT0,dXi1,dW1[1]                       @// real part
             VMULL   qT1,dXi1,dW1[0]
             VMLAL   qT1,dXr1,dW1[1]                       @// imag part
 
         .ENDIF
 
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT2,dXr2,dW2[0]
             VMLAL   qT2,dXi2,dW2[1]                       @// real part
             VMULL   qT3,dXi2,dW2[0]
             VMLSL   qT3,dXr2,dW2[1]                       @// imag part
 
         .ELSE
@@ -217,17 +217,17 @@ setLoop\name:
             VMLAL   qT3,dXr2,dW2[1]                       @// imag part
 
         .ENDIF
 
         VRSHRN  dZr1,qT0,#15
         VRSHRN  dZi1,qT1,#15
 
 
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dXr3,dW3[0]
             VMLAL   qT0,dXi3,dW3[1]                       @// real part
             VMULL   qT1,dXi3,dW3[0]
             VMLSL   qT1,dXr3,dW3[1]                       @// imag part
 
         .ELSE
@@ -239,64 +239,64 @@ setLoop\name:
         .ENDIF
 
         VRSHRN  dZr2,qT2,#15
         VRSHRN  dZi2,qT3,#15
 
 
         VRSHRN  dZr3,qT0,#15
         VRSHRN  dZi3,qT1,#15
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
+        VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3] & update pSrc for the next set
 
 
         .ifeqs "\scaled", "TRUE"
 
             @// finish first stage of 4 point FFT
             VHADD    qY0,qX0,qZ2
             VHSUB    qY2,qX0,qZ2
 
-            VLD2    {dXr0,dXi0},[pSrc :128]!          @//  data[0]
+            VLD2    {dXr0,dXi0},[pSrc, :128]!          @//  data[0]
             VHADD    qY1,qZ1,qZ3
             VHSUB    qY3,qZ1,qZ3
 
 
             @// finish second stage of 4 point FFT
 
             .ifeqs  "\inverse", "TRUE"
 
                 VHSUB    qZ0,qY2,qY1
 
                 VHADD    dZr2,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHSUB    dZi2,dYi0,dYr3
 
                 VHADD    qZ1,qY2,qY1
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
 
                 VHSUB    dZr3,dYr0,dYi3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VHADD    dZi3,dYi0,dYr3
-                VST2    {dZr3,dZi3},[pDst :128],dstStep
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep
 
 
             .ELSE
 
                 VHSUB    qZ0,qY2,qY1
 
                 VHSUB    dZr3,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHADD    dZi3,dYi0,dYr3
 
                 VHADD    qZ1,qY2,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
 
                 VHADD    dZr2,dYr0,dYi3
                 VHSUB    dZi2,dYi0,dYr3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
-                VST2    {dZr2,dZi2},[pDst :128],dstStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],dstStep
 
 
             .ENDIF
 
 
         .ELSE
 
             @// finish first stage of 4 point FFT
@@ -311,59 +311,59 @@ setLoop\name:
             @// finish second stage of 4 point FFT
 
 
             .ifeqs  "\inverse", "TRUE"
 
                 VSUB    qZ0,qY2,qY1
 
                 VADD    dZr2,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VSUB    dZi2,dYi0,dYr3
 
                 VADD    qZ1,qY2,qY1
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
 
                 VSUB    dZr3,dYr0,dYi3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VADD    dZi3,dYi0,dYr3
-                VST2    {dZr3,dZi3},[pDst :128],dstStep
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep
 
 
             .ELSE
 
                 VSUB    qZ0,qY2,qY1
 
                 VSUB    dZr3,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VADD    dZi3,dYi0,dYr3
 
                 VADD    qZ1,qY2,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
 
                 VADD    dZr2,dYr0,dYi3
                 VSUB    dZi2,dYi0,dYr3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
-                VST2    {dZr2,dZi2},[pDst :128],dstStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],dstStep
 
 
             .ENDIF
 
 
 
         .ENDIF
 
         ADD     pSrc,pSrc,pointStep                         @// increment to data[1] of the next set
         BGT     setLoop\name
 
-        VLD1     dW1,[pTwiddle :64],stepTwiddle                 @//[wi | wr]
+        VLD1     dW1,[pTwiddle, :64],stepTwiddle                 @//[wi | wr]
         SUBS    grpCount,grpCount,#4                        @// subtract 4 since grpCount multiplied by 4
-        VLD1     dW2,[pTwiddle :64],stepTwiddle                 @//[wi | wr]
+        VLD1     dW2,[pTwiddle, :64],stepTwiddle                 @//[wi | wr]
         ADD     pSrc,pSrc,srcStep                           @// increment pSrc for the next grp
-        VLD1     dW3,[pTwiddle :64],twStep                      @//[wi | wr]
+        VLD1     dW3,[pTwiddle, :64],twStep                      @//[wi | wr]
 
 
 
         BGT     grpLoop\name
 
 
         @// Reset and Swap pSrc and pDst for the next stage
         MOV     t1,pDst
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
@@ -213,32 +213,32 @@
         @// pT0+pointStep = increment of 4*pointStep bytes = grpSize/2 bytes
         @// Note: outPointStep = pointStep for firststage
 
         MOV     pointStep,grpSize,LSL #2
 
 
         @// Calculate the step of input data for the next set
         @//MOV     step1,pointStep,LSL #1                      @// step1 = 2*pointStep
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0]
         MOV     step1,grpSize,LSL #3
 
         MOV     step2,pointStep,LSL #3
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
         SUB     step2,step2,pointStep                          @// step2 = 7*pointStep
         RSB     setStep,step2,#16                              @// setStep = - 7*pointStep+16
 
 
 
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
-        VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
-        VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
-        VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
-        VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
-        VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7] & update pSrc for the next set
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
+        VLD2    {dXr3,dXi3},[pSrc, :128],pointStep          @//  data[3]
+        VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
+        VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
+        VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
+        VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7] & update pSrc for the next set
                                                       @//  setStep = -7*pointStep + 16
         @// grp = 0 a special case since all the twiddle factors are 1
         @// Loop on the sets : 4 sets at a time
 
 grpZeroSetLoop\name:
 
         @// Decrement setcount
         SUBS    setCount,setCount,#4                    @// decrement the set loop counter
@@ -258,157 +258,157 @@ grpZeroSetLoop\name:
             VHSUB    qV2,qU0,qU4
             VHADD    qV4,qU2,qU6
             VHSUB    qV6,qU2,qU6
 
             @// finish third stage of 8 point FFT
 
             VHADD    qY0,qV0,qV4
             VHSUB    qY4,qV0,qV4
-            VST2    {dYr0,dYi0},[pDst :128],step1                    @// store y0
+            VST2    {dYr0,dYi0},[pDst, :128],step1                    @// store y0
 
             .ifeqs  "\inverse", "TRUE"
 
                 VHSUB    dYr2,dVr2,dVi6
                 VHADD    dYi2,dVi2,dVr6
 
                 VHADD    dYr6,dVr2,dVi6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y2
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y2
                 VHSUB    dYi6,dVi2,dVr6
 
                 VHSUB    qU1,qX0,qX4
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
 
                 VHSUB    qU3,qX1,qX5
                 VHSUB    qU5,qX2,qX6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y6
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y6
 
             .ELSE
 
                 VHADD    dYr6,dVr2,dVi6
                 VHSUB    dYi6,dVi2,dVr6
 
                 VHSUB    dYr2,dVr2,dVi6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y2
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y2
                 VHADD    dYi2,dVi2,dVr6
 
 
                 VHSUB    qU1,qX0,qX4
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
                 VHSUB    qU3,qX1,qX5
                 VHSUB    qU5,qX2,qX6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y6
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y6
 
 
             .ENDIF
 
             @// finish first stage of 8 point FFT
 
             VHSUB    qU7,qX3,qX7
             VMOV    dT0[0],t0
 
             @// finish second stage of 8 point FFT
 
             VHSUB    dVr1,dUr1,dUi5
-            VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0] for next iteration
+            VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0] for next iteration
             VHADD    dVi1,dUi1,dUr5
             VHADD    dVr3,dUr1,dUi5
-            VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
             VHSUB    dVi3,dUi1,dUr5
 
             VHSUB    dVr5,dUr3,dUi7
-            VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+            VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
             VHADD    dVi5,dUi3,dUr7
             VHADD    dVr7,dUr3,dUi7
-            VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
+            VLD2    {dXr3,dXi3},[pSrc, :128],pointStep          @//  data[3]
             VHSUB    dVi7,dUi3,dUr7
 
             @// finish third stage of 8 point FFT
 
             .ifeqs  "\inverse", "TRUE"
 
                 @// calculate a*v5
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi5,dVi5,dT0[0]
 
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
 
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
                 VQRDMULH    dVi7,dVi7,dT0[0]
 
                 VHADD    qY1,qV1,qV5
                 VHSUB    qY5,qV1,qV5
 
 
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]
 
 
                 VHSUB    dYr3,dVr3,dVr7
                 VHSUB    dYi3,dVi3,dVi7
-                VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
+                VST2    {dYr1,dYi1},[pDst, :128],step1                    @// store y1
                 VHADD    dYr7,dVr3,dVr7
                 VHADD    dYi7,dVi3,dVi7
 
 
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y3
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y5
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y3
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y5
 #if 0
-                VST2    {dYr7,dYi7},[pDst :128],#16                      @// store y7
+                VST2    {dYr7,dYi7},[pDst, :128],#16                      @// store y7
 #else
-                VST2    {dYr7,dYi7},[pDst :128]!                      @// store y7
+                VST2    {dYr7,dYi7},[pDst, :128]!                      @// store y7
 #endif
             .ELSE
 
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi7,dVi7,dT0[0]
 
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
 
                 @// calculate a*v5
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
                 VQRDMULH    dVi5,dVi5,dT0[0]
 
                 VHADD    dYr7,dVr3,dVr7
                 VHADD    dYi7,dVi3,dVi7
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]
 
                 VHSUB    qY5,qV1,qV5
 
                 VHSUB    dYr3,dVr3,dVr7
-                VST2    {dYr7,dYi7},[pDst :128],step1                    @// store y1
+                VST2    {dYr7,dYi7},[pDst, :128],step1                    @// store y1
                 VHSUB    dYi3,dVi3,dVi7
                 VHADD    qY1,qV1,qV5
 
 
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y3
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y5
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y3
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y5
 #if 0
-                VST2    {dYr1,dYi1},[pDst :128],#16                      @// store y7
+                VST2    {dYr1,dYi1},[pDst, :128],#16                      @// store y7
 #else
-                VST2    {dYr1,dYi1},[pDst :128]!                      @// store y7
+                VST2    {dYr1,dYi1},[pDst, :128]!                      @// store y7
 #endif
 
             .ENDIF
 
 
 
         .ELSE
             @// finish first stage of 8 point FFT
@@ -424,157 +424,157 @@ grpZeroSetLoop\name:
             VSUB    qV2,qU0,qU4
             VADD    qV4,qU2,qU6
             VSUB    qV6,qU2,qU6
 
             @// finish third stage of 8 point FFT
 
             VADD    qY0,qV0,qV4
             VSUB    qY4,qV0,qV4
-            VST2    {dYr0,dYi0},[pDst :128],step1                    @// store y0
+            VST2    {dYr0,dYi0},[pDst, :128],step1                    @// store y0
 
             .ifeqs  "\inverse", "TRUE"
 
                 VSUB    dYr2,dVr2,dVi6
                 VADD    dYi2,dVi2,dVr6
 
                 VADD    dYr6,dVr2,dVi6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y2
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y2
                 VSUB    dYi6,dVi2,dVr6
 
                 VSUB    qU1,qX0,qX4
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
 
                 VSUB    qU3,qX1,qX5
                 VSUB    qU5,qX2,qX6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y6
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y6
 
             .ELSE
 
                 VADD    dYr6,dVr2,dVi6
                 VSUB    dYi6,dVi2,dVr6
 
                 VSUB    dYr2,dVr2,dVi6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y2
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y2
                 VADD    dYi2,dVi2,dVr6
 
 
                 VSUB    qU1,qX0,qX4
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
                 VSUB    qU3,qX1,qX5
                 VSUB    qU5,qX2,qX6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y6
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y6
 
 
             .ENDIF
 
             @// finish first stage of 8 point FFT
 
             VSUB    qU7,qX3,qX7
             VMOV    dT0[0],t0
 
             @// finish second stage of 8 point FFT
 
             VSUB    dVr1,dUr1,dUi5
-            VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0] for next iteration
+            VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0] for next iteration
             VADD    dVi1,dUi1,dUr5
             VADD    dVr3,dUr1,dUi5
-            VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
             VSUB    dVi3,dUi1,dUr5
 
             VSUB    dVr5,dUr3,dUi7
-            VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+            VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
             VADD    dVi5,dUi3,dUr7
             VADD    dVr7,dUr3,dUi7
-            VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
+            VLD2    {dXr3,dXi3},[pSrc, :128],pointStep          @//  data[3]
             VSUB    dVi7,dUi3,dUr7
 
             @// finish third stage of 8 point FFT
 
             .ifeqs  "\inverse", "TRUE"
 
                 @// calculate a*v5
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi5,dVi5,dT0[0]
 
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
 
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
                 VQRDMULH    dVi7,dVi7,dT0[0]
 
                 VADD    qY1,qV1,qV5
                 VSUB    qY5,qV1,qV5
 
 
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]
 
 
                 VSUB    dYr3,dVr3,dVr7
                 VSUB    dYi3,dVi3,dVi7
-                VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
+                VST2    {dYr1,dYi1},[pDst, :128],step1                    @// store y1
                 VADD    dYr7,dVr3,dVr7
                 VADD    dYi7,dVi3,dVi7
 
 
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y3
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y5
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y3
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y5
 #if 0
-                VST2    {dYr7,dYi7},[pDst :128],#16                      @// store y7
+                VST2    {dYr7,dYi7},[pDst, :128],#16                      @// store y7
 #else
-                VST2    {dYr7,dYi7},[pDst :128]!                      @// store y7
+                VST2    {dYr7,dYi7},[pDst, :128]!                      @// store y7
 #endif
             .ELSE
 
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi7,dVi7,dT0[0]
 
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
 
                 @// calculate a*v5
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
                 VQRDMULH    dVi5,dVi5,dT0[0]
 
                 VADD    dYr7,dVr3,dVr7
                 VADD    dYi7,dVi3,dVi7
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]
 
                 VSUB    qY5,qV1,qV5
 
                 VSUB    dYr3,dVr3,dVr7
-                VST2    {dYr7,dYi7},[pDst :128],step1                    @// store y1
+                VST2    {dYr7,dYi7},[pDst, :128],step1                    @// store y1
                 VSUB    dYi3,dVi3,dVi7
                 VADD    qY1,qV1,qV5
 
 
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y3
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y5
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y3
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y5
 #if 0
-                VST2    {dYr1,dYi1},[pDst :128],#16                      @// store y7
+                VST2    {dYr1,dYi1},[pDst, :128],#16                      @// store y7
 #else
-                VST2    {dYr1,dYi1},[pDst :128]!                      @// store y7
+                VST2    {dYr1,dYi1},[pDst, :128]!                      @// store y7
 #endif
 
             .ENDIF
 
 
         .ENDIF
 
         SUB     pDst, pDst, step2                               @// update pDst for the next set
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
@@ -95,19 +95,19 @@
         MOV     subFFTSize,grpCount
                                
         RSB      dstStep,outPointStep,#16
         
         
         @// Loop on 2 grps at a time for the last stage
 
 grpLoop\name :	
-        VLD2    {dWr,dWi},[pTwiddle :64]!
+        VLD2    {dWr,dWi},[pTwiddle, :64]!
         
-        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
+        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc, :128]!
         SUBS    grpCount,grpCount,#4                   @// grpCount is multiplied by 2 
         
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dWr,dXr1
             VMLAL   qT0,dWi,dXi1                       @// real part
             VMULL   qT1,dWr,dXi1
             VMLSL   qT1,dWi,dXr1                       @// imag part
                 
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
@@ -121,33 +121,33 @@
         @// pT0+1 increments pT0 by 8 bytes
         @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
         @// Note: outPointStep = pointStep for firststage
         
         MOV     pointStep,subFFTNum,LSL #1
         
         
         @// Update pSubFFTSize and pSubFFTNum regs
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0]
         MOV     subFFTSize,#4                                 @// subFFTSize = 1 for the first stage
         
         @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
         LSR     grpSize,subFFTNum,#2
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]  
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]  
         MOV     subFFTNum,grpSize
         
                                        
         @// Calculate the step of input data for the next set
         @//MOV     setStep,pointStep,LSL #1
         MOV     setStep,grpSize,LSL #4
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
         ADD     setStep,setStep,pointStep                   @// setStep = 3*pointStep
         RSB     setStep,setStep,#16                         @// setStep = - 3*pointStep+16
         
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
+        VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3] & update pSrc for the next set
         MOV     step1,pointStep,LSL #1                      @// step1 = 2*pointStep
         
         .ifeqs "\scaled", "TRUE"
             VHADD    qY0,qX0,qX2
         .else
             VADD    qY0,qX0,qX2
         .endif
             
@@ -164,128 +164,128 @@ grpZeroSetLoop\name :
         SUBS    setCount,setCount,#2                    @// decrement the set loop counter           
         
         .ifeqs "\scaled", "TRUE" 
         
             @// finish first stage of 4 point FFT 
                         
             VHSUB    qY2,qX0,qX2
             
-            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
+            VLD2    {dXr0,dXi0},[pSrc, :128],step1          @//  data[0]
             VHADD    qY1,qX1,qX3
-            VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
+            VLD2    {dXr2,dXi2},[pSrc, :128],step3          @//  data[2]
             VHSUB    qY3,qX1,qX3
             
                        
             @// finish second stage of 4 point FFT 
                                                 
             .ifeqs "\inverse", "TRUE"
                    
-                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+                VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
                 VHADD    qZ0,qY0,qY1
             
-                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set    
+                VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3] & update pSrc for the next set    
                 VHSUB    dZr3,dYr2,dYi3
                 
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHADD    dZi3,dYi2,dYr3
                 
                 VHSUB    qZ1,qY0,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
                 
                 VHADD    dZr2,dYr2,dYi3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VHSUB    dZi2,dYi2,dYr3
                 
                 VHADD    qY0,qX0,qX2                     @// u0 for next iteration
-                VST2    {dZr2,dZi2},[pDst :128],setStep
+                VST2    {dZr2,dZi2},[pDst, :128],setStep
                 
                 
             .else
                 
-                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+                VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
                 VHADD    qZ0,qY0,qY1
             
-                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
+                VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3] & update pSrc for the next set
                 VHADD    dZr2,dYr2,dYi3
             
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHSUB    dZi2,dYi2,dYr3
             
                 VHSUB    qZ1,qY0,qY1
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
             
                 VHSUB    dZr3,dYr2,dYi3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VHADD    dZi3,dYi2,dYr3
             
                 VHADD    qY0,qX0,qX2                     @// u0 for next iteration
-                VST2    {dZr3,dZi3},[pDst :128],setStep
+                VST2    {dZr3,dZi3},[pDst, :128],setStep
             
             .endif
             
         
         
         .else
         
             @// finish first stage of 4 point FFT 
             
             
             VSUB    qY2,qX0,qX2
             
-            VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
+            VLD2    {dXr0,dXi0},[pSrc, :128],step1          @//  data[0]
             VADD    qY1,qX1,qX3
-            VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
+            VLD2    {dXr2,dXi2},[pSrc, :128],step3          @//  data[2]
             VSUB    qY3,qX1,qX3
             
                        
             @// finish second stage of 4 point FFT 
                                                 
             .ifeqs "\inverse", "TRUE" 
                    
-                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+                VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
                 VADD    qZ0,qY0,qY1
             
-                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set    
+                VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3] & update pSrc for the next set    
                 VSUB    dZr3,dYr2,dYi3
                 
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VADD    dZi3,dYi2,dYr3
                 
                 VSUB    qZ1,qY0,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
                 
                 VADD    dZr2,dYr2,dYi3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VSUB    dZi2,dYi2,dYr3
                 
                 VADD    qY0,qX0,qX2                     @// u0 for next iteration
-                VST2    {dZr2,dZi2},[pDst :128],setStep
+                VST2    {dZr2,dZi2},[pDst, :128],setStep
                 
                 
             .else
                 
-                VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
+                VLD2    {dXr1,dXi1},[pSrc, :128],step1          @//  data[1]
                 VADD    qZ0,qY0,qY1
             
-                VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
+                VLD2    {dXr3,dXi3},[pSrc, :128],setStep            @//  data[3] & update pSrc for the next set
                 VADD    dZr2,dYr2,dYi3
             
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VSUB    dZi2,dYi2,dYr3
             
                 VSUB    qZ1,qY0,qY1
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
             
                 VSUB    dZr3,dYr2,dYi3
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
                 VADD    dZi3,dYi2,dYr3
             
                 VADD    qY0,qX0,qX2                     @// u0 for next iteration
-                VST2    {dZr3,dZi3},[pDst :128],setStep
+                VST2    {dZr3,dZi3},[pDst, :128],setStep
             
             .endif
             
         .endif
         
         BGT     grpZeroSetLoop\name
         
         @// reset pSrc to pDst for the next stage
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S
@@ -142,40 +142,40 @@
         
         
         @// pOut0+1 increments pOut0 by 8 bytes
         @// pOut0+outPointStep == increment of 8*outPointStep bytes 
         MOV     outPointStep,subFFTSize,LSL #3
         
         @// Update grpCount and grpSize rightaway 
         
-        VLD2    {dW1r,dW1i},[pTwiddle :128]                          @// [wi|wr]
+        VLD2    {dW1r,dW1i},[pTwiddle, :128]                          @// [wi|wr]
         MOV     step16,#16
         LSL     grpCount,subFFTSize,#2
         
-        VLD1    dW2r,[pTwiddle :64]                             @// [wi|wr]
+        VLD1    dW2r,[pTwiddle, :64]                             @// [wi|wr]
         MOV     subFFTNum,#1                            @//after the last stage
         
-        VLD1    dW3r,[pTwiddle :64],step16                     @// [wi|wr]
+        VLD1    dW3r,[pTwiddle, :64],step16                     @// [wi|wr]
         MOV     stepTwiddle,#0
         
-        VLD1    dW2i,[pTwiddle :64]!                            @// [wi|wr]
+        VLD1    dW2i,[pTwiddle, :64]!                            @// [wi|wr]
         SUB     grpTwStep,stepTwiddle,#8                    @// grpTwStep = -8 to start with       
         
         @// update subFFTSize for the next stage
         MOV     subFFTSize,grpCount
-        VLD1    dW3i,[pTwiddle :64],grpTwStep                           @// [wi|wr]
+        VLD1    dW3i,[pTwiddle, :64],grpTwStep                           @// [wi|wr]
         MOV     dstStep,outPointStep,LSL #1
         
-        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
         ADD     dstStep,dstStep,outPointStep                @// dstStep = 3*outPointStep
         RSB     dstStep,dstStep,#16                         @// dstStep = - 3*outPointStep+16
         MOV     step24,#24 
 
-        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
         
 
         @// Process two groups at a time
         
 grpLoop\name :	
         
         VZIP    dW2r,dW2i
         ADD     stepTwiddle,stepTwiddle,#16                 @// increment for the next iteration
@@ -204,68 +204,68 @@ grpLoop\name :
         
             VMULL   qT0,dW1r,dXr1
             VMLSL   qT0,dW1i,dXi1                       @// real part
             VMULL   qT1,dW1r,dXi1
             VMLAL   qT1,dW1i,dXr1                       @// imag part
                     
         .endif
         
-        VLD2    {dW1r,dW1i},[pTwiddle :128],stepTwiddle      @// [wi|wr]
+        VLD2    {dW1r,dW1i},[pTwiddle, :128],stepTwiddle      @// [wi|wr]
         
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT2,dW2r,dXr2
             VMLAL   qT2,dW2i,dXi2                       @// real part
             VMULL   qT3,dW2r,dXi2
-            VLD1    dW2r,[pTwiddle :64],step16                  @// [wi|wr]
+            VLD1    dW2r,[pTwiddle, :64],step16                  @// [wi|wr]
             VMLSL   qT3,dW2i,dXr2                       @// imag part
                 
         .else
         
             VMULL   qT2,dW2r,dXr2
             VMLSL   qT2,dW2i,dXi2                       @// real part
             VMULL   qT3,dW2r,dXi2
-            VLD1    dW2r,[pTwiddle :64],step16                  @// [wi|wr]
+            VLD1    dW2r,[pTwiddle, :64],step16                  @// [wi|wr]
             VMLAL   qT3,dW2i,dXr2                       @// imag part
                     
         .endif
         
         
         VRSHRN  dZr1,qT0,#31
-        VLD1    dW2i,[pTwiddle :64],twStep                  @// [wi|wr] 
+        VLD1    dW2i,[pTwiddle, :64],twStep                  @// [wi|wr] 
         VRSHRN  dZi1,qT1,#31
         
         VMOV     qZ0,qX0                                @// move qX0 so as to load for the next iteration
-        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
         
                 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT4,dW3r,dXr3
             VMLAL   qT4,dW3i,dXi3                       @// real part
             VMULL   qT5,dW3r,dXi3
-            VLD1    dW3r,[pTwiddle :64],step24
+            VLD1    dW3r,[pTwiddle, :64],step24
             VMLSL   qT5,dW3i,dXr3                       @// imag part
                 
         .else
         
             VMULL   qT4,dW3r,dXr3
             VMLSL   qT4,dW3i,dXi3                       @// real part
             VMULL   qT5,dW3r,dXi3
-            VLD1    dW3r,[pTwiddle :64],step24
+            VLD1    dW3r,[pTwiddle, :64],step24
             VMLAL   qT5,dW3i,dXr3                       @// imag part
                     
         .endif
         
         VRSHRN  dZr2,qT2,#31
-        VLD1    dW3i,[pTwiddle :64],grpTwStep                           @// [wi|wr]
+        VLD1    dW3i,[pTwiddle, :64],grpTwStep                           @// [wi|wr]
         VRSHRN  dZi2,qT3,#31
         
         VRSHRN  dZr3,qT4,#31
         VRSHRN  dZi3,qT5,#31
-        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc, :256]! @// AC.r AC.i BD.r BD.i
         
                 
         .ifeqs "\scaled", "TRUE"
         
             @// finish first stage of 4 point FFT 
             
             VHADD    qY0,qZ0,qZ2
             VHSUB    qY2,qZ0,qZ2
@@ -275,45 +275,45 @@ grpLoop\name :
                         
             @// finish second stage of 4 point FFT 
             
             .ifeqs  "\inverse", "TRUE"
 
                 VHSUB    qZ0,qY2,qY1
             
                 VHADD    dZr3,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHSUB    dZi3,dYi0,dYr3
                                 
                 VHADD    qZ2,qY2,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
             
                 VHSUB    dZr1,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VHADD    dZi1,dYi0,dYr3
                 
-                VST2    {dZr1,dZi1},[pDst :128],dstStep              @// dstStep = -outPointStep + 16
+                VST2    {dZr1,dZi1},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
             
                                 
             .else
                 
                 VHSUB    qZ0,qY2,qY1
             
                 VHSUB    dZr1,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHADD    dZi1,dYi0,dYr3
             
                 VHADD    qZ2,qY2,qY1
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
             
                 VHADD    dZr3,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VHSUB    dZi3,dYi0,dYr3
                 
-                VST2    {dZr3,dZi3},[pDst :128],dstStep              @// dstStep = -outPointStep + 16
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
 
             
             .endif
             
         
         
         .else
         
@@ -327,45 +327,45 @@ grpLoop\name :
                         
             @// finish second stage of 4 point FFT 
             
             .ifeqs  "\inverse", "TRUE"
 
                 VSUB    qZ0,qY2,qY1
             
                 VADD    dZr3,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VSUB    dZi3,dYi0,dYr3
                                 
                 VADD    qZ2,qY2,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
             
                 VSUB    dZr1,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VADD    dZi1,dYi0,dYr3
                 
-                VST2    {dZr1,dZi1},[pDst :128],dstStep              @// dstStep = -outPointStep + 16
+                VST2    {dZr1,dZi1},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
             
                                 
             .else
                 
                 VSUB    qZ0,qY2,qY1
             
                 VSUB    dZr1,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VADD    dZi1,dYi0,dYr3
             
                 VADD    qZ2,qY2,qY1
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
             
                 VADD    dZr3,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VSUB    dZi3,dYi0,dYr3
                 
-                VST2    {dZr3,dZi3},[pDst :128],dstStep              @// dstStep = -outPointStep + 16
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep              @// dstStep = -3*outPointStep + 16
 
             
             .endif
             
         .endif
         
         BGT     grpLoop\name
            
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
@@ -263,108 +263,108 @@ setLoop\name :
             @// finish second stage of 4 point FFT 
             
             VHSUB    qZ0,qY2,qY1
             
             
             .ifeqs  "\inverse", "TRUE"
                 
                 VHADD    dZr3,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHSUB    dZi3,dYi0,dYr3
                 
                 VHADD    qZ2,qY2,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
             
                 VHSUB    dZr1,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VHADD    dZi1,dYi0,dYr3
             
-                VST2    {dZr1,dZi1},[pDst :128],dstStep
+                VST2    {dZr1,dZi1},[pDst, :128],dstStep
                 
                 
             .else
                 
                 VHSUB    dZr1,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VHADD    dZi1,dYi0,dYr3
             
                 VHADD    qZ2,qY2,qY1
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
             
                 VHADD    dZr3,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VHSUB    dZi3,dYi0,dYr3
             
-                VST2    {dZr3,dZi3},[pDst :128],dstStep
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep
 
             
             .endif
         
         
         .else
         
             @// finish first stage of 4 point FFT 
             VADD    qY0,qX0,qZ2
             VSUB    qY2,qX0,qZ2
                         
-            VLD2    {dXr0,dXi0},[pSrc :128]!          @//  data[0] for next iteration
+            VLD2    {dXr0,dXi0},[pSrc, :128]!          @//  data[0] for next iteration
             VADD    qY1,qZ1,qZ3
             VSUB    qY3,qZ1,qZ3
             
             @// finish second stage of 4 point FFT 
             
             VSUB    qZ0,qY2,qY1
             
             
             .ifeqs  "\inverse", "TRUE"
                 
                 VADD    dZr3,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VSUB    dZi3,dYi0,dYr3
                 
                 VADD    qZ2,qY2,qY1
-                VST2    {dZr3,dZi3},[pDst :128],outPointStep
+                VST2    {dZr3,dZi3},[pDst, :128],outPointStep
             
                 VSUB    dZr1,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VADD    dZi1,dYi0,dYr3
             
-                VST2    {dZr1,dZi1},[pDst :128],dstStep
+                VST2    {dZr1,dZi1},[pDst, :128],dstStep
                 
                 
             .else
                 
                 VSUB    dZr1,dYr0,dYi3
-                VST2    {dZr0,dZi0},[pDst :128],outPointStep
+                VST2    {dZr0,dZi0},[pDst, :128],outPointStep
                 VADD    dZi1,dYi0,dYr3
             
                 VADD    qZ2,qY2,qY1
-                VST2    {dZr1,dZi1},[pDst :128],outPointStep
+                VST2    {dZr1,dZi1},[pDst, :128],outPointStep
             
                 VADD    dZr3,dYr0,dYi3
-                VST2    {dZr2,dZi2},[pDst :128],outPointStep
+                VST2    {dZr2,dZi2},[pDst, :128],outPointStep
                 VSUB    dZi3,dYi0,dYr3
             
-                VST2    {dZr3,dZi3},[pDst :128],dstStep
+                VST2    {dZr3,dZi3},[pDst, :128],dstStep
 
             
             .endif
             
         .endif
         
         ADD     pSrc,pSrc,pointStep                         @// increment to data[1] of the next set              
         BGT     setLoop\name
         
         
-        VLD1     dW1,[pTwiddle :64],stepTwiddle                  @//[wi | wr]
+        VLD1     dW1,[pTwiddle, :64],stepTwiddle                  @//[wi | wr]
         SUBS    grpCount,grpCount,#4                    @// subtract 4 since grpCount multiplied by 4               
-        VLD1     dW2,[pTwiddle :64],stepTwiddle                  @//[wi | wr]
+        VLD1     dW2,[pTwiddle, :64],stepTwiddle                  @//[wi | wr]
         ADD     pSrc,pSrc,srcStep                       @// increment pSrc for the next grp
-        VLD1     dW3,[pTwiddle :64],twStep                       @//[wi | wr]
+        VLD1     dW3,[pTwiddle, :64],twStep                       @//[wi | wr]
         BGT     grpLoop\name
 
                 
         @// Reset and Swap pSrc and pDst for the next stage
         MOV     t1,pDst
         SUB     pDst,pSrc,outPointStep,LSL #2                  @// pDst -= 2*size; pSrc -= 8*size bytes           
         SUB     pSrc,t1,outPointStep    
         
--- a/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S
+++ b/media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S
@@ -208,30 +208,30 @@
         @// pT0+pointStep = increment of 8*pointStep bytes = grpSize bytes
         @// Note: outPointStep = pointStep for firststage
         
         MOV     pointStep,grpSize,LSL #3
         
                                        
         @// Calculate the step of input data for the next set
         @//MOV     step1,pointStep,LSL #1                      @// step1 = 2*pointStep
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0]
         MOV     step1,grpSize,LSL #4
         
         MOV     step2,pointStep,LSL #3
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+        VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
         SUB     step2,step2,pointStep                          @// step2 = 7*pointStep
         RSB     setStep,step2,#16                              @// setStep = - 7*pointStep+16
         
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
-        VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3] 
-        VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
-        VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
-        VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
-        VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7] & update pSrc for the next set
+        VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
+        VLD2    {dXr3,dXi3},[pSrc, :128],pointStep          @//  data[3] 
+        VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
+        VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
+        VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
+        VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7] & update pSrc for the next set
                                                       @//  setStep = -7*pointStep + 16  
         @// grp = 0 a special case since all the twiddle factors are 1
         @// Loop on the sets
 
 grpZeroSetLoop\name :	
                                                       
         @// Decrement setcount
         SUBS    setCount,setCount,#2                    @// decrement the set loop counter           
@@ -251,151 +251,151 @@ grpZeroSetLoop\name :
             VHSUB    qV2,qU0,qU4
             VHADD    qV4,qU2,qU6
             VHSUB    qV6,qU2,qU6
             
             @// finish third stage of 8 point FFT 
             
             VHADD    qY0,qV0,qV4
             VHSUB    qY4,qV0,qV4
-            VST2    {dYr0,dYi0},[pDst :128],step1                    @// store y0
+            VST2    {dYr0,dYi0},[pDst, :128],step1                    @// store y0
             
             .ifeqs	"\inverse", "TRUE"
                 
                 VHSUB    dYr2,dVr2,dVi6
                 VHADD    dYi2,dVi2,dVr6
                 
                 VHADD    dYr6,dVr2,dVi6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y2
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y2
                 VHSUB    dYi6,dVi2,dVr6
             
                 VHSUB    qU1,qX0,qX4                    
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
             
                 VHSUB    qU3,qX1,qX5
                 VHSUB    qU5,qX2,qX6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y6
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y6
             
             .ELSE
             
                 VHADD    dYr6,dVr2,dVi6
                 VHSUB    dYi6,dVi2,dVr6
                 
                 VHSUB    dYr2,dVr2,dVi6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y2
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y2
                 VHADD    dYi2,dVi2,dVr6
                 
                                 
                 VHSUB    qU1,qX0,qX4
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
                 VHSUB    qU3,qX1,qX5
                 VHSUB    qU5,qX2,qX6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y6
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y6
 
             
             .ENDIF
             
             @// finish first stage of 8 point FFT 
             
             VHSUB    qU7,qX3,qX7
             VMOV    dT0[0],t0                                   
             
             @// finish second stage of 8 point FFT 
             
             VHSUB    dVr1,dUr1,dUi5
-            VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0] for next iteration
+            VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0] for next iteration
             VHADD    dVi1,dUi1,dUr5
             VHADD    dVr3,dUr1,dUi5
-            VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
             VHSUB    dVi3,dUi1,dUr5
                         
             VHSUB    dVr5,dUr3,dUi7
-            VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+            VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
             VHADD    dVi5,dUi3,dUr7
             VHADD    dVr7,dUr3,dUi7
-            VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
+            VLD2    {dXr3,dXi3},[pSrc, :128],pointStep          @//  data[3]
             VHSUB    dVi7,dUi3,dUr7
             
             @// finish third stage of 8 point FFT 
             
             .ifeqs	"\inverse", "TRUE"
             
                 @// calculate a*v5 
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi5,dVi5,dT0[0]
                             
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
                 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
                 
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
                 VQRDMULH    dVi7,dVi7,dT0[0]
                 
                 VHADD    qY1,qV1,qV5
                 VHSUB    qY5,qV1,qV5
                 
                             
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
                 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]            
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]            
                 
                 
                 VHSUB    dYr3,dVr3,dVr7
                 VHSUB    dYi3,dVi3,dVi7
-                VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
+                VST2    {dYr1,dYi1},[pDst, :128],step1                    @// store y1
                 VHADD    dYr7,dVr3,dVr7
                 VHADD    dYi7,dVi3,dVi7
 
                 
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y3
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y5
-                VST2    {dYr7,dYi7},[pDst :128]!                      @// store y7
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y3
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y5
+                VST2    {dYr7,dYi7},[pDst, :128]!                      @// store y7
 
             .ELSE
             
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi7,dVi7,dT0[0]
                 
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
                 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
                 
                 @// calculate a*v5 
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
                 VQRDMULH    dVi5,dVi5,dT0[0]
 
                 VHADD    dYr7,dVr3,dVr7
                 VHADD    dYi7,dVi3,dVi7
                 SUB     pDst, pDst, step2                           @// set pDst to y1
             
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]            
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]            
                 
                 VHSUB    qY5,qV1,qV5
                 
                 VHSUB    dYr3,dVr3,dVr7
-                VST2    {dYr7,dYi7},[pDst :128],step1                    @// store y1
+                VST2    {dYr7,dYi7},[pDst, :128],step1                    @// store y1
                 VHSUB    dYi3,dVi3,dVi7
                 VHADD    qY1,qV1,qV5
                 
                 
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y3
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y5
-                VST2    {dYr1,dYi1},[pDst :128]!                      @// store y7
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y3
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y5
+                VST2    {dYr1,dYi1},[pDst, :128]!                      @// store y7
             
             .ENDIF
             
             
            
         .ELSE
             @// finish first stage of 8 point FFT 
             
@@ -410,151 +410,151 @@ grpZeroSetLoop\name :
             VSUB    qV2,qU0,qU4
             VADD    qV4,qU2,qU6
             VSUB    qV6,qU2,qU6
             
             @// finish third stage of 8 point FFT 
             
             VADD    qY0,qV0,qV4
             VSUB    qY4,qV0,qV4
-            VST2    {dYr0,dYi0},[pDst :128],step1                    @// store y0
+            VST2    {dYr0,dYi0},[pDst, :128],step1                    @// store y0
             
             .ifeqs	"\inverse", "TRUE"
                 
                 VSUB    dYr2,dVr2,dVi6
                 VADD    dYi2,dVi2,dVr6
                 
                 VADD    dYr6,dVr2,dVi6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y2
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y2
                 VSUB    dYi6,dVi2,dVr6
             
                 VSUB    qU1,qX0,qX4                    
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
             
                 VSUB    qU3,qX1,qX5
                 VSUB    qU5,qX2,qX6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y6
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y6
             
             .ELSE
             
                 VADD    dYr6,dVr2,dVi6
                 VSUB    dYi6,dVi2,dVr6
                 
                 VSUB    dYr2,dVr2,dVi6
-                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y2
+                VST2    {dYr6,dYi6},[pDst, :128],step1                    @// store y2
                 VADD    dYi2,dVi2,dVr6
                 
                                 
                 VSUB    qU1,qX0,qX4
-                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
+                VST2    {dYr4,dYi4},[pDst, :128],step1                    @// store y4
                 VSUB    qU3,qX1,qX5
                 VSUB    qU5,qX2,qX6
-                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y6
+                VST2    {dYr2,dYi2},[pDst, :128],step1                    @// store y6
 
             
             .ENDIF
             
             @// finish first stage of 8 point FFT 
             
             VSUB    qU7,qX3,qX7
             VMOV    dT0[0],t0                                   
             
             @// finish second stage of 8 point FFT 
             
             VSUB    dVr1,dUr1,dUi5
-            VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0] for next iteration
+            VLD2    {dXr0,dXi0},[pSrc, :128],pointStep          @//  data[0] for next iteration
             VADD    dVi1,dUi1,dUr5
             VADD    dVr3,dUr1,dUi5
-            VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
+            VLD2    {dXr1,dXi1},[pSrc, :128],pointStep          @//  data[1]
             VSUB    dVi3,dUi1,dUr5
                         
             VSUB    dVr5,dUr3,dUi7
-            VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+            VLD2    {dXr2,dXi2},[pSrc, :128],pointStep          @//  data[2]
             VADD    dVi5,dUi3,dUr7
             VADD    dVr7,dUr3,dUi7
-            VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
+            VLD2    {dXr3,dXi3},[pSrc, :128],pointStep          @//  data[3]
             VSUB    dVi7,dUi3,dUr7
             
             @// finish third stage of 8 point FFT 
             
             .ifeqs	"\inverse", "TRUE"
             
                 @// calculate a*v5 
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi5,dVi5,dT0[0]
                             
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
                 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
                 
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
                 VQRDMULH    dVi7,dVi7,dT0[0]
                 
                 VADD    qY1,qV1,qV5
                 VSUB    qY5,qV1,qV5
                 
                             
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
                 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]            
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]            
                 
                 
                 VSUB    dYr3,dVr3,dVr7
                 VSUB    dYi3,dVi3,dVi7
-                VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
+                VST2    {dYr1,dYi1},[pDst, :128],step1                    @// store y1
                 VADD    dYr7,dVr3,dVr7
                 VADD    dYi7,dVi3,dVi7
 
                 
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y3
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y5
-                VST2    {dYr7,dYi7},[pDst :128]!                      @// store y7
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y3
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y5
+                VST2    {dYr7,dYi7},[pDst, :128]!                      @// store y7
 
             .ELSE
             
                 @// calculate  b*v7
                 VQRDMULH    dT1,dVr7,dT0[0]
-                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
+                VLD2    {dXr4,dXi4},[pSrc, :128],pointStep          @//  data[4]
                 VQRDMULH    dVi7,dVi7,dT0[0]
                 
-                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
+                VLD2    {dXr5,dXi5},[pSrc, :128],pointStep          @//  data[5]
                 VADD    dVr7,dT1,dVi7                               @// b * V7
                 VSUB    dVi7,dVi7,dT1
                 
-                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
+                VLD2    {dXr6,dXi6},[pSrc, :128],pointStep          @//  data[6]
                 
                 @// calculate a*v5 
                 VQRDMULH    dT1,dVr5,dT0[0]                         @// use dVi0 for dT1
                 VQRDMULH    dVi5,dVi5,dT0[0]
 
                 VADD    dYr7,dVr3,dVr7
                 VADD    dYi7,dVi3,dVi7
                 SUB     pDst, pDst, step2                           @// set pDst to y1
             
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]            
+                VLD2    {dXr7,dXi7},[pSrc, :128],setStep            @//  data[7]            
                 
                 VSUB    qY5,qV1,qV5
                 
                 VSUB    dYr3,dVr3,dVr7
-                VST2    {dYr7,dYi7},[pDst :128],step1                    @// store y1
+                VST2    {dYr7,dYi7},[pDst, :128],step1                    @// store y1
                 VSUB    dYi3,dVi3,dVi7
                 VADD    qY1,qV1,qV5
                 
                 
-                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y3
-                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y5
-                VST2    {dYr1,dYi1},[pDst :128]!                      @// store y7
+                VST2    {dYr5,dYi5},[pDst, :128],step1                    @// store y3
+                VST2    {dYr3,dYi3},[pDst, :128],step1                    @// store y5
+                VST2    {dYr1,dYi1},[pDst, :128]!                      @// store y7
             
             .ENDIF
             
             
         .ENDIF
         
         SUB     pDst, pDst, step2                               @// update pDst for the next set
         BGT     grpZeroSetLoop\name
--- a/media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
+++ b/media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
@@ -123,18 +123,16 @@
 #define dW1       d5.f32
 #define dY0       d10.f32
 #define dY1       d11.f32
 #define dY2       d12.f32
 #define dY3       d13.f32
 
 #define half      d0.f32
 
-HALF:   .float  0.5
-
     @// Allocate stack memory required by the function
 
     @// Write function header
         M_START     omxSP_FFTFwd_RToCCS_F32_Sfs,r11,d15
 
 @ Structure offsets for the FFTSpec
         .set    ARMsFFTSpec_N, 0
         .set    ARMsFFTSpec_pBitRev, 4
@@ -295,17 +293,17 @@ finalComplexToRealFixup:
         SUB     step1,step1,#8                    @// (N/4-1)*8 bytes
 
         @// F(k) = 1/2[Z(k) +  Z'(N/2-k)] -j*W^(k) [Z(k) -  Z'(N/2-k)]
         @// Note: W^k is stored as negative values in the table
         @// Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1)
         @// since both of them require Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
 
 
-        LDR     t0, =HALF
+        ADR     t0, HALF
         VLD1    half[0], [t0]
 
 evenOddButterflyLoop:
 
 
         VLD1    dW0r,[argTwiddle],step1
         VLD1    dW1r,[argTwiddle]!
 
@@ -397,10 +395,10 @@ lastElement:
         VST1    dX0r[1],[argDst]!
 
 End:
         @// Set return value
         MOV     result, #OMX_Sts_NoErr
 
         @// Write function tail
         M_END
-
+HALF:   .float  0.5
         .end
--- a/media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S
+++ b/media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S
@@ -193,20 +193,20 @@ FFTEnd:                                 
         VMOV    one, 1.0
         VDIV    one, one, fN            @ one = dScale[0] = 1 / fftSize
 
         @ Scale data, doing 2 complex values at a time (because N is
         @ always even).
 
         @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
 scaleFFTData:
-        VLD1    {qX0},[pSrc :128]            @// pSrc contains pDst pointer
+        VLD1    {qX0},[pSrc, :128]            @// pSrc contains pDst pointer
         SUBS    subFFTSize,subFFTSize,#2
         VMUL    qX0, qX0, dScale[0]
-        VST1    {qX0},[pSrc :128]!
+        VST1    {qX0},[pSrc, :128]!
 
         BGT     scaleFFTData
 End:
         @// Set return value
         MOV     result, #OMX_Sts_NoErr
 
         @// Write function tail
         M_END