Bug 979894. Add unwind information for pixman arm functions. r=jseward
author: Jeff Muizelaar <jmuizelaar@mozilla.com>
Mon, 10 Mar 2014 12:31:58 -0400
changeset 191046 4535980dba452f5132a22661d153622eb448d5b5
parent 191045 232ae3020845218c2e4c642059d7713eea71b6f3
child 191047 d250a7f22fef74f2f713dcd11aac8b6c6ad14e4e
push id: 474
push user: asasaki@mozilla.com
push date: Mon, 02 Jun 2014 21:01:02 +0000
treeherdermozilla-release@967f4cf1b31c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jseward
bugs: 979894
milestone: 30.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 979894. Add unwind information for pixman arm functions. r=jseward

This isn't technically correct because these functions will never call functions that throw exceptions; however, it lets the profiler unwind them. The unwinding will also probably be wrong during the prologue/epilogue. The right solution is probably to use CFI.
gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
gfx/cairo/libpixman/src/pixman-arm-simd-asm.h
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
@@ -949,16 +949,17 @@ generate_composite_function \
  * registers. Additionally, this function needs all the NEON registers,
  * so it has to save d8-d15 registers which are callee saved according
  * to ABI. These registers are restored from 'cleanup' macro. All the
  * other NEON registers are caller saved, so can be clobbered freely
  * without introducing any problems.
  */
 .macro pixman_composite_over_n_8_0565_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
+    .vsave      {d8-d15}  /* unwind info: the vpush below stores d8-d15 on the stack */
     vpush       {d8-d15}
     vld1.32     {d11[0]}, [DUMMY]
     vdup.8      d8, d11[0]
     vdup.8      d9, d11[1]
     vdup.8      d10, d11[2]
     vdup.8      d11, d11[3]
 .endm
 
@@ -976,16 +977,17 @@ generate_composite_function \
     pixman_composite_over_8888_8_0565_process_pixblock_head, \
     pixman_composite_over_8888_8_0565_process_pixblock_tail, \
     pixman_composite_over_8888_8_0565_process_pixblock_tail_head
 
 /******************************************************************************/
 
 .macro pixman_composite_over_8888_n_0565_init
     add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
+    .vsave      {d8-d15}  /* unwind info for the vpush below; the matching vpop is in the cleanup macro */
     vpush       {d8-d15}
     vld1.32     {d24[0]}, [DUMMY]
     vdup.8      d24, d24[3]
 .endm
 
 .macro pixman_composite_over_8888_n_0565_cleanup
     vpop        {d8-d15}
 .endm
@@ -1442,16 +1444,17 @@ generate_composite_function \
     vmull.u8    q8, d25, d4
     vmull.u8    q9, d25, d5
     vmull.u8    q10, d25, d6
     vmull.u8    q11, d25, d7
 .endm
 
 .macro pixman_composite_over_n_8_8888_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
+    .vsave      {d8-d15}  /* unwind info: the vpush below stores d8-d15 on the stack */
     vpush       {d8-d15}
     vld1.32     {d11[0]}, [DUMMY]
     vdup.8      d8, d11[0]
     vdup.8      d9, d11[1]
     vdup.8      d10, d11[2]
     vdup.8      d11, d11[3]
 .endm
 
@@ -1513,16 +1516,17 @@ generate_composite_function \
     fetch_mask_pixblock
     cache_preload 32, 32
     vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
     pixman_composite_over_n_8_8_process_pixblock_head
 .endm
 
 .macro pixman_composite_over_n_8_8_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
+    .vsave      {d8-d15}  /* unwind info for the vpush below; the matching vpop is in the cleanup macro */
     vpush       {d8-d15}
     vld1.32     {d8[0]}, [DUMMY]
     vdup.8      d8, d8[3]
 .endm
 
 .macro pixman_composite_over_n_8_8_cleanup
     vpop        {d8-d15}
 .endm
@@ -1614,16 +1618,17 @@ generate_composite_function \
         vqadd.u8    q15, q1, q15
     cache_preload 8, 8
     pixman_composite_over_n_8888_8888_ca_process_pixblock_head
     vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
 .macro pixman_composite_over_n_8888_8888_ca_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
+    .vsave      {d8-d15}  /* unwind info: the vpush below stores d8-d15 on the stack */
     vpush       {d8-d15}
     vld1.32     {d11[0]}, [DUMMY]
     vdup.8      d8, d11[0]
     vdup.8      d9, d11[1]
     vdup.8      d10, d11[2]
     vdup.8      d11, d11[3]
 .endm
 
@@ -1783,16 +1788,17 @@ generate_composite_function \
             vmull.u8    q7,  d17, d25
             vmull.u8    q6,  d16, d24
             vmull.u8    q11, d18, d26
     vst1.16     {d28, d29}, [DST_W, :128]!
 .endm
 
 .macro pixman_composite_over_n_8888_0565_ca_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
+    .vsave      {d8-d15}  /* unwind info: the vpush below stores d8-d15 on the stack */
     vpush       {d8-d15}
     vld1.32     {d11[0]}, [DUMMY]
     vdup.8      d8, d11[0]
     vdup.8      d9, d11[1]
     vdup.8      d10, d11[2]
     vdup.8      d11, d11[3]
 .endm
 
@@ -1896,16 +1902,17 @@ generate_composite_function \
     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
     fetch_mask_pixblock
     cache_preload 32, 32
     pixman_composite_add_n_8_8_process_pixblock_head
 .endm
 
 .macro pixman_composite_add_n_8_8_init
     add         DUMMY, sp, #ARGS_STACK_OFFSET
+    .vsave      {d8-d15}  /* unwind info for the vpush below; the matching vpop is in the cleanup macro */
     vpush       {d8-d15}
     vld1.32     {d11[0]}, [DUMMY]
     vdup.8      d11, d11[3]
 .endm
 
 .macro pixman_composite_add_n_8_8_cleanup
     vpop        {d8-d15}
 .endm
@@ -2202,16 +2209,17 @@ generate_composite_function_single_scanl
     fetch_src_pixblock
     cache_preload 8, 8
     pixman_composite_over_8888_n_8888_process_pixblock_head
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
 .macro pixman_composite_over_8888_n_8888_init
     add         DUMMY, sp, #48
+    .vsave      {d8-d15}  /* unwind info for the vpush below; the matching vpop is in the cleanup macro */
     vpush       {d8-d15}
     vld1.32     {d15[0]}, [DUMMY]
     vdup.8      d15, d15[3]
 .endm
 
 .macro pixman_composite_over_8888_n_8888_cleanup
     vpop        {d8-d15}
 .endm
@@ -2574,16 +2582,17 @@ generate_composite_function \
     10,  /* dst_r_basereg */ \
     8,  /* src_basereg   */ \
     15  /* mask_basereg  */
 
 /******************************************************************************/
 
 .macro pixman_composite_over_0565_n_0565_init
     add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
+    .vsave      {d8-d15}  /* unwind info for the vpush below; the matching vpop is in the cleanup macro */
     vpush       {d8-d15}
     vld1.32     {d15[0]}, [DUMMY]
     vdup.8      d15, d15[3]
 .endm
 
 .macro pixman_composite_over_0565_n_0565_cleanup
     vpop        {d8-d15}
 .endm
@@ -3127,23 +3136,26 @@ pixman_asm_function fname
     WIDTH     .req      ip
     TMP1      .req      r3
     TMP2      .req      r4
     PF_OFFS   .req      r7
     TMP3      .req      r8
     TMP4      .req      r9
     STRIDE    .req      r2
 
+    .fnstart
     mov       ip, sp
+    .save     {r4, r5, r6, r7, r8, r9}
     push      {r4, r5, r6, r7, r8, r9}
     mov       PF_OFFS, #prefetch_distance
     ldmia     ip, {WB, X, UX, WIDTH}
     mul       PF_OFFS, PF_OFFS, UX
 
 .if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
+    .vsave    {d8-d15}
     vpush     {d8-d15}
 .endif
 
     sub       STRIDE, BOTTOM, TOP
     .unreq    BOTTOM
 
     cmp       WIDTH, #0
     ble       3f
@@ -3227,16 +3239,17 @@ 2:
     beq       3f
     bilinear_interpolate_last_pixel src_fmt, dst_fmt
 3:
 .if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
     vpop      {d8-d15}
 .endif
     pop       {r4, r5, r6, r7, r8, r9}
     bx        lr
+    .fnend
 
     .unreq    OUT
     .unreq    TOP
     .unreq    WT
     .unreq    WB
     .unreq    X
     .unreq    UX
     .unreq    WIDTH
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
@@ -634,16 +634,18 @@ 2:
     .func fname
     .global fname
     /* For ELF format also set function visibility to hidden */
 #ifdef __ELF__
     .hidden fname
     .type fname, %function
 #endif
 fname:
+    .fnstart
+    .save       {r4-r12, lr}
     push        {r4-r12, lr}        /* save all registers */
 
 /*
  * Select prefetch type for this function. If prefetch distance is
  * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch
  * has to be used instead of ADVANCED.
  */
     .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
@@ -811,16 +813,17 @@ fname:
     PF mov      PF_DST, DST_R
     PF mov      PF_MASK, MASK
     /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
     PF mov      PF_CTL, H, lsl #4
     PF add      PF_CTL, #(prefetch_distance - 0x10)
 
     init
 .if regs_shortage
+    .save       {r0, r1}
     push        {r0, r1}
 .endif
     subs        H, H, #1
 .if regs_shortage
     str         H, [sp, #4] /* save updated height to stack */
 .else
     mov         ORIG_W, W
 .endif
@@ -896,16 +899,17 @@ 1:
                             process_pixblock_tail_head
     advance_to_next_scanline 8b
 9:
 .if regs_shortage
     pop         {r0, r1}
 .endif
     cleanup
     pop         {r4-r12, pc}  /* exit */
+    .fnend
 
     .purgem     fetch_src_pixblock
     .purgem     pixld_src
 
     .unreq      SRC
     .unreq      MASK
     .unreq      DST_R
     .unreq      DST_W
@@ -948,16 +952,17 @@ 9:
     .func fname
     .global fname
     /* For ELF format also set function visibility to hidden */
 #ifdef __ELF__
     .hidden fname
     .type fname, %function
 #endif
 fname:
+    .fnstart
     .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
 /*
  * Make some macro arguments globally visible and accessible
  * from other macros
  */
     .set src_bpp, src_bpp_
     .set mask_bpp, mask_bpp_
     .set dst_w_bpp, dst_w_bpp_
@@ -982,16 +987,17 @@ fname:
     DST_R       .req        r6
     SRC_WIDTH_FIXED .req        r7
 
     .macro pixld_src x:vararg
         pixld_s x
     .endm
 
     ldr         UNIT_X, [sp]
+    .save       {r4-r8, lr}
     push        {r4-r8, lr}
     ldr         SRC_WIDTH_FIXED, [sp, #(24 + 4)]
     .if mask_bpp != 0
     ldr         MASK, [sp, #(24 + 8)]
     .endif
 .else
     /*
      * Assign symbolic names to registers
@@ -1097,16 +1103,17 @@ 8:
     .unreq      DST_R
     .unreq      DST_W
     .unreq      W
 .endif
 
     .purgem     fetch_src_pixblock
     .purgem     pixld_src
 
+    .fnend
     .endfunc
 .endm
 
 .macro generate_composite_function_single_scanline x:vararg
     generate_composite_function_scanline 0, x
 .endm
 
 .macro generate_composite_function_nearest_scanline x:vararg
@@ -1123,16 +1130,17 @@ 8:
 
 /*
  * Prologue/epilogue variant which additionally saves/restores d8-d15
  * registers (they need to be saved/restored by callee according to ABI).
  * This is required if the code needs to use all the NEON registers.
  */
 
 .macro default_init_need_all_regs
+    .vsave      {d8-d15}  /* unwind info: the vpush below stores callee-saved d8-d15 */
     vpush       {d8-d15}
 .endm
 
 .macro default_cleanup_need_all_regs
     vpop        {d8-d15}
 .endm
 
 /******************************************************************************/
--- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h
@@ -675,22 +675,25 @@ 142:
     WK0         .req    r8  /* pixel data registers */
     WK1         .req    r9
     WK2         .req    r10
     WK3         .req    r11
     SCRATCH     .req    r12
     ORIG_W      .req    r14 /* width (pixels) */
 
 fname:
+        .fnstart
+	.save   {r4-r11, lr}
         push    {r4-r11, lr}        /* save all registers */
 
         subs    Y, Y, #1
         blo     199f
 
 #ifdef DEBUG_PARAMS
+	.pad    #9*4
         sub     sp, sp, #9*4
 #endif
 
  .if src_bpp > 0
         ldr     SRC, [sp, #ARGS_STACK_OFFSET]
         ldr     STRIDE_S, [sp, #ARGS_STACK_OFFSET+4]
  .endif
  .if mask_bpp > 0
@@ -852,16 +855,17 @@ 197:
 198:
         cleanup
 
 #ifdef DEBUG_PARAMS
         add     sp, sp, #9*4 /* junk the debug copy of arguments */
 #endif
 199:
         pop     {r4-r11, pc}  /* exit */
+	.fnend
 
  .ltorg
 
     .unreq  X
     .unreq  Y
     .unreq  DST
     .unreq  STRIDE_D
     .unreq  SRC