Bug 1501523 - part 2 - remove separate jpeg_nbits_table copies from assembly files; r=aosmond
author Nathan Froyd <froydnj@mozilla.com>
Wed, 24 Oct 2018 16:06:48 -0400
changeset 491136 67bada7dcad029d68c0abe1d7c03fc225352f2ec
parent 491135 cd76fd701f82dc9211bc8323e9948dedc866af9f
child 491137 65d4a4b363db91978dddc7d013fb40bd433f3da3
push id 247
push user fmarier@mozilla.com
push date Sat, 27 Oct 2018 01:06:44 +0000
reviewers aosmond
bugs 1501523
milestone 65.0a1
Bug 1501523 - part 2 - remove separate jpeg_nbits_table copies from assembly files; r=aosmond

Continuing the theme from the first part, the optimized assembly files for x86-64 and i386 include their own private copies of jpeg_nbits_table. There's no need for them to do so; they can make use of the C copy that we commonized in the first patch.
media/libjpeg/assembly-tables.diff
media/libjpeg/simd/i386/jchuff-sse2.asm
media/libjpeg/simd/x86_64/jchuff-sse2.asm
media/update-libjpeg.sh
new file mode 100644
--- /dev/null
+++ b/media/libjpeg/assembly-tables.diff
@@ -0,0 +1,70 @@
+diff --git a/media/libjpeg/simd/i386/jchuff-sse2.asm b/media/libjpeg/simd/i386/jchuff-sse2.asm
+index 6ea69f6..fea4de3 100644
+--- a/media/libjpeg/simd/i386/jchuff-sse2.asm
++++ b/media/libjpeg/simd/i386/jchuff-sse2.asm
+@@ -27,11 +27,10 @@
+ 
+     alignz      32
+     GLOBAL_DATA(jconst_huff_encode_one_block)
++    EXTERN      EXTN(jpeg_nbits_table)
+ 
+ EXTN(jconst_huff_encode_one_block):
+ 
+-%include "jpeg_nbits_table.inc"
+-
+     alignz      32
+ 
+ ; --------------------------------------------------------------------------
+@@ -233,7 +232,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
+ 
+     ; Find the number of bits needed for the magnitude of the coefficient
+     movpic      ebp, POINTER [esp+gotptr]                        ; load GOT address (ebp)
+-    movzx       edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)]  ; nbits = JPEG_NBITS(temp);
++    movzx       edx, byte [GOTOFF(ebp, EXTN(jpeg_nbits_table) + ecx)]  ; nbits = JPEG_NBITS(temp);
+     mov         DWORD [esp+temp2], edx                           ; backup nbits in temp2
+ 
+     ; Emit the Huffman-coded symbol for the number of bits
+@@ -305,7 +304,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
+ .ERLOOP:
+     movsx       eax, word [esi]                                  ; temp = t1[k];
+     movpic      edx, POINTER [esp+gotptr]                        ; load GOT address (edx)
+-    movzx       eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)]  ; nbits = JPEG_NBITS(temp);
++    movzx       eax, byte [GOTOFF(edx, EXTN(jpeg_nbits_table) + eax)]  ; nbits = JPEG_NBITS(temp);
+     mov         DWORD [esp+temp2], eax
+     ; Emit Huffman symbol for run length / number of bits
+     shl         ecx, 4                        ; temp3 = (r << 4) + nbits;
+diff --git a/media/libjpeg/simd/x86_64/jchuff-sse2.asm b/media/libjpeg/simd/x86_64/jchuff-sse2.asm
+index 1b091ad..5ec8b1a 100644
+--- a/media/libjpeg/simd/x86_64/jchuff-sse2.asm
++++ b/media/libjpeg/simd/x86_64/jchuff-sse2.asm
+@@ -27,11 +27,10 @@
+ 
+     alignz      32
+     GLOBAL_DATA(jconst_huff_encode_one_block)
++    EXTERN      EXTN(jpeg_nbits_table)
+ 
+ EXTN(jconst_huff_encode_one_block):
+ 
+-%include "jpeg_nbits_table.inc"
+-
+     alignz      32
+ 
+ ; --------------------------------------------------------------------------
+@@ -222,7 +221,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
+     add         ebx, esi                ; temp2 += temp3;
+ 
+     ; Find the number of bits needed for the magnitude of the coefficient
+-    lea         r11, [rel jpeg_nbits_table]
++    lea         r11, [rel EXTN(jpeg_nbits_table)]
+     movzx       rdi, byte [r11 + rdi]         ; nbits = JPEG_NBITS(temp);
+     ; Emit the Huffman-coded symbol for the number of bits
+     mov         r11d,  INT [r14 + rdi * 4]    ; code = dctbl->ehufco[nbits];
+@@ -289,7 +288,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
+     lea         rsi, [rsi+r12*2]             ; k += r;
+     shr         r11, cl                      ; index >>= r;
+     movzx       rdi, word [rsi]              ; temp = t1[k];
+-    lea         rbx, [rel jpeg_nbits_table]
++    lea         rbx, [rel EXTN(jpeg_nbits_table)]
+     movzx       rdi, byte [rbx + rdi]        ; nbits = JPEG_NBITS(temp);
+ .BRLOOP:
+     cmp         r12, 16                 ; while (r > 15) {
--- a/media/libjpeg/simd/i386/jchuff-sse2.asm
+++ b/media/libjpeg/simd/i386/jchuff-sse2.asm
@@ -22,21 +22,20 @@
 
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
     alignz      32
     GLOBAL_DATA(jconst_huff_encode_one_block)
+    EXTERN      EXTN(jpeg_nbits_table)
 
 EXTN(jconst_huff_encode_one_block):
 
-%include "jpeg_nbits_table.inc"
-
     alignz      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
     BITS        32
 
 ; These macros perform the same task as the emit_bits() function in the
 ; original libjpeg code.  In addition to reducing overhead by explicitly
@@ -228,17 +227,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
 
     ; For a negative input, want temp2 = bitwise complement of abs(input)
     ; This code assumes we are on a two's complement machine
     add         esi, edx                ; temp2 += temp3;
     mov         DWORD [esp+temp], esi   ; backup temp2 in temp
 
     ; Find the number of bits needed for the magnitude of the coefficient
     movpic      ebp, POINTER [esp+gotptr]                        ; load GOT address (ebp)
-    movzx       edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)]  ; nbits = JPEG_NBITS(temp);
+    movzx       edx, byte [GOTOFF(ebp, EXTN(jpeg_nbits_table) + ecx)]  ; nbits = JPEG_NBITS(temp);
     mov         DWORD [esp+temp2], edx                           ; backup nbits in temp2
 
     ; Emit the Huffman-coded symbol for the number of bits
     mov         ebp, POINTER [eax+24]         ; After this point, arguments are not accessible anymore
     mov         eax,  INT [ebp + edx * 4]     ; code = dctbl->ehufco[nbits];
     movzx       ecx, byte [ebp + edx + 1024]  ; size = dctbl->ehufsi[nbits];
     EMIT_BITS   eax                           ; EMIT_BITS(code, size)
 
@@ -300,17 +299,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     mov         eax, INT [ebp + 240 * 4]      ; code_0xf0 = actbl->ehufco[0xf0];
     movzx       ecx, byte [ebp + 1024 + 240]  ; size_0xf0 = actbl->ehufsi[0xf0];
     EMIT_BITS   eax                           ; EMIT_BITS(code_0xf0, size_0xf0)
     mov         ecx, DWORD [esp+temp]
     jmp         .BRLOOP
 .ERLOOP:
     movsx       eax, word [esi]                                  ; temp = t1[k];
     movpic      edx, POINTER [esp+gotptr]                        ; load GOT address (edx)
-    movzx       eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)]  ; nbits = JPEG_NBITS(temp);
+    movzx       eax, byte [GOTOFF(edx, EXTN(jpeg_nbits_table) + eax)]  ; nbits = JPEG_NBITS(temp);
     mov         DWORD [esp+temp2], eax
     ; Emit Huffman symbol for run length / number of bits
     shl         ecx, 4                        ; temp3 = (r << 4) + nbits;
     add         ecx, eax
     mov         eax,  INT [ebp + ecx * 4]     ; code = actbl->ehufco[temp3];
     movzx       ecx, byte [ebp + ecx + 1024]  ; size = actbl->ehufsi[temp3];
     EMIT_BITS   eax
 
--- a/media/libjpeg/simd/x86_64/jchuff-sse2.asm
+++ b/media/libjpeg/simd/x86_64/jchuff-sse2.asm
@@ -22,21 +22,20 @@
 
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
     alignz      32
     GLOBAL_DATA(jconst_huff_encode_one_block)
+    EXTERN      EXTN(jpeg_nbits_table)
 
 EXTN(jconst_huff_encode_one_block):
 
-%include "jpeg_nbits_table.inc"
-
     alignz      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
     BITS        64
 
 ; These macros perform the same task as the emit_bits() function in the
 ; original libjpeg code.  In addition to reducing overhead by explicitly
@@ -217,17 +216,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     xor         edi, esi                ; temp ^= temp3;
     sub         edi, esi                ; temp -= temp3;
 
     ; For a negative input, want temp2 = bitwise complement of abs(input)
     ; This code assumes we are on a two's complement machine
     add         ebx, esi                ; temp2 += temp3;
 
     ; Find the number of bits needed for the magnitude of the coefficient
-    lea         r11, [rel jpeg_nbits_table]
+    lea         r11, [rel EXTN(jpeg_nbits_table)]
     movzx       rdi, byte [r11 + rdi]         ; nbits = JPEG_NBITS(temp);
     ; Emit the Huffman-coded symbol for the number of bits
     mov         r11d,  INT [r14 + rdi * 4]    ; code = dctbl->ehufco[nbits];
     movzx       esi, byte [r14 + rdi + 1024]  ; size = dctbl->ehufsi[nbits];
     EMIT_BITS   r11, esi                      ; EMIT_BITS(code, size)
 
     ; Mask off any extra bits in code
     mov         esi, 1
@@ -284,17 +283,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     lea         rsi, [t1]
 .BLOOP:
     bsf         r12, r11                     ; r = __builtin_ctzl(index);
     jz          .ELOOP
     mov         rcx, r12
     lea         rsi, [rsi+r12*2]             ; k += r;
     shr         r11, cl                      ; index >>= r;
     movzx       rdi, word [rsi]              ; temp = t1[k];
-    lea         rbx, [rel jpeg_nbits_table]
+    lea         rbx, [rel EXTN(jpeg_nbits_table)]
     movzx       rdi, byte [rbx + rdi]        ; nbits = JPEG_NBITS(temp);
 .BRLOOP:
     cmp         r12, 16                 ; while (r > 15) {
     jl          .ERLOOP
     EMIT_BITS   r13, r14d               ; EMIT_BITS(code_0xf0, size_0xf0)
     sub         r12, 16                 ; r -= 16;
     jmp         .BRLOOP
 .ERLOOP:
--- a/media/update-libjpeg.sh
+++ b/media/update-libjpeg.sh
@@ -14,18 +14,19 @@ rm -rf $srcdir/libjpeg
 repo=$1
 tag=${2-HEAD}
 
 (cd $repo; git archive --prefix=media/libjpeg/ $tag) | (cd $srcdir/..; tar xf -)
 
 cd $srcdir/libjpeg
 cp win/jsimdcfg.inc simd/
 
-revert_files="1050342.diff externalize-table.diff jconfig.h jconfigint.h jpeg_nbits_table.c moz.build MOZCHANGES mozilla.diff simd/jsimdcfg.inc"
+revert_files="1050342.diff assembly-tables.diff externalize-table.diff jconfig.h jconfigint.h jpeg_nbits_table.c moz.build MOZCHANGES mozilla.diff simd/jsimdcfg.inc"
 if test -d ${topsrcdir}/.hg; then
     hg revert --no-backup $revert_files
 elif test -d ${topsrcdir}/.git; then
     git checkout HEAD -- $revert_files
 fi
 
 patch -p0 -i mozilla.diff
 patch -p0 -i 1050342.diff
 patch -p3 -i externalize-table.diff
+patch -p3 -i assembly-tables.diff