Bug 1561088 - emit unwind information for libffi aarch64/win assembly; r=dmajor,gsvelto
author: Nathan Froyd <froydnj@mozilla.com>
Thu, 27 Jun 2019 13:36:48 +0000
changeset 543187 dcae89181bb2f880af70b17048a56230406fa5f4
parent 543186 54e370fa94317e2bad7f9be76991acbaa3375a41
child 543188 d3f8d2b25603a61597f89035a31156575d932402
push id: 2131
push user: ffxbld-merge
push date: Mon, 26 Aug 2019 18:30:20 +0000
treeherder: mozilla-release@b19ffb3ca153 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: dmajor, gsvelto
bugs: 1561088
milestone: 69.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1561088 - emit unwind information for libffi aarch64/win assembly; r=dmajor,gsvelto. The hand-written assembly for libffi on aarch64/Windows doesn't emit unwind information. If we ever tried to unwind through these functions, they'd look like leaf functions, which is decidedly not true and would cause great pain. For whatever reason, the original aarch64 libffi functions used x21/x22/x23/x24 as their (callee-saved) scratch registers. This convention works on Windows as well, but the unwind information on Windows mandates that we begin saving callee-saved registers at x19, rather than x21. Rewriting the assembly to use x19/x20 instead of x21/x22 would be a large change, so we chose to simply save/restore the extra registers (x19/x20) in the prolog/epilog. This change does make the stack frame sizes slightly bigger, but an extra 16 bytes in libffi stack frames should not matter. The `-TC` change is necessary to make the compiler play nicely with .asm file suffixes. Differential Revision: https://phabricator.services.mozilla.com/D35714
config/external/ffi/moz.build
config/external/ffi/preprocess_libffi_asm.py
js/src/ctypes/libffi/src/aarch64/win64.asm
--- a/config/external/ffi/moz.build
+++ b/config/external/ffi/moz.build
@@ -73,17 +73,28 @@ else:
     ffi_srcs = ()
     if CONFIG['FFI_TARGET'] == 'ARM':
         ffi_srcs = ('sysv.S', 'ffi.c')
         if CONFIG['CC_TYPE'] == 'clang':
             ASFLAGS += ['-no-integrated-as']
     elif CONFIG['FFI_TARGET'] == 'AARCH64':
         ffi_srcs = ('sysv.S', 'ffi.c')
     elif CONFIG['FFI_TARGET'] == 'ARM64_WIN64':
-        ffi_srcs = ('win64.asm', 'ffi.c')
+        ffi_srcs = ['ffi.c']
+
+        GENERATED_FILES += ['win64_aarch.asm']
+        asm = GENERATED_FILES['win64_aarch.asm']
+        asm.inputs = [
+            '/js/src/ctypes/libffi/src/aarch64/win64.asm',
+            '!../../../js/src/ctypes/libffi/fficonfig.h',
+            '!../../../js/src/ctypes/libffi/include/ffi.h',
+        ]
+        asm.script = 'preprocess_libffi_asm.py'
+        asm.flags = ['$(DEFINES)', '$(LOCAL_INCLUDES)']
+        SOURCES += ['!win64_aarch.asm']
     elif CONFIG['FFI_TARGET'] == 'X86':
         ffi_srcs = ('ffi.c', 'sysv.S', 'win32.S')
     elif CONFIG['FFI_TARGET'] == 'X86_64':
         ffi_srcs = ('ffi64.c', 'unix64.S', 'ffi.c', 'sysv.S')
     elif CONFIG['FFI_TARGET'] == 'X86_WIN32':
         ffi_srcs = ['ffi.c']
         # MinGW Build for 32 bit
         if CONFIG['CC_TYPE'] in ('gcc', 'clang'):
--- a/config/external/ffi/preprocess_libffi_asm.py
+++ b/config/external/ffi/preprocess_libffi_asm.py
@@ -10,14 +10,15 @@ import os
 import shlex
 import subprocess
 
 
 def main(output, input_asm, ffi_h, ffi_config_h, defines, includes):
     defines = shlex.split(defines)
     includes = shlex.split(includes)
     # CPP uses -E which generates #line directives. -EP suppresses them.
-    cpp = buildconfig.substs['CPP'] + ['-EP']
+    # -TC forces the compiler to treat the input as C.
+    cpp = buildconfig.substs['CPP'] + ['-EP'] + ['-TC']
     input_asm = mozpath.relpath(input_asm, os.getcwd())
     args = cpp + defines + includes + [input_asm]
     print(' '.join(args))
     preprocessed = subprocess.check_output(args)
     output.write(preprocessed)
--- a/js/src/ctypes/libffi/src/aarch64/win64.asm
+++ b/js/src/ctypes/libffi/src/aarch64/win64.asm
@@ -14,19 +14,21 @@
 ;; THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
 ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 ;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 ;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 ;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 ;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
+#include "ksarm64.h"
+
 ;; Hand-converted from the sysv.S file in this directory.
 
-        AREA |.text|, CODE, ARM64
+        TEXTAREA
 
    ;; ffi_call_SYSV()
 
    ;; Create a stack frame, setup an argument context, call the callee
    ;; and extract the result.
 
    ;; The maximum required argument stack size is provided,
    ;; ffi_call_SYSV() allocates that stack space then calls the
@@ -53,40 +55,36 @@
    ;; x1 &context
    ;; x2 &ecif
    ;; x3 bytes
    ;; x4 fn
 
    ;; This function uses the following stack frame layout:
 
    ;; ==
-   ;;              saved x30(lr)
-   ;; x29(fp)->    saved x29(fp)
    ;;              saved x24
    ;;              saved x23
    ;;              saved x22
-   ;; sp'    ->    saved x21
+   ;;              saved x21
+   ;;              saved x20
+   ;;              saved x19
+   ;;              saved x30(lr)
+   ;; x29(fp)->    saved x29(fp)
    ;;              ...
    ;; sp     ->    (constructed callee stack arguments)
    ;; ==
 
    ;; Voila!
 
-	EXPORT |ffi_call_SYSV|
-
-|ffi_call_SYSV| PROC
-;#define ffi_call_SYSV_FS (8 * 4)
+        NESTED_ENTRY |ffi_call_SYSV|
 
-        stp     x29, x30, [sp, #-16]!
-
-        mov     x29, sp
-        sub     sp, sp, #32 	; ffi_call_SYSV_FS
-
-        stp     x21, x22, [sp, #0]
-        stp     x23, x24, [sp, #16]
+        PROLOG_SAVE_REG_PAIR x29, x30, #-64!
+        PROLOG_SAVE_REG_PAIR x19, x20, #16
+        PROLOG_SAVE_REG_PAIR x21, x22, #32
+        PROLOG_SAVE_REG_PAIR x23, x24, #48
 
         mov     x21, x1
         mov     x22, x2
         mov     x24, x4
 
         ; Allocate the stack space for the actual arguments, many
         ; arguments will be passed in registers, but we assume
         ; worst case and allocate sufficient stack for ALL of
@@ -140,29 +138,24 @@ noload_call
         ; Save the vector argument passing registers.
         stp     q0, q1, [x21, #8*32 + 0]
         stp     q2, q3, [x21, #8*32 + 32]
         stp     q4, q5, [x21, #8*32 + 64]
         stp     q6, q7, [x21, #8*32 + 96]
 
 nosave_call
         ; All done, unwind our stack frame.
-        ldp     x21, x22, [x29,  # - 32] ; ffi_call_SYSV_FS
-
-        ldp     x23, x24, [x29,  # - 32 + 16] ; ffi_call_SYSV_FS
-
-        mov     sp, x29
+        EPILOG_STACK_RESTORE
+        EPILOG_RESTORE_REG_PAIR x19, x20, #16
+        EPILOG_RESTORE_REG_PAIR x21, x22, #32
+        EPILOG_RESTORE_REG_PAIR x23, x24, #48
+        EPILOG_RESTORE_REG_PAIR x29, x30, #64!
+        EPILOG_RETURN
 
-        ldp     x29, x30, [sp], #16
-
-        ret
-
-	ENDP
-
-; #define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+        NESTED_END |ffi_call_SYSV|
 
    ;; ffi_closure_SYSV
 
    ;; Closure invocation glue. This is the low level code invoked directly by
    ;; the closure trampoline to setup and call a closure.
 
    ;; On entry x17 points to a struct trampoline_data, x16 has been clobbered
    ;; all other registers are preserved.
@@ -181,37 +174,37 @@ nosave_call
    ;; {
    ;;      UINT64 *ffi_closure;
    ;;      UINT64 flags;
    ;; };
 
    ;; This function uses the following stack frame layout:
 
    ;; ==
+   ;;              saved x22
+   ;;              saved x21
+   ;;              saved x20
+   ;;              saved x19
    ;;              saved x30(lr)
    ;; x29(fp)->    saved x29(fp)
-   ;;              saved x22
-   ;;              saved x21
    ;;              ...
    ;; sp     ->    call_context
    ;; ==
 
    ;; Voila!
 
 	IMPORT |ffi_closure_SYSV_inner|
-	EXPORT |ffi_closure_SYSV|
 
-|ffi_closure_SYSV| PROC
-        stp     x29, x30, [sp, #-16]!
+        NESTED_ENTRY |ffi_closure_SYSV|
 
-        mov     x29, sp
+        PROLOG_SAVE_REG_PAIR fp, lr, #-48!
+        PROLOG_SAVE_REG_PAIR x19, x20, #16
+        PROLOG_SAVE_REG_PAIR x21, x22, #32
 
-        sub     sp, sp, #256+512+16
-
-        stp     x21, x22, [x29, #-16]
+        sub     sp, sp, #256+512
 
         ; Load x21 with &call_context.
         mov     x21, sp
         ; Preserve our struct trampoline_data
         mov     x22, x17
 
         ; Save the rest of the argument passing registers.
         stp     x0, x1, [x21, #0]
@@ -255,18 +248,17 @@ noload_closure
         ; Load the result passing core registers.
         ldp     x0, x1, [x21,  #0]
         ldp     x2, x3, [x21, #16]
         ldp     x4, x5, [x21, #32]
         ldp     x6, x7, [x21, #48]
         ; Note nothing useful is returned in x8.
 
         ; We are done, unwind our frame.
-        ldp     x21, x22, [x29,  #-16]
-
-        mov     sp, x29
+        EPILOG_STACK_RESTORE
+        EPILOG_RESTORE_REG_PAIR x19, x20, #16
+        EPILOG_RESTORE_REG_PAIR x21, x22, #32
+        EPILOG_RESTORE_REG_PAIR x29, x30, #48!
+        EPILOG_RETURN
 
-        ldp     x29, x30, [sp], #16
+        NESTED_END |ffi_closure_SYSV|
 
-        ret
-
-	ENDP
-	END
+        END