Bug 1576303 - Fix and tune xptcall for ppc64le, and harmonize with x86_64's. r=tcampbell,froydnj a=lizzard
author: Cameron Kaiser <spectre@floodgap.com>
Thu, 19 Sep 2019 19:45:20 +0000
changeset 555275 73d7b1a30ec50a1057348f2c6a04b08dae70c847
parent 555274 87c8abc9206bdc3235620a06f9f18fba4547e3e3
child 555276 ebe8cd92f00173021ba335de15ad279ad0985866
push id: 2165
push user: ffxbld-merge
push date: Mon, 14 Oct 2019 16:30:58 +0000
treeherder: mozilla-release@0eae18af659f [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: tcampbell, froydnj, lizzard
bugs: 1576303
milestone: 70.0
Bug 1576303 - Fix and tune xptcall for ppc64le, and harmonize with x86_64's. r=tcampbell,froydnj a=lizzard

Fix handling of ABI arguments in xptcinvoke for PPC64 platforms. Previously, non-floating-point arguments would advance the index of which floating-point register to use. This cleans up both of the platform-specific invoke and stubs to be better aligned to the x86_64 model.

Differential Revision: https://phabricator.services.mozilla.com/D46421
xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_ppc64_linux.S
xpcom/reflect/xptcall/md/unix/xptcinvoke_ppc64_linux.cpp
xpcom/reflect/xptcall/md/unix/xptcstubs_ppc64_linux.cpp
--- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_ppc64_linux.S
+++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_ppc64_linux.S
@@ -146,20 +146,20 @@ NS_InvokeByIndex:
         lfd     f12, -(5*8)(r31)
         lfd     f13, -(4*8)(r31)
 
         bctrl                           # Do it
 
         ld      r2,STACK_TOC(r1)        # Load our own TOC pointer
         ld      r1,0(r1)                # Revert stack frame
         ld      0,16(r1)                # Reload lr
+        mtlr    0
         ld      29,-24(r1)              # Restore NVGPRS
         ld      30,-16(r1)
         ld      31,-8(r1)
-        mtlr    0
         blr
 
 #if _CALL_ELF == 2
         .size   NS_InvokeByIndex,.-NS_InvokeByIndex
 #else
         .size   NS_InvokeByIndex,.-.NS_InvokeByIndex
 #endif
 
--- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_ppc64_linux.cpp
+++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_ppc64_linux.cpp
@@ -1,97 +1,130 @@
 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Platform specific code to invoke XPCOM methods on native objects
 
+#include "xptcprivate.h"
+
 // The purpose of NS_InvokeByIndex() is to map a platform
 // independent call to the platform ABI. To do that,
 // NS_InvokeByIndex() has to determine the method to call via vtable
 // access. The parameters for the method are read from the
 // nsXPTCVariant* and prepared for the native ABI.
-
-// The PowerPC64 platform ABI can be found here:
-// http://www.freestandards.org/spec/ELF/ppc64/
+//
+// Prior to POWER8, all 64-bit Power ISA systems used ELF v1 ABI, found
+// here:
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html
 // and in particular:
-// http://www.freestandards.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#FUNC-CALL
-
-#include <stdio.h>
-#include "xptcprivate.h"
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-CALL
+// Little-endian ppc64le, however, uses ELF v2 ABI, which is here:
+//   http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+// and in particular section 2.2, page 22. However, most big-endian ppc64
+// systems still use ELF v1, so this file should support both.
 
-// 8 integral parameters are passed in registers, not including 'that'
-#define GPR_COUNT     7
+// 7 integral parameters are passed in registers, not including |this|
+// (i.e., r3-r10, with r3 being |this|).
+const uint32_t GPR_COUNT = 7;
 
-// 8 floating point parameters are passed in registers, floats are
-// promoted to doubles when passed in registers
-#define FPR_COUNT     13
-
-extern "C" uint32_t
-invoke_count_words(uint32_t paramCount, nsXPTCVariant* s)
-{
-    return uint32_t(((paramCount * 2) + 3) & ~3);
-}
+// 13 floating point parameters are passed in registers, either single or
+// double precision (i.e., f1-f13).
+const uint32_t FPR_COUNT = 13;
 
-extern "C" void
-invoke_copy_to_stack(uint64_t* gpregs,
-                     double* fpregs,
-                     uint32_t paramCount,
-                     nsXPTCVariant* s,
-                     uint64_t* d)
+// Both ABIs use the same register assignment strategy, as per this
+// example from V1 ABI section 3.2.3 and V2 ABI section 2.2.3.2 [page 43]:
+//
+// typedef struct {
+//   int    a;
+//   double dd;
+// } sparm;
+// sparm   s, t;
+// int     c, d, e;
+// long double ld;
+// double  ff, gg, hh;
+//
+// x = func(c, ff, d, ld, s, gg, t, e, hh);
+//
+// Parameter     Register     Offset in parameter save area
+// c             r3           0-7    (not stored in parameter save area)
+// ff            f1           8-15   (not stored)
+// d             r5           16-23  (not stored)
+// ld            f2,f3        24-39  (not stored)
+// s             r8,r9        40-55  (not stored)
+// gg            f4           56-63  (not stored)
+// t             (none)       64-79  (stored in parameter save area)
+// e             (none)       80-87  (stored)
+// hh            f5           88-95  (not stored)
+//
+// i.e., each successive FPR usage skips a GPR, but not the other way around.
+
+extern "C" void invoke_copy_to_stack(uint64_t* gpregs, double* fpregs,
+                                     uint32_t paramCount, nsXPTCVariant* s,
+                                     uint64_t* d)
 {
-    uint64_t tempu64;
+    uint32_t nr_gpr = 0u;
+    uint32_t nr_fpr = 0u;
+    uint64_t value = 0u;
 
-    for(uint32_t i = 0; i < paramCount; i++, s++) {
-        if(s->IsIndirect())
-            tempu64 = (uint64_t) &s->val;
+    for (uint32_t i = 0; i < paramCount; i++, s++) {
+        if (s->IsIndirect())
+            value = (uint64_t) &s->val;
         else {
-            switch(s->type) {
-            case nsXPTType::T_FLOAT:                                  break;
-            case nsXPTType::T_DOUBLE:                                 break;
-            case nsXPTType::T_I8:     tempu64 = s->val.i8;            break;
-            case nsXPTType::T_I16:    tempu64 = s->val.i16;           break;
-            case nsXPTType::T_I32:    tempu64 = s->val.i32;           break;
-            case nsXPTType::T_I64:    tempu64 = s->val.i64;           break;
-            case nsXPTType::T_U8:     tempu64 = s->val.u8;            break;
-            case nsXPTType::T_U16:    tempu64 = s->val.u16;           break;
-            case nsXPTType::T_U32:    tempu64 = s->val.u32;           break;
-            case nsXPTType::T_U64:    tempu64 = s->val.u64;           break;
-            case nsXPTType::T_BOOL:   tempu64 = s->val.b;             break;
-            case nsXPTType::T_CHAR:   tempu64 = s->val.c;             break;
-            case nsXPTType::T_WCHAR:  tempu64 = s->val.wc;            break;
-            default:                  tempu64 = (uint64_t) s->val.p;  break;
+            switch (s->type) {
+            case nsXPTType::T_FLOAT:                                break;
+            case nsXPTType::T_DOUBLE:                               break;
+            case nsXPTType::T_I8:     value = s->val.i8;            break;
+            case nsXPTType::T_I16:    value = s->val.i16;           break;
+            case nsXPTType::T_I32:    value = s->val.i32;           break;
+            case nsXPTType::T_I64:    value = s->val.i64;           break;
+            case nsXPTType::T_U8:     value = s->val.u8;            break;
+            case nsXPTType::T_U16:    value = s->val.u16;           break;
+            case nsXPTType::T_U32:    value = s->val.u32;           break;
+            case nsXPTType::T_U64:    value = s->val.u64;           break;
+            case nsXPTType::T_BOOL:   value = s->val.b;             break;
+            case nsXPTType::T_CHAR:   value = s->val.c;             break;
+            case nsXPTType::T_WCHAR:  value = s->val.wc;            break;
+            default:                  value = (uint64_t) s->val.p;  break;
             }
         }
 
         if (!s->IsIndirect() && s->type == nsXPTType::T_DOUBLE) {
-            if (i < FPR_COUNT)
-                fpregs[i]    = s->val.d;
-            else
-                *(double *)d = s->val.d;
+            if (nr_fpr < FPR_COUNT) {
+                fpregs[nr_fpr++] = s->val.d;
+                nr_gpr++;
+            } else {
+                *((double *)d) = s->val.d;
+                d++;
+            }
         }
         else if (!s->IsIndirect() && s->type == nsXPTType::T_FLOAT) {
-            if (i < FPR_COUNT) {
-                fpregs[i]   = s->val.f; // if passed in registers, floats are promoted to doubles
+            if (nr_fpr < FPR_COUNT) {
+                // Single-precision floats are passed in FPRs too.
+                fpregs[nr_fpr++] = s->val.f;
+                nr_gpr++;
             } else {
-                float *p = (float *)d;
-#ifndef __LITTLE_ENDIAN__
+#ifdef __LITTLE_ENDIAN__
+                *((float *)d) = s->val.f;
+#else
+                // Big endian needs adjustment to point to the least
+                // significant word.
+                float* p = (float*)d;
                 p++;
+                *p = s->val.f;
 #endif
-                *p = s->val.f;
+                d++;
             }
         }
         else {
-            if (i < GPR_COUNT)
-                gpregs[i] = tempu64;
-            else
-                *d = tempu64;
+            if (nr_gpr < GPR_COUNT) {
+                gpregs[nr_gpr++] = value;
+            } else {
+                *d++ = value;
+            }
         }
-        if (i >= 7)
-            d++;
     }
 }
 
 EXPORT_XPCOM_API(nsresult)
-NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
-                   uint32_t paramCount, nsXPTCVariant* params);
-
+NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, uint32_t paramCount,
+                 nsXPTCVariant* params);
--- a/xpcom/reflect/xptcall/md/unix/xptcstubs_ppc64_linux.cpp
+++ b/xpcom/reflect/xptcall/md/unix/xptcstubs_ppc64_linux.cpp
@@ -2,174 +2,200 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Implement shared vtbl methods.
 
 #include "xptcprivate.h"
 
-// The Linux/PPC64 ABI passes the first 8 integral
-// parameters and the first 13 floating point parameters in registers
-// (r3-r10 and f1-f13), no stack space is allocated for these by the
-// caller.  The rest of the parameters are passed in the caller's stack
-// area. The stack pointer has to retain 16-byte alignment.
+// Prior to POWER8, all 64-bit Power ISA systems used ELF v1 ABI, found
+// here:
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html
+// and in particular:
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-CALL
+// Little-endian ppc64le, however, uses ELF v2 ABI, which is here:
+//   http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+// and in particular section 2.2, page 22. However, most big-endian ppc64
+// systems still use ELF v1, so this file should support both.
+//
+// Both ABIs pass the first 8 integral parameters and the first 13 floating
+// point parameters in registers r3-r10 and f1-f13. No stack space is
+// allocated for these by the caller. The rest of the parameters are passed
+// in the caller's stack area. The stack pointer must stay 16-byte aligned.
 
-// The PowerPC64 platform ABI can be found here:
-// http://www.freestandards.org/spec/ELF/ppc64/
-// and in particular:
-// http://www.freestandards.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#FUNC-CALL
-
-#define PARAM_BUFFER_COUNT      16
-#define GPR_COUNT                7
-#define FPR_COUNT               13
+const uint32_t PARAM_BUFFER_COUNT   = 16;
+const uint32_t GPR_COUNT            = 7;
+const uint32_t FPR_COUNT            = 13;
 
 // PrepareAndDispatch() is called by SharedStub() and calls the actual method.
 //
 // - 'args[]' contains the arguments passed on stack
-// - 'gprData[]' contains the arguments passed in integer registers
-// - 'fprData[]' contains the arguments passed in floating point registers
+// - 'gpregs[]' contains the arguments passed in integer registers
+// - 'fpregs[]' contains the arguments passed in floating point registers
 //
 // The parameters are mapped into an array of type 'nsXPTCMiniVariant'
 // and then the method gets called.
-#include <stdio.h>
+//
+// Both ABIs use the same register assignment strategy, as per this
+// example from V1 ABI section 3.2.3 and V2 ABI section 2.2.3.2 [page 43]:
+//
+// typedef struct {
+//   int    a;
+//   double dd;
+// } sparm;
+// sparm   s, t;
+// int     c, d, e;
+// long double ld;
+// double  ff, gg, hh;
+//
+// x = func(c, ff, d, ld, s, gg, t, e, hh);
+//
+// Parameter     Register     Offset in parameter save area
+// c             r3           0-7    (not stored in parameter save area)
+// ff            f1           8-15   (not stored)
+// d             r5           16-23  (not stored)
+// ld            f2,f3        24-39  (not stored)
+// s             r8,r9        40-55  (not stored)
+// gg            f4           56-63  (not stored)
+// t             (none)       64-79  (stored in parameter save area)
+// e             (none)       80-87  (stored)
+// hh            f5           88-95  (not stored)
+//
+// i.e., each successive FPR usage skips a GPR, but not the other way around.
+
 extern "C" nsresult ATTRIBUTE_USED
-PrepareAndDispatch(nsXPTCStubBase* self,
-                   uint64_t methodIndex,
-                   uint64_t* args,
-                   uint64_t *gprData,
-                   double *fprData)
+PrepareAndDispatch(nsXPTCStubBase * self, uint32_t methodIndex,
+                   uint64_t * args, uint64_t * gpregs, double *fpregs)
 {
     nsXPTCMiniVariant paramBuffer[PARAM_BUFFER_COUNT];
     nsXPTCMiniVariant* dispatchParams = nullptr;
     const nsXPTMethodInfo* info;
     uint32_t paramCount;
     uint32_t i;
 
     NS_ASSERTION(self,"no self");
 
     self->mEntry->GetMethodInfo(uint16_t(methodIndex), &info);
     NS_ASSERTION(info,"no method info");
-    if (! info)
+    if (!info)
         return NS_ERROR_UNEXPECTED;
 
     paramCount = info->GetParamCount();
 
     // setup variant array pointer
     if(paramCount > PARAM_BUFFER_COUNT)
         dispatchParams = new nsXPTCMiniVariant[paramCount];
     else
         dispatchParams = paramBuffer;
 
     NS_ASSERTION(dispatchParams,"no place for params");
 
     const uint8_t indexOfJSContext = info->IndexOfJSContext();
 
     uint64_t* ap = args;
-    uint32_t iCount = 0;
-    uint32_t fpCount = 0;
-    uint64_t tempu64;
+    // |that| is implicit in the calling convention; we really do start at the
+    // first GPR (as opposed to x86_64).
+    uint32_t nr_gpr = 0;
+    uint32_t nr_fpr = 0;
+    uint64_t value;
 
     for(i = 0; i < paramCount; i++) {
         const nsXPTParamInfo& param = info->GetParam(i);
         const nsXPTType& type = param.GetType();
         nsXPTCMiniVariant* dp = &dispatchParams[i];
 
         if (i == indexOfJSContext) {
-            if (iCount < GPR_COUNT)
-                iCount++;
+            if (nr_gpr < GPR_COUNT)
+                nr_gpr++;
             else
                 ap++;
         }
 
         if (!param.IsOut() && type == nsXPTType::T_DOUBLE) {
-            if (fpCount < FPR_COUNT) {
-                dp->val.d = fprData[fpCount++];
-            }
-            else
-                dp->val.d = *(double*) ap;
-        } else if (!param.IsOut() && type == nsXPTType::T_FLOAT) {
-            if (fpCount < FPR_COUNT) {
-                dp->val.f = (float) fprData[fpCount++]; // in registers floats are passed as doubles
+            if (nr_fpr < FPR_COUNT) {
+                dp->val.d = fpregs[nr_fpr++];
+                nr_gpr++;
+            } else {
+                dp->val.d = *(double*)ap++;
             }
-            else {
-                float *p = (float *)ap;
-#ifndef __LITTLE_ENDIAN__
+            continue;
+        }
+        if (!param.IsOut() && type == nsXPTType::T_FLOAT) {
+            if (nr_fpr < FPR_COUNT) {
+                // Single-precision floats are passed in FPRs too.
+                dp->val.f = (float)fpregs[nr_fpr++];
+                nr_gpr++;
+            } else {
+#ifdef __LITTLE_ENDIAN__
+                dp->val.f = *(float*)ap++;
+#else
+                // Big endian needs adjustment to point to the least
+                // significant word.
+                float* p = (float*)ap;
                 p++;
-#endif
                 dp->val.f = *p;
+                ap++;
+#endif
             }
-        } else { /* integer type or pointer */
-            if (iCount < GPR_COUNT)
-                tempu64 = gprData[iCount];
-            else
-                tempu64 = *ap;
+            continue;
+        }
+        if (nr_gpr < GPR_COUNT)
+            value = gpregs[nr_gpr++];
+        else
+            value = *ap++;
 
-            if (param.IsOut() || !type.IsArithmetic())
-                dp->val.p = (void*) tempu64;
-            else if (type == nsXPTType::T_I8)
-                dp->val.i8  = (int8_t)   tempu64;
-            else if (type == nsXPTType::T_I16)
-                dp->val.i16 = (int16_t)  tempu64;
-            else if (type == nsXPTType::T_I32)
-                dp->val.i32 = (int32_t)  tempu64;
-            else if (type == nsXPTType::T_I64)
-                dp->val.i64 = (int64_t)  tempu64;
-            else if (type == nsXPTType::T_U8)
-                dp->val.u8  = (uint8_t)  tempu64;
-            else if (type == nsXPTType::T_U16)
-                dp->val.u16 = (uint16_t) tempu64;
-            else if (type == nsXPTType::T_U32)
-                dp->val.u32 = (uint32_t) tempu64;
-            else if (type == nsXPTType::T_U64)
-                dp->val.u64 = (uint64_t) tempu64;
-            else if (type == nsXPTType::T_BOOL)
-                dp->val.b   = (bool)   tempu64;
-            else if (type == nsXPTType::T_CHAR)
-                dp->val.c   = (char)     tempu64;
-            else if (type == nsXPTType::T_WCHAR)
-                dp->val.wc  = (wchar_t)  tempu64;
-            else
-                NS_ERROR("bad type");
+        if (param.IsOut() || !type.IsArithmetic()) {
+            dp->val.p = (void*) value;
+            continue;
         }
 
-        if (iCount < GPR_COUNT)
-            iCount++;  // gprs are skipped for fp args, so this always needs inc
-        else
-            ap++;
+        switch (type) {
+        case nsXPTType::T_I8:      dp->val.i8  = (int8_t)   value; break;
+        case nsXPTType::T_I16:     dp->val.i16 = (int16_t)  value; break;
+        case nsXPTType::T_I32:     dp->val.i32 = (int32_t)  value; break;
+        case nsXPTType::T_I64:     dp->val.i64 = (int64_t)  value; break;
+        case nsXPTType::T_U8:      dp->val.u8  = (uint8_t)  value; break;
+        case nsXPTType::T_U16:     dp->val.u16 = (uint16_t) value; break;
+        case nsXPTType::T_U32:     dp->val.u32 = (uint32_t) value; break;
+        case nsXPTType::T_U64:     dp->val.u64 = (uint64_t) value; break;
+        case nsXPTType::T_BOOL:    dp->val.b   = (bool)     value; break;
+        case nsXPTType::T_CHAR:    dp->val.c   = (char)     value; break;
+        case nsXPTType::T_WCHAR:   dp->val.wc  = (wchar_t)  value; break;
+
+        default:
+            NS_ERROR("bad type");
+            break;
+        }
     }
 
     nsresult result = self->mOuter->CallMethod((uint16_t) methodIndex, info,
                                                dispatchParams);
 
     if (dispatchParams != paramBuffer)
         delete [] dispatchParams;
 
     return result;
 }
 
 // Load r11 with the constant 'n' and branch to SharedStub().
 //
+// As G++3 ABI contains the length of the functionname in the mangled
+// name, it is difficult to get a generic assembler mechanism like
+// in the G++ 2.95 case.
 // XXX Yes, it's ugly that we're relying on gcc's name-mangling here;
 // however, it's quick, dirty, and'll break when the ABI changes on
 // us, which is what we want ;-).
-
-
-// gcc-3 version
-//
-// As G++3 ABI contains the length of the functionname in the mangled
-// name, it is difficult to get a generic assembler mechanism like
-// in the G++ 2.95 case.
 // Create names would be like:
 // _ZN14nsXPTCStubBase5Stub1Ev
 // _ZN14nsXPTCStubBase6Stub12Ev
 // _ZN14nsXPTCStubBase7Stub123Ev
 // _ZN14nsXPTCStubBase8Stub1234Ev
 // etc.
-// Use assembler directives to get the names right...
+// Use assembler directives to get the names right.
 
 #if _CALL_ELF == 2
 # define STUB_ENTRY(n)                                                  \
 __asm__ (                                                               \
         ".section \".text\" \n\t"                                       \
         ".align 2 \n\t"                                                 \
         ".if "#n" < 10 \n\t"                                            \
         ".globl _ZN14nsXPTCStubBase5Stub"#n"Ev \n\t"                    \
@@ -245,13 +271,13 @@ PrepareAndDispatch(nsXPTCStubBase* self,
         "li     11,"#n" \n\t"                                           \
         "b      SharedStub \n"                                          \
 );
 #endif
 
 #define SENTINEL_ENTRY(n)                                               \
 nsresult nsXPTCStubBase::Sentinel##n()                                  \
 {                                                                       \
-    NS_ERROR("nsXPTCStubBase::Sentinel called");                  \
+    NS_ERROR("nsXPTCStubBase::Sentinel called");                        \
     return NS_ERROR_NOT_IMPLEMENTED;                                    \
 }
 
 #include "xptcstubsdef.inc"