Bug 875656 - IonMonkey: Juggle registers around to reduce the number of temporaries needed by LConcat. r=bhackett
author Dan Gohman <sunfish@google.com>
Thu, 24 Oct 2013 20:34:54 -0700
changeset 165875 f030f97fcf107fbd5da84b1bea47a3580426bc29
parent 165874 4dfcb90f5fcd4d3731bc9f70082a0b07b41168b3
child 165876 186e834d87dc7b95ccb5f7b51bee6620932584f1
child 165943 2f3d1761b05915b2dd6713a5bcdee6548628a973
push id 3066
push user akeybl@mozilla.com
push date Mon, 09 Dec 2013 19:58:46 +0000
treeherder mozilla-beta@a31a0dce83aa [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bhackett
bugs 875656
milestone 27.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 875656 - IonMonkey: Juggle registers around to reduce the number of temporaries needed by LConcat. r=bhackett
js/src/jit/CodeGenerator.cpp
js/src/jit/LIR-Common.h
js/src/jit/Lowering.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/x64/Assembler-x64.h
js/src/jit/x86/Assembler-x86.h
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -4345,40 +4345,42 @@ CodeGenerator::visitConcat(LConcat *lir)
 {
     Register lhs = ToRegister(lir->lhs());
     Register rhs = ToRegister(lir->rhs());
 
     Register output = ToRegister(lir->output());
 
     JS_ASSERT(lhs == CallTempReg0);
     JS_ASSERT(rhs == CallTempReg1);
-    JS_ASSERT(ToRegister(lir->temp1()) == CallTempReg2);
-    JS_ASSERT(ToRegister(lir->temp2()) == CallTempReg3);
-    JS_ASSERT(ToRegister(lir->temp3()) == CallTempReg4);
-    JS_ASSERT(ToRegister(lir->temp4()) == CallTempReg5);
-    JS_ASSERT(output == CallTempReg6);
+    JS_ASSERT(ToRegister(lir->temp1()) == CallTempReg0);
+    JS_ASSERT(ToRegister(lir->temp2()) == CallTempReg1);
+    JS_ASSERT(ToRegister(lir->temp3()) == CallTempReg2);
+    JS_ASSERT(ToRegister(lir->temp4()) == CallTempReg3);
+    JS_ASSERT(ToRegister(lir->temp5()) == CallTempReg4);
+    JS_ASSERT(output == CallTempReg5);
 
     return emitConcat(lir, lhs, rhs, output);
 }
 
 bool
 CodeGenerator::visitConcatPar(LConcatPar *lir)
 {
     DebugOnly<Register> slice = ToRegister(lir->forkJoinSlice());
     Register lhs = ToRegister(lir->lhs());
     Register rhs = ToRegister(lir->rhs());
     Register output = ToRegister(lir->output());
 
     JS_ASSERT(lhs == CallTempReg0);
     JS_ASSERT(rhs == CallTempReg1);
-    JS_ASSERT((Register)slice == CallTempReg5);
-    JS_ASSERT(ToRegister(lir->temp1()) == CallTempReg2);
-    JS_ASSERT(ToRegister(lir->temp2()) == CallTempReg3);
-    JS_ASSERT(ToRegister(lir->temp3()) == CallTempReg4);
-    JS_ASSERT(output == CallTempReg6);
+    JS_ASSERT((Register)slice == CallTempReg4);
+    JS_ASSERT(ToRegister(lir->temp1()) == CallTempReg0);
+    JS_ASSERT(ToRegister(lir->temp2()) == CallTempReg1);
+    JS_ASSERT(ToRegister(lir->temp3()) == CallTempReg2);
+    JS_ASSERT(ToRegister(lir->temp4()) == CallTempReg3);
+    JS_ASSERT(output == CallTempReg5);
 
     return emitConcat(lir, lhs, rhs, output);
 }
 
 static void
 CopyStringChars(MacroAssembler &masm, Register to, Register from, Register len, Register scratch)
 {
     // Copy |len| jschars from |from| to |to|. Assumes len > 0 (checked below in
@@ -4408,23 +4410,22 @@ IonCompartment::generateStringConcatStub
 {
     MacroAssembler masm(cx);
 
     Register lhs = CallTempReg0;
     Register rhs = CallTempReg1;
     Register temp1 = CallTempReg2;
     Register temp2 = CallTempReg3;
     Register temp3 = CallTempReg4;
-    Register temp4 = CallTempReg5;
-    Register output = CallTempReg6;
-
-    // In parallel execution, we pass in the ForkJoinSlice in CallTempReg5, as
-    // by the time we need to use the temp4 we no longer have need of the
+    Register output = CallTempReg5;
+
+    // In parallel execution, we pass in the ForkJoinSlice in CallTempReg4, as
+    // by the time we need to use the temp3 we no longer have need of the
     // slice.
-    Register forkJoinSlice = CallTempReg5;
+    Register forkJoinSlice = CallTempReg4;
 
     Label failure, failurePopTemps;
 
     // If lhs is empty, return rhs.
     Label leftEmpty;
     masm.loadStringLength(lhs, temp1);
     masm.branchTest32(Assembler::Zero, temp1, temp1, &leftEmpty);
 
@@ -4509,34 +4510,35 @@ IonCompartment::generateStringConcatStub
     masm.orPtr(Imm32(JSString::FIXED_FLAGS), temp2);
     masm.storePtr(temp2, Address(output, JSString::offsetOfLengthAndFlags()));
 
     // Set chars pointer, keep in temp2 for copy loop below.
     masm.computeEffectiveAddress(Address(output, JSShortString::offsetOfInlineStorage()), temp2);
     masm.storePtr(temp2, Address(output, JSShortString::offsetOfChars()));
 
     {
-        // We use temp4 in this block, which in parallel execution also holds
+        // We use temp3 in this block, which in parallel execution also holds
         // a live ForkJoinSlice pointer. If we are compiling for parallel
         // execution, be sure to save and restore the ForkJoinSlice.
         if (mode == ParallelExecution)
-            masm.push(temp4);
+            masm.push(temp3);
 
         // Copy lhs chars. Temp1 still holds the lhs length. Note that this
-        // advances temp2 to point to the next char.
-        masm.loadPtr(Address(lhs, JSString::offsetOfChars()), temp3);
-        CopyStringChars(masm, temp2, temp3, temp1, temp4);
+        // advances temp2 to point to the next char. Note that this also
+        // repurposes the lhs register.
+        masm.loadPtr(Address(lhs, JSString::offsetOfChars()), lhs);
+        CopyStringChars(masm, temp2, lhs, temp1, temp3);
 
         // Copy rhs chars.
-        masm.loadPtr(Address(rhs, JSString::offsetOfChars()), temp3);
         masm.loadStringLength(rhs, temp1);
-        CopyStringChars(masm, temp2, temp3, temp1, temp4);
+        masm.loadPtr(Address(rhs, JSString::offsetOfChars()), rhs);
+        CopyStringChars(masm, temp2, rhs, temp1, temp3);
 
         if (mode == ParallelExecution)
-            masm.pop(temp4);
+            masm.pop(temp3);
     }
 
     // Null-terminate.
     masm.store16(Imm32(0), Address(temp2, 0));
     masm.ret();
 
     masm.bind(&failurePopTemps);
     masm.pop(temp2);
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -2591,29 +2591,32 @@ class LBinaryV : public LCallInstruction
         return js_CodeName[jsop_];
     }
 
     static const size_t LhsInput = 0;
     static const size_t RhsInput = BOX_PIECES;
 };
 
 // Adds two string, returning a string.
-class LConcat : public LInstructionHelper<1, 2, 4>
+class LConcat : public LInstructionHelper<1, 2, 5>
 {
   public:
     LIR_HEADER(Concat)
 
     LConcat(const LAllocation &lhs, const LAllocation &rhs, const LDefinition &temp1,
-            const LDefinition &temp2, const LDefinition &temp3, const LDefinition &temp4) {
+            const LDefinition &temp2, const LDefinition &temp3, const LDefinition &temp4,
+            const LDefinition &temp5)
+    {
         setOperand(0, lhs);
         setOperand(1, rhs);
         setTemp(0, temp1);
         setTemp(1, temp2);
         setTemp(2, temp3);
         setTemp(3, temp4);
+        setTemp(4, temp5);
     }
 
     const LAllocation *lhs() {
         return this->getOperand(0);
     }
     const LAllocation *rhs() {
         return this->getOperand(1);
     }
@@ -2624,32 +2627,37 @@ class LConcat : public LInstructionHelpe
         return this->getTemp(1);
     }
     const LDefinition *temp3() {
         return this->getTemp(2);
     }
     const LDefinition *temp4() {
         return this->getTemp(3);
     }
-};
-
-class LConcatPar : public LInstructionHelper<1, 3, 3>
+    const LDefinition *temp5() {
+        return this->getTemp(4);
+    }
+};
+
+class LConcatPar : public LInstructionHelper<1, 3, 4>
 {
   public:
     LIR_HEADER(ConcatPar)
 
     LConcatPar(const LAllocation &slice, const LAllocation &lhs, const LAllocation &rhs,
-               const LDefinition &temp1, const LDefinition &temp2, const LDefinition &temp3)
+               const LDefinition &temp1, const LDefinition &temp2, const LDefinition &temp3,
+               const LDefinition &temp4)
     {
         setOperand(0, slice);
         setOperand(1, lhs);
         setOperand(2, rhs);
         setTemp(0, temp1);
         setTemp(1, temp2);
         setTemp(2, temp3);
+        setTemp(3, temp4);
     }
 
     const LAllocation *forkJoinSlice() {
         return this->getOperand(0);
     }
     const LAllocation *lhs() {
         return this->getOperand(1);
     }
@@ -2660,16 +2668,19 @@ class LConcatPar : public LInstructionHe
         return this->getTemp(0);
     }
     const LDefinition *temp2() {
         return this->getTemp(1);
     }
     const LDefinition *temp3() {
         return this->getTemp(2);
     }
+    const LDefinition *temp4() {
+        return this->getTemp(3);
+    }
 };
 
 // Get uint16 character code from a string.
 class LCharCodeAt : public LInstructionHelper<1, 2, 0>
 {
   public:
     LIR_HEADER(CharCodeAt)
 
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -1477,45 +1477,47 @@ LIRGenerator::visitConcat(MConcat *ins)
 {
     MDefinition *lhs = ins->getOperand(0);
     MDefinition *rhs = ins->getOperand(1);
 
     JS_ASSERT(lhs->type() == MIRType_String);
     JS_ASSERT(rhs->type() == MIRType_String);
     JS_ASSERT(ins->type() == MIRType_String);
 
-    LConcat *lir = new LConcat(useFixed(lhs, CallTempReg0),
-                               useFixed(rhs, CallTempReg1),
+    LConcat *lir = new LConcat(useFixedAtStart(lhs, CallTempReg0),
+                               useFixedAtStart(rhs, CallTempReg1),
+                               tempFixed(CallTempReg0),
+                               tempFixed(CallTempReg1),
                                tempFixed(CallTempReg2),
                                tempFixed(CallTempReg3),
-                               tempFixed(CallTempReg4),
-                               tempFixed(CallTempReg5));
-    if (!defineFixed(lir, ins, LAllocation(AnyRegister(CallTempReg6))))
+                               tempFixed(CallTempReg4));
+    if (!defineFixed(lir, ins, LAllocation(AnyRegister(CallTempReg5))))
         return false;
     return assignSafepoint(lir, ins);
 }
 
 bool
 LIRGenerator::visitConcatPar(MConcatPar *ins)
 {
     MDefinition *slice = ins->forkJoinSlice();
     MDefinition *lhs = ins->lhs();
     MDefinition *rhs = ins->rhs();
 
     JS_ASSERT(lhs->type() == MIRType_String);
     JS_ASSERT(rhs->type() == MIRType_String);
     JS_ASSERT(ins->type() == MIRType_String);
 
-    LConcatPar *lir = new LConcatPar(useFixed(slice, CallTempReg5),
-                                     useFixed(lhs, CallTempReg0),
-                                     useFixed(rhs, CallTempReg1),
+    LConcatPar *lir = new LConcatPar(useFixed(slice, CallTempReg4),
+                                     useFixedAtStart(lhs, CallTempReg0),
+                                     useFixedAtStart(rhs, CallTempReg1),
+                                     tempFixed(CallTempReg0),
+                                     tempFixed(CallTempReg1),
                                      tempFixed(CallTempReg2),
-                                     tempFixed(CallTempReg3),
-                                     tempFixed(CallTempReg4));
-    if (!defineFixed(lir, ins, LAllocation(AnyRegister(CallTempReg6))))
+                                     tempFixed(CallTempReg3));
+    if (!defineFixed(lir, ins, LAllocation(AnyRegister(CallTempReg5))))
         return false;
     return assignSafepoint(lir, ins);
 }
 
 bool
 LIRGenerator::visitCharCodeAt(MCharCodeAt *ins)
 {
     MDefinition *str = ins->getOperand(0);
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -51,17 +51,16 @@ static MOZ_CONSTEXPR_VAR Register Scratc
 static MOZ_CONSTEXPR_VAR Register OsrFrameReg = r3;
 static MOZ_CONSTEXPR_VAR Register ArgumentsRectifierReg = r8;
 static MOZ_CONSTEXPR_VAR Register CallTempReg0 = r5;
 static MOZ_CONSTEXPR_VAR Register CallTempReg1 = r6;
 static MOZ_CONSTEXPR_VAR Register CallTempReg2 = r7;
 static MOZ_CONSTEXPR_VAR Register CallTempReg3 = r8;
 static MOZ_CONSTEXPR_VAR Register CallTempReg4 = r0;
 static MOZ_CONSTEXPR_VAR Register CallTempReg5 = r1;
-static MOZ_CONSTEXPR_VAR Register CallTempReg6 = r2;
 
 static MOZ_CONSTEXPR_VAR Register IntArgReg0 = r0;
 static MOZ_CONSTEXPR_VAR Register IntArgReg1 = r1;
 static MOZ_CONSTEXPR_VAR Register IntArgReg2 = r2;
 static MOZ_CONSTEXPR_VAR Register IntArgReg3 = r3;
 static MOZ_CONSTEXPR_VAR Register GlobalReg = r10;
 static MOZ_CONSTEXPR_VAR Register HeapReg = r11;
 static MOZ_CONSTEXPR_VAR Register CallTempNonArgRegs[] = { r5, r6, r7, r8 };
--- a/js/src/jit/x64/Assembler-x64.h
+++ b/js/src/jit/x64/Assembler-x64.h
@@ -70,24 +70,24 @@ static MOZ_CONSTEXPR_VAR Register JSRetu
 static MOZ_CONSTEXPR_VAR Register JSReturnReg_Data = JSReturnReg;
 
 static MOZ_CONSTEXPR_VAR Register ReturnReg = rax;
 static MOZ_CONSTEXPR_VAR Register ScratchReg = r11;
 static MOZ_CONSTEXPR_VAR Register HeapReg = r15;
 static MOZ_CONSTEXPR_VAR FloatRegister ReturnFloatReg = xmm0;
 static MOZ_CONSTEXPR_VAR FloatRegister ScratchFloatReg = xmm15;
 
+// Avoid rbp, which is the FramePointer, which is unavailable in some modes.
 static MOZ_CONSTEXPR_VAR Register ArgumentsRectifierReg = r8;
 static MOZ_CONSTEXPR_VAR Register CallTempReg0 = rax;
 static MOZ_CONSTEXPR_VAR Register CallTempReg1 = rdi;
 static MOZ_CONSTEXPR_VAR Register CallTempReg2 = rbx;
 static MOZ_CONSTEXPR_VAR Register CallTempReg3 = rcx;
 static MOZ_CONSTEXPR_VAR Register CallTempReg4 = rsi;
 static MOZ_CONSTEXPR_VAR Register CallTempReg5 = rdx;
-static MOZ_CONSTEXPR_VAR Register CallTempReg6 = rbp;
 
 // Different argument registers for WIN64
 #if defined(_WIN64)
 static MOZ_CONSTEXPR_VAR Register IntArgReg0 = rcx;
 static MOZ_CONSTEXPR_VAR Register IntArgReg1 = rdx;
 static MOZ_CONSTEXPR_VAR Register IntArgReg2 = r8;
 static MOZ_CONSTEXPR_VAR Register IntArgReg3 = r9;
 static MOZ_CONSTEXPR_VAR uint32_t NumIntArgRegs = 4;
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -41,24 +41,24 @@ static MOZ_CONSTEXPR_VAR FloatRegister I
 static MOZ_CONSTEXPR_VAR Register JSReturnReg_Type = ecx;
 static MOZ_CONSTEXPR_VAR Register JSReturnReg_Data = edx;
 static MOZ_CONSTEXPR_VAR Register StackPointer = esp;
 static MOZ_CONSTEXPR_VAR Register FramePointer = ebp;
 static MOZ_CONSTEXPR_VAR Register ReturnReg = eax;
 static MOZ_CONSTEXPR_VAR FloatRegister ReturnFloatReg = xmm0;
 static MOZ_CONSTEXPR_VAR FloatRegister ScratchFloatReg = xmm7;
 
+// Avoid ebp, which is the FramePointer, which is unavailable in some modes.
 static MOZ_CONSTEXPR_VAR Register ArgumentsRectifierReg = esi;
 static MOZ_CONSTEXPR_VAR Register CallTempReg0 = edi;
 static MOZ_CONSTEXPR_VAR Register CallTempReg1 = eax;
 static MOZ_CONSTEXPR_VAR Register CallTempReg2 = ebx;
 static MOZ_CONSTEXPR_VAR Register CallTempReg3 = ecx;
 static MOZ_CONSTEXPR_VAR Register CallTempReg4 = esi;
 static MOZ_CONSTEXPR_VAR Register CallTempReg5 = edx;
-static MOZ_CONSTEXPR_VAR Register CallTempReg6 = ebp;
 
 // We have no arg regs, so our NonArgRegs are just our CallTempReg*
 static MOZ_CONSTEXPR_VAR Register CallTempNonArgRegs[] = { edi, eax, ebx, ecx, esi, edx };
 static const uint32_t NumCallTempNonArgRegs =
     mozilla::ArrayLength(CallTempNonArgRegs);
 
 class ABIArgGenerator
 {