Bug 1564942 - Part 4: Lower MPow to a series of shift-instructions when the base operand is a power of two. r=jandem
author André Bargull <andre.bargull@gmail.com>
Wed, 10 Jun 2020 13:49:46 +0000
changeset 598948 796d8685f8ce4b049e5c7a2d6150cbcb102c6a69
parent 598947 7765391a4142c32cc359872f4774249ab65950ee
child 598949 4a3b83f31a8821f5b4cb2932d07acc036be8a277
push id13310
push user ffxbld-merge
push date Mon, 29 Jun 2020 14:50:06 +0000
treeherder mozilla-beta@15a59a0afa5c [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jandem
bugs 1564942
milestone 79.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1564942 - Part 4: Lower MPow to a series of shift-instructions when the base operand is a power of two. r=jandem

In-tree users of `Math.pow` show that the function is often called with the base operand equal to two. This case can easily be optimised to a series of shift-instructions for any power of two. For now this optimisation is only taken for 2^i with i in {1..8} to avoid generating too many consecutive shift-instructions. 2^8 = 256 was chosen as the limit, because it is the maximum power of two base operand for `Math.pow` used in-tree.

Differential Revision: https://phabricator.services.mozilla.com/D37587
js/src/jit-test/tests/ion/pow-base-power-of-two-bailouts.js
js/src/jit-test/tests/ion/pow-base-power-of-two.js
js/src/jit/CodeGenerator.cpp
js/src/jit/Lowering.cpp
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/arm64/Lowering-arm64.cpp
js/src/jit/arm64/Lowering-arm64.h
js/src/jit/mips-shared/Lowering-mips-shared.cpp
js/src/jit/mips-shared/Lowering-mips-shared.h
js/src/jit/none/Lowering-none.h
js/src/jit/shared/LIR-shared.h
js/src/jit/x86-shared/Lowering-x86-shared.cpp
js/src/jit/x86-shared/Lowering-x86-shared.h
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/ion/pow-base-power-of-two-bailouts.js
@@ -0,0 +1,85 @@
+// Lowering provides a specialisation when the base operand is a constant which
+// is a power of two.
+//
+// Test bailout conditions for this optimisation.
+
+function test(x) {
+    // |x| is the power-of-two base. Each section below first runs without a
+    // bailout and then passes a power whose result is not an Int32.
+    function pow(x, y) { return `Math.pow(${x}, ${y})` };
+    function exp(x, y) { return `((${x}) ** ${y})` };
+
+    function make(fn) {
+        return Function("y, z", `
+            // Load from array to prevent constant-folding.
+            // (Ion is currently not smart enough to realise that both array
+            // values are the same.)
+            var ys = [y, y];
+            var zs = [z, z];
+            for (var i = 0; i < 1000; ++i) {
+                assertEq(${fn(x, "ys[i & 1]")}, zs[i & 1]);
+            }
+        `);
+    }
+
+    // NB: Math.cbrt() always returns a double value.
+    function double(v) { return Math.cbrt(v * v * v); }
+
+    // Find the first power which will exceed the Int32 range by computing ⌈log_x(2 ^ 31)⌉.
+    var limit = Math.ceil(Math.log2(2 ** 31) / Math.log2(x));
+    assertEq(Math.pow(x, limit - 1) < 2 ** 31, true);
+    assertEq(Math.pow(x, limit) >= 2 ** 31, true);
+
+    function* args(first, last) {
+        // Run the test function a few times without a bailout.
+        for (var i = 0; i < 3; ++i) {
+            yield first;
+        }
+
+        // |last| should trigger a bailout.
+        yield last;
+    }
+
+    // Test precision loss when the result exceeds the Int32 range.
+    for (var fn of [make(pow), make(exp)]) {
+        for (var y of args(limit - 1, limit)) {
+            // Ensure the callee always sees a double to avoid an early Bailout_ArgumentCheck.
+            var z = double(Math.pow(x, y));
+            fn(y, z);
+        }
+    }
+
+    // Test precision loss when the result is a fractional number.
+    for (var fn of [make(pow), make(exp)]) {
+        for (var y of args(0, -1)) {
+            // Ensure the callee always sees a double to avoid an early Bailout_ArgumentCheck.
+            var z = double(Math.pow(x, y));
+            fn(y, z);
+        }
+    }
+
+    // Find the most negative power whose result is still a non-zero double.
+    var negLimit = -Math.floor(1074 / Math.log2(x));
+
+    // Test precision loss when the result is a non-zero, fractional number.
+    for (var fn of [make(pow), make(exp)]) {
+        for (var y of args(0, negLimit)) {
+            // Only wrap Int32 results: double() would underflow |x ** negLimit| to zero.
+            var z = Math.pow(x, y);
+            fn(y, Number.isInteger(z) ? double(z) : z);
+        }
+    }
+}
+
+function* range(a, b, fn) {
+    // Lazily yields fn(a), fn(a + 1), ..., fn(b); both bounds are inclusive.
+    for (var n = a; n <= b; n++)
+        yield fn(n);
+}
+
+// Only 2^i with |i| ∈ {1..8} currently triggers the optimisation, but also test
+// the next power-of-two values.
+
+for (var x of range(1, 10, exponent => 2 ** exponent)) {  // x = 2, 4, ..., 1024
+    test(x);
+}
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/ion/pow-base-power-of-two.js
@@ -0,0 +1,75 @@
+// Lowering provides a specialisation when the base operand is a constant which
+// is a power of two.
+
+loadRelativeToScript("../../../tests/non262/Math/shell.js");
+
+function test(x, y, z) {
+    // Source-text builders for the two spellings of exponentiation.
+    var pow = (base, power) => `Math.pow(${base}, ${power})`;
+    var exp = (base, power) => `((${base}) ** ${power})`;
+
+    // Compiles a function which repeatedly compares |fn(x, y)| against |z|.
+    function make(fn, x, y, z) {
+        return Function(`
+            // Load from array to prevent constant-folding.
+            // (Ion is currently not smart enough to realise that both array
+            // values are the same.)
+            var ys = [${y}, ${y}];
+            var zs = [${z}, ${z}];
+            for (var i = 0; i < 1000; ++i) {
+                assertNear(${fn(x, "ys[i & 1]")}, zs[i & 1]);
+            }
+        `);
+    }
+
+    // Source text which evaluates to |v| as a double.
+    // NB: Math.cbrt() always returns a double value.
+    var double = v => `Math.cbrt(${v * v * v})`;
+
+    // Cover all Int32/double combinations of base and power, for both spellings.
+    var tests = [];
+    for (var fn of [pow, exp]) {
+        for (var base of [x, double(x)]) {
+            for (var power of [y, double(y)]) {
+                tests.push(make(fn, base, power, z));
+            }
+        }
+    }
+
+    // Run every generated function twice in a row.
+    for (var run of tests) {
+        run();
+        run();
+    }
+}
+
+function* range(a, b, fn) {
+    // Lazily yields fn(a), fn(a + 1), ..., fn(b); both bounds are inclusive.
+    for (var n = a; n <= b; n++)
+        yield fn(n);
+}
+
+// Only 2^i with |i| ∈ {1..8} currently triggers the optimisation, but also test
+// the next power-of-two values, 1 and 0, and negated power-of-two values.
+var values = [].concat(
+    [...range(1, 10, i => 2 ** i)],
+    [1, 0],
+    [...range(1, 4, i => -(2 ** i))]
+);
+
+for (var x of values) {
+    test(x, 0, 1);
+    test(x, 1, x);
+    test(x, 2, x * x);
+
+    // Underflow to zero needs |x| > 1: 0**(negative) is Infinity and
+    // 1**(negative) is 1.
+    if (Math.abs(x) > 1) {
+        test(x, -1076, 0);
+    }
+
+    // Positive bases only: (negative)**(odd-negative) is -0.
+    if (x > 1) {
+        test(x, -1075, 0);
+    }
+}
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -8260,16 +8260,49 @@ void CodeGenerator::visitPowD(LPowD* ins
   masm.setupUnalignedABICall(temp);
   masm.passABIArg(value, MoveOp::DOUBLE);
   masm.passABIArg(power, MoveOp::DOUBLE);
   masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, ecmaPow), MoveOp::DOUBLE);
 
   MOZ_ASSERT(ToFloatRegister(ins->output()) == ReturnDoubleReg);
 }
 
+void CodeGenerator::visitPowOfTwoI(LPowOfTwoI* ins) {
+  Register power = ToRegister(ins->power());
+  Register output = ToRegister(ins->output());
+
+  uint32_t base = ins->base();
+  MOZ_ASSERT(mozilla::IsPowerOfTwo(base));
+
+  uint32_t n = mozilla::FloorLog2(base);
+  MOZ_ASSERT(n != 0);
+
+  // Ceiling division, see Hacker's Delight, 2nd edition, theorem D2.
+  auto ceilingDiv = [](uint32_t x, uint32_t y) { return (x + y - 1) / y; };
+
+  // Bail out unless |0 <= y < log_base(2^31)|, with |y = power|, |base = 2^n|:
+  // |2^(n*y) < 2^31| must hold, hence |n*y < 31| resp. |y < 31/n|. Negative
+  // powers fail the same check because AboveOrEqual compares unsigned.
+  // Note: it's important for this condition to match the code in CacheIR.cpp
+  // (CanAttachInt32Pow) to prevent failure loops.
+  bailoutCmp32(Assembler::AboveOrEqual, power, Imm32(ceilingDiv(31, n)),
+               ins->snapshot());
+
+  // Compute (2^n)^y as 2^(n*y) by shifting |1| left by |power| a total of |n|
+  // times. We could directly scale |power| and perform a single shift, but
+  // due to the lack of necessary MacroAssembler functionality, like
+  // multiplying a register with an immediate, we restrict the number of
+  // generated shift instructions when lowering this operation.
+  masm.move32(Imm32(1), output);
+  do {
+    masm.lshift32(power, output);
+    n--;
+  } while (n > 0);
+}
+
 void CodeGenerator::visitSqrtD(LSqrtD* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
   FloatRegister output = ToFloatRegister(ins->output());
   masm.sqrtDouble(input, output);
 }
 
 void CodeGenerator::visitSqrtF(LSqrtF* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3,16 +3,17 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "jit/Lowering.h"
 
 #include "mozilla/DebugOnly.h"
 #include "mozilla/EndianUtils.h"
+#include "mozilla/MathAlgorithms.h"
 
 #include <type_traits>
 
 #include "jit/JitSpewer.h"
 #include "jit/LIR.h"
 #include "jit/MIR.h"
 #include "jit/MIRGraph.h"
 #include "util/Memory.h"
@@ -1527,16 +1528,26 @@ void LIRGenerator::visitHypot(MHypot* in
 void LIRGenerator::visitPow(MPow* ins) {
   MDefinition* input = ins->input();
   MDefinition* power = ins->power();
 
   if (ins->type() == MIRType::Int32) {
     MOZ_ASSERT(input->type() == MIRType::Int32);
     MOZ_ASSERT(power->type() == MIRType::Int32);
 
+    if (input->isConstant()) {
+      // Restrict this optimization to |base <= 256| to avoid generating too
+      // many consecutive shift instructions.
+      int32_t base = input->toConstant()->toInt32();
+      if (2 <= base && base <= 256 && mozilla::IsPowerOfTwo(uint32_t(base))) {
+        lowerPowOfTwoI(ins);
+        return;
+      }
+    }
+
     auto* lir = new (alloc())
         LPowII(useRegister(input), useRegister(power), temp(), temp());
     assignSnapshot(lir, Bailout_PrecisionLoss);
     define(lir, ins);
     return;
   }
 
   MOZ_ASSERT(ins->type() == MIRType::Double);
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -466,16 +466,25 @@ void LIRGeneratorARM::lowerUrshD(MUrsh* 
   MOZ_ASSERT(lhs->type() == MIRType::Int32);
   MOZ_ASSERT(rhs->type() == MIRType::Int32);
 
   LUrshD* lir = new (alloc())
       LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
   define(lir, mir);
 }
 
+void LIRGeneratorARM::lowerPowOfTwoI(MPow* mir) {
+  // The constant base is stored in the LIR node; only |power| uses a register.
+  int32_t base = mir->input()->toConstant()->toInt32();
+
+  auto* ins = new (alloc()) LPowOfTwoI(base, useRegister(mir->power()));
+  assignSnapshot(ins, Bailout_PrecisionLoss);
+  define(ins, mir);
+}
+
 void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
   if (ins->type() == MIRType::Int32) {
     define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins);
   } else if (ins->type() == MIRType::Float32) {
     define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins);
   } else {
     MOZ_ASSERT(ins->type() == MIRType::Double);
     define(new (alloc()) LNegD(useRegisterAtStart(ins->input())), ins);
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -37,16 +37,18 @@ class LIRGeneratorARM : public LIRGenera
   void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
                           size_t lirIndex);
   void defineInt64Phi(MPhi* phi, size_t lirIndex);
 
   void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
   void lowerUrshD(MUrsh* mir);
 
+  void lowerPowOfTwoI(MPow* mir);
+
   void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                    MDefinition* input);
   void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                    MDefinition* lhs, MDefinition* rhs);
 
   void lowerForALUInt64(
       LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
       MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
--- a/js/src/jit/arm64/Lowering-arm64.cpp
+++ b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -309,16 +309,25 @@ void LIRGeneratorARM64::lowerUrshD(MUrsh
   MOZ_ASSERT(lhs->type() == MIRType::Int32);
   MOZ_ASSERT(rhs->type() == MIRType::Int32);
 
   LUrshD* lir = new (alloc())
       LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
   define(lir, mir);
 }
 
+void LIRGeneratorARM64::lowerPowOfTwoI(MPow* mir) {
+  // The constant base is stored in the LIR node; only |power| uses a register.
+  int32_t base = mir->input()->toConstant()->toInt32();
+
+  auto* ins = new (alloc()) LPowOfTwoI(base, useRegister(mir->power()));
+  assignSnapshot(ins, Bailout_PrecisionLoss);
+  define(ins, mir);
+}
+
 void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
   switch (ins->type()) {
     case MIRType::Int32:
       define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins);
       break;
     case MIRType::Float32:
       define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins);
       break;
--- a/js/src/jit/arm64/Lowering-arm64.h
+++ b/js/src/jit/arm64/Lowering-arm64.h
@@ -39,16 +39,18 @@ class LIRGeneratorARM64 : public LIRGene
   void lowerInt64PhiInput(MPhi*, uint32_t, LBlock*, size_t) {
     MOZ_CRASH("NYI");
   }
   void defineInt64Phi(MPhi*, size_t) { MOZ_CRASH("NYI"); }
   void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
   void lowerUrshD(MUrsh* mir);
 
+  void lowerPowOfTwoI(MPow* mir);
+
   void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                    MDefinition* input);
   void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                    MDefinition* lhs, MDefinition* rhs);
 
   void lowerForALUInt64(
       LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
       MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
--- a/js/src/jit/mips-shared/Lowering-mips-shared.cpp
+++ b/js/src/jit/mips-shared/Lowering-mips-shared.cpp
@@ -297,16 +297,25 @@ void LIRGeneratorMIPSShared::lowerUrshD(
   MOZ_ASSERT(lhs->type() == MIRType::Int32);
   MOZ_ASSERT(rhs->type() == MIRType::Int32);
 
   LUrshD* lir = new (alloc())
       LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
   define(lir, mir);
 }
 
+void LIRGeneratorMIPSShared::lowerPowOfTwoI(MPow* mir) {
+  // The constant base is stored in the LIR node; only |power| uses a register.
+  int32_t base = mir->input()->toConstant()->toInt32();
+
+  auto* ins = new (alloc()) LPowOfTwoI(base, useRegister(mir->power()));
+  assignSnapshot(ins, Bailout_PrecisionLoss);
+  define(ins, mir);
+}
+
 void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
   if (ins->type() == MIRType::Int32) {
     define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins);
   } else if (ins->type() == MIRType::Float32) {
     define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins);
   } else {
     MOZ_ASSERT(ins->type() == MIRType::Double);
     define(new (alloc()) LNegD(useRegisterAtStart(ins->input())), ins);
--- a/js/src/jit/mips-shared/Lowering-mips-shared.h
+++ b/js/src/jit/mips-shared/Lowering-mips-shared.h
@@ -25,16 +25,18 @@ class LIRGeneratorMIPSShared : public LI
   LDefinition tempByteOpRegister();
 
   bool needTempForPostBarrier() { return false; }
 
   void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
   void lowerUrshD(MUrsh* mir);
 
+  void lowerPowOfTwoI(MPow* mir);
+
   void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                    MDefinition* input);
   void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                    MDefinition* lhs, MDefinition* rhs);
 
   void lowerForALUInt64(
       LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
       MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
--- a/js/src/jit/none/Lowering-none.h
+++ b/js/src/jit/none/Lowering-none.h
@@ -35,16 +35,17 @@ class LIRGeneratorNone : public LIRGener
   void lowerUntypedPhiInput(MPhi*, uint32_t, LBlock*, size_t) { MOZ_CRASH(); }
   void lowerInt64PhiInput(MPhi*, uint32_t, LBlock*, size_t) { MOZ_CRASH(); }
   void defineInt64Phi(MPhi*, size_t) { MOZ_CRASH(); }
   void lowerForShift(LInstructionHelper<1, 2, 0>*, MDefinition*, MDefinition*,
                      MDefinition*) {
     MOZ_CRASH();
   }
   void lowerUrshD(MUrsh*) { MOZ_CRASH(); }
+  void lowerPowOfTwoI(MPow*) { MOZ_CRASH(); }
   template <typename T>
   void lowerForALU(T, MDefinition*, MDefinition*, MDefinition* v = nullptr) {
     MOZ_CRASH();
   }
   template <typename T>
   void lowerForFPU(T, MDefinition*, MDefinition*, MDefinition* v = nullptr) {
     MOZ_CRASH();
   }
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -2516,16 +2516,31 @@ class LPowD : public LCallInstructionHel
     setTemp(0, temp);
   }
 
   const LAllocation* value() { return getOperand(0); }
   const LAllocation* power() { return getOperand(1); }
   const LDefinition* temp() { return getTemp(0); }
 };
 
+// LIR node for |base ** power| where |base| is a constant power of two.
+class LPowOfTwoI : public LInstructionHelper<1, 1, 0> {
+ public:
+  LIR_HEADER(PowOfTwoI)
+  LPowOfTwoI(uint32_t base, const LAllocation& power)
+      : LInstructionHelper(classOpcode), base_(base) {
+    setOperand(0, power);
+  }
+  const LAllocation* power() { return getOperand(0); }
+  uint32_t base() const { return base_; }
+
+ private:
+  uint32_t base_;
+};
+
 // Sign value of an integer.
 class LSignI : public LInstructionHelper<1, 1, 0> {
  public:
   LIR_HEADER(SignI)
   explicit LSignI(const LAllocation& input) : LInstructionHelper(classOpcode) {
     setOperand(0, input);
   }
 };
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -434,16 +434,27 @@ void LIRGeneratorX86Shared::lowerUrshD(M
   LUse lhsUse = useRegisterAtStart(lhs);
   LAllocation rhsAlloc =
       rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);
 
   LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
   define(lir, mir);
 }
 
+void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
+  // The constant base is stored in the LIR node itself.
+  int32_t base = mir->input()->toConstant()->toInt32();
+
+  // |power| is used as the shift count, which x86 variable-count shift
+  // instructions can only take from ecx (cl).
+  auto* ins = new (alloc()) LPowOfTwoI(base, useFixed(mir->power(), ecx));
+  assignSnapshot(ins, Bailout_PrecisionLoss);
+  define(ins, mir);
+}
+
 void LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins) {
   MDefinition* opd = ins->input();
   MOZ_ASSERT(opd->type() == MIRType::Double);
 
   LDefinition maybeTemp =
       Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
   define(new (alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
 }
--- a/js/src/jit/x86-shared/Lowering-x86-shared.h
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.h
@@ -40,16 +40,17 @@ class LIRGeneratorX86Shared : public LIR
   void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                MDefinition* lhs, MDefinition* rhs);
   void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
   void lowerDivI(MDiv* div);
   void lowerModI(MMod* mod);
   void lowerUDiv(MDiv* div);
   void lowerUMod(MMod* mod);
   void lowerUrshD(MUrsh* mir);
+  void lowerPowOfTwoI(MPow* mir);
   void lowerTruncateDToInt32(MTruncateToInt32* ins);
   void lowerTruncateFToInt32(MTruncateToInt32* ins);
   void lowerCompareExchangeTypedArrayElement(
       MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters);
   void lowerAtomicExchangeTypedArrayElement(
       MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters);
   void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
                                          bool useI386ByteRegisters);