Bug 1507820: Pin HeapReg in Cranelift and use it as the heap base; r=lth
author: Benjamin Bouvier <benj@benj.me>
Wed, 11 Sep 2019 15:58:09 +0000
changeset 492777 cbe50166fcf07817941f6e122bffc40aad7a2969
parent 492776 008417a25693a324e5ead68b28e3ecd2c5cb46aa
child 492778 443a01b10c73c235a4b0b76be37fc241ca1bcc5b
push id: 95143
push user: bbouvier@mozilla.com
push date: Thu, 12 Sep 2019 07:52:43 +0000
treeherder: autoland@cbe50166fcf0 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: lth
bugs: 1507820
milestone: 71.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1507820: Pin HeapReg in Cranelift and use it as the heap base; r=lth Differential Revision: https://phabricator.services.mozilla.com/D45017
js/src/wasm/WasmCraneliftCompile.cpp
js/src/wasm/cranelift/baldrapi.h
js/src/wasm/cranelift/build.rs
js/src/wasm/cranelift/src/isa.rs
js/src/wasm/cranelift/src/wasm2clif.rs
--- a/js/src/wasm/WasmCraneliftCompile.cpp
+++ b/js/src/wasm/WasmCraneliftCompile.cpp
@@ -91,23 +91,16 @@ static bool GenerateCraneliftCode(WasmMa
   uint32_t funcBase = masm.currentOffset();
   if (!masm.appendRawCode(func.code, func.codeSize)) {
     return false;
   }
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
   uint32_t codeEnd = masm.currentOffset();
 #endif
 
-  // Cranelift isn't aware of pinned registers in general, so we need to reload
-  // both TLS and pinned regs from the stack.
-  // TODO(bug 1507820): We should teach Cranelift to reload this register
-  // itself, so we don't have to do it manually.
-  masm.loadWasmTlsRegFromFrame();
-  masm.loadWasmPinnedRegsFromTls();
-
   wasm::GenerateFunctionEpilogue(masm, func.framePushed, offsets);
 
   if (func.numRodataRelocs > 0) {
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
     constexpr size_t jumptableElementSize = 4;
 
     MOZ_ASSERT(func.jumptablesSize % jumptableElementSize == 0);
 
@@ -288,16 +281,17 @@ CraneliftStaticEnvironment::CraneliftSta
 #endif
 #if defined(XP_WIN)
       platformIsWindows(true),
 #else
       platformIsWindows(false),
 #endif
       staticMemoryBound(0),
       memoryGuardSize(0),
+      memoryBaseTlsOffset(offsetof(TlsData, memoryBase)),
       instanceTlsOffset(offsetof(TlsData, instance)),
       interruptTlsOffset(offsetof(TlsData, interrupt)),
       cxTlsOffset(offsetof(TlsData, cx)),
       realmCxOffset(JSContext::offsetOfRealm()),
       realmTlsOffset(offsetof(TlsData, realm)),
       realmFuncImportTlsOffset(offsetof(FuncImportTls, realm)) {
 }
 
--- a/js/src/wasm/cranelift/baldrapi.h
+++ b/js/src/wasm/cranelift/baldrapi.h
@@ -64,16 +64,17 @@ struct CraneliftStaticEnvironment {
   bool hasPopcnt;
   bool hasAvx;
   bool hasBmi1;
   bool hasBmi2;
   bool hasLzcnt;
   bool platformIsWindows;
   size_t staticMemoryBound;
   size_t memoryGuardSize;
+  size_t memoryBaseTlsOffset;
   size_t instanceTlsOffset;
   size_t interruptTlsOffset;
   size_t cxTlsOffset;
   size_t realmCxOffset;
   size_t realmTlsOffset;
   size_t realmFuncImportTlsOffset;
 
   // Not bindgen'd because it's inlined.
--- a/js/src/wasm/cranelift/build.rs
+++ b/js/src/wasm/cranelift/build.rs
@@ -69,17 +69,17 @@ fn main() {
                 .split_whitespace()
                 .map(|s| s.to_owned())
                 .collect();
             for flag in extra_flags {
                 bindings = bindings.clang_arg(flag);
             }
         }
         None => {
-            println!("cargo:warning={}", "MOZ_TOPOBJDIR should be set by default, otherwise the build is not guaranted to finish.");
+            println!("cargo:warning=MOZ_TOPOBJDIR should be set by default, otherwise the build is not guaranteed to finish.");
         }
     }
 
     let bindings = bindings
         .generate()
         .expect("Unable to generate baldrapi.h bindings");
 
     // Write the bindings to the $OUT_DIR/bindings.rs file.
--- a/js/src/wasm/cranelift/src/isa.rs
+++ b/js/src/wasm/cranelift/src/isa.rs
@@ -147,16 +147,21 @@ fn make_shared_flags(
         None => None,
     }
     .unwrap_or(true);
     sb.set(
         "jump_tables_enabled",
         if jump_tables_enabled { "true" } else { "false" },
     )?;
 
+    if cfg!(feature = "cranelift_x86") && cfg!(target_pointer_width = "64") {
+        sb.enable("enable_pinned_reg")?;
+        sb.enable("use_pinned_reg_as_heap_base")?;
+    }
+
     Ok(settings::Flags::new(sb))
 }
 
 #[cfg(feature = "cranelift_x86")]
 fn make_isa_specific(env: &StaticEnvironment) -> DashResult<isa::Builder> {
     let mut ib = isa::lookup_by_name("x86_64-unknown-unknown").map_err(BasicError::from)?;
 
     if !env.hasSse2 {
@@ -186,17 +191,16 @@ fn make_isa_specific(env: &StaticEnviron
     }
     if env.hasLzcnt {
         ib.enable("has_lzcnt").map_err(BasicError::from)?;
     }
 
     Ok(ib)
 }
 
-/// TODO: SM runs on more than x86 chips. Support them.
 #[cfg(not(feature = "cranelift_x86"))]
 fn make_isa_specific(_env: &StaticEnvironment) -> DashResult<isa::Builder> {
     Err("Platform not supported yet!".into())
 }
 
 /// Allocate a `TargetISA` object that can be used to generate code for the CPU we're running on.
 pub fn make_isa(env: &StaticEnvironment) -> DashResult<Box<dyn isa::TargetIsa>> {
     // Parse flags defined by the environment variable.
--- a/js/src/wasm/cranelift/src/wasm2clif.rs
+++ b/js/src/wasm/cranelift/src/wasm2clif.rs
@@ -243,17 +243,17 @@ impl<'a, 'b, 'c> TransEnv<'a, 'b, 'c> {
                     global_type: native_pointer_type(),
                 });
                 self.instance_gv = gv.into();
                 gv
             }
         };
         let ga = pos.ins().global_value(native_pointer_type(), gv);
         pos.ins()
-            .load(native_pointer_type(), ir::MemFlags::new(), ga, 0)
+            .load(native_pointer_type(), ir::MemFlags::trusted(), ga, 0)
     }
 
     /// Generate code that loads the current instance pointer.
     fn load_interrupt_flag(&mut self, pos: &mut FuncCursor) -> ir::Value {
         let gv = match self.interrupt_gv.expand() {
             Some(gv) => gv,
             None => {
                 // We need to allocate the global variable.
@@ -263,17 +263,18 @@ impl<'a, 'b, 'c> TransEnv<'a, 'b, 'c> {
                     offset: imm64(self.static_env.interruptTlsOffset),
                     global_type: native_pointer_type(),
                 });
                 self.interrupt_gv = gv.into();
                 gv
             }
         };
         let ga = pos.ins().global_value(native_pointer_type(), gv);
-        pos.ins().load(ir::types::I32, ir::MemFlags::new(), ga, 0)
+        pos.ins()
+            .load(ir::types::I32, ir::MemFlags::trusted(), ga, 0)
     }
 
     /// Get a `FuncRef` for the given symbolic address.
     /// Uses the closure to create the signature if necessary.
     fn symbolic_funcref<MKSIG: FnOnce() -> ir::Signature>(
         &mut self,
         func: &mut ir::Function,
         sym: bindings::SymbolicAddress,
@@ -305,49 +306,44 @@ impl<'a, 'b, 'c> TransEnv<'a, 'b, 'c> {
                 .func
                 .create_global_value(ir::GlobalValueData::IAddImm {
                     base: vmctx,
                     offset: imm64(self.static_env.cxTlsOffset),
                     global_type: native_pointer_type(),
                 })
                 .into();
         }
+
         if self.realm_addr.is_none() {
             let vmctx = self.get_vmctx_gv(&mut pos.func);
             self.realm_addr = pos
                 .func
                 .create_global_value(ir::GlobalValueData::IAddImm {
                     base: vmctx,
                     offset: imm64(self.static_env.realmTlsOffset),
                     global_type: native_pointer_type(),
                 })
                 .into();
         }
 
         let ptr = native_pointer_type();
-        let mut flags = ir::MemFlags::new();
-        flags.set_aligned();
-        flags.set_notrap();
-
+        let flags = ir::MemFlags::trusted();
         let cx_addr_val = pos.ins().global_value(ptr, self.cx_addr.unwrap());
         let cx = pos.ins().load(ptr, flags, cx_addr_val, 0);
         let realm_addr_val = pos.ins().global_value(ptr, self.realm_addr.unwrap());
         let realm = pos.ins().load(ptr, flags, realm_addr_val, 0);
         pos.ins()
             .store(flags, realm, cx, offset32(self.static_env.realmCxOffset));
     }
 
     /// Update the JSContext's realm value in preparation for making an indirect call through
     /// an external table.
     fn switch_to_indirect_callee_realm(&mut self, pos: &mut FuncCursor, vmctx: ir::Value) {
         let ptr = native_pointer_type();
-        let mut flags = ir::MemFlags::new();
-        flags.set_aligned();
-        flags.set_notrap();
-
+        let flags = ir::MemFlags::trusted();
         let cx = pos
             .ins()
             .load(ptr, flags, vmctx, offset32(self.static_env.cxTlsOffset));
         let realm = pos
             .ins()
             .load(ptr, flags, vmctx, offset32(self.static_env.realmTlsOffset));
         pos.ins()
             .store(flags, realm, cx, offset32(self.static_env.realmCxOffset));
@@ -357,32 +353,47 @@ impl<'a, 'b, 'c> TransEnv<'a, 'b, 'c> {
     /// function.
     fn switch_to_import_realm(
         &mut self,
         pos: &mut FuncCursor,
         vmctx: ir::Value,
         gv_addr: ir::Value,
     ) {
         let ptr = native_pointer_type();
-        let mut flags = ir::MemFlags::new();
-        flags.set_aligned();
-        flags.set_notrap();
-
+        let flags = ir::MemFlags::trusted();
         let cx = pos
             .ins()
             .load(ptr, flags, vmctx, offset32(self.static_env.cxTlsOffset));
         let realm = pos.ins().load(
             ptr,
             flags,
             gv_addr,
             offset32(self.static_env.realmFuncImportTlsOffset),
         );
         pos.ins()
             .store(flags, realm, cx, offset32(self.static_env.realmCxOffset));
     }
+
+    fn load_pinned_reg(&self, pos: &mut FuncCursor, vmctx: ir::Value) {
+        if cfg!(feature = "cranelift_x86") && cfg!(target_pointer_width = "64") {
+            let heap_base = pos.ins().load(
+                native_pointer_type(),
+                ir::MemFlags::trusted(),
+                vmctx,
+                self.static_env.memoryBaseTlsOffset as i32,
+            );
+            pos.ins().set_pinned_reg(heap_base);
+        }
+    }
+
+    fn reload_tls_and_pinned_regs(&mut self, pos: &mut FuncCursor) {
+        let vmctx_gv = self.get_vmctx_gv(&mut pos.func);
+        let vmctx = pos.ins().global_value(native_pointer_type(), vmctx_gv);
+        self.load_pinned_reg(pos, vmctx);
+    }
 }
 
 impl<'a, 'b, 'c> FuncEnvironment for TransEnv<'a, 'b, 'c> {
     fn target_config(&self) -> TargetFrontendConfig {
         self.isa.frontend_config()
     }
 
     fn pointer_type(&self) -> ir::Type {
@@ -574,17 +585,17 @@ impl<'a, 'b, 'c> FuncEnvironment for Tra
                 let imm = wsig.id_immediate() as i64;
                 Some(pos.ins().iconst(native_pointer_type(), imm))
             }
             bindings::FuncTypeIdDescKind::Global => {
                 let gv = self.sig_global(pos.func, wsig.id_tls_offset());
                 let addr = pos.ins().global_value(native_pointer_type(), gv);
                 Some(
                     pos.ins()
-                        .load(native_pointer_type(), ir::MemFlags::new(), addr, 0),
+                        .load(native_pointer_type(), ir::MemFlags::trusted(), addr, 0),
                 )
             }
         };
 
         // 2. Bounds check the callee against the table length.
         let (bound_gv, base_gv) = {
             let table_data = &pos.func.tables[table];
             (table_data.bound_gv, table_data.base_gv)
@@ -606,47 +617,51 @@ impl<'a, 'b, 'c> FuncEnvironment for Tra
         } else {
             callee
         };
         let callee_scaled = pos.ins().imul_imm(callee_x, wtable.entry_size());
 
         let entry = pos.ins().iadd(tbase, callee_scaled);
         let callee_func = pos
             .ins()
-            .load(native_pointer_type(), ir::MemFlags::new(), entry, 0);
+            .load(native_pointer_type(), ir::MemFlags::trusted(), entry, 0);
 
         // Check for a null callee.
         pos.ins()
             .trapz(callee_func, ir::TrapCode::IndirectCallToNull);
 
         // Handle external tables, set up environment.
         // A function table call could redirect execution to another module with a different realm,
         // so switch to this realm just in case.
-        let vmctx = pos.ins().load(
+        let callee_vmctx = pos.ins().load(
             native_pointer_type(),
-            ir::MemFlags::new(),
+            ir::MemFlags::trusted(),
             entry,
             native_pointer_size(),
         );
-        self.switch_to_indirect_callee_realm(&mut pos, vmctx);
+        self.switch_to_indirect_callee_realm(&mut pos, callee_vmctx);
+        self.load_pinned_reg(&mut pos, callee_vmctx);
 
         // First the wasm args.
         let mut args = ir::ValueList::default();
         args.push(callee_func, &mut pos.func.dfg.value_lists);
         args.extend(call_args.iter().cloned(), &mut pos.func.dfg.value_lists);
-        args.push(vmctx, &mut pos.func.dfg.value_lists);
+        args.push(callee_vmctx, &mut pos.func.dfg.value_lists);
         if let Some(sigid) = sigid_value {
             args.push(sigid, &mut pos.func.dfg.value_lists);
         }
 
         let call = pos
             .ins()
             .CallIndirect(ir::Opcode::CallIndirect, ir::types::INVALID, sig_ref, args)
             .0;
+
         self.switch_to_wasm_tls_realm(&mut pos);
+        self.reload_tls_and_pinned_regs(&mut pos);
+
         Ok(call)
     }
 
     fn translate_call(
         &mut self,
         mut pos: FuncCursor,
         callee_index: FuncIndex,
         callee: ir::FuncRef,
@@ -660,43 +675,45 @@ impl<'a, 'b, 'c> FuncEnvironment for Tra
         if self.env.func_is_import(callee_index) {
             // This is a call to an imported function. We need to load the callee address and vmctx
             // from the associated `FuncImportTls` struct in a global.
             let gv = self.func_import_global(pos.func, callee_index);
             let gv_addr = pos.ins().global_value(native_pointer_type(), gv);
 
             // We need the first two pointer-sized fields from the `FuncImportTls` struct: `code`
             // and `tls`.
-            let fit_code = pos
-                .ins()
-                .load(native_pointer_type(), ir::MemFlags::new(), gv_addr, 0);
+            let fit_code =
+                pos.ins()
+                    .load(native_pointer_type(), ir::MemFlags::trusted(), gv_addr, 0);
             let fit_tls = pos.ins().load(
                 native_pointer_type(),
-                ir::MemFlags::new(),
+                ir::MemFlags::trusted(),
                 gv_addr,
                 native_pointer_size(),
             );
 
             // Switch to the callee's realm.
             self.switch_to_import_realm(&mut pos, fit_tls, gv_addr);
+            self.load_pinned_reg(&mut pos, fit_tls);
 
             // The `tls` field is the VM context pointer for the callee.
             args.push(fit_tls, &mut pos.func.dfg.value_lists);
 
             // Now make an indirect call to `fit_code`.
             // TODO: We don't need the `FuncRef` that was allocated for this callee since we're
             // using an indirect call. We would need to change the `FuncTranslator` interface to
             // deal.
             args.insert(0, fit_code, &mut pos.func.dfg.value_lists);
             let sig = pos.func.dfg.ext_funcs[callee].signature;
             let call = pos
                 .ins()
                 .CallIndirect(ir::Opcode::CallIndirect, ir::types::INVALID, sig, args)
                 .0;
             self.switch_to_wasm_tls_realm(&mut pos);
+            self.reload_tls_and_pinned_regs(&mut pos);
             Ok(call)
         } else {
             // This is a call to a local function.
 
             // Then we need to pass on the VM context pointer.
             let vmctx = pos
                 .func
                 .special_param(ir::ArgumentPurpose::VMContext)
@@ -741,16 +758,17 @@ impl<'a, 'b, 'c> FuncEnvironment for Tra
             .expect("Missing vmctx arg");
         // We must use `func_addr` for symbolic references since the stubs can be far away, and the
         // C++ `SymbolicAccess` linker expects it.
         let addr = pos.ins().func_addr(native_pointer_type(), fnref);
         let call = pos
             .ins()
             .call_indirect(sigref, addr, &[instance, val, vmctx]);
         self.switch_to_wasm_tls_realm(&mut pos);
+        self.reload_tls_and_pinned_regs(&mut pos);
         Ok(pos.func.dfg.first_result(call))
     }
 
     fn translate_memory_size(
         &mut self,
         mut pos: FuncCursor,
         _index: MemoryIndex,
         _heap: ir::Heap,
@@ -773,16 +791,17 @@ impl<'a, 'b, 'c> FuncEnvironment for Tra
         let instance = self.load_instance(&mut pos);
         let vmctx = pos
             .func
             .special_param(ir::ArgumentPurpose::VMContext)
             .expect("Missing vmctx arg");
         let addr = pos.ins().func_addr(native_pointer_type(), fnref);
         let call = pos.ins().call_indirect(sigref, addr, &[instance, vmctx]);
         self.switch_to_wasm_tls_realm(&mut pos);
+        self.reload_tls_and_pinned_regs(&mut pos);
         Ok(pos.func.dfg.first_result(call))
     }
 
     fn translate_loop_header(&mut self, mut pos: FuncCursor) -> WasmResult<()> {
         let interrupt = self.load_interrupt_flag(&mut pos);
         pos.ins().trapnz(interrupt, ir::TrapCode::Interrupt);
         Ok(())
     }