author | Yury Delendik <ydelendik@mozilla.com> |
Wed, 11 Aug 2021 16:44:17 +0000 | |
changeset 588590 | e8b9b45734faec691994a891d482eea710e2c02b |
parent 588589 | 7e892c6e4241bc8eac6ede7638da0fdf16601a2a |
child 588591 | 8363b7c72148a18927edbb24d9e7e147694b4659 |
push id | 147895 |
push user | ydelendik@mozilla.com |
push date | Wed, 11 Aug 2021 16:46:41 +0000 |
treeherder | autoland@e8b9b45734fa [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | rhunt |
bugs | 1597790 |
milestone | 93.0a1 |
first release with | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
last release without | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
--- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -13008,32 +13008,54 @@ bool BaseCompiler::emitMemCopyInline() { uint32_t length = signedLength; MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength); RegI32 src = popI32(); RegI32 dest = popI32(); // Compute the number of copies of each width we will need to do size_t remainder = length; +#ifdef ENABLE_WASM_SIMD + size_t numCopies16 = remainder / sizeof(V128); + remainder %= sizeof(V128); +#endif #ifdef JS_64BIT size_t numCopies8 = remainder / sizeof(uint64_t); remainder %= sizeof(uint64_t); #endif size_t numCopies4 = remainder / sizeof(uint32_t); remainder %= sizeof(uint32_t); size_t numCopies2 = remainder / sizeof(uint16_t); remainder %= sizeof(uint16_t); size_t numCopies1 = remainder; // Load all source bytes onto the value stack from low to high using the // widest transfer width we can for the system. We will trap without writing // anything if any source byte is out-of-bounds. bool omitBoundsCheck = false; size_t offset = 0; +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(Scalar::Simd128, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!loadCommon(&access, check, ValType::V128)) { + return false; + } + + offset += sizeof(V128); + omitBoundsCheck = true; + } +#endif + #ifdef JS_64BIT for (uint32_t i = 0; i < numCopies8; i++) { RegI32 temp = needI32(); moveI32(src, temp); pushI32(temp); MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset()); AccessCheck check; @@ -13170,16 +13192,37 @@ bool BaseCompiler::emitMemCopyInline() { if (!storeCommon(&access, check, ValType::I64)) { return false; } omitBoundsCheck = true; } #endif +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + offset -= sizeof(V128); + + RegV128 value = popV128(); + RegI32 temp = needI32(); + 
moveI32(dest, temp); + pushI32(temp); + pushV128(value); + + MemoryAccessDesc access(Scalar::Simd128, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::V128)) { + return false; + } + + omitBoundsCheck = true; + } +#endif + freeI32(dest); freeI32(src); return true; } bool BaseCompiler::emitTableCopy() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); @@ -13246,29 +13289,36 @@ bool BaseCompiler::emitMemFillInline() { uint32_t length = uint32_t(signedLength); uint32_t value = uint32_t(signedValue); MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength); RegI32 dest = popI32(); // Compute the number of copies of each width we will need to do size_t remainder = length; +#ifdef ENABLE_WASM_SIMD + size_t numCopies16 = remainder / sizeof(V128); + remainder %= sizeof(V128); +#endif #ifdef JS_64BIT size_t numCopies8 = remainder / sizeof(uint64_t); remainder %= sizeof(uint64_t); #endif size_t numCopies4 = remainder / sizeof(uint32_t); remainder %= sizeof(uint32_t); size_t numCopies2 = remainder / sizeof(uint16_t); remainder %= sizeof(uint16_t); size_t numCopies1 = remainder; MOZ_ASSERT(numCopies2 <= 1 && numCopies1 <= 1); // Generate splatted definitions for wider fills as needed +#ifdef ENABLE_WASM_SIMD + V128 val16(value); +#endif #ifdef JS_64BIT uint64_t val8 = SplatByteToUInt<uint64_t>(value, 8); #endif uint32_t val4 = SplatByteToUInt<uint32_t>(value, 4); uint32_t val2 = SplatByteToUInt<uint32_t>(value, 2); uint32_t val1 = value; // Store the fill value to the destination from high to low. 
We will trap @@ -13345,16 +13395,36 @@ bool BaseCompiler::emitMemFillInline() { if (!storeCommon(&access, check, ValType::I64)) { return false; } omitBoundsCheck = true; } #endif +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + offset -= sizeof(V128); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushV128(val16); + + MemoryAccessDesc access(Scalar::Simd128, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::V128)) { + return false; + } + + omitBoundsCheck = true; + } +#endif + freeI32(dest); return true; } bool BaseCompiler::emitMemInit() { return emitInstanceCallOp<uint32_t>( SASigMemInit32, [this](uint32_t* segIndex) -> bool { uint32_t dstTableIndex;
--- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -3631,32 +3631,48 @@ static bool EmitMemCopyInline(FunctionCo MOZ_ASSERT(MaxInlineMemoryCopyLength != 0); MOZ_ASSERT(len->isConstant() && len->type() == MIRType::Int32); uint32_t length = len->toConstant()->toInt32(); MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength); // Compute the number of copies of each width we will need to do size_t remainder = length; +#ifdef ENABLE_WASM_SIMD + size_t numCopies16 = remainder / sizeof(V128); + remainder %= sizeof(V128); +#endif #ifdef JS_64BIT size_t numCopies8 = remainder / sizeof(uint64_t); remainder %= sizeof(uint64_t); #endif size_t numCopies4 = remainder / sizeof(uint32_t); remainder %= sizeof(uint32_t); size_t numCopies2 = remainder / sizeof(uint16_t); remainder %= sizeof(uint16_t); size_t numCopies1 = remainder; // Load all source bytes from low to high using the widest transfer width we // can for the system. We will trap without writing anything if any source // byte is out-of-bounds. 
size_t offset = 0; DefVector loadedValues; +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + MemoryAccessDesc access(Scalar::Simd128, 1, offset, f.bytecodeOffset()); + auto* load = f.load(src, &access, ValType::V128); + if (!load || !loadedValues.append(load)) { + return false; + } + + offset += sizeof(V128); + } +#endif + #ifdef JS_64BIT for (uint32_t i = 0; i < numCopies8; i++) { MemoryAccessDesc access(Scalar::Int64, 1, offset, f.bytecodeOffset()); auto* load = f.load(src, &access, ValType::I64); if (!load || !loadedValues.append(load)) { return false; } @@ -3726,16 +3742,26 @@ static bool EmitMemCopyInline(FunctionCo offset -= sizeof(uint64_t); MemoryAccessDesc access(Scalar::Int64, 1, offset, f.bytecodeOffset()); auto* value = loadedValues.popCopy(); f.store(dst, &access, value); } #endif +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + offset -= sizeof(V128); + + MemoryAccessDesc access(Scalar::Simd128, 1, offset, f.bytecodeOffset()); + auto* value = loadedValues.popCopy(); + f.store(dst, &access, value); + } +#endif + return true; } static bool EmitMemCopy(FunctionCompiler& f) { MDefinition *dst, *src, *len; uint32_t dstMemIndex; uint32_t srcMemIndex; if (!f.iter().readMemOrTableCopy(true, &dstMemIndex, &dst, &srcMemIndex, &src, @@ -3878,27 +3904,34 @@ static bool EmitMemFillInline(FunctionCo val->isConstant() && val->type() == MIRType::Int32); uint32_t length = len->toConstant()->toInt32(); uint32_t value = val->toConstant()->toInt32(); MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength); // Compute the number of copies of each width we will need to do size_t remainder = length; +#ifdef ENABLE_WASM_SIMD + size_t numCopies16 = remainder / sizeof(V128); + remainder %= sizeof(V128); +#endif #ifdef JS_64BIT size_t numCopies8 = remainder / sizeof(uint64_t); remainder %= sizeof(uint64_t); #endif size_t numCopies4 = remainder / sizeof(uint32_t); remainder %= sizeof(uint32_t); size_t numCopies2 = remainder 
/ sizeof(uint16_t); remainder %= sizeof(uint16_t); size_t numCopies1 = remainder; // Generate splatted definitions for wider fills as needed +#ifdef ENABLE_WASM_SIMD + MDefinition* val16 = numCopies16 ? f.constant(V128(value)) : nullptr; +#endif #ifdef JS_64BIT MDefinition* val8 = numCopies8 ? f.constant(int64_t(SplatByteToUInt<uint64_t>(value, 8))) : nullptr; #endif MDefinition* val4 = numCopies4 ? f.constant(Int32Value(SplatByteToUInt<uint32_t>(value, 4)), MIRType::Int32) @@ -3938,16 +3971,25 @@ static bool EmitMemFillInline(FunctionCo for (uint32_t i = 0; i < numCopies8; i++) { offset -= sizeof(uint64_t); MemoryAccessDesc access(Scalar::Int64, 1, offset, f.bytecodeOffset()); f.store(start, &access, val8); } #endif +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + offset -= sizeof(V128); + + MemoryAccessDesc access(Scalar::Simd128, 1, offset, f.bytecodeOffset()); + f.store(start, &access, val16); + } +#endif + return true; } static bool EmitMemFill(FunctionCompiler& f) { MDefinition *start, *val, *len; if (!f.iter().readMemFill(&start, &val, &len)) { return false; }
--- a/js/src/wasm/WasmValue.h +++ b/js/src/wasm/WasmValue.h @@ -29,16 +29,20 @@ namespace wasm { // A V128 value. struct V128 { uint8_t bytes[16]; // Little-endian V128() { memset(bytes, 0, sizeof(bytes)); } + explicit V128(uint8_t splatValue) { + memset(bytes, int(splatValue), sizeof(bytes)); + } + template <typename T> T extractLane(unsigned lane) const { T result; MOZ_ASSERT(lane < 16 / sizeof(T)); memcpy(&result, bytes + sizeof(T) * lane, sizeof(T)); return result; }