diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e6a01255..7a2b5f10 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,6 +25,9 @@ jobs: runs-on: ${{matrix.os.version}} steps: - uses: actions/checkout@v1 + - name: work around ASLR+ASAN compatibility + run: sudo sysctl -w vm.mmap_rnd_bits=28 + if: matrix.os.name == 'ubuntu' - name: make tests run: | make -j2 config=sanitize werror=1 native=1 luau-tests diff --git a/CLI/Repl.cpp b/CLI/Repl.cpp index 84e4a654..d1122ae6 100644 --- a/CLI/Repl.cpp +++ b/CLI/Repl.cpp @@ -726,7 +726,7 @@ static void displayHelp(const char* argv0) printf(" --profile[=N]: profile the code using N Hz sampling (default 10000) and output results to profile.out\n"); printf(" --timetrace: record compiler time tracing information into trace.json\n"); printf(" --codegen: execute code using native code generation\n"); - printf(" --program-args,-a: declare start of arguments to be passed to the Luau program"); + printf(" --program-args,-a: declare start of arguments to be passed to the Luau program\n"); } static int assertionHandler(const char* expr, const char* file, int line, const char* function) diff --git a/CodeGen/include/Luau/AssemblyBuilderA64.h b/CodeGen/include/Luau/AssemblyBuilderA64.h index bea70fd0..a4d857a4 100644 --- a/CodeGen/include/Luau/AssemblyBuilderA64.h +++ b/CodeGen/include/Luau/AssemblyBuilderA64.h @@ -125,12 +125,12 @@ public: // Address of code (label) void adr(RegisterA64 dst, Label& label); - // Floating-point scalar moves + // Floating-point scalar/vector moves // Note: constant must be compatible with immediate floating point moves (see isFmovSupported) void fmov(RegisterA64 dst, RegisterA64 src); void fmov(RegisterA64 dst, double src); - // Floating-point scalar math + // Floating-point scalar/vector math void fabs(RegisterA64 dst, RegisterA64 src); void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); @@ -139,6 +139,7 @@ public: void fsqrt(RegisterA64 dst, RegisterA64 src); void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2); + // Vector component manipulation void ins_4s(RegisterA64 dst, RegisterA64 src, uint8_t index); void ins_4s(RegisterA64 dst, uint8_t dstIndex, RegisterA64 src, uint8_t srcIndex); void dup_4s(RegisterA64 dst, RegisterA64 src, uint8_t index); diff --git a/CodeGen/include/Luau/IrData.h b/CodeGen/include/Luau/IrData.h index 79d06e5a..a950370b 100644 --- a/CodeGen/include/Luau/IrData.h +++ b/CodeGen/include/Luau/IrData.h @@ -59,7 +59,8 @@ enum class IrCmd : uint8_t // Load a TValue from memory // A: Rn or Kn or pointer (TValue) - // B: int (optional 'A' pointer offset) + // B: int/none (optional 'A' pointer offset) + // C: tag/none (tag of the value being loaded) LOAD_TVALUE, // Load current environment table diff --git a/CodeGen/src/AssemblyBuilderA64.cpp b/CodeGen/src/AssemblyBuilderA64.cpp index ffb0a774..9d0522c0 100644 --- a/CodeGen/src/AssemblyBuilderA64.cpp +++ b/CodeGen/src/AssemblyBuilderA64.cpp @@ -557,16 +557,26 @@ void AssemblyBuilderA64::fmov(RegisterA64 dst, RegisterA64 src) void AssemblyBuilderA64::fmov(RegisterA64 dst, double src) { - CODEGEN_ASSERT(dst.kind == KindA64::d); + CODEGEN_ASSERT(dst.kind == KindA64::d || dst.kind == KindA64::q); int imm = getFmovImm(src); CODEGEN_ASSERT(imm >= 0 && imm <= 256); - // fmov can't encode 0, but movi can; movi is otherwise not useful for 64-bit fp immediates because it encodes repeating patterns - if (imm == 256) - 
placeFMOV("movi", dst, src, 0b001'0111100000'000'1110'01'00000); + // fmov can't encode 0, but movi can; movi is otherwise not useful for fp immediates because it encodes repeating patterns + if (dst.kind == KindA64::d) + { + if (imm == 256) + placeFMOV("movi", dst, src, 0b001'0111100000'000'1110'01'00000); + else + placeFMOV("fmov", dst, src, 0b000'11110'01'1'00000000'100'00000 | (imm << 8)); + } else - placeFMOV("fmov", dst, src, 0b000'11110'01'1'00000000'100'00000 | (imm << 8)); + { + if (imm == 256) + placeFMOV("movi.4s", dst, src, 0b010'0111100000'000'0000'01'00000); + else + placeFMOV("fmov.4s", dst, src, 0b010'0111100000'000'1111'0'1'00000 | ((imm >> 5) << 11) | (imm & 31)); + } } void AssemblyBuilderA64::fabs(RegisterA64 dst, RegisterA64 src) diff --git a/CodeGen/src/IrLoweringA64.cpp b/CodeGen/src/IrLoweringA64.cpp index 2a296949..284cef4d 100644 --- a/CodeGen/src/IrLoweringA64.cpp +++ b/CodeGen/src/IrLoweringA64.cpp @@ -12,6 +12,7 @@ #include "lgc.h" LUAU_FASTFLAGVARIABLE(LuauCodeGenVectorA64, false) +LUAU_FASTFLAGVARIABLE(LuauCodeGenOptVecA64, false) LUAU_FASTFLAG(LuauCodegenVectorTag2) @@ -1176,17 +1177,40 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) { inst.regA64 = regs.allocReg(KindA64::q, index); - RegisterA64 tempd = tempDouble(inst.a); - RegisterA64 temps = castReg(KindA64::s, tempd); - RegisterA64 tempw = regs.allocTemp(KindA64::w); - - build.fcvt(temps, tempd); - build.dup_4s(inst.regA64, castReg(KindA64::q, temps), 0); - - if (!FFlag::LuauCodegenVectorTag2) + if (FFlag::LuauCodeGenOptVecA64 && FFlag::LuauCodegenVectorTag2 && inst.a.kind == IrOpKind::Constant) { - build.mov(tempw, LUA_TVECTOR); - build.ins_4s(inst.regA64, tempw, 3); + float value = float(doubleOp(inst.a)); + uint32_t asU32; + static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit"); + memcpy(&asU32, &value, sizeof(value)); + + if (AssemblyBuilderA64::isFmovSupported(value)) + { + build.fmov(inst.regA64, value); + } + else + { + RegisterA64 temp = regs.allocTemp(KindA64::x); + + uint32_t vec[4] = { asU32, asU32, asU32, 0 }; + build.adr(temp, vec, sizeof(vec)); + build.ldr(inst.regA64, temp); + } + } + else + { + RegisterA64 tempd = tempDouble(inst.a); + RegisterA64 temps = castReg(KindA64::s, tempd); + RegisterA64 tempw = regs.allocTemp(KindA64::w); + + build.fcvt(temps, tempd); + build.dup_4s(inst.regA64, castReg(KindA64::q, temps), 0); + + if (!FFlag::LuauCodegenVectorTag2) + { + build.mov(tempw, LUA_TVECTOR); + build.ins_4s(inst.regA64, tempw, 3); + } } break; } diff --git a/CodeGen/src/IrTranslation.cpp b/CodeGen/src/IrTranslation.cpp index 995225a6..5d55c877 100644 --- a/CodeGen/src/IrTranslation.cpp +++ b/CodeGen/src/IrTranslation.cpp @@ -14,6 +14,7 @@ LUAU_FASTFLAGVARIABLE(LuauCodegenVectorTag2, false) LUAU_FASTFLAGVARIABLE(LuauCodegenVectorTag, false) +LUAU_FASTFLAGVARIABLE(LuauCodegenLoadTVTag, false) namespace Luau { @@ -111,6 +112,13 @@ static void translateInstLoadConstant(IrBuilder& build, int ra, int k) build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), build.constDouble(protok.value.n)); build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER)); } + else if (FFlag::LuauCodegenLoadTVTag) + { + // Tag could be LUA_TSTRING or LUA_TVECTOR; for TSTRING we could generate LOAD_POINTER/STORE_POINTER/STORE_TAG, but it's not profitable; + // however, it's still valuable to preserve the tag throughout the optimization pipeline to eliminate tag checks. 
+ IrOp load = build.inst(IrCmd::LOAD_TVALUE, build.vmConst(k), build.constInt(0), build.constTag(protok.tt)); + build.inst(IrCmd::STORE_TVALUE, build.vmReg(ra), load); + } else { // Remaining tag here right now is LUA_TSTRING, while it can be transformed to LOAD_POINTER/STORE_POINTER/STORE_TAG, it's not profitable right diff --git a/CodeGen/src/OptimizeConstProp.cpp b/CodeGen/src/OptimizeConstProp.cpp index d765b800..ff4f7bfc 100644 --- a/CodeGen/src/OptimizeConstProp.cpp +++ b/CodeGen/src/OptimizeConstProp.cpp @@ -19,6 +19,7 @@ LUAU_FASTINTVARIABLE(LuauCodeGenReuseSlotLimit, 64) LUAU_FASTFLAGVARIABLE(DebugLuauAbortingChecks, false) LUAU_FASTFLAG(LuauCodegenVectorTag2) LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauCodeGenCoverForgprepEffect, false) +LUAU_FASTFLAG(LuauCodegenLoadTVTag) namespace Luau { @@ -726,6 +727,9 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& arg->cmd == IrCmd::UNM_VEC) tag = LUA_TVECTOR; } + + if (FFlag::LuauCodegenLoadTVTag && arg->cmd == IrCmd::LOAD_TVALUE && arg->c.kind != IrOpKind::None) + tag = function.tagOp(arg->c); } } diff --git a/tests/AssemblyBuilderA64.test.cpp b/tests/AssemblyBuilderA64.test.cpp index 320a7a6a..3942003b 100644 --- a/tests/AssemblyBuilderA64.test.cpp +++ b/tests/AssemblyBuilderA64.test.cpp @@ -451,6 +451,12 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "FPImm") SINGLE_COMPARE(fmov(d0, 0), 0x2F00E400); SINGLE_COMPARE(fmov(d0, 0.125), 0x1E681000); SINGLE_COMPARE(fmov(d0, -0.125), 0x1E781000); + SINGLE_COMPARE(fmov(d0, 1.9375), 0x1E6FF000); + + SINGLE_COMPARE(fmov(q0, 0), 0x4F000400); + SINGLE_COMPARE(fmov(q0, 0.125), 0x4F02F400); + SINGLE_COMPARE(fmov(q0, -0.125), 0x4F06F400); + SINGLE_COMPARE(fmov(q0, 1.9375), 0x4F03F7E0); CHECK(!AssemblyBuilderA64::isFmovSupported(-0.0)); CHECK(!AssemblyBuilderA64::isFmovSupported(0.12389)); diff --git a/tests/conformance/vector.lua b/tests/conformance/vector.lua index c9cc47aa..9be88f69 100644 --- a/tests/conformance/vector.lua +++ b/tests/conformance/vector.lua @@ -51,6 +51,12 @@ assert(8 * vector(8, 16, 24) == vector(64, 128, 192)); assert(vector(1, 2, 4) * '8' == vector(8, 16, 32)); assert('8' * vector(8, 16, 24) == vector(64, 128, 192)); +assert(vector(1, 2, 4) * -0.125 == vector(-0.125, -0.25, -0.5)) +assert(-0.125 * vector(1, 2, 4) == vector(-0.125, -0.25, -0.5)) + +assert(vector(1, 2, 4) * 100 == vector(100, 200, 400)) +assert(100 * vector(1, 2, 4) == vector(100, 200, 400)) + if vector_size == 4 then assert(vector(1, 2, 4, 8) / vector(8, 16, 24, 32) == vector(1/8, 2/16, 4/24, 8/32)); assert(8 / vector(8, 16, 24, 32) == vector(1, 1/2, 1/3, 1/4));
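

Note on the A64 lowering above: the new constant path in IrLoweringA64.cpp only uses build.fmov(inst.regA64, value) when AssemblyBuilderA64::isFmovSupported(value) holds, i.e. when the splatted float fits the AArch64 FMOV (immediate) encoding; otherwise it falls back to materializing the four 32-bit lanes { v, v, v, 0 } as data and loading them with adr + ldr. As a rough standalone illustration of that encodability rule (a sketch of the architectural constraint only, not Luau's getFmovImm/isFmovSupported), FMOV immediates are limited to values of the form +/-(16 + m)/16 * 2^e with a 4-bit fraction m in [0, 15] and a 3-bit exponent e in [-3, 4], and zero is not representable, which is why the builder emits movi/movi.4s for 0:

    // Standalone sketch of the FMOV (immediate) encodability rule.
    // isFmovEncodable and this main() are illustrative names only, not part of the Luau codebase.
    #include <cassert>
    #include <cmath>

    static bool isFmovEncodable(double value)
    {
        // Zero (and -0.0) cannot be encoded by fmov (the builder uses movi for 0 instead),
        // and NaN/infinity are never representable in this form.
        if (value == 0.0 || !std::isfinite(value))
            return false;

        // Enumerate +/-(16 + m)/16 * 2^e for m in [0, 15], e in [-3, 4].
        // All candidates are exact doubles, so exact comparison is safe here.
        for (int e = -3; e <= 4; ++e)
            for (int m = 0; m <= 15; ++m)
            {
                double magnitude = (16.0 + m) / 16.0 * std::ldexp(1.0, e);

                if (value == magnitude || value == -magnitude)
                    return true;
            }

        return false;
    }

    int main()
    {
        assert(isFmovEncodable(0.125));    // 16/16 * 2^-3, used in the FPImm test
        assert(isFmovEncodable(-0.125));
        assert(isFmovEncodable(1.9375));   // 31/16 * 2^0
        assert(!isFmovEncodable(0.0));     // handled via movi/movi.4s instead
        assert(!isFmovEncodable(0.12389)); // fraction does not fit in 4 bits
        assert(!isFmovEncodable(100.0));   // 25/16 * 2^6, exponent out of range
        return 0;
    }

Under that rule, the new vector.lua cases should exercise both code paths: -0.125 fits the immediate form and can be lowered with a single fmov.4s, while 100 (25/16 * 2^6) should not, so it would take the adr + ldr fallback.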