From c2e4ee0203b78ce390dadc1de91fb9e69a2ef425 Mon Sep 17 00:00:00 2001 From: Varun Saini <61795485+vrn-sn@users.noreply.github.com> Date: Mon, 18 Nov 2024 04:20:05 -0800 Subject: [PATCH 1/2] Fix benchmark runner bug introduced in release 0.652 (#1530) ### Problem In release 0.652, `RequireResolver` was refactored to add support for `luau-analyze`. As part of this update, `RuntimeRequireContext` introduced a new convention where a file's chunkname must be prefixed with `@` (e.g., `@./some/path.luau`). This change applies to all chunknames generated within `RuntimeRequireContext`. However, when a `.luau` file is executed directly from the command line (e.g., `luau ./my/script.luau`), the chunkname is still generated with the old `=` prefix (e.g., `=./some/path.luau`). Since `RuntimeRequireContext` no longer recognizes chunknames prefixed with `=`, any attempt to directly execute a `.luau` file from the command line fails. For example, running `luau ./my/script.luau` results in an error stating that the context is unsupported. [This issue also affects tools like the benchmark runner](https://github.com/luau-lang/luau/pull/1525#issuecomment-2480454018), which rely on direct file execution. ### Solution Update `runFile` to replace the `=` prefix in generated chunknames with `@`. 
--- CLI/Repl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CLI/Repl.cpp b/CLI/Repl.cpp index 43aab4e4..3bda38f1 100644 --- a/CLI/Repl.cpp +++ b/CLI/Repl.cpp @@ -712,7 +712,7 @@ static bool runFile(const char* name, lua_State* GL, bool repl) // new thread needs to have the globals sandboxed luaL_sandboxthread(L); - std::string chunkname = "=" + std::string(name); + std::string chunkname = "@" + std::string(name); std::string bytecode = Luau::compile(*source, copts()); int status = 0; From b1b21f395aee3257c32eaaf2b320db613c3bc4d6 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 20 Nov 2024 00:42:44 +0900 Subject: [PATCH 2/2] Compiler: Optimize k*n and k+n when types are known (#1529) When type information is specified, we can compile k*n and k+n into MULK/ADDK forms that are faster to execute, as long as we think n is a number. Since we generally restrict type-aware optimizations to O2, this optimization is restricted to O2 as well. This makes the trig benchmark ~4% faster on Apple M2 in the VM, and a tiny improvement on scimark (~0.1%) can also be observed. The optimization only affects interpreted execution, as NCG can already synthesize optimal code here. If the type information is not truthful (e.g. the user annotates a type as a number and it's not), the worst-case scenario is flipped arguments to metamethods like __add/__mul for a constant left-hand side. 
Fixes #626 (the fix requires type information or NCG but I doubt any further work on this is warranted) --------- Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com> --- Compiler/src/Compiler.cpp | 19 ++++++++++++++++ tests/Compiler.test.cpp | 47 ++++++++++++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/Compiler/src/Compiler.cpp b/Compiler/src/Compiler.cpp index 7fefe607..6b908c27 100644 --- a/Compiler/src/Compiler.cpp +++ b/Compiler/src/Compiler.cpp @@ -27,6 +27,7 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300) LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5) LUAU_FASTFLAG(LuauNativeAttribute) +LUAU_FASTFLAGVARIABLE(LuauCompileOptimizeRevArith) namespace Luau { @@ -1623,6 +1624,24 @@ struct Compiler return; } } + else if (FFlag::LuauCompileOptimizeRevArith && options.optimizationLevel >= 2 && (expr->op == AstExprBinary::Add || expr->op == AstExprBinary::Mul)) + { + // Optimization: replace k*r with r*k when r is known to be a number (otherwise metamethods may be called) + if (LuauBytecodeType* ty = exprTypes.find(expr); ty && *ty == LBC_TYPE_NUMBER) + { + int32_t lc = getConstantNumber(expr->left); + + if (lc >= 0 && lc <= 255) + { + uint8_t rr = compileExprAuto(expr->right, rs); + + bytecode.emitABC(getBinaryOpArith(expr->op, /* k= */ true), target, rr, uint8_t(lc)); + + hintTemporaryExprRegType(expr->right, rr, LBC_TYPE_NUMBER, /* instLength */ 1); + return; + } + } + } uint8_t rl = compileExprAuto(expr->left, rs); uint8_t rr = compileExprAuto(expr->right, rs); diff --git a/tests/Compiler.test.cpp b/tests/Compiler.test.cpp index 9d0824af..b062cbfe 100644 --- a/tests/Compiler.test.cpp +++ b/tests/Compiler.test.cpp @@ -23,15 +23,17 @@ LUAU_FASTINT(LuauCompileLoopUnrollThresholdMaxBoost) LUAU_FASTINT(LuauRecursionLimit) LUAU_FASTFLAG(LuauUserDefinedTypeFunctionsSyntax2) LUAU_FASTFLAG(LuauCompileVectorTypeInfo) +LUAU_FASTFLAG(LuauCompileOptimizeRevArith) using namespace Luau; 
-static std::string compileFunction(const char* source, uint32_t id, int optimizationLevel = 1, bool enableVectors = false) +static std::string compileFunction(const char* source, uint32_t id, int optimizationLevel = 1, int typeInfoLevel = 0, bool enableVectors = false) { Luau::BytecodeBuilder bcb; bcb.setDumpFlags(Luau::BytecodeBuilder::Dump_Code); Luau::CompileOptions options; options.optimizationLevel = optimizationLevel; + options.typeInfoLevel = typeInfoLevel; if (enableVectors) { options.vectorLib = "Vector3"; @@ -4931,32 +4933,32 @@ L0: RETURN R3 -1 TEST_CASE("VectorLiterals") { - CHECK_EQ("\n" + compileFunction("return Vector3.new(1, 2, 3)", 0, 2, /*enableVectors*/ true), R"( + CHECK_EQ("\n" + compileFunction("return Vector3.new(1, 2, 3)", 0, 2, 0, /*enableVectors*/ true), R"( LOADK R0 K0 [1, 2, 3] RETURN R0 1 )"); - CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3))", 0, 2, /*enableVectors*/ true), R"( + CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3))", 0, 2, 0, /*enableVectors*/ true), R"( GETIMPORT R0 1 [print] LOADK R1 K2 [1, 2, 3] CALL R0 1 0 RETURN R0 0 )"); - CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3, 4))", 0, 2, /*enableVectors*/ true), R"( + CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3, 4))", 0, 2, 0, /*enableVectors*/ true), R"( GETIMPORT R0 1 [print] LOADK R1 K2 [1, 2, 3, 4] CALL R0 1 0 RETURN R0 0 )"); - CHECK_EQ("\n" + compileFunction("return Vector3.new(0, 0, 0), Vector3.new(-0, 0, 0)", 0, 2, /*enableVectors*/ true), R"( + CHECK_EQ("\n" + compileFunction("return Vector3.new(0, 0, 0), Vector3.new(-0, 0, 0)", 0, 2, 0, /*enableVectors*/ true), R"( LOADK R0 K0 [0, 0, 0] LOADK R1 K1 [-0, 0, 0] RETURN R0 2 )"); - CHECK_EQ("\n" + compileFunction("return type(Vector3.new(0, 0, 0))", 0, 2, /*enableVectors*/ true), R"( + CHECK_EQ("\n" + compileFunction("return type(Vector3.new(0, 0, 0))", 0, 2, 0, /*enableVectors*/ true), R"( LOADK R0 K0 ['vector'] RETURN R0 1 )"); @@ -8845,8 +8847,9 @@ RETURN R0 1 
TEST_CASE("ArithRevK") { - // - and / have special optimized form for reverse constants; in the future, + and * will likely get compiled to ADDK/MULK - // other operators are not important enough to optimize reverse constant forms for + ScopedFastFlag sff(FFlag::LuauCompileOptimizeRevArith, true); + + // - and / have special optimized form for reverse constants; in absence of type information, we can't optimize other ops CHECK_EQ( "\n" + compileFunction0(R"( local x: number = unknown @@ -8867,6 +8870,34 @@ IDIV R6 R7 R0 LOADN R8 2 POW R7 R8 R0 RETURN R1 7 +)" + ); + + // the same code with type information can optimize commutative operators (+ and *) as well + // other operators are not important enough to optimize reverse constant forms for + CHECK_EQ( + "\n" + compileFunction( + R"( +local x: number = unknown +return 2 + x, 2 - x, 2 * x, 2 / x, 2 % x, 2 // x, 2 ^ x +)", + 0, + 2, + 1 + ), + R"( +GETIMPORT R0 1 [unknown] +ADDK R1 R0 K2 [2] +SUBRK R2 K2 [2] R0 +MULK R3 R0 K2 [2] +DIVRK R4 K2 [2] R0 +LOADN R6 2 +MOD R5 R6 R0 +LOADN R7 2 +IDIV R6 R7 R0 +LOADN R8 2 +POW R7 R8 R0 +RETURN R1 7 )" ); }