Compiler: Optimize k*n and k+n when types are known (#1529)

When type information is available and tells us that n is a number, we can compile k*n and k+n into
the MULK/ADDK forms, which are faster to execute. Since we generally restrict type-aware optimizations
to O2, this optimization is gated on O2 as well.
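
For example (a sketch; register and constant numbering depends on the surrounding code, see the
ArithRevK test added below), with a number annotation the commutative operators can use the K-form
instructions:

    local x: number = unknown
    return 2 + x, 2 * x
    -- at -O2 with type info, 2 + x is expected to become ADDK and 2 * x MULK
    -- (see the ArithRevK test below for the exact bytecode)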

This makes the trig benchmark ~4% faster on Apple M2 in the VM, and a tiny improvement (~0.1%) can
also be observed on scimark. The optimization only affects interpreted execution, as NCG can already
synthesize optimal code here.

If the type information is not truthful (e.g. the user annotates a value as a number when it is not),
the worst-case scenario is that metamethods like __add/__mul receive their arguments in flipped order
when the constant is on the left-hand side.
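
Concretely, an illustrative sketch (the mt/x/y names are hypothetical, not from this change): if a
value annotated as a number actually carries a __mul metamethod, that metamethod may observe its
operands in the opposite order once the constant form is used:

    local mt = { __mul = function(a, b) print(a, b) return 0 end }
    local x: number = setmetatable({}, mt) :: any
    local y = 2 * x
    -- without the optimization, the slow path is expected to call __mul(2, x);
    -- with MULK the register operand comes first, so __mul would see (x, 2)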

Fixes #626 (the fix requires type information or NCG, but I doubt any further work on this is
warranted).

---------

Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com>
Arseny Kapoulkine 2024-11-20 00:42:44 +09:00 committed by GitHub
parent c2e4ee0203
commit b1b21f395a
2 changed files with 58 additions and 8 deletions


@@ -27,6 +27,7 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300)
LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5)
LUAU_FASTFLAG(LuauNativeAttribute)
LUAU_FASTFLAGVARIABLE(LuauCompileOptimizeRevArith)
namespace Luau
{
@@ -1623,6 +1624,24 @@ struct Compiler
return;
}
}
else if (FFlag::LuauCompileOptimizeRevArith && options.optimizationLevel >= 2 && (expr->op == AstExprBinary::Add || expr->op == AstExprBinary::Mul))
{
// Optimization: replace k*r with r*k when r is known to be a number (otherwise metamethods may be called)
if (LuauBytecodeType* ty = exprTypes.find(expr); ty && *ty == LBC_TYPE_NUMBER)
{
int32_t lc = getConstantNumber(expr->left);
if (lc >= 0 && lc <= 255)
{
uint8_t rr = compileExprAuto(expr->right, rs);
bytecode.emitABC(getBinaryOpArith(expr->op, /* k= */ true), target, rr, uint8_t(lc));
hintTemporaryExprRegType(expr->right, rr, LBC_TYPE_NUMBER, /* instLength */ 1);
return;
}
}
}
uint8_t rl = compileExprAuto(expr->left, rs);
uint8_t rr = compileExprAuto(expr->right, rs);


@@ -23,15 +23,17 @@ LUAU_FASTINT(LuauCompileLoopUnrollThresholdMaxBoost)
LUAU_FASTINT(LuauRecursionLimit)
LUAU_FASTFLAG(LuauUserDefinedTypeFunctionsSyntax2)
LUAU_FASTFLAG(LuauCompileVectorTypeInfo)
LUAU_FASTFLAG(LuauCompileOptimizeRevArith)
using namespace Luau;
static std::string compileFunction(const char* source, uint32_t id, int optimizationLevel = 1, bool enableVectors = false)
static std::string compileFunction(const char* source, uint32_t id, int optimizationLevel = 1, int typeInfoLevel = 0, bool enableVectors = false)
{
Luau::BytecodeBuilder bcb;
bcb.setDumpFlags(Luau::BytecodeBuilder::Dump_Code);
Luau::CompileOptions options;
options.optimizationLevel = optimizationLevel;
options.typeInfoLevel = typeInfoLevel;
if (enableVectors)
{
options.vectorLib = "Vector3";
@@ -4931,32 +4933,32 @@ L0: RETURN R3 -1
TEST_CASE("VectorLiterals")
{
CHECK_EQ("\n" + compileFunction("return Vector3.new(1, 2, 3)", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("return Vector3.new(1, 2, 3)", 0, 2, 0, /*enableVectors*/ true), R"(
LOADK R0 K0 [1, 2, 3]
RETURN R0 1
)");
CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3))", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3))", 0, 2, 0, /*enableVectors*/ true), R"(
GETIMPORT R0 1 [print]
LOADK R1 K2 [1, 2, 3]
CALL R0 1 0
RETURN R0 0
)");
CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3, 4))", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3, 4))", 0, 2, 0, /*enableVectors*/ true), R"(
GETIMPORT R0 1 [print]
LOADK R1 K2 [1, 2, 3, 4]
CALL R0 1 0
RETURN R0 0
)");
CHECK_EQ("\n" + compileFunction("return Vector3.new(0, 0, 0), Vector3.new(-0, 0, 0)", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("return Vector3.new(0, 0, 0), Vector3.new(-0, 0, 0)", 0, 2, 0, /*enableVectors*/ true), R"(
LOADK R0 K0 [0, 0, 0]
LOADK R1 K1 [-0, 0, 0]
RETURN R0 2
)");
CHECK_EQ("\n" + compileFunction("return type(Vector3.new(0, 0, 0))", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("return type(Vector3.new(0, 0, 0))", 0, 2, 0, /*enableVectors*/ true), R"(
LOADK R0 K0 ['vector']
RETURN R0 1
)");
@@ -8845,8 +8847,9 @@ RETURN R0 1
TEST_CASE("ArithRevK")
{
// - and / have special optimized form for reverse constants; in the future, + and * will likely get compiled to ADDK/MULK
// other operators are not important enough to optimize reverse constant forms for
ScopedFastFlag sff(FFlag::LuauCompileOptimizeRevArith, true);
// - and / have special optimized form for reverse constants; in absence of type information, we can't optimize other ops
CHECK_EQ(
"\n" + compileFunction0(R"(
local x: number = unknown
@@ -8867,6 +8870,34 @@ IDIV R6 R7 R0
LOADN R8 2
POW R7 R8 R0
RETURN R1 7
)"
);
// the same code with type information can optimize commutative operators (+ and *) as well
// other operators are not important enough to optimize reverse constant forms for
CHECK_EQ(
"\n" + compileFunction(
R"(
local x: number = unknown
return 2 + x, 2 - x, 2 * x, 2 / x, 2 % x, 2 // x, 2 ^ x
)",
0,
2,
1
),
R"(
GETIMPORT R0 1 [unknown]
ADDK R1 R0 K2 [2]
SUBRK R2 K2 [2] R0
MULK R3 R0 K2 [2]
DIVRK R4 K2 [2] R0
LOADN R6 2
MOD R5 R6 R0
LOADN R7 2
IDIV R6 R7 R0
LOADN R8 2
POW R7 R8 R0
RETURN R1 7
)"
);
}