mirror of https://github.com/luau-lang/luau.git
synced 2024-12-12 21:10:37 +00:00
80928acb92
Instead of patching the tag component with TVECTOR in every instruction that produces a vector value, we now use a separate IR instruction (TAG_VECTOR) to do this. This reduces implementation redundancy, but more importantly it allows for a class of optimizations:

- NUM_TO_VECTOR previously patched the component unconditionally, but the result was used only in MUL/DIV_VEC instructions that ignore it anyway; we can now remove this.
- ADD_VEC et al. can now forward the source of the TAG_VECTOR instruction of either input; this shortens the latency chain and in the future could allow us to generate an optimal vector instruction sequence once the temporary stores are marked as dead.
- In the future on X64, ADD_VEC et al. will be able to analyze the input instruction and remove tag masking conditionally. This is not part of this PR, as it requires a decision around the expected FP environment and/or the necessity of the existing masking to begin with.

I've also renamed NUM_TO_VECTOR to NUM_TO_VEC so that "VEC" always refers to "3 float values", and for consistency with ADD_VEC et al.

Note: ADD_VEC input forwarding is currently performed unconditionally; it may or may not increase the number of spills that can't be reloaded from the stack.

On A64 this makes the Taylor series computation a tiny bit faster (11.3 ns => 11.0 ns), as it removes the redundant ins instructions along the NUM_TO_VEC path. Curiously, the optimization of forwarding the TAG_VECTOR input to arithmetic instructions actually carries a small penalty: without it, this PR runs at 10.9 ns. I don't know whether this is a property of the benchmark, though, as I just noticed that in this benchmark type inference actually fails to infer parts of the computation as a vector op. If desired, I will happily omit this part of the change and we can explore it separately.
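To make the forwarding point concrete, here is a minimal sketch, assuming the IrFunction/IrInst/IrOp types from Luau/IrData.h; forwardTagVectorInput is a hypothetical helper name for illustration, not code from this commit:

    // Hypothetical sketch of TAG_VECTOR input forwarding, not the actual change:
    // if an operand refers to a TAG_VECTOR instruction, rewrite it to reference
    // that instruction's untagged source. ADD_VEC/SUB_VEC/MUL_VEC/DIV_VEC ignore
    // the tag component, so the rewrite is safe and drops one instruction from
    // the dependency chain.
    #include "Luau/IrData.h"

    using namespace Luau::CodeGen;

    static void forwardTagVectorInput(IrFunction& function, IrOp& op)
    {
        if (op.kind != IrOpKind::Inst)
            return;

        IrInst& src = function.instOp(op); // instruction that produced this operand

        if (src.cmd == IrCmd::TAG_VECTOR)
            op = src.a; // consume the untagged vector value directly
    }

Applied to both operands of the vector arithmetic instructions, this kind of rewrite is what makes %30 = ADD_VEC %14, %21 in the VectorMulDivMixed test below consume the MUL_VEC/DIV_VEC results directly instead of their TAG_VECTOR counterparts.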
333 lines
7.9 KiB
C++
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "lua.h"
#include "lualib.h"

#include "Luau/BytecodeBuilder.h"
#include "Luau/CodeGen.h"
#include "Luau/Compiler.h"
#include "Luau/Parser.h"

#include "doctest.h"
#include "ScopedFlags.h"

#include <memory>

LUAU_FASTFLAG(LuauCodegenVector)
LUAU_FASTFLAG(LuauCodegenVectorTag)
LUAU_FASTFLAG(LuauCodegenMathMemArgs)

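// Compiles the source at -O2, runs native codegen lowering, and returns the textual IR dump;
// assembly output is disabled and the target is pinned to X64 SysV so the dump is stable across hosts.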
static std::string getCodegenAssembly(const char* source)
{
    Luau::CodeGen::AssemblyOptions options;

    // For IR, we don't care about assembly, but we want a stable target
    options.target = Luau::CodeGen::AssemblyOptions::Target::X64_SystemV;

    options.outputBinary = false;
    options.includeAssembly = false;
    options.includeIr = true;
    options.includeOutlinedCode = false;

    options.includeIrPrefix = Luau::CodeGen::IncludeIrPrefix::No;
    options.includeUseInfo = Luau::CodeGen::IncludeUseInfo::No;
    options.includeCfgInfo = Luau::CodeGen::IncludeCfgInfo::No;
    options.includeRegFlowInfo = Luau::CodeGen::IncludeRegFlowInfo::No;

    Luau::Allocator allocator;
    Luau::AstNameTable names(allocator);
    Luau::ParseResult result = Luau::Parser::parse(source, strlen(source), names, allocator);

    if (!result.errors.empty())
        throw Luau::ParseErrors(result.errors);

    Luau::CompileOptions copts = {};

    copts.optimizationLevel = 2;
    copts.debugLevel = 1;
    copts.vectorCtor = "vector";
    copts.vectorType = "vector";

    Luau::BytecodeBuilder bcb;
    Luau::compileOrThrow(bcb, result, names, copts);

    std::string bytecode = bcb.getBytecode();
    std::unique_ptr<lua_State, void (*)(lua_State*)> globalState(luaL_newstate(), lua_close);
    lua_State* L = globalState.get();

    if (luau_load(L, "name", bytecode.data(), bytecode.size(), 0) == 0)
        return Luau::CodeGen::getAssembly(L, -1, options, nullptr);

    FAIL("Failed to load bytecode");
    return "";
}

TEST_SUITE_BEGIN("IrLowering");

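// Each test compiles a small function and checks the exact IR dump produced by lowering.
// In the tests guarded by LuauCodegenVectorTag, vector results are re-tagged by a single
// TAG_VECTOR instruction before being stored.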
TEST_CASE("VectorReciprocal")
|
|
{
|
|
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
|
|
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
|
|
|
|
CHECK_EQ("\n" + getCodegenAssembly(R"(
|
|
local function vecrcp(a: vector)
|
|
return 1 / a
|
|
end
|
|
)"),
|
|
R"(
|
|
; function vecrcp($arg0) line 2
|
|
bb_0:
|
|
CHECK_TAG R0, tvector, exit(entry)
|
|
JUMP bb_2
|
|
bb_2:
|
|
JUMP bb_bytecode_1
|
|
bb_bytecode_1:
|
|
%6 = NUM_TO_VEC 1
|
|
%7 = LOAD_TVALUE R0
|
|
%8 = DIV_VEC %6, %7
|
|
%9 = TAG_VECTOR %8
|
|
STORE_TVALUE R1, %9
|
|
INTERRUPT 1u
|
|
RETURN R1, 1i
|
|
)");
|
|
}
|
|
|
|
TEST_CASE("VectorComponentRead")
|
|
{
|
|
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
|
|
|
|
CHECK_EQ("\n" + getCodegenAssembly(R"(
|
|
local function compsum(a: vector)
|
|
return a.X + a.Y + a.Z
|
|
end
|
|
)"),
|
|
R"(
|
|
; function compsum($arg0) line 2
|
|
bb_0:
|
|
CHECK_TAG R0, tvector, exit(entry)
|
|
JUMP bb_2
|
|
bb_2:
|
|
JUMP bb_bytecode_1
|
|
bb_bytecode_1:
|
|
%6 = LOAD_FLOAT R0, 0i
|
|
STORE_DOUBLE R3, %6
|
|
STORE_TAG R3, tnumber
|
|
%11 = LOAD_FLOAT R0, 4i
|
|
STORE_DOUBLE R4, %11
|
|
STORE_TAG R4, tnumber
|
|
%20 = ADD_NUM %6, %11
|
|
STORE_DOUBLE R2, %20
|
|
STORE_TAG R2, tnumber
|
|
%25 = LOAD_FLOAT R0, 8i
|
|
STORE_DOUBLE R3, %25
|
|
%34 = ADD_NUM %20, %25
|
|
STORE_DOUBLE R1, %34
|
|
STORE_TAG R1, tnumber
|
|
INTERRUPT 8u
|
|
RETURN R1, 1i
|
|
)");
|
|
}
|
|
|
|
TEST_CASE("VectorAdd")
|
|
{
|
|
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
|
|
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
|
|
|
|
CHECK_EQ("\n" + getCodegenAssembly(R"(
|
|
local function vec3add(a: vector, b: vector)
|
|
return a + b
|
|
end
|
|
)"),
|
|
R"(
|
|
; function vec3add($arg0, $arg1) line 2
|
|
bb_0:
|
|
CHECK_TAG R0, tvector, exit(entry)
|
|
CHECK_TAG R1, tvector, exit(entry)
|
|
JUMP bb_2
|
|
bb_2:
|
|
JUMP bb_bytecode_1
|
|
bb_bytecode_1:
|
|
%10 = LOAD_TVALUE R0
|
|
%11 = LOAD_TVALUE R1
|
|
%12 = ADD_VEC %10, %11
|
|
%13 = TAG_VECTOR %12
|
|
STORE_TVALUE R2, %13
|
|
INTERRUPT 1u
|
|
RETURN R2, 1i
|
|
)");
|
|
}
|
|
|
|
TEST_CASE("VectorMinus")
|
|
{
|
|
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
|
|
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
|
|
|
|
CHECK_EQ("\n" + getCodegenAssembly(R"(
|
|
local function vec3minus(a: vector)
|
|
return -a
|
|
end
|
|
)"),
|
|
R"(
|
|
; function vec3minus($arg0) line 2
|
|
bb_0:
|
|
CHECK_TAG R0, tvector, exit(entry)
|
|
JUMP bb_2
|
|
bb_2:
|
|
JUMP bb_bytecode_1
|
|
bb_bytecode_1:
|
|
%6 = LOAD_TVALUE R0
|
|
%7 = UNM_VEC %6
|
|
%8 = TAG_VECTOR %7
|
|
STORE_TVALUE R1, %8
|
|
INTERRUPT 1u
|
|
RETURN R1, 1i
|
|
)");
|
|
}
|
|
|
|
TEST_CASE("VectorSubMulDiv")
|
|
{
|
|
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
|
|
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
|
|
|
|
CHECK_EQ("\n" + getCodegenAssembly(R"(
|
|
local function vec3combo(a: vector, b: vector, c: vector, d: vector)
|
|
return a * b - c / d
|
|
end
|
|
)"),
|
|
R"(
|
|
; function vec3combo($arg0, $arg1, $arg2, $arg3) line 2
|
|
bb_0:
|
|
CHECK_TAG R0, tvector, exit(entry)
|
|
CHECK_TAG R1, tvector, exit(entry)
|
|
CHECK_TAG R2, tvector, exit(entry)
|
|
CHECK_TAG R3, tvector, exit(entry)
|
|
JUMP bb_2
|
|
bb_2:
|
|
JUMP bb_bytecode_1
|
|
bb_bytecode_1:
|
|
%14 = LOAD_TVALUE R0
|
|
%15 = LOAD_TVALUE R1
|
|
%16 = MUL_VEC %14, %15
|
|
%17 = TAG_VECTOR %16
|
|
STORE_TVALUE R5, %17
|
|
%23 = LOAD_TVALUE R2
|
|
%24 = LOAD_TVALUE R3
|
|
%25 = DIV_VEC %23, %24
|
|
%26 = TAG_VECTOR %25
|
|
STORE_TVALUE R6, %26
|
|
%34 = SUB_VEC %16, %25
|
|
%35 = TAG_VECTOR %34
|
|
STORE_TVALUE R4, %35
|
|
INTERRUPT 3u
|
|
RETURN R4, 1i
|
|
)");
|
|
}
|
|
|
|
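// Note: each ADD_VEC in the expected dump below consumes the untagged MUL_VEC/DIV_VEC/ADD_VEC
// results (%14, %21, %30, %42, %51, %58) directly; the TAG_VECTOR results are only used by the
// temporary register stores.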
TEST_CASE("VectorMulDivMixed")
|
|
{
|
|
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
|
|
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
|
|
|
|
CHECK_EQ("\n" + getCodegenAssembly(R"(
|
|
local function vec3combo(a: vector, b: vector, c: vector, d: vector)
|
|
return a * 2 + b / 4 + 0.5 * c + 40 / d
|
|
end
|
|
)"),
|
|
R"(
|
|
; function vec3combo($arg0, $arg1, $arg2, $arg3) line 2
|
|
bb_0:
|
|
CHECK_TAG R0, tvector, exit(entry)
|
|
CHECK_TAG R1, tvector, exit(entry)
|
|
CHECK_TAG R2, tvector, exit(entry)
|
|
CHECK_TAG R3, tvector, exit(entry)
|
|
JUMP bb_2
|
|
bb_2:
|
|
JUMP bb_bytecode_1
|
|
bb_bytecode_1:
|
|
%12 = LOAD_TVALUE R0
|
|
%13 = NUM_TO_VEC 2
|
|
%14 = MUL_VEC %12, %13
|
|
%15 = TAG_VECTOR %14
|
|
STORE_TVALUE R7, %15
|
|
%19 = LOAD_TVALUE R1
|
|
%20 = NUM_TO_VEC 4
|
|
%21 = DIV_VEC %19, %20
|
|
%22 = TAG_VECTOR %21
|
|
STORE_TVALUE R8, %22
|
|
%30 = ADD_VEC %14, %21
|
|
%31 = TAG_VECTOR %30
|
|
STORE_TVALUE R6, %31
|
|
STORE_DOUBLE R8, 0.5
|
|
STORE_TAG R8, tnumber
|
|
%40 = NUM_TO_VEC 0.5
|
|
%41 = LOAD_TVALUE R2
|
|
%42 = MUL_VEC %40, %41
|
|
%43 = TAG_VECTOR %42
|
|
STORE_TVALUE R7, %43
|
|
%51 = ADD_VEC %30, %42
|
|
%52 = TAG_VECTOR %51
|
|
STORE_TVALUE R5, %52
|
|
%56 = NUM_TO_VEC 40
|
|
%57 = LOAD_TVALUE R3
|
|
%58 = DIV_VEC %56, %57
|
|
%59 = TAG_VECTOR %58
|
|
STORE_TVALUE R6, %59
|
|
%67 = ADD_VEC %51, %58
|
|
%68 = TAG_VECTOR %67
|
|
STORE_TVALUE R4, %68
|
|
INTERRUPT 8u
|
|
RETURN R4, 1i
|
|
)");
|
|
}
|
|
|
|
TEST_CASE("ExtraMathMemoryOperands")
|
|
{
|
|
ScopedFastFlag luauCodegenMathMemArgs{FFlag::LuauCodegenMathMemArgs, true};
|
|
|
|
CHECK_EQ("\n" + getCodegenAssembly(R"(
|
|
local function foo(a: number, b: number, c: number, d: number, e: number)
|
|
return math.floor(a) + math.ceil(b) + math.round(c) + math.sqrt(d) + math.abs(e)
|
|
end
|
|
)"),
|
|
R"(
|
|
; function foo($arg0, $arg1, $arg2, $arg3, $arg4) line 2
|
|
bb_0:
|
|
CHECK_TAG R0, tnumber, exit(entry)
|
|
CHECK_TAG R1, tnumber, exit(entry)
|
|
CHECK_TAG R2, tnumber, exit(entry)
|
|
CHECK_TAG R3, tnumber, exit(entry)
|
|
CHECK_TAG R4, tnumber, exit(entry)
|
|
JUMP bb_2
|
|
bb_2:
|
|
JUMP bb_bytecode_1
|
|
bb_bytecode_1:
|
|
CHECK_SAFE_ENV exit(1)
|
|
%16 = FLOOR_NUM R0
|
|
STORE_DOUBLE R9, %16
|
|
STORE_TAG R9, tnumber
|
|
%23 = CEIL_NUM R1
|
|
STORE_DOUBLE R10, %23
|
|
STORE_TAG R10, tnumber
|
|
%32 = ADD_NUM %16, %23
|
|
STORE_DOUBLE R8, %32
|
|
STORE_TAG R8, tnumber
|
|
%39 = ROUND_NUM R2
|
|
STORE_DOUBLE R9, %39
|
|
%48 = ADD_NUM %32, %39
|
|
STORE_DOUBLE R7, %48
|
|
STORE_TAG R7, tnumber
|
|
%55 = SQRT_NUM R3
|
|
STORE_DOUBLE R8, %55
|
|
%64 = ADD_NUM %48, %55
|
|
STORE_DOUBLE R6, %64
|
|
STORE_TAG R6, tnumber
|
|
%71 = ABS_NUM R4
|
|
STORE_DOUBLE R7, %71
|
|
%80 = ADD_NUM %64, %71
|
|
STORE_DOUBLE R5, %80
|
|
STORE_TAG R5, tnumber
|
|
INTERRUPT 29u
|
|
RETURN R5, 1i
|
|
)");
|
|
}
|
|
|
|
TEST_SUITE_END();