luau/tests/IrLowering.test.cpp
Arseny Kapoulkine 80928acb92
CodeGen: Extract all vector tag patching into TAG_VECTOR (#1171)
Instead of patching the tag component with TVECTOR in every instruction that
produces a vector value, we now use a separate TAG_VECTOR IR instruction to
do this. This reduces implementation redundancy, but more importantly it
allows for a class of optimizations:

- NUM_TO_VECTOR previously patched the tag component unconditionally, but its
result was only used in MUL/DIV_VEC instructions that ignore it anyway; we
can now remove the patch entirely (see the sketch after this list).

- ADD_VEC et al can now forward the source of a TAG_VECTOR instruction on
either input; this shortens the latency chain and in the future could allow
us to generate an optimal vector instruction sequence once the temporary
stores are marked as dead (a concrete example follows the note below).

- In the future on X64, ADD_VEC et al will be able to analyze the input
instruction and remove the tag masking conditionally. This is not part of
this PR, as it requires a decision around the expected FP environment and/or
the necessity of the existing masking to begin with.
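
For example (a sketch based on the VectorReciprocal test below), "return 1 / a"
now lowers to

  %6 = NUM_TO_VEC 1
  %7 = LOAD_TVALUE R0
  %8 = DIV_VEC %6, %7
  %9 = TAG_VECTOR %8
  STORE_TVALUE R1, %9

NUM_TO_VEC no longer touches the tag component (DIV_VEC ignores it), and the
tag is written exactly once, by TAG_VECTOR, right before the value is stored.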

I've also renamed NUM_TO_VECTOR to NUM_TO_VEC so that "VEC" always
refers to "3 float values" and for consistency with ADD/etc.

Note: ADD_VEC input forwarding is currently performed unconditionally; it may
or may not increase the number of spills, since the forwarded (untagged)
values can't be rematerialized by reloading from the stack.
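
Concretely (taken from the VectorSubMulDiv test below), the SUB_VEC consumes
the untagged MUL_VEC/DIV_VEC results rather than their TAG_VECTOR outputs:

  %16 = MUL_VEC %14, %15
  %17 = TAG_VECTOR %16
  STORE_TVALUE R5, %17
  ...
  %34 = SUB_VEC %16, %25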

On A64 this makes the Taylor series computation a tiny bit faster (11.3ns =>
11.0ns) as it removes the redundant "ins" instructions along the NUM_TO_VEC
path. Curiously, forwarding TAG_VECTOR inputs to arithmetic instructions
actually carries a small penalty: without it, this PR runs at 10.9ns. I don't
know if this is a property of the benchmark though, as I just noticed that
type inference fails to infer parts of this benchmark's computation as vector
ops. If desired, I will happily omit this part of the change and we can
explore it separately.
2024-02-21 07:06:11 -08:00


// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "lua.h"
#include "lualib.h"

#include "Luau/BytecodeBuilder.h"
#include "Luau/CodeGen.h"
#include "Luau/Compiler.h"
#include "Luau/Parser.h"

#include "doctest.h"
#include "ScopedFlags.h"

#include <memory>

LUAU_FASTFLAG(LuauCodegenVector)
LUAU_FASTFLAG(LuauCodegenVectorTag)
LUAU_FASTFLAG(LuauCodegenMathMemArgs)
static std::string getCodegenAssembly(const char* source)
{
    Luau::CodeGen::AssemblyOptions options;

    // For IR, we don't care about assembly, but we want a stable target
    options.target = Luau::CodeGen::AssemblyOptions::Target::X64_SystemV;

    options.outputBinary = false;
    options.includeAssembly = false;
    options.includeIr = true;
    options.includeOutlinedCode = false;

    options.includeIrPrefix = Luau::CodeGen::IncludeIrPrefix::No;
    options.includeUseInfo = Luau::CodeGen::IncludeUseInfo::No;
    options.includeCfgInfo = Luau::CodeGen::IncludeCfgInfo::No;
    options.includeRegFlowInfo = Luau::CodeGen::IncludeRegFlowInfo::No;

    Luau::Allocator allocator;
    Luau::AstNameTable names(allocator);
    Luau::ParseResult result = Luau::Parser::parse(source, strlen(source), names, allocator);

    if (!result.errors.empty())
        throw Luau::ParseErrors(result.errors);

    Luau::CompileOptions copts = {};
    copts.optimizationLevel = 2;
    copts.debugLevel = 1;
    copts.vectorCtor = "vector";
    copts.vectorType = "vector";

    Luau::BytecodeBuilder bcb;
    Luau::compileOrThrow(bcb, result, names, copts);

    std::string bytecode = bcb.getBytecode();
    std::unique_ptr<lua_State, void (*)(lua_State*)> globalState(luaL_newstate(), lua_close);
    lua_State* L = globalState.get();

    if (luau_load(L, "name", bytecode.data(), bytecode.size(), 0) == 0)
        return Luau::CodeGen::getAssembly(L, -1, options, nullptr);

    FAIL("Failed to load bytecode");
    return "";
}

TEST_SUITE_BEGIN("IrLowering");
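
// 1 / vector: the constant is splatted with NUM_TO_VEC, and the tag is written
// once, by TAG_VECTOR, just before the result is stored.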
TEST_CASE("VectorReciprocal")
{
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
CHECK_EQ("\n" + getCodegenAssembly(R"(
local function vecrcp(a: vector)
return 1 / a
end
)"),
R"(
; function vecrcp($arg0) line 2
bb_0:
CHECK_TAG R0, tvector, exit(entry)
JUMP bb_2
bb_2:
JUMP bb_bytecode_1
bb_bytecode_1:
%6 = NUM_TO_VEC 1
%7 = LOAD_TVALUE R0
%8 = DIV_VEC %6, %7
%9 = TAG_VECTOR %8
STORE_TVALUE R1, %9
INTERRUPT 1u
RETURN R1, 1i
)");
}
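
// Component reads lower to LOAD_FLOAT at byte offsets 0/4/8; no TAG_VECTOR is
// involved since the results are numbers.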
TEST_CASE("VectorComponentRead")
{
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
CHECK_EQ("\n" + getCodegenAssembly(R"(
local function compsum(a: vector)
return a.X + a.Y + a.Z
end
)"),
R"(
; function compsum($arg0) line 2
bb_0:
CHECK_TAG R0, tvector, exit(entry)
JUMP bb_2
bb_2:
JUMP bb_bytecode_1
bb_bytecode_1:
%6 = LOAD_FLOAT R0, 0i
STORE_DOUBLE R3, %6
STORE_TAG R3, tnumber
%11 = LOAD_FLOAT R0, 4i
STORE_DOUBLE R4, %11
STORE_TAG R4, tnumber
%20 = ADD_NUM %6, %11
STORE_DOUBLE R2, %20
STORE_TAG R2, tnumber
%25 = LOAD_FLOAT R0, 8i
STORE_DOUBLE R3, %25
%34 = ADD_NUM %20, %25
STORE_DOUBLE R1, %34
STORE_TAG R1, tnumber
INTERRUPT 8u
RETURN R1, 1i
)");
}
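
// vector + vector: a single ADD_VEC with one trailing TAG_VECTOR before the store.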
TEST_CASE("VectorAdd")
{
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
CHECK_EQ("\n" + getCodegenAssembly(R"(
local function vec3add(a: vector, b: vector)
return a + b
end
)"),
R"(
; function vec3add($arg0, $arg1) line 2
bb_0:
CHECK_TAG R0, tvector, exit(entry)
CHECK_TAG R1, tvector, exit(entry)
JUMP bb_2
bb_2:
JUMP bb_bytecode_1
bb_bytecode_1:
%10 = LOAD_TVALUE R0
%11 = LOAD_TVALUE R1
%12 = ADD_VEC %10, %11
%13 = TAG_VECTOR %12
STORE_TVALUE R2, %13
INTERRUPT 1u
RETURN R2, 1i
)");
}
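
// Unary minus lowers to UNM_VEC, again with a single trailing TAG_VECTOR.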
TEST_CASE("VectorMinus")
{
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
CHECK_EQ("\n" + getCodegenAssembly(R"(
local function vec3minus(a: vector)
return -a
end
)"),
R"(
; function vec3minus($arg0) line 2
bb_0:
CHECK_TAG R0, tvector, exit(entry)
JUMP bb_2
bb_2:
JUMP bb_bytecode_1
bb_bytecode_1:
%6 = LOAD_TVALUE R0
%7 = UNM_VEC %6
%8 = TAG_VECTOR %7
STORE_TVALUE R1, %8
INTERRUPT 1u
RETURN R1, 1i
)");
}
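
// SUB_VEC consumes the untagged MUL_VEC/DIV_VEC results (%16, %25) directly
// instead of the TAG_VECTOR outputs (%17, %26), exercising TAG_VECTOR input
// forwarding.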
TEST_CASE("VectorSubMulDiv")
{
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
CHECK_EQ("\n" + getCodegenAssembly(R"(
local function vec3combo(a: vector, b: vector, c: vector, d: vector)
return a * b - c / d
end
)"),
R"(
; function vec3combo($arg0, $arg1, $arg2, $arg3) line 2
bb_0:
CHECK_TAG R0, tvector, exit(entry)
CHECK_TAG R1, tvector, exit(entry)
CHECK_TAG R2, tvector, exit(entry)
CHECK_TAG R3, tvector, exit(entry)
JUMP bb_2
bb_2:
JUMP bb_bytecode_1
bb_bytecode_1:
%14 = LOAD_TVALUE R0
%15 = LOAD_TVALUE R1
%16 = MUL_VEC %14, %15
%17 = TAG_VECTOR %16
STORE_TVALUE R5, %17
%23 = LOAD_TVALUE R2
%24 = LOAD_TVALUE R3
%25 = DIV_VEC %23, %24
%26 = TAG_VECTOR %25
STORE_TVALUE R6, %26
%34 = SUB_VEC %16, %25
%35 = TAG_VECTOR %34
STORE_TVALUE R4, %35
INTERRUPT 3u
RETURN R4, 1i
)");
}
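
// Mixed number/vector arithmetic: constants go through NUM_TO_VEC, and the
// ADD_VEC chain (%30, %51, %67) forwards untagged intermediate results.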
TEST_CASE("VectorMulDivMixed")
{
ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true};
ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true};
CHECK_EQ("\n" + getCodegenAssembly(R"(
local function vec3combo(a: vector, b: vector, c: vector, d: vector)
return a * 2 + b / 4 + 0.5 * c + 40 / d
end
)"),
R"(
; function vec3combo($arg0, $arg1, $arg2, $arg3) line 2
bb_0:
CHECK_TAG R0, tvector, exit(entry)
CHECK_TAG R1, tvector, exit(entry)
CHECK_TAG R2, tvector, exit(entry)
CHECK_TAG R3, tvector, exit(entry)
JUMP bb_2
bb_2:
JUMP bb_bytecode_1
bb_bytecode_1:
%12 = LOAD_TVALUE R0
%13 = NUM_TO_VEC 2
%14 = MUL_VEC %12, %13
%15 = TAG_VECTOR %14
STORE_TVALUE R7, %15
%19 = LOAD_TVALUE R1
%20 = NUM_TO_VEC 4
%21 = DIV_VEC %19, %20
%22 = TAG_VECTOR %21
STORE_TVALUE R8, %22
%30 = ADD_VEC %14, %21
%31 = TAG_VECTOR %30
STORE_TVALUE R6, %31
STORE_DOUBLE R8, 0.5
STORE_TAG R8, tnumber
%40 = NUM_TO_VEC 0.5
%41 = LOAD_TVALUE R2
%42 = MUL_VEC %40, %41
%43 = TAG_VECTOR %42
STORE_TVALUE R7, %43
%51 = ADD_VEC %30, %42
%52 = TAG_VECTOR %51
STORE_TVALUE R5, %52
%56 = NUM_TO_VEC 40
%57 = LOAD_TVALUE R3
%58 = DIV_VEC %56, %57
%59 = TAG_VECTOR %58
STORE_TVALUE R6, %59
%67 = ADD_VEC %51, %58
%68 = TAG_VECTOR %67
STORE_TVALUE R4, %68
INTERRUPT 8u
RETURN R4, 1i
)");
}
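
// With LuauCodegenMathMemArgs, FLOOR_NUM and friends take the VM register slot
// directly as a (memory) operand instead of a separately loaded value.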
TEST_CASE("ExtraMathMemoryOperands")
{
ScopedFastFlag luauCodegenMathMemArgs{FFlag::LuauCodegenMathMemArgs, true};
CHECK_EQ("\n" + getCodegenAssembly(R"(
local function foo(a: number, b: number, c: number, d: number, e: number)
return math.floor(a) + math.ceil(b) + math.round(c) + math.sqrt(d) + math.abs(e)
end
)"),
R"(
; function foo($arg0, $arg1, $arg2, $arg3, $arg4) line 2
bb_0:
CHECK_TAG R0, tnumber, exit(entry)
CHECK_TAG R1, tnumber, exit(entry)
CHECK_TAG R2, tnumber, exit(entry)
CHECK_TAG R3, tnumber, exit(entry)
CHECK_TAG R4, tnumber, exit(entry)
JUMP bb_2
bb_2:
JUMP bb_bytecode_1
bb_bytecode_1:
CHECK_SAFE_ENV exit(1)
%16 = FLOOR_NUM R0
STORE_DOUBLE R9, %16
STORE_TAG R9, tnumber
%23 = CEIL_NUM R1
STORE_DOUBLE R10, %23
STORE_TAG R10, tnumber
%32 = ADD_NUM %16, %23
STORE_DOUBLE R8, %32
STORE_TAG R8, tnumber
%39 = ROUND_NUM R2
STORE_DOUBLE R9, %39
%48 = ADD_NUM %32, %39
STORE_DOUBLE R7, %48
STORE_TAG R7, tnumber
%55 = SQRT_NUM R3
STORE_DOUBLE R8, %55
%64 = ADD_NUM %48, %55
STORE_DOUBLE R6, %64
STORE_TAG R6, tnumber
%71 = ABS_NUM R4
STORE_DOUBLE R7, %71
%80 = ADD_NUM %64, %71
STORE_DOUBLE R5, %80
STORE_TAG R5, tnumber
INTERRUPT 29u
RETURN R5, 1i
)");
}
TEST_SUITE_END();