luau/CodeGen/src/IrLoweringX64.cpp
ariel 640ebbc0a5
Sync to upstream/release/663 (#1699)
Hey folks, another week means another Luau release! This one features a
number of bug fixes in the New Type Solver including improvements to
user-defined type functions and a bunch of work to untangle some of the
outstanding issues we've been seeing with constraint solving not
completing in real world use. We're also continuing to make progress on
crashes and other problems that affect the stability of fragment
autocomplete, as we work towards delivering consistent, low-latency
autocomplete for any editor environment.

## New Type Solver

- Fix a bug in user-defined type functions where `print` would
incorrectly insert `\1` a number of times.
- Fix a bug where attempting to refine an optional generic with a type
test will cause a false positive type error (fixes #1666)
- Fix a bug where the `refine` type family would not skip over
`*no-refine*` discriminants (partial resolution for #1424)
- Fix a constraint solving bug where recursive function calls would
consistently produce cyclic constraints leading to incomplete or
inaccurate type inference.
- Implement `readparent` and `writeparent` for class types in
user-defined type functions, replacing the incorrectly included `parent`
method.
- Add initial groundwork (under a debug flag) for eager free type
generalization, moving us towards further improvements to constraint
solving incomplete errors.

## Fragment Autocomplete

- Ease up some assertions to improve stability of mixed-mode use of the
two type solvers (i.e. using Fragment Autocomplete on a type graph
originally produced by the old type solver)
- Resolve a bug with type compatibility checks causing internal compiler
errors in autocomplete.

## Lexer and Parser

- Improve the accuracy of the roundtrippable AST parsing mode by
correctly placing closing parentheses on type groupings.
- Add a getter for `offset` in the Lexer by @aduermael in #1688
- Add a second entry point to the parser to parse an expression,
`parseExpr`

## Internal Contributors

Co-authored-by: Andy Friesen <afriesen@roblox.com>
Co-authored-by: Ariel Weiss <aaronweiss@roblox.com>
Co-authored-by: Aviral Goel <agoel@roblox.com>
Co-authored-by: Hunter Goldstein <hgoldstein@roblox.com>
Co-authored-by: James McNellis <jmcnellis@roblox.com>
Co-authored-by: Talha Pathan <tpathan@roblox.com>
Co-authored-by: Vighnesh Vijay <vvijay@roblox.com>
Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>

---------

Co-authored-by: Hunter Goldstein <hgoldstein@roblox.com>
Co-authored-by: Varun Saini <61795485+vrn-sn@users.noreply.github.com>
Co-authored-by: Alexander Youngblood <ayoungblood@roblox.com>
Co-authored-by: Menarul Alam <malam@roblox.com>
Co-authored-by: Aviral Goel <agoel@roblox.com>
Co-authored-by: Vighnesh <vvijay@roblox.com>
Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2025-02-28 14:42:30 -08:00

2382 lines
82 KiB
C++

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrLoweringX64.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include "Luau/IrUtils.h"
#include "Luau/LoweringStats.h"
#include "Luau/IrCallWrapperX64.h"
#include "EmitBuiltinsX64.h"
#include "EmitCommonX64.h"
#include "EmitInstructionX64.h"
#include "NativeState.h"
#include "lstate.h"
#include "lgc.h"
LUAU_FASTFLAG(LuauVectorLibNativeDot)
namespace Luau
{
namespace CodeGen
{
namespace X64
{
IrLoweringX64::IrLoweringX64(AssemblyBuilderX64& build, ModuleHelpers& helpers, IrFunction& function, LoweringStats* stats)
: build(build)
, helpers(helpers)
, function(function)
, stats(stats)
, regs(build, function, stats)
, valueTracker(function)
, exitHandlerMap(~0u)
{
valueTracker.setRestoreCallack(
&regs,
[](void* context, IrInst& inst)
{
((IrRegAllocX64*)context)->restore(inst, false);
}
);
build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
}
void IrLoweringX64::storeDoubleAsFloat(OperandX64 dst, IrOp src)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
if (src.kind == IrOpKind::Constant)
{
build.vmovss(tmp.reg, build.f32(float(doubleOp(src))));
}
else if (src.kind == IrOpKind::Inst)
{
build.vcvtsd2ss(tmp.reg, regOp(src), regOp(src));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
build.vmovss(dst, tmp.reg);
}
void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
{
regs.currInstIdx = index;
valueTracker.beforeInstLowering(inst);
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
inst.regX64 = regs.allocReg(SizeX64::dword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.mov(inst.regX64, luauRegTag(vmRegOp(inst.a)));
else if (inst.a.kind == IrOpKind::VmConst)
build.mov(inst.regX64, luauConstantTag(vmConstOp(inst.a)));
// If we have a register, we assume it's a pointer to TValue
// We might introduce explicit operand types in the future to make this more robust
else if (inst.a.kind == IrOpKind::Inst)
build.mov(inst.regX64, dword[regOp(inst.a) + offsetof(TValue, tt)]);
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_POINTER:
inst.regX64 = regs.allocReg(SizeX64::qword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.mov(inst.regX64, luauRegValue(vmRegOp(inst.a)));
else if (inst.a.kind == IrOpKind::VmConst)
build.mov(inst.regX64, luauConstantValue(vmConstOp(inst.a)));
// If we have a register, we assume it's a pointer to TValue
// We might introduce explicit operand types in the future to make this more robust
else if (inst.a.kind == IrOpKind::Inst)
build.mov(inst.regX64, qword[regOp(inst.a) + offsetof(TValue, value)]);
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_DOUBLE:
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.vmovsd(inst.regX64, luauRegValue(vmRegOp(inst.a)));
else if (inst.a.kind == IrOpKind::VmConst)
build.vmovsd(inst.regX64, luauConstantValue(vmConstOp(inst.a)));
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_INT:
inst.regX64 = regs.allocReg(SizeX64::dword, index);
build.mov(inst.regX64, luauRegValueInt(vmRegOp(inst.a)));
break;
case IrCmd::LOAD_FLOAT:
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
if (inst.a.kind == IrOpKind::VmReg)
build.vcvtss2sd(inst.regX64, inst.regX64, dword[rBase + vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value) + intOp(inst.b)]);
else if (inst.a.kind == IrOpKind::VmConst)
build.vcvtss2sd(
inst.regX64, inst.regX64, dword[rConstants + vmConstOp(inst.a) * sizeof(TValue) + offsetof(TValue, value) + intOp(inst.b)]
);
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::LOAD_TVALUE:
{
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
int addrOffset = inst.b.kind != IrOpKind::None ? intOp(inst.b) : 0;
if (inst.a.kind == IrOpKind::VmReg)
build.vmovups(inst.regX64, luauReg(vmRegOp(inst.a)));
else if (inst.a.kind == IrOpKind::VmConst)
build.vmovups(inst.regX64, luauConstant(vmConstOp(inst.a)));
else if (inst.a.kind == IrOpKind::Inst)
build.vmovups(inst.regX64, xmmword[regOp(inst.a) + addrOffset]);
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
}
case IrCmd::LOAD_ENV:
inst.regX64 = regs.allocReg(SizeX64::qword, index);
build.mov(inst.regX64, sClosure);
build.mov(inst.regX64, qword[inst.regX64 + offsetof(Closure, env)]);
break;
case IrCmd::GET_ARR_ADDR:
if (inst.b.kind == IrOpKind::Inst)
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {inst.b});
if (dwordReg(inst.regX64) != regOp(inst.b))
build.mov(dwordReg(inst.regX64), regOp(inst.b));
build.shl(dwordReg(inst.regX64), kTValueSizeLog2);
build.add(inst.regX64, qword[regOp(inst.a) + offsetof(LuaTable, array)]);
}
else if (inst.b.kind == IrOpKind::Constant)
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {inst.a});
build.mov(inst.regX64, qword[regOp(inst.a) + offsetof(LuaTable, array)]);
if (intOp(inst.b) != 0)
build.lea(inst.regX64, addr[inst.regX64 + intOp(inst.b) * sizeof(TValue)]);
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
case IrCmd::GET_SLOT_NODE_ADDR:
{
inst.regX64 = regs.allocReg(SizeX64::qword, index);
ScopedRegX64 tmp{regs, SizeX64::qword};
getTableNodeAtCachedSlot(build, tmp.reg, inst.regX64, regOp(inst.a), uintOp(inst.b));
break;
}
case IrCmd::GET_HASH_NODE_ADDR:
{
// Custom bit shift value can only be placed in cl
ScopedRegX64 shiftTmp{regs, regs.takeReg(rcx, kInvalidInstIdx)};
inst.regX64 = regs.allocReg(SizeX64::qword, index);
ScopedRegX64 tmp{regs, SizeX64::qword};
build.mov(inst.regX64, qword[regOp(inst.a) + offsetof(LuaTable, node)]);
build.mov(dwordReg(tmp.reg), 1);
build.mov(byteReg(shiftTmp.reg), byte[regOp(inst.a) + offsetof(LuaTable, lsizenode)]);
build.shl(dwordReg(tmp.reg), byteReg(shiftTmp.reg));
build.dec(dwordReg(tmp.reg));
build.and_(dwordReg(tmp.reg), uintOp(inst.b));
build.shl(tmp.reg, kLuaNodeSizeLog2);
build.add(inst.regX64, tmp.reg);
break;
};
case IrCmd::GET_CLOSURE_UPVAL_ADDR:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {inst.a});
if (inst.a.kind == IrOpKind::Undef)
{
build.mov(inst.regX64, sClosure);
}
else
{
RegisterX64 cl = regOp(inst.a);
if (inst.regX64 != cl)
build.mov(inst.regX64, cl);
}
build.add(inst.regX64, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b));
break;
}
case IrCmd::STORE_TAG:
if (inst.b.kind == IrOpKind::Constant)
{
if (inst.a.kind == IrOpKind::Inst)
build.mov(dword[regOp(inst.a) + offsetof(TValue, tt)], tagOp(inst.b));
else
build.mov(luauRegTag(vmRegOp(inst.a)), tagOp(inst.b));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
case IrCmd::STORE_POINTER:
{
OperandX64 valueLhs = inst.a.kind == IrOpKind::Inst ? qword[regOp(inst.a) + offsetof(TValue, value)] : luauRegValue(vmRegOp(inst.a));
if (inst.b.kind == IrOpKind::Constant)
{
CODEGEN_ASSERT(intOp(inst.b) == 0);
build.mov(valueLhs, 0);
}
else if (inst.b.kind == IrOpKind::Inst)
{
build.mov(valueLhs, regOp(inst.b));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
}
case IrCmd::STORE_EXTRA:
if (inst.b.kind == IrOpKind::Constant)
{
if (inst.a.kind == IrOpKind::Inst)
build.mov(dword[regOp(inst.a) + offsetof(TValue, extra)], intOp(inst.b));
else
build.mov(luauRegExtra(vmRegOp(inst.a)), intOp(inst.b));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
case IrCmd::STORE_DOUBLE:
{
OperandX64 valueLhs = inst.a.kind == IrOpKind::Inst ? qword[regOp(inst.a) + offsetof(TValue, value)] : luauRegValue(vmRegOp(inst.a));
if (inst.b.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, build.f64(doubleOp(inst.b)));
build.vmovsd(valueLhs, tmp.reg);
}
else if (inst.b.kind == IrOpKind::Inst)
{
build.vmovsd(valueLhs, regOp(inst.b));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
}
case IrCmd::STORE_INT:
if (inst.b.kind == IrOpKind::Constant)
build.mov(luauRegValueInt(vmRegOp(inst.a)), intOp(inst.b));
else if (inst.b.kind == IrOpKind::Inst)
build.mov(luauRegValueInt(vmRegOp(inst.a)), regOp(inst.b));
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
case IrCmd::STORE_VECTOR:
storeDoubleAsFloat(luauRegValueVector(vmRegOp(inst.a), 0), inst.b);
storeDoubleAsFloat(luauRegValueVector(vmRegOp(inst.a), 1), inst.c);
storeDoubleAsFloat(luauRegValueVector(vmRegOp(inst.a), 2), inst.d);
if (inst.e.kind != IrOpKind::None)
build.mov(luauRegTag(vmRegOp(inst.a)), tagOp(inst.e));
break;
case IrCmd::STORE_TVALUE:
{
int addrOffset = inst.c.kind != IrOpKind::None ? intOp(inst.c) : 0;
if (inst.a.kind == IrOpKind::VmReg)
build.vmovups(luauReg(vmRegOp(inst.a)), regOp(inst.b));
else if (inst.a.kind == IrOpKind::Inst)
build.vmovups(xmmword[regOp(inst.a) + addrOffset], regOp(inst.b));
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
}
case IrCmd::STORE_SPLIT_TVALUE:
{
int addrOffset = inst.d.kind != IrOpKind::None ? intOp(inst.d) : 0;
OperandX64 tagLhs = inst.a.kind == IrOpKind::Inst ? dword[regOp(inst.a) + offsetof(TValue, tt) + addrOffset] : luauRegTag(vmRegOp(inst.a));
build.mov(tagLhs, tagOp(inst.b));
if (tagOp(inst.b) == LUA_TBOOLEAN)
{
OperandX64 valueLhs =
inst.a.kind == IrOpKind::Inst ? dword[regOp(inst.a) + offsetof(TValue, value) + addrOffset] : luauRegValueInt(vmRegOp(inst.a));
build.mov(valueLhs, inst.c.kind == IrOpKind::Constant ? OperandX64(intOp(inst.c)) : regOp(inst.c));
}
else if (tagOp(inst.b) == LUA_TNUMBER)
{
OperandX64 valueLhs =
inst.a.kind == IrOpKind::Inst ? qword[regOp(inst.a) + offsetof(TValue, value) + addrOffset] : luauRegValue(vmRegOp(inst.a));
if (inst.c.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, build.f64(doubleOp(inst.c)));
build.vmovsd(valueLhs, tmp.reg);
}
else
{
build.vmovsd(valueLhs, regOp(inst.c));
}
}
else if (isGCO(tagOp(inst.b)))
{
OperandX64 valueLhs =
inst.a.kind == IrOpKind::Inst ? qword[regOp(inst.a) + offsetof(TValue, value) + addrOffset] : luauRegValue(vmRegOp(inst.a));
build.mov(valueLhs, regOp(inst.c));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
}
case IrCmd::ADD_INT:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind == IrOpKind::Constant)
{
build.lea(inst.regX64, addr[regOp(inst.b) + intOp(inst.a)]);
}
else if (inst.a.kind == IrOpKind::Inst)
{
if (inst.regX64 == regOp(inst.a))
{
if (inst.b.kind == IrOpKind::Inst)
build.add(inst.regX64, regOp(inst.b));
else if (intOp(inst.b) == 1)
build.inc(inst.regX64);
else
build.add(inst.regX64, intOp(inst.b));
}
else
{
if (inst.b.kind == IrOpKind::Inst)
build.lea(inst.regX64, addr[regOp(inst.a) + regOp(inst.b)]);
else
build.lea(inst.regX64, addr[regOp(inst.a) + intOp(inst.b)]);
}
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
}
case IrCmd::SUB_INT:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.regX64 == regOp(inst.a) && intOp(inst.b) == 1)
build.dec(inst.regX64);
else if (inst.regX64 == regOp(inst.a))
build.sub(inst.regX64, intOp(inst.b));
else
build.lea(inst.regX64, addr[regOp(inst.a) - intOp(inst.b)]);
break;
case IrCmd::ADD_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, memRegDoubleOp(inst.a));
build.vaddsd(inst.regX64, tmp.reg, memRegDoubleOp(inst.b));
}
else
{
build.vaddsd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b));
}
break;
case IrCmd::SUB_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, memRegDoubleOp(inst.a));
build.vsubsd(inst.regX64, tmp.reg, memRegDoubleOp(inst.b));
}
else
{
build.vsubsd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b));
}
break;
case IrCmd::MUL_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, memRegDoubleOp(inst.a));
build.vmulsd(inst.regX64, tmp.reg, memRegDoubleOp(inst.b));
}
else
{
build.vmulsd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b));
}
break;
case IrCmd::DIV_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, memRegDoubleOp(inst.a));
build.vdivsd(inst.regX64, tmp.reg, memRegDoubleOp(inst.b));
}
else
{
build.vdivsd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b));
}
break;
case IrCmd::IDIV_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, memRegDoubleOp(inst.a));
build.vdivsd(inst.regX64, tmp.reg, memRegDoubleOp(inst.b));
}
else
{
build.vdivsd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b));
}
build.vroundsd(inst.regX64, inst.regX64, inst.regX64, RoundingModeX64::RoundToNegativeInfinity);
break;
case IrCmd::MOD_NUM:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 optLhsTmp{regs};
RegisterX64 lhs;
if (inst.a.kind == IrOpKind::Constant)
{
optLhsTmp.alloc(SizeX64::xmmword);
build.vmovsd(optLhsTmp.reg, memRegDoubleOp(inst.a));
lhs = optLhsTmp.reg;
}
else
{
lhs = regOp(inst.a);
}
if (inst.b.kind == IrOpKind::Inst)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vdivsd(tmp.reg, lhs, memRegDoubleOp(inst.b));
build.vroundsd(tmp.reg, tmp.reg, tmp.reg, RoundingModeX64::RoundToNegativeInfinity);
build.vmulsd(tmp.reg, tmp.reg, memRegDoubleOp(inst.b));
build.vsubsd(inst.regX64, lhs, tmp.reg);
}
else
{
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
build.vmovsd(tmp1.reg, memRegDoubleOp(inst.b));
build.vdivsd(tmp2.reg, lhs, tmp1.reg);
build.vroundsd(tmp2.reg, tmp2.reg, tmp2.reg, RoundingModeX64::RoundToNegativeInfinity);
build.vmulsd(tmp1.reg, tmp2.reg, tmp1.reg);
build.vsubsd(inst.regX64, lhs, tmp1.reg);
}
break;
}
case IrCmd::MIN_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, memRegDoubleOp(inst.a));
build.vminsd(inst.regX64, tmp.reg, memRegDoubleOp(inst.b));
}
else
{
build.vminsd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b));
}
break;
case IrCmd::MAX_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, memRegDoubleOp(inst.a));
build.vmaxsd(inst.regX64, tmp.reg, memRegDoubleOp(inst.b));
}
else
{
build.vmaxsd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b));
}
break;
case IrCmd::UNM_NUM:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vxorpd(inst.regX64, regOp(inst.a), build.f64(-0.0));
break;
}
case IrCmd::FLOOR_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToNegativeInfinity);
break;
case IrCmd::CEIL_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a), RoundingModeX64::RoundToPositiveInfinity);
break;
case IrCmd::ROUND_NUM:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
if (inst.a.kind != IrOpKind::Inst)
build.vmovsd(inst.regX64, memRegDoubleOp(inst.a));
else if (regOp(inst.a) != inst.regX64)
build.vmovsd(inst.regX64, inst.regX64, regOp(inst.a));
build.vandpd(tmp1.reg, inst.regX64, build.f64x2(-0.0, -0.0));
build.vmovsd(tmp2.reg, build.i64(0x3fdfffffffffffff)); // 0.49999999999999994
build.vorpd(tmp1.reg, tmp1.reg, tmp2.reg);
build.vaddsd(inst.regX64, inst.regX64, tmp1.reg);
build.vroundsd(inst.regX64, inst.regX64, inst.regX64, RoundingModeX64::RoundToZero);
break;
}
case IrCmd::SQRT_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vsqrtsd(inst.regX64, inst.regX64, memRegDoubleOp(inst.a));
break;
case IrCmd::ABS_NUM:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst)
build.vmovsd(inst.regX64, memRegDoubleOp(inst.a));
else if (regOp(inst.a) != inst.regX64)
build.vmovsd(inst.regX64, inst.regX64, regOp(inst.a));
build.vandpd(inst.regX64, inst.regX64, build.i64(~(1LL << 63)));
break;
case IrCmd::SIGN_NUM:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
ScopedRegX64 tmp0{regs, SizeX64::xmmword};
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
build.vxorpd(tmp0.reg, tmp0.reg, tmp0.reg);
// Set tmp1 to -1 if arg < 0, else 0
build.vcmpltsd(tmp1.reg, regOp(inst.a), tmp0.reg);
build.vmovsd(tmp2.reg, build.f64(-1));
build.vandpd(tmp1.reg, tmp1.reg, tmp2.reg);
// Set mask bit to 1 if 0 < arg, else 0
build.vcmpltsd(inst.regX64, tmp0.reg, regOp(inst.a));
// Result = (mask-bit == 1) ? 1.0 : tmp1
// If arg < 0 then tmp1 is -1 and mask-bit is 0, result is -1
// If arg == 0 then tmp1 is 0 and mask-bit is 0, result is 0
// If arg > 0 then tmp1 is 0 and mask-bit is 1, result is 1
build.vblendvpd(inst.regX64, tmp1.reg, build.f64x2(1, 1), inst.regX64);
break;
}
case IrCmd::SELECT_NUM:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.c, inst.d}); // can't reuse b if a is a memory operand
ScopedRegX64 tmp{regs, SizeX64::xmmword};
if (inst.c.kind == IrOpKind::Inst)
build.vcmpeqsd(tmp.reg, regOp(inst.c), memRegDoubleOp(inst.d));
else
{
build.vmovsd(tmp.reg, memRegDoubleOp(inst.c));
build.vcmpeqsd(tmp.reg, tmp.reg, memRegDoubleOp(inst.d));
}
if (inst.a.kind == IrOpKind::Inst)
build.vblendvpd(inst.regX64, regOp(inst.a), memRegDoubleOp(inst.b), tmp.reg);
else
{
build.vmovsd(inst.regX64, memRegDoubleOp(inst.a));
build.vblendvpd(inst.regX64, inst.regX64, memRegDoubleOp(inst.b), tmp.reg);
}
break;
}
case IrCmd::ADD_VEC:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 tmp1{regs};
ScopedRegX64 tmp2{regs};
RegisterX64 tmpa = vecOp(inst.a, tmp1);
RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
build.vaddps(inst.regX64, tmpa, tmpb);
break;
}
case IrCmd::SUB_VEC:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 tmp1{regs};
ScopedRegX64 tmp2{regs};
RegisterX64 tmpa = vecOp(inst.a, tmp1);
RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
build.vsubps(inst.regX64, tmpa, tmpb);
break;
}
case IrCmd::MUL_VEC:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 tmp1{regs};
ScopedRegX64 tmp2{regs};
RegisterX64 tmpa = vecOp(inst.a, tmp1);
RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
build.vmulps(inst.regX64, tmpa, tmpb);
break;
}
case IrCmd::DIV_VEC:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 tmp1{regs};
ScopedRegX64 tmp2{regs};
RegisterX64 tmpa = vecOp(inst.a, tmp1);
RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
build.vdivps(inst.regX64, tmpa, tmpb);
break;
}
case IrCmd::UNM_VEC:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vxorpd(inst.regX64, regOp(inst.a), build.f32x4(-0.0, -0.0, -0.0, -0.0));
break;
}
case IrCmd::DOT_VEC:
{
LUAU_ASSERT(FFlag::LuauVectorLibNativeDot);
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 tmp1{regs};
ScopedRegX64 tmp2{regs};
RegisterX64 tmpa = vecOp(inst.a, tmp1);
RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
build.vdpps(inst.regX64, tmpa, tmpb, 0x71); // 7 = 0b0111, sum first 3 products into first float
build.vcvtss2sd(inst.regX64, inst.regX64, inst.regX64);
break;
}
case IrCmd::NOT_ANY:
{
// TODO: if we have a single user which is a STORE_INT, we are missing the opportunity to write directly to target
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
Label saveone, savezero, exit;
if (inst.a.kind == IrOpKind::Constant)
{
// Other cases should've been constant folded
CODEGEN_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN);
}
else
{
build.cmp(regOp(inst.a), LUA_TNIL);
build.jcc(ConditionX64::Equal, saveone);
build.cmp(regOp(inst.a), LUA_TBOOLEAN);
build.jcc(ConditionX64::NotEqual, savezero);
}
if (inst.b.kind == IrOpKind::Constant)
{
// If value is 1, we fallthrough to storing 0
if (intOp(inst.b) == 0)
build.jmp(saveone);
}
else
{
build.cmp(regOp(inst.b), 0);
build.jcc(ConditionX64::Equal, saveone);
}
build.setLabel(savezero);
build.mov(inst.regX64, 0);
build.jmp(exit);
build.setLabel(saveone);
build.mov(inst.regX64, 1);
build.setLabel(exit);
break;
}
case IrCmd::CMP_ANY:
{
IrCondition cond = conditionOp(inst.c);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.a)));
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.b)));
if (cond == IrCondition::LessEqual)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]);
else if (cond == IrCondition::Less)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]);
else if (cond == IrCondition::Equal)
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]);
else
CODEGEN_ASSERT(!"Unsupported condition");
emitUpdateBase(build);
inst.regX64 = regs.takeReg(eax, index);
break;
}
case IrCmd::JUMP:
jumpOrAbortOnUndef(inst.a, next);
break;
case IrCmd::JUMP_IF_TRUTHY:
jumpIfTruthy(build, vmRegOp(inst.a), labelOp(inst.b), labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.c), next);
break;
case IrCmd::JUMP_IF_FALSY:
jumpIfFalsy(build, vmRegOp(inst.a), labelOp(inst.b), labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.c), next);
break;
case IrCmd::JUMP_EQ_TAG:
{
CODEGEN_ASSERT(inst.b.kind == IrOpKind::Inst || inst.b.kind == IrOpKind::Constant);
OperandX64 opb = inst.b.kind == IrOpKind::Inst ? regOp(inst.b) : OperandX64(tagOp(inst.b));
if (inst.a.kind == IrOpKind::Constant)
build.cmp(opb, tagOp(inst.a));
else
build.cmp(memRegTagOp(inst.a), opb);
if (isFallthroughBlock(blockOp(inst.d), next))
{
build.jcc(ConditionX64::Equal, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
}
else
{
build.jcc(ConditionX64::NotEqual, labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.c), next);
}
break;
}
case IrCmd::JUMP_CMP_INT:
{
IrCondition cond = conditionOp(inst.c);
if ((cond == IrCondition::Equal || cond == IrCondition::NotEqual) && intOp(inst.b) == 0)
{
bool invert = cond == IrCondition::NotEqual;
build.test(regOp(inst.a), regOp(inst.a));
if (isFallthroughBlock(blockOp(inst.d), next))
{
build.jcc(invert ? ConditionX64::Zero : ConditionX64::NotZero, labelOp(inst.e));
jumpOrFallthrough(blockOp(inst.d), next);
}
else
{
build.jcc(invert ? ConditionX64::NotZero : ConditionX64::Zero, labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.e), next);
}
}
else
{
build.cmp(regOp(inst.a), intOp(inst.b));
build.jcc(getConditionInt(cond), labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.e), next);
}
break;
}
case IrCmd::JUMP_EQ_POINTER:
build.cmp(regOp(inst.a), regOp(inst.b));
build.jcc(ConditionX64::Equal, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::JUMP_CMP_NUM:
{
IrCondition cond = conditionOp(inst.c);
ScopedRegX64 tmp{regs, SizeX64::xmmword};
jumpOnNumberCmp(build, tmp.reg, memRegDoubleOp(inst.a), memRegDoubleOp(inst.b), cond, labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.e), next);
break;
}
case IrCmd::JUMP_FORN_LOOP_COND:
{
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
ScopedRegX64 tmp3{regs, SizeX64::xmmword};
RegisterX64 index = inst.a.kind == IrOpKind::Inst ? regOp(inst.a) : tmp1.reg;
RegisterX64 limit = inst.b.kind == IrOpKind::Inst ? regOp(inst.b) : tmp2.reg;
if (inst.a.kind != IrOpKind::Inst)
build.vmovsd(tmp1.reg, memRegDoubleOp(inst.a));
if (inst.b.kind != IrOpKind::Inst)
build.vmovsd(tmp2.reg, memRegDoubleOp(inst.b));
Label direct;
// step > 0
jumpOnNumberCmp(build, tmp3.reg, memRegDoubleOp(inst.c), build.f64(0.0), IrCondition::Greater, direct);
// !(limit <= index)
jumpOnNumberCmp(build, noreg, limit, index, IrCondition::NotLessEqual, labelOp(inst.e));
build.jmp(labelOp(inst.d));
// !(index <= limit)
build.setLabel(direct);
jumpOnNumberCmp(build, noreg, index, limit, IrCondition::NotLessEqual, labelOp(inst.e));
jumpOrFallthrough(blockOp(inst.d), next);
break;
}
case IrCmd::TABLE_LEN:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]);
inst.regX64 = regs.takeReg(eax, index);
break;
}
case IrCmd::TABLE_SETNUM:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a);
callWrap.addArgument(SizeX64::dword, regOp(inst.b), inst.b);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_setnum)]);
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::STRING_LEN:
{
RegisterX64 ptr = regOp(inst.a);
inst.regX64 = regs.allocReg(SizeX64::dword, index);
build.mov(inst.regX64, dword[ptr + offsetof(TString, len)]);
break;
}
case IrCmd::NEW_TABLE:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.a)));
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b)));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]);
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::DUP_TABLE:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_clone)]);
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::TRY_NUM_TO_INDEX:
{
inst.regX64 = regs.allocReg(SizeX64::dword, index);
ScopedRegX64 tmp{regs, SizeX64::xmmword};
convertNumberToIndexOrJump(build, tmp.reg, regOp(inst.a), inst.regX64, labelOp(inst.b));
break;
}
case IrCmd::TRY_CALL_FASTGETTM:
{
ScopedRegX64 tmp{regs, SizeX64::qword};
build.mov(tmp.reg, qword[regOp(inst.a) + offsetof(LuaTable, metatable)]);
regs.freeLastUseReg(function.instOp(inst.a), index); // Release before the call if it's the last use
build.test(tmp.reg, tmp.reg);
build.jcc(ConditionX64::Zero, labelOp(inst.c)); // No metatable
build.test(byte[tmp.reg + offsetof(LuaTable, tmcache)], 1 << intOp(inst.b));
build.jcc(ConditionX64::NotZero, labelOp(inst.c)); // No tag method
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp2.reg, qword[rState + offsetof(lua_State, global)]);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, tmp);
callWrap.addArgument(SizeX64::qword, intOp(inst.b));
callWrap.addArgument(SizeX64::qword, qword[tmp2.release() + offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)]);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_gettm)]);
}
build.test(rax, rax);
build.jcc(ConditionX64::Zero, labelOp(inst.c)); // No tag method
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::NEW_USERDATA:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, intOp(inst.a));
callWrap.addArgument(SizeX64::dword, intOp(inst.b));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, newUserdata)]);
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::INT_TO_NUM:
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
build.vcvtsi2sd(inst.regX64, inst.regX64, regOp(inst.a));
break;
case IrCmd::UINT_TO_NUM:
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
// AVX has no uint->double conversion; the source must come from UINT op and they all should clear top 32 bits so we can usually
// use 64-bit reg; the one exception is NUM_TO_UINT which doesn't clear top bits
if (IrCmd source = function.instOp(inst.a).cmd; source == IrCmd::NUM_TO_UINT)
{
ScopedRegX64 tmp{regs, SizeX64::dword};
build.mov(tmp.reg, regOp(inst.a));
build.vcvtsi2sd(inst.regX64, inst.regX64, qwordReg(tmp.reg));
}
else
{
CODEGEN_ASSERT(source != IrCmd::SUBSTITUTE); // we don't process substitutions
build.vcvtsi2sd(inst.regX64, inst.regX64, qwordReg(regOp(inst.a)));
}
break;
case IrCmd::NUM_TO_INT:
inst.regX64 = regs.allocReg(SizeX64::dword, index);
build.vcvttsd2si(inst.regX64, memRegDoubleOp(inst.a));
break;
case IrCmd::NUM_TO_UINT:
inst.regX64 = regs.allocReg(SizeX64::dword, index);
build.vcvttsd2si(qwordReg(inst.regX64), memRegDoubleOp(inst.a));
break;
case IrCmd::NUM_TO_VEC:
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
if (inst.a.kind == IrOpKind::Constant)
{
float value = float(doubleOp(inst.a));
uint32_t asU32;
static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit");
memcpy(&asU32, &value, sizeof(value));
build.vmovaps(inst.regX64, build.u32x4(asU32, asU32, asU32, 0));
}
else
{
build.vcvtsd2ss(inst.regX64, inst.regX64, memRegDoubleOp(inst.a));
build.vpshufps(inst.regX64, inst.regX64, inst.regX64, 0b00'00'00'00);
}
break;
case IrCmd::TAG_VECTOR:
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
build.vpinsrd(inst.regX64, regOp(inst.a), build.i32(LUA_TVECTOR), 3);
break;
case IrCmd::ADJUST_STACK_TO_REG:
{
ScopedRegX64 tmp{regs, SizeX64::qword};
if (inst.b.kind == IrOpKind::Constant)
{
build.lea(tmp.reg, addr[rBase + (vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)]);
build.mov(qword[rState + offsetof(lua_State, top)], tmp.reg);
}
else if (inst.b.kind == IrOpKind::Inst)
{
build.mov(dwordReg(tmp.reg), regOp(inst.b));
build.shl(tmp.reg, kTValueSizeLog2);
build.lea(tmp.reg, addr[rBase + tmp.reg + vmRegOp(inst.a) * sizeof(TValue)]);
build.mov(qword[rState + offsetof(lua_State, top)], tmp.reg);
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
}
case IrCmd::ADJUST_STACK_TO_TOP:
{
ScopedRegX64 tmp{regs, SizeX64::qword};
build.mov(tmp.reg, qword[rState + offsetof(lua_State, ci)]);
build.mov(tmp.reg, qword[tmp.reg + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], tmp.reg);
break;
}
case IrCmd::FASTCALL:
{
emitBuiltin(regs, build, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d));
break;
}
case IrCmd::INVOKE_FASTCALL:
{
unsigned bfid = uintOp(inst.a);
OperandX64 args = 0;
ScopedRegX64 argsAlt{regs};
// 'E' argument can only be produced by LOP_FASTCALL3
if (inst.e.kind != IrOpKind::Undef)
{
CODEGEN_ASSERT(intOp(inst.f) == 3);
ScopedRegX64 tmp{regs, SizeX64::xmmword};
argsAlt.alloc(SizeX64::qword);
build.mov(argsAlt.reg, qword[rState + offsetof(lua_State, top)]);
build.vmovups(tmp.reg, luauReg(vmRegOp(inst.d)));
build.vmovups(xmmword[argsAlt.reg], tmp.reg);
build.vmovups(tmp.reg, luauReg(vmRegOp(inst.e)));
build.vmovups(xmmword[argsAlt.reg + sizeof(TValue)], tmp.reg);
}
else
{
if (inst.d.kind == IrOpKind::VmReg)
args = luauRegAddress(vmRegOp(inst.d));
else if (inst.d.kind == IrOpKind::VmConst)
args = luauConstantAddress(vmConstOp(inst.d));
else
CODEGEN_ASSERT(inst.d.kind == IrOpKind::Undef);
}
int ra = vmRegOp(inst.b);
int arg = vmRegOp(inst.c);
int nparams = intOp(inst.f);
int nresults = intOp(inst.g);
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(arg));
callWrap.addArgument(SizeX64::dword, nresults);
if (inst.e.kind != IrOpKind::Undef)
callWrap.addArgument(SizeX64::qword, argsAlt);
else
callWrap.addArgument(SizeX64::qword, args);
if (nparams == LUA_MULTRET)
{
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
ScopedRegX64 tmp{regs, SizeX64::qword};
// L->top - (ra + 1)
build.mov(reg, qword[rState + offsetof(lua_State, top)]);
build.lea(tmp.reg, addr[rBase + (ra + 1) * sizeof(TValue)]);
build.sub(reg, tmp.reg);
build.shr(reg, kTValueSizeLog2);
callWrap.addArgument(SizeX64::dword, dwordReg(reg));
}
else
{
callWrap.addArgument(SizeX64::dword, nparams);
}
ScopedRegX64 func{regs, SizeX64::qword};
build.mov(func.reg, qword[rNativeContext + offsetof(NativeContext, luauF_table) + bfid * sizeof(luau_FastFunction)]);
callWrap.call(func.release());
inst.regX64 = regs.takeReg(eax, index); // Result of a builtin call is returned in eax
break;
}
case IrCmd::CHECK_FASTCALL_RES:
{
RegisterX64 res = regOp(inst.a);
build.test(res, res); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, labelOp(inst.b)); // jl jumps if SF != OF
break;
}
case IrCmd::DO_ARITH:
{
OperandX64 opb = inst.b.kind == IrOpKind::VmReg ? luauRegAddress(vmRegOp(inst.b)) : luauConstantAddress(vmConstOp(inst.b));
OperandX64 opc = inst.c.kind == IrOpKind::VmReg ? luauRegAddress(vmRegOp(inst.c)) : luauConstantAddress(vmConstOp(inst.c));
callArithHelper(regs, build, vmRegOp(inst.a), opb, opc, TMS(intOp(inst.d)));
break;
}
case IrCmd::DO_LEN:
callLengthHelper(regs, build, vmRegOp(inst.a), vmRegOp(inst.b));
break;
case IrCmd::GET_TABLE:
if (inst.c.kind == IrOpKind::VmReg)
{
callGetTable(regs, build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a));
}
else if (inst.c.kind == IrOpKind::Constant)
{
TValue n = {};
setnvalue(&n, uintOp(inst.c));
callGetTable(regs, build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
case IrCmd::SET_TABLE:
if (inst.c.kind == IrOpKind::VmReg)
{
callSetTable(regs, build, vmRegOp(inst.b), luauRegAddress(vmRegOp(inst.c)), vmRegOp(inst.a));
}
else if (inst.c.kind == IrOpKind::Constant)
{
TValue n = {};
setnvalue(&n, uintOp(inst.c));
callSetTable(regs, build, vmRegOp(inst.b), build.bytes(&n, sizeof(n)), vmRegOp(inst.a));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
case IrCmd::GET_IMPORT:
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
build.mov(tmp1.reg, sClosure);
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, qword[tmp1.release() + offsetof(Closure, env)]);
callWrap.addArgument(SizeX64::qword, rConstants);
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.a)));
callWrap.addArgument(SizeX64::dword, uintOp(inst.b));
callWrap.addArgument(SizeX64::dword, 0);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_getimport)]);
emitUpdateBase(build);
break;
}
case IrCmd::CONCAT:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(inst.b)));
callWrap.addArgument(SizeX64::dword, int32_t(vmRegOp(inst.a) + uintOp(inst.b) - 1));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_concat)]);
emitUpdateBase(build);
break;
}
case IrCmd::GET_UPVALUE:
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
build.mov(tmp1.reg, sClosure);
build.add(tmp1.reg, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b));
// uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value
Label skip;
build.cmp(dword[tmp1.reg + offsetof(TValue, tt)], LUA_TUPVAL);
build.jcc(ConditionX64::NotEqual, skip);
// UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally)
build.mov(tmp1.reg, qword[tmp1.reg + offsetof(TValue, value.gc)]);
build.mov(tmp1.reg, qword[tmp1.reg + offsetof(UpVal, v)]);
build.setLabel(skip);
build.vmovups(tmp2.reg, xmmword[tmp1.reg]);
build.vmovups(luauReg(vmRegOp(inst.a)), tmp2.reg);
break;
}
case IrCmd::SET_UPVALUE:
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp1.reg, sClosure);
build.mov(tmp2.reg, qword[tmp1.reg + offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)]);
build.mov(tmp1.reg, qword[tmp2.reg + offsetof(UpVal, v)]);
{
ScopedRegX64 tmp3{regs, SizeX64::xmmword};
build.vmovups(tmp3.reg, luauReg(vmRegOp(inst.b)));
build.vmovups(xmmword[tmp1.reg], tmp3.reg);
}
tmp1.free();
if (inst.c.kind == IrOpKind::Undef || isGCO(tagOp(inst.c)))
callBarrierObject(regs, build, tmp2.release(), {}, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c));
break;
}
case IrCmd::CHECK_TAG:
build.cmp(memRegTagOp(inst.a), tagOp(inst.b));
jumpOrAbortOnUndef(ConditionX64::NotEqual, inst.c, next);
break;
case IrCmd::CHECK_TRUTHY:
{
// Constant tags which don't require boolean value check should've been removed in constant folding
CODEGEN_ASSERT(inst.a.kind != IrOpKind::Constant || tagOp(inst.a) == LUA_TBOOLEAN);
Label skip;
if (inst.a.kind != IrOpKind::Constant)
{
// Fail to fallback on 'nil' (falsy)
build.cmp(memRegTagOp(inst.a), LUA_TNIL);
jumpOrAbortOnUndef(ConditionX64::Equal, inst.c, next);
// Skip value test if it's not a boolean (truthy)
build.cmp(memRegTagOp(inst.a), LUA_TBOOLEAN);
build.jcc(ConditionX64::NotEqual, skip);
}
// fail to fallback on 'false' boolean value (falsy)
if (inst.b.kind != IrOpKind::Constant)
{
build.cmp(memRegUintOp(inst.b), 0);
jumpOrAbortOnUndef(ConditionX64::Equal, inst.c, next);
}
else
{
if (intOp(inst.b) == 0)
jumpOrAbortOnUndef(inst.c, next);
}
if (inst.a.kind != IrOpKind::Constant)
build.setLabel(skip);
break;
}
case IrCmd::CHECK_READONLY:
build.cmp(byte[regOp(inst.a) + offsetof(LuaTable, readonly)], 0);
jumpOrAbortOnUndef(ConditionX64::NotEqual, inst.b, next);
break;
case IrCmd::CHECK_NO_METATABLE:
build.cmp(qword[regOp(inst.a) + offsetof(LuaTable, metatable)], 0);
jumpOrAbortOnUndef(ConditionX64::NotEqual, inst.b, next);
break;
case IrCmd::CHECK_SAFE_ENV:
{
ScopedRegX64 tmp{regs, SizeX64::qword};
build.mov(tmp.reg, sClosure);
build.mov(tmp.reg, qword[tmp.reg + offsetof(Closure, env)]);
build.cmp(byte[tmp.reg + offsetof(LuaTable, safeenv)], 0);
jumpOrAbortOnUndef(ConditionX64::Equal, inst.a, next);
break;
}
case IrCmd::CHECK_ARRAY_SIZE:
if (inst.b.kind == IrOpKind::Inst)
build.cmp(dword[regOp(inst.a) + offsetof(LuaTable, sizearray)], regOp(inst.b));
else if (inst.b.kind == IrOpKind::Constant)
build.cmp(dword[regOp(inst.a) + offsetof(LuaTable, sizearray)], intOp(inst.b));
else
CODEGEN_ASSERT(!"Unsupported instruction form");
jumpOrAbortOnUndef(ConditionX64::BelowEqual, inst.c, next);
break;
case IrCmd::JUMP_SLOT_MATCH:
case IrCmd::CHECK_SLOT_MATCH:
{
Label abort; // Used when guard aborts execution
const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? inst.d : inst.c;
Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp);
ScopedRegX64 tmp{regs, SizeX64::qword};
// Check if node key tag is a string
build.mov(dwordReg(tmp.reg), luauNodeKeyTag(regOp(inst.a)));
build.and_(dwordReg(tmp.reg), kTKeyTagMask);
build.cmp(dwordReg(tmp.reg), LUA_TSTRING);
build.jcc(ConditionX64::NotEqual, mismatch);
// Check that node key value matches the expected one
build.mov(tmp.reg, luauConstantValue(vmConstOp(inst.b)));
build.cmp(tmp.reg, luauNodeKeyValue(regOp(inst.a)));
build.jcc(ConditionX64::NotEqual, mismatch);
// Check that node value is not nil
build.cmp(dword[regOp(inst.a) + offsetof(LuaNode, val) + offsetof(TValue, tt)], LUA_TNIL);
build.jcc(ConditionX64::Equal, mismatch);
if (inst.cmd == IrCmd::JUMP_SLOT_MATCH)
{
jumpOrFallthrough(blockOp(inst.c), next);
}
else if (mismatchOp.kind == IrOpKind::Undef)
{
Label skip;
build.jmp(skip);
build.setLabel(abort);
build.ud2();
build.setLabel(skip);
}
break;
}
case IrCmd::CHECK_NODE_NO_NEXT:
{
ScopedRegX64 tmp{regs, SizeX64::dword};
build.mov(tmp.reg, dword[regOp(inst.a) + offsetof(LuaNode, key) + kOffsetOfTKeyTagNext]);
build.shr(tmp.reg, kTKeyTagBits);
jumpOrAbortOnUndef(ConditionX64::NotZero, inst.b, next);
break;
}
case IrCmd::CHECK_NODE_VALUE:
{
build.cmp(dword[regOp(inst.a) + offsetof(LuaNode, val) + offsetof(TValue, tt)], LUA_TNIL);
jumpOrAbortOnUndef(ConditionX64::Equal, inst.b, next);
break;
}
case IrCmd::CHECK_BUFFER_LEN:
{
int accessSize = intOp(inst.c);
CODEGEN_ASSERT(accessSize > 0);
if (inst.b.kind == IrOpKind::Inst)
{
if (accessSize == 1)
{
// Simpler check for a single byte access
build.cmp(dword[regOp(inst.a) + offsetof(Buffer, len)], regOp(inst.b));
jumpOrAbortOnUndef(ConditionX64::BelowEqual, inst.d, next);
}
else
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::dword};
// To perform the bounds check using a single branch, we take index that is limited to 32 bit int
// Access size is then added using a 64 bit addition
// This will make sure that addition will not wrap around for values like 0xffffffff
if (IrCmd source = function.instOp(inst.b).cmd; source == IrCmd::NUM_TO_INT)
{
// When previous operation is a conversion to an integer (common case), it is guaranteed to have high register bits cleared
build.lea(tmp1.reg, addr[qwordReg(regOp(inst.b)) + accessSize]);
}
else
{
// When the source of the index is unknown, it could contain garbage in the high bits, so we zero-extend it explicitly
build.mov(dwordReg(tmp1.reg), regOp(inst.b));
build.add(tmp1.reg, accessSize);
}
build.mov(tmp2.reg, dword[regOp(inst.a) + offsetof(Buffer, len)]);
build.cmp(qwordReg(tmp2.reg), tmp1.reg);
jumpOrAbortOnUndef(ConditionX64::Below, inst.d, next);
}
}
else if (inst.b.kind == IrOpKind::Constant)
{
int offset = intOp(inst.b);
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
jumpOrAbortOnUndef(inst.d, next);
else
build.cmp(dword[regOp(inst.a) + offsetof(Buffer, len)], offset + accessSize);
jumpOrAbortOnUndef(ConditionX64::Below, inst.d, next);
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
}
case IrCmd::CHECK_USERDATA_TAG:
{
build.cmp(byte[regOp(inst.a) + offsetof(Udata, tag)], intOp(inst.b));
jumpOrAbortOnUndef(ConditionX64::NotEqual, inst.c, next);
break;
}
case IrCmd::INTERRUPT:
{
unsigned pcpos = uintOp(inst.a);
// We unconditionally spill values here because that allows us to ignore register state when we synthesize interrupt handler
// This can be changed in the future if we can somehow record interrupt handler code separately
// Since interrupts are loop edges or call/ret, we don't have a significant opportunity for register reuse here anyway
regs.preserveAndFreeInstValues();
ScopedRegX64 tmp{regs, SizeX64::qword};
Label self;
build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
build.cmp(qword[tmp.reg + offsetof(global_State, cb.interrupt)], 0);
build.jcc(ConditionX64::NotEqual, self);
Label next = build.setLabel();
interruptHandlers.push_back({self, pcpos, next});
break;
}
case IrCmd::CHECK_GC:
callStepGc(regs, build);
break;
case IrCmd::BARRIER_OBJ:
callBarrierObject(regs, build, regOp(inst.a), inst.a, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c));
break;
case IrCmd::BARRIER_TABLE_BACK:
callBarrierTableFast(regs, build, regOp(inst.a), inst.a);
break;
case IrCmd::BARRIER_TABLE_FORWARD:
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
checkObjectBarrierConditions(build, tmp.reg, regOp(inst.a), inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, regOp(inst.a), inst.a);
callWrap.addArgument(SizeX64::qword, tmp);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barriertable)]);
}
build.setLabel(skip);
break;
}
case IrCmd::SET_SAVEDPC:
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp2.reg, sCode);
build.add(tmp2.reg, uintOp(inst.a) * sizeof(Instruction));
build.mov(tmp1.reg, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[tmp1.reg + offsetof(CallInfo, savedpc)], tmp2.reg);
break;
}
case IrCmd::CLOSE_UPVALS:
{
Label next;
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
// L->openupval != 0
build.mov(tmp1.reg, qword[rState + offsetof(lua_State, openupval)]);
build.test(tmp1.reg, tmp1.reg);
build.jcc(ConditionX64::Zero, next);
// ra <= L->openuval->v
build.lea(tmp2.reg, addr[rBase + vmRegOp(inst.a) * sizeof(TValue)]);
build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(UpVal, v)]);
build.jcc(ConditionX64::Above, next);
tmp1.free();
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, tmp2);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_close)]);
}
build.setLabel(next);
break;
}
case IrCmd::CAPTURE:
// No-op right now
break;
// Fallbacks to non-IR instruction implementations
case IrCmd::SETLIST:
regs.assertAllFree();
emitInstSetList(
regs, build, vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d), uintOp(inst.e), inst.f.kind == IrOpKind::Undef ? -1 : int(uintOp(inst.f))
);
break;
case IrCmd::CALL:
regs.assertAllFree();
regs.assertNoSpills();
emitInstCall(build, helpers, vmRegOp(inst.a), intOp(inst.b), intOp(inst.c));
break;
case IrCmd::RETURN:
regs.assertAllFree();
regs.assertNoSpills();
emitInstReturn(build, helpers, vmRegOp(inst.a), intOp(inst.b), function.variadic);
break;
case IrCmd::FORGLOOP:
regs.assertAllFree();
emitInstForGLoop(build, vmRegOp(inst.a), intOp(inst.b), labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGLOOP_FALLBACK:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, vmRegOp(inst.a));
callWrap.addArgument(SizeX64::dword, intOp(inst.b));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNonTableFallback)]);
emitUpdateBase(build);
build.test(al, al);
build.jcc(ConditionX64::NotZero, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
}
case IrCmd::FORGPREP_XNEXT_FALLBACK:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.b)));
callWrap.addArgument(SizeX64::dword, uintOp(inst.a) + 1);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgPrepXnextFallback)]);
jumpOrFallthrough(blockOp(inst.c), next);
break;
}
case IrCmd::COVERAGE:
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::dword};
ScopedRegX64 tmp3{regs, SizeX64::dword};
build.mov(tmp1.reg, sCode);
build.add(tmp1.reg, uintOp(inst.a) * sizeof(Instruction));
// hits = LUAU_INSN_E(*pc)
build.mov(tmp2.reg, dword[tmp1.reg]);
build.sar(tmp2.reg, 8);
// hits = (hits < (1 << 23) - 1) ? hits + 1 : hits;
build.xor_(tmp3.reg, tmp3.reg);
build.cmp(tmp2.reg, (1 << 23) - 1);
build.setcc(ConditionX64::NotEqual, byteReg(tmp3.reg));
build.add(tmp2.reg, tmp3.reg);
// VM_PATCH_E(pc, hits);
build.sal(tmp2.reg, 8);
build.movzx(tmp3.reg, byte[tmp1.reg]);
build.or_(tmp3.reg, tmp2.reg);
build.mov(dword[tmp1.reg], tmp3.reg);
break;
}
// Full instruction fallbacks
case IrCmd::FALLBACK_GETGLOBAL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
emitFallback(regs, build, offsetof(NativeContext, executeGETGLOBAL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETGLOBAL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
emitFallback(regs, build, offsetof(NativeContext, executeSETGLOBAL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETTABLEKS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
emitFallback(regs, build, offsetof(NativeContext, executeGETTABLEKS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETTABLEKS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
emitFallback(regs, build, offsetof(NativeContext, executeSETTABLEKS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_NAMECALL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
emitFallback(regs, build, offsetof(NativeContext, executeNAMECALL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_PREPVARARGS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::Constant);
emitFallback(regs, build, offsetof(NativeContext, executePREPVARARGS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETVARARGS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::Constant);
if (intOp(inst.c) == LUA_MULTRET)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
build.mov(reg, sCode);
callWrap.addArgument(SizeX64::qword, addr[reg + uintOp(inst.a) * sizeof(Instruction)]);
callWrap.addArgument(SizeX64::qword, rBase);
callWrap.addArgument(SizeX64::dword, vmRegOp(inst.b));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, executeGETVARARGSMultRet)]);
emitUpdateBase(build);
}
else
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, rBase);
callWrap.addArgument(SizeX64::dword, vmRegOp(inst.b));
callWrap.addArgument(SizeX64::dword, intOp(inst.c));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, executeGETVARARGSConst)]);
}
break;
case IrCmd::NEWCLOSURE:
{
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp2.reg, sClosure);
build.mov(tmp2.reg, qword[tmp2.reg + offsetof(Closure, l.p)]);
build.mov(tmp2.reg, qword[tmp2.reg + offsetof(Proto, p)]);
build.mov(tmp2.reg, qword[tmp2.reg + sizeof(Proto*) * uintOp(inst.c)]);
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, uintOp(inst.a), inst.a);
callWrap.addArgument(SizeX64::qword, regOp(inst.b), inst.b);
callWrap.addArgument(SizeX64::qword, tmp2);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_newLclosure)]);
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::FALLBACK_DUPCLOSURE:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
emitFallback(regs, build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(inst.a));
break;
case IrCmd::FALLBACK_FORGPREP:
emitFallback(regs, build, offsetof(NativeContext, executeFORGPREP), uintOp(inst.a));
jumpOrFallthrough(blockOp(inst.c), next);
break;
case IrCmd::BITAND_UINT:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
build.and_(inst.regX64, memRegUintOp(inst.b));
break;
case IrCmd::BITXOR_UINT:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
build.xor_(inst.regX64, memRegUintOp(inst.b));
break;
case IrCmd::BITOR_UINT:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
build.or_(inst.regX64, memRegUintOp(inst.b));
break;
case IrCmd::BITNOT_UINT:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
build.not_(inst.regX64);
break;
case IrCmd::BITLSHIFT_UINT:
{
ScopedRegX64 shiftTmp{regs};
// Custom bit shift value can only be placed in cl
// but we use it if the shift value is not a constant stored in b
if (inst.b.kind != IrOpKind::Constant)
shiftTmp.take(ecx);
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
if (inst.b.kind == IrOpKind::Constant)
{
// if shift value is a constant, we extract the byte-sized shift amount
int8_t shift = int8_t(unsigned(intOp(inst.b)));
build.shl(inst.regX64, shift);
}
else
{
build.mov(shiftTmp.reg, memRegUintOp(inst.b));
build.shl(inst.regX64, byteReg(shiftTmp.reg));
}
break;
}
case IrCmd::BITRSHIFT_UINT:
{
ScopedRegX64 shiftTmp{regs};
// Custom bit shift value can only be placed in cl
// but we use it if the shift value is not a constant stored in b
if (inst.b.kind != IrOpKind::Constant)
shiftTmp.take(ecx);
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
if (inst.b.kind == IrOpKind::Constant)
{
// if shift value is a constant, we extract the byte-sized shift amount
int8_t shift = int8_t(unsigned(intOp(inst.b)));
build.shr(inst.regX64, shift);
}
else
{
build.mov(shiftTmp.reg, memRegUintOp(inst.b));
build.shr(inst.regX64, byteReg(shiftTmp.reg));
}
break;
}
case IrCmd::BITARSHIFT_UINT:
{
ScopedRegX64 shiftTmp{regs};
// Custom bit shift value can only be placed in cl
// but we use it if the shift value is not a constant stored in b
if (inst.b.kind != IrOpKind::Constant)
shiftTmp.take(ecx);
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
if (inst.b.kind == IrOpKind::Constant)
{
// if shift value is a constant, we extract the byte-sized shift amount
int8_t shift = int8_t(unsigned(intOp(inst.b)));
build.sar(inst.regX64, shift);
}
else
{
build.mov(shiftTmp.reg, memRegUintOp(inst.b));
build.sar(inst.regX64, byteReg(shiftTmp.reg));
}
break;
}
case IrCmd::BITLROTATE_UINT:
{
ScopedRegX64 shiftTmp{regs};
// Custom bit shift value can only be placed in cl
// but we use it if the shift value is not a constant stored in b
if (inst.b.kind != IrOpKind::Constant)
shiftTmp.take(ecx);
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
if (inst.b.kind == IrOpKind::Constant)
{
// if shift value is a constant, we extract the byte-sized shift amount
int8_t shift = int8_t(unsigned(intOp(inst.b)));
build.rol(inst.regX64, shift);
}
else
{
build.mov(shiftTmp.reg, memRegUintOp(inst.b));
build.rol(inst.regX64, byteReg(shiftTmp.reg));
}
break;
}
case IrCmd::BITRROTATE_UINT:
{
ScopedRegX64 shiftTmp{regs};
// Custom bit shift value can only be placed in cl
// but we use it if the shift value is not a constant stored in b
if (inst.b.kind != IrOpKind::Constant)
shiftTmp.take(ecx);
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
if (inst.b.kind == IrOpKind::Constant)
{
// if shift value is a constant, we extract the byte-sized shift amount
int8_t shift = int8_t(unsigned(intOp(inst.b)));
build.ror(inst.regX64, shift);
}
else
{
build.mov(shiftTmp.reg, memRegUintOp(inst.b));
build.ror(inst.regX64, byteReg(shiftTmp.reg));
}
break;
}
case IrCmd::BITCOUNTLZ_UINT:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
Label zero, exit;
build.test(regOp(inst.a), regOp(inst.a));
build.jcc(ConditionX64::Equal, zero);
build.bsr(inst.regX64, regOp(inst.a));
build.xor_(inst.regX64, 0x1f);
build.jmp(exit);
build.setLabel(zero);
build.mov(inst.regX64, 32);
build.setLabel(exit);
break;
}
case IrCmd::BITCOUNTRZ_UINT:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
Label zero, exit;
build.test(regOp(inst.a), regOp(inst.a));
build.jcc(ConditionX64::Equal, zero);
build.bsf(inst.regX64, regOp(inst.a));
build.jmp(exit);
build.setLabel(zero);
build.mov(inst.regX64, 32);
build.setLabel(exit);
break;
}
case IrCmd::BYTESWAP_UINT:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a});
if (inst.a.kind != IrOpKind::Inst || inst.regX64 != regOp(inst.a))
build.mov(inst.regX64, memRegUintOp(inst.a));
build.bswap(inst.regX64);
break;
}
case IrCmd::INVOKE_LIBM:
{
IrCallWrapperX64 callWrap(regs, build, index);
callWrap.addArgument(SizeX64::xmmword, memRegDoubleOp(inst.b), inst.b);
if (inst.c.kind != IrOpKind::None)
{
bool isInt = (inst.c.kind == IrOpKind::Constant) ? constOp(inst.c).kind == IrConstKind::Int
: getCmdValueKind(function.instOp(inst.c).cmd) == IrValueKind::Int;
if (isInt)
callWrap.addArgument(SizeX64::dword, memRegUintOp(inst.c), inst.c);
else
callWrap.addArgument(SizeX64::xmmword, memRegDoubleOp(inst.c), inst.c);
}
callWrap.call(qword[rNativeContext + getNativeContextOffset(uintOp(inst.a))]);
inst.regX64 = regs.takeReg(xmm0, index);
break;
}
case IrCmd::GET_TYPE:
{
inst.regX64 = regs.allocReg(SizeX64::qword, index);
build.mov(inst.regX64, qword[rState + offsetof(lua_State, global)]);
if (inst.a.kind == IrOpKind::Inst)
build.mov(inst.regX64, qword[inst.regX64 + qwordReg(regOp(inst.a)) * sizeof(TString*) + offsetof(global_State, ttname)]);
else if (inst.a.kind == IrOpKind::Constant)
build.mov(inst.regX64, qword[inst.regX64 + tagOp(inst.a) * sizeof(TString*) + offsetof(global_State, ttname)]);
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
}
case IrCmd::GET_TYPEOF:
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.a)));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]);
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::FINDUPVAL:
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(inst.a)));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_findupval)]);
inst.regX64 = regs.takeReg(rax, index);
break;
}
case IrCmd::BUFFER_READI8:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
build.movsx(inst.regX64, byte[bufferAddrOp(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c))]);
break;
case IrCmd::BUFFER_READU8:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
build.movzx(inst.regX64, byte[bufferAddrOp(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c))]);
break;
case IrCmd::BUFFER_WRITEI8:
{
OperandX64 value = inst.c.kind == IrOpKind::Inst ? byteReg(regOp(inst.c)) : OperandX64(int8_t(intOp(inst.c)));
build.mov(byte[bufferAddrOp(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d))], value);
break;
}
case IrCmd::BUFFER_READI16:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
build.movsx(inst.regX64, word[bufferAddrOp(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c))]);
break;
case IrCmd::BUFFER_READU16:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
build.movzx(inst.regX64, word[bufferAddrOp(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c))]);
break;
case IrCmd::BUFFER_WRITEI16:
{
OperandX64 value = inst.c.kind == IrOpKind::Inst ? wordReg(regOp(inst.c)) : OperandX64(int16_t(intOp(inst.c)));
build.mov(word[bufferAddrOp(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d))], value);
break;
}
case IrCmd::BUFFER_READI32:
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {inst.a, inst.b});
build.mov(inst.regX64, dword[bufferAddrOp(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c))]);
break;
case IrCmd::BUFFER_WRITEI32:
{
OperandX64 value = inst.c.kind == IrOpKind::Inst ? regOp(inst.c) : OperandX64(intOp(inst.c));
build.mov(dword[bufferAddrOp(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d))], value);
break;
}
case IrCmd::BUFFER_READF32:
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
build.vcvtss2sd(inst.regX64, inst.regX64, dword[bufferAddrOp(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c))]);
break;
case IrCmd::BUFFER_WRITEF32:
storeDoubleAsFloat(dword[bufferAddrOp(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d))], inst.c);
break;
case IrCmd::BUFFER_READF64:
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
build.vmovsd(inst.regX64, qword[bufferAddrOp(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c))]);
break;
case IrCmd::BUFFER_WRITEF64:
if (inst.c.kind == IrOpKind::Constant)
{
ScopedRegX64 tmp{regs, SizeX64::xmmword};
build.vmovsd(tmp.reg, build.f64(doubleOp(inst.c)));
build.vmovsd(qword[bufferAddrOp(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d))], tmp.reg);
}
else if (inst.c.kind == IrOpKind::Inst)
{
build.vmovsd(qword[bufferAddrOp(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d))], regOp(inst.c));
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
// Pseudo instructions
case IrCmd::NOP:
case IrCmd::SUBSTITUTE:
CODEGEN_ASSERT(!"Pseudo instructions should not be lowered");
break;
}
valueTracker.afterInstLowering(inst, index);
regs.freeLastUseRegs(inst, index);
}
void IrLoweringX64::finishBlock(const IrBlock& curr, const IrBlock& next)
{
if (!regs.spills.empty())
{
// If we have spills remaining, we have to immediately lower the successor block
for (uint32_t predIdx : predecessors(function.cfg, function.getBlockIndex(next)))
CODEGEN_ASSERT(predIdx == function.getBlockIndex(curr) || function.blocks[predIdx].kind == IrBlockKind::Dead);
// And the next block cannot be a join block in cfg
CODEGEN_ASSERT(next.useCount == 1);
}
}
void IrLoweringX64::finishFunction()
{
if (build.logText)
build.logAppend("; interrupt handlers\n");
for (InterruptHandler& handler : interruptHandlers)
{
build.setLabel(handler.self);
build.mov(eax, handler.pcpos + 1);
build.lea(rbx, handler.next);
build.jmp(helpers.interrupt);
}
if (build.logText)
build.logAppend("; exit handlers\n");
for (ExitHandler& handler : exitHandlers)
{
CODEGEN_ASSERT(handler.pcpos != kVmExitEntryGuardPc);
build.setLabel(handler.self);
build.mov(edx, handler.pcpos * sizeof(Instruction));
build.jmp(helpers.updatePcAndContinueInVm);
}
if (stats)
{
if (regs.maxUsedSlot > kSpillSlots)
stats->regAllocErrors++;
if (regs.maxUsedSlot > stats->maxSpillSlotsUsed)
stats->maxSpillSlotsUsed = regs.maxUsedSlot;
}
}
bool IrLoweringX64::hasError() const
{
// If register allocator had to use more stack slots than we have available, this function can't run natively
if (regs.maxUsedSlot > kSpillSlots)
return true;
return false;
}
bool IrLoweringX64::isFallthroughBlock(const IrBlock& target, const IrBlock& next)
{
return target.start == next.start;
}
Label& IrLoweringX64::getTargetLabel(IrOp op, Label& fresh)
{
if (op.kind == IrOpKind::Undef)
return fresh;
if (op.kind == IrOpKind::VmExit)
{
// Special exit case that doesn't have to update pcpos
if (vmExitOp(op) == kVmExitEntryGuardPc)
return helpers.exitContinueVmClearNativeFlag;
if (uint32_t* index = exitHandlerMap.find(vmExitOp(op)))
return exitHandlers[*index].self;
return fresh;
}
return labelOp(op);
}
void IrLoweringX64::finalizeTargetLabel(IrOp op, Label& fresh)
{
if (op.kind == IrOpKind::VmExit && fresh.id != 0 && fresh.id != helpers.exitContinueVmClearNativeFlag.id)
{
exitHandlerMap[vmExitOp(op)] = uint32_t(exitHandlers.size());
exitHandlers.push_back({fresh, vmExitOp(op)});
}
}
void IrLoweringX64::jumpOrFallthrough(IrBlock& target, const IrBlock& next)
{
if (!isFallthroughBlock(target, next))
build.jmp(target.label);
}
void IrLoweringX64::jumpOrAbortOnUndef(ConditionX64 cond, IrOp target, const IrBlock& next)
{
Label fresh;
Label& label = getTargetLabel(target, fresh);
if (target.kind == IrOpKind::Undef)
{
if (cond == ConditionX64::Count)
{
build.ud2(); // Unconditional jump to abort is just an abort
}
else
{
build.jcc(getReverseCondition(cond), label);
build.ud2();
build.setLabel(label);
}
}
else if (cond == ConditionX64::Count)
{
// Unconditional jump can be skipped if it's a fallthrough
if (target.kind == IrOpKind::VmExit || !isFallthroughBlock(blockOp(target), next))
build.jmp(label);
}
else
{
build.jcc(cond, label);
}
finalizeTargetLabel(target, fresh);
}
void IrLoweringX64::jumpOrAbortOnUndef(IrOp target, const IrBlock& next)
{
jumpOrAbortOnUndef(ConditionX64::Count, target, next);
}
OperandX64 IrLoweringX64::memRegDoubleOp(IrOp op)
{
switch (op.kind)
{
case IrOpKind::Inst:
return regOp(op);
case IrOpKind::Constant:
return build.f64(doubleOp(op));
case IrOpKind::VmReg:
return luauRegValue(vmRegOp(op));
case IrOpKind::VmConst:
return luauConstantValue(vmConstOp(op));
default:
CODEGEN_ASSERT(!"Unsupported operand kind");
}
return noreg;
}
OperandX64 IrLoweringX64::memRegUintOp(IrOp op)
{
switch (op.kind)
{
case IrOpKind::Inst:
return regOp(op);
case IrOpKind::Constant:
return OperandX64(unsigned(intOp(op)));
case IrOpKind::VmReg:
return luauRegValueInt(vmRegOp(op));
default:
CODEGEN_ASSERT(!"Unsupported operand kind");
}
return noreg;
}
OperandX64 IrLoweringX64::memRegTagOp(IrOp op)
{
switch (op.kind)
{
case IrOpKind::Inst:
return regOp(op);
case IrOpKind::VmReg:
return luauRegTag(vmRegOp(op));
case IrOpKind::VmConst:
return luauConstantTag(vmConstOp(op));
default:
CODEGEN_ASSERT(!"Unsupported operand kind");
}
return noreg;
}
RegisterX64 IrLoweringX64::regOp(IrOp op)
{
IrInst& inst = function.instOp(op);
if (inst.spilled || inst.needsReload)
regs.restore(inst, false);
CODEGEN_ASSERT(inst.regX64 != noreg);
return inst.regX64;
}
OperandX64 IrLoweringX64::bufferAddrOp(IrOp bufferOp, IrOp indexOp, uint8_t tag)
{
CODEGEN_ASSERT(tag == LUA_TUSERDATA || tag == LUA_TBUFFER);
int dataOffset = tag == LUA_TBUFFER ? offsetof(Buffer, data) : offsetof(Udata, data);
if (indexOp.kind == IrOpKind::Inst)
return regOp(bufferOp) + qwordReg(regOp(indexOp)) + dataOffset;
else if (indexOp.kind == IrOpKind::Constant)
return regOp(bufferOp) + intOp(indexOp) + dataOffset;
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
RegisterX64 IrLoweringX64::vecOp(IrOp op, ScopedRegX64& tmp)
{
IrInst source = function.instOp(op);
CODEGEN_ASSERT(source.cmd != IrCmd::SUBSTITUTE); // we don't process substitutions
// source that comes from memory or from tag instruction has .w = TVECTOR, which is denormal
// to avoid performance degradation on some CPUs we mask this component to produce zero
// otherwise we conservatively assume the vector is a result of a well formed math op so .w is a normal number or zero
if (source.cmd != IrCmd::LOAD_TVALUE && source.cmd != IrCmd::TAG_VECTOR)
return regOp(op);
tmp.alloc(SizeX64::xmmword);
build.vandps(tmp.reg, regOp(op), vectorAndMaskOp());
return tmp.reg;
}
IrConst IrLoweringX64::constOp(IrOp op) const
{
return function.constOp(op);
}
uint8_t IrLoweringX64::tagOp(IrOp op) const
{
return function.tagOp(op);
}
int IrLoweringX64::intOp(IrOp op) const
{
return function.intOp(op);
}
unsigned IrLoweringX64::uintOp(IrOp op) const
{
return function.uintOp(op);
}
double IrLoweringX64::doubleOp(IrOp op) const
{
return function.doubleOp(op);
}
IrBlock& IrLoweringX64::blockOp(IrOp op) const
{
return function.blockOp(op);
}
Label& IrLoweringX64::labelOp(IrOp op) const
{
return blockOp(op).label;
}
OperandX64 IrLoweringX64::vectorAndMaskOp()
{
if (vectorAndMask.base == noreg)
vectorAndMask = build.u32x4(~0u, ~0u, ~0u, 0);
return vectorAndMask;
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau