luau/CodeGen/src/IrLoweringA64.cpp
Arseny Kapoulkine e6bf71871a
CodeGen: Rewrite dot product lowering using a dedicated IR instruction (#1512)
Instead of doing the dot product related math in scalar IR, we lift the
computation into a dedicated IR instruction.

On x64, we can use VDPPS, which was more or less tailor-made for this
purpose. This is better than manual scalar lowering, which requires
reloading components from memory; it's not always a strict improvement
over a shuffle+add version (which we never had), but the sequence can
now be adjusted in IR lowering in an optimal fashion (maybe even based
on CPU vendor, although that'd create issues for offline compilation).
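
A rough sketch of what that single instruction computes, written with
the SSE4.1 intrinsic rather than the exact encoding the lowering emits
(the 0x71 mask here is an assumption: multiply lanes x/y/z and write
the sum into lane 0):

    #include <smmintrin.h> // SSE4.1

    // 3-component dot product via DPPS; lane 3 of the inputs is ignored
    float dot3_dpps(__m128 a, __m128 b)
    {
        __m128 dot = _mm_dp_ps(a, b, 0x71);
        return _mm_cvtss_f32(dot);
    }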

On A64, we can use either naive adds or paired adds, as there is no
dedicated vector-wide horizontal add instruction until SVE. Both run at
about the same performance on M2, but paired adds require fewer
instructions and temporaries.
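
A minimal sketch of the paired-add variant, using NEON intrinsics for
readability; the lowering itself emits fmul/faddp/dup/fadd directly
(see the IrCmd::DOT_VEC case in the file below):

    #include <arm_neon.h>

    // 3-component dot product: multiply lanes, fold x+y with a paired add,
    // then add z; lane 3 of the inputs is ignored
    float dot3_paired(float32x4_t a, float32x4_t b)
    {
        float32x4_t prod = vmulq_f32(a, b);
        float xy = vpadds_f32(vget_low_f32(prod)); // faddp: x + y
        return xy + vgetq_lane_f32(prod, 2);       // fadd:  + z
    }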

I've measured this using the mesh-normal-vector benchmark, changing it
to report just the time of the second loop inside `calculate_normals`,
testing master vs #1504 vs this PR, and increasing the grid size to 400
for more stable timings.

On Zen 4 (7950X), this PR is comfortably ~8% faster vs master, while I
see neutral to negative results from #1504.
On M2 (base), this PR is ~28% faster vs master, while #1504 is only
~10% faster.

If I measure the second loop in `calculate_tangent_space` instead, I
get:

On Zen 4 (7950X), this PR is ~12% faster vs master, while #1504 is ~3%
faster.
On M2 (base), this PR is ~24% faster vs master, while #1504 is only
~13% faster.

Note that the loops in question are not quite optimal, as they store
and reload various vectors through dictionary values instead of keeping
them in locals. The underlying gains in the individual functions are
thus larger than the numbers above; for example, after changing the
`calculate_normals` loop to use a local variable for the normalized
vector (while still saving the result to a dictionary value), I get a
~24% performance increase from this PR on Zen 4 vs master instead of
just 8% (#1504 is ~15% slower in this setup).
2024-11-08 16:23:09 -08:00


// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrLoweringA64.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include "Luau/IrUtils.h"
#include "EmitCommonA64.h"
#include "NativeState.h"
#include "lstate.h"
#include "lgc.h"
namespace Luau
{
namespace CodeGen
{
namespace A64
{
inline ConditionA64 getConditionFP(IrCondition cond)
{
switch (cond)
{
case IrCondition::Equal:
return ConditionA64::Equal;
case IrCondition::NotEqual:
return ConditionA64::NotEqual;
case IrCondition::Less:
return ConditionA64::Minus;
case IrCondition::NotLess:
return ConditionA64::Plus;
case IrCondition::LessEqual:
return ConditionA64::UnsignedLessEqual;
case IrCondition::NotLessEqual:
return ConditionA64::UnsignedGreater;
case IrCondition::Greater:
return ConditionA64::Greater;
case IrCondition::NotGreater:
return ConditionA64::LessEqual;
case IrCondition::GreaterEqual:
return ConditionA64::GreaterEqual;
case IrCondition::NotGreaterEqual:
return ConditionA64::Less;
default:
CODEGEN_ASSERT(!"Unexpected condition code");
return ConditionA64::Always;
}
}
inline ConditionA64 getConditionInt(IrCondition cond)
{
switch (cond)
{
case IrCondition::Equal:
return ConditionA64::Equal;
case IrCondition::NotEqual:
return ConditionA64::NotEqual;
case IrCondition::Less:
return ConditionA64::Minus;
case IrCondition::NotLess:
return ConditionA64::Plus;
case IrCondition::LessEqual:
return ConditionA64::LessEqual;
case IrCondition::NotLessEqual:
return ConditionA64::Greater;
case IrCondition::Greater:
return ConditionA64::Greater;
case IrCondition::NotGreater:
return ConditionA64::LessEqual;
case IrCondition::GreaterEqual:
return ConditionA64::GreaterEqual;
case IrCondition::NotGreaterEqual:
return ConditionA64::Less;
case IrCondition::UnsignedLess:
return ConditionA64::CarryClear;
case IrCondition::UnsignedLessEqual:
return ConditionA64::UnsignedLessEqual;
case IrCondition::UnsignedGreater:
return ConditionA64::UnsignedGreater;
case IrCondition::UnsignedGreaterEqual:
return ConditionA64::CarrySet;
default:
CODEGEN_ASSERT(!"Unexpected condition code");
return ConditionA64::Always;
}
}
static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset)
{
CODEGEN_ASSERT(dst != src);
CODEGEN_ASSERT(offset <= INT_MAX);
if (offset <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(dst, src, uint16_t(offset));
}
else
{
build.mov(dst, int(offset));
build.add(dst, dst, src);
}
}
static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp, IrOp ra, int ratag, Label& skip)
{
RegisterA64 tempw = castReg(KindA64::w, temp);
AddressA64 addr = temp;
// iscollectable(ra)
if (ratag == -1 || !isGCO(ratag))
{
if (ra.kind == IrOpKind::VmReg)
addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, tt));
else if (ra.kind == IrOpKind::VmConst)
emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, tt));
build.ldr(tempw, addr);
build.cmp(tempw, LUA_TSTRING);
build.b(ConditionA64::Less, skip);
}
// isblack(obj2gco(o))
build.ldrb(tempw, mem(object, offsetof(GCheader, marked)));
build.tbz(tempw, BLACKBIT, skip);
// iswhite(gcvalue(ra))
if (ra.kind == IrOpKind::VmReg)
addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, value));
else if (ra.kind == IrOpKind::VmConst)
emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, value));
build.ldr(temp, addr);
build.ldrb(tempw, mem(temp, offsetof(GCheader, marked)));
build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT));
build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
}
static void emitAbort(AssemblyBuilderA64& build, Label& abort)
{
Label skip;
build.b(skip);
build.setLabel(abort);
build.udf();
build.setLabel(skip);
}
static void emitFallback(AssemblyBuilderA64& build, int offset, int pcpos)
{
// fallback(L, instruction, base, k)
build.mov(x0, rState);
emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction));
build.mov(x2, rBase);
build.mov(x3, rConstants);
build.ldr(x4, mem(rNativeContext, offset));
build.blr(x4);
emitUpdateBase(build);
}
static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg)
{
CODEGEN_ASSERT(kTempSlots >= 1);
build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
build.add(x0, sp, sTemporary.data); // sp-relative offset
build.ldr(x1, mem(rNativeContext, uint32_t(func)));
build.blr(x1);
}
static bool emitBuiltin(AssemblyBuilderA64& build, IrFunction& function, IrRegAllocA64& regs, int bfid, int res, int arg, int nresults)
{
switch (bfid)
{
case LBF_MATH_FREXP:
{
CODEGEN_ASSERT(nresults == 1 || nresults == 2);
emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg);
build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, LUA_TNUMBER);
build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));
if (nresults == 2)
{
build.ldr(w0, sTemporary);
build.scvtf(d1, w0);
build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
}
return true;
}
case LBF_MATH_MODF:
{
CODEGEN_ASSERT(nresults == 1 || nresults == 2);
emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg);
build.ldr(d1, sTemporary);
build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, LUA_TNUMBER);
build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));
if (nresults == 2)
{
build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
}
return true;
}
default:
CODEGEN_ASSERT(!"Missing A64 lowering");
return false;
}
}
static uint64_t getDoubleBits(double value)
{
uint64_t result;
static_assert(sizeof(result) == sizeof(value), "Expecting double to be 64-bit");
memcpy(&result, &value, sizeof(value));
return result;
}
IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, IrFunction& function, LoweringStats* stats)
: build(build)
, helpers(helpers)
, function(function)
, stats(stats)
, regs(function, stats, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}})
, valueTracker(function)
, exitHandlerMap(~0u)
{
valueTracker.setRestoreCallack(
this,
[](void* context, IrInst& inst)
{
IrLoweringA64* self = static_cast<IrLoweringA64*>(context);
self->regs.restoreReg(self->build, inst);
}
);
}
void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
{
valueTracker.beforeInstLowering(inst);
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_POINTER:
{
inst.regA64 = regs.allocReg(KindA64::x, index);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.gc));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_DOUBLE:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.n));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_INT:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_FLOAT:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register
AddressA64 addr = tempAddr(inst.a, intOp(inst.b));
build.ldr(temp, addr);
build.fcvt(inst.regA64, temp);
break;
}
case IrCmd::LOAD_TVALUE:
{
inst.regA64 = regs.allocReg(KindA64::q, index);
int addrOffset = inst.b.kind != IrOpKind::None ? intOp(inst.b) : 0;
AddressA64 addr = tempAddr(inst.a, addrOffset);
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_ENV:
inst.regA64 = regs.allocReg(KindA64::x, index);
build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env)));
break;
case IrCmd::GET_ARR_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, array)));
if (inst.b.kind == IrOpKind::Inst)
{
build.add(inst.regA64, inst.regA64, regOp(inst.b), kTValueSizeLog2); // implicit uxtw
}
else if (inst.b.kind == IrOpKind::Constant)
{
if (intOp(inst.b) == 0)
{
// no offset required
}
else if (intOp(inst.b) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) * sizeof(TValue)));
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.mov(temp, intOp(inst.b) * sizeof(TValue));
build.add(inst.regA64, inst.regA64, temp);
}
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
}
case IrCmd::GET_SLOT_NODE_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp1w = castReg(KindA64::w, temp1);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp2x = castReg(KindA64::x, temp2);
// note: since the stride of the load is the same as the destination register size, we can range check the array index, not the byte offset
if (uintOp(inst.b) <= AddressA64::kMaxOffset)
build.ldr(temp1w, mem(rCode, uintOp(inst.b) * sizeof(Instruction)));
else
{
build.mov(temp1, uintOp(inst.b) * sizeof(Instruction));
build.ldr(temp1w, mem(rCode, temp1));
}
// C field can be shifted as long as it's at the most significant byte of the instruction word
CODEGEN_ASSERT(kOffsetOfInstructionC == 3);
build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, nodemask8)));
build.and_(temp2, temp2, temp1w, -24);
// note: this may clobber inst.a, so it's important that we don't use it after this
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
break;
}
case IrCmd::GET_HASH_NODE_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
RegisterA64 temp1 = regs.allocTemp(KindA64::w);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp2x = castReg(KindA64::x, temp2);
// hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode)
build.mov(temp1, -1);
build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, lsizenode)));
build.lsl(temp1, temp1, temp2);
build.mov(temp2, uintOp(inst.b));
build.bic(temp2, temp2, temp1);
// note: this may clobber inst.a, so it's important that we don't use it after this
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
break;
}
case IrCmd::GET_CLOSURE_UPVAL_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
RegisterA64 cl = inst.a.kind == IrOpKind::Undef ? rClosure : regOp(inst.a);
build.add(inst.regA64, cl, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b)));
break;
}
case IrCmd::STORE_TAG:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
if (tagOp(inst.b) == 0)
{
build.str(wzr, addr);
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, tagOp(inst.b));
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_POINTER:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
if (inst.b.kind == IrOpKind::Constant)
{
CODEGEN_ASSERT(intOp(inst.b) == 0);
build.str(xzr, addr);
}
else
{
build.str(regOp(inst.b), addr);
}
break;
}
case IrCmd::STORE_EXTRA:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, extra));
if (intOp(inst.b) == 0)
{
build.str(wzr, addr);
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, intOp(inst.b));
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_DOUBLE:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
if (inst.b.kind == IrOpKind::Constant && getDoubleBits(doubleOp(inst.b)) == 0)
{
build.str(xzr, addr);
}
else
{
RegisterA64 temp = tempDouble(inst.b);
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_INT:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
if (inst.b.kind == IrOpKind::Constant && intOp(inst.b) == 0)
{
build.str(wzr, addr);
}
else
{
RegisterA64 temp = tempInt(inst.b);
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_VECTOR:
{
RegisterA64 temp1 = tempDouble(inst.b);
RegisterA64 temp2 = tempDouble(inst.c);
RegisterA64 temp3 = tempDouble(inst.d);
RegisterA64 temp4 = regs.allocTemp(KindA64::s);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
CODEGEN_ASSERT(addr.kind == AddressKindA64::imm && addr.data % 4 == 0 && unsigned(addr.data + 8) / 4 <= AddressA64::kMaxOffset);
build.fcvt(temp4, temp1);
build.str(temp4, AddressA64(addr.base, addr.data + 0));
build.fcvt(temp4, temp2);
build.str(temp4, AddressA64(addr.base, addr.data + 4));
build.fcvt(temp4, temp3);
build.str(temp4, AddressA64(addr.base, addr.data + 8));
break;
}
case IrCmd::STORE_TVALUE:
{
int addrOffset = inst.c.kind != IrOpKind::None ? intOp(inst.c) : 0;
AddressA64 addr = tempAddr(inst.a, addrOffset);
build.str(regOp(inst.b), addr);
break;
}
case IrCmd::STORE_SPLIT_TVALUE:
{
int addrOffset = inst.d.kind != IrOpKind::None ? intOp(inst.d) : 0;
RegisterA64 tempt = regs.allocTemp(KindA64::w);
AddressA64 addrt = tempAddr(inst.a, offsetof(TValue, tt) + addrOffset);
build.mov(tempt, tagOp(inst.b));
build.str(tempt, addrt);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value) + addrOffset);
if (tagOp(inst.b) == LUA_TBOOLEAN)
{
if (inst.c.kind == IrOpKind::Constant)
{
// note: we reuse tag temp register as value for true booleans, and use built-in zero register for false values
CODEGEN_ASSERT(LUA_TBOOLEAN == 1);
build.str(intOp(inst.c) ? tempt : wzr, addr);
}
else
build.str(regOp(inst.c), addr);
}
else if (tagOp(inst.b) == LUA_TNUMBER)
{
RegisterA64 temp = tempDouble(inst.c);
build.str(temp, addr);
}
else if (isGCO(tagOp(inst.b)))
{
build.str(regOp(inst.c), addr);
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
break;
}
case IrCmd::ADD_INT:
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
build.add(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
else if (inst.a.kind == IrOpKind::Constant && unsigned(intOp(inst.a)) <= AssemblyBuilderA64::kMaxImmediate)
build.add(inst.regA64, regOp(inst.b), uint16_t(intOp(inst.a)));
else
{
RegisterA64 temp1 = tempInt(inst.a);
RegisterA64 temp2 = tempInt(inst.b);
build.add(inst.regA64, temp1, temp2);
}
break;
case IrCmd::SUB_INT:
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
build.sub(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempInt(inst.a);
RegisterA64 temp2 = tempInt(inst.b);
build.sub(inst.regA64, temp1, temp2);
}
break;
case IrCmd::ADD_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fadd(inst.regA64, temp1, temp2);
break;
}
case IrCmd::SUB_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fsub(inst.regA64, temp1, temp2);
break;
}
case IrCmd::MUL_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fmul(inst.regA64, temp1, temp2);
break;
}
case IrCmd::DIV_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fdiv(inst.regA64, temp1, temp2);
break;
}
case IrCmd::IDIV_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fdiv(inst.regA64, temp1, temp2);
build.frintm(inst.regA64, inst.regA64);
break;
}
case IrCmd::MOD_NUM:
{
inst.regA64 = regs.allocReg(KindA64::d, index); // can't allocReuse because both A and B are used twice
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fdiv(inst.regA64, temp1, temp2);
build.frintm(inst.regA64, inst.regA64);
build.fmul(inst.regA64, inst.regA64, temp2);
build.fsub(inst.regA64, temp1, inst.regA64);
break;
}
case IrCmd::MIN_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fcmp(temp1, temp2);
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Less));
break;
}
case IrCmd::MAX_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fcmp(temp1, temp2);
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Greater));
break;
}
case IrCmd::UNM_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.fneg(inst.regA64, temp);
break;
}
case IrCmd::FLOOR_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.frintm(inst.regA64, temp);
break;
}
case IrCmd::CEIL_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.frintp(inst.regA64, temp);
break;
}
case IrCmd::ROUND_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.frinta(inst.regA64, temp);
break;
}
case IrCmd::SQRT_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.fsqrt(inst.regA64, temp);
break;
}
case IrCmd::ABS_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.fabs(inst.regA64, temp);
break;
}
case IrCmd::SIGN_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
RegisterA64 temp0 = regs.allocTemp(KindA64::d);
RegisterA64 temp1 = regs.allocTemp(KindA64::d);
build.fcmpz(temp);
build.fmov(temp0, 0.0);
build.fmov(temp1, 1.0);
build.fcsel(inst.regA64, temp1, temp0, getConditionFP(IrCondition::Greater));
build.fmov(temp1, -1.0);
build.fcsel(inst.regA64, temp1, inst.regA64, getConditionFP(IrCondition::Less));
break;
}
case IrCmd::ADD_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fadd(inst.regA64, regOp(inst.a), regOp(inst.b));
break;
}
case IrCmd::SUB_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fsub(inst.regA64, regOp(inst.a), regOp(inst.b));
break;
}
case IrCmd::MUL_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fmul(inst.regA64, regOp(inst.a), regOp(inst.b));
break;
}
case IrCmd::DIV_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fdiv(inst.regA64, regOp(inst.a), regOp(inst.b));
break;
}
case IrCmd::UNM_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a});
build.fneg(inst.regA64, regOp(inst.a));
break;
}
case IrCmd::DOT_VEC:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = regs.allocTemp(KindA64::q);
RegisterA64 temps = castReg(KindA64::s, temp);
RegisterA64 regs = castReg(KindA64::s, inst.regA64);
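// multiply lanes, then reduce with a paired add: faddp folds x+y, dup+fadd brings in z, fcvt widens the single-precision result to double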
build.fmul(temp, regOp(inst.a), regOp(inst.b));
build.faddp(regs, temps); // x+y
build.dup_4s(temp, temp, 2);
build.fadd(regs, regs, temps); // +z
build.fcvt(inst.regA64, regs);
break;
}
case IrCmd::NOT_ANY:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
// other cases should've been constant folded
CODEGEN_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN);
build.eor(inst.regA64, regOp(inst.b), 1);
}
else
{
Label notbool, exit;
// use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once
CODEGEN_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1);
build.cmp(regOp(inst.a), LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, notbool);
if (inst.b.kind == IrOpKind::Constant)
build.mov(inst.regA64, intOp(inst.b) == 0 ? 1 : 0);
else
build.eor(inst.regA64, regOp(inst.b), 1); // boolean => invert value
build.b(exit);
// not boolean => result is true iff tag was nil
build.setLabel(notbool);
build.cset(inst.regA64, ConditionA64::Less);
build.setLabel(exit);
}
break;
}
case IrCmd::CMP_ANY:
{
IrCondition cond = conditionOp(inst.c);
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (cond == IrCondition::LessEqual)
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal)));
else if (cond == IrCondition::Less)
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan)));
else if (cond == IrCondition::Equal)
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval)));
else
CODEGEN_ASSERT(!"Unsupported condition");
build.blr(x3);
emitUpdateBase(build);
inst.regA64 = regs.takeReg(w0, index);
break;
}
case IrCmd::JUMP:
if (inst.a.kind == IrOpKind::Undef || inst.a.kind == IrOpKind::VmExit)
{
Label fresh;
build.b(getTargetLabel(inst.a, fresh));
finalizeTargetLabel(inst.a, fresh);
}
else
{
jumpOrFallthrough(blockOp(inst.a), next);
}
break;
case IrCmd::JUMP_IF_TRUTHY:
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
// nil => falsy
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(temp, labelOp(inst.c));
// not boolean => truthy
build.cmp(temp, LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, labelOp(inst.b));
// compare boolean value
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
build.cbnz(temp, labelOp(inst.b));
jumpOrFallthrough(blockOp(inst.c), next);
break;
}
case IrCmd::JUMP_IF_FALSY:
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
// nil => falsy
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(temp, labelOp(inst.b));
// not boolean => truthy
build.cmp(temp, LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, labelOp(inst.c));
// compare boolean value
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
build.cbz(temp, labelOp(inst.b));
jumpOrFallthrough(blockOp(inst.c), next);
break;
}
case IrCmd::JUMP_EQ_TAG:
{
RegisterA64 zr = noreg;
if (inst.a.kind == IrOpKind::Constant && tagOp(inst.a) == 0)
zr = regOp(inst.b);
else if (inst.b.kind == IrOpKind::Constant && tagOp(inst.b) == 0)
zr = regOp(inst.a);
else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.cmp(regOp(inst.a), tagOp(inst.b));
else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Inst)
build.cmp(regOp(inst.a), regOp(inst.b));
else if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Inst)
build.cmp(regOp(inst.b), tagOp(inst.a));
else
CODEGEN_ASSERT(!"Unsupported instruction form");
if (isFallthroughBlock(blockOp(inst.d), next))
{
if (zr != noreg)
build.cbz(zr, labelOp(inst.c));
else
build.b(ConditionA64::Equal, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
}
else
{
if (zr != noreg)
build.cbnz(zr, labelOp(inst.d));
else
build.b(ConditionA64::NotEqual, labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.c), next);
}
break;
}
case IrCmd::JUMP_CMP_INT:
{
IrCondition cond = conditionOp(inst.c);
if (cond == IrCondition::Equal && intOp(inst.b) == 0)
{
build.cbz(regOp(inst.a), labelOp(inst.d));
}
else if (cond == IrCondition::NotEqual && intOp(inst.b) == 0)
{
build.cbnz(regOp(inst.a), labelOp(inst.d));
}
else
{
CODEGEN_ASSERT(unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate);
build.cmp(regOp(inst.a), uint16_t(intOp(inst.b)));
build.b(getConditionInt(cond), labelOp(inst.d));
}
jumpOrFallthrough(blockOp(inst.e), next);
break;
}
case IrCmd::JUMP_EQ_POINTER:
build.cmp(regOp(inst.a), regOp(inst.b));
build.b(ConditionA64::Equal, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::JUMP_CMP_NUM:
{
IrCondition cond = conditionOp(inst.c);
if (inst.b.kind == IrOpKind::Constant && doubleOp(inst.b) == 0.0)
{
RegisterA64 temp = tempDouble(inst.a);
build.fcmpz(temp);
}
else
{
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fcmp(temp1, temp2);
}
build.b(getConditionFP(cond), labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.e), next);
break;
}
case IrCmd::JUMP_FORN_LOOP_COND:
{
RegisterA64 index = tempDouble(inst.a);
RegisterA64 limit = tempDouble(inst.b);
RegisterA64 step = tempDouble(inst.c);
Label direct;
// step > 0
build.fcmpz(step);
build.b(getConditionFP(IrCondition::Greater), direct);
// !(limit <= index)
build.fcmp(limit, index);
build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e));
build.b(labelOp(inst.d));
// !(index <= limit)
build.setLabel(direct);
build.fcmp(index, limit);
build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e));
jumpOrFallthrough(blockOp(inst.d), next);
break;
}
// IrCmd::JUMP_SLOT_MATCH implemented below
case IrCmd::TABLE_LEN:
{
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
regs.spill(build, index, {reg});
build.mov(x0, reg);
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, luaH_getn)));
build.blr(x1);
inst.regA64 = regs.takeReg(w0, index);
break;
}
case IrCmd::STRING_LEN:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(TString, len)));
break;
}
case IrCmd::TABLE_SETNUM:
{
// note: we need to call regOp before spill so that we don't do redundant reloads
RegisterA64 table = regOp(inst.a);
RegisterA64 key = regOp(inst.b);
RegisterA64 temp = regs.allocTemp(KindA64::w);
regs.spill(build, index, {table, key});
if (w1 != key)
{
build.mov(x1, table);
build.mov(w2, key);
}
else
{
build.mov(temp, w1);
build.mov(x1, table);
build.mov(w2, temp);
}
build.mov(x0, rState);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_setnum)));
build.blr(x3);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::NEW_TABLE:
{
regs.spill(build, index);
build.mov(x0, rState);
build.mov(x1, uintOp(inst.a));
build.mov(x2, uintOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_new)));
build.blr(x3);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::DUP_TABLE:
{
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaH_clone)));
build.blr(x2);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::TRY_NUM_TO_INDEX:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
RegisterA64 temp1 = tempDouble(inst.a);
if (build.features & Feature_JSCVT)
{
build.fjcvtzs(inst.regA64, temp1); // fjcvtzs sets PSTATE.Z (equal) iff conversion is exact
build.b(ConditionA64::NotEqual, labelOp(inst.b));
}
else
{
RegisterA64 temp2 = regs.allocTemp(KindA64::d);
build.fcvtzs(inst.regA64, temp1);
build.scvtf(temp2, inst.regA64);
build.fcmp(temp1, temp2);
build.b(ConditionA64::NotEqual, labelOp(inst.b));
}
break;
}
case IrCmd::TRY_CALL_FASTGETTM:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.ldr(temp1, mem(regOp(inst.a), offsetof(Table, metatable)));
build.cbz(temp1, labelOp(inst.c)); // no metatable
build.ldrb(temp2, mem(temp1, offsetof(Table, tmcache)));
build.tst(temp2, 1 << intOp(inst.b)); // can't use tbz/tbnz because their jump offsets are too short
build.b(ConditionA64::NotEqual, labelOp(inst.c)); // Equal = Zero after tst; tmcache caches *absence* of metamethods
regs.spill(build, index, {temp1});
build.mov(x0, temp1);
build.mov(w1, intOp(inst.b));
build.ldr(x2, mem(rGlobalState, offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm)));
build.blr(x3);
build.cbz(x0, labelOp(inst.c)); // no tag method
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::NEW_USERDATA:
{
regs.spill(build, index);
build.mov(x0, rState);
build.mov(x1, intOp(inst.a));
build.mov(x2, intOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, newUserdata)));
build.blr(x3);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::INT_TO_NUM:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = tempInt(inst.a);
build.scvtf(inst.regA64, temp);
break;
}
case IrCmd::UINT_TO_NUM:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = tempInt(inst.a);
build.ucvtf(inst.regA64, temp);
break;
}
case IrCmd::NUM_TO_INT:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
RegisterA64 temp = tempDouble(inst.a);
build.fcvtzs(inst.regA64, temp);
break;
}
case IrCmd::NUM_TO_UINT:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
RegisterA64 temp = tempDouble(inst.a);
// note: we don't use fcvtzu for consistency with C++ code
build.fcvtzs(castReg(KindA64::x, inst.regA64), temp);
break;
}
case IrCmd::NUM_TO_VEC:
{
inst.regA64 = regs.allocReg(KindA64::q, index);
if (inst.a.kind == IrOpKind::Constant)
{
float value = float(doubleOp(inst.a));
uint32_t asU32;
static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit");
memcpy(&asU32, &value, sizeof(value));
if (AssemblyBuilderA64::isFmovSupported(value))
{
build.fmov(inst.regA64, value);
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
uint32_t vec[4] = {asU32, asU32, asU32, 0};
build.adr(temp, vec, sizeof(vec));
build.ldr(inst.regA64, temp);
}
}
else
{
RegisterA64 tempd = tempDouble(inst.a);
RegisterA64 temps = regs.allocTemp(KindA64::s);
build.fcvt(temps, tempd);
build.dup_4s(inst.regA64, castReg(KindA64::q, temps), 0);
}
break;
}
case IrCmd::TAG_VECTOR:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a});
RegisterA64 reg = regOp(inst.a);
RegisterA64 tempw = regs.allocTemp(KindA64::w);
if (inst.regA64 != reg)
build.mov(inst.regA64, reg);
build.mov(tempw, LUA_TVECTOR);
build.ins_4s(inst.regA64, tempw, 3);
break;
}
case IrCmd::ADJUST_STACK_TO_REG:
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
if (inst.b.kind == IrOpKind::Constant)
{
build.add(temp, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
build.str(temp, mem(rState, offsetof(lua_State, top)));
}
else if (inst.b.kind == IrOpKind::Inst)
{
build.add(temp, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.add(temp, temp, regOp(inst.b), kTValueSizeLog2); // implicit uxtw
build.str(temp, mem(rState, offsetof(lua_State, top)));
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
}
case IrCmd::ADJUST_STACK_TO_TOP:
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.ldr(temp, mem(rState, offsetof(lua_State, ci)));
build.ldr(temp, mem(temp, offsetof(CallInfo, top)));
build.str(temp, mem(rState, offsetof(lua_State, top)));
break;
}
case IrCmd::FASTCALL:
regs.spill(build, index);
error |= !emitBuiltin(build, function, regs, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d));
break;
case IrCmd::INVOKE_FASTCALL:
{
// We might need a temporary and we have to preserve it over the spill
RegisterA64 temp = regs.allocTemp(KindA64::q);
regs.spill(build, index, {temp});
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
build.mov(w3, intOp(inst.g)); // nresults
// 'E' argument can only be produced by LOP_FASTCALL3 lowering
if (inst.e.kind != IrOpKind::Undef)
{
CODEGEN_ASSERT(intOp(inst.f) == 3);
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
build.ldr(temp, mem(rBase, vmRegOp(inst.d) * sizeof(TValue)));
build.str(temp, mem(x4, 0));
build.ldr(temp, mem(rBase, vmRegOp(inst.e) * sizeof(TValue)));
build.str(temp, mem(x4, sizeof(TValue)));
}
else
{
if (inst.d.kind == IrOpKind::VmReg)
build.add(x4, rBase, uint16_t(vmRegOp(inst.d) * sizeof(TValue)));
else if (inst.d.kind == IrOpKind::VmConst)
emitAddOffset(build, x4, rConstants, vmConstOp(inst.d) * sizeof(TValue));
else
CODEGEN_ASSERT(inst.d.kind == IrOpKind::Undef);
}
// nparams
if (intOp(inst.f) == LUA_MULTRET)
{
// L->top - (ra + 1)
build.ldr(x5, mem(rState, offsetof(lua_State, top)));
build.sub(x5, x5, rBase);
build.sub(x5, x5, uint16_t((vmRegOp(inst.b) + 1) * sizeof(TValue)));
build.lsr(x5, x5, kTValueSizeLog2);
}
else
build.mov(w5, intOp(inst.f));
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(inst.a) * sizeof(luau_FastFunction)));
build.blr(x6);
inst.regA64 = regs.takeReg(w0, index);
break;
}
case IrCmd::CHECK_FASTCALL_RES:
build.cmp(regOp(inst.a), 0);
build.b(ConditionA64::Less, labelOp(inst.b));
break;
case IrCmd::DO_ARITH:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
if (inst.b.kind == IrOpKind::VmConst)
emitAddOffset(build, x2, rConstants, vmConstOp(inst.b) * sizeof(TValue));
else
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (inst.c.kind == IrOpKind::VmConst)
emitAddOffset(build, x3, rConstants, vmConstOp(inst.c) * sizeof(TValue));
else
build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
switch (TMS(intOp(inst.d)))
{
case TM_ADD:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithadd)));
break;
case TM_SUB:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithsub)));
break;
case TM_MUL:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmul)));
break;
case TM_DIV:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithdiv)));
break;
case TM_IDIV:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithidiv)));
break;
case TM_MOD:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmod)));
break;
case TM_POW:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithpow)));
break;
case TM_UNM:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithunm)));
break;
default:
CODEGEN_ASSERT(!"Invalid doarith helper operation tag");
break;
}
build.blr(x4);
emitUpdateBase(build);
break;
case IrCmd::DO_LEN:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_dolen)));
build.blr(x3);
emitUpdateBase(build);
break;
case IrCmd::GET_TABLE:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (inst.c.kind == IrOpKind::VmReg)
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
else if (inst.c.kind == IrOpKind::Constant)
{
TValue n = {};
setnvalue(&n, uintOp(inst.c));
build.adr(x2, &n, sizeof(n));
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_gettable)));
build.blr(x4);
emitUpdateBase(build);
break;
case IrCmd::SET_TABLE:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (inst.c.kind == IrOpKind::VmReg)
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
else if (inst.c.kind == IrOpKind::Constant)
{
TValue n = {};
setnvalue(&n, uintOp(inst.c));
build.adr(x2, &n, sizeof(n));
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_settable)));
build.blr(x4);
emitUpdateBase(build);
break;
case IrCmd::GET_IMPORT:
regs.spill(build, index);
// luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false)
build.mov(x0, rState);
build.ldr(x1, mem(rClosure, offsetof(Closure, env)));
build.mov(x2, rConstants);
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.mov(w4, uintOp(inst.b));
build.mov(w5, 0);
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luaV_getimport)));
build.blr(x6);
emitUpdateBase(build);
break;
case IrCmd::CONCAT:
regs.spill(build, index);
build.mov(x0, rState);
build.mov(w1, uintOp(inst.b));
build.mov(w2, vmRegOp(inst.a) + uintOp(inst.b) - 1);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_concat)));
build.blr(x3);
emitUpdateBase(build);
break;
case IrCmd::GET_UPVALUE:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::q);
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b)));
// uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value
Label skip;
build.ldr(temp3, mem(temp1, offsetof(TValue, tt)));
build.cmp(temp3, LUA_TUPVAL);
build.b(ConditionA64::NotEqual, skip);
// UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally)
build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc)));
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
build.setLabel(skip);
build.ldr(temp2, temp1);
build.str(temp2, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
break;
}
case IrCmd::SET_UPVALUE:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
RegisterA64 temp3 = regs.allocTemp(KindA64::q);
// UpVal*
build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)));
build.ldr(temp2, mem(temp1, offsetof(UpVal, v)));
build.ldr(temp3, mem(rBase, vmRegOp(inst.b) * sizeof(TValue)));
build.str(temp3, temp2);
if (inst.c.kind == IrOpKind::Undef || isGCO(tagOp(inst.c)))
{
Label skip;
checkObjectBarrierConditions(build, temp1, temp2, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
size_t spills = regs.spill(build, index, {temp1});
build.mov(x1, temp1);
build.mov(x0, rState);
build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
build.blr(x3);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
}
break;
}
case IrCmd::CHECK_TAG:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& fail = getTargetLabel(inst.c, fresh);
if (tagOp(inst.b) == 0)
{
build.cbnz(regOp(inst.a), fail);
}
else
{
build.cmp(regOp(inst.a), tagOp(inst.b));
build.b(ConditionA64::NotEqual, fail);
}
finalizeTargetLabel(inst.c, fresh);
break;
}
case IrCmd::CHECK_TRUTHY:
{
// Constant tags which don't require boolean value check should've been removed in constant folding
CODEGEN_ASSERT(inst.a.kind != IrOpKind::Constant || tagOp(inst.a) == LUA_TBOOLEAN);
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& target = getTargetLabel(inst.c, fresh);
Label skip;
if (inst.a.kind != IrOpKind::Constant)
{
// fail to fallback on 'nil' (falsy)
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(regOp(inst.a), target);
// skip value test if it's not a boolean (truthy)
build.cmp(regOp(inst.a), LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, skip);
}
// fail to fallback on 'false' boolean value (falsy)
if (inst.b.kind != IrOpKind::Constant)
{
build.cbz(regOp(inst.b), target);
}
else
{
if (intOp(inst.b) == 0)
build.b(target);
}
if (inst.a.kind != IrOpKind::Constant)
build.setLabel(skip);
finalizeTargetLabel(inst.c, fresh);
break;
}
case IrCmd::CHECK_READONLY:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldrb(temp, mem(regOp(inst.a), offsetof(Table, readonly)));
build.cbnz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
break;
}
case IrCmd::CHECK_NO_METATABLE:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.ldr(temp, mem(regOp(inst.a), offsetof(Table, metatable)));
build.cbnz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
break;
}
case IrCmd::CHECK_SAFE_ENV:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::x);
RegisterA64 tempw = castReg(KindA64::w, temp);
build.ldr(temp, mem(rClosure, offsetof(Closure, env)));
build.ldrb(tempw, mem(temp, offsetof(Table, safeenv)));
build.cbz(tempw, getTargetLabel(inst.a, fresh));
finalizeTargetLabel(inst.a, fresh);
break;
}
case IrCmd::CHECK_ARRAY_SIZE:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& fail = getTargetLabel(inst.c, fresh);
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(Table, sizearray)));
if (inst.b.kind == IrOpKind::Inst)
{
build.cmp(temp, regOp(inst.b));
build.b(ConditionA64::UnsignedLessEqual, fail);
}
else if (inst.b.kind == IrOpKind::Constant)
{
if (intOp(inst.b) == 0)
{
build.cbz(temp, fail);
}
else if (size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
{
build.cmp(temp, uint16_t(intOp(inst.b)));
build.b(ConditionA64::UnsignedLessEqual, fail);
}
else
{
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.mov(temp2, intOp(inst.b));
build.cmp(temp, temp2);
build.b(ConditionA64::UnsignedLessEqual, fail);
}
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
finalizeTargetLabel(inst.c, fresh);
break;
}
case IrCmd::JUMP_SLOT_MATCH:
case IrCmd::CHECK_SLOT_MATCH:
{
Label abort; // used when guard aborts execution
const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? inst.d : inst.c;
Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp);
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp1w = castReg(KindA64::w, temp1);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
CODEGEN_ASSERT(offsetof(LuaNode, key.value) == offsetof(LuaNode, key) && kOffsetOfTKeyTagNext >= 8 && kOffsetOfTKeyTagNext < 16);
build.ldp(temp1, temp2, mem(regOp(inst.a), offsetof(LuaNode, key))); // load key.value into temp1 and key.tt (alongside other bits) into temp2
build.ubfx(temp2, temp2, (kOffsetOfTKeyTagNext - 8) * 8, kTKeyTagBits); // .tt is right before .next, and 8 bytes are skipped by ldp
build.cmp(temp2, LUA_TSTRING);
build.b(ConditionA64::NotEqual, mismatch);
AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
build.ldr(temp2, addr);
build.cmp(temp1, temp2);
build.b(ConditionA64::NotEqual, mismatch);
build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(temp1w, mismatch);
if (inst.cmd == IrCmd::JUMP_SLOT_MATCH)
jumpOrFallthrough(blockOp(inst.c), next);
else if (abort.id)
emitAbort(build, abort);
break;
}
case IrCmd::CHECK_NODE_NO_NEXT:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTagNext));
build.lsr(temp, temp, kTKeyTagBits);
build.cbnz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
break;
}
case IrCmd::CHECK_NODE_VALUE:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
break;
}
case IrCmd::CHECK_BUFFER_LEN:
{
int accessSize = intOp(inst.c);
CODEGEN_ASSERT(accessSize > 0 && accessSize <= int(AssemblyBuilderA64::kMaxImmediate));
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& target = getTargetLabel(inst.d, fresh);
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(Buffer, len)));
if (inst.b.kind == IrOpKind::Inst)
{
if (accessSize == 1)
{
// fails if offset >= len
build.cmp(temp, regOp(inst.b));
build.b(ConditionA64::UnsignedLessEqual, target);
}
else
{
// fails if offset + size > len; we compute it as len - offset < size
RegisterA64 tempx = castReg(KindA64::x, temp);
build.sub(tempx, tempx, regOp(inst.b)); // implicit uxtw
build.cmp(tempx, uint16_t(accessSize));
build.b(ConditionA64::Less, target); // note: this is a signed 64-bit comparison so that out of bounds offset fails
}
}
else if (inst.b.kind == IrOpKind::Constant)
{
int offset = intOp(inst.b);
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
{
build.b(target);
}
else if (offset + accessSize <= int(AssemblyBuilderA64::kMaxImmediate))
{
build.cmp(temp, uint16_t(offset + accessSize));
build.b(ConditionA64::UnsignedLessEqual, target);
}
else
{
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.mov(temp2, offset + accessSize);
build.cmp(temp, temp2);
build.b(ConditionA64::UnsignedLessEqual, target);
}
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
}
finalizeTargetLabel(inst.d, fresh);
break;
}
case IrCmd::CHECK_USERDATA_TAG:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& fail = getTargetLabel(inst.c, fresh);
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldrb(temp, mem(regOp(inst.a), offsetof(Udata, tag)));
build.cmp(temp, intOp(inst.b));
build.b(ConditionA64::NotEqual, fail);
finalizeTargetLabel(inst.c, fresh);
break;
}
case IrCmd::INTERRUPT:
{
regs.spill(build, index);
Label self;
build.ldr(x0, mem(rGlobalState, offsetof(global_State, cb.interrupt)));
build.cbnz(x0, self);
Label next = build.setLabel();
interruptHandlers.push_back({self, uintOp(inst.a), next});
break;
}
case IrCmd::CHECK_GC:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
CODEGEN_ASSERT(offsetof(global_State, totalbytes) == offsetof(global_State, GCthreshold) + 8);
Label skip;
build.ldp(temp1, temp2, mem(rGlobalState, offsetof(global_State, GCthreshold)));
build.cmp(temp1, temp2);
build.b(ConditionA64::UnsignedGreater, skip);
size_t spills = regs.spill(build, index);
build.mov(x0, rState);
build.mov(w1, 1);
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaC_step)));
build.blr(x2);
emitUpdateBase(build);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
build.setLabel(skip);
break;
}
case IrCmd::BARRIER_OBJ:
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
Label skip;
checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
size_t spills = regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
build.blr(x3);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
break;
}
case IrCmd::BARRIER_TABLE_BACK:
{
Label skip;
RegisterA64 temp = regs.allocTemp(KindA64::w);
// isblack(obj2gco(t))
build.ldrb(temp, mem(regOp(inst.a), offsetof(GCheader, marked)));
build.tbz(temp, BLACKBIT, skip);
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
size_t spills = regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.add(x2, x1, uint16_t(offsetof(Table, gclist)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierback)));
build.blr(x3);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
break;
}
case IrCmd::BARRIER_TABLE_FORWARD:
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
Label skip;
checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
size_t spills = regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.ldr(x2, addr);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barriertable)));
build.blr(x3);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
break;
}
case IrCmd::SET_SAVEDPC:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
emitAddOffset(build, temp1, rCode, uintOp(inst.a) * sizeof(Instruction));
build.ldr(temp2, mem(rState, offsetof(lua_State, ci)));
build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc)));
break;
}
case IrCmd::CLOSE_UPVALS:
{
Label skip;
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
// L->openupval != 0
build.ldr(temp1, mem(rState, offsetof(lua_State, openupval)));
build.cbz(temp1, skip);
// ra <= L->openupval->v
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
build.add(temp2, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.cmp(temp2, temp1);
build.b(ConditionA64::UnsignedGreater, skip);
size_t spills = regs.spill(build, index, {temp2});
build.mov(x1, temp2);
build.mov(x0, rState);
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_close)));
build.blr(x2);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
build.setLabel(skip);
break;
}
case IrCmd::CAPTURE:
// no-op
break;
case IrCmd::SETLIST:
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeSETLIST), uintOp(inst.a));
break;
case IrCmd::CALL:
regs.spill(build, index);
// argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams;
if (intOp(inst.b) == LUA_MULTRET)
build.ldr(x2, mem(rState, offsetof(lua_State, top)));
else
build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + 1 + intOp(inst.b)) * sizeof(TValue)));
// callFallback(L, ra, argtop, nresults)
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.mov(w3, intOp(inst.c));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
build.blr(x4);
emitUpdateBase(build);
// reentry with x0=closure (NULL implies C function; CALL_FALLBACK_YIELD will trigger exit)
build.cbnz(x0, helpers.continueCall);
break;
case IrCmd::RETURN:
regs.spill(build, index);
if (function.variadic)
{
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.ldr(x1, mem(x1, offsetof(CallInfo, func)));
}
else if (intOp(inst.b) != 1)
build.sub(x1, rBase, sizeof(TValue)); // invariant: ci->func + 1 == ci->base for non-variadic frames
if (intOp(inst.b) == 0)
{
build.mov(w2, 0);
build.b(helpers.return_);
}
else if (intOp(inst.b) == 1 && !function.variadic)
{
// fast path: minimizes x1 adjustments
// note that we skipped x1 computation for this specific case above
build.ldr(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
build.str(q0, mem(rBase, -int(sizeof(TValue))));
build.mov(x1, rBase);
build.mov(w2, 1);
build.b(helpers.return_);
}
else if (intOp(inst.b) >= 1 && intOp(inst.b) <= 3)
{
for (int r = 0; r < intOp(inst.b); ++r)
{
build.ldr(q0, mem(rBase, (vmRegOp(inst.a) + r) * sizeof(TValue)));
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
}
build.mov(w2, intOp(inst.b));
build.b(helpers.return_);
}
else
{
build.mov(w2, 0);
// vali = ra
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
// valend = (n == LUA_MULTRET) ? L->top : ra + n
if (intOp(inst.b) == LUA_MULTRET)
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
else
build.add(x4, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
Label repeatValueLoop, exitValueLoop;
if (intOp(inst.b) == LUA_MULTRET)
{
build.cmp(x3, x4);
build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual
}
build.setLabel(repeatValueLoop);
build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post));
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
build.add(w2, w2, 1);
build.cmp(x3, x4);
build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess
build.setLabel(exitValueLoop);
build.b(helpers.return_);
}
break;
case IrCmd::FORGLOOP:
// register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables
regs.spill(build, index);
// clear extra variables since we might have more than two
if (intOp(inst.b) > 2)
{
CODEGEN_ASSERT(LUA_TNIL == 0);
for (int i = 2; i < intOp(inst.b); ++i)
build.str(wzr, mem(rBase, (vmRegOp(inst.a) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt)));
}
// we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here
build.mov(x0, rState);
build.ldr(x1, mem(rBase, (vmRegOp(inst.a) + 1) * sizeof(TValue) + offsetof(TValue, value.gc)));
build.ldr(w2, mem(rBase, (vmRegOp(inst.a) + 2) * sizeof(TValue) + offsetof(TValue, value.p)));
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter)));
build.blr(x4);
// note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack
build.cbnz(w0, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGLOOP_FALLBACK:
regs.spill(build, index);
build.mov(x0, rState);
build.mov(w1, vmRegOp(inst.a));
build.mov(w2, intOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback)));
build.blr(x3);
emitUpdateBase(build);
build.cbnz(w0, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGPREP_XNEXT_FALLBACK:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
build.mov(w2, uintOp(inst.a) + 1);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback)));
build.blr(x3);
// note: no emitUpdateBase necessary because forgPrepXnextFallback does not reallocate stack
jumpOrFallthrough(blockOp(inst.c), next);
break;
case IrCmd::COVERAGE:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
build.mov(temp1, uintOp(inst.a) * sizeof(Instruction));
build.ldr(temp2, mem(rCode, temp1));
// increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1
// note: cmp can be eliminated with adds but we aren't concerned with code size for coverage
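// csel below keeps the previous value once the add goes negative, so the counter saturates instead of wrapping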
build.add(temp3, temp2, 256);
build.cmp(temp3, 0);
build.csel(temp2, temp2, temp3, ConditionA64::Less);
build.str(temp2, mem(rCode, temp1));
break;
}
// Full instruction fallbacks
case IrCmd::FALLBACK_GETGLOBAL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeGETGLOBAL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETGLOBAL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeSETGLOBAL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETTABLEKS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeGETTABLEKS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETTABLEKS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeSETTABLEKS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_NAMECALL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeNAMECALL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_PREPVARARGS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::Constant);
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executePREPVARARGS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETVARARGS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::Constant);
regs.spill(build, index);
build.mov(x0, rState);
if (intOp(inst.c) == LUA_MULTRET)
{
emitAddOffset(build, x1, rCode, uintOp(inst.a) * sizeof(Instruction));
build.mov(x2, rBase);
build.mov(w3, vmRegOp(inst.b));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSMultRet)));
build.blr(x4);
emitUpdateBase(build);
}
else
{
build.mov(x1, rBase);
build.mov(w2, vmRegOp(inst.b));
build.mov(w3, intOp(inst.c));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSConst)));
build.blr(x4);
// note: no emitUpdateBase necessary because executeGETVARARGSConst does not reallocate stack
}
break;
case IrCmd::NEWCLOSURE:
{
RegisterA64 reg = regOp(inst.b); // note: we need to call regOp before spill so that we don't do redundant reloads
regs.spill(build, index, {reg});
build.mov(x2, reg);
build.mov(x0, rState);
build.mov(w1, uintOp(inst.a));
build.ldr(x3, mem(rClosure, offsetof(Closure, l.p)));
build.ldr(x3, mem(x3, offsetof(Proto, p)));
build.ldr(x3, mem(x3, sizeof(Proto*) * uintOp(inst.c)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaF_newLclosure)));
build.blr(x4);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::FALLBACK_DUPCLOSURE:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(inst.a));
break;
case IrCmd::FALLBACK_FORGPREP:
regs.spill(build, index);
emitFallback(build, offsetof(NativeContext, executeFORGPREP), uintOp(inst.a));
jumpOrFallthrough(blockOp(inst.c), next);
break;
// Pseudo instructions
case IrCmd::NOP:
case IrCmd::SUBSTITUTE:
CODEGEN_ASSERT(!"Pseudo instructions should not be lowered");
break;
case IrCmd::BITAND_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
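// A64 logical immediates only encode certain repeating patterns of contiguous ones, hence the isMaskSupported check before using the immediate form (same for the bitwise ops below)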
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
build.and_(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.and_(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITXOR_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
build.eor(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.eor(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITOR_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
build.orr(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.orr(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITNOT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
build.mvn_(inst.regA64, temp);
break;
}
case IrCmd::BITLSHIFT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.lsl(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.lsl(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITRSHIFT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.lsr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.lsr(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITARSHIFT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.asr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.asr(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITLROTATE_UINT:
{
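// A64 only has a rotate-right; rotate left by k becomes ror by (32 - k) & 31, and the variable case negates the amount (ror takes it modulo 32)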
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
build.ror(inst.regA64, regOp(inst.a), uint8_t((32 - unsigned(intOp(inst.b))) & 31));
}
else
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); // can't reuse a because it would be clobbered by neg
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.neg(inst.regA64, temp2);
build.ror(inst.regA64, temp1, inst.regA64);
}
break;
}
case IrCmd::BITRROTATE_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.ror(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.ror(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITCOUNTLZ_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
build.clz(inst.regA64, temp);
break;
}
case IrCmd::BITCOUNTRZ_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
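// there is no count-trailing-zeros instruction in base A64, so reverse the bits and count leading zeros instead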
build.rbit(inst.regA64, temp);
build.clz(inst.regA64, inst.regA64);
break;
}
case IrCmd::BYTESWAP_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
build.rev(inst.regA64, temp);
break;
}
case IrCmd::INVOKE_LIBM:
{
if (inst.c.kind != IrOpKind::None)
{
bool isInt = (inst.c.kind == IrOpKind::Constant) ? constOp(inst.c).kind == IrConstKind::Int
: getCmdValueKind(function.instOp(inst.c).cmd) == IrValueKind::Int;
RegisterA64 temp1 = tempDouble(inst.b);
RegisterA64 temp2 = isInt ? tempInt(inst.c) : tempDouble(inst.c);
RegisterA64 temp3 = isInt ? noreg : regs.allocTemp(KindA64::d); // note: spill() frees all registers so we need to avoid alloc after spill
regs.spill(build, index, {temp1, temp2});
if (isInt)
{
build.fmov(d0, temp1);
build.mov(w0, temp2);
}
else if (d0 != temp2)
{
build.fmov(d0, temp1);
build.fmov(d1, temp2);
}
else
{
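// the second argument already lives in d0, so stash it in temp3 before loading the first argument to avoid clobbering it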
build.fmov(temp3, d0);
build.fmov(d0, temp1);
build.fmov(d1, temp3);
}
}
else
{
RegisterA64 temp1 = tempDouble(inst.b);
regs.spill(build, index, {temp1});
build.fmov(d0, temp1);
}
build.ldr(x1, mem(rNativeContext, getNativeContextOffset(uintOp(inst.a))));
build.blr(x1);
inst.regA64 = regs.takeReg(d0, index);
break;
}
case IrCmd::GET_TYPE:
{
inst.regA64 = regs.allocReg(KindA64::x, index);
CODEGEN_ASSERT(sizeof(TString*) == 8);
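// the tag indexes global_State::ttname; the shift by 3 (or the * 8 below) scales it by sizeof(TString*)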
if (inst.a.kind == IrOpKind::Inst)
build.add(inst.regA64, rGlobalState, regOp(inst.a), 3); // implicit uxtw
else if (inst.a.kind == IrOpKind::Constant)
build.add(inst.regA64, rGlobalState, uint16_t(tagOp(inst.a)) * 8);
else
CODEGEN_ASSERT(!"Unsupported instruction form");
build.ldr(inst.regA64, mem(inst.regA64, offsetof(global_State, ttname)));
break;
}
case IrCmd::GET_TYPEOF:
{
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr)));
build.blr(x2);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::FINDUPVAL:
{
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_findupval)));
build.blr(x2);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::BUFFER_READI8:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
build.ldrsb(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_READU8:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
build.ldrb(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEI8:
{
RegisterA64 temp = tempInt(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
build.strb(temp, addr);
break;
}
case IrCmd::BUFFER_READI16:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
build.ldrsh(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_READU16:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
build.ldrh(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEI16:
{
RegisterA64 temp = tempInt(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
build.strh(temp, addr);
break;
}
case IrCmd::BUFFER_READI32:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEI32:
{
RegisterA64 temp = tempInt(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
build.str(temp, addr);
break;
}
case IrCmd::BUFFER_READF32:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register
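// buffers store 32-bit floats; widen to double since Luau numbers are doubles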
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
build.ldr(temp, addr);
build.fcvt(inst.regA64, temp);
break;
}
case IrCmd::BUFFER_WRITEF32:
{
RegisterA64 temp1 = tempDouble(inst.c);
RegisterA64 temp2 = regs.allocTemp(KindA64::s);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
build.fcvt(temp2, temp1);
build.str(temp2, addr);
break;
}
case IrCmd::BUFFER_READF64:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEF64:
{
RegisterA64 temp = tempDouble(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
build.str(temp, addr);
break;
}
// To handle unsupported instructions, add "case IrCmd::OP" and make sure to set error = true!
}
valueTracker.afterInstLowering(inst, index);
regs.freeLastUseRegs(inst, index);
regs.freeTempRegs();
}

void IrLoweringA64::finishBlock(const IrBlock& curr, const IrBlock& next)
{
if (!regs.spills.empty())
{
// If we have spills remaining, we have to immediately lower the successor block
for (uint32_t predIdx : predecessors(function.cfg, function.getBlockIndex(next)))
CODEGEN_ASSERT(predIdx == function.getBlockIndex(curr));
// And the next block cannot be a join block in cfg
CODEGEN_ASSERT(next.useCount == 1);
}
}

void IrLoweringA64::finishFunction()
{
if (build.logText)
build.logAppend("; interrupt handlers\n");
for (InterruptHandler& handler : interruptHandlers)
{
build.setLabel(handler.self);
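// x0 = byte offset of the next instruction (used to update the saved pc), x1 = address to resume at after the interrupt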
build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction));
build.adr(x1, handler.next);
build.b(helpers.interrupt);
}
if (build.logText)
build.logAppend("; exit handlers\n");
for (ExitHandler& handler : exitHandlers)
{
CODEGEN_ASSERT(handler.pcpos != kVmExitEntryGuardPc);
build.setLabel(handler.self);
build.mov(x0, handler.pcpos * sizeof(Instruction));
build.b(helpers.updatePcAndContinueInVm);
}
if (stats)
{
if (error)
stats->loweringErrors++;
if (regs.error)
stats->regAllocErrors++;
}
}

bool IrLoweringA64::hasError() const
{
return error || regs.error;
}

bool IrLoweringA64::isFallthroughBlock(const IrBlock& target, const IrBlock& next)
{
return target.start == next.start;
}

void IrLoweringA64::jumpOrFallthrough(IrBlock& target, const IrBlock& next)
{
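// the branch is omitted when the target is the block being lowered immediately after this one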
if (!isFallthroughBlock(target, next))
build.b(target.label);
}

Label& IrLoweringA64::getTargetLabel(IrOp op, Label& fresh)
{
if (op.kind == IrOpKind::Undef)
return fresh;
if (op.kind == IrOpKind::VmExit)
{
// Special exit case that doesn't have to update pcpos
if (vmExitOp(op) == kVmExitEntryGuardPc)
return helpers.exitContinueVmClearNativeFlag;
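// exits for the same pcpos share a single handler; finalizeTargetLabel records new handlers in exitHandlerMap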
if (uint32_t* index = exitHandlerMap.find(vmExitOp(op)))
return exitHandlers[*index].self;
return fresh;
}
return labelOp(op);
}

void IrLoweringA64::finalizeTargetLabel(IrOp op, Label& fresh)
{
if (op.kind == IrOpKind::Undef)
{
emitAbort(build, fresh);
}
else if (op.kind == IrOpKind::VmExit && fresh.id != 0 && fresh.id != helpers.exitContinueVmClearNativeFlag.id)
{
exitHandlerMap[vmExitOp(op)] = uint32_t(exitHandlers.size());
exitHandlers.push_back({fresh, vmExitOp(op)});
}
}

RegisterA64 IrLoweringA64::tempDouble(IrOp op)
{
if (op.kind == IrOpKind::Inst)
return regOp(op);
else if (op.kind == IrOpKind::Constant)
{
double val = doubleOp(op);
if (AssemblyBuilderA64::isFmovSupported(val))
{
RegisterA64 temp = regs.allocTemp(KindA64::d);
build.fmov(temp, val);
return temp;
}
else
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::d);
uint64_t vali = getDoubleBits(val);
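// many common double constants (small integers, powers of two) only set the top 16 or 32 bits of their bit pattern,
// so they can be built with movz/movk + fmov; anything else is loaded via adr+ldr from a constant embedded alongside the code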
if ((vali << 16) == 0)
{
build.movz(temp1, uint16_t(vali >> 48), 48);
build.fmov(temp2, temp1);
}
else if ((vali << 32) == 0)
{
build.movz(temp1, uint16_t(vali >> 48), 48);
build.movk(temp1, uint16_t(vali >> 32), 32);
build.fmov(temp2, temp1);
}
else
{
build.adr(temp1, val);
build.ldr(temp2, temp1);
}
return temp2;
}
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}

RegisterA64 IrLoweringA64::tempInt(IrOp op)
{
if (op.kind == IrOpKind::Inst)
return regOp(op);
else if (op.kind == IrOpKind::Constant)
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, intOp(op));
return temp;
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}

RegisterA64 IrLoweringA64::tempUint(IrOp op)
{
if (op.kind == IrOpKind::Inst)
return regOp(op);
else if (op.kind == IrOpKind::Constant)
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, unsigned(intOp(op)));
return temp;
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}

AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset)
{
// This is needed to tighten the bounds checks in the VmConst case below
CODEGEN_ASSERT(offset % 4 == 0);
// Full encoded range is wider depending on the load size, but this assertion helps establish a smaller guaranteed working range [0..4096)
CODEGEN_ASSERT(offset >= 0 && unsigned(offset / 4) <= AssemblyBuilderA64::kMaxImmediate);
if (op.kind == IrOpKind::VmReg)
return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset);
else if (op.kind == IrOpKind::VmConst)
{
size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset;
// Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries
if (constantOffset / 4 <= AddressA64::kMaxOffset)
return mem(rConstants, int(constantOffset));
RegisterA64 temp = regs.allocTemp(KindA64::x);
emitAddOffset(build, temp, rConstants, constantOffset);
return temp;
}
// If we have a register, we assume it's a pointer to TValue
// We might introduce explicit operand types in the future to make this more robust
else if (op.kind == IrOpKind::Inst)
return mem(regOp(op), offset);
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}

AddressA64 IrLoweringA64::tempAddrBuffer(IrOp bufferOp, IrOp indexOp, uint8_t tag)
{
CODEGEN_ASSERT(tag == LUA_TUSERDATA || tag == LUA_TBUFFER);
int dataOffset = tag == LUA_TBUFFER ? offsetof(Buffer, data) : offsetof(Udata, data);
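// both Buffer and Udata keep their payload inline at a fixed offset, so the tag only selects which offset is folded into the address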
if (indexOp.kind == IrOpKind::Inst)
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.add(temp, regOp(bufferOp), regOp(indexOp)); // implicit uxtw
return mem(temp, dataOffset);
}
else if (indexOp.kind == IrOpKind::Constant)
{
// Since the resulting address may be used to load any size, including 1 byte, from an unaligned offset, we are limited by unscaled
// encoding
if (unsigned(intOp(indexOp)) + dataOffset <= 255)
return mem(regOp(bufferOp), int(intOp(indexOp) + dataOffset));
// indexOp can only be negative in dead code (since offsets are checked); this avoids assertion in emitAddOffset
if (intOp(indexOp) < 0)
return mem(regOp(bufferOp), dataOffset);
RegisterA64 temp = regs.allocTemp(KindA64::x);
emitAddOffset(build, temp, regOp(bufferOp), size_t(intOp(indexOp)));
return mem(temp, dataOffset);
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}

RegisterA64 IrLoweringA64::regOp(IrOp op)
{
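// operands must be materialized in a register at their point of use; restore the value first if it was spilled or marked for reload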
IrInst& inst = function.instOp(op);
if (inst.spilled || inst.needsReload)
regs.restoreReg(build, inst);
CODEGEN_ASSERT(inst.regA64 != noreg);
return inst.regA64;
}

IrConst IrLoweringA64::constOp(IrOp op) const
{
return function.constOp(op);
}

uint8_t IrLoweringA64::tagOp(IrOp op) const
{
return function.tagOp(op);
}

int IrLoweringA64::intOp(IrOp op) const
{
return function.intOp(op);
}

unsigned IrLoweringA64::uintOp(IrOp op) const
{
return function.uintOp(op);
}

double IrLoweringA64::doubleOp(IrOp op) const
{
return function.doubleOp(op);
}

IrBlock& IrLoweringA64::blockOp(IrOp op) const
{
return function.blockOp(op);
}

Label& IrLoweringA64::labelOp(IrOp op) const
{
return blockOp(op).label;
}

} // namespace A64
} // namespace CodeGen
} // namespace Luau