// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrLoweringA64.h"

#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include "Luau/IrUtils.h"

#include "EmitCommonA64.h"
#include "NativeState.h"

#include "lstate.h"
#include "lgc.h"

LUAU_FASTFLAG(LuauVectorLibNativeDot);

namespace Luau
{
namespace CodeGen
{
namespace A64
{

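// getConditionFP below maps IR comparison conditions to A64 condition codes as used after fcmp.
// The seemingly mismatched choices (Minus/Plus for Less/NotLess, UnsignedLessEqual/UnsignedGreater
// for LessEqual/NotLessEqual) make unordered (NaN) operands behave as "not less"/"not less equal":
// fcmp reports an unordered result as N=0 Z=0 C=1 V=1, so MI and LS are taken only for ordered
// results while their inverses PL and HI are also taken when either operand is NaN.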
inline ConditionA64 getConditionFP(IrCondition cond)
{
    switch (cond)
    {
    case IrCondition::Equal:
        return ConditionA64::Equal;

    case IrCondition::NotEqual:
        return ConditionA64::NotEqual;

    case IrCondition::Less:
        return ConditionA64::Minus;

    case IrCondition::NotLess:
        return ConditionA64::Plus;

    case IrCondition::LessEqual:
        return ConditionA64::UnsignedLessEqual;

    case IrCondition::NotLessEqual:
        return ConditionA64::UnsignedGreater;

    case IrCondition::Greater:
        return ConditionA64::Greater;

    case IrCondition::NotGreater:
        return ConditionA64::LessEqual;

    case IrCondition::GreaterEqual:
        return ConditionA64::GreaterEqual;

    case IrCondition::NotGreaterEqual:
        return ConditionA64::Less;

    default:
        CODEGEN_ASSERT(!"Unexpected condition code");
        return ConditionA64::Always;
    }
}

inline ConditionA64 getConditionInt(IrCondition cond)
{
    switch (cond)
    {
    case IrCondition::Equal:
        return ConditionA64::Equal;

    case IrCondition::NotEqual:
        return ConditionA64::NotEqual;

    case IrCondition::Less:
        return ConditionA64::Minus;

    case IrCondition::NotLess:
        return ConditionA64::Plus;

    case IrCondition::LessEqual:
        return ConditionA64::LessEqual;

    case IrCondition::NotLessEqual:
        return ConditionA64::Greater;

    case IrCondition::Greater:
        return ConditionA64::Greater;

    case IrCondition::NotGreater:
        return ConditionA64::LessEqual;

    case IrCondition::GreaterEqual:
        return ConditionA64::GreaterEqual;

    case IrCondition::NotGreaterEqual:
        return ConditionA64::Less;

    case IrCondition::UnsignedLess:
        return ConditionA64::CarryClear;

    case IrCondition::UnsignedLessEqual:
        return ConditionA64::UnsignedLessEqual;

    case IrCondition::UnsignedGreater:
        return ConditionA64::UnsignedGreater;

    case IrCondition::UnsignedGreaterEqual:
        return ConditionA64::CarrySet;

    default:
        CODEGEN_ASSERT(!"Unexpected condition code");
        return ConditionA64::Always;
    }
}

static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset)
{
    CODEGEN_ASSERT(dst != src);
    CODEGEN_ASSERT(offset <= INT_MAX);

    if (offset <= AssemblyBuilderA64::kMaxImmediate)
    {
        build.add(dst, src, uint16_t(offset));
    }
    else
    {
        build.mov(dst, int(offset));
        build.add(dst, dst, src);
    }
}

static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp, IrOp ra, int ratag, Label& skip)
{
    RegisterA64 tempw = castReg(KindA64::w, temp);
    AddressA64 addr = temp;

    // iscollectable(ra)
    if (ratag == -1 || !isGCO(ratag))
    {
        if (ra.kind == IrOpKind::VmReg)
            addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, tt));
        else if (ra.kind == IrOpKind::VmConst)
            emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, tt));

        build.ldr(tempw, addr);
        build.cmp(tempw, LUA_TSTRING);
        build.b(ConditionA64::Less, skip);
    }

    // isblack(obj2gco(o))
    build.ldrb(tempw, mem(object, offsetof(GCheader, marked)));
    build.tbz(tempw, BLACKBIT, skip);

    // iswhite(gcvalue(ra))
    if (ra.kind == IrOpKind::VmReg)
        addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, value));
    else if (ra.kind == IrOpKind::VmConst)
        emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, value));

    build.ldr(temp, addr);
    build.ldrb(tempw, mem(temp, offsetof(GCheader, marked)));
    build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT));
    build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
}

static void emitAbort(AssemblyBuilderA64& build, Label& abort)
{
    Label skip;
    build.b(skip);
    build.setLabel(abort);
    build.udf();
    build.setLabel(skip);
}

static void emitFallback(AssemblyBuilderA64& build, int offset, int pcpos)
{
    // fallback(L, instruction, base, k)
    build.mov(x0, rState);
    emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction));
    build.mov(x2, rBase);
    build.mov(x3, rConstants);
    build.ldr(x4, mem(rNativeContext, offset));
    build.blr(x4);

    emitUpdateBase(build);
}

static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg)
{
    CODEGEN_ASSERT(kTempSlots >= 1);
    build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
    build.add(x0, sp, sTemporary.data); // sp-relative offset
    build.ldr(x1, mem(rNativeContext, uint32_t(func)));
    build.blr(x1);
}

static bool emitBuiltin(AssemblyBuilderA64& build, IrFunction& function, IrRegAllocA64& regs, int bfid, int res, int arg, int nresults)
{
    switch (bfid)
    {
    case LBF_MATH_FREXP:
    {
        CODEGEN_ASSERT(nresults == 1 || nresults == 2);
        emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg);
        build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));

        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, LUA_TNUMBER);
        build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));

        if (nresults == 2)
        {
            build.ldr(w0, sTemporary);
            build.scvtf(d1, w0);
            build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
            build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
        }
        return true;
    }
    case LBF_MATH_MODF:
    {
        CODEGEN_ASSERT(nresults == 1 || nresults == 2);
        emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg);
        build.ldr(d1, sTemporary);
        build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));

        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, LUA_TNUMBER);
        build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));

        if (nresults == 2)
        {
            build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
            build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
        }

        return true;
    }

    default:
        CODEGEN_ASSERT(!"Missing A64 lowering");
        return false;
    }
}

static uint64_t getDoubleBits(double value)
{
    uint64_t result;
    static_assert(sizeof(result) == sizeof(value), "Expecting double to be 64-bit");
    memcpy(&result, &value, sizeof(value));
    return result;
}

IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, IrFunction& function, LoweringStats* stats)
    : build(build)
    , helpers(helpers)
    , function(function)
    , stats(stats)
    , regs(function, stats, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}})
    , valueTracker(function)
    , exitHandlerMap(~0u)
{
    valueTracker.setRestoreCallack(
        this,
        [](void* context, IrInst& inst)
        {
            IrLoweringA64* self = static_cast<IrLoweringA64*>(context);
            self->regs.restoreReg(self->build, inst);
        }
    );
}

void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
{
    valueTracker.beforeInstLowering(inst);

    switch (inst.cmd)
    {
    case IrCmd::LOAD_TAG:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_POINTER:
    {
        inst.regA64 = regs.allocReg(KindA64::x, index);
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.gc));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_DOUBLE:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.n));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_INT:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_FLOAT:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register
        AddressA64 addr = tempAddr(inst.a, intOp(inst.b));

        build.ldr(temp, addr);
        build.fcvt(inst.regA64, temp);
        break;
    }
    case IrCmd::LOAD_TVALUE:
    {
        inst.regA64 = regs.allocReg(KindA64::q, index);

        int addrOffset = inst.b.kind != IrOpKind::None ? intOp(inst.b) : 0;
        AddressA64 addr = tempAddr(inst.a, addrOffset);
        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::LOAD_ENV:
        inst.regA64 = regs.allocReg(KindA64::x, index);
        build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env)));
        break;
    case IrCmd::GET_ARR_ADDR:
    {
        inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, array)));

        if (inst.b.kind == IrOpKind::Inst)
        {
            build.add(inst.regA64, inst.regA64, regOp(inst.b), kTValueSizeLog2); // implicit uxtw
        }
        else if (inst.b.kind == IrOpKind::Constant)
        {
            if (intOp(inst.b) == 0)
            {
                // no offset required
            }
            else if (intOp(inst.b) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
            {
                build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) * sizeof(TValue)));
            }
            else
            {
                RegisterA64 temp = regs.allocTemp(KindA64::x);
                build.mov(temp, intOp(inst.b) * sizeof(TValue));
                build.add(inst.regA64, inst.regA64, temp);
            }
        }
        else
            CODEGEN_ASSERT(!"Unsupported instruction form");
        break;
    }
    case IrCmd::GET_SLOT_NODE_ADDR:
    {
        inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp1w = castReg(KindA64::w, temp1);
        RegisterA64 temp2 = regs.allocTemp(KindA64::w);
        RegisterA64 temp2x = castReg(KindA64::x, temp2);

        // note: since the stride of the load is the same as the destination register size, we can range check the array index, not the byte offset
        if (uintOp(inst.b) <= AddressA64::kMaxOffset)
            build.ldr(temp1w, mem(rCode, uintOp(inst.b) * sizeof(Instruction)));
        else
        {
            build.mov(temp1, uintOp(inst.b) * sizeof(Instruction));
            build.ldr(temp1w, mem(rCode, temp1));
        }

        // C field can be shifted as long as it's at the most significant byte of the instruction word
        CODEGEN_ASSERT(kOffsetOfInstructionC == 3);
        build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, nodemask8)));
        build.and_(temp2, temp2, temp1w, -24);

        // note: this may clobber inst.a, so it's important that we don't use it after this
        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
        build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
        break;
    }
    case IrCmd::GET_HASH_NODE_ADDR:
    {
        inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
        RegisterA64 temp1 = regs.allocTemp(KindA64::w);
        RegisterA64 temp2 = regs.allocTemp(KindA64::w);
        RegisterA64 temp2x = castReg(KindA64::x, temp2);

        // hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode)
        build.mov(temp1, -1);
        build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, lsizenode)));
        build.lsl(temp1, temp1, temp2);
        build.mov(temp2, uintOp(inst.b));
        build.bic(temp2, temp2, temp1);

        // note: this may clobber inst.a, so it's important that we don't use it after this
        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
        build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
        break;
    }
    case IrCmd::GET_CLOSURE_UPVAL_ADDR:
    {
        inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
        RegisterA64 cl = inst.a.kind == IrOpKind::Undef ? rClosure : regOp(inst.a);

        build.add(inst.regA64, cl, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b)));
        break;
    }
    case IrCmd::STORE_TAG:
    {
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
        if (tagOp(inst.b) == 0)
        {
            build.str(wzr, addr);
        }
        else
        {
            RegisterA64 temp = regs.allocTemp(KindA64::w);
            build.mov(temp, tagOp(inst.b));
            build.str(temp, addr);
        }
        break;
    }
    case IrCmd::STORE_POINTER:
    {
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        if (inst.b.kind == IrOpKind::Constant)
        {
            CODEGEN_ASSERT(intOp(inst.b) == 0);
            build.str(xzr, addr);
        }
        else
        {
            build.str(regOp(inst.b), addr);
        }
        break;
    }
    case IrCmd::STORE_EXTRA:
    {
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, extra));
        if (intOp(inst.b) == 0)
        {
            build.str(wzr, addr);
        }
        else
        {
            RegisterA64 temp = regs.allocTemp(KindA64::w);
            build.mov(temp, intOp(inst.b));
            build.str(temp, addr);
        }
        break;
    }
    case IrCmd::STORE_DOUBLE:
    {
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        if (inst.b.kind == IrOpKind::Constant && getDoubleBits(doubleOp(inst.b)) == 0)
        {
            build.str(xzr, addr);
        }
        else
        {
            RegisterA64 temp = tempDouble(inst.b);
            build.str(temp, addr);
        }
        break;
    }
    case IrCmd::STORE_INT:
    {
        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        if (inst.b.kind == IrOpKind::Constant && intOp(inst.b) == 0)
        {
            build.str(wzr, addr);
        }
        else
        {
            RegisterA64 temp = tempInt(inst.b);
            build.str(temp, addr);
        }
        break;
    }
    case IrCmd::STORE_VECTOR:
    {
        RegisterA64 temp1 = tempDouble(inst.b);
        RegisterA64 temp2 = tempDouble(inst.c);
        RegisterA64 temp3 = tempDouble(inst.d);
        RegisterA64 temp4 = regs.allocTemp(KindA64::s);

        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
        CODEGEN_ASSERT(addr.kind == AddressKindA64::imm && addr.data % 4 == 0 && unsigned(addr.data + 8) / 4 <= AddressA64::kMaxOffset);

        build.fcvt(temp4, temp1);
        build.str(temp4, AddressA64(addr.base, addr.data + 0));
        build.fcvt(temp4, temp2);
        build.str(temp4, AddressA64(addr.base, addr.data + 4));
        build.fcvt(temp4, temp3);
        build.str(temp4, AddressA64(addr.base, addr.data + 8));
        break;
    }
    case IrCmd::STORE_TVALUE:
    {
        int addrOffset = inst.c.kind != IrOpKind::None ? intOp(inst.c) : 0;
        AddressA64 addr = tempAddr(inst.a, addrOffset);
        build.str(regOp(inst.b), addr);
        break;
    }
    case IrCmd::STORE_SPLIT_TVALUE:
    {
        int addrOffset = inst.d.kind != IrOpKind::None ? intOp(inst.d) : 0;

        RegisterA64 tempt = regs.allocTemp(KindA64::w);
        AddressA64 addrt = tempAddr(inst.a, offsetof(TValue, tt) + addrOffset);
        build.mov(tempt, tagOp(inst.b));
        build.str(tempt, addrt);

        AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value) + addrOffset);

        if (tagOp(inst.b) == LUA_TBOOLEAN)
        {
            if (inst.c.kind == IrOpKind::Constant)
            {
                // note: we reuse tag temp register as value for true booleans, and use built-in zero register for false values
                CODEGEN_ASSERT(LUA_TBOOLEAN == 1);
                build.str(intOp(inst.c) ? tempt : wzr, addr);
            }
            else
                build.str(regOp(inst.c), addr);
        }
        else if (tagOp(inst.b) == LUA_TNUMBER)
        {
            RegisterA64 temp = tempDouble(inst.c);
            build.str(temp, addr);
        }
        else if (isGCO(tagOp(inst.b)))
        {
            build.str(regOp(inst.c), addr);
        }
        else
        {
            CODEGEN_ASSERT(!"Unsupported instruction form");
        }
        break;
    }
    case IrCmd::ADD_INT:
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
        if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
            build.add(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
        else if (inst.a.kind == IrOpKind::Constant && unsigned(intOp(inst.a)) <= AssemblyBuilderA64::kMaxImmediate)
            build.add(inst.regA64, regOp(inst.b), uint16_t(intOp(inst.a)));
        else
        {
            RegisterA64 temp1 = tempInt(inst.a);
            RegisterA64 temp2 = tempInt(inst.b);
            build.add(inst.regA64, temp1, temp2);
        }
        break;
    case IrCmd::SUB_INT:
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
        if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
            build.sub(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
        else
        {
            RegisterA64 temp1 = tempInt(inst.a);
            RegisterA64 temp2 = tempInt(inst.b);
            build.sub(inst.regA64, temp1, temp2);
        }
        break;
    case IrCmd::ADD_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fadd(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::SUB_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fsub(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::MUL_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fmul(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::DIV_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fdiv(inst.regA64, temp1, temp2);
        break;
    }
    case IrCmd::IDIV_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fdiv(inst.regA64, temp1, temp2);
        build.frintm(inst.regA64, inst.regA64);
        break;
    }
    case IrCmd::MOD_NUM:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index); // can't allocReuse because both A and B are used twice
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fdiv(inst.regA64, temp1, temp2);
        build.frintm(inst.regA64, inst.regA64);
        build.fmul(inst.regA64, inst.regA64, temp2);
        build.fsub(inst.regA64, temp1, inst.regA64);
        break;
    }
    case IrCmd::MIN_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fcmp(temp1, temp2);
        build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Less));
        break;
    }
    case IrCmd::MAX_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
        RegisterA64 temp1 = tempDouble(inst.a);
        RegisterA64 temp2 = tempDouble(inst.b);
        build.fcmp(temp1, temp2);
        build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Greater));
        break;
    }
    case IrCmd::UNM_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.fneg(inst.regA64, temp);
        break;
    }
    case IrCmd::FLOOR_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.frintm(inst.regA64, temp);
        break;
    }
    case IrCmd::CEIL_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.frintp(inst.regA64, temp);
        break;
    }
    case IrCmd::ROUND_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.frinta(inst.regA64, temp);
        break;
    }
    case IrCmd::SQRT_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.fsqrt(inst.regA64, temp);
        break;
    }
    case IrCmd::ABS_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
        RegisterA64 temp = tempDouble(inst.a);
        build.fabs(inst.regA64, temp);
        break;
    }
    case IrCmd::SIGN_NUM:
    {
        inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});

        RegisterA64 temp = tempDouble(inst.a);
        RegisterA64 temp0 = regs.allocTemp(KindA64::d);
        RegisterA64 temp1 = regs.allocTemp(KindA64::d);

        build.fcmpz(temp);
        build.fmov(temp0, 0.0);
        build.fmov(temp1, 1.0);
        build.fcsel(inst.regA64, temp1, temp0, getConditionFP(IrCondition::Greater));
        build.fmov(temp1, -1.0);
        build.fcsel(inst.regA64, temp1, inst.regA64, getConditionFP(IrCondition::Less));
        break;
    }
    case IrCmd::ADD_VEC:
    {
        inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});

        build.fadd(inst.regA64, regOp(inst.a), regOp(inst.b));
        break;
    }
    case IrCmd::SUB_VEC:
    {
        inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});

        build.fsub(inst.regA64, regOp(inst.a), regOp(inst.b));
        break;
    }
    case IrCmd::MUL_VEC:
    {
        inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});

        build.fmul(inst.regA64, regOp(inst.a), regOp(inst.b));
        break;
    }
    case IrCmd::DIV_VEC:
    {
        inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});

        build.fdiv(inst.regA64, regOp(inst.a), regOp(inst.b));
        break;
    }
    case IrCmd::UNM_VEC:
    {
        inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a});

        build.fneg(inst.regA64, regOp(inst.a));
        break;
    }
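
    // Note on DOT_VEC (summarized from the commit that introduced it): A64 has no vector-wide
    // horizontal add until SVE, so the dot product is lowered as a vector multiply, a paired add
    // (faddp) for x+y, and a scalar add for the z component; this takes fewer instructions and
    // temporaries than three scalar adds.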
    case IrCmd::DOT_VEC:
    {
        LUAU_ASSERT(FFlag::LuauVectorLibNativeDot);

        inst.regA64 = regs.allocReg(KindA64::d, index);

        RegisterA64 temp = regs.allocTemp(KindA64::q);
        RegisterA64 temps = castReg(KindA64::s, temp);
        RegisterA64 regs = castReg(KindA64::s, inst.regA64);

        build.fmul(temp, regOp(inst.a), regOp(inst.b));
        build.faddp(regs, temps); // x+y
        build.dup_4s(temp, temp, 2);
        build.fadd(regs, regs, temps); // +z
        build.fcvt(inst.regA64, regs);
        break;
    }
    case IrCmd::NOT_ANY:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.a.kind == IrOpKind::Constant)
        {
            // other cases should've been constant folded
            CODEGEN_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN);
            build.eor(inst.regA64, regOp(inst.b), 1);
        }
        else
        {
            Label notbool, exit;

            // use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once
            CODEGEN_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1);
            build.cmp(regOp(inst.a), LUA_TBOOLEAN);
            build.b(ConditionA64::NotEqual, notbool);

            if (inst.b.kind == IrOpKind::Constant)
                build.mov(inst.regA64, intOp(inst.b) == 0 ? 1 : 0);
            else
                build.eor(inst.regA64, regOp(inst.b), 1); // boolean => invert value

            build.b(exit);

            // not boolean => result is true iff tag was nil
            build.setLabel(notbool);
            build.cset(inst.regA64, ConditionA64::Less);

            build.setLabel(exit);
        }
        break;
    }
    case IrCmd::CMP_ANY:
    {
        IrCondition cond = conditionOp(inst.c);

        regs.spill(build, index);
        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));

        if (cond == IrCondition::LessEqual)
            build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal)));
        else if (cond == IrCondition::Less)
            build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan)));
        else if (cond == IrCondition::Equal)
            build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval)));
        else
            CODEGEN_ASSERT(!"Unsupported condition");

        build.blr(x3);

        emitUpdateBase(build);

        inst.regA64 = regs.takeReg(w0, index);
        break;
    }
    case IrCmd::JUMP:
        if (inst.a.kind == IrOpKind::Undef || inst.a.kind == IrOpKind::VmExit)
        {
            Label fresh;
            build.b(getTargetLabel(inst.a, fresh));
            finalizeTargetLabel(inst.a, fresh);
        }
        else
        {
            jumpOrFallthrough(blockOp(inst.a), next);
        }
        break;
    case IrCmd::JUMP_IF_TRUTHY:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
        // nil => falsy
        CODEGEN_ASSERT(LUA_TNIL == 0);
        build.cbz(temp, labelOp(inst.c));
        // not boolean => truthy
        build.cmp(temp, LUA_TBOOLEAN);
        build.b(ConditionA64::NotEqual, labelOp(inst.b));
        // compare boolean value
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
        build.cbnz(temp, labelOp(inst.b));
        jumpOrFallthrough(blockOp(inst.c), next);
        break;
    }
    case IrCmd::JUMP_IF_FALSY:
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
        // nil => falsy
        CODEGEN_ASSERT(LUA_TNIL == 0);
        build.cbz(temp, labelOp(inst.b));
        // not boolean => truthy
        build.cmp(temp, LUA_TBOOLEAN);
        build.b(ConditionA64::NotEqual, labelOp(inst.c));
        // compare boolean value
        build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
        build.cbz(temp, labelOp(inst.b));
        jumpOrFallthrough(blockOp(inst.c), next);
        break;
    }
    case IrCmd::JUMP_EQ_TAG:
    {
        RegisterA64 zr = noreg;

        if (inst.a.kind == IrOpKind::Constant && tagOp(inst.a) == 0)
            zr = regOp(inst.b);
        else if (inst.b.kind == IrOpKind::Constant && tagOp(inst.b) == 0)
            zr = regOp(inst.a);
        else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
            build.cmp(regOp(inst.a), tagOp(inst.b));
        else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Inst)
            build.cmp(regOp(inst.a), regOp(inst.b));
        else if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Inst)
            build.cmp(regOp(inst.b), tagOp(inst.a));
        else
            CODEGEN_ASSERT(!"Unsupported instruction form");

        if (isFallthroughBlock(blockOp(inst.d), next))
        {
            if (zr != noreg)
                build.cbz(zr, labelOp(inst.c));
            else
                build.b(ConditionA64::Equal, labelOp(inst.c));
            jumpOrFallthrough(blockOp(inst.d), next);
        }
        else
        {
            if (zr != noreg)
                build.cbnz(zr, labelOp(inst.d));
            else
                build.b(ConditionA64::NotEqual, labelOp(inst.d));
            jumpOrFallthrough(blockOp(inst.c), next);
        }
        break;
    }
    case IrCmd::JUMP_CMP_INT:
    {
        IrCondition cond = conditionOp(inst.c);

        if (cond == IrCondition::Equal && intOp(inst.b) == 0)
        {
            build.cbz(regOp(inst.a), labelOp(inst.d));
        }
        else if (cond == IrCondition::NotEqual && intOp(inst.b) == 0)
        {
            build.cbnz(regOp(inst.a), labelOp(inst.d));
        }
        else
        {
            CODEGEN_ASSERT(unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate);
            build.cmp(regOp(inst.a), uint16_t(intOp(inst.b)));
            build.b(getConditionInt(cond), labelOp(inst.d));
        }
        jumpOrFallthrough(blockOp(inst.e), next);
        break;
    }
    case IrCmd::JUMP_EQ_POINTER:
        build.cmp(regOp(inst.a), regOp(inst.b));
        build.b(ConditionA64::Equal, labelOp(inst.c));
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
    case IrCmd::JUMP_CMP_NUM:
    {
        IrCondition cond = conditionOp(inst.c);

        if (inst.b.kind == IrOpKind::Constant && doubleOp(inst.b) == 0.0)
        {
            RegisterA64 temp = tempDouble(inst.a);

            build.fcmpz(temp);
        }
        else
        {
            RegisterA64 temp1 = tempDouble(inst.a);
            RegisterA64 temp2 = tempDouble(inst.b);

            build.fcmp(temp1, temp2);
        }

        build.b(getConditionFP(cond), labelOp(inst.d));
        jumpOrFallthrough(blockOp(inst.e), next);
        break;
    }
    case IrCmd::JUMP_FORN_LOOP_COND:
    {
        RegisterA64 index = tempDouble(inst.a);
        RegisterA64 limit = tempDouble(inst.b);
        RegisterA64 step = tempDouble(inst.c);

        Label direct;

        // step > 0
        build.fcmpz(step);
        build.b(getConditionFP(IrCondition::Greater), direct);

        // !(limit <= index)
        build.fcmp(limit, index);
        build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e));
        build.b(labelOp(inst.d));

        // !(index <= limit)
        build.setLabel(direct);

        build.fcmp(index, limit);
        build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e));
        jumpOrFallthrough(blockOp(inst.d), next);
        break;
    }
    // IrCmd::JUMP_SLOT_MATCH implemented below
    case IrCmd::TABLE_LEN:
    {
        RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
        regs.spill(build, index, {reg});
        build.mov(x0, reg);
        build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, luaH_getn)));
        build.blr(x1);

        inst.regA64 = regs.takeReg(w0, index);
        break;
    }
    case IrCmd::STRING_LEN:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);

        build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(TString, len)));
        break;
    }
    case IrCmd::TABLE_SETNUM:
    {
        // note: we need to call regOp before spill so that we don't do redundant reloads
        RegisterA64 table = regOp(inst.a);
        RegisterA64 key = regOp(inst.b);
        RegisterA64 temp = regs.allocTemp(KindA64::w);

        regs.spill(build, index, {table, key});

        if (w1 != key)
        {
            build.mov(x1, table);
            build.mov(w2, key);
        }
        else
        {
            build.mov(temp, w1);
            build.mov(x1, table);
            build.mov(w2, temp);
        }

        build.mov(x0, rState);
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_setnum)));
        build.blr(x3);
        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::NEW_TABLE:
    {
        regs.spill(build, index);
        build.mov(x0, rState);
        build.mov(x1, uintOp(inst.a));
        build.mov(x2, uintOp(inst.b));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_new)));
        build.blr(x3);
        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::DUP_TABLE:
    {
        RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
        regs.spill(build, index, {reg});
        build.mov(x1, reg);
        build.mov(x0, rState);
        build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaH_clone)));
        build.blr(x2);
        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::TRY_NUM_TO_INDEX:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        RegisterA64 temp1 = tempDouble(inst.a);

        if (build.features & Feature_JSCVT)
        {
            build.fjcvtzs(inst.regA64, temp1); // fjcvtzs sets PSTATE.Z (equal) iff conversion is exact
            build.b(ConditionA64::NotEqual, labelOp(inst.b));
        }
        else
        {
            RegisterA64 temp2 = regs.allocTemp(KindA64::d);

            build.fcvtzs(inst.regA64, temp1);
            build.scvtf(temp2, inst.regA64);
            build.fcmp(temp1, temp2);
            build.b(ConditionA64::NotEqual, labelOp(inst.b));
        }
        break;
    }
    case IrCmd::TRY_CALL_FASTGETTM:
    {
        RegisterA64 temp1 = regs.allocTemp(KindA64::x);
        RegisterA64 temp2 = regs.allocTemp(KindA64::w);

        build.ldr(temp1, mem(regOp(inst.a), offsetof(Table, metatable)));
        build.cbz(temp1, labelOp(inst.c)); // no metatable

        build.ldrb(temp2, mem(temp1, offsetof(Table, tmcache)));
        build.tst(temp2, 1 << intOp(inst.b)); // can't use tbz/tbnz because their jump offsets are too short
        build.b(ConditionA64::NotEqual, labelOp(inst.c)); // Equal = Zero after tst; tmcache caches *absence* of metamethods

        regs.spill(build, index, {temp1});
        build.mov(x0, temp1);
        build.mov(w1, intOp(inst.b));
        build.ldr(x2, mem(rGlobalState, offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm)));
        build.blr(x3);

        build.cbz(x0, labelOp(inst.c)); // no tag method

        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::NEW_USERDATA:
    {
        regs.spill(build, index);
        build.mov(x0, rState);
        build.mov(x1, intOp(inst.a));
        build.mov(x2, intOp(inst.b));
        build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, newUserdata)));
        build.blr(x3);
        inst.regA64 = regs.takeReg(x0, index);
        break;
    }
    case IrCmd::INT_TO_NUM:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        RegisterA64 temp = tempInt(inst.a);
        build.scvtf(inst.regA64, temp);
        break;
    }
    case IrCmd::UINT_TO_NUM:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        RegisterA64 temp = tempInt(inst.a);
        build.ucvtf(inst.regA64, temp);
        break;
    }
    case IrCmd::NUM_TO_INT:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        RegisterA64 temp = tempDouble(inst.a);
        build.fcvtzs(inst.regA64, temp);
        break;
    }
    case IrCmd::NUM_TO_UINT:
    {
        inst.regA64 = regs.allocReg(KindA64::w, index);
        RegisterA64 temp = tempDouble(inst.a);
        // note: we don't use fcvtzu for consistency with C++ code
        build.fcvtzs(castReg(KindA64::x, inst.regA64), temp);
        break;
    }
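
    // Note on NUM_TO_VEC (summarized from the commit that introduced the constant path): when the
    // operand is a constant that fits the fmov immediate encoding, a single vector fmov is emitted;
    // otherwise the splatted constant is materialized next to the code and loaded with adr+ldr. The
    // non-constant path below converts the double through a single-precision temporary.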
case IrCmd::NUM_TO_VEC:
|
2024-01-27 03:20:56 +00:00
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReg(KindA64::q, index);
|
|
|
|
|
2024-04-12 18:18:49 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Constant)
|
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194)
When the input is a constant, we use a fairly inefficient sequence of
fmov+fcvt+dup or, when the double isn't encodable in fmov,
adr+ldr+fcvt+dup.
Instead, we can use the same lowering as X64 when the input is a
constant, and load the vector from memory. However, if the constant is
encodable via fmov, we can use a vector fmov instead (which is just one
instruction and doesn't need constant space).
Fortunately the bit encoding of fmov for 32-bit floating point numbers
matches that of 64-bit: the decoding algorithm is a little different
because it expands into a larger exponent, but the values are
compatible, so if a double can be encoded into a scalar fmov with a
given abcdefgh pattern, the same pattern should encode the same float;
due to the very limited number of mantissa and exponent bits, all values
that are encodable are also exact in both 32-bit and 64-bit floats.
This strategy is ~same as what gcc uses. For complex vectors, we
previously used 4 instructions and 8 bytes of constant storage, and now
we use 2 instructions and 16 bytes of constant storage, so the memory
footprint is the same; for simple vectors we just need 1 instruction (4
bytes).
clang lowers vector constants a little differently, opting to synthesize
a 64-bit integer using 4 instructions (mov/movk) and then move it to the
vector register - this requires 5 instructions and 20 bytes, vs ours/gcc
2 instructions and 8+16=24 bytes. I tried a simpler version of this that
would be more compact - synthesize a 32-bit integer constant with
mov+movk, and move it to vector register via dup.4s - but this was a
little slower on M2, so for now we prefer the slightly larger version as
it's not a regression vs current implementation.
On the vector approximation benchmark we get:
- Before this PR (flag=false): ~7.85 ns/op
- After this PR (flag=true): ~7.74 ns/op
- After this PR, with 0.125 instead of 0.123 in the benchmark code (to
use fmov): ~7.52 ns/op
- Not part of this PR, but the mov/dup strategy described above: ~8.00
ns/op
2024-03-13 19:56:11 +00:00
|
|
|
{
|
|
|
|
float value = float(doubleOp(inst.a));
|
|
|
|
uint32_t asU32;
|
|
|
|
static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit");
|
|
|
|
memcpy(&asU32, &value, sizeof(value));
|
2024-01-27 03:20:56 +00:00
|
|
|
|
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194)
When the input is a constant, we use a fairly inefficient sequence of
fmov+fcvt+dup or, when the double isn't encodable in fmov,
adr+ldr+fcvt+dup.
Instead, we can use the same lowering as X64 when the input is a
constant, and load the vector from memory. However, if the constant is
encodable via fmov, we can use a vector fmov instead (which is just one
instruction and doesn't need constant space).
Fortunately the bit encoding of fmov for 32-bit floating point numbers
matches that of 64-bit: the decoding algorithm is a little different
because it expands into a larger exponent, but the values are
compatible, so if a double can be encoded into a scalar fmov with a
given abcdefgh pattern, the same pattern should encode the same float;
due to the very limited number of mantissa and exponent bits, all values
that are encodable are also exact in both 32-bit and 64-bit floats.
This strategy is ~same as what gcc uses. For complex vectors, we
previously used 4 instructions and 8 bytes of constant storage, and now
we use 2 instructions and 16 bytes of constant storage, so the memory
footprint is the same; for simple vectors we just need 1 instruction (4
bytes).
clang lowers vector constants a little differently, opting to synthesize
a 64-bit integer using 4 instructions (mov/movk) and then move it to the
vector register - this requires 5 instructions and 20 bytes, vs ours/gcc
2 instructions and 8+16=24 bytes. I tried a simpler version of this that
would be more compact - synthesize a 32-bit integer constant with
mov+movk, and move it to vector register via dup.4s - but this was a
little slower on M2, so for now we prefer the slightly larger version as
it's not a regression vs current implementation.
On the vector approximation benchmark we get:
- Before this PR (flag=false): ~7.85 ns/op
- After this PR (flag=true): ~7.74 ns/op
- After this PR, with 0.125 instead of 0.123 in the benchmark code (to
use fmov): ~7.52 ns/op
- Not part of this PR, but the mov/dup strategy described above: ~8.00
ns/op
2024-03-13 19:56:11 +00:00
|
|
|
if (AssemblyBuilderA64::isFmovSupported(value))
|
|
|
|
{
|
|
|
|
build.fmov(inst.regA64, value);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::x);
|
2024-01-27 03:20:56 +00:00
|
|
|
|
2024-03-15 23:37:39 +00:00
|
|
|
uint32_t vec[4] = {asU32, asU32, asU32, 0};
|
2024-03-13 19:56:11 +00:00
|
|
|
build.adr(temp, vec, sizeof(vec));
|
|
|
|
build.ldr(inst.regA64, temp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2024-02-21 15:06:11 +00:00
|
|
|
{
|
2024-03-13 19:56:11 +00:00
|
|
|
RegisterA64 tempd = tempDouble(inst.a);
|
2024-09-27 19:58:21 +01:00
|
|
|
RegisterA64 temps = regs.allocTemp(KindA64::s);
|
2024-03-13 19:56:11 +00:00
|
|
|
|
|
|
|
build.fcvt(temps, tempd);
|
|
|
|
build.dup_4s(inst.regA64, castReg(KindA64::q, temps), 0);
|
2024-02-21 15:06:11 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::TAG_VECTOR:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a});
|
|
|
|
|
|
|
|
RegisterA64 reg = regOp(inst.a);
|
|
|
|
RegisterA64 tempw = regs.allocTemp(KindA64::w);
|
|
|
|
|
|
|
|
if (inst.regA64 != reg)
|
|
|
|
build.mov(inst.regA64, reg);
|
|
|
|
|
2024-01-27 03:20:56 +00:00
|
|
|
build.mov(tempw, LUA_TVECTOR);
|
|
|
|
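// insert the tag into lane 3; lanes 0-2 keep the vector components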
build.ins_4s(inst.regA64, tempw, 3);
|
|
|
|
break;
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::ADJUST_STACK_TO_REG:
|
|
|
|
{
|
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::x);
|
|
|
|
|
|
|
|
if (inst.b.kind == IrOpKind::Constant)
|
|
|
|
{
|
|
|
|
build.add(temp, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
|
|
|
|
build.str(temp, mem(rState, offsetof(lua_State, top)));
|
|
|
|
}
|
|
|
|
else if (inst.b.kind == IrOpKind::Inst)
|
|
|
|
{
|
|
|
|
build.add(temp, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
2023-11-10 21:10:07 +00:00
|
|
|
build.add(temp, temp, regOp(inst.b), kTValueSizeLog2); // implicit uxtw
|
2023-04-07 22:01:29 +01:00
|
|
|
build.str(temp, mem(rState, offsetof(lua_State, top)));
|
|
|
|
}
|
|
|
|
else
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(!"Unsupported instruction form");
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::ADJUST_STACK_TO_TOP:
|
|
|
|
{
|
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::x);
|
|
|
|
build.ldr(temp, mem(rState, offsetof(lua_State, ci)));
|
|
|
|
build.ldr(temp, mem(temp, offsetof(CallInfo, top)));
|
|
|
|
build.str(temp, mem(rState, offsetof(lua_State, top)));
|
|
|
|
break;
|
|
|
|
}
|
2023-04-14 19:06:22 +01:00
|
|
|
case IrCmd::FASTCALL:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2024-06-21 00:37:55 +01:00
|
|
|
|
2024-08-23 17:35:30 +01:00
|
|
|
error |= !emitBuiltin(build, function, regs, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d));
|
2023-04-14 19:06:22 +01:00
|
|
|
break;
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::INVOKE_FASTCALL:
|
|
|
|
{
|
2024-08-23 17:35:30 +01:00
|
|
|
// We might need a temporary and we have to preserve it over the spill
|
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::q);
|
|
|
|
regs.spill(build, index, {temp});
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2024-08-23 17:35:30 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
|
|
|
|
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
|
|
|
|
build.mov(w3, intOp(inst.g)); // nresults
|
2024-06-21 00:37:55 +01:00
|
|
|
|
2024-08-23 17:35:30 +01:00
|
|
|
// 'E' argument can only be produced by LOP_FASTCALL3 lowering
|
|
|
|
if (inst.e.kind != IrOpKind::Undef)
|
|
|
|
{
|
|
|
|
CODEGEN_ASSERT(intOp(inst.f) == 3);
|
2024-06-21 00:37:55 +01:00
|
|
|
|
2024-08-23 17:35:30 +01:00
|
|
|
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
|
2024-06-21 00:37:55 +01:00
|
|
|
|
2024-08-23 17:35:30 +01:00
|
|
|
build.ldr(temp, mem(rBase, vmRegOp(inst.d) * sizeof(TValue)));
|
|
|
|
build.str(temp, mem(x4, 0));
|
2024-06-21 00:37:55 +01:00
|
|
|
|
2024-08-23 17:35:30 +01:00
|
|
|
build.ldr(temp, mem(rBase, vmRegOp(inst.e) * sizeof(TValue)));
|
|
|
|
build.str(temp, mem(x4, sizeof(TValue)));
|
2023-04-07 22:01:29 +01:00
|
|
|
}
|
|
|
|
else
|
2024-06-21 00:37:55 +01:00
|
|
|
{
|
|
|
|
if (inst.d.kind == IrOpKind::VmReg)
|
|
|
|
build.add(x4, rBase, uint16_t(vmRegOp(inst.d) * sizeof(TValue)));
|
|
|
|
else if (inst.d.kind == IrOpKind::VmConst)
|
|
|
|
emitAddOffset(build, x4, rConstants, vmConstOp(inst.d) * sizeof(TValue));
|
|
|
|
else
|
|
|
|
CODEGEN_ASSERT(inst.d.kind == IrOpKind::Undef);
|
2024-08-23 17:35:30 +01:00
|
|
|
}
|
2024-06-21 00:37:55 +01:00
|
|
|
|
2024-08-23 17:35:30 +01:00
|
|
|
// nparams
|
|
|
|
if (intOp(inst.f) == LUA_MULTRET)
|
|
|
|
{
|
|
|
|
// L->top - (ra + 1)
|
|
|
|
build.ldr(x5, mem(rState, offsetof(lua_State, top)));
|
|
|
|
build.sub(x5, x5, rBase);
|
|
|
|
build.sub(x5, x5, uint16_t((vmRegOp(inst.b) + 1) * sizeof(TValue)));
|
|
|
|
build.lsr(x5, x5, kTValueSizeLog2);
|
2024-06-21 00:37:55 +01:00
|
|
|
}
|
2024-08-23 17:35:30 +01:00
|
|
|
else
|
|
|
|
build.mov(w5, intOp(inst.f));
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(inst.a) * sizeof(luau_FastFunction)));
|
|
|
|
build.blr(x6);
|
|
|
|
|
2023-09-01 18:58:27 +01:00
|
|
|
inst.regA64 = regs.takeReg(w0, index);
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::CHECK_FASTCALL_RES:
|
|
|
|
build.cmp(regOp(inst.a), 0);
|
|
|
|
build.b(ConditionA64::Less, labelOp(inst.b));
|
|
|
|
break;
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::DO_ARITH:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-03-31 19:42:49 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
Add SUBRK and DIVRK bytecode instructions to bytecode v5 (#1115)
Right now, we can compile R*K for all arithmetic instructions, but K*R
gets compiled into two instructions (LOADN/LOADK + arithmetic opcode).
This is problematic since it leads to reduced performance for some code.
However, we'd like to avoid adding reverse variants of ADDK et al for
all opcodes to avoid the increase in I$ footprint for interpreter.
Looking at the arithmetic instructions, % and // don't have interesting
use cases for K*V; ^ is sometimes used with constant on the left hand
side but this would need to call pow() by necessity in all cases so it
would be slow regardless of the dispatch overhead. This leaves the four
basic arithmetic operations.
For + and *, we can implement a compiler-side optimization in the
future that transforms K*R to R*K automatically. This could either be
done unconditionally at -O2, or conditionally based on the type of the
value (driven by type annotations / inference) -- this technically
changes behavior in presence of metamethods, although it might be
sensible to just always do this because non-commutative +/* are evil.
However, for - and / it is impossible for the compiler to optimize this
in the future, so we need dedicated opcodes. This only increases the
interpreter size by ~300 bytes (~1.5%) on X64.
This makes spectral-norm and math-partial-sums 6% faster; maybe more
importantly, voxelgen gets 1.5% faster (so this change does have
real-world impact).
To avoid the proliferation of bytecode versions this change piggybacks
on the bytecode version bump that was just made in 604 for vector
constants; we would still be able to enable these independently but
we'll consider v5 complete when both are enabled.
Related: #626
---------
Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com>
2023-11-28 15:35:01 +00:00
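As a concrete illustration of the asymmetry described above (the mnemonics are real bytecode opcodes, but the register and constant numbering is schematic rather than taken from an actual compilation), assume x lives in R0 and the constant 1 sits in constant slot K0:

-- x - 1: constant on the right, already a single instruction
--     SUBK  R1, R0, K0        (R1 = R0 - K0)
-- 1 - x: constant on the left, previously had to materialize the constant first
--     LOADN R2, 1
--     SUB   R1, R2, R0        (R1 = R2 - R0)
-- with bytecode v5 the same expression folds into one instruction
--     SUBRK R1, K0, R0        (R1 = K0 - R0)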
|
|
|
|
|
|
|
if (inst.b.kind == IrOpKind::VmConst)
|
|
|
|
emitAddOffset(build, x2, rConstants, vmConstOp(inst.b) * sizeof(TValue));
|
|
|
|
else
|
|
|
|
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
|
2023-03-31 19:42:49 +01:00
|
|
|
|
|
|
|
if (inst.c.kind == IrOpKind::VmConst)
|
2023-04-14 19:06:22 +01:00
|
|
|
emitAddOffset(build, x3, rConstants, vmConstOp(inst.c) * sizeof(TValue));
|
2023-03-31 19:42:49 +01:00
|
|
|
else
|
|
|
|
build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
|
|
|
|
|
2024-06-29 01:34:49 +01:00
|
|
|
switch (TMS(intOp(inst.d)))
|
2024-05-26 18:09:09 +01:00
|
|
|
{
|
2024-06-29 01:34:49 +01:00
|
|
|
case TM_ADD:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithadd)));
|
|
|
|
break;
|
|
|
|
case TM_SUB:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithsub)));
|
|
|
|
break;
|
|
|
|
case TM_MUL:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmul)));
|
|
|
|
break;
|
|
|
|
case TM_DIV:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithdiv)));
|
|
|
|
break;
|
|
|
|
case TM_IDIV:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithidiv)));
|
|
|
|
break;
|
|
|
|
case TM_MOD:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmod)));
|
|
|
|
break;
|
|
|
|
case TM_POW:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithpow)));
|
|
|
|
break;
|
|
|
|
case TM_UNM:
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithunm)));
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
CODEGEN_ASSERT(!"Invalid doarith helper operation tag");
|
|
|
|
break;
|
2024-05-26 18:09:09 +01:00
|
|
|
}
|
2023-03-31 19:42:49 +01:00
|
|
|
|
2024-06-29 01:34:49 +01:00
|
|
|
build.blr(x4);
|
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
emitUpdateBase(build);
|
|
|
|
break;
|
|
|
|
case IrCmd::DO_LEN:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
|
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_dolen)));
|
|
|
|
build.blr(x3);
|
|
|
|
|
|
|
|
emitUpdateBase(build);
|
|
|
|
break;
|
|
|
|
case IrCmd::GET_TABLE:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
|
|
|
|
|
|
|
|
if (inst.c.kind == IrOpKind::VmReg)
|
|
|
|
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
|
|
|
|
else if (inst.c.kind == IrOpKind::Constant)
|
|
|
|
{
|
2023-05-25 22:36:34 +01:00
|
|
|
TValue n = {};
|
2023-04-07 22:01:29 +01:00
|
|
|
setnvalue(&n, uintOp(inst.c));
|
|
|
|
build.adr(x2, &n, sizeof(n));
|
|
|
|
}
|
|
|
|
else
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(!"Unsupported instruction form");
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_gettable)));
|
|
|
|
build.blr(x4);
|
|
|
|
|
|
|
|
emitUpdateBase(build);
|
|
|
|
break;
|
|
|
|
case IrCmd::SET_TABLE:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
|
|
|
|
|
|
|
|
if (inst.c.kind == IrOpKind::VmReg)
|
|
|
|
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
|
|
|
|
else if (inst.c.kind == IrOpKind::Constant)
|
|
|
|
{
|
2023-05-25 22:36:34 +01:00
|
|
|
TValue n = {};
|
2023-04-07 22:01:29 +01:00
|
|
|
setnvalue(&n, uintOp(inst.c));
|
|
|
|
build.adr(x2, &n, sizeof(n));
|
|
|
|
}
|
|
|
|
else
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(!"Unsupported instruction form");
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_settable)));
|
|
|
|
build.blr(x4);
|
|
|
|
|
2023-03-31 19:42:49 +01:00
|
|
|
emitUpdateBase(build);
|
|
|
|
break;
|
|
|
|
case IrCmd::GET_IMPORT:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-25 22:36:34 +01:00
|
|
|
// luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false)
|
2023-04-14 19:06:22 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.ldr(x1, mem(rClosure, offsetof(Closure, env)));
|
|
|
|
build.mov(x2, rConstants);
|
2023-05-25 22:36:34 +01:00
|
|
|
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.mov(w4, uintOp(inst.b));
|
|
|
|
build.mov(w5, 0);
|
|
|
|
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luaV_getimport)));
|
|
|
|
build.blr(x6);
|
2023-04-14 19:06:22 +01:00
|
|
|
|
|
|
|
emitUpdateBase(build);
|
2023-03-31 19:42:49 +01:00
|
|
|
break;
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::CONCAT:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.mov(x0, rState);
|
2023-08-18 19:15:41 +01:00
|
|
|
build.mov(w1, uintOp(inst.b));
|
|
|
|
build.mov(w2, vmRegOp(inst.a) + uintOp(inst.b) - 1);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_concat)));
|
|
|
|
build.blr(x3);
|
|
|
|
|
|
|
|
emitUpdateBase(build);
|
|
|
|
break;
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::GET_UPVALUE:
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::q);
|
|
|
|
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
|
|
|
|
|
|
|
|
build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b)));
|
|
|
|
|
|
|
|
// uprefs[] is either an actual value, or it points to an UpVal object which has a pointer to the value
|
|
|
|
Label skip;
|
|
|
|
build.ldr(temp3, mem(temp1, offsetof(TValue, tt)));
|
|
|
|
build.cmp(temp3, LUA_TUPVAL);
|
|
|
|
build.b(ConditionA64::NotEqual, skip);
|
|
|
|
|
|
|
|
// UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally)
|
|
|
|
build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc)));
|
|
|
|
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
|
|
|
|
|
|
|
|
build.setLabel(skip);
|
|
|
|
|
|
|
|
build.ldr(temp2, temp1);
|
|
|
|
build.str(temp2, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
break;
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::SET_UPVALUE:
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp3 = regs.allocTemp(KindA64::q);
|
|
|
|
|
|
|
|
// UpVal*
|
|
|
|
build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)));
|
|
|
|
|
|
|
|
build.ldr(temp2, mem(temp1, offsetof(UpVal, v)));
|
|
|
|
build.ldr(temp3, mem(rBase, vmRegOp(inst.b) * sizeof(TValue)));
|
|
|
|
build.str(temp3, temp2);
|
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
if (inst.c.kind == IrOpKind::Undef || isGCO(tagOp(inst.c)))
|
|
|
|
{
|
|
|
|
Label skip;
|
2023-09-01 18:58:27 +01:00
|
|
|
checkObjectBarrierConditions(build, temp1, temp2, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
size_t spills = regs.spill(build, index, {temp1});
|
2023-04-21 23:14:26 +01:00
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
build.mov(x1, temp1);
|
|
|
|
build.mov(x0, rState);
|
|
|
|
build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
|
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
|
|
|
|
build.blr(x3);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
|
2023-04-21 23:14:26 +01:00
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
|
|
|
|
build.setLabel(skip);
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
}
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::CHECK_TAG:
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2023-07-14 19:08:53 +01:00
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
2023-08-11 15:42:37 +01:00
|
|
|
Label& fail = getTargetLabel(inst.c, fresh);
|
2023-07-28 16:13:53 +01:00
|
|
|
|
2024-06-21 00:37:55 +01:00
|
|
|
if (tagOp(inst.b) == 0)
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2024-06-21 00:37:55 +01:00
|
|
|
build.cbnz(regOp(inst.a), fail);
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2024-06-21 00:37:55 +01:00
|
|
|
build.cmp(regOp(inst.a), tagOp(inst.b));
|
|
|
|
build.b(ConditionA64::NotEqual, fail);
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
2023-08-11 15:42:37 +01:00
|
|
|
|
|
|
|
finalizeTargetLabel(inst.c, fresh);
|
2023-03-31 19:42:49 +01:00
|
|
|
break;
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
2023-08-04 20:18:54 +01:00
|
|
|
case IrCmd::CHECK_TRUTHY:
|
|
|
|
{
|
|
|
|
// Constant tags which don't require boolean value check should've been removed in constant folding
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.a.kind != IrOpKind::Constant || tagOp(inst.a) == LUA_TBOOLEAN);
|
2023-08-04 20:18:54 +01:00
|
|
|
|
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
|
|
|
Label& target = getTargetLabel(inst.c, fresh);
|
|
|
|
|
|
|
|
Label skip;
|
|
|
|
|
|
|
|
if (inst.a.kind != IrOpKind::Constant)
|
|
|
|
{
|
|
|
|
// fail to fallback on 'nil' (falsy)
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(LUA_TNIL == 0);
|
2023-08-04 20:18:54 +01:00
|
|
|
build.cbz(regOp(inst.a), target);
|
|
|
|
|
|
|
|
// skip value test if it's not a boolean (truthy)
|
|
|
|
build.cmp(regOp(inst.a), LUA_TBOOLEAN);
|
|
|
|
build.b(ConditionA64::NotEqual, skip);
|
|
|
|
}
|
|
|
|
|
|
|
|
// fail to fallback on 'false' boolean value (falsy)
|
2024-04-25 23:26:09 +01:00
|
|
|
if (inst.b.kind != IrOpKind::Constant)
|
2024-03-30 23:14:44 +00:00
|
|
|
{
|
|
|
|
build.cbz(regOp(inst.b), target);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (intOp(inst.b) == 0)
|
|
|
|
build.b(target);
|
|
|
|
}
|
2023-08-04 20:18:54 +01:00
|
|
|
|
|
|
|
if (inst.a.kind != IrOpKind::Constant)
|
|
|
|
build.setLabel(skip);
|
|
|
|
|
|
|
|
finalizeTargetLabel(inst.c, fresh);
|
|
|
|
break;
|
|
|
|
}
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::CHECK_READONLY:
|
|
|
|
{
|
2023-07-14 19:08:53 +01:00
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
2023-03-31 19:42:49 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::w);
|
|
|
|
build.ldrb(temp, mem(regOp(inst.a), offsetof(Table, readonly)));
|
2023-07-14 19:08:53 +01:00
|
|
|
build.cbnz(temp, getTargetLabel(inst.b, fresh));
|
|
|
|
finalizeTargetLabel(inst.b, fresh);
|
2023-03-31 19:42:49 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::CHECK_NO_METATABLE:
|
|
|
|
{
|
2023-07-14 19:08:53 +01:00
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
2023-03-31 19:42:49 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::x);
|
|
|
|
build.ldr(temp, mem(regOp(inst.a), offsetof(Table, metatable)));
|
2023-07-14 19:08:53 +01:00
|
|
|
build.cbnz(temp, getTargetLabel(inst.b, fresh));
|
|
|
|
finalizeTargetLabel(inst.b, fresh);
|
2023-03-31 19:42:49 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::CHECK_SAFE_ENV:
|
|
|
|
{
|
2023-07-14 19:08:53 +01:00
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
2023-03-31 19:42:49 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::x);
|
2023-04-07 22:01:29 +01:00
|
|
|
RegisterA64 tempw = castReg(KindA64::w, temp);
|
2023-03-31 19:42:49 +01:00
|
|
|
build.ldr(temp, mem(rClosure, offsetof(Closure, env)));
|
|
|
|
build.ldrb(tempw, mem(temp, offsetof(Table, safeenv)));
|
2023-07-14 19:08:53 +01:00
|
|
|
build.cbz(tempw, getTargetLabel(inst.a, fresh));
|
|
|
|
finalizeTargetLabel(inst.a, fresh);
|
2023-03-31 19:42:49 +01:00
|
|
|
break;
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::CHECK_ARRAY_SIZE:
|
|
|
|
{
|
2023-07-14 19:08:53 +01:00
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
|
|
|
Label& fail = getTargetLabel(inst.c, fresh);
|
2023-05-25 22:36:34 +01:00
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::w);
|
|
|
|
build.ldr(temp, mem(regOp(inst.a), offsetof(Table, sizearray)));
|
|
|
|
|
|
|
|
if (inst.b.kind == IrOpKind::Inst)
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2023-04-07 22:01:29 +01:00
|
|
|
build.cmp(temp, regOp(inst.b));
|
2023-05-25 22:36:34 +01:00
|
|
|
build.b(ConditionA64::UnsignedLessEqual, fail);
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
else if (inst.b.kind == IrOpKind::Constant)
|
|
|
|
{
|
2023-05-25 22:36:34 +01:00
|
|
|
if (intOp(inst.b) == 0)
|
|
|
|
{
|
|
|
|
build.cbz(temp, fail);
|
|
|
|
}
|
|
|
|
else if (size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
|
2023-04-14 19:06:22 +01:00
|
|
|
{
|
|
|
|
build.cmp(temp, uint16_t(intOp(inst.b)));
|
2023-05-25 22:36:34 +01:00
|
|
|
build.b(ConditionA64::UnsignedLessEqual, fail);
|
2023-04-14 19:06:22 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
|
|
|
|
build.mov(temp2, intOp(inst.b));
|
|
|
|
build.cmp(temp, temp2);
|
2023-05-25 22:36:34 +01:00
|
|
|
build.b(ConditionA64::UnsignedLessEqual, fail);
|
2023-04-14 19:06:22 +01:00
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
}
|
|
|
|
else
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(!"Unsupported instruction form");
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-07-14 19:08:53 +01:00
|
|
|
finalizeTargetLabel(inst.c, fresh);
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
}
|
2023-05-19 20:37:30 +01:00
|
|
|
case IrCmd::JUMP_SLOT_MATCH:
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::CHECK_SLOT_MATCH:
|
|
|
|
{
|
2023-05-25 22:36:34 +01:00
|
|
|
Label abort; // used when guard aborts execution
|
|
|
|
const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? inst.d : inst.c;
|
|
|
|
Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp);
|
2023-05-19 20:37:30 +01:00
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp1w = castReg(KindA64::w, temp1);
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
|
|
|
|
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(offsetof(LuaNode, key.value) == offsetof(LuaNode, key) && kOffsetOfTKeyTagNext >= 8 && kOffsetOfTKeyTagNext < 16);
|
2023-05-19 20:37:30 +01:00
|
|
|
build.ldp(temp1, temp2, mem(regOp(inst.a), offsetof(LuaNode, key))); // load key.value into temp1 and key.tt (alongside other bits) into temp2
|
|
|
|
build.ubfx(temp2, temp2, (kOffsetOfTKeyTagNext - 8) * 8, kTKeyTagBits); // .tt is right before .next, and 8 bytes are skipped by ldp
|
|
|
|
build.cmp(temp2, LUA_TSTRING);
|
|
|
|
build.b(ConditionA64::NotEqual, mismatch);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
|
|
|
|
build.ldr(temp2, addr);
|
|
|
|
build.cmp(temp1, temp2);
|
2023-05-19 20:37:30 +01:00
|
|
|
build.b(ConditionA64::NotEqual, mismatch);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(LUA_TNIL == 0);
|
2023-05-19 20:37:30 +01:00
|
|
|
build.cbz(temp1w, mismatch);
|
|
|
|
|
|
|
|
if (inst.cmd == IrCmd::JUMP_SLOT_MATCH)
|
|
|
|
jumpOrFallthrough(blockOp(inst.c), next);
|
2023-05-25 22:36:34 +01:00
|
|
|
else if (abort.id)
|
|
|
|
emitAbort(build, abort);
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
}
|
2023-04-14 19:06:22 +01:00
|
|
|
case IrCmd::CHECK_NODE_NO_NEXT:
|
|
|
|
{
|
2023-07-14 19:08:53 +01:00
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
2023-04-14 19:06:22 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::w);
|
|
|
|
|
2023-05-19 20:37:30 +01:00
|
|
|
build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTagNext));
|
|
|
|
build.lsr(temp, temp, kTKeyTagBits);
|
2023-07-14 19:08:53 +01:00
|
|
|
build.cbnz(temp, getTargetLabel(inst.b, fresh));
|
|
|
|
finalizeTargetLabel(inst.b, fresh);
|
2023-04-14 19:06:22 +01:00
|
|
|
break;
|
|
|
|
}
|
2023-09-01 18:58:27 +01:00
|
|
|
case IrCmd::CHECK_NODE_VALUE:
|
|
|
|
{
|
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::w);
|
|
|
|
|
|
|
|
build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(LUA_TNIL == 0);
|
2023-09-01 18:58:27 +01:00
|
|
|
build.cbz(temp, getTargetLabel(inst.b, fresh));
|
|
|
|
finalizeTargetLabel(inst.b, fresh);
|
|
|
|
break;
|
|
|
|
}
|
2023-11-10 21:10:07 +00:00
|
|
|
case IrCmd::CHECK_BUFFER_LEN:
|
|
|
|
{
|
|
|
|
int accessSize = intOp(inst.c);
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(accessSize > 0 && accessSize <= int(AssemblyBuilderA64::kMaxImmediate));
|
2023-11-10 21:10:07 +00:00
|
|
|
|
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
|
|
|
Label& target = getTargetLabel(inst.d, fresh);
|
|
|
|
|
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::w);
|
|
|
|
build.ldr(temp, mem(regOp(inst.a), offsetof(Buffer, len)));
|
|
|
|
|
|
|
|
if (inst.b.kind == IrOpKind::Inst)
|
|
|
|
{
|
|
|
|
if (accessSize == 1)
|
|
|
|
{
|
|
|
|
// fails if offset >= len
|
|
|
|
build.cmp(temp, regOp(inst.b));
|
|
|
|
build.b(ConditionA64::UnsignedLessEqual, target);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2024-02-02 21:32:42 +00:00
|
|
|
// fails if offset + size > len; we compute it as len - offset < size
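// e.g. with accessSize=4, len=8, offset=0xffffffff: 8 - 4294967295 is negative, so the signed comparison below fails the guard as required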
|
2023-11-10 21:10:07 +00:00
|
|
|
RegisterA64 tempx = castReg(KindA64::x, temp);
|
|
|
|
build.sub(tempx, tempx, regOp(inst.b)); // implicit uxtw
|
|
|
|
build.cmp(tempx, uint16_t(accessSize));
|
2024-03-01 18:45:26 +00:00
|
|
|
build.b(ConditionA64::Less, target); // note: this is a signed 64-bit comparison so that out of bounds offset fails
|
2023-11-10 21:10:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (inst.b.kind == IrOpKind::Constant)
|
|
|
|
{
|
|
|
|
int offset = intOp(inst.b);
|
|
|
|
|
|
|
|
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
|
|
|
|
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
|
|
|
|
{
|
|
|
|
build.b(target);
|
|
|
|
}
|
|
|
|
else if (offset + accessSize <= int(AssemblyBuilderA64::kMaxImmediate))
|
|
|
|
{
|
|
|
|
build.cmp(temp, uint16_t(offset + accessSize));
|
|
|
|
build.b(ConditionA64::UnsignedLessEqual, target);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
|
|
|
|
build.mov(temp2, offset + accessSize);
|
|
|
|
build.cmp(temp, temp2);
|
|
|
|
build.b(ConditionA64::UnsignedLessEqual, target);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(!"Unsupported instruction form");
|
2023-11-10 21:10:07 +00:00
|
|
|
}
|
|
|
|
finalizeTargetLabel(inst.d, fresh);
|
|
|
|
break;
|
|
|
|
}
|
2024-06-07 18:51:12 +01:00
|
|
|
case IrCmd::CHECK_USERDATA_TAG:
|
|
|
|
{
|
|
|
|
Label fresh; // used when guard aborts execution or jumps to a VM exit
|
|
|
|
Label& fail = getTargetLabel(inst.c, fresh);
|
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::w);
|
|
|
|
build.ldrb(temp, mem(regOp(inst.a), offsetof(Udata, tag)));
|
2024-07-08 22:57:06 +01:00
|
|
|
build.cmp(temp, intOp(inst.b));
|
2024-06-07 18:51:12 +01:00
|
|
|
build.b(ConditionA64::NotEqual, fail);
|
|
|
|
finalizeTargetLabel(inst.c, fresh);
|
|
|
|
break;
|
|
|
|
}
|
2023-03-24 18:03:04 +00:00
|
|
|
case IrCmd::INTERRUPT:
|
|
|
|
{
|
2023-06-16 18:35:18 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-21 23:14:26 +01:00
|
|
|
|
2023-06-16 18:35:18 +01:00
|
|
|
Label self;
|
2023-03-31 19:42:49 +01:00
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
build.ldr(x0, mem(rGlobalState, offsetof(global_State, cb.interrupt)));
|
2023-06-16 18:35:18 +01:00
|
|
|
build.cbnz(x0, self);
|
2023-04-21 23:14:26 +01:00
|
|
|
|
2023-06-16 18:35:18 +01:00
|
|
|
Label next = build.setLabel();
|
2023-04-21 23:14:26 +01:00
|
|
|
|
2023-06-16 18:35:18 +01:00
|
|
|
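// the stub at 'self' is emitted out of line later; it invokes the interrupt callback and then resumes at 'next'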
interruptHandlers.push_back({self, uintOp(inst.a), next});
|
2023-03-24 18:03:04 +00:00
|
|
|
break;
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::CHECK_GC:
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
|
|
|
|
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(offsetof(global_State, totalbytes) == offsetof(global_State, GCthreshold) + 8);
|
2023-04-07 22:01:29 +01:00
|
|
|
Label skip;
|
2023-08-18 19:15:41 +01:00
|
|
|
build.ldp(temp1, temp2, mem(rGlobalState, offsetof(global_State, GCthreshold)));
|
2023-04-07 22:01:29 +01:00
|
|
|
build.cmp(temp1, temp2);
|
|
|
|
build.b(ConditionA64::UnsignedGreater, skip);
|
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
size_t spills = regs.spill(build, index);
|
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.mov(w1, 1);
|
2023-08-18 19:15:41 +01:00
|
|
|
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaC_step)));
|
|
|
|
build.blr(x2);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
emitUpdateBase(build);
|
2023-04-28 20:55:13 +01:00
|
|
|
|
|
|
|
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
|
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
build.setLabel(skip);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BARRIER_OBJ:
|
|
|
|
{
|
2023-04-14 19:06:22 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::x);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-14 19:06:22 +01:00
|
|
|
Label skip;
|
2023-09-01 18:58:27 +01:00
|
|
|
checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
|
|
|
|
size_t spills = regs.spill(build, index, {reg});
|
|
|
|
build.mov(x1, reg);
|
2023-04-21 23:14:26 +01:00
|
|
|
build.mov(x0, rState);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
|
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
|
|
|
|
build.blr(x3);
|
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
|
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
|
|
|
|
build.setLabel(skip);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BARRIER_TABLE_BACK:
|
|
|
|
{
|
|
|
|
Label skip;
|
2023-04-14 19:06:22 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::w);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
// isblack(obj2gco(t))
|
2023-04-14 19:06:22 +01:00
|
|
|
build.ldrb(temp, mem(regOp(inst.a), offsetof(GCheader, marked)));
|
2023-04-21 23:14:26 +01:00
|
|
|
build.tbz(temp, BLACKBIT, skip);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
|
|
|
|
size_t spills = regs.spill(build, index, {reg});
|
|
|
|
build.mov(x1, reg);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.mov(x0, rState);
|
2023-04-21 23:14:26 +01:00
|
|
|
build.add(x2, x1, uint16_t(offsetof(Table, gclist)));
|
2023-04-07 22:01:29 +01:00
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierback)));
|
|
|
|
build.blr(x3);
|
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
|
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
|
|
|
|
build.setLabel(skip);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BARRIER_TABLE_FORWARD:
|
|
|
|
{
|
2023-04-14 19:06:22 +01:00
|
|
|
RegisterA64 temp = regs.allocTemp(KindA64::x);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-14 19:06:22 +01:00
|
|
|
Label skip;
|
2023-09-01 18:58:27 +01:00
|
|
|
checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
|
2023-09-01 18:58:27 +01:00
|
|
|
AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
|
2023-04-28 20:55:13 +01:00
|
|
|
size_t spills = regs.spill(build, index, {reg});
|
|
|
|
build.mov(x1, reg);
|
2023-04-21 23:14:26 +01:00
|
|
|
build.mov(x0, rState);
|
2023-09-01 18:58:27 +01:00
|
|
|
build.ldr(x2, addr);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barriertable)));
|
|
|
|
build.blr(x3);
|
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
|
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
|
|
|
|
build.setLabel(skip);
|
|
|
|
break;
|
|
|
|
}
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::SET_SAVEDPC:
|
2023-03-24 18:03:04 +00:00
|
|
|
{
|
2023-03-31 19:42:49 +01:00
|
|
|
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
|
|
|
|
|
2023-04-14 19:06:22 +01:00
|
|
|
emitAddOffset(build, temp1, rCode, uintOp(inst.a) * sizeof(Instruction));
|
2023-03-31 19:42:49 +01:00
|
|
|
build.ldr(temp2, mem(rState, offsetof(lua_State, ci)));
|
|
|
|
build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc)));
|
2023-03-24 18:03:04 +00:00
|
|
|
break;
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::CLOSE_UPVALS:
|
|
|
|
{
|
|
|
|
Label skip;
|
|
|
|
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
|
|
|
|
|
|
|
|
// L->openupval != 0
|
|
|
|
build.ldr(temp1, mem(rState, offsetof(lua_State, openupval)));
|
|
|
|
build.cbz(temp1, skip);
|
|
|
|
|
|
|
|
// ra <= L->openupval->v
|
|
|
|
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
|
|
|
|
build.add(temp2, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.cmp(temp2, temp1);
|
|
|
|
build.b(ConditionA64::UnsignedGreater, skip);
|
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
size_t spills = regs.spill(build, index, {temp2});
|
|
|
|
build.mov(x1, temp2);
|
2023-04-21 23:14:26 +01:00
|
|
|
build.mov(x0, rState);
|
2023-04-07 22:01:29 +01:00
|
|
|
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_close)));
|
|
|
|
build.blr(x2);
|
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
|
|
|
|
|
2023-04-07 22:01:29 +01:00
|
|
|
build.setLabel(skip);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::CAPTURE:
|
|
|
|
// no-op
|
|
|
|
break;
|
2023-04-14 19:06:22 +01:00
|
|
|
case IrCmd::SETLIST:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeSETLIST), uintOp(inst.a));
|
2023-04-14 19:06:22 +01:00
|
|
|
break;
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::CALL:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-14 19:06:22 +01:00
|
|
|
// argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams;
|
|
|
|
if (intOp(inst.b) == LUA_MULTRET)
|
|
|
|
build.ldr(x2, mem(rState, offsetof(lua_State, top)));
|
|
|
|
else
|
|
|
|
build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + 1 + intOp(inst.b)) * sizeof(TValue)));
|
|
|
|
|
|
|
|
// callFallback(L, ra, argtop, nresults)
|
|
|
|
build.mov(x0, rState);
|
|
|
|
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.mov(w3, intOp(inst.c));
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
|
|
|
|
build.blr(x4);
|
|
|
|
|
2023-10-06 20:02:32 +01:00
|
|
|
emitUpdateBase(build);
|
|
|
|
|
|
|
|
// reentry with x0=closure (NULL implies C function; CALL_FALLBACK_YIELD will trigger exit)
|
|
|
|
build.cbnz(x0, helpers.continueCall);
|
2023-03-31 19:42:49 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::RETURN:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-06-09 18:08:00 +01:00
|
|
|
|
|
|
|
if (function.variadic)
|
|
|
|
{
|
|
|
|
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
|
|
|
|
build.ldr(x1, mem(x1, offsetof(CallInfo, func)));
|
|
|
|
}
|
|
|
|
else if (intOp(inst.b) != 1)
|
|
|
|
build.sub(x1, rBase, sizeof(TValue)); // invariant: ci->func + 1 == ci->base for non-variadic frames
|
|
|
|
|
|
|
|
if (intOp(inst.b) == 0)
|
|
|
|
{
|
|
|
|
build.mov(w2, 0);
|
|
|
|
build.b(helpers.return_);
|
|
|
|
}
|
|
|
|
else if (intOp(inst.b) == 1 && !function.variadic)
|
|
|
|
{
|
|
|
|
// fast path: minimizes x1 adjustments
|
|
|
|
// note that we skipped x1 computation for this specific case above
|
|
|
|
build.ldr(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.str(q0, mem(rBase, -int(sizeof(TValue))));
|
|
|
|
build.mov(x1, rBase);
|
|
|
|
build.mov(w2, 1);
|
|
|
|
build.b(helpers.return_);
|
|
|
|
}
|
|
|
|
else if (intOp(inst.b) >= 1 && intOp(inst.b) <= 3)
|
|
|
|
{
|
|
|
|
for (int r = 0; r < intOp(inst.b); ++r)
|
|
|
|
{
|
|
|
|
build.ldr(q0, mem(rBase, (vmRegOp(inst.a) + r) * sizeof(TValue)));
|
|
|
|
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
|
|
|
|
}
|
|
|
|
build.mov(w2, intOp(inst.b));
|
|
|
|
build.b(helpers.return_);
|
|
|
|
}
|
2023-04-21 23:14:26 +01:00
|
|
|
else
|
2023-06-09 18:08:00 +01:00
|
|
|
{
|
|
|
|
build.mov(w2, 0);
|
2023-04-14 19:06:22 +01:00
|
|
|
|
2023-06-09 18:08:00 +01:00
|
|
|
// vali = ra
|
|
|
|
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
|
|
|
|
// valend = (n == LUA_MULTRET) ? L->top : ra + n
|
|
|
|
if (intOp(inst.b) == LUA_MULTRET)
|
|
|
|
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
|
|
|
|
else
|
|
|
|
build.add(x4, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
|
|
|
|
|
|
|
|
Label repeatValueLoop, exitValueLoop;
|
|
|
|
|
|
|
|
if (intOp(inst.b) == LUA_MULTRET)
|
|
|
|
{
|
|
|
|
build.cmp(x3, x4);
|
|
|
|
build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual
|
|
|
|
}
|
|
|
|
|
|
|
|
build.setLabel(repeatValueLoop);
|
|
|
|
build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post));
|
|
|
|
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
|
|
|
|
build.add(w2, w2, 1);
|
|
|
|
build.cmp(x3, x4);
|
|
|
|
build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess
|
|
|
|
|
|
|
|
build.setLabel(exitValueLoop);
|
|
|
|
build.b(helpers.return_);
|
|
|
|
}
|
2023-04-14 19:06:22 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::FORGLOOP:
|
|
|
|
// register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-14 19:06:22 +01:00
|
|
|
// clear extra variables since we might have more than two
|
|
|
|
if (intOp(inst.b) > 2)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(LUA_TNIL == 0);
|
2023-04-14 19:06:22 +01:00
|
|
|
for (int i = 2; i < intOp(inst.b); ++i)
|
2023-08-18 19:15:41 +01:00
|
|
|
build.str(wzr, mem(rBase, (vmRegOp(inst.a) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt)));
|
2023-04-14 19:06:22 +01:00
|
|
|
}
|
|
|
|
// we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here
|
|
|
|
build.mov(x0, rState);
|
|
|
|
build.ldr(x1, mem(rBase, (vmRegOp(inst.a) + 1) * sizeof(TValue) + offsetof(TValue, value.gc)));
|
|
|
|
build.ldr(w2, mem(rBase, (vmRegOp(inst.a) + 2) * sizeof(TValue) + offsetof(TValue, value.p)));
|
|
|
|
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter)));
|
|
|
|
build.blr(x4);
|
|
|
|
// note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack
|
|
|
|
build.cbnz(w0, labelOp(inst.c));
|
|
|
|
jumpOrFallthrough(blockOp(inst.d), next);
|
|
|
|
break;
|
|
|
|
case IrCmd::FORGLOOP_FALLBACK:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-14 19:06:22 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.mov(w1, vmRegOp(inst.a));
|
|
|
|
build.mov(w2, intOp(inst.b));
|
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback)));
|
|
|
|
build.blr(x3);
|
|
|
|
emitUpdateBase(build);
|
|
|
|
build.cbnz(w0, labelOp(inst.c));
|
|
|
|
jumpOrFallthrough(blockOp(inst.d), next);
|
|
|
|
break;
|
|
|
|
case IrCmd::FORGPREP_XNEXT_FALLBACK:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-04-14 19:06:22 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
|
|
|
|
build.mov(w2, uintOp(inst.a) + 1);
|
|
|
|
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback)));
|
|
|
|
build.blr(x3);
|
|
|
|
// note: no emitUpdateBase necessary because forgPrepXnextFallback does not reallocate stack
|
|
|
|
jumpOrFallthrough(blockOp(inst.c), next);
|
2023-03-31 19:42:49 +01:00
|
|
|
break;
|
2023-04-14 19:06:22 +01:00
|
|
|
case IrCmd::COVERAGE:
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
|
|
|
|
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
|
|
|
|
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
|
|
|
|
|
|
|
|
build.mov(temp1, uintOp(inst.a) * sizeof(Instruction));
|
|
|
|
build.ldr(temp2, mem(rCode, temp1));
|
|
|
|
|
|
|
|
// increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1
|
|
|
|
// note: cmp can be eliminated with adds but we aren't concerned with code size for coverage
|
|
|
|
build.add(temp3, temp2, 256);
|
|
|
|
build.cmp(temp3, 0);
|
|
|
|
build.csel(temp2, temp2, temp3, ConditionA64::Less);
|
|
|
|
|
|
|
|
build.str(temp2, mem(rCode, temp1));
|
|
|
|
break;
|
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
|
|
|
|
// Full instruction fallbacks
|
|
|
|
case IrCmd::FALLBACK_GETGLOBAL:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeGETGLOBAL), uintOp(inst.a));
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::FALLBACK_SETGLOBAL:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeSETGLOBAL), uintOp(inst.a));
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::FALLBACK_GETTABLEKS:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeGETTABLEKS), uintOp(inst.a));
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::FALLBACK_SETTABLEKS:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeSETTABLEKS), uintOp(inst.a));
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::FALLBACK_NAMECALL:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeNAMECALL), uintOp(inst.a));
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::FALLBACK_PREPVARARGS:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::Constant);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executePREPVARARGS), uintOp(inst.a));
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::FALLBACK_GETVARARGS:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.c.kind == IrOpKind::Constant);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-08-04 20:18:54 +01:00
|
|
|
build.mov(x0, rState);
|
|
|
|
|
|
|
|
if (intOp(inst.c) == LUA_MULTRET)
|
|
|
|
{
|
|
|
|
emitAddOffset(build, x1, rCode, uintOp(inst.a) * sizeof(Instruction));
|
|
|
|
build.mov(x2, rBase);
|
2023-08-18 19:15:41 +01:00
|
|
|
build.mov(w3, vmRegOp(inst.b));
|
2023-08-04 20:18:54 +01:00
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSMultRet)));
|
|
|
|
build.blr(x4);
|
|
|
|
|
|
|
|
emitUpdateBase(build);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
build.mov(x1, rBase);
|
2023-08-18 19:15:41 +01:00
|
|
|
build.mov(w2, vmRegOp(inst.b));
|
|
|
|
build.mov(w3, intOp(inst.c));
|
2023-08-04 20:18:54 +01:00
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSConst)));
|
|
|
|
build.blr(x4);
|
2023-08-18 19:15:41 +01:00
|
|
|
|
|
|
|
// note: no emitUpdateBase necessary because executeGETVARARGSConst does not reallocate stack
|
2023-08-04 20:18:54 +01:00
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
2023-07-28 16:13:53 +01:00
|
|
|
case IrCmd::NEWCLOSURE:
|
|
|
|
{
|
|
|
|
RegisterA64 reg = regOp(inst.b); // note: we need to call regOp before spill so that we don't do redundant reloads
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-07-28 16:13:53 +01:00
|
|
|
regs.spill(build, index, {reg});
|
|
|
|
build.mov(x2, reg);
|
|
|
|
|
|
|
|
build.mov(x0, rState);
|
|
|
|
build.mov(w1, uintOp(inst.a));
|
|
|
|
|
|
|
|
build.ldr(x3, mem(rClosure, offsetof(Closure, l.p)));
|
|
|
|
build.ldr(x3, mem(x3, offsetof(Proto, p)));
|
|
|
|
build.ldr(x3, mem(x3, sizeof(Proto*) * uintOp(inst.c)));
|
|
|
|
|
|
|
|
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaF_newLclosure)));
|
|
|
|
build.blr(x4);
|
|
|
|
|
|
|
|
inst.regA64 = regs.takeReg(x0, index);
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
2023-07-28 16:13:53 +01:00
|
|
|
}
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::FALLBACK_DUPCLOSURE:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(inst.a));
|
2023-04-07 22:01:29 +01:00
|
|
|
break;
|
2023-04-14 19:06:22 +01:00
|
|
|
case IrCmd::FALLBACK_FORGPREP:
|
2023-04-21 23:14:26 +01:00
|
|
|
regs.spill(build, index);
|
2023-05-19 20:37:30 +01:00
|
|
|
emitFallback(build, offsetof(NativeContext, executeFORGPREP), uintOp(inst.a));
|
2023-04-14 19:06:22 +01:00
|
|
|
jumpOrFallthrough(blockOp(inst.c), next);
|
|
|
|
break;
|
2023-04-07 22:01:29 +01:00
|
|
|
|
2023-04-14 19:06:22 +01:00
|
|
|
// Pseudo instructions
|
|
|
|
case IrCmd::NOP:
|
|
|
|
case IrCmd::SUBSTITUTE:
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(!"Pseudo instructions should not be lowered");
|
2023-04-14 19:06:22 +01:00
|
|
|
break;
|
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
case IrCmd::BITAND_UINT:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
|
2023-07-07 21:10:48 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
|
2023-04-28 20:55:13 +01:00
|
|
|
build.and_(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
|
2023-04-21 23:14:26 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = tempUint(inst.a);
|
|
|
|
RegisterA64 temp2 = tempUint(inst.b);
|
|
|
|
build.and_(inst.regA64, temp1, temp2);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BITXOR_UINT:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
|
2023-07-07 21:10:48 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
|
2023-04-28 20:55:13 +01:00
|
|
|
build.eor(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
|
2023-04-21 23:14:26 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = tempUint(inst.a);
|
|
|
|
RegisterA64 temp2 = tempUint(inst.b);
|
|
|
|
build.eor(inst.regA64, temp1, temp2);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BITOR_UINT:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
|
2023-07-07 21:10:48 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
|
2023-04-28 20:55:13 +01:00
|
|
|
build.orr(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
|
2023-04-21 23:14:26 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = tempUint(inst.a);
|
|
|
|
RegisterA64 temp2 = tempUint(inst.b);
|
|
|
|
build.orr(inst.regA64, temp1, temp2);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BITNOT_UINT:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
|
|
|
|
RegisterA64 temp = tempUint(inst.a);
|
2023-05-12 18:50:47 +01:00
|
|
|
build.mvn_(inst.regA64, temp);
|
2023-04-21 23:14:26 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BITLSHIFT_UINT:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
|
2023-07-07 21:10:48 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
|
2023-04-28 20:55:13 +01:00
|
|
|
build.lsl(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
|
2023-04-21 23:14:26 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = tempUint(inst.a);
|
|
|
|
RegisterA64 temp2 = tempUint(inst.b);
|
|
|
|
build.lsl(inst.regA64, temp1, temp2);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BITRSHIFT_UINT:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
|
2023-07-07 21:10:48 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
|
2023-04-28 20:55:13 +01:00
|
|
|
build.lsr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
|
2023-04-21 23:14:26 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = tempUint(inst.a);
|
|
|
|
RegisterA64 temp2 = tempUint(inst.b);
|
|
|
|
build.lsr(inst.regA64, temp1, temp2);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BITARSHIFT_UINT:
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
|
2023-07-07 21:10:48 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
|
2023-04-28 20:55:13 +01:00
|
|
|
build.asr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
|
2023-04-21 23:14:26 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
RegisterA64 temp1 = tempUint(inst.a);
|
|
|
|
RegisterA64 temp2 = tempUint(inst.b);
|
|
|
|
build.asr(inst.regA64, temp1, temp2);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case IrCmd::BITLROTATE_UINT:
|
|
|
|
{
|
2023-07-07 21:10:48 +01:00
|
|
|
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
|
2023-04-21 23:14:26 +01:00
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
|
2023-04-28 20:55:13 +01:00
|
|
|
build.ror(inst.regA64, regOp(inst.a), uint8_t((32 - unsigned(intOp(inst.b))) & 31));
|
2023-04-21 23:14:26 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); // can't reuse a because it would be clobbered by neg
|
|
|
|
RegisterA64 temp1 = tempUint(inst.a);
|
|
|
|
RegisterA64 temp2 = tempUint(inst.b);
|
|
|
|
build.neg(inst.regA64, temp2);
|
|
|
|
build.ror(inst.regA64, temp1, inst.regA64);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
    case IrCmd::BITRROTATE_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

        if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
            build.ror(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
        else
        {
            RegisterA64 temp1 = tempUint(inst.a);
            RegisterA64 temp2 = tempUint(inst.b);
            build.ror(inst.regA64, temp1, temp2);
        }
        break;
    }

    case IrCmd::BITCOUNTLZ_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
        RegisterA64 temp = tempUint(inst.a);
        build.clz(inst.regA64, temp);
        break;
    }
    case IrCmd::BITCOUNTRZ_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
        RegisterA64 temp = tempUint(inst.a);
        build.rbit(inst.regA64, temp);
        build.clz(inst.regA64, inst.regA64);
        break;
    }
    case IrCmd::BYTESWAP_UINT:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
        RegisterA64 temp = tempUint(inst.a);
        build.rev(inst.regA64, temp);
        break;
    }

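    // Note: INVOKE_LIBM calls a C math routine through NativeContext. Arguments are staged into the
    // AArch64 parameter registers (d0/d1 for doubles, w0 for an integer second argument), the register
    // file is spilled first because the callee may clobber caller-saved registers, and the result is
    // claimed from d0 after the call.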
    case IrCmd::INVOKE_LIBM:
    {
        if (inst.c.kind != IrOpKind::None)
        {
            bool isInt = (inst.c.kind == IrOpKind::Constant) ? constOp(inst.c).kind == IrConstKind::Int
                                                             : getCmdValueKind(function.instOp(inst.c).cmd) == IrValueKind::Int;

            RegisterA64 temp1 = tempDouble(inst.b);
            RegisterA64 temp2 = isInt ? tempInt(inst.c) : tempDouble(inst.c);
            RegisterA64 temp3 = isInt ? noreg : regs.allocTemp(KindA64::d); // note: spill() frees all registers so we need to avoid alloc after spill
            regs.spill(build, index, {temp1, temp2});

            if (isInt)
            {
                build.fmov(d0, temp1);
                build.mov(w0, temp2);
            }
            else if (d0 != temp2)
            {
                build.fmov(d0, temp1);
                build.fmov(d1, temp2);
            }
            else
            {
                build.fmov(temp3, d0);
                build.fmov(d0, temp1);
                build.fmov(d1, temp3);
            }
        }
        else
        {
            RegisterA64 temp1 = tempDouble(inst.b);
            regs.spill(build, index, {temp1});
            build.fmov(d0, temp1);
        }

        build.ldr(x1, mem(rNativeContext, getNativeContextOffset(uintOp(inst.a))));
        build.blr(x1);

        inst.regA64 = regs.takeReg(d0, index);
        break;
    }

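    // Note: GET_TYPE loads the interned type name (a TString*) for a tag: the tag scaled by
    // sizeof(TString*) is added to the global_State pointer and the ttname array offset is folded
    // into the final load.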
    case IrCmd::GET_TYPE:
    {
        inst.regA64 = regs.allocReg(KindA64::x, index);

        CODEGEN_ASSERT(sizeof(TString*) == 8);

        if (inst.a.kind == IrOpKind::Inst)
            build.add(inst.regA64, rGlobalState, regOp(inst.a), 3); // implicit uxtw
        else if (inst.a.kind == IrOpKind::Constant)
            build.add(inst.regA64, rGlobalState, uint16_t(tagOp(inst.a)) * 8);
        else
            CODEGEN_ASSERT(!"Unsupported instruction form");

        build.ldr(inst.regA64, mem(inst.regA64, offsetof(global_State, ttname)));
        break;
    }

    case IrCmd::GET_TYPEOF:
    {
        regs.spill(build, index);

        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr)));
        build.blr(x2);

        inst.regA64 = regs.takeReg(x0, index);
        break;
    }

    case IrCmd::FINDUPVAL:
    {
        regs.spill(build, index);

        build.mov(x0, rState);
        build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
        build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_findupval)));
        build.blr(x2);

        inst.regA64 = regs.takeReg(x0, index);
        break;
    }

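    // Note: the BUFFER_* cases below all form their address through tempAddrBuffer; the optional tag
    // operand selects between buffer and userdata layouts and defaults to LUA_TBUFFER when absent.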
    case IrCmd::BUFFER_READI8:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));

        build.ldrsb(inst.regA64, addr);
        break;
    }
    case IrCmd::BUFFER_READU8:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));

        build.ldrb(inst.regA64, addr);
        break;
    }
    case IrCmd::BUFFER_WRITEI8:
    {
        RegisterA64 temp = tempInt(inst.c);
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));

        build.strb(temp, addr);
        break;
    }

    case IrCmd::BUFFER_READI16:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));

        build.ldrsh(inst.regA64, addr);
        break;
    }
    case IrCmd::BUFFER_READU16:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));

        build.ldrh(inst.regA64, addr);
        break;
    }
    case IrCmd::BUFFER_WRITEI16:
    {
        RegisterA64 temp = tempInt(inst.c);
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));

        build.strh(temp, addr);
        break;
    }

    case IrCmd::BUFFER_READI32:
    {
        inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));

        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::BUFFER_WRITEI32:
    {
        RegisterA64 temp = tempInt(inst.c);
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));

        build.str(temp, addr);
        break;
    }

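    // Note: 32-bit float accesses go through a single-precision view of the register: BUFFER_READF32
    // loads into an 's' alias and widens with fcvt, while BUFFER_WRITEF32 narrows with fcvt before the
    // store, since IR numbers are always doubles.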
    case IrCmd::BUFFER_READF32:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));

        build.ldr(temp, addr);
        build.fcvt(inst.regA64, temp);
        break;
    }
    case IrCmd::BUFFER_WRITEF32:
    {
        RegisterA64 temp1 = tempDouble(inst.c);
        RegisterA64 temp2 = regs.allocTemp(KindA64::s);
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));

        build.fcvt(temp2, temp1);
        build.str(temp2, addr);
        break;
    }

    case IrCmd::BUFFER_READF64:
    {
        inst.regA64 = regs.allocReg(KindA64::d, index);
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));

        build.ldr(inst.regA64, addr);
        break;
    }
    case IrCmd::BUFFER_WRITEF64:
    {
        RegisterA64 temp = tempDouble(inst.c);
        AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));

        build.str(temp, addr);
        break;
    }

    // To handle unsupported instructions, add "case IrCmd::OP" and make sure to set error = true!
    }

    valueTracker.afterInstLowering(inst, index);

    regs.freeLastUseRegs(inst, index);
    regs.freeTempRegs();
}

void IrLoweringA64::finishBlock(const IrBlock& curr, const IrBlock& next)
{
    if (!regs.spills.empty())
    {
        // If we have spills remaining, we have to immediately lower the successor block
        for (uint32_t predIdx : predecessors(function.cfg, function.getBlockIndex(next)))
            CODEGEN_ASSERT(predIdx == function.getBlockIndex(curr));

        // And the next block cannot be a join block in cfg
        CODEGEN_ASSERT(next.useCount == 1);
    }
}

void IrLoweringA64::finishFunction()
{
    if (build.logText)
        build.logAppend("; interrupt handlers\n");

    for (InterruptHandler& handler : interruptHandlers)
    {
        build.setLabel(handler.self);
        build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction));
        build.adr(x1, handler.next);
        build.b(helpers.interrupt);
    }

    if (build.logText)
        build.logAppend("; exit handlers\n");

    for (ExitHandler& handler : exitHandlers)
    {
        CODEGEN_ASSERT(handler.pcpos != kVmExitEntryGuardPc);

        build.setLabel(handler.self);

        build.mov(x0, handler.pcpos * sizeof(Instruction));
        build.b(helpers.updatePcAndContinueInVm);
    }

    if (stats)
    {
        if (error)
            stats->loweringErrors++;

        if (regs.error)
            stats->regAllocErrors++;
    }
}

bool IrLoweringA64::hasError() const
{
    return error || regs.error;
}

bool IrLoweringA64::isFallthroughBlock(const IrBlock& target, const IrBlock& next)
{
    return target.start == next.start;
}

void IrLoweringA64::jumpOrFallthrough(IrBlock& target, const IrBlock& next)
{
    if (!isFallthroughBlock(target, next))
        build.b(target.label);
}

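// Note: VmExit targets are resolved lazily. getTargetLabel reuses a previously registered exit handler
// for the same pcpos via exitHandlerMap, finalizeTargetLabel records a new handler when the fresh label
// ended up being used, and kVmExitEntryGuardPc is routed to a continuation that doesn't update pcpos.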
Label& IrLoweringA64::getTargetLabel(IrOp op, Label& fresh)
{
    if (op.kind == IrOpKind::Undef)
        return fresh;

    if (op.kind == IrOpKind::VmExit)
    {
        // Special exit case that doesn't have to update pcpos
        if (vmExitOp(op) == kVmExitEntryGuardPc)
            return helpers.exitContinueVmClearNativeFlag;

        if (uint32_t* index = exitHandlerMap.find(vmExitOp(op)))
            return exitHandlers[*index].self;

        return fresh;
    }

    return labelOp(op);
}

void IrLoweringA64::finalizeTargetLabel(IrOp op, Label& fresh)
{
    if (op.kind == IrOpKind::Undef)
    {
        emitAbort(build, fresh);
    }
    else if (op.kind == IrOpKind::VmExit && fresh.id != 0 && fresh.id != helpers.exitContinueVmClearNativeFlag.id)
    {
        exitHandlerMap[vmExitOp(op)] = uint32_t(exitHandlers.size());
        exitHandlers.push_back({fresh, vmExitOp(op)});
    }
}

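// Note: constant doubles are materialized with the cheapest available sequence: an fmov immediate when
// the value is encodable, movz/movk into a GPR when only the top 16 or 32 bits of the bit pattern are
// set, and otherwise a pc-relative literal load via adr+ldr.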
RegisterA64 IrLoweringA64::tempDouble(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        double val = doubleOp(op);

        if (AssemblyBuilderA64::isFmovSupported(val))
        {
            RegisterA64 temp = regs.allocTemp(KindA64::d);
            build.fmov(temp, val);
            return temp;
        }
        else
        {
            RegisterA64 temp1 = regs.allocTemp(KindA64::x);
            RegisterA64 temp2 = regs.allocTemp(KindA64::d);

            uint64_t vali = getDoubleBits(val);

            if ((vali << 16) == 0)
            {
                build.movz(temp1, uint16_t(vali >> 48), 48);
                build.fmov(temp2, temp1);
            }
            else if ((vali << 32) == 0)
            {
                build.movz(temp1, uint16_t(vali >> 48), 48);
                build.movk(temp1, uint16_t(vali >> 32), 32);
                build.fmov(temp2, temp1);
            }
            else
            {
                build.adr(temp1, val);
                build.ldr(temp2, temp1);
            }

            return temp2;
        }
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

RegisterA64 IrLoweringA64::tempInt(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, intOp(op));
        return temp;
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

RegisterA64 IrLoweringA64::tempUint(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, unsigned(intOp(op)));
        return temp;
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset)
{
    // This is needed to tighten the bounds checks in the VmConst case below
    CODEGEN_ASSERT(offset % 4 == 0);
    // Full encoded range is wider depending on the load size, but this assertion helps establish a smaller guaranteed working range [0..4096)
    CODEGEN_ASSERT(offset >= 0 && unsigned(offset / 4) <= AssemblyBuilderA64::kMaxImmediate);

    if (op.kind == IrOpKind::VmReg)
        return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset);
    else if (op.kind == IrOpKind::VmConst)
    {
        size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset;

        // Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries
        if (constantOffset / 4 <= AddressA64::kMaxOffset)
            return mem(rConstants, int(constantOffset));

        RegisterA64 temp = regs.allocTemp(KindA64::x);

        emitAddOffset(build, temp, rConstants, constantOffset);
        return temp;
    }
    // If we have a register, we assume it's a pointer to TValue
    // We might introduce explicit operand types in the future to make this more robust
    else if (op.kind == IrOpKind::Inst)
        return mem(regOp(op), offset);
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

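// Note: buffer/userdata addresses are formed by adding a register index to the object pointer (with
// implicit zero-extension) and leaving the payload offset to the addressing mode; small constant
// indices are folded directly into the unscaled immediate instead.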
AddressA64 IrLoweringA64::tempAddrBuffer(IrOp bufferOp, IrOp indexOp, uint8_t tag)
{
    CODEGEN_ASSERT(tag == LUA_TUSERDATA || tag == LUA_TBUFFER);
    int dataOffset = tag == LUA_TBUFFER ? offsetof(Buffer, data) : offsetof(Udata, data);

    if (indexOp.kind == IrOpKind::Inst)
    {
        RegisterA64 temp = regs.allocTemp(KindA64::x);
        build.add(temp, regOp(bufferOp), regOp(indexOp)); // implicit uxtw
        return mem(temp, dataOffset);
    }
    else if (indexOp.kind == IrOpKind::Constant)
    {
        // Since the resulting address may be used to load any size, including 1 byte, from an unaligned offset, we are limited by unscaled
        // encoding
        if (unsigned(intOp(indexOp)) + dataOffset <= 255)
            return mem(regOp(bufferOp), int(intOp(indexOp) + dataOffset));

        // indexOp can only be negative in dead code (since offsets are checked); this avoids assertion in emitAddOffset
        if (intOp(indexOp) < 0)
            return mem(regOp(bufferOp), dataOffset);

        RegisterA64 temp = regs.allocTemp(KindA64::x);
        emitAddOffset(build, temp, regOp(bufferOp), size_t(intOp(indexOp)));
        return mem(temp, dataOffset);
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}

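// Note: regOp transparently restores operands that were spilled or marked for reload before handing
// back the register, so callers always receive a live register.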
RegisterA64 IrLoweringA64::regOp(IrOp op)
{
    IrInst& inst = function.instOp(op);

    if (inst.spilled || inst.needsReload)
        regs.restoreReg(build, inst);

    CODEGEN_ASSERT(inst.regA64 != noreg);
    return inst.regA64;
}

IrConst IrLoweringA64::constOp(IrOp op) const
{
    return function.constOp(op);
}

uint8_t IrLoweringA64::tagOp(IrOp op) const
{
    return function.tagOp(op);
}

int IrLoweringA64::intOp(IrOp op) const
{
    return function.intOp(op);
}

unsigned IrLoweringA64::uintOp(IrOp op) const
{
    return function.uintOp(op);
}

double IrLoweringA64::doubleOp(IrOp op) const
{
    return function.doubleOp(op);
}

IrBlock& IrLoweringA64::blockOp(IrOp op) const
{
    return function.blockOp(op);
}

Label& IrLoweringA64::labelOp(IrOp op) const
{
    return blockOp(op).label;
}

} // namespace A64
} // namespace CodeGen
} // namespace Luau