// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details #include "IrLoweringA64.h" #include "Luau/DenseHash.h" #include "Luau/IrData.h" #include "Luau/IrUtils.h" #include "EmitCommonA64.h" #include "NativeState.h" #include "lstate.h" #include "lgc.h" LUAU_FASTFLAG(LuauVectorLibNativeDot); namespace Luau { namespace CodeGen { namespace A64 { inline ConditionA64 getConditionFP(IrCondition cond) { switch (cond) { case IrCondition::Equal: return ConditionA64::Equal; case IrCondition::NotEqual: return ConditionA64::NotEqual; case IrCondition::Less: return ConditionA64::Minus; case IrCondition::NotLess: return ConditionA64::Plus; case IrCondition::LessEqual: return ConditionA64::UnsignedLessEqual; case IrCondition::NotLessEqual: return ConditionA64::UnsignedGreater; case IrCondition::Greater: return ConditionA64::Greater; case IrCondition::NotGreater: return ConditionA64::LessEqual; case IrCondition::GreaterEqual: return ConditionA64::GreaterEqual; case IrCondition::NotGreaterEqual: return ConditionA64::Less; default: CODEGEN_ASSERT(!"Unexpected condition code"); return ConditionA64::Always; } } inline ConditionA64 getConditionInt(IrCondition cond) { switch (cond) { case IrCondition::Equal: return ConditionA64::Equal; case IrCondition::NotEqual: return ConditionA64::NotEqual; case IrCondition::Less: return ConditionA64::Minus; case IrCondition::NotLess: return ConditionA64::Plus; case IrCondition::LessEqual: return ConditionA64::LessEqual; case IrCondition::NotLessEqual: return ConditionA64::Greater; case IrCondition::Greater: return ConditionA64::Greater; case IrCondition::NotGreater: return ConditionA64::LessEqual; case IrCondition::GreaterEqual: return ConditionA64::GreaterEqual; case IrCondition::NotGreaterEqual: return ConditionA64::Less; case IrCondition::UnsignedLess: return ConditionA64::CarryClear; case IrCondition::UnsignedLessEqual: return ConditionA64::UnsignedLessEqual; case IrCondition::UnsignedGreater: return ConditionA64::UnsignedGreater; case IrCondition::UnsignedGreaterEqual: return ConditionA64::CarrySet; default: CODEGEN_ASSERT(!"Unexpected condition code"); return ConditionA64::Always; } } static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset) { CODEGEN_ASSERT(dst != src); CODEGEN_ASSERT(offset <= INT_MAX); if (offset <= AssemblyBuilderA64::kMaxImmediate) { build.add(dst, src, uint16_t(offset)); } else { build.mov(dst, int(offset)); build.add(dst, dst, src); } } static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp, IrOp ra, int ratag, Label& skip) { RegisterA64 tempw = castReg(KindA64::w, temp); AddressA64 addr = temp; // iscollectable(ra) if (ratag == -1 || !isGCO(ratag)) { if (ra.kind == IrOpKind::VmReg) addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, tt)); else if (ra.kind == IrOpKind::VmConst) emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, tt)); build.ldr(tempw, addr); build.cmp(tempw, LUA_TSTRING); build.b(ConditionA64::Less, skip); } // isblack(obj2gco(o)) build.ldrb(tempw, mem(object, offsetof(GCheader, marked))); build.tbz(tempw, BLACKBIT, skip); // iswhite(gcvalue(ra)) if (ra.kind == IrOpKind::VmReg) addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, value)); else if (ra.kind == IrOpKind::VmConst) emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, value)); build.ldr(temp, addr); build.ldrb(tempw, mem(temp, offsetof(GCheader, marked))); build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT)); build.b(ConditionA64::Equal, skip); // Equal = Zero after tst } static void emitAbort(AssemblyBuilderA64& build, Label& abort) { Label skip; build.b(skip); build.setLabel(abort); build.udf(); build.setLabel(skip); } static void emitFallback(AssemblyBuilderA64& build, int offset, int pcpos) { // fallback(L, instruction, base, k) build.mov(x0, rState); emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction)); build.mov(x2, rBase); build.mov(x3, rConstants); build.ldr(x4, mem(rNativeContext, offset)); build.blr(x4); emitUpdateBase(build); } static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg) { CODEGEN_ASSERT(kTempSlots >= 1); build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n))); build.add(x0, sp, sTemporary.data); // sp-relative offset build.ldr(x1, mem(rNativeContext, uint32_t(func))); build.blr(x1); } static bool emitBuiltin(AssemblyBuilderA64& build, IrFunction& function, IrRegAllocA64& regs, int bfid, int res, int arg, int nresults) { switch (bfid) { case LBF_MATH_FREXP: { CODEGEN_ASSERT(nresults == 1 || nresults == 2); emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg); build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n))); RegisterA64 temp = regs.allocTemp(KindA64::w); build.mov(temp, LUA_TNUMBER); build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt))); if (nresults == 2) { build.ldr(w0, sTemporary); build.scvtf(d1, w0); build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n))); build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt))); } return true; } case LBF_MATH_MODF: { CODEGEN_ASSERT(nresults == 1 || nresults == 2); emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg); build.ldr(d1, sTemporary); build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n))); RegisterA64 temp = regs.allocTemp(KindA64::w); build.mov(temp, LUA_TNUMBER); build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt))); if (nresults == 2) { build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n))); build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt))); } return true; } default: CODEGEN_ASSERT(!"Missing A64 lowering"); return false; } } static uint64_t getDoubleBits(double value) { uint64_t result; static_assert(sizeof(result) == sizeof(value), "Expecting double to be 64-bit"); memcpy(&result, &value, sizeof(value)); return result; } IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, IrFunction& function, LoweringStats* stats) : build(build) , helpers(helpers) , function(function) , stats(stats) , regs(function, stats, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}}) , valueTracker(function) , exitHandlerMap(~0u) { valueTracker.setRestoreCallack( this, [](void* context, IrInst& inst) { IrLoweringA64* self = static_cast(context); self->regs.restoreReg(self->build, inst); } ); } void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) { valueTracker.beforeInstLowering(inst); switch (inst.cmd) { case IrCmd::LOAD_TAG: { inst.regA64 = regs.allocReg(KindA64::w, index); AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt)); build.ldr(inst.regA64, addr); break; } case IrCmd::LOAD_POINTER: { inst.regA64 = regs.allocReg(KindA64::x, index); AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.gc)); build.ldr(inst.regA64, addr); break; } case IrCmd::LOAD_DOUBLE: { inst.regA64 = regs.allocReg(KindA64::d, index); AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.n)); build.ldr(inst.regA64, addr); break; } case IrCmd::LOAD_INT: { inst.regA64 = regs.allocReg(KindA64::w, index); AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); build.ldr(inst.regA64, addr); break; } case IrCmd::LOAD_FLOAT: { inst.regA64 = regs.allocReg(KindA64::d, index); RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register AddressA64 addr = tempAddr(inst.a, intOp(inst.b)); build.ldr(temp, addr); build.fcvt(inst.regA64, temp); break; } case IrCmd::LOAD_TVALUE: { inst.regA64 = regs.allocReg(KindA64::q, index); int addrOffset = inst.b.kind != IrOpKind::None ? intOp(inst.b) : 0; AddressA64 addr = tempAddr(inst.a, addrOffset); build.ldr(inst.regA64, addr); break; } case IrCmd::LOAD_ENV: inst.regA64 = regs.allocReg(KindA64::x, index); build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env))); break; case IrCmd::GET_ARR_ADDR: { inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a}); build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, array))); if (inst.b.kind == IrOpKind::Inst) { build.add(inst.regA64, inst.regA64, regOp(inst.b), kTValueSizeLog2); // implicit uxtw } else if (inst.b.kind == IrOpKind::Constant) { if (intOp(inst.b) == 0) { // no offset required } else if (intOp(inst.b) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate) { build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) * sizeof(TValue))); } else { RegisterA64 temp = regs.allocTemp(KindA64::x); build.mov(temp, intOp(inst.b) * sizeof(TValue)); build.add(inst.regA64, inst.regA64, temp); } } else CODEGEN_ASSERT(!"Unsupported instruction form"); break; } case IrCmd::GET_SLOT_NODE_ADDR: { inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a}); RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp1w = castReg(KindA64::w, temp1); RegisterA64 temp2 = regs.allocTemp(KindA64::w); RegisterA64 temp2x = castReg(KindA64::x, temp2); // note: since the stride of the load is the same as the destination register size, we can range check the array index, not the byte offset if (uintOp(inst.b) <= AddressA64::kMaxOffset) build.ldr(temp1w, mem(rCode, uintOp(inst.b) * sizeof(Instruction))); else { build.mov(temp1, uintOp(inst.b) * sizeof(Instruction)); build.ldr(temp1w, mem(rCode, temp1)); } // C field can be shifted as long as it's at the most significant byte of the instruction word CODEGEN_ASSERT(kOffsetOfInstructionC == 3); build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, nodemask8))); build.and_(temp2, temp2, temp1w, -24); // note: this may clobber inst.a, so it's important that we don't use it after this build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node))); build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero) break; } case IrCmd::GET_HASH_NODE_ADDR: { inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a}); RegisterA64 temp1 = regs.allocTemp(KindA64::w); RegisterA64 temp2 = regs.allocTemp(KindA64::w); RegisterA64 temp2x = castReg(KindA64::x, temp2); // hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode) build.mov(temp1, -1); build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, lsizenode))); build.lsl(temp1, temp1, temp2); build.mov(temp2, uintOp(inst.b)); build.bic(temp2, temp2, temp1); // note: this may clobber inst.a, so it's important that we don't use it after this build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node))); build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero) break; } case IrCmd::GET_CLOSURE_UPVAL_ADDR: { inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a}); RegisterA64 cl = inst.a.kind == IrOpKind::Undef ? rClosure : regOp(inst.a); build.add(inst.regA64, cl, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b))); break; } case IrCmd::STORE_TAG: { AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt)); if (tagOp(inst.b) == 0) { build.str(wzr, addr); } else { RegisterA64 temp = regs.allocTemp(KindA64::w); build.mov(temp, tagOp(inst.b)); build.str(temp, addr); } break; } case IrCmd::STORE_POINTER: { AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); if (inst.b.kind == IrOpKind::Constant) { CODEGEN_ASSERT(intOp(inst.b) == 0); build.str(xzr, addr); } else { build.str(regOp(inst.b), addr); } break; } case IrCmd::STORE_EXTRA: { AddressA64 addr = tempAddr(inst.a, offsetof(TValue, extra)); if (intOp(inst.b) == 0) { build.str(wzr, addr); } else { RegisterA64 temp = regs.allocTemp(KindA64::w); build.mov(temp, intOp(inst.b)); build.str(temp, addr); } break; } case IrCmd::STORE_DOUBLE: { AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); if (inst.b.kind == IrOpKind::Constant && getDoubleBits(doubleOp(inst.b)) == 0) { build.str(xzr, addr); } else { RegisterA64 temp = tempDouble(inst.b); build.str(temp, addr); } break; } case IrCmd::STORE_INT: { AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); if (inst.b.kind == IrOpKind::Constant && intOp(inst.b) == 0) { build.str(wzr, addr); } else { RegisterA64 temp = tempInt(inst.b); build.str(temp, addr); } break; } case IrCmd::STORE_VECTOR: { RegisterA64 temp1 = tempDouble(inst.b); RegisterA64 temp2 = tempDouble(inst.c); RegisterA64 temp3 = tempDouble(inst.d); RegisterA64 temp4 = regs.allocTemp(KindA64::s); AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value)); CODEGEN_ASSERT(addr.kind == AddressKindA64::imm && addr.data % 4 == 0 && unsigned(addr.data + 8) / 4 <= AddressA64::kMaxOffset); build.fcvt(temp4, temp1); build.str(temp4, AddressA64(addr.base, addr.data + 0)); build.fcvt(temp4, temp2); build.str(temp4, AddressA64(addr.base, addr.data + 4)); build.fcvt(temp4, temp3); build.str(temp4, AddressA64(addr.base, addr.data + 8)); break; } case IrCmd::STORE_TVALUE: { int addrOffset = inst.c.kind != IrOpKind::None ? intOp(inst.c) : 0; AddressA64 addr = tempAddr(inst.a, addrOffset); build.str(regOp(inst.b), addr); break; } case IrCmd::STORE_SPLIT_TVALUE: { int addrOffset = inst.d.kind != IrOpKind::None ? intOp(inst.d) : 0; RegisterA64 tempt = regs.allocTemp(KindA64::w); AddressA64 addrt = tempAddr(inst.a, offsetof(TValue, tt) + addrOffset); build.mov(tempt, tagOp(inst.b)); build.str(tempt, addrt); AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value) + addrOffset); if (tagOp(inst.b) == LUA_TBOOLEAN) { if (inst.c.kind == IrOpKind::Constant) { // note: we reuse tag temp register as value for true booleans, and use built-in zero register for false values CODEGEN_ASSERT(LUA_TBOOLEAN == 1); build.str(intOp(inst.c) ? tempt : wzr, addr); } else build.str(regOp(inst.c), addr); } else if (tagOp(inst.b) == LUA_TNUMBER) { RegisterA64 temp = tempDouble(inst.c); build.str(temp, addr); } else if (isGCO(tagOp(inst.b))) { build.str(regOp(inst.c), addr); } else { CODEGEN_ASSERT(!"Unsupported instruction form"); } break; } case IrCmd::ADD_INT: inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate) build.add(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b))); else if (inst.a.kind == IrOpKind::Constant && unsigned(intOp(inst.a)) <= AssemblyBuilderA64::kMaxImmediate) build.add(inst.regA64, regOp(inst.b), uint16_t(intOp(inst.a))); else { RegisterA64 temp1 = tempInt(inst.a); RegisterA64 temp2 = tempInt(inst.b); build.add(inst.regA64, temp1, temp2); } break; case IrCmd::SUB_INT: inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate) build.sub(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b))); else { RegisterA64 temp1 = tempInt(inst.a); RegisterA64 temp2 = tempInt(inst.b); build.sub(inst.regA64, temp1, temp2); } break; case IrCmd::ADD_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fadd(inst.regA64, temp1, temp2); break; } case IrCmd::SUB_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fsub(inst.regA64, temp1, temp2); break; } case IrCmd::MUL_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fmul(inst.regA64, temp1, temp2); break; } case IrCmd::DIV_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fdiv(inst.regA64, temp1, temp2); break; } case IrCmd::IDIV_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fdiv(inst.regA64, temp1, temp2); build.frintm(inst.regA64, inst.regA64); break; } case IrCmd::MOD_NUM: { inst.regA64 = regs.allocReg(KindA64::d, index); // can't allocReuse because both A and B are used twice RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fdiv(inst.regA64, temp1, temp2); build.frintm(inst.regA64, inst.regA64); build.fmul(inst.regA64, inst.regA64, temp2); build.fsub(inst.regA64, temp1, inst.regA64); break; } case IrCmd::MIN_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fcmp(temp1, temp2); build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Less)); break; } case IrCmd::MAX_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b}); RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fcmp(temp1, temp2); build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Greater)); break; } case IrCmd::UNM_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); RegisterA64 temp = tempDouble(inst.a); build.fneg(inst.regA64, temp); break; } case IrCmd::FLOOR_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); RegisterA64 temp = tempDouble(inst.a); build.frintm(inst.regA64, temp); break; } case IrCmd::CEIL_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); RegisterA64 temp = tempDouble(inst.a); build.frintp(inst.regA64, temp); break; } case IrCmd::ROUND_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); RegisterA64 temp = tempDouble(inst.a); build.frinta(inst.regA64, temp); break; } case IrCmd::SQRT_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); RegisterA64 temp = tempDouble(inst.a); build.fsqrt(inst.regA64, temp); break; } case IrCmd::ABS_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); RegisterA64 temp = tempDouble(inst.a); build.fabs(inst.regA64, temp); break; } case IrCmd::SIGN_NUM: { inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a}); RegisterA64 temp = tempDouble(inst.a); RegisterA64 temp0 = regs.allocTemp(KindA64::d); RegisterA64 temp1 = regs.allocTemp(KindA64::d); build.fcmpz(temp); build.fmov(temp0, 0.0); build.fmov(temp1, 1.0); build.fcsel(inst.regA64, temp1, temp0, getConditionFP(IrCondition::Greater)); build.fmov(temp1, -1.0); build.fcsel(inst.regA64, temp1, inst.regA64, getConditionFP(IrCondition::Less)); break; } case IrCmd::ADD_VEC: { inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b}); build.fadd(inst.regA64, regOp(inst.a), regOp(inst.b)); break; } case IrCmd::SUB_VEC: { inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b}); build.fsub(inst.regA64, regOp(inst.a), regOp(inst.b)); break; } case IrCmd::MUL_VEC: { inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b}); build.fmul(inst.regA64, regOp(inst.a), regOp(inst.b)); break; } case IrCmd::DIV_VEC: { inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b}); build.fdiv(inst.regA64, regOp(inst.a), regOp(inst.b)); break; } case IrCmd::UNM_VEC: { inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a}); build.fneg(inst.regA64, regOp(inst.a)); break; } case IrCmd::DOT_VEC: { LUAU_ASSERT(FFlag::LuauVectorLibNativeDot); inst.regA64 = regs.allocReg(KindA64::d, index); RegisterA64 temp = regs.allocTemp(KindA64::q); RegisterA64 temps = castReg(KindA64::s, temp); RegisterA64 regs = castReg(KindA64::s, inst.regA64); build.fmul(temp, regOp(inst.a), regOp(inst.b)); build.faddp(regs, temps); // x+y build.dup_4s(temp, temp, 2); build.fadd(regs, regs, temps); // +z build.fcvt(inst.regA64, regs); break; } case IrCmd::NOT_ANY: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Constant) { // other cases should've been constant folded CODEGEN_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN); build.eor(inst.regA64, regOp(inst.b), 1); } else { Label notbool, exit; // use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once CODEGEN_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1); build.cmp(regOp(inst.a), LUA_TBOOLEAN); build.b(ConditionA64::NotEqual, notbool); if (inst.b.kind == IrOpKind::Constant) build.mov(inst.regA64, intOp(inst.b) == 0 ? 1 : 0); else build.eor(inst.regA64, regOp(inst.b), 1); // boolean => invert value build.b(exit); // not boolean => result is true iff tag was nil build.setLabel(notbool); build.cset(inst.regA64, ConditionA64::Less); build.setLabel(exit); } break; } case IrCmd::CMP_ANY: { IrCondition cond = conditionOp(inst.c); regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); if (cond == IrCondition::LessEqual) build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal))); else if (cond == IrCondition::Less) build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan))); else if (cond == IrCondition::Equal) build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval))); else CODEGEN_ASSERT(!"Unsupported condition"); build.blr(x3); emitUpdateBase(build); inst.regA64 = regs.takeReg(w0, index); break; } case IrCmd::JUMP: if (inst.a.kind == IrOpKind::Undef || inst.a.kind == IrOpKind::VmExit) { Label fresh; build.b(getTargetLabel(inst.a, fresh)); finalizeTargetLabel(inst.a, fresh); } else { jumpOrFallthrough(blockOp(inst.a), next); } break; case IrCmd::JUMP_IF_TRUTHY: { RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt))); // nil => falsy CODEGEN_ASSERT(LUA_TNIL == 0); build.cbz(temp, labelOp(inst.c)); // not boolean => truthy build.cmp(temp, LUA_TBOOLEAN); build.b(ConditionA64::NotEqual, labelOp(inst.b)); // compare boolean value build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value))); build.cbnz(temp, labelOp(inst.b)); jumpOrFallthrough(blockOp(inst.c), next); break; } case IrCmd::JUMP_IF_FALSY: { RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt))); // nil => falsy CODEGEN_ASSERT(LUA_TNIL == 0); build.cbz(temp, labelOp(inst.b)); // not boolean => truthy build.cmp(temp, LUA_TBOOLEAN); build.b(ConditionA64::NotEqual, labelOp(inst.c)); // compare boolean value build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value))); build.cbz(temp, labelOp(inst.b)); jumpOrFallthrough(blockOp(inst.c), next); break; } case IrCmd::JUMP_EQ_TAG: { RegisterA64 zr = noreg; if (inst.a.kind == IrOpKind::Constant && tagOp(inst.a) == 0) zr = regOp(inst.b); else if (inst.b.kind == IrOpKind::Constant && tagOp(inst.b) == 0) zr = regOp(inst.a); else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant) build.cmp(regOp(inst.a), tagOp(inst.b)); else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Inst) build.cmp(regOp(inst.a), regOp(inst.b)); else if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Inst) build.cmp(regOp(inst.b), tagOp(inst.a)); else CODEGEN_ASSERT(!"Unsupported instruction form"); if (isFallthroughBlock(blockOp(inst.d), next)) { if (zr != noreg) build.cbz(zr, labelOp(inst.c)); else build.b(ConditionA64::Equal, labelOp(inst.c)); jumpOrFallthrough(blockOp(inst.d), next); } else { if (zr != noreg) build.cbnz(zr, labelOp(inst.d)); else build.b(ConditionA64::NotEqual, labelOp(inst.d)); jumpOrFallthrough(blockOp(inst.c), next); } break; } case IrCmd::JUMP_CMP_INT: { IrCondition cond = conditionOp(inst.c); if (cond == IrCondition::Equal && intOp(inst.b) == 0) { build.cbz(regOp(inst.a), labelOp(inst.d)); } else if (cond == IrCondition::NotEqual && intOp(inst.b) == 0) { build.cbnz(regOp(inst.a), labelOp(inst.d)); } else { CODEGEN_ASSERT(unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate); build.cmp(regOp(inst.a), uint16_t(intOp(inst.b))); build.b(getConditionInt(cond), labelOp(inst.d)); } jumpOrFallthrough(blockOp(inst.e), next); break; } case IrCmd::JUMP_EQ_POINTER: build.cmp(regOp(inst.a), regOp(inst.b)); build.b(ConditionA64::Equal, labelOp(inst.c)); jumpOrFallthrough(blockOp(inst.d), next); break; case IrCmd::JUMP_CMP_NUM: { IrCondition cond = conditionOp(inst.c); if (inst.b.kind == IrOpKind::Constant && doubleOp(inst.b) == 0.0) { RegisterA64 temp = tempDouble(inst.a); build.fcmpz(temp); } else { RegisterA64 temp1 = tempDouble(inst.a); RegisterA64 temp2 = tempDouble(inst.b); build.fcmp(temp1, temp2); } build.b(getConditionFP(cond), labelOp(inst.d)); jumpOrFallthrough(blockOp(inst.e), next); break; } case IrCmd::JUMP_FORN_LOOP_COND: { RegisterA64 index = tempDouble(inst.a); RegisterA64 limit = tempDouble(inst.b); RegisterA64 step = tempDouble(inst.c); Label direct; // step > 0 build.fcmpz(step); build.b(getConditionFP(IrCondition::Greater), direct); // !(limit <= index) build.fcmp(limit, index); build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e)); build.b(labelOp(inst.d)); // !(index <= limit) build.setLabel(direct); build.fcmp(index, limit); build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e)); jumpOrFallthrough(blockOp(inst.d), next); break; } // IrCmd::JUMP_SLOT_MATCH implemented below case IrCmd::TABLE_LEN: { RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads regs.spill(build, index, {reg}); build.mov(x0, reg); build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, luaH_getn))); build.blr(x1); inst.regA64 = regs.takeReg(w0, index); break; } case IrCmd::STRING_LEN: { inst.regA64 = regs.allocReg(KindA64::w, index); build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(TString, len))); break; } case IrCmd::TABLE_SETNUM: { // note: we need to call regOp before spill so that we don't do redundant reloads RegisterA64 table = regOp(inst.a); RegisterA64 key = regOp(inst.b); RegisterA64 temp = regs.allocTemp(KindA64::w); regs.spill(build, index, {table, key}); if (w1 != key) { build.mov(x1, table); build.mov(w2, key); } else { build.mov(temp, w1); build.mov(x1, table); build.mov(w2, temp); } build.mov(x0, rState); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_setnum))); build.blr(x3); inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::NEW_TABLE: { regs.spill(build, index); build.mov(x0, rState); build.mov(x1, uintOp(inst.a)); build.mov(x2, uintOp(inst.b)); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_new))); build.blr(x3); inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::DUP_TABLE: { RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads regs.spill(build, index, {reg}); build.mov(x1, reg); build.mov(x0, rState); build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaH_clone))); build.blr(x2); inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::TRY_NUM_TO_INDEX: { inst.regA64 = regs.allocReg(KindA64::w, index); RegisterA64 temp1 = tempDouble(inst.a); if (build.features & Feature_JSCVT) { build.fjcvtzs(inst.regA64, temp1); // fjcvtzs sets PSTATE.Z (equal) iff conversion is exact build.b(ConditionA64::NotEqual, labelOp(inst.b)); } else { RegisterA64 temp2 = regs.allocTemp(KindA64::d); build.fcvtzs(inst.regA64, temp1); build.scvtf(temp2, inst.regA64); build.fcmp(temp1, temp2); build.b(ConditionA64::NotEqual, labelOp(inst.b)); } break; } case IrCmd::TRY_CALL_FASTGETTM: { RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::w); build.ldr(temp1, mem(regOp(inst.a), offsetof(Table, metatable))); build.cbz(temp1, labelOp(inst.c)); // no metatable build.ldrb(temp2, mem(temp1, offsetof(Table, tmcache))); build.tst(temp2, 1 << intOp(inst.b)); // can't use tbz/tbnz because their jump offsets are too short build.b(ConditionA64::NotEqual, labelOp(inst.c)); // Equal = Zero after tst; tmcache caches *absence* of metamethods regs.spill(build, index, {temp1}); build.mov(x0, temp1); build.mov(w1, intOp(inst.b)); build.ldr(x2, mem(rGlobalState, offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*))); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm))); build.blr(x3); build.cbz(x0, labelOp(inst.c)); // no tag method inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::NEW_USERDATA: { regs.spill(build, index); build.mov(x0, rState); build.mov(x1, intOp(inst.a)); build.mov(x2, intOp(inst.b)); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, newUserdata))); build.blr(x3); inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::INT_TO_NUM: { inst.regA64 = regs.allocReg(KindA64::d, index); RegisterA64 temp = tempInt(inst.a); build.scvtf(inst.regA64, temp); break; } case IrCmd::UINT_TO_NUM: { inst.regA64 = regs.allocReg(KindA64::d, index); RegisterA64 temp = tempInt(inst.a); build.ucvtf(inst.regA64, temp); break; } case IrCmd::NUM_TO_INT: { inst.regA64 = regs.allocReg(KindA64::w, index); RegisterA64 temp = tempDouble(inst.a); build.fcvtzs(inst.regA64, temp); break; } case IrCmd::NUM_TO_UINT: { inst.regA64 = regs.allocReg(KindA64::w, index); RegisterA64 temp = tempDouble(inst.a); // note: we don't use fcvtzu for consistency with C++ code build.fcvtzs(castReg(KindA64::x, inst.regA64), temp); break; } case IrCmd::NUM_TO_VEC: { inst.regA64 = regs.allocReg(KindA64::q, index); if (inst.a.kind == IrOpKind::Constant) { float value = float(doubleOp(inst.a)); uint32_t asU32; static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit"); memcpy(&asU32, &value, sizeof(value)); if (AssemblyBuilderA64::isFmovSupported(value)) { build.fmov(inst.regA64, value); } else { RegisterA64 temp = regs.allocTemp(KindA64::x); uint32_t vec[4] = {asU32, asU32, asU32, 0}; build.adr(temp, vec, sizeof(vec)); build.ldr(inst.regA64, temp); } } else { RegisterA64 tempd = tempDouble(inst.a); RegisterA64 temps = regs.allocTemp(KindA64::s); build.fcvt(temps, tempd); build.dup_4s(inst.regA64, castReg(KindA64::q, temps), 0); } break; } case IrCmd::TAG_VECTOR: { inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a}); RegisterA64 reg = regOp(inst.a); RegisterA64 tempw = regs.allocTemp(KindA64::w); if (inst.regA64 != reg) build.mov(inst.regA64, reg); build.mov(tempw, LUA_TVECTOR); build.ins_4s(inst.regA64, tempw, 3); break; } case IrCmd::ADJUST_STACK_TO_REG: { RegisterA64 temp = regs.allocTemp(KindA64::x); if (inst.b.kind == IrOpKind::Constant) { build.add(temp, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue))); build.str(temp, mem(rState, offsetof(lua_State, top))); } else if (inst.b.kind == IrOpKind::Inst) { build.add(temp, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.add(temp, temp, regOp(inst.b), kTValueSizeLog2); // implicit uxtw build.str(temp, mem(rState, offsetof(lua_State, top))); } else CODEGEN_ASSERT(!"Unsupported instruction form"); break; } case IrCmd::ADJUST_STACK_TO_TOP: { RegisterA64 temp = regs.allocTemp(KindA64::x); build.ldr(temp, mem(rState, offsetof(lua_State, ci))); build.ldr(temp, mem(temp, offsetof(CallInfo, top))); build.str(temp, mem(rState, offsetof(lua_State, top))); break; } case IrCmd::FASTCALL: regs.spill(build, index); error |= !emitBuiltin(build, function, regs, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d)); break; case IrCmd::INVOKE_FASTCALL: { // We might need a temporary and we have to preserve it over the spill RegisterA64 temp = regs.allocTemp(KindA64::q); regs.spill(build, index, {temp}); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue))); build.mov(w3, intOp(inst.g)); // nresults // 'E' argument can only be produced by LOP_FASTCALL3 lowering if (inst.e.kind != IrOpKind::Undef) { CODEGEN_ASSERT(intOp(inst.f) == 3); build.ldr(x4, mem(rState, offsetof(lua_State, top))); build.ldr(temp, mem(rBase, vmRegOp(inst.d) * sizeof(TValue))); build.str(temp, mem(x4, 0)); build.ldr(temp, mem(rBase, vmRegOp(inst.e) * sizeof(TValue))); build.str(temp, mem(x4, sizeof(TValue))); } else { if (inst.d.kind == IrOpKind::VmReg) build.add(x4, rBase, uint16_t(vmRegOp(inst.d) * sizeof(TValue))); else if (inst.d.kind == IrOpKind::VmConst) emitAddOffset(build, x4, rConstants, vmConstOp(inst.d) * sizeof(TValue)); else CODEGEN_ASSERT(inst.d.kind == IrOpKind::Undef); } // nparams if (intOp(inst.f) == LUA_MULTRET) { // L->top - (ra + 1) build.ldr(x5, mem(rState, offsetof(lua_State, top))); build.sub(x5, x5, rBase); build.sub(x5, x5, uint16_t((vmRegOp(inst.b) + 1) * sizeof(TValue))); build.lsr(x5, x5, kTValueSizeLog2); } else build.mov(w5, intOp(inst.f)); build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(inst.a) * sizeof(luau_FastFunction))); build.blr(x6); inst.regA64 = regs.takeReg(w0, index); break; } case IrCmd::CHECK_FASTCALL_RES: build.cmp(regOp(inst.a), 0); build.b(ConditionA64::Less, labelOp(inst.b)); break; case IrCmd::DO_ARITH: regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); if (inst.b.kind == IrOpKind::VmConst) emitAddOffset(build, x2, rConstants, vmConstOp(inst.b) * sizeof(TValue)); else build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); if (inst.c.kind == IrOpKind::VmConst) emitAddOffset(build, x3, rConstants, vmConstOp(inst.c) * sizeof(TValue)); else build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue))); switch (TMS(intOp(inst.d))) { case TM_ADD: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithadd))); break; case TM_SUB: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithsub))); break; case TM_MUL: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmul))); break; case TM_DIV: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithdiv))); break; case TM_IDIV: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithidiv))); break; case TM_MOD: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmod))); break; case TM_POW: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithpow))); break; case TM_UNM: build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithunm))); break; default: CODEGEN_ASSERT(!"Invalid doarith helper operation tag"); break; } build.blr(x4); emitUpdateBase(build); break; case IrCmd::DO_LEN: regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_dolen))); build.blr(x3); emitUpdateBase(build); break; case IrCmd::GET_TABLE: regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); if (inst.c.kind == IrOpKind::VmReg) build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue))); else if (inst.c.kind == IrOpKind::Constant) { TValue n = {}; setnvalue(&n, uintOp(inst.c)); build.adr(x2, &n, sizeof(n)); } else CODEGEN_ASSERT(!"Unsupported instruction form"); build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_gettable))); build.blr(x4); emitUpdateBase(build); break; case IrCmd::SET_TABLE: regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); if (inst.c.kind == IrOpKind::VmReg) build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue))); else if (inst.c.kind == IrOpKind::Constant) { TValue n = {}; setnvalue(&n, uintOp(inst.c)); build.adr(x2, &n, sizeof(n)); } else CODEGEN_ASSERT(!"Unsupported instruction form"); build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_settable))); build.blr(x4); emitUpdateBase(build); break; case IrCmd::GET_IMPORT: regs.spill(build, index); // luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false) build.mov(x0, rState); build.ldr(x1, mem(rClosure, offsetof(Closure, env))); build.mov(x2, rConstants); build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.mov(w4, uintOp(inst.b)); build.mov(w5, 0); build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luaV_getimport))); build.blr(x6); emitUpdateBase(build); break; case IrCmd::CONCAT: regs.spill(build, index); build.mov(x0, rState); build.mov(w1, uintOp(inst.b)); build.mov(w2, vmRegOp(inst.a) + uintOp(inst.b) - 1); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_concat))); build.blr(x3); emitUpdateBase(build); break; case IrCmd::GET_UPVALUE: { RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::q); RegisterA64 temp3 = regs.allocTemp(KindA64::w); build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b))); // uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value Label skip; build.ldr(temp3, mem(temp1, offsetof(TValue, tt))); build.cmp(temp3, LUA_TUPVAL); build.b(ConditionA64::NotEqual, skip); // UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally) build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc))); build.ldr(temp1, mem(temp1, offsetof(UpVal, v))); build.setLabel(skip); build.ldr(temp2, temp1); build.str(temp2, mem(rBase, vmRegOp(inst.a) * sizeof(TValue))); break; } case IrCmd::SET_UPVALUE: { RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::x); RegisterA64 temp3 = regs.allocTemp(KindA64::q); // UpVal* build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc))); build.ldr(temp2, mem(temp1, offsetof(UpVal, v))); build.ldr(temp3, mem(rBase, vmRegOp(inst.b) * sizeof(TValue))); build.str(temp3, temp2); if (inst.c.kind == IrOpKind::Undef || isGCO(tagOp(inst.c))) { Label skip; checkObjectBarrierConditions(build, temp1, temp2, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip); size_t spills = regs.spill(build, index, {temp1}); build.mov(x1, temp1); build.mov(x0, rState); build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value))); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf))); build.blr(x3); regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack build.setLabel(skip); } break; } case IrCmd::CHECK_TAG: { Label fresh; // used when guard aborts execution or jumps to a VM exit Label& fail = getTargetLabel(inst.c, fresh); if (tagOp(inst.b) == 0) { build.cbnz(regOp(inst.a), fail); } else { build.cmp(regOp(inst.a), tagOp(inst.b)); build.b(ConditionA64::NotEqual, fail); } finalizeTargetLabel(inst.c, fresh); break; } case IrCmd::CHECK_TRUTHY: { // Constant tags which don't require boolean value check should've been removed in constant folding CODEGEN_ASSERT(inst.a.kind != IrOpKind::Constant || tagOp(inst.a) == LUA_TBOOLEAN); Label fresh; // used when guard aborts execution or jumps to a VM exit Label& target = getTargetLabel(inst.c, fresh); Label skip; if (inst.a.kind != IrOpKind::Constant) { // fail to fallback on 'nil' (falsy) CODEGEN_ASSERT(LUA_TNIL == 0); build.cbz(regOp(inst.a), target); // skip value test if it's not a boolean (truthy) build.cmp(regOp(inst.a), LUA_TBOOLEAN); build.b(ConditionA64::NotEqual, skip); } // fail to fallback on 'false' boolean value (falsy) if (inst.b.kind != IrOpKind::Constant) { build.cbz(regOp(inst.b), target); } else { if (intOp(inst.b) == 0) build.b(target); } if (inst.a.kind != IrOpKind::Constant) build.setLabel(skip); finalizeTargetLabel(inst.c, fresh); break; } case IrCmd::CHECK_READONLY: { Label fresh; // used when guard aborts execution or jumps to a VM exit RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldrb(temp, mem(regOp(inst.a), offsetof(Table, readonly))); build.cbnz(temp, getTargetLabel(inst.b, fresh)); finalizeTargetLabel(inst.b, fresh); break; } case IrCmd::CHECK_NO_METATABLE: { Label fresh; // used when guard aborts execution or jumps to a VM exit RegisterA64 temp = regs.allocTemp(KindA64::x); build.ldr(temp, mem(regOp(inst.a), offsetof(Table, metatable))); build.cbnz(temp, getTargetLabel(inst.b, fresh)); finalizeTargetLabel(inst.b, fresh); break; } case IrCmd::CHECK_SAFE_ENV: { Label fresh; // used when guard aborts execution or jumps to a VM exit RegisterA64 temp = regs.allocTemp(KindA64::x); RegisterA64 tempw = castReg(KindA64::w, temp); build.ldr(temp, mem(rClosure, offsetof(Closure, env))); build.ldrb(tempw, mem(temp, offsetof(Table, safeenv))); build.cbz(tempw, getTargetLabel(inst.a, fresh)); finalizeTargetLabel(inst.a, fresh); break; } case IrCmd::CHECK_ARRAY_SIZE: { Label fresh; // used when guard aborts execution or jumps to a VM exit Label& fail = getTargetLabel(inst.c, fresh); RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldr(temp, mem(regOp(inst.a), offsetof(Table, sizearray))); if (inst.b.kind == IrOpKind::Inst) { build.cmp(temp, regOp(inst.b)); build.b(ConditionA64::UnsignedLessEqual, fail); } else if (inst.b.kind == IrOpKind::Constant) { if (intOp(inst.b) == 0) { build.cbz(temp, fail); } else if (size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate) { build.cmp(temp, uint16_t(intOp(inst.b))); build.b(ConditionA64::UnsignedLessEqual, fail); } else { RegisterA64 temp2 = regs.allocTemp(KindA64::w); build.mov(temp2, intOp(inst.b)); build.cmp(temp, temp2); build.b(ConditionA64::UnsignedLessEqual, fail); } } else CODEGEN_ASSERT(!"Unsupported instruction form"); finalizeTargetLabel(inst.c, fresh); break; } case IrCmd::JUMP_SLOT_MATCH: case IrCmd::CHECK_SLOT_MATCH: { Label abort; // used when guard aborts execution const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? inst.d : inst.c; Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp); RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp1w = castReg(KindA64::w, temp1); RegisterA64 temp2 = regs.allocTemp(KindA64::x); CODEGEN_ASSERT(offsetof(LuaNode, key.value) == offsetof(LuaNode, key) && kOffsetOfTKeyTagNext >= 8 && kOffsetOfTKeyTagNext < 16); build.ldp(temp1, temp2, mem(regOp(inst.a), offsetof(LuaNode, key))); // load key.value into temp1 and key.tt (alongside other bits) into temp2 build.ubfx(temp2, temp2, (kOffsetOfTKeyTagNext - 8) * 8, kTKeyTagBits); // .tt is right before .next, and 8 bytes are skipped by ldp build.cmp(temp2, LUA_TSTRING); build.b(ConditionA64::NotEqual, mismatch); AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value)); build.ldr(temp2, addr); build.cmp(temp1, temp2); build.b(ConditionA64::NotEqual, mismatch); build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, val.tt))); CODEGEN_ASSERT(LUA_TNIL == 0); build.cbz(temp1w, mismatch); if (inst.cmd == IrCmd::JUMP_SLOT_MATCH) jumpOrFallthrough(blockOp(inst.c), next); else if (abort.id) emitAbort(build, abort); break; } case IrCmd::CHECK_NODE_NO_NEXT: { Label fresh; // used when guard aborts execution or jumps to a VM exit RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTagNext)); build.lsr(temp, temp, kTKeyTagBits); build.cbnz(temp, getTargetLabel(inst.b, fresh)); finalizeTargetLabel(inst.b, fresh); break; } case IrCmd::CHECK_NODE_VALUE: { Label fresh; // used when guard aborts execution or jumps to a VM exit RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, val.tt))); CODEGEN_ASSERT(LUA_TNIL == 0); build.cbz(temp, getTargetLabel(inst.b, fresh)); finalizeTargetLabel(inst.b, fresh); break; } case IrCmd::CHECK_BUFFER_LEN: { int accessSize = intOp(inst.c); CODEGEN_ASSERT(accessSize > 0 && accessSize <= int(AssemblyBuilderA64::kMaxImmediate)); Label fresh; // used when guard aborts execution or jumps to a VM exit Label& target = getTargetLabel(inst.d, fresh); RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldr(temp, mem(regOp(inst.a), offsetof(Buffer, len))); if (inst.b.kind == IrOpKind::Inst) { if (accessSize == 1) { // fails if offset >= len build.cmp(temp, regOp(inst.b)); build.b(ConditionA64::UnsignedLessEqual, target); } else { // fails if offset + size > len; we compute it as len - offset < size RegisterA64 tempx = castReg(KindA64::x, temp); build.sub(tempx, tempx, regOp(inst.b)); // implicit uxtw build.cmp(tempx, uint16_t(accessSize)); build.b(ConditionA64::Less, target); // note: this is a signed 64-bit comparison so that out of bounds offset fails } } else if (inst.b.kind == IrOpKind::Constant) { int offset = intOp(inst.b); // Constant folding can take care of it, but for safety we avoid overflow/underflow cases here if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX)) { build.b(target); } else if (offset + accessSize <= int(AssemblyBuilderA64::kMaxImmediate)) { build.cmp(temp, uint16_t(offset + accessSize)); build.b(ConditionA64::UnsignedLessEqual, target); } else { RegisterA64 temp2 = regs.allocTemp(KindA64::w); build.mov(temp2, offset + accessSize); build.cmp(temp, temp2); build.b(ConditionA64::UnsignedLessEqual, target); } } else { CODEGEN_ASSERT(!"Unsupported instruction form"); } finalizeTargetLabel(inst.d, fresh); break; } case IrCmd::CHECK_USERDATA_TAG: { Label fresh; // used when guard aborts execution or jumps to a VM exit Label& fail = getTargetLabel(inst.c, fresh); RegisterA64 temp = regs.allocTemp(KindA64::w); build.ldrb(temp, mem(regOp(inst.a), offsetof(Udata, tag))); build.cmp(temp, intOp(inst.b)); build.b(ConditionA64::NotEqual, fail); finalizeTargetLabel(inst.c, fresh); break; } case IrCmd::INTERRUPT: { regs.spill(build, index); Label self; build.ldr(x0, mem(rGlobalState, offsetof(global_State, cb.interrupt))); build.cbnz(x0, self); Label next = build.setLabel(); interruptHandlers.push_back({self, uintOp(inst.a), next}); break; } case IrCmd::CHECK_GC: { RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::x); CODEGEN_ASSERT(offsetof(global_State, totalbytes) == offsetof(global_State, GCthreshold) + 8); Label skip; build.ldp(temp1, temp2, mem(rGlobalState, offsetof(global_State, GCthreshold))); build.cmp(temp1, temp2); build.b(ConditionA64::UnsignedGreater, skip); size_t spills = regs.spill(build, index); build.mov(x0, rState); build.mov(w1, 1); build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaC_step))); build.blr(x2); emitUpdateBase(build); regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state build.setLabel(skip); break; } case IrCmd::BARRIER_OBJ: { RegisterA64 temp = regs.allocTemp(KindA64::x); Label skip; checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip); RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads size_t spills = regs.spill(build, index, {reg}); build.mov(x1, reg); build.mov(x0, rState); build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value))); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf))); build.blr(x3); regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack build.setLabel(skip); break; } case IrCmd::BARRIER_TABLE_BACK: { Label skip; RegisterA64 temp = regs.allocTemp(KindA64::w); // isblack(obj2gco(t)) build.ldrb(temp, mem(regOp(inst.a), offsetof(GCheader, marked))); build.tbz(temp, BLACKBIT, skip); RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads size_t spills = regs.spill(build, index, {reg}); build.mov(x1, reg); build.mov(x0, rState); build.add(x2, x1, uint16_t(offsetof(Table, gclist))); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierback))); build.blr(x3); regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack build.setLabel(skip); break; } case IrCmd::BARRIER_TABLE_FORWARD: { RegisterA64 temp = regs.allocTemp(KindA64::x); Label skip; checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip); RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value)); size_t spills = regs.spill(build, index, {reg}); build.mov(x1, reg); build.mov(x0, rState); build.ldr(x2, addr); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barriertable))); build.blr(x3); regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state // note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack build.setLabel(skip); break; } case IrCmd::SET_SAVEDPC: { RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::x); emitAddOffset(build, temp1, rCode, uintOp(inst.a) * sizeof(Instruction)); build.ldr(temp2, mem(rState, offsetof(lua_State, ci))); build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc))); break; } case IrCmd::CLOSE_UPVALS: { Label skip; RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::x); // L->openupval != 0 build.ldr(temp1, mem(rState, offsetof(lua_State, openupval))); build.cbz(temp1, skip); // ra <= L->openuval->v build.ldr(temp1, mem(temp1, offsetof(UpVal, v))); build.add(temp2, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.cmp(temp2, temp1); build.b(ConditionA64::UnsignedGreater, skip); size_t spills = regs.spill(build, index, {temp2}); build.mov(x1, temp2); build.mov(x0, rState); build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_close))); build.blr(x2); regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state build.setLabel(skip); break; } case IrCmd::CAPTURE: // no-op break; case IrCmd::SETLIST: regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeSETLIST), uintOp(inst.a)); break; case IrCmd::CALL: regs.spill(build, index); // argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams; if (intOp(inst.b) == LUA_MULTRET) build.ldr(x2, mem(rState, offsetof(lua_State, top))); else build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + 1 + intOp(inst.b)) * sizeof(TValue))); // callFallback(L, ra, argtop, nresults) build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.mov(w3, intOp(inst.c)); build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback))); build.blr(x4); emitUpdateBase(build); // reentry with x0=closure (NULL implies C function; CALL_FALLBACK_YIELD will trigger exit) build.cbnz(x0, helpers.continueCall); break; case IrCmd::RETURN: regs.spill(build, index); if (function.variadic) { build.ldr(x1, mem(rState, offsetof(lua_State, ci))); build.ldr(x1, mem(x1, offsetof(CallInfo, func))); } else if (intOp(inst.b) != 1) build.sub(x1, rBase, sizeof(TValue)); // invariant: ci->func + 1 == ci->base for non-variadic frames if (intOp(inst.b) == 0) { build.mov(w2, 0); build.b(helpers.return_); } else if (intOp(inst.b) == 1 && !function.variadic) { // fast path: minimizes x1 adjustments // note that we skipped x1 computation for this specific case above build.ldr(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue))); build.str(q0, mem(rBase, -int(sizeof(TValue)))); build.mov(x1, rBase); build.mov(w2, 1); build.b(helpers.return_); } else if (intOp(inst.b) >= 1 && intOp(inst.b) <= 3) { for (int r = 0; r < intOp(inst.b); ++r) { build.ldr(q0, mem(rBase, (vmRegOp(inst.a) + r) * sizeof(TValue))); build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post)); } build.mov(w2, intOp(inst.b)); build.b(helpers.return_); } else { build.mov(w2, 0); // vali = ra build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); // valend = (n == LUA_MULTRET) ? L->top : ra + n if (intOp(inst.b) == LUA_MULTRET) build.ldr(x4, mem(rState, offsetof(lua_State, top))); else build.add(x4, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue))); Label repeatValueLoop, exitValueLoop; if (intOp(inst.b) == LUA_MULTRET) { build.cmp(x3, x4); build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual } build.setLabel(repeatValueLoop); build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post)); build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post)); build.add(w2, w2, 1); build.cmp(x3, x4); build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess build.setLabel(exitValueLoop); build.b(helpers.return_); } break; case IrCmd::FORGLOOP: // register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables regs.spill(build, index); // clear extra variables since we might have more than two if (intOp(inst.b) > 2) { CODEGEN_ASSERT(LUA_TNIL == 0); for (int i = 2; i < intOp(inst.b); ++i) build.str(wzr, mem(rBase, (vmRegOp(inst.a) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt))); } // we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here build.mov(x0, rState); build.ldr(x1, mem(rBase, (vmRegOp(inst.a) + 1) * sizeof(TValue) + offsetof(TValue, value.gc))); build.ldr(w2, mem(rBase, (vmRegOp(inst.a) + 2) * sizeof(TValue) + offsetof(TValue, value.p))); build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter))); build.blr(x4); // note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack build.cbnz(w0, labelOp(inst.c)); jumpOrFallthrough(blockOp(inst.d), next); break; case IrCmd::FORGLOOP_FALLBACK: regs.spill(build, index); build.mov(x0, rState); build.mov(w1, vmRegOp(inst.a)); build.mov(w2, intOp(inst.b)); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback))); build.blr(x3); emitUpdateBase(build); build.cbnz(w0, labelOp(inst.c)); jumpOrFallthrough(blockOp(inst.d), next); break; case IrCmd::FORGPREP_XNEXT_FALLBACK: regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue))); build.mov(w2, uintOp(inst.a) + 1); build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback))); build.blr(x3); // note: no emitUpdateBase necessary because forgLoopNonTableFallback does not reallocate stack jumpOrFallthrough(blockOp(inst.c), next); break; case IrCmd::COVERAGE: { RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::w); RegisterA64 temp3 = regs.allocTemp(KindA64::w); build.mov(temp1, uintOp(inst.a) * sizeof(Instruction)); build.ldr(temp2, mem(rCode, temp1)); // increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1 // note: cmp can be eliminated with adds but we aren't concerned with code size for coverage build.add(temp3, temp2, 256); build.cmp(temp3, 0); build.csel(temp2, temp2, temp3, ConditionA64::Less); build.str(temp2, mem(rCode, temp1)); break; } // Full instruction fallbacks case IrCmd::FALLBACK_GETGLOBAL: CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst); regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeGETGLOBAL), uintOp(inst.a)); break; case IrCmd::FALLBACK_SETGLOBAL: CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst); regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeSETGLOBAL), uintOp(inst.a)); break; case IrCmd::FALLBACK_GETTABLEKS: CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst); regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeGETTABLEKS), uintOp(inst.a)); break; case IrCmd::FALLBACK_SETTABLEKS: CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst); regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeSETTABLEKS), uintOp(inst.a)); break; case IrCmd::FALLBACK_NAMECALL: CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst); regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeNAMECALL), uintOp(inst.a)); break; case IrCmd::FALLBACK_PREPVARARGS: CODEGEN_ASSERT(inst.b.kind == IrOpKind::Constant); regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executePREPVARARGS), uintOp(inst.a)); break; case IrCmd::FALLBACK_GETVARARGS: CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.c.kind == IrOpKind::Constant); regs.spill(build, index); build.mov(x0, rState); if (intOp(inst.c) == LUA_MULTRET) { emitAddOffset(build, x1, rCode, uintOp(inst.a) * sizeof(Instruction)); build.mov(x2, rBase); build.mov(w3, vmRegOp(inst.b)); build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSMultRet))); build.blr(x4); emitUpdateBase(build); } else { build.mov(x1, rBase); build.mov(w2, vmRegOp(inst.b)); build.mov(w3, intOp(inst.c)); build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSConst))); build.blr(x4); // note: no emitUpdateBase necessary because executeGETVARARGSConst does not reallocate stack } break; case IrCmd::NEWCLOSURE: { RegisterA64 reg = regOp(inst.b); // note: we need to call regOp before spill so that we don't do redundant reloads regs.spill(build, index, {reg}); build.mov(x2, reg); build.mov(x0, rState); build.mov(w1, uintOp(inst.a)); build.ldr(x3, mem(rClosure, offsetof(Closure, l.p))); build.ldr(x3, mem(x3, offsetof(Proto, p))); build.ldr(x3, mem(x3, sizeof(Proto*) * uintOp(inst.c))); build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaF_newLclosure))); build.blr(x4); inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::FALLBACK_DUPCLOSURE: CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg); CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst); regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(inst.a)); break; case IrCmd::FALLBACK_FORGPREP: regs.spill(build, index); emitFallback(build, offsetof(NativeContext, executeFORGPREP), uintOp(inst.a)); jumpOrFallthrough(blockOp(inst.c), next); break; // Pseudo instructions case IrCmd::NOP: case IrCmd::SUBSTITUTE: CODEGEN_ASSERT(!"Pseudo instructions should not be lowered"); break; case IrCmd::BITAND_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b)))) build.and_(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b))); else { RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.and_(inst.regA64, temp1, temp2); } break; } case IrCmd::BITXOR_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b)))) build.eor(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b))); else { RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.eor(inst.regA64, temp1, temp2); } break; } case IrCmd::BITOR_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b)))) build.orr(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b))); else { RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.orr(inst.regA64, temp1, temp2); } break; } case IrCmd::BITNOT_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a}); RegisterA64 temp = tempUint(inst.a); build.mvn_(inst.regA64, temp); break; } case IrCmd::BITLSHIFT_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant) build.lsl(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31)); else { RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.lsl(inst.regA64, temp1, temp2); } break; } case IrCmd::BITRSHIFT_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant) build.lsr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31)); else { RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.lsr(inst.regA64, temp1, temp2); } break; } case IrCmd::BITARSHIFT_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant) build.asr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31)); else { RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.asr(inst.regA64, temp1, temp2); } break; } case IrCmd::BITLROTATE_UINT: { if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant) { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a}); build.ror(inst.regA64, regOp(inst.a), uint8_t((32 - unsigned(intOp(inst.b))) & 31)); } else { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); // can't reuse a because it would be clobbered by neg RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.neg(inst.regA64, temp2); build.ror(inst.regA64, temp1, inst.regA64); } break; } case IrCmd::BITRROTATE_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant) build.ror(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31)); else { RegisterA64 temp1 = tempUint(inst.a); RegisterA64 temp2 = tempUint(inst.b); build.ror(inst.regA64, temp1, temp2); } break; } case IrCmd::BITCOUNTLZ_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a}); RegisterA64 temp = tempUint(inst.a); build.clz(inst.regA64, temp); break; } case IrCmd::BITCOUNTRZ_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a}); RegisterA64 temp = tempUint(inst.a); build.rbit(inst.regA64, temp); build.clz(inst.regA64, inst.regA64); break; } case IrCmd::BYTESWAP_UINT: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a}); RegisterA64 temp = tempUint(inst.a); build.rev(inst.regA64, temp); break; } case IrCmd::INVOKE_LIBM: { if (inst.c.kind != IrOpKind::None) { bool isInt = (inst.c.kind == IrOpKind::Constant) ? constOp(inst.c).kind == IrConstKind::Int : getCmdValueKind(function.instOp(inst.c).cmd) == IrValueKind::Int; RegisterA64 temp1 = tempDouble(inst.b); RegisterA64 temp2 = isInt ? tempInt(inst.c) : tempDouble(inst.c); RegisterA64 temp3 = isInt ? noreg : regs.allocTemp(KindA64::d); // note: spill() frees all registers so we need to avoid alloc after spill regs.spill(build, index, {temp1, temp2}); if (isInt) { build.fmov(d0, temp1); build.mov(w0, temp2); } else if (d0 != temp2) { build.fmov(d0, temp1); build.fmov(d1, temp2); } else { build.fmov(temp3, d0); build.fmov(d0, temp1); build.fmov(d1, temp3); } } else { RegisterA64 temp1 = tempDouble(inst.b); regs.spill(build, index, {temp1}); build.fmov(d0, temp1); } build.ldr(x1, mem(rNativeContext, getNativeContextOffset(uintOp(inst.a)))); build.blr(x1); inst.regA64 = regs.takeReg(d0, index); break; } case IrCmd::GET_TYPE: { inst.regA64 = regs.allocReg(KindA64::x, index); CODEGEN_ASSERT(sizeof(TString*) == 8); if (inst.a.kind == IrOpKind::Inst) build.add(inst.regA64, rGlobalState, regOp(inst.a), 3); // implicit uxtw else if (inst.a.kind == IrOpKind::Constant) build.add(inst.regA64, rGlobalState, uint16_t(tagOp(inst.a)) * 8); else CODEGEN_ASSERT(!"Unsupported instruction form"); build.ldr(inst.regA64, mem(inst.regA64, offsetof(global_State, ttname))); break; } case IrCmd::GET_TYPEOF: { regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr))); build.blr(x2); inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::FINDUPVAL: { regs.spill(build, index); build.mov(x0, rState); build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue))); build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_findupval))); build.blr(x2); inst.regA64 = regs.takeReg(x0, index); break; } case IrCmd::BUFFER_READI8: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c)); build.ldrsb(inst.regA64, addr); break; } case IrCmd::BUFFER_READU8: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c)); build.ldrb(inst.regA64, addr); break; } case IrCmd::BUFFER_WRITEI8: { RegisterA64 temp = tempInt(inst.c); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d)); build.strb(temp, addr); break; } case IrCmd::BUFFER_READI16: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c)); build.ldrsh(inst.regA64, addr); break; } case IrCmd::BUFFER_READU16: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c)); build.ldrh(inst.regA64, addr); break; } case IrCmd::BUFFER_WRITEI16: { RegisterA64 temp = tempInt(inst.c); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d)); build.strh(temp, addr); break; } case IrCmd::BUFFER_READI32: { inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c)); build.ldr(inst.regA64, addr); break; } case IrCmd::BUFFER_WRITEI32: { RegisterA64 temp = tempInt(inst.c); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d)); build.str(temp, addr); break; } case IrCmd::BUFFER_READF32: { inst.regA64 = regs.allocReg(KindA64::d, index); RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c)); build.ldr(temp, addr); build.fcvt(inst.regA64, temp); break; } case IrCmd::BUFFER_WRITEF32: { RegisterA64 temp1 = tempDouble(inst.c); RegisterA64 temp2 = regs.allocTemp(KindA64::s); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d)); build.fcvt(temp2, temp1); build.str(temp2, addr); break; } case IrCmd::BUFFER_READF64: { inst.regA64 = regs.allocReg(KindA64::d, index); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c)); build.ldr(inst.regA64, addr); break; } case IrCmd::BUFFER_WRITEF64: { RegisterA64 temp = tempDouble(inst.c); AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d)); build.str(temp, addr); break; } // To handle unsupported instructions, add "case IrCmd::OP" and make sure to set error = true! } valueTracker.afterInstLowering(inst, index); regs.freeLastUseRegs(inst, index); regs.freeTempRegs(); } void IrLoweringA64::finishBlock(const IrBlock& curr, const IrBlock& next) { if (!regs.spills.empty()) { // If we have spills remaining, we have to immediately lower the successor block for (uint32_t predIdx : predecessors(function.cfg, function.getBlockIndex(next))) CODEGEN_ASSERT(predIdx == function.getBlockIndex(curr)); // And the next block cannot be a join block in cfg CODEGEN_ASSERT(next.useCount == 1); } } void IrLoweringA64::finishFunction() { if (build.logText) build.logAppend("; interrupt handlers\n"); for (InterruptHandler& handler : interruptHandlers) { build.setLabel(handler.self); build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction)); build.adr(x1, handler.next); build.b(helpers.interrupt); } if (build.logText) build.logAppend("; exit handlers\n"); for (ExitHandler& handler : exitHandlers) { CODEGEN_ASSERT(handler.pcpos != kVmExitEntryGuardPc); build.setLabel(handler.self); build.mov(x0, handler.pcpos * sizeof(Instruction)); build.b(helpers.updatePcAndContinueInVm); } if (stats) { if (error) stats->loweringErrors++; if (regs.error) stats->regAllocErrors++; } } bool IrLoweringA64::hasError() const { return error || regs.error; } bool IrLoweringA64::isFallthroughBlock(const IrBlock& target, const IrBlock& next) { return target.start == next.start; } void IrLoweringA64::jumpOrFallthrough(IrBlock& target, const IrBlock& next) { if (!isFallthroughBlock(target, next)) build.b(target.label); } Label& IrLoweringA64::getTargetLabel(IrOp op, Label& fresh) { if (op.kind == IrOpKind::Undef) return fresh; if (op.kind == IrOpKind::VmExit) { // Special exit case that doesn't have to update pcpos if (vmExitOp(op) == kVmExitEntryGuardPc) return helpers.exitContinueVmClearNativeFlag; if (uint32_t* index = exitHandlerMap.find(vmExitOp(op))) return exitHandlers[*index].self; return fresh; } return labelOp(op); } void IrLoweringA64::finalizeTargetLabel(IrOp op, Label& fresh) { if (op.kind == IrOpKind::Undef) { emitAbort(build, fresh); } else if (op.kind == IrOpKind::VmExit && fresh.id != 0 && fresh.id != helpers.exitContinueVmClearNativeFlag.id) { exitHandlerMap[vmExitOp(op)] = uint32_t(exitHandlers.size()); exitHandlers.push_back({fresh, vmExitOp(op)}); } } RegisterA64 IrLoweringA64::tempDouble(IrOp op) { if (op.kind == IrOpKind::Inst) return regOp(op); else if (op.kind == IrOpKind::Constant) { double val = doubleOp(op); if (AssemblyBuilderA64::isFmovSupported(val)) { RegisterA64 temp = regs.allocTemp(KindA64::d); build.fmov(temp, val); return temp; } else { RegisterA64 temp1 = regs.allocTemp(KindA64::x); RegisterA64 temp2 = regs.allocTemp(KindA64::d); uint64_t vali = getDoubleBits(val); if ((vali << 16) == 0) { build.movz(temp1, uint16_t(vali >> 48), 48); build.fmov(temp2, temp1); } else if ((vali << 32) == 0) { build.movz(temp1, uint16_t(vali >> 48), 48); build.movk(temp1, uint16_t(vali >> 32), 32); build.fmov(temp2, temp1); } else { build.adr(temp1, val); build.ldr(temp2, temp1); } return temp2; } } else { CODEGEN_ASSERT(!"Unsupported instruction form"); return noreg; } } RegisterA64 IrLoweringA64::tempInt(IrOp op) { if (op.kind == IrOpKind::Inst) return regOp(op); else if (op.kind == IrOpKind::Constant) { RegisterA64 temp = regs.allocTemp(KindA64::w); build.mov(temp, intOp(op)); return temp; } else { CODEGEN_ASSERT(!"Unsupported instruction form"); return noreg; } } RegisterA64 IrLoweringA64::tempUint(IrOp op) { if (op.kind == IrOpKind::Inst) return regOp(op); else if (op.kind == IrOpKind::Constant) { RegisterA64 temp = regs.allocTemp(KindA64::w); build.mov(temp, unsigned(intOp(op))); return temp; } else { CODEGEN_ASSERT(!"Unsupported instruction form"); return noreg; } } AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset) { // This is needed to tighten the bounds checks in the VmConst case below CODEGEN_ASSERT(offset % 4 == 0); // Full encoded range is wider depending on the load size, but this assertion helps establish a smaller guaranteed working range [0..4096) CODEGEN_ASSERT(offset >= 0 && unsigned(offset / 4) <= AssemblyBuilderA64::kMaxImmediate); if (op.kind == IrOpKind::VmReg) return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset); else if (op.kind == IrOpKind::VmConst) { size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset; // Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries if (constantOffset / 4 <= AddressA64::kMaxOffset) return mem(rConstants, int(constantOffset)); RegisterA64 temp = regs.allocTemp(KindA64::x); emitAddOffset(build, temp, rConstants, constantOffset); return temp; } // If we have a register, we assume it's a pointer to TValue // We might introduce explicit operand types in the future to make this more robust else if (op.kind == IrOpKind::Inst) return mem(regOp(op), offset); else { CODEGEN_ASSERT(!"Unsupported instruction form"); return noreg; } } AddressA64 IrLoweringA64::tempAddrBuffer(IrOp bufferOp, IrOp indexOp, uint8_t tag) { CODEGEN_ASSERT(tag == LUA_TUSERDATA || tag == LUA_TBUFFER); int dataOffset = tag == LUA_TBUFFER ? offsetof(Buffer, data) : offsetof(Udata, data); if (indexOp.kind == IrOpKind::Inst) { RegisterA64 temp = regs.allocTemp(KindA64::x); build.add(temp, regOp(bufferOp), regOp(indexOp)); // implicit uxtw return mem(temp, dataOffset); } else if (indexOp.kind == IrOpKind::Constant) { // Since the resulting address may be used to load any size, including 1 byte, from an unaligned offset, we are limited by unscaled // encoding if (unsigned(intOp(indexOp)) + dataOffset <= 255) return mem(regOp(bufferOp), int(intOp(indexOp) + dataOffset)); // indexOp can only be negative in dead code (since offsets are checked); this avoids assertion in emitAddOffset if (intOp(indexOp) < 0) return mem(regOp(bufferOp), dataOffset); RegisterA64 temp = regs.allocTemp(KindA64::x); emitAddOffset(build, temp, regOp(bufferOp), size_t(intOp(indexOp))); return mem(temp, dataOffset); } else { CODEGEN_ASSERT(!"Unsupported instruction form"); return noreg; } } RegisterA64 IrLoweringA64::regOp(IrOp op) { IrInst& inst = function.instOp(op); if (inst.spilled || inst.needsReload) regs.restoreReg(build, inst); CODEGEN_ASSERT(inst.regA64 != noreg); return inst.regA64; } IrConst IrLoweringA64::constOp(IrOp op) const { return function.constOp(op); } uint8_t IrLoweringA64::tagOp(IrOp op) const { return function.tagOp(op); } int IrLoweringA64::intOp(IrOp op) const { return function.intOp(op); } unsigned IrLoweringA64::uintOp(IrOp op) const { return function.uintOp(op); } double IrLoweringA64::doubleOp(IrOp op) const { return function.doubleOp(op); } IrBlock& IrLoweringA64::blockOp(IrOp op) const { return function.blockOp(op); } Label& IrLoweringA64::labelOp(IrOp op) const { return blockOp(op).label; } } // namespace A64 } // namespace CodeGen } // namespace Luau