luau/CodeGen/src/EmitCommonX64.cpp
Arseny Kapoulkine 89b437bb4e
Add SUBRK and DIVRK bytecode instructions to bytecode v5 (#1115)
Right now, we can compile R*K for all arithmetic instructions, but K*R
gets compiled into two instructions (LOADN/LOADK + arithmetic opcode).

This is problematic since it leads to reduced performance for some code.
However, we'd like to avoid adding reverse variants of ADDK et al for
all opcodes, as that would increase the I$ footprint of the interpreter.

Looking at the arithmetic instructions, % and // don't have interesting
use cases for K*V; ^ is sometimes used with a constant on the left-hand
side, but that would have to call pow() in all cases anyway, so it would
be slow regardless of the dispatch overhead. This leaves the four basic
arithmetic operations.

For + and *, we can implement a compiler-side optimization in the
future that transforms K*R into R*K automatically. This could either be
done unconditionally at -O2, or conditionally based on the type of the
value (driven by type annotations / inference) -- this technically
changes behavior in the presence of metamethods, although it might be
sensible to always do it because non-commutative +/* are evil.

However, for - and / it is impossible for the compiler to optimize this
in the future, so we need dedicated opcodes. This only increases the
interpreter size by ~300 bytes (~1.5%) on X64.
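
As an illustration (a hypothetical snippet, not taken from the change itself or
the benchmarks below), this is the pattern the new opcodes target:

```lua
-- constant on the left-hand side of - and /
local inv = 1 / x -- previously LOADN + DIV, now a single DIVRK
local neg = 1 - x -- previously LOADN + SUB, now a single SUBRK
```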

This makes spectral-norm and math-partial-sums 6% faster; maybe more
importantly, voxelgen gets 1.5% faster (so this change does have
real-world impact).

To avoid the proliferation of bytecode versions, this change piggybacks
on the bytecode version bump that was just made in 604 for vector
constants; we would still be able to enable these independently, but
we'll consider v5 complete when both are enabled.

Related: #626

---------

Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com>
2023-11-28 07:35:01 -08:00

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "EmitCommonX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrData.h"
#include "Luau/IrRegAllocX64.h"
#include "Luau/IrUtils.h"
#include "NativeState.h"
#include "lgc.h"
#include "lstate.h"
#include <utility>
namespace Luau
{
namespace CodeGen
{
namespace X64
{
void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label)
{
// Refresher on comi/ucomi EFLAGS:
// all zero: greater
// CF only: less
// ZF only: equal
// PF+CF+ZF: unordered (NaN)
// To work around the lack of conditional jumps that check 'greater' conditions in an IEEE 754 compliant way, we use the 'less' forms and swap the operands
if (cond == IrCondition::Greater || cond == IrCondition::GreaterEqual || cond == IrCondition::NotGreater || cond == IrCondition::NotGreaterEqual)
std::swap(lhs, rhs);
if (rhs.cat == CategoryX64::reg)
{
build.vucomisd(rhs, lhs);
}
else
{
build.vmovsd(tmp, rhs);
build.vucomisd(tmp, lhs);
}
// Keep in mind that 'Not' conditions want 'true' for comparisons with NaN
// And because of NaN, equivalences that hold for integers, such as 'not less or equal' <-> 'greater', do not hold here
switch (cond)
{
case IrCondition::NotLessEqual:
case IrCondition::NotGreaterEqual:
// (b < a) is the same as !(a <= b). jnae checks CF=1 which means < or NaN
build.jcc(ConditionX64::NotAboveEqual, label);
break;
case IrCondition::LessEqual:
case IrCondition::GreaterEqual:
// (b >= a) is the same as (a <= b). jae checks CF=0 which means >= and not NaN
build.jcc(ConditionX64::AboveEqual, label);
break;
case IrCondition::NotLess:
case IrCondition::NotGreater:
// (b <= a) is the same as !(a < b). jna checks CF=1 or ZF=1 which means <= or NaN
build.jcc(ConditionX64::NotAbove, label);
break;
case IrCondition::Less:
case IrCondition::Greater:
// (b > a) is the same as (a < b). ja checks CF=0 and ZF=0 which means > and not NaN
build.jcc(ConditionX64::Above, label);
break;
case IrCondition::NotEqual:
// ZF=0 or PF=1 means != or NaN
build.jcc(ConditionX64::NotZero, label);
build.jcc(ConditionX64::Parity, label);
break;
default:
LUAU_ASSERT(!"Unsupported condition");
}
}
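
// Maps an IR comparison condition to the x64 condition code used for integer comparisons (including the unsigned variants)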
ConditionX64 getConditionInt(IrCondition cond)
{
switch (cond)
{
case IrCondition::Equal:
return ConditionX64::Equal;
case IrCondition::NotEqual:
return ConditionX64::NotEqual;
case IrCondition::Less:
return ConditionX64::Less;
case IrCondition::NotLess:
return ConditionX64::NotLess;
case IrCondition::LessEqual:
return ConditionX64::LessEqual;
case IrCondition::NotLessEqual:
return ConditionX64::NotLessEqual;
case IrCondition::Greater:
return ConditionX64::Greater;
case IrCondition::NotGreater:
return ConditionX64::NotGreater;
case IrCondition::GreaterEqual:
return ConditionX64::GreaterEqual;
case IrCondition::NotGreaterEqual:
return ConditionX64::NotGreaterEqual;
case IrCondition::UnsignedLess:
return ConditionX64::Below;
case IrCondition::UnsignedLessEqual:
return ConditionX64::BelowEqual;
case IrCondition::UnsignedGreater:
return ConditionX64::Above;
case IrCondition::UnsignedGreaterEqual:
return ConditionX64::AboveEqual;
default:
LUAU_ASSERT(!"Unsupported condition");
return ConditionX64::Zero;
}
}
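
// Loads into 'node' the address of the hash node &h->node[slot], where 'slot' is the cached slot stored
// in the C operand of the instruction at 'pcpos'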
void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos)
{
LUAU_ASSERT(tmp != node);
LUAU_ASSERT(table != node);
build.mov(node, qword[table + offsetof(Table, node)]);
// compute cached slot
build.mov(tmp, sCode);
build.movzx(dwordReg(tmp), byte[tmp + pcpos * sizeof(Instruction) + kOffsetOfInstructionC]);
build.and_(byteReg(tmp), byte[table + offsetof(Table, nodemask8)]);
// LuaNode* n = &h->node[slot];
build.shl(dwordReg(tmp), kLuaNodeSizeLog2);
build.add(node, tmp);
}
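
// Converts the double in 'numd' to an integer in 'numi' and jumps to 'label' if the value was not an exact integer
// NaN falls through as 0x80000000, an index that is out of bounds anyway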
void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label)
{
LUAU_ASSERT(numi.size == SizeX64::dword);
// Convert to integer, NaN is converted into 0x80000000
build.vcvttsd2si(numi, numd);
// Convert that integer back to double
build.vcvtsi2sd(tmp, numd, numi);
build.vucomisd(tmp, numd); // Sets ZF=1 if equal or NaN
// We don't need non-integer values
// To skip an extra PF=1 check, we let NaN fall through: the resulting 0x80000000 index is out of bounds anyway
build.jcc(ConditionX64::NotZero, label);
}
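
// Calls the luaV_doarith fallback: applies the arithmetic operation 'tm' to the TValues at 'b' and 'c'
// and stores the result in register 'ra'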
void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, OperandX64 b, OperandX64 c, TMS tm)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, b);
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::dword, tm);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarith)]);
emitUpdateBase(build);
}
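
// Calls the luaV_dolen fallback to compute the length of register 'rb' into register 'ra'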
void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]);
emitUpdateBase(build);
}
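
// Calls the luaV_gettable fallback: ra = rb[key], where 'c' is the address of the key TValue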
void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]);
emitUpdateBase(build);
}
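
// Calls the luaV_settable fallback: rb[key] = ra, where 'c' is the address of the key TValue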
void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, luauRegAddress(rb));
callWrap.addArgument(SizeX64::qword, c);
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]);
emitUpdateBase(build);
}
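
// Emits the runtime checks that guard a GC write barrier; jumps to 'skip' when the value in 'ra' is not collectable,
// 'object' is not black, or the value is not white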
void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, IrOp ra, int ratag, Label& skip)
{
// The barrier should have been optimized away if we statically know the value is not collectable; this check is kept for correctness
if (ratag == -1 || !isGCO(ratag))
{
// iscollectable(ra)
OperandX64 tag = (ra.kind == IrOpKind::VmReg) ? luauRegTag(vmRegOp(ra)) : luauConstantTag(vmConstOp(ra));
build.cmp(tag, LUA_TSTRING);
build.jcc(ConditionX64::Less, skip);
}
// isblack(obj2gco(o))
build.test(byte[object + offsetof(GCheader, marked)], bitmask(BLACKBIT));
build.jcc(ConditionX64::Zero, skip);
// iswhite(gcvalue(ra))
OperandX64 value = (ra.kind == IrOpKind::VmReg) ? luauRegValue(vmRegOp(ra)) : luauConstantValue(vmConstOp(ra));
build.mov(tmp, value);
build.test(byte[tmp + offsetof(GCheader, marked)], bit2mask(WHITE0BIT, WHITE1BIT));
build.jcc(ConditionX64::Zero, skip);
}
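
// Emits a write barrier for storing the value in 'ra' into collectable 'object'; calls luaC_barrierf when the barrier checks pass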
void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, IrOp ra, int ratag)
{
Label skip;
ScopedRegX64 tmp{regs, SizeX64::qword};
checkObjectBarrierConditions(build, tmp.reg, object, ra, ratag, skip);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, object, objectOp);
callWrap.addArgument(SizeX64::qword, tmp);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierf)]);
}
build.setLabel(skip);
}
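
// Emits the fast-path write barrier for tables: if 'table' is black, calls luaC_barrierback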
void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp)
{
Label skip;
// isblack(obj2gco(t))
build.test(byte[table + offsetof(GCheader, marked)], bitmask(BLACKBIT));
build.jcc(ConditionX64::Zero, skip);
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::qword, table, tableOp);
callWrap.addArgument(SizeX64::qword, addr[table + offsetof(Table, gclist)]);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]);
}
build.setLabel(skip);
}
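
// Emits an inline GC step check: calls luaC_step when the total allocated bytes have reached the GC threshold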
void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build)
{
Label skip;
{
ScopedRegX64 tmp1{regs, SizeX64::qword};
ScopedRegX64 tmp2{regs, SizeX64::qword};
build.mov(tmp1.reg, qword[rState + offsetof(lua_State, global)]);
build.mov(tmp2.reg, qword[tmp1.reg + offsetof(global_State, totalbytes)]);
build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(global_State, GCthreshold)]);
build.jcc(ConditionX64::Below, skip);
}
{
ScopedSpills spillGuard(regs);
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
callWrap.addArgument(SizeX64::dword, 1);
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]);
emitUpdateBase(build);
}
build.setLabel(skip);
}
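
// Clears the LUA_CALLINFO_NATIVE flag on the current call frame (L->ci)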
void emitClearNativeFlag(AssemblyBuilderX64& build)
{
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.and_(dword[rax + offsetof(CallInfo, flags)], ~LUA_CALLINFO_NATIVE);
}
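
// Leaves native code through gateExit; eax is set to 1 when execution should continue in the VM and to 0 otherwise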
void emitExit(AssemblyBuilderX64& build, bool continueInVm)
{
if (continueInVm)
build.mov(eax, 1);
else
build.xor_(eax, eax);
build.jmp(qword[rNativeContext + offsetof(NativeContext, gateExit)]);
}
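
// Reloads the cached base register from L->base (the stack may have been reallocated by a call)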
void emitUpdateBase(AssemblyBuilderX64& build)
{
build.mov(rBase, qword[rState + offsetof(lua_State, base)]);
}
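
// Emits the interrupt sequence: saves the current pc, calls the interrupt callback if one is installed, and if the callback
// leaves L->status non-zero, rolls the pc back by one instruction and exits native code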
void emitInterrupt(AssemblyBuilderX64& build)
{
// rax = pcpos + 1
// rbx = return address in native code
// note: rbx is non-volatile so it will be saved across the interrupt call automatically
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
Label skip;
// Update L->ci->savedpc; required in case the interrupt handler errors
build.mov(rcx, sCode);
build.lea(rcx, addr[rcx + rax * sizeof(Instruction)]);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[rax + offsetof(CallInfo, savedpc)], rcx);
// Load interrupt handler; it may be nullptr in case the update raced with the check before we got here
build.mov(rax, qword[rState + offsetof(lua_State, global)]);
build.mov(rax, qword[rax + offsetof(global_State, cb.interrupt)]);
build.test(rax, rax);
build.jcc(ConditionX64::Zero, skip);
// Call interrupt
build.mov(rArg1, rState);
build.mov(dwordReg(rArg2), -1);
build.call(rax);
// Check if we need to exit
build.mov(al, byte[rState + offsetof(lua_State, status)]);
build.test(al, al);
build.jcc(ConditionX64::Zero, skip);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.sub(qword[rax + offsetof(CallInfo, savedpc)], sizeof(Instruction));
emitExit(build, /* continueInVm */ false);
build.setLabel(skip);
emitUpdateBase(build); // interrupt may have reallocated stack
build.jmp(rbx);
}
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos)
{
// fallback(L, instruction, base, k)
IrCallWrapperX64 callWrap(regs, build);
callWrap.addArgument(SizeX64::qword, rState);
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
build.mov(reg, sCode);
callWrap.addArgument(SizeX64::qword, addr[reg + pcpos * sizeof(Instruction)]);
callWrap.addArgument(SizeX64::qword, rBase);
callWrap.addArgument(SizeX64::qword, rConstants);
callWrap.call(qword[rNativeContext + offset]);
emitUpdateBase(build);
}
void emitUpdatePcForExit(AssemblyBuilderX64& build)
{
// edx = pcpos * sizeof(Instruction)
build.add(rdx, sCode);
build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
build.mov(qword[rax + offsetof(CallInfo, savedpc)], rdx);
}
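
// Emits the return sequence: pads missing results with nil, pops the call frame, and either continues in the caller's
// native code or exits to the interpreter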
void emitReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers)
{
// input: res in rdi, number of written values in ecx
RegisterX64 res = rdi;
RegisterX64 written = ecx;
RegisterX64 ci = r8;
RegisterX64 cip = r9;
RegisterX64 nresults = esi;
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// nresults = ci->nresults
build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]);
Label skipResultCopy;
// Fill the rest of the expected results (nresults - written) with 'nil'
RegisterX64 counter = written;
build.sub(counter, nresults); // counter = -(nresults - written)
build.jcc(ConditionX64::GreaterEqual, skipResultCopy);
Label repeatNilLoop = build.setLabel();
build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
build.add(res, sizeof(TValue));
build.inc(counter);
build.jcc(ConditionX64::NotZero, repeatNilLoop);
build.setLabel(skipResultCopy);
build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance
build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base
Label skipFixedRetTop;
build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0
build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF
build.mov(res, qword[cip + offsetof(CallInfo, top)]); // res = cip->top
build.setLabel(skipFixedRetTop);
build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res
// Unlikely, but this might be the last return from the VM
build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN);
build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm);
// Returning back to the previous function is a bit tricky
// Registers alive: r9 (cip)
RegisterX64 proto = rcx;
RegisterX64 execdata = rbx;
// Change closure
build.mov(rax, qword[cip + offsetof(CallInfo, func)]);
build.mov(rax, qword[rax + offsetof(TValue, value.gc)]);
build.mov(sClosure, rax);
build.mov(proto, qword[rax + offsetof(Closure, l.p)]);
build.mov(execdata, qword[proto + offsetof(Proto, execdata)]);
build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE);
build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data
// Change constants
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// Change code
build.mov(rdx, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rdx);
build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]);
// To get instruction index from instruction pointer, we need to divide byte offset by 4
// But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
build.sub(rax, rdx);
// Get new instruction location and jump to it
build.mov(edx, dword[execdata + rax]);
build.add(rdx, qword[proto + offsetof(Proto, exectarget)]);
build.jmp(rdx);
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau