luau/CodeGen/src/AssemblyBuilderX64.cpp
vegorov-rbx · 97965c7c0a · 2023-05-12
Sync to upstream/release/576 (#928)
* `ClassType` can now have an indexer defined on it. This allows custom
types to be used in `t[x]` expressions.
* Fixed the search for the closest executable breakpoint line. Previously,
breakpoints might have been skipped in `else` blocks at the end of a
function.
* Fixed how unification is performed for two optional types `a? <: b?`;
previously it might have unified either `a` or `b` with `nil`. Note that
this fix is not enabled by default yet (see the list in
`ExperimentalFlags.h`).

In the new type solver, a concept of 'Type Families' has been
introduced. Type families can be thought of as type aliases with custom
type inference/reduction logic attached to them. For example, an
`Add<T, U>` type family would resolve to the type that results from
adding two values together, helping type inference figure out what `T`
and `U` might be when explicit type annotations are not provided. This
update doesn't define any type families yet, but they will be added in
the near future. It is also possible for Luau embedders to define their
own type families in the global/environment scope.

Other changes include:
* Fixed the scope used to determine which generic types should be included in
a function's generic type list
* Fixed a crash after cyclic bound types were created during unification

And in native code generation (JIT):
* Use of the arm64 target on M1 now requires macOS 13
* Entry into native code has been optimized. This is especially
important for coroutine call/pcall performance, as they involve going
through a C call frame
* LOP_LOADK(X) translation into IR has been improved to enable type
tag/constant propagation
* arm64 can now use integer immediates to synthesize floating-point
values
* The x64 assembler deduplicates 64-bit constants in the data section to
save space
* Linux `perf` can now be used to profile native Luau code (when running
with the `--codegen-perf` CLI argument)

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/AssemblyBuilderX64.h"
#include "ByteUtils.h"
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
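// A minimal usage sketch (assumes the register constants rax/rcx and the ConditionX64 enum
// declared in the CodeGen headers):
//
//   AssemblyBuilderX64 build(/*logText=*/false);
//   Label skip;
//   build.test(rcx, rcx);
//   build.jcc(ConditionX64::Zero, skip); // forward reference, patched in finalize()
//   build.mov(rax, rcx);
//   build.setLabel(skip);
//   build.ret();
//   build.finalize(); // required before the builder is destroyed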
namespace Luau
{
namespace CodeGen
{
namespace X64
{
// TODO: more assertions on operand sizes
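// Maps each ConditionX64 value to the 4-bit x64 condition code 'cc'; jcc encodes 0x0f, 0x80+cc
// and setcc encodes 0x0f, 0x90|cc, so aliases like Equal and Zero share the same entry (0x4)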
static const uint8_t codeForCondition[] = {
0x0, 0x1, 0x2, 0x3, 0x2, 0x6, 0x7, 0x3, 0x4, 0xc, 0xe, 0xf, 0xd, 0x3, 0x7, 0x6, 0x2, 0x5, 0xd, 0xf, 0xe, 0xc, 0x4, 0x5, 0xa, 0xb};
static_assert(sizeof(codeForCondition) / sizeof(codeForCondition[0]) == size_t(ConditionX64::Count), "all conditions have to be covered");
static const char* jccTextForCondition[] = {"jo", "jno", "jc", "jnc", "jb", "jbe", "ja", "jae", "je", "jl", "jle", "jg", "jge", "jnb", "jnbe", "jna",
"jnae", "jne", "jnl", "jnle", "jng", "jnge", "jz", "jnz", "jp", "jnp"};
static_assert(sizeof(jccTextForCondition) / sizeof(jccTextForCondition[0]) == size_t(ConditionX64::Count), "all conditions have to be covered");
static const char* setccTextForCondition[] = {"seto", "setno", "setc", "setnc", "setb", "setbe", "seta", "setae", "sete", "setl", "setle", "setg",
"setge", "setnb", "setnbe", "setna", "setnae", "setne", "setnl", "setnle", "setng", "setnge", "setz", "setnz", "setp", "setnp"};
static_assert(sizeof(setccTextForCondition) / sizeof(setccTextForCondition[0]) == size_t(ConditionX64::Count), "all conditions have to be covered");
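// Encoding helpers: the REX prefix is 0b0100WRXB, where W selects 64-bit operand size and R, X
// and B supply bit 3 of the ModRM.reg, SIB.index and ModRM.rm/SIB.base register indices.
// The three-byte VEX prefix starts with 0xc4 and stores R, X, B and the vvvv register inverted,
// together with the opcode map (m), width bit W, vector length L and implied SIMD prefix pp.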
#define OP_PLUS_REG(op, reg) ((op) + (reg & 0x7))
#define OP_PLUS_CC(op, cc) ((op) + uint8_t(cc))
#define REX_W_BIT(value) (value ? 0x8 : 0x0)
#define REX_W(reg) REX_W_BIT((reg).size == SizeX64::qword || ((reg).size == SizeX64::byte && (reg).index >= 4))
#define REX_R(reg) (((reg).index & 0x8) >> 1)
#define REX_X(reg) (((reg).index & 0x8) >> 2)
#define REX_B(reg) (((reg).index & 0x8) >> 3)
#define AVX_W(value) ((value) ? 0x80 : 0x0)
#define AVX_R(reg) ((~(reg).index & 0x8) << 4)
#define AVX_X(reg) ((~(reg).index & 0x8) << 3)
#define AVX_B(reg) ((~(reg).index & 0x8) << 2)
#define AVX_3_1() 0b11000100
#define AVX_3_2(r, x, b, m) (AVX_R(r) | AVX_X(x) | AVX_B(b) | (m))
#define AVX_3_3(w, v, l, p) (AVX_W(w) | ((~(v.index) & 0xf) << 3) | ((l) << 2) | (p))
#define MOD_RM(mod, reg, rm) (((mod) << 6) | (((reg)&0x7) << 3) | ((rm)&0x7))
#define SIB(scale, index, base) ((getScaleEncoding(scale) << 6) | (((index)&0x7) << 3) | ((base)&0x7))
const unsigned AVX_0F = 0b0001;
[[maybe_unused]] const unsigned AVX_0F38 = 0b0010;
[[maybe_unused]] const unsigned AVX_0F3A = 0b0011;
const unsigned AVX_NP = 0b00;
const unsigned AVX_66 = 0b01;
const unsigned AVX_F3 = 0b10;
const unsigned AVX_F2 = 0b11;
const unsigned kMaxAlign = 32;
const unsigned kMaxInstructionLength = 16;
const uint8_t kRoundingPrecisionInexact = 0b1000; // bit 3 of the roundsd imm8 suppresses the precision (inexact) exception
static ABIX64 getCurrentX64ABI()
{
#if defined(_WIN32)
return ABIX64::Windows;
#else
return ABIX64::SystemV;
#endif
}
AssemblyBuilderX64::AssemblyBuilderX64(bool logText, ABIX64 abi)
: logText(logText)
, abi(abi)
, constCache64(~0ull) // ~0ull is the empty key of the constant cache; see i64/f64
{
data.resize(4096);
dataPos = data.size(); // data is filled backwards
code.resize(4096);
codePos = code.data();
codeEnd = code.data() + code.size();
}
AssemblyBuilderX64::AssemblyBuilderX64(bool logText)
: AssemblyBuilderX64(logText, getCurrentX64ABI())
{
}
AssemblyBuilderX64::~AssemblyBuilderX64()
{
LUAU_ASSERT(finalized);
}
void AssemblyBuilderX64::add(OperandX64 lhs, OperandX64 rhs)
{
placeBinary("add", lhs, rhs, 0x80, 0x81, 0x83, 0x00, 0x01, 0x02, 0x03, 0);
}
void AssemblyBuilderX64::sub(OperandX64 lhs, OperandX64 rhs)
{
placeBinary("sub", lhs, rhs, 0x80, 0x81, 0x83, 0x28, 0x29, 0x2a, 0x2b, 5);
}
void AssemblyBuilderX64::cmp(OperandX64 lhs, OperandX64 rhs)
{
placeBinary("cmp", lhs, rhs, 0x80, 0x81, 0x83, 0x38, 0x39, 0x3a, 0x3b, 7);
}
void AssemblyBuilderX64::and_(OperandX64 lhs, OperandX64 rhs)
{
placeBinary("and", lhs, rhs, 0x80, 0x81, 0x83, 0x20, 0x21, 0x22, 0x23, 4);
}
void AssemblyBuilderX64::or_(OperandX64 lhs, OperandX64 rhs)
{
placeBinary("or", lhs, rhs, 0x80, 0x81, 0x83, 0x08, 0x09, 0x0a, 0x0b, 1);
}
void AssemblyBuilderX64::xor_(OperandX64 lhs, OperandX64 rhs)
{
placeBinary("xor", lhs, rhs, 0x80, 0x81, 0x83, 0x30, 0x31, 0x32, 0x33, 6);
}
void AssemblyBuilderX64::sal(OperandX64 lhs, OperandX64 rhs)
{
placeShift("sal", lhs, rhs, 4);
}
void AssemblyBuilderX64::sar(OperandX64 lhs, OperandX64 rhs)
{
placeShift("sar", lhs, rhs, 7);
}
void AssemblyBuilderX64::shl(OperandX64 lhs, OperandX64 rhs)
{
placeShift("shl", lhs, rhs, 4); // same as sal
}
void AssemblyBuilderX64::shr(OperandX64 lhs, OperandX64 rhs)
{
placeShift("shr", lhs, rhs, 5);
}
void AssemblyBuilderX64::rol(OperandX64 lhs, OperandX64 rhs)
{
placeShift("rol", lhs, rhs, 0);
}
void AssemblyBuilderX64::ror(OperandX64 lhs, OperandX64 rhs)
{
placeShift("ror", lhs, rhs, 1);
}
void AssemblyBuilderX64::mov(OperandX64 lhs, OperandX64 rhs)
{
if (logText)
log("mov", lhs, rhs);
if (lhs.cat == CategoryX64::reg && rhs.cat == CategoryX64::imm)
{
SizeX64 size = lhs.base.size;
placeRex(lhs.base);
if (size == SizeX64::byte)
{
place(OP_PLUS_REG(0xb0, lhs.base.index));
placeImm8(rhs.imm);
}
else if (size == SizeX64::dword)
{
place(OP_PLUS_REG(0xb8, lhs.base.index));
placeImm32(rhs.imm);
}
else
{
LUAU_ASSERT(size == SizeX64::qword);
place(OP_PLUS_REG(0xb8, lhs.base.index));
placeImm64(rhs.imm);
}
}
else if (lhs.cat == CategoryX64::mem && rhs.cat == CategoryX64::imm)
{
SizeX64 size = lhs.memSize;
placeRex(lhs);
if (size == SizeX64::byte)
{
place(0xc6);
placeModRegMem(lhs, 0, /*extraCodeBytes=*/1);
placeImm8(rhs.imm);
}
else
{
LUAU_ASSERT(size == SizeX64::dword || size == SizeX64::qword);
place(0xc7);
placeModRegMem(lhs, 0, /*extraCodeBytes=*/4);
placeImm32(rhs.imm);
}
}
else if (lhs.cat == CategoryX64::reg && (rhs.cat == CategoryX64::reg || rhs.cat == CategoryX64::mem))
{
placeBinaryRegAndRegMem(lhs, rhs, 0x8a, 0x8b);
}
else if (lhs.cat == CategoryX64::mem && rhs.cat == CategoryX64::reg)
{
placeBinaryRegMemAndReg(lhs, rhs, 0x88, 0x89);
}
else
{
LUAU_ASSERT(!"No encoding for this operand combination");
}
commit();
}
void AssemblyBuilderX64::mov64(RegisterX64 lhs, int64_t imm)
{
if (logText)
{
text.append(" mov ");
log(lhs);
logAppend(",%llXh\n", (unsigned long long)imm);
}
LUAU_ASSERT(lhs.size == SizeX64::qword);
placeRex(lhs);
place(OP_PLUS_REG(0xb8, lhs.index));
placeImm64(imm);
commit();
}
void AssemblyBuilderX64::movsx(RegisterX64 lhs, OperandX64 rhs)
{
if (logText)
log("movsx", lhs, rhs);
LUAU_ASSERT(rhs.memSize == SizeX64::byte || rhs.memSize == SizeX64::word);
placeRex(lhs, rhs);
place(0x0f);
place(rhs.memSize == SizeX64::byte ? 0xbe : 0xbf);
placeRegAndModRegMem(lhs, rhs);
commit();
}
void AssemblyBuilderX64::movzx(RegisterX64 lhs, OperandX64 rhs)
{
if (logText)
log("movzx", lhs, rhs);
LUAU_ASSERT(rhs.memSize == SizeX64::byte || rhs.memSize == SizeX64::word);
placeRex(lhs, rhs);
place(0x0f);
place(rhs.memSize == SizeX64::byte ? 0xb6 : 0xb7);
placeRegAndModRegMem(lhs, rhs);
commit();
}
void AssemblyBuilderX64::div(OperandX64 op)
{
placeUnaryModRegMem("div", op, 0xf6, 0xf7, 6);
}
void AssemblyBuilderX64::idiv(OperandX64 op)
{
placeUnaryModRegMem("idiv", op, 0xf6, 0xf7, 7);
}
void AssemblyBuilderX64::mul(OperandX64 op)
{
placeUnaryModRegMem("mul", op, 0xf6, 0xf7, 4);
}
void AssemblyBuilderX64::imul(OperandX64 op)
{
placeUnaryModRegMem("imul", op, 0xf6, 0xf7, 5);
}
void AssemblyBuilderX64::neg(OperandX64 op)
{
placeUnaryModRegMem("neg", op, 0xf6, 0xf7, 3);
}
void AssemblyBuilderX64::not_(OperandX64 op)
{
placeUnaryModRegMem("not", op, 0xf6, 0xf7, 2);
}
void AssemblyBuilderX64::dec(OperandX64 op)
{
placeUnaryModRegMem("dec", op, 0xfe, 0xff, 1);
}
void AssemblyBuilderX64::inc(OperandX64 op)
{
placeUnaryModRegMem("inc", op, 0xfe, 0xff, 0);
}
void AssemblyBuilderX64::imul(OperandX64 lhs, OperandX64 rhs)
{
if (logText)
log("imul", lhs, rhs);
placeRex(lhs.base, rhs);
place(0x0f);
place(0xaf);
placeRegAndModRegMem(lhs, rhs);
commit();
}
void AssemblyBuilderX64::imul(OperandX64 dst, OperandX64 lhs, int32_t rhs)
{
if (logText)
log("imul", dst, lhs, rhs);
placeRex(dst.base, lhs);
if (int8_t(rhs) == rhs)
{
place(0x6b);
placeRegAndModRegMem(dst, lhs, /*extraCodeBytes=*/1);
placeImm8(rhs);
}
else
{
place(0x69);
placeRegAndModRegMem(dst, lhs, /*extraCodeBytes=*/4);
placeImm32(rhs);
}
commit();
}
void AssemblyBuilderX64::test(OperandX64 lhs, OperandX64 rhs)
{
// test has no sign-extended imm8 form and no 'reg, r/m' form, so both directions share the same opcodes
placeBinary("test", lhs, rhs, 0xf6, 0xf7, 0xf7, 0x84, 0x85, 0x84, 0x85, 0);
}
void AssemblyBuilderX64::lea(OperandX64 lhs, OperandX64 rhs)
{
if (logText)
log("lea", lhs, rhs);
LUAU_ASSERT(lhs.cat == CategoryX64::reg && rhs.cat == CategoryX64::mem && rhs.memSize == SizeX64::none);
LUAU_ASSERT(rhs.base == rip || rhs.base.size == lhs.base.size);
LUAU_ASSERT(rhs.index == noreg || rhs.index.size == lhs.base.size);
rhs.memSize = lhs.base.size;
placeBinaryRegAndRegMem(lhs, rhs, 0x8d, 0x8d);
}
void AssemblyBuilderX64::push(OperandX64 op)
{
if (logText)
log("push", op);
LUAU_ASSERT(op.cat == CategoryX64::reg && op.base.size == SizeX64::qword);
placeRex(op.base);
place(OP_PLUS_REG(0x50, op.base.index));
commit();
}
void AssemblyBuilderX64::pop(OperandX64 op)
{
if (logText)
log("pop", op);
LUAU_ASSERT(op.cat == CategoryX64::reg && op.base.size == SizeX64::qword);
placeRex(op.base);
place(OP_PLUS_REG(0x58, op.base.index));
commit();
}
void AssemblyBuilderX64::ret()
{
if (logText)
log("ret");
place(0xc3);
commit();
}
void AssemblyBuilderX64::setcc(ConditionX64 cond, OperandX64 op)
{
SizeX64 size = op.cat == CategoryX64::reg ? op.base.size : op.memSize;
LUAU_ASSERT(size == SizeX64::byte);
if (logText)
log(setccTextForCondition[size_t(cond)], op);
placeRex(op);
place(0x0f);
place(0x90 | codeForCondition[size_t(cond)]);
placeModRegMem(op, 0);
commit();
}
void AssemblyBuilderX64::jcc(ConditionX64 cond, Label& label)
{
placeJcc(jccTextForCondition[size_t(cond)], label, codeForCondition[size_t(cond)]);
}
void AssemblyBuilderX64::jmp(Label& label)
{
place(0xe9);
placeLabel(label);
if (logText)
log("jmp", label);
commit();
}
void AssemblyBuilderX64::jmp(OperandX64 op)
{
LUAU_ASSERT((op.cat == CategoryX64::reg ? op.base.size : op.memSize) == SizeX64::qword);
if (logText)
log("jmp", op);
// Indirect absolute jumps always operate in 64-bit width mode, so REX.W is optional
// While we could keep an optional prefix, in the Windows x64 ABI it signals a tail call return statement to the unwinder
placeRexNoW(op);
place(0xff);
placeModRegMem(op, 4);
commit();
}
void AssemblyBuilderX64::call(Label& label)
{
place(0xe8);
placeLabel(label);
if (logText)
log("call", label);
commit();
}
void AssemblyBuilderX64::call(OperandX64 op)
{
LUAU_ASSERT((op.cat == CategoryX64::reg ? op.base.size : op.memSize) == SizeX64::qword);
if (logText)
log("call", op);
// Indirect absolute calls always work in 64 bit width mode, so REX.W is optional
placeRexNoW(op);
place(0xff);
placeModRegMem(op, 2);
commit();
}
void AssemblyBuilderX64::int3()
{
if (logText)
log("int3");
place(0xcc);
commit();
}
void AssemblyBuilderX64::bsr(RegisterX64 dst, OperandX64 src)
{
if (logText)
log("bsr", dst, src);
LUAU_ASSERT(dst.size == SizeX64::dword || dst.size == SizeX64::qword);
placeRex(dst, src);
place(0x0f);
place(0xbd);
placeRegAndModRegMem(dst, src);
commit();
}
void AssemblyBuilderX64::bsf(RegisterX64 dst, OperandX64 src)
{
if (logText)
log("bsf", dst, src);
LUAU_ASSERT(dst.size == SizeX64::dword || dst.size == SizeX64::qword);
placeRex(dst, src);
place(0x0f);
place(0xbc);
placeRegAndModRegMem(dst, src);
commit();
}
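// Pads with the recommended multi-byte NOP encodings (one instruction covers up to 9 bytes);
// longer pads are split into multiple instructions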
void AssemblyBuilderX64::nop(uint32_t length)
{
while (length != 0)
{
uint32_t step = length > 9 ? 9 : length;
length -= step;
switch (step)
{
case 1:
if (logText)
logAppend(" nop\n");
place(0x90);
break;
case 2:
if (logText)
logAppend(" xchg ax, ax ; %u-byte nop\n", step);
place(0x66);
place(0x90);
break;
case 3:
if (logText)
logAppend(" nop dword ptr[rax] ; %u-byte nop\n", step);
place(0x0f);
place(0x1f);
place(0x00);
break;
case 4:
if (logText)
logAppend(" nop dword ptr[rax] ; %u-byte nop\n", step);
place(0x0f);
place(0x1f);
place(0x40);
place(0x00);
break;
case 5:
if (logText)
logAppend(" nop dword ptr[rax+rax] ; %u-byte nop\n", step);
place(0x0f);
place(0x1f);
place(0x44);
place(0x00);
place(0x00);
break;
case 6:
if (logText)
logAppend(" nop word ptr[rax+rax] ; %u-byte nop\n", step);
place(0x66);
place(0x0f);
place(0x1f);
place(0x44);
place(0x00);
place(0x00);
break;
case 7:
if (logText)
logAppend(" nop dword ptr[rax] ; %u-byte nop\n", step);
place(0x0f);
place(0x1f);
place(0x80);
place(0x00);
place(0x00);
place(0x00);
place(0x00);
break;
case 8:
if (logText)
logAppend(" nop dword ptr[rax+rax] ; %u-byte nop\n", step);
place(0x0f);
place(0x1f);
place(0x84);
place(0x00);
place(0x00);
place(0x00);
place(0x00);
place(0x00);
break;
case 9:
if (logText)
logAppend(" nop word ptr[rax+rax] ; %u-byte nop\n", step);
place(0x66);
place(0x0f);
place(0x1f);
place(0x84);
place(0x00);
place(0x00);
place(0x00);
place(0x00);
place(0x00);
break;
}
commit();
}
}
void AssemblyBuilderX64::align(uint32_t alignment, AlignmentDataX64 data)
{
LUAU_ASSERT((alignment & (alignment - 1)) == 0);
uint32_t size = getCodeSize();
uint32_t pad = ((size + alignment - 1) & ~(alignment - 1)) - size;
switch (data)
{
case AlignmentDataX64::Nop:
if (logText)
logAppend("; align %u\n", alignment);
nop(pad);
break;
case AlignmentDataX64::Int3:
if (logText)
logAppend("; align %u using int3\n", alignment);
while (codePos + pad > codeEnd)
extend();
for (uint32_t i = 0; i < pad; ++i)
place(0xcc);
commit();
break;
case AlignmentDataX64::Ud2:
if (logText)
logAppend("; align %u using ud2\n", alignment);
while (codePos + pad > codeEnd)
extend();
uint32_t i = 0;
for (; i + 1 < pad; i += 2)
{
place(0x0f);
place(0x0b);
}
if (i < pad)
place(0xcc);
commit();
break;
}
}
void AssemblyBuilderX64::vaddpd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vaddpd", dst, src1, src2, 0x58, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vaddps(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vaddps", dst, src1, src2, 0x58, false, AVX_0F, AVX_NP);
}
void AssemblyBuilderX64::vaddsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vaddsd", dst, src1, src2, 0x58, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vaddss(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vaddss", dst, src1, src2, 0x58, false, AVX_0F, AVX_F3);
}
void AssemblyBuilderX64::vsubsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vsubsd", dst, src1, src2, 0x5c, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vmulsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vmulsd", dst, src1, src2, 0x59, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vdivsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vdivsd", dst, src1, src2, 0x5e, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vandpd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vandpd", dst, src1, src2, 0x54, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vandnpd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vandnpd", dst, src1, src2, 0x55, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vxorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vxorpd", dst, src1, src2, 0x57, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vorpd", dst, src1, src2, 0x56, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vucomisd(OperandX64 src1, OperandX64 src2)
{
placeAvx("vucomisd", src1, src2, 0x2e, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vcvttsd2si(OperandX64 dst, OperandX64 src)
{
placeAvx("vcvttsd2si", dst, src, 0x2c, dst.base.size == SizeX64::qword, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vcvtsi2sd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vcvtsi2sd", dst, src1, src2, 0x2a, (src2.cat == CategoryX64::reg ? src2.base.size : src2.memSize) == SizeX64::qword, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vcvtsd2ss(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
if (src2.cat == CategoryX64::reg)
LUAU_ASSERT(src2.base.size == SizeX64::xmmword);
else
LUAU_ASSERT(src2.memSize == SizeX64::qword);
placeAvx("vcvtsd2ss", dst, src1, src2, 0x5a, (src2.cat == CategoryX64::reg ? src2.base.size : src2.memSize) == SizeX64::qword, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vroundsd(OperandX64 dst, OperandX64 src1, OperandX64 src2, RoundingModeX64 roundingMode)
{
placeAvx("vroundsd", dst, src1, src2, uint8_t(roundingMode) | kRoundingPrecisionInexact, 0x0b, false, AVX_0F3A, AVX_66);
}
void AssemblyBuilderX64::vsqrtpd(OperandX64 dst, OperandX64 src)
{
placeAvx("vsqrtpd", dst, src, 0x51, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vsqrtps(OperandX64 dst, OperandX64 src)
{
placeAvx("vsqrtps", dst, src, 0x51, false, AVX_0F, AVX_NP);
}
void AssemblyBuilderX64::vsqrtsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vsqrtsd", dst, src1, src2, 0x51, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vsqrtss(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vsqrtss", dst, src1, src2, 0x51, false, AVX_0F, AVX_F3);
}
void AssemblyBuilderX64::vmovsd(OperandX64 dst, OperandX64 src)
{
placeAvx("vmovsd", dst, src, 0x10, 0x11, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vmovsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vmovsd", dst, src1, src2, 0x10, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vmovss(OperandX64 dst, OperandX64 src)
{
placeAvx("vmovss", dst, src, 0x10, 0x11, false, AVX_0F, AVX_F3);
}
void AssemblyBuilderX64::vmovss(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vmovss", dst, src1, src2, 0x10, false, AVX_0F, AVX_F3);
}
void AssemblyBuilderX64::vmovapd(OperandX64 dst, OperandX64 src)
{
placeAvx("vmovapd", dst, src, 0x28, 0x29, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vmovaps(OperandX64 dst, OperandX64 src)
{
placeAvx("vmovaps", dst, src, 0x28, 0x29, false, AVX_0F, AVX_NP);
}
void AssemblyBuilderX64::vmovupd(OperandX64 dst, OperandX64 src)
{
placeAvx("vmovupd", dst, src, 0x10, 0x11, false, AVX_0F, AVX_66);
}
void AssemblyBuilderX64::vmovups(OperandX64 dst, OperandX64 src)
{
placeAvx("vmovups", dst, src, 0x10, 0x11, false, AVX_0F, AVX_NP);
}
void AssemblyBuilderX64::vmovq(OperandX64 dst, OperandX64 src)
{
if (dst.base.size == SizeX64::xmmword)
{
LUAU_ASSERT(dst.cat == CategoryX64::reg);
LUAU_ASSERT(src.base.size == SizeX64::qword);
placeAvx("vmovq", dst, src, 0x6e, true, AVX_0F, AVX_66);
}
else if (dst.base.size == SizeX64::qword)
{
LUAU_ASSERT(src.cat == CategoryX64::reg);
LUAU_ASSERT(src.base.size == SizeX64::xmmword);
placeAvx("vmovq", src, dst, 0x7e, true, AVX_0F, AVX_66);
}
else
{
LUAU_ASSERT(!"No encoding for left operand of this category");
}
}
void AssemblyBuilderX64::vmaxsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vmaxsd", dst, src1, src2, 0x5f, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vminsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vminsd", dst, src1, src2, 0x5d, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vcmpltsd(OperandX64 dst, OperandX64 src1, OperandX64 src2)
{
placeAvx("vcmpltsd", dst, src1, src2, 0x01, 0xc2, false, AVX_0F, AVX_F2);
}
void AssemblyBuilderX64::vblendvpd(RegisterX64 dst, RegisterX64 src1, OperandX64 mask, RegisterX64 src3)
{
// bits [7:4] of imm8 are used to select register for operand 4
placeAvx("vblendvpd", dst, src1, mask, src3.index << 4, 0x4b, false, AVX_0F3A, AVX_66);
}
bool AssemblyBuilderX64::finalize()
{
code.resize(codePos - code.data());
// Resolve jump targets
for (Label fixup : pendingLabels)
{
// If this assertion fires, a label was used in a jump or call without ever calling setLabel
LUAU_ASSERT(labelLocations[fixup.id - 1] != ~0u);
uint32_t value = labelLocations[fixup.id - 1] - (fixup.location + 4);
writeu32(&code[fixup.location], value);
}
size_t dataSize = data.size() - dataPos;
// Shrink data
if (dataSize > 0)
memmove(&data[0], &data[dataPos], dataSize);
data.resize(dataSize);
finalized = true;
return true;
}
Label AssemblyBuilderX64::setLabel()
{
Label label{nextLabel++, getCodeSize()};
labelLocations.push_back(~0u);
if (logText)
log(label);
return label;
}
void AssemblyBuilderX64::setLabel(Label& label)
{
if (label.id == 0)
{
label.id = nextLabel++;
labelLocations.push_back(~0u);
}
label.location = getCodeSize();
labelLocations[label.id - 1] = label.location;
if (logText)
log(label);
}
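// The constant-data helpers below return rip-relative operands into the data section; 64-bit
// constants are deduplicated through constCache64, except the all-ones pattern, which is the
// cache's empty key and is always re-emitted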
OperandX64 AssemblyBuilderX64::i64(int64_t value)
{
uint64_t as64BitKey = value;
if (as64BitKey != ~0ull)
{
if (int32_t* prev = constCache64.find(as64BitKey))
return OperandX64(SizeX64::qword, noreg, 1, rip, *prev);
}
size_t pos = allocateData(8, 8);
writeu64(&data[pos], value);
int32_t offset = int32_t(pos - data.size());
if (as64BitKey != ~0ull)
constCache64[as64BitKey] = offset;
return OperandX64(SizeX64::qword, noreg, 1, rip, offset);
}
OperandX64 AssemblyBuilderX64::f32(float value)
{
size_t pos = allocateData(4, 4);
writef32(&data[pos], value);
return OperandX64(SizeX64::dword, noreg, 1, rip, int32_t(pos - data.size()));
}
OperandX64 AssemblyBuilderX64::f64(double value)
{
uint64_t as64BitKey;
static_assert(sizeof(as64BitKey) == sizeof(value), "Expecting double to be 64-bit");
memcpy(&as64BitKey, &value, sizeof(value));
if (as64BitKey != ~0ull)
{
if (int32_t* prev = constCache64.find(as64BitKey))
return OperandX64(SizeX64::qword, noreg, 1, rip, *prev);
}
size_t pos = allocateData(8, 8);
writef64(&data[pos], value);
int32_t offset = int32_t(pos - data.size());
if (as64BitKey != ~0ull)
constCache64[as64BitKey] = offset;
return OperandX64(SizeX64::qword, noreg, 1, rip, offset);
}
OperandX64 AssemblyBuilderX64::f32x4(float x, float y, float z, float w)
{
size_t pos = allocateData(16, 16);
writef32(&data[pos], x);
writef32(&data[pos + 4], y);
writef32(&data[pos + 8], z);
writef32(&data[pos + 12], w);
return OperandX64(SizeX64::xmmword, noreg, 1, rip, int32_t(pos - data.size()));
}
OperandX64 AssemblyBuilderX64::f64x2(double x, double y)
{
size_t pos = allocateData(16, 16);
writef64(&data[pos], x);
writef64(&data[pos + 8], y);
return OperandX64(SizeX64::xmmword, noreg, 1, rip, int32_t(pos - data.size()));
}
OperandX64 AssemblyBuilderX64::bytes(const void* ptr, size_t size, size_t align)
{
size_t pos = allocateData(size, align);
memcpy(&data[pos], ptr, size);
return OperandX64(SizeX64::none, noreg, 1, rip, int32_t(pos - data.size()));
}
void AssemblyBuilderX64::logAppend(const char* fmt, ...)
{
char buf[256];
va_list args;
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
text.append(buf);
}
uint32_t AssemblyBuilderX64::getCodeSize() const
{
return uint32_t(codePos - code.data());
}
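// codeimm8/codeimm are the 'r/m, imm' opcodes for byte and dword/qword operands, and codeimmImm8
// is the sign-extended imm8 variant (used only when it differs from codeimm); code8rev/coderev
// encode 'r/m, reg' and code8/code encode 'reg, r/m'; opreg is the /digit opcode extension stored
// in the ModRM.reg field for the immediate forms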
void AssemblyBuilderX64::placeBinary(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t codeimm8, uint8_t codeimm, uint8_t codeimmImm8,
uint8_t code8rev, uint8_t coderev, uint8_t code8, uint8_t code, uint8_t opreg)
{
if (logText)
log(name, lhs, rhs);
if ((lhs.cat == CategoryX64::reg || lhs.cat == CategoryX64::mem) && rhs.cat == CategoryX64::imm)
placeBinaryRegMemAndImm(lhs, rhs, codeimm8, codeimm, codeimmImm8, opreg);
else if (lhs.cat == CategoryX64::reg && (rhs.cat == CategoryX64::reg || rhs.cat == CategoryX64::mem))
placeBinaryRegAndRegMem(lhs, rhs, code8, code);
else if (lhs.cat == CategoryX64::mem && rhs.cat == CategoryX64::reg)
placeBinaryRegMemAndReg(lhs, rhs, code8rev, coderev);
else
LUAU_ASSERT(!"No encoding for this operand combination");
}
void AssemblyBuilderX64::placeBinaryRegMemAndImm(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code, uint8_t codeImm8, uint8_t opreg)
{
LUAU_ASSERT(lhs.cat == CategoryX64::reg || lhs.cat == CategoryX64::mem);
LUAU_ASSERT(rhs.cat == CategoryX64::imm);
SizeX64 size = lhs.cat == CategoryX64::reg ? lhs.base.size : lhs.memSize;
LUAU_ASSERT(size == SizeX64::byte || size == SizeX64::dword || size == SizeX64::qword);
placeRex(lhs);
if (size == SizeX64::byte)
{
place(code8);
placeModRegMem(lhs, opreg, /*extraCodeBytes=*/1);
placeImm8(rhs.imm);
}
else
{
LUAU_ASSERT(size == SizeX64::dword || size == SizeX64::qword);
if (int8_t(rhs.imm) == rhs.imm && code != codeImm8)
{
place(codeImm8);
placeModRegMem(lhs, opreg, /*extraCodeBytes=*/1);
placeImm8(rhs.imm);
}
else
{
place(code);
placeModRegMem(lhs, opreg, /*extraCodeBytes=*/4);
placeImm32(rhs.imm);
}
}
commit();
}
void AssemblyBuilderX64::placeBinaryRegAndRegMem(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code)
{
LUAU_ASSERT(lhs.cat == CategoryX64::reg && (rhs.cat == CategoryX64::reg || rhs.cat == CategoryX64::mem));
LUAU_ASSERT(lhs.base.size == (rhs.cat == CategoryX64::reg ? rhs.base.size : rhs.memSize));
SizeX64 size = lhs.base.size;
LUAU_ASSERT(size == SizeX64::byte || size == SizeX64::dword || size == SizeX64::qword);
placeRex(lhs.base, rhs);
place(size == SizeX64::byte ? code8 : code);
placeRegAndModRegMem(lhs, rhs);
commit();
}
void AssemblyBuilderX64::placeBinaryRegMemAndReg(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code)
{
// In the encoding helper, the register operand always comes first; the reversed opcodes make the data flow the other way
placeBinaryRegAndRegMem(rhs, lhs, code8, code);
}
void AssemblyBuilderX64::placeUnaryModRegMem(const char* name, OperandX64 op, uint8_t code8, uint8_t code, uint8_t opreg)
{
if (logText)
log(name, op);
LUAU_ASSERT(op.cat == CategoryX64::reg || op.cat == CategoryX64::mem);
SizeX64 size = op.cat == CategoryX64::reg ? op.base.size : op.memSize;
LUAU_ASSERT(size == SizeX64::byte || size == SizeX64::dword || size == SizeX64::qword);
placeRex(op);
place(size == SizeX64::byte ? code8 : code);
placeModRegMem(op, opreg);
commit();
}
void AssemblyBuilderX64::placeShift(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t opreg)
{
if (logText)
log(name, lhs, rhs);
LUAU_ASSERT(lhs.cat == CategoryX64::reg || lhs.cat == CategoryX64::mem);
LUAU_ASSERT(rhs.cat == CategoryX64::imm || (rhs.cat == CategoryX64::reg && rhs.base == cl));
SizeX64 size = lhs.base.size;
placeRex(lhs.base);
if (rhs.cat == CategoryX64::imm && rhs.imm == 1)
{
place(size == SizeX64::byte ? 0xd0 : 0xd1);
placeModRegMem(lhs, opreg);
}
else if (rhs.cat == CategoryX64::imm)
{
LUAU_ASSERT(int8_t(rhs.imm) == rhs.imm);
place(size == SizeX64::byte ? 0xc0 : 0xc1);
placeModRegMem(lhs, opreg, /*extraCodeBytes=*/1);
placeImm8(rhs.imm);
}
else
{
place(size == SizeX64::byte ? 0xd2 : 0xd3);
placeModRegMem(lhs, opreg);
}
commit();
}
void AssemblyBuilderX64::placeJcc(const char* name, Label& label, uint8_t cc)
{
place(0x0f);
place(OP_PLUS_CC(0x80, cc));
placeLabel(label);
if (logText)
log(name, label);
commit();
}
void AssemblyBuilderX64::placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, bool setW, uint8_t mode, uint8_t prefix)
{
LUAU_ASSERT(dst.cat == CategoryX64::reg);
LUAU_ASSERT(src.cat == CategoryX64::reg || src.cat == CategoryX64::mem);
if (logText)
log(name, dst, src);
placeVex(dst, noreg, src, setW, mode, prefix);
place(code);
placeRegAndModRegMem(dst, src);
commit();
}
void AssemblyBuilderX64::placeAvx(
const char* name, OperandX64 dst, OperandX64 src, uint8_t code, uint8_t coderev, bool setW, uint8_t mode, uint8_t prefix)
{
LUAU_ASSERT((dst.cat == CategoryX64::mem && src.cat == CategoryX64::reg) || (dst.cat == CategoryX64::reg && src.cat == CategoryX64::mem));
if (logText)
log(name, dst, src);
if (dst.cat == CategoryX64::mem)
{
placeVex(src, noreg, dst, setW, mode, prefix);
place(coderev);
placeRegAndModRegMem(src, dst);
}
else
{
placeVex(dst, noreg, src, setW, mode, prefix);
place(code);
placeRegAndModRegMem(dst, src);
}
commit();
}
void AssemblyBuilderX64::placeAvx(
const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t code, bool setW, uint8_t mode, uint8_t prefix)
{
LUAU_ASSERT(dst.cat == CategoryX64::reg);
LUAU_ASSERT(src1.cat == CategoryX64::reg);
LUAU_ASSERT(src2.cat == CategoryX64::reg || src2.cat == CategoryX64::mem);
if (logText)
log(name, dst, src1, src2);
placeVex(dst, src1, src2, setW, mode, prefix);
place(code);
placeRegAndModRegMem(dst, src2);
commit();
}
void AssemblyBuilderX64::placeAvx(
const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t imm8, uint8_t code, bool setW, uint8_t mode, uint8_t prefix)
{
LUAU_ASSERT(dst.cat == CategoryX64::reg);
LUAU_ASSERT(src1.cat == CategoryX64::reg);
LUAU_ASSERT(src2.cat == CategoryX64::reg || src2.cat == CategoryX64::mem);
if (logText)
log(name, dst, src1, src2, imm8);
placeVex(dst, src1, src2, setW, mode, prefix);
place(code);
placeRegAndModRegMem(dst, src2, /*extraCodeBytes=*/1);
placeImm8(imm8);
commit();
}
void AssemblyBuilderX64::placeRex(RegisterX64 op)
{
uint8_t code = REX_W(op) | REX_B(op);
if (code != 0)
place(code | 0x40);
}
void AssemblyBuilderX64::placeRex(OperandX64 op)
{
uint8_t code = 0;
if (op.cat == CategoryX64::reg)
code = REX_W(op.base) | REX_B(op.base);
else if (op.cat == CategoryX64::mem)
code = REX_W_BIT(op.memSize == SizeX64::qword) | REX_X(op.index) | REX_B(op.base);
else
LUAU_ASSERT(!"No encoding for left operand of this category");
if (code != 0)
place(code | 0x40);
}
void AssemblyBuilderX64::placeRexNoW(OperandX64 op)
{
uint8_t code = 0;
if (op.cat == CategoryX64::reg)
code = REX_B(op.base);
else if (op.cat == CategoryX64::mem)
code = REX_X(op.index) | REX_B(op.base);
else
LUAU_ASSERT(!"No encoding for left operand of this category");
if (code != 0)
place(code | 0x40);
}
void AssemblyBuilderX64::placeRex(RegisterX64 lhs, OperandX64 rhs)
{
uint8_t code = REX_W(lhs);
if (rhs.cat == CategoryX64::imm)
code |= REX_B(lhs);
else
code |= REX_R(lhs) | REX_X(rhs.index) | REX_B(rhs.base);
if (code != 0)
place(code | 0x40);
}
void AssemblyBuilderX64::placeVex(OperandX64 dst, OperandX64 src1, OperandX64 src2, bool setW, uint8_t mode, uint8_t prefix)
{
LUAU_ASSERT(dst.cat == CategoryX64::reg);
LUAU_ASSERT(src1.cat == CategoryX64::reg);
LUAU_ASSERT(src2.cat == CategoryX64::reg || src2.cat == CategoryX64::mem);
place(AVX_3_1());
place(AVX_3_2(dst.base, src2.index, src2.base, mode));
place(AVX_3_3(setW, src1.base, dst.base.size == SizeX64::ymmword, prefix));
}
static uint8_t getScaleEncoding(uint8_t scale)
{
static const uint8_t scales[9] = {0xff, 0, 1, 0xff, 2, 0xff, 0xff, 0xff, 3};
LUAU_ASSERT(scale < 9 && scales[scale] != 0xff);
return scales[scale];
}
void AssemblyBuilderX64::placeRegAndModRegMem(OperandX64 lhs, OperandX64 rhs, int32_t extraCodeBytes)
{
LUAU_ASSERT(lhs.cat == CategoryX64::reg);
placeModRegMem(rhs, lhs.base.index, extraCodeBytes);
}
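// Emits the ModRM byte (plus SIB byte and displacement where needed) for 'rhs', with 'regop' in
// the ModRM.reg field; extraCodeBytes tells rip-relative addressing how many immediate bytes of
// the instruction still follow the displacement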
void AssemblyBuilderX64::placeModRegMem(OperandX64 rhs, uint8_t regop, int32_t extraCodeBytes)
{
if (rhs.cat == CategoryX64::reg)
{
place(MOD_RM(0b11, regop, rhs.base.index));
}
else if (rhs.cat == CategoryX64::mem)
{
RegisterX64 index = rhs.index;
RegisterX64 base = rhs.base;
uint8_t mod = 0b00;
if (rhs.imm != 0)
{
if (int8_t(rhs.imm) == rhs.imm)
mod = 0b01;
else
mod = 0b10;
}
else
{
// r13/bp-based addressing requires a displacement
if ((base.index & 0x7) == 0b101)
mod = 0b01;
}
if (index != noreg && base != noreg)
{
place(MOD_RM(mod, regop, 0b100));
place(SIB(rhs.scale, index.index, base.index));
if (mod != 0b00)
placeImm8Or32(rhs.imm);
}
else if (index != noreg && rhs.scale != 1)
{
place(MOD_RM(0b00, regop, 0b100));
place(SIB(rhs.scale, index.index, 0b101));
placeImm32(rhs.imm);
}
else if ((base.index & 0x7) == 0b100) // r12/sp-based addressing requires SIB
{
LUAU_ASSERT(rhs.scale == 1);
LUAU_ASSERT(index == noreg);
place(MOD_RM(mod, regop, 0b100));
place(SIB(rhs.scale, 0b100, base.index));
if (rhs.imm != 0)
placeImm8Or32(rhs.imm);
}
else if (base == rip)
{
place(MOD_RM(0b00, regop, 0b101));
// As a reminder: we do (getCodeSize() + 4) here to calculate the offset of the end of the current instruction we are placing.
// Since we have already placed all of the instruction bytes for this instruction, we add +4 to account for the imm32 displacement.
// Some instructions, however, are encoded such that an additional imm8 byte, or imm32 bytes, is placed after the ModRM byte, thus,
// we need to account for that case here as well.
placeImm32(-int32_t(getCodeSize() + 4 + extraCodeBytes) + rhs.imm);
}
else if (base != noreg)
{
place(MOD_RM(mod, regop, base.index));
if (mod != 0b00)
placeImm8Or32(rhs.imm);
}
else
{
place(MOD_RM(0b00, regop, 0b100));
place(SIB(1, 0b100, 0b101));
placeImm32(rhs.imm);
}
}
else
{
LUAU_ASSERT(!"No encoding for right operand of this category");
}
}
void AssemblyBuilderX64::placeImm8Or32(int32_t imm)
{
int8_t imm8 = int8_t(imm);
if (imm8 == imm)
place(imm8);
else
placeImm32(imm);
}
void AssemblyBuilderX64::placeImm8(int32_t imm)
{
int8_t imm8 = int8_t(imm);
if (imm8 == imm)
place(imm8);
else
LUAU_ASSERT(!"Invalid immediate value");
}
void AssemblyBuilderX64::placeImm32(int32_t imm)
{
uint8_t* pos = codePos;
LUAU_ASSERT(pos + sizeof(imm) < codeEnd);
codePos = writeu32(pos, imm);
}
void AssemblyBuilderX64::placeImm64(int64_t imm)
{
uint8_t* pos = codePos;
LUAU_ASSERT(pos + sizeof(imm) < codeEnd);
codePos = writeu64(pos, imm);
}
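// Emits the rel32 operand for a label: resolved labels are encoded immediately, while forward
// references are recorded in pendingLabels and patched in finalize()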
void AssemblyBuilderX64::placeLabel(Label& label)
{
if (label.location == ~0u)
{
if (label.id == 0)
{
label.id = nextLabel++;
labelLocations.push_back(~0u);
}
pendingLabels.push_back({label.id, getCodeSize()});
placeImm32(0);
}
else
{
placeImm32(int32_t(label.location - (4 + getCodeSize())));
}
}
void AssemblyBuilderX64::place(uint8_t byte)
{
LUAU_ASSERT(codePos < codeEnd);
*codePos++ = byte;
}
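// Called after each instruction; keeps at least kMaxInstructionLength bytes of headroom so the
// per-byte place() calls of the next instruction cannot run past codeEnd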
void AssemblyBuilderX64::commit()
{
LUAU_ASSERT(codePos <= codeEnd);
if (codeEnd - codePos < kMaxInstructionLength)
extend();
}
void AssemblyBuilderX64::extend()
{
uint32_t count = getCodeSize();
code.resize(code.size() * 2);
codePos = code.data() + count;
codeEnd = code.data() + code.size();
}
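// The data block is filled backwards from the end; when it has to grow, the old contents are
// shifted to the end of the doubled buffer, so the end-relative offsets handed out earlier
// (see i64/f64/bytes) remain valid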
size_t AssemblyBuilderX64::allocateData(size_t size, size_t align)
{
LUAU_ASSERT(align > 0 && align <= kMaxAlign && (align & (align - 1)) == 0);
if (dataPos < size)
{
size_t oldSize = data.size();
data.resize(data.size() * 2);
memcpy(&data[oldSize], &data[0], oldSize);
memset(&data[0], 0, oldSize);
dataPos += oldSize;
}
dataPos = (dataPos - size) & ~(align - 1);
return dataPos;
}
void AssemblyBuilderX64::log(const char* opcode)
{
logAppend(" %s\n", opcode);
}
void AssemblyBuilderX64::log(const char* opcode, OperandX64 op)
{
logAppend(" %-12s", opcode);
log(op);
text.append("\n");
}
void AssemblyBuilderX64::log(const char* opcode, OperandX64 op1, OperandX64 op2)
{
logAppend(" %-12s", opcode);
log(op1);
text.append(",");
log(op2);
text.append("\n");
}
void AssemblyBuilderX64::log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3)
{
logAppend(" %-12s", opcode);
log(op1);
text.append(",");
log(op2);
text.append(",");
log(op3);
text.append("\n");
}
void AssemblyBuilderX64::log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3, OperandX64 op4)
{
logAppend(" %-12s", opcode);
log(op1);
text.append(",");
log(op2);
text.append(",");
log(op3);
text.append(",");
log(op4);
text.append("\n");
}
void AssemblyBuilderX64::log(Label label)
{
logAppend(".L%d:\n", label.id);
}
void AssemblyBuilderX64::log(const char* opcode, Label label)
{
logAppend(" %-12s.L%d\n", opcode, label.id);
}
void AssemblyBuilderX64::log(OperandX64 op)
{
switch (op.cat)
{
case CategoryX64::reg:
logAppend("%s", getRegisterName(op.base));
break;
case CategoryX64::mem:
if (op.base == rip)
{
logAppend("%s ptr [.start%+d]", getSizeName(op.memSize), op.imm);
return;
}
logAppend("%s ptr [", getSizeName(op.memSize));
if (op.base != noreg)
logAppend("%s", getRegisterName(op.base));
if (op.index != noreg)
logAppend("%s%s", op.base != noreg ? "+" : "", getRegisterName(op.index));
if (op.scale != 1)
logAppend("*%d", op.scale);
if (op.imm != 0)
{
if (op.imm >= 0 && op.imm <= 9)
logAppend("+%d", op.imm);
else if (op.imm > 0)
logAppend("+0%Xh", op.imm);
else
logAppend("-0%Xh", -op.imm);
}
text.append("]");
break;
case CategoryX64::imm:
if (op.imm >= 0 && op.imm <= 9)
logAppend("%d", op.imm);
else
logAppend("%Xh", op.imm);
break;
default:
LUAU_ASSERT(!"Unknown operand category");
}
}
const char* AssemblyBuilderX64::getSizeName(SizeX64 size) const
{
static const char* sizeNames[] = {"none", "byte", "word", "dword", "qword", "xmmword", "ymmword"};
LUAU_ASSERT(unsigned(size) < sizeof(sizeNames) / sizeof(sizeNames[0]));
return sizeNames[unsigned(size)];
}
const char* AssemblyBuilderX64::getRegisterName(RegisterX64 reg) const
{
static const char* names[][16] = {{"rip", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""},
{"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"},
{"ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"},
{"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"},
{"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"},
{"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"},
{"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"}};
LUAU_ASSERT(reg.index < 16);
LUAU_ASSERT(reg.size <= SizeX64::ymmword);
return names[size_t(reg.size)][reg.index];
}
} // namespace X64
} // namespace CodeGen
} // namespace Luau