luau/tests/AssemblyBuilderX64.test.cpp
aaron 9c588be16d
Sync to upstream/release/610 (#1154)
# What's changed?

* Check interrupt handler inside the pattern match engine to eliminate
potential for programs to hang during string library function execution.
* Allow iteration over table properties to pass the old type solver. 

### Native Code Generation

* Use in-place memory operands for math library operations on x64.
* Replace opaque bools with separate enum classes in IrDump to improve
code maintainability.
* Translate operations on inferred vectors to IR.
* Enable support for debugging native-compiled functions in Roblox
Studio.

### New Type Solver

* Rework type inference for boolean and string literals to introduce
bounded free types (bounded below by the singleton type, and above by
the primitive type) and reworked primitive type constraint to decide
which is the appropriate type for the literal.
* Introduce `FunctionCheckConstraint` to handle bidirectional
typechecking for function calls, pushing the expected parameter types
from the function onto the arguments.
* Introduce `union` and `intersect` type families to compute deferred
simplified unions and intersections to be employed by the constraint
generation logic in the new solver.
* Implement support for expanding the domain of local types in
`Unifier2`.
* Rework type inference for iteration variables bound by for in loops to
use local types.
* Change constraint blocking logic to use a set to prevent accidental
re-blocking.
* Add logic to detect missing return statements in functions.

### Internal Contributors

Co-authored-by: Aaron Weiss <aaronweiss@roblox.com>
Co-authored-by: Alexander McCord <amccord@roblox.com>
Co-authored-by: Andy Friesen <afriesen@roblox.com>
Co-authored-by: Aviral Goel <agoel@roblox.com>
Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>

---------

Co-authored-by: Alexander McCord <amccord@roblox.com>
Co-authored-by: Andy Friesen <afriesen@roblox.com>
Co-authored-by: Vighnesh <vvijay@roblox.com>
Co-authored-by: Aviral Goel <agoel@roblox.com>
Co-authored-by: David Cope <dcope@roblox.com>
Co-authored-by: Lily Brown <lbrown@roblox.com>
Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-26 19:20:56 -08:00

782 lines
33 KiB
C++

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/StringUtils.h"
#include "doctest.h"
#include "ScopedFlags.h"
#include <string.h>
LUAU_FASTFLAG(LuauCache32BitAsmConsts)
using namespace Luau::CodeGen;
using namespace Luau::CodeGen::X64;
static std::string bytecodeAsArray(const std::vector<uint8_t>& bytecode)
{
std::string result = "{";
for (size_t i = 0; i < bytecode.size(); i++)
Luau::formatAppend(result, "%s0x%02x", i == 0 ? "" : ", ", bytecode[i]);
return result.append("}");
}
class AssemblyBuilderX64Fixture
{
public:
bool check(void (*f)(AssemblyBuilderX64& build), std::vector<uint8_t> code, std::vector<uint8_t> data = {})
{
AssemblyBuilderX64 build(/* logText= */ false);
f(build);
build.finalize();
if (build.code != code)
{
printf("Expected code: %s\nReceived code: %s\n", bytecodeAsArray(code).c_str(), bytecodeAsArray(build.code).c_str());
return false;
}
if (build.data != data)
{
printf("Expected data: %s\nReceived data: %s\n", bytecodeAsArray(data).c_str(), bytecodeAsArray(build.data).c_str());
return false;
}
return true;
}
};
TEST_SUITE_BEGIN("x64Assembly");
#define SINGLE_COMPARE(inst, ...) \
CHECK(check( \
[](AssemblyBuilderX64& build) { \
build.inst; \
}, \
{__VA_ARGS__}))
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseBinaryInstructionForms")
{
// reg, reg
SINGLE_COMPARE(add(rax, rcx), 0x48, 0x03, 0xc1);
SINGLE_COMPARE(add(rsp, r12), 0x49, 0x03, 0xe4);
SINGLE_COMPARE(add(r14, r10), 0x4d, 0x03, 0xf2);
// reg, imm
SINGLE_COMPARE(add(rax, 0), 0x48, 0x83, 0xc0, 0x00);
SINGLE_COMPARE(add(rax, 0x7f), 0x48, 0x83, 0xc0, 0x7f);
SINGLE_COMPARE(add(rax, 0x80), 0x48, 0x81, 0xc0, 0x80, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r10, 0x7fffffff), 0x49, 0x81, 0xc2, 0xff, 0xff, 0xff, 0x7f);
SINGLE_COMPARE(add(al, 3), 0x80, 0xc0, 0x03);
SINGLE_COMPARE(add(sil, 3), 0x48, 0x80, 0xc6, 0x03);
SINGLE_COMPARE(add(r11b, 3), 0x49, 0x80, 0xc3, 0x03);
// reg, [reg]
SINGLE_COMPARE(add(rax, qword[rax]), 0x48, 0x03, 0x00);
SINGLE_COMPARE(add(rax, qword[rbx]), 0x48, 0x03, 0x03);
SINGLE_COMPARE(add(rax, qword[rsp]), 0x48, 0x03, 0x04, 0x24);
SINGLE_COMPARE(add(rax, qword[rbp]), 0x48, 0x03, 0x45, 0x00);
SINGLE_COMPARE(add(rax, qword[r10]), 0x49, 0x03, 0x02);
SINGLE_COMPARE(add(rax, qword[r12]), 0x49, 0x03, 0x04, 0x24);
SINGLE_COMPARE(add(rax, qword[r13]), 0x49, 0x03, 0x45, 0x00);
SINGLE_COMPARE(add(r12, qword[rax]), 0x4c, 0x03, 0x20);
SINGLE_COMPARE(add(r12, qword[rbx]), 0x4c, 0x03, 0x23);
SINGLE_COMPARE(add(r12, qword[rsp]), 0x4c, 0x03, 0x24, 0x24);
SINGLE_COMPARE(add(r12, qword[rbp]), 0x4c, 0x03, 0x65, 0x00);
SINGLE_COMPARE(add(r12, qword[r10]), 0x4d, 0x03, 0x22);
SINGLE_COMPARE(add(r12, qword[r12]), 0x4d, 0x03, 0x24, 0x24);
SINGLE_COMPARE(add(r12, qword[r13]), 0x4d, 0x03, 0x65, 0x00);
// reg, [base+imm8]
SINGLE_COMPARE(add(rax, qword[rax + 0x1b]), 0x48, 0x03, 0x40, 0x1b);
SINGLE_COMPARE(add(rax, qword[rbx + 0x1b]), 0x48, 0x03, 0x43, 0x1b);
SINGLE_COMPARE(add(rax, qword[rsp + 0x1b]), 0x48, 0x03, 0x44, 0x24, 0x1b);
SINGLE_COMPARE(add(rax, qword[rbp + 0x1b]), 0x48, 0x03, 0x45, 0x1b);
SINGLE_COMPARE(add(rax, qword[r10 + 0x1b]), 0x49, 0x03, 0x42, 0x1b);
SINGLE_COMPARE(add(rax, qword[r12 + 0x1b]), 0x49, 0x03, 0x44, 0x24, 0x1b);
SINGLE_COMPARE(add(rax, qword[r13 + 0x1b]), 0x49, 0x03, 0x45, 0x1b);
SINGLE_COMPARE(add(r12, qword[rax + 0x1b]), 0x4c, 0x03, 0x60, 0x1b);
SINGLE_COMPARE(add(r12, qword[rbx + 0x1b]), 0x4c, 0x03, 0x63, 0x1b);
SINGLE_COMPARE(add(r12, qword[rsp + 0x1b]), 0x4c, 0x03, 0x64, 0x24, 0x1b);
SINGLE_COMPARE(add(r12, qword[rbp + 0x1b]), 0x4c, 0x03, 0x65, 0x1b);
SINGLE_COMPARE(add(r12, qword[r10 + 0x1b]), 0x4d, 0x03, 0x62, 0x1b);
SINGLE_COMPARE(add(r12, qword[r12 + 0x1b]), 0x4d, 0x03, 0x64, 0x24, 0x1b);
SINGLE_COMPARE(add(r12, qword[r13 + 0x1b]), 0x4d, 0x03, 0x65, 0x1b);
// reg, [base+imm32]
SINGLE_COMPARE(add(rax, qword[rax + 0xabab]), 0x48, 0x03, 0x80, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[rbx + 0xabab]), 0x48, 0x03, 0x83, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[rsp + 0xabab]), 0x48, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[rbp + 0xabab]), 0x48, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[r10 + 0xabab]), 0x49, 0x03, 0x82, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[r12 + 0xabab]), 0x49, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[r13 + 0xabab]), 0x49, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rax + 0xabab]), 0x4c, 0x03, 0xa0, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rbx + 0xabab]), 0x4c, 0x03, 0xa3, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rsp + 0xabab]), 0x4c, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rbp + 0xabab]), 0x4c, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[r10 + 0xabab]), 0x4d, 0x03, 0xa2, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[r12 + 0xabab]), 0x4d, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[r13 + 0xabab]), 0x4d, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);
// reg, [index*scale]
SINGLE_COMPARE(add(rax, qword[rax * 2]), 0x48, 0x03, 0x04, 0x45, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[rbx * 2]), 0x48, 0x03, 0x04, 0x5d, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[rbp * 2]), 0x48, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[r10 * 2]), 0x4a, 0x03, 0x04, 0x55, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[r12 * 2]), 0x4a, 0x03, 0x04, 0x65, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[r13 * 2]), 0x4a, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rax * 2]), 0x4c, 0x03, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rbx * 2]), 0x4c, 0x03, 0x24, 0x5d, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rbp * 2]), 0x4c, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[r10 * 2]), 0x4e, 0x03, 0x24, 0x55, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[r12 * 2]), 0x4e, 0x03, 0x24, 0x65, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[r13 * 2]), 0x4e, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);
// reg, [base+index*scale+imm]
SINGLE_COMPARE(add(rax, qword[rax + rax * 2]), 0x48, 0x03, 0x04, 0x40);
SINGLE_COMPARE(add(rax, qword[rax + rbx * 2 + 0x1b]), 0x48, 0x03, 0x44, 0x58, 0x1b);
SINGLE_COMPARE(add(rax, qword[rax + rbp * 2]), 0x48, 0x03, 0x04, 0x68);
SINGLE_COMPARE(add(rax, qword[rax + rbp + 0xabab]), 0x48, 0x03, 0x84, 0x28, 0xAB, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[rax + r12 + 0x1b]), 0x4a, 0x03, 0x44, 0x20, 0x1b);
SINGLE_COMPARE(add(rax, qword[rax + r12 * 4 + 0xabab]), 0x4a, 0x03, 0x84, 0xa0, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[rax + r13 * 2 + 0x1b]), 0x4a, 0x03, 0x44, 0x68, 0x1b);
SINGLE_COMPARE(add(rax, qword[rax + r13 + 0xabab]), 0x4a, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rax + r12 * 2]), 0x4e, 0x03, 0x24, 0x60);
SINGLE_COMPARE(add(r12, qword[rax + r13 + 0xabab]), 0x4e, 0x03, 0xA4, 0x28, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(r12, qword[rax + rbp * 2 + 0x1b]), 0x4c, 0x03, 0x64, 0x68, 0x1b);
// reg, [imm32]
SINGLE_COMPARE(add(rax, qword[0]), 0x48, 0x03, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(add(rax, qword[0xabab]), 0x48, 0x03, 0x04, 0x25, 0xab, 0xab, 0x00, 0x00);
// [addr], reg
SINGLE_COMPARE(add(qword[rax], rax), 0x48, 0x01, 0x00);
SINGLE_COMPARE(add(qword[rax + rax * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0x80, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(qword[rbx + rax * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x43, 0x1b);
SINGLE_COMPARE(add(qword[rbx + rbp * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x6b, 0x1b);
SINGLE_COMPARE(add(qword[rbp + rbp * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0xad, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(qword[rbp + r12 + 0x1b], rax), 0x4a, 0x01, 0x44, 0x25, 0x1b);
SINGLE_COMPARE(add(qword[r12], rax), 0x49, 0x01, 0x04, 0x24);
SINGLE_COMPARE(add(qword[r13 + rbx + 0xabab], rax), 0x49, 0x01, 0x84, 0x1d, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(qword[rax + r13 * 2 + 0x1b], rsi), 0x4a, 0x01, 0x74, 0x68, 0x1b);
SINGLE_COMPARE(add(qword[rbp + rbx * 2], rsi), 0x48, 0x01, 0x74, 0x5d, 0x00);
SINGLE_COMPARE(add(qword[rsp + r10 * 2 + 0x1b], r10), 0x4e, 0x01, 0x54, 0x54, 0x1b);
// [addr], imm
SINGLE_COMPARE(add(byte[rax], 2), 0x80, 0x00, 0x02);
SINGLE_COMPARE(add(dword[rax], 2), 0x83, 0x00, 0x02);
SINGLE_COMPARE(add(dword[rax], 0xabcd), 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
SINGLE_COMPARE(add(qword[rax], 2), 0x48, 0x83, 0x00, 0x02);
SINGLE_COMPARE(add(qword[rax], 0xabcd), 0x48, 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseUnaryInstructionForms")
{
SINGLE_COMPARE(div(rcx), 0x48, 0xf7, 0xf1);
SINGLE_COMPARE(idiv(qword[rax]), 0x48, 0xf7, 0x38);
SINGLE_COMPARE(mul(qword[rax + rbx]), 0x48, 0xf7, 0x24, 0x18);
SINGLE_COMPARE(imul(r9), 0x49, 0xf7, 0xe9);
SINGLE_COMPARE(neg(r9), 0x49, 0xf7, 0xd9);
SINGLE_COMPARE(not_(r12), 0x49, 0xf7, 0xd4);
SINGLE_COMPARE(inc(r12), 0x49, 0xff, 0xc4);
SINGLE_COMPARE(dec(ecx), 0xff, 0xc9);
SINGLE_COMPARE(dec(byte[rdx]), 0xfe, 0x0a);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov")
{
SINGLE_COMPARE(mov(rcx, 1), 0x48, 0xb9, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(mov64(rcx, 0x1234567812345678ll), 0x48, 0xb9, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
SINGLE_COMPARE(mov(ecx, 2), 0xb9, 0x02, 0x00, 0x00, 0x00);
SINGLE_COMPARE(mov(cl, 2), 0xb1, 0x02);
SINGLE_COMPARE(mov(sil, 2), 0x48, 0xb6, 0x02);
SINGLE_COMPARE(mov(r9b, 2), 0x49, 0xb1, 0x02);
SINGLE_COMPARE(mov(rcx, qword[rdi]), 0x48, 0x8b, 0x0f);
SINGLE_COMPARE(mov(dword[rax], 0xabcd), 0xc7, 0x00, 0xcd, 0xab, 0x00, 0x00);
SINGLE_COMPARE(mov(r13, 1), 0x49, 0xbd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(mov64(r13, 0x1234567812345678ll), 0x49, 0xbd, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
SINGLE_COMPARE(mov(r13d, 2), 0x41, 0xbd, 0x02, 0x00, 0x00, 0x00);
SINGLE_COMPARE(mov(r13, qword[r12]), 0x4d, 0x8b, 0x2c, 0x24);
SINGLE_COMPARE(mov(dword[r13], 0xabcd), 0x41, 0xc7, 0x45, 0x00, 0xcd, 0xab, 0x00, 0x00);
SINGLE_COMPARE(mov(qword[rdx], r9), 0x4c, 0x89, 0x0a);
SINGLE_COMPARE(mov(byte[rsi], 0x3), 0xc6, 0x06, 0x03);
SINGLE_COMPARE(mov(byte[rsi], al), 0x88, 0x06);
SINGLE_COMPARE(mov(byte[rsi], dil), 0x48, 0x88, 0x3e);
SINGLE_COMPARE(mov(byte[rsi], r10b), 0x4c, 0x88, 0x16);
SINGLE_COMPARE(mov(wordReg(ebx), 0x3a3d), 0x66, 0xbb, 0x3d, 0x3a);
SINGLE_COMPARE(mov(word[rsi], 0x3a3d), 0x66, 0xc7, 0x06, 0x3d, 0x3a);
SINGLE_COMPARE(mov(word[rsi], wordReg(eax)), 0x66, 0x89, 0x06);
SINGLE_COMPARE(mov(word[rsi], wordReg(edi)), 0x66, 0x89, 0x3e);
SINGLE_COMPARE(mov(word[rsi], wordReg(r10)), 0x66, 0x44, 0x89, 0x16);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMovExtended")
{
SINGLE_COMPARE(movsx(eax, byte[rcx]), 0x0f, 0xbe, 0x01);
SINGLE_COMPARE(movsx(r12, byte[r10]), 0x4d, 0x0f, 0xbe, 0x22);
SINGLE_COMPARE(movsx(ebx, word[r11]), 0x41, 0x0f, 0xbf, 0x1b);
SINGLE_COMPARE(movsx(rdx, word[rcx]), 0x48, 0x0f, 0xbf, 0x11);
SINGLE_COMPARE(movzx(eax, byte[rcx]), 0x0f, 0xb6, 0x01);
SINGLE_COMPARE(movzx(r12, byte[r10]), 0x4d, 0x0f, 0xb6, 0x22);
SINGLE_COMPARE(movzx(ebx, word[r11]), 0x41, 0x0f, 0xb7, 0x1b);
SINGLE_COMPARE(movzx(rdx, word[rcx]), 0x48, 0x0f, 0xb7, 0x11);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfTest")
{
SINGLE_COMPARE(test(al, 8), 0xf6, 0xc0, 0x08);
SINGLE_COMPARE(test(eax, 8), 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
SINGLE_COMPARE(test(rax, 8), 0x48, 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
SINGLE_COMPARE(test(rcx, 0xabab), 0x48, 0xf7, 0xc1, 0xab, 0xab, 0x00, 0x00);
SINGLE_COMPARE(test(rcx, rax), 0x48, 0x85, 0xc8);
SINGLE_COMPARE(test(rax, qword[rcx]), 0x48, 0x85, 0x01);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfShift")
{
SINGLE_COMPARE(shl(al, 1), 0xd0, 0xe0);
SINGLE_COMPARE(shl(al, cl), 0xd2, 0xe0);
SINGLE_COMPARE(shl(sil, cl), 0x48, 0xd2, 0xe6);
SINGLE_COMPARE(shl(r10b, cl), 0x49, 0xd2, 0xe2);
SINGLE_COMPARE(shr(al, 4), 0xc0, 0xe8, 0x04);
SINGLE_COMPARE(shr(eax, 1), 0xd1, 0xe8);
SINGLE_COMPARE(sal(eax, cl), 0xd3, 0xe0);
SINGLE_COMPARE(sal(eax, 4), 0xc1, 0xe0, 0x04);
SINGLE_COMPARE(sar(rax, 4), 0x48, 0xc1, 0xf8, 0x04);
SINGLE_COMPARE(sar(r11, 1), 0x49, 0xd1, 0xfb);
SINGLE_COMPARE(rol(eax, 1), 0xd1, 0xc0);
SINGLE_COMPARE(rol(eax, cl), 0xd3, 0xc0);
SINGLE_COMPARE(ror(eax, 1), 0xd1, 0xc8);
SINGLE_COMPARE(ror(eax, cl), 0xd3, 0xc8);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfLea")
{
SINGLE_COMPARE(lea(rax, addr[rdx + rcx]), 0x48, 0x8d, 0x04, 0x0a);
SINGLE_COMPARE(lea(rax, addr[rdx + rax * 4]), 0x48, 0x8d, 0x04, 0x82);
SINGLE_COMPARE(lea(rax, addr[r13 + r12 * 4 + 4]), 0x4b, 0x8d, 0x44, 0xa5, 0x04);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfSetcc")
{
SINGLE_COMPARE(setcc(ConditionX64::NotEqual, bl), 0x0f, 0x95, 0xc3);
SINGLE_COMPARE(setcc(ConditionX64::NotEqual, dil), 0x48, 0x0f, 0x95, 0xc7);
SINGLE_COMPARE(setcc(ConditionX64::BelowEqual, byte[rcx]), 0x0f, 0x96, 0x01);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfCmov")
{
SINGLE_COMPARE(cmov(ConditionX64::LessEqual, ebx, eax), 0x0f, 0x4e, 0xd8);
SINGLE_COMPARE(cmov(ConditionX64::NotZero, rbx, qword[rax]), 0x48, 0x0f, 0x45, 0x18);
SINGLE_COMPARE(cmov(ConditionX64::Zero, rbx, qword[rax + rcx]), 0x48, 0x0f, 0x44, 0x1c, 0x08);
SINGLE_COMPARE(cmov(ConditionX64::BelowEqual, r14d, r15d), 0x45, 0x0f, 0x46, 0xf7);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfAbsoluteJumps")
{
SINGLE_COMPARE(jmp(rax), 0xff, 0xe0);
SINGLE_COMPARE(jmp(r14), 0x41, 0xff, 0xe6);
SINGLE_COMPARE(jmp(qword[r14 + rdx * 4]), 0x41, 0xff, 0x24, 0x96);
SINGLE_COMPARE(call(rax), 0xff, 0xd0);
SINGLE_COMPARE(call(r14), 0x41, 0xff, 0xd6);
SINGLE_COMPARE(call(qword[r14 + rdx * 4]), 0x41, 0xff, 0x14, 0x96);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfImul")
{
SINGLE_COMPARE(imul(ecx, esi), 0x0f, 0xaf, 0xce);
SINGLE_COMPARE(imul(r12, rax), 0x4c, 0x0f, 0xaf, 0xe0);
SINGLE_COMPARE(imul(r12, qword[rdx + rdi]), 0x4c, 0x0f, 0xaf, 0x24, 0x3a);
SINGLE_COMPARE(imul(ecx, edx, 8), 0x6b, 0xca, 0x08);
SINGLE_COMPARE(imul(ecx, r9d, 0xabcd), 0x41, 0x69, 0xc9, 0xcd, 0xab, 0x00, 0x00);
SINGLE_COMPARE(imul(r8d, eax, -9), 0x44, 0x6b, 0xc0, 0xf7);
SINGLE_COMPARE(imul(rcx, rdx, 17), 0x48, 0x6b, 0xca, 0x11);
SINGLE_COMPARE(imul(rcx, r12, 0xabcd), 0x49, 0x69, 0xcc, 0xcd, 0xab, 0x00, 0x00);
SINGLE_COMPARE(imul(r12, rax, -13), 0x4c, 0x6b, 0xe0, 0xf3);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "NopForms")
{
SINGLE_COMPARE(nop(), 0x90);
SINGLE_COMPARE(nop(2), 0x66, 0x90);
SINGLE_COMPARE(nop(3), 0x0f, 0x1f, 0x00);
SINGLE_COMPARE(nop(4), 0x0f, 0x1f, 0x40, 0x00);
SINGLE_COMPARE(nop(5), 0x0f, 0x1f, 0x44, 0x00, 0x00);
SINGLE_COMPARE(nop(6), 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00);
SINGLE_COMPARE(nop(7), 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(nop(8), 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(nop(9), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
SINGLE_COMPARE(nop(15), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00); // 9+6
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentForms")
{
CHECK(check(
[](AssemblyBuilderX64& build) {
build.ret();
build.align(8, AlignmentDataX64::Nop);
},
{0xc3, 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}));
CHECK(check(
[](AssemblyBuilderX64& build) {
build.ret();
build.align(32, AlignmentDataX64::Nop);
},
{0xc3, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x84,
0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x1f, 0x40, 0x00}));
CHECK(check(
[](AssemblyBuilderX64& build) {
build.ret();
build.align(8, AlignmentDataX64::Int3);
},
{0xc3, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc}));
CHECK(check(
[](AssemblyBuilderX64& build) {
build.ret();
build.align(8, AlignmentDataX64::Ud2);
},
{0xc3, 0x0f, 0x0b, 0x0f, 0x0b, 0x0f, 0x0b, 0xcc}));
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentOverflow")
{
// Test that alignment correctly resizes the code buffer
{
AssemblyBuilderX64 build(/* logText */ false);
build.ret();
build.align(8192, AlignmentDataX64::Nop);
build.finalize();
}
{
AssemblyBuilderX64 build(/* logText */ false);
build.ret();
build.align(8192, AlignmentDataX64::Int3);
build.finalize();
}
{
AssemblyBuilderX64 build(/* logText */ false);
for (int i = 0; i < 8192; i++)
build.int3();
build.finalize();
}
{
AssemblyBuilderX64 build(/* logText */ false);
build.ret();
build.align(8192, AlignmentDataX64::Ud2);
build.finalize();
}
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "ControlFlow")
{
// Jump back
CHECK(check(
[](AssemblyBuilderX64& build) {
Label start = build.setLabel();
build.add(rsi, 1);
build.cmp(rsi, rdi);
build.jcc(ConditionX64::Equal, start);
},
{0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf3, 0xff, 0xff, 0xff}));
// Jump back, but the label is set before use
CHECK(check(
[](AssemblyBuilderX64& build) {
Label start;
build.add(rsi, 1);
build.setLabel(start);
build.cmp(rsi, rdi);
build.jcc(ConditionX64::Equal, start);
},
{0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf7, 0xff, 0xff, 0xff}));
// Jump forward
CHECK(check(
[](AssemblyBuilderX64& build) {
Label skip;
build.cmp(rsi, rdi);
build.jcc(ConditionX64::Greater, skip);
build.or_(rdi, 0x3e);
build.setLabel(skip);
},
{0x48, 0x3b, 0xf7, 0x0f, 0x8f, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xcf, 0x3e}));
// Regular jump
CHECK(check(
[](AssemblyBuilderX64& build) {
Label skip;
build.jmp(skip);
build.and_(rdi, 0x3e);
build.setLabel(skip);
},
{0xe9, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xe7, 0x3e}));
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelCall")
{
CHECK(check(
[](AssemblyBuilderX64& build) {
Label fnB;
build.and_(rcx, 0x3e);
build.call(fnB);
build.ret();
build.setLabel(fnB);
build.lea(rax, addr[rcx + 0x1f]);
build.ret();
},
{0x48, 0x83, 0xe1, 0x3e, 0xe8, 0x01, 0x00, 0x00, 0x00, 0xc3, 0x48, 0x8d, 0x41, 0x1f, 0xc3}));
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXBinaryInstructionForms")
{
SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x58, 0xc6);
SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x29, 0x58, 0x01);
SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymm14), 0xc4, 0x41, 0x2d, 0x58, 0xc6);
SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymmword[r9]), 0xc4, 0x41, 0x2d, 0x58, 0x01);
SINGLE_COMPARE(vaddps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x58, 0xc6);
SINGLE_COMPARE(vaddps(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x28, 0x58, 0x01);
SINGLE_COMPARE(vaddsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x58, 0xc6);
SINGLE_COMPARE(vaddsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x58, 0x01);
SINGLE_COMPARE(vaddss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x58, 0xc6);
SINGLE_COMPARE(vaddss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x58, 0x01);
SINGLE_COMPARE(vaddps(xmm1, xmm2, xmm3), 0xc4, 0xe1, 0x68, 0x58, 0xcb);
SINGLE_COMPARE(vaddps(xmm9, xmm12, xmmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x18, 0x58, 0x4c, 0x71, 0x1c);
SINGLE_COMPARE(vaddps(ymm1, ymm2, ymm3), 0xc4, 0xe1, 0x6c, 0x58, 0xcb);
SINGLE_COMPARE(vaddps(ymm9, ymm12, ymmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x1c, 0x58, 0x4c, 0x71, 0x1c);
// Coverage for other instructions that follow the same pattern
SINGLE_COMPARE(vsubsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5c, 0xc6);
SINGLE_COMPARE(vmulsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x59, 0xc6);
SINGLE_COMPARE(vdivsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5e, 0xc6);
SINGLE_COMPARE(vsubps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5c, 0xc6);
SINGLE_COMPARE(vmulps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x59, 0xc6);
SINGLE_COMPARE(vdivps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5e, 0xc6);
SINGLE_COMPARE(vorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x56, 0xc6);
SINGLE_COMPARE(vxorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x57, 0xc6);
SINGLE_COMPARE(vorps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x56, 0xc6);
SINGLE_COMPARE(vandpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x54, 0xc6);
SINGLE_COMPARE(vandnpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x55, 0xc6);
SINGLE_COMPARE(vmaxsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5f, 0xc6);
SINGLE_COMPARE(vminsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5d, 0xc6);
SINGLE_COMPARE(vcmpltsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x01);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXUnaryMergeInstructionForms")
{
SINGLE_COMPARE(vsqrtpd(xmm8, xmm10), 0xc4, 0x41, 0x79, 0x51, 0xc2);
SINGLE_COMPARE(vsqrtpd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x51, 0x01);
SINGLE_COMPARE(vsqrtpd(ymm8, ymm10), 0xc4, 0x41, 0x7d, 0x51, 0xc2);
SINGLE_COMPARE(vsqrtpd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x51, 0x01);
SINGLE_COMPARE(vsqrtps(xmm8, xmm10), 0xc4, 0x41, 0x78, 0x51, 0xc2);
SINGLE_COMPARE(vsqrtps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x51, 0x01);
SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x51, 0xc6);
SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x51, 0x01);
SINGLE_COMPARE(vsqrtss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x51, 0xc6);
SINGLE_COMPARE(vsqrtss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x51, 0x01);
// Coverage for other instructions that follow the same pattern
SINGLE_COMPARE(vucomisd(xmm1, xmm4), 0xc4, 0xe1, 0x79, 0x2e, 0xcc);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXMoveInstructionForms")
{
SINGLE_COMPARE(vmovsd(qword[r9], xmm10), 0xc4, 0x41, 0x7b, 0x11, 0x11);
SINGLE_COMPARE(vmovsd(xmm8, qword[r9]), 0xc4, 0x41, 0x7b, 0x10, 0x01);
SINGLE_COMPARE(vmovsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x10, 0xc6);
SINGLE_COMPARE(vmovss(dword[r9], xmm10), 0xc4, 0x41, 0x7a, 0x11, 0x11);
SINGLE_COMPARE(vmovss(xmm8, dword[r9]), 0xc4, 0x41, 0x7a, 0x10, 0x01);
SINGLE_COMPARE(vmovss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x10, 0xc6);
SINGLE_COMPARE(vmovapd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x28, 0x01);
SINGLE_COMPARE(vmovapd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x29, 0x11);
SINGLE_COMPARE(vmovapd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x28, 0x01);
SINGLE_COMPARE(vmovaps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x28, 0x01);
SINGLE_COMPARE(vmovaps(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x29, 0x11);
SINGLE_COMPARE(vmovaps(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x28, 0x01);
SINGLE_COMPARE(vmovupd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x10, 0x01);
SINGLE_COMPARE(vmovupd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x11, 0x11);
SINGLE_COMPARE(vmovupd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x10, 0x01);
SINGLE_COMPARE(vmovups(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x10, 0x01);
SINGLE_COMPARE(vmovups(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x11, 0x11);
SINGLE_COMPARE(vmovups(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x10, 0x01);
SINGLE_COMPARE(vmovq(xmm1, rbx), 0xc4, 0xe1, 0xf9, 0x6e, 0xcb);
SINGLE_COMPARE(vmovq(rbx, xmm1), 0xc4, 0xe1, 0xf9, 0x7e, 0xcb);
SINGLE_COMPARE(vmovq(xmm1, qword[r9]), 0xc4, 0xc1, 0xf9, 0x6e, 0x09);
SINGLE_COMPARE(vmovq(qword[r9], xmm1), 0xc4, 0xc1, 0xf9, 0x7e, 0x09);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXConversionInstructionForms")
{
SINGLE_COMPARE(vcvttsd2si(ecx, xmm0), 0xc4, 0xe1, 0x7b, 0x2c, 0xc8);
SINGLE_COMPARE(vcvttsd2si(r9d, xmmword[rcx + rdx]), 0xc4, 0x61, 0x7b, 0x2c, 0x0c, 0x11);
SINGLE_COMPARE(vcvttsd2si(rdx, xmm0), 0xc4, 0xe1, 0xfb, 0x2c, 0xd0);
SINGLE_COMPARE(vcvttsd2si(r13, xmmword[rcx + rdx]), 0xc4, 0x61, 0xfb, 0x2c, 0x2c, 0x11);
SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, ecx), 0xc4, 0xe1, 0x2b, 0x2a, 0xe9);
SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, dword[rcx + rdx]), 0xc4, 0xe1, 0x23, 0x2a, 0x34, 0x11);
SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, r13), 0xc4, 0xc1, 0xab, 0x2a, 0xed);
SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x2a, 0x34, 0x11);
SINGLE_COMPARE(vcvtsd2ss(xmm5, xmm10, xmm11), 0xc4, 0xc1, 0x2b, 0x5a, 0xeb);
SINGLE_COMPARE(vcvtsd2ss(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x5a, 0x34, 0x11);
SINGLE_COMPARE(vcvtss2sd(xmm3, xmm8, xmm12), 0xc4, 0xc1, 0x3a, 0x5a, 0xdc);
SINGLE_COMPARE(vcvtss2sd(xmm4, xmm9, dword[rcx + rsi]), 0xc4, 0xe1, 0x32, 0x5a, 0x24, 0x31);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXTernaryInstructionForms")
{
SINGLE_COMPARE(vroundsd(xmm7, xmm12, xmm3, RoundingModeX64::RoundToNegativeInfinity), 0xc4, 0xe3, 0x19, 0x0b, 0xfb, 0x09);
SINGLE_COMPARE(
vroundsd(xmm8, xmm13, xmmword[r13 + rdx], RoundingModeX64::RoundToPositiveInfinity), 0xc4, 0x43, 0x11, 0x0b, 0x44, 0x15, 0x00, 0x0a);
SINGLE_COMPARE(vroundsd(xmm9, xmm14, xmmword[rcx + r10], RoundingModeX64::RoundToZero), 0xc4, 0x23, 0x09, 0x0b, 0x0c, 0x11, 0x0b);
SINGLE_COMPARE(vblendvpd(xmm7, xmm12, xmmword[rcx + r10], xmm5), 0xc4, 0xa3, 0x19, 0x4b, 0x3c, 0x11, 0x50);
SINGLE_COMPARE(vpshufps(xmm7, xmm12, xmmword[rcx + r10], 0b11010100), 0xc4, 0xa1, 0x18, 0xc6, 0x3c, 0x11, 0xd4);
SINGLE_COMPARE(vpinsrd(xmm7, xmm12, xmmword[rcx + r10], 2), 0xc4, 0xa3, 0x19, 0x22, 0x3c, 0x11, 0x02);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "MiscInstructions")
{
SINGLE_COMPARE(int3(), 0xcc);
SINGLE_COMPARE(ud2(), 0x0f, 0x0b);
SINGLE_COMPARE(bsr(eax, edx), 0x0f, 0xbd, 0xc2);
SINGLE_COMPARE(bsf(eax, edx), 0x0f, 0xbc, 0xc2);
SINGLE_COMPARE(bswap(eax), 0x0f, 0xc8);
SINGLE_COMPARE(bswap(r12d), 0x41, 0x0f, 0xcc);
SINGLE_COMPARE(bswap(rax), 0x48, 0x0f, 0xc8);
SINGLE_COMPARE(bswap(r12), 0x49, 0x0f, 0xcc);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelLea")
{
CHECK(check(
[](AssemblyBuilderX64& build) {
Label fn;
build.lea(rax, fn);
build.ret();
build.setLabel(fn);
build.ret();
},
{0x48, 0x8d, 0x05, 0x01, 0x00, 0x00, 0x00, 0xc3, 0xc3}));
}
TEST_CASE("LogTest")
{
AssemblyBuilderX64 build(/* logText= */ true);
build.push(r12);
build.align(8);
build.align(8, AlignmentDataX64::Int3);
build.align(8, AlignmentDataX64::Ud2);
build.add(rax, rdi);
build.add(rcx, 8);
build.sub(dword[rax], 0x1fdc);
build.and_(dword[rcx], 0x37);
build.mov(rdi, qword[rax + rsi * 2]);
build.vaddss(xmm0, xmm0, dword[rax + r14 * 2 + 0x1c]);
Label start = build.setLabel();
build.cmp(rsi, rdi);
build.jcc(ConditionX64::Equal, start);
build.lea(rcx, start);
build.lea(rcx, addr[rdx]);
build.jmp(qword[rdx]);
build.vaddps(ymm9, ymm12, ymmword[rbp + 0xc]);
build.vaddpd(ymm2, ymm7, build.f64(2.5));
build.neg(qword[rbp + r12 * 2]);
build.mov64(r10, 0x1234567812345678ll);
build.vmovapd(xmmword[rax], xmm11);
build.movzx(eax, byte[rcx]);
build.movsx(rsi, word[r12]);
build.imul(rcx, rdx);
build.imul(rcx, rdx, 8);
build.vroundsd(xmm1, xmm2, xmm3, RoundingModeX64::RoundToNearestEven);
build.add(rdx, qword[rcx - 12]);
build.pop(r12);
build.cmov(ConditionX64::AboveEqual, rax, rbx);
build.ret();
build.int3();
build.nop();
build.nop(2);
build.nop(3);
build.nop(4);
build.nop(5);
build.nop(6);
build.nop(7);
build.nop(8);
build.nop(9);
build.finalize();
std::string expected = R"(
push r12
; align 8
nop word ptr[rax+rax] ; 6-byte nop
; align 8 using int3
; align 8 using ud2
add rax,rdi
add rcx,8
sub dword ptr [rax],1FDCh
and dword ptr [rcx],37h
mov rdi,qword ptr [rax+rsi*2]
vaddss xmm0,xmm0,dword ptr [rax+r14*2+01Ch]
.L1:
cmp rsi,rdi
je .L1
lea rcx,.L1
lea rcx,[rdx]
jmp qword ptr [rdx]
vaddps ymm9,ymm12,ymmword ptr [rbp+0Ch]
vaddpd ymm2,ymm7,qword ptr [.start-8]
neg qword ptr [rbp+r12*2]
mov r10,1234567812345678h
vmovapd xmmword ptr [rax],xmm11
movzx eax,byte ptr [rcx]
movsx rsi,word ptr [r12]
imul rcx,rdx
imul rcx,rdx,8
vroundsd xmm1,xmm2,xmm3,8
add rdx,qword ptr [rcx-0Ch]
pop r12
cmovae rax,rbx
ret
int3
nop
xchg ax, ax ; 2-byte nop
nop dword ptr[rax] ; 3-byte nop
nop dword ptr[rax] ; 4-byte nop
nop dword ptr[rax+rax] ; 5-byte nop
nop word ptr[rax+rax] ; 6-byte nop
nop dword ptr[rax] ; 7-byte nop
nop dword ptr[rax+rax] ; 8-byte nop
nop word ptr[rax+rax] ; 9-byte nop
)";
CHECK("\n" + build.text == expected);
}
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "Constants")
{
// clang-format off
CHECK(check(
[](AssemblyBuilderX64& build) {
build.xor_(rax, rax);
build.add(rax, build.i64(0x1234567887654321));
build.vmovss(xmm2, build.f32(1.0f));
build.vmovsd(xmm3, build.f64(1.0));
build.vmovaps(xmm4, build.f32x4(1.0f, 2.0f, 4.0f, 8.0f));
char arr[16] = "hello world!123";
build.vmovupd(xmm5, build.bytes(arr, 16, 8));
build.vmovapd(xmm5, build.f64x2(5.0, 6.0));
build.ret();
},
{
0x48, 0x33, 0xc0,
0x48, 0x03, 0x05, 0xee, 0xff, 0xff, 0xff,
0xc4, 0xe1, 0x7a, 0x10, 0x15, 0xe1, 0xff, 0xff, 0xff,
0xc4, 0xe1, 0x7b, 0x10, 0x1d, 0xcc, 0xff, 0xff, 0xff,
0xc4, 0xe1, 0x78, 0x28, 0x25, 0xab, 0xff, 0xff, 0xff,
0xc4, 0xe1, 0x79, 0x10, 0x2d, 0x92, 0xff, 0xff, 0xff,
0xc4, 0xe1, 0x79, 0x28, 0x2d, 0x79, 0xff, 0xff, 0xff,
0xc3
},
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x40,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x40,
'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', '1', '2', '3', 0x0,
0x00, 0x00, 0x80, 0x3f,
0x00, 0x00, 0x00, 0x40,
0x00, 0x00, 0x80, 0x40,
0x00, 0x00, 0x00, 0x41,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // padding to align f32x4
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f,
0x00, 0x00, 0x00, 0x00, // padding to align f64
0x00, 0x00, 0x80, 0x3f,
0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
}));
// clang-format on
}
TEST_CASE("ConstantStorage")
{
AssemblyBuilderX64 build(/* logText= */ false);
for (int i = 0; i <= 3000; i++)
build.vaddss(xmm0, xmm0, build.i32(i));
build.finalize();
CHECK(build.data.size() == 12004);
for (int i = 0; i <= 3000; i++)
{
CHECK(build.data[i * 4 + 0] == ((3000 - i) & 0xff));
CHECK(build.data[i * 4 + 1] == ((3000 - i) >> 8));
CHECK(build.data[i * 4 + 2] == 0x00);
CHECK(build.data[i * 4 + 3] == 0x00);
}
}
TEST_CASE("ConstantStorageDedup")
{
ScopedFastFlag luauCache32BitAsmConsts{FFlag::LuauCache32BitAsmConsts, true};
AssemblyBuilderX64 build(/* logText= */ false);
for (int i = 0; i <= 3000; i++)
build.vaddss(xmm0, xmm0, build.f32(1.0f));
build.finalize();
CHECK(build.data.size() == 4);
CHECK(build.data[0] == 0x00);
CHECK(build.data[1] == 0x00);
CHECK(build.data[2] == 0x80);
CHECK(build.data[3] == 0x3f);
}
TEST_CASE("ConstantCaching")
{
AssemblyBuilderX64 build(/* logText= */ false);
OperandX64 two = build.f64(2);
// Force data relocation
for (int i = 0; i < 4096; i++)
build.f64(i);
CHECK(build.f64(2).imm == two.imm);
build.finalize();
}
TEST_SUITE_END();