mirror of
https://github.com/luau-lang/luau.git
synced 2025-01-22 10:48:05 +00:00
24cacc94ed
Some checks are pending
benchmark / callgrind (map[branch:main name:luau-lang/benchmark-data], ubuntu-22.04) (push) Waiting to run
build / macos (push) Waiting to run
build / macos-arm (push) Waiting to run
build / ubuntu (push) Waiting to run
build / windows (Win32) (push) Waiting to run
build / windows (x64) (push) Waiting to run
build / coverage (push) Waiting to run
build / web (push) Waiting to run
release / macos (push) Waiting to run
release / ubuntu (push) Waiting to run
release / windows (push) Waiting to run
release / web (push) Waiting to run
To implement math.lerp without branches, we add SELECT_NUM which selects one of the two inputs based on the comparison condition. For simplicity, we only support C == D for now; this can be extended to a more generic version with an IrCondition operand E, but that requires more work on the SSE side (to flip the comparison for some conditions like Greater, and expose more generic vcmpsd). Note: On AArch64 this will effectively result in a change in floating point behavior between native code and non-native code: clang synthesizes fmadd (because floating point contraction is allowed by default, and the arch always has the instruction), whereas this change will use fmul+fadd. I am not sure if this is good or bad, and if this is a problem in C or not. Specifically, clang's behavior results in different results between X64 and AArch64 when *not* using codegen, and with this change the behavior when using codegen is... the same? :) Fixing this will require either using LERP_NUM instead and hand-coding lowering, or exposing some sort of "quasi" MADD_NUM (which would lower to fma on AArch64 and mul+add on X64). A small benefit to the current approach is `lerp(1, 5, t)` constant-folds the subtraction. With LERP_NUM this optimization will need to be implemented manually as a partial constant-folding for LERP_NUM. A similar problem exists today for vector.cross & vector.dot. So maybe this is not something we need to fix, unsure.
809 lines
33 KiB
C++
809 lines
33 KiB
C++
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
#include "Luau/AssemblyBuilderX64.h"
|
|
#include "Luau/StringUtils.h"
|
|
|
|
#include "doctest.h"
|
|
#include "ScopedFlags.h"
|
|
|
|
#include <string.h>
|
|
|
|
using namespace Luau::CodeGen;
|
|
using namespace Luau::CodeGen::X64;
|
|
|
|
// Brings FFlag::LuauVectorLibNativeDot into scope; AVXTernaryInstructionForms forces it on through a ScopedFastFlag
LUAU_FASTFLAG(LuauVectorLibNativeDot);
|
|
|
|
static std::string bytecodeAsArray(const std::vector<uint8_t>& bytecode)
|
|
{
|
|
std::string result = "{";
|
|
|
|
for (size_t i = 0; i < bytecode.size(); i++)
|
|
Luau::formatAppend(result, "%s0x%02x", i == 0 ? "" : ", ", bytecode[i]);
|
|
|
|
return result.append("}");
|
|
}
|
|
|
|
class AssemblyBuilderX64Fixture
|
|
{
|
|
public:
|
|
bool check(void (*f)(AssemblyBuilderX64& build), std::vector<uint8_t> code, std::vector<uint8_t> data = {})
|
|
{
|
|
AssemblyBuilderX64 build(/* logText= */ false);
|
|
|
|
f(build);
|
|
|
|
build.finalize();
|
|
|
|
if (build.code != code)
|
|
{
|
|
printf("Expected code: %s\nReceived code: %s\n", bytecodeAsArray(code).c_str(), bytecodeAsArray(build.code).c_str());
|
|
return false;
|
|
}
|
|
|
|
if (build.data != data)
|
|
{
|
|
printf("Expected data: %s\nReceived data: %s\n", bytecodeAsArray(data).c_str(), bytecodeAsArray(build.data).c_str());
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// Opens the "x64Assembly" doctest suite; the test cases that follow are registered under it
TEST_SUITE_BEGIN("x64Assembly");
|
|
|
|
// Assembles the single builder call `inst` (written without the `build.` prefix, e.g. `add(rax, rcx)`)
// and CHECKs that the emitted code is exactly the byte list given by the remaining arguments.
// Must be used inside a test that derives from AssemblyBuilderX64Fixture, since it calls check().
#define SINGLE_COMPARE(inst, ...) \
    CHECK(check([](AssemblyBuilderX64& build) { build.inst; }, {__VA_ARGS__}))
|
|
|
|
// Exercises operand encodings of a two-operand integer instruction, using `add` as the representative.
// Both low (rax..rdi) and extended (r8..r15) registers are used in each position to cover the REX prefix bits.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseBinaryInstructionForms")
{
    // register <- register
    SINGLE_COMPARE(add(rax, rcx), 0x48, 0x03, 0xc1);
    SINGLE_COMPARE(add(rsp, r12), 0x49, 0x03, 0xe4);
    SINGLE_COMPARE(add(r14, r10), 0x4d, 0x03, 0xf2);

    // register <- immediate (the 8-bit immediate form is expected while the value fits, the 32-bit form otherwise)
    SINGLE_COMPARE(add(rax, 0), 0x48, 0x83, 0xc0, 0x00);
    SINGLE_COMPARE(add(rax, 0x7f), 0x48, 0x83, 0xc0, 0x7f);
    SINGLE_COMPARE(add(rax, 0x80), 0x48, 0x81, 0xc0, 0x80, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r10, 0x7fffffff), 0x49, 0x81, 0xc2, 0xff, 0xff, 0xff, 0x7f);
    SINGLE_COMPARE(add(al, 3), 0x80, 0xc0, 0x03);
    SINGLE_COMPARE(add(sil, 3), 0x48, 0x80, 0xc6, 0x03);
    SINGLE_COMPARE(add(r11b, 3), 0x49, 0x80, 0xc3, 0x03);

    // register <- [base]; rsp/r12 bases carry an extra 0x24 byte and rbp/r13 bases an explicit zero displacement
    SINGLE_COMPARE(add(rax, qword[rax]), 0x48, 0x03, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx]), 0x48, 0x03, 0x03);
    SINGLE_COMPARE(add(rax, qword[rsp]), 0x48, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[rbp]), 0x48, 0x03, 0x45, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10]), 0x49, 0x03, 0x02);
    SINGLE_COMPARE(add(rax, qword[r12]), 0x49, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[r13]), 0x49, 0x03, 0x45, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax]), 0x4c, 0x03, 0x20);
    SINGLE_COMPARE(add(r12, qword[rbx]), 0x4c, 0x03, 0x23);
    SINGLE_COMPARE(add(r12, qword[rsp]), 0x4c, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[rbp]), 0x4c, 0x03, 0x65, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10]), 0x4d, 0x03, 0x22);
    SINGLE_COMPARE(add(r12, qword[r12]), 0x4d, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[r13]), 0x4d, 0x03, 0x65, 0x00);

    // register <- [base + 8-bit displacement]
    SINGLE_COMPARE(add(rax, qword[rax + 0x1b]), 0x48, 0x03, 0x40, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbx + 0x1b]), 0x48, 0x03, 0x43, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rsp + 0x1b]), 0x48, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbp + 0x1b]), 0x48, 0x03, 0x45, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r10 + 0x1b]), 0x49, 0x03, 0x42, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r12 + 0x1b]), 0x49, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r13 + 0x1b]), 0x49, 0x03, 0x45, 0x1b);

    SINGLE_COMPARE(add(r12, qword[rax + 0x1b]), 0x4c, 0x03, 0x60, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbx + 0x1b]), 0x4c, 0x03, 0x63, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rsp + 0x1b]), 0x4c, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbp + 0x1b]), 0x4c, 0x03, 0x65, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r10 + 0x1b]), 0x4d, 0x03, 0x62, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r12 + 0x1b]), 0x4d, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r13 + 0x1b]), 0x4d, 0x03, 0x65, 0x1b);

    // register <- [base + 32-bit displacement]
    SINGLE_COMPARE(add(rax, qword[rax + 0xabab]), 0x48, 0x03, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx + 0xabab]), 0x48, 0x03, 0x83, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rsp + 0xabab]), 0x48, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp + 0xabab]), 0x48, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 + 0xabab]), 0x49, 0x03, 0x82, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 + 0xabab]), 0x49, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 + 0xabab]), 0x49, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax + 0xabab]), 0x4c, 0x03, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx + 0xabab]), 0x4c, 0x03, 0xa3, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rsp + 0xabab]), 0x4c, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp + 0xabab]), 0x4c, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 + 0xabab]), 0x4d, 0x03, 0xa2, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 + 0xabab]), 0x4d, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 + 0xabab]), 0x4d, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);

    // register <- [index * scale]; with no base register a 32-bit zero displacement is expected
    SINGLE_COMPARE(add(rax, qword[rax * 2]), 0x48, 0x03, 0x04, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx * 2]), 0x48, 0x03, 0x04, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp * 2]), 0x48, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 * 2]), 0x4a, 0x03, 0x04, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 * 2]), 0x4a, 0x03, 0x04, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 * 2]), 0x4a, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax * 2]), 0x4c, 0x03, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx * 2]), 0x4c, 0x03, 0x24, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp * 2]), 0x4c, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 * 2]), 0x4e, 0x03, 0x24, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 * 2]), 0x4e, 0x03, 0x24, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 * 2]), 0x4e, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);

    // register <- [base + index * scale + displacement]
    SINGLE_COMPARE(add(rax, qword[rax + rax * 2]), 0x48, 0x03, 0x04, 0x40);
    SINGLE_COMPARE(add(rax, qword[rax + rbx * 2 + 0x1b]), 0x48, 0x03, 0x44, 0x58, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + rbp * 2]), 0x48, 0x03, 0x04, 0x68);
    SINGLE_COMPARE(add(rax, qword[rax + rbp + 0xabab]), 0x48, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r12 + 0x1b]), 0x4a, 0x03, 0x44, 0x20, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r12 * 4 + 0xabab]), 0x4a, 0x03, 0x84, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r13 * 2 + 0x1b]), 0x4a, 0x03, 0x44, 0x68, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r13 + 0xabab]), 0x4a, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + r12 * 2]), 0x4e, 0x03, 0x24, 0x60);
    SINGLE_COMPARE(add(r12, qword[rax + r13 + 0xabab]), 0x4e, 0x03, 0xa4, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + rbp * 2 + 0x1b]), 0x4c, 0x03, 0x64, 0x68, 0x1b);

    // register <- [absolute 32-bit address]
    SINGLE_COMPARE(add(rax, qword[0]), 0x48, 0x03, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[0xabab]), 0x48, 0x03, 0x04, 0x25, 0xab, 0xab, 0x00, 0x00);

    // [memory] <- register (opcode 0x01 instead of 0x03)
    SINGLE_COMPARE(add(qword[rax], rax), 0x48, 0x01, 0x00);
    SINGLE_COMPARE(add(qword[rax + rax * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbx + rax * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x43, 0x1b);
    SINGLE_COMPARE(add(qword[rbx + rbp * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x6b, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbp * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0xad, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbp + r12 + 0x1b], rax), 0x4a, 0x01, 0x44, 0x25, 0x1b);
    SINGLE_COMPARE(add(qword[r12], rax), 0x49, 0x01, 0x04, 0x24);
    SINGLE_COMPARE(add(qword[r13 + rbx + 0xabab], rax), 0x49, 0x01, 0x84, 0x1d, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax + r13 * 2 + 0x1b], rsi), 0x4a, 0x01, 0x74, 0x68, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbx * 2], rsi), 0x48, 0x01, 0x74, 0x5d, 0x00);
    SINGLE_COMPARE(add(qword[rsp + r10 * 2 + 0x1b], r10), 0x4e, 0x01, 0x54, 0x54, 0x1b);

    // [memory] <- immediate, for byte, dword and qword destinations
    SINGLE_COMPARE(add(byte[rax], 2), 0x80, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 2), 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 0xabcd), 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax], 2), 0x48, 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(qword[rax], 0xabcd), 0x48, 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
}
|
|
|
|
// Single-operand integer instructions with register and memory operands
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseUnaryInstructionForms")
{
    // Multiply / divide family
    SINGLE_COMPARE(div(rcx), 0x48, 0xf7, 0xf1);
    SINGLE_COMPARE(idiv(qword[rax]), 0x48, 0xf7, 0x38);
    SINGLE_COMPARE(mul(qword[rax + rbx]), 0x48, 0xf7, 0x24, 0x18);
    SINGLE_COMPARE(imul(r9), 0x49, 0xf7, 0xe9);

    // Negate / complement
    SINGLE_COMPARE(neg(r9), 0x49, 0xf7, 0xd9);
    SINGLE_COMPARE(not_(r12), 0x49, 0xf7, 0xd4);

    // Increment / decrement, including 32-bit and byte-sized operands
    SINGLE_COMPARE(inc(r12), 0x49, 0xff, 0xc4);
    SINGLE_COMPARE(dec(ecx), 0xff, 0xc9);
    SINGLE_COMPARE(dec(byte[rdx]), 0xfe, 0x0a);
}
|
|
|
|
// `mov` / `mov64` with immediates, registers and memory at 8, 16, 32 and 64-bit operand sizes
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov")
{
    // Low registers
    SINGLE_COMPARE(mov(rcx, 1), 0x48, 0xb9, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(rcx, 0x1234567812345678ll), 0x48, 0xb9, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(ecx, 2), 0xb9, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(cl, 2), 0xb1, 0x02);
    SINGLE_COMPARE(mov(sil, 2), 0x48, 0xb6, 0x02);
    SINGLE_COMPARE(mov(r9b, 2), 0x49, 0xb1, 0x02);
    SINGLE_COMPARE(mov(rcx, qword[rdi]), 0x48, 0x8b, 0x0f);
    SINGLE_COMPARE(mov(dword[rax], 0xabcd), 0xc7, 0x00, 0xcd, 0xab, 0x00, 0x00);

    // Extended registers
    SINGLE_COMPARE(mov(r13, 1), 0x49, 0xbd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(r13, 0x1234567812345678ll), 0x49, 0xbd, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(r13d, 2), 0x41, 0xbd, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(r13, qword[r12]), 0x4d, 0x8b, 0x2c, 0x24);
    SINGLE_COMPARE(mov(dword[r13], 0xabcd), 0x41, 0xc7, 0x45, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(mov(qword[rdx], r9), 0x4c, 0x89, 0x0a);

    // Byte-sized stores
    SINGLE_COMPARE(mov(byte[rsi], 0x3), 0xc6, 0x06, 0x03);
    SINGLE_COMPARE(mov(byte[rsi], al), 0x88, 0x06);
    SINGLE_COMPARE(mov(byte[rsi], dil), 0x48, 0x88, 0x3e);
    SINGLE_COMPARE(mov(byte[rsi], r10b), 0x4c, 0x88, 0x16);

    // Word-sized forms (0x66 prefix)
    SINGLE_COMPARE(mov(wordReg(ebx), 0x3a3d), 0x66, 0xbb, 0x3d, 0x3a);
    SINGLE_COMPARE(mov(word[rsi], 0x3a3d), 0x66, 0xc7, 0x06, 0x3d, 0x3a);
    SINGLE_COMPARE(mov(word[rsi], wordReg(eax)), 0x66, 0x89, 0x06);
    SINGLE_COMPARE(mov(word[rsi], wordReg(edi)), 0x66, 0x89, 0x3e);
    SINGLE_COMPARE(mov(word[rsi], wordReg(r10)), 0x66, 0x44, 0x89, 0x16);
}
|
|
|
|
// Sign-extending (movsx) and zero-extending (movzx) loads from byte and word memory operands
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMovExtended")
{
    // movsx
    SINGLE_COMPARE(movsx(eax, byte[rcx]), 0x0f, 0xbe, 0x01);
    SINGLE_COMPARE(movsx(r12, byte[r10]), 0x4d, 0x0f, 0xbe, 0x22);
    SINGLE_COMPARE(movsx(ebx, word[r11]), 0x41, 0x0f, 0xbf, 0x1b);
    SINGLE_COMPARE(movsx(rdx, word[rcx]), 0x48, 0x0f, 0xbf, 0x11);

    // movzx
    SINGLE_COMPARE(movzx(eax, byte[rcx]), 0x0f, 0xb6, 0x01);
    SINGLE_COMPARE(movzx(r12, byte[r10]), 0x4d, 0x0f, 0xb6, 0x22);
    SINGLE_COMPARE(movzx(ebx, word[r11]), 0x41, 0x0f, 0xb7, 0x1b);
    SINGLE_COMPARE(movzx(rdx, word[rcx]), 0x48, 0x0f, 0xb7, 0x11);
}
|
|
|
|
// `test` against immediates, registers and memory
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfTest")
{
    // register, immediate
    SINGLE_COMPARE(test(al, 8), 0xf6, 0xc0, 0x08);
    SINGLE_COMPARE(test(eax, 8), 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rax, 8), 0x48, 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rcx, 0xabab), 0x48, 0xf7, 0xc1, 0xab, 0xab, 0x00, 0x00);

    // register, register / memory
    SINGLE_COMPARE(test(rcx, rax), 0x48, 0x85, 0xc8);
    SINGLE_COMPARE(test(rax, qword[rcx]), 0x48, 0x85, 0x01);
}
|
|
|
|
// Shifts and rotates by one, by an immediate count, and by the count held in cl
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfShift")
{
    // shl on byte registers
    SINGLE_COMPARE(shl(al, 1), 0xd0, 0xe0);
    SINGLE_COMPARE(shl(al, cl), 0xd2, 0xe0);
    SINGLE_COMPARE(shl(sil, cl), 0x48, 0xd2, 0xe6);
    SINGLE_COMPARE(shl(r10b, cl), 0x49, 0xd2, 0xe2);

    // shr / sal / sar
    SINGLE_COMPARE(shr(al, 4), 0xc0, 0xe8, 0x04);
    SINGLE_COMPARE(shr(eax, 1), 0xd1, 0xe8);
    SINGLE_COMPARE(sal(eax, cl), 0xd3, 0xe0);
    SINGLE_COMPARE(sal(eax, 4), 0xc1, 0xe0, 0x04);
    SINGLE_COMPARE(sar(rax, 4), 0x48, 0xc1, 0xf8, 0x04);
    SINGLE_COMPARE(sar(r11, 1), 0x49, 0xd1, 0xfb);

    // rol / ror
    SINGLE_COMPARE(rol(eax, 1), 0xd1, 0xc0);
    SINGLE_COMPARE(rol(eax, cl), 0xd3, 0xc0);
    SINGLE_COMPARE(ror(eax, 1), 0xd1, 0xc8);
    SINGLE_COMPARE(ror(eax, cl), 0xd3, 0xc8);
}
|
|
|
|
// `lea` with base+index, scaled index, and scaled index plus displacement on extended registers
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfLea")
{
    SINGLE_COMPARE(lea(rax, addr[rdx + rcx]), 0x48, 0x8d, 0x04, 0x0a);
    SINGLE_COMPARE(lea(rax, addr[rdx + rax * 4]), 0x48, 0x8d, 0x04, 0x82);
    SINGLE_COMPARE(lea(rax, addr[r13 + r12 * 4 + 4]), 0x4b, 0x8d, 0x44, 0xa5, 0x04);
}
|
|
|
|
// `setcc` writing to byte registers and to a byte in memory
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfSetcc")
{
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, bl), 0x0f, 0x95, 0xc3);
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, dil), 0x48, 0x0f, 0x95, 0xc7);
    SINGLE_COMPARE(setcc(ConditionX64::BelowEqual, byte[rcx]), 0x0f, 0x96, 0x01);
}
|
|
|
|
// Conditional moves from register and memory sources under several conditions
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfCmov")
{
    SINGLE_COMPARE(cmov(ConditionX64::LessEqual, ebx, eax), 0x0f, 0x4e, 0xd8);
    SINGLE_COMPARE(cmov(ConditionX64::NotZero, rbx, qword[rax]), 0x48, 0x0f, 0x45, 0x18);
    SINGLE_COMPARE(cmov(ConditionX64::Zero, rbx, qword[rax + rcx]), 0x48, 0x0f, 0x44, 0x1c, 0x08);
    SINGLE_COMPARE(cmov(ConditionX64::BelowEqual, r14d, r15d), 0x45, 0x0f, 0x46, 0xf7);
}
|
|
|
|
// Indirect `jmp` and `call` through a register or a memory operand
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfAbsoluteJumps")
{
    // jmp
    SINGLE_COMPARE(jmp(rax), 0xff, 0xe0);
    SINGLE_COMPARE(jmp(r14), 0x41, 0xff, 0xe6);
    SINGLE_COMPARE(jmp(qword[r14 + rdx * 4]), 0x41, 0xff, 0x24, 0x96);

    // call
    SINGLE_COMPARE(call(rax), 0xff, 0xd0);
    SINGLE_COMPARE(call(r14), 0x41, 0xff, 0xd6);
    SINGLE_COMPARE(call(qword[r14 + rdx * 4]), 0x41, 0xff, 0x14, 0x96);
}
|
|
|
|
// Two-operand and three-operand (register, source, immediate) forms of `imul`
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfImul")
{
    // destination *= source
    SINGLE_COMPARE(imul(ecx, esi), 0x0f, 0xaf, 0xce);
    SINGLE_COMPARE(imul(r12, rax), 0x4c, 0x0f, 0xaf, 0xe0);
    SINGLE_COMPARE(imul(r12, qword[rdx + rdi]), 0x4c, 0x0f, 0xaf, 0x24, 0x3a);

    // destination = source * immediate, 32-bit registers
    SINGLE_COMPARE(imul(ecx, edx, 8), 0x6b, 0xca, 0x08);
    SINGLE_COMPARE(imul(ecx, r9d, 0xabcd), 0x41, 0x69, 0xc9, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r8d, eax, -9), 0x44, 0x6b, 0xc0, 0xf7);

    // destination = source * immediate, 64-bit registers
    SINGLE_COMPARE(imul(rcx, rdx, 17), 0x48, 0x6b, 0xca, 0x11);
    SINGLE_COMPARE(imul(rcx, r12, 0xabcd), 0x49, 0x69, 0xcc, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r12, rax, -13), 0x4c, 0x6b, 0xe0, 0xf3);
}
|
|
|
|
// Multi-byte nop encodings for every length from 1 to 9, plus a length that has to be split
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "NopForms")
{
    SINGLE_COMPARE(nop(), 0x90);
    SINGLE_COMPARE(nop(2), 0x66, 0x90);
    SINGLE_COMPARE(nop(3), 0x0f, 0x1f, 0x00);
    SINGLE_COMPARE(nop(4), 0x0f, 0x1f, 0x40, 0x00);
    SINGLE_COMPARE(nop(5), 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(6), 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(7), 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(8), 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(9), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);

    // 15 bytes are expected as a 9-byte nop followed by a 6-byte nop
    SINGLE_COMPARE(nop(15), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00); // 9+6
}
|
|
|
|
// Checks the filler bytes produced by align() for each AlignmentDataX64 mode.
// Every scenario first emits a one-byte `ret` (0xc3), so the filler has to cover (alignment - 1) bytes.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentForms")
{
    // Nop filler up to an 8-byte boundary: one 7-byte nop
    const std::vector<uint8_t> nopTo8 = {0xc3, 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00};

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Nop);
        },
        nopTo8
    ));

    // Nop filler up to a 32-byte boundary: three 9-byte nops followed by a 4-byte nop
    const std::vector<uint8_t> nopTo32 = {
        0xc3,
        0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x0f, 0x1f, 0x40, 0x00,
    };

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(32, AlignmentDataX64::Nop);
        },
        nopTo32
    ));

    // Int3 filler: seven 0xcc bytes
    const std::vector<uint8_t> int3To8 = {0xc3, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc};

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Int3);
        },
        int3To8
    ));

    // Ud2 filler: three 2-byte ud2 instructions, with a single int3 covering the odd remaining byte
    const std::vector<uint8_t> ud2To8 = {0xc3, 0x0f, 0x0b, 0x0f, 0x0b, 0x0f, 0x0b, 0xcc};

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Ud2);
        },
        ud2To8
    ));
}
|
|
|
|
// Test that alignment correctly resizes the code buffer.
// Nothing is asserted about the output; each scenario only has to run to completion.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentOverflow")
{
    // Emits `ret` and then pads to an 8192-byte boundary with the requested filler
    const auto padAfterRet = [](AlignmentDataX64 filler)
    {
        AssemblyBuilderX64 build(/* logText */ false);

        build.ret();
        build.align(8192, filler);
        build.finalize();
    };

    padAfterRet(AlignmentDataX64::Nop);
    padAfterRet(AlignmentDataX64::Int3);

    // Same amount of output, but produced by 8192 individual int3 instructions rather than align()
    {
        AssemblyBuilderX64 build(/* logText */ false);

        for (int emitted = 0; emitted < 8192; ++emitted)
            build.int3();

        build.finalize();
    }

    padAfterRet(AlignmentDataX64::Ud2);
}
|
|
|
|
// Label resolution for conditional and unconditional jumps, both backwards and forwards.
// All jumps are expected in their 32-bit relative form.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "ControlFlow")
{
    // Jump back to a label placed at the very start of the code
    const std::vector<uint8_t> jumpBackToStart = {
        0x48, 0x83, 0xc6, 0x01,             // add rsi, 1
        0x48, 0x3b, 0xf7,                   // cmp rsi, rdi
        0x0f, 0x84, 0xf3, 0xff, 0xff, 0xff, // je -13
    };

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label start = build.setLabel();
            build.add(rsi, 1);
            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Equal, start);
        },
        jumpBackToStart
    ));

    // Jump back, but the label is set before use (declared first, bound after the add)
    const std::vector<uint8_t> jumpBackPastAdd = {
        0x48, 0x83, 0xc6, 0x01,             // add rsi, 1
        0x48, 0x3b, 0xf7,                   // cmp rsi, rdi
        0x0f, 0x84, 0xf7, 0xff, 0xff, 0xff, // je -9
    };

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label start;
            build.add(rsi, 1);
            build.setLabel(start);
            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Equal, start);
        },
        jumpBackPastAdd
    ));

    // Jump forward over one instruction to a label bound at the end
    const std::vector<uint8_t> jumpForward = {
        0x48, 0x3b, 0xf7,                   // cmp rsi, rdi
        0x0f, 0x8f, 0x04, 0x00, 0x00, 0x00, // jg +4
        0x48, 0x83, 0xcf, 0x3e,             // or rdi, 0x3e
    };

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label skip;

            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Greater, skip);
            build.or_(rdi, 0x3e);
            build.setLabel(skip);
        },
        jumpForward
    ));

    // Regular (unconditional) jump forward
    const std::vector<uint8_t> plainJump = {
        0xe9, 0x04, 0x00, 0x00, 0x00, // jmp +4
        0x48, 0x83, 0xe7, 0x3e,       // and rdi, 0x3e
    };

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label skip;

            build.jmp(skip);
            build.and_(rdi, 0x3e);
            build.setLabel(skip);
        },
        plainJump
    ));
}
|
|
|
|
// A relative `call` to a label that is bound later in the same code block
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelCall")
{
    const std::vector<uint8_t> expected = {
        0x48, 0x83, 0xe1, 0x3e,       // and rcx, 0x3e
        0xe8, 0x01, 0x00, 0x00, 0x00, // call +1 (skips the ret below)
        0xc3,                         // ret
        0x48, 0x8d, 0x41, 0x1f,       // fnB: lea rax, [rcx + 0x1f]
        0xc3,                         // ret
    };

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label fnB;

            build.and_(rcx, 0x3e);
            build.call(fnB);
            build.ret();

            build.setLabel(fnB);
            build.lea(rax, addr[rcx + 0x1f]);
            build.ret();
        },
        expected
    ));
}
|
|
|
|
// Three-operand AVX arithmetic / logic instructions with register and memory sources
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXBinaryInstructionForms")
{
    // vadd in packed-double, packed-single, scalar-double and scalar-single variants
    SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x58, 0xc6);
    SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x29, 0x58, 0x01);
    SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymm14), 0xc4, 0x41, 0x2d, 0x58, 0xc6);
    SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymmword[r9]), 0xc4, 0x41, 0x2d, 0x58, 0x01);
    SINGLE_COMPARE(vaddps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x58, 0xc6);
    SINGLE_COMPARE(vaddps(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x28, 0x58, 0x01);
    SINGLE_COMPARE(vaddsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x58, 0xc6);
    SINGLE_COMPARE(vaddsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x58, 0x01);
    SINGLE_COMPARE(vaddss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x58, 0xc6);
    SINGLE_COMPARE(vaddss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x58, 0x01);

    // Low registers, and a full base+index*scale+displacement memory operand
    SINGLE_COMPARE(vaddps(xmm1, xmm2, xmm3), 0xc4, 0xe1, 0x68, 0x58, 0xcb);
    SINGLE_COMPARE(vaddps(xmm9, xmm12, xmmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x18, 0x58, 0x4c, 0x71, 0x1c);
    SINGLE_COMPARE(vaddps(ymm1, ymm2, ymm3), 0xc4, 0xe1, 0x6c, 0x58, 0xcb);
    SINGLE_COMPARE(vaddps(ymm9, ymm12, ymmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x1c, 0x58, 0x4c, 0x71, 0x1c);

    // Coverage for other instructions that follow the same pattern
    SINGLE_COMPARE(vsubsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5c, 0xc6);
    SINGLE_COMPARE(vmulsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x59, 0xc6);
    SINGLE_COMPARE(vdivsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5e, 0xc6);

    SINGLE_COMPARE(vsubps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5c, 0xc6);
    SINGLE_COMPARE(vmulps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x59, 0xc6);
    SINGLE_COMPARE(vdivps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5e, 0xc6);

    SINGLE_COMPARE(vorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x56, 0xc6);
    SINGLE_COMPARE(vxorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x57, 0xc6);
    SINGLE_COMPARE(vorps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x56, 0xc6);

    SINGLE_COMPARE(vandpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x54, 0xc6);
    SINGLE_COMPARE(vandnpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x55, 0xc6);

    SINGLE_COMPARE(vmaxsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5f, 0xc6);
    SINGLE_COMPARE(vminsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5d, 0xc6);

    // Comparisons share one opcode (0xc2); the trailing byte differs between eq and lt
    SINGLE_COMPARE(vcmpeqsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x00);
    SINGLE_COMPARE(vcmpltsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x01);
}
|
|
|
|
// AVX square root in packed (two-operand) and scalar (three-operand) forms, plus vucomisd
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXUnaryMergeInstructionForms")
{
    // Packed forms: destination, source
    SINGLE_COMPARE(vsqrtpd(xmm8, xmm10), 0xc4, 0x41, 0x79, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymm10), 0xc4, 0x41, 0x7d, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtps(xmm8, xmm10), 0xc4, 0x41, 0x78, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x51, 0x01);

    // Scalar forms: destination, first source, second source
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x51, 0x01);

    // Coverage for other instructions that follow the same pattern
    SINGLE_COMPARE(vucomisd(xmm1, xmm4), 0xc4, 0xe1, 0x79, 0x2e, 0xcc);
}
|
|
|
|
// AVX moves: scalar, aligned and unaligned packed, and vmovq between xmm and general-purpose registers / memory
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXMoveInstructionForms")
{
    // Scalar moves (store, load, register merge)
    SINGLE_COMPARE(vmovsd(qword[r9], xmm10), 0xc4, 0x41, 0x7b, 0x11, 0x11);
    SINGLE_COMPARE(vmovsd(xmm8, qword[r9]), 0xc4, 0x41, 0x7b, 0x10, 0x01);
    SINGLE_COMPARE(vmovsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x10, 0xc6);
    SINGLE_COMPARE(vmovss(dword[r9], xmm10), 0xc4, 0x41, 0x7a, 0x11, 0x11);
    SINGLE_COMPARE(vmovss(xmm8, dword[r9]), 0xc4, 0x41, 0x7a, 0x10, 0x01);
    SINGLE_COMPARE(vmovss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x10, 0xc6);

    // Aligned packed moves
    SINGLE_COMPARE(vmovapd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x28, 0x01);
    SINGLE_COMPARE(vmovapd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x29, 0x11);
    SINGLE_COMPARE(vmovapd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x29, 0x11);
    SINGLE_COMPARE(vmovaps(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x28, 0x01);

    // Unaligned packed moves
    SINGLE_COMPARE(vmovupd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x10, 0x01);
    SINGLE_COMPARE(vmovupd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x11, 0x11);
    SINGLE_COMPARE(vmovupd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x11, 0x11);
    SINGLE_COMPARE(vmovups(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x10, 0x01);

    // 64-bit transfers between xmm and general-purpose registers or memory
    SINGLE_COMPARE(vmovq(xmm1, rbx), 0xc4, 0xe1, 0xf9, 0x6e, 0xcb);
    SINGLE_COMPARE(vmovq(rbx, xmm1), 0xc4, 0xe1, 0xf9, 0x7e, 0xcb);
    SINGLE_COMPARE(vmovq(xmm1, qword[r9]), 0xc4, 0xc1, 0xf9, 0x6e, 0x09);
    SINGLE_COMPARE(vmovq(qword[r9], xmm1), 0xc4, 0xc1, 0xf9, 0x7e, 0x09);
}
|
|
|
|
// AVX conversions between doubles, floats and 32/64-bit integers
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXConversionInstructionForms")
{
    // double -> integer (truncating), 32-bit then 64-bit destinations
    SINGLE_COMPARE(vcvttsd2si(ecx, xmm0), 0xc4, 0xe1, 0x7b, 0x2c, 0xc8);
    SINGLE_COMPARE(vcvttsd2si(r9d, xmmword[rcx + rdx]), 0xc4, 0x61, 0x7b, 0x2c, 0x0c, 0x11);
    SINGLE_COMPARE(vcvttsd2si(rdx, xmm0), 0xc4, 0xe1, 0xfb, 0x2c, 0xd0);
    SINGLE_COMPARE(vcvttsd2si(r13, xmmword[rcx + rdx]), 0xc4, 0x61, 0xfb, 0x2c, 0x2c, 0x11);

    // integer -> double, 32-bit then 64-bit sources
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, ecx), 0xc4, 0xe1, 0x2b, 0x2a, 0xe9);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, dword[rcx + rdx]), 0xc4, 0xe1, 0x23, 0x2a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, r13), 0xc4, 0xc1, 0xab, 0x2a, 0xed);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x2a, 0x34, 0x11);

    // double <-> float
    SINGLE_COMPARE(vcvtsd2ss(xmm5, xmm10, xmm11), 0xc4, 0xc1, 0x2b, 0x5a, 0xeb);
    SINGLE_COMPARE(vcvtsd2ss(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x5a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtss2sd(xmm3, xmm8, xmm12), 0xc4, 0xc1, 0x3a, 0x5a, 0xdc);
    SINGLE_COMPARE(vcvtss2sd(xmm4, xmm9, dword[rcx + rsi]), 0xc4, 0xe1, 0x32, 0x5a, 0x24, 0x31);
}
|
|
|
|
// AVX instructions that take a fourth operand (rounding mode, mask register or immediate)
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXTernaryInstructionForms")
{
    // The whole test case runs with LuauVectorLibNativeDot forced on
    ScopedFastFlag sff{FFlag::LuauVectorLibNativeDot, true};

    // vroundsd with each of the three rounding modes used here
    SINGLE_COMPARE(vroundsd(xmm7, xmm12, xmm3, RoundingModeX64::RoundToNegativeInfinity), 0xc4, 0xe3, 0x19, 0x0b, 0xfb, 0x09);
    SINGLE_COMPARE(
        vroundsd(xmm8, xmm13, xmmword[r13 + rdx], RoundingModeX64::RoundToPositiveInfinity), 0xc4, 0x43, 0x11, 0x0b, 0x44, 0x15, 0x00, 0x0a
    );
    SINGLE_COMPARE(vroundsd(xmm9, xmm14, xmmword[rcx + r10], RoundingModeX64::RoundToZero), 0xc4, 0x23, 0x09, 0x0b, 0x0c, 0x11, 0x0b);

    // Blend selected by a mask register
    SINGLE_COMPARE(vblendvpd(xmm7, xmm12, xmmword[rcx + r10], xmm5), 0xc4, 0xa3, 0x19, 0x4b, 0x3c, 0x11, 0x50);

    // Shuffle / insert controlled by an 8-bit immediate
    SINGLE_COMPARE(vpshufps(xmm7, xmm12, xmmword[rcx + r10], 0b11010100), 0xc4, 0xa1, 0x18, 0xc6, 0x3c, 0x11, 0xd4);
    SINGLE_COMPARE(vpinsrd(xmm7, xmm12, xmmword[rcx + r10], 2), 0xc4, 0xa3, 0x19, 0x22, 0x3c, 0x11, 0x02);

    // Dot product with an 8-bit immediate
    SINGLE_COMPARE(vdpps(xmm7, xmm12, xmmword[rcx + r10], 2), 0xc4, 0xa3, 0x19, 0x40, 0x3c, 0x11, 0x02);
}
|
|
|
|
// Instructions that do not fit the other groups: traps, bit scans and byte swaps
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "MiscInstructions")
{
    // Traps
    SINGLE_COMPARE(int3(), 0xcc);
    SINGLE_COMPARE(ud2(), 0x0f, 0x0b);

    // Bit scans
    SINGLE_COMPARE(bsr(eax, edx), 0x0f, 0xbd, 0xc2);
    SINGLE_COMPARE(bsf(eax, edx), 0x0f, 0xbc, 0xc2);

    // Byte swaps on 32-bit and 64-bit, low and extended registers
    SINGLE_COMPARE(bswap(eax), 0x0f, 0xc8);
    SINGLE_COMPARE(bswap(r12d), 0x41, 0x0f, 0xcc);
    SINGLE_COMPARE(bswap(rax), 0x48, 0x0f, 0xc8);
    SINGLE_COMPARE(bswap(r12), 0x49, 0x0f, 0xcc);
}
|
|
|
|
// `lea` of a code label that is bound after the instruction referencing it
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelLea")
{
    const std::vector<uint8_t> expected = {
        0x48, 0x8d, 0x05, 0x01, 0x00, 0x00, 0x00, // lea rax, [rip + 1] (skips the ret below)
        0xc3,                                     // ret
        0xc3,                                     // fn: ret
    };

    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label fn;
            build.lea(rax, fn);
            build.ret();

            build.setLabel(fn);
            build.ret();
        },
        expected
    ));
}
|
|
|
|
// Emits a mix of instructions with text logging enabled and compares the builder's text output
// (build.text) against a hand-written listing.
// NOTE(review): in this copy of the file the expected listing below contains '|' separator lines and
// single-space column gaps that look like extraction artifacts — confirm the literal against the
// upstream file before relying on it.
TEST_CASE("LogTest")
|
|
{
|
|
// logText is true here, unlike the other tests in this file, so build.text is populated
AssemblyBuilderX64 build(/* logText= */ true);
|
|
|
|
build.push(r12);
|
|
// One align() call per fill mode: the default, int3 and ud2
build.align(8);
|
|
build.align(8, AlignmentDataX64::Int3);
|
|
build.align(8, AlignmentDataX64::Ud2);
|
|
|
|
// Integer instructions with register, immediate and memory operands
build.add(rax, rdi);
|
|
build.add(rcx, 8);
|
|
build.sub(dword[rax], 0x1fdc);
|
|
build.and_(dword[rcx], 0x37);
|
|
build.mov(rdi, qword[rax + rsi * 2]);
|
|
build.vaddss(xmm0, xmm0, dword[rax + r14 * 2 + 0x1c]);
|
|
|
|
// A label that is bound first and then referenced by jcc and lea (printed as .L1 in the listing)
Label start = build.setLabel();
|
|
build.cmp(rsi, rdi);
|
|
build.jcc(ConditionX64::Equal, start);
|
|
build.lea(rcx, start);
|
|
build.lea(rcx, addr[rdx]);
|
|
|
|
build.jmp(qword[rdx]);
|
|
build.vaddps(ymm9, ymm12, ymmword[rbp + 0xc]);
|
|
// Operand comes from the constant pool; the listing shows it relative to .start
build.vaddpd(ymm2, ymm7, build.f64(2.5));
|
|
build.neg(qword[rbp + r12 * 2]);
|
|
build.mov64(r10, 0x1234567812345678ll);
|
|
build.vmovapd(xmmword[rax], xmm11);
|
|
build.movzx(eax, byte[rcx]);
|
|
build.movsx(rsi, word[r12]);
|
|
build.imul(rcx, rdx);
|
|
build.imul(rcx, rdx, 8);
|
|
build.vroundsd(xmm1, xmm2, xmm3, RoundingModeX64::RoundToNearestEven);
|
|
// Negative displacement
build.add(rdx, qword[rcx - 12]);
|
|
build.pop(r12);
|
|
build.cmov(ConditionX64::AboveEqual, rax, rbx);
|
|
build.ret();
|
|
build.int3();
|
|
|
|
// Every nop length from 1 to 9 bytes
build.nop();
|
|
build.nop(2);
|
|
build.nop(3);
|
|
build.nop(4);
|
|
build.nop(5);
|
|
build.nop(6);
|
|
build.nop(7);
|
|
build.nop(8);
|
|
build.nop(9);
|
|
|
|
build.finalize();
|
|
|
|
// Expected listing; it begins with a newline, which the comparison below accounts for
std::string expected = R"(
|
|
push r12
|
|
; align 8
|
|
nop word ptr[rax+rax] ; 6-byte nop
|
|
; align 8 using int3
|
|
; align 8 using ud2
|
|
add rax,rdi
|
|
add rcx,8
|
|
sub dword ptr [rax],1FDCh
|
|
and dword ptr [rcx],37h
|
|
mov rdi,qword ptr [rax+rsi*2]
|
|
vaddss xmm0,xmm0,dword ptr [rax+r14*2+01Ch]
|
|
.L1:
|
|
cmp rsi,rdi
|
|
je .L1
|
|
lea rcx,.L1
|
|
lea rcx,[rdx]
|
|
jmp qword ptr [rdx]
|
|
vaddps ymm9,ymm12,ymmword ptr [rbp+0Ch]
|
|
vaddpd ymm2,ymm7,qword ptr [.start-8]
|
|
neg qword ptr [rbp+r12*2]
|
|
mov r10,1234567812345678h
|
|
vmovapd xmmword ptr [rax],xmm11
|
|
movzx eax,byte ptr [rcx]
|
|
movsx rsi,word ptr [r12]
|
|
imul rcx,rdx
|
|
imul rcx,rdx,8
|
|
vroundsd xmm1,xmm2,xmm3,8
|
|
add rdx,qword ptr [rcx-0Ch]
|
|
pop r12
|
|
cmovae rax,rbx
|
|
ret
|
|
int3
|
|
nop
|
|
xchg ax, ax ; 2-byte nop
|
|
nop dword ptr[rax] ; 3-byte nop
|
|
nop dword ptr[rax] ; 4-byte nop
|
|
nop dword ptr[rax+rax] ; 5-byte nop
|
|
nop word ptr[rax+rax] ; 6-byte nop
|
|
nop dword ptr[rax] ; 7-byte nop
|
|
nop dword ptr[rax+rax] ; 8-byte nop
|
|
nop word ptr[rax+rax] ; 9-byte nop
|
|
)";
|
|
|
|
// A "\n" is prepended to the produced text so it lines up with the raw string's leading newline
CHECK("\n" + build.text == expected);
|
|
}
|
|
|
|
// Constant-pool operands (i64, f32, f64, f32x4, raw bytes, f64x2): checks both the rip-relative
// references in the code section and the bytes laid out in the data section
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "Constants")
{
    // clang-format off
    CHECK(check(
        [](AssemblyBuilderX64& build) {
            build.xor_(rax, rax);
            build.add(rax, build.i64(0x1234567887654321));
            build.vmovss(xmm2, build.f32(1.0f));
            build.vmovsd(xmm3, build.f64(1.0));
            build.vmovaps(xmm4, build.f32x4(1.0f, 2.0f, 4.0f, 8.0f));
            char arr[16] = "hello world!123";
            build.vmovupd(xmm5, build.bytes(arr, 16, 8));
            build.vmovapd(xmm5, build.f64x2(5.0, 6.0));
            build.ret();
        },
        {
            // Code: one row per emitted instruction; the last four bytes of each memory form are a negative displacement
            0x48, 0x33, 0xc0,
            0x48, 0x03, 0x05, 0xee, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x7a, 0x10, 0x15, 0xe1, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x7b, 0x10, 0x1d, 0xcc, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x78, 0x28, 0x25, 0xab, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x79, 0x10, 0x2d, 0x92, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x79, 0x28, 0x2d, 0x79, 0xff, 0xff, 0xff,
            0xc3
        },
        {
            // Data: the constants appear in the reverse of the order in which they were requested above
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x40,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x40,
            'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', '1', '2', '3', 0x0,
            0x00, 0x00, 0x80, 0x3f,
            0x00, 0x00, 0x00, 0x40,
            0x00, 0x00, 0x80, 0x40,
            0x00, 0x00, 0x00, 0x41,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // padding to align f32x4
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f,
            0x00, 0x00, 0x00, 0x00, // padding to align f64
            0x00, 0x00, 0x80, 0x3f,
            0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
        }));
    // clang-format on
}
|
|
|
|
// Requests 3001 distinct 32-bit constants and checks that each one is present in the data section.
// The checks expect the values in reverse order of creation: slot i holds the value 3000 - i.
TEST_CASE("ConstantStorage")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    for (int value = 0; value <= 3000; value++)
        build.vaddss(xmm0, xmm0, build.i32(value));

    build.finalize();

    // 3001 constants of 4 bytes each
    CHECK(build.data.size() == 12004);

    for (int i = 0; i <= 3000; i++)
    {
        // Each slot is compared byte by byte, lowest byte first; the upper two bytes must be zero
        CHECK(build.data[i * 4 + 0] == ((3000 - i) & 0xff));
        CHECK(build.data[i * 4 + 1] == ((3000 - i) >> 8));
        CHECK(build.data[i * 4 + 2] == 0x00);
        CHECK(build.data[i * 4 + 3] == 0x00);
    }
}
|
|
|
|
// Requests the same f32 constant 3001 times and checks that the data section holds it only once
TEST_CASE("ConstantStorageDedup")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    for (int repeat = 0; repeat <= 3000; repeat++)
        build.vaddss(xmm0, xmm0, build.f32(1.0f));

    build.finalize();

    // A single 4-byte slot is expected
    CHECK(build.data.size() == 4);

    // The slot must hold the bytes 00 00 80 3f, i.e. 1.0f
    CHECK(build.data[0] == 0x00);
    CHECK(build.data[1] == 0x00);
    CHECK(build.data[2] == 0x80);
    CHECK(build.data[3] == 0x3f);
}
|
|
|
|
// Checks that asking for an already-created constant yields the same operand immediate,
// even after many other constants have been added in between
TEST_CASE("ConstantCaching")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    OperandX64 two = build.f64(2);

    // Force data relocation
    for (int value = 0; value < 4096; value++)
        build.f64(value);

    // The second request for 2.0 must refer to the same location as the first
    CHECK(build.f64(2).imm == two.imm);

    build.finalize();
}
|
|
|
|
// Closes the "x64Assembly" suite opened by TEST_SUITE_BEGIN above
TEST_SUITE_END();
|