mirror of
https://github.com/luau-lang/luau.git
synced 2024-12-14 06:00:39 +00:00
97965c7c0a
* `ClassType` can now have an indexer defined on it. This allows custom types to be used in `t[x]` expressions. * Fixed search for closest executable breakpoint line. Previously, breakpoints might have been skipped in `else` blocks at the end of a function * Fixed how unification is performed for two optional types `a? <: b?`, previously it might have unified either 'a' or 'b' with 'nil'. Note that this fix is not enabled by default yet (see the list in `ExperimentalFlags.h`) In the new type solver, a concept of 'Type Families' has been introduced. Type families can be thought of as type aliases with custom type inference/reduction logic included with them. For example, we can have an `Add<T, U>` type family that will resolve the type that is the result of adding two values together. This will help type inference to figure out what 'T' and 'U' might be when explicit type annotations are not provided. In this update we don't define any type families, but they will be added in the near future. It is also possible for Luau embedders to define their own type families in the global/environment scope. Other changes include: * Fixed scope used to find out which generic types should be included in the function generic type list * Fixed a crash after cyclic bound types were created during unification And in native code generation (jit): * Use of arm64 target on M1 now requires macOS 13 * Entry into native code has been optimized. This is especially important for coroutine call/pcall performance as they involve going through a C call frame * LOP_LOADK(X) translation into IR has been improved to enable type tag/constant propagation * arm64 can use integer immediate values to synthesize floating-point values * x64 assembler removes duplicate 64bit numbers from the data section to save space * Linux `perf` can now be used to profile native Luau code (when running with --codegen-perf CLI argument)
713 lines
30 KiB
C++
713 lines
30 KiB
C++
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
#include "Luau/AssemblyBuilderX64.h"
|
|
#include "Luau/StringUtils.h"
|
|
|
|
#include "doctest.h"
|
|
|
|
#include <string.h>
|
|
|
|
using namespace Luau::CodeGen;
|
|
using namespace Luau::CodeGen::X64;
|
|
|
|
static std::string bytecodeAsArray(const std::vector<uint8_t>& bytecode)
|
|
{
|
|
std::string result = "{";
|
|
|
|
for (size_t i = 0; i < bytecode.size(); i++)
|
|
Luau::formatAppend(result, "%s0x%02x", i == 0 ? "" : ", ", bytecode[i]);
|
|
|
|
return result.append("}");
|
|
}
|
|
|
|
class AssemblyBuilderX64Fixture
|
|
{
|
|
public:
|
|
bool check(void (*f)(AssemblyBuilderX64& build), std::vector<uint8_t> code, std::vector<uint8_t> data = {})
|
|
{
|
|
AssemblyBuilderX64 build(/* logText= */ false);
|
|
|
|
f(build);
|
|
|
|
build.finalize();
|
|
|
|
if (build.code != code)
|
|
{
|
|
printf("Expected code: %s\nReceived code: %s\n", bytecodeAsArray(code).c_str(), bytecodeAsArray(build.code).c_str());
|
|
return false;
|
|
}
|
|
|
|
if (build.data != data)
|
|
{
|
|
printf("Expected data: %s\nReceived data: %s\n", bytecodeAsArray(data).c_str(), bytecodeAsArray(build.data).c_str());
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
};
|
|
|
|
TEST_SUITE_BEGIN("x64Assembly");
|
|
|
|
// Emits the single builder call `build.inst` through the fixture's check() and CHECKs that
// the generated code equals the byte list passed as the remaining macro arguments.
// No data bytes are passed, so check() compares the data section against its default (empty) value.
#define SINGLE_COMPARE(inst, ...) \
    CHECK(check([](AssemblyBuilderX64& build) { build.inst; }, {__VA_ARGS__}))
|
|
|
|
// Walks `add` through the operand shapes supported by the builder and pins the exact bytes emitted.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseBinaryInstructionForms")
{
    // register, register
    SINGLE_COMPARE(add(rax, rcx), 0x48, 0x03, 0xc1);
    SINGLE_COMPARE(add(rsp, r12), 0x49, 0x03, 0xe4);
    SINGLE_COMPARE(add(r14, r10), 0x4d, 0x03, 0xf2);

    // register, immediate (8-bit and 32-bit immediates, plus byte-sized registers)
    SINGLE_COMPARE(add(rax, 0), 0x48, 0x83, 0xc0, 0x00);
    SINGLE_COMPARE(add(rax, 0x7f), 0x48, 0x83, 0xc0, 0x7f);
    SINGLE_COMPARE(add(rax, 0x80), 0x48, 0x81, 0xc0, 0x80, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r10, 0x7fffffff), 0x49, 0x81, 0xc2, 0xff, 0xff, 0xff, 0x7f);
    SINGLE_COMPARE(add(al, 3), 0x80, 0xc0, 0x03);
    SINGLE_COMPARE(add(sil, 3), 0x48, 0x80, 0xc6, 0x03);
    SINGLE_COMPARE(add(r11b, 3), 0x49, 0x80, 0xc3, 0x03);

    // register, [base]
    SINGLE_COMPARE(add(rax, qword[rax]), 0x48, 0x03, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx]), 0x48, 0x03, 0x03);
    SINGLE_COMPARE(add(rax, qword[rsp]), 0x48, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[rbp]), 0x48, 0x03, 0x45, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10]), 0x49, 0x03, 0x02);
    SINGLE_COMPARE(add(rax, qword[r12]), 0x49, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[r13]), 0x49, 0x03, 0x45, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax]), 0x4c, 0x03, 0x20);
    SINGLE_COMPARE(add(r12, qword[rbx]), 0x4c, 0x03, 0x23);
    SINGLE_COMPARE(add(r12, qword[rsp]), 0x4c, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[rbp]), 0x4c, 0x03, 0x65, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10]), 0x4d, 0x03, 0x22);
    SINGLE_COMPARE(add(r12, qword[r12]), 0x4d, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[r13]), 0x4d, 0x03, 0x65, 0x00);

    // register, [base + 8-bit displacement]
    SINGLE_COMPARE(add(rax, qword[rax + 0x1b]), 0x48, 0x03, 0x40, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbx + 0x1b]), 0x48, 0x03, 0x43, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rsp + 0x1b]), 0x48, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbp + 0x1b]), 0x48, 0x03, 0x45, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r10 + 0x1b]), 0x49, 0x03, 0x42, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r12 + 0x1b]), 0x49, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r13 + 0x1b]), 0x49, 0x03, 0x45, 0x1b);

    SINGLE_COMPARE(add(r12, qword[rax + 0x1b]), 0x4c, 0x03, 0x60, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbx + 0x1b]), 0x4c, 0x03, 0x63, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rsp + 0x1b]), 0x4c, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbp + 0x1b]), 0x4c, 0x03, 0x65, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r10 + 0x1b]), 0x4d, 0x03, 0x62, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r12 + 0x1b]), 0x4d, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r13 + 0x1b]), 0x4d, 0x03, 0x65, 0x1b);

    // register, [base + 32-bit displacement]
    SINGLE_COMPARE(add(rax, qword[rax + 0xabab]), 0x48, 0x03, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx + 0xabab]), 0x48, 0x03, 0x83, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rsp + 0xabab]), 0x48, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp + 0xabab]), 0x48, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 + 0xabab]), 0x49, 0x03, 0x82, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 + 0xabab]), 0x49, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 + 0xabab]), 0x49, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax + 0xabab]), 0x4c, 0x03, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx + 0xabab]), 0x4c, 0x03, 0xa3, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rsp + 0xabab]), 0x4c, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp + 0xabab]), 0x4c, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 + 0xabab]), 0x4d, 0x03, 0xa2, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 + 0xabab]), 0x4d, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 + 0xabab]), 0x4d, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);

    // register, [index * scale]
    SINGLE_COMPARE(add(rax, qword[rax * 2]), 0x48, 0x03, 0x04, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx * 2]), 0x48, 0x03, 0x04, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp * 2]), 0x48, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 * 2]), 0x4a, 0x03, 0x04, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 * 2]), 0x4a, 0x03, 0x04, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 * 2]), 0x4a, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax * 2]), 0x4c, 0x03, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx * 2]), 0x4c, 0x03, 0x24, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp * 2]), 0x4c, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 * 2]), 0x4e, 0x03, 0x24, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 * 2]), 0x4e, 0x03, 0x24, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 * 2]), 0x4e, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);

    // register, [base + index * scale + displacement]
    SINGLE_COMPARE(add(rax, qword[rax + rax * 2]), 0x48, 0x03, 0x04, 0x40);
    SINGLE_COMPARE(add(rax, qword[rax + rbx * 2 + 0x1b]), 0x48, 0x03, 0x44, 0x58, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + rbp * 2]), 0x48, 0x03, 0x04, 0x68);
    SINGLE_COMPARE(add(rax, qword[rax + rbp + 0xabab]), 0x48, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r12 + 0x1b]), 0x4a, 0x03, 0x44, 0x20, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r12 * 4 + 0xabab]), 0x4a, 0x03, 0x84, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r13 * 2 + 0x1b]), 0x4a, 0x03, 0x44, 0x68, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r13 + 0xabab]), 0x4a, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + r12 * 2]), 0x4e, 0x03, 0x24, 0x60);
    SINGLE_COMPARE(add(r12, qword[rax + r13 + 0xabab]), 0x4e, 0x03, 0xa4, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + rbp * 2 + 0x1b]), 0x4c, 0x03, 0x64, 0x68, 0x1b);

    // register, [absolute 32-bit address]
    SINGLE_COMPARE(add(rax, qword[0]), 0x48, 0x03, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[0xabab]), 0x48, 0x03, 0x04, 0x25, 0xab, 0xab, 0x00, 0x00);

    // [address], register
    SINGLE_COMPARE(add(qword[rax], rax), 0x48, 0x01, 0x00);
    SINGLE_COMPARE(add(qword[rax + rax * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbx + rax * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x43, 0x1b);
    SINGLE_COMPARE(add(qword[rbx + rbp * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x6b, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbp * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0xad, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbp + r12 + 0x1b], rax), 0x4a, 0x01, 0x44, 0x25, 0x1b);
    SINGLE_COMPARE(add(qword[r12], rax), 0x49, 0x01, 0x04, 0x24);
    SINGLE_COMPARE(add(qword[r13 + rbx + 0xabab], rax), 0x49, 0x01, 0x84, 0x1d, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax + r13 * 2 + 0x1b], rsi), 0x4a, 0x01, 0x74, 0x68, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbx * 2], rsi), 0x48, 0x01, 0x74, 0x5d, 0x00);
    SINGLE_COMPARE(add(qword[rsp + r10 * 2 + 0x1b], r10), 0x4e, 0x01, 0x54, 0x54, 0x1b);

    // [address], immediate
    SINGLE_COMPARE(add(byte[rax], 2), 0x80, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 2), 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 0xabcd), 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax], 2), 0x48, 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(qword[rax], 0xabcd), 0x48, 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
}
|
|
|
|
// Pins the encodings of the single-operand integer instructions.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseUnaryInstructionForms")
{
    // multiply / divide
    SINGLE_COMPARE(div(rcx), 0x48, 0xf7, 0xf1);
    SINGLE_COMPARE(idiv(qword[rax]), 0x48, 0xf7, 0x38);
    SINGLE_COMPARE(mul(qword[rax + rbx]), 0x48, 0xf7, 0x24, 0x18);
    SINGLE_COMPARE(imul(r9), 0x49, 0xf7, 0xe9);

    // negate / complement
    SINGLE_COMPARE(neg(r9), 0x49, 0xf7, 0xd9);
    SINGLE_COMPARE(not_(r12), 0x49, 0xf7, 0xd4);

    // increment / decrement
    SINGLE_COMPARE(inc(r12), 0x49, 0xff, 0xc4);
    SINGLE_COMPARE(dec(ecx), 0xff, 0xc9);
    SINGLE_COMPARE(dec(byte[rdx]), 0xfe, 0x0a);
}
|
|
|
|
// Pins the encodings of mov/mov64 across immediate, register and memory operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov")
{
    // low registers
    SINGLE_COMPARE(mov(rcx, 1), 0x48, 0xb9, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(rcx, 0x1234567812345678ll), 0x48, 0xb9, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(ecx, 2), 0xb9, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(cl, 2), 0xb1, 0x02);
    SINGLE_COMPARE(mov(sil, 2), 0x48, 0xb6, 0x02);
    SINGLE_COMPARE(mov(r9b, 2), 0x49, 0xb1, 0x02);
    SINGLE_COMPARE(mov(rcx, qword[rdi]), 0x48, 0x8b, 0x0f);
    SINGLE_COMPARE(mov(dword[rax], 0xabcd), 0xc7, 0x00, 0xcd, 0xab, 0x00, 0x00);

    // extended registers (r8-r15)
    SINGLE_COMPARE(mov(r13, 1), 0x49, 0xbd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(r13, 0x1234567812345678ll), 0x49, 0xbd, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(r13d, 2), 0x41, 0xbd, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(r13, qword[r12]), 0x4d, 0x8b, 0x2c, 0x24);
    SINGLE_COMPARE(mov(dword[r13], 0xabcd), 0x41, 0xc7, 0x45, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(mov(qword[rdx], r9), 0x4c, 0x89, 0x0a);

    // byte stores
    SINGLE_COMPARE(mov(byte[rsi], 0x3), 0xc6, 0x06, 0x03);
    SINGLE_COMPARE(mov(byte[rsi], al), 0x88, 0x06);
    SINGLE_COMPARE(mov(byte[rsi], dil), 0x48, 0x88, 0x3e);
    SINGLE_COMPARE(mov(byte[rsi], r10b), 0x4c, 0x88, 0x16);
}
|
|
|
|
// Pins the encodings of the sign-extending (movsx) and zero-extending (movzx) loads.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMovExtended")
{
    // sign extension
    SINGLE_COMPARE(movsx(eax, byte[rcx]), 0x0f, 0xbe, 0x01);
    SINGLE_COMPARE(movsx(r12, byte[r10]), 0x4d, 0x0f, 0xbe, 0x22);
    SINGLE_COMPARE(movsx(ebx, word[r11]), 0x41, 0x0f, 0xbf, 0x1b);
    SINGLE_COMPARE(movsx(rdx, word[rcx]), 0x48, 0x0f, 0xbf, 0x11);

    // zero extension
    SINGLE_COMPARE(movzx(eax, byte[rcx]), 0x0f, 0xb6, 0x01);
    SINGLE_COMPARE(movzx(r12, byte[r10]), 0x4d, 0x0f, 0xb6, 0x22);
    SINGLE_COMPARE(movzx(ebx, word[r11]), 0x41, 0x0f, 0xb7, 0x1b);
    SINGLE_COMPARE(movzx(rdx, word[rcx]), 0x48, 0x0f, 0xb7, 0x11);
}
|
|
|
|
// Pins the encodings of `test` with immediate, register and memory operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfTest")
{
    // register, immediate
    SINGLE_COMPARE(test(al, 8), 0xf6, 0xc0, 0x08);
    SINGLE_COMPARE(test(eax, 8), 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rax, 8), 0x48, 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rcx, 0xabab), 0x48, 0xf7, 0xc1, 0xab, 0xab, 0x00, 0x00);

    // register, register / memory
    SINGLE_COMPARE(test(rcx, rax), 0x48, 0x85, 0xc8);
    SINGLE_COMPARE(test(rax, qword[rcx]), 0x48, 0x85, 0x01);
}
|
|
|
|
// Pins the encodings of shifts and rotates by 1, by an immediate count and by cl.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfShift")
{
    // logical shifts
    SINGLE_COMPARE(shl(al, 1), 0xd0, 0xe0);
    SINGLE_COMPARE(shl(al, cl), 0xd2, 0xe0);
    SINGLE_COMPARE(shl(sil, cl), 0x48, 0xd2, 0xe6);
    SINGLE_COMPARE(shl(r10b, cl), 0x49, 0xd2, 0xe2);
    SINGLE_COMPARE(shr(al, 4), 0xc0, 0xe8, 0x04);
    SINGLE_COMPARE(shr(eax, 1), 0xd1, 0xe8);

    // arithmetic shifts
    SINGLE_COMPARE(sal(eax, cl), 0xd3, 0xe0);
    SINGLE_COMPARE(sal(eax, 4), 0xc1, 0xe0, 0x04);
    SINGLE_COMPARE(sar(rax, 4), 0x48, 0xc1, 0xf8, 0x04);
    SINGLE_COMPARE(sar(r11, 1), 0x49, 0xd1, 0xfb);

    // rotates
    SINGLE_COMPARE(rol(eax, 1), 0xd1, 0xc0);
    SINGLE_COMPARE(rol(eax, cl), 0xd3, 0xc0);
    SINGLE_COMPARE(ror(eax, 1), 0xd1, 0xc8);
    SINGLE_COMPARE(ror(eax, cl), 0xd3, 0xc8);
}
|
|
|
|
// Pins the encodings of `lea` for base+index, scaled index and scaled index with displacement.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfLea")
{
    SINGLE_COMPARE(lea(rax, addr[rdx + rcx]), 0x48, 0x8d, 0x04, 0x0a);
    SINGLE_COMPARE(lea(rax, addr[rdx + rax * 4]), 0x48, 0x8d, 0x04, 0x82);
    SINGLE_COMPARE(lea(rax, addr[r13 + r12 * 4 + 4]), 0x4b, 0x8d, 0x44, 0xa5, 0x04);
}
|
|
|
|
// Pins the encodings of `setcc` writing to byte registers and to a byte in memory.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfSetcc")
{
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, bl), 0x0f, 0x95, 0xc3);
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, dil), 0x48, 0x0f, 0x95, 0xc7);
    SINGLE_COMPARE(setcc(ConditionX64::BelowEqual, byte[rcx]), 0x0f, 0x96, 0x01);
}
|
|
|
|
// Pins the encodings of indirect jmp/call through a register or a memory operand.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfAbsoluteJumps")
{
    // jmp
    SINGLE_COMPARE(jmp(rax), 0xff, 0xe0);
    SINGLE_COMPARE(jmp(r14), 0x41, 0xff, 0xe6);
    SINGLE_COMPARE(jmp(qword[r14 + rdx * 4]), 0x41, 0xff, 0x24, 0x96);

    // call
    SINGLE_COMPARE(call(rax), 0xff, 0xd0);
    SINGLE_COMPARE(call(r14), 0x41, 0xff, 0xd6);
    SINGLE_COMPARE(call(qword[r14 + rdx * 4]), 0x41, 0xff, 0x14, 0x96);
}
|
|
|
|
// Pins the encodings of the two-operand and three-operand (immediate) forms of `imul`.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfImul")
{
    // destination, source
    SINGLE_COMPARE(imul(ecx, esi), 0x0f, 0xaf, 0xce);
    SINGLE_COMPARE(imul(r12, rax), 0x4c, 0x0f, 0xaf, 0xe0);
    SINGLE_COMPARE(imul(r12, qword[rdx + rdi]), 0x4c, 0x0f, 0xaf, 0x24, 0x3a);

    // destination, source, immediate (32-bit registers)
    SINGLE_COMPARE(imul(ecx, edx, 8), 0x6b, 0xca, 0x08);
    SINGLE_COMPARE(imul(ecx, r9d, 0xabcd), 0x41, 0x69, 0xc9, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r8d, eax, -9), 0x44, 0x6b, 0xc0, 0xf7);

    // destination, source, immediate (64-bit registers)
    SINGLE_COMPARE(imul(rcx, rdx, 17), 0x48, 0x6b, 0xca, 0x11);
    SINGLE_COMPARE(imul(rcx, r12, 0xabcd), 0x49, 0x69, 0xcc, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r12, rax, -13), 0x4c, 0x6b, 0xe0, 0xf3);
}
|
|
|
|
// Pins the byte sequences produced by nop() for each requested length from 1 to 9, plus a 15-byte request.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "NopForms")
{
    SINGLE_COMPARE(nop(), 0x90);
    SINGLE_COMPARE(nop(2), 0x66, 0x90);
    SINGLE_COMPARE(nop(3), 0x0f, 0x1f, 0x00);
    SINGLE_COMPARE(nop(4), 0x0f, 0x1f, 0x40, 0x00);
    SINGLE_COMPARE(nop(5), 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(6), 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(7), 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(8), 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(9), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);

    // A 15-byte request is expected to come out as a 9-byte form followed by a 6-byte form
    SINGLE_COMPARE(nop(15), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00); // 9+6
}
|
|
|
|
// Checks the padding bytes emitted by align() after a one-byte `ret` for each fill kind.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentForms")
{
    // Nop fill up to 8 bytes: ret followed by one 7-byte nop
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            asmb.ret();
            asmb.align(8, AlignmentDataX64::Nop);
        },
        {0xc3, 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}));

    // Nop fill up to 32 bytes: ret followed by three 9-byte nops and one 4-byte nop
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            asmb.ret();
            asmb.align(32, AlignmentDataX64::Nop);
        },
        {0xc3,
            0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x0f, 0x1f, 0x40, 0x00}));

    // Int3 fill: every padding byte is 0xcc
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            asmb.ret();
            asmb.align(8, AlignmentDataX64::Int3);
        },
        {0xc3, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc}));

    // Ud2 fill: two-byte ud2 pairs, with a single 0xcc taking the odd trailing byte
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            asmb.ret();
            asmb.align(8, AlignmentDataX64::Ud2);
        },
        {0xc3, 0x0f, 0x0b, 0x0f, 0x0b, 0x0f, 0x0b, 0xcc}));
}
|
|
|
|
// Smoke test with no assertions: emitting large amounts of padding or code must complete cleanly.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentOverflow")
{
    // Test that alignment correctly resizes the code buffer
    auto alignAfterRet = [](AlignmentDataX64 fill) {
        AssemblyBuilderX64 build(/* logText */ false);

        build.ret();
        build.align(8192, fill);
        build.finalize();
    };

    alignAfterRet(AlignmentDataX64::Nop);
    alignAfterRet(AlignmentDataX64::Int3);

    // Same amount of output, but produced one single-byte instruction at a time
    {
        AssemblyBuilderX64 build(/* logText */ false);

        for (int emitted = 0; emitted < 8192; ++emitted)
            build.int3();

        build.finalize();
    }

    alignAfterRet(AlignmentDataX64::Ud2);
}
|
|
|
|
// Checks the 32-bit relative displacements written for backward and forward label references.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "ControlFlow")
{
    // Jump back
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            Label loopTop = asmb.setLabel();
            asmb.add(rsi, 1);
            asmb.cmp(rsi, rdi);
            asmb.jcc(ConditionX64::Equal, loopTop);
        },
        {0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf3, 0xff, 0xff, 0xff}));

    // Jump back, but the label is set before use
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            Label loopTop;
            asmb.add(rsi, 1);
            asmb.setLabel(loopTop);
            asmb.cmp(rsi, rdi);
            asmb.jcc(ConditionX64::Equal, loopTop);
        },
        {0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf7, 0xff, 0xff, 0xff}));

    // Jump forward
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            Label done;

            asmb.cmp(rsi, rdi);
            asmb.jcc(ConditionX64::Greater, done);
            asmb.or_(rdi, 0x3e);
            asmb.setLabel(done);
        },
        {0x48, 0x3b, 0xf7, 0x0f, 0x8f, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xcf, 0x3e}));

    // Regular jump
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            Label done;

            asmb.jmp(done);
            asmb.and_(rdi, 0x3e);
            asmb.setLabel(done);
        },
        {0xe9, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xe7, 0x3e}));
}
|
|
|
|
// Checks that a call to a label defined later in the stream gets the right relative displacement.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelCall")
{
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            Label callee;

            // Caller: the call target is the instruction right after the `ret`
            asmb.and_(rcx, 0x3e);
            asmb.call(callee);
            asmb.ret();

            // Callee body
            asmb.setLabel(callee);
            asmb.lea(rax, addr[rcx + 0x1f]);
            asmb.ret();
        },
        {0x48, 0x83, 0xe1, 0x3e, 0xe8, 0x01, 0x00, 0x00, 0x00, 0xc3, 0x48, 0x8d, 0x41, 0x1f, 0xc3}));
}
|
|
|
|
// Pins the encodings of three-operand AVX arithmetic and logic instructions.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXBinaryInstructionForms")
{
    // vadd* with packed/scalar, single/double and xmm/ymm combinations on extended registers
    SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x58, 0xc6);
    SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x29, 0x58, 0x01);
    SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymm14), 0xc4, 0x41, 0x2d, 0x58, 0xc6);
    SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymmword[r9]), 0xc4, 0x41, 0x2d, 0x58, 0x01);
    SINGLE_COMPARE(vaddps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x58, 0xc6);
    SINGLE_COMPARE(vaddps(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x28, 0x58, 0x01);
    SINGLE_COMPARE(vaddsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x58, 0xc6);
    SINGLE_COMPARE(vaddsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x58, 0x01);
    SINGLE_COMPARE(vaddss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x58, 0xc6);
    SINGLE_COMPARE(vaddss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x58, 0x01);

    // low registers and a full base + index * scale + displacement memory operand
    SINGLE_COMPARE(vaddps(xmm1, xmm2, xmm3), 0xc4, 0xe1, 0x68, 0x58, 0xcb);
    SINGLE_COMPARE(vaddps(xmm9, xmm12, xmmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x18, 0x58, 0x4c, 0x71, 0x1c);
    SINGLE_COMPARE(vaddps(ymm1, ymm2, ymm3), 0xc4, 0xe1, 0x6c, 0x58, 0xcb);
    SINGLE_COMPARE(vaddps(ymm9, ymm12, ymmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x1c, 0x58, 0x4c, 0x71, 0x1c);

    // Coverage for other instructions that follow the same pattern
    SINGLE_COMPARE(vsubsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5c, 0xc6);
    SINGLE_COMPARE(vmulsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x59, 0xc6);
    SINGLE_COMPARE(vdivsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5e, 0xc6);

    SINGLE_COMPARE(vorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x56, 0xc6);
    SINGLE_COMPARE(vxorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x57, 0xc6);

    SINGLE_COMPARE(vandpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x54, 0xc6);
    SINGLE_COMPARE(vandnpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x55, 0xc6);

    SINGLE_COMPARE(vmaxsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5f, 0xc6);
    SINGLE_COMPARE(vminsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5d, 0xc6);

    // the comparison carries a trailing immediate byte
    SINGLE_COMPARE(vcmpltsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x01);
}
|
|
|
|
// Pins the encodings of the AVX square-root family (packed two-operand and scalar three-operand forms).
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXUnaryMergeInstructionForms")
{
    // packed forms: destination, source
    SINGLE_COMPARE(vsqrtpd(xmm8, xmm10), 0xc4, 0x41, 0x79, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymm10), 0xc4, 0x41, 0x7d, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtps(xmm8, xmm10), 0xc4, 0x41, 0x78, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x51, 0x01);

    // scalar forms: destination, first source, second source
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x51, 0x01);

    // Coverage for other instructions that follow the same pattern
    SINGLE_COMPARE(vucomisd(xmm1, xmm4), 0xc4, 0xe1, 0x79, 0x2e, 0xcc);
}
|
|
|
|
// Pins the encodings of AVX scalar, aligned, unaligned and quadword moves.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXMoveInstructionForms")
{
    // scalar moves (store, load, register merge)
    SINGLE_COMPARE(vmovsd(qword[r9], xmm10), 0xc4, 0x41, 0x7b, 0x11, 0x11);
    SINGLE_COMPARE(vmovsd(xmm8, qword[r9]), 0xc4, 0x41, 0x7b, 0x10, 0x01);
    SINGLE_COMPARE(vmovsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x10, 0xc6);
    SINGLE_COMPARE(vmovss(dword[r9], xmm10), 0xc4, 0x41, 0x7a, 0x11, 0x11);
    SINGLE_COMPARE(vmovss(xmm8, dword[r9]), 0xc4, 0x41, 0x7a, 0x10, 0x01);
    SINGLE_COMPARE(vmovss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x10, 0xc6);

    // aligned packed moves
    SINGLE_COMPARE(vmovapd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x28, 0x01);
    SINGLE_COMPARE(vmovapd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x29, 0x11);
    SINGLE_COMPARE(vmovapd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x29, 0x11);
    SINGLE_COMPARE(vmovaps(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x28, 0x01);

    // unaligned packed moves
    SINGLE_COMPARE(vmovupd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x10, 0x01);
    SINGLE_COMPARE(vmovupd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x11, 0x11);
    SINGLE_COMPARE(vmovupd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x11, 0x11);
    SINGLE_COMPARE(vmovups(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x10, 0x01);

    // quadword moves between general-purpose registers / memory and xmm
    SINGLE_COMPARE(vmovq(xmm1, rbx), 0xc4, 0xe1, 0xf9, 0x6e, 0xcb);
    SINGLE_COMPARE(vmovq(rbx, xmm1), 0xc4, 0xe1, 0xf9, 0x7e, 0xcb);
    SINGLE_COMPARE(vmovq(xmm1, qword[r9]), 0xc4, 0xc1, 0xf9, 0x6e, 0x09);
    SINGLE_COMPARE(vmovq(qword[r9], xmm1), 0xc4, 0xc1, 0xf9, 0x7e, 0x09);
}
|
|
|
|
// Pins the encodings of the AVX conversions between integers, doubles and floats.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXConversionInstructionForms")
{
    // double -> integer with truncation
    SINGLE_COMPARE(vcvttsd2si(ecx, xmm0), 0xc4, 0xe1, 0x7b, 0x2c, 0xc8);
    SINGLE_COMPARE(vcvttsd2si(r9d, xmmword[rcx + rdx]), 0xc4, 0x61, 0x7b, 0x2c, 0x0c, 0x11);
    SINGLE_COMPARE(vcvttsd2si(rdx, xmm0), 0xc4, 0xe1, 0xfb, 0x2c, 0xd0);
    SINGLE_COMPARE(vcvttsd2si(r13, xmmword[rcx + rdx]), 0xc4, 0x61, 0xfb, 0x2c, 0x2c, 0x11);

    // integer -> double
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, ecx), 0xc4, 0xe1, 0x2b, 0x2a, 0xe9);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, dword[rcx + rdx]), 0xc4, 0xe1, 0x23, 0x2a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, r13), 0xc4, 0xc1, 0xab, 0x2a, 0xed);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x2a, 0x34, 0x11);

    // double -> float
    SINGLE_COMPARE(vcvtsd2ss(xmm5, xmm10, xmm11), 0xc4, 0xc1, 0x2b, 0x5a, 0xeb);
    SINGLE_COMPARE(vcvtsd2ss(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x5a, 0x34, 0x11);
}
|
|
|
|
// Pins the encodings of AVX instructions that take a fourth operand (rounding mode or mask register).
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXTernaryInstructionForms")
{
    // vroundsd: the rounding mode ends up in the trailing immediate byte
    SINGLE_COMPARE(vroundsd(xmm7, xmm12, xmm3, RoundingModeX64::RoundToNegativeInfinity), 0xc4, 0xe3, 0x19, 0x0b, 0xfb, 0x09);
    SINGLE_COMPARE(vroundsd(xmm8, xmm13, xmmword[r13 + rdx], RoundingModeX64::RoundToPositiveInfinity),
        0xc4, 0x43, 0x11, 0x0b, 0x44, 0x15, 0x00, 0x0a);
    SINGLE_COMPARE(vroundsd(xmm9, xmm14, xmmword[rcx + r10], RoundingModeX64::RoundToZero), 0xc4, 0x23, 0x09, 0x0b, 0x0c, 0x11, 0x0b);

    // vblendvpd: the fourth register operand is encoded in the trailing byte
    SINGLE_COMPARE(vblendvpd(xmm7, xmm12, xmmword[rcx + r10], xmm5), 0xc4, 0xa3, 0x19, 0x4b, 0x3c, 0x11, 0x50);
}
|
|
|
|
// Pins the encodings of instructions that do not belong to any of the other groups.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "MiscInstructions")
{
    // breakpoint
    SINGLE_COMPARE(int3(), 0xcc);

    // bit scans (reverse / forward)
    SINGLE_COMPARE(bsr(eax, edx), 0x0f, 0xbd, 0xc2);
    SINGLE_COMPARE(bsf(eax, edx), 0x0f, 0xbc, 0xc2);
}
|
|
|
|
// Emits a representative mix of instructions with text logging enabled and compares the
// listing accumulated in build.text against the expected assembly text below.
TEST_CASE("LogTest")
|

{
|

AssemblyBuilderX64 build(/* logText= */ true);
|

|

build.push(r12);
|

// Three consecutive alignment requests: default fill, int3 fill, ud2 fill.
// Per the expected listing, only the first one is followed by a padding instruction.
build.align(8);
|

build.align(8, AlignmentDataX64::Int3);
|

build.align(8, AlignmentDataX64::Ud2);
|

|

build.add(rax, rdi);
|

build.add(rcx, 8);
|

build.sub(dword[rax], 0x1fdc);
|

build.and_(dword[rcx], 0x37);
|

build.mov(rdi, qword[rax + rsi * 2]);
|

build.vaddss(xmm0, xmm0, dword[rax + r14 * 2 + 0x1c]);
|

|

// A label that is set and then jumped to shows up as ".L1" in the expected listing
Label start = build.setLabel();
|

build.cmp(rsi, rdi);
|

build.jcc(ConditionX64::Equal, start);
|

|

build.jmp(qword[rdx]);
|

build.vaddps(ymm9, ymm12, ymmword[rbp + 0xc]);
|

// The f64 constant operand is expected to be printed as "qword ptr [.start-8]"
build.vaddpd(ymm2, ymm7, build.f64(2.5));
|

build.neg(qword[rbp + r12 * 2]);
|

build.mov64(r10, 0x1234567812345678ll);
|

build.vmovapd(xmmword[rax], xmm11);
|

build.movzx(eax, byte[rcx]);
|

build.movsx(rsi, word[r12]);
|

build.imul(rcx, rdx);
|

build.imul(rcx, rdx, 8);
|

build.vroundsd(xmm1, xmm2, xmm3, RoundingModeX64::RoundToNearestEven);
|

build.add(rdx, qword[rcx - 12]);
|

build.pop(r12);
|

build.ret();
|

build.int3();
|

|

// Every nop length from 1 to 9 bytes
build.nop();
|

build.nop(2);
|

build.nop(3);
|

build.nop(4);
|

build.nop(5);
|

build.nop(6);
|

build.nop(7);
|

build.nop(8);
|

build.nop(9);
|

|

build.finalize();
|

|

// NOTE(review): in this copy of the file every line is followed by a `|` separator line and
// runs of whitespace appear collapsed, including inside the raw string below; confirm the exact
// column padding of the expected text against the logger output before relying on it.
std::string expected = R"(
|

push r12
|

; align 8
|

nop word ptr[rax+rax] ; 6-byte nop
|

; align 8 using int3
|

; align 8 using ud2
|

add rax,rdi
|

add rcx,8
|

sub dword ptr [rax],1FDCh
|

and dword ptr [rcx],37h
|

mov rdi,qword ptr [rax+rsi*2]
|

vaddss xmm0,xmm0,dword ptr [rax+r14*2+01Ch]
|

.L1:
|

cmp rsi,rdi
|

je .L1
|

jmp qword ptr [rdx]
|

vaddps ymm9,ymm12,ymmword ptr [rbp+0Ch]
|

vaddpd ymm2,ymm7,qword ptr [.start-8]
|

neg qword ptr [rbp+r12*2]
|

mov r10,1234567812345678h
|

vmovapd xmmword ptr [rax],xmm11
|

movzx eax,byte ptr [rcx]
|

movsx rsi,word ptr [r12]
|

imul rcx,rdx
|

imul rcx,rdx,8
|

vroundsd xmm1,xmm2,xmm3,8
|

add rdx,qword ptr [rcx-0Ch]
|

pop r12
|

ret
|

int3
|

nop
|

xchg ax, ax ; 2-byte nop
|

nop dword ptr[rax] ; 3-byte nop
|

nop dword ptr[rax] ; 4-byte nop
|

nop dword ptr[rax+rax] ; 5-byte nop
|

nop word ptr[rax+rax] ; 6-byte nop
|

nop dword ptr[rax] ; 7-byte nop
|

nop dword ptr[rax+rax] ; 8-byte nop
|

nop word ptr[rax+rax] ; 9-byte nop
|

)";
|

|

// A newline is prepended because the raw string starts with a line break right after its opening delimiter
CHECK("\n" + build.text == expected);
|

}
|
|
|
|
// Checks both the code and the data section produced when instructions reference builder-owned constants.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "Constants")
{
    // clang-format off
    CHECK(check(
        [](AssemblyBuilderX64& asmb) {
            asmb.xor_(rax, rax);
            asmb.add(rax, asmb.i64(0x1234567887654321));
            asmb.vmovss(xmm2, asmb.f32(1.0f));
            asmb.vmovsd(xmm3, asmb.f64(1.0));
            asmb.vmovaps(xmm4, asmb.f32x4(1.0f, 2.0f, 4.0f, 8.0f));
            char text[16] = "hello world!123";
            asmb.vmovupd(xmm5, asmb.bytes(text, 16, 8));
            asmb.vmovapd(xmm5, asmb.f64x2(5.0, 6.0));
            asmb.ret();
        },
        {
            // expected code: one row per instruction emitted above
            0x48, 0x33, 0xc0,
            0x48, 0x03, 0x05, 0xee, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x7a, 0x10, 0x15, 0xe1, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x7b, 0x10, 0x1d, 0xcc, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x78, 0x28, 0x25, 0xab, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x79, 0x10, 0x2d, 0x92, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x79, 0x28, 0x2d, 0x79, 0xff, 0xff, 0xff,
            0xc3
        },
        {
            // expected data: the most recently requested constant comes first
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x40,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x40,
            'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', '1', '2', '3', 0x0,
            0x00, 0x00, 0x80, 0x3f,
            0x00, 0x00, 0x00, 0x40,
            0x00, 0x00, 0x80, 0x40,
            0x00, 0x00, 0x00, 0x41,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // padding to align f32x4
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f,
            0x00, 0x00, 0x00, 0x00, // padding to align f64
            0x00, 0x00, 0x80, 0x3f,
            0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
        }));
    // clang-format on
}
|
|
|
|
// Requests the same f32 constant 3001 times and checks the size and contents of the data section.
TEST_CASE("ConstantStorage")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    for (int request = 0; request <= 3000; ++request)
        build.vaddss(xmm0, xmm0, build.f32(1.0f));

    build.finalize();

    // 3001 entries of 4 bytes each
    CHECK(build.data.size() == 12004);

    // Every entry must hold the little-endian bytes of 1.0f (0x3f800000)
    for (int entry = 0; entry <= 3000; ++entry)
    {
        const int base = entry * 4;

        CHECK(build.data[base + 0] == 0x00);
        CHECK(build.data[base + 1] == 0x00);
        CHECK(build.data[base + 2] == 0x80);
        CHECK(build.data[base + 3] == 0x3f);
    }
}
|
|
|
|
// Checks that asking for the same f64 constant again, after many other constants were added,
// yields an operand with the same `imm` as the first request.
TEST_CASE("ConstantCaching")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    OperandX64 firstTwo = build.f64(2);

    // Force data relocation
    for (int value = 0; value < 4096; ++value)
        build.f64(value);

    OperandX64 secondTwo = build.f64(2);
    CHECK(secondTwo.imm == firstTwo.imm);

    build.finalize();
}
|
|
|
|
TEST_SUITE_END();
|