2022-05-26 23:08:16 +01:00
|
|
|
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
|
|
#include "Luau/AssemblyBuilderX64.h"
|
|
|
|
#include "Luau/StringUtils.h"
|
|
|
|
|
|
|
|
#include "doctest.h"
|
2024-01-27 03:20:56 +00:00
|
|
|
#include "ScopedFlags.h"
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
using namespace Luau::CodeGen;
|
2023-03-03 20:21:14 +00:00
|
|
|
using namespace Luau::CodeGen::X64;
|
2022-05-26 23:08:16 +01:00
|
|
|
|
2024-11-22 21:00:51 +00:00
|
|
|
LUAU_FASTFLAG(LuauVectorLibNativeDot);
|
|
|
|
|
2022-11-04 17:33:22 +00:00
|
|
|
static std::string bytecodeAsArray(const std::vector<uint8_t>& bytecode)
|
2022-05-26 23:08:16 +01:00
|
|
|
{
|
|
|
|
std::string result = "{";
|
|
|
|
|
|
|
|
for (size_t i = 0; i < bytecode.size(); i++)
|
|
|
|
Luau::formatAppend(result, "%s0x%02x", i == 0 ? "" : ", ", bytecode[i]);
|
|
|
|
|
|
|
|
return result.append("}");
|
|
|
|
}
|
|
|
|
|
|
|
|
class AssemblyBuilderX64Fixture
|
|
|
|
{
|
|
|
|
public:
|
2022-11-04 17:33:22 +00:00
|
|
|
bool check(void (*f)(AssemblyBuilderX64& build), std::vector<uint8_t> code, std::vector<uint8_t> data = {})
|
2022-05-26 23:08:16 +01:00
|
|
|
{
|
|
|
|
AssemblyBuilderX64 build(/* logText= */ false);
|
|
|
|
|
|
|
|
f(build);
|
|
|
|
|
|
|
|
build.finalize();
|
|
|
|
|
2022-08-11 22:01:33 +01:00
|
|
|
if (build.code != code)
|
2022-05-26 23:08:16 +01:00
|
|
|
{
|
2022-08-11 22:01:33 +01:00
|
|
|
printf("Expected code: %s\nReceived code: %s\n", bytecodeAsArray(code).c_str(), bytecodeAsArray(build.code).c_str());
|
2022-11-04 17:33:22 +00:00
|
|
|
return false;
|
2022-08-11 22:01:33 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (build.data != data)
|
|
|
|
{
|
|
|
|
printf("Expected data: %s\nReceived data: %s\n", bytecodeAsArray(data).c_str(), bytecodeAsArray(build.data).c_str());
|
2022-11-04 17:33:22 +00:00
|
|
|
return false;
|
2022-05-26 23:08:16 +01:00
|
|
|
}
|
2022-11-04 17:33:22 +00:00
|
|
|
|
|
|
|
return true;
|
2022-05-26 23:08:16 +01:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
TEST_SUITE_BEGIN("x64Assembly");
|
|
|
|
|
|
|
|
// Emits the single builder call `build.inst` and CHECKs that the generated code equals the bytes
// passed as the remaining arguments. No data bytes are expected (check() defaults `data` to empty).
// Must be used inside a TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, ...) body so that check() is in scope.
#define SINGLE_COMPARE(inst, ...) \
    CHECK(check( \
        [](AssemblyBuilderX64& build) \
        { \
            build.inst; \
        }, \
        {__VA_ARGS__} \
    ))
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
// Checks the bytes emitted for `add` across every operand form the builder supports.
// `add` stands in for the other base binary instructions; each group below varies one part of the operand.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseBinaryInstructionForms")
{
    // reg, reg
    SINGLE_COMPARE(add(rax, rcx), 0x48, 0x03, 0xc1);
    SINGLE_COMPARE(add(rsp, r12), 0x49, 0x03, 0xe4);
    SINGLE_COMPARE(add(r14, r10), 0x4d, 0x03, 0xf2);

    // reg, imm
    // 0x7f is the largest value that still gets the one-byte immediate form (opcode 0x83); 0x80 needs four bytes (0x81)
    SINGLE_COMPARE(add(rax, 0), 0x48, 0x83, 0xc0, 0x00);
    SINGLE_COMPARE(add(rax, 0x7f), 0x48, 0x83, 0xc0, 0x7f);
    SINGLE_COMPARE(add(rax, 0x80), 0x48, 0x81, 0xc0, 0x80, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r10, 0x7fffffff), 0x49, 0x81, 0xc2, 0xff, 0xff, 0xff, 0x7f);
    SINGLE_COMPARE(add(al, 3), 0x80, 0xc0, 0x03);
    SINGLE_COMPARE(add(sil, 3), 0x48, 0x80, 0xc6, 0x03);
    SINGLE_COMPARE(add(r11b, 3), 0x49, 0x80, 0xc3, 0x03);

    // reg, [reg]
    // rsp/r12 as a base are followed by an extra 0x24 byte, rbp/r13 by an explicit zero displacement
    SINGLE_COMPARE(add(rax, qword[rax]), 0x48, 0x03, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx]), 0x48, 0x03, 0x03);
    SINGLE_COMPARE(add(rax, qword[rsp]), 0x48, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[rbp]), 0x48, 0x03, 0x45, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10]), 0x49, 0x03, 0x02);
    SINGLE_COMPARE(add(rax, qword[r12]), 0x49, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[r13]), 0x49, 0x03, 0x45, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax]), 0x4c, 0x03, 0x20);
    SINGLE_COMPARE(add(r12, qword[rbx]), 0x4c, 0x03, 0x23);
    SINGLE_COMPARE(add(r12, qword[rsp]), 0x4c, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[rbp]), 0x4c, 0x03, 0x65, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10]), 0x4d, 0x03, 0x22);
    SINGLE_COMPARE(add(r12, qword[r12]), 0x4d, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[r13]), 0x4d, 0x03, 0x65, 0x00);

    // reg, [base+imm8]
    SINGLE_COMPARE(add(rax, qword[rax + 0x1b]), 0x48, 0x03, 0x40, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbx + 0x1b]), 0x48, 0x03, 0x43, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rsp + 0x1b]), 0x48, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbp + 0x1b]), 0x48, 0x03, 0x45, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r10 + 0x1b]), 0x49, 0x03, 0x42, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r12 + 0x1b]), 0x49, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r13 + 0x1b]), 0x49, 0x03, 0x45, 0x1b);

    SINGLE_COMPARE(add(r12, qword[rax + 0x1b]), 0x4c, 0x03, 0x60, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbx + 0x1b]), 0x4c, 0x03, 0x63, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rsp + 0x1b]), 0x4c, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbp + 0x1b]), 0x4c, 0x03, 0x65, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r10 + 0x1b]), 0x4d, 0x03, 0x62, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r12 + 0x1b]), 0x4d, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r13 + 0x1b]), 0x4d, 0x03, 0x65, 0x1b);

    // reg, [base+imm32]
    SINGLE_COMPARE(add(rax, qword[rax + 0xabab]), 0x48, 0x03, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx + 0xabab]), 0x48, 0x03, 0x83, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rsp + 0xabab]), 0x48, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp + 0xabab]), 0x48, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 + 0xabab]), 0x49, 0x03, 0x82, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 + 0xabab]), 0x49, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 + 0xabab]), 0x49, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax + 0xabab]), 0x4c, 0x03, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx + 0xabab]), 0x4c, 0x03, 0xa3, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rsp + 0xabab]), 0x4c, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp + 0xabab]), 0x4c, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 + 0xabab]), 0x4d, 0x03, 0xa2, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 + 0xabab]), 0x4d, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 + 0xabab]), 0x4d, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);

    // reg, [index*scale]
    // Without a base register the expected encoding always carries a four-byte (here zero) displacement
    SINGLE_COMPARE(add(rax, qword[rax * 2]), 0x48, 0x03, 0x04, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx * 2]), 0x48, 0x03, 0x04, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp * 2]), 0x48, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 * 2]), 0x4a, 0x03, 0x04, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 * 2]), 0x4a, 0x03, 0x04, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 * 2]), 0x4a, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax * 2]), 0x4c, 0x03, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx * 2]), 0x4c, 0x03, 0x24, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp * 2]), 0x4c, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 * 2]), 0x4e, 0x03, 0x24, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 * 2]), 0x4e, 0x03, 0x24, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 * 2]), 0x4e, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);

    // reg, [base+index*scale+imm]
    SINGLE_COMPARE(add(rax, qword[rax + rax * 2]), 0x48, 0x03, 0x04, 0x40);
    SINGLE_COMPARE(add(rax, qword[rax + rbx * 2 + 0x1b]), 0x48, 0x03, 0x44, 0x58, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + rbp * 2]), 0x48, 0x03, 0x04, 0x68);
    SINGLE_COMPARE(add(rax, qword[rax + rbp + 0xabab]), 0x48, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r12 + 0x1b]), 0x4a, 0x03, 0x44, 0x20, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r12 * 4 + 0xabab]), 0x4a, 0x03, 0x84, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r13 * 2 + 0x1b]), 0x4a, 0x03, 0x44, 0x68, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r13 + 0xabab]), 0x4a, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + r12 * 2]), 0x4e, 0x03, 0x24, 0x60);
    SINGLE_COMPARE(add(r12, qword[rax + r13 + 0xabab]), 0x4e, 0x03, 0xa4, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + rbp * 2 + 0x1b]), 0x4c, 0x03, 0x64, 0x68, 0x1b);

    // reg, [imm32]
    SINGLE_COMPARE(add(rax, qword[0]), 0x48, 0x03, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[0xabab]), 0x48, 0x03, 0x04, 0x25, 0xab, 0xab, 0x00, 0x00);

    // [addr], reg
    // Memory destination switches the opcode from 0x03 to 0x01
    SINGLE_COMPARE(add(qword[rax], rax), 0x48, 0x01, 0x00);
    SINGLE_COMPARE(add(qword[rax + rax * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbx + rax * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x43, 0x1b);
    SINGLE_COMPARE(add(qword[rbx + rbp * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x6b, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbp * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0xad, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbp + r12 + 0x1b], rax), 0x4a, 0x01, 0x44, 0x25, 0x1b);
    SINGLE_COMPARE(add(qword[r12], rax), 0x49, 0x01, 0x04, 0x24);
    SINGLE_COMPARE(add(qword[r13 + rbx + 0xabab], rax), 0x49, 0x01, 0x84, 0x1d, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax + r13 * 2 + 0x1b], rsi), 0x4a, 0x01, 0x74, 0x68, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbx * 2], rsi), 0x48, 0x01, 0x74, 0x5d, 0x00);
    SINGLE_COMPARE(add(qword[rsp + r10 * 2 + 0x1b], r10), 0x4e, 0x01, 0x54, 0x54, 0x1b);

    // [addr], imm
    SINGLE_COMPARE(add(byte[rax], 2), 0x80, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 2), 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 0xabcd), 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax], 2), 0x48, 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(qword[rax], 0xabcd), 0x48, 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
}
|
|
|
|
|
|
|
|
// Checks the bytes emitted for single-operand instructions with register and memory operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseUnaryInstructionForms")
{
    SINGLE_COMPARE(div(rcx), 0x48, 0xf7, 0xf1);
    SINGLE_COMPARE(idiv(qword[rax]), 0x48, 0xf7, 0x38);
    SINGLE_COMPARE(mul(qword[rax + rbx]), 0x48, 0xf7, 0x24, 0x18);
    SINGLE_COMPARE(imul(r9), 0x49, 0xf7, 0xe9);
    SINGLE_COMPARE(neg(r9), 0x49, 0xf7, 0xd9);
    SINGLE_COMPARE(not_(r12), 0x49, 0xf7, 0xd4);
    SINGLE_COMPARE(inc(r12), 0x49, 0xff, 0xc4);
    SINGLE_COMPARE(dec(ecx), 0xff, 0xc9);
    SINGLE_COMPARE(dec(byte[rdx]), 0xfe, 0x0a);
}
|
|
|
|
|
|
|
|
// Checks the bytes emitted for mov/mov64 across 8-, 16-, 32- and 64-bit register, memory and immediate operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov")
{
    // A 64-bit register destination with an immediate is expected to use the full eight-byte immediate form
    SINGLE_COMPARE(mov(rcx, 1), 0x48, 0xb9, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(rcx, 0x1234567812345678ll), 0x48, 0xb9, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(ecx, 2), 0xb9, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(cl, 2), 0xb1, 0x02);
    SINGLE_COMPARE(mov(sil, 2), 0x48, 0xb6, 0x02);
    SINGLE_COMPARE(mov(r9b, 2), 0x49, 0xb1, 0x02);
    SINGLE_COMPARE(mov(rcx, qword[rdi]), 0x48, 0x8b, 0x0f);
    SINGLE_COMPARE(mov(dword[rax], 0xabcd), 0xc7, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(mov(r13, 1), 0x49, 0xbd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(r13, 0x1234567812345678ll), 0x49, 0xbd, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(r13d, 2), 0x41, 0xbd, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(r13, qword[r12]), 0x4d, 0x8b, 0x2c, 0x24);
    SINGLE_COMPARE(mov(dword[r13], 0xabcd), 0x41, 0xc7, 0x45, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(mov(qword[rdx], r9), 0x4c, 0x89, 0x0a);
    SINGLE_COMPARE(mov(byte[rsi], 0x3), 0xc6, 0x06, 0x03);
    SINGLE_COMPARE(mov(byte[rsi], al), 0x88, 0x06);
    SINGLE_COMPARE(mov(byte[rsi], dil), 0x48, 0x88, 0x3e);
    SINGLE_COMPARE(mov(byte[rsi], r10b), 0x4c, 0x88, 0x16);

    // 16-bit forms: every expected encoding starts with the 0x66 prefix
    SINGLE_COMPARE(mov(wordReg(ebx), 0x3a3d), 0x66, 0xbb, 0x3d, 0x3a);
    SINGLE_COMPARE(mov(word[rsi], 0x3a3d), 0x66, 0xc7, 0x06, 0x3d, 0x3a);
    SINGLE_COMPARE(mov(word[rsi], wordReg(eax)), 0x66, 0x89, 0x06);
    SINGLE_COMPARE(mov(word[rsi], wordReg(edi)), 0x66, 0x89, 0x3e);
    SINGLE_COMPARE(mov(word[rsi], wordReg(r10)), 0x66, 0x44, 0x89, 0x16);
}
|
|
|
|
|
2022-08-11 22:01:33 +01:00
|
|
|
// Checks the bytes emitted for sign-extending (movsx) and zero-extending (movzx) loads from byte and word memory.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMovExtended")
{
    SINGLE_COMPARE(movsx(eax, byte[rcx]), 0x0f, 0xbe, 0x01);
    SINGLE_COMPARE(movsx(r12, byte[r10]), 0x4d, 0x0f, 0xbe, 0x22);
    SINGLE_COMPARE(movsx(ebx, word[r11]), 0x41, 0x0f, 0xbf, 0x1b);
    SINGLE_COMPARE(movsx(rdx, word[rcx]), 0x48, 0x0f, 0xbf, 0x11);
    SINGLE_COMPARE(movzx(eax, byte[rcx]), 0x0f, 0xb6, 0x01);
    SINGLE_COMPARE(movzx(r12, byte[r10]), 0x4d, 0x0f, 0xb6, 0x22);
    SINGLE_COMPARE(movzx(ebx, word[r11]), 0x41, 0x0f, 0xb7, 0x1b);
    SINGLE_COMPARE(movzx(rdx, word[rcx]), 0x48, 0x0f, 0xb7, 0x11);
}
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// Checks the bytes emitted for `test` with immediate, register and memory second operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfTest")
{
    // Unlike `add`, a small immediate with a 32/64-bit register is still expected as four bytes
    SINGLE_COMPARE(test(al, 8), 0xf6, 0xc0, 0x08);
    SINGLE_COMPARE(test(eax, 8), 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rax, 8), 0x48, 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rcx, 0xabab), 0x48, 0xf7, 0xc1, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(test(rcx, rax), 0x48, 0x85, 0xc8);
    SINGLE_COMPARE(test(rax, qword[rcx]), 0x48, 0x85, 0x01);
}
|
|
|
|
|
|
|
|
// Checks the bytes emitted for shifts and rotates by 1, by an immediate count and by cl.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfShift")
{
    // A count of 1 is expected to use the dedicated short form (0xd0/0xd1) with no immediate byte
    SINGLE_COMPARE(shl(al, 1), 0xd0, 0xe0);
    SINGLE_COMPARE(shl(al, cl), 0xd2, 0xe0);
    SINGLE_COMPARE(shl(sil, cl), 0x48, 0xd2, 0xe6);
    SINGLE_COMPARE(shl(r10b, cl), 0x49, 0xd2, 0xe2);
    SINGLE_COMPARE(shr(al, 4), 0xc0, 0xe8, 0x04);
    SINGLE_COMPARE(shr(eax, 1), 0xd1, 0xe8);
    SINGLE_COMPARE(sal(eax, cl), 0xd3, 0xe0);
    SINGLE_COMPARE(sal(eax, 4), 0xc1, 0xe0, 0x04);
    SINGLE_COMPARE(sar(rax, 4), 0x48, 0xc1, 0xf8, 0x04);
    SINGLE_COMPARE(sar(r11, 1), 0x49, 0xd1, 0xfb);
    SINGLE_COMPARE(rol(eax, 1), 0xd1, 0xc0);
    SINGLE_COMPARE(rol(eax, cl), 0xd3, 0xc0);
    SINGLE_COMPARE(ror(eax, 1), 0xd1, 0xc8);
    SINGLE_COMPARE(ror(eax, cl), 0xd3, 0xc8);
}
|
|
|
|
|
|
|
|
// Checks the bytes emitted for `lea` with base+index, scaled index and scaled index plus displacement.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfLea")
{
    SINGLE_COMPARE(lea(rax, addr[rdx + rcx]), 0x48, 0x8d, 0x04, 0x0a);
    SINGLE_COMPARE(lea(rax, addr[rdx + rax * 4]), 0x48, 0x8d, 0x04, 0x82);
    SINGLE_COMPARE(lea(rax, addr[r13 + r12 * 4 + 4]), 0x4b, 0x8d, 0x44, 0xa5, 0x04);
}
|
|
|
|
|
2023-01-13 22:10:01 +00:00
|
|
|
// Checks the bytes emitted for `setcc` with byte-register and byte-memory destinations.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfSetcc")
{
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, bl), 0x0f, 0x95, 0xc3);
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, dil), 0x48, 0x0f, 0x95, 0xc7);
    SINGLE_COMPARE(setcc(ConditionX64::BelowEqual, byte[rcx]), 0x0f, 0x96, 0x01);
}
|
|
|
|
|
2023-10-06 20:02:32 +01:00
|
|
|
// Checks the bytes emitted for conditional moves with register and memory sources.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfCmov")
{
    SINGLE_COMPARE(cmov(ConditionX64::LessEqual, ebx, eax), 0x0f, 0x4e, 0xd8);
    SINGLE_COMPARE(cmov(ConditionX64::NotZero, rbx, qword[rax]), 0x48, 0x0f, 0x45, 0x18);
    SINGLE_COMPARE(cmov(ConditionX64::Zero, rbx, qword[rax + rcx]), 0x48, 0x0f, 0x44, 0x1c, 0x08);
    SINGLE_COMPARE(cmov(ConditionX64::BelowEqual, r14d, r15d), 0x45, 0x0f, 0x46, 0xf7);
}
|
|
|
|
|
2022-07-08 02:22:39 +01:00
|
|
|
// Checks the bytes emitted for indirect jmp/call through a register or a memory operand.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfAbsoluteJumps")
{
    SINGLE_COMPARE(jmp(rax), 0xff, 0xe0);
    SINGLE_COMPARE(jmp(r14), 0x41, 0xff, 0xe6);
    SINGLE_COMPARE(jmp(qword[r14 + rdx * 4]), 0x41, 0xff, 0x24, 0x96);
    SINGLE_COMPARE(call(rax), 0xff, 0xd0);
    SINGLE_COMPARE(call(r14), 0x41, 0xff, 0xd6);
    SINGLE_COMPARE(call(qword[r14 + rdx * 4]), 0x41, 0xff, 0x14, 0x96);
}
|
|
|
|
|
2022-08-11 22:01:33 +01:00
|
|
|
// Checks the bytes emitted for the two- and three-operand forms of imul.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfImul")
{
    SINGLE_COMPARE(imul(ecx, esi), 0x0f, 0xaf, 0xce);
    SINGLE_COMPARE(imul(r12, rax), 0x4c, 0x0f, 0xaf, 0xe0);
    SINGLE_COMPARE(imul(r12, qword[rdx + rdi]), 0x4c, 0x0f, 0xaf, 0x24, 0x3a);

    // Three-operand forms: immediates that fit a signed byte use opcode 0x6b, larger ones use 0x69 with four bytes
    SINGLE_COMPARE(imul(ecx, edx, 8), 0x6b, 0xca, 0x08);
    SINGLE_COMPARE(imul(ecx, r9d, 0xabcd), 0x41, 0x69, 0xc9, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r8d, eax, -9), 0x44, 0x6b, 0xc0, 0xf7);
    SINGLE_COMPARE(imul(rcx, rdx, 17), 0x48, 0x6b, 0xca, 0x11);
    SINGLE_COMPARE(imul(rcx, r12, 0xabcd), 0x49, 0x69, 0xcc, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r12, rax, -13), 0x4c, 0x6b, 0xe0, 0xf3);
}
|
|
|
|
|
2022-10-21 18:54:01 +01:00
|
|
|
// Checks the byte sequences emitted by nop(n) for lengths 1 through 9, plus a length that needs two instructions.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "NopForms")
{
    SINGLE_COMPARE(nop(), 0x90);
    SINGLE_COMPARE(nop(2), 0x66, 0x90);
    SINGLE_COMPARE(nop(3), 0x0f, 0x1f, 0x00);
    SINGLE_COMPARE(nop(4), 0x0f, 0x1f, 0x40, 0x00);
    SINGLE_COMPARE(nop(5), 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(6), 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(7), 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(8), 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(9), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(15), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00); // 9+6
}
|
|
|
|
|
|
|
|
// Checks the padding bytes produced by align() after a one-byte `ret` for each AlignmentDataX64 fill kind.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentForms")
{
    // Nop fill, 7 bytes of padding: a single seven-byte nop
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Nop);
        },
        {0xc3, 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}
    ));

    // Nop fill, 31 bytes of padding: three nine-byte nops followed by a four-byte nop
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(32, AlignmentDataX64::Nop);
        },
        {0xc3, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x1f, 0x40, 0x00}
    ));

    // Int3 fill: every padding byte is 0xcc
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Int3);
        },
        {0xc3, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc}
    ));

    // Ud2 fill: two-byte 0x0f 0x0b pairs, with the odd trailing byte filled by 0xcc
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Ud2);
        },
        {0xc3, 0x0f, 0x0b, 0x0f, 0x0b, 0x0f, 0x0b, 0xcc}
    ));
}
|
|
|
|
|
|
|
|
// Emits more code than the small sequences used elsewhere in this file so that the builder has to grow its buffer.
// Each scenario also checks the finalized size, so a short or overlong buffer fails the test instead of passing silently.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentOverflow")
{
    // Test that alignment correctly resizes the code buffer
    {
        AssemblyBuilderX64 build(/* logText */ false);

        build.ret();
        build.align(8192, AlignmentDataX64::Nop);
        build.finalize();

        // ret (1 byte) plus padding must end exactly on the 8192-byte boundary
        CHECK(build.code.size() == 8192);
    }

    {
        AssemblyBuilderX64 build(/* logText */ false);

        build.ret();
        build.align(8192, AlignmentDataX64::Int3);
        build.finalize();

        CHECK(build.code.size() == 8192);
    }

    // Same growth path, but driven by individual one-byte instructions instead of align()
    {
        AssemblyBuilderX64 build(/* logText */ false);

        for (int i = 0; i < 8192; i++)
            build.int3();
        build.finalize();

        CHECK(build.code.size() == 8192);
    }

    {
        AssemblyBuilderX64 build(/* logText */ false);

        build.ret();
        build.align(8192, AlignmentDataX64::Ud2);
        build.finalize();

        CHECK(build.code.size() == 8192);
    }
}
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// Checks label resolution for conditional and unconditional jumps, both backward and forward.
// The last four bytes of each jump are a little-endian displacement measured from the end of the jump instruction.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "ControlFlow")
{
    // Jump back
    // Label at offset 0, jump ends at offset 13: displacement is -13 (0xfffffff3)
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label start = build.setLabel();
            build.add(rsi, 1);
            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Equal, start);
        },
        {0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf3, 0xff, 0xff, 0xff}
    ));

    // Jump back, but the label is set before use
    // Label at offset 4 (after the add), jump ends at offset 13: displacement is -9 (0xfffffff7)
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label start;
            build.add(rsi, 1);
            build.setLabel(start);
            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Equal, start);
        },
        {0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf7, 0xff, 0xff, 0xff}
    ));

    // Jump forward
    // The label is bound after the jump is emitted; the displacement skips the four-byte `or`
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label skip;

            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Greater, skip);
            build.or_(rdi, 0x3e);
            build.setLabel(skip);
        },
        {0x48, 0x3b, 0xf7, 0x0f, 0x8f, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xcf, 0x3e}
    ));

    // Regular jump
    // Unconditional forward jump over the four-byte `and`
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label skip;

            build.jmp(skip);
            build.and_(rdi, 0x3e);
            build.setLabel(skip);
        },
        {0xe9, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xe7, 0x3e}
    ));
}
|
|
|
|
|
2022-07-08 02:22:39 +01:00
|
|
|
// Checks that a call to a label bound later in the stream is resolved to the right relative displacement.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelCall")
{
    // The call ends at offset 9 and fnB starts at offset 10 (after the one-byte ret), so the displacement is 1
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label fnB;

            build.and_(rcx, 0x3e);
            build.call(fnB);
            build.ret();

            build.setLabel(fnB);
            build.lea(rax, addr[rcx + 0x1f]);
            build.ret();
        },
        {0x48, 0x83, 0xe1, 0x3e, 0xe8, 0x01, 0x00, 0x00, 0x00, 0xc3, 0x48, 0x8d, 0x41, 0x1f, 0xc3}
    ));
}
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXBinaryInstructionForms")
|
|
|
|
{
|
2022-10-21 18:54:01 +01:00
|
|
|
SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x58, 0xc6);
|
|
|
|
SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x29, 0x58, 0x01);
|
|
|
|
SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymm14), 0xc4, 0x41, 0x2d, 0x58, 0xc6);
|
|
|
|
SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymmword[r9]), 0xc4, 0x41, 0x2d, 0x58, 0x01);
|
|
|
|
SINGLE_COMPARE(vaddps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x58, 0xc6);
|
|
|
|
SINGLE_COMPARE(vaddps(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x28, 0x58, 0x01);
|
|
|
|
SINGLE_COMPARE(vaddsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x58, 0xc6);
|
|
|
|
SINGLE_COMPARE(vaddsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x58, 0x01);
|
|
|
|
SINGLE_COMPARE(vaddss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x58, 0xc6);
|
|
|
|
SINGLE_COMPARE(vaddss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x58, 0x01);
|
|
|
|
|
|
|
|
SINGLE_COMPARE(vaddps(xmm1, xmm2, xmm3), 0xc4, 0xe1, 0x68, 0x58, 0xcb);
|
|
|
|
SINGLE_COMPARE(vaddps(xmm9, xmm12, xmmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x18, 0x58, 0x4c, 0x71, 0x1c);
|
|
|
|
SINGLE_COMPARE(vaddps(ymm1, ymm2, ymm3), 0xc4, 0xe1, 0x6c, 0x58, 0xcb);
|
|
|
|
SINGLE_COMPARE(vaddps(ymm9, ymm12, ymmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x1c, 0x58, 0x4c, 0x71, 0x1c);
|
2022-07-21 22:16:54 +01:00
|
|
|
|
|
|
|
// Coverage for other instructions that follow the same pattern
|
2022-10-21 18:54:01 +01:00
|
|
|
SINGLE_COMPARE(vsubsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5c, 0xc6);
|
|
|
|
SINGLE_COMPARE(vmulsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x59, 0xc6);
|
|
|
|
SINGLE_COMPARE(vdivsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5e, 0xc6);
|
2022-07-21 22:16:54 +01:00
|
|
|
|
2024-01-27 03:20:56 +00:00
|
|
|
SINGLE_COMPARE(vsubps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5c, 0xc6);
|
|
|
|
SINGLE_COMPARE(vmulps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x59, 0xc6);
|
|
|
|
SINGLE_COMPARE(vdivps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5e, 0xc6);
|
|
|
|
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
SINGLE_COMPARE(vorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x56, 0xc6);
|
2022-10-21 18:54:01 +01:00
|
|
|
SINGLE_COMPARE(vxorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x57, 0xc6);
|
2024-01-27 03:20:56 +00:00
|
|
|
SINGLE_COMPARE(vorps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x56, 0xc6);
|
2023-01-04 20:53:17 +00:00
|
|
|
|
|
|
|
SINGLE_COMPARE(vandpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x54, 0xc6);
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
SINGLE_COMPARE(vandnpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x55, 0xc6);
|
2023-01-04 20:53:17 +00:00
|
|
|
|
|
|
|
SINGLE_COMPARE(vmaxsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5f, 0xc6);
|
|
|
|
SINGLE_COMPARE(vminsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5d, 0xc6);
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
|
CodeGen: Implement support for math.lerp lowering (#1609)
To implement math.lerp without branches, we add SELECT_NUM which
selects one of the two inputs based on the comparison condition.
For simplicity, we only support C == D for now; this can be extended to
a more generic version with a IrCondition operand E, but that requires
more work on the SSE side (to flip the comparison for some conditions
like Greater, and expose more generic vcmpsd).
Note: On AArch64 this will effectively result in a change in floating
point
behavior between native code and non-native code: clang synthesizes
fmadd (because floating point contraction is allowed by default, and the
arch always has the instruction), whereas this change will use
fmul+fadd.
I am not sure if this is good or bad, and if this is a problem in C or
not.
Specifically, clang's behavior results in different results between X64
and AArch64 when *not* using codegen, and with this change the behavior
when using codegen is... the same? :)
Fixing this will require either using LERP_NUM instead and hand-coding
lowering, or exposing some sort of "quasi" MADD_NUM (which would
lower to fma on AArch64 and mul+add on X64).
A small benefit to the current approach is `lerp(1, 5, t)`
constant-folds the
subtraction. With LERP_NUM this optimization will need to be implemented
manually as a partial constant-folding for LERP_NUM.
A similar problem exists today for vector.cross & vector.dot. So maybe
this
is not something we need to fix, unsure.
2025-01-16 18:48:27 +00:00
|
|
|
SINGLE_COMPARE(vcmpeqsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x00);
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
SINGLE_COMPARE(vcmpltsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x01);
|
2022-05-26 23:08:16 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Encoding checks for the AVX square-root family (vsqrtpd/vsqrtps/vsqrtsd/vsqrtss)
// with xmm and ymm destinations and with register and memory sources, followed by
// vucomisd as coverage for other instructions with the same operand pattern.
// NOTE(review): SINGLE_COMPARE is defined outside this chunk; it presumably assembles
// the single instruction and compares the emitted bytes with the listed ones - confirm.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXUnaryMergeInstructionForms")
{
    SINGLE_COMPARE(vsqrtpd(xmm8, xmm10), 0xc4, 0x41, 0x79, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymm10), 0xc4, 0x41, 0x7d, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtps(xmm8, xmm10), 0xc4, 0x41, 0x78, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x51, 0x01);

    // Coverage for other instructions that follow the same pattern
    SINGLE_COMPARE(vucomisd(xmm1, xmm4), 0xc4, 0xe1, 0x79, 0x2e, 0xcc);
}
|
|
|
|
|
|
|
|
// Encoding checks for AVX move instructions: scalar moves (vmovsd/vmovss) in
// store, load and three-register forms; aligned and unaligned packed moves
// (vmovapd/vmovaps/vmovupd/vmovups) for xmm load/store and ymm load; and vmovq
// between an xmm register and a general-purpose register or qword memory.
// NOTE(review): SINGLE_COMPARE is defined outside this chunk; it presumably assembles
// the single instruction and compares the emitted bytes with the listed ones - confirm.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXMoveInstructionForms")
{
    SINGLE_COMPARE(vmovsd(qword[r9], xmm10), 0xc4, 0x41, 0x7b, 0x11, 0x11);
    SINGLE_COMPARE(vmovsd(xmm8, qword[r9]), 0xc4, 0x41, 0x7b, 0x10, 0x01);
    SINGLE_COMPARE(vmovsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x10, 0xc6);
    SINGLE_COMPARE(vmovss(dword[r9], xmm10), 0xc4, 0x41, 0x7a, 0x11, 0x11);
    SINGLE_COMPARE(vmovss(xmm8, dword[r9]), 0xc4, 0x41, 0x7a, 0x10, 0x01);
    SINGLE_COMPARE(vmovss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x10, 0xc6);
    SINGLE_COMPARE(vmovapd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x28, 0x01);
    SINGLE_COMPARE(vmovapd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x29, 0x11);
    SINGLE_COMPARE(vmovapd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x29, 0x11);
    SINGLE_COMPARE(vmovaps(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x28, 0x01);
    SINGLE_COMPARE(vmovupd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x10, 0x01);
    SINGLE_COMPARE(vmovupd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x11, 0x11);
    SINGLE_COMPARE(vmovupd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x11, 0x11);
    SINGLE_COMPARE(vmovups(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x10, 0x01);
    SINGLE_COMPARE(vmovq(xmm1, rbx), 0xc4, 0xe1, 0xf9, 0x6e, 0xcb);
    SINGLE_COMPARE(vmovq(rbx, xmm1), 0xc4, 0xe1, 0xf9, 0x7e, 0xcb);
    SINGLE_COMPARE(vmovq(xmm1, qword[r9]), 0xc4, 0xc1, 0xf9, 0x6e, 0x09);
    SINGLE_COMPARE(vmovq(qword[r9], xmm1), 0xc4, 0xc1, 0xf9, 0x7e, 0x09);
}
|
|
|
|
|
2022-08-11 22:01:33 +01:00
|
|
|
// Encoding checks for AVX conversion instructions: vcvttsd2si and vcvtsi2sd with
// 32-bit and 64-bit integer operands (register and memory), plus vcvtsd2ss and
// vcvtss2sd with register and memory sources.
// NOTE(review): SINGLE_COMPARE is defined outside this chunk; it presumably assembles
// the single instruction and compares the emitted bytes with the listed ones - confirm.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXConversionInstructionForms")
{
    SINGLE_COMPARE(vcvttsd2si(ecx, xmm0), 0xc4, 0xe1, 0x7b, 0x2c, 0xc8);
    SINGLE_COMPARE(vcvttsd2si(r9d, xmmword[rcx + rdx]), 0xc4, 0x61, 0x7b, 0x2c, 0x0c, 0x11);
    SINGLE_COMPARE(vcvttsd2si(rdx, xmm0), 0xc4, 0xe1, 0xfb, 0x2c, 0xd0);
    SINGLE_COMPARE(vcvttsd2si(r13, xmmword[rcx + rdx]), 0xc4, 0x61, 0xfb, 0x2c, 0x2c, 0x11);
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, ecx), 0xc4, 0xe1, 0x2b, 0x2a, 0xe9);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, dword[rcx + rdx]), 0xc4, 0xe1, 0x23, 0x2a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, r13), 0xc4, 0xc1, 0xab, 0x2a, 0xed);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x2a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtsd2ss(xmm5, xmm10, xmm11), 0xc4, 0xc1, 0x2b, 0x5a, 0xeb);
    SINGLE_COMPARE(vcvtsd2ss(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x5a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtss2sd(xmm3, xmm8, xmm12), 0xc4, 0xc1, 0x3a, 0x5a, 0xdc);
    SINGLE_COMPARE(vcvtss2sd(xmm4, xmm9, dword[rcx + rsi]), 0xc4, 0xe1, 0x32, 0x5a, 0x24, 0x31);
}
|
|
|
|
|
|
|
|
// Encoding checks for AVX instructions that take a fourth operand (an immediate,
// a rounding mode, or a mask register): vroundsd, vblendvpd, vpshufps, vpinsrd
// and vdpps.
// NOTE(review): SINGLE_COMPARE is defined outside this chunk; it presumably assembles
// the single instruction and compares the emitted bytes with the listed ones - confirm.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXTernaryInstructionForms")
{
    // Sets FFlag::LuauVectorLibNativeDot to true for this test body
    // (presumably restored when sff leaves scope - see ScopedFlags.h).
    ScopedFastFlag sff{FFlag::LuauVectorLibNativeDot, true};

    SINGLE_COMPARE(vroundsd(xmm7, xmm12, xmm3, RoundingModeX64::RoundToNegativeInfinity), 0xc4, 0xe3, 0x19, 0x0b, 0xfb, 0x09);
    SINGLE_COMPARE(
        vroundsd(xmm8, xmm13, xmmword[r13 + rdx], RoundingModeX64::RoundToPositiveInfinity), 0xc4, 0x43, 0x11, 0x0b, 0x44, 0x15, 0x00, 0x0a
    );
    SINGLE_COMPARE(vroundsd(xmm9, xmm14, xmmword[rcx + r10], RoundingModeX64::RoundToZero), 0xc4, 0x23, 0x09, 0x0b, 0x0c, 0x11, 0x0b);
    SINGLE_COMPARE(vblendvpd(xmm7, xmm12, xmmword[rcx + r10], xmm5), 0xc4, 0xa3, 0x19, 0x4b, 0x3c, 0x11, 0x50);

    SINGLE_COMPARE(vpshufps(xmm7, xmm12, xmmword[rcx + r10], 0b11010100), 0xc4, 0xa1, 0x18, 0xc6, 0x3c, 0x11, 0xd4);
    SINGLE_COMPARE(vpinsrd(xmm7, xmm12, xmmword[rcx + r10], 2), 0xc4, 0xa3, 0x19, 0x22, 0x3c, 0x11, 0x02);

    SINGLE_COMPARE(vdpps(xmm7, xmm12, xmmword[rcx + r10], 2), 0xc4, 0xa3, 0x19, 0x40, 0x3c, 0x11, 0x02);
}
|
|
|
|
|
2022-07-21 22:16:54 +01:00
|
|
|
// Encoding checks for miscellaneous instructions: int3, ud2, the bit scans
// bsr/bsf, and bswap on 32-bit and 64-bit registers (including r12d/r12, whose
// expected bytes carry an extra prefix byte).
// NOTE(review): SINGLE_COMPARE is defined outside this chunk; it presumably assembles
// the single instruction and compares the emitted bytes with the listed ones - confirm.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "MiscInstructions")
{
    SINGLE_COMPARE(int3(), 0xcc);
    SINGLE_COMPARE(ud2(), 0x0f, 0x0b);
    SINGLE_COMPARE(bsr(eax, edx), 0x0f, 0xbd, 0xc2);
    SINGLE_COMPARE(bsf(eax, edx), 0x0f, 0xbc, 0xc2);
    SINGLE_COMPARE(bswap(eax), 0x0f, 0xc8);
    SINGLE_COMPARE(bswap(r12d), 0x41, 0x0f, 0xcc);
    SINGLE_COMPARE(bswap(rax), 0x48, 0x0f, 0xc8);
    SINGLE_COMPARE(bswap(r12), 0x49, 0x0f, 0xcc);
}
|
|
|
|
|
2023-06-16 18:35:18 +01:00
|
|
|
// Checks `lea` with a label operand whose position is only set after the
// instruction has been emitted (a forward reference).
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelLea")
{
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label fn;
            build.lea(rax, fn);
            build.ret();

            build.setLabel(fn);
            build.ret();
        },
        // lea bytes end with the 32-bit value 0x00000001, matching the single
        // ret byte (0xc3) emitted between the lea and the label; the last 0xc3
        // is the ret placed at the label.
        {0x48, 0x8d, 0x05, 0x01, 0x00, 0x00, 0x00, 0xc3, 0xc3}
    ));
}
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// Builds a varied instruction sequence with text logging enabled (logText = true),
// finalizes the builder, and checks that "\n" + build.text equals the expected
// listing held in the raw string below.
// NOTE(review): the comparison is an exact string match, so every character inside
// the raw string - including spacing between mnemonic and operands - is significant.
// Confirm the listing's whitespace against the builder's actual log output.
TEST_CASE("LogTest")
|
|
|
|
{
|
|
|
|
AssemblyBuilderX64 build(/* logText= */ true);
|
|
|
|
|
|
|
|
build.push(r12);
|
2022-10-21 18:54:01 +01:00
|
|
|
build.align(8);
|
|
|
|
build.align(8, AlignmentDataX64::Int3);
|
|
|
|
build.align(8, AlignmentDataX64::Ud2);
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
build.add(rax, rdi);
|
|
|
|
build.add(rcx, 8);
|
|
|
|
build.sub(dword[rax], 0x1fdc);
|
|
|
|
build.and_(dword[rcx], 0x37);
|
|
|
|
build.mov(rdi, qword[rax + rsi * 2]);
|
|
|
|
build.vaddss(xmm0, xmm0, dword[rax + r14 * 2 + 0x1c]);
|
|
|
|
|
|
|
|
Label start = build.setLabel();
|
|
|
|
build.cmp(rsi, rdi);
|
2022-11-04 17:33:22 +00:00
|
|
|
build.jcc(ConditionX64::Equal, start);
|
2023-06-16 18:35:18 +01:00
|
|
|
build.lea(rcx, start);
|
2023-10-27 22:18:41 +01:00
|
|
|
build.lea(rcx, addr[rdx]);
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
build.jmp(qword[rdx]);
|
|
|
|
build.vaddps(ymm9, ymm12, ymmword[rbp + 0xc]);
|
|
|
|
build.vaddpd(ymm2, ymm7, build.f64(2.5));
|
|
|
|
build.neg(qword[rbp + r12 * 2]);
|
|
|
|
build.mov64(r10, 0x1234567812345678ll);
|
|
|
|
build.vmovapd(xmmword[rax], xmm11);
|
2022-08-11 22:01:33 +01:00
|
|
|
build.movzx(eax, byte[rcx]);
|
|
|
|
build.movsx(rsi, word[r12]);
|
|
|
|
build.imul(rcx, rdx);
|
|
|
|
build.imul(rcx, rdx, 8);
|
2022-10-14 20:48:41 +01:00
|
|
|
build.vroundsd(xmm1, xmm2, xmm3, RoundingModeX64::RoundToNearestEven);
|
2022-10-21 18:54:01 +01:00
|
|
|
build.add(rdx, qword[rcx - 12]);
|
2022-05-26 23:08:16 +01:00
|
|
|
build.pop(r12);
|
2023-10-06 20:02:32 +01:00
|
|
|
build.cmov(ConditionX64::AboveEqual, rax, rbx);
|
2022-05-26 23:08:16 +01:00
|
|
|
build.ret();
|
2022-07-21 22:16:54 +01:00
|
|
|
build.int3();
|
2022-05-26 23:08:16 +01:00
|
|
|
|
2022-10-21 18:54:01 +01:00
|
|
|
build.nop();
|
|
|
|
build.nop(2);
|
|
|
|
build.nop(3);
|
|
|
|
build.nop(4);
|
|
|
|
build.nop(5);
|
|
|
|
build.nop(6);
|
|
|
|
build.nop(7);
|
|
|
|
build.nop(8);
|
|
|
|
build.nop(9);
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
build.finalize();
|
|
|
|
|
2022-11-04 17:33:22 +00:00
|
|
|
std::string expected = R"(
|
2022-05-26 23:08:16 +01:00
|
|
|
push r12
|
2022-10-21 18:54:01 +01:00
|
|
|
; align 8
|
|
|
|
nop word ptr[rax+rax] ; 6-byte nop
|
|
|
|
; align 8 using int3
|
|
|
|
; align 8 using ud2
|
2022-05-26 23:08:16 +01:00
|
|
|
add rax,rdi
|
|
|
|
add rcx,8
|
|
|
|
sub dword ptr [rax],1FDCh
|
|
|
|
and dword ptr [rcx],37h
|
|
|
|
mov rdi,qword ptr [rax+rsi*2]
|
|
|
|
vaddss xmm0,xmm0,dword ptr [rax+r14*2+01Ch]
|
|
|
|
.L1:
|
|
|
|
cmp rsi,rdi
|
|
|
|
je .L1
|
2023-06-16 18:35:18 +01:00
|
|
|
lea rcx,.L1
|
2023-10-27 22:18:41 +01:00
|
|
|
lea rcx,[rdx]
|
2022-05-26 23:08:16 +01:00
|
|
|
jmp qword ptr [rdx]
|
|
|
|
vaddps ymm9,ymm12,ymmword ptr [rbp+0Ch]
|
|
|
|
vaddpd ymm2,ymm7,qword ptr [.start-8]
|
|
|
|
neg qword ptr [rbp+r12*2]
|
|
|
|
mov r10,1234567812345678h
|
|
|
|
vmovapd xmmword ptr [rax],xmm11
|
2022-08-11 22:01:33 +01:00
|
|
|
movzx eax,byte ptr [rcx]
|
|
|
|
movsx rsi,word ptr [r12]
|
|
|
|
imul rcx,rdx
|
|
|
|
imul rcx,rdx,8
|
2022-10-14 20:48:41 +01:00
|
|
|
vroundsd xmm1,xmm2,xmm3,8
|
2022-10-21 18:54:01 +01:00
|
|
|
add rdx,qword ptr [rcx-0Ch]
|
2022-05-26 23:08:16 +01:00
|
|
|
pop r12
|
2023-10-06 20:02:32 +01:00
|
|
|
cmovae rax,rbx
|
2022-05-26 23:08:16 +01:00
|
|
|
ret
|
2022-07-21 22:16:54 +01:00
|
|
|
int3
|
2022-10-21 18:54:01 +01:00
|
|
|
nop
|
|
|
|
xchg ax, ax ; 2-byte nop
|
|
|
|
nop dword ptr[rax] ; 3-byte nop
|
|
|
|
nop dword ptr[rax] ; 4-byte nop
|
|
|
|
nop dword ptr[rax+rax] ; 5-byte nop
|
|
|
|
nop word ptr[rax+rax] ; 6-byte nop
|
|
|
|
nop dword ptr[rax] ; 7-byte nop
|
|
|
|
nop dword ptr[rax+rax] ; 8-byte nop
|
|
|
|
nop word ptr[rax+rax] ; 9-byte nop
|
2022-05-26 23:08:16 +01:00
|
|
|
)";
|
2022-11-04 17:33:22 +00:00
|
|
|
|
|
|
|
CHECK("\n" + build.text == expected);
|
2022-05-26 23:08:16 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "Constants")
|
|
|
|
{
|
|
|
|
// clang-format off
|
2022-11-04 17:33:22 +00:00
|
|
|
CHECK(check(
|
2022-05-26 23:08:16 +01:00
|
|
|
[](AssemblyBuilderX64& build) {
|
|
|
|
build.xor_(rax, rax);
|
|
|
|
build.add(rax, build.i64(0x1234567887654321));
|
|
|
|
build.vmovss(xmm2, build.f32(1.0f));
|
|
|
|
build.vmovsd(xmm3, build.f64(1.0));
|
|
|
|
build.vmovaps(xmm4, build.f32x4(1.0f, 2.0f, 4.0f, 8.0f));
|
2022-08-11 22:01:33 +01:00
|
|
|
char arr[16] = "hello world!123";
|
|
|
|
build.vmovupd(xmm5, build.bytes(arr, 16, 8));
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
build.vmovapd(xmm5, build.f64x2(5.0, 6.0));
|
2022-05-26 23:08:16 +01:00
|
|
|
build.ret();
|
|
|
|
},
|
|
|
|
{
|
|
|
|
0x48, 0x33, 0xc0,
|
|
|
|
0x48, 0x03, 0x05, 0xee, 0xff, 0xff, 0xff,
|
2022-10-21 18:54:01 +01:00
|
|
|
0xc4, 0xe1, 0x7a, 0x10, 0x15, 0xe1, 0xff, 0xff, 0xff,
|
|
|
|
0xc4, 0xe1, 0x7b, 0x10, 0x1d, 0xcc, 0xff, 0xff, 0xff,
|
|
|
|
0xc4, 0xe1, 0x78, 0x28, 0x25, 0xab, 0xff, 0xff, 0xff,
|
|
|
|
0xc4, 0xe1, 0x79, 0x10, 0x2d, 0x92, 0xff, 0xff, 0xff,
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
0xc4, 0xe1, 0x79, 0x28, 0x2d, 0x79, 0xff, 0xff, 0xff,
|
2022-05-26 23:08:16 +01:00
|
|
|
0xc3
|
2022-08-11 22:01:33 +01:00
|
|
|
},
|
|
|
|
{
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x40,
|
|
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x40,
|
2022-08-11 22:01:33 +01:00
|
|
|
'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', '1', '2', '3', 0x0,
|
|
|
|
0x00, 0x00, 0x80, 0x3f,
|
|
|
|
0x00, 0x00, 0x00, 0x40,
|
|
|
|
0x00, 0x00, 0x80, 0x40,
|
|
|
|
0x00, 0x00, 0x00, 0x41,
|
|
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // padding to align f32x4
|
|
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f,
|
|
|
|
0x00, 0x00, 0x00, 0x00, // padding to align f64
|
|
|
|
0x00, 0x00, 0x80, 0x3f,
|
|
|
|
0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
|
2022-11-04 17:33:22 +00:00
|
|
|
}));
|
2022-05-26 23:08:16 +01:00
|
|
|
// clang-format on
|
|
|
|
}
|
|
|
|
|
|
|
|
// Requests 3001 distinct i32 constants (0..3000) and verifies the resulting data:
// total size is 12004 bytes (3001 * 4), and the 4-byte slot at index i holds the
// value 3000 - i with its low byte first and the two upper bytes zero.
TEST_CASE("ConstantStorage")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    for (int i = 0; i <= 3000; i++)
        build.vaddss(xmm0, xmm0, build.i32(i));

    build.finalize();

    CHECK(build.data.size() == 12004);

    for (int i = 0; i <= 3000; i++)
    {
        CHECK(build.data[i * 4 + 0] == ((3000 - i) & 0xff));
        CHECK(build.data[i * 4 + 1] == ((3000 - i) >> 8));
        CHECK(build.data[i * 4 + 2] == 0x00);
        CHECK(build.data[i * 4 + 3] == 0x00);
    }
}
|
|
|
|
|
2024-01-27 03:20:56 +00:00
|
|
|
// Requests the same f32 constant (1.0f) 3001 times and verifies that the data
// holds only one 4-byte copy: 00 00 80 3f.
TEST_CASE("ConstantStorageDedup")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    for (int i = 0; i <= 3000; i++)
        build.vaddss(xmm0, xmm0, build.f32(1.0f));

    build.finalize();

    CHECK(build.data.size() == 4);

    CHECK(build.data[0] == 0x00);
    CHECK(build.data[1] == 0x00);
    CHECK(build.data[2] == 0x80);
    CHECK(build.data[3] == 0x3f);
}
|
|
|
|
|
2023-05-12 18:50:47 +01:00
|
|
|
// Verifies that requesting f64(2) again, after 4096 further f64 constants have
// been requested, yields an operand with the same `imm` as the first request.
TEST_CASE("ConstantCaching")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    OperandX64 two = build.f64(2);

    // Force data relocation
    for (int i = 0; i < 4096; i++)
        build.f64(i);

    CHECK(build.f64(2).imm == two.imm);

    build.finalize();
}
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// Ends the doctest test suite (presumably opened by a TEST_SUITE_BEGIN earlier in this file).
TEST_SUITE_END();
|