diff --git a/CodeGen/include/Luau/AssemblyBuilderA64.h b/CodeGen/include/Luau/AssemblyBuilderA64.h index 78251012..0c827f64 100644 --- a/CodeGen/include/Luau/AssemblyBuilderA64.h +++ b/CodeGen/include/Luau/AssemblyBuilderA64.h @@ -230,6 +230,7 @@ private: void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op); void placeBFM(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op, int immr, int imms); void placeER(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift); + void placeVR(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint16_t op, uint8_t op2); void place(uint32_t word); diff --git a/CodeGen/src/AssemblyBuilderA64.cpp b/CodeGen/src/AssemblyBuilderA64.cpp index de0eb0cd..44bbe5e8 100644 --- a/CodeGen/src/AssemblyBuilderA64.cpp +++ b/CodeGen/src/AssemblyBuilderA64.cpp @@ -63,13 +63,22 @@ AssemblyBuilderA64::~AssemblyBuilderA64() void AssemblyBuilderA64::mov(RegisterA64 dst, RegisterA64 src) { - LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst == sp); - LUAU_ASSERT(dst.kind == src.kind || (dst.kind == KindA64::x && src == sp) || (dst == sp && src.kind == KindA64::x)); + if (dst.kind != KindA64::q) + { + LUAU_ASSERT(dst.kind == KindA64::w || dst.kind == KindA64::x || dst == sp); + LUAU_ASSERT(dst.kind == src.kind || (dst.kind == KindA64::x && src == sp) || (dst == sp && src.kind == KindA64::x)); - if (dst == sp || src == sp) - placeR1("mov", dst, src, 0b00'100010'0'000000000000); + if (dst == sp || src == sp) + placeR1("mov", dst, src, 0b00'100010'0'000000000000); + else + placeSR2("mov", dst, src, 0b01'01010); + } else - placeSR2("mov", dst, src, 0b01'01010); + { + LUAU_ASSERT(dst.kind == src.kind); + + placeR1("mov", dst, src, 0b10'01110'10'1'00000'00011'1 | (src.index << 6)); + } } void AssemblyBuilderA64::mov(RegisterA64 dst, int src) @@ -575,12 +584,18 @@ void AssemblyBuilderA64::fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src placeR3("fadd", dst, src1, src2, 0b11110'01'1, 0b0010'10); } - else + else if (dst.kind == KindA64::s) { LUAU_ASSERT(dst.kind == KindA64::s && src1.kind == KindA64::s && src2.kind == KindA64::s); placeR3("fadd", dst, src1, src2, 0b11110'00'1, 0b0010'10); } + else + { + LUAU_ASSERT(dst.kind == KindA64::q && src1.kind == KindA64::q && src2.kind == KindA64::q); + + placeVR("fadd", dst, src1, src2, 0b0'01110'0'0'1, 0b11010'1); + } } void AssemblyBuilderA64::fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) @@ -591,12 +606,18 @@ void AssemblyBuilderA64::fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src placeR3("fdiv", dst, src1, src2, 0b11110'01'1, 0b0001'10); } - else + else if (dst.kind == KindA64::s) { LUAU_ASSERT(dst.kind == KindA64::s && src1.kind == KindA64::s && src2.kind == KindA64::s); placeR3("fdiv", dst, src1, src2, 0b11110'00'1, 0b0001'10); } + else + { + LUAU_ASSERT(dst.kind == KindA64::q && src1.kind == KindA64::q && src2.kind == KindA64::q); + + placeVR("fdiv", dst, src1, src2, 0b1'01110'00'1, 0b11111'1); + } } void AssemblyBuilderA64::fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2) @@ -607,12 +628,18 @@ void AssemblyBuilderA64::fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src placeR3("fmul", dst, src1, src2, 0b11110'01'1, 0b0000'10); } - else + else if (dst.kind == KindA64::s) { LUAU_ASSERT(dst.kind == KindA64::s && src1.kind == KindA64::s && src2.kind == KindA64::s); placeR3("fmul", dst, src1, src2, 0b11110'00'1, 0b0000'10); } + else + { + LUAU_ASSERT(dst.kind == KindA64::q && src1.kind == KindA64::q && src2.kind == KindA64::q); + + placeVR("fmul", dst, src1, src2, 0b1'01110'00'1, 0b11011'1); + } } void AssemblyBuilderA64::fneg(RegisterA64 dst, RegisterA64 src) @@ -623,12 +650,18 @@ void AssemblyBuilderA64::fneg(RegisterA64 dst, RegisterA64 src) placeR1("fneg", dst, src, 0b000'11110'01'1'0000'10'10000); } - else + else if (dst.kind == KindA64::s) { LUAU_ASSERT(dst.kind == KindA64::s && src.kind == KindA64::s); placeR1("fneg", dst, src, 0b000'11110'00'1'0000'10'10000); } + else + { + LUAU_ASSERT(dst.kind == KindA64::q && src.kind == KindA64::q); + + placeR1("fneg", dst, src, 0b011'01110'1'0'10000'01111'10); + } } void AssemblyBuilderA64::fsqrt(RegisterA64 dst, RegisterA64 src) @@ -646,12 +679,18 @@ void AssemblyBuilderA64::fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src placeR3("fsub", dst, src1, src2, 0b11110'01'1, 0b0011'10); } - else + else if (dst.kind == KindA64::s) { LUAU_ASSERT(dst.kind == KindA64::s && src1.kind == KindA64::s && src2.kind == KindA64::s); placeR3("fsub", dst, src1, src2, 0b11110'00'1, 0b0011'10); } + else + { + LUAU_ASSERT(dst.kind == KindA64::q && src1.kind == KindA64::q && src2.kind == KindA64::q); + + placeVR("fsub", dst, src1, src2, 0b0'01110'10'1, 0b11010'1); + } } void AssemblyBuilderA64::ins_4s(RegisterA64 dst, RegisterA64 src, uint8_t index) @@ -1226,6 +1265,17 @@ void AssemblyBuilderA64::placeER(const char* name, RegisterA64 dst, RegisterA64 commit(); } +void AssemblyBuilderA64::placeVR(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint16_t op, uint8_t op2) +{ + if (logText) + logAppend(" %-12sv%d.4s,v%d.4s,v%d.4s\n", name, dst.index, src1.index, src2.index); + + LUAU_ASSERT(dst.kind == KindA64::q && dst.kind == src1.kind && dst.kind == src2.kind); + + place(dst.index | (src1.index << 5) | (op2 << 10) | (src2.index << 16) | (op << 21) | (1 << 30)); + commit(); +} + void AssemblyBuilderA64::place(uint32_t word) { LUAU_ASSERT(codePos < codeEnd); diff --git a/tests/AssemblyBuilderA64.test.cpp b/tests/AssemblyBuilderA64.test.cpp index 6657d889..320a7a6a 100644 --- a/tests/AssemblyBuilderA64.test.cpp +++ b/tests/AssemblyBuilderA64.test.cpp @@ -218,6 +218,7 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "Moves") { SINGLE_COMPARE(mov(x0, x1), 0xAA0103E0); SINGLE_COMPARE(mov(w0, w1), 0x2A0103E0); + SINGLE_COMPARE(mov(q0, q1), 0x4EA11C20); SINGLE_COMPARE(movz(x0, 42), 0xD2800540); SINGLE_COMPARE(movz(w0, 42), 0x52800540); @@ -501,6 +502,15 @@ TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "PrePostIndexing") SINGLE_COMPARE(str(q0, mem(x1, 1, AddressKindA64::post)), 0x3C801420); } +TEST_CASE_FIXTURE(AssemblyBuilderA64Fixture, "SIMDMath") +{ + SINGLE_COMPARE(fadd(q0, q1, q2), 0x4E22D420); + SINGLE_COMPARE(fsub(q0, q1, q2), 0x4EA2D420); + SINGLE_COMPARE(fmul(q0, q1, q2), 0x6E22DC20); + SINGLE_COMPARE(fdiv(q0, q1, q2), 0x6E22FC20); + SINGLE_COMPARE(fneg(q0, q1), 0x6EA0F820); +} + TEST_CASE("LogTest") { AssemblyBuilderA64 build(/* logText= */ true); @@ -552,6 +562,7 @@ TEST_CASE("LogTest") build.ins_4s(q31, 1, q29, 2); build.dup_4s(s29, q31, 2); build.dup_4s(q29, q30, 0); + build.fmul(q0, q1, q2); build.setLabel(l); build.ret(); @@ -594,6 +605,7 @@ TEST_CASE("LogTest") ins v31.s[1],v29.s[2] dup s29,v31.s[2] dup v29.4s,v30.s[0] + fmul v0.4s,v1.4s,v2.4s .L1: ret )";