2022-05-26 23:08:16 +01:00
|
|
|
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "Luau/Common.h"
|
2023-05-12 18:50:47 +01:00
|
|
|
#include "Luau/DenseHash.h"
|
2022-05-26 23:08:16 +01:00
|
|
|
#include "Luau/Label.h"
|
2022-11-04 17:33:22 +00:00
|
|
|
#include "Luau/ConditionX64.h"
|
2022-05-26 23:08:16 +01:00
|
|
|
#include "Luau/OperandX64.h"
|
|
|
|
#include "Luau/RegisterX64.h"
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace Luau
|
|
|
|
{
|
|
|
|
namespace CodeGen
|
|
|
|
{
|
2023-03-03 20:21:14 +00:00
|
|
|
namespace X64
|
|
|
|
{
|
2022-05-26 23:08:16 +01:00
|
|
|
|
2022-10-14 20:48:41 +01:00
|
|
|
// Rounding mode selector accepted by AssemblyBuilderX64::vroundsd().
// Every enumerator carries an explicit 2-bit code (0b00..0b11), so the numeric values are part of the contract.
// NOTE(review): the codes look like the rounding-control bits of the ROUNDSD immediate - confirm against the encoder.
enum class RoundingModeX64
|
|
|
|
{
|
|
|
|
RoundToNearestEven = 0b00,
|
|
|
|
RoundToNegativeInfinity = 0b01,
|
|
|
|
RoundToPositiveInfinity = 0b10,
|
|
|
|
RoundToZero = 0b11,
|
|
|
|
};
|
|
|
|
|
2022-10-21 18:54:01 +01:00
|
|
|
// Kind of filler requested from AssemblyBuilderX64::align(); that method defaults to Nop.
// NOTE(review): the enumerators are named after the nop/int3/ud2 instructions - presumably the padding is made of them; confirm in align().
enum class AlignmentDataX64
|
|
|
|
{
|
|
|
|
Nop,
|
|
|
|
Int3,
|
|
|
|
Ud2, // int3 will be used as a fall-back if it doesn't fit
|
|
|
|
};
|
|
|
|
|
|
|
|
// Target ABI tag: passed to the two-argument AssemblyBuilderX64 constructor and kept in its public `abi` member.
// NOTE(review): presumably distinguishes the Windows x64 and System V calling conventions - confirm at the use sites.
enum class ABIX64
|
|
|
|
{
|
|
|
|
Windows,
|
|
|
|
SystemV,
|
|
|
|
};
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
class AssemblyBuilderX64
|
|
|
|
{
|
|
|
|
public:
|
2023-03-31 19:42:49 +01:00
|
|
|
explicit AssemblyBuilderX64(bool logText, ABIX64 abi);
|
2022-05-26 23:08:16 +01:00
|
|
|
explicit AssemblyBuilderX64(bool logText);
|
|
|
|
~AssemblyBuilderX64();
|
|
|
|
|
|
|
|
// Base two operand instructions with 9 opcode selection
|
|
|
|
void add(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void sub(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void cmp(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void and_(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void or_(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void xor_(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
|
|
|
|
// Binary shift instructions with special rhs handling
|
|
|
|
void sal(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void sar(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void shl(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void shr(OperandX64 lhs, OperandX64 rhs);
|
2023-04-21 23:14:26 +01:00
|
|
|
void rol(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void ror(OperandX64 lhs, OperandX64 rhs);
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
// Two operand mov instruction has additional specialized encodings
|
|
|
|
void mov(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void mov64(RegisterX64 lhs, int64_t imm);
|
2022-08-11 22:01:33 +01:00
|
|
|
void movsx(RegisterX64 lhs, OperandX64 rhs);
|
|
|
|
void movzx(RegisterX64 lhs, OperandX64 rhs);
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
// Base one operand instruction with 2 opcode selection
|
|
|
|
void div(OperandX64 op);
|
|
|
|
void idiv(OperandX64 op);
|
|
|
|
void mul(OperandX64 op);
|
2022-08-11 22:01:33 +01:00
|
|
|
void imul(OperandX64 op);
|
2022-05-26 23:08:16 +01:00
|
|
|
void neg(OperandX64 op);
|
|
|
|
void not_(OperandX64 op);
|
2022-10-14 20:48:41 +01:00
|
|
|
void dec(OperandX64 op);
|
|
|
|
void inc(OperandX64 op);
|
2022-05-26 23:08:16 +01:00
|
|
|
|
2022-08-11 22:01:33 +01:00
|
|
|
// Additional forms of imul
|
|
|
|
void imul(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void imul(OperandX64 dst, OperandX64 lhs, int32_t rhs);
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
void test(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
void lea(OperandX64 lhs, OperandX64 rhs);
|
2023-01-13 22:10:01 +00:00
|
|
|
void setcc(ConditionX64 cond, OperandX64 op);
|
2023-10-06 20:02:32 +01:00
|
|
|
void cmov(ConditionX64 cond, RegisterX64 lhs, OperandX64 rhs);
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
void push(OperandX64 op);
|
|
|
|
void pop(OperandX64 op);
|
|
|
|
void ret();
|
|
|
|
|
|
|
|
// Control flow
|
2022-11-04 17:33:22 +00:00
|
|
|
void jcc(ConditionX64 cond, Label& label);
|
2022-05-26 23:08:16 +01:00
|
|
|
void jmp(Label& label);
|
|
|
|
void jmp(OperandX64 op);
|
|
|
|
|
2022-07-08 02:22:39 +01:00
|
|
|
void call(Label& label);
|
|
|
|
void call(OperandX64 op);
|
|
|
|
|
2023-06-16 18:35:18 +01:00
|
|
|
void lea(RegisterX64 lhs, Label& label);
|
|
|
|
|
2022-07-21 22:16:54 +01:00
|
|
|
void int3();
|
2023-05-25 22:36:34 +01:00
|
|
|
void ud2();
|
2022-07-21 22:16:54 +01:00
|
|
|
|
2023-04-21 23:14:26 +01:00
|
|
|
void bsr(RegisterX64 dst, OperandX64 src);
|
|
|
|
void bsf(RegisterX64 dst, OperandX64 src);
|
2023-10-21 02:10:30 +01:00
|
|
|
void bswap(RegisterX64 dst);
|
2023-04-21 23:14:26 +01:00
|
|
|
|
2022-10-21 18:54:01 +01:00
|
|
|
// Code alignment
|
|
|
|
void nop(uint32_t length = 1);
|
|
|
|
void align(uint32_t alignment, AlignmentDataX64 data = AlignmentDataX64::Nop);
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// AVX
|
|
|
|
void vaddpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
void vaddps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
void vaddsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
void vaddss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
|
2022-07-21 22:16:54 +01:00
|
|
|
void vsubsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2024-01-27 03:20:56 +00:00
|
|
|
void vsubps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2022-07-21 22:16:54 +01:00
|
|
|
void vmulsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2024-01-27 03:20:56 +00:00
|
|
|
void vmulps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2022-07-21 22:16:54 +01:00
|
|
|
void vdivsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2024-01-27 03:20:56 +00:00
|
|
|
void vdivps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2022-07-21 22:16:54 +01:00
|
|
|
|
2024-01-27 03:20:56 +00:00
|
|
|
void vandps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2023-01-04 20:53:17 +00:00
|
|
|
void vandpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
void vandnpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2023-01-04 20:53:17 +00:00
|
|
|
|
2022-07-21 22:16:54 +01:00
|
|
|
void vxorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2024-01-27 03:20:56 +00:00
|
|
|
void vorps(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
void vorpd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2022-07-21 22:16:54 +01:00
|
|
|
|
2022-08-11 22:01:33 +01:00
|
|
|
void vucomisd(OperandX64 src1, OperandX64 src2);
|
|
|
|
|
|
|
|
void vcvttsd2si(OperandX64 dst, OperandX64 src);
|
|
|
|
void vcvtsi2sd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2023-04-07 22:01:29 +01:00
|
|
|
void vcvtsd2ss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2023-11-10 21:10:07 +00:00
|
|
|
void vcvtss2sd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
2022-08-11 22:01:33 +01:00
|
|
|
|
2022-10-14 20:48:41 +01:00
|
|
|
void vroundsd(OperandX64 dst, OperandX64 src1, OperandX64 src2, RoundingModeX64 roundingMode); // inexact
|
2022-07-21 22:16:54 +01:00
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
void vsqrtpd(OperandX64 dst, OperandX64 src);
|
|
|
|
void vsqrtps(OperandX64 dst, OperandX64 src);
|
|
|
|
void vsqrtsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
void vsqrtss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
|
|
|
|
void vmovsd(OperandX64 dst, OperandX64 src);
|
|
|
|
void vmovsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
void vmovss(OperandX64 dst, OperandX64 src);
|
|
|
|
void vmovss(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
void vmovapd(OperandX64 dst, OperandX64 src);
|
|
|
|
void vmovaps(OperandX64 dst, OperandX64 src);
|
|
|
|
void vmovupd(OperandX64 dst, OperandX64 src);
|
|
|
|
void vmovups(OperandX64 dst, OperandX64 src);
|
2023-01-04 20:53:17 +00:00
|
|
|
void vmovq(OperandX64 lhs, OperandX64 rhs);
|
|
|
|
|
|
|
|
void vmaxsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
void vminsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
|
CodeGen: Implement support for math.lerp lowering (#1609)
To implement math.lerp without branches, we add SELECT_NUM which
selects one of the two inputs based on the comparison condition.
For simplicity, we only support C == D for now; this can be extended to
a more generic version with a IrCondition operand E, but that requires
more work on the SSE side (to flip the comparison for some conditions
like Greater, and expose more generic vcmpsd).
Note: On AArch64 this will effectively result in a change in floating
point
behavior between native code and non-native code: clang synthesizes
fmadd (because floating point contraction is allowed by default, and the
arch always has the instruction), whereas this change will use
fmul+fadd.
I am not sure if this is good or bad, and if this is a problem in C or
not.
Specifically, clang's behavior results in different results between X64
and AArch64 when *not* using codegen, and with this change the behavior
when using codegen is... the same? :)
Fixing this will require either using LERP_NUM instead and hand-coding
lowering, or exposing some sort of "quasi" MADD_NUM (which would
lower to fma on AArch64 and mul+add on X64).
A small benefit to the current approach is `lerp(1, 5, t)`
constant-folds the
subtraction. With LERP_NUM this optimization will need to be implemented
manually as a partial constant-folding for LERP_NUM.
A similar problem exists today for vector.cross & vector.dot. So maybe
this
is not something we need to fix, unsure.
2025-01-16 18:48:27 +00:00
|
|
|
void vcmpeqsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
void vcmpltsd(OperandX64 dst, OperandX64 src1, OperandX64 src2);
|
|
|
|
|
|
|
|
void vblendvpd(RegisterX64 dst, RegisterX64 src1, OperandX64 mask, RegisterX64 src3);
|
|
|
|
|
2024-01-27 03:20:56 +00:00
|
|
|
void vpshufps(RegisterX64 dst, RegisterX64 src1, OperandX64 src2, uint8_t shuffle);
|
|
|
|
void vpinsrd(RegisterX64 dst, RegisterX64 src1, OperandX64 src2, uint8_t offset);
|
|
|
|
|
CodeGen: Rewrite dot product lowering using a dedicated IR instruction (#1512)
Instead of doing the dot product related math in scalar IR, we lift the
computation into a dedicated IR instruction.
On x64, we can use VDPPS which was more or less tailor made for this
purpose. This is better than manual scalar lowering that requires
reloading components from memory; it's not always a strict improvement
over the shuffle+add version (which we never had), but this can now be
adjusted in the IR lowering in an optimal fashion (maybe even based on
CPU vendor, although that'd create issues for offline compilation).
On A64, we can either use naive adds or paired adds, as there is no
dedicated vector-wide horizontal instruction until SVE. Both run at
about the same performance on M2, but paired adds require fewer
instructions and temporaries.
I've measured this using mesh-normal-vector benchmark, changing the
benchmark to just report the time of the second loop inside
`calculate_normals`, testing master vs #1504 vs this PR, also increasing
the grid size to 400 for more stable timings.
On Zen 4 (7950X), this PR is comfortably ~8% faster vs master, while I
see neutral to negative results in #1504.
On M2 (base), this PR is ~28% faster vs master, while #1504 is only
about ~10% faster.
If I measure the second loop in `calculate_tangent_space` instead, I
get:
On Zen 4 (7950X), this PR is ~12% faster vs master, while #1504 is ~3%
faster
On M2 (base), this PR is ~24% faster vs master, while #1504 is only
about ~13% faster.
Note that the loops in question are not quite optimal, as they store and
reload various vectors to dictionary values due to inappropriate use of
locals. The underlying gains in individual functions are thus larger
than the numbers above; for example, changing the `calculate_normals`
loop to use a local variable to store the normalized vector (but still
saving the result to dictionary value), I get a ~24% performance
increase from this PR on Zen4 vs master instead of just 8% (#1504 is
~15% slower in this setup).
2024-11-09 00:23:09 +00:00
|
|
|
void vdpps(OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t mask);
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// Run final checks
|
2023-04-28 20:55:13 +01:00
|
|
|
bool finalize();
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
// Places a label at current location and returns it
|
|
|
|
Label setLabel();
|
|
|
|
|
|
|
|
// Assigns label position to the current location
|
|
|
|
void setLabel(Label& label);
|
|
|
|
|
2023-03-17 19:20:37 +00:00
|
|
|
// Extracts code offset (in bytes) from label
|
|
|
|
uint32_t getLabelOffset(const Label& label)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(label.location != ~0u);
|
2023-03-17 19:20:37 +00:00
|
|
|
return label.location;
|
|
|
|
}
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// Constant allocation (uses rip-relative addressing)
|
2024-01-27 03:20:56 +00:00
|
|
|
OperandX64 i32(int32_t value);
|
2022-05-26 23:08:16 +01:00
|
|
|
OperandX64 i64(int64_t value);
|
|
|
|
OperandX64 f32(float value);
|
|
|
|
OperandX64 f64(double value);
|
2024-01-27 03:20:56 +00:00
|
|
|
OperandX64 u32x4(uint32_t x, uint32_t y, uint32_t z, uint32_t w);
|
2022-05-26 23:08:16 +01:00
|
|
|
OperandX64 f32x4(float x, float y, float z, float w);
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
OperandX64 f64x2(double x, double y);
|
2022-08-11 22:01:33 +01:00
|
|
|
OperandX64 bytes(const void* ptr, size_t size, size_t align = 8);
|
2022-05-26 23:08:16 +01:00
|
|
|
|
2022-10-14 20:48:41 +01:00
|
|
|
void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
|
|
|
|
|
2022-10-21 18:54:01 +01:00
|
|
|
uint32_t getCodeSize() const;
|
|
|
|
|
2024-01-12 22:25:27 +00:00
|
|
|
unsigned getInstructionCount() const;
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
// Resulting data and code that need to be copied over one after the other
|
|
|
|
// The *end* of 'data' has to be aligned to 16 bytes; this will also align 'code'
|
|
|
|
std::vector<uint8_t> data;
|
|
|
|
std::vector<uint8_t> code;
|
|
|
|
|
|
|
|
std::string text;
|
|
|
|
|
2022-10-14 20:48:41 +01:00
|
|
|
const bool logText = false;
|
|
|
|
|
2022-10-21 18:54:01 +01:00
|
|
|
const ABIX64 abi;
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
private:
|
|
|
|
// Instruction archetypes
|
2024-08-02 15:30:04 +01:00
|
|
|
void placeBinary(
|
|
|
|
const char* name,
|
|
|
|
OperandX64 lhs,
|
|
|
|
OperandX64 rhs,
|
|
|
|
uint8_t codeimm8,
|
|
|
|
uint8_t codeimm,
|
|
|
|
uint8_t codeimmImm8,
|
|
|
|
uint8_t code8rev,
|
|
|
|
uint8_t coderev,
|
|
|
|
uint8_t code8,
|
|
|
|
uint8_t code,
|
|
|
|
uint8_t opreg
|
|
|
|
);
|
2022-05-26 23:08:16 +01:00
|
|
|
void placeBinaryRegMemAndImm(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code, uint8_t codeImm8, uint8_t opreg);
|
|
|
|
void placeBinaryRegAndRegMem(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);
|
|
|
|
void placeBinaryRegMemAndReg(OperandX64 lhs, OperandX64 rhs, uint8_t code8, uint8_t code);
|
|
|
|
|
|
|
|
void placeUnaryModRegMem(const char* name, OperandX64 op, uint8_t code8, uint8_t code, uint8_t opreg);
|
|
|
|
|
|
|
|
void placeShift(const char* name, OperandX64 lhs, OperandX64 rhs, uint8_t opreg);
|
|
|
|
|
|
|
|
void placeJcc(const char* name, Label& label, uint8_t cc);
|
|
|
|
|
|
|
|
void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
|
|
|
|
void placeAvx(const char* name, OperandX64 dst, OperandX64 src, uint8_t code, uint8_t coderev, bool setW, uint8_t mode, uint8_t prefix);
|
|
|
|
void placeAvx(const char* name, OperandX64 dst, OperandX64 src1, OperandX64 src2, uint8_t code, bool setW, uint8_t mode, uint8_t prefix);
|
2022-08-11 22:01:33 +01:00
|
|
|
void placeAvx(
|
2024-08-02 15:30:04 +01:00
|
|
|
const char* name,
|
|
|
|
OperandX64 dst,
|
|
|
|
OperandX64 src1,
|
|
|
|
OperandX64 src2,
|
|
|
|
uint8_t imm8,
|
|
|
|
uint8_t code,
|
|
|
|
bool setW,
|
|
|
|
uint8_t mode,
|
|
|
|
uint8_t prefix
|
|
|
|
);
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
// Instruction components
|
2023-01-13 22:10:01 +00:00
|
|
|
void placeRegAndModRegMem(OperandX64 lhs, OperandX64 rhs, int32_t extraCodeBytes = 0);
|
|
|
|
void placeModRegMem(OperandX64 rhs, uint8_t regop, int32_t extraCodeBytes = 0);
|
2022-05-26 23:08:16 +01:00
|
|
|
void placeRex(RegisterX64 op);
|
|
|
|
void placeRex(OperandX64 op);
|
2022-10-07 01:23:29 +01:00
|
|
|
void placeRexNoW(OperandX64 op);
|
2022-05-26 23:08:16 +01:00
|
|
|
void placeRex(RegisterX64 lhs, OperandX64 rhs);
|
|
|
|
void placeVex(OperandX64 dst, OperandX64 src1, OperandX64 src2, bool setW, uint8_t mode, uint8_t prefix);
|
|
|
|
void placeImm8Or32(int32_t imm);
|
|
|
|
void placeImm8(int32_t imm);
|
2023-11-10 21:10:07 +00:00
|
|
|
void placeImm16(int16_t imm);
|
2022-05-26 23:08:16 +01:00
|
|
|
void placeImm32(int32_t imm);
|
|
|
|
void placeImm64(int64_t imm);
|
|
|
|
void placeLabel(Label& label);
|
|
|
|
void place(uint8_t byte);
|
|
|
|
|
|
|
|
void commit();
|
|
|
|
LUAU_NOINLINE void extend();
|
|
|
|
|
|
|
|
// Data
|
|
|
|
size_t allocateData(size_t size, size_t align);
|
|
|
|
|
|
|
|
// Logging of assembly in text form (Intel asm with VS disassembly formatting)
|
|
|
|
LUAU_NOINLINE void log(const char* opcode);
|
|
|
|
LUAU_NOINLINE void log(const char* opcode, OperandX64 op);
|
|
|
|
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2);
|
|
|
|
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3);
|
2022-08-11 22:01:33 +01:00
|
|
|
LUAU_NOINLINE void log(const char* opcode, OperandX64 op1, OperandX64 op2, OperandX64 op3, OperandX64 op4);
|
2022-05-26 23:08:16 +01:00
|
|
|
LUAU_NOINLINE void log(Label label);
|
|
|
|
LUAU_NOINLINE void log(const char* opcode, Label label);
|
2023-06-16 18:35:18 +01:00
|
|
|
LUAU_NOINLINE void log(const char* opcode, RegisterX64 reg, Label label);
|
2022-05-26 23:08:16 +01:00
|
|
|
void log(OperandX64 op);
|
|
|
|
|
2022-10-21 18:54:01 +01:00
|
|
|
const char* getSizeName(SizeX64 size) const;
|
|
|
|
const char* getRegisterName(RegisterX64 reg) const;
|
2022-05-26 23:08:16 +01:00
|
|
|
|
|
|
|
uint32_t nextLabel = 1;
|
|
|
|
std::vector<Label> pendingLabels;
|
|
|
|
std::vector<uint32_t> labelLocations;
|
|
|
|
|
2024-01-27 03:20:56 +00:00
|
|
|
DenseHashMap<uint32_t, int32_t> constCache32;
|
2023-05-12 18:50:47 +01:00
|
|
|
DenseHashMap<uint64_t, int32_t> constCache64;
|
|
|
|
|
2022-05-26 23:08:16 +01:00
|
|
|
bool finalized = false;
|
|
|
|
|
|
|
|
size_t dataPos = 0;
|
|
|
|
|
|
|
|
uint8_t* codePos = nullptr;
|
|
|
|
uint8_t* codeEnd = nullptr;
|
2024-01-12 22:25:27 +00:00
|
|
|
|
|
|
|
unsigned instructionCount = 0;
|
2022-05-26 23:08:16 +01:00
|
|
|
};
|
|
|
|
|
2023-03-03 20:21:14 +00:00
|
|
|
} // namespace X64
|
2022-05-26 23:08:16 +01:00
|
|
|
} // namespace CodeGen
|
|
|
|
} // namespace Luau
|