mirror of
https://github.com/luau-lang/luau.git
synced 2025-01-10 21:39:09 +00:00
e6bf71871a
Instead of doing the dot product related math in scalar IR, we lift the computation into a dedicated IR instruction. On x64, we can use VDPPS which was more or less tailor made for this purpose. This is better than manual scalar lowering that requires reloading components from memory; it's not always a strict improvement over the shuffle+add version (which we never had), but this can now be adjusted in the IR lowering in an optimal fashion (maybe even based on CPU vendor, although that'd create issues for offline compilation). On A64, we can either use naive adds or paired adds, as there is no dedicated vector-wide horizontal instruction until SVE. Both run at about the same performance on M2, but paired adds require fewer instructions and temporaries. I've measured this using mesh-normal-vector benchmark, changing the benchmark to just report the time of the second loop inside `calculate_normals`, testing master vs #1504 vs this PR, also increasing the grid size to 400 for more stable timings. On Zen 4 (7950X), this PR is comfortably ~8% faster vs master, while I see neutral to negative results in #1504. On M2 (base), this PR is ~28% faster vs master, while #1504 is only about ~10% faster. If I measure the second loop in `calculate_tangent_space` instead, I get: On Zen 4 (7950X), this PR is ~12% faster vs master, while #1504 is ~3% faster On M2 (base), this PR is ~24% faster vs master, while #1504 is only about ~13% faster. Note that the loops in question are not quite optimal, as they store and reload various vectors to dictionary values due to inappropriate use of locals. The underlying gains in individual functions are thus larger than the numbers above; for example, changing the `calculate_normals` loop to use a local variable to store the normalized vector (but still saving the result to dictionary value), I get a ~24% performance increase from this PR on Zen4 vs master instead of just 8% (#1504 is ~15% slower in this setup).
1246 lines
31 KiB
C++
1246 lines
31 KiB
C++
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
#pragma once
|
|
|
|
#include "Luau/Bytecode.h"
|
|
#include "Luau/IrAnalysis.h"
|
|
#include "Luau/Label.h"
|
|
#include "Luau/RegisterX64.h"
|
|
#include "Luau/RegisterA64.h"
|
|
|
|
#include <optional>
|
|
#include <vector>
|
|
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
struct Proto;
|
|
|
|
namespace Luau
|
|
{
|
|
namespace CodeGen
|
|
{
|
|
|
|
// IR extensions to LuauBuiltinFunction enum (these only exist inside IR, and start from 256 to avoid collisions)
|
|
enum
|
|
{
|
|
LBF_IR_MATH_LOG2 = 256,
|
|
};
|
|
|
|
// IR instruction command.
|
|
// In the command description, following abbreviations are used:
|
|
// * Rn - VM stack register slot, n in 0..254
|
|
// * Kn - VM proto constant slot, n in 0..2^23-1
|
|
// * UPn - VM function upvalue slot, n in 0..199
|
|
// * A, B, C, D, E, F, G are instruction arguments
|
|
enum class IrCmd : uint8_t
|
|
{
|
|
NOP,
|
|
|
|
// Load a tag from TValue
|
|
// A: Rn or Kn
|
|
LOAD_TAG,
|
|
|
|
// Load a pointer (*) from TValue
|
|
// A: Rn or Kn
|
|
LOAD_POINTER,
|
|
|
|
// Load a double number from TValue
|
|
// A: Rn or Kn
|
|
LOAD_DOUBLE,
|
|
|
|
// Load an int from TValue
|
|
// A: Rn
|
|
LOAD_INT,
|
|
|
|
// Load a float field from vector as a double number
|
|
// A: Rn or Kn
|
|
// B: int (offset from the start of TValue)
|
|
LOAD_FLOAT,
|
|
|
|
// Load a TValue from memory
|
|
// A: Rn or Kn or pointer (TValue)
|
|
// B: int/none (optional 'A' pointer offset)
|
|
// C: tag/none (tag of the value being loaded)
|
|
LOAD_TVALUE,
|
|
|
|
// Load current environment table
|
|
LOAD_ENV,
|
|
|
|
// Get pointer (TValue) to table array at index
|
|
// A: pointer (Table)
|
|
// B: int
|
|
GET_ARR_ADDR,
|
|
|
|
// Get pointer (LuaNode) to table node element at the active cached slot index
|
|
// A: pointer (Table)
|
|
// B: unsigned int (pcpos)
|
|
// C: Kn
|
|
GET_SLOT_NODE_ADDR,
|
|
|
|
// Get pointer (LuaNode) to table node element at the main position of the specified key hash
|
|
// A: pointer (Table)
|
|
// B: unsigned int (hash)
|
|
GET_HASH_NODE_ADDR,
|
|
|
|
// Get pointer (TValue) to Closure upvalue.
|
|
// A: pointer or undef (Closure)
|
|
// B: UPn
|
|
// When undef is specified, uses current function Closure.
|
|
GET_CLOSURE_UPVAL_ADDR,
|
|
|
|
// Store a tag into TValue
|
|
// A: Rn
|
|
// B: tag
|
|
STORE_TAG,
|
|
|
|
// Store an integer into the extra field of the TValue
|
|
// A: Rn
|
|
// B: int
|
|
STORE_EXTRA,
|
|
|
|
// Store a pointer (*) into TValue
|
|
// A: Rn
|
|
// B: pointer
|
|
STORE_POINTER,
|
|
|
|
// Store a double number into TValue
|
|
// A: Rn
|
|
// B: double
|
|
STORE_DOUBLE,
|
|
|
|
// Store an int into TValue
|
|
// A: Rn
|
|
// B: int
|
|
STORE_INT,
|
|
|
|
// Store a vector into TValue
|
|
// A: Rn
|
|
// B: double (x)
|
|
// C: double (y)
|
|
// D: double (z)
|
|
STORE_VECTOR,
|
|
|
|
// Store a TValue into memory
|
|
// A: Rn or pointer (TValue)
|
|
// B: TValue
|
|
// C: int (optional 'A' pointer offset)
|
|
STORE_TVALUE,
|
|
|
|
// Store a pair of tag and value into memory
|
|
// A: Rn or pointer (TValue)
|
|
// B: tag (must be a constant)
|
|
// C: int/double/pointer
|
|
// D: int (optional 'A' pointer offset)
|
|
STORE_SPLIT_TVALUE,
|
|
|
|
// Add/Sub two integers together
|
|
// A, B: int
|
|
ADD_INT,
|
|
SUB_INT,
|
|
|
|
// Add/Sub/Mul/Div/Idiv/Mod two double numbers
|
|
// A, B: double
|
|
// In final x64 lowering, B can also be Rn or Kn
|
|
ADD_NUM,
|
|
SUB_NUM,
|
|
MUL_NUM,
|
|
DIV_NUM,
|
|
IDIV_NUM,
|
|
MOD_NUM,
|
|
|
|
// Get the minimum/maximum of two numbers
|
|
// If one of the values is NaN, 'B' is returned as the result
|
|
// A, B: double
|
|
// In final x64 lowering, B can also be Rn or Kn
|
|
MIN_NUM,
|
|
MAX_NUM,
|
|
|
|
// Negate a double number
|
|
// A: double
|
|
UNM_NUM,
|
|
|
|
// Round number to negative infinity (math.floor)
|
|
// A: double
|
|
FLOOR_NUM,
|
|
|
|
// Round number to positive infinity (math.ceil)
|
|
// A: double
|
|
CEIL_NUM,
|
|
|
|
// Round number to nearest integer number, rounding half-way cases away from zero (math.round)
|
|
// A: double
|
|
ROUND_NUM,
|
|
|
|
// Get square root of the argument (math.sqrt)
|
|
// A: double
|
|
SQRT_NUM,
|
|
|
|
// Get absolute value of the argument (math.abs)
|
|
// A: double
|
|
ABS_NUM,
|
|
|
|
// Get the sign of the argument (math.sign)
|
|
// A: double
|
|
SIGN_NUM,
|
|
|
|
// Add/Sub/Mul/Div/Idiv two vectors
|
|
// A, B: TValue
|
|
ADD_VEC,
|
|
SUB_VEC,
|
|
MUL_VEC,
|
|
DIV_VEC,
|
|
|
|
// Negate a vector
|
|
// A: TValue
|
|
UNM_VEC,
|
|
|
|
// Compute dot product between two vectors
|
|
// A, B: TValue
|
|
DOT_VEC,
|
|
|
|
// Compute Luau 'not' operation on destructured TValue
|
|
// A: tag
|
|
// B: int (value)
|
|
NOT_ANY,
|
|
|
|
// Perform a TValue comparison, supported conditions are LessEqual, Less and Equal
|
|
// A, B: Rn
|
|
// C: condition
|
|
CMP_ANY,
|
|
|
|
// Unconditional jump
|
|
// A: block/vmexit/undef
|
|
JUMP,
|
|
|
|
// Jump if TValue is truthy
|
|
// A: Rn
|
|
// B: block (if true)
|
|
// C: block (if false)
|
|
JUMP_IF_TRUTHY,
|
|
|
|
// Jump if TValue is falsy
|
|
// A: Rn
|
|
// B: block (if true)
|
|
// C: block (if false)
|
|
JUMP_IF_FALSY,
|
|
|
|
// Jump if tags are equal
|
|
// A, B: tag
|
|
// C: block (if true)
|
|
// D: block (if false)
|
|
JUMP_EQ_TAG,
|
|
|
|
// Perform a conditional jump based on the result of integer comparison
|
|
// A, B: int
|
|
// C: condition
|
|
// D: block (if true)
|
|
// E: block (if false)
|
|
JUMP_CMP_INT,
|
|
|
|
// Jump if pointers are equal
|
|
// A, B: pointer (*)
|
|
// C: block (if true)
|
|
// D: block (if false)
|
|
JUMP_EQ_POINTER,
|
|
|
|
// Perform a conditional jump based on the result of double comparison
|
|
// A, B: double
|
|
// C: condition
|
|
// D: block (if true)
|
|
// E: block (if false)
|
|
JUMP_CMP_NUM,
|
|
|
|
// Perform jump based on a numerical loop condition (step > 0 ? idx <= limit : limit <= idx)
|
|
// A: double (index)
|
|
// B: double (limit)
|
|
// C: double (step)
|
|
// D: block (if true)
|
|
// E: block (if false)
|
|
JUMP_FORN_LOOP_COND,
|
|
|
|
// Perform a conditional jump based on cached table node slot matching the actual table node slot for a key
|
|
// A: pointer (LuaNode)
|
|
// B: Kn
|
|
// C: block (if matches)
|
|
// D: block (if it doesn't)
|
|
JUMP_SLOT_MATCH,
|
|
|
|
// Get table length
|
|
// A: pointer (Table)
|
|
TABLE_LEN,
|
|
|
|
// Get string length
|
|
// A: pointer (string)
|
|
STRING_LEN,
|
|
|
|
// Allocate new table
|
|
// A: unsigned int (array element count)
|
|
// B: unsigned int (node element count)
|
|
NEW_TABLE,
|
|
|
|
// Duplicate a table
|
|
// A: pointer (Table)
|
|
DUP_TABLE,
|
|
|
|
// Insert an integer key into a table and return the pointer to inserted value (TValue)
|
|
// A: pointer (Table)
|
|
// B: int (key)
|
|
TABLE_SETNUM,
|
|
|
|
// Try to convert a double number into a table index (int) or jump if it's not an integer
|
|
// A: double
|
|
// B: block
|
|
TRY_NUM_TO_INDEX,
|
|
|
|
// Try to get pointer to tag method TValue inside the table's metatable or jump if there is no such value or metatable
|
|
// A: table
|
|
// B: int (TMS enum)
|
|
// C: block
|
|
TRY_CALL_FASTGETTM,
|
|
|
|
// Create new tagged userdata
|
|
// A: int (size)
|
|
// B: int (tag)
|
|
NEW_USERDATA,
|
|
|
|
// Convert integer into a double number
|
|
// A: int
|
|
INT_TO_NUM,
|
|
UINT_TO_NUM,
|
|
|
|
// Converts a double number to an integer. 'A' may be any representable integer in a double.
|
|
// A: double
|
|
NUM_TO_INT,
|
|
|
|
// Converts a double number to an unsigned integer. For out-of-range values of 'A', the result is arch-specific.
|
|
// A: double
|
|
NUM_TO_UINT,
|
|
|
|
// Converts a double number to a vector with the value in X/Y/Z
|
|
// A: double
|
|
NUM_TO_VEC,
|
|
|
|
// Adds VECTOR type tag to a vector, preserving X/Y/Z components
|
|
// A: TValue
|
|
TAG_VECTOR,
|
|
|
|
// Adjust stack top (L->top) to point at 'B' TValues *after* the specified register
|
|
// This is used to return multiple values
|
|
// A: Rn
|
|
// B: int (offset)
|
|
ADJUST_STACK_TO_REG,
|
|
|
|
// Restore stack top (L->top) to point to the function stack top (L->ci->top)
|
|
// This is used to recover after calling a variadic function
|
|
ADJUST_STACK_TO_TOP,
|
|
|
|
// Execute fastcall builtin function with 1 argument in-place
|
|
// This is used for a few builtins that can have more than 1 result and cannot be represented as a regular instruction
|
|
// A: unsigned int (builtin id)
|
|
// B: Rn (result start)
|
|
// C: Rn (first argument)
|
|
// D: int (result count)
|
|
FASTCALL,
|
|
|
|
// Call the fastcall builtin function
|
|
// A: unsigned int (builtin id)
|
|
// B: Rn (result start)
|
|
// C: Rn (argument start)
|
|
// D: Rn or Kn or undef (optional second argument)
|
|
// E: Rn or Kn or undef (optional third argument)
|
|
// F: int (argument count or -1 to use all arguments up to stack top)
|
|
// G: int (result count or -1 to preserve all results and adjust stack top)
|
|
INVOKE_FASTCALL,
|
|
|
|
// Check that fastcall builtin function invocation was successful (negative result count jumps to fallback)
|
|
// A: int (result count)
|
|
// B: block (fallback)
|
|
CHECK_FASTCALL_RES,
|
|
|
|
// Fallback functions
|
|
|
|
// Perform an arithmetic operation on TValues of any type
|
|
// A: Rn (where to store the result)
|
|
// B: Rn (lhs)
|
|
// C: Rn or Kn (rhs)
|
|
// D: int (TMS enum with arithmetic type)
|
|
DO_ARITH,
|
|
|
|
// Get length of a TValue of any type
|
|
// A: Rn (where to store the result)
|
|
// B: Rn
|
|
DO_LEN,
|
|
|
|
// Lookup a value in TValue of any type using a key of any type
|
|
// A: Rn (where to store the result)
|
|
// B: Rn
|
|
// C: Rn or unsigned int (key)
|
|
GET_TABLE,
|
|
|
|
// Store a value into TValue of any type using a key of any type
|
|
// A: Rn (value to store)
|
|
// B: Rn
|
|
// C: Rn or unsigned int (key)
|
|
SET_TABLE,
|
|
|
|
// Lookup a value in the environment
|
|
// A: Rn (where to store the result)
|
|
// B: unsigned int (import path)
|
|
GET_IMPORT,
|
|
|
|
// Concatenate multiple TValues into a string
|
|
// A: Rn (value start)
|
|
// B: unsigned int (number of registers to go over)
|
|
// Note: result is stored in the register specified in 'A'
|
|
// Note: all referenced registers might be modified in the operation
|
|
CONCAT,
|
|
|
|
// Load function upvalue into stack slot
|
|
// A: Rn
|
|
// B: UPn
|
|
GET_UPVALUE,
|
|
|
|
// Store TValue from stack slot into a function upvalue
|
|
// A: UPn
|
|
// B: Rn
|
|
// C: tag/undef (tag of the value that was written)
|
|
SET_UPVALUE,
|
|
|
|
// Guards and checks (these instructions are not block terminators even though they jump to fallback)
|
|
|
|
// Guard against tag mismatch
|
|
// A, B: tag
|
|
// C: block/vmexit/undef
|
|
// In final x64 lowering, A can also be Rn
|
|
// When DebugLuauAbortingChecks flag is enabled, A can also be Rn
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_TAG,
|
|
|
|
// Guard against a falsy tag+value
|
|
// A: tag
|
|
// B: value
|
|
// C: block/vmexit/undef
|
|
CHECK_TRUTHY,
|
|
|
|
// Guard against readonly table
|
|
// A: pointer (Table)
|
|
// B: block/vmexit/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_READONLY,
|
|
|
|
// Guard against table having a metatable
|
|
// A: pointer (Table)
|
|
// B: block/vmexit/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_NO_METATABLE,
|
|
|
|
// Guard against executing in unsafe environment, exits to VM on check failure
|
|
// A: vmexit/vmexit/undef
|
|
// When undef is specified, execution is aborted on check failure
|
|
CHECK_SAFE_ENV,
|
|
|
|
// Guard against index overflowing the table array size
|
|
// A: pointer (Table)
|
|
// B: int (index)
|
|
// C: block/vmexit/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_ARRAY_SIZE,
|
|
|
|
// Guard against cached table node slot not matching the actual table node slot for a key
|
|
// A: pointer (LuaNode)
|
|
// B: Kn
|
|
// C: block/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_SLOT_MATCH,
|
|
|
|
// Guard against table node with a linked next node to ensure that our lookup hits the main position of the key
|
|
// A: pointer (LuaNode)
|
|
// B: block/vmexit/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_NODE_NO_NEXT,
|
|
|
|
// Guard against table node with 'nil' value
|
|
// A: pointer (LuaNode)
|
|
// B: block/vmexit/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_NODE_VALUE,
|
|
|
|
// Guard against access at specified offset/size overflowing the buffer length
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
// C: int (size)
|
|
// D: block/vmexit/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_BUFFER_LEN,
|
|
|
|
// Guard against userdata tag mismatch
|
|
// A: pointer (userdata)
|
|
// B: int (tag)
|
|
// C: block/vmexit/undef
|
|
// When undef is specified instead of a block, execution is aborted on check failure
|
|
CHECK_USERDATA_TAG,
|
|
|
|
// Special operations
|
|
|
|
// Check interrupt handler
|
|
// A: unsigned int (pcpos)
|
|
INTERRUPT,
|
|
|
|
// Check and run GC assist if necessary
|
|
CHECK_GC,
|
|
|
|
// Handle GC write barrier (forward)
|
|
// A: pointer (GCObject)
|
|
// B: Rn (TValue that was written to the object)
|
|
// C: tag/undef (tag of the value that was written)
|
|
BARRIER_OBJ,
|
|
|
|
// Handle GC write barrier (backwards) for a write into a table
|
|
// A: pointer (Table)
|
|
BARRIER_TABLE_BACK,
|
|
|
|
// Handle GC write barrier (forward) for a write into a table
|
|
// A: pointer (Table)
|
|
// B: Rn (TValue that was written to the object)
|
|
// C: tag/undef (tag of the value that was written)
|
|
BARRIER_TABLE_FORWARD,
|
|
|
|
// Update savedpc value
|
|
// A: unsigned int (pcpos)
|
|
SET_SAVEDPC,
|
|
|
|
// Close open upvalues for registers at specified index or higher
|
|
// A: Rn (starting register index)
|
|
CLOSE_UPVALS,
|
|
|
|
// While capture is a no-op right now, it might be useful to track register/upvalue lifetimes
|
|
// A: Rn or UPn
|
|
// B: unsigned int (1 for reference capture, 0 for value capture)
|
|
CAPTURE,
|
|
|
|
// Operations that don't have an IR representation yet
|
|
|
|
// Set a list of values to table in target register
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (target)
|
|
// C: Rn (source start)
|
|
// D: int (count or -1 to assign values up to stack top)
|
|
// E: unsigned int (table index to start from)
|
|
// F: undef/unsigned int (target table known size)
|
|
SETLIST,
|
|
|
|
// Call specified function
|
|
// A: Rn (function, followed by arguments)
|
|
// B: int (argument count or -1 to use all arguments up to stack top)
|
|
// C: int (result count or -1 to preserve all results and adjust stack top)
|
|
// Note: return values are placed starting from Rn specified in 'A'
|
|
CALL,
|
|
|
|
// Return specified values from the function
|
|
// A: Rn (value start)
|
|
// B: int (result count or -1 to return all values up to stack top)
|
|
RETURN,
|
|
|
|
// Adjust loop variables for one iteration of a generic for loop, jump back to the loop header if loop needs to continue
|
|
// A: Rn (loop variable start, updates Rn+2 and 'B' number of registers starting from Rn+3)
|
|
// B: int (loop variable count, if more than 2, registers starting from Rn+5 are set to nil)
|
|
// C: block (repeat)
|
|
// D: block (exit)
|
|
FORGLOOP,
|
|
|
|
// Handle LOP_FORGLOOP fallback when variable being iterated is not a table
|
|
// A: Rn (loop state start, updates Rn+2 and 'B' number of registers starting from Rn+3)
|
|
// B: int (loop variable count and a MSB set when it's an ipairs-like iteration loop)
|
|
// C: block (repeat)
|
|
// D: block (exit)
|
|
FORGLOOP_FALLBACK,
|
|
|
|
// Fallback for generic for loop preparation when iterating over builtin pairs/ipairs
|
|
// It raises an error if 'B' register is not a function
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn
|
|
// C: block (forgloop location)
|
|
FORGPREP_XNEXT_FALLBACK,
|
|
|
|
// Increment coverage data (saturating 24 bit add)
|
|
// A: unsigned int (bytecode instruction index)
|
|
COVERAGE,
|
|
|
|
// Operations that have a translation, but use a full instruction fallback
|
|
|
|
// Load a value from global table at specified key
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (dest)
|
|
// C: Kn (key)
|
|
FALLBACK_GETGLOBAL,
|
|
|
|
// Store a value into global table at specified key
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (value)
|
|
// C: Kn (key)
|
|
FALLBACK_SETGLOBAL,
|
|
|
|
// Load a value from table at specified key
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (dest)
|
|
// C: Rn (table)
|
|
// D: Kn (key)
|
|
FALLBACK_GETTABLEKS,
|
|
|
|
// Store a value into a table at specified key
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (value)
|
|
// C: Rn (table)
|
|
// D: Kn (key)
|
|
FALLBACK_SETTABLEKS,
|
|
|
|
// Load function from source register using name into target register and copying source register into target register + 1
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (target)
|
|
// C: Rn (source)
|
|
// D: Kn (name)
|
|
FALLBACK_NAMECALL,
|
|
|
|
// Operations that don't have assembly lowering at all
|
|
|
|
// Prepare stack for variadic functions so that GETVARARGS works correctly
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: int (numparams)
|
|
FALLBACK_PREPVARARGS,
|
|
|
|
// Copy variables into the target registers from vararg storage for current function
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (dest start)
|
|
// C: int (count)
|
|
FALLBACK_GETVARARGS,
|
|
|
|
// Create closure from a child proto
|
|
// A: unsigned int (nups)
|
|
// B: pointer (table)
|
|
// C: unsigned int (protoid)
|
|
NEWCLOSURE,
|
|
|
|
// Create closure from a pre-created function object (reusing it unless environments diverge)
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (dest)
|
|
// C: Kn (prototype)
|
|
FALLBACK_DUPCLOSURE,
|
|
|
|
// Prepare loop variables for a generic for loop, jump to the loop backedge unconditionally
|
|
// A: unsigned int (bytecode instruction index)
|
|
// B: Rn (loop state start, updates Rn Rn+1 Rn+2)
|
|
// C: block
|
|
FALLBACK_FORGPREP,
|
|
|
|
// Instruction that passes value through, it is produced by constant folding and users substitute it with the value
|
|
SUBSTITUTE,
|
|
// A: operand of any type
|
|
|
|
// Performs bitwise and/xor/or on two unsigned integers
|
|
// A, B: int
|
|
BITAND_UINT,
|
|
BITXOR_UINT,
|
|
BITOR_UINT,
|
|
|
|
// Performs bitwise not on an unsigned integer
|
|
// A: int
|
|
BITNOT_UINT,
|
|
|
|
// Performs bitwise shift/rotate on an unsigned integer
|
|
// A: int (source)
|
|
// B: int (shift amount)
|
|
BITLSHIFT_UINT,
|
|
BITRSHIFT_UINT,
|
|
BITARSHIFT_UINT,
|
|
BITLROTATE_UINT,
|
|
BITRROTATE_UINT,
|
|
|
|
// Returns the number of consecutive zero bits in A starting from the left-most (most significant) bit.
|
|
// A: int
|
|
BITCOUNTLZ_UINT,
|
|
BITCOUNTRZ_UINT,
|
|
|
|
// Swap byte order in A
|
|
// A: int
|
|
BYTESWAP_UINT,
|
|
|
|
// Calls native libm function with 1 or 2 arguments
|
|
// A: builtin function ID
|
|
// B: double
|
|
// C: double/int (optional, 2nd argument)
|
|
INVOKE_LIBM,
|
|
|
|
// Returns the string name of a type based on tag, alternative for type(x)
|
|
// A: tag
|
|
GET_TYPE,
|
|
|
|
// Returns the string name of a type either from a __type metatable field or just based on the tag, alternative for typeof(x)
|
|
// A: Rn
|
|
GET_TYPEOF,
|
|
|
|
// Find or create an upval at the given level
|
|
// A: Rn (level)
|
|
FINDUPVAL,
|
|
|
|
// Read i8 (sign-extended to int) from buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
BUFFER_READI8,
|
|
|
|
// Read u8 (zero-extended to int) from buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
BUFFER_READU8,
|
|
|
|
// Write i8/u8 value (int argument is truncated) to buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
// C: int (value)
|
|
BUFFER_WRITEI8,
|
|
|
|
// Read i16 (sign-extended to int) from buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
BUFFER_READI16,
|
|
|
|
// Read u16 (zero-extended to int) from buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
BUFFER_READU16,
|
|
|
|
// Write i16/u16 value (int argument is truncated) to buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
// C: int (value)
|
|
BUFFER_WRITEI16,
|
|
|
|
// Read i32 value from buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
BUFFER_READI32,
|
|
|
|
// Write i32/u32 value to buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
// C: int (value)
|
|
BUFFER_WRITEI32,
|
|
|
|
// Read float value (converted to double) from buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
BUFFER_READF32,
|
|
|
|
// Write float value (converted from double) to buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
// C: double (value)
|
|
BUFFER_WRITEF32,
|
|
|
|
// Read double value from buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
BUFFER_READF64,
|
|
|
|
// Write double value to buffer storage at specified offset
|
|
// A: pointer (buffer)
|
|
// B: int (offset)
|
|
// C: double (value)
|
|
BUFFER_WRITEF64,
|
|
};
|
|
|
|
enum class IrConstKind : uint8_t
|
|
{
|
|
Int,
|
|
Uint,
|
|
Double,
|
|
Tag,
|
|
};
|
|
|
|
struct IrConst
|
|
{
|
|
IrConstKind kind;
|
|
|
|
union
|
|
{
|
|
int valueInt;
|
|
unsigned valueUint;
|
|
double valueDouble;
|
|
uint8_t valueTag;
|
|
};
|
|
};
|
|
|
|
enum class IrCondition : uint8_t
|
|
{
|
|
Equal,
|
|
NotEqual,
|
|
Less,
|
|
NotLess,
|
|
LessEqual,
|
|
NotLessEqual,
|
|
Greater,
|
|
NotGreater,
|
|
GreaterEqual,
|
|
NotGreaterEqual,
|
|
|
|
UnsignedLess,
|
|
UnsignedLessEqual,
|
|
UnsignedGreater,
|
|
UnsignedGreaterEqual,
|
|
|
|
Count
|
|
};
|
|
|
|
enum class IrOpKind : uint32_t
|
|
{
|
|
None,
|
|
|
|
Undef,
|
|
|
|
// To reference a constant value
|
|
Constant,
|
|
|
|
// To specify a condition code
|
|
Condition,
|
|
|
|
// To reference a result of a previous instruction
|
|
Inst,
|
|
|
|
// To reference a basic block in control flow
|
|
Block,
|
|
|
|
// To reference a VM register
|
|
VmReg,
|
|
|
|
// To reference a VM constant
|
|
VmConst,
|
|
|
|
// To reference a VM upvalue
|
|
VmUpvalue,
|
|
|
|
// To reference an exit to VM at specific PC pos
|
|
VmExit,
|
|
};
|
|
|
|
// VmExit uses a special value to indicate that pcpos update should be skipped
|
|
// This is only used during type checking at function entry
|
|
constexpr uint32_t kVmExitEntryGuardPc = (1u << 28) - 1;
|
|
|
|
struct IrOp
|
|
{
|
|
IrOpKind kind : 4;
|
|
uint32_t index : 28;
|
|
|
|
IrOp()
|
|
: kind(IrOpKind::None)
|
|
, index(0)
|
|
{
|
|
}
|
|
|
|
IrOp(IrOpKind kind, uint32_t index)
|
|
: kind(kind)
|
|
, index(index)
|
|
{
|
|
}
|
|
|
|
bool operator==(const IrOp& rhs) const
|
|
{
|
|
return kind == rhs.kind && index == rhs.index;
|
|
}
|
|
|
|
bool operator!=(const IrOp& rhs) const
|
|
{
|
|
return !(*this == rhs);
|
|
}
|
|
};
|
|
|
|
static_assert(sizeof(IrOp) == 4);
|
|
|
|
enum class IrValueKind : uint8_t
|
|
{
|
|
Unknown, // Used by SUBSTITUTE, argument has to be checked to get type
|
|
None,
|
|
Tag,
|
|
Int,
|
|
Pointer,
|
|
Double,
|
|
Tvalue,
|
|
};
|
|
|
|
struct IrInst
|
|
{
|
|
IrCmd cmd;
|
|
|
|
// Operands
|
|
IrOp a;
|
|
IrOp b;
|
|
IrOp c;
|
|
IrOp d;
|
|
IrOp e;
|
|
IrOp f;
|
|
IrOp g;
|
|
|
|
uint32_t lastUse = 0;
|
|
uint16_t useCount = 0;
|
|
|
|
// Location of the result (optional)
|
|
X64::RegisterX64 regX64 = X64::noreg;
|
|
A64::RegisterA64 regA64 = A64::noreg;
|
|
bool reusedReg = false;
|
|
bool spilled = false;
|
|
bool needsReload = false;
|
|
};
|
|
|
|
// When IrInst operands are used, current instruction index is often required to track lifetime
|
|
constexpr uint32_t kInvalidInstIdx = ~0u;
|
|
|
|
struct IrInstHash
|
|
{
|
|
static const uint32_t m = 0x5bd1e995;
|
|
static const int r = 24;
|
|
|
|
static uint32_t mix(uint32_t h, uint32_t k)
|
|
{
|
|
// MurmurHash2 step
|
|
k *= m;
|
|
k ^= k >> r;
|
|
k *= m;
|
|
|
|
h *= m;
|
|
h ^= k;
|
|
|
|
return h;
|
|
}
|
|
|
|
static uint32_t mix(uint32_t h, IrOp op)
|
|
{
|
|
static_assert(sizeof(op) == sizeof(uint32_t));
|
|
uint32_t k;
|
|
memcpy(&k, &op, sizeof(op));
|
|
|
|
return mix(h, k);
|
|
}
|
|
|
|
size_t operator()(const IrInst& key) const
|
|
{
|
|
// MurmurHash2 unrolled
|
|
uint32_t h = 25;
|
|
|
|
h = mix(h, uint32_t(key.cmd));
|
|
h = mix(h, key.a);
|
|
h = mix(h, key.b);
|
|
h = mix(h, key.c);
|
|
h = mix(h, key.d);
|
|
h = mix(h, key.e);
|
|
h = mix(h, key.f);
|
|
h = mix(h, key.g);
|
|
|
|
// MurmurHash2 tail
|
|
h ^= h >> 13;
|
|
h *= m;
|
|
h ^= h >> 15;
|
|
|
|
return h;
|
|
}
|
|
};
|
|
|
|
struct IrInstEq
|
|
{
|
|
bool operator()(const IrInst& a, const IrInst& b) const
|
|
{
|
|
return a.cmd == b.cmd && a.a == b.a && a.b == b.b && a.c == b.c && a.d == b.d && a.e == b.e && a.f == b.f && a.g == b.g;
|
|
}
|
|
};
|
|
|
|
enum class IrBlockKind : uint8_t
|
|
{
|
|
Bytecode,
|
|
Fallback,
|
|
Internal,
|
|
Linearized,
|
|
Dead,
|
|
};
|
|
|
|
struct IrBlock
|
|
{
|
|
IrBlockKind kind;
|
|
|
|
uint16_t useCount = 0;
|
|
|
|
// 'start' and 'finish' define an inclusive range of instructions which belong to this block inside the function
|
|
// When block has been constructed, 'finish' always points to the first and only terminating instruction
|
|
uint32_t start = ~0u;
|
|
uint32_t finish = ~0u;
|
|
|
|
uint32_t sortkey = ~0u;
|
|
uint32_t chainkey = 0;
|
|
uint32_t expectedNextBlock = ~0u;
|
|
|
|
Label label;
|
|
};
|
|
|
|
struct BytecodeMapping
|
|
{
|
|
uint32_t irLocation;
|
|
uint32_t asmLocation;
|
|
};
|
|
|
|
struct BytecodeBlock
|
|
{
|
|
// 'start' and 'finish' define an inclusive range of instructions which belong to the block
|
|
int startpc = -1;
|
|
int finishpc = -1;
|
|
};
|
|
|
|
struct BytecodeTypes
|
|
{
|
|
uint8_t result = LBC_TYPE_ANY;
|
|
uint8_t a = LBC_TYPE_ANY;
|
|
uint8_t b = LBC_TYPE_ANY;
|
|
uint8_t c = LBC_TYPE_ANY;
|
|
};
|
|
|
|
struct BytecodeRegTypeInfo
|
|
{
|
|
uint8_t type = LBC_TYPE_ANY;
|
|
uint8_t reg = 0; // Register slot where variable is stored
|
|
int startpc = 0; // First point where variable is alive (could be before variable has been assigned a value)
|
|
int endpc = 0; // First point where variable is dead
|
|
};
|
|
|
|
struct BytecodeTypeInfo
|
|
{
|
|
std::vector<uint8_t> argumentTypes;
|
|
std::vector<BytecodeRegTypeInfo> regTypes;
|
|
std::vector<uint8_t> upvalueTypes;
|
|
|
|
// Offsets into regTypes for each individual register
|
|
// One extra element at the end contains the vector size for easier arr[Rn], arr[Rn + 1] range access
|
|
std::vector<uint32_t> regTypeOffsets;
|
|
};
|
|
|
|
struct IrFunction
|
|
{
|
|
std::vector<IrBlock> blocks;
|
|
std::vector<IrInst> instructions;
|
|
std::vector<IrConst> constants;
|
|
|
|
std::vector<BytecodeBlock> bcBlocks;
|
|
std::vector<BytecodeTypes> bcTypes;
|
|
|
|
std::vector<BytecodeMapping> bcMapping;
|
|
uint32_t entryBlock = 0;
|
|
uint32_t entryLocation = 0;
|
|
|
|
// For each instruction, an operand that can be used to recompute the value
|
|
std::vector<IrOp> valueRestoreOps;
|
|
std::vector<uint32_t> validRestoreOpBlocks;
|
|
|
|
BytecodeTypeInfo bcTypeInfo;
|
|
|
|
Proto* proto = nullptr;
|
|
bool variadic = false;
|
|
|
|
CfgInfo cfg;
|
|
|
|
IrBlock& blockOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::Block);
|
|
return blocks[op.index];
|
|
}
|
|
|
|
IrInst& instOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::Inst);
|
|
return instructions[op.index];
|
|
}
|
|
|
|
IrInst* asInstOp(IrOp op)
|
|
{
|
|
if (op.kind == IrOpKind::Inst)
|
|
return &instructions[op.index];
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
IrConst& constOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::Constant);
|
|
return constants[op.index];
|
|
}
|
|
|
|
uint8_t tagOp(IrOp op)
|
|
{
|
|
IrConst& value = constOp(op);
|
|
|
|
CODEGEN_ASSERT(value.kind == IrConstKind::Tag);
|
|
return value.valueTag;
|
|
}
|
|
|
|
std::optional<uint8_t> asTagOp(IrOp op)
|
|
{
|
|
if (op.kind != IrOpKind::Constant)
|
|
return std::nullopt;
|
|
|
|
IrConst& value = constOp(op);
|
|
|
|
if (value.kind != IrConstKind::Tag)
|
|
return std::nullopt;
|
|
|
|
return value.valueTag;
|
|
}
|
|
|
|
int intOp(IrOp op)
|
|
{
|
|
IrConst& value = constOp(op);
|
|
|
|
CODEGEN_ASSERT(value.kind == IrConstKind::Int);
|
|
return value.valueInt;
|
|
}
|
|
|
|
std::optional<int> asIntOp(IrOp op)
|
|
{
|
|
if (op.kind != IrOpKind::Constant)
|
|
return std::nullopt;
|
|
|
|
IrConst& value = constOp(op);
|
|
|
|
if (value.kind != IrConstKind::Int)
|
|
return std::nullopt;
|
|
|
|
return value.valueInt;
|
|
}
|
|
|
|
unsigned uintOp(IrOp op)
|
|
{
|
|
IrConst& value = constOp(op);
|
|
|
|
CODEGEN_ASSERT(value.kind == IrConstKind::Uint);
|
|
return value.valueUint;
|
|
}
|
|
|
|
std::optional<unsigned> asUintOp(IrOp op)
|
|
{
|
|
if (op.kind != IrOpKind::Constant)
|
|
return std::nullopt;
|
|
|
|
IrConst& value = constOp(op);
|
|
|
|
if (value.kind != IrConstKind::Uint)
|
|
return std::nullopt;
|
|
|
|
return value.valueUint;
|
|
}
|
|
|
|
double doubleOp(IrOp op)
|
|
{
|
|
IrConst& value = constOp(op);
|
|
|
|
CODEGEN_ASSERT(value.kind == IrConstKind::Double);
|
|
return value.valueDouble;
|
|
}
|
|
|
|
std::optional<double> asDoubleOp(IrOp op)
|
|
{
|
|
if (op.kind != IrOpKind::Constant)
|
|
return std::nullopt;
|
|
|
|
IrConst& value = constOp(op);
|
|
|
|
if (value.kind != IrConstKind::Double)
|
|
return std::nullopt;
|
|
|
|
return value.valueDouble;
|
|
}
|
|
|
|
uint32_t getBlockIndex(const IrBlock& block) const
|
|
{
|
|
// Can only be called with blocks from our vector
|
|
CODEGEN_ASSERT(&block >= blocks.data() && &block <= blocks.data() + blocks.size());
|
|
return uint32_t(&block - blocks.data());
|
|
}
|
|
|
|
uint32_t getInstIndex(const IrInst& inst) const
|
|
{
|
|
// Can only be called with instructions from our vector
|
|
CODEGEN_ASSERT(&inst >= instructions.data() && &inst <= instructions.data() + instructions.size());
|
|
return uint32_t(&inst - instructions.data());
|
|
}
|
|
|
|
void recordRestoreOp(uint32_t instIdx, IrOp location)
|
|
{
|
|
if (instIdx >= valueRestoreOps.size())
|
|
valueRestoreOps.resize(instIdx + 1);
|
|
|
|
valueRestoreOps[instIdx] = location;
|
|
}
|
|
|
|
IrOp findRestoreOp(uint32_t instIdx, bool limitToCurrentBlock) const
|
|
{
|
|
if (instIdx >= valueRestoreOps.size())
|
|
return {};
|
|
|
|
// When spilled, values can only reference restore operands in the current block chain
|
|
if (limitToCurrentBlock)
|
|
{
|
|
for (uint32_t blockIdx : validRestoreOpBlocks)
|
|
{
|
|
const IrBlock& block = blocks[blockIdx];
|
|
|
|
if (instIdx >= block.start && instIdx <= block.finish)
|
|
return valueRestoreOps[instIdx];
|
|
}
|
|
|
|
return {};
|
|
}
|
|
|
|
return valueRestoreOps[instIdx];
|
|
}
|
|
|
|
IrOp findRestoreOp(const IrInst& inst, bool limitToCurrentBlock) const
|
|
{
|
|
return findRestoreOp(getInstIndex(inst), limitToCurrentBlock);
|
|
}
|
|
|
|
BytecodeTypes getBytecodeTypesAt(int pcpos) const
|
|
{
|
|
CODEGEN_ASSERT(pcpos >= 0);
|
|
|
|
if (size_t(pcpos) < bcTypes.size())
|
|
return bcTypes[pcpos];
|
|
|
|
return BytecodeTypes();
|
|
}
|
|
};
|
|
|
|
inline IrCondition conditionOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::Condition);
|
|
return IrCondition(op.index);
|
|
}
|
|
|
|
inline int vmRegOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::VmReg);
|
|
return op.index;
|
|
}
|
|
|
|
inline int vmConstOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::VmConst);
|
|
return op.index;
|
|
}
|
|
|
|
inline int vmUpvalueOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::VmUpvalue);
|
|
return op.index;
|
|
}
|
|
|
|
inline uint32_t vmExitOp(IrOp op)
|
|
{
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::VmExit);
|
|
return op.index;
|
|
}
|
|
|
|
} // namespace CodeGen
|
|
} // namespace Luau
|