luau/CodeGen/src/IrLoweringA64.cpp

2724 lines
91 KiB
C++
Raw Normal View History

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "IrLoweringA64.h"
#include "Luau/DenseHash.h"
#include "Luau/IrData.h"
#include "Luau/IrUtils.h"
#include "EmitCommonA64.h"
#include "NativeState.h"
#include "lstate.h"
#include "lgc.h"
LUAU_FASTFLAG(LuauVectorLibNativeDot);
namespace Luau
{
namespace CodeGen
{
namespace A64
{
inline ConditionA64 getConditionFP(IrCondition cond)
{
switch (cond)
{
case IrCondition::Equal:
return ConditionA64::Equal;
case IrCondition::NotEqual:
return ConditionA64::NotEqual;
case IrCondition::Less:
return ConditionA64::Minus;
case IrCondition::NotLess:
return ConditionA64::Plus;
case IrCondition::LessEqual:
return ConditionA64::UnsignedLessEqual;
case IrCondition::NotLessEqual:
return ConditionA64::UnsignedGreater;
case IrCondition::Greater:
return ConditionA64::Greater;
case IrCondition::NotGreater:
return ConditionA64::LessEqual;
case IrCondition::GreaterEqual:
return ConditionA64::GreaterEqual;
case IrCondition::NotGreaterEqual:
return ConditionA64::Less;
default:
CODEGEN_ASSERT(!"Unexpected condition code");
return ConditionA64::Always;
}
}
inline ConditionA64 getConditionInt(IrCondition cond)
{
switch (cond)
{
case IrCondition::Equal:
return ConditionA64::Equal;
case IrCondition::NotEqual:
return ConditionA64::NotEqual;
case IrCondition::Less:
return ConditionA64::Minus;
case IrCondition::NotLess:
return ConditionA64::Plus;
case IrCondition::LessEqual:
return ConditionA64::LessEqual;
case IrCondition::NotLessEqual:
return ConditionA64::Greater;
case IrCondition::Greater:
return ConditionA64::Greater;
case IrCondition::NotGreater:
return ConditionA64::LessEqual;
case IrCondition::GreaterEqual:
return ConditionA64::GreaterEqual;
case IrCondition::NotGreaterEqual:
return ConditionA64::Less;
case IrCondition::UnsignedLess:
return ConditionA64::CarryClear;
case IrCondition::UnsignedLessEqual:
return ConditionA64::UnsignedLessEqual;
case IrCondition::UnsignedGreater:
return ConditionA64::UnsignedGreater;
case IrCondition::UnsignedGreaterEqual:
return ConditionA64::CarrySet;
default:
CODEGEN_ASSERT(!"Unexpected condition code");
return ConditionA64::Always;
}
}
static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset)
{
CODEGEN_ASSERT(dst != src);
CODEGEN_ASSERT(offset <= INT_MAX);
if (offset <= AssemblyBuilderA64::kMaxImmediate)
{
build.add(dst, src, uint16_t(offset));
}
else
{
build.mov(dst, int(offset));
build.add(dst, dst, src);
}
}
static void checkObjectBarrierConditions(AssemblyBuilderA64& build, RegisterA64 object, RegisterA64 temp, IrOp ra, int ratag, Label& skip)
{
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
RegisterA64 tempw = castReg(KindA64::w, temp);
AddressA64 addr = temp;
// iscollectable(ra)
if (ratag == -1 || !isGCO(ratag))
{
if (ra.kind == IrOpKind::VmReg)
addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, tt));
else if (ra.kind == IrOpKind::VmConst)
emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, tt));
build.ldr(tempw, addr);
build.cmp(tempw, LUA_TSTRING);
build.b(ConditionA64::Less, skip);
}
// isblack(obj2gco(o))
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.ldrb(tempw, mem(object, offsetof(GCheader, marked)));
build.tbz(tempw, BLACKBIT, skip);
// iswhite(gcvalue(ra))
if (ra.kind == IrOpKind::VmReg)
addr = mem(rBase, vmRegOp(ra) * sizeof(TValue) + offsetof(TValue, value));
else if (ra.kind == IrOpKind::VmConst)
emitAddOffset(build, temp, rConstants, vmConstOp(ra) * sizeof(TValue) + offsetof(TValue, value));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldr(temp, addr);
build.ldrb(tempw, mem(temp, offsetof(GCheader, marked)));
build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT));
build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
}
static void emitAbort(AssemblyBuilderA64& build, Label& abort)
{
Label skip;
build.b(skip);
build.setLabel(abort);
build.udf();
build.setLabel(skip);
}
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
static void emitFallback(AssemblyBuilderA64& build, int offset, int pcpos)
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
{
// fallback(L, instruction, base, k)
build.mov(x0, rState);
emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction));
build.mov(x2, rBase);
build.mov(x3, rConstants);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
build.ldr(x4, mem(rNativeContext, offset));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.blr(x4);
emitUpdateBase(build);
}
static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg)
{
CODEGEN_ASSERT(kTempSlots >= 1);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
build.add(x0, sp, sTemporary.data); // sp-relative offset
build.ldr(x1, mem(rNativeContext, uint32_t(func)));
build.blr(x1);
}
static bool emitBuiltin(AssemblyBuilderA64& build, IrFunction& function, IrRegAllocA64& regs, int bfid, int res, int arg, int nresults)
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
{
switch (bfid)
{
case LBF_MATH_FREXP:
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
{
CODEGEN_ASSERT(nresults == 1 || nresults == 2);
emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg);
build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, LUA_TNUMBER);
build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
if (nresults == 2)
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
{
build.ldr(w0, sTemporary);
build.scvtf(d1, w0);
build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
}
return true;
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case LBF_MATH_MODF:
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
{
CODEGEN_ASSERT(nresults == 1 || nresults == 2);
emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg);
build.ldr(d1, sTemporary);
build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, LUA_TNUMBER);
build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
if (nresults == 2)
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
{
build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
return true;
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
default:
CODEGEN_ASSERT(!"Missing A64 lowering");
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
return false;
}
}
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
static uint64_t getDoubleBits(double value)
{
uint64_t result;
static_assert(sizeof(result) == sizeof(value), "Expecting double to be 64-bit");
memcpy(&result, &value, sizeof(value));
return result;
}
IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, IrFunction& function, LoweringStats* stats)
: build(build)
, helpers(helpers)
, function(function)
, stats(stats)
, regs(function, stats, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}})
, valueTracker(function)
, exitHandlerMap(~0u)
{
valueTracker.setRestoreCallack(
this,
[](void* context, IrInst& inst)
{
IrLoweringA64* self = static_cast<IrLoweringA64*>(context);
self->regs.restoreReg(self->build, inst);
}
);
}
void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
{
valueTracker.beforeInstLowering(inst);
switch (inst.cmd)
{
case IrCmd::LOAD_TAG:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_POINTER:
{
inst.regA64 = regs.allocReg(KindA64::x, index);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.gc));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_DOUBLE:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value.n));
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_INT:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
build.ldr(inst.regA64, addr);
break;
}
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
case IrCmd::LOAD_FLOAT:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register
AddressA64 addr = tempAddr(inst.a, intOp(inst.b));
build.ldr(temp, addr);
build.fcvt(inst.regA64, temp);
break;
}
case IrCmd::LOAD_TVALUE:
{
inst.regA64 = regs.allocReg(KindA64::q, index);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
int addrOffset = inst.b.kind != IrOpKind::None ? intOp(inst.b) : 0;
AddressA64 addr = tempAddr(inst.a, addrOffset);
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::LOAD_ENV:
inst.regA64 = regs.allocReg(KindA64::x, index);
build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env)));
break;
case IrCmd::GET_ARR_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, array)));
if (inst.b.kind == IrOpKind::Inst)
{
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.add(inst.regA64, inst.regA64, regOp(inst.b), kTValueSizeLog2); // implicit uxtw
}
else if (inst.b.kind == IrOpKind::Constant)
{
if (intOp(inst.b) == 0)
{
// no offset required
}
else if (intOp(inst.b) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
{
build.add(inst.regA64, inst.regA64, uint16_t(intOp(inst.b) * sizeof(TValue)));
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.mov(temp, intOp(inst.b) * sizeof(TValue));
build.add(inst.regA64, inst.regA64, temp);
}
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
}
case IrCmd::GET_SLOT_NODE_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp1w = castReg(KindA64::w, temp1);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp2x = castReg(KindA64::x, temp2);
// note: since the stride of the load is the same as the destination register size, we can range check the array index, not the byte offset
if (uintOp(inst.b) <= AddressA64::kMaxOffset)
build.ldr(temp1w, mem(rCode, uintOp(inst.b) * sizeof(Instruction)));
else
{
build.mov(temp1, uintOp(inst.b) * sizeof(Instruction));
build.ldr(temp1w, mem(rCode, temp1));
}
// C field can be shifted as long as it's at the most significant byte of the instruction word
CODEGEN_ASSERT(kOffsetOfInstructionC == 3);
build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, nodemask8)));
build.and_(temp2, temp2, temp1w, -24);
// note: this may clobber inst.a, so it's important that we don't use it after this
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
break;
}
case IrCmd::GET_HASH_NODE_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
RegisterA64 temp1 = regs.allocTemp(KindA64::w);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp2x = castReg(KindA64::x, temp2);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
// hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode)
build.mov(temp1, -1);
build.ldrb(temp2, mem(regOp(inst.a), offsetof(Table, lsizenode)));
build.lsl(temp1, temp1, temp2);
build.mov(temp2, uintOp(inst.b));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.bic(temp2, temp2, temp1);
// note: this may clobber inst.a, so it's important that we don't use it after this
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(Table, node)));
build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
break;
}
Sync to upstream/release/588 (#992) Type checker/autocomplete: * `Luau::autocomplete` no longer performs typechecking internally, make sure to run `Frontend::check` before performing autocomplete requests * Autocomplete string suggestions without "" are now only suggested inside the "" * Autocomplete suggestions now include `function (anonymous autofilled)` key with a full suggestion for the function expression (with arguments included) stored in `AutocompleteEntry::insertText` * `AutocompleteEntry::indexedWithSelf` is provided for function call suggestions made with `:` * Cyclic modules now see each other type exports as `any` to prevent memory use-after-free (similar to module return type) Runtime: * Updated inline/loop unroll cost model to better handle assignments (Fixes https://github.com/Roblox/luau/issues/978) * `math.noise` speed was improved by ~30% * `table.concat` speed was improved by ~5-7% * `tonumber` and `tostring` now have fastcall paths that execute ~1.5x and ~2.5x faster respectively (fixes #777) * Fixed crash in `luaL_typename` when index refers to a non-existing value * Fixed potential out of memory scenario when using `string.sub` or `string.char` in a loop * Fixed behavior of some fastcall builtins when called without arguments under -O2 to match original functions * Support for native code execution in VM is now enabled by default (note: native code still has to be generated explicitly) * `Codegen::compile` now accepts `CodeGen_OnlyNativeModules` flag. When set, only modules that have a `--!native` hot-comment at the top will be compiled to native code In our new typechecker: * Generic type packs are no longer considered to be variadic during unification * Timeout and cancellation now works in new solver * Fixed false positive errors around 'table' and 'function' type refinements * Table literals now use covariant unification rules. This is sound since literal has no type specified and has no aliases * Fixed issues with blocked types escaping the constraint solver * Fixed more places where error messages that should've been suppressed were still reported * Fixed errors when iterating over a top table type In our native code generation (jit): * 'DebugLuauAbortingChecks' flag is now supported on A64 * LOP_NEWCLOSURE has been translated to IR
2023-07-28 16:13:53 +01:00
case IrCmd::GET_CLOSURE_UPVAL_ADDR:
{
inst.regA64 = regs.allocReuse(KindA64::x, index, {inst.a});
RegisterA64 cl = inst.a.kind == IrOpKind::Undef ? rClosure : regOp(inst.a);
build.add(inst.regA64, cl, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b)));
break;
}
case IrCmd::STORE_TAG:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, tt));
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
if (tagOp(inst.b) == 0)
{
build.str(wzr, addr);
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, tagOp(inst.b));
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_POINTER:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
if (inst.b.kind == IrOpKind::Constant)
{
CODEGEN_ASSERT(intOp(inst.b) == 0);
build.str(xzr, addr);
}
else
{
build.str(regOp(inst.b), addr);
}
break;
}
case IrCmd::STORE_EXTRA:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, extra));
if (intOp(inst.b) == 0)
{
build.str(wzr, addr);
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, intOp(inst.b));
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_DOUBLE:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
if (inst.b.kind == IrOpKind::Constant && getDoubleBits(doubleOp(inst.b)) == 0)
{
build.str(xzr, addr);
}
else
{
RegisterA64 temp = tempDouble(inst.b);
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_INT:
{
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
if (inst.b.kind == IrOpKind::Constant && intOp(inst.b) == 0)
{
build.str(wzr, addr);
}
else
{
RegisterA64 temp = tempInt(inst.b);
build.str(temp, addr);
}
break;
}
case IrCmd::STORE_VECTOR:
{
RegisterA64 temp1 = tempDouble(inst.b);
RegisterA64 temp2 = tempDouble(inst.c);
RegisterA64 temp3 = tempDouble(inst.d);
RegisterA64 temp4 = regs.allocTemp(KindA64::s);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value));
CODEGEN_ASSERT(addr.kind == AddressKindA64::imm && addr.data % 4 == 0 && unsigned(addr.data + 8) / 4 <= AddressA64::kMaxOffset);
build.fcvt(temp4, temp1);
build.str(temp4, AddressA64(addr.base, addr.data + 0));
build.fcvt(temp4, temp2);
build.str(temp4, AddressA64(addr.base, addr.data + 4));
build.fcvt(temp4, temp3);
build.str(temp4, AddressA64(addr.base, addr.data + 8));
break;
}
case IrCmd::STORE_TVALUE:
{
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
int addrOffset = inst.c.kind != IrOpKind::None ? intOp(inst.c) : 0;
AddressA64 addr = tempAddr(inst.a, addrOffset);
build.str(regOp(inst.b), addr);
break;
}
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
case IrCmd::STORE_SPLIT_TVALUE:
{
int addrOffset = inst.d.kind != IrOpKind::None ? intOp(inst.d) : 0;
RegisterA64 tempt = regs.allocTemp(KindA64::w);
AddressA64 addrt = tempAddr(inst.a, offsetof(TValue, tt) + addrOffset);
build.mov(tempt, tagOp(inst.b));
build.str(tempt, addrt);
AddressA64 addr = tempAddr(inst.a, offsetof(TValue, value) + addrOffset);
if (tagOp(inst.b) == LUA_TBOOLEAN)
{
if (inst.c.kind == IrOpKind::Constant)
{
// note: we reuse tag temp register as value for true booleans, and use built-in zero register for false values
CODEGEN_ASSERT(LUA_TBOOLEAN == 1);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.str(intOp(inst.c) ? tempt : wzr, addr);
}
else
build.str(regOp(inst.c), addr);
}
else if (tagOp(inst.b) == LUA_TNUMBER)
{
RegisterA64 temp = tempDouble(inst.c);
build.str(temp, addr);
}
else if (isGCO(tagOp(inst.b)))
{
build.str(regOp(inst.c), addr);
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
}
break;
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
}
case IrCmd::ADD_INT:
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
build.add(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
else if (inst.a.kind == IrOpKind::Constant && unsigned(intOp(inst.a)) <= AssemblyBuilderA64::kMaxImmediate)
build.add(inst.regA64, regOp(inst.b), uint16_t(intOp(inst.a)));
else
{
RegisterA64 temp1 = tempInt(inst.a);
RegisterA64 temp2 = tempInt(inst.b);
build.add(inst.regA64, temp1, temp2);
}
break;
case IrCmd::SUB_INT:
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.b.kind == IrOpKind::Constant && unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
build.sub(inst.regA64, regOp(inst.a), uint16_t(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempInt(inst.a);
RegisterA64 temp2 = tempInt(inst.b);
build.sub(inst.regA64, temp1, temp2);
}
break;
case IrCmd::ADD_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fadd(inst.regA64, temp1, temp2);
break;
}
case IrCmd::SUB_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fsub(inst.regA64, temp1, temp2);
break;
}
case IrCmd::MUL_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fmul(inst.regA64, temp1, temp2);
break;
}
case IrCmd::DIV_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fdiv(inst.regA64, temp1, temp2);
break;
}
case IrCmd::IDIV_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fdiv(inst.regA64, temp1, temp2);
build.frintm(inst.regA64, inst.regA64);
break;
}
case IrCmd::MOD_NUM:
{
inst.regA64 = regs.allocReg(KindA64::d, index); // can't allocReuse because both A and B are used twice
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fdiv(inst.regA64, temp1, temp2);
build.frintm(inst.regA64, inst.regA64);
build.fmul(inst.regA64, inst.regA64, temp2);
build.fsub(inst.regA64, temp1, inst.regA64);
break;
}
case IrCmd::MIN_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fcmp(temp1, temp2);
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Less));
break;
}
case IrCmd::MAX_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a, inst.b});
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fcmp(temp1, temp2);
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Greater));
break;
}
case IrCmd::UNM_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.fneg(inst.regA64, temp);
break;
}
case IrCmd::FLOOR_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.frintm(inst.regA64, temp);
break;
}
case IrCmd::CEIL_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.frintp(inst.regA64, temp);
break;
}
case IrCmd::ROUND_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.frinta(inst.regA64, temp);
break;
}
case IrCmd::SQRT_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.fsqrt(inst.regA64, temp);
break;
}
case IrCmd::ABS_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
build.fabs(inst.regA64, temp);
break;
}
Sync to upstream/release/632 (#1307) # What's Changed? - Fix #1137 by appropriately retaining additional metadata from definition files throughout the type system. - Improve Frontend for LSPs by appropriately allowing the cancellation of typechecking while running its destructor. ## New Solver - Added support for the `rawget` type function. - Reduced overall static memory usage of builtin type functions. - Fixed a crash where visitors could mutate a union or intersection type and fail to invalidate iteration over them in doing so. - Revised autocomplete functionality to not rely on a separate run of the type solver when using the new solver. - Implemented a more relaxed semantic rule for casting. - Fixed some smaller crashes in the new solver. ## Native Code Generation - Add additional codegen specialization for `math.sign` - Cleaned up a large number of outstanding fflags in the code. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: James McNellis <jmcnellis@roblox.com> Co-authored-by: Jeremy Yoo <jyoo@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-06-29 01:34:49 +01:00
case IrCmd::SIGN_NUM:
{
inst.regA64 = regs.allocReuse(KindA64::d, index, {inst.a});
RegisterA64 temp = tempDouble(inst.a);
RegisterA64 temp0 = regs.allocTemp(KindA64::d);
RegisterA64 temp1 = regs.allocTemp(KindA64::d);
build.fcmpz(temp);
build.fmov(temp0, 0.0);
build.fmov(temp1, 1.0);
build.fcsel(inst.regA64, temp1, temp0, getConditionFP(IrCondition::Greater));
build.fmov(temp1, -1.0);
build.fcsel(inst.regA64, temp1, inst.regA64, getConditionFP(IrCondition::Less));
break;
}
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
case IrCmd::ADD_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fadd(inst.regA64, regOp(inst.a), regOp(inst.b));
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
break;
}
case IrCmd::SUB_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fsub(inst.regA64, regOp(inst.a), regOp(inst.b));
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
break;
}
case IrCmd::MUL_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fmul(inst.regA64, regOp(inst.a), regOp(inst.b));
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
break;
}
case IrCmd::DIV_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a, inst.b});
build.fdiv(inst.regA64, regOp(inst.a), regOp(inst.b));
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
break;
}
case IrCmd::UNM_VEC:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a});
build.fneg(inst.regA64, regOp(inst.a));
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
break;
}
CodeGen: Rewrite dot product lowering using a dedicated IR instruction (#1512) Instead of doing the dot product related math in scalar IR, we lift the computation into a dedicated IR instruction. On x64, we can use VDPPS which was more or less tailor made for this purpose. This is better than manual scalar lowering that requires reloading components from memory; it's not always a strict improvement over the shuffle+add version (which we never had), but this can now be adjusted in the IR lowering in an optimal fashion (maybe even based on CPU vendor, although that'd create issues for offline compilation). On A64, we can either use naive adds or paired adds, as there is no dedicated vector-wide horizontal instruction until SVE. Both run at about the same performance on M2, but paired adds require fewer instructions and temporaries. I've measured this using mesh-normal-vector benchmark, changing the benchmark to just report the time of the second loop inside `calculate_normals`, testing master vs #1504 vs this PR, also increasing the grid size to 400 for more stable timings. On Zen 4 (7950X), this PR is comfortably ~8% faster vs master, while I see neutral to negative results in #1504. On M2 (base), this PR is ~28% faster vs master, while #1504 is only about ~10% faster. If I measure the second loop in `calculate_tangent_space` instead, I get: On Zen 4 (7950X), this PR is ~12% faster vs master, while #1504 is ~3% faster On M2 (base), this PR is ~24% faster vs master, while #1504 is only about ~13% faster. Note that the loops in question are not quite optimal, as they store and reload various vectors to dictionary values due to inappropriate use of locals. The underlying gains in individual functions are thus larger than the numbers above; for example, changing the `calculate_normals` loop to use a local variable to store the normalized vector (but still saving the result to dictionary value), I get a ~24% performance increase from this PR on Zen4 vs master instead of just 8% (#1504 is ~15% slower in this setup).
2024-11-09 00:23:09 +00:00
case IrCmd::DOT_VEC:
{
LUAU_ASSERT(FFlag::LuauVectorLibNativeDot);
CodeGen: Rewrite dot product lowering using a dedicated IR instruction (#1512) Instead of doing the dot product related math in scalar IR, we lift the computation into a dedicated IR instruction. On x64, we can use VDPPS which was more or less tailor made for this purpose. This is better than manual scalar lowering that requires reloading components from memory; it's not always a strict improvement over the shuffle+add version (which we never had), but this can now be adjusted in the IR lowering in an optimal fashion (maybe even based on CPU vendor, although that'd create issues for offline compilation). On A64, we can either use naive adds or paired adds, as there is no dedicated vector-wide horizontal instruction until SVE. Both run at about the same performance on M2, but paired adds require fewer instructions and temporaries. I've measured this using mesh-normal-vector benchmark, changing the benchmark to just report the time of the second loop inside `calculate_normals`, testing master vs #1504 vs this PR, also increasing the grid size to 400 for more stable timings. On Zen 4 (7950X), this PR is comfortably ~8% faster vs master, while I see neutral to negative results in #1504. On M2 (base), this PR is ~28% faster vs master, while #1504 is only about ~10% faster. If I measure the second loop in `calculate_tangent_space` instead, I get: On Zen 4 (7950X), this PR is ~12% faster vs master, while #1504 is ~3% faster On M2 (base), this PR is ~24% faster vs master, while #1504 is only about ~13% faster. Note that the loops in question are not quite optimal, as they store and reload various vectors to dictionary values due to inappropriate use of locals. The underlying gains in individual functions are thus larger than the numbers above; for example, changing the `calculate_normals` loop to use a local variable to store the normalized vector (but still saving the result to dictionary value), I get a ~24% performance increase from this PR on Zen4 vs master instead of just 8% (#1504 is ~15% slower in this setup).
2024-11-09 00:23:09 +00:00
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = regs.allocTemp(KindA64::q);
RegisterA64 temps = castReg(KindA64::s, temp);
RegisterA64 regs = castReg(KindA64::s, inst.regA64);
build.fmul(temp, regOp(inst.a), regOp(inst.b));
build.faddp(regs, temps); // x+y
build.dup_4s(temp, temp, 2);
build.fadd(regs, regs, temps); // +z
build.fcvt(inst.regA64, regs);
break;
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case IrCmd::NOT_ANY:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Constant)
{
// other cases should've been constant folded
CODEGEN_ASSERT(tagOp(inst.a) == LUA_TBOOLEAN);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.eor(inst.regA64, regOp(inst.b), 1);
}
else
{
Label notbool, exit;
// use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once
CODEGEN_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.cmp(regOp(inst.a), LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, notbool);
if (inst.b.kind == IrOpKind::Constant)
build.mov(inst.regA64, intOp(inst.b) == 0 ? 1 : 0);
else
build.eor(inst.regA64, regOp(inst.b), 1); // boolean => invert value
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.b(exit);
// not boolean => result is true iff tag was nil
build.setLabel(notbool);
build.cset(inst.regA64, ConditionA64::Less);
build.setLabel(exit);
}
break;
}
case IrCmd::CMP_ANY:
{
IrCondition cond = conditionOp(inst.c);
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (cond == IrCondition::LessEqual)
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal)));
else if (cond == IrCondition::Less)
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan)));
else if (cond == IrCondition::Equal)
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval)));
else
CODEGEN_ASSERT(!"Unsupported condition");
build.blr(x3);
emitUpdateBase(build);
inst.regA64 = regs.takeReg(w0, index);
break;
}
case IrCmd::JUMP:
if (inst.a.kind == IrOpKind::Undef || inst.a.kind == IrOpKind::VmExit)
{
Label fresh;
build.b(getTargetLabel(inst.a, fresh));
finalizeTargetLabel(inst.a, fresh);
}
else
{
jumpOrFallthrough(blockOp(inst.a), next);
}
break;
case IrCmd::JUMP_IF_TRUTHY:
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
// nil => falsy
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(temp, labelOp(inst.c));
// not boolean => truthy
build.cmp(temp, LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, labelOp(inst.b));
// compare boolean value
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
build.cbnz(temp, labelOp(inst.b));
jumpOrFallthrough(blockOp(inst.c), next);
break;
}
case IrCmd::JUMP_IF_FALSY:
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, tt)));
// nil => falsy
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(temp, labelOp(inst.b));
// not boolean => truthy
build.cmp(temp, LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, labelOp(inst.c));
// compare boolean value
build.ldr(temp, mem(rBase, vmRegOp(inst.a) * sizeof(TValue) + offsetof(TValue, value)));
build.cbz(temp, labelOp(inst.b));
jumpOrFallthrough(blockOp(inst.c), next);
break;
}
case IrCmd::JUMP_EQ_TAG:
{
RegisterA64 zr = noreg;
if (inst.a.kind == IrOpKind::Constant && tagOp(inst.a) == 0)
zr = regOp(inst.b);
else if (inst.b.kind == IrOpKind::Constant && tagOp(inst.b) == 0)
zr = regOp(inst.a);
else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.cmp(regOp(inst.a), tagOp(inst.b));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
else if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Inst)
build.cmp(regOp(inst.a), regOp(inst.b));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
else if (inst.a.kind == IrOpKind::Constant && inst.b.kind == IrOpKind::Inst)
build.cmp(regOp(inst.b), tagOp(inst.a));
else
CODEGEN_ASSERT(!"Unsupported instruction form");
if (isFallthroughBlock(blockOp(inst.d), next))
{
if (zr != noreg)
build.cbz(zr, labelOp(inst.c));
else
build.b(ConditionA64::Equal, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
}
else
{
if (zr != noreg)
build.cbnz(zr, labelOp(inst.d));
else
build.b(ConditionA64::NotEqual, labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.c), next);
}
break;
}
case IrCmd::JUMP_CMP_INT:
{
IrCondition cond = conditionOp(inst.c);
if (cond == IrCondition::Equal && intOp(inst.b) == 0)
{
build.cbz(regOp(inst.a), labelOp(inst.d));
}
else if (cond == IrCondition::NotEqual && intOp(inst.b) == 0)
{
build.cbnz(regOp(inst.a), labelOp(inst.d));
}
else
{
CODEGEN_ASSERT(unsigned(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate);
build.cmp(regOp(inst.a), uint16_t(intOp(inst.b)));
build.b(getConditionInt(cond), labelOp(inst.d));
}
jumpOrFallthrough(blockOp(inst.e), next);
break;
}
case IrCmd::JUMP_EQ_POINTER:
build.cmp(regOp(inst.a), regOp(inst.b));
build.b(ConditionA64::Equal, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::JUMP_CMP_NUM:
{
IrCondition cond = conditionOp(inst.c);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
if (inst.b.kind == IrOpKind::Constant && doubleOp(inst.b) == 0.0)
{
RegisterA64 temp = tempDouble(inst.a);
build.fcmpz(temp);
}
else
{
RegisterA64 temp1 = tempDouble(inst.a);
RegisterA64 temp2 = tempDouble(inst.b);
build.fcmp(temp1, temp2);
}
build.b(getConditionFP(cond), labelOp(inst.d));
jumpOrFallthrough(blockOp(inst.e), next);
break;
}
Sync to upstream/release/600 (#1076) ### What's Changed - Improve readability of unions and intersections by limiting the number of elements of those types that can be presented on a single line (gated under `FFlag::LuauToStringSimpleCompositeTypesSingleLine`) - Adds a new option to the compiler `--record-stats` to record and output compilation statistics - `if...then...else` expressions are now optimized into `AND/OR` form when possible. ### VM - Add a new `buffer` type to Luau based on the [buffer RFC](https://github.com/Roblox/luau/pull/739) and additional C API functions to work with it; this release does not include the library. - Internal C API to work with string buffers has been updated to align with Lua version more closely ### Native Codegen - Added support for new X64 instruction (rev) and new A64 instruction (bswap) in the assembler - Simplified the way numerical loop condition is translated to IR ### New Type Solver - Operator inference now handled by type families - Created a new system called `Type Paths` to explain why subtyping tests fail in order to improve the quality of error messages. - Systematic changes to implement Data Flow analysis in the new solver (`Breadcrumb` removed and replaced with `RefinementKey`) --- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com>
2023-10-21 02:10:30 +01:00
case IrCmd::JUMP_FORN_LOOP_COND:
{
RegisterA64 index = tempDouble(inst.a);
RegisterA64 limit = tempDouble(inst.b);
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
RegisterA64 step = tempDouble(inst.c);
Sync to upstream/release/600 (#1076) ### What's Changed - Improve readability of unions and intersections by limiting the number of elements of those types that can be presented on a single line (gated under `FFlag::LuauToStringSimpleCompositeTypesSingleLine`) - Adds a new option to the compiler `--record-stats` to record and output compilation statistics - `if...then...else` expressions are now optimized into `AND/OR` form when possible. ### VM - Add a new `buffer` type to Luau based on the [buffer RFC](https://github.com/Roblox/luau/pull/739) and additional C API functions to work with it; this release does not include the library. - Internal C API to work with string buffers has been updated to align with Lua version more closely ### Native Codegen - Added support for new X64 instruction (rev) and new A64 instruction (bswap) in the assembler - Simplified the way numerical loop condition is translated to IR ### New Type Solver - Operator inference now handled by type families - Created a new system called `Type Paths` to explain why subtyping tests fail in order to improve the quality of error messages. - Systematic changes to implement Data Flow analysis in the new solver (`Breadcrumb` removed and replaced with `RefinementKey`) --- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com>
2023-10-21 02:10:30 +01:00
Label direct;
// step > 0
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.fcmpz(step);
Sync to upstream/release/600 (#1076) ### What's Changed - Improve readability of unions and intersections by limiting the number of elements of those types that can be presented on a single line (gated under `FFlag::LuauToStringSimpleCompositeTypesSingleLine`) - Adds a new option to the compiler `--record-stats` to record and output compilation statistics - `if...then...else` expressions are now optimized into `AND/OR` form when possible. ### VM - Add a new `buffer` type to Luau based on the [buffer RFC](https://github.com/Roblox/luau/pull/739) and additional C API functions to work with it; this release does not include the library. - Internal C API to work with string buffers has been updated to align with Lua version more closely ### Native Codegen - Added support for new X64 instruction (rev) and new A64 instruction (bswap) in the assembler - Simplified the way numerical loop condition is translated to IR ### New Type Solver - Operator inference now handled by type families - Created a new system called `Type Paths` to explain why subtyping tests fail in order to improve the quality of error messages. - Systematic changes to implement Data Flow analysis in the new solver (`Breadcrumb` removed and replaced with `RefinementKey`) --- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com>
2023-10-21 02:10:30 +01:00
build.b(getConditionFP(IrCondition::Greater), direct);
// !(limit <= index)
build.fcmp(limit, index);
build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e));
build.b(labelOp(inst.d));
// !(index <= limit)
build.setLabel(direct);
build.fcmp(index, limit);
build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(inst.e));
jumpOrFallthrough(blockOp(inst.d), next);
break;
}
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
// IrCmd::JUMP_SLOT_MATCH implemented below
case IrCmd::TABLE_LEN:
{
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
regs.spill(build, index, {reg});
build.mov(x0, reg);
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, luaH_getn)));
build.blr(x1);
inst.regA64 = regs.takeReg(w0, index);
break;
}
case IrCmd::STRING_LEN:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.ldr(inst.regA64, mem(regOp(inst.a), offsetof(TString, len)));
break;
}
case IrCmd::TABLE_SETNUM:
{
// note: we need to call regOp before spill so that we don't do redundant reloads
RegisterA64 table = regOp(inst.a);
RegisterA64 key = regOp(inst.b);
RegisterA64 temp = regs.allocTemp(KindA64::w);
regs.spill(build, index, {table, key});
if (w1 != key)
{
build.mov(x1, table);
build.mov(w2, key);
}
else
{
build.mov(temp, w1);
build.mov(x1, table);
build.mov(w2, temp);
}
build.mov(x0, rState);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_setnum)));
build.blr(x3);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::NEW_TABLE:
{
regs.spill(build, index);
build.mov(x0, rState);
build.mov(x1, uintOp(inst.a));
build.mov(x2, uintOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_new)));
build.blr(x3);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::DUP_TABLE:
{
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaH_clone)));
build.blr(x2);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::TRY_NUM_TO_INDEX:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
RegisterA64 temp1 = tempDouble(inst.a);
if (build.features & Feature_JSCVT)
{
build.fjcvtzs(inst.regA64, temp1); // fjcvtzs sets PSTATE.Z (equal) iff conversion is exact
build.b(ConditionA64::NotEqual, labelOp(inst.b));
}
else
{
RegisterA64 temp2 = regs.allocTemp(KindA64::d);
build.fcvtzs(inst.regA64, temp1);
build.scvtf(temp2, inst.regA64);
build.fcmp(temp1, temp2);
build.b(ConditionA64::NotEqual, labelOp(inst.b));
}
break;
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case IrCmd::TRY_CALL_FASTGETTM:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.ldr(temp1, mem(regOp(inst.a), offsetof(Table, metatable)));
build.cbz(temp1, labelOp(inst.c)); // no metatable
build.ldrb(temp2, mem(temp1, offsetof(Table, tmcache)));
build.tst(temp2, 1 << intOp(inst.b)); // can't use tbz/tbnz because their jump offsets are too short
build.b(ConditionA64::NotEqual, labelOp(inst.c)); // Equal = Zero after tst; tmcache caches *absence* of metamethods
regs.spill(build, index, {temp1});
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.mov(x0, temp1);
build.mov(w1, intOp(inst.b));
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.ldr(x2, mem(rGlobalState, offsetof(global_State, tmname) + intOp(inst.b) * sizeof(TString*)));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm)));
build.blr(x3);
build.cbz(x0, labelOp(inst.c)); // no tag method
inst.regA64 = regs.takeReg(x0, index);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
break;
}
case IrCmd::NEW_USERDATA:
{
regs.spill(build, index);
build.mov(x0, rState);
build.mov(x1, intOp(inst.a));
build.mov(x2, intOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, newUserdata)));
build.blr(x3);
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::INT_TO_NUM:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = tempInt(inst.a);
build.scvtf(inst.regA64, temp);
break;
}
case IrCmd::UINT_TO_NUM:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = tempInt(inst.a);
build.ucvtf(inst.regA64, temp);
break;
}
case IrCmd::NUM_TO_INT:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
RegisterA64 temp = tempDouble(inst.a);
build.fcvtzs(inst.regA64, temp);
break;
}
case IrCmd::NUM_TO_UINT:
{
inst.regA64 = regs.allocReg(KindA64::w, index);
RegisterA64 temp = tempDouble(inst.a);
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
// note: we don't use fcvtzu for consistency with C++ code
build.fcvtzs(castReg(KindA64::x, inst.regA64), temp);
break;
}
CodeGen: Extract all vector tag patching into TAG_VECTOR (#1171) Instead of patching the tag component with TVECTOR in every instruction that produces a vector value, we now use a separate IR instruction to do this. This reduces implementation redundancy, but more importantly allows for a class of optimizations: - NUM_TO_VECTOR previously patched the component unconditionally but the result was used only in MUL/DIV_VEC instructions that ignore it anyway; we can now remove this. - ADD_VEC et al can now forward the source of TAG_VECTOR instruction of either input; this shortens the latency chain and in the future could allow us to generate optimal vector instruction sequence once the temporary stores are marked as dead. - In the future on X64, ADD_VEC et al will be able to analyze the input instruction and remove tag masking conditionally. This is not part of this PR as it requires a decision around expected FP environment and/or the necessity of the existing masking to begin with. I've also renamed NUM_TO_VECTOR to NUM_TO_VEC so that "VEC" always refers to "3 float values" and for consistency with ADD/etc. Note: ADD_VEC input forwarding is currently performed unconditionally; it may or may not increase the spills that can't be reloaded from the stack. On A64 this makes the Taylor series computation a tiny bit faster (11.3ns => 11.0ns) as it removes the redundant ins instructions along the NUM_TO_VEC path. Curiously, the optimization of forwarding TAG_VECTOR input to arithmetic instructions actually has a small penalty as without it this PR runs at 10.9 ns. I don't know if this is a property of the benchmark though, as I just noticed that in this benchmark type inference actually fails to infer parts of the computation as a vector op. If desired I will happily omit this part of the change and we can explore that separately.
2024-02-21 15:06:11 +00:00
case IrCmd::NUM_TO_VEC:
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
{
inst.regA64 = regs.allocReg(KindA64::q, index);
if (inst.a.kind == IrOpKind::Constant)
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194) When the input is a constant, we use a fairly inefficient sequence of fmov+fcvt+dup or, when the double isn't encodable in fmov, adr+ldr+fcvt+dup. Instead, we can use the same lowering as X64 when the input is a constant, and load the vector from memory. However, if the constant is encodable via fmov, we can use a vector fmov instead (which is just one instruction and doesn't need constant space). Fortunately the bit encoding of fmov for 32-bit floating point numbers matches that of 64-bit: the decoding algorithm is a little different because it expands into a larger exponent, but the values are compatible, so if a double can be encoded into a scalar fmov with a given abcdefgh pattern, the same pattern should encode the same float; due to the very limited number of mantissa and exponent bits, all values that are encodable are also exact in both 32-bit and 64-bit floats. This strategy is ~same as what gcc uses. For complex vectors, we previously used 4 instructions and 8 bytes of constant storage, and now we use 2 instructions and 16 bytes of constant storage, so the memory footprint is the same; for simple vectors we just need 1 instruction (4 bytes). clang lowers vector constants a little differently, opting to synthesize a 64-bit integer using 4 instructions (mov/movk) and then move it to the vector register - this requires 5 instructions and 20 bytes, vs ours/gcc 2 instructions and 8+16=24 bytes. I tried a simpler version of this that would be more compact - synthesize a 32-bit integer constant with mov+movk, and move it to vector register via dup.4s - but this was a little slower on M2, so for now we prefer the slightly larger version as it's not a regression vs current implementation. On the vector approximation benchmark we get: - Before this PR (flag=false): ~7.85 ns/op - After this PR (flag=true): ~7.74 ns/op - After this PR, with 0.125 instead of 0.123 in the benchmark code (to use fmov): ~7.52 ns/op - Not part of this PR, but the mov/dup strategy described above: ~8.00 ns/op
2024-03-13 19:56:11 +00:00
{
float value = float(doubleOp(inst.a));
uint32_t asU32;
static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit");
memcpy(&asU32, &value, sizeof(value));
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194) When the input is a constant, we use a fairly inefficient sequence of fmov+fcvt+dup or, when the double isn't encodable in fmov, adr+ldr+fcvt+dup. Instead, we can use the same lowering as X64 when the input is a constant, and load the vector from memory. However, if the constant is encodable via fmov, we can use a vector fmov instead (which is just one instruction and doesn't need constant space). Fortunately the bit encoding of fmov for 32-bit floating point numbers matches that of 64-bit: the decoding algorithm is a little different because it expands into a larger exponent, but the values are compatible, so if a double can be encoded into a scalar fmov with a given abcdefgh pattern, the same pattern should encode the same float; due to the very limited number of mantissa and exponent bits, all values that are encodable are also exact in both 32-bit and 64-bit floats. This strategy is ~same as what gcc uses. For complex vectors, we previously used 4 instructions and 8 bytes of constant storage, and now we use 2 instructions and 16 bytes of constant storage, so the memory footprint is the same; for simple vectors we just need 1 instruction (4 bytes). clang lowers vector constants a little differently, opting to synthesize a 64-bit integer using 4 instructions (mov/movk) and then move it to the vector register - this requires 5 instructions and 20 bytes, vs ours/gcc 2 instructions and 8+16=24 bytes. I tried a simpler version of this that would be more compact - synthesize a 32-bit integer constant with mov+movk, and move it to vector register via dup.4s - but this was a little slower on M2, so for now we prefer the slightly larger version as it's not a regression vs current implementation. On the vector approximation benchmark we get: - Before this PR (flag=false): ~7.85 ns/op - After this PR (flag=true): ~7.74 ns/op - After this PR, with 0.125 instead of 0.123 in the benchmark code (to use fmov): ~7.52 ns/op - Not part of this PR, but the mov/dup strategy described above: ~8.00 ns/op
2024-03-13 19:56:11 +00:00
if (AssemblyBuilderA64::isFmovSupported(value))
{
build.fmov(inst.regA64, value);
}
else
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
Sync to upstream/release/617 (#1204) # What's Changed * Fix a case where the stack wasn't completely cleaned up where `debug.info` errored when passed `"f"` option and a thread. * Fix a case of uninitialized field in `luaF_newproto`. ### New Type Solver * When a local is captured in a function, don't add a new entry to the `DfgScope::bindings` if the capture occurs within a loop. * Fix a poor performance characteristic during unification by not trying to simplify an intersection. * Fix a case of multiple constraints mutating the same blocked type causing incorrect inferences. * Fix a case of assertion failure when overload resolution encounters a return typepack mismatch. * When refining a property of the top `table` type, we no longer signal an unknown property error. * Fix a misuse of free types when trying to infer the type of a subscript expression. * Fix a case of assertion failure when trying to resolve an overload from `never`. ### Native Code Generation * Fix dead store optimization issues caused by partial stores. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-03-15 23:37:39 +00:00
uint32_t vec[4] = {asU32, asU32, asU32, 0};
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194) When the input is a constant, we use a fairly inefficient sequence of fmov+fcvt+dup or, when the double isn't encodable in fmov, adr+ldr+fcvt+dup. Instead, we can use the same lowering as X64 when the input is a constant, and load the vector from memory. However, if the constant is encodable via fmov, we can use a vector fmov instead (which is just one instruction and doesn't need constant space). Fortunately the bit encoding of fmov for 32-bit floating point numbers matches that of 64-bit: the decoding algorithm is a little different because it expands into a larger exponent, but the values are compatible, so if a double can be encoded into a scalar fmov with a given abcdefgh pattern, the same pattern should encode the same float; due to the very limited number of mantissa and exponent bits, all values that are encodable are also exact in both 32-bit and 64-bit floats. This strategy is ~same as what gcc uses. For complex vectors, we previously used 4 instructions and 8 bytes of constant storage, and now we use 2 instructions and 16 bytes of constant storage, so the memory footprint is the same; for simple vectors we just need 1 instruction (4 bytes). clang lowers vector constants a little differently, opting to synthesize a 64-bit integer using 4 instructions (mov/movk) and then move it to the vector register - this requires 5 instructions and 20 bytes, vs ours/gcc 2 instructions and 8+16=24 bytes. I tried a simpler version of this that would be more compact - synthesize a 32-bit integer constant with mov+movk, and move it to vector register via dup.4s - but this was a little slower on M2, so for now we prefer the slightly larger version as it's not a regression vs current implementation. On the vector approximation benchmark we get: - Before this PR (flag=false): ~7.85 ns/op - After this PR (flag=true): ~7.74 ns/op - After this PR, with 0.125 instead of 0.123 in the benchmark code (to use fmov): ~7.52 ns/op - Not part of this PR, but the mov/dup strategy described above: ~8.00 ns/op
2024-03-13 19:56:11 +00:00
build.adr(temp, vec, sizeof(vec));
build.ldr(inst.regA64, temp);
}
}
else
CodeGen: Extract all vector tag patching into TAG_VECTOR (#1171) Instead of patching the tag component with TVECTOR in every instruction that produces a vector value, we now use a separate IR instruction to do this. This reduces implementation redundancy, but more importantly allows for a class of optimizations: - NUM_TO_VECTOR previously patched the component unconditionally but the result was used only in MUL/DIV_VEC instructions that ignore it anyway; we can now remove this. - ADD_VEC et al can now forward the source of TAG_VECTOR instruction of either input; this shortens the latency chain and in the future could allow us to generate optimal vector instruction sequence once the temporary stores are marked as dead. - In the future on X64, ADD_VEC et al will be able to analyze the input instruction and remove tag masking conditionally. This is not part of this PR as it requires a decision around expected FP environment and/or the necessity of the existing masking to begin with. I've also renamed NUM_TO_VECTOR to NUM_TO_VEC so that "VEC" always refers to "3 float values" and for consistency with ADD/etc. Note: ADD_VEC input forwarding is currently performed unconditionally; it may or may not increase the spills that can't be reloaded from the stack. On A64 this makes the Taylor series computation a tiny bit faster (11.3ns => 11.0ns) as it removes the redundant ins instructions along the NUM_TO_VEC path. Curiously, the optimization of forwarding TAG_VECTOR input to arithmetic instructions actually has a small penalty as without it this PR runs at 10.9 ns. I don't know if this is a property of the benchmark though, as I just noticed that in this benchmark type inference actually fails to infer parts of the computation as a vector op. If desired I will happily omit this part of the change and we can explore that separately.
2024-02-21 15:06:11 +00:00
{
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194) When the input is a constant, we use a fairly inefficient sequence of fmov+fcvt+dup or, when the double isn't encodable in fmov, adr+ldr+fcvt+dup. Instead, we can use the same lowering as X64 when the input is a constant, and load the vector from memory. However, if the constant is encodable via fmov, we can use a vector fmov instead (which is just one instruction and doesn't need constant space). Fortunately the bit encoding of fmov for 32-bit floating point numbers matches that of 64-bit: the decoding algorithm is a little different because it expands into a larger exponent, but the values are compatible, so if a double can be encoded into a scalar fmov with a given abcdefgh pattern, the same pattern should encode the same float; due to the very limited number of mantissa and exponent bits, all values that are encodable are also exact in both 32-bit and 64-bit floats. This strategy is ~same as what gcc uses. For complex vectors, we previously used 4 instructions and 8 bytes of constant storage, and now we use 2 instructions and 16 bytes of constant storage, so the memory footprint is the same; for simple vectors we just need 1 instruction (4 bytes). clang lowers vector constants a little differently, opting to synthesize a 64-bit integer using 4 instructions (mov/movk) and then move it to the vector register - this requires 5 instructions and 20 bytes, vs ours/gcc 2 instructions and 8+16=24 bytes. I tried a simpler version of this that would be more compact - synthesize a 32-bit integer constant with mov+movk, and move it to vector register via dup.4s - but this was a little slower on M2, so for now we prefer the slightly larger version as it's not a regression vs current implementation. On the vector approximation benchmark we get: - Before this PR (flag=false): ~7.85 ns/op - After this PR (flag=true): ~7.74 ns/op - After this PR, with 0.125 instead of 0.123 in the benchmark code (to use fmov): ~7.52 ns/op - Not part of this PR, but the mov/dup strategy described above: ~8.00 ns/op
2024-03-13 19:56:11 +00:00
RegisterA64 tempd = tempDouble(inst.a);
Sync to upstream/release/645 (#1440) In this update, we continue to improve the overall stability of the new type solver. We're also shipping some early bits of two new features, one of the language and one of the analysis API: user-defined type functions and an incremental typechecking API. If you use the new solver and want to use all new fixes included in this release, you have to reference an additional Luau flag: ```c++ LUAU_DYNAMIC_FASTINT(LuauTypeSolverRelease) ``` And set its value to `645`: ```c++ DFInt::LuauTypeSolverRelease.value = 645; // Or a higher value for future updates ``` ## New Solver * Fix a crash where scopes are incorrectly accessed cross-module after they've been deallocated by appropriately zeroing out associated scope pointers for free types, generic types, table types, etc. * Fix a crash where we were incorrectly caching results for bound types in generalization. * Eliminated some unnecessary intermediate allocations in the constraint solver and type function infrastructure. * Built some initial groundwork for an incremental typecheck API for use by language servers. * Built an initial technical preview for [user-defined type functions](https://rfcs.luau-lang.org/user-defined-type-functions.html), more work still to come (including calling type functions from other type functions), but adventurous folks wanting to experiment with it can try it out by enabling `FFlag::LuauUserDefinedTypeFunctionsSyntax` and `FFlag::LuauUserDefinedTypeFunction` in their local environment. Special thanks to @joonyoo181 who built up all the initial infrastructure for this during his internship! ## Miscellaneous changes * Fix a compilation error on Ubuntu (fixes #1437) --- Internal Contributors: Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Hunter Goldstein <hgoldstein@roblox.com> Co-authored-by: Jeremy Yoo <jyoo@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> Co-authored-by: Junseo Yoo <jyoo@roblox.com>
2024-09-27 19:58:21 +01:00
RegisterA64 temps = regs.allocTemp(KindA64::s);
CodeGen: Improve lowering of NUM_TO_VEC on A64 for constants (#1194) When the input is a constant, we use a fairly inefficient sequence of fmov+fcvt+dup or, when the double isn't encodable in fmov, adr+ldr+fcvt+dup. Instead, we can use the same lowering as X64 when the input is a constant, and load the vector from memory. However, if the constant is encodable via fmov, we can use a vector fmov instead (which is just one instruction and doesn't need constant space). Fortunately the bit encoding of fmov for 32-bit floating point numbers matches that of 64-bit: the decoding algorithm is a little different because it expands into a larger exponent, but the values are compatible, so if a double can be encoded into a scalar fmov with a given abcdefgh pattern, the same pattern should encode the same float; due to the very limited number of mantissa and exponent bits, all values that are encodable are also exact in both 32-bit and 64-bit floats. This strategy is ~same as what gcc uses. For complex vectors, we previously used 4 instructions and 8 bytes of constant storage, and now we use 2 instructions and 16 bytes of constant storage, so the memory footprint is the same; for simple vectors we just need 1 instruction (4 bytes). clang lowers vector constants a little differently, opting to synthesize a 64-bit integer using 4 instructions (mov/movk) and then move it to the vector register - this requires 5 instructions and 20 bytes, vs ours/gcc 2 instructions and 8+16=24 bytes. I tried a simpler version of this that would be more compact - synthesize a 32-bit integer constant with mov+movk, and move it to vector register via dup.4s - but this was a little slower on M2, so for now we prefer the slightly larger version as it's not a regression vs current implementation. On the vector approximation benchmark we get: - Before this PR (flag=false): ~7.85 ns/op - After this PR (flag=true): ~7.74 ns/op - After this PR, with 0.125 instead of 0.123 in the benchmark code (to use fmov): ~7.52 ns/op - Not part of this PR, but the mov/dup strategy described above: ~8.00 ns/op
2024-03-13 19:56:11 +00:00
build.fcvt(temps, tempd);
build.dup_4s(inst.regA64, castReg(KindA64::q, temps), 0);
CodeGen: Extract all vector tag patching into TAG_VECTOR (#1171) Instead of patching the tag component with TVECTOR in every instruction that produces a vector value, we now use a separate IR instruction to do this. This reduces implementation redundancy, but more importantly allows for a class of optimizations: - NUM_TO_VECTOR previously patched the component unconditionally but the result was used only in MUL/DIV_VEC instructions that ignore it anyway; we can now remove this. - ADD_VEC et al can now forward the source of TAG_VECTOR instruction of either input; this shortens the latency chain and in the future could allow us to generate optimal vector instruction sequence once the temporary stores are marked as dead. - In the future on X64, ADD_VEC et al will be able to analyze the input instruction and remove tag masking conditionally. This is not part of this PR as it requires a decision around expected FP environment and/or the necessity of the existing masking to begin with. I've also renamed NUM_TO_VECTOR to NUM_TO_VEC so that "VEC" always refers to "3 float values" and for consistency with ADD/etc. Note: ADD_VEC input forwarding is currently performed unconditionally; it may or may not increase the spills that can't be reloaded from the stack. On A64 this makes the Taylor series computation a tiny bit faster (11.3ns => 11.0ns) as it removes the redundant ins instructions along the NUM_TO_VEC path. Curiously, the optimization of forwarding TAG_VECTOR input to arithmetic instructions actually has a small penalty as without it this PR runs at 10.9 ns. I don't know if this is a property of the benchmark though, as I just noticed that in this benchmark type inference actually fails to infer parts of the computation as a vector op. If desired I will happily omit this part of the change and we can explore that separately.
2024-02-21 15:06:11 +00:00
}
break;
}
case IrCmd::TAG_VECTOR:
{
inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a});
RegisterA64 reg = regOp(inst.a);
RegisterA64 tempw = regs.allocTemp(KindA64::w);
if (inst.regA64 != reg)
build.mov(inst.regA64, reg);
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-27 03:20:56 +00:00
build.mov(tempw, LUA_TVECTOR);
build.ins_4s(inst.regA64, tempw, 3);
break;
}
case IrCmd::ADJUST_STACK_TO_REG:
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
if (inst.b.kind == IrOpKind::Constant)
{
build.add(temp, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
build.str(temp, mem(rState, offsetof(lua_State, top)));
}
else if (inst.b.kind == IrOpKind::Inst)
{
build.add(temp, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.add(temp, temp, regOp(inst.b), kTValueSizeLog2); // implicit uxtw
build.str(temp, mem(rState, offsetof(lua_State, top)));
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
break;
}
case IrCmd::ADJUST_STACK_TO_TOP:
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.ldr(temp, mem(rState, offsetof(lua_State, ci)));
build.ldr(temp, mem(temp, offsetof(CallInfo, top)));
build.str(temp, mem(rState, offsetof(lua_State, top)));
break;
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case IrCmd::FASTCALL:
regs.spill(build, index);
error |= !emitBuiltin(build, function, regs, uintOp(inst.a), vmRegOp(inst.b), vmRegOp(inst.c), intOp(inst.d));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
break;
case IrCmd::INVOKE_FASTCALL:
{
// We might need a temporary and we have to preserve it over the spill
RegisterA64 temp = regs.allocTemp(KindA64::q);
regs.spill(build, index, {temp});
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
build.mov(w3, intOp(inst.g)); // nresults
// 'E' argument can only be produced by LOP_FASTCALL3 lowering
if (inst.e.kind != IrOpKind::Undef)
{
CODEGEN_ASSERT(intOp(inst.f) == 3);
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
build.ldr(temp, mem(rBase, vmRegOp(inst.d) * sizeof(TValue)));
build.str(temp, mem(x4, 0));
build.ldr(temp, mem(rBase, vmRegOp(inst.e) * sizeof(TValue)));
build.str(temp, mem(x4, sizeof(TValue)));
}
else
{
if (inst.d.kind == IrOpKind::VmReg)
build.add(x4, rBase, uint16_t(vmRegOp(inst.d) * sizeof(TValue)));
else if (inst.d.kind == IrOpKind::VmConst)
emitAddOffset(build, x4, rConstants, vmConstOp(inst.d) * sizeof(TValue));
else
CODEGEN_ASSERT(inst.d.kind == IrOpKind::Undef);
}
// nparams
if (intOp(inst.f) == LUA_MULTRET)
{
// L->top - (ra + 1)
build.ldr(x5, mem(rState, offsetof(lua_State, top)));
build.sub(x5, x5, rBase);
build.sub(x5, x5, uint16_t((vmRegOp(inst.b) + 1) * sizeof(TValue)));
build.lsr(x5, x5, kTValueSizeLog2);
}
else
build.mov(w5, intOp(inst.f));
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(inst.a) * sizeof(luau_FastFunction)));
build.blr(x6);
inst.regA64 = regs.takeReg(w0, index);
break;
}
case IrCmd::CHECK_FASTCALL_RES:
build.cmp(regOp(inst.a), 0);
build.b(ConditionA64::Less, labelOp(inst.b));
break;
case IrCmd::DO_ARITH:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
Add SUBRK and DIVRK bytecode instructions to bytecode v5 (#1115) Right now, we can compile R\*K for all arithmetic instructions, but K\*R gets compiled into two instructions (LOADN/LOADK + arithmetic opcode). This is problematic since it leads to reduced performance for some code. However, we'd like to avoid adding reverse variants of ADDK et al for all opcodes to avoid the increase in I$ footprint for interpreter. Looking at the arithmetic instructions, % and // don't have interesting use cases for K\*V; ^ is sometimes used with constant on the left hand side but this would need to call pow() by necessity in all cases so it would be slow regardless of the dispatch overhead. This leaves the four basic arithmetic operations. For + and \*, we can implement a compiler-side optimization in the future that transforms K\*R to R\*K automatically. This could either be done unconditionally at -O2, or conditionally based on the type of the value (driven by type annotations / inference) -- this technically changes behavior in presence of metamethods, although it might be sensible to just always do this because non-commutative +/* are evil. However, for - and / it is impossible for the compiler to optimize this in the future, so we need dedicated opcodes. This only increases the interpreter size by ~300 bytes (~1.5%) on X64. This makes spectral-norm and math-partial-sums 6% faster; maybe more importantly, voxelgen gets 1.5% faster (so this change does have real-world impact). To avoid the proliferation of bytecode versions this change piggybacks on the bytecode version bump that was just made in 604 for vector constants; we would still be able to enable these independently but we'll consider v5 complete when both are enabled. Related: #626 --------- Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com>
2023-11-28 15:35:01 +00:00
if (inst.b.kind == IrOpKind::VmConst)
emitAddOffset(build, x2, rConstants, vmConstOp(inst.b) * sizeof(TValue));
else
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (inst.c.kind == IrOpKind::VmConst)
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
emitAddOffset(build, x3, rConstants, vmConstOp(inst.c) * sizeof(TValue));
else
build.add(x3, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
Sync to upstream/release/632 (#1307) # What's Changed? - Fix #1137 by appropriately retaining additional metadata from definition files throughout the type system. - Improve Frontend for LSPs by appropriately allowing the cancellation of typechecking while running its destructor. ## New Solver - Added support for the `rawget` type function. - Reduced overall static memory usage of builtin type functions. - Fixed a crash where visitors could mutate a union or intersection type and fail to invalidate iteration over them in doing so. - Revised autocomplete functionality to not rely on a separate run of the type solver when using the new solver. - Implemented a more relaxed semantic rule for casting. - Fixed some smaller crashes in the new solver. ## Native Code Generation - Add additional codegen specialization for `math.sign` - Cleaned up a large number of outstanding fflags in the code. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: James McNellis <jmcnellis@roblox.com> Co-authored-by: Jeremy Yoo <jyoo@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-06-29 01:34:49 +01:00
switch (TMS(intOp(inst.d)))
Sync to upstream/release/627 (#1266) ### What's new? * Removed new `table.move` optimization because of correctness problems. ### New Type Solver * Improved error messages for type families to describe what's wrong in more detail, and ideally without using the term `type family` at all. * Change `boolean` and `string` singletons in type checking to report errors to the user when they've gotten an impossible type (indicating a type error from their context). * Split debugging flags for type family reduction (`DebugLuauLogTypeFamilies`) from general solver logging (`DebugLuauLogSolver`). * Improve type simplification to support patterns like `(number | string) | (string | number)` becoming `number | string`. ### Native Code Generation * Use templated `luaV_doarith` to speedup vector operation fallbacks. * Various small changes to better support arm64 on Windows. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: James McNellis <jmcnellis@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-05-26 18:09:09 +01:00
{
Sync to upstream/release/632 (#1307) # What's Changed? - Fix #1137 by appropriately retaining additional metadata from definition files throughout the type system. - Improve Frontend for LSPs by appropriately allowing the cancellation of typechecking while running its destructor. ## New Solver - Added support for the `rawget` type function. - Reduced overall static memory usage of builtin type functions. - Fixed a crash where visitors could mutate a union or intersection type and fail to invalidate iteration over them in doing so. - Revised autocomplete functionality to not rely on a separate run of the type solver when using the new solver. - Implemented a more relaxed semantic rule for casting. - Fixed some smaller crashes in the new solver. ## Native Code Generation - Add additional codegen specialization for `math.sign` - Cleaned up a large number of outstanding fflags in the code. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: James McNellis <jmcnellis@roblox.com> Co-authored-by: Jeremy Yoo <jyoo@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-06-29 01:34:49 +01:00
case TM_ADD:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithadd)));
break;
case TM_SUB:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithsub)));
break;
case TM_MUL:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmul)));
break;
case TM_DIV:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithdiv)));
break;
case TM_IDIV:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithidiv)));
break;
case TM_MOD:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmod)));
break;
case TM_POW:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithpow)));
break;
case TM_UNM:
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithunm)));
break;
default:
CODEGEN_ASSERT(!"Invalid doarith helper operation tag");
break;
Sync to upstream/release/627 (#1266) ### What's new? * Removed new `table.move` optimization because of correctness problems. ### New Type Solver * Improved error messages for type families to describe what's wrong in more detail, and ideally without using the term `type family` at all. * Change `boolean` and `string` singletons in type checking to report errors to the user when they've gotten an impossible type (indicating a type error from their context). * Split debugging flags for type family reduction (`DebugLuauLogTypeFamilies`) from general solver logging (`DebugLuauLogSolver`). * Improve type simplification to support patterns like `(number | string) | (string | number)` becoming `number | string`. ### Native Code Generation * Use templated `luaV_doarith` to speedup vector operation fallbacks. * Various small changes to better support arm64 on Windows. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: James McNellis <jmcnellis@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-05-26 18:09:09 +01:00
}
Sync to upstream/release/632 (#1307) # What's Changed? - Fix #1137 by appropriately retaining additional metadata from definition files throughout the type system. - Improve Frontend for LSPs by appropriately allowing the cancellation of typechecking while running its destructor. ## New Solver - Added support for the `rawget` type function. - Reduced overall static memory usage of builtin type functions. - Fixed a crash where visitors could mutate a union or intersection type and fail to invalidate iteration over them in doing so. - Revised autocomplete functionality to not rely on a separate run of the type solver when using the new solver. - Implemented a more relaxed semantic rule for casting. - Fixed some smaller crashes in the new solver. ## Native Code Generation - Add additional codegen specialization for `math.sign` - Cleaned up a large number of outstanding fflags in the code. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: James McNellis <jmcnellis@roblox.com> Co-authored-by: Jeremy Yoo <jyoo@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-06-29 01:34:49 +01:00
build.blr(x4);
emitUpdateBase(build);
break;
case IrCmd::DO_LEN:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.add(x2, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_dolen)));
build.blr(x3);
emitUpdateBase(build);
break;
case IrCmd::GET_TABLE:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (inst.c.kind == IrOpKind::VmReg)
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
else if (inst.c.kind == IrOpKind::Constant)
{
TValue n = {};
setnvalue(&n, uintOp(inst.c));
build.adr(x2, &n, sizeof(n));
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_gettable)));
build.blr(x4);
emitUpdateBase(build);
break;
case IrCmd::SET_TABLE:
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
if (inst.c.kind == IrOpKind::VmReg)
build.add(x2, rBase, uint16_t(vmRegOp(inst.c) * sizeof(TValue)));
else if (inst.c.kind == IrOpKind::Constant)
{
TValue n = {};
setnvalue(&n, uintOp(inst.c));
build.adr(x2, &n, sizeof(n));
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_settable)));
build.blr(x4);
emitUpdateBase(build);
break;
case IrCmd::GET_IMPORT:
regs.spill(build, index);
// luaV_getimport(L, cl->env, k, ra, aux, /* propagatenil= */ false)
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.mov(x0, rState);
build.ldr(x1, mem(rClosure, offsetof(Closure, env)));
build.mov(x2, rConstants);
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.mov(w4, uintOp(inst.b));
build.mov(w5, 0);
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luaV_getimport)));
build.blr(x6);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
emitUpdateBase(build);
break;
case IrCmd::CONCAT:
regs.spill(build, index);
build.mov(x0, rState);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.mov(w1, uintOp(inst.b));
build.mov(w2, vmRegOp(inst.a) + uintOp(inst.b) - 1);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_concat)));
build.blr(x3);
emitUpdateBase(build);
break;
case IrCmd::GET_UPVALUE:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::q);
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.b)));
// uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value
Label skip;
build.ldr(temp3, mem(temp1, offsetof(TValue, tt)));
build.cmp(temp3, LUA_TUPVAL);
build.b(ConditionA64::NotEqual, skip);
// UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally)
build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc)));
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
build.setLabel(skip);
build.ldr(temp2, temp1);
build.str(temp2, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
break;
}
case IrCmd::SET_UPVALUE:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
RegisterA64 temp3 = regs.allocTemp(KindA64::q);
// UpVal*
build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(inst.a) + offsetof(TValue, value.gc)));
build.ldr(temp2, mem(temp1, offsetof(UpVal, v)));
build.ldr(temp3, mem(rBase, vmRegOp(inst.b) * sizeof(TValue)));
build.str(temp3, temp2);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
if (inst.c.kind == IrOpKind::Undef || isGCO(tagOp(inst.c)))
{
Label skip;
checkObjectBarrierConditions(build, temp1, temp2, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
size_t spills = regs.spill(build, index, {temp1});
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.mov(x1, temp1);
build.mov(x0, rState);
build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
build.blr(x3);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
}
break;
}
case IrCmd::CHECK_TAG:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& fail = getTargetLabel(inst.c, fresh);
Sync to upstream/release/588 (#992) Type checker/autocomplete: * `Luau::autocomplete` no longer performs typechecking internally, make sure to run `Frontend::check` before performing autocomplete requests * Autocomplete string suggestions without "" are now only suggested inside the "" * Autocomplete suggestions now include `function (anonymous autofilled)` key with a full suggestion for the function expression (with arguments included) stored in `AutocompleteEntry::insertText` * `AutocompleteEntry::indexedWithSelf` is provided for function call suggestions made with `:` * Cyclic modules now see each other type exports as `any` to prevent memory use-after-free (similar to module return type) Runtime: * Updated inline/loop unroll cost model to better handle assignments (Fixes https://github.com/Roblox/luau/issues/978) * `math.noise` speed was improved by ~30% * `table.concat` speed was improved by ~5-7% * `tonumber` and `tostring` now have fastcall paths that execute ~1.5x and ~2.5x faster respectively (fixes #777) * Fixed crash in `luaL_typename` when index refers to a non-existing value * Fixed potential out of memory scenario when using `string.sub` or `string.char` in a loop * Fixed behavior of some fastcall builtins when called without arguments under -O2 to match original functions * Support for native code execution in VM is now enabled by default (note: native code still has to be generated explicitly) * `Codegen::compile` now accepts `CodeGen_OnlyNativeModules` flag. When set, only modules that have a `--!native` hot-comment at the top will be compiled to native code In our new typechecker: * Generic type packs are no longer considered to be variadic during unification * Timeout and cancellation now works in new solver * Fixed false positive errors around 'table' and 'function' type refinements * Table literals now use covariant unification rules. This is sound since literal has no type specified and has no aliases * Fixed issues with blocked types escaping the constraint solver * Fixed more places where error messages that should've been suppressed were still reported * Fixed errors when iterating over a top table type In our native code generation (jit): * 'DebugLuauAbortingChecks' flag is now supported on A64 * LOP_NEWCLOSURE has been translated to IR
2023-07-28 16:13:53 +01:00
if (tagOp(inst.b) == 0)
{
build.cbnz(regOp(inst.a), fail);
}
else
{
build.cmp(regOp(inst.a), tagOp(inst.b));
build.b(ConditionA64::NotEqual, fail);
}
finalizeTargetLabel(inst.c, fresh);
break;
}
case IrCmd::CHECK_TRUTHY:
{
// Constant tags which don't require boolean value check should've been removed in constant folding
CODEGEN_ASSERT(inst.a.kind != IrOpKind::Constant || tagOp(inst.a) == LUA_TBOOLEAN);
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& target = getTargetLabel(inst.c, fresh);
Label skip;
if (inst.a.kind != IrOpKind::Constant)
{
// fail to fallback on 'nil' (falsy)
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(regOp(inst.a), target);
// skip value test if it's not a boolean (truthy)
build.cmp(regOp(inst.a), LUA_TBOOLEAN);
build.b(ConditionA64::NotEqual, skip);
}
// fail to fallback on 'false' boolean value (falsy)
Sync to upstream/release/623 (#1236) # What's changed? ### New Type Solver - Unification of two fresh types no longer binds them together. - Replaced uses of raw `emplace` with `emplaceType` to catch cyclic bound types when they are created. - `SetIndexerConstraint` is blocked until the indexer result type is not blocked. - Fix a case where a blocked type got past the constraint solver. - Searching for free types should no longer traverse into `ClassType`s. - Fix a corner case that could result in the non-testable type `~{}`. - Fix incorrect flagging when `any` was a parameter of some checked function in nonstrict type checker. - `IterableConstraint` now consider tables without `__iter` to be iterables. ### Native Code Generation - Improve register type info lookup by program counter. - Generate type information for locals and upvalues --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: James McNellis <jmcnellis@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-04-25 23:26:09 +01:00
if (inst.b.kind != IrOpKind::Constant)
{
build.cbz(regOp(inst.b), target);
}
else
{
if (intOp(inst.b) == 0)
build.b(target);
}
if (inst.a.kind != IrOpKind::Constant)
build.setLabel(skip);
finalizeTargetLabel(inst.c, fresh);
break;
}
case IrCmd::CHECK_READONLY:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldrb(temp, mem(regOp(inst.a), offsetof(Table, readonly)));
build.cbnz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
break;
}
case IrCmd::CHECK_NO_METATABLE:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.ldr(temp, mem(regOp(inst.a), offsetof(Table, metatable)));
build.cbnz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
break;
}
case IrCmd::CHECK_SAFE_ENV:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::x);
RegisterA64 tempw = castReg(KindA64::w, temp);
build.ldr(temp, mem(rClosure, offsetof(Closure, env)));
build.ldrb(tempw, mem(temp, offsetof(Table, safeenv)));
build.cbz(tempw, getTargetLabel(inst.a, fresh));
finalizeTargetLabel(inst.a, fresh);
break;
}
case IrCmd::CHECK_ARRAY_SIZE:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& fail = getTargetLabel(inst.c, fresh);
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(Table, sizearray)));
if (inst.b.kind == IrOpKind::Inst)
{
build.cmp(temp, regOp(inst.b));
build.b(ConditionA64::UnsignedLessEqual, fail);
}
else if (inst.b.kind == IrOpKind::Constant)
{
if (intOp(inst.b) == 0)
{
build.cbz(temp, fail);
}
else if (size_t(intOp(inst.b)) <= AssemblyBuilderA64::kMaxImmediate)
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
{
build.cmp(temp, uint16_t(intOp(inst.b)));
build.b(ConditionA64::UnsignedLessEqual, fail);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
}
else
{
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.mov(temp2, intOp(inst.b));
build.cmp(temp, temp2);
build.b(ConditionA64::UnsignedLessEqual, fail);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
}
}
else
CODEGEN_ASSERT(!"Unsupported instruction form");
finalizeTargetLabel(inst.c, fresh);
break;
}
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
case IrCmd::JUMP_SLOT_MATCH:
case IrCmd::CHECK_SLOT_MATCH:
{
Label abort; // used when guard aborts execution
const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? inst.d : inst.c;
Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp1w = castReg(KindA64::w, temp1);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
CODEGEN_ASSERT(offsetof(LuaNode, key.value) == offsetof(LuaNode, key) && kOffsetOfTKeyTagNext >= 8 && kOffsetOfTKeyTagNext < 16);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
build.ldp(temp1, temp2, mem(regOp(inst.a), offsetof(LuaNode, key))); // load key.value into temp1 and key.tt (alongside other bits) into temp2
build.ubfx(temp2, temp2, (kOffsetOfTKeyTagNext - 8) * 8, kTKeyTagBits); // .tt is right before .next, and 8 bytes are skipped by ldp
build.cmp(temp2, LUA_TSTRING);
build.b(ConditionA64::NotEqual, mismatch);
AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
build.ldr(temp2, addr);
build.cmp(temp1, temp2);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
build.b(ConditionA64::NotEqual, mismatch);
build.ldr(temp1w, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
CODEGEN_ASSERT(LUA_TNIL == 0);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
build.cbz(temp1w, mismatch);
if (inst.cmd == IrCmd::JUMP_SLOT_MATCH)
jumpOrFallthrough(blockOp(inst.c), next);
else if (abort.id)
emitAbort(build, abort);
break;
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case IrCmd::CHECK_NODE_NO_NEXT:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
RegisterA64 temp = regs.allocTemp(KindA64::w);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, key) + kOffsetOfTKeyTagNext));
build.lsr(temp, temp, kTKeyTagBits);
build.cbnz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
break;
}
case IrCmd::CHECK_NODE_VALUE:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(LuaNode, val.tt)));
CODEGEN_ASSERT(LUA_TNIL == 0);
build.cbz(temp, getTargetLabel(inst.b, fresh));
finalizeTargetLabel(inst.b, fresh);
break;
}
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
case IrCmd::CHECK_BUFFER_LEN:
{
int accessSize = intOp(inst.c);
CODEGEN_ASSERT(accessSize > 0 && accessSize <= int(AssemblyBuilderA64::kMaxImmediate));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& target = getTargetLabel(inst.d, fresh);
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldr(temp, mem(regOp(inst.a), offsetof(Buffer, len)));
if (inst.b.kind == IrOpKind::Inst)
{
if (accessSize == 1)
{
// fails if offset >= len
build.cmp(temp, regOp(inst.b));
build.b(ConditionA64::UnsignedLessEqual, target);
}
else
{
Sync to upstream/release/611 (#1160) # What's changed? ### Native Code Generation * Fixed an UAF relating to reusing a hash key after a weak table has undergone some GC. * Fixed a bounds check on arm64 to allow access to the last byte of a buffer. ### New Type Solver * Type states now preserves error-suppression, i.e. `local x: any = 5` and `x.foo` does not error. * Made error-suppression logic in subtyping more accurate. * Subtyping now knows how to reduce type families. * Fixed function call overload resolution so that the return type resolves to the correct overload. * Fixed a case where we attempted to reduce irreducible type families a few too many times, leading to duplicate errors. * Type checker needs to type check annotations in function signatures to be able to report errors relating to those annotations. * Fixed an UAF from a pointer to stack-allocated data in Subtyping's `explainReasonings`. ### Nonstrict Type Checker * Fixed a crash when calling a checked function of the form `math.abs` with an incorrect argument type. * Fixed a crash when calling a checked function with a number of arguments that did not exactly match the number of parameters required. --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-02-02 21:32:42 +00:00
// fails if offset + size > len; we compute it as len - offset < size
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
RegisterA64 tempx = castReg(KindA64::x, temp);
build.sub(tempx, tempx, regOp(inst.b)); // implicit uxtw
build.cmp(tempx, uint16_t(accessSize));
Sync to upstream/release/615 (#1175) # What's changed? * Luau allocation scheme was changed to handle allocations in 513-1024 byte range internally without falling back to global allocator * coroutine/thread creation no longer requires any global allocations, making it up to 15% faster (vs libc malloc) * table construction for 17-32 keys or 33-64 array elements is up to 30% faster (vs libc malloc) ### New Type Solver * Cyclic unary negation type families are reduced to `number` when possible * Class types are skipped when searching for free types in unifier to improve performance * Fixed issues with table type inference when metatables are present * Improved inference of iteration loop types * Fixed an issue with bidirectional inference of method calls * Type simplification will now preserve error suppression markers ### Native Code Generation * Fixed TAG_VECTOR skip optimization to not break instruction use counts (broken optimization wasn't included in 614) * Fixed missing side-effect when optimizing generic loop preparation instruction --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com>
2024-03-01 18:45:26 +00:00
build.b(ConditionA64::Less, target); // note: this is a signed 64-bit comparison so that out of bounds offset fails
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
}
}
else if (inst.b.kind == IrOpKind::Constant)
{
int offset = intOp(inst.b);
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
{
build.b(target);
}
else if (offset + accessSize <= int(AssemblyBuilderA64::kMaxImmediate))
{
build.cmp(temp, uint16_t(offset + accessSize));
build.b(ConditionA64::UnsignedLessEqual, target);
}
else
{
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
build.mov(temp2, offset + accessSize);
build.cmp(temp, temp2);
build.b(ConditionA64::UnsignedLessEqual, target);
}
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
}
finalizeTargetLabel(inst.d, fresh);
break;
}
case IrCmd::CHECK_USERDATA_TAG:
{
Label fresh; // used when guard aborts execution or jumps to a VM exit
Label& fail = getTargetLabel(inst.c, fresh);
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.ldrb(temp, mem(regOp(inst.a), offsetof(Udata, tag)));
build.cmp(temp, intOp(inst.b));
build.b(ConditionA64::NotEqual, fail);
finalizeTargetLabel(inst.c, fresh);
break;
}
case IrCmd::INTERRUPT:
{
regs.spill(build, index);
Label self;
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.ldr(x0, mem(rGlobalState, offsetof(global_State, cb.interrupt)));
build.cbnz(x0, self);
Label next = build.setLabel();
interruptHandlers.push_back({self, uintOp(inst.a), next});
break;
}
case IrCmd::CHECK_GC:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
CODEGEN_ASSERT(offsetof(global_State, totalbytes) == offsetof(global_State, GCthreshold) + 8);
Label skip;
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.ldp(temp1, temp2, mem(rGlobalState, offsetof(global_State, GCthreshold)));
build.cmp(temp1, temp2);
build.b(ConditionA64::UnsignedGreater, skip);
size_t spills = regs.spill(build, index);
build.mov(x0, rState);
build.mov(w1, 1);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaC_step)));
build.blr(x2);
emitUpdateBase(build);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
build.setLabel(skip);
break;
}
case IrCmd::BARRIER_OBJ:
{
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
RegisterA64 temp = regs.allocTemp(KindA64::x);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
Label skip;
checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
size_t spills = regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.ldr(x2, mem(rBase, vmRegOp(inst.b) * sizeof(TValue) + offsetof(TValue, value)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
build.blr(x3);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
break;
}
case IrCmd::BARRIER_TABLE_BACK:
{
Label skip;
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
RegisterA64 temp = regs.allocTemp(KindA64::w);
// isblack(obj2gco(t))
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.ldrb(temp, mem(regOp(inst.a), offsetof(GCheader, marked)));
build.tbz(temp, BLACKBIT, skip);
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
size_t spills = regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.add(x2, x1, uint16_t(offsetof(Table, gclist)));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierback)));
build.blr(x3);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
break;
}
case IrCmd::BARRIER_TABLE_FORWARD:
{
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
RegisterA64 temp = regs.allocTemp(KindA64::x);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
Label skip;
checkObjectBarrierConditions(build, regOp(inst.a), temp, inst.b, inst.c.kind == IrOpKind::Undef ? -1 : tagOp(inst.c), skip);
RegisterA64 reg = regOp(inst.a); // note: we need to call regOp before spill so that we don't do redundant reloads
AddressA64 addr = tempAddr(inst.b, offsetof(TValue, value));
size_t spills = regs.spill(build, index, {reg});
build.mov(x1, reg);
build.mov(x0, rState);
build.ldr(x2, addr);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barriertable)));
build.blr(x3);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
build.setLabel(skip);
break;
}
case IrCmd::SET_SAVEDPC:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
emitAddOffset(build, temp1, rCode, uintOp(inst.a) * sizeof(Instruction));
build.ldr(temp2, mem(rState, offsetof(lua_State, ci)));
build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc)));
break;
}
case IrCmd::CLOSE_UPVALS:
{
Label skip;
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
// L->openupval != 0
build.ldr(temp1, mem(rState, offsetof(lua_State, openupval)));
build.cbz(temp1, skip);
// ra <= L->openuval->v
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
build.add(temp2, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.cmp(temp2, temp1);
build.b(ConditionA64::UnsignedGreater, skip);
size_t spills = regs.spill(build, index, {temp2});
build.mov(x1, temp2);
build.mov(x0, rState);
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_close)));
build.blr(x2);
regs.restore(build, spills); // need to restore before skip so that registers are in a consistent state
build.setLabel(skip);
break;
}
case IrCmd::CAPTURE:
// no-op
break;
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case IrCmd::SETLIST:
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeSETLIST), uintOp(inst.a));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
break;
case IrCmd::CALL:
regs.spill(build, index);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
// argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams;
if (intOp(inst.b) == LUA_MULTRET)
build.ldr(x2, mem(rState, offsetof(lua_State, top)));
else
build.add(x2, rBase, uint16_t((vmRegOp(inst.a) + 1 + intOp(inst.b)) * sizeof(TValue)));
// callFallback(L, ra, argtop, nresults)
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.mov(w3, intOp(inst.c));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
build.blr(x4);
emitUpdateBase(build);
// reentry with x0=closure (NULL implies C function; CALL_FALLBACK_YIELD will trigger exit)
build.cbnz(x0, helpers.continueCall);
break;
case IrCmd::RETURN:
regs.spill(build, index);
if (function.variadic)
{
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
build.ldr(x1, mem(x1, offsetof(CallInfo, func)));
}
else if (intOp(inst.b) != 1)
build.sub(x1, rBase, sizeof(TValue)); // invariant: ci->func + 1 == ci->base for non-variadic frames
if (intOp(inst.b) == 0)
{
build.mov(w2, 0);
build.b(helpers.return_);
}
else if (intOp(inst.b) == 1 && !function.variadic)
{
// fast path: minimizes x1 adjustments
// note that we skipped x1 computation for this specific case above
build.ldr(q0, mem(rBase, vmRegOp(inst.a) * sizeof(TValue)));
build.str(q0, mem(rBase, -int(sizeof(TValue))));
build.mov(x1, rBase);
build.mov(w2, 1);
build.b(helpers.return_);
}
else if (intOp(inst.b) >= 1 && intOp(inst.b) <= 3)
{
for (int r = 0; r < intOp(inst.b); ++r)
{
build.ldr(q0, mem(rBase, (vmRegOp(inst.a) + r) * sizeof(TValue)));
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
}
build.mov(w2, intOp(inst.b));
build.b(helpers.return_);
}
else
{
build.mov(w2, 0);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
// vali = ra
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
// valend = (n == LUA_MULTRET) ? L->top : ra + n
if (intOp(inst.b) == LUA_MULTRET)
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
else
build.add(x4, rBase, uint16_t((vmRegOp(inst.a) + intOp(inst.b)) * sizeof(TValue)));
Label repeatValueLoop, exitValueLoop;
if (intOp(inst.b) == LUA_MULTRET)
{
build.cmp(x3, x4);
build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual
}
build.setLabel(repeatValueLoop);
build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post));
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
build.add(w2, w2, 1);
build.cmp(x3, x4);
build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess
build.setLabel(exitValueLoop);
build.b(helpers.return_);
}
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
break;
case IrCmd::FORGLOOP:
// register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables
regs.spill(build, index);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
// clear extra variables since we might have more than two
if (intOp(inst.b) > 2)
{
CODEGEN_ASSERT(LUA_TNIL == 0);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
for (int i = 2; i < intOp(inst.b); ++i)
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.str(wzr, mem(rBase, (vmRegOp(inst.a) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt)));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
}
// we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here
build.mov(x0, rState);
build.ldr(x1, mem(rBase, (vmRegOp(inst.a) + 1) * sizeof(TValue) + offsetof(TValue, value.gc)));
build.ldr(w2, mem(rBase, (vmRegOp(inst.a) + 2) * sizeof(TValue) + offsetof(TValue, value.p)));
build.add(x3, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter)));
build.blr(x4);
// note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack
build.cbnz(w0, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGLOOP_FALLBACK:
regs.spill(build, index);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.mov(x0, rState);
build.mov(w1, vmRegOp(inst.a));
build.mov(w2, intOp(inst.b));
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback)));
build.blr(x3);
emitUpdateBase(build);
build.cbnz(w0, labelOp(inst.c));
jumpOrFallthrough(blockOp(inst.d), next);
break;
case IrCmd::FORGPREP_XNEXT_FALLBACK:
regs.spill(build, index);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.b) * sizeof(TValue)));
build.mov(w2, uintOp(inst.a) + 1);
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback)));
build.blr(x3);
// note: no emitUpdateBase necessary because forgLoopNonTableFallback does not reallocate stack
jumpOrFallthrough(blockOp(inst.c), next);
break;
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case IrCmd::COVERAGE:
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
build.mov(temp1, uintOp(inst.a) * sizeof(Instruction));
build.ldr(temp2, mem(rCode, temp1));
// increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1
// note: cmp can be eliminated with adds but we aren't concerned with code size for coverage
build.add(temp3, temp2, 256);
build.cmp(temp3, 0);
build.csel(temp2, temp2, temp3, ConditionA64::Less);
build.str(temp2, mem(rCode, temp1));
break;
}
// Full instruction fallbacks
case IrCmd::FALLBACK_GETGLOBAL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeGETGLOBAL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETGLOBAL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeSETGLOBAL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETTABLEKS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeGETTABLEKS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_SETTABLEKS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeSETTABLEKS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_NAMECALL:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.d.kind == IrOpKind::VmConst);
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeNAMECALL), uintOp(inst.a));
break;
case IrCmd::FALLBACK_PREPVARARGS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::Constant);
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executePREPVARARGS), uintOp(inst.a));
break;
case IrCmd::FALLBACK_GETVARARGS:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::Constant);
regs.spill(build, index);
build.mov(x0, rState);
if (intOp(inst.c) == LUA_MULTRET)
{
emitAddOffset(build, x1, rCode, uintOp(inst.a) * sizeof(Instruction));
build.mov(x2, rBase);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.mov(w3, vmRegOp(inst.b));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSMultRet)));
build.blr(x4);
emitUpdateBase(build);
}
else
{
build.mov(x1, rBase);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.mov(w2, vmRegOp(inst.b));
build.mov(w3, intOp(inst.c));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSConst)));
build.blr(x4);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
// note: no emitUpdateBase necessary because executeGETVARARGSConst does not reallocate stack
}
break;
Sync to upstream/release/588 (#992) Type checker/autocomplete: * `Luau::autocomplete` no longer performs typechecking internally, make sure to run `Frontend::check` before performing autocomplete requests * Autocomplete string suggestions without "" are now only suggested inside the "" * Autocomplete suggestions now include `function (anonymous autofilled)` key with a full suggestion for the function expression (with arguments included) stored in `AutocompleteEntry::insertText` * `AutocompleteEntry::indexedWithSelf` is provided for function call suggestions made with `:` * Cyclic modules now see each other type exports as `any` to prevent memory use-after-free (similar to module return type) Runtime: * Updated inline/loop unroll cost model to better handle assignments (Fixes https://github.com/Roblox/luau/issues/978) * `math.noise` speed was improved by ~30% * `table.concat` speed was improved by ~5-7% * `tonumber` and `tostring` now have fastcall paths that execute ~1.5x and ~2.5x faster respectively (fixes #777) * Fixed crash in `luaL_typename` when index refers to a non-existing value * Fixed potential out of memory scenario when using `string.sub` or `string.char` in a loop * Fixed behavior of some fastcall builtins when called without arguments under -O2 to match original functions * Support for native code execution in VM is now enabled by default (note: native code still has to be generated explicitly) * `Codegen::compile` now accepts `CodeGen_OnlyNativeModules` flag. When set, only modules that have a `--!native` hot-comment at the top will be compiled to native code In our new typechecker: * Generic type packs are no longer considered to be variadic during unification * Timeout and cancellation now works in new solver * Fixed false positive errors around 'table' and 'function' type refinements * Table literals now use covariant unification rules. This is sound since literal has no type specified and has no aliases * Fixed issues with blocked types escaping the constraint solver * Fixed more places where error messages that should've been suppressed were still reported * Fixed errors when iterating over a top table type In our native code generation (jit): * 'DebugLuauAbortingChecks' flag is now supported on A64 * LOP_NEWCLOSURE has been translated to IR
2023-07-28 16:13:53 +01:00
case IrCmd::NEWCLOSURE:
{
RegisterA64 reg = regOp(inst.b); // note: we need to call regOp before spill so that we don't do redundant reloads
Sync to upstream/release/588 (#992) Type checker/autocomplete: * `Luau::autocomplete` no longer performs typechecking internally, make sure to run `Frontend::check` before performing autocomplete requests * Autocomplete string suggestions without "" are now only suggested inside the "" * Autocomplete suggestions now include `function (anonymous autofilled)` key with a full suggestion for the function expression (with arguments included) stored in `AutocompleteEntry::insertText` * `AutocompleteEntry::indexedWithSelf` is provided for function call suggestions made with `:` * Cyclic modules now see each other type exports as `any` to prevent memory use-after-free (similar to module return type) Runtime: * Updated inline/loop unroll cost model to better handle assignments (Fixes https://github.com/Roblox/luau/issues/978) * `math.noise` speed was improved by ~30% * `table.concat` speed was improved by ~5-7% * `tonumber` and `tostring` now have fastcall paths that execute ~1.5x and ~2.5x faster respectively (fixes #777) * Fixed crash in `luaL_typename` when index refers to a non-existing value * Fixed potential out of memory scenario when using `string.sub` or `string.char` in a loop * Fixed behavior of some fastcall builtins when called without arguments under -O2 to match original functions * Support for native code execution in VM is now enabled by default (note: native code still has to be generated explicitly) * `Codegen::compile` now accepts `CodeGen_OnlyNativeModules` flag. When set, only modules that have a `--!native` hot-comment at the top will be compiled to native code In our new typechecker: * Generic type packs are no longer considered to be variadic during unification * Timeout and cancellation now works in new solver * Fixed false positive errors around 'table' and 'function' type refinements * Table literals now use covariant unification rules. This is sound since literal has no type specified and has no aliases * Fixed issues with blocked types escaping the constraint solver * Fixed more places where error messages that should've been suppressed were still reported * Fixed errors when iterating over a top table type In our native code generation (jit): * 'DebugLuauAbortingChecks' flag is now supported on A64 * LOP_NEWCLOSURE has been translated to IR
2023-07-28 16:13:53 +01:00
regs.spill(build, index, {reg});
build.mov(x2, reg);
build.mov(x0, rState);
build.mov(w1, uintOp(inst.a));
build.ldr(x3, mem(rClosure, offsetof(Closure, l.p)));
build.ldr(x3, mem(x3, offsetof(Proto, p)));
build.ldr(x3, mem(x3, sizeof(Proto*) * uintOp(inst.c)));
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaF_newLclosure)));
build.blr(x4);
inst.regA64 = regs.takeReg(x0, index);
break;
Sync to upstream/release/588 (#992) Type checker/autocomplete: * `Luau::autocomplete` no longer performs typechecking internally, make sure to run `Frontend::check` before performing autocomplete requests * Autocomplete string suggestions without "" are now only suggested inside the "" * Autocomplete suggestions now include `function (anonymous autofilled)` key with a full suggestion for the function expression (with arguments included) stored in `AutocompleteEntry::insertText` * `AutocompleteEntry::indexedWithSelf` is provided for function call suggestions made with `:` * Cyclic modules now see each other type exports as `any` to prevent memory use-after-free (similar to module return type) Runtime: * Updated inline/loop unroll cost model to better handle assignments (Fixes https://github.com/Roblox/luau/issues/978) * `math.noise` speed was improved by ~30% * `table.concat` speed was improved by ~5-7% * `tonumber` and `tostring` now have fastcall paths that execute ~1.5x and ~2.5x faster respectively (fixes #777) * Fixed crash in `luaL_typename` when index refers to a non-existing value * Fixed potential out of memory scenario when using `string.sub` or `string.char` in a loop * Fixed behavior of some fastcall builtins when called without arguments under -O2 to match original functions * Support for native code execution in VM is now enabled by default (note: native code still has to be generated explicitly) * `Codegen::compile` now accepts `CodeGen_OnlyNativeModules` flag. When set, only modules that have a `--!native` hot-comment at the top will be compiled to native code In our new typechecker: * Generic type packs are no longer considered to be variadic during unification * Timeout and cancellation now works in new solver * Fixed false positive errors around 'table' and 'function' type refinements * Table literals now use covariant unification rules. This is sound since literal has no type specified and has no aliases * Fixed issues with blocked types escaping the constraint solver * Fixed more places where error messages that should've been suppressed were still reported * Fixed errors when iterating over a top table type In our native code generation (jit): * 'DebugLuauAbortingChecks' flag is now supported on A64 * LOP_NEWCLOSURE has been translated to IR
2023-07-28 16:13:53 +01:00
}
case IrCmd::FALLBACK_DUPCLOSURE:
CODEGEN_ASSERT(inst.b.kind == IrOpKind::VmReg);
CODEGEN_ASSERT(inst.c.kind == IrOpKind::VmConst);
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(inst.a));
break;
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
case IrCmd::FALLBACK_FORGPREP:
regs.spill(build, index);
Sync to upstream/release/577 (#934) Lots of things going on this week: * Fix a crash that could occur in the presence of a cyclic union. We shouldn't be creating cyclic unions, but we shouldn't be crashing when they arise either. * Minor cleanup of `luau_precall` * Internal change to make L->top handling slightly more uniform * Optimize SETGLOBAL & GETGLOBAL fallback C functions. * https://github.com/Roblox/luau/pull/929 * The syntax to the `luau-reduce` commandline tool has changed. It now accepts a script, a command to execute, and an error to search for. It no longer automatically passes the script to the command which makes it a lot more flexible. Also be warned that it edits the script it is passed **in place**. Do not point it at something that is not in source control! New solver * Switch to a greedier but more fallible algorithm for simplifying union and intersection types that are created as part of refinement calculation. This has much better and more predictable performance. * Fix a constraint cycle in recursive function calls. * Much improved inference of binary addition. Functions like `function add(x, y) return x + y end` can now be inferred without annotations. We also accurately typecheck calls to functions like this. * Many small bugfixes surrounding things like table indexers * Add support for indexers on class types. This was previously added to the old solver; we now add it to the new one for feature parity. JIT * https://github.com/Roblox/luau/pull/931 * Fuse key.value and key.tt loads for CEHCK_SLOT_MATCH in A64 * Implement remaining aliases of BFM for A64 * Implement new callinfo flag for A64 * Add instruction simplification for int->num->int conversion chains * Don't even load execdata for X64 calls * Treat opcode fallbacks the same as manually written fallbacks --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-05-19 20:37:30 +01:00
emitFallback(build, offsetof(NativeContext, executeFORGPREP), uintOp(inst.a));
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
jumpOrFallthrough(blockOp(inst.c), next);
break;
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
// Pseudo instructions
case IrCmd::NOP:
case IrCmd::SUBSTITUTE:
CODEGEN_ASSERT(!"Pseudo instructions should not be lowered");
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
break;
case IrCmd::BITAND_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
build.and_(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.and_(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITXOR_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
build.eor(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.eor(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITOR_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant && AssemblyBuilderA64::isMaskSupported(unsigned(intOp(inst.b))))
build.orr(inst.regA64, regOp(inst.a), unsigned(intOp(inst.b)));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.orr(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITNOT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
Sync to upstream/release/576 (#928) * `ClassType` can now have an indexer defined on it. This allows custom types to be used in `t[x]` expressions. * Fixed search for closest executable breakpoint line. Previously, breakpoints might have been skipped in `else` blocks at the end of a function * Fixed how unification is performed for two optional types `a? <: b?`, previously it might have unified either 'a' or 'b' with 'nil'. Note that this fix is not enabled by default yet (see the list in `ExperimentalFlags.h`) In the new type solver, a concept of 'Type Families' has been introduced. Type families can be thought of as type aliases with custom type inference/reduction logic included with them. For example, we can have an `Add<T, U>` type family that will resolve the type that is the result of adding two values together. This will help type inference to figure out what 'T' and 'U' might be when explicit type annotations are not provided. In this update we don't define any type families, but they will be added in the near future. It is also possible for Luau embedders to define their own type families in the global/environment scope. Other changes include: * Fixed scope used to find out which generic types should be included in the function generic type list * Fixed a crash after cyclic bound types were created during unification And in native code generation (jit): * Use of arm64 target on M1 now requires macOS 13 * Entry into native code has been optimized. This is especially important for coroutine call/pcall performance as they involve going through a C call frame * LOP_LOADK(X) translation into IR has been improved to enable type tag/constant propagation * arm64 can use integer immediate values to synthesize floating-point values * x64 assembler removes duplicate 64bit numbers from the data section to save space * Linux `perf` can now be used to profile native Luau code (when running with --codegen-perf CLI argument)
2023-05-12 18:50:47 +01:00
build.mvn_(inst.regA64, temp);
break;
}
case IrCmd::BITLSHIFT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.lsl(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.lsl(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITRSHIFT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.lsr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.lsr(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITARSHIFT_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.asr(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.asr(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITLROTATE_UINT:
{
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
build.ror(inst.regA64, regOp(inst.a), uint8_t((32 - unsigned(intOp(inst.b))) & 31));
}
else
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b}); // can't reuse a because it would be clobbered by neg
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.neg(inst.regA64, temp2);
build.ror(inst.regA64, temp1, inst.regA64);
}
break;
}
case IrCmd::BITRROTATE_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});
if (inst.a.kind == IrOpKind::Inst && inst.b.kind == IrOpKind::Constant)
build.ror(inst.regA64, regOp(inst.a), uint8_t(unsigned(intOp(inst.b)) & 31));
else
{
RegisterA64 temp1 = tempUint(inst.a);
RegisterA64 temp2 = tempUint(inst.b);
build.ror(inst.regA64, temp1, temp2);
}
break;
}
case IrCmd::BITCOUNTLZ_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
build.clz(inst.regA64, temp);
break;
}
case IrCmd::BITCOUNTRZ_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
build.rbit(inst.regA64, temp);
build.clz(inst.regA64, inst.regA64);
break;
}
case IrCmd::BYTESWAP_UINT:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a});
RegisterA64 temp = tempUint(inst.a);
build.rev(inst.regA64, temp);
break;
}
case IrCmd::INVOKE_LIBM:
{
if (inst.c.kind != IrOpKind::None)
{
bool isInt = (inst.c.kind == IrOpKind::Constant) ? constOp(inst.c).kind == IrConstKind::Int
: getCmdValueKind(function.instOp(inst.c).cmd) == IrValueKind::Int;
RegisterA64 temp1 = tempDouble(inst.b);
RegisterA64 temp2 = isInt ? tempInt(inst.c) : tempDouble(inst.c);
RegisterA64 temp3 = isInt ? noreg : regs.allocTemp(KindA64::d); // note: spill() frees all registers so we need to avoid alloc after spill
regs.spill(build, index, {temp1, temp2});
if (isInt)
{
build.fmov(d0, temp1);
build.mov(w0, temp2);
}
else if (d0 != temp2)
{
build.fmov(d0, temp1);
build.fmov(d1, temp2);
}
else
{
build.fmov(temp3, d0);
build.fmov(d0, temp1);
build.fmov(d1, temp3);
}
}
else
{
RegisterA64 temp1 = tempDouble(inst.b);
regs.spill(build, index, {temp1});
build.fmov(d0, temp1);
}
build.ldr(x1, mem(rNativeContext, getNativeContextOffset(uintOp(inst.a))));
build.blr(x1);
inst.regA64 = regs.takeReg(d0, index);
break;
}
case IrCmd::GET_TYPE:
{
inst.regA64 = regs.allocReg(KindA64::x, index);
CODEGEN_ASSERT(sizeof(TString*) == 8);
if (inst.a.kind == IrOpKind::Inst)
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.add(inst.regA64, rGlobalState, regOp(inst.a), 3); // implicit uxtw
else if (inst.a.kind == IrOpKind::Constant)
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
build.add(inst.regA64, rGlobalState, uint16_t(tagOp(inst.a)) * 8);
else
CODEGEN_ASSERT(!"Unsupported instruction form");
build.ldr(inst.regA64, mem(inst.regA64, offsetof(global_State, ttname)));
break;
}
case IrCmd::GET_TYPEOF:
{
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr)));
build.blr(x2);
Sync to upstream/release/588 (#992) Type checker/autocomplete: * `Luau::autocomplete` no longer performs typechecking internally, make sure to run `Frontend::check` before performing autocomplete requests * Autocomplete string suggestions without "" are now only suggested inside the "" * Autocomplete suggestions now include `function (anonymous autofilled)` key with a full suggestion for the function expression (with arguments included) stored in `AutocompleteEntry::insertText` * `AutocompleteEntry::indexedWithSelf` is provided for function call suggestions made with `:` * Cyclic modules now see each other type exports as `any` to prevent memory use-after-free (similar to module return type) Runtime: * Updated inline/loop unroll cost model to better handle assignments (Fixes https://github.com/Roblox/luau/issues/978) * `math.noise` speed was improved by ~30% * `table.concat` speed was improved by ~5-7% * `tonumber` and `tostring` now have fastcall paths that execute ~1.5x and ~2.5x faster respectively (fixes #777) * Fixed crash in `luaL_typename` when index refers to a non-existing value * Fixed potential out of memory scenario when using `string.sub` or `string.char` in a loop * Fixed behavior of some fastcall builtins when called without arguments under -O2 to match original functions * Support for native code execution in VM is now enabled by default (note: native code still has to be generated explicitly) * `Codegen::compile` now accepts `CodeGen_OnlyNativeModules` flag. When set, only modules that have a `--!native` hot-comment at the top will be compiled to native code In our new typechecker: * Generic type packs are no longer considered to be variadic during unification * Timeout and cancellation now works in new solver * Fixed false positive errors around 'table' and 'function' type refinements * Table literals now use covariant unification rules. This is sound since literal has no type specified and has no aliases * Fixed issues with blocked types escaping the constraint solver * Fixed more places where error messages that should've been suppressed were still reported * Fixed errors when iterating over a top table type In our native code generation (jit): * 'DebugLuauAbortingChecks' flag is now supported on A64 * LOP_NEWCLOSURE has been translated to IR
2023-07-28 16:13:53 +01:00
inst.regA64 = regs.takeReg(x0, index);
break;
}
case IrCmd::FINDUPVAL:
{
regs.spill(build, index);
build.mov(x0, rState);
build.add(x1, rBase, uint16_t(vmRegOp(inst.a) * sizeof(TValue)));
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_findupval)));
build.blr(x2);
inst.regA64 = regs.takeReg(x0, index);
break;
}
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
case IrCmd::BUFFER_READI8:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldrsb(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_READU8:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldrb(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEI8:
{
RegisterA64 temp = tempInt(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.strb(temp, addr);
break;
}
case IrCmd::BUFFER_READI16:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldrsh(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_READU16:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldrh(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEI16:
{
RegisterA64 temp = tempInt(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.strh(temp, addr);
break;
}
case IrCmd::BUFFER_READI32:
{
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.b});
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEI32:
{
RegisterA64 temp = tempInt(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.str(temp, addr);
break;
}
case IrCmd::BUFFER_READF32:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
RegisterA64 temp = castReg(KindA64::s, inst.regA64); // safe to alias a fresh register
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldr(temp, addr);
build.fcvt(inst.regA64, temp);
break;
}
case IrCmd::BUFFER_WRITEF32:
{
RegisterA64 temp1 = tempDouble(inst.c);
RegisterA64 temp2 = regs.allocTemp(KindA64::s);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.fcvt(temp2, temp1);
build.str(temp2, addr);
break;
}
case IrCmd::BUFFER_READF64:
{
inst.regA64 = regs.allocReg(KindA64::d, index);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.c.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.c));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.ldr(inst.regA64, addr);
break;
}
case IrCmd::BUFFER_WRITEF64:
{
RegisterA64 temp = tempDouble(inst.c);
AddressA64 addr = tempAddrBuffer(inst.a, inst.b, inst.d.kind == IrOpKind::None ? LUA_TBUFFER : tagOp(inst.d));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
build.str(temp, addr);
break;
}
// To handle unsupported instructions, add "case IrCmd::OP" and make sure to set error = true!
}
valueTracker.afterInstLowering(inst, index);
regs.freeLastUseRegs(inst, index);
regs.freeTempRegs();
}
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
void IrLoweringA64::finishBlock(const IrBlock& curr, const IrBlock& next)
{
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
if (!regs.spills.empty())
{
// If we have spills remaining, we have to immediately lower the successor block
for (uint32_t predIdx : predecessors(function.cfg, function.getBlockIndex(next)))
CODEGEN_ASSERT(predIdx == function.getBlockIndex(curr));
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
// And the next block cannot be a join block in cfg
CODEGEN_ASSERT(next.useCount == 1);
Sync to upstream/release/591 (#1012) * Fix a use-after-free bug in the new type cloning algorithm * Tighten up the type of `coroutine.wrap`. It is now `<A..., R...>(f: (A...) -> R...) -> ((A...) -> R...)` * Break `.luaurc` out into a separate library target `Luau.Config`. This makes it easier for applications to reason about config files without also depending on the type inference engine. * Move typechecking limits into `FrontendOptions`. This allows embedders more finely-grained control over autocomplete's internal time limits. * Fix stability issue with debugger onprotectederror callback allowing break in non-yieldable contexts New solver: * Initial work toward [Local Type Inference](https://github.com/Roblox/luau/blob/0e1082108fd6fb3a32dfdf5f1766ea3fc1391328/rfcs/local-type-inference.md) * Introduce a new subtyping test. This will be much nicer than the old test because it is completely separate both from actual type inference and from error reporting. Native code generation: * Added function to compute iterated dominance frontier * Optimize barriers in SET_UPVALUE when tag is known * Cache lua_State::global in a register on A64 * Optimize constant stores in A64 lowering * Track table array size state to optimize array size checks * Add split tag/value store into a VM register * Check that spills can outlive the block only in specific conditions --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-08-18 19:15:41 +01:00
}
}
void IrLoweringA64::finishFunction()
{
if (build.logText)
build.logAppend("; interrupt handlers\n");
for (InterruptHandler& handler : interruptHandlers)
{
build.setLabel(handler.self);
build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction));
build.adr(x1, handler.next);
build.b(helpers.interrupt);
}
if (build.logText)
build.logAppend("; exit handlers\n");
for (ExitHandler& handler : exitHandlers)
{
CODEGEN_ASSERT(handler.pcpos != kVmExitEntryGuardPc);
build.setLabel(handler.self);
build.mov(x0, handler.pcpos * sizeof(Instruction));
build.b(helpers.updatePcAndContinueInVm);
}
if (stats)
{
if (error)
stats->loweringErrors++;
if (regs.error)
stats->regAllocErrors++;
}
}
bool IrLoweringA64::hasError() const
{
return error || regs.error;
}
bool IrLoweringA64::isFallthroughBlock(const IrBlock& target, const IrBlock& next)
{
return target.start == next.start;
}
void IrLoweringA64::jumpOrFallthrough(IrBlock& target, const IrBlock& next)
{
if (!isFallthroughBlock(target, next))
build.b(target.label);
}
Label& IrLoweringA64::getTargetLabel(IrOp op, Label& fresh)
{
if (op.kind == IrOpKind::Undef)
return fresh;
if (op.kind == IrOpKind::VmExit)
{
// Special exit case that doesn't have to update pcpos
if (vmExitOp(op) == kVmExitEntryGuardPc)
return helpers.exitContinueVmClearNativeFlag;
if (uint32_t* index = exitHandlerMap.find(vmExitOp(op)))
return exitHandlers[*index].self;
return fresh;
}
return labelOp(op);
}
void IrLoweringA64::finalizeTargetLabel(IrOp op, Label& fresh)
{
if (op.kind == IrOpKind::Undef)
{
emitAbort(build, fresh);
}
else if (op.kind == IrOpKind::VmExit && fresh.id != 0 && fresh.id != helpers.exitContinueVmClearNativeFlag.id)
{
exitHandlerMap[vmExitOp(op)] = uint32_t(exitHandlers.size());
exitHandlers.push_back({fresh, vmExitOp(op)});
}
}
RegisterA64 IrLoweringA64::tempDouble(IrOp op)
{
if (op.kind == IrOpKind::Inst)
return regOp(op);
else if (op.kind == IrOpKind::Constant)
{
double val = doubleOp(op);
if (AssemblyBuilderA64::isFmovSupported(val))
{
RegisterA64 temp = regs.allocTemp(KindA64::d);
build.fmov(temp, val);
return temp;
}
else
{
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
RegisterA64 temp2 = regs.allocTemp(KindA64::d);
Sync to upstream/release/576 (#928) * `ClassType` can now have an indexer defined on it. This allows custom types to be used in `t[x]` expressions. * Fixed search for closest executable breakpoint line. Previously, breakpoints might have been skipped in `else` blocks at the end of a function * Fixed how unification is performed for two optional types `a? <: b?`, previously it might have unified either 'a' or 'b' with 'nil'. Note that this fix is not enabled by default yet (see the list in `ExperimentalFlags.h`) In the new type solver, a concept of 'Type Families' has been introduced. Type families can be thought of as type aliases with custom type inference/reduction logic included with them. For example, we can have an `Add<T, U>` type family that will resolve the type that is the result of adding two values together. This will help type inference to figure out what 'T' and 'U' might be when explicit type annotations are not provided. In this update we don't define any type families, but they will be added in the near future. It is also possible for Luau embedders to define their own type families in the global/environment scope. Other changes include: * Fixed scope used to find out which generic types should be included in the function generic type list * Fixed a crash after cyclic bound types were created during unification And in native code generation (jit): * Use of arm64 target on M1 now requires macOS 13 * Entry into native code has been optimized. This is especially important for coroutine call/pcall performance as they involve going through a C call frame * LOP_LOADK(X) translation into IR has been improved to enable type tag/constant propagation * arm64 can use integer immediate values to synthesize floating-point values * x64 assembler removes duplicate 64bit numbers from the data section to save space * Linux `perf` can now be used to profile native Luau code (when running with --codegen-perf CLI argument)
2023-05-12 18:50:47 +01:00
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
uint64_t vali = getDoubleBits(val);
Sync to upstream/release/576 (#928) * `ClassType` can now have an indexer defined on it. This allows custom types to be used in `t[x]` expressions. * Fixed search for closest executable breakpoint line. Previously, breakpoints might have been skipped in `else` blocks at the end of a function * Fixed how unification is performed for two optional types `a? <: b?`, previously it might have unified either 'a' or 'b' with 'nil'. Note that this fix is not enabled by default yet (see the list in `ExperimentalFlags.h`) In the new type solver, a concept of 'Type Families' has been introduced. Type families can be thought of as type aliases with custom type inference/reduction logic included with them. For example, we can have an `Add<T, U>` type family that will resolve the type that is the result of adding two values together. This will help type inference to figure out what 'T' and 'U' might be when explicit type annotations are not provided. In this update we don't define any type families, but they will be added in the near future. It is also possible for Luau embedders to define their own type families in the global/environment scope. Other changes include: * Fixed scope used to find out which generic types should be included in the function generic type list * Fixed a crash after cyclic bound types were created during unification And in native code generation (jit): * Use of arm64 target on M1 now requires macOS 13 * Entry into native code has been optimized. This is especially important for coroutine call/pcall performance as they involve going through a C call frame * LOP_LOADK(X) translation into IR has been improved to enable type tag/constant propagation * arm64 can use integer immediate values to synthesize floating-point values * x64 assembler removes duplicate 64bit numbers from the data section to save space * Linux `perf` can now be used to profile native Luau code (when running with --codegen-perf CLI argument)
2023-05-12 18:50:47 +01:00
if ((vali << 16) == 0)
{
build.movz(temp1, uint16_t(vali >> 48), 48);
build.fmov(temp2, temp1);
}
else if ((vali << 32) == 0)
{
build.movz(temp1, uint16_t(vali >> 48), 48);
build.movk(temp1, uint16_t(vali >> 32), 32);
build.fmov(temp2, temp1);
}
else
{
build.adr(temp1, val);
build.ldr(temp2, temp1);
}
return temp2;
}
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}
RegisterA64 IrLoweringA64::tempInt(IrOp op)
{
if (op.kind == IrOpKind::Inst)
return regOp(op);
else if (op.kind == IrOpKind::Constant)
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, intOp(op));
return temp;
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}
RegisterA64 IrLoweringA64::tempUint(IrOp op)
{
if (op.kind == IrOpKind::Inst)
return regOp(op);
else if (op.kind == IrOpKind::Constant)
{
RegisterA64 temp = regs.allocTemp(KindA64::w);
build.mov(temp, unsigned(intOp(op)));
return temp;
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}
AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset)
{
// This is needed to tighten the bounds checks in the VmConst case below
CODEGEN_ASSERT(offset % 4 == 0);
// Full encoded range is wider depending on the load size, but this assertion helps establish a smaller guaranteed working range [0..4096)
CODEGEN_ASSERT(offset >= 0 && unsigned(offset / 4) <= AssemblyBuilderA64::kMaxImmediate);
if (op.kind == IrOpKind::VmReg)
return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset);
else if (op.kind == IrOpKind::VmConst)
{
size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset;
// Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries
if (constantOffset / 4 <= AddressA64::kMaxOffset)
return mem(rConstants, int(constantOffset));
RegisterA64 temp = regs.allocTemp(KindA64::x);
Sync to upstream/release/572 (#899) * Fixed exported types not being suggested in autocomplete * `T...` is now convertible to `...any` (Fixes https://github.com/Roblox/luau/issues/767) * Fixed issue with `T?` not being convertible to `T | T` or `T?` (sometimes when internal pointer identity is different) * Fixed potential crash in missing table key error suggestion to use a similar existing key * `lua_topointer` now returns a pointer for strings C++ API Changes: * `prepareModuleScope` callback has moved from TypeChecker to Frontend * For LSPs, AstQuery functions (and `isWithinComment`) can be used without full Frontend data A lot of changes in our two experimental components as well. In our work on the new type-solver, the following issues were fixed: * Fixed table union and intersection indexing * Correct custom type environments are now used * Fixed issue with values of `free & number` type not accepted in numeric operations And these are the changes in native code generation (JIT): * arm64 lowering is almost complete with support for 99% of IR commands and all fastcalls * Fixed x64 assembly encoding for extended byte registers * More external x64 calls are aware of register allocator * `math.min`/`math.max` with more than 2 arguments are now lowered to IR as well * Fixed correctness issues with `math` library calls with multiple results in variadic context and with x64 register conflicts * x64 register allocator learnt to restore values from VM memory instead of always using stack spills * x64 exception unwind information now supports multiple functions and fixes function start offset in Dwarf2 info
2023-04-14 19:06:22 +01:00
emitAddOffset(build, temp, rConstants, constantOffset);
return temp;
}
// If we have a register, we assume it's a pointer to TValue
// We might introduce explicit operand types in the future to make this more robust
else if (op.kind == IrOpKind::Inst)
return mem(regOp(op), offset);
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
}
}
AddressA64 IrLoweringA64::tempAddrBuffer(IrOp bufferOp, IrOp indexOp, uint8_t tag)
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
{
CODEGEN_ASSERT(tag == LUA_TUSERDATA || tag == LUA_TBUFFER);
int dataOffset = tag == LUA_TBUFFER ? offsetof(Buffer, data) : offsetof(Udata, data);
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
if (indexOp.kind == IrOpKind::Inst)
{
RegisterA64 temp = regs.allocTemp(KindA64::x);
build.add(temp, regOp(bufferOp), regOp(indexOp)); // implicit uxtw
return mem(temp, dataOffset);
}
else if (indexOp.kind == IrOpKind::Constant)
{
// Since the resulting address may be used to load any size, including 1 byte, from an unaligned offset, we are limited by unscaled
// encoding
if (unsigned(intOp(indexOp)) + dataOffset <= 255)
return mem(regOp(bufferOp), int(intOp(indexOp) + dataOffset));
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
// indexOp can only be negative in dead code (since offsets are checked); this avoids assertion in emitAddOffset
if (intOp(indexOp) < 0)
return mem(regOp(bufferOp), dataOffset);
RegisterA64 temp = regs.allocTemp(KindA64::x);
emitAddOffset(build, temp, regOp(bufferOp), size_t(intOp(indexOp)));
return mem(temp, dataOffset);
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
}
else
{
CODEGEN_ASSERT(!"Unsupported instruction form");
return noreg;
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. ## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-10 21:10:07 +00:00
}
}
RegisterA64 IrLoweringA64::regOp(IrOp op)
{
IrInst& inst = function.instOp(op);
if (inst.spilled || inst.needsReload)
regs.restoreReg(build, inst);
CODEGEN_ASSERT(inst.regA64 != noreg);
return inst.regA64;
}
IrConst IrLoweringA64::constOp(IrOp op) const
{
return function.constOp(op);
}
uint8_t IrLoweringA64::tagOp(IrOp op) const
{
return function.tagOp(op);
}
int IrLoweringA64::intOp(IrOp op) const
{
return function.intOp(op);
}
unsigned IrLoweringA64::uintOp(IrOp op) const
{
return function.uintOp(op);
}
double IrLoweringA64::doubleOp(IrOp op) const
{
return function.doubleOp(op);
}
IrBlock& IrLoweringA64::blockOp(IrOp op) const
{
return function.blockOp(op);
}
Label& IrLoweringA64::labelOp(IrOp op) const
{
return blockOp(op).label;
}
} // namespace A64
} // namespace CodeGen
} // namespace Luau