luau/CodeGen/src/OptimizeFinalX64.cpp

145 lines
4.5 KiB
C++
Raw Normal View History

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/OptimizeFinalX64.h"
#include "Luau/IrUtils.h"
#include <utility>
namespace Luau
{
namespace CodeGen
{
// x64 assembly allows memory operands, but IR separates loads from uses
// To improve final x64 lowering, we try to 'inline' single-use register/constant loads into some of our instructions
// This pass might not be useful on different architectures
static void optimizeMemoryOperandsX64(IrFunction& function, IrBlock& block)
{
CODEGEN_ASSERT(block.kind != IrBlockKind::Dead);
for (uint32_t index = block.start; index <= block.finish; index++)
{
CODEGEN_ASSERT(index < function.instructions.size());
IrInst& inst = function.instructions[index];
switch (inst.cmd)
{
case IrCmd::CHECK_TAG:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& tag = function.instOp(inst.a);
if (tag.useCount == 1 && tag.cmd == IrCmd::LOAD_TAG && (tag.a.kind == IrOpKind::VmReg || tag.a.kind == IrOpKind::VmConst))
replace(function, inst.a, tag.a);
}
break;
}
case IrCmd::CHECK_TRUTHY:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& tag = function.instOp(inst.a);
if (tag.useCount == 1 && tag.cmd == IrCmd::LOAD_TAG && (tag.a.kind == IrOpKind::VmReg || tag.a.kind == IrOpKind::VmConst))
replace(function, inst.a, tag.a);
}
if (inst.b.kind == IrOpKind::Inst)
{
IrInst& value = function.instOp(inst.b);
if (value.useCount == 1 && value.cmd == IrCmd::LOAD_INT)
replace(function, inst.b, value.a);
}
break;
}
case IrCmd::ADD_NUM:
case IrCmd::SUB_NUM:
case IrCmd::MUL_NUM:
case IrCmd::DIV_NUM:
case IrCmd::IDIV_NUM:
case IrCmd::MOD_NUM:
Sync to upstream/release/566 (#853) * Fixed incorrect lexeme generated for string parts in the middle of an interpolated string (Fixes https://github.com/Roblox/luau/issues/744) * DeprecatedApi lint can report some issues without type inference information * Fixed performance of autocomplete requests when suggestions have large intersection types (Solves https://github.com/Roblox/luau/discussions/847) * Marked `table.getn`/`foreach`/`foreachi` as deprecated ([RFC: Deprecate table.getn/foreach/foreachi](https://github.com/Roblox/luau/blob/master/rfcs/deprecate-table-getn-foreach.md)) * With -O2 optimization level, we now optimize builtin calls based on known argument/return count. Note that this change can be observable if `getfenv/setfenv` is used to substitute a builtin, especially if arity is different. Fastcall heavy tests show a 1-2% improvement. * Luau can now be built with clang-cl (Fixes https://github.com/Roblox/luau/issues/736) We also made many improvements to our experimental components. For our new type solver: * Overhauled data flow analysis system, fixed issues with 'repeat' loops, global variables and type annotations * Type refinements now work on generic table indexing with a string literal * Type refinements will properly track potentially 'nil' values (like t[x] for a missing key) and their further refinements * Internal top table type is now isomorphic to `{}` which fixes issues when `typeof(v) == 'table'` type refinement is handled * References to non-existent types in type annotations no longer resolve to 'error' type like in old solver * Improved handling of class unions in property access expressions * Fixed default type packs * Unsealed tables can now have metatables * Restored expected types for function arguments And for native code generation: * Added min and max IR instructions mapping to vminsd/vmaxsd on x64 * We now speculatively extract direct execution fast-paths based on expected types of expressions which provides better optimization opportunities inside a single basic block * Translated existing math fastcalls to IR form to improve tag guard removal and constant propagation
2023-03-03 22:21:14 +02:00
case IrCmd::MIN_NUM:
case IrCmd::MAX_NUM:
{
if (inst.b.kind == IrOpKind::Inst)
{
IrInst& rhs = function.instOp(inst.b);
if (rhs.useCount == 1 && rhs.cmd == IrCmd::LOAD_DOUBLE && (rhs.a.kind == IrOpKind::VmReg || rhs.a.kind == IrOpKind::VmConst))
replace(function, inst.b, rhs.a);
}
break;
}
case IrCmd::JUMP_EQ_TAG:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& tagA = function.instOp(inst.a);
if (tagA.useCount == 1 && tagA.cmd == IrCmd::LOAD_TAG && (tagA.a.kind == IrOpKind::VmReg || tagA.a.kind == IrOpKind::VmConst))
{
replace(function, inst.a, tagA.a);
break;
}
}
if (inst.b.kind == IrOpKind::Inst)
{
IrInst& tagB = function.instOp(inst.b);
if (tagB.useCount == 1 && tagB.cmd == IrCmd::LOAD_TAG && (tagB.a.kind == IrOpKind::VmReg || tagB.a.kind == IrOpKind::VmConst))
{
std::swap(inst.a, inst.b);
replace(function, inst.a, tagB.a);
}
}
break;
}
case IrCmd::JUMP_CMP_NUM:
{
if (inst.a.kind == IrOpKind::Inst)
{
IrInst& num = function.instOp(inst.a);
if (num.useCount == 1 && num.cmd == IrCmd::LOAD_DOUBLE)
replace(function, inst.a, num.a);
}
break;
}
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-26 19:20:56 -08:00
case IrCmd::FLOOR_NUM:
case IrCmd::CEIL_NUM:
case IrCmd::ROUND_NUM:
case IrCmd::SQRT_NUM:
case IrCmd::ABS_NUM:
{
Sync to upstream/release/615 (#1175) # What's changed? * Luau allocation scheme was changed to handle allocations in 513-1024 byte range internally without falling back to global allocator * coroutine/thread creation no longer requires any global allocations, making it up to 15% faster (vs libc malloc) * table construction for 17-32 keys or 33-64 array elements is up to 30% faster (vs libc malloc) ### New Type Solver * Cyclic unary negation type families are reduced to `number` when possible * Class types are skipped when searching for free types in unifier to improve performance * Fixed issues with table type inference when metatables are present * Improved inference of iteration loop types * Fixed an issue with bidirectional inference of method calls * Type simplification will now preserve error suppression markers ### Native Code Generation * Fixed TAG_VECTOR skip optimization to not break instruction use counts (broken optimization wasn't included in 614) * Fixed missing side-effect when optimizing generic loop preparation instruction --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com>
2024-03-01 10:45:26 -08:00
if (inst.a.kind == IrOpKind::Inst)
Sync to upstream/release/610 (#1154) # What's changed? * Check interrupt handler inside the pattern match engine to eliminate potential for programs to hang during string library function execution. * Allow iteration over table properties to pass the old type solver. ### Native Code Generation * Use in-place memory operands for math library operations on x64. * Replace opaque bools with separate enum classes in IrDump to improve code maintainability. * Translate operations on inferred vectors to IR. * Enable support for debugging native-compiled functions in Roblox Studio. ### New Type Solver * Rework type inference for boolean and string literals to introduce bounded free types (bounded below by the singleton type, and above by the primitive type) and reworked primitive type constraint to decide which is the appropriate type for the literal. * Introduce `FunctionCheckConstraint` to handle bidirectional typechecking for function calls, pushing the expected parameter types from the function onto the arguments. * Introduce `union` and `intersect` type families to compute deferred simplified unions and intersections to be employed by the constraint generation logic in the new solver. * Implement support for expanding the domain of local types in `Unifier2`. * Rework type inference for iteration variables bound by for in loops to use local types. * Change constraint blocking logic to use a set to prevent accidental re-blocking. * Add logic to detect missing return statements in functions. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2024-01-26 19:20:56 -08:00
{
IrInst& arg = function.instOp(inst.a);
if (arg.useCount == 1 && arg.cmd == IrCmd::LOAD_DOUBLE && (arg.a.kind == IrOpKind::VmReg || arg.a.kind == IrOpKind::VmConst))
replace(function, inst.a, arg.a);
}
break;
}
default:
break;
}
}
}
void optimizeMemoryOperandsX64(IrFunction& function)
{
for (IrBlock& block : function.blocks)
{
if (block.kind == IrBlockKind::Dead)
continue;
optimizeMemoryOperandsX64(function, block);
}
}
} // namespace CodeGen
} // namespace Luau