2023-02-24 21:49:38 +00:00
|
|
|
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
|
|
#include "Luau/OptimizeConstProp.h"
|
|
|
|
|
|
|
|
#include "Luau/DenseHash.h"
|
2023-08-11 15:42:37 +01:00
|
|
|
#include "Luau/IrData.h"
|
2023-02-24 21:49:38 +00:00
|
|
|
#include "Luau/IrBuilder.h"
|
|
|
|
#include "Luau/IrUtils.h"
|
|
|
|
|
|
|
|
#include "lua.h"
|
|
|
|
|
2023-12-02 07:46:57 +00:00
|
|
|
#include <limits.h>
|
2024-11-27 12:44:39 +00:00
|
|
|
#include <math.h>
|
2023-12-02 07:46:57 +00:00
|
|
|
|
2025-01-10 19:34:39 +00:00
|
|
|
#include <algorithm>
|
2023-04-28 20:55:13 +01:00
|
|
|
#include <array>
|
2023-08-18 19:15:41 +01:00
|
|
|
#include <utility>
|
2023-03-03 20:21:14 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
// Fast-int tunables and fast-flags declared for this translation unit
LUAU_FASTINTVARIABLE(LuauCodeGenMinLinearBlockPath, 3)
LUAU_FASTINTVARIABLE(LuauCodeGenReuseSlotLimit, 64)
LUAU_FASTINTVARIABLE(LuauCodeGenReuseUdataTagLimit, 64)
LUAU_FASTINTVARIABLE(LuauCodeGenLiveSlotReuseLimit, 8)
LUAU_FASTFLAGVARIABLE(DebugLuauAbortingChecks)
LUAU_FASTFLAG(LuauVectorLibNativeDot)
LUAU_FASTFLAGVARIABLE(LuauCodeGenArithOpt)
// Selects between 'getSlotNodeCache' and 'getSlotNodeCache_DEPRECATED' and enables instruction position tracking in ConstPropState
LUAU_FASTFLAGVARIABLE(LuauCodeGenLimitLiveSlotReuse)
|
2023-03-03 20:21:14 +00:00
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
namespace Luau
|
|
|
|
{
|
|
|
|
namespace CodeGen
|
|
|
|
{
|
|
|
|
|
|
|
|
// Data we know about the register value
|
|
|
|
struct RegisterInfo
|
|
|
|
{
|
|
|
|
uint8_t tag = 0xff;
|
|
|
|
IrOp value;
|
|
|
|
|
|
|
|
// Used to quickly invalidate links between SSA values and register memory
|
|
|
|
// It's a bit imprecise where value and tag both always invalidate together
|
|
|
|
uint32_t version = 0;
|
|
|
|
|
|
|
|
bool knownNotReadonly = false;
|
|
|
|
bool knownNoMetatable = false;
|
2023-08-18 19:15:41 +01:00
|
|
|
int knownTableArraySize = -1;
|
2023-02-24 21:49:38 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Load instructions are linked to target register to carry knowledge about the target
// We track a register version at the point of the load so it's easy to break the link when register is updated
struct RegisterLink
{
    // Index of the VM register the instruction is linked to
    uint8_t reg = 0;

    // Version of that register at the time the link was created
    uint32_t version = 0;
};
|
|
|
|
|
2025-01-10 19:34:39 +00:00
|
|
|
// Reference to an instruction together with the position of that instruction in the current block chain and the last position of reuse
struct NumberedInstruction
{
    // Index of the referenced instruction
    uint32_t instIdx = 0;

    // Position at which the instruction's live range starts
    uint32_t startPos = 0;

    // Position of the last reuse, treated as the end of the live range
    uint32_t finishPos = 0;
};
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
// Data we know about the current VM state
|
|
|
|
struct ConstPropState
|
|
|
|
{
|
2023-04-28 20:55:13 +01:00
|
|
|
ConstPropState(IrFunction& function)
|
2023-03-24 18:03:04 +00:00
|
|
|
: function(function)
|
2023-04-28 20:55:13 +01:00
|
|
|
, valueMap({})
|
2023-03-24 18:03:04 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
uint8_t tryGetTag(IrOp op)
|
|
|
|
{
|
|
|
|
if (RegisterInfo* info = tryGetRegisterInfo(op))
|
|
|
|
return info->tag;
|
|
|
|
|
|
|
|
return 0xff;
|
|
|
|
}
|
|
|
|
|
2023-05-25 22:36:34 +01:00
|
|
|
void updateTag(IrOp op, uint8_t tag)
|
|
|
|
{
|
|
|
|
if (RegisterInfo* info = tryGetRegisterInfo(op))
|
|
|
|
info->tag = tag;
|
|
|
|
}
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
void saveTag(IrOp op, uint8_t tag)
|
|
|
|
{
|
|
|
|
if (RegisterInfo* info = tryGetRegisterInfo(op))
|
2023-04-28 20:55:13 +01:00
|
|
|
{
|
|
|
|
if (info->tag != tag)
|
|
|
|
{
|
|
|
|
info->tag = tag;
|
|
|
|
info->version++;
|
|
|
|
}
|
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
IrOp tryGetValue(IrOp op)
|
|
|
|
{
|
|
|
|
if (RegisterInfo* info = tryGetRegisterInfo(op))
|
|
|
|
return info->value;
|
|
|
|
|
|
|
|
return IrOp{IrOpKind::None, 0u};
|
|
|
|
}
|
|
|
|
|
|
|
|
void saveValue(IrOp op, IrOp value)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(value.kind == IrOpKind::Constant);
|
2023-02-24 21:49:38 +00:00
|
|
|
|
|
|
|
if (RegisterInfo* info = tryGetRegisterInfo(op))
|
2023-04-28 20:55:13 +01:00
|
|
|
{
|
|
|
|
if (info->value != value)
|
|
|
|
{
|
|
|
|
info->value = value;
|
|
|
|
info->knownNotReadonly = false;
|
|
|
|
info->knownNoMetatable = false;
|
2023-08-18 19:15:41 +01:00
|
|
|
info->knownTableArraySize = -1;
|
2023-04-28 20:55:13 +01:00
|
|
|
info->version++;
|
|
|
|
}
|
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void invalidate(RegisterInfo& reg, bool invalidateTag, bool invalidateValue)
|
|
|
|
{
|
|
|
|
if (invalidateTag)
|
|
|
|
{
|
|
|
|
reg.tag = 0xff;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (invalidateValue)
|
|
|
|
{
|
|
|
|
reg.value = {};
|
|
|
|
reg.knownNotReadonly = false;
|
|
|
|
reg.knownNoMetatable = false;
|
2023-08-18 19:15:41 +01:00
|
|
|
reg.knownTableArraySize = -1;
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
reg.version++;
|
|
|
|
}
|
|
|
|
|
|
|
|
void invalidateTag(IrOp regOp)
|
|
|
|
{
|
2023-04-28 20:55:13 +01:00
|
|
|
// TODO: use maxstacksize from Proto
|
|
|
|
maxReg = vmRegOp(regOp) > maxReg ? vmRegOp(regOp) : maxReg;
|
2023-04-07 22:01:29 +01:00
|
|
|
invalidate(regs[vmRegOp(regOp)], /* invalidateTag */ true, /* invalidateValue */ false);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void invalidateValue(IrOp regOp)
|
|
|
|
{
|
2023-04-28 20:55:13 +01:00
|
|
|
// TODO: use maxstacksize from Proto
|
|
|
|
maxReg = vmRegOp(regOp) > maxReg ? vmRegOp(regOp) : maxReg;
|
2023-04-07 22:01:29 +01:00
|
|
|
invalidate(regs[vmRegOp(regOp)], /* invalidateTag */ false, /* invalidateValue */ true);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void invalidate(IrOp regOp)
|
|
|
|
{
|
2023-04-28 20:55:13 +01:00
|
|
|
// TODO: use maxstacksize from Proto
|
|
|
|
maxReg = vmRegOp(regOp) > maxReg ? vmRegOp(regOp) : maxReg;
|
2023-04-07 22:01:29 +01:00
|
|
|
invalidate(regs[vmRegOp(regOp)], /* invalidateTag */ true, /* invalidateValue */ true);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
2023-03-24 18:03:04 +00:00
|
|
|
void invalidateRegistersFrom(int firstReg)
|
2023-02-24 21:49:38 +00:00
|
|
|
{
|
2023-03-24 18:03:04 +00:00
|
|
|
for (int i = firstReg; i <= maxReg; ++i)
|
2023-02-24 21:49:38 +00:00
|
|
|
invalidate(regs[i], /* invalidateTag */ true, /* invalidateValue */ true);
|
|
|
|
}
|
|
|
|
|
2023-03-24 18:03:04 +00:00
|
|
|
void invalidateRegisterRange(int firstReg, int count)
|
|
|
|
{
|
2023-06-02 20:52:15 +01:00
|
|
|
if (count == -1)
|
|
|
|
{
|
|
|
|
invalidateRegistersFrom(firstReg);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (int i = firstReg; i < firstReg + count && i <= maxReg; ++i)
|
|
|
|
invalidate(regs[i], /* invalidateTag */ true, /* invalidateValue */ true);
|
|
|
|
}
|
2023-03-24 18:03:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void invalidateCapturedRegisters()
|
|
|
|
{
|
|
|
|
for (int i = 0; i <= maxReg; ++i)
|
|
|
|
{
|
|
|
|
if (function.cfg.captured.regs.test(i))
|
|
|
|
invalidate(regs[i], /* invalidateTag */ true, /* invalidateValue */ true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-30 02:13:05 +01:00
|
|
|
// Value propagation extends the live range of an SSA register
|
|
|
|
// In some cases we can't propagate earlier values because we can't guarantee that we will be able to find a storage/restore location
|
|
|
|
// As an example, when Luau call is performed, both volatile registers and stack slots might be overwritten
|
|
|
|
void invalidateValuePropagation()
|
|
|
|
{
|
|
|
|
valueMap.clear();
|
|
|
|
tryNumToIndexCache.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
// If table memory has changed, we can't reuse previously computed and validated table slot lookups
|
|
|
|
// Same goes for table array elements as well
|
|
|
|
void invalidateHeapTableData()
|
|
|
|
{
|
2025-01-10 19:34:39 +00:00
|
|
|
if (FFlag::LuauCodeGenLimitLiveSlotReuse)
|
|
|
|
getSlotNodeCache.clear();
|
|
|
|
else
|
|
|
|
getSlotNodeCache_DEPRECATED.clear();
|
|
|
|
|
2023-09-30 02:13:05 +01:00
|
|
|
checkSlotMatchCache.clear();
|
|
|
|
|
|
|
|
getArrAddrCache.clear();
|
|
|
|
checkArraySizeCache.clear();
|
|
|
|
}
|
|
|
|
|
2023-12-02 07:46:57 +00:00
|
|
|
void invalidateHeapBufferData()
|
|
|
|
{
|
|
|
|
checkBufferLenCache.clear();
|
|
|
|
}
|
|
|
|
|
2024-06-07 18:51:12 +01:00
|
|
|
void invalidateUserdataData()
|
|
|
|
{
|
|
|
|
useradataTagCache.clear();
|
|
|
|
}
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
void invalidateHeap()
|
|
|
|
{
|
|
|
|
for (int i = 0; i <= maxReg; ++i)
|
|
|
|
invalidateHeap(regs[i]);
|
2023-09-01 18:58:27 +01:00
|
|
|
|
2023-09-30 02:13:05 +01:00
|
|
|
invalidateHeapTableData();
|
2023-12-02 07:46:57 +00:00
|
|
|
|
|
|
|
// Buffer length checks are not invalidated since buffer size is immutable
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void invalidateHeap(RegisterInfo& reg)
|
|
|
|
{
|
|
|
|
reg.knownNotReadonly = false;
|
|
|
|
reg.knownNoMetatable = false;
|
2023-08-18 19:15:41 +01:00
|
|
|
reg.knownTableArraySize = -1;
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
2023-03-24 18:03:04 +00:00
|
|
|
void invalidateUserCall()
|
2023-02-24 21:49:38 +00:00
|
|
|
{
|
2023-03-24 18:03:04 +00:00
|
|
|
invalidateHeap();
|
|
|
|
invalidateCapturedRegisters();
|
2023-02-24 21:49:38 +00:00
|
|
|
inSafeEnv = false;
|
|
|
|
}
|
|
|
|
|
2023-09-01 18:58:27 +01:00
|
|
|
void invalidateTableArraySize()
|
|
|
|
{
|
|
|
|
for (int i = 0; i <= maxReg; ++i)
|
|
|
|
invalidateTableArraySize(regs[i]);
|
|
|
|
|
2023-09-30 02:13:05 +01:00
|
|
|
invalidateHeapTableData();
|
2023-09-01 18:58:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void invalidateTableArraySize(RegisterInfo& reg)
|
|
|
|
{
|
|
|
|
reg.knownTableArraySize = -1;
|
|
|
|
}
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
void createRegLink(uint32_t instIdx, IrOp regOp)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(!instLink.contains(instIdx));
|
2023-04-07 22:01:29 +01:00
|
|
|
instLink[instIdx] = RegisterLink{uint8_t(vmRegOp(regOp)), regs[vmRegOp(regOp)].version};
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
RegisterInfo* tryGetRegisterInfo(IrOp op)
|
|
|
|
{
|
|
|
|
if (op.kind == IrOpKind::VmReg)
|
|
|
|
{
|
2023-04-07 22:01:29 +01:00
|
|
|
maxReg = vmRegOp(op) > maxReg ? vmRegOp(op) : maxReg;
|
|
|
|
return ®s[vmRegOp(op)];
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (RegisterLink* link = tryGetRegLink(op))
|
|
|
|
{
|
|
|
|
maxReg = int(link->reg) > maxReg ? int(link->reg) : maxReg;
|
|
|
|
return ®s[link->reg];
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
RegisterLink* tryGetRegLink(IrOp instOp)
|
|
|
|
{
|
|
|
|
if (instOp.kind != IrOpKind::Inst)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
if (RegisterLink* link = instLink.find(instOp.index))
|
|
|
|
{
|
|
|
|
// Check that the target register hasn't changed the value
|
2023-05-25 22:36:34 +01:00
|
|
|
if (link->version < regs[link->reg].version)
|
2023-02-24 21:49:38 +00:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
return link;
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
// Attach register version number to the register operand in a load instruction
|
|
|
|
// This is used to allow instructions with register references to be compared for equality
|
|
|
|
IrInst versionedVmRegLoad(IrCmd loadCmd, IrOp op)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(op.kind == IrOpKind::VmReg);
|
2023-04-28 20:55:13 +01:00
|
|
|
uint32_t version = regs[vmRegOp(op)].version;
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(version <= 0xffffff);
|
2023-04-28 20:55:13 +01:00
|
|
|
op.index = vmRegOp(op) | (version << 8);
|
|
|
|
return IrInst{loadCmd, op};
|
|
|
|
}
|
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
uint32_t* getPreviousInstIndex(const IrInst& inst)
|
2023-04-28 20:55:13 +01:00
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(useValueNumbering);
|
2023-04-28 20:55:13 +01:00
|
|
|
|
|
|
|
if (uint32_t* prevIdx = valueMap.find(inst))
|
2023-06-02 20:52:15 +01:00
|
|
|
{
|
|
|
|
// Previous load might have been removed as unused
|
2023-08-18 19:15:41 +01:00
|
|
|
if (function.instructions[*prevIdx].useCount != 0)
|
|
|
|
return prevIdx;
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t* getPreviousVersionedLoadIndex(IrCmd cmd, IrOp vmReg)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(vmReg.kind == IrOpKind::VmReg);
|
2023-08-18 19:15:41 +01:00
|
|
|
return getPreviousInstIndex(versionedVmRegLoad(cmd, vmReg));
|
|
|
|
}
|
|
|
|
|
|
|
|
std::pair<IrCmd, uint32_t> getPreviousVersionedLoadForTag(uint8_t tag, IrOp vmReg)
|
|
|
|
{
|
|
|
|
if (useValueNumbering && !function.cfg.captured.regs.test(vmRegOp(vmReg)))
|
|
|
|
{
|
|
|
|
if (tag == LUA_TBOOLEAN)
|
2023-06-02 20:52:15 +01:00
|
|
|
{
|
2023-08-18 19:15:41 +01:00
|
|
|
if (uint32_t* prevIdx = getPreviousVersionedLoadIndex(IrCmd::LOAD_INT, vmReg))
|
|
|
|
return std::make_pair(IrCmd::LOAD_INT, *prevIdx);
|
2023-06-02 20:52:15 +01:00
|
|
|
}
|
2023-08-18 19:15:41 +01:00
|
|
|
else if (tag == LUA_TNUMBER)
|
|
|
|
{
|
|
|
|
if (uint32_t* prevIdx = getPreviousVersionedLoadIndex(IrCmd::LOAD_DOUBLE, vmReg))
|
|
|
|
return std::make_pair(IrCmd::LOAD_DOUBLE, *prevIdx);
|
|
|
|
}
|
|
|
|
else if (isGCO(tag))
|
|
|
|
{
|
|
|
|
if (uint32_t* prevIdx = getPreviousVersionedLoadIndex(IrCmd::LOAD_POINTER, vmReg))
|
|
|
|
return std::make_pair(IrCmd::LOAD_POINTER, *prevIdx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(IrCmd::NOP, kInvalidInstIdx);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find existing value of the instruction that is exactly the same, or record current on for future lookups
|
|
|
|
void substituteOrRecord(IrInst& inst, uint32_t instIdx)
|
|
|
|
{
|
|
|
|
if (!useValueNumbering)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (uint32_t* prevIdx = getPreviousInstIndex(inst))
|
|
|
|
{
|
|
|
|
substitute(function, inst, IrOp{IrOpKind::Inst, *prevIdx});
|
|
|
|
return;
|
2023-06-02 20:52:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
valueMap[inst] = instIdx;
|
2023-04-28 20:55:13 +01:00
|
|
|
}
|
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
// VM register load can be replaced by a previous load of the same version of the register
|
2023-04-28 20:55:13 +01:00
|
|
|
// If there is no previous load, we record the current one for future lookups
|
|
|
|
void substituteOrRecordVmRegLoad(IrInst& loadInst)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(loadInst.a.kind == IrOpKind::VmReg);
|
2023-04-28 20:55:13 +01:00
|
|
|
|
|
|
|
if (!useValueNumbering)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// To avoid captured register invalidation tracking in lowering later, values from loads from captured registers are not propagated
|
2024-12-03 00:16:33 +00:00
|
|
|
// This prevents the case where load value location is linked to memory in case of a spill and is then clobbered in a user call
|
2023-04-28 20:55:13 +01:00
|
|
|
if (function.cfg.captured.regs.test(vmRegOp(loadInst.a)))
|
|
|
|
return;
|
|
|
|
|
|
|
|
IrInst versionedLoad = versionedVmRegLoad(loadInst.cmd, loadInst.a);
|
|
|
|
|
|
|
|
// Check if there is a value that already has this version of the register
|
2023-08-18 19:15:41 +01:00
|
|
|
if (uint32_t* prevIdx = getPreviousInstIndex(versionedLoad))
|
2023-04-28 20:55:13 +01:00
|
|
|
{
|
2023-08-18 19:15:41 +01:00
|
|
|
// Previous value might not be linked to a register yet
|
|
|
|
// For example, it could be a NEW_TABLE stored into a register and we might need to track guards made with this value
|
|
|
|
if (!instLink.contains(*prevIdx))
|
|
|
|
createRegLink(*prevIdx, loadInst.a);
|
2023-04-28 20:55:13 +01:00
|
|
|
|
2024-12-03 00:16:33 +00:00
|
|
|
// Substitute load instruction with the previous value
|
2023-08-18 19:15:41 +01:00
|
|
|
substitute(function, loadInst, IrOp{IrOpKind::Inst, *prevIdx});
|
|
|
|
return;
|
2023-04-28 20:55:13 +01:00
|
|
|
}
|
|
|
|
|
2023-06-02 20:52:15 +01:00
|
|
|
uint32_t instIdx = function.getInstIndex(loadInst);
|
2023-04-28 20:55:13 +01:00
|
|
|
|
2023-06-02 20:52:15 +01:00
|
|
|
// Record load of this register version for future substitution
|
|
|
|
valueMap[versionedLoad] = instIdx;
|
|
|
|
|
|
|
|
createRegLink(instIdx, loadInst.a);
|
2023-04-28 20:55:13 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// VM register loads can use the value that was stored in the same Vm register earlier
|
|
|
|
void forwardVmRegStoreToLoad(const IrInst& storeInst, IrCmd loadCmd)
|
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(storeInst.a.kind == IrOpKind::VmReg);
|
|
|
|
CODEGEN_ASSERT(storeInst.b.kind == IrOpKind::Inst);
|
2023-04-28 20:55:13 +01:00
|
|
|
|
|
|
|
if (!useValueNumbering)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// To avoid captured register invalidation tracking in lowering later, values from stores into captured registers are not propagated
|
2024-12-03 00:16:33 +00:00
|
|
|
// This prevents the case where store creates an alternative value location in case of a spill and is then clobbered in a user call
|
2023-04-28 20:55:13 +01:00
|
|
|
if (function.cfg.captured.regs.test(vmRegOp(storeInst.a)))
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Future loads of this register version can use the value we stored
|
|
|
|
valueMap[versionedVmRegLoad(loadCmd, storeInst.a)] = storeInst.b.index;
|
|
|
|
}
|
|
|
|
|
2025-01-10 19:34:39 +00:00
|
|
|
// Used to compute the pressure of the cached value 'set' on the spill registers
|
|
|
|
// We want to find out the maximum live range intersection count between the cached value at 'slot' and current instruction
|
|
|
|
// Note that this pressure is approximate, as some values that might have been live at one point could have been marked dead later
|
|
|
|
int getMaxInternalOverlap(std::vector<NumberedInstruction>& set, size_t slot)
|
|
|
|
{
|
|
|
|
CODEGEN_ASSERT(FFlag::LuauCodeGenLimitLiveSlotReuse);
|
|
|
|
|
|
|
|
// Start with one live range for the slot we want to reuse
|
|
|
|
int curr = 1;
|
|
|
|
|
|
|
|
// For any slots where lifetime began before the slot of interest, mark as live if lifetime end is still active
|
|
|
|
// This saves us from processing slots [0; slot] in the range sweep later, which requires sorting the lifetime end points
|
|
|
|
for (size_t i = 0; i < slot; i++)
|
|
|
|
{
|
|
|
|
if (set[i].finishPos >= set[slot].startPos)
|
|
|
|
curr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
int max = curr;
|
|
|
|
|
|
|
|
// Collect lifetime end points and sort them
|
|
|
|
rangeEndTemp.clear();
|
|
|
|
|
|
|
|
for (size_t i = slot + 1; i < set.size(); i++)
|
|
|
|
rangeEndTemp.push_back(set[i].finishPos);
|
|
|
|
|
|
|
|
std::sort(rangeEndTemp.begin(), rangeEndTemp.end());
|
|
|
|
|
|
|
|
// Go over the lifetime begin/end ranges that we store as separate array and walk based on the smallest of values
|
|
|
|
for (size_t i1 = slot + 1, i2 = 0; i1 < set.size() && i2 < rangeEndTemp.size();)
|
|
|
|
{
|
|
|
|
if (rangeEndTemp[i2] == set[i1].startPos)
|
|
|
|
{
|
|
|
|
i1++;
|
|
|
|
i2++;
|
|
|
|
}
|
|
|
|
else if (rangeEndTemp[i2] < set[i1].startPos)
|
|
|
|
{
|
|
|
|
CODEGEN_ASSERT(curr > 0);
|
|
|
|
|
|
|
|
curr--;
|
|
|
|
i2++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
curr++;
|
|
|
|
i1++;
|
|
|
|
|
|
|
|
if (curr > max)
|
|
|
|
max = curr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We might have unprocessed lifetime end entries, but we will never have unprocessed lifetime start entries
|
|
|
|
// Not that lifetime end entries can only decrease the current value and do not affect the end result (maximum)
|
|
|
|
return max;
|
|
|
|
}
|
|
|
|
|
2023-05-05 22:52:49 +01:00
|
|
|
void clear()
|
|
|
|
{
|
|
|
|
for (int i = 0; i <= maxReg; ++i)
|
|
|
|
regs[i] = RegisterInfo();
|
|
|
|
|
|
|
|
maxReg = 0;
|
|
|
|
|
2025-01-10 19:34:39 +00:00
|
|
|
if (FFlag::LuauCodeGenLimitLiveSlotReuse)
|
|
|
|
instPos = 0u;
|
|
|
|
|
2023-05-05 22:52:49 +01:00
|
|
|
inSafeEnv = false;
|
|
|
|
checkedGc = false;
|
|
|
|
|
|
|
|
instLink.clear();
|
2023-09-30 02:13:05 +01:00
|
|
|
|
|
|
|
invalidateValuePropagation();
|
|
|
|
invalidateHeapTableData();
|
2023-12-02 07:46:57 +00:00
|
|
|
invalidateHeapBufferData();
|
2024-07-08 22:57:06 +01:00
|
|
|
invalidateUserdataData();
|
2023-05-05 22:52:49 +01:00
|
|
|
}
|
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
IrFunction& function;
|
2023-03-24 18:03:04 +00:00
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
bool useValueNumbering = false;
|
|
|
|
|
|
|
|
std::array<RegisterInfo, 256> regs;
|
2023-02-24 21:49:38 +00:00
|
|
|
|
|
|
|
// For range/full invalidations, we only want to visit a limited number of data that we have recorded
|
|
|
|
int maxReg = 0;
|
|
|
|
|
2025-01-10 19:34:39 +00:00
|
|
|
// Number of the instruction being processed
|
|
|
|
uint32_t instPos = 0;
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
bool inSafeEnv = false;
|
|
|
|
bool checkedGc = false;
|
|
|
|
|
|
|
|
DenseHashMap<uint32_t, RegisterLink> instLink{~0u};
|
2023-04-28 20:55:13 +01:00
|
|
|
|
|
|
|
DenseHashMap<IrInst, uint32_t, IrInstHash, IrInstEq> valueMap;
|
2023-09-01 18:58:27 +01:00
|
|
|
|
2023-09-30 02:13:05 +01:00
|
|
|
// Some instruction re-uses can't be stored in valueMap because of extra requirements
|
|
|
|
std::vector<uint32_t> tryNumToIndexCache; // Fallback block argument might be different
|
|
|
|
|
|
|
|
// Heap changes might affect table state
|
2025-01-10 19:34:39 +00:00
|
|
|
std::vector<NumberedInstruction> getSlotNodeCache; // Additionally, pcpos argument might be different
|
|
|
|
std::vector<uint32_t> getSlotNodeCache_DEPRECATED; // Additionally, pcpos argument might be different
|
2023-09-30 02:13:05 +01:00
|
|
|
std::vector<uint32_t> checkSlotMatchCache; // Additionally, fallback block argument might be different
|
|
|
|
|
|
|
|
std::vector<uint32_t> getArrAddrCache;
|
|
|
|
std::vector<uint32_t> checkArraySizeCache; // Additionally, fallback block argument might be different
|
2023-12-02 07:46:57 +00:00
|
|
|
|
|
|
|
std::vector<uint32_t> checkBufferLenCache; // Additionally, fallback block argument might be different
|
2024-06-07 18:51:12 +01:00
|
|
|
|
|
|
|
// Userdata tag cache can point to both NEW_USERDATA and CHECK_USERDATA_TAG instructions
|
|
|
|
std::vector<uint32_t> useradataTagCache; // Additionally, fallback block argument might be different
|
2025-01-10 19:34:39 +00:00
|
|
|
|
|
|
|
std::vector<uint32_t> rangeEndTemp;
|
2023-02-24 21:49:38 +00:00
|
|
|
};
|
|
|
|
|
2023-03-03 20:21:14 +00:00
|
|
|
// Applies the effects of a builtin fastcall on the tracked state
// Builtins that write to tables additionally invalidate heap knowledge
// Except for table.insert, registers starting from 'firstReturnReg' are invalidated
// 'nresults' is not used by the current implementation
static void handleBuiltinEffects(ConstPropState& state, LuauBuiltinFunction bfid, uint32_t firstReturnReg, int nresults)
{
    // Switch over all values is used to force new items to be handled
    switch (bfid)
    {
    case LBF_NONE:
    case LBF_ASSERT:
    case LBF_MATH_ABS:
    case LBF_MATH_ACOS:
    case LBF_MATH_ASIN:
    case LBF_MATH_ATAN2:
    case LBF_MATH_ATAN:
    case LBF_MATH_CEIL:
    case LBF_MATH_COSH:
    case LBF_MATH_COS:
    case LBF_MATH_DEG:
    case LBF_MATH_EXP:
    case LBF_MATH_FLOOR:
    case LBF_MATH_FMOD:
    case LBF_MATH_FREXP:
    case LBF_MATH_LDEXP:
    case LBF_MATH_LOG10:
    case LBF_MATH_LOG:
    case LBF_MATH_MAX:
    case LBF_MATH_MIN:
    case LBF_MATH_MODF:
    case LBF_MATH_POW:
    case LBF_MATH_RAD:
    case LBF_MATH_SINH:
    case LBF_MATH_SIN:
    case LBF_MATH_SQRT:
    case LBF_MATH_TANH:
    case LBF_MATH_TAN:
    case LBF_BIT32_ARSHIFT:
    case LBF_BIT32_BAND:
    case LBF_BIT32_BNOT:
    case LBF_BIT32_BOR:
    case LBF_BIT32_BXOR:
    case LBF_BIT32_BTEST:
    case LBF_BIT32_EXTRACT:
    case LBF_BIT32_LROTATE:
    case LBF_BIT32_LSHIFT:
    case LBF_BIT32_REPLACE:
    case LBF_BIT32_RROTATE:
    case LBF_BIT32_RSHIFT:
    case LBF_TYPE:
    case LBF_STRING_BYTE:
    case LBF_STRING_CHAR:
    case LBF_STRING_LEN:
    case LBF_TYPEOF:
    case LBF_STRING_SUB:
    case LBF_MATH_CLAMP:
    case LBF_MATH_SIGN:
    case LBF_MATH_ROUND:
    case LBF_RAWGET:
    case LBF_RAWEQUAL:
    case LBF_TABLE_UNPACK:
    case LBF_VECTOR:
    case LBF_BIT32_COUNTLZ:
    case LBF_BIT32_COUNTRZ:
    case LBF_SELECT_VARARG:
    case LBF_RAWLEN:
    case LBF_BIT32_EXTRACTK:
    case LBF_GETMETATABLE:
    case LBF_TONUMBER:
    case LBF_TOSTRING:
    case LBF_BIT32_BYTESWAP:
    case LBF_BUFFER_READI8:
    case LBF_BUFFER_READU8:
    case LBF_BUFFER_WRITEU8:
    case LBF_BUFFER_READI16:
    case LBF_BUFFER_READU16:
    case LBF_BUFFER_WRITEU16:
    case LBF_BUFFER_READI32:
    case LBF_BUFFER_READU32:
    case LBF_BUFFER_WRITEU32:
    case LBF_BUFFER_READF32:
    case LBF_BUFFER_WRITEF32:
    case LBF_BUFFER_READF64:
    case LBF_BUFFER_WRITEF64:
    case LBF_VECTOR_MAGNITUDE:
    case LBF_VECTOR_NORMALIZE:
    case LBF_VECTOR_CROSS:
    case LBF_VECTOR_DOT:
    case LBF_VECTOR_FLOOR:
    case LBF_VECTOR_CEIL:
    case LBF_VECTOR_ABS:
    case LBF_VECTOR_SIGN:
    case LBF_VECTOR_CLAMP:
    case LBF_VECTOR_MIN:
    case LBF_VECTOR_MAX:
    case LBF_MATH_LERP:
        break;
    case LBF_TABLE_INSERT:
        state.invalidateHeap();
        return; // table.insert does not modify result registers.
    case LBF_RAWSET:
        state.invalidateHeap();
        break;
    case LBF_SETMETATABLE:
        state.invalidateHeap(); // TODO: only knownNoMetatable is affected and we might know which one
        break;
    }

    // TODO: classify further using switch above, some fastcalls only modify the value, not the tag
    // TODO: fastcalls are different from calls and it might be possible to not invalidate all register starting from return
    state.invalidateRegistersFrom(firstReturnReg);
}
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& function, IrBlock& block, IrInst& inst, uint32_t index)
|
|
|
|
{
|
2025-01-10 19:34:39 +00:00
|
|
|
if (FFlag::LuauCodeGenLimitLiveSlotReuse)
|
|
|
|
state.instPos++;
|
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
switch (inst.cmd)
|
|
|
|
{
|
|
|
|
case IrCmd::LOAD_TAG:
|
|
|
|
if (uint8_t tag = state.tryGetTag(inst.a); tag != 0xff)
|
2023-09-08 01:13:49 +01:00
|
|
|
{
|
2023-02-24 21:49:38 +00:00
|
|
|
substitute(function, inst, build.constTag(tag));
|
2023-09-08 01:13:49 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
else if (inst.a.kind == IrOpKind::VmReg)
|
2023-09-08 01:13:49 +01:00
|
|
|
{
|
2023-11-10 21:10:07 +00:00
|
|
|
state.substituteOrRecordVmRegLoad(inst);
|
2023-09-08 01:13:49 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
|
|
|
case IrCmd::LOAD_POINTER:
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
2023-04-28 20:55:13 +01:00
|
|
|
state.substituteOrRecordVmRegLoad(inst);
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
|
|
|
case IrCmd::LOAD_DOUBLE:
|
2023-07-07 21:10:48 +01:00
|
|
|
{
|
|
|
|
IrOp value = state.tryGetValue(inst.a);
|
|
|
|
|
|
|
|
if (function.asDoubleOp(value))
|
2023-02-24 21:49:38 +00:00
|
|
|
substitute(function, inst, value);
|
|
|
|
else if (inst.a.kind == IrOpKind::VmReg)
|
2023-04-28 20:55:13 +01:00
|
|
|
state.substituteOrRecordVmRegLoad(inst);
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
2023-07-07 21:10:48 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::LOAD_INT:
|
2023-07-07 21:10:48 +01:00
|
|
|
{
|
|
|
|
IrOp value = state.tryGetValue(inst.a);
|
|
|
|
|
|
|
|
if (function.asIntOp(value))
|
2023-02-24 21:49:38 +00:00
|
|
|
substitute(function, inst, value);
|
|
|
|
else if (inst.a.kind == IrOpKind::VmReg)
|
2023-04-28 20:55:13 +01:00
|
|
|
state.substituteOrRecordVmRegLoad(inst);
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
2023-07-07 21:10:48 +01:00
|
|
|
}
|
2024-01-27 03:20:56 +00:00
|
|
|
case IrCmd::LOAD_FLOAT:
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::LOAD_TVALUE:
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
2023-04-28 20:55:13 +01:00
|
|
|
state.substituteOrRecordVmRegLoad(inst);
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
|
|
|
case IrCmd::STORE_TAG:
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
|
|
|
{
|
2023-04-28 20:55:13 +01:00
|
|
|
const IrOp source = inst.a;
|
2023-08-18 19:15:41 +01:00
|
|
|
|
|
|
|
IrCmd activeLoadCmd = IrCmd::NOP;
|
|
|
|
uint32_t activeLoadValue = kInvalidInstIdx;
|
2023-04-28 20:55:13 +01:00
|
|
|
|
2023-02-24 21:49:38 +00:00
|
|
|
if (inst.b.kind == IrOpKind::Constant)
|
|
|
|
{
|
|
|
|
uint8_t value = function.tagOp(inst.b);
|
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
// STORE_TAG usually follows a store of the value, but it also bumps the version of the whole register
|
2023-08-18 19:15:41 +01:00
|
|
|
// To be able to propagate STORE_*** into LOAD_***, we find active LOAD_*** value and recreate it with updated version
|
2023-04-28 20:55:13 +01:00
|
|
|
// Register in this optimization cannot be captured to avoid complications in lowering (IrValueLocationTracking doesn't model it)
|
2023-08-18 19:15:41 +01:00
|
|
|
std::tie(activeLoadCmd, activeLoadValue) = state.getPreviousVersionedLoadForTag(value, source);
|
2023-04-28 20:55:13 +01:00
|
|
|
|
|
|
|
if (state.tryGetTag(source) == value)
|
2024-06-21 00:37:55 +01:00
|
|
|
kill(function, inst);
|
2023-02-24 21:49:38 +00:00
|
|
|
else
|
2023-04-28 20:55:13 +01:00
|
|
|
state.saveTag(source, value);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2023-04-28 20:55:13 +01:00
|
|
|
state.invalidateTag(source);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
2023-04-28 20:55:13 +01:00
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
// Future LOAD_*** instructions can re-use previous register version load
|
|
|
|
if (activeLoadValue != kInvalidInstIdx)
|
|
|
|
state.valueMap[state.versionedVmRegLoad(activeLoadCmd, source)] = activeLoadValue;
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
break;
|
2024-01-12 22:25:27 +00:00
|
|
|
case IrCmd::STORE_EXTRA:
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::STORE_POINTER:
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
2023-04-28 20:55:13 +01:00
|
|
|
{
|
2023-02-24 21:49:38 +00:00
|
|
|
state.invalidateValue(inst.a);
|
2023-08-04 20:18:54 +01:00
|
|
|
|
2023-11-17 18:46:18 +00:00
|
|
|
if (inst.b.kind == IrOpKind::Inst)
|
2023-08-04 20:18:54 +01:00
|
|
|
{
|
2023-11-17 18:46:18 +00:00
|
|
|
state.forwardVmRegStoreToLoad(inst, IrCmd::LOAD_POINTER);
|
|
|
|
|
|
|
|
if (IrInst* instOp = function.asInstOp(inst.b); instOp && instOp->cmd == IrCmd::NEW_TABLE)
|
2023-08-04 20:18:54 +01:00
|
|
|
{
|
2023-11-17 18:46:18 +00:00
|
|
|
if (RegisterInfo* info = state.tryGetRegisterInfo(inst.a))
|
|
|
|
{
|
|
|
|
info->knownNotReadonly = true;
|
|
|
|
info->knownNoMetatable = true;
|
|
|
|
info->knownTableArraySize = function.uintOp(instOp->a);
|
|
|
|
}
|
2023-08-04 20:18:54 +01:00
|
|
|
}
|
|
|
|
}
|
2023-04-28 20:55:13 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
|
|
|
case IrCmd::STORE_DOUBLE:
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
|
|
|
{
|
|
|
|
if (inst.b.kind == IrOpKind::Constant)
|
|
|
|
{
|
2023-03-17 19:20:37 +00:00
|
|
|
if (state.tryGetValue(inst.a) == inst.b)
|
2023-02-24 21:49:38 +00:00
|
|
|
kill(function, inst);
|
|
|
|
else
|
|
|
|
state.saveValue(inst.a, inst.b);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
state.invalidateValue(inst.a);
|
2023-04-28 20:55:13 +01:00
|
|
|
state.forwardVmRegStoreToLoad(inst, IrCmd::LOAD_DOUBLE);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IrCmd::STORE_INT:
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
|
|
|
{
|
|
|
|
if (inst.b.kind == IrOpKind::Constant)
|
|
|
|
{
|
2023-03-17 19:20:37 +00:00
|
|
|
if (state.tryGetValue(inst.a) == inst.b)
|
2023-02-24 21:49:38 +00:00
|
|
|
kill(function, inst);
|
|
|
|
else
|
|
|
|
state.saveValue(inst.a, inst.b);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
state.invalidateValue(inst.a);
|
2023-04-28 20:55:13 +01:00
|
|
|
state.forwardVmRegStoreToLoad(inst, IrCmd::LOAD_INT);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2023-04-07 22:01:29 +01:00
|
|
|
case IrCmd::STORE_VECTOR:
|
|
|
|
state.invalidateValue(inst.a);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::STORE_TVALUE:
|
2023-08-18 19:15:41 +01:00
|
|
|
if (inst.a.kind == IrOpKind::VmReg || inst.a.kind == IrOpKind::Inst)
|
|
|
|
{
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
2023-12-02 07:46:57 +00:00
|
|
|
{
|
2024-02-16 02:04:39 +00:00
|
|
|
if (inst.b.kind == IrOpKind::Inst)
|
2023-12-02 07:46:57 +00:00
|
|
|
{
|
|
|
|
if (uint32_t* prevIdx = state.getPreviousVersionedLoadIndex(IrCmd::LOAD_TVALUE, inst.a))
|
|
|
|
{
|
|
|
|
if (*prevIdx == inst.b.index)
|
|
|
|
{
|
|
|
|
kill(function, inst);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
state.invalidate(inst.a);
|
2023-12-02 07:46:57 +00:00
|
|
|
}
|
2023-08-18 19:15:41 +01:00
|
|
|
|
|
|
|
uint8_t tag = state.tryGetTag(inst.b);
|
2024-01-27 03:20:56 +00:00
|
|
|
|
|
|
|
// We know the tag of some instructions that result in TValue
|
2024-03-01 18:45:26 +00:00
|
|
|
if (tag == 0xff)
|
2024-01-27 03:20:56 +00:00
|
|
|
{
|
|
|
|
if (IrInst* arg = function.asInstOp(inst.b))
|
|
|
|
{
|
2024-03-30 23:14:44 +00:00
|
|
|
if (arg->cmd == IrCmd::TAG_VECTOR)
|
|
|
|
tag = LUA_TVECTOR;
|
CodeGen: Preserve known tags for LOAD_TVALUE synthesized from LOADK (#1201)
When lowering LOADK for booleans/numbers/nils, we deconstruct the
operation using STORE_TAG which informs the rest of the optimization
pipeline about the tag of the value. This is helpful to remove various
tag checks.
When the constant is a string or a vector, we just use
LOAD_TVALUE/STORE_TVALUE. For strings, this could be replaced by pointer
load/store, but for vectors there's no great alternative using current
IR ops; in either case, the optimization needs to be carefully examined
for profitability as simply copying constants into registers for
function calls could become more expensive.
However, there are cases where it's still valuable to preserve the tag.
For vectors, doing any math with vector constants contains tag checks
that could be removed. For both strings and vectors, storing them into a
table has a barrier that for vectors could be elided, and for strings
could be simplified as there's no need to confirm the tag.
With this change we now carry the optional tag of the value with
LOAD_TVALUE. This has no performance effect on existing benchmarks but
does reduce the generated code for benchmarks by ~0.1%, and it makes
vector code more efficient (~5% lift on X64 log1p approximation).
2024-03-15 16:49:00 +00:00
|
|
|
|
2024-04-25 23:26:09 +01:00
|
|
|
if (arg->cmd == IrCmd::LOAD_TVALUE && arg->c.kind != IrOpKind::None)
|
CodeGen: Preserve known tags for LOAD_TVALUE synthesized from LOADK (#1201)
When lowering LOADK for booleans/numbers/nils, we deconstruct the
operation using STORE_TAG which informs the rest of the optimization
pipeline about the tag of the value. This is helpful to remove various
tag checks.
When the constant is a string or a vector, we just use
LOAD_TVALUE/STORE_TVALUE. For strings, this could be replaced by pointer
load/store, but for vectors there's no great alternative using current
IR ops; in either case, the optimization needs to be carefully examined
for profitability as simply copying constants into registers for
function calls could become more expensive.
However, there are cases where it's still valuable to preserve the tag.
For vectors, doing any math with vector constants contains tag checks
that could be removed. For both strings and vectors, storing them into a
table has a barrier that for vectors could be elided, and for strings
could be simplified as there's no need to confirm the tag.
With this change we now carry the optional tag of the value with
LOAD_TVALUE. This has no performance effect on existing benchmarks but
does reduce the generated code for benchmarks by ~0.1%, and it makes
vector code more efficient (~5% lift on X64 log1p approximation).
2024-03-15 16:49:00 +00:00
|
|
|
tag = function.tagOp(arg->c);
|
2024-01-27 03:20:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
IrOp value = state.tryGetValue(inst.b);
|
|
|
|
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
|
|
|
{
|
|
|
|
if (tag != 0xff)
|
|
|
|
state.saveTag(inst.a, tag);
|
|
|
|
|
|
|
|
if (value.kind != IrOpKind::None)
|
|
|
|
state.saveValue(inst.a, value);
|
|
|
|
}
|
|
|
|
|
|
|
|
IrCmd activeLoadCmd = IrCmd::NOP;
|
|
|
|
uint32_t activeLoadValue = kInvalidInstIdx;
|
|
|
|
|
2024-04-12 18:18:49 +01:00
|
|
|
// If we know the tag, we can try extracting the value from a register used by LOAD_TVALUE
|
|
|
|
// To do that, we have to ensure that the register link of the source value is still valid
|
2024-05-31 20:18:18 +01:00
|
|
|
if (tag != 0xff && state.tryGetRegLink(inst.b) != nullptr)
|
2023-08-18 19:15:41 +01:00
|
|
|
{
|
|
|
|
if (IrInst* arg = function.asInstOp(inst.b); arg && arg->cmd == IrCmd::LOAD_TVALUE && arg->a.kind == IrOpKind::VmReg)
|
|
|
|
{
|
|
|
|
std::tie(activeLoadCmd, activeLoadValue) = state.getPreviousVersionedLoadForTag(tag, arg->a);
|
|
|
|
|
|
|
|
if (activeLoadValue != kInvalidInstIdx)
|
|
|
|
value = IrOp{IrOpKind::Inst, activeLoadValue};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-06-29 01:34:49 +01:00
|
|
|
// If we have constant tag and value, replace TValue store with tag/value pair store
|
|
|
|
bool canSplitTvalueStore = false;
|
|
|
|
|
|
|
|
if (tag == LUA_TBOOLEAN &&
|
|
|
|
(value.kind == IrOpKind::Inst || (value.kind == IrOpKind::Constant && function.constOp(value).kind == IrConstKind::Int)))
|
|
|
|
canSplitTvalueStore = true;
|
CodeGen: Rewrite dot product lowering using a dedicated IR instruction (#1512)
Instead of doing the dot product related math in scalar IR, we lift the
computation into a dedicated IR instruction.
On x64, we can use VDPPS which was more or less tailor made for this
purpose. This is better than manual scalar lowering that requires
reloading components from memory; it's not always a strict improvement
over the shuffle+add version (which we never had), but this can now be
adjusted in the IR lowering in an optimal fashion (maybe even based on
CPU vendor, although that'd create issues for offline compilation).
On A64, we can either use naive adds or paired adds, as there is no
dedicated vector-wide horizontal instruction until SVE. Both run at
about the same performance on M2, but paired adds require fewer
instructions and temporaries.
I've measured this using mesh-normal-vector benchmark, changing the
benchmark to just report the time of the second loop inside
`calculate_normals`, testing master vs #1504 vs this PR, also increasing
the grid size to 400 for more stable timings.
On Zen 4 (7950X), this PR is comfortably ~8% faster vs master, while I
see neutral to negative results in #1504.
On M2 (base), this PR is ~28% faster vs master, while #1504 is only
about ~10% faster.
If I measure the second loop in `calculate_tangent_space` instead, I
get:
On Zen 4 (7950X), this PR is ~12% faster vs master, while #1504 is ~3%
faster
On M2 (base), this PR is ~24% faster vs master, while #1504 is only
about ~13% faster.
Note that the loops in question are not quite optimal, as they store and
reload various vectors to dictionary values due to inappropriate use of
locals. The underlying gains in individual functions are thus larger
than the numbers above; for example, changing the `calculate_normals`
loop to use a local variable to store the normalized vector (but still
saving the result to dictionary value), I get a ~24% performance
increase from this PR on Zen4 vs master instead of just 8% (#1504 is
~15% slower in this setup).
2024-11-09 00:23:09 +00:00
|
|
|
else if (tag == LUA_TNUMBER &&
|
|
|
|
(value.kind == IrOpKind::Inst || (value.kind == IrOpKind::Constant && function.constOp(value).kind == IrConstKind::Double)))
|
2024-06-29 01:34:49 +01:00
|
|
|
canSplitTvalueStore = true;
|
|
|
|
else if (tag != 0xff && isGCO(tag) && value.kind == IrOpKind::Inst)
|
|
|
|
canSplitTvalueStore = true;
|
|
|
|
|
|
|
|
if (canSplitTvalueStore)
|
2023-08-18 19:15:41 +01:00
|
|
|
{
|
2024-06-29 01:34:49 +01:00
|
|
|
replace(function, block, index, {IrCmd::STORE_SPLIT_TVALUE, inst.a, build.constTag(tag), value, inst.c});
|
2023-08-18 19:15:41 +01:00
|
|
|
|
2024-06-29 01:34:49 +01:00
|
|
|
// Value can be propagated to future loads of the same register
|
|
|
|
if (inst.a.kind == IrOpKind::VmReg && activeLoadValue != kInvalidInstIdx)
|
|
|
|
state.valueMap[state.versionedVmRegLoad(activeLoadCmd, inst.a)] = activeLoadValue;
|
2023-08-18 19:15:41 +01:00
|
|
|
}
|
2024-06-29 01:34:49 +01:00
|
|
|
else if (inst.a.kind == IrOpKind::VmReg)
|
2023-08-18 19:15:41 +01:00
|
|
|
{
|
2024-06-29 01:34:49 +01:00
|
|
|
state.forwardVmRegStoreToLoad(inst, IrCmd::LOAD_TVALUE);
|
2023-08-18 19:15:41 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IrCmd::STORE_SPLIT_TVALUE:
|
2023-02-24 21:49:38 +00:00
|
|
|
if (inst.a.kind == IrOpKind::VmReg)
|
|
|
|
{
|
|
|
|
state.invalidate(inst.a);
|
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
state.saveTag(inst.a, function.tagOp(inst.b));
|
2023-04-28 20:55:13 +01:00
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
if (inst.c.kind == IrOpKind::Constant)
|
|
|
|
state.saveValue(inst.a, inst.c);
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IrCmd::JUMP_IF_TRUTHY:
|
|
|
|
if (uint8_t tag = state.tryGetTag(inst.a); tag != 0xff)
|
|
|
|
{
|
|
|
|
if (tag == LUA_TNIL)
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.c});
|
|
|
|
else if (tag != LUA_TBOOLEAN)
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.b});
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IrCmd::JUMP_IF_FALSY:
|
|
|
|
if (uint8_t tag = state.tryGetTag(inst.a); tag != 0xff)
|
|
|
|
{
|
|
|
|
if (tag == LUA_TNIL)
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.b});
|
|
|
|
else if (tag != LUA_TBOOLEAN)
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.c});
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IrCmd::JUMP_EQ_TAG:
|
|
|
|
{
|
|
|
|
uint8_t tagA = inst.a.kind == IrOpKind::Constant ? function.tagOp(inst.a) : state.tryGetTag(inst.a);
|
|
|
|
uint8_t tagB = inst.b.kind == IrOpKind::Constant ? function.tagOp(inst.b) : state.tryGetTag(inst.b);
|
|
|
|
|
|
|
|
if (tagA != 0xff && tagB != 0xff)
|
|
|
|
{
|
|
|
|
if (tagA == tagB)
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.c});
|
|
|
|
else
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.d});
|
|
|
|
}
|
2023-11-10 21:10:07 +00:00
|
|
|
else if (inst.a == inst.b)
|
2023-02-24 21:49:38 +00:00
|
|
|
{
|
2023-09-08 01:13:49 +01:00
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.c});
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2023-09-08 01:13:49 +01:00
|
|
|
case IrCmd::JUMP_CMP_INT:
{
    // Fold an integer compare-and-branch into an unconditional JUMP when both operands are known constants.
    // Operands as used here: a, b = integers being compared, c = comparison condition,
    // d = block taken when the comparison holds, e = block taken otherwise.
    // This mirrors the JUMP_CMP_NUM handling, which shares the same 'a, b, condition' prefix.
    std::optional<int> valueA = function.asIntOp(inst.a.kind == IrOpKind::Constant ? inst.a : state.tryGetValue(inst.a));
    std::optional<int> valueB = function.asIntOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b));

    if (valueA && valueB)
    {
        // inst.c holds the condition, so it must never be used as a jump target;
        // the targets are the two operands that follow it
        if (compare(*valueA, *valueB, conditionOp(inst.c)))
            replace(function, block, index, {IrCmd::JUMP, inst.d});
        else
            replace(function, block, index, {IrCmd::JUMP, inst.e});
    }
    break;
}
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::JUMP_CMP_NUM:
|
|
|
|
{
|
|
|
|
std::optional<double> valueA = function.asDoubleOp(inst.a.kind == IrOpKind::Constant ? inst.a : state.tryGetValue(inst.a));
|
|
|
|
std::optional<double> valueB = function.asDoubleOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b));
|
|
|
|
|
|
|
|
if (valueA && valueB)
|
|
|
|
{
|
2023-03-24 18:03:04 +00:00
|
|
|
if (compare(*valueA, *valueB, conditionOp(inst.c)))
|
2023-02-24 21:49:38 +00:00
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.d});
|
|
|
|
else
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.e});
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2023-10-21 02:10:30 +01:00
|
|
|
case IrCmd::JUMP_FORN_LOOP_COND:
|
|
|
|
{
|
|
|
|
std::optional<double> step = function.asDoubleOp(inst.c.kind == IrOpKind::Constant ? inst.c : state.tryGetValue(inst.c));
|
|
|
|
|
|
|
|
if (!step)
|
|
|
|
break;
|
|
|
|
|
|
|
|
std::optional<double> idx = function.asDoubleOp(inst.a.kind == IrOpKind::Constant ? inst.a : state.tryGetValue(inst.a));
|
|
|
|
std::optional<double> limit = function.asDoubleOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b));
|
|
|
|
|
|
|
|
if (*step > 0)
|
|
|
|
{
|
|
|
|
if (idx && limit)
|
|
|
|
{
|
|
|
|
if (compare(*idx, *limit, IrCondition::NotLessEqual))
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.e});
|
|
|
|
else
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.d});
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
replace(function, block, index, IrInst{IrCmd::JUMP_CMP_NUM, inst.a, inst.b, build.cond(IrCondition::NotLessEqual), inst.e, inst.d});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (idx && limit)
|
|
|
|
{
|
|
|
|
if (compare(*limit, *idx, IrCondition::NotLessEqual))
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.e});
|
|
|
|
else
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.d});
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
replace(function, block, index, IrInst{IrCmd::JUMP_CMP_NUM, inst.b, inst.a, build.cond(IrCondition::NotLessEqual), inst.e, inst.d});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::GET_UPVALUE:
|
|
|
|
state.invalidate(inst.a);
|
|
|
|
break;
|
2023-08-18 19:15:41 +01:00
|
|
|
case IrCmd::SET_UPVALUE:
|
|
|
|
if (inst.b.kind == IrOpKind::VmReg)
|
|
|
|
{
|
|
|
|
if (uint8_t tag = state.tryGetTag(inst.b); tag != 0xff)
|
|
|
|
{
|
|
|
|
replace(function, inst.c, build.constTag(tag));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::CHECK_TAG:
|
|
|
|
{
|
|
|
|
uint8_t b = function.tagOp(inst.b);
|
2024-04-25 23:26:09 +01:00
|
|
|
uint8_t tag = state.tryGetTag(inst.a);
|
2023-02-24 21:49:38 +00:00
|
|
|
|
2024-04-25 23:26:09 +01:00
|
|
|
if (tag == 0xff)
|
2023-02-24 21:49:38 +00:00
|
|
|
{
|
2024-04-25 23:26:09 +01:00
|
|
|
if (IrOp value = state.tryGetValue(inst.a); value.kind == IrOpKind::Constant)
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2024-04-25 23:26:09 +01:00
|
|
|
if (function.constOp(value).kind == IrConstKind::Double)
|
|
|
|
tag = LUA_TNUMBER;
|
2024-03-30 23:14:44 +00:00
|
|
|
}
|
2024-04-25 23:26:09 +01:00
|
|
|
}
|
2024-03-30 23:14:44 +00:00
|
|
|
|
2024-04-25 23:26:09 +01:00
|
|
|
if (tag != 0xff)
|
|
|
|
{
|
|
|
|
if (tag == b)
|
2024-03-30 23:14:44 +00:00
|
|
|
{
|
2024-04-25 23:26:09 +01:00
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.c, build.undef());
|
2023-05-25 22:36:34 +01:00
|
|
|
else
|
2024-04-25 23:26:09 +01:00
|
|
|
kill(function, inst);
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
else
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2024-04-25 23:26:09 +01:00
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.c}); // Shows a conflict in assumptions on this path
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2024-04-25 23:26:09 +01:00
|
|
|
state.updateTag(inst.a, b); // We can assume the tag value going forward
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2023-08-04 20:18:54 +01:00
|
|
|
case IrCmd::CHECK_TRUTHY:
|
|
|
|
// It is possible to check if current tag in state is truthy or not, but this case almost never comes up
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::CHECK_READONLY:
|
|
|
|
if (RegisterInfo* info = state.tryGetRegisterInfo(inst.a))
|
|
|
|
{
|
|
|
|
if (info->knownNotReadonly)
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.b, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
else
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2023-02-24 21:49:38 +00:00
|
|
|
info->knownNotReadonly = true;
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IrCmd::CHECK_NO_METATABLE:
|
|
|
|
if (RegisterInfo* info = state.tryGetRegisterInfo(inst.a))
|
|
|
|
{
|
|
|
|
if (info->knownNoMetatable)
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.b, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
else
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2023-02-24 21:49:38 +00:00
|
|
|
info->knownNoMetatable = true;
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case IrCmd::CHECK_SAFE_ENV:
|
|
|
|
if (state.inSafeEnv)
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.a, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
else
|
2023-05-25 22:36:34 +01:00
|
|
|
{
|
2023-02-24 21:49:38 +00:00
|
|
|
state.inSafeEnv = true;
|
2023-05-25 22:36:34 +01:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
2023-11-10 21:10:07 +00:00
|
|
|
case IrCmd::CHECK_BUFFER_LEN:
|
2023-12-02 07:46:57 +00:00
|
|
|
{
|
|
|
|
std::optional<int> bufferOffset = function.asIntOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b));
|
|
|
|
int accessSize = function.intOp(inst.c);
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(accessSize > 0);
|
2023-12-02 07:46:57 +00:00
|
|
|
|
|
|
|
if (bufferOffset)
|
|
|
|
{
|
|
|
|
// Negative offsets and offsets overflowing signed integer will jump to fallback, no need to keep the check
|
|
|
|
if (*bufferOffset < 0 || unsigned(*bufferOffset) + unsigned(accessSize) >= unsigned(INT_MAX))
|
|
|
|
{
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.d});
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (uint32_t prevIdx : state.checkBufferLenCache)
|
|
|
|
{
|
|
|
|
IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.a != inst.a || prev.c != inst.c)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (prev.b == inst.b)
|
|
|
|
{
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.d, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
else if (inst.b.kind == IrOpKind::Constant && prev.b.kind == IrOpKind::Constant)
|
|
|
|
{
|
|
|
|
// If arguments are different constants, we can check if a larger bound was already tested or if the previous bound can be raised
|
|
|
|
int currBound = function.intOp(inst.b);
|
|
|
|
int prevBound = function.intOp(prev.b);
|
|
|
|
|
|
|
|
// Negative and overflowing constant offsets should already be replaced with unconditional jumps to a fallback
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(currBound >= 0);
|
|
|
|
CODEGEN_ASSERT(prevBound >= 0);
|
2023-12-02 07:46:57 +00:00
|
|
|
|
|
|
|
if (unsigned(currBound) >= unsigned(prevBound))
|
|
|
|
replace(function, prev.b, inst.b);
|
|
|
|
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.d, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int(state.checkBufferLenCache.size()) < FInt::LuauCodeGenReuseSlotLimit)
|
|
|
|
state.checkBufferLenCache.push_back(index);
|
2023-11-10 21:10:07 +00:00
|
|
|
break;
|
2023-12-02 07:46:57 +00:00
|
|
|
}
|
2024-06-07 18:51:12 +01:00
|
|
|
case IrCmd::CHECK_USERDATA_TAG:
|
|
|
|
{
|
|
|
|
for (uint32_t prevIdx : state.useradataTagCache)
|
|
|
|
{
|
|
|
|
IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.cmd == IrCmd::CHECK_USERDATA_TAG)
|
|
|
|
{
|
|
|
|
if (prev.a != inst.a || prev.b != inst.b)
|
|
|
|
continue;
|
|
|
|
}
|
2024-07-08 22:57:06 +01:00
|
|
|
else if (prev.cmd == IrCmd::NEW_USERDATA)
|
2024-06-07 18:51:12 +01:00
|
|
|
{
|
|
|
|
if (inst.a.kind != IrOpKind::Inst || prevIdx != inst.a.index || prev.b != inst.b)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.c, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int(state.useradataTagCache.size()) < FInt::LuauCodeGenReuseUdataTagLimit)
|
|
|
|
state.useradataTagCache.push_back(index);
|
|
|
|
break;
|
|
|
|
}
|
2023-11-10 21:10:07 +00:00
|
|
|
case IrCmd::BUFFER_READI8:
|
|
|
|
case IrCmd::BUFFER_READU8:
|
|
|
|
case IrCmd::BUFFER_WRITEI8:
|
|
|
|
case IrCmd::BUFFER_READI16:
|
|
|
|
case IrCmd::BUFFER_READU16:
|
|
|
|
case IrCmd::BUFFER_WRITEI16:
|
|
|
|
case IrCmd::BUFFER_READI32:
|
|
|
|
case IrCmd::BUFFER_WRITEI32:
|
|
|
|
case IrCmd::BUFFER_READF32:
|
|
|
|
case IrCmd::BUFFER_WRITEF32:
|
|
|
|
case IrCmd::BUFFER_READF64:
|
|
|
|
case IrCmd::BUFFER_WRITEF64:
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::CHECK_GC:
|
|
|
|
// It is enough to perform a GC check once in a block
|
|
|
|
if (state.checkedGc)
|
2024-02-02 21:32:42 +00:00
|
|
|
{
|
2023-02-24 21:49:38 +00:00
|
|
|
kill(function, inst);
|
2024-02-02 21:32:42 +00:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
else
|
2024-02-02 21:32:42 +00:00
|
|
|
{
|
2023-02-24 21:49:38 +00:00
|
|
|
state.checkedGc = true;
|
2024-02-02 21:32:42 +00:00
|
|
|
|
2024-03-01 18:45:26 +00:00
|
|
|
// GC assist might modify table data (hash part)
|
|
|
|
state.invalidateHeapTableData();
|
2024-02-02 21:32:42 +00:00
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
|
|
|
case IrCmd::BARRIER_OBJ:
|
|
|
|
case IrCmd::BARRIER_TABLE_FORWARD:
|
|
|
|
if (inst.b.kind == IrOpKind::VmReg)
|
|
|
|
{
|
|
|
|
if (uint8_t tag = state.tryGetTag(inst.b); tag != 0xff)
|
|
|
|
{
|
|
|
|
// If the written object is not collectable, barrier is not required
|
|
|
|
if (!isGCO(tag))
|
|
|
|
kill(function, inst);
|
2023-06-24 07:19:39 +01:00
|
|
|
else
|
|
|
|
replace(function, inst.c, build.constTag(tag));
|
2023-02-24 21:49:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2023-04-14 19:06:22 +01:00
|
|
|
|
2023-03-03 20:21:14 +00:00
|
|
|
case IrCmd::FASTCALL:
|
2024-03-15 23:37:39 +00:00
|
|
|
{
|
2024-06-21 00:37:55 +01:00
|
|
|
LuauBuiltinFunction bfid = LuauBuiltinFunction(function.uintOp(inst.a));
|
|
|
|
int firstReturnReg = vmRegOp(inst.b);
|
2024-08-23 17:35:30 +01:00
|
|
|
int nresults = function.intOp(inst.d);
|
2024-03-15 23:37:39 +00:00
|
|
|
|
2024-06-21 00:37:55 +01:00
|
|
|
// TODO: FASTCALL is more restrictive than INVOKE_FASTCALL; we should either determine the exact semantics, or rework it
|
|
|
|
handleBuiltinEffects(state, bfid, firstReturnReg, nresults);
|
2024-03-15 23:37:39 +00:00
|
|
|
|
2024-06-21 00:37:55 +01:00
|
|
|
switch (bfid)
|
2024-03-15 23:37:39 +00:00
|
|
|
{
|
2024-06-21 00:37:55 +01:00
|
|
|
case LBF_MATH_MODF:
|
|
|
|
case LBF_MATH_FREXP:
|
|
|
|
state.updateTag(IrOp{IrOpKind::VmReg, uint8_t(firstReturnReg)}, LUA_TNUMBER);
|
|
|
|
|
|
|
|
if (nresults > 1)
|
|
|
|
state.updateTag(IrOp{IrOpKind::VmReg, uint8_t(firstReturnReg + 1)}, LUA_TNUMBER);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
2024-03-15 23:37:39 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2023-03-03 20:21:14 +00:00
|
|
|
case IrCmd::INVOKE_FASTCALL:
|
2024-08-23 17:35:30 +01:00
|
|
|
handleBuiltinEffects(state, LuauBuiltinFunction(function.uintOp(inst.a)), vmRegOp(inst.b), function.intOp(inst.g));
|
2023-03-03 20:21:14 +00:00
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
|
|
|
|
// These instructions don't have an effect on register/memory state we are tracking
|
|
|
|
case IrCmd::NOP:
|
|
|
|
case IrCmd::LOAD_ENV:
|
2023-09-30 02:13:05 +01:00
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::GET_ARR_ADDR:
|
2023-09-30 02:13:05 +01:00
|
|
|
for (uint32_t prevIdx : state.getArrAddrCache)
|
|
|
|
{
|
|
|
|
const IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.a == inst.a && prev.b == inst.b)
|
|
|
|
{
|
|
|
|
substitute(function, inst, IrOp{IrOpKind::Inst, prevIdx});
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int(state.getArrAddrCache.size()) < FInt::LuauCodeGenReuseSlotLimit)
|
|
|
|
state.getArrAddrCache.push_back(index);
|
2023-09-01 18:58:27 +01:00
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::GET_SLOT_NODE_ADDR:
|
2025-01-10 19:34:39 +00:00
|
|
|
if (FFlag::LuauCodeGenLimitLiveSlotReuse)
|
2023-09-01 18:58:27 +01:00
|
|
|
{
|
2025-01-10 19:34:39 +00:00
|
|
|
for (size_t i = 0; i < state.getSlotNodeCache.size(); i++)
|
2023-09-01 18:58:27 +01:00
|
|
|
{
|
2025-01-10 19:34:39 +00:00
|
|
|
auto&& [prevIdx, num, lastNum] = state.getSlotNodeCache[i];
|
|
|
|
|
|
|
|
const IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.a == inst.a && prev.c == inst.c)
|
|
|
|
{
|
|
|
|
// Check if this reuse will increase the overall register pressure over the limit
|
|
|
|
int limit = FInt::LuauCodeGenLiveSlotReuseLimit;
|
|
|
|
|
|
|
|
if (int(state.getSlotNodeCache.size()) > limit && state.getMaxInternalOverlap(state.getSlotNodeCache, i) > limit)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Update live range of the value from the optimization standpoint
|
|
|
|
lastNum = state.instPos;
|
|
|
|
|
|
|
|
substitute(function, inst, IrOp{IrOpKind::Inst, prevIdx});
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
2023-09-01 18:58:27 +01:00
|
|
|
}
|
2025-01-10 19:34:39 +00:00
|
|
|
|
|
|
|
if (int(state.getSlotNodeCache.size()) < FInt::LuauCodeGenReuseSlotLimit)
|
|
|
|
state.getSlotNodeCache.push_back({index, state.instPos, state.instPos});
|
2023-09-01 18:58:27 +01:00
|
|
|
}
|
2025-01-10 19:34:39 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
for (uint32_t prevIdx : state.getSlotNodeCache_DEPRECATED)
|
|
|
|
{
|
|
|
|
const IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.a == inst.a && prev.c == inst.c)
|
|
|
|
{
|
|
|
|
substitute(function, inst, IrOp{IrOpKind::Inst, prevIdx});
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
}
|
2023-09-01 18:58:27 +01:00
|
|
|
|
2025-01-10 19:34:39 +00:00
|
|
|
if (int(state.getSlotNodeCache_DEPRECATED.size()) < FInt::LuauCodeGenReuseSlotLimit)
|
|
|
|
state.getSlotNodeCache_DEPRECATED.push_back(index);
|
|
|
|
}
|
2023-09-01 18:58:27 +01:00
|
|
|
break;
|
2023-03-17 19:20:37 +00:00
|
|
|
case IrCmd::GET_HASH_NODE_ADDR:
|
2023-07-28 16:13:53 +01:00
|
|
|
case IrCmd::GET_CLOSURE_UPVAL_ADDR:
|
2023-04-28 20:55:13 +01:00
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::ADD_INT:
|
|
|
|
case IrCmd::SUB_INT:
|
2024-11-27 12:44:39 +00:00
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::ADD_NUM:
|
|
|
|
case IrCmd::SUB_NUM:
|
2024-11-27 12:44:39 +00:00
|
|
|
if (FFlag::LuauCodeGenArithOpt)
|
|
|
|
{
|
|
|
|
if (std::optional<double> k = function.asDoubleOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b)))
|
|
|
|
{
|
|
|
|
// a + 0.0 and a - (-0.0) can't be folded since the behavior is different for negative zero
|
|
|
|
// however, a - 0.0 and a + (-0.0) can be folded into a
|
|
|
|
if (*k == 0.0 && bool(signbit(*k)) == (inst.cmd == IrCmd::ADD_NUM))
|
|
|
|
substitute(function, inst, inst.a);
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::MUL_NUM:
|
2024-11-27 12:44:39 +00:00
|
|
|
if (FFlag::LuauCodeGenArithOpt)
|
|
|
|
{
|
|
|
|
if (std::optional<double> k = function.asDoubleOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b)))
|
|
|
|
{
|
|
|
|
if (*k == 1.0) // a * 1.0 = a
|
|
|
|
substitute(function, inst, inst.a);
|
|
|
|
else if (*k == 2.0) // a * 2.0 = a + a
|
|
|
|
replace(function, block, index, {IrCmd::ADD_NUM, inst.a, inst.a});
|
|
|
|
else if (*k == -1.0) // a * -1.0 = -a
|
|
|
|
replace(function, block, index, {IrCmd::UNM_NUM, inst.a});
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::DIV_NUM:
|
2024-11-27 12:44:39 +00:00
|
|
|
if (FFlag::LuauCodeGenArithOpt)
|
|
|
|
{
|
|
|
|
if (std::optional<double> k = function.asDoubleOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b)))
|
|
|
|
{
|
|
|
|
if (*k == 1.0) // a / 1.0 = a
|
|
|
|
substitute(function, inst, inst.a);
|
|
|
|
else if (*k == -1.0) // a / -1.0 = -a
|
|
|
|
replace(function, block, index, {IrCmd::UNM_NUM, inst.a});
|
|
|
|
else if (int exp = 0; frexp(*k, &exp) == 0.5 && exp >= -1000 && exp <= 1000) // a / 2^k = a * 2^-k
|
|
|
|
replace(function, block, index, {IrCmd::MUL_NUM, inst.a, build.constDouble(1.0 / *k)});
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-09-01 18:58:27 +01:00
|
|
|
case IrCmd::IDIV_NUM:
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::MOD_NUM:
|
2023-03-03 20:21:14 +00:00
|
|
|
case IrCmd::MIN_NUM:
|
|
|
|
case IrCmd::MAX_NUM:
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::UNM_NUM:
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::FLOOR_NUM:
|
|
|
|
case IrCmd::CEIL_NUM:
|
|
|
|
case IrCmd::ROUND_NUM:
|
|
|
|
case IrCmd::SQRT_NUM:
|
|
|
|
case IrCmd::ABS_NUM:
|
2024-06-29 01:34:49 +01:00
|
|
|
case IrCmd::SIGN_NUM:
|
CodeGen: Implement support for math.lerp lowering (#1609)
To implement math.lerp without branches, we add SELECT_NUM which
selects one of the two inputs based on the comparison condition.
For simplicity, we only support C == D for now; this can be extended to
a more generic version with a IrCondition operand E, but that requires
more work on the SSE side (to flip the comparison for some conditions
like Greater, and expose more generic vcmpsd).
Note: On AArch64 this will effectively result in a change in floating
point
behavior between native code and non-native code: clang synthesizes
fmadd (because floating point contraction is allowed by default, and the
arch always has the instruction), whereas this change will use
fmul+fadd.
I am not sure if this is good or bad, and if this is a problem in C or
not.
Specifically, clang's behavior results in different results between X64
and AArch64 when *not* using codegen, and with this change the behavior
when using codegen is... the same? :)
Fixing this will require either using LERP_NUM instead and hand-coding
lowering, or exposing some sort of "quasi" MADD_NUM (which would
lower to fma on AArch64 and mul+add on X64).
A small benefit to the current approach is `lerp(1, 5, t)`
constant-folds the
subtraction. With LERP_NUM this optimization will need to be implemented
manually as a partial constant-folding for LERP_NUM.
A similar problem exists today for vector.cross & vector.dot. So maybe
this
is not something we need to fix, unsure.
2025-01-16 18:48:27 +00:00
|
|
|
case IrCmd::SELECT_NUM:
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::NOT_ANY:
|
2023-04-28 20:55:13 +01:00
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-08-04 20:18:54 +01:00
|
|
|
case IrCmd::CMP_ANY:
|
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::JUMP:
|
|
|
|
case IrCmd::JUMP_EQ_POINTER:
|
2023-03-17 19:20:37 +00:00
|
|
|
case IrCmd::JUMP_SLOT_MATCH:
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::TABLE_LEN:
|
2023-09-01 18:58:27 +01:00
|
|
|
break;
|
|
|
|
case IrCmd::TABLE_SETNUM:
|
|
|
|
state.invalidateTableArraySize();
|
|
|
|
break;
|
2023-07-07 21:10:48 +01:00
|
|
|
case IrCmd::STRING_LEN:
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::NEW_TABLE:
|
|
|
|
case IrCmd::DUP_TABLE:
|
2023-09-30 02:13:05 +01:00
|
|
|
break;
|
2023-03-17 19:20:37 +00:00
|
|
|
case IrCmd::TRY_NUM_TO_INDEX:
|
2023-09-30 02:13:05 +01:00
|
|
|
for (uint32_t prevIdx : state.tryNumToIndexCache)
|
|
|
|
{
|
|
|
|
const IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.a == inst.a)
|
|
|
|
{
|
|
|
|
substitute(function, inst, IrOp{IrOpKind::Inst, prevIdx});
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int(state.tryNumToIndexCache.size()) < FInt::LuauCodeGenReuseSlotLimit)
|
|
|
|
state.tryNumToIndexCache.push_back(index);
|
|
|
|
break;
|
2023-03-17 19:20:37 +00:00
|
|
|
case IrCmd::TRY_CALL_FASTGETTM:
|
2023-05-19 20:37:30 +01:00
|
|
|
break;
|
2024-06-07 18:51:12 +01:00
|
|
|
case IrCmd::NEW_USERDATA:
|
|
|
|
if (int(state.useradataTagCache.size()) < FInt::LuauCodeGenReuseUdataTagLimit)
|
|
|
|
state.useradataTagCache.push_back(index);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::INT_TO_NUM:
|
2023-04-21 23:14:26 +01:00
|
|
|
case IrCmd::UINT_TO_NUM:
|
2023-05-19 20:37:30 +01:00
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-04-21 23:14:26 +01:00
|
|
|
case IrCmd::NUM_TO_INT:
|
2023-05-19 20:37:30 +01:00
|
|
|
if (IrInst* src = function.asInstOp(inst.a); src && src->cmd == IrCmd::INT_TO_NUM)
|
|
|
|
substitute(function, inst, src->a);
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-04-21 23:14:26 +01:00
|
|
|
case IrCmd::NUM_TO_UINT:
|
2023-05-19 20:37:30 +01:00
|
|
|
if (IrInst* src = function.asInstOp(inst.a); src && src->cmd == IrCmd::UINT_TO_NUM)
|
|
|
|
substitute(function, inst, src->a);
|
|
|
|
else
|
|
|
|
state.substituteOrRecord(inst, index);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::CHECK_ARRAY_SIZE:
|
2023-08-18 19:15:41 +01:00
|
|
|
{
|
|
|
|
std::optional<int> arrayIndex = function.asIntOp(inst.b.kind == IrOpKind::Constant ? inst.b : state.tryGetValue(inst.b));
|
|
|
|
|
2023-10-27 22:18:41 +01:00
|
|
|
// Negative offsets will jump to fallback, no need to keep the check
|
2023-12-02 07:46:57 +00:00
|
|
|
if (arrayIndex && *arrayIndex < 0)
|
2023-10-27 22:18:41 +01:00
|
|
|
{
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.c});
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2023-08-18 19:15:41 +01:00
|
|
|
if (RegisterInfo* info = state.tryGetRegisterInfo(inst.a); info && arrayIndex)
|
|
|
|
{
|
|
|
|
if (info->knownTableArraySize >= 0)
|
|
|
|
{
|
|
|
|
if (unsigned(*arrayIndex) < unsigned(info->knownTableArraySize))
|
|
|
|
{
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.c, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
replace(function, block, index, {IrCmd::JUMP, inst.c});
|
|
|
|
}
|
2023-09-30 02:13:05 +01:00
|
|
|
|
2023-10-27 22:18:41 +01:00
|
|
|
break;
|
2023-08-18 19:15:41 +01:00
|
|
|
}
|
|
|
|
}
|
2023-09-30 02:13:05 +01:00
|
|
|
|
|
|
|
for (uint32_t prevIdx : state.checkArraySizeCache)
|
|
|
|
{
|
|
|
|
const IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.a != inst.a)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
bool sameBoundary = prev.b == inst.b;
|
|
|
|
|
|
|
|
// If arguments are different, in case they are both constant, we can check if a larger bound was already tested
|
|
|
|
if (!sameBoundary && inst.b.kind == IrOpKind::Constant && prev.b.kind == IrOpKind::Constant &&
|
2023-10-27 22:18:41 +01:00
|
|
|
unsigned(function.intOp(inst.b)) < unsigned(function.intOp(prev.b)))
|
2023-09-30 02:13:05 +01:00
|
|
|
sameBoundary = true;
|
|
|
|
|
|
|
|
if (sameBoundary)
|
|
|
|
{
|
|
|
|
if (FFlag::DebugLuauAbortingChecks)
|
|
|
|
replace(function, inst.c, build.undef());
|
|
|
|
else
|
|
|
|
kill(function, inst);
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: it should be possible to update previous check with a higher bound if current and previous checks are against a constant
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int(state.checkArraySizeCache.size()) < FInt::LuauCodeGenReuseSlotLimit)
|
|
|
|
state.checkArraySizeCache.push_back(index);
|
2023-08-18 19:15:41 +01:00
|
|
|
break;
|
|
|
|
}
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::CHECK_SLOT_MATCH:
|
2023-09-01 18:58:27 +01:00
|
|
|
for (uint32_t prevIdx : state.checkSlotMatchCache)
|
|
|
|
{
|
|
|
|
const IrInst& prev = function.instructions[prevIdx];
|
|
|
|
|
|
|
|
if (prev.a == inst.a && prev.b == inst.b)
|
|
|
|
{
|
|
|
|
// Only a check for 'nil' value is left
|
|
|
|
replace(function, block, index, {IrCmd::CHECK_NODE_VALUE, inst.a, inst.c});
|
|
|
|
return; // Break out from both the loop and the switch
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (int(state.checkSlotMatchCache.size()) < FInt::LuauCodeGenReuseSlotLimit)
|
|
|
|
state.checkSlotMatchCache.push_back(index);
|
|
|
|
break;
|
2024-02-21 15:06:11 +00:00
|
|
|
|
|
|
|
case IrCmd::ADD_VEC:
|
|
|
|
case IrCmd::SUB_VEC:
|
|
|
|
case IrCmd::MUL_VEC:
|
|
|
|
case IrCmd::DIV_VEC:
|
CodeGen: Rewrite dot product lowering using a dedicated IR instruction (#1512)
Instead of doing the dot product related math in scalar IR, we lift the
computation into a dedicated IR instruction.
On x64, we can use VDPPS which was more or less tailor made for this
purpose. This is better than manual scalar lowering that requires
reloading components from memory; it's not always a strict improvement
over the shuffle+add version (which we never had), but this can now be
adjusted in the IR lowering in an optimal fashion (maybe even based on
CPU vendor, although that'd create issues for offline compilation).
On A64, we can either use naive adds or paired adds, as there is no
dedicated vector-wide horizontal instruction until SVE. Both run at
about the same performance on M2, but paired adds require fewer
instructions and temporaries.
I've measured this using mesh-normal-vector benchmark, changing the
benchmark to just report the time of the second loop inside
`calculate_normals`, testing master vs #1504 vs this PR, also increasing
the grid size to 400 for more stable timings.
On Zen 4 (7950X), this PR is comfortably ~8% faster vs master, while I
see neutral to negative results in #1504.
On M2 (base), this PR is ~28% faster vs master, while #1504 is only
about ~10% faster.
If I measure the second loop in `calculate_tangent_space` instead, I
get:
On Zen 4 (7950X), this PR is ~12% faster vs master, while #1504 is ~3%
faster
On M2 (base), this PR is ~24% faster vs master, while #1504 is only
about ~13% faster.
Note that the loops in question are not quite optimal, as they store and
reload various vectors to dictionary values due to inappropriate use of
locals. The underlying gains in individual functions are thus larger
than the numbers above; for example, changing the `calculate_normals`
loop to use a local variable to store the normalized vector (but still
saving the result to dictionary value), I get a ~24% performance
increase from this PR on Zen4 vs master instead of just 8% (#1504 is
~15% slower in this setup).
2024-11-09 00:23:09 +00:00
|
|
|
case IrCmd::DOT_VEC:
|
2024-11-22 21:00:51 +00:00
|
|
|
if (inst.cmd == IrCmd::DOT_VEC)
|
|
|
|
LUAU_ASSERT(FFlag::LuauVectorLibNativeDot);
|
|
|
|
|
2024-03-30 23:14:44 +00:00
|
|
|
if (IrInst* a = function.asInstOp(inst.a); a && a->cmd == IrCmd::TAG_VECTOR)
|
|
|
|
replace(function, inst.a, a->a);
|
2024-03-01 18:45:26 +00:00
|
|
|
|
2024-03-30 23:14:44 +00:00
|
|
|
if (IrInst* b = function.asInstOp(inst.b); b && b->cmd == IrCmd::TAG_VECTOR)
|
|
|
|
replace(function, inst.b, b->a);
|
2024-02-21 15:06:11 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case IrCmd::UNM_VEC:
|
2024-03-30 23:14:44 +00:00
|
|
|
if (IrInst* a = function.asInstOp(inst.a); a && a->cmd == IrCmd::TAG_VECTOR)
|
|
|
|
replace(function, inst.a, a->a);
|
2024-02-21 15:06:11 +00:00
|
|
|
break;
|
|
|
|
|
2023-03-17 19:20:37 +00:00
|
|
|
case IrCmd::CHECK_NODE_NO_NEXT:
|
2023-09-01 18:58:27 +01:00
|
|
|
case IrCmd::CHECK_NODE_VALUE:
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::BARRIER_TABLE_BACK:
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::RETURN:
|
|
|
|
case IrCmd::COVERAGE:
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::SET_SAVEDPC: // TODO: we may be able to remove some updates to PC
|
|
|
|
case IrCmd::CLOSE_UPVALS: // Doesn't change memory that we track
|
|
|
|
case IrCmd::CAPTURE:
|
|
|
|
case IrCmd::SUBSTITUTE:
|
|
|
|
case IrCmd::ADJUST_STACK_TO_REG: // Changes stack top, but not the values
|
|
|
|
case IrCmd::ADJUST_STACK_TO_TOP: // Changes stack top, but not the values
|
2023-03-03 20:21:14 +00:00
|
|
|
case IrCmd::CHECK_FASTCALL_RES: // Changes stack top, but not the values
|
2023-04-21 23:14:26 +01:00
|
|
|
case IrCmd::BITAND_UINT:
|
|
|
|
case IrCmd::BITXOR_UINT:
|
|
|
|
case IrCmd::BITOR_UINT:
|
|
|
|
case IrCmd::BITNOT_UINT:
|
|
|
|
case IrCmd::BITLSHIFT_UINT:
|
|
|
|
case IrCmd::BITRSHIFT_UINT:
|
|
|
|
case IrCmd::BITARSHIFT_UINT:
|
|
|
|
case IrCmd::BITRROTATE_UINT:
|
|
|
|
case IrCmd::BITLROTATE_UINT:
|
|
|
|
case IrCmd::BITCOUNTLZ_UINT:
|
|
|
|
case IrCmd::BITCOUNTRZ_UINT:
|
2023-11-03 23:45:04 +00:00
|
|
|
case IrCmd::BYTESWAP_UINT:
|
2023-04-21 23:14:26 +01:00
|
|
|
case IrCmd::INVOKE_LIBM:
|
2023-06-24 07:19:39 +01:00
|
|
|
case IrCmd::GET_TYPE:
|
|
|
|
case IrCmd::GET_TYPEOF:
|
2023-07-28 16:13:53 +01:00
|
|
|
case IrCmd::FINDUPVAL:
|
2024-02-21 15:06:11 +00:00
|
|
|
case IrCmd::NUM_TO_VEC:
|
|
|
|
case IrCmd::TAG_VECTOR:
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case IrCmd::DO_ARITH:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidate(inst.a);
|
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::DO_LEN:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidate(inst.a);
|
|
|
|
state.invalidateUserCall(); // TODO: if argument is a string, there will be no user call
|
|
|
|
|
|
|
|
state.saveTag(inst.a, LUA_TNUMBER);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::GET_TABLE:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidate(inst.a);
|
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::SET_TABLE:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::GET_IMPORT:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidate(inst.a);
|
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::CONCAT:
|
2023-04-07 22:01:29 +01:00
|
|
|
state.invalidateRegisterRange(vmRegOp(inst.a), function.uintOp(inst.b));
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidateUserCall(); // TODO: if only strings and numbers are concatenated, there will be no user calls
|
|
|
|
break;
|
|
|
|
case IrCmd::INTERRUPT:
|
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-04-28 20:55:13 +01:00
|
|
|
case IrCmd::SETLIST:
|
2023-08-25 18:23:55 +01:00
|
|
|
if (RegisterInfo* info = state.tryGetRegisterInfo(inst.b); info && info->knownTableArraySize >= 0)
|
|
|
|
replace(function, inst.f, build.constUint(info->knownTableArraySize));
|
|
|
|
|
2023-09-01 18:58:27 +01:00
|
|
|
// TODO: this can be relaxed when x64 emitInstSetList becomes aware of register allocator
|
2023-09-30 02:13:05 +01:00
|
|
|
state.invalidateValuePropagation();
|
|
|
|
state.invalidateHeapTableData();
|
2023-12-02 07:46:57 +00:00
|
|
|
state.invalidateHeapBufferData();
|
2023-04-28 20:55:13 +01:00
|
|
|
break;
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::CALL:
|
2023-04-07 22:01:29 +01:00
|
|
|
state.invalidateRegistersFrom(vmRegOp(inst.a));
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidateUserCall();
|
2023-04-28 20:55:13 +01:00
|
|
|
|
2023-08-25 18:23:55 +01:00
|
|
|
// We cannot guarantee right now that all live values can be rematerialized from non-stack memory locations
|
2023-04-28 20:55:13 +01:00
|
|
|
// To prevent earlier values from being propagated to after the call, we have to clear the map
|
|
|
|
// TODO: remove only the values that don't have a guaranteed restore location
|
2023-09-30 02:13:05 +01:00
|
|
|
state.invalidateValuePropagation();
|
2023-03-24 18:03:04 +00:00
|
|
|
break;
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::FORGLOOP:
|
2023-04-07 22:01:29 +01:00
|
|
|
state.invalidateRegistersFrom(vmRegOp(inst.a) + 2); // Rn and Rn+1 are not modified
|
2023-09-01 18:58:27 +01:00
|
|
|
|
|
|
|
// TODO: this can be relaxed when x64 emitInstForGLoop becomes aware of register allocator
|
2023-09-30 02:13:05 +01:00
|
|
|
state.invalidateValuePropagation();
|
|
|
|
state.invalidateHeapTableData();
|
2023-12-02 07:46:57 +00:00
|
|
|
state.invalidateHeapBufferData();
|
2023-03-24 18:03:04 +00:00
|
|
|
break;
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::FORGLOOP_FALLBACK:
|
2023-04-07 22:01:29 +01:00
|
|
|
state.invalidateRegistersFrom(vmRegOp(inst.a) + 2); // Rn and Rn+1 are not modified
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-03-31 19:42:49 +01:00
|
|
|
case IrCmd::FORGPREP_XNEXT_FALLBACK:
|
2023-03-24 18:03:04 +00:00
|
|
|
// This fallback only conditionally throws an exception
|
|
|
|
break;
|
2023-04-28 20:55:13 +01:00
|
|
|
|
|
|
|
// Full fallback instructions
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_GETGLOBAL:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidate(inst.b);
|
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_SETGLOBAL:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_GETTABLEKS:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidate(inst.b);
|
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_SETTABLEKS:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_NAMECALL:
|
2023-04-07 22:01:29 +01:00
|
|
|
state.invalidate(IrOp{inst.b.kind, vmRegOp(inst.b) + 0u});
|
|
|
|
state.invalidate(IrOp{inst.b.kind, vmRegOp(inst.b) + 1u});
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidateUserCall();
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_PREPVARARGS:
|
2023-03-24 18:03:04 +00:00
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_GETVARARGS:
|
2023-04-28 20:55:13 +01:00
|
|
|
state.invalidateRegisterRange(vmRegOp(inst.b), function.intOp(inst.c));
|
2023-03-24 18:03:04 +00:00
|
|
|
break;
|
2023-07-28 16:13:53 +01:00
|
|
|
case IrCmd::NEWCLOSURE:
|
2023-03-24 18:03:04 +00:00
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_DUPCLOSURE:
|
2023-03-24 18:03:04 +00:00
|
|
|
state.invalidate(inst.b);
|
|
|
|
break;
|
2023-02-24 21:49:38 +00:00
|
|
|
case IrCmd::FALLBACK_FORGPREP:
|
2023-04-07 22:01:29 +01:00
|
|
|
state.invalidate(IrOp{inst.b.kind, vmRegOp(inst.b) + 0u});
|
|
|
|
state.invalidate(IrOp{inst.b.kind, vmRegOp(inst.b) + 1u});
|
|
|
|
state.invalidate(IrOp{inst.b.kind, vmRegOp(inst.b) + 2u});
|
2024-04-05 21:45:09 +01:00
|
|
|
state.invalidateUserCall();
|
2023-02-24 21:49:38 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Walks the instructions of 'block' from 'block.start' to 'block.finish' (inclusive) and applies,
// for each one in turn: pending substitutions, constant folding and constant propagation
static void constPropInBlock(IrBuilder& build, IrBlock& block, ConstPropState& state)
{
    IrFunction& function = build.function;

    uint32_t instIdx = block.start;

    // The upper bound is re-read from the block on every iteration instead of being cached
    // (presumably because the per-instruction steps receive 'block' and may update it)
    while (instIdx <= block.finish)
    {
        CODEGEN_ASSERT(instIdx < function.instructions.size());
        IrInst& current = function.instructions[instIdx];

        // Substitutions are resolved first so that folding and propagation see the final operands
        applySubstitutions(function, current);
        foldConstants(build, function, block, instIdx);
        constPropInInst(state, build, function, block, current, instIdx);

        instIdx++;
    }
}
|
|
|
|
|
2023-05-05 22:52:49 +01:00
|
|
|
// Optimizes 'block' and then keeps following unconditional jumps for as long as the jump target
// can be treated as a continuation of the current block, carrying the gathered 'state' along.
// 'state' is cleared on entry; every block processed here is marked in 'visited'.
// 'block' must not be null on entry (it is dereferenced before the loop).
static void constPropInBlockChain(IrBuilder& build, std::vector<uint8_t>& visited, IrBlock* block, ConstPropState& state)
{
    IrFunction& function = build.function;

    state.clear();

    const uint32_t startSortkey = block->sortkey;
    uint32_t chainPos = 0;

    while (block)
    {
        uint32_t blockIdx = function.getBlockIndex(*block);
        CODEGEN_ASSERT(!visited[blockIdx]);
        visited[blockIdx] = true;

        constPropInBlock(build, *block, state);

        // Value numbering and load/store propagation is not performed between blocks
        state.invalidateValuePropagation();

        // Same for table and buffer data propagation
        state.invalidateHeapTableData();
        state.invalidateHeapBufferData();
        state.invalidateUserdataData();

        // Blocks in a chain are guaranteed to follow each other
        // We force that by giving all blocks the same sorting key, but consecutive chain keys
        block->sortkey = startSortkey;
        block->chainkey = chainPos++;

        IrInst& termInst = function.instructions[block->finish];

        IrBlock* nextBlock = nullptr;

        // Unconditional jump into a block with a single user (current block) allows us to continue optimization
        // with the information we have gathered so far (unless we have already visited that block earlier)
        if (termInst.cmd == IrCmd::JUMP && termInst.a.kind == IrOpKind::Block)
        {
            IrBlock& target = function.blockOp(termInst.a);
            uint32_t targetIdx = function.getBlockIndex(target);

            if (target.useCount == 1 && !visited[targetIdx] && target.kind != IrBlockKind::Fallback)
            {
                // The chain stops here if the target produces values that are live after it
                if (getLiveOutValueCount(function, target) != 0)
                    break;

                // Make sure block ordering guarantee is checked at lowering time
                block->expectedNextBlock = targetIdx;

                nextBlock = &target;
            }
        }

        block = nextBlock;
    }
}
|
|
|
|
|
2023-03-03 20:21:14 +00:00
|
|
|
// Note that blocks in the collected path are marked as visited
|
|
|
|
static std::vector<uint32_t> collectDirectBlockJumpPath(IrFunction& function, std::vector<uint8_t>& visited, IrBlock* block)
|
|
|
|
{
|
|
|
|
// Path shouldn't be built starting with a block that has 'live out' values.
|
|
|
|
// One theoretical way to get it is if we had 'block' jumping unconditionally into a successor that uses values from 'block'
|
|
|
|
// * if the successor has only one use, the starting conditions of 'tryCreateLinearBlock' would prevent this
|
|
|
|
// * if the successor has multiple uses, it can't have such 'live in' values without phi nodes that we don't have yet
|
|
|
|
// Another possibility is to have two paths from 'block' into the target through two intermediate blocks
|
|
|
|
// Usually that would mean that we would have a conditional jump at the end of 'block'
|
2023-03-17 19:20:37 +00:00
|
|
|
// But using check guards and fallback blocks it becomes a possible setup
|
2023-03-03 20:21:14 +00:00
|
|
|
// We avoid this by making sure fallbacks rejoin the other immediate successor of 'block'
|
2024-02-16 02:04:39 +00:00
|
|
|
CODEGEN_ASSERT(getLiveOutValueCount(function, *block) == 0);
|
2023-03-03 20:21:14 +00:00
|
|
|
|
|
|
|
std::vector<uint32_t> path;
|
|
|
|
|
|
|
|
while (block)
|
|
|
|
{
|
|
|
|
IrInst& termInst = function.instructions[block->finish];
|
|
|
|
IrBlock* nextBlock = nullptr;
|
|
|
|
|
|
|
|
// A chain is made from internal blocks that were not a part of bytecode CFG
|
2023-09-01 18:58:27 +01:00
|
|
|
if (termInst.cmd == IrCmd::JUMP && termInst.a.kind == IrOpKind::Block)
|
2023-03-03 20:21:14 +00:00
|
|
|
{
|
|
|
|
IrBlock& target = function.blockOp(termInst.a);
|
|
|
|
uint32_t targetIdx = function.getBlockIndex(target);
|
|
|
|
|
|
|
|
if (!visited[targetIdx] && target.kind == IrBlockKind::Internal)
|
|
|
|
{
|
|
|
|
// Additional restriction is that to join a block, it cannot produce values that are used in other blocks
|
|
|
|
// And it also can't use values produced in other blocks
|
|
|
|
auto [liveIns, liveOuts] = getLiveInOutValueCount(function, target);
|
|
|
|
|
|
|
|
if (liveIns == 0 && liveOuts == 0)
|
|
|
|
{
|
|
|
|
visited[targetIdx] = true;
|
|
|
|
path.push_back(targetIdx);
|
|
|
|
|
|
|
|
nextBlock = ⌖
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
block = nextBlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
return path;
|
|
|
|
}
|
|
|
|
|
2023-05-05 22:52:49 +01:00
|
|
|
// Attempts to outline the chain of internal blocks reachable from 'startingBlock' into a single new
// 'Linearized' block placed right after it, and then optimizes that new block using the state
// gathered from 'startingBlock'.
// 'startingBlock' is always marked as visited; blocks on the collected path are marked as well.
static void tryCreateLinearBlock(IrBuilder& build, std::vector<uint8_t>& visited, IrBlock& startingBlock, ConstPropState& state)
{
    IrFunction& function = build.function;

    const uint32_t startIdx = function.getBlockIndex(startingBlock);
    CODEGEN_ASSERT(!visited[startIdx]);
    visited[startIdx] = true;

    IrInst& terminator = function.instructions[startingBlock.finish];

    // Candidate blocks must:
    // * end with an unconditional jump
    // * jump to a real block (not a VM exit or undef)
    // * jump to a block with more than one user (a single-use target should already be handled by constPropInBlockChain)
    if (terminator.cmd != IrCmd::JUMP || terminator.a.kind != IrOpKind::Block || function.blockOp(terminator.a).useCount == 1)
        return;

    const uint32_t originalTargetIdx = terminator.a.index;

    // Gather the path to see if it is worth it (this also marks the path blocks as visited)
    std::vector<uint32_t> jumpPath = collectDirectBlockJumpPath(function, visited, &startingBlock);

    // Short paths are not linearized
    if (int(jumpPath.size()) < FInt::LuauCodeGenMinLinearBlockPath)
        return;

    // Seed the state with what is known at the end of the starting block
    state.clear();
    constPropInBlock(build, startingBlock, state);

    // The jump target must not have been changed by the optimization above
    CODEGEN_ASSERT(function.instructions[startingBlock.finish].a.index == originalTargetIdx);

    // Note: 'startingBlock' must not be used past this point, the reference may be reallocated by build.block() below
    const uint32_t baseSortKey = startingBlock.sortkey;
    const uint32_t baseChainKey = startingBlock.chainkey;

    // New linearized block that the starting block jump is going to be redirected into
    IrOp linearOp = build.block(IrBlockKind::Linearized);
    visited.push_back(false);

    build.beginBlock(linearOp);

    // Blocks are ordered by start instruction by default; the sort order is altered here so that
    // the linearized block is placed right after the starting block
    {
        IrBlock& fresh = function.blocks[linearOp.index];
        fresh.sortkey = baseSortKey;
        fresh.chainkey = baseChainKey + 1;
    }

    // Have the block ordering guarantee checked at lowering time
    function.blocks[startIdx].expectedNextBlock = linearOp.index;

    replace(function, terminator.a, linearOp);

    // Copy the collected path into the fresh block
    for (uint32_t pathIdx : jumpPath)
        build.clone(function.blocks[pathIdx], /* removeCurrentTerminator */ true);

    // If all live in/out data is defined aside from the new block, generate it
    // Note that liveness information is not strictly correct after optimization passes and may need to be recomputed before next passes
    // The information generated here is consistent with current state that could be outdated, but still useful in IR inspection
    auto& cfg = function.cfg;

    if (cfg.in.size() == linearOp.index)
    {
        CODEGEN_ASSERT(cfg.in.size() == cfg.out.size());
        CODEGEN_ASSERT(cfg.in.size() == cfg.def.size());

        // Live in matches the input of the original first block
        cfg.in.push_back(cfg.in[jumpPath.front()]);

        // Live out matches the result of the original last block
        cfg.out.push_back(cfg.out[jumpPath.back()]);

        // Defs are tricky, registers are joined together, but variadic sequences can be consumed inside the block
        cfg.def.push_back({});
        RegisterSet& mergedDef = cfg.def.back();

        // Live out of the new block does not change while the defs are being merged
        const bool outHasVarargSeq = cfg.out.back().varargSeq;

        for (uint32_t pathIdx : jumpPath)
        {
            const RegisterSet& blockDef = cfg.def[pathIdx];

            mergedDef.regs |= blockDef.regs;

            // Taking only the last defined variadic sequence if it's not consumed before the end
            if (blockDef.varargSeq && outHasVarargSeq)
            {
                mergedDef.varargSeq = true;
                mergedDef.varargStart = blockDef.varargStart;
            }
        }

        // The only predecessor of the new block is the starting block
        cfg.predecessorsOffsets.push_back(uint32_t(cfg.predecessors.size()));
        cfg.predecessors.push_back(startIdx);

        // Updating successors will require visiting the instructions again and we don't have a current use for linearized block successor list
    }

    // Finally, optimize the linear block itself
    constPropInBlock(build, function.blockOp(linearOp), state);
}
|
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
// Runs chain-based constant propagation starting from every block of the function that is
// neither a fallback nor dead and that was not already covered by an earlier chain
void constPropInBlockChains(IrBuilder& build, bool useValueNumbering)
{
    IrFunction& function = build.function;

    // A single state object is shared by all chains
    ConstPropState state{function};
    state.useValueNumbering = useValueNumbering;

    std::vector<uint8_t> visited(function.blocks.size(), false);

    for (IrBlock& candidate : function.blocks)
    {
        const bool skippedKind = candidate.kind == IrBlockKind::Fallback || candidate.kind == IrBlockKind::Dead;

        if (!skippedKind && !visited[function.getBlockIndex(candidate)])
            constPropInBlockChain(build, visited, &candidate, state);
    }
}
|
2023-03-03 20:21:14 +00:00
|
|
|
|
2023-04-28 20:55:13 +01:00
|
|
|
// Goes through internal block chains and outlines them into a single new block.
// Outlining will be able to linearize the execution, even if there was a jump to a block with multiple users,
// new 'block' will only be reachable from a single one and all gathered information can be preserved.
void createLinearBlocks(IrBuilder& build, bool useValueNumbering)
{
    IrFunction& function = build.function;

    ConstPropState state{function};
    state.useValueNumbering = useValueNumbering;

    std::vector<uint8_t> visited(function.blocks.size(), false);

    // New 'linear' blocks can be created while iterating, so the loop is index-based and is bounded
    // by the block count captured up front (the new blocks are intentionally not visited)
    const size_t initialBlockCount = function.blocks.size();

    for (size_t blockPos = 0; blockPos < initialBlockCount; blockPos++)
    {
        IrBlock& candidate = function.blocks[blockPos];

        const bool skippedKind = candidate.kind == IrBlockKind::Fallback || candidate.kind == IrBlockKind::Dead;

        if (!skippedKind && !visited[function.getBlockIndex(candidate)])
            tryCreateLinearBlock(build, visited, candidate, state);
    }
}
|
|
|
|
|
|
|
|
} // namespace CodeGen
|
|
|
|
} // namespace Luau
|