Mirror of https://github.com/luau-lang/luau.git (synced 2025-01-19 17:28:06 +00:00)

Commit 532fd109e7: Merge branch 'master' into merge

3 changed files with 51 additions and 32 deletions
@@ -46,10 +46,9 @@ struct GlobalOptions
     int optimizationLevel = 1;
     int debugLevel = 1;
 
-    std::string vectorLib;
-    std::string vectorCtor;
-    std::string vectorType;
+    const char* vectorLib = nullptr;
+    const char* vectorCtor = nullptr;
+    const char* vectorType = nullptr;
 } globalOptions;
 
 static Luau::CompileOptions copts()
@@ -58,10 +57,9 @@ static Luau::CompileOptions copts()
     result.optimizationLevel = globalOptions.optimizationLevel;
     result.debugLevel = globalOptions.debugLevel;
 
-    // globalOptions outlive the CompileOptions, so it's safe to use string data pointers here
-    result.vectorLib = globalOptions.vectorLib.c_str();
-    result.vectorCtor = globalOptions.vectorCtor.c_str();
-    result.vectorType = globalOptions.vectorType.c_str();
+    result.vectorLib = globalOptions.vectorLib;
+    result.vectorCtor = globalOptions.vectorCtor;
+    result.vectorType = globalOptions.vectorType;
 
     return result;
 }
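Context for the hunks above: Luau::CompileOptions stores vectorLib, vectorCtor, and vectorType as C string pointers, so keeping std::string copies in GlobalOptions forced the .c_str() calls and the lifetime comment that this commit deletes. Holding const char* directly, with nullptr meaning "not provided", lets the values pass straight through. A minimal sketch of the idea, using stand-in types and a hypothetical setFromArg helper rather than the CLI's real option handling:

#include <cstring>

// Sketch only: mirrors the shape of the change, not the actual CLI source.
struct Options
{
    // nullptr means "option not provided"; argv strings outlive the whole
    // program, so storing the pointer directly needs no std::string copy.
    const char* vectorCtor = nullptr;
};

struct CompileOptionsLike
{
    const char* vectorCtor = nullptr; // the consumer also takes a C string pointer
};

static Options globalOptions;

// Hypothetical helper: pull the value out of an argument like "--vector-ctor=Vector3.new".
static void setFromArg(const char* arg)
{
    const char* prefix = "--vector-ctor=";
    std::size_t len = std::strlen(prefix);
    if (std::strncmp(arg, prefix, len) == 0)
        globalOptions.vectorCtor = arg + len;
}

static CompileOptionsLike makeOptions()
{
    CompileOptionsLike result;
    result.vectorCtor = globalOptions.vectorCtor; // direct pass-through, no .c_str() needed
    return result;
}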
@@ -16,6 +16,7 @@
 #include "lgc.h"
 
 LUAU_FASTFLAG(LuauCodegenVectorTag)
+LUAU_FASTFLAGVARIABLE(LuauCodegenVectorOptAnd, false)
 
 namespace Luau
 {
@@ -603,13 +604,13 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
 {
     inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
 
-    ScopedRegX64 tmp1{regs, SizeX64::xmmword};
-    ScopedRegX64 tmp2{regs, SizeX64::xmmword};
+    ScopedRegX64 tmp1{regs};
+    ScopedRegX64 tmp2{regs};
 
-    // Fourth component is the tag number which is interpreted as a denormal and has to be filtered out
-    build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp());
-    build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp());
-    build.vaddps(inst.regX64, tmp1.reg, tmp2.reg);
+    RegisterX64 tmpa = vecOp(inst.a, tmp1);
+    RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
+
+    build.vaddps(inst.regX64, tmpa, tmpb);
 
     if (!FFlag::LuauCodegenVectorTag)
         build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp());
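The comment deleted in this hunk carries the key reasoning: a vector TValue keeps its type tag in the fourth float lane, and a small integer reinterpreted as a float is a denormal, which many CPUs handle with a large penalty once it enters an arithmetic instruction, so the old code always masked both operands with vandps first. A standalone illustration of that claim; 4 is only a stand-in tag value, the actual LUA_TVECTOR constant may differ:

#include <cfloat>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    // A small integer type tag stored in the fourth float lane, as the vector TValue layout does.
    std::uint32_t tag = 4; // stand-in value; the actual LUA_TVECTOR constant may differ

    float w;
    std::memcpy(&w, &tag, sizeof(w)); // reinterpret the tag bits as a float

    // Prints roughly 5.605194e-45 -> a denormal (nonzero but below FLT_MIN).
    // Feeding such a lane into vaddps/vmulps is the slow path the vandps mask avoids.
    std::printf("w = %e, denormal: %d\n", w, w != 0.0f && w < FLT_MIN);
    return 0;
}

The same replacement is applied to the SUB, MUL, and DIV cases below; the new vecOp helper (added near the end of this file's diff) decides per operand whether the mask is still necessary.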
@@ -619,13 +620,13 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
 {
     inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
 
-    ScopedRegX64 tmp1{regs, SizeX64::xmmword};
-    ScopedRegX64 tmp2{regs, SizeX64::xmmword};
+    ScopedRegX64 tmp1{regs};
+    ScopedRegX64 tmp2{regs};
 
-    // Fourth component is the tag number which is interpreted as a denormal and has to be filtered out
-    build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp());
-    build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp());
-    build.vsubps(inst.regX64, tmp1.reg, tmp2.reg);
+    RegisterX64 tmpa = vecOp(inst.a, tmp1);
+    RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
+
+    build.vsubps(inst.regX64, tmpa, tmpb);
     if (!FFlag::LuauCodegenVectorTag)
         build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp());
     break;
@@ -634,13 +635,13 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
 {
     inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
 
-    ScopedRegX64 tmp1{regs, SizeX64::xmmword};
-    ScopedRegX64 tmp2{regs, SizeX64::xmmword};
+    ScopedRegX64 tmp1{regs};
+    ScopedRegX64 tmp2{regs};
 
-    // Fourth component is the tag number which is interpreted as a denormal and has to be filtered out
-    build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp());
-    build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp());
-    build.vmulps(inst.regX64, tmp1.reg, tmp2.reg);
+    RegisterX64 tmpa = vecOp(inst.a, tmp1);
+    RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
+
+    build.vmulps(inst.regX64, tmpa, tmpb);
     if (!FFlag::LuauCodegenVectorTag)
         build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp());
     break;
@@ -649,13 +650,13 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
 {
     inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
 
-    ScopedRegX64 tmp1{regs, SizeX64::xmmword};
-    ScopedRegX64 tmp2{regs, SizeX64::xmmword};
+    ScopedRegX64 tmp1{regs};
+    ScopedRegX64 tmp2{regs};
 
-    // Fourth component is the tag number which is interpreted as a denormal and has to be filtered out
-    build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp());
-    build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp());
-    build.vdivps(inst.regX64, tmp1.reg, tmp2.reg);
+    RegisterX64 tmpa = vecOp(inst.a, tmp1);
+    RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
+
+    build.vdivps(inst.regX64, tmpa, tmpb);
     if (!FFlag::LuauCodegenVectorTag)
         build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3);
     break;
@@ -2234,6 +2235,24 @@ OperandX64 IrLoweringX64::bufferAddrOp(IrOp bufferOp, IrOp indexOp)
     return noreg;
 }
 
+RegisterX64 IrLoweringX64::vecOp(IrOp op, ScopedRegX64& tmp)
+{
+    if (FFlag::LuauCodegenVectorOptAnd && FFlag::LuauCodegenVectorTag)
+    {
+        IrInst source = function.instOp(op);
+        CODEGEN_ASSERT(source.cmd != IrCmd::SUBSTITUTE); // we don't process substitutions
+
+        // source that comes from memory or from tag instruction has .w = TVECTOR, which is denormal
+        // to avoid performance degradation on some CPUs we mask this component to produce zero
+        // otherwise we conservatively assume the vector is a result of a well formed math op so .w is a normal number or zero
+        if (source.cmd != IrCmd::LOAD_TVALUE && source.cmd != IrCmd::TAG_VECTOR)
+            return regOp(op);
+    }
+    tmp.alloc(SizeX64::xmmword);
+    build.vandps(tmp.reg, regOp(op), vectorAndMaskOp());
+    return tmp.reg;
+}
+
 IrConst IrLoweringX64::constOp(IrOp op) const
 {
     return function.constOp(op);
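This helper is what the arithmetic cases above call. The trick has two parts: the ScopedRegX64 temporaries are now constructed without a size ({regs} instead of {regs, SizeX64::xmmword}), and vecOp calls tmp.alloc() only on the path that actually emits the vandps mask, so an operand that is already the result of vector math costs neither a temporary register nor a mask instruction. A rough sketch of that lazy-allocation shape, using simplified stand-in types rather than the real ScopedRegX64 and register-allocator classes:

// Stand-ins for the register allocator and scoped register wrapper.
struct Allocator
{
    int next = 0;
    int take() { return next++; }
    void release(int) {}
};

struct ScopedReg
{
    Allocator& regs;
    int reg = -1; // -1 = nothing allocated yet

    explicit ScopedReg(Allocator& regs) : regs(regs) {}
    void alloc() { reg = regs.take(); }
    ~ScopedReg()
    {
        if (reg != -1)
            regs.release(reg); // only pay for the register if it was used
    }
};

// Mirrors the shape of vecOp: skip the temp entirely when no mask is needed.
int vecOpLike(ScopedReg& tmp, bool operandNeedsMask, int srcReg)
{
    if (!operandNeedsMask)
        return srcReg; // fast path: operand's .w lane is already a normal number or zero
    tmp.alloc();       // slow path: reserve a temp; the real code emits vandps into it
    return tmp.reg;
}

int main()
{
    Allocator regs;
    ScopedReg tmp1{regs}, tmp2{regs};   // constructed unallocated, like {regs} in the diff
    int a = vecOpLike(tmp1, false, 7);  // result of earlier vector math: reused directly
    int b = vecOpLike(tmp2, true, 8);   // loaded from a TValue: gets masked via a temp
    return (a == 7 && b == 0) ? 0 : 1;
}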
@@ -2279,6 +2298,7 @@ OperandX64 IrLoweringX64::vectorAndMaskOp()
 
 OperandX64 IrLoweringX64::vectorOrMaskOp()
 {
+    CODEGEN_ASSERT(!FFlag::LuauCodegenVectorTag);
     if (vectorOrMask.base == noreg)
         vectorOrMask = build.u32x4(0, 0, 0, LUA_TVECTOR);
@@ -51,6 +51,7 @@ struct IrLoweringX64
     OperandX64 memRegTagOp(IrOp op);
     RegisterX64 regOp(IrOp op);
     OperandX64 bufferAddrOp(IrOp bufferOp, IrOp indexOp);
+    RegisterX64 vecOp(IrOp op, ScopedRegX64& tmp);
 
     IrConst constOp(IrOp op) const;
     uint8_t tagOp(IrOp op) const;