mirror of
https://github.com/luau-lang/luau.git
synced 2025-04-04 10:50:54 +01:00
CodeGen: Use more efficient lowering for UNM_*
UNM_NUM and UNM_VEC were both implemented assuming SSE-style restrictions (2-argument form), but the lowering uses AVX instructions, which don't have those restrictions. There's no need to copy the source to the destination separately - we can just vxorpd directly into the destination. Most occurrences of UNM_NUM/UNM_VEC followed the self-xor path, but this saves a couple of instructions in the trig benchmark and makes it execute ~0.1% fewer instructions (the actual runtime delta is within the noise).
This commit is contained in:
parent
443903aa00
commit
b47cd4521c
1 changed file with 2 additions and 23 deletions
|
@ -542,18 +542,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
|
|||
{
|
||||
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
|
||||
|
||||
RegisterX64 src = regOp(inst.a);
|
||||
|
||||
if (inst.regX64 == src)
|
||||
{
|
||||
build.vxorpd(inst.regX64, inst.regX64, build.f64(-0.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
build.vmovsd(inst.regX64, src, src);
|
||||
build.vxorpd(inst.regX64, inst.regX64, build.f64(-0.0));
|
||||
}
|
||||
|
||||
build.vxorpd(inst.regX64, regOp(inst.a), build.f64(-0.0));
|
||||
break;
|
||||
}
|
||||
case IrCmd::FLOOR_NUM:
|
||||
|
@ -665,17 +654,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
|
|||
{
|
||||
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a});
|
||||
|
||||
RegisterX64 src = regOp(inst.a);
|
||||
|
||||
if (inst.regX64 == src)
|
||||
{
|
||||
build.vxorpd(inst.regX64, inst.regX64, build.f32x4(-0.0, -0.0, -0.0, -0.0));
|
||||
}
|
||||
else
|
||||
{
|
||||
build.vmovsd(inst.regX64, src, src);
|
||||
build.vxorpd(inst.regX64, inst.regX64, build.f32x4(-0.0, -0.0, -0.0, -0.0));
|
||||
}
|
||||
build.vxorpd(inst.regX64, regOp(inst.a), build.f32x4(-0.0, -0.0, -0.0, -0.0));
|
||||
|
||||
if (!FFlag::LuauCodegenVectorTag2)
|
||||
build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3);
|
||||
|
|
Loading…
Add table
Reference in a new issue