mirror of
https://github.com/luau-lang/luau.git
synced 2025-04-05 19:30:54 +01:00
CodeGen: Implement a naive version of A64 DOT_VEC
This uses existing instructions and scalar adds to establish a baseline. It is still faster than the original implementation of vector ops.
This commit is contained in:
parent
cd73807c09
commit
8fc458edbd
1 changed file with 13 additions and 1 deletion
|
@@ -730,7 +730,19 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
|
||||||
}
|
}
|
||||||
case IrCmd::DOT_VEC:
|
case IrCmd::DOT_VEC:
|
||||||
{
|
{
|
||||||
CODEGEN_ASSERT(!"DOT_VEC is not implemented for A64");
|
inst.regA64 = regs.allocReg(KindA64::d, index);
|
||||||
|
|
||||||
|
RegisterA64 temp1 = regs.allocTemp(KindA64::q);
|
||||||
|
RegisterA64 temp2 = regs.allocTemp(KindA64::q);
|
||||||
|
RegisterA64 temp3 = regs.allocTemp(KindA64::q);
|
||||||
|
|
||||||
|
build.fmul(temp1, regOp(inst.a), regOp(inst.b));
|
||||||
|
build.dup_4s(temp2, temp1, 1);
|
||||||
|
build.dup_4s(temp3, temp1, 2);
|
||||||
|
|
||||||
|
build.fadd(castReg(KindA64::s, temp1), castReg(KindA64::s, temp1), castReg(KindA64::s, temp2));
|
||||||
|
build.fadd(castReg(KindA64::s, temp1), castReg(KindA64::s, temp1), castReg(KindA64::s, temp3));
|
||||||
|
build.fcvt(inst.regA64, castReg(KindA64::s, temp1));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case IrCmd::NOT_ANY:
|
case IrCmd::NOT_ANY:
|
||||||
|
|
Loading…
Add table
Reference in a new issue