CodeGen: Implement DOT_VEC IR opcode

This exposes vdpps on X64, making it possible to compute a 3-wide dot product
of two vectors and return the result as a number.
This commit is contained in:
Arseny Kapoulkine 2024-11-08 10:08:47 -08:00
parent d2c008c3a8
commit 74a91289e9
7 changed files with 31 additions and 1 deletions

View file

@ -194,6 +194,10 @@ enum class IrCmd : uint8_t
// A: TValue // A: TValue
UNM_VEC, UNM_VEC,
// Compute the 3-wide dot product of two vectors; the result is a double
// A, B: TValue
DOT_VEC,
// Compute Luau 'not' operation on destructured TValue // Compute Luau 'not' operation on destructured TValue
// A: tag // A: tag
// B: int (value) // B: int (value)

View file

@ -176,6 +176,7 @@ inline bool hasResult(IrCmd cmd)
case IrCmd::SUB_VEC: case IrCmd::SUB_VEC:
case IrCmd::MUL_VEC: case IrCmd::MUL_VEC:
case IrCmd::DIV_VEC: case IrCmd::DIV_VEC:
case IrCmd::DOT_VEC:
case IrCmd::UNM_VEC: case IrCmd::UNM_VEC:
case IrCmd::NOT_ANY: case IrCmd::NOT_ANY:
case IrCmd::CMP_ANY: case IrCmd::CMP_ANY:

View file

@ -163,6 +163,8 @@ const char* getCmdName(IrCmd cmd)
return "DIV_VEC"; return "DIV_VEC";
case IrCmd::UNM_VEC: case IrCmd::UNM_VEC:
return "UNM_VEC"; return "UNM_VEC";
case IrCmd::DOT_VEC:
return "DOT_VEC";
case IrCmd::NOT_ANY: case IrCmd::NOT_ANY:
return "NOT_ANY"; return "NOT_ANY";
case IrCmd::CMP_ANY: case IrCmd::CMP_ANY:

View file

@ -728,6 +728,11 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
build.fneg(inst.regA64, regOp(inst.a)); build.fneg(inst.regA64, regOp(inst.a));
break; break;
} }
case IrCmd::DOT_VEC:
{
CODEGEN_ASSERT(!"DOT_VEC is not implemented for A64");
break;
}
case IrCmd::NOT_ANY: case IrCmd::NOT_ANY:
{ {
inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b}); inst.regA64 = regs.allocReuse(KindA64::w, index, {inst.a, inst.b});

View file

@ -675,6 +675,20 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
build.vxorpd(inst.regX64, regOp(inst.a), build.f32x4(-0.0, -0.0, -0.0, -0.0)); build.vxorpd(inst.regX64, regOp(inst.a), build.f32x4(-0.0, -0.0, -0.0, -0.0));
break; break;
} }
case IrCmd::DOT_VEC:
{
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a, inst.b});
ScopedRegX64 tmp1{regs};
ScopedRegX64 tmp2{regs};
RegisterX64 tmpa = vecOp(inst.a, tmp1);
RegisterX64 tmpb = (inst.a == inst.b) ? tmpa : vecOp(inst.b, tmp2);
build.vdpps(inst.regX64, tmpa, tmpb, 0x71); // 7 = 0b0111, sum first 3 products into first float
build.vcvtss2sd(inst.regX64, inst.regX64, inst.regX64);
break;
}
case IrCmd::NOT_ANY: case IrCmd::NOT_ANY:
{ {
// TODO: if we have a single user which is a STORE_INT, we are missing the opportunity to write directly to target // TODO: if we have a single user which is a STORE_INT, we are missing the opportunity to write directly to target

View file

@ -75,6 +75,8 @@ IrValueKind getCmdValueKind(IrCmd cmd)
case IrCmd::DIV_VEC: case IrCmd::DIV_VEC:
case IrCmd::UNM_VEC: case IrCmd::UNM_VEC:
return IrValueKind::Tvalue; return IrValueKind::Tvalue;
case IrCmd::DOT_VEC:
return IrValueKind::Double;
case IrCmd::NOT_ANY: case IrCmd::NOT_ANY:
case IrCmd::CMP_ANY: case IrCmd::CMP_ANY:
return IrValueKind::Int; return IrValueKind::Int;

View file

@ -768,7 +768,8 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction&
if (tag == LUA_TBOOLEAN && if (tag == LUA_TBOOLEAN &&
(value.kind == IrOpKind::Inst || (value.kind == IrOpKind::Constant && function.constOp(value).kind == IrConstKind::Int))) (value.kind == IrOpKind::Inst || (value.kind == IrOpKind::Constant && function.constOp(value).kind == IrConstKind::Int)))
canSplitTvalueStore = true; canSplitTvalueStore = true;
else if (tag == LUA_TNUMBER && (value.kind == IrOpKind::Inst || (value.kind == IrOpKind::Constant && function.constOp(value).kind == IrConstKind::Double))) else if (tag == LUA_TNUMBER &&
(value.kind == IrOpKind::Inst || (value.kind == IrOpKind::Constant && function.constOp(value).kind == IrConstKind::Double)))
canSplitTvalueStore = true; canSplitTvalueStore = true;
else if (tag != 0xff && isGCO(tag) && value.kind == IrOpKind::Inst) else if (tag != 0xff && isGCO(tag) && value.kind == IrOpKind::Inst)
canSplitTvalueStore = true; canSplitTvalueStore = true;
@ -1342,6 +1343,7 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction&
case IrCmd::SUB_VEC: case IrCmd::SUB_VEC:
case IrCmd::MUL_VEC: case IrCmd::MUL_VEC:
case IrCmd::DIV_VEC: case IrCmd::DIV_VEC:
case IrCmd::DOT_VEC:
if (IrInst* a = function.asInstOp(inst.a); a && a->cmd == IrCmd::TAG_VECTOR) if (IrInst* a = function.asInstOp(inst.a); a && a->cmd == IrCmd::TAG_VECTOR)
replace(function, inst.a, a->a); replace(function, inst.a, a->a);