diff --git a/CodeGen/src/IrTranslateBuiltins.cpp b/CodeGen/src/IrTranslateBuiltins.cpp index cec18204..ebded522 100644 --- a/CodeGen/src/IrTranslateBuiltins.cpp +++ b/CodeGen/src/IrTranslateBuiltins.cpp @@ -14,6 +14,7 @@ static const int kMinMaxUnrolledParams = 5; static const int kBit32BinaryOpUnrolledParams = 5; LUAU_FASTFLAGVARIABLE(LuauVectorLibNativeCodegen); +LUAU_FASTFLAGVARIABLE(LuauVectorLibNativeDot); namespace Luau { @@ -907,15 +908,26 @@ static BuiltinImplResult translateBuiltinVectorMagnitude( build.loadAndCheckTag(arg1, LUA_TVECTOR, build.vmExit(pcpos)); - IrOp x = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(0)); - IrOp y = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(4)); - IrOp z = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(8)); + IrOp sum; - IrOp x2 = build.inst(IrCmd::MUL_NUM, x, x); - IrOp y2 = build.inst(IrCmd::MUL_NUM, y, y); - IrOp z2 = build.inst(IrCmd::MUL_NUM, z, z); + if (FFlag::LuauVectorLibNativeDot) + { + IrOp a = build.inst(IrCmd::LOAD_TVALUE, arg1, build.constInt(0)); - IrOp sum = build.inst(IrCmd::ADD_NUM, build.inst(IrCmd::ADD_NUM, x2, y2), z2); + sum = build.inst(IrCmd::DOT_VEC, a, a); + } + else + { + IrOp x = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(0)); + IrOp y = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(4)); + IrOp z = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(8)); + + IrOp x2 = build.inst(IrCmd::MUL_NUM, x, x); + IrOp y2 = build.inst(IrCmd::MUL_NUM, y, y); + IrOp z2 = build.inst(IrCmd::MUL_NUM, z, z); + + sum = build.inst(IrCmd::ADD_NUM, build.inst(IrCmd::ADD_NUM, x2, y2), z2); + } IrOp mag = build.inst(IrCmd::SQRT_NUM, sum); @@ -945,25 +957,43 @@ static BuiltinImplResult translateBuiltinVectorNormalize( build.loadAndCheckTag(arg1, LUA_TVECTOR, build.vmExit(pcpos)); - IrOp x = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(0)); - IrOp y = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(4)); - IrOp z = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(8)); + if (FFlag::LuauVectorLibNativeDot) + { + IrOp a = build.inst(IrCmd::LOAD_TVALUE, arg1, build.constInt(0)); + IrOp sum = build.inst(IrCmd::DOT_VEC, a, a); - IrOp x2 = build.inst(IrCmd::MUL_NUM, x, x); - IrOp y2 = build.inst(IrCmd::MUL_NUM, y, y); - IrOp z2 = build.inst(IrCmd::MUL_NUM, z, z); + IrOp mag = build.inst(IrCmd::SQRT_NUM, sum); + IrOp inv = build.inst(IrCmd::DIV_NUM, build.constDouble(1.0), mag); + IrOp invvec = build.inst(IrCmd::NUM_TO_VEC, inv); - IrOp sum = build.inst(IrCmd::ADD_NUM, build.inst(IrCmd::ADD_NUM, x2, y2), z2); + IrOp result = build.inst(IrCmd::MUL_VEC, a, invvec); - IrOp mag = build.inst(IrCmd::SQRT_NUM, sum); - IrOp inv = build.inst(IrCmd::DIV_NUM, build.constDouble(1.0), mag); + result = build.inst(IrCmd::TAG_VECTOR, result); - IrOp xr = build.inst(IrCmd::MUL_NUM, x, inv); - IrOp yr = build.inst(IrCmd::MUL_NUM, y, inv); - IrOp zr = build.inst(IrCmd::MUL_NUM, z, inv); + build.inst(IrCmd::STORE_TVALUE, build.vmReg(ra), result); + } + else + { + IrOp x = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(0)); + IrOp y = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(4)); + IrOp z = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(8)); - build.inst(IrCmd::STORE_VECTOR, build.vmReg(ra), xr, yr, zr); - build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TVECTOR)); + IrOp x2 = build.inst(IrCmd::MUL_NUM, x, x); + IrOp y2 = build.inst(IrCmd::MUL_NUM, y, y); + IrOp z2 = build.inst(IrCmd::MUL_NUM, z, z); + + IrOp sum = build.inst(IrCmd::ADD_NUM, build.inst(IrCmd::ADD_NUM, x2, y2), z2); + + IrOp mag = build.inst(IrCmd::SQRT_NUM, sum); + IrOp inv = build.inst(IrCmd::DIV_NUM, build.constDouble(1.0), mag); + + IrOp xr = build.inst(IrCmd::MUL_NUM, x, inv); + IrOp yr = build.inst(IrCmd::MUL_NUM, y, inv); + IrOp zr = build.inst(IrCmd::MUL_NUM, z, inv); + + build.inst(IrCmd::STORE_VECTOR, build.vmReg(ra), xr, yr, zr); + build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TVECTOR)); + } return {BuiltinImplType::Full, 1}; } @@ -1019,19 +1049,31 @@ static BuiltinImplResult translateBuiltinVectorDot(IrBuilder& build, int nparams build.loadAndCheckTag(arg1, LUA_TVECTOR, build.vmExit(pcpos)); build.loadAndCheckTag(args, LUA_TVECTOR, build.vmExit(pcpos)); - IrOp x1 = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(0)); - IrOp x2 = build.inst(IrCmd::LOAD_FLOAT, args, build.constInt(0)); - IrOp xx = build.inst(IrCmd::MUL_NUM, x1, x2); + IrOp sum; - IrOp y1 = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(4)); - IrOp y2 = build.inst(IrCmd::LOAD_FLOAT, args, build.constInt(4)); - IrOp yy = build.inst(IrCmd::MUL_NUM, y1, y2); + if (FFlag::LuauVectorLibNativeDot) + { + IrOp a = build.inst(IrCmd::LOAD_TVALUE, arg1, build.constInt(0)); + IrOp b = build.inst(IrCmd::LOAD_TVALUE, args, build.constInt(0)); - IrOp z1 = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(8)); - IrOp z2 = build.inst(IrCmd::LOAD_FLOAT, args, build.constInt(8)); - IrOp zz = build.inst(IrCmd::MUL_NUM, z1, z2); + sum = build.inst(IrCmd::DOT_VEC, a, b); + } + else + { + IrOp x1 = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(0)); + IrOp x2 = build.inst(IrCmd::LOAD_FLOAT, args, build.constInt(0)); + IrOp xx = build.inst(IrCmd::MUL_NUM, x1, x2); - IrOp sum = build.inst(IrCmd::ADD_NUM, build.inst(IrCmd::ADD_NUM, xx, yy), zz); + IrOp y1 = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(4)); + IrOp y2 = build.inst(IrCmd::LOAD_FLOAT, args, build.constInt(4)); + IrOp yy = build.inst(IrCmd::MUL_NUM, y1, y2); + + IrOp z1 = build.inst(IrCmd::LOAD_FLOAT, arg1, build.constInt(8)); + IrOp z2 = build.inst(IrCmd::LOAD_FLOAT, args, build.constInt(8)); + IrOp zz = build.inst(IrCmd::MUL_NUM, z1, z2); + + sum = build.inst(IrCmd::ADD_NUM, build.inst(IrCmd::ADD_NUM, xx, yy), zz); + } build.inst(IrCmd::STORE_DOUBLE, build.vmReg(ra), sum); build.inst(IrCmd::STORE_TAG, build.vmReg(ra), build.constTag(LUA_TNUMBER));