Eliminate loops in vector ops.

This commit is contained in:
Petri Häkkinen 2021-11-18 10:12:42 +02:00
parent 4fd08425a5
commit 0d3761dca1
6 changed files with 43 additions and 80 deletions

View file

@ -560,7 +560,7 @@ void lua_pushvector(lua_State* L, float x, float y, float z, float w)
#else
// Pushes a vector with components (x, y, z) onto the stack of state L: the
// value is written into the slot at L->top by setvvalue, and the top is then
// advanced by api_incr_top.
// NOTE(review): this listing appears to be a diff with its +/- markers
// stripped, so the two setvvalue calls below are presumably the old
// (3-argument) and new (explicit 0.0f fourth argument) forms of one line,
// not two consecutive statements - confirm against the real source.
void lua_pushvector(lua_State* L, float x, float y, float z)
{
setvvalue(L->top, x, y, z);
setvvalue(L->top, x, y, z, 0.0f);
api_incr_top(L);
return;
}

View file

@ -1019,27 +1019,23 @@ static int luauF_tunpack(lua_State* L, StkId res, TValue* arg0, int nresults, St
// Attempts to build a vector value from numeric arguments and store it in res.
// The first component is read from arg0 and the following ones from args[0],
// args[1] (and args[2] when LUA_FLOAT4_VECTORS is defined). Components are
// read as doubles and narrowed to float before being stored.
// Returns 1 after writing the vector to res; returns -1 when the parameter
// count, the nresults <= 1 requirement, or the number-type checks fail.
// NOTE(review): this listing appears to be a diff with its +/- markers
// stripped, so old and new lines (including preprocessor branches) are
// interleaved below - confirm against the real source before relying on it.
static int luauF_vector(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams)
{
#ifdef LUA_FLOAT4_VECTORS
if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1) && ttisnumber(args + 2))
// this variant requires 4 parameters, matching the read of args + 2 for w
if (nparams >= 4 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1) && ttisnumber(args + 2))
#else
if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1))
#endif
{
double x = nvalue(arg0);
double y = nvalue(args);
double z = nvalue(args + 1);
double w = nvalue(args + 2);
// w defaults to zero and is only read from the arguments in the 4-wide build
double w = 0.0;
#ifdef LUA_FLOAT4_VECTORS
w = nvalue(args + 2);
#endif
setvvalue(res, float(x), float(y), float(z), float(w));
return 1;
}
#else
if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1))
{
double x = nvalue(arg0);
double y = nvalue(args);
double z = nvalue(args + 1);
setvvalue(res, float(x), float(y), float(z));
return 1;
}
#endif
// argument count or types did not match
return -1;
}

View file

@ -117,7 +117,7 @@ typedef struct lua_TValue
i_o->tt = LUA_TVECTOR; \
}
#else
#define setvvalue(obj, x, y, z) \
#define setvvalue(obj, x, y, z, w) \
{ \
TValue* i_o = (obj); \
float* i_v = i_o->value.v; \

View file

@ -97,23 +97,25 @@ static LuaNode* hashnum(const Table* t, double n)
// Hashes the vector v for table t. The visible portion copies the components'
// raw bit patterns into unsigned ints, normalises and scrambles each one, and
// combines them into h by multiplying each with a constant and xor-ing the
// products. How h is turned into a node is outside this excerpt.
// NOTE(review): this listing appears to be a diff with its +/- markers
// stripped, so the loop form and the unrolled form below are presumably the
// old and new versions of the same code - confirm against the real source.
// NOTE(review): the "-0 to 0" step compares against 0x8000000 (seven hex
// digits). Under IEEE-754 binary32 the bit pattern of -0.0f is 0x80000000
// (eight hex digits), so as written -0 would not be normalised and 0 / -0
// would presumably hash differently - confirm and fix the constant in the
// real source.
static LuaNode* hashvec(const Table* t, const float* v)
{
unsigned int i[LUA_VECTOR_SIZE];
unsigned int i[4];
memcpy(i, v, sizeof(i));
for(int j = 0; j < LUA_VECTOR_SIZE; j++)
{
// convert -0 to 0 to make sure they hash to the same value
i[j] = (i[j] == 0x8000000) ? 0 : i[j];
i[0] = (i[0] == 0x8000000) ? 0 : i[0];
i[1] = (i[1] == 0x8000000) ? 0 : i[1];
i[2] = (i[2] == 0x8000000) ? 0 : i[2];
// scramble bits to make sure that integer coordinates have entropy in lower bits
i[j] ^= i[j] >> 17;
}
i[0] ^= i[0] >> 17;
i[1] ^= i[1] >> 17;
i[2] ^= i[2] >> 17;
// Optimized Spatial Hashing for Collision Detection of Deformable Objects
static_assert(LUA_VECTOR_SIZE >= 3, "vector size must be 3 or 4 currently");
unsigned int h = (i[0] * 73856093) ^ (i[1] * 19349663) ^ (i[2] * 83492791);
#ifdef LUA_FLOAT4_VECTORS
// the fourth component gets the same normalise/scramble treatment and is mixed into h
i[3] = (i[3] == 0x8000000) ? 0 : i[3];
i[3] ^= i[3] >> 17;
h ^= i[3] * 39916801;
#endif

View file

@ -602,7 +602,7 @@ static void luau_execute(lua_State* L)
int ic = (name[0] | ' ') - 'x';
#ifdef LUA_FLOAT4_VECTORS
// 'w' is before 'x' in ascii, so ic is -1 when the string is 'w'
// 'w' is before 'x' in ascii, so ic is -1 when indexing with 'w'
if (ic == -1)
ic = 3;
#endif
@ -1532,9 +1532,7 @@ static void luau_execute(lua_State* L)
{
const float* vb = rb->value.v;
const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] + vc[i];
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]);
VM_NEXT();
}
else
@ -1580,9 +1578,7 @@ static void luau_execute(lua_State* L)
{
const float* vb = rb->value.v;
const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] - vc[i];
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]);
VM_NEXT();
}
else
@ -1628,27 +1624,21 @@ static void luau_execute(lua_State* L)
{
const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(rc));
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] * vc;
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc);
VM_NEXT();
}
else if (ttisvector(rb) && ttisvector(rc))
{
const float* vb = rb->value.v;
const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] * vc[i];
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]);
VM_NEXT();
}
else if (ttisnumber(rb) && ttisvector(rc))
{
float vb = cast_to(float, nvalue(rb));
const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb * vc[i];
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb * vc[0], vb * vc[1], vb * vc[2], vb * vc[3]);
VM_NEXT();
}
else
@ -1695,27 +1685,21 @@ static void luau_execute(lua_State* L)
{
const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(rc));
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] / vc;
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc);
VM_NEXT();
}
else if (ttisvector(rb) && ttisvector(rc))
{
const float* vb = rb->value.v;
const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] / vc[i];
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]);
VM_NEXT();
}
else if (ttisnumber(rb) && ttisvector(rc))
{
float vb = cast_to(float, nvalue(rb));
const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb / vc[i];
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb / vc[0], vb / vc[1], vb / vc[2], vb / vc[3]);
VM_NEXT();
}
else
@ -1848,9 +1832,7 @@ static void luau_execute(lua_State* L)
{
const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(kv));
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] * vc;
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc);
VM_NEXT();
}
else
@ -1896,9 +1878,7 @@ static void luau_execute(lua_State* L)
{
const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(kv));
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] / vc;
setttype(ra, LUA_TVECTOR);
setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc);
VM_NEXT();
}
else
@ -2063,9 +2043,7 @@ static void luau_execute(lua_State* L)
else if (ttisvector(rb))
{
const float* vb = rb->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = -vb[i];
setttype(ra, LUA_TVECTOR);
setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]);
VM_NEXT();
}
else

View file

@ -398,29 +398,22 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM
if (vb && vc)
{
setttype(ra, LUA_TVECTOR);
switch (op)
{
case TM_ADD:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] + vc[i];
setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]);
return;
case TM_SUB:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] - vc[i];
setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]);
return;
case TM_MUL:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] * vc[i];
setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]);
return;
case TM_DIV:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] / vc[i];
setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]);
return;
case TM_UNM:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = -vb[i];
setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]);
return;
default:
break;
@ -433,17 +426,14 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM
if (c)
{
float nc = cast_to(float, nvalue(c));
setttype(ra, LUA_TVECTOR);
switch (op)
{
case TM_MUL:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] * nc;
setvvalue(ra, vb[0] * nc, vb[1] * nc, vb[2] * nc, vb[3] * nc);
return;
case TM_DIV:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = vb[i] / nc;
setvvalue(ra, vb[0] / nc, vb[1] / nc, vb[2] / nc, vb[3] / nc);
return;
default:
break;
@ -457,17 +447,14 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM
if (b)
{
float nb = cast_to(float, nvalue(b));
setttype(ra, LUA_TVECTOR);
switch (op)
{
case TM_MUL:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = nb * vc[i];
setvvalue(ra, nb * vc[0], nb * vc[1], nb * vc[2], nb * vc[3]);
return;
case TM_DIV:
for (int i = 0; i < LUA_VECTOR_SIZE; i++)
ra->value.v[i] = nb / vc[i];
setvvalue(ra, nb / vc[0], nb / vc[1], nb / vc[2], nb / vc[3]);
return;
default:
break;