Eliminate loops in vector ops.

This commit is contained in:
Petri Häkkinen 2021-11-18 10:12:42 +02:00
parent 4fd08425a5
commit 0d3761dca1
6 changed files with 43 additions and 80 deletions

View file

@ -560,7 +560,7 @@ void lua_pushvector(lua_State* L, float x, float y, float z, float w)
#else #else
void lua_pushvector(lua_State* L, float x, float y, float z) void lua_pushvector(lua_State* L, float x, float y, float z)
{ {
setvvalue(L->top, x, y, z); setvvalue(L->top, x, y, z, 0.0f);
api_incr_top(L); api_incr_top(L);
return; return;
} }

View file

@ -1019,27 +1019,23 @@ static int luauF_tunpack(lua_State* L, StkId res, TValue* arg0, int nresults, St
static int luauF_vector(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) static int luauF_vector(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams)
{ {
#ifdef LUA_FLOAT4_VECTORS #ifdef LUA_FLOAT4_VECTORS
if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1) && ttisnumber(args + 2)) if (nparams >= 4 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1) && ttisnumber(args + 2))
#else
if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1))
#endif
{ {
double x = nvalue(arg0); double x = nvalue(arg0);
double y = nvalue(args); double y = nvalue(args);
double z = nvalue(args + 1); double z = nvalue(args + 1);
double w = nvalue(args + 2); double w = 0.0;
#ifdef LUA_FLOAT4_VECTORS
w = nvalue(args + 2);
#endif
setvvalue(res, float(x), float(y), float(z), float(w)); setvvalue(res, float(x), float(y), float(z), float(w));
return 1; return 1;
} }
#else
if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1))
{
double x = nvalue(arg0);
double y = nvalue(args);
double z = nvalue(args + 1);
setvvalue(res, float(x), float(y), float(z));
return 1;
}
#endif
return -1; return -1;
} }

View file

@ -117,7 +117,7 @@ typedef struct lua_TValue
i_o->tt = LUA_TVECTOR; \ i_o->tt = LUA_TVECTOR; \
} }
#else #else
#define setvvalue(obj, x, y, z) \ #define setvvalue(obj, x, y, z, w) \
{ \ { \
TValue* i_o = (obj); \ TValue* i_o = (obj); \
float* i_v = i_o->value.v; \ float* i_v = i_o->value.v; \

View file

@ -97,23 +97,25 @@ static LuaNode* hashnum(const Table* t, double n)
static LuaNode* hashvec(const Table* t, const float* v) static LuaNode* hashvec(const Table* t, const float* v)
{ {
unsigned int i[LUA_VECTOR_SIZE]; unsigned int i[4];
memcpy(i, v, sizeof(i)); memcpy(i, v, sizeof(i));
for(int j = 0; j < LUA_VECTOR_SIZE; j++) // convert -0 to 0 to make sure they hash to the same value
{ i[0] = (i[0] == 0x8000000) ? 0 : i[0];
// convert -0 to 0 to make sure they hash to the same value i[1] = (i[1] == 0x8000000) ? 0 : i[1];
i[j] = (i[j] == 0x8000000) ? 0 : i[j]; i[2] = (i[2] == 0x8000000) ? 0 : i[2];
// scramble bits to make sure that integer coordinates have entropy in lower bits // scramble bits to make sure that integer coordinates have entropy in lower bits
i[j] ^= i[j] >> 17; i[0] ^= i[0] >> 17;
} i[1] ^= i[1] >> 17;
i[2] ^= i[2] >> 17;
// Optimized Spatial Hashing for Collision Detection of Deformable Objects // Optimized Spatial Hashing for Collision Detection of Deformable Objects
static_assert(LUA_VECTOR_SIZE >= 3, "vector size must be 3 or 4 currently");
unsigned int h = (i[0] * 73856093) ^ (i[1] * 19349663) ^ (i[2] * 83492791); unsigned int h = (i[0] * 73856093) ^ (i[1] * 19349663) ^ (i[2] * 83492791);
#ifdef LUA_FLOAT4_VECTORS #ifdef LUA_FLOAT4_VECTORS
i[3] = (i[3] == 0x8000000) ? 0 : i[3];
i[3] ^= i[3] >> 17;
h ^= i[3] * 39916801; h ^= i[3] * 39916801;
#endif #endif

View file

@ -602,7 +602,7 @@ static void luau_execute(lua_State* L)
int ic = (name[0] | ' ') - 'x'; int ic = (name[0] | ' ') - 'x';
#ifdef LUA_FLOAT4_VECTORS #ifdef LUA_FLOAT4_VECTORS
// 'w' is before 'x' in ascii, so ic is -1 when the string is 'w' // 'w' is before 'x' in ascii, so ic is -1 when indexing with 'w'
if (ic == -1) if (ic == -1)
ic = 3; ic = 3;
#endif #endif
@ -1532,9 +1532,7 @@ static void luau_execute(lua_State* L)
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
const float* vc = rc->value.v; const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]);
ra->value.v[i] = vb[i] + vc[i];
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else else
@ -1580,9 +1578,7 @@ static void luau_execute(lua_State* L)
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
const float* vc = rc->value.v; const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]);
ra->value.v[i] = vb[i] - vc[i];
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else else
@ -1628,27 +1624,21 @@ static void luau_execute(lua_State* L)
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(rc)); float vc = cast_to(float, nvalue(rc));
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc);
ra->value.v[i] = vb[i] * vc;
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else if (ttisvector(rb) && ttisvector(rc)) else if (ttisvector(rb) && ttisvector(rc))
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
const float* vc = rc->value.v; const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]);
ra->value.v[i] = vb[i] * vc[i];
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else if (ttisnumber(rb) && ttisvector(rc)) else if (ttisnumber(rb) && ttisvector(rc))
{ {
float vb = cast_to(float, nvalue(rb)); float vb = cast_to(float, nvalue(rb));
const float* vc = rc->value.v; const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb * vc[0], vb * vc[1], vb * vc[2], vb * vc[3]);
ra->value.v[i] = vb * vc[i];
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else else
@ -1695,27 +1685,21 @@ static void luau_execute(lua_State* L)
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(rc)); float vc = cast_to(float, nvalue(rc));
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc);
ra->value.v[i] = vb[i] / vc;
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else if (ttisvector(rb) && ttisvector(rc)) else if (ttisvector(rb) && ttisvector(rc))
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
const float* vc = rc->value.v; const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]);
ra->value.v[i] = vb[i] / vc[i];
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else if (ttisnumber(rb) && ttisvector(rc)) else if (ttisnumber(rb) && ttisvector(rc))
{ {
float vb = cast_to(float, nvalue(rb)); float vb = cast_to(float, nvalue(rb));
const float* vc = rc->value.v; const float* vc = rc->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb / vc[0], vb / vc[1], vb / vc[2], vb / vc[3]);
ra->value.v[i] = vb / vc[i];
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else else
@ -1848,9 +1832,7 @@ static void luau_execute(lua_State* L)
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(kv)); float vc = cast_to(float, nvalue(kv));
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc);
ra->value.v[i] = vb[i] * vc;
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else else
@ -1896,9 +1878,7 @@ static void luau_execute(lua_State* L)
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
float vc = cast_to(float, nvalue(kv)); float vc = cast_to(float, nvalue(kv));
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc);
ra->value.v[i] = vb[i] / vc;
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else else
@ -2063,9 +2043,7 @@ static void luau_execute(lua_State* L)
else if (ttisvector(rb)) else if (ttisvector(rb))
{ {
const float* vb = rb->value.v; const float* vb = rb->value.v;
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]);
ra->value.v[i] = -vb[i];
setttype(ra, LUA_TVECTOR);
VM_NEXT(); VM_NEXT();
} }
else else

View file

@ -398,29 +398,22 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM
if (vb && vc) if (vb && vc)
{ {
setttype(ra, LUA_TVECTOR);
switch (op) switch (op)
{ {
case TM_ADD: case TM_ADD:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]);
ra->value.v[i] = vb[i] + vc[i];
return; return;
case TM_SUB: case TM_SUB:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]);
ra->value.v[i] = vb[i] - vc[i];
return; return;
case TM_MUL: case TM_MUL:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]);
ra->value.v[i] = vb[i] * vc[i];
return; return;
case TM_DIV: case TM_DIV:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]);
ra->value.v[i] = vb[i] / vc[i];
return; return;
case TM_UNM: case TM_UNM:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]);
ra->value.v[i] = -vb[i];
return; return;
default: default:
break; break;
@ -433,17 +426,14 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM
if (c) if (c)
{ {
float nc = cast_to(float, nvalue(c)); float nc = cast_to(float, nvalue(c));
setttype(ra, LUA_TVECTOR);
switch (op) switch (op)
{ {
case TM_MUL: case TM_MUL:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] * nc, vb[1] * nc, vb[2] * nc, vb[3] * nc);
ra->value.v[i] = vb[i] * nc;
return; return;
case TM_DIV: case TM_DIV:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, vb[0] / nc, vb[1] / nc, vb[2] / nc, vb[3] / nc);
ra->value.v[i] = vb[i] / nc;
return; return;
default: default:
break; break;
@ -457,17 +447,14 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM
if (b) if (b)
{ {
float nb = cast_to(float, nvalue(b)); float nb = cast_to(float, nvalue(b));
setttype(ra, LUA_TVECTOR);
switch (op) switch (op)
{ {
case TM_MUL: case TM_MUL:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, nb * vc[0], nb * vc[1], nb * vc[2], nb * vc[3]);
ra->value.v[i] = nb * vc[i];
return; return;
case TM_DIV: case TM_DIV:
for (int i = 0; i < LUA_VECTOR_SIZE; i++) setvvalue(ra, nb / vc[0], nb / vc[1], nb / vc[2], nb / vc[3]);
ra->value.v[i] = nb / vc[i];
return; return;
default: default:
break; break;