From 0d3761dca1c73b0da24a4e03d4807cbea3adec7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petri=20H=C3=A4kkinen?= Date: Thu, 18 Nov 2021 10:12:42 +0200 Subject: [PATCH] Eliminate loops in vector ops. --- VM/src/lapi.cpp | 2 +- VM/src/lbuiltins.cpp | 22 +++++++++------------ VM/src/lobject.h | 2 +- VM/src/ltable.cpp | 20 ++++++++++--------- VM/src/lvmexecute.cpp | 46 +++++++++++-------------------------------- VM/src/lvmutils.cpp | 31 +++++++++-------------------- 6 files changed, 43 insertions(+), 80 deletions(-) diff --git a/VM/src/lapi.cpp b/VM/src/lapi.cpp index 4ffb1093..c9b2caf8 100644 --- a/VM/src/lapi.cpp +++ b/VM/src/lapi.cpp @@ -560,7 +560,7 @@ void lua_pushvector(lua_State* L, float x, float y, float z, float w) #else void lua_pushvector(lua_State* L, float x, float y, float z) { - setvvalue(L->top, x, y, z); + setvvalue(L->top, x, y, z, 0.0f); api_incr_top(L); return; } diff --git a/VM/src/lbuiltins.cpp b/VM/src/lbuiltins.cpp index 3067b14a..772b73ff 100644 --- a/VM/src/lbuiltins.cpp +++ b/VM/src/lbuiltins.cpp @@ -1019,27 +1019,23 @@ static int luauF_tunpack(lua_State* L, StkId res, TValue* arg0, int nresults, St static int luauF_vector(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { #ifdef LUA_FLOAT4_VECTORS - if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1) && ttisnumber(args + 2)) + if (nparams >= 4 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1) && ttisnumber(args + 2)) +#else + if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1)) +#endif { double x = nvalue(arg0); double y = nvalue(args); double z = nvalue(args + 1); - double w = nvalue(args + 2); + double w = 0.0; + +#ifdef LUA_FLOAT4_VECTORS + w = nvalue(args + 2); +#endif setvvalue(res, float(x), float(y), float(z), float(w)); return 1; } -#else - if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 
1)) - { - double x = nvalue(arg0); - double y = nvalue(args); - double z = nvalue(args + 1); - - setvvalue(res, float(x), float(y), float(z)); - return 1; - } -#endif return -1; } diff --git a/VM/src/lobject.h b/VM/src/lobject.h index 0dca7351..24fe54ee 100644 --- a/VM/src/lobject.h +++ b/VM/src/lobject.h @@ -117,7 +117,7 @@ typedef struct lua_TValue i_o->tt = LUA_TVECTOR; \ } #else -#define setvvalue(obj, x, y, z) \ +#define setvvalue(obj, x, y, z, w) \ { \ TValue* i_o = (obj); \ float* i_v = i_o->value.v; \ diff --git a/VM/src/ltable.cpp b/VM/src/ltable.cpp index 87396036..cd69c34b 100644 --- a/VM/src/ltable.cpp +++ b/VM/src/ltable.cpp @@ -97,23 +97,25 @@ static LuaNode* hashnum(const Table* t, double n) static LuaNode* hashvec(const Table* t, const float* v) { - unsigned int i[LUA_VECTOR_SIZE]; + unsigned int i[4]; memcpy(i, v, sizeof(i)); - for(int j = 0; j < LUA_VECTOR_SIZE; j++) - { - // convert -0 to 0 to make sure they hash to the same value - i[j] = (i[j] == 0x8000000) ? 0 : i[j]; + // convert -0 (bit pattern 0x80000000, sign bit only) to 0 to make sure they hash to the same value + i[0] = (i[0] == 0x80000000) ? 0 : i[0]; + i[1] = (i[1] == 0x80000000) ? 0 : i[1]; + i[2] = (i[2] == 0x80000000) ? 0 : i[2]; - // scramble bits to make sure that integer coordinates have entropy in lower bits - i[j] ^= i[j] >> 17; - } + // scramble bits to make sure that integer coordinates have entropy in lower bits + i[0] ^= i[0] >> 17; + i[1] ^= i[1] >> 17; + i[2] ^= i[2] >> 17; // Optimized Spatial Hashing for Collision Detection of Deformable Objects - static_assert(LUA_VECTOR_SIZE >= 3, "vector size must be 3 or 4 currently"); unsigned int h = (i[0] * 73856093) ^ (i[1] * 19349663) ^ (i[2] * 83492791); #ifdef LUA_FLOAT4_VECTORS + i[3] = (i[3] == 0x80000000) ? 
0 : i[3]; + i[3] ^= i[3] >> 17; h ^= i[3] * 39916801; #endif diff --git a/VM/src/lvmexecute.cpp b/VM/src/lvmexecute.cpp index ac316f7a..3e6a1b91 100644 --- a/VM/src/lvmexecute.cpp +++ b/VM/src/lvmexecute.cpp @@ -602,7 +602,7 @@ static void luau_execute(lua_State* L) int ic = (name[0] | ' ') - 'x'; #ifdef LUA_FLOAT4_VECTORS - // 'w' is before 'x' in ascii, so ic is -1 when the string is 'w' + // 'w' is before 'x' in ascii, so ic is -1 when indexing with 'w' if (ic == -1) ic = 3; #endif @@ -1532,9 +1532,7 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; const float* vc = rc->value.v; - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] + vc[i]; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]); VM_NEXT(); } else @@ -1580,9 +1578,7 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; const float* vc = rc->value.v; - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] - vc[i]; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]); VM_NEXT(); } else @@ -1628,27 +1624,21 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; float vc = cast_to(float, nvalue(rc)); - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] * vc; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc); VM_NEXT(); } else if (ttisvector(rb) && ttisvector(rc)) { const float* vb = rb->value.v; const float* vc = rc->value.v; - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] * vc[i]; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]); VM_NEXT(); } else if (ttisnumber(rb) && ttisvector(rc)) { float vb = cast_to(float, nvalue(rb)); const float* vc = rc->value.v; - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb * vc[i]; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, 
vb * vc[0], vb * vc[1], vb * vc[2], vb * vc[3]); VM_NEXT(); } else @@ -1695,27 +1685,21 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; float vc = cast_to(float, nvalue(rc)); - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] / vc; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc); VM_NEXT(); } else if (ttisvector(rb) && ttisvector(rc)) { const float* vb = rb->value.v; const float* vc = rc->value.v; - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] / vc[i]; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]); VM_NEXT(); } else if (ttisnumber(rb) && ttisvector(rc)) { float vb = cast_to(float, nvalue(rb)); const float* vc = rc->value.v; - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb / vc[i]; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb / vc[0], vb / vc[1], vb / vc[2], vb / vc[3]); VM_NEXT(); } else @@ -1848,9 +1832,7 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; float vc = cast_to(float, nvalue(kv)); - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] * vc; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc); VM_NEXT(); } else @@ -1896,9 +1878,7 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; float vc = cast_to(float, nvalue(kv)); - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] / vc; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc); VM_NEXT(); } else @@ -2063,9 +2043,7 @@ static void luau_execute(lua_State* L) else if (ttisvector(rb)) { const float* vb = rb->value.v; - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = -vb[i]; - setttype(ra, LUA_TVECTOR); + setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]); VM_NEXT(); } else diff --git a/VM/src/lvmutils.cpp b/VM/src/lvmutils.cpp index 2d0bd2d1..740a4cfd 
100644 --- a/VM/src/lvmutils.cpp +++ b/VM/src/lvmutils.cpp @@ -398,29 +398,22 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM if (vb && vc) { - setttype(ra, LUA_TVECTOR); - switch (op) { case TM_ADD: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] + vc[i]; + setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]); return; case TM_SUB: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] - vc[i]; + setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]); return; case TM_MUL: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] * vc[i]; + setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]); return; case TM_DIV: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] / vc[i]; + setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]); return; case TM_UNM: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = -vb[i]; + setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]); return; default: break; @@ -433,17 +426,14 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM if (c) { float nc = cast_to(float, nvalue(c)); - setttype(ra, LUA_TVECTOR); switch (op) { case TM_MUL: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] * nc; + setvvalue(ra, vb[0] * nc, vb[1] * nc, vb[2] * nc, vb[3] * nc); return; case TM_DIV: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = vb[i] / nc; + setvvalue(ra, vb[0] / nc, vb[1] / nc, vb[2] / nc, vb[3] / nc); return; default: break; @@ -457,17 +447,14 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM if (b) { float nb = cast_to(float, nvalue(b)); - setttype(ra, LUA_TVECTOR); switch (op) { case TM_MUL: - for (int i = 0; i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = nb * vc[i]; + setvvalue(ra, nb * vc[0], nb * vc[1], nb * vc[2], nb * vc[3]); return; case TM_DIV: - for (int i = 0; 
i < LUA_VECTOR_SIZE; i++) - ra->value.v[i] = nb / vc[i]; + setvvalue(ra, nb / vc[0], nb / vc[1], nb / vc[2], nb / vc[3]); return; default: break;