From dcbb8e6fba51160fd20ca0357c3c7d4f20e53701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petri=20H=C3=A4kkinen?= Date: Tue, 16 Nov 2021 11:43:27 +0200 Subject: [PATCH] Proof of concept: extend vectors to 4 components when using LUA_FLOAT4_VECTORS define in luaconf.h. --- VM/include/lua.h | 4 ++++ VM/include/luaconf.h | 2 ++ VM/src/lapi.cpp | 9 ++++++++ VM/src/laux.cpp | 4 ++++ VM/src/lbuiltins.cpp | 18 +++++++++++++++ VM/src/lmem.cpp | 8 +++++++ VM/src/lnumutils.h | 8 +++++++ VM/src/lobject.h | 47 ++++++++++++++++++++++++++++++++++++++ VM/src/ltable.cpp | 34 +++++++++++++++++++++++++++ VM/src/lvmexecute.cpp | 51 +++++++++++++++++++++++++++++++++++++++++ VM/src/lvmutils.cpp | 53 +++++++++++++++++++++++++++++++++++++++++-- 11 files changed, 236 insertions(+), 2 deletions(-) diff --git a/VM/include/lua.h b/VM/include/lua.h index a9d3e875..95ce607c 100644 --- a/VM/include/lua.h +++ b/VM/include/lua.h @@ -157,7 +157,11 @@ LUA_API void lua_pushnil(lua_State* L); LUA_API void lua_pushnumber(lua_State* L, double n); LUA_API void lua_pushinteger(lua_State* L, int n); LUA_API void lua_pushunsigned(lua_State* L, unsigned n); +#ifdef LUA_FLOAT4_VECTORS +LUA_API void lua_pushvector(lua_State* L, float x, float y, float z, float w); +#else LUA_API void lua_pushvector(lua_State* L, float x, float y, float z); +#endif LUA_API void lua_pushlstring(lua_State* L, const char* s, size_t l); LUA_API void lua_pushstring(lua_State* L, const char* s); LUA_API const char* lua_pushvfstring(lua_State* L, const char* fmt, va_list argp); diff --git a/VM/include/luaconf.h b/VM/include/luaconf.h index aa008a24..8bb3d609 100644 --- a/VM/include/luaconf.h +++ b/VM/include/luaconf.h @@ -122,3 +122,5 @@ void* s; \ long l; \ } + +#define LUA_FLOAT4_VECTORS diff --git a/VM/src/lapi.cpp b/VM/src/lapi.cpp index 7e742644..4ffb1093 100644 --- a/VM/src/lapi.cpp +++ b/VM/src/lapi.cpp @@ -550,12 +550,21 @@ void lua_pushunsigned(lua_State* L, unsigned u) return; } +#ifdef LUA_FLOAT4_VECTORS +void
lua_pushvector(lua_State* L, float x, float y, float z, float w) +{ + setvvalue(L->top, x, y, z, w); + api_incr_top(L); + return; +} +#else void lua_pushvector(lua_State* L, float x, float y, float z) { setvvalue(L->top, x, y, z); api_incr_top(L); return; } +#endif void lua_pushlstring(lua_State* L, const char* s, size_t len) { diff --git a/VM/src/laux.cpp b/VM/src/laux.cpp index 2a684ee4..baf5093d 100644 --- a/VM/src/laux.cpp +++ b/VM/src/laux.cpp @@ -462,7 +462,11 @@ LUALIB_API const char* luaL_tolstring(lua_State* L, int idx, size_t* len) case LUA_TVECTOR: { const float* v = lua_tovector(L, idx); +#ifdef LUA_FLOAT4_VECTORS + lua_pushfstring(L, LUA_NUMBER_FMT ", " LUA_NUMBER_FMT ", " LUA_NUMBER_FMT ", " LUA_NUMBER_FMT, v[0], v[1], v[2], v[3]); +#else lua_pushfstring(L, LUA_NUMBER_FMT ", " LUA_NUMBER_FMT ", " LUA_NUMBER_FMT, v[0], v[1], v[2]); +#endif break; } default: diff --git a/VM/src/lbuiltins.cpp b/VM/src/lbuiltins.cpp index 9ab57ac9..71182dd3 100644 --- a/VM/src/lbuiltins.cpp +++ b/VM/src/lbuiltins.cpp @@ -1016,6 +1016,23 @@ static int luauF_tunpack(lua_State* L, StkId res, TValue* arg0, int nresults, St return -1; } +#ifdef LUA_FLOAT4_VECTORS /* builtin form of the 4-component vector constructor: stores the vector in res and returns 1, or returns -1 when the arguments do not qualify */ +static int luauF_vector(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) +{ + if (nparams >= 4 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1) && ttisnumber(args + 2)) /* x is arg0 and y, z, w are args[0..2], so four parameters must be present before args + 2 is read */ + { + double x = nvalue(arg0); + double y = nvalue(args); + double z = nvalue(args + 1); + double w = nvalue(args + 2); + + setvvalue(res, float(x), float(y), float(z), float(w)); + return 1; + } + + return -1; +} +#else static int luauF_vector(lua_State* L, StkId res, TValue* arg0, int nresults, StkId args, int nparams) { if (nparams >= 3 && nresults <= 1 && ttisnumber(arg0) && ttisnumber(args) && ttisnumber(args + 1)) @@ -1030,6 +1047,7 @@ static int luauF_vector(lua_State* L, StkId res, TValue* arg0, int nresults, Stk return -1; } +#endif static int luauF_countlz(lua_State* L, StkId res,
TValue* arg0, int nresults, StkId args, int nparams) { diff --git a/VM/src/lmem.cpp b/VM/src/lmem.cpp index d8b265cb..96659823 100644 --- a/VM/src/lmem.cpp +++ b/VM/src/lmem.cpp @@ -33,11 +33,19 @@ #define ABISWITCH(x64, ms32, gcc32) (sizeof(void*) == 8 ? x64 : ms32) #endif +#ifdef LUA_FLOAT4_VECTORS /* compile-time layout checks for the 4-component vector build */ +static_assert(sizeof(TValue) == ABISWITCH(24, 24, 24), "size mismatch for value"); /* larger than the 16 bytes asserted in the #else branch; NOTE(review): presumably because TValue carries int extra[2] in this build - confirm */ +static_assert(offsetof(TString, data) == ABISWITCH(24, 20, 20), "size mismatch for string header"); +static_assert(offsetof(Udata, data) == ABISWITCH(24, 16, 16), "size mismatch for userdata header"); +static_assert(sizeof(Table) == ABISWITCH(56, 36, 36), "size mismatch for table header"); +static_assert(sizeof(LuaNode) == ABISWITCH(48, 48, 48), "size mismatch for table entry"); /* 48 instead of 32; NOTE(review): presumably both the value and the key half of a node grow - confirm on 32-bit targets */ +#else static_assert(sizeof(TValue) == ABISWITCH(16, 16, 16), "size mismatch for value"); static_assert(offsetof(TString, data) == ABISWITCH(24, 20, 20), "size mismatch for string header"); static_assert(offsetof(Udata, data) == ABISWITCH(24, 16, 16), "size mismatch for userdata header"); static_assert(sizeof(Table) == ABISWITCH(56, 36, 36), "size mismatch for table header"); static_assert(sizeof(LuaNode) == ABISWITCH(32, 32, 32), "size mismatch for table entry"); +#endif const size_t kSizeClasses = LUA_SIZECLASSES; const size_t kMaxSmallSize = 512; diff --git a/VM/src/lnumutils.h b/VM/src/lnumutils.h index 43f8014b..65043be1 100644 --- a/VM/src/lnumutils.h +++ b/VM/src/lnumutils.h @@ -18,12 +18,20 @@ inline bool luai_veceq(const float* a, const float* b) { +#ifdef LUA_FLOAT4_VECTORS + return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; /* equal only when all four components compare equal as floats */ +#else return a[0] == b[0] && a[1] == b[1] && a[2] == b[2]; +#endif } inline bool luai_vecisnan(const float* a) { +#ifdef LUA_FLOAT4_VECTORS + return a[0] != a[0] || a[1] != a[1] || a[2] != a[2] || a[3] != a[3]; /* true when any of the four components is NaN (the only value for which x != x) */ +#else return a[0] != a[0] || a[1] != a[1] || a[2] != a[2]; +#endif } LUAU_FASTMATH_BEGIN diff --git a/VM/src/lobject.h b/VM/src/lobject.h index
c5f2e2f4..eb205910 100644 --- a/VM/src/lobject.h +++ b/VM/src/lobject.h @@ -47,7 +47,11 @@ typedef union typedef struct lua_TValue { Value value; +#ifdef LUA_FLOAT4_VECTORS + int extra[2]; /* second spare slot for this build; NOTE(review): presumably backs the vector components that do not fit in value - confirm against the declared size of Value.v */ +#else int extra; +#endif int tt; } TValue; @@ -105,6 +109,18 @@ typedef struct lua_TValue i_o->tt = LUA_TNUMBER; \ } +#ifdef LUA_FLOAT4_VECTORS /* setvvalue takes a fourth (w) component in this build */ +#define setvvalue(obj, x, y, z, w) \ + { \ + TValue* i_o = (obj); \ + float* i_v = i_o->value.v; \ + i_v[0] = (x); \ + i_v[1] = (y); \ + i_v[2] = (z); /* NOTE(review): indices 2 and 3 presumably land in TValue::extra - confirm */ \ + i_v[3] = (w); \ + i_o->tt = LUA_TVECTOR; \ + } +#else #define setvvalue(obj, x, y, z) \ { \ TValue* i_o = (obj); \ @@ -114,6 +130,7 @@ typedef struct lua_TValue i_v[2] = (z); \ i_o->tt = LUA_TVECTOR; \ } +#endif #define setpvalue(obj, x) \ { \ @@ -364,7 +381,11 @@ typedef struct Closure typedef struct TKey { ::Value value; +#ifdef LUA_FLOAT4_VECTORS + int extra[2]; /* mirrors TValue::extra; setnodekey and getnodekey copy both slots */ +#else int extra; +#endif unsigned tt : 4; int next : 28; /* for chaining */ } TKey; @@ -375,6 +396,31 @@ typedef struct LuaNode TKey key; } LuaNode; +#ifdef LUA_FLOAT4_VECTORS +/* copy a value into a key: value, both extra slots and the type tag */ +#define setnodekey(L, node, obj) \ + { \ + LuaNode* n_ = (node); \ + const TValue* i_o = (obj); \ + n_->key.value = i_o->value; \ + n_->key.extra[0] = i_o->extra[0]; \ + n_->key.extra[1] = i_o->extra[1]; \ + n_->key.tt = i_o->tt; \ + checkliveness(L->global, i_o); \ + } + +/* copy a value from a key: value, both extra slots and the type tag */ +#define getnodekey(L, obj, node) \ + { \ + TValue* i_o = (obj); \ + const LuaNode* n_ = (node); \ + i_o->value = n_->key.value; \ + i_o->extra[0] = n_->key.extra[0]; \ + i_o->extra[1] = n_->key.extra[1]; \ + i_o->tt = n_->key.tt; \ + checkliveness(L->global, i_o); \ + } +#else /* copy a value into a key */ #define setnodekey(L, node, obj) \ { \ @@ -396,6 +442,7 @@ typedef struct LuaNode i_o->tt = n_->key.tt; \ checkliveness(L->global, i_o); \ } +#endif // clang-format off typedef struct Table diff --git a/VM/src/ltable.cpp b/VM/src/ltable.cpp index 07d22d59..60d8f0c9 100644 --- a/VM/src/ltable.cpp +++
b/VM/src/ltable.cpp @@ -31,10 +31,18 @@ LUAU_FASTFLAGVARIABLE(LuauArrayBoundary, false) #define MAXSIZE (1 << MAXBITS) static_assert(offsetof(LuaNode, val) == 0, "Unexpected Node memory layout, pointer cast in gval2slot is incorrect"); + +#ifdef LUA_FLOAT4_VECTORS +// TKey is bitpacked for memory efficiency so we need to validate bit counts for worst case +static_assert(TKey{{NULL}, {0, 0}, LUA_TDEADKEY, 0}.tt == LUA_TDEADKEY, "not enough bits for tt"); +static_assert(TKey{{NULL}, {0, 0}, LUA_TNIL, MAXSIZE - 1}.next == MAXSIZE - 1, "not enough bits for next"); +static_assert(TKey{{NULL}, {0, 0}, LUA_TNIL, -(MAXSIZE - 1)}.next == -(MAXSIZE - 1), "not enough bits for next"); +#else // TKey is bitpacked for memory efficiency so we need to validate bit counts for worst case static_assert(TKey{{NULL}, 0, LUA_TDEADKEY, 0}.tt == LUA_TDEADKEY, "not enough bits for tt"); static_assert(TKey{{NULL}, 0, LUA_TNIL, MAXSIZE - 1}.next == MAXSIZE - 1, "not enough bits for next"); static_assert(TKey{{NULL}, 0, LUA_TNIL, -(MAXSIZE - 1)}.next == -(MAXSIZE - 1), "not enough bits for next"); +#endif // reset cache of absent metamethods, cache is updated in luaT_gettm #define invalidateTMcache(t) t->flags = 0 @@ -94,6 +102,31 @@ static LuaNode* hashnum(const Table* t, double n) return hashpow2(t, h2); } +#ifdef LUA_FLOAT4_VECTORS /* 4-component variant: hashes the raw bits of all four floats; components that compare equal must produce equal hashes */ +static LuaNode* hashvec(const Table* t, const float* v) +{ + unsigned int i[4]; + memcpy(i, v, sizeof(i)); + + // convert -0 (bit pattern 0x80000000, sign bit only) to 0 so that both zeros, which compare equal, hash to the same value + i[0] = (i[0] == 0x80000000) ? 0 : i[0]; + i[1] = (i[1] == 0x80000000) ? 0 : i[1]; + i[2] = (i[2] == 0x80000000) ? 0 : i[2]; + i[3] = (i[3] == 0x80000000) ?
0 : i[3]; + + // scramble bits to make sure that integer coordinates have entropy in lower bits + i[0] ^= i[0] >> 17; + i[1] ^= i[1] >> 17; + i[2] ^= i[2] >> 17; + i[3] ^= i[3] >> 17; + + // Optimized Spatial Hashing for Collision Detection of Deformable Objects + unsigned int h = (i[0] * 73856093) ^ (i[1] * 19349663) ^ (i[2] * 83492791); + h ^= i[3]; // TODO: proper hashing function for 4D vectors + + return hashpow2(t, h); +} +#else static LuaNode* hashvec(const Table* t, const float* v) { unsigned int i[3]; @@ -114,6 +147,7 @@ static LuaNode* hashvec(const Table* t, const float* v) return hashpow2(t, h); } +#endif /* ** returns the `main' position of an element in a table (that is, the index diff --git a/VM/src/lvmexecute.cpp b/VM/src/lvmexecute.cpp index eed2862b..6c8669a1 100644 --- a/VM/src/lvmexecute.cpp +++ b/VM/src/lvmexecute.cpp @@ -601,7 +601,14 @@ static void luau_execute(lua_State* L) const char* name = getstr(tsvalue(kv)); int ic = (name[0] | ' ') - 'x'; +#ifdef LUA_FLOAT4_VECTORS + if (ic == -1) /* 'w' precedes 'x' in ASCII, so the case-folded index is -1 for both 'w' and 'W', matching the case-insensitive x/y/z handling */ + ic = 3; + + if (unsigned(ic) < 4 && name[1] == '\0') +#else if (unsigned(ic) < 3 && name[1] == '\0') +#endif { setnvalue(ra, rb->value.v[ic]); VM_NEXT(); @@ -1526,7 +1533,11 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; const float* vc = rc->value.v; +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]); /* component-wise sum over all four lanes */ +#else setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2]); +#endif VM_NEXT(); } else @@ -1572,7 +1583,11 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; const float* vc = rc->value.v; +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]); /* component-wise difference over all four lanes */ +#else setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2]); +#endif VM_NEXT(); } else @@ -1618,21 +1633,33 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; float vc = cast_to(float, nvalue(rc)); +#ifdef LUA_FLOAT4_VECTORS +
setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc); /* vector * number: the w lane is scaled from vb[3], like every other lane */ +#else setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc); +#endif VM_NEXT(); } else if (ttisvector(rb) && ttisvector(rc)) { const float* vb = rb->value.v; const float* vc = rc->value.v; +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]); /* vector * vector: component-wise product */ +#else setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2]); +#endif VM_NEXT(); } else if (ttisnumber(rb) && ttisvector(rc)) { float vb = cast_to(float, nvalue(rb)); const float* vc = rc->value.v; +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb * vc[0], vb * vc[1], vb * vc[2], vb * vc[3]); /* number * vector: every lane scaled by the number */ +#else setvvalue(ra, vb * vc[0], vb * vc[1], vb * vc[2]); +#endif VM_NEXT(); } else @@ -1679,21 +1706,33 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; float vc = cast_to(float, nvalue(rc)); +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc); /* vector / number: every lane divided by the number */ +#else setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc); +#endif VM_NEXT(); } else if (ttisvector(rb) && ttisvector(rc)) { const float* vb = rb->value.v; const float* vc = rc->value.v; +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]); /* vector / vector: component-wise quotient */ +#else setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2]); +#endif VM_NEXT(); } else if (ttisnumber(rb) && ttisvector(rc)) { float vb = cast_to(float, nvalue(rb)); const float* vc = rc->value.v; +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb / vc[0], vb / vc[1], vb / vc[2], vb / vc[3]); /* number / vector: the number divided by each lane */ +#else setvvalue(ra, vb / vc[0], vb / vc[1], vb / vc[2]); +#endif VM_NEXT(); } else @@ -1826,7 +1865,11 @@ static void luau_execute(lua_State* L) { const float* vb = rb->value.v; float vc = cast_to(float, nvalue(kv)); +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc, vb[3] * vc); /* vector * constant number */ +#else setvvalue(ra, vb[0] * vc, vb[1] * vc, vb[2] * vc); +#endif VM_NEXT(); } else @@ -1872,7 +1915,11 @@ static void luau_execute(lua_State* L)
{ const float* vb = rb->value.v; float vc = cast_to(float, nvalue(kv)); +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc, vb[3] / vc); /* vector / constant number */ +#else setvvalue(ra, vb[0] / vc, vb[1] / vc, vb[2] / vc); +#endif VM_NEXT(); } else @@ -2037,7 +2084,11 @@ static void luau_execute(lua_State* L) else if (ttisvector(rb)) { const float* vb = rb->value.v; +#ifdef LUA_FLOAT4_VECTORS + setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]); /* unary minus negates all four lanes */ +#else setvvalue(ra, -vb[0], -vb[1], -vb[2]); +#endif VM_NEXT(); } else diff --git a/VM/src/lvmutils.cpp b/VM/src/lvmutils.cpp index f52e8e74..acea5401 100644 --- a/VM/src/lvmutils.cpp +++ b/VM/src/lvmutils.cpp @@ -398,6 +398,28 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM if (vb && vc) { +#ifdef LUA_FLOAT4_VECTORS /* vector op vector: every case below works component-wise on lanes 0..3 */ + switch (op) + { + case TM_ADD: + setvvalue(ra, vb[0] + vc[0], vb[1] + vc[1], vb[2] + vc[2], vb[3] + vc[3]); + return; + case TM_SUB: + setvvalue(ra, vb[0] - vc[0], vb[1] - vc[1], vb[2] - vc[2], vb[3] - vc[3]); /* w lane comes from index 3, not a repeat of z */ + return; + case TM_MUL: + setvvalue(ra, vb[0] * vc[0], vb[1] * vc[1], vb[2] * vc[2], vb[3] * vc[3]); /* w lane comes from index 3, not a repeat of z */ + return; + case TM_DIV: + setvvalue(ra, vb[0] / vc[0], vb[1] / vc[1], vb[2] / vc[2], vb[3] / vc[3]); /* w lane comes from index 3, not a repeat of z */ + return; + case TM_UNM: + setvvalue(ra, -vb[0], -vb[1], -vb[2], -vb[3]); + return; + default: + break; + } +#else switch (op) { case TM_ADD: @@ -418,6 +440,7 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM default: break; } +#endif } else if (vb) { @@ -426,7 +449,19 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM if (c) { float nc = cast_to(float, nvalue(c)); - +#ifdef LUA_FLOAT4_VECTORS /* vector op number: only multiplication and division are handled here */ + switch (op) + { + case TM_MUL: + setvvalue(ra, vb[0] * nc, vb[1] * nc, vb[2] * nc, vb[3] * nc); + return; + case TM_DIV: + setvvalue(ra, vb[0] / nc, vb[1] / nc, vb[2] / nc, vb[3] / nc); + return; + default: + break; + } +#else switch (op) { case TM_MUL: @@ -438,6 +473,7 @@ void luaV_doarith(lua_State* L, StkId ra,
const TValue* rb, const TValue* rc, TM default: break; } +#endif } } else if (vc) @@ -447,7 +483,19 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM if (b) { float nb = cast_to(float, nvalue(b)); - +#ifdef LUA_FLOAT4_VECTORS /* number op vector: only multiplication and division are handled here */ + switch (op) + { + case TM_MUL: + setvvalue(ra, nb * vc[0], nb * vc[1], nb * vc[2], nb * vc[3]); + return; + case TM_DIV: + setvvalue(ra, nb / vc[0], nb / vc[1], nb / vc[2], nb / vc[3]); /* w lane divides by vc[3], not a repeat of z */ + return; + default: + break; + } +#else switch (op) { case TM_MUL: @@ -459,6 +507,7 @@ void luaV_doarith(lua_State* L, StkId ra, const TValue* rb, const TValue* rc, TM default: break; } +#endif } }