From 7ec3117177c948fad0db3d7f11501d2a6fa5c19d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Sat, 27 Apr 2024 18:14:53 -0700 Subject: [PATCH] Make table.concat faster table.concat is idiomatic and should be the fastest way to concatenate all table array elements together, but apparently you can beat it by using string.format, string.rep and table.unpack: string.format(string.rep("%s", #t), table.unpack(t)) ... this just won't do, so we should fix table.concat performance. The deficit comes from two places: - rawgeti overhead followed by other stack accesses, all to extract a string from what is almost always an in-bounds array lookup - addlstring overhead in case separator is empty (extra function calls) This change fixes this by using a fast path for in-bounds array lookup for a string. Note that table.concat also supports numbers (these need to be converted to strings which is a little cumbersome and has innate overhead), and out-of-bounds accesses*. In these cases we fall back to the old implementation. To trigger out-of-bounds accesses, you need to skip the past-array-end element (which is nil per array invariant), but this is achievable because table.concat supports offset+length arguments. This should almost never come up in practice but the per-element branches et al are fairly cheap compared to the eventual string copy/alloc anyway. This change makes table.concat ~2x faster when the separator is empty; the table.concat benchmark shows +40% gains but it uses a variety of string separators of different lengths so it doesn't get the full benefit from this change. --- VM/src/ltablib.cpp | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/VM/src/ltablib.cpp b/VM/src/ltablib.cpp index 27c08f11..e6f32221 100644 --- a/VM/src/ltablib.cpp +++ b/VM/src/ltablib.cpp @@ -12,6 +12,7 @@ #include "lvm.h" LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauFastCrossTableMove, false) +LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauFasterConcat, false) static int foreachi(lua_State* L) { @@ -282,31 +283,42 @@ static int tmove(lua_State* L) return 1; } -static void addfield(lua_State* L, luaL_Strbuf* b, int i) +static void addfield(lua_State* L, luaL_Strbuf* b, int i, Table* t) { - int tt = lua_rawgeti(L, 1, i); - if (tt != LUA_TSTRING && tt != LUA_TNUMBER) - luaL_error(L, "invalid value (%s) at index %d in table for 'concat'", luaL_typename(L, -1), i); - luaL_addvalue(b); + if (DFFlag::LuauFasterConcat && t && unsigned(i - 1) < unsigned(t->sizearray) && ttisstring(&t->array[i - 1])) + { + TString* ts = tsvalue(&t->array[i - 1]); + luaL_addlstring(b, getstr(ts), ts->len); + } + else + { + int tt = lua_rawgeti(L, 1, i); + if (tt != LUA_TSTRING && tt != LUA_TNUMBER) + luaL_error(L, "invalid value (%s) at index %d in table for 'concat'", luaL_typename(L, -1), i); + luaL_addvalue(b); + } } static int tconcat(lua_State* L) { - luaL_Strbuf b; size_t lsep; - int i, last; const char* sep = luaL_optlstring(L, 2, "", &lsep); luaL_checktype(L, 1, LUA_TTABLE); - i = luaL_optinteger(L, 3, 1); - last = luaL_opt(L, luaL_checkinteger, 4, lua_objlen(L, 1)); + int i = luaL_optinteger(L, 3, 1); + int last = luaL_opt(L, luaL_checkinteger, 4, lua_objlen(L, 1)); + + Table* t = DFFlag::LuauFasterConcat ? hvalue(L->base) : NULL; + + luaL_Strbuf b; luaL_buffinit(L, &b); for (; i < last; i++) { - addfield(L, &b, i); - luaL_addlstring(&b, sep, lsep); + addfield(L, &b, i, t); + if (!DFFlag::LuauFasterConcat || lsep) + luaL_addlstring(&b, sep, lsep); } if (i == last) // add last value (if interval was not empty) - addfield(L, &b, i); + addfield(L, &b, i, t); luaL_pushresult(&b); return 1; }