Make table.concat faster

table.concat is idiomatic and should be the fastest way to concatenate
all table array elements together, but apparently you can beat it by
using string.format, string.rep and table.unpack:

	string.format(string.rep("%s", #t), table.unpack(t))

... this just won't do, so we should fix table.concat performance.

The deficit comes from two places:

- rawgeti overhead followed by other stack accesses, all to extract
a string from what is almost always an in-bounds array lookup
- addlstring overhead in case separator is empty (extra function calls)

This change fixes this by using a fast path for in-bounds array lookup
for a string. Note that table.concat also supports numbers (these need
to be converted to strings which is a little cumbersome and has innate
overhead), and out-of-bounds accesses*. In these cases we fall back to
the old implementation.

To trigger out-of-bounds accesses, you need to skip the past-array-end
element (which is nil per array invariant), but this is achievable
because table.concat supports offset+length arguments. This should
almost never come up in practice but the per-element branches et al are
fairly cheap compared to the eventual string copy/alloc anyway.

This change makes table.concat ~2x faster when the separator is empty;
the table.concat benchmark shows +40% gains but it uses a variety of
string separators of different lengths so it doesn't get the full
benefit from this change.
This commit is contained in:
Arseny Kapoulkine 2024-04-27 18:14:53 -07:00
parent af15c3cf17
commit 7ec3117177

View file

@ -12,6 +12,7 @@
#include "lvm.h"
LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauFastCrossTableMove, false)
LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauFasterConcat, false)
static int foreachi(lua_State* L)
{
@ -282,31 +283,42 @@ static int tmove(lua_State* L)
return 1;
}
static void addfield(lua_State* L, luaL_Strbuf* b, int i)
static void addfield(lua_State* L, luaL_Strbuf* b, int i, Table* t)
{
int tt = lua_rawgeti(L, 1, i);
if (tt != LUA_TSTRING && tt != LUA_TNUMBER)
luaL_error(L, "invalid value (%s) at index %d in table for 'concat'", luaL_typename(L, -1), i);
luaL_addvalue(b);
if (DFFlag::LuauFasterConcat && t && unsigned(i - 1) < unsigned(t->sizearray) && ttisstring(&t->array[i - 1]))
{
TString* ts = tsvalue(&t->array[i - 1]);
luaL_addlstring(b, getstr(ts), ts->len);
}
else
{
int tt = lua_rawgeti(L, 1, i);
if (tt != LUA_TSTRING && tt != LUA_TNUMBER)
luaL_error(L, "invalid value (%s) at index %d in table for 'concat'", luaL_typename(L, -1), i);
luaL_addvalue(b);
}
}
static int tconcat(lua_State* L)
{
luaL_Strbuf b;
size_t lsep;
int i, last;
const char* sep = luaL_optlstring(L, 2, "", &lsep);
luaL_checktype(L, 1, LUA_TTABLE);
i = luaL_optinteger(L, 3, 1);
last = luaL_opt(L, luaL_checkinteger, 4, lua_objlen(L, 1));
int i = luaL_optinteger(L, 3, 1);
int last = luaL_opt(L, luaL_checkinteger, 4, lua_objlen(L, 1));
Table* t = DFFlag::LuauFasterConcat ? hvalue(L->base) : NULL;
luaL_Strbuf b;
luaL_buffinit(L, &b);
for (; i < last; i++)
{
addfield(L, &b, i);
luaL_addlstring(&b, sep, lsep);
addfield(L, &b, i, t);
if (!DFFlag::LuauFasterConcat || lsep)
luaL_addlstring(&b, sep, lsep);
}
if (i == last) // add last value (if interval was not empty)
addfield(L, &b, i);
addfield(L, &b, i, t);
luaL_pushresult(&b);
return 1;
}