2021-10-29 21:25:12 +01:00
|
|
|
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
|
|
// This code is based on Lua 5.x implementation licensed under MIT License; see lua_LICENSE.txt for details
|
|
|
|
#include "lstring.h"
|
|
|
|
|
|
|
|
#include "lgc.h"
|
|
|
|
#include "lmem.h"
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
// Computes the hash of the `len` bytes starting at `str`.
// Note that this hashing algorithm is replicated in BytecodeBuilder.cpp, BytecodeBuilder::getStringHash
unsigned int luaS_hash(const char* str, size_t len)
{
// NOTE: despite the name, rol(x, s) shifts x right by s and left by (32 - s)
#define rol(x, s) ((x >> s) | (x << (32 - s)))
#define mix(u, v, w) a ^= h, a -= rol(h, u), b ^= a, b -= rol(a, v), h ^= b, h -= rol(b, w)

    unsigned int a = 0;
    unsigned int b = 0;
    unsigned int h = (unsigned int)len; // the hash is seeded with the (truncated) length

    // hash the prefix in 12-byte chunks with an ARX based hash (LuaJIT v2.1, lookup3)
    // note that we stop once fewer than 32 bytes remain to maintain compatibility with Lua 5.1
    for (; len >= 32; str += 12, len -= 12)
    {
        // copying through memcpy keeps the read alignment-agnostic; should compile into fast unaligned reads
        uint32_t block[3];
        memcpy(block, str, sizeof(block));

        a += block[0];
        b += block[1];
        h += block[2];
        mix(14, 11, 25);
    }

#undef mix
#undef rol

    // original Lua 5.1 hash for compatibility (exact match when the input was shorter than 32 bytes);
    // the remaining bytes are folded in from the last one down to the first
    while (len > 0)
    {
        --len;
        h ^= (h << 5) + (h >> 2) + (uint8_t)str[len];
    }

    return h;
}
|
|
|
|
|
|
|
|
// Replaces the bucket array of the global string table with a fresh one of `newsize` buckets,
// moving every string currently chained in the old array over to its new bucket.
void luaS_resize(lua_State* L, int newsize)
{
    TString** fresh = luaM_newarray(L, newsize, TString*, 0);
    stringtable* tb = &L->global->strt;

    // start with every new bucket empty
    for (int slot = 0; slot < newsize; slot++)
        fresh[slot] = NULL;

    // rehash: walk each old chain and push every node onto the head of its new chain
    for (int old = 0; old < tb->size; old++)
    {
        for (TString* node = tb->hash[old]; node != NULL;)
        {
            TString* rest = node->next; // remember the tail before relinking the node
            unsigned int h = node->hash;
            int slot = lmod(h, newsize); // new position

            // sanity check: lmod must agree with a plain modulo for this table size
            LUAU_ASSERT(cast_int(h % newsize) == lmod(h, newsize));

            node->next = fresh[slot];
            fresh[slot] = node;

            node = rest;
        }
    }

    // release the old bucket array (sized by the old tb->size) before publishing the new one
    luaM_freearray(L, tb->hash, tb->size, TString*, 0);
    tb->size = newsize;
    tb->hash = fresh;
}
|
|
|
|
|
|
|
|
// Creates a new string object holding a copy of the `l` bytes at `str`, records the
// precomputed hash `h` on it, and chains it into the global string table.
// Returns the new string.
static TString* newlstr(lua_State* L, const char* str, size_t l, unsigned int h)
{
    // reject lengths beyond MAXSSIZE
    if (l > MAXSSIZE)
        luaM_toobig(L);

    TString* ts = luaM_newgco(L, TString, sizestring(l), L->activememcat);

    // object header fields
    ts->tt = LUA_TSTRING;
    ts->marked = luaC_white(L->global);
    ts->memcat = L->activememcat;

    // string payload
    ts->len = (unsigned int)l;
    ts->hash = h;
    ts->atom = ATOM_UNDEF;
    memcpy(ts->data, str, l);
    ts->data[l] = '\0'; // ending 0

    // chain the new entry at the head of its bucket
    stringtable* tb = &L->global->strt;
    int bucket = lmod(h, tb->size);

    ts->next = tb->hash[bucket];
    tb->hash[bucket] = ts;
    tb->nuse++;

    // too crowded: double the table once there are more strings than buckets,
    // as long as doubling the size cannot overflow an int
    bool crowded = tb->nuse > cast_to(uint32_t, tb->size);
    bool canGrow = tb->size <= INT_MAX / 2;

    if (crowded && canGrow)
        luaS_resize(L, tb->size * 2);

    return ts;
}
|
|
|
|
|
|
|
|
// Pushes `ts` onto the head of the global strbufgc list and gives it the current white mark.
static void linkstrbuf(lua_State* L, TString* ts)
{
    global_State* global = L->global;

    ts->marked = luaC_white(global);

    // insert at the head of the singly linked list
    ts->next = global->strbufgc;
    global->strbufgc = ts;
}
|
|
|
|
|
|
|
|
// Removes `ts` from the global strbufgc list.
// The string is expected to be on the list; not finding it trips an assertion.
static void unlinkstrbuf(lua_State* L, TString* ts)
{
    global_State* g = L->global;

    // `link` always points at the pointer that refers to the current node, so the node
    // can be spliced out without tracking a separate "previous" element
    for (TString** link = &g->strbufgc; *link != NULL; link = &(*link)->next)
    {
        if (*link == ts)
        {
            *link = ts->next;
            return;
        }
    }

    LUAU_ASSERT(!"failed to find string buffer");
}
|
|
|
|
|
|
|
|
// Allocates a string object with room for `size` bytes of data and registers it on the
// strbufgc list via linkstrbuf. The data bytes, hash and atom are not initialized here.
// Returns the new string buffer.
TString* luaS_bufstart(lua_State* L, size_t size)
{
    // reject sizes beyond MAXSSIZE
    if (size > MAXSSIZE)
        luaM_toobig(L);

    TString* buf = luaM_newgco(L, TString, sizestring(size), L->activememcat);

    buf->tt = LUA_TSTRING;
    buf->memcat = L->activememcat;
    buf->len = (unsigned int)size;

    linkstrbuf(L, buf);

    return buf;
}
|
|
|
|
|
|
|
|
// Turns the string buffer `ts` into an entry of the global string table.
// If the table already holds a string with the same contents, that string is returned and
// `ts` is left untouched on the strbufgc list; otherwise `ts` is moved from the strbufgc
// list into the table and returned.
TString* luaS_buffinish(lua_State* L, TString* ts)
{
    stringtable* tb = &L->global->strt;
    unsigned int h = luaS_hash(ts->data, ts->len);
    int bucket = lmod(h, tb->size);

    // search if we already have this string in the hash table
    TString* el = tb->hash[bucket];
    while (el != NULL)
    {
        bool sameLength = el->len == ts->len;

        if (sameLength && memcmp(el->data, ts->data, ts->len) == 0)
        {
            // string may be dead
            if (isdead(L->global, obj2gco(el)))
                changewhite(obj2gco(el));

            return el;
        }

        el = el->next;
    }

    // not found: take the buffer off the strbufgc list before its `next` field is reused below
    unlinkstrbuf(L, ts);

    ts->hash = h;
    ts->data[ts->len] = '\0'; // ending 0
    ts->atom = ATOM_UNDEF;

    // chain new entry at the head of its bucket
    ts->next = tb->hash[bucket];
    tb->hash[bucket] = ts;
    tb->nuse++;

    // too crowded: double the table once there are more strings than buckets,
    // as long as doubling the size cannot overflow an int
    bool crowded = tb->nuse > cast_to(uint32_t, tb->size);
    bool canGrow = tb->size <= INT_MAX / 2;

    if (crowded && canGrow)
        luaS_resize(L, tb->size * 2);

    return ts;
}
|
|
|
|
|
|
|
|
// Returns the string-table entry for the `l` bytes at `str`: an existing entry with the same
// contents when one is found in the matching bucket, otherwise a newly created one.
TString* luaS_newlstr(lua_State* L, const char* str, size_t l)
{
    unsigned int h = luaS_hash(str, l);
    stringtable* tb = &L->global->strt;

    TString* el = tb->hash[lmod(h, tb->size)];
    while (el != NULL)
    {
        bool sameLength = el->len == l;

        if (sameLength && memcmp(str, getstr(el), l) == 0)
        {
            // string may be dead
            if (isdead(L->global, obj2gco(el)))
                changewhite(obj2gco(el));

            return el;
        }

        el = el->next;
    }

    // not found
    return newlstr(L, str, l, h);
}
|
|
|
|
|
2022-01-21 17:00:19 +00:00
|
|
|
// Removes `ts` from the string-table bucket selected by its stored hash.
// Returns true if the string was found and unchained, false if it is not in that bucket.
static bool unlinkstr(lua_State* L, TString* ts)
{
    global_State* g = L->global;
    int bucket = lmod(ts->hash, g->strt.size);

    // `link` always points at the pointer that refers to the current node, so the node
    // can be spliced out without tracking a separate "previous" element
    for (TString** link = &g->strt.hash[bucket]; *link != NULL; link = &(*link)->next)
    {
        if (*link == ts)
        {
            *link = ts->next;
            return true;
        }
    }

    return false;
}
|
|
|
|
|
|
|
|
// Detaches `ts` from whichever list currently holds it and releases its memory.
void luaS_free(lua_State* L, TString* ts, lua_Page* page)
{
    if (unlinkstr(L, ts))
    {
        // it was chained in the string table, so the table now holds one string fewer
        L->global->strt.nuse--;
    }
    else
    {
        // An unlikely scenario when we have a string buffer on our hands
        unlinkstrbuf(L, ts);
    }

    luaM_freegco(L, ts, sizestring(ts->len), ts->memcat, page);
}
|