2021-10-29 21:25:12 +01:00
|
|
|
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
|
|
// This code is based on Lua 5.x implementation licensed under MIT License; see lua_LICENSE.txt for details
|
|
|
|
#include "lualib.h"
|
|
|
|
|
|
|
|
#include "lstring.h"
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
2023-08-04 20:18:54 +01:00
|
|
|
LUAU_FASTFLAGVARIABLE(LuauFasterInterp, false)
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// macro to `unsign' a character
|
2021-10-29 21:25:12 +01:00
|
|
|
#define uchar(c) ((unsigned char)(c))
|
|
|
|
|
|
|
|
static int str_len(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
luaL_checklstring(L, 1, &l);
|
|
|
|
lua_pushinteger(L, (int)l);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int posrelat(int pos, size_t len)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
// relative string position: negative means back from end
|
2021-10-29 21:25:12 +01:00
|
|
|
if (pos < 0)
|
|
|
|
pos += (int)len + 1;
|
|
|
|
return (pos >= 0) ? pos : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_sub(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, 1, &l);
|
|
|
|
int start = posrelat(luaL_checkinteger(L, 2), l);
|
|
|
|
int end = posrelat(luaL_optinteger(L, 3, -1), l);
|
|
|
|
if (start < 1)
|
|
|
|
start = 1;
|
|
|
|
if (end > (int)l)
|
|
|
|
end = (int)l;
|
|
|
|
if (start <= end)
|
|
|
|
lua_pushlstring(L, s + start - 1, end - start + 1);
|
|
|
|
else
|
|
|
|
lua_pushliteral(L, "");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_reverse(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, 1, &l);
|
|
|
|
luaL_Buffer b;
|
|
|
|
char* ptr = luaL_buffinitsize(L, &b, l);
|
|
|
|
while (l--)
|
|
|
|
*ptr++ = s[l];
|
|
|
|
luaL_pushresultsize(&b, ptr - b.p);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_lower(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, 1, &l);
|
|
|
|
luaL_Buffer b;
|
|
|
|
char* ptr = luaL_buffinitsize(L, &b, l);
|
|
|
|
for (size_t i = 0; i < l; i++)
|
|
|
|
*ptr++ = tolower(uchar(s[i]));
|
|
|
|
luaL_pushresultsize(&b, l);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_upper(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, 1, &l);
|
|
|
|
luaL_Buffer b;
|
|
|
|
char* ptr = luaL_buffinitsize(L, &b, l);
|
|
|
|
for (size_t i = 0; i < l; i++)
|
|
|
|
*ptr++ = toupper(uchar(s[i]));
|
|
|
|
luaL_pushresultsize(&b, l);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_rep(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, 1, &l);
|
|
|
|
int n = luaL_checkinteger(L, 2);
|
|
|
|
|
|
|
|
if (n <= 0)
|
|
|
|
{
|
|
|
|
lua_pushliteral(L, "");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (l > MAXSSIZE / (size_t)n) // may overflow?
|
|
|
|
luaL_error(L, "resulting string too large");
|
|
|
|
|
|
|
|
luaL_Buffer b;
|
|
|
|
char* ptr = luaL_buffinitsize(L, &b, l * n);
|
|
|
|
|
|
|
|
const char* start = ptr;
|
|
|
|
|
|
|
|
size_t left = l * n;
|
|
|
|
size_t step = l;
|
|
|
|
|
|
|
|
memcpy(ptr, s, l);
|
|
|
|
ptr += l;
|
|
|
|
left -= l;
|
|
|
|
|
|
|
|
// use the increasing 'pattern' inside our target buffer to fill the next part
|
|
|
|
while (step < left)
|
|
|
|
{
|
|
|
|
memcpy(ptr, start, step);
|
|
|
|
ptr += step;
|
|
|
|
left -= step;
|
|
|
|
step <<= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// fill tail
|
|
|
|
memcpy(ptr, start, left);
|
|
|
|
ptr += left;
|
|
|
|
|
|
|
|
luaL_pushresultsize(&b, l * n);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_byte(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, 1, &l);
|
|
|
|
int posi = posrelat(luaL_optinteger(L, 2, 1), l);
|
|
|
|
int pose = posrelat(luaL_optinteger(L, 3, posi), l);
|
|
|
|
int n, i;
|
|
|
|
if (posi <= 0)
|
|
|
|
posi = 1;
|
|
|
|
if ((size_t)pose > l)
|
|
|
|
pose = (int)l;
|
|
|
|
if (posi > pose)
|
2022-08-04 23:35:33 +01:00
|
|
|
return 0; // empty interval; return no values
|
2021-10-29 21:25:12 +01:00
|
|
|
n = (int)(pose - posi + 1);
|
2022-08-04 23:35:33 +01:00
|
|
|
if (posi + n <= pose) // overflow?
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_error(L, "string slice too long");
|
|
|
|
luaL_checkstack(L, n, "string slice too long");
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
lua_pushinteger(L, uchar(s[posi + i - 1]));
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_char(lua_State* L)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
int n = lua_gettop(L); // number of arguments
|
2021-10-29 21:25:12 +01:00
|
|
|
|
|
|
|
luaL_Buffer b;
|
|
|
|
char* ptr = luaL_buffinitsize(L, &b, n);
|
|
|
|
|
|
|
|
for (int i = 1; i <= n; i++)
|
|
|
|
{
|
|
|
|
int c = luaL_checkinteger(L, i);
|
|
|
|
luaL_argcheck(L, uchar(c) == c, i, "invalid value");
|
|
|
|
|
|
|
|
*ptr++ = uchar(c);
|
|
|
|
}
|
|
|
|
luaL_pushresultsize(&b, n);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** {======================================================
|
|
|
|
** PATTERN MATCHING
|
|
|
|
** =======================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define CAP_UNFINISHED (-1)
|
|
|
|
#define CAP_POSITION (-2)
|
|
|
|
|
|
|
|
typedef struct MatchState
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
int matchdepth; // control for recursive depth (to avoid C stack overflow)
|
|
|
|
const char* src_init; // init of source string
|
|
|
|
const char* src_end; // end ('\0') of source string
|
|
|
|
const char* p_end; // end ('\0') of pattern
|
2021-10-29 21:25:12 +01:00
|
|
|
lua_State* L;
|
2022-08-04 23:35:33 +01:00
|
|
|
int level; // total number of captures (finished or unfinished)
|
2021-10-29 21:25:12 +01:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
const char* init;
|
|
|
|
ptrdiff_t len;
|
|
|
|
} capture[LUA_MAXCAPTURES];
|
|
|
|
} MatchState;
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// recursive function
|
2021-10-29 21:25:12 +01:00
|
|
|
static const char* match(MatchState* ms, const char* s, const char* p);
|
|
|
|
|
|
|
|
#define L_ESC '%'
|
|
|
|
#define SPECIALS "^$*+?.([%-"
|
|
|
|
|
|
|
|
static int check_capture(MatchState* ms, int l)
|
|
|
|
{
|
|
|
|
l -= '1';
|
|
|
|
if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
|
|
|
|
luaL_error(ms->L, "invalid capture index %%%d", l + 1);
|
|
|
|
return l;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int capture_to_close(MatchState* ms)
|
|
|
|
{
|
|
|
|
int level = ms->level;
|
|
|
|
for (level--; level >= 0; level--)
|
|
|
|
if (ms->capture[level].len == CAP_UNFINISHED)
|
|
|
|
return level;
|
|
|
|
luaL_error(ms->L, "invalid pattern capture");
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* classend(MatchState* ms, const char* p)
|
|
|
|
{
|
|
|
|
switch (*p++)
|
|
|
|
{
|
|
|
|
case L_ESC:
|
|
|
|
{
|
|
|
|
if (p == ms->p_end)
|
|
|
|
luaL_error(ms->L, "malformed pattern (ends with '%%')");
|
|
|
|
return p + 1;
|
|
|
|
}
|
|
|
|
case '[':
|
|
|
|
{
|
|
|
|
if (*p == '^')
|
|
|
|
p++;
|
|
|
|
do
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // look for a `]'
|
2021-10-29 21:25:12 +01:00
|
|
|
if (p == ms->p_end)
|
|
|
|
luaL_error(ms->L, "malformed pattern (missing ']')");
|
|
|
|
if (*(p++) == L_ESC && p < ms->p_end)
|
2022-08-04 23:35:33 +01:00
|
|
|
p++; // skip escapes (e.g. `%]')
|
2021-10-29 21:25:12 +01:00
|
|
|
} while (*p != ']');
|
|
|
|
return p + 1;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
{
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int match_class(int c, int cl)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
switch (tolower(cl))
|
|
|
|
{
|
|
|
|
case 'a':
|
|
|
|
res = isalpha(c);
|
|
|
|
break;
|
|
|
|
case 'c':
|
|
|
|
res = iscntrl(c);
|
|
|
|
break;
|
|
|
|
case 'd':
|
|
|
|
res = isdigit(c);
|
|
|
|
break;
|
|
|
|
case 'g':
|
|
|
|
res = isgraph(c);
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
res = islower(c);
|
|
|
|
break;
|
|
|
|
case 'p':
|
|
|
|
res = ispunct(c);
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
res = isspace(c);
|
|
|
|
break;
|
|
|
|
case 'u':
|
|
|
|
res = isupper(c);
|
|
|
|
break;
|
|
|
|
case 'w':
|
|
|
|
res = isalnum(c);
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
res = isxdigit(c);
|
|
|
|
break;
|
|
|
|
case 'z':
|
|
|
|
res = (c == 0);
|
2022-08-04 23:35:33 +01:00
|
|
|
break; // deprecated option
|
2021-10-29 21:25:12 +01:00
|
|
|
default:
|
|
|
|
return (cl == c);
|
|
|
|
}
|
|
|
|
return (islower(cl) ? res : !res);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int matchbracketclass(int c, const char* p, const char* ec)
|
|
|
|
{
|
|
|
|
int sig = 1;
|
|
|
|
if (*(p + 1) == '^')
|
|
|
|
{
|
|
|
|
sig = 0;
|
2022-08-04 23:35:33 +01:00
|
|
|
p++; // skip the `^'
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
while (++p < ec)
|
|
|
|
{
|
|
|
|
if (*p == L_ESC)
|
|
|
|
{
|
|
|
|
p++;
|
|
|
|
if (match_class(c, uchar(*p)))
|
|
|
|
return sig;
|
|
|
|
}
|
|
|
|
else if ((*(p + 1) == '-') && (p + 2 < ec))
|
|
|
|
{
|
|
|
|
p += 2;
|
|
|
|
if (uchar(*(p - 2)) <= c && c <= uchar(*p))
|
|
|
|
return sig;
|
|
|
|
}
|
|
|
|
else if (uchar(*p) == c)
|
|
|
|
return sig;
|
|
|
|
}
|
|
|
|
return !sig;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int singlematch(MatchState* ms, const char* s, const char* p, const char* ep)
|
|
|
|
{
|
|
|
|
if (s >= ms->src_end)
|
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int c = uchar(*s);
|
|
|
|
switch (*p)
|
|
|
|
{
|
|
|
|
case '.':
|
2022-08-04 23:35:33 +01:00
|
|
|
return 1; // matches any char
|
2021-10-29 21:25:12 +01:00
|
|
|
case L_ESC:
|
|
|
|
return match_class(c, uchar(*(p + 1)));
|
|
|
|
case '[':
|
|
|
|
return matchbracketclass(c, p, ep - 1);
|
|
|
|
default:
|
|
|
|
return (uchar(*p) == c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* matchbalance(MatchState* ms, const char* s, const char* p)
|
|
|
|
{
|
|
|
|
if (p >= ms->p_end - 1)
|
|
|
|
luaL_error(ms->L, "malformed pattern (missing arguments to '%%b')");
|
|
|
|
if (*s != *p)
|
|
|
|
return NULL;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int b = *p;
|
|
|
|
int e = *(p + 1);
|
|
|
|
int cont = 1;
|
|
|
|
while (++s < ms->src_end)
|
|
|
|
{
|
|
|
|
if (*s == e)
|
|
|
|
{
|
|
|
|
if (--cont == 0)
|
|
|
|
return s + 1;
|
|
|
|
}
|
|
|
|
else if (*s == b)
|
|
|
|
cont++;
|
|
|
|
}
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
return NULL; // string ends out of balance
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static const char* max_expand(MatchState* ms, const char* s, const char* p, const char* ep)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
ptrdiff_t i = 0; // counts maximum expand for item
|
2021-10-29 21:25:12 +01:00
|
|
|
while (singlematch(ms, s + i, p, ep))
|
|
|
|
i++;
|
2022-08-04 23:35:33 +01:00
|
|
|
// keeps trying to match with the maximum repetitions
|
2021-10-29 21:25:12 +01:00
|
|
|
while (i >= 0)
|
|
|
|
{
|
|
|
|
const char* res = match(ms, (s + i), ep + 1);
|
|
|
|
if (res)
|
|
|
|
return res;
|
2022-08-04 23:35:33 +01:00
|
|
|
i--; // else didn't match; reduce 1 repetition to try again
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* min_expand(MatchState* ms, const char* s, const char* p, const char* ep)
|
|
|
|
{
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
const char* res = match(ms, s, ep + 1);
|
|
|
|
if (res != NULL)
|
|
|
|
return res;
|
|
|
|
else if (singlematch(ms, s, p, ep))
|
2022-08-04 23:35:33 +01:00
|
|
|
s++; // try with one more repetition
|
2021-10-29 21:25:12 +01:00
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* start_capture(MatchState* ms, const char* s, const char* p, int what)
|
|
|
|
{
|
|
|
|
const char* res;
|
|
|
|
int level = ms->level;
|
|
|
|
if (level >= LUA_MAXCAPTURES)
|
|
|
|
luaL_error(ms->L, "too many captures");
|
|
|
|
ms->capture[level].init = s;
|
|
|
|
ms->capture[level].len = what;
|
|
|
|
ms->level = level + 1;
|
2022-08-04 23:35:33 +01:00
|
|
|
if ((res = match(ms, s, p)) == NULL) // match failed?
|
|
|
|
ms->level--; // undo capture
|
2021-10-29 21:25:12 +01:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* end_capture(MatchState* ms, const char* s, const char* p)
|
|
|
|
{
|
|
|
|
int l = capture_to_close(ms);
|
|
|
|
const char* res;
|
2022-08-04 23:35:33 +01:00
|
|
|
ms->capture[l].len = s - ms->capture[l].init; // close capture
|
|
|
|
if ((res = match(ms, s, p)) == NULL) // match failed?
|
|
|
|
ms->capture[l].len = CAP_UNFINISHED; // undo capture
|
2021-10-29 21:25:12 +01:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* match_capture(MatchState* ms, const char* s, int l)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
l = check_capture(ms, l);
|
|
|
|
len = ms->capture[l].len;
|
|
|
|
if ((size_t)(ms->src_end - s) >= len && memcmp(ms->capture[l].init, s, len) == 0)
|
|
|
|
return s + len;
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* match(MatchState* ms, const char* s, const char* p)
|
|
|
|
{
|
|
|
|
if (ms->matchdepth-- == 0)
|
|
|
|
luaL_error(ms->L, "pattern too complex");
|
2022-08-04 23:35:33 +01:00
|
|
|
init: // using goto's to optimize tail recursion
|
2021-10-29 21:25:12 +01:00
|
|
|
if (p != ms->p_end)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // end of pattern?
|
2021-10-29 21:25:12 +01:00
|
|
|
switch (*p)
|
|
|
|
{
|
|
|
|
case '(':
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // start capture
|
|
|
|
if (*(p + 1) == ')') // position capture?
|
2021-10-29 21:25:12 +01:00
|
|
|
s = start_capture(ms, s, p + 2, CAP_POSITION);
|
|
|
|
else
|
|
|
|
s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case ')':
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // end capture
|
2021-10-29 21:25:12 +01:00
|
|
|
s = end_capture(ms, s, p + 1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case '$':
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
if ((p + 1) != ms->p_end) // is the `$' the last char in pattern?
|
|
|
|
goto dflt; // no; go to default
|
|
|
|
s = (s == ms->src_end) ? s : NULL; // check end of string
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case L_ESC:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // escaped sequences not in the format class[*+?-]?
|
2021-10-29 21:25:12 +01:00
|
|
|
switch (*(p + 1))
|
|
|
|
{
|
|
|
|
case 'b':
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // balanced string?
|
2021-10-29 21:25:12 +01:00
|
|
|
s = matchbalance(ms, s, p + 2);
|
|
|
|
if (s != NULL)
|
|
|
|
{
|
|
|
|
p += 4;
|
2022-08-04 23:35:33 +01:00
|
|
|
goto init; // return match(ms, s, p + 4);
|
|
|
|
} // else fail (s == NULL)
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 'f':
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // frontier?
|
2021-10-29 21:25:12 +01:00
|
|
|
const char* ep;
|
|
|
|
char previous;
|
|
|
|
p += 2;
|
|
|
|
if (*p != '[')
|
|
|
|
luaL_error(ms->L, "missing '[' after '%%f' in pattern");
|
2022-08-04 23:35:33 +01:00
|
|
|
ep = classend(ms, p); // points to what is next
|
2021-10-29 21:25:12 +01:00
|
|
|
previous = (s == ms->src_init) ? '\0' : *(s - 1);
|
|
|
|
if (!matchbracketclass(uchar(previous), p, ep - 1) && matchbracketclass(uchar(*s), p, ep - 1))
|
|
|
|
{
|
|
|
|
p = ep;
|
2022-08-04 23:35:33 +01:00
|
|
|
goto init; // return match(ms, s, ep);
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
s = NULL; // match failed
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case '0':
|
|
|
|
case '1':
|
|
|
|
case '2':
|
|
|
|
case '3':
|
|
|
|
case '4':
|
|
|
|
case '5':
|
|
|
|
case '6':
|
|
|
|
case '7':
|
|
|
|
case '8':
|
|
|
|
case '9':
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // capture results (%0-%9)?
|
2021-10-29 21:25:12 +01:00
|
|
|
s = match_capture(ms, s, uchar(*(p + 1)));
|
|
|
|
if (s != NULL)
|
|
|
|
{
|
|
|
|
p += 2;
|
2022-08-04 23:35:33 +01:00
|
|
|
goto init; // return match(ms, s, p + 2)
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
goto dflt;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
dflt:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // pattern class plus optional suffix
|
|
|
|
const char* ep = classend(ms, p); // points to optional suffix
|
|
|
|
// does not match at least once?
|
2021-10-29 21:25:12 +01:00
|
|
|
if (!singlematch(ms, s, p, ep))
|
|
|
|
{
|
|
|
|
if (*ep == '*' || *ep == '?' || *ep == '-')
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // accept empty?
|
2021-10-29 21:25:12 +01:00
|
|
|
p = ep + 1;
|
2022-08-04 23:35:33 +01:00
|
|
|
goto init; // return match(ms, s, ep + 1);
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
else // '+' or no suffix
|
|
|
|
s = NULL; // fail
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
else
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // matched once
|
2021-10-29 21:25:12 +01:00
|
|
|
switch (*ep)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // handle optional suffix
|
2021-10-29 21:25:12 +01:00
|
|
|
case '?':
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // optional
|
2021-10-29 21:25:12 +01:00
|
|
|
const char* res;
|
|
|
|
if ((res = match(ms, s + 1, ep + 1)) != NULL)
|
|
|
|
s = res;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
p = ep + 1;
|
2022-08-04 23:35:33 +01:00
|
|
|
goto init; // else return match(ms, s, ep + 1);
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
case '+': // 1 or more repetitions
|
|
|
|
s++; // 1 match already done
|
|
|
|
// go through
|
|
|
|
case '*': // 0 or more repetitions
|
2021-10-29 21:25:12 +01:00
|
|
|
s = max_expand(ms, s, p, ep);
|
|
|
|
break;
|
2022-08-04 23:35:33 +01:00
|
|
|
case '-': // 0 or more repetitions (minimum)
|
2021-10-29 21:25:12 +01:00
|
|
|
s = min_expand(ms, s, p, ep);
|
|
|
|
break;
|
2022-08-04 23:35:33 +01:00
|
|
|
default: // no suffix
|
2021-10-29 21:25:12 +01:00
|
|
|
s++;
|
|
|
|
p = ep;
|
2022-08-04 23:35:33 +01:00
|
|
|
goto init; // return match(ms, s + 1, ep);
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ms->matchdepth++;
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* lmemfind(const char* s1, size_t l1, const char* s2, size_t l2)
|
|
|
|
{
|
|
|
|
if (l2 == 0)
|
2022-08-04 23:35:33 +01:00
|
|
|
return s1; // empty strings are everywhere
|
2021-10-29 21:25:12 +01:00
|
|
|
else if (l2 > l1)
|
2022-08-04 23:35:33 +01:00
|
|
|
return NULL; // avoids a negative `l1'
|
2021-10-29 21:25:12 +01:00
|
|
|
else
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
const char* init; // to search for a `*s2' inside `s1'
|
|
|
|
l2--; // 1st char will be checked by `memchr'
|
|
|
|
l1 = l1 - l2; // `s2' cannot be found after that
|
2021-10-29 21:25:12 +01:00
|
|
|
while (l1 > 0 && (init = (const char*)memchr(s1, *s2, l1)) != NULL)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
init++; // 1st char is already checked
|
2021-10-29 21:25:12 +01:00
|
|
|
if (memcmp(init, s2 + 1, l2) == 0)
|
|
|
|
return init - 1;
|
|
|
|
else
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // correct `l1' and `s1' to try again
|
2021-10-29 21:25:12 +01:00
|
|
|
l1 -= init - s1;
|
|
|
|
s1 = init;
|
|
|
|
}
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
return NULL; // not found
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void push_onecapture(MatchState* ms, int i, const char* s, const char* e)
|
|
|
|
{
|
|
|
|
if (i >= ms->level)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
if (i == 0) // ms->level == 0, too
|
|
|
|
lua_pushlstring(ms->L, s, e - s); // add whole match
|
2021-10-29 21:25:12 +01:00
|
|
|
else
|
|
|
|
luaL_error(ms->L, "invalid capture index");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ptrdiff_t l = ms->capture[i].len;
|
|
|
|
if (l == CAP_UNFINISHED)
|
|
|
|
luaL_error(ms->L, "unfinished capture");
|
|
|
|
if (l == CAP_POSITION)
|
|
|
|
lua_pushinteger(ms->L, (int)(ms->capture[i].init - ms->src_init) + 1);
|
|
|
|
else
|
|
|
|
lua_pushlstring(ms->L, ms->capture[i].init, l);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int push_captures(MatchState* ms, const char* s, const char* e)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
|
|
|
|
luaL_checkstack(ms->L, nlevels, "too many captures");
|
|
|
|
for (i = 0; i < nlevels; i++)
|
|
|
|
push_onecapture(ms, i, s, e);
|
2022-08-04 23:35:33 +01:00
|
|
|
return nlevels; // number of strings pushed
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// check whether pattern has no special characters
|
2021-10-29 21:25:12 +01:00
|
|
|
static int nospecials(const char* p, size_t l)
|
|
|
|
{
|
|
|
|
size_t upto = 0;
|
|
|
|
do
|
|
|
|
{
|
|
|
|
if (strpbrk(p + upto, SPECIALS))
|
2022-08-04 23:35:33 +01:00
|
|
|
return 0; // pattern has a special character
|
|
|
|
upto += strlen(p + upto) + 1; // may have more after \0
|
2021-10-29 21:25:12 +01:00
|
|
|
} while (upto <= l);
|
2022-08-04 23:35:33 +01:00
|
|
|
return 1; // no special chars found
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void prepstate(MatchState* ms, lua_State* L, const char* s, size_t ls, const char* p, size_t lp)
|
|
|
|
{
|
|
|
|
ms->L = L;
|
|
|
|
ms->matchdepth = LUAI_MAXCCALLS;
|
|
|
|
ms->src_init = s;
|
|
|
|
ms->src_end = s + ls;
|
|
|
|
ms->p_end = p + lp;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void reprepstate(MatchState* ms)
|
|
|
|
{
|
|
|
|
ms->level = 0;
|
|
|
|
LUAU_ASSERT(ms->matchdepth == LUAI_MAXCCALLS);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_find_aux(lua_State* L, int find)
|
|
|
|
{
|
|
|
|
size_t ls, lp;
|
|
|
|
const char* s = luaL_checklstring(L, 1, &ls);
|
|
|
|
const char* p = luaL_checklstring(L, 2, &lp);
|
|
|
|
int init = posrelat(luaL_optinteger(L, 3, 1), ls);
|
|
|
|
if (init < 1)
|
|
|
|
init = 1;
|
|
|
|
else if (init > (int)ls + 1)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // start after string's end?
|
|
|
|
lua_pushnil(L); // cannot find anything
|
2021-10-29 21:25:12 +01:00
|
|
|
return 1;
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
// explicit request or no special characters?
|
2021-10-29 21:25:12 +01:00
|
|
|
if (find && (lua_toboolean(L, 4) || nospecials(p, lp)))
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
// do a plain search
|
2021-10-29 21:25:12 +01:00
|
|
|
const char* s2 = lmemfind(s + init - 1, ls - init + 1, p, lp);
|
|
|
|
if (s2)
|
|
|
|
{
|
|
|
|
lua_pushinteger(L, (int)(s2 - s + 1));
|
|
|
|
lua_pushinteger(L, (int)(s2 - s + lp));
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
MatchState ms;
|
|
|
|
const char* s1 = s + init - 1;
|
|
|
|
int anchor = (*p == '^');
|
|
|
|
if (anchor)
|
|
|
|
{
|
|
|
|
p++;
|
2022-08-04 23:35:33 +01:00
|
|
|
lp--; // skip anchor character
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
prepstate(&ms, L, s, ls, p, lp);
|
|
|
|
do
|
|
|
|
{
|
|
|
|
const char* res;
|
|
|
|
reprepstate(&ms);
|
|
|
|
if ((res = match(&ms, s1, p)) != NULL)
|
|
|
|
{
|
|
|
|
if (find)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_pushinteger(L, (int)(s1 - s + 1)); // start
|
|
|
|
lua_pushinteger(L, (int)(res - s)); // end
|
2021-10-29 21:25:12 +01:00
|
|
|
return push_captures(&ms, NULL, 0) + 2;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return push_captures(&ms, s1, res);
|
|
|
|
}
|
|
|
|
} while (s1++ < ms.src_end && !anchor);
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_pushnil(L); // not found
|
2021-10-29 21:25:12 +01:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_find(lua_State* L)
|
|
|
|
{
|
|
|
|
return str_find_aux(L, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_match(lua_State* L)
|
|
|
|
{
|
|
|
|
return str_find_aux(L, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int gmatch_aux(lua_State* L)
|
|
|
|
{
|
|
|
|
MatchState ms;
|
|
|
|
size_t ls, lp;
|
|
|
|
const char* s = lua_tolstring(L, lua_upvalueindex(1), &ls);
|
|
|
|
const char* p = lua_tolstring(L, lua_upvalueindex(2), &lp);
|
|
|
|
const char* src;
|
|
|
|
prepstate(&ms, L, s, ls, p, lp);
|
|
|
|
for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); src <= ms.src_end; src++)
|
|
|
|
{
|
|
|
|
const char* e;
|
|
|
|
reprepstate(&ms);
|
|
|
|
if ((e = match(&ms, src, p)) != NULL)
|
|
|
|
{
|
|
|
|
int newstart = (int)(e - s);
|
|
|
|
if (e == src)
|
2022-08-04 23:35:33 +01:00
|
|
|
newstart++; // empty match? go at least one position
|
2021-10-29 21:25:12 +01:00
|
|
|
lua_pushinteger(L, newstart);
|
|
|
|
lua_replace(L, lua_upvalueindex(3));
|
|
|
|
return push_captures(&ms, src, e);
|
|
|
|
}
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
return 0; // not found
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int gmatch(lua_State* L)
|
|
|
|
{
|
|
|
|
luaL_checkstring(L, 1);
|
|
|
|
luaL_checkstring(L, 2);
|
|
|
|
lua_settop(L, 2);
|
|
|
|
lua_pushinteger(L, 0);
|
2021-11-19 16:10:07 +00:00
|
|
|
lua_pushcclosure(L, gmatch_aux, NULL, 3);
|
2021-10-29 21:25:12 +01:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void add_s(MatchState* ms, luaL_Buffer* b, const char* s, const char* e)
|
|
|
|
{
|
|
|
|
size_t l, i;
|
|
|
|
const char* news = lua_tolstring(ms->L, 3, &l);
|
|
|
|
|
|
|
|
luaL_reservebuffer(b, l, -1);
|
|
|
|
|
|
|
|
for (i = 0; i < l; i++)
|
|
|
|
{
|
|
|
|
if (news[i] != L_ESC)
|
|
|
|
luaL_addchar(b, news[i]);
|
|
|
|
else
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
i++; // skip ESC
|
2021-10-29 21:25:12 +01:00
|
|
|
if (!isdigit(uchar(news[i])))
|
|
|
|
{
|
|
|
|
if (news[i] != L_ESC)
|
|
|
|
luaL_error(ms->L, "invalid use of '%c' in replacement string", L_ESC);
|
|
|
|
luaL_addchar(b, news[i]);
|
|
|
|
}
|
|
|
|
else if (news[i] == '0')
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(b, s, e - s, -1);
|
2021-10-29 21:25:12 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
push_onecapture(ms, news[i] - '1', s, e);
|
2022-08-04 23:35:33 +01:00
|
|
|
luaL_addvalue(b); // add capture to accumulated result
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void add_value(MatchState* ms, luaL_Buffer* b, const char* s, const char* e, int tr)
|
|
|
|
{
|
|
|
|
lua_State* L = ms->L;
|
|
|
|
switch (tr)
|
|
|
|
{
|
|
|
|
case LUA_TFUNCTION:
|
|
|
|
{
|
|
|
|
int n;
|
|
|
|
lua_pushvalue(L, 3);
|
|
|
|
n = push_captures(ms, s, e);
|
|
|
|
lua_call(L, n, 1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case LUA_TTABLE:
|
|
|
|
{
|
|
|
|
push_onecapture(ms, 0, s, e);
|
|
|
|
lua_gettable(L, 3);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // LUA_TNUMBER or LUA_TSTRING
|
2021-10-29 21:25:12 +01:00
|
|
|
add_s(ms, b, s, e);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!lua_toboolean(L, -1))
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // nil or false?
|
2021-10-29 21:25:12 +01:00
|
|
|
lua_pop(L, 1);
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_pushlstring(L, s, e - s); // keep original text
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
else if (!lua_isstring(L, -1))
|
|
|
|
luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
|
2022-08-04 23:35:33 +01:00
|
|
|
luaL_addvalue(b); // add result to accumulator
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int str_gsub(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t srcl, lp;
|
|
|
|
const char* src = luaL_checklstring(L, 1, &srcl);
|
|
|
|
const char* p = luaL_checklstring(L, 2, &lp);
|
|
|
|
int tr = lua_type(L, 3);
|
|
|
|
int max_s = luaL_optinteger(L, 4, (int)srcl + 1);
|
|
|
|
int anchor = (*p == '^');
|
|
|
|
int n = 0;
|
|
|
|
MatchState ms;
|
|
|
|
luaL_Buffer b;
|
|
|
|
luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, "string/function/table");
|
|
|
|
luaL_buffinit(L, &b);
|
|
|
|
if (anchor)
|
|
|
|
{
|
|
|
|
p++;
|
2022-08-04 23:35:33 +01:00
|
|
|
lp--; // skip anchor character
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
prepstate(&ms, L, src, srcl, p, lp);
|
|
|
|
while (n < max_s)
|
|
|
|
{
|
|
|
|
const char* e;
|
|
|
|
reprepstate(&ms);
|
|
|
|
e = match(&ms, src, p);
|
|
|
|
if (e)
|
|
|
|
{
|
|
|
|
n++;
|
|
|
|
add_value(&ms, &b, src, e, tr);
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
if (e && e > src) // non empty match?
|
|
|
|
src = e; // skip it
|
2021-10-29 21:25:12 +01:00
|
|
|
else if (src < ms.src_end)
|
|
|
|
luaL_addchar(&b, *src++);
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
if (anchor)
|
|
|
|
break;
|
|
|
|
}
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(&b, src, ms.src_end - src, -1);
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_pushresult(&b);
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_pushinteger(L, n); // number of substitutions
|
2021-10-29 21:25:12 +01:00
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// }======================================================
|
2021-10-29 21:25:12 +01:00
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// valid flags in a format specification
|
2021-10-29 21:25:12 +01:00
|
|
|
#define FLAGS "-+ #0"
|
2022-08-04 23:35:33 +01:00
|
|
|
// maximum size of each formatted item (> len(format('%99.99f', -1e308)))
|
2021-10-29 21:25:12 +01:00
|
|
|
#define MAX_ITEM 512
|
2022-08-04 23:35:33 +01:00
|
|
|
// maximum size of each format specification (such as '%-099.99d')
|
2021-10-29 21:25:12 +01:00
|
|
|
#define MAX_FORMAT 32
|
|
|
|
|
|
|
|
static void addquoted(lua_State* L, luaL_Buffer* b, int arg)
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, arg, &l);
|
|
|
|
|
|
|
|
luaL_reservebuffer(b, l + 2, -1);
|
|
|
|
|
|
|
|
luaL_addchar(b, '"');
|
|
|
|
while (l--)
|
|
|
|
{
|
|
|
|
switch (*s)
|
|
|
|
{
|
|
|
|
case '"':
|
|
|
|
case '\\':
|
|
|
|
case '\n':
|
|
|
|
{
|
|
|
|
luaL_addchar(b, '\\');
|
|
|
|
luaL_addchar(b, *s);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case '\r':
|
|
|
|
{
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(b, "\\r", 2, -1);
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case '\0':
|
|
|
|
{
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(b, "\\000", 4, -1);
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
{
|
|
|
|
luaL_addchar(b, *s);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s++;
|
|
|
|
}
|
|
|
|
luaL_addchar(b, '"');
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char* scanformat(lua_State* L, const char* strfrmt, char* form, size_t* size)
|
|
|
|
{
|
|
|
|
const char* p = strfrmt;
|
|
|
|
while (*p != '\0' && strchr(FLAGS, *p) != NULL)
|
2022-08-04 23:35:33 +01:00
|
|
|
p++; // skip flags
|
2021-10-29 21:25:12 +01:00
|
|
|
if ((size_t)(p - strfrmt) >= sizeof(FLAGS))
|
|
|
|
luaL_error(L, "invalid format (repeated flags)");
|
|
|
|
if (isdigit(uchar(*p)))
|
2022-08-04 23:35:33 +01:00
|
|
|
p++; // skip width
|
2021-10-29 21:25:12 +01:00
|
|
|
if (isdigit(uchar(*p)))
|
2022-08-04 23:35:33 +01:00
|
|
|
p++; // (2 digits at most)
|
2021-10-29 21:25:12 +01:00
|
|
|
if (*p == '.')
|
|
|
|
{
|
|
|
|
p++;
|
|
|
|
if (isdigit(uchar(*p)))
|
2022-08-04 23:35:33 +01:00
|
|
|
p++; // skip precision
|
2021-10-29 21:25:12 +01:00
|
|
|
if (isdigit(uchar(*p)))
|
2022-08-04 23:35:33 +01:00
|
|
|
p++; // (2 digits at most)
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
if (isdigit(uchar(*p)))
|
|
|
|
luaL_error(L, "invalid format (width or precision too long)");
|
|
|
|
*(form++) = '%';
|
|
|
|
*size = p - strfrmt + 1;
|
|
|
|
strncpy(form, strfrmt, *size);
|
|
|
|
form += *size;
|
|
|
|
*form = '\0';
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void addInt64Format(char form[MAX_FORMAT], char formatIndicator, size_t formatItemSize)
|
|
|
|
{
|
|
|
|
LUAU_ASSERT((formatItemSize + 3) <= MAX_FORMAT);
|
|
|
|
LUAU_ASSERT(form[0] == '%');
|
|
|
|
LUAU_ASSERT(form[formatItemSize] != 0);
|
|
|
|
LUAU_ASSERT(form[formatItemSize + 1] == 0);
|
|
|
|
form[formatItemSize + 0] = 'l';
|
|
|
|
form[formatItemSize + 1] = 'l';
|
|
|
|
form[formatItemSize + 2] = formatIndicator;
|
|
|
|
form[formatItemSize + 3] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_format(lua_State* L)
|
|
|
|
{
|
|
|
|
int top = lua_gettop(L);
|
|
|
|
int arg = 1;
|
|
|
|
size_t sfl;
|
|
|
|
const char* strfrmt = luaL_checklstring(L, arg, &sfl);
|
|
|
|
const char* strfrmt_end = strfrmt + sfl;
|
|
|
|
luaL_Buffer b;
|
|
|
|
luaL_buffinit(L, &b);
|
|
|
|
while (strfrmt < strfrmt_end)
|
|
|
|
{
|
|
|
|
if (*strfrmt != L_ESC)
|
|
|
|
luaL_addchar(&b, *strfrmt++);
|
|
|
|
else if (*++strfrmt == L_ESC)
|
2022-08-04 23:35:33 +01:00
|
|
|
luaL_addchar(&b, *strfrmt++); // %%
|
2023-08-04 20:18:54 +01:00
|
|
|
else if (FFlag::LuauFasterInterp && *strfrmt == '*')
|
|
|
|
{
|
|
|
|
strfrmt++;
|
|
|
|
if (++arg > top)
|
|
|
|
luaL_error(L, "missing argument #%d", arg);
|
|
|
|
|
|
|
|
luaL_addvalueany(&b, arg);
|
|
|
|
}
|
2021-10-29 21:25:12 +01:00
|
|
|
else
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // format item
|
|
|
|
char form[MAX_FORMAT]; // to store the format (`%...')
|
|
|
|
char buff[MAX_ITEM]; // to store the formatted item
|
2021-10-29 21:25:12 +01:00
|
|
|
if (++arg > top)
|
|
|
|
luaL_error(L, "missing argument #%d", arg);
|
|
|
|
size_t formatItemSize = 0;
|
|
|
|
strfrmt = scanformat(L, strfrmt, form, &formatItemSize);
|
|
|
|
char formatIndicator = *strfrmt++;
|
|
|
|
switch (formatIndicator)
|
|
|
|
{
|
|
|
|
case 'c':
|
|
|
|
{
|
2022-07-29 05:24:07 +01:00
|
|
|
snprintf(buff, sizeof(buff), form, (int)luaL_checknumber(L, arg));
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 'd':
|
|
|
|
case 'i':
|
|
|
|
{
|
|
|
|
addInt64Format(form, formatIndicator, formatItemSize);
|
2022-07-29 05:24:07 +01:00
|
|
|
snprintf(buff, sizeof(buff), form, (long long)luaL_checknumber(L, arg));
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 'o':
|
|
|
|
case 'u':
|
|
|
|
case 'x':
|
|
|
|
case 'X':
|
|
|
|
{
|
|
|
|
double argValue = luaL_checknumber(L, arg);
|
|
|
|
addInt64Format(form, formatIndicator, formatItemSize);
|
|
|
|
unsigned long long v = (argValue < 0) ? (unsigned long long)(long long)argValue : (unsigned long long)argValue;
|
2022-07-29 05:24:07 +01:00
|
|
|
snprintf(buff, sizeof(buff), form, v);
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 'e':
|
|
|
|
case 'E':
|
|
|
|
case 'f':
|
|
|
|
case 'g':
|
|
|
|
case 'G':
|
|
|
|
{
|
2022-07-29 05:24:07 +01:00
|
|
|
snprintf(buff, sizeof(buff), form, (double)luaL_checknumber(L, arg));
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 'q':
|
|
|
|
{
|
|
|
|
addquoted(L, &b, arg);
|
2023-01-13 22:10:01 +00:00
|
|
|
continue; // skip the 'luaL_addlstring' at the end
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
case 's':
|
|
|
|
{
|
|
|
|
size_t l;
|
|
|
|
const char* s = luaL_checklstring(L, arg, &l);
|
|
|
|
if (!strchr(form, '.') && l >= 100)
|
|
|
|
{
|
|
|
|
/* no precision and string is too long to be formatted;
|
|
|
|
keep original string */
|
|
|
|
lua_pushvalue(L, arg);
|
|
|
|
luaL_addvalue(&b);
|
2023-01-13 22:10:01 +00:00
|
|
|
continue; // skip the `luaL_addlstring' at the end
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2022-07-29 05:24:07 +01:00
|
|
|
snprintf(buff, sizeof(buff), form, s);
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2022-08-04 15:22:16 +01:00
|
|
|
case '*':
|
|
|
|
{
|
2023-08-04 20:18:54 +01:00
|
|
|
if (FFlag::LuauFasterInterp || formatItemSize != 1)
|
2022-08-04 15:22:16 +01:00
|
|
|
luaL_error(L, "'%%*' does not take a form");
|
|
|
|
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
size_t length;
|
|
|
|
const char* string = luaL_tolstring(L, arg, &length);
|
2023-01-13 22:10:01 +00:00
|
|
|
|
Sync to upstream/release/562 (#828)
* Fixed rare use-after-free in analysis during table unification
A lot of work these past months went into two new Luau components:
* A near full rewrite of the typechecker using a new deferred constraint
resolution system
* Native code generation for AoT/JiT compilation of VM bytecode into x64
(avx)/arm64 instructions
Both of these components are far from finished and we don't provide
documentation on building and using them at this point.
However, curious community members expressed interest in learning about
changes that go into these components each week, so we are now listing
them here in the 'sync' pull request descriptions.
---
New typechecker can be enabled by setting
DebugLuauDeferredConstraintResolution flag to 'true'.
It is considered unstable right now, so try it at your own risk.
Even though it already provides better type inference than the current
one in some cases, our main goal right now is to reach feature parity
with current typechecker.
Features which improve over the capabilities of the current typechecker
are marked as '(NEW)'.
Changes to new typechecker:
* Regular for loop index and parameters are now typechecked
* Invalid type annotations on local variables are ignored to improve
autocomplete
* Fixed missing autocomplete type suggestions for function arguments
* Type reduction is now performed to produce simpler types to be
presented to the user (error messages, custom LSPs)
* Internally, complex types like '((number | string) & ~(false?)) |
string' can be produced, which is just 'string | number' when simplified
* Fixed spots where support for unknown and never types was missing
* (NEW) Length operator '#' is now valid to use on top table type, this
type comes up when doing typeof(x) == "table" guards and isn't available
in current typechecker
---
Changes to native code generation:
* Additional math library fast calls are now lowered to x64: math.ldexp,
math.round, math.frexp, math.modf, math.sign and math.clamp
2023-02-03 19:26:13 +00:00
|
|
|
luaL_addlstring(&b, string, length, -2);
|
|
|
|
lua_pop(L, 1);
|
2022-08-04 15:22:16 +01:00
|
|
|
|
2023-01-13 22:10:01 +00:00
|
|
|
continue; // skip the `luaL_addlstring' at the end
|
2022-08-04 15:22:16 +01:00
|
|
|
}
|
2021-10-29 21:25:12 +01:00
|
|
|
default:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // also treat cases `pnLlh'
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_error(L, "invalid option '%%%c' to 'format'", *(strfrmt - 1));
|
|
|
|
}
|
|
|
|
}
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(&b, buff, strlen(buff), -1);
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
luaL_pushresult(&b);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_split(lua_State* L)
|
|
|
|
{
|
|
|
|
size_t haystackLen;
|
|
|
|
const char* haystack = luaL_checklstring(L, 1, &haystackLen);
|
|
|
|
size_t needleLen;
|
|
|
|
const char* needle = luaL_optlstring(L, 2, ",", &needleLen);
|
|
|
|
|
|
|
|
const char* begin = haystack;
|
|
|
|
const char* end = haystack + haystackLen;
|
|
|
|
const char* spanStart = begin;
|
|
|
|
int numMatches = 0;
|
|
|
|
|
|
|
|
lua_createtable(L, 0, 0);
|
|
|
|
|
|
|
|
if (needleLen == 0)
|
|
|
|
begin++;
|
|
|
|
|
|
|
|
// Don't iterate the last needleLen - 1 bytes of the string - they are
|
|
|
|
// impossible to be splits and would let us memcmp past the end of the
|
|
|
|
// buffer.
|
|
|
|
for (const char* iter = begin; iter <= end - needleLen; iter++)
|
|
|
|
{
|
|
|
|
// Use of memcmp here instead of strncmp is so that we allow embedded
|
|
|
|
// nulls to be used in either of the haystack or the needle strings.
|
|
|
|
// Most Lua string APIs allow embedded nulls, and this should be no
|
|
|
|
// exception.
|
|
|
|
if (memcmp(iter, needle, needleLen) == 0)
|
|
|
|
{
|
|
|
|
lua_pushinteger(L, ++numMatches);
|
|
|
|
lua_pushlstring(L, spanStart, iter - spanStart);
|
|
|
|
lua_settable(L, -3);
|
|
|
|
|
|
|
|
spanStart = iter + needleLen;
|
|
|
|
if (needleLen > 0)
|
|
|
|
iter += needleLen - 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (needleLen > 0)
|
|
|
|
{
|
|
|
|
lua_pushinteger(L, ++numMatches);
|
|
|
|
lua_pushlstring(L, spanStart, end - spanStart);
|
|
|
|
lua_settable(L, -3);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** {======================================================
|
|
|
|
** PACK/UNPACK
|
|
|
|
** =======================================================
|
|
|
|
*/
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// value used for padding
|
2021-10-29 21:25:12 +01:00
|
|
|
#if !defined(LUAL_PACKPADBYTE)
|
|
|
|
#define LUAL_PACKPADBYTE 0x00
|
|
|
|
#endif
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// maximum size for the binary representation of an integer
|
2021-10-29 21:25:12 +01:00
|
|
|
#define MAXINTSIZE 16
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// number of bits in a character
|
2021-10-29 21:25:12 +01:00
|
|
|
#define NB CHAR_BIT
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// mask for one character (NB 1's)
|
2021-10-29 21:25:12 +01:00
|
|
|
#define MC ((1 << NB) - 1)
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// internal size of integers used for pack/unpack
|
2021-10-29 21:25:12 +01:00
|
|
|
#define SZINT (int)sizeof(long long)
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// dummy union to get native endianness
|
2021-10-29 21:25:12 +01:00
|
|
|
static const union
|
|
|
|
{
|
|
|
|
int dummy;
|
2022-08-04 23:35:33 +01:00
|
|
|
char little; // true iff machine is little endian
|
2021-10-29 21:25:12 +01:00
|
|
|
} nativeendian = {1};
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// assume we need to align for double & pointers
|
2021-10-29 21:25:12 +01:00
|
|
|
#define MAXALIGN 8
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Union for serializing floats
|
|
|
|
*/
|
|
|
|
typedef union Ftypes
|
|
|
|
{
|
|
|
|
float f;
|
|
|
|
double d;
|
|
|
|
double n;
|
2022-08-04 23:35:33 +01:00
|
|
|
char buff[5 * sizeof(double)]; // enough for any float type
|
2021-10-29 21:25:12 +01:00
|
|
|
} Ftypes;
|
|
|
|
|
|
|
|
/*
|
|
|
|
** information to pack/unpack stuff
|
|
|
|
*/
|
|
|
|
typedef struct Header
|
|
|
|
{
|
|
|
|
lua_State* L;
|
|
|
|
int islittle;
|
|
|
|
int maxalign;
|
|
|
|
} Header;
|
|
|
|
|
|
|
|
/*
|
|
|
|
** options for pack/unpack
|
|
|
|
*/
|
|
|
|
typedef enum KOption
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
Kint, // signed integers
|
|
|
|
Kuint, // unsigned integers
|
|
|
|
Kfloat, // floating-point numbers
|
|
|
|
Kchar, // fixed-length strings
|
|
|
|
Kstring, // strings with prefixed length
|
|
|
|
Kzstr, // zero-terminated strings
|
|
|
|
Kpadding, // padding
|
|
|
|
Kpaddalign, // padding for alignment
|
|
|
|
Knop // no-op (configuration or spaces)
|
2021-10-29 21:25:12 +01:00
|
|
|
} KOption;
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Read an integer numeral from string 'fmt' or return 'df' if
|
|
|
|
** there is no numeral
|
|
|
|
*/
|
|
|
|
static int digit(int c)
|
|
|
|
{
|
|
|
|
return '0' <= c && c <= '9';
|
|
|
|
}
|
|
|
|
|
|
|
|
static int getnum(Header* h, const char** fmt, int df)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
if (!digit(**fmt)) // no number?
|
|
|
|
return df; // return default value
|
2021-10-29 21:25:12 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
int a = 0;
|
|
|
|
do
|
|
|
|
{
|
|
|
|
a = a * 10 + (*((*fmt)++) - '0');
|
|
|
|
} while (digit(**fmt) && a <= (INT_MAX - 9) / 10);
|
|
|
|
if (a > MAXSSIZE || digit(**fmt))
|
|
|
|
luaL_error(h->L, "size specifier is too large");
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Read an integer numeral and raises an error if it is larger
|
|
|
|
** than the maximum size for integers.
|
|
|
|
*/
|
|
|
|
static int getnumlimit(Header* h, const char** fmt, int df)
|
|
|
|
{
|
|
|
|
int sz = getnum(h, fmt, df);
|
|
|
|
if (sz > MAXINTSIZE || sz <= 0)
|
|
|
|
luaL_error(h->L, "integral size (%d) out of limits [1,%d]", sz, MAXINTSIZE);
|
|
|
|
return sz;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Initialize Header
|
|
|
|
*/
|
|
|
|
static void initheader(lua_State* L, Header* h)
|
|
|
|
{
|
|
|
|
h->L = L;
|
|
|
|
h->islittle = nativeendian.little;
|
|
|
|
h->maxalign = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Read and classify next option. 'size' is filled with option's size.
|
|
|
|
*/
|
|
|
|
static KOption getoption(Header* h, const char** fmt, int* size)
|
|
|
|
{
|
|
|
|
int opt = *((*fmt)++);
|
2022-08-04 23:35:33 +01:00
|
|
|
*size = 0; // default
|
2021-10-29 21:25:12 +01:00
|
|
|
switch (opt)
|
|
|
|
{
|
|
|
|
case 'b':
|
|
|
|
*size = 1;
|
|
|
|
return Kint;
|
|
|
|
case 'B':
|
|
|
|
*size = 1;
|
|
|
|
return Kuint;
|
|
|
|
case 'h':
|
|
|
|
*size = 2;
|
|
|
|
return Kint;
|
|
|
|
case 'H':
|
|
|
|
*size = 2;
|
|
|
|
return Kuint;
|
|
|
|
case 'l':
|
|
|
|
*size = 8;
|
|
|
|
return Kint;
|
|
|
|
case 'L':
|
|
|
|
*size = 8;
|
|
|
|
return Kuint;
|
|
|
|
case 'j':
|
|
|
|
*size = 4;
|
|
|
|
return Kint;
|
|
|
|
case 'J':
|
|
|
|
*size = 4;
|
|
|
|
return Kuint;
|
|
|
|
case 'T':
|
|
|
|
*size = 4;
|
|
|
|
return Kuint;
|
|
|
|
case 'f':
|
|
|
|
*size = 4;
|
|
|
|
return Kfloat;
|
|
|
|
case 'd':
|
|
|
|
*size = 8;
|
|
|
|
return Kfloat;
|
|
|
|
case 'n':
|
|
|
|
*size = 8;
|
|
|
|
return Kfloat;
|
|
|
|
case 'i':
|
|
|
|
*size = getnumlimit(h, fmt, 4);
|
|
|
|
return Kint;
|
|
|
|
case 'I':
|
|
|
|
*size = getnumlimit(h, fmt, 4);
|
|
|
|
return Kuint;
|
|
|
|
case 's':
|
|
|
|
*size = getnumlimit(h, fmt, 4);
|
|
|
|
return Kstring;
|
|
|
|
case 'c':
|
|
|
|
*size = getnum(h, fmt, -1);
|
|
|
|
if (*size == -1)
|
|
|
|
luaL_error(h->L, "missing size for format option 'c'");
|
|
|
|
return Kchar;
|
|
|
|
case 'z':
|
|
|
|
return Kzstr;
|
|
|
|
case 'x':
|
|
|
|
*size = 1;
|
|
|
|
return Kpadding;
|
|
|
|
case 'X':
|
|
|
|
return Kpaddalign;
|
|
|
|
case ' ':
|
|
|
|
break;
|
|
|
|
case '<':
|
|
|
|
h->islittle = 1;
|
|
|
|
break;
|
|
|
|
case '>':
|
|
|
|
h->islittle = 0;
|
|
|
|
break;
|
|
|
|
case '=':
|
|
|
|
h->islittle = nativeendian.little;
|
|
|
|
break;
|
|
|
|
case '!':
|
|
|
|
h->maxalign = getnumlimit(h, fmt, MAXALIGN);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
luaL_error(h->L, "invalid format option '%c'", opt);
|
|
|
|
}
|
|
|
|
return Knop;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Read, classify, and fill other details about the next option.
|
|
|
|
** 'psize' is filled with option's size, 'notoalign' with its
|
|
|
|
** alignment requirements.
|
|
|
|
** Local variable 'size' gets the size to be aligned. (Kpadal option
|
|
|
|
** always gets its full alignment, other options are limited by
|
|
|
|
** the maximum alignment ('maxalign'). Kchar option needs no alignment
|
|
|
|
** despite its size.
|
|
|
|
*/
|
|
|
|
static KOption getdetails(Header* h, size_t totalsize, const char** fmt, int* psize, int* ntoalign)
|
|
|
|
{
|
|
|
|
KOption opt = getoption(h, fmt, psize);
|
2022-08-04 23:35:33 +01:00
|
|
|
int align = *psize; // usually, alignment follows size
|
2021-10-29 21:25:12 +01:00
|
|
|
if (opt == Kpaddalign)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // 'X' gets alignment from following option
|
2021-10-29 21:25:12 +01:00
|
|
|
if (**fmt == '\0' || getoption(h, fmt, &align) == Kchar || align == 0)
|
|
|
|
luaL_argerror(h->L, 1, "invalid next option for option 'X'");
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
if (align <= 1 || opt == Kchar) // need no alignment?
|
2021-10-29 21:25:12 +01:00
|
|
|
*ntoalign = 0;
|
|
|
|
else
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
if (align > h->maxalign) // enforce maximum alignment
|
2021-10-29 21:25:12 +01:00
|
|
|
align = h->maxalign;
|
2022-08-04 23:35:33 +01:00
|
|
|
if ((align & (align - 1)) != 0) // is 'align' not a power of 2?
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_argerror(h->L, 1, "format asks for alignment not power of 2");
|
|
|
|
*ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1);
|
|
|
|
}
|
|
|
|
return opt;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Pack integer 'n' with 'size' bytes and 'islittle' endianness.
|
|
|
|
** The final 'if' handles the case when 'size' is larger than
|
|
|
|
** the size of a Lua integer, correcting the extra sign-extension
|
|
|
|
** bytes if necessary (by default they would be zeros).
|
|
|
|
*/
|
|
|
|
static void packint(luaL_Buffer* b, unsigned long long n, int islittle, int size, int neg)
|
|
|
|
{
|
|
|
|
LUAU_ASSERT(size <= MAXINTSIZE);
|
|
|
|
char buff[MAXINTSIZE];
|
|
|
|
int i;
|
2022-08-04 23:35:33 +01:00
|
|
|
buff[islittle ? 0 : size - 1] = (char)(n & MC); // first byte
|
2021-10-29 21:25:12 +01:00
|
|
|
for (i = 1; i < size; i++)
|
|
|
|
{
|
|
|
|
n >>= NB;
|
|
|
|
buff[islittle ? i : size - 1 - i] = (char)(n & MC);
|
|
|
|
}
|
|
|
|
if (neg && size > SZINT)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // negative number need sign extension?
|
|
|
|
for (i = SZINT; i < size; i++) // correct extra bytes
|
2021-10-29 21:25:12 +01:00
|
|
|
buff[islittle ? i : size - 1 - i] = (char)MC;
|
|
|
|
}
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(b, buff, size, -1); // add result to buffer
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Copy 'size' bytes from 'src' to 'dest', correcting endianness if
|
|
|
|
** given 'islittle' is different from native endianness.
|
|
|
|
*/
|
|
|
|
static void copywithendian(volatile char* dest, volatile const char* src, int size, int islittle)
|
|
|
|
{
|
|
|
|
if (islittle == nativeendian.little)
|
|
|
|
{
|
|
|
|
while (size-- != 0)
|
|
|
|
*(dest++) = *(src++);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
dest += size - 1;
|
|
|
|
while (size-- != 0)
|
|
|
|
*(dest--) = *(src++);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_pack(lua_State* L)
|
|
|
|
{
|
|
|
|
luaL_Buffer b;
|
|
|
|
Header h;
|
2022-08-04 23:35:33 +01:00
|
|
|
const char* fmt = luaL_checkstring(L, 1); // format string
|
|
|
|
int arg = 1; // current argument to pack
|
|
|
|
size_t totalsize = 0; // accumulate total size of result
|
2021-10-29 21:25:12 +01:00
|
|
|
initheader(L, &h);
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_pushnil(L); // mark to separate arguments from string buffer
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_buffinit(L, &b);
|
|
|
|
while (*fmt != '\0')
|
|
|
|
{
|
|
|
|
int size, ntoalign;
|
|
|
|
KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign);
|
|
|
|
totalsize += ntoalign + size;
|
|
|
|
while (ntoalign-- > 0)
|
2022-08-04 23:35:33 +01:00
|
|
|
luaL_addchar(&b, LUAL_PACKPADBYTE); // fill alignment
|
2021-10-29 21:25:12 +01:00
|
|
|
arg++;
|
|
|
|
switch (opt)
|
|
|
|
{
|
|
|
|
case Kint:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // signed integers
|
2021-10-29 21:25:12 +01:00
|
|
|
long long n = (long long)luaL_checknumber(L, arg);
|
|
|
|
if (size < SZINT)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // need overflow check?
|
2021-10-29 21:25:12 +01:00
|
|
|
long long lim = (long long)1 << ((size * NB) - 1);
|
|
|
|
luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow");
|
|
|
|
}
|
|
|
|
packint(&b, n, h.islittle, size, (n < 0));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kuint:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // unsigned integers
|
2022-01-14 16:20:09 +00:00
|
|
|
long long n = (long long)luaL_checknumber(L, arg);
|
2022-08-04 23:35:33 +01:00
|
|
|
if (size < SZINT) // need overflow check?
|
2022-01-14 16:20:09 +00:00
|
|
|
luaL_argcheck(L, (unsigned long long)n < ((unsigned long long)1 << (size * NB)), arg, "unsigned overflow");
|
|
|
|
packint(&b, (unsigned long long)n, h.islittle, size, 0);
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kfloat:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // floating-point options
|
2021-10-29 21:25:12 +01:00
|
|
|
volatile Ftypes u;
|
|
|
|
char buff[MAXINTSIZE];
|
2022-08-04 23:35:33 +01:00
|
|
|
double n = luaL_checknumber(L, arg); // get argument
|
2021-10-29 21:25:12 +01:00
|
|
|
if (size == sizeof(u.f))
|
2022-08-04 23:35:33 +01:00
|
|
|
u.f = (float)n; // copy it into 'u'
|
2021-10-29 21:25:12 +01:00
|
|
|
else if (size == sizeof(u.d))
|
|
|
|
u.d = (double)n;
|
|
|
|
else
|
|
|
|
u.n = n;
|
2022-08-04 23:35:33 +01:00
|
|
|
// move 'u' to final result, correcting endianness if needed
|
2021-10-29 21:25:12 +01:00
|
|
|
copywithendian(buff, u.buff, size, h.islittle);
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(&b, buff, size, -1);
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kchar:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // fixed-size string
|
2021-10-29 21:25:12 +01:00
|
|
|
size_t len;
|
|
|
|
const char* s = luaL_checklstring(L, arg, &len);
|
|
|
|
luaL_argcheck(L, len <= (size_t)size, arg, "string longer than given size");
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(&b, s, len, -1); // add string
|
2023-03-03 20:21:14 +00:00
|
|
|
while (len++ < (size_t)size) // pad extra space
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_addchar(&b, LUAL_PACKPADBYTE);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kstring:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // strings with length count
|
2021-10-29 21:25:12 +01:00
|
|
|
size_t len;
|
|
|
|
const char* s = luaL_checklstring(L, arg, &len);
|
|
|
|
luaL_argcheck(L, size >= (int)sizeof(size_t) || len < ((size_t)1 << (size * NB)), arg, "string length does not fit in given size");
|
2022-08-04 23:35:33 +01:00
|
|
|
packint(&b, len, h.islittle, size, 0); // pack length
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(&b, s, len, -1);
|
2021-10-29 21:25:12 +01:00
|
|
|
totalsize += len;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kzstr:
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // zero-terminated string
|
2021-10-29 21:25:12 +01:00
|
|
|
size_t len;
|
|
|
|
const char* s = luaL_checklstring(L, arg, &len);
|
|
|
|
luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros");
|
2023-01-13 22:10:01 +00:00
|
|
|
luaL_addlstring(&b, s, len, -1);
|
2022-08-04 23:35:33 +01:00
|
|
|
luaL_addchar(&b, '\0'); // add zero at the end
|
2021-10-29 21:25:12 +01:00
|
|
|
totalsize += len + 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kpadding:
|
2022-08-04 23:35:33 +01:00
|
|
|
luaL_addchar(&b, LUAL_PACKPADBYTE); // FALLTHROUGH
|
2021-10-29 21:25:12 +01:00
|
|
|
case Kpaddalign:
|
|
|
|
case Knop:
|
2022-08-04 23:35:33 +01:00
|
|
|
arg--; // undo increment
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
luaL_pushresult(&b);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_packsize(lua_State* L)
|
|
|
|
{
|
|
|
|
Header h;
|
2022-08-04 23:35:33 +01:00
|
|
|
const char* fmt = luaL_checkstring(L, 1); // format string
|
|
|
|
int totalsize = 0; // accumulate total size of result
|
2021-10-29 21:25:12 +01:00
|
|
|
initheader(L, &h);
|
|
|
|
while (*fmt != '\0')
|
|
|
|
{
|
|
|
|
int size, ntoalign;
|
|
|
|
KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign);
|
|
|
|
luaL_argcheck(L, opt != Kstring && opt != Kzstr, 1, "variable-length format");
|
2022-08-04 23:35:33 +01:00
|
|
|
size += ntoalign; // total space used by option
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_argcheck(L, totalsize <= MAXSSIZE - size, 1, "format result too large");
|
|
|
|
totalsize += size;
|
|
|
|
}
|
|
|
|
lua_pushinteger(L, totalsize);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Unpack an integer with 'size' bytes and 'islittle' endianness.
|
|
|
|
** If size is smaller than the size of a Lua integer and integer
|
|
|
|
** is signed, must do sign extension (propagating the sign to the
|
|
|
|
** higher bits); if size is larger than the size of a Lua integer,
|
|
|
|
** it must check the unread bytes to see whether they do not cause an
|
|
|
|
** overflow.
|
|
|
|
*/
|
|
|
|
static long long unpackint(lua_State* L, const char* str, int islittle, int size, int issigned)
|
|
|
|
{
|
|
|
|
unsigned long long res = 0;
|
|
|
|
int i;
|
|
|
|
int limit = (size <= SZINT) ? size : SZINT;
|
|
|
|
for (i = limit - 1; i >= 0; i--)
|
|
|
|
{
|
|
|
|
res <<= NB;
|
|
|
|
res |= (unsigned char)str[islittle ? i : size - 1 - i];
|
|
|
|
}
|
|
|
|
if (size < SZINT)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // real size smaller than int?
|
2021-10-29 21:25:12 +01:00
|
|
|
if (issigned)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // needs sign extension?
|
2021-10-29 21:25:12 +01:00
|
|
|
unsigned long long mask = (unsigned long long)1 << (size * NB - 1);
|
2022-08-04 23:35:33 +01:00
|
|
|
res = ((res ^ mask) - mask); // do sign extension
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (size > SZINT)
|
2022-08-04 23:35:33 +01:00
|
|
|
{ // must check unread bytes
|
2021-10-29 21:25:12 +01:00
|
|
|
int mask = (!issigned || (long long)res >= 0) ? 0 : MC;
|
|
|
|
for (i = limit; i < size; i++)
|
|
|
|
{
|
|
|
|
if ((unsigned char)str[islittle ? i : size - 1 - i] != mask)
|
|
|
|
luaL_error(L, "%d-byte integer does not fit into Lua Integer", size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (long long)res;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int str_unpack(lua_State* L)
|
|
|
|
{
|
|
|
|
Header h;
|
|
|
|
const char* fmt = luaL_checkstring(L, 1);
|
|
|
|
size_t ld;
|
|
|
|
const char* data = luaL_checklstring(L, 2, &ld);
|
|
|
|
int pos = posrelat(luaL_optinteger(L, 3, 1), ld) - 1;
|
|
|
|
if (pos < 0)
|
|
|
|
pos = 0;
|
2022-08-04 23:35:33 +01:00
|
|
|
int n = 0; // number of results
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_argcheck(L, size_t(pos) <= ld, 3, "initial position out of string");
|
|
|
|
initheader(L, &h);
|
|
|
|
while (*fmt != '\0')
|
|
|
|
{
|
|
|
|
int size, ntoalign;
|
|
|
|
KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign);
|
|
|
|
luaL_argcheck(L, (size_t)ntoalign + size <= ld - pos, 2, "data string too short");
|
2022-08-04 23:35:33 +01:00
|
|
|
pos += ntoalign; // skip alignment
|
|
|
|
// stack space for item + next position
|
2021-10-29 21:25:12 +01:00
|
|
|
luaL_checkstack(L, 2, "too many results");
|
|
|
|
n++;
|
|
|
|
switch (opt)
|
|
|
|
{
|
|
|
|
case Kint:
|
|
|
|
{
|
|
|
|
long long res = unpackint(L, data + pos, h.islittle, size, true);
|
|
|
|
lua_pushnumber(L, (double)res);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kuint:
|
|
|
|
{
|
|
|
|
unsigned long long res = unpackint(L, data + pos, h.islittle, size, false);
|
|
|
|
lua_pushnumber(L, (double)res);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kfloat:
|
|
|
|
{
|
|
|
|
volatile Ftypes u;
|
|
|
|
double num;
|
|
|
|
copywithendian(u.buff, data + pos, size, h.islittle);
|
|
|
|
if (size == sizeof(u.f))
|
|
|
|
num = (double)u.f;
|
|
|
|
else if (size == sizeof(u.d))
|
|
|
|
num = (double)u.d;
|
|
|
|
else
|
|
|
|
num = u.n;
|
|
|
|
lua_pushnumber(L, num);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kchar:
|
|
|
|
{
|
|
|
|
lua_pushlstring(L, data + pos, size);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kstring:
|
|
|
|
{
|
|
|
|
size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0);
|
|
|
|
luaL_argcheck(L, len <= ld - pos - size, 2, "data string too short");
|
|
|
|
lua_pushlstring(L, data + pos + size, len);
|
2022-08-04 23:35:33 +01:00
|
|
|
pos += (int)len; // skip string
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kzstr:
|
|
|
|
{
|
|
|
|
size_t len = strlen(data + pos);
|
|
|
|
luaL_argcheck(L, pos + len < ld, 2, "unfinished string for format 'z'");
|
|
|
|
lua_pushlstring(L, data + pos, len);
|
2022-08-04 23:35:33 +01:00
|
|
|
pos += (int)len + 1; // skip string plus final '\0'
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Kpaddalign:
|
|
|
|
case Kpadding:
|
|
|
|
case Knop:
|
2022-08-04 23:35:33 +01:00
|
|
|
n--; // undo increment
|
2021-10-29 21:25:12 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
pos += size;
|
|
|
|
}
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_pushinteger(L, pos + 1); // next position
|
2021-10-29 21:25:12 +01:00
|
|
|
return n + 1;
|
|
|
|
}
|
|
|
|
|
2022-08-04 23:35:33 +01:00
|
|
|
// }======================================================
|
2021-10-29 21:25:12 +01:00
|
|
|
|
|
|
|
static const luaL_Reg strlib[] = {
|
|
|
|
{"byte", str_byte},
|
|
|
|
{"char", str_char},
|
|
|
|
{"find", str_find},
|
|
|
|
{"format", str_format},
|
|
|
|
{"gmatch", gmatch},
|
|
|
|
{"gsub", str_gsub},
|
|
|
|
{"len", str_len},
|
|
|
|
{"lower", str_lower},
|
|
|
|
{"match", str_match},
|
|
|
|
{"rep", str_rep},
|
|
|
|
{"reverse", str_reverse},
|
|
|
|
{"sub", str_sub},
|
|
|
|
{"upper", str_upper},
|
|
|
|
{"split", str_split},
|
|
|
|
{"pack", str_pack},
|
|
|
|
{"packsize", str_packsize},
|
|
|
|
{"unpack", str_unpack},
|
|
|
|
{NULL, NULL},
|
|
|
|
};
|
|
|
|
|
|
|
|
static void createmetatable(lua_State* L)
|
|
|
|
{
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_createtable(L, 0, 1); // create metatable for strings
|
|
|
|
lua_pushliteral(L, ""); // dummy string
|
2021-10-29 21:25:12 +01:00
|
|
|
lua_pushvalue(L, -2);
|
2022-08-04 23:35:33 +01:00
|
|
|
lua_setmetatable(L, -2); // set string metatable
|
|
|
|
lua_pop(L, 1); // pop dummy string
|
|
|
|
lua_pushvalue(L, -2); // string library...
|
|
|
|
lua_setfield(L, -2, "__index"); // ...is the __index metamethod
|
|
|
|
lua_pop(L, 1); // pop metatable
|
2021-10-29 21:25:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Open string library
|
|
|
|
*/
|
2021-11-29 16:14:06 +00:00
|
|
|
int luaopen_string(lua_State* L)
|
2021-10-29 21:25:12 +01:00
|
|
|
{
|
|
|
|
luaL_register(L, LUA_STRLIBNAME, strlib);
|
|
|
|
createmetatable(L);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|