--[[
	PCRE2-based RegEx implementation for Luau
	Version 1.0.0a2
	BSD 2-Clause Licence
	Copyright © 2020 - Blockzez (devforum /u/Blockzez and github.com/Blockzez)
	All rights reserved.

	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions are met:

	1. Redistributions of source code must retain the above copyright notice, this
	   list of conditions and the following disclaimer.

	2. Redistributions in binary form must reproduce the above copyright notice,
	   this list of conditions and the following disclaimer in the documentation
	   and/or other materials provided with the distribution.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
	FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
	SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
]]
--[[ Settings ]]--
-- You can change them here
local options = {
	-- The maximum number of compiled regex patterns kept in the cache, so a pattern is not recompiled every time it is used
	-- The only accepted values are numbers >= 0, strings that can be automatically coerced to numbers >= 0, false and nil
	-- Do note that empty regex patterns (comment-only patterns included) are never cached regardless
	-- The default is 256
	cacheSize = 256,

	-- A boolean that determines whether this module uses Unicode data
	-- If this value evaluates to false, you can remove _unicodechar_category, _scripts and _xuc safely and it'll now error if:
	-- - You try to compile a RegEx with unicode flag
	-- - You try to use the \p pattern
	-- The default is true
	-- NOTE(review): the value below is false, which contradicts the stated default; confirm which one is intended
	unicodeData = false,
};

--
-- Unicode data modules; each of these is `false` (and the module is never required) when options.unicodeData is falsy
local u_categories = options.unicodeData and require(script:WaitForChild("_unicodechar_category"));
local chr_scripts = options.unicodeData and require(script:WaitForChild("_scripts"));
local xuc_chr = options.unicodeData and require(script:WaitForChild("_xuc"));
-- Weak-keyed registry mapping each userdata object handed to callers to its private data table
local proxy = setmetatable({ }, { __mode = 'k' });
-- re: library table, re_m: RegEx methods, match_m: Match methods (filled in further down)
local re, re_m, match_m = { }, { }, { };
-- Value stored in __metatable of the userdata objects; it is not assigned in this part of the file
local lockmsg;

--[[ Functions ]]--
--[[
	Decodes a UTF-8 string into an array of code points.
	self: the string to decode
	init: optional 1-based character index; when given, everything before that character is dropped first
	Returns a table holding the code points at 1..n, with `n` set to the character count and `s` set to
	the (possibly truncated) string that was decoded.
	Strings longer than 1999 characters are decoded in slices of at most 1999 characters
	(presumably to stay below the limit on values returned by one call; not confirmed here).
]]
local function to_str_arr(self, init)
	if init then
		local first_byte = utf8.offset(self, init);
		self = string.sub(self, first_byte);
	end;
	local char_count = utf8.len(self);
	if char_count <= 1999 then
		return { n = char_count, s = self, utf8.codepoint(self, 1, #self) };
	end;
	local slice_total = math.ceil(char_count / 1999);
	local codepoints = table.create(char_count);
	local write_at = 1;
	for slice = 1, slice_total do
		-- characters covered by this slice; the last slice may be shorter
		local first_char = (slice - 1) * 1999 + 1;
		local last_char = math.min(slice * 1999, char_count);
		-- the byte just before the start of the following character is the last byte of this slice
		local decoded = table.pack(utf8.codepoint(self, utf8.offset(self, first_char), utf8.offset(self, last_char + 1) - 1));
		table.move(decoded, 1, decoded.n, write_at, codepoints);
		write_at += decoded.n;
	end;
	codepoints.s, codepoints.n = self, char_count;
	return codepoints;
end;

--[[
	Encodes an array of code points back into a UTF-8 string.
	self: array of code points; its length is taken from `self.n` when present, otherwise from `#self`
	Arrays of more than 7997 entries are encoded in slices of at most 7997 code points and joined.
]]
local function from_str_arr(self)
	local total = self.n or #self;
	if total <= 7997 then
		return utf8.char(table.unpack(self));
	end;
	local pieces = table.create(math.ceil(total / 7997));
	local piece_i = 0;
	for first = 1, total, 7997 do
		piece_i += 1;
		pieces[piece_i] = utf8.char(table.unpack(self, first, math.min(first + 7996, total)));
	end;
	return table.concat(pieces);
end;

--[[
	Character-indexed substring: returns the characters of `self` from character `i` up to,
	but not including, character `j`. When utf8.offset yields no position for `j`, the
	substring runs to the end of the string.
]]
local function utf8_sub(self, i, j)
	local stop_byte = utf8.offset(self, j);
	local first_byte = utf8.offset(self, i);
	if stop_byte then
		return string.sub(self, first_byte, stop_byte - 1);
	end;
	return string.sub(self, first_byte);
end;

--
-- Maps each single-letter regex flag to its long flag name
local flag_map = {
	a = 'anchored', i = 'caseless', m = 'multiline', s = 'dotall', u = 'unicode', U = 'ungreedy', x ='extended',
};

-- Set of recognised POSIX character class names (the same names tkn_char_match dispatches on for "class" tokens)
local posix_class_names = {
	alnum = true, alpha = true, ascii = true, blank = true, cntrl = true, digit = true, graph = true, lower = true, print = true, punct = true, space = true, upper = true, word = true, xdigit = true,
};

-- Escape table keyed by the code point of the letter following a backslash.
-- Table values are tokens ({ "class", class name, negated } or a single-code sentinel), number values are literal code points.
-- How this table is consulted is decided by the pattern compiler, which is outside this part of the file.
local escape_chars = {
	-- grouped
	-- digit, spaces and words
	-- \D \S \W (negated)
	[0x44] = { "class", "digit", true }, [0x53] = { "class", "space", true }, [0x57] = { "class", "word", true },
	-- \d \s \w
	[0x64] = { "class", "digit", false }, [0x73] = { "class", "space", false }, [0x77] = { "class", "word", false },
	-- horizontal/vertical whitespace and newline
	-- \H \V (negated), \h \v, then \N and \R
	[0x48] = { "class", "blank", true }, [0x56] = { "class", "vertical_tab", true },
	[0x68] = { "class", "blank", false }, [0x76] = { "class", "vertical_tab", false },
	[0x4E] = { 0x4E }, [0x52] = { 0x52 },

	-- not grouped
	-- NOTE(review): 0x42 is 'B' and maps to backspace (0x08); PCRE uses \b (0x62) for backspace inside a character class - confirm against the compiler
	[0x42] = 0x08,
	-- \n, \r and \t
	[0x6E] = 0x0A, [0x72] = 0x0D, [0x74] = 0x09,
};

-- Escape table for zero-width escapes, keyed by the code point of the letter following a backslash.
-- Each value is a token whose first element is that same code point; re_rawfind dispatches on it.
local b_escape_chars = {
	-- word boundary and not word boundary
	-- \b and \B both carry the "word" class token that re_rawfind uses to classify the neighbouring characters
	[0x62] = { 0x62, { "class", "word", false } }, [0x42] = { 0x42, { "class", "word", false } },

	-- keep match out
	-- \K
	[0x4B] = { 0x4B },

	-- start & end of string
	-- \G, \J, \Z and \z
	-- NOTE(review): re_rawfind handles token type 0x41 ('A') but this table lists 0x4A ('J') - confirm whether \A was intended
	[0x47] = { 0x47 }, [0x4A] = { 0x4A }, [0x5A] = { 0x5A }, [0x7A] = { 0x7A },
};

-- Set of category names accepted for Unicode property tokens:
-- one- and two-letter Unicode general categories plus the three-letter X* names handled specially by tkn_char_match
local valid_categories = {
	C = true, Cc = true, Cf = true, Cn = true, Co = true, Cs = true,
	L = true, Ll = true, Lm = true, Lo = true, Lt = true, Lu = true,
	M = true, Mc = true, Me = true, Mn = true,
	N = true, Nd = true, Nl = true, No = true,
	P = true, Pc = true, Pd = true, Pe = true, Pf = true, Pi = true, Po = true, Ps = true,
	S = true, Sc = true, Sk = true, Sm = true, So = true,
	Z = true, Zl = true, Zp = true, Zs = true,

	Xan = true, Xps = true, Xsp = true, Xuc = true, Xwd = true,
};

-- Set of ASCII punctuation code points, built from its four contiguous ranges:
-- 0x21-0x2F, 0x3A-0x40, 0x5B-0x60 and 0x7B-0x7E
local class_ascii_punct = { };
for _, bounds in ipairs({ { 0x21, 0x2F }, { 0x3A, 0x40 }, { 0x5B, 0x60 }, { 0x7B, 0x7E } }) do
	for code = bounds[1], bounds[2] do
		class_ascii_punct[code] = true;
	end;
end;

-- Shared single-code tokens for '$', '.', '^' and '|'.
-- find_alternation recognises `alternation` by table identity, so these tables must not be copied.
local end_str = { 0x24 };
local dot = { 0x2E };
local beginning_str = { 0x5E };
local alternation = { 0x7C };

--[[
	Wraps a method implementation with argument validation and unwrapping of the public userdata object.
	re_type: "Match" for Match methods, anything else for RegEx methods
	name: the method name, used in error messages and to select per-method argument handling
	func: the implementation; it receives the private data table stored in `proxy` instead of the userdata

	Match methods: argument #1 must be a Match object; for "group" and "span" a nil argument #2 becomes 0.

	RegEx methods:
	- argument #1 may be a RegEx object, or a string/number which is compiled with re.fromstring
	- the subject string (argument #3 for "sub", argument #2 otherwise) must be a string; numbers are converted
	- for every method except "sub" and "split", argument #3 is the start position `init`: it is rounded to
	  the nearest integer, negative values count back from the end of the string in characters, and the
	  result is never less than 1
	- "match"/"matchall" receive the original first argument as their 4th parameter, "sub" as its 6th
	Errors are raised at level 2 so they point at the caller of the method.
]]
local function check_re(re_type, name, func)
	if re_type == "Match" then
		return function(...)
			local arg_n = select('#', ...);
			if arg_n < 1 then
				error("missing argument #1 (Match expected)", 2);
			end;
			local arg0, arg1 = ...;
			local match_data = proxy[arg0];
			if not (match_data and match_data.name == "Match") then
				error(string.format("invalid argument #1 to %q (Match expected, got %s)", name, typeof(arg0)), 2);
			end;
			-- group 0 (the whole match) is the default for group/span
			if (name == "group" or name == "span") and arg1 == nil then
				arg1 = 0;
			end;
			return func(match_data, arg1);
		end;
	end;
	return function(...)
		local arg_n = select('#', ...);
		if arg_n < 1 then
			error("missing argument #1 (RegEx expected)", 2);
		elseif arg_n < 2 then
			error("missing argument #2 (string expected)", 2);
		end;
		local arg0, arg1, arg2, arg3, arg4, arg5 = ...;
		if not (proxy[arg0] and proxy[arg0].name == "RegEx") then
			if type(arg0) ~= "string" and type(arg0) ~= "number" then
				error(string.format("invalid argument #1 to %q (RegEx expected, got %s)", name, typeof(arg0)), 2);
			end;
			arg0 = re.fromstring(arg0);
		end;
		-- The subject string is validated whether or not argument #1 had to be compiled first;
		-- previously this validation was skipped whenever a pattern string was passed.
		if name == "sub" then
			if type(arg2) == "number" then
				arg2 ..= '';
			elseif type(arg2) ~= "string" then
				error(string.format("invalid argument #3 to 'sub' (string expected, got %s)", typeof(arg2)), 2);
			end;
		elseif type(arg1) == "number" then
			arg1 ..= '';
		elseif type(arg1) ~= "string" then
			error(string.format("invalid argument #2 to %q (string expected, got %s)", name, typeof(arg1)), 2);
		end;
		if name ~= "sub" and name ~= "split" then
			local init_type = typeof(arg2);
			if init_type ~= 'nil' then
				arg2 = tonumber(arg2);
				if not arg2 then
					error(string.format("invalid argument #3 to %q (number expected, got %s)", name, init_type), 2);
				elseif arg2 < 0 then
					-- init is a character index (to_str_arr resolves it with utf8.offset), so count back
					-- from the character length; fall back to the byte length for invalid UTF-8
					arg2 = math.max((utf8.len(arg1) or #arg1) + math.floor(arg2 + 0.5) + 1, 1);
				else
					arg2 = math.max(math.floor(arg2 + 0.5), 1);
				end;
			end;
		end;
		arg0 = proxy[arg0];
		-- Hand the first argument exactly as the caller passed it to the methods that record a `source`.
		-- "matchall" is the name the iterator method is registered under; "matchiter" is kept for compatibility.
		if name == "match" or name == "matchall" or name == "matchiter" then
			arg3 = (...);
		elseif name == "sub" then
			arg5 = (...);
		end;
		return func(arg0, arg1, arg2, arg3, arg4, arg5);
	end;
end;

--[[ Matches ]]--
-- __tostring handler for Match objects: shows the inclusive character range of the whole match
-- and, unless the match is empty, the matched text
local function match_tostr(self)
	local spans = proxy[self].spans;
	local whole = spans[0];
	local first, stop = whole[1], whole[2];
	if first < stop then
		return string.format("Match (%d..%d): %s", first, stop - 1, utf8_sub(spans.input, first, stop));
	end;
	return string.format("Match (%d..%d, empty)", first, stop - 1);
end;

--[[
	Creates the public Match object for a successful match.
	span_arr: span table produced by re_rawfind; it gains the fields `source` and `input`
	group_id: table mapping group names to group numbers
	re: value stored as `source`
	str: value stored as `input`, the string that the spans index into
	Returns a userdata whose __index is span_arr (with match_m as its metatable) and whose private
	data is registered in `proxy`.
]]
local function new_match(span_arr, group_id, re, str)
	span_arr.source = re;
	span_arr.input = str;

	local object = newproxy(true);
	local mt = getmetatable(object);
	mt.__tostring = match_tostr;
	mt.__index = setmetatable(span_arr, match_m);
	mt.__metatable = lockmsg;

	proxy[object] = { name = "Match", spans = span_arr, group_id = group_id };
	return object;
end;

-- Returns the text captured by a group, given its number or name (check_re defaults it to 0,
-- the whole match), or nil when that group does not exist or did not take part in the match
match_m.group = check_re('Match', 'group', function(self, group_id)
	local key = group_id;
	if type(key) ~= "number" then
		-- resolve a group name to its number
		key = self.group_id[key];
	end;
	local span = self.spans[key];
	if span then
		return utf8_sub(self.spans.input, span[1], span[2]);
	end;
	return nil;
end);

-- Returns the inclusive start and end character positions of a group, given its number or name
-- (check_re defaults it to 0, the whole match), or nil when the group has no span
match_m.span = check_re('Match', 'span', function(self, group_id)
	local key = group_id;
	if type(key) ~= "number" then
		-- resolve a group name to its number
		key = self.group_id[key];
	end;
	local span = self.spans[key];
	if span then
		-- spans are stored with an exclusive end
		return span[1], span[2] - 1;
	end;
	return nil;
end);

-- Returns the text of every capture group 1..n as multiple values (nil for groups without a span);
-- when the pattern has no groups, returns the text of the whole match instead
match_m.groups = check_re('Match', 'groups', function(self)
	local spans = self.spans;
	local input, group_total = spans.input, spans.n;
	if not (group_total > 0) then
		local whole = spans[0];
		return utf8_sub(input, whole[1], whole[2]);
	end;
	local captures = table.create(group_total);
	for index = 0, group_total do
		local bounds = spans[index];
		if bounds then
			captures[index] = utf8_sub(input, bounds[1], bounds[2]);
		end;
	end;
	-- index 0 (the whole match) is collected above but deliberately not returned
	return table.unpack(captures, 1, group_total);
end);

-- Returns a table mapping each group name to the text that group captured;
-- names whose group has no span are left out
match_m.groupdict = check_re('Match', 'groupdict', function(self)
	local spans = self.spans;
	local by_name = { };
	for group_name, group_index in pairs(self.group_id) do
		local bounds = spans[group_index];
		if bounds then
			by_name[group_name] = utf8_sub(spans.input, bounds[1], bounds[2]);
		end;
	end;
	return by_name;
end);

-- Returns a table holding the text of the whole match at index 0 and of each capture group at 1..n
-- (groups without a span are left nil), with the field `n` set to the number of groups.
-- Registered under its own name so argument errors report 'grouparr' rather than 'groupdict'.
match_m.grouparr = check_re('Match', 'grouparr', function(self)
	local spans = self.spans;
	local ret = table.create(spans.n);
	for i = 0, spans.n do
		local v = spans[i];
		if v then
			ret[i] = utf8_sub(spans.input, v[1], v[2]);
		end;
	end;
	ret.n = spans.n;
	return ret;
end);

--
-- Newline convention names mapped to the numeric codes that is_newline reads from verb_flags.newline
local line_verbs = {
	CR = 0, LF = 1, CRLF = 2, ANYRLF = 3, ANY = 4, NUL = 5,
};
--[[
	Tells whether the code point at index `i` of `str_arr` ends a line under the active newline convention.
	str_arr: array of code points
	i: index to test
	verb_flags: table whose `newline` field holds one of the line_verbs codes; any other value means LF
	For the CRLF convention the result is true at the LF of a CR LF pair.
]]
local function is_newline(str_arr, i, verb_flags)
	local line_verb_n = verb_flags.newline;
	local chr = str_arr[i];
	if line_verb_n == 0 then
		-- carriage return
		return chr == 0x0D;
	elseif line_verb_n == 2 then
		-- carriage return followed by line feed
		-- (the preceding character must be CR, 0x0D; it was previously compared against the space 0x20)
		return chr == 0x0A and str_arr[i - 1] == 0x0D;
	elseif line_verb_n == 3 then
		-- any of the above
		return chr == 0x0A or chr == 0x0D;
	elseif line_verb_n == 4 then
		-- any of Unicode newlines
		return chr == 0x0A or chr == 0x0B or chr == 0x0C or chr == 0x0D or chr == 0x85 or chr == 0x2028 or chr == 0x2029;
	elseif line_verb_n == 5 then
		-- null
		return chr == 0;
	end;
	-- linefeed
	return chr == 0x0A;
end;


--[[
	Tests the single code point at `str_arr[i]` against one character-level token.
	tkn_part: a literal code point (number), or a token table whose first element is one of
	  "charset" ({ "charset", negated, members }), "range" ({ "range", low, high }),
	  "class" ({ "class", class name, negated }), "category" ({ "category", negated, category name }),
	  0x2E (the dot), 0x4E (\N) or 0x52 (\R)
	str_arr: array of code points; i: index of the code point to test
	flags: compile flags (`ignoreCase`, `unicode` and `dotAll` are read here)
	verb_flags: newline settings (`newline` through is_newline, `newline_seq` for \R)
	Returns a boolean (a "range" test can also yield nil); false when there is no code point at `i`.

	NOTE(review): this function reads flags.ignoreCase and flags.dotAll while flag_map names the
	flags 'caseless' and 'dotall' - confirm which keys the compiler actually sets.
]]
local function tkn_char_match(tkn_part, str_arr, i, flags, verb_flags)
	local chr = str_arr[i];
	if not chr then
		return false;
	elseif flags.ignoreCase and chr >= 0x61 and chr <= 0x7A then
		-- fold ASCII lower case to upper case
		chr -= 0x20;
	end;
	if type(tkn_part) == "number" then
		return tkn_part == chr;
	elseif tkn_part[1] == "charset" then
		-- the first member that matches decides; tkn_part[2] is the negation flag
		for _, v in ipairs(tkn_part[3]) do
			if tkn_char_match(v, str_arr, i, flags, verb_flags) then
				return not tkn_part[2];
			end;
		end;
		return tkn_part[2];
	elseif tkn_part[1] == "range" then
		-- chr was folded to upper case above, so also try its lower-case counterpart
		return chr >= tkn_part[2] and chr <= tkn_part[3] or flags.ignoreCase and chr >= 0x41 and chr <= 0x5A and (chr + 0x20) >= tkn_part[2] and (chr + 0x20) <= tkn_part[3];
	elseif tkn_part[1] == "class" then
		local char_class = tkn_part[2];
		local negate = tkn_part[3];
		local match = false;
		-- if and elseifs :(
		-- Might make these into tables in the future
		if char_class == "xdigit" then
			match = chr >= 0x30 and chr <= 0x39 or chr >= 0x41 and chr <= 0x46 or chr >= 0x61 and chr <= 0x66;
		elseif char_class == "ascii" then
			match = chr <= 0x7F;
		-- cannot be accessed through POSIX classes
		elseif char_class == "vertical_tab" then
			-- vertical whitespace: LF, VT, FF, CR, NEL (U+0085), LS and PS; NEL was previously missing here
			-- although both Unicode newline lists in this file include it
			match = chr >= 0x0A and chr <= 0x0D or chr == 0x85 or chr == 0x2028 or chr == 0x2029;
		--
		elseif flags.unicode then
			local current_category = u_categories[chr] or 'Cn';
			local first_category = current_category:sub(1, 1);
			if char_class == "alnum" then
				match = first_category == 'L' or current_category == 'Nl' or current_category == 'Nd';
			elseif char_class == "alpha" then
				match = first_category == 'L' or current_category == 'Nl';
			elseif char_class == "blank" then
				match = current_category == 'Zs' or chr == 0x09;
			elseif char_class == "cntrl" then
				match = current_category == 'Cc';
			elseif char_class == "digit" then
				match = current_category == 'Nd';
			elseif char_class == "graph" then
				-- visible characters: everything except separators and the 'C' categories. Punctuation is
				-- graphical (as in the ASCII branch below); it was previously excluded instead of separators
				match = first_category ~= 'Z' and first_category ~= 'C';
			elseif char_class == "lower" then
				match = current_category == 'Ll';
			elseif char_class == "print" then
				match = first_category ~= 'C';
			elseif char_class == "punct" then
				match = first_category == 'P';
			elseif char_class == "space" then
				match = first_category == 'Z' or chr >= 0x09 and chr <= 0x0D;
			elseif char_class == "upper" then
				match = current_category == 'Lu';
			elseif char_class == "word" then
				match = first_category == 'L' or current_category == 'Nl' or current_category == 'Nd' or current_category == 'Pc';
			end;
		elseif char_class == "alnum" then
			match = chr >= 0x30 and chr <= 0x39 or chr >= 0x41 and chr <= 0x5A or chr >= 0x61 and chr <= 0x7A;
		elseif char_class == "alpha" then
			match = chr >= 0x41 and chr <= 0x5A or chr >= 0x61 and chr <= 0x7A;
		elseif char_class == "blank" then
			match = chr == 0x09 or chr == 0x20;
		elseif char_class == "cntrl" then
			match = chr <= 0x1F or chr == 0x7F;
		elseif char_class == "digit" then
			match = chr >= 0x30 and chr <= 0x39;
		elseif char_class == "graph" then
			match = chr >= 0x21 and chr <= 0x7E;
		elseif char_class == "lower" then
			match = chr >= 0x61 and chr <= 0x7A;
		elseif char_class == "print" then
			match = chr >= 0x20 and chr <= 0x7E;
		elseif char_class == "punct" then
			-- the lookup yields true or nil; normalise to a boolean
			match = class_ascii_punct[chr] == true;
		elseif char_class == "space" then
			match = chr >= 0x09 and chr <= 0x0D or chr == 0x20;
		elseif char_class == "upper" then
			match = chr >= 0x41 and chr <= 0x5A;
		elseif char_class == "word" then
			match = chr >= 0x30 and chr <= 0x39 or chr >= 0x41 and chr <= 0x5A or chr >= 0x61 and chr <= 0x7A or chr == 0x5F;
		end;
		if negate then
			return not match;
		end;
		return match;
	elseif tkn_part[1] == "category" then
		local chr_category = u_categories[chr] or 'Cn';
		local category_v = tkn_part[3];
		local category_len = #category_v;
		if category_len == 3 then
			-- the three-letter X* pseudo-categories
			local match = false;
			if category_v == "Xan" or category_v == "Xwd" then
				-- letters and numbers, plus underscore for Xwd; find() yields a position or nil, so normalise
				match = chr_category:find("^[LN]") ~= nil or category_v == "Xwd" and chr == 0x5F;
			elseif category_v == "Xps" or category_v == "Xsp" then
				match = chr_category:sub(1, 1) == 'Z' or chr >= 0x09 and chr <= 0x0D;
			elseif category_v == "Xuc" then
				match = tkn_char_match(xuc_chr, str_arr, i, flags, verb_flags);
			end;
			if tkn_part[2] then
				return not match;
			end
			return match;
		elseif chr_category:sub(1, category_len) == category_v then
			-- a one-letter name matches every category that starts with that letter
			return not tkn_part[2];
		end;
		return tkn_part[2];
	elseif tkn_part[1] == 0x2E then
		return flags.dotAll or not is_newline(str_arr, i, verb_flags);
	elseif tkn_part[1] == 0x4E then
		return not is_newline(str_arr, i, verb_flags);
	elseif tkn_part[1] == 0x52 then
		if verb_flags.newline_seq == 0 then
			-- CR, LF or CRLF
			return chr == 0x0A or chr == 0x0D;
		end;
		-- any unicode newline
		return chr == 0x0A or chr == 0x0B or chr == 0x0C or chr == 0x0D or chr == 0x85 or chr == 0x2028 or chr == 0x2029;
	end;
	return false;
end;

--[[
	Scans forward from token index `i` for the next alternation token that belongs to the current
	nesting level; nested groups are jumped over using the close index stored in the group token.
	token: token list
	i: index to start scanning at
	count: optional accumulator; when truthy, the width of every token passed is added to it
	  (a group adds its `count` field, a quantifier its third field, anything else 1)
	Returns the index of the alternation and the accumulator, or nil and the accumulator when the
	token list ends or a closing parenthesis is reached first.
]]
local function find_alternation(token, i, count)
	local cursor = i;
	while true do
		local tkn = token[cursor];
		if tkn == alternation then
			return cursor, count;
		end;
		local tkn_kind = type(tkn) == "table" and tkn[1];
		-- an opening group, either bare or wrapped in a quantifier
		local group;
		if tkn_kind == 0x28 then
			group = tkn;
		elseif tkn_kind == "quantifier" and type(tkn[5]) == "table" and tkn[5][1] == 0x28 then
			group = tkn[5];
		end;
		if group then
			if count then
				count += group.count;
			end;
			-- continue after the group's closing token
			cursor = group[3];
		elseif not tkn or tkn_kind == 0x29 then
			return nil, count;
		elseif count then
			if tkn_kind == "quantifier" then
				count += tkn[3];
			else
				count += 1;
			end;
		end;
		cursor += 1;
	end;
end;

--[[
	Core backtracking matcher: runs the compiled token list against an array of code points.
	token: compiled token list (`token.group_n` is the number of capture groups)
	str_arr: code point array from to_str_arr (`n` is its length, `s` the source string)
	init: index of the first start position to try
	flags: compile flags; verb_flags: newline settings
	as_bool: when truthy only success is reported
	Returns true/false when as_bool is truthy. Otherwise returns nil when nothing matches, or a span
	table: [0] = { start, end } of the whole match, [k] = { start, end } of capture group k, and `n` =
	number of groups. End positions are exclusive.

	`states` is a stack with the most recent entry at index 1. Entry shapes used below:
	  { "alternation", index of the next '|' token, string index to resume at }
	  { "group", opening token index, start, end (set on close), group number, closing token index, kind, ... }
	    quantified groups continue with: min, max, iterations done, next alternation index, mode, lazy-exit flag
	  { "quantifier", token index, current string index, limit string index, step }
	  { "matchStart", string index }            (from \K)
	  { "PRUNE" or "SKIP", string index }
	When a token fails, the newest state that can offer another path is resumed; when none is left
	the search restarts one position further along the string.

	Fixes relative to the previous revision:
	- \b and \B passed a code point and the flags table where tkn_char_match expects the code point
	  array and an index, so every word-boundary test raised an error
	- the \K state found while closing a positive lookaround was declared inside the loop and therefore
	  lost before it could be pushed back
	- multiline '$' tested the character after the newline instead of the newline itself
]]
local function re_rawfind(token, str_arr, init, flags, verb_flags, as_bool)
	local tkn_i, str_i, start_i = 0, init, init;
	local states = { };
	while tkn_i do
		if tkn_i == 0 then
			-- (re)start of an attempt: register the first top-level alternative, if any
			tkn_i += 1;
			local next_alt = find_alternation(token, tkn_i);
			if next_alt then
				table.insert(states, 1, { "alternation", next_alt, str_i });
			end;
			continue;
		end;
		local ctkn = token[tkn_i];
		local tkn_type = type(ctkn) == "table" and ctkn[1];
		if not ctkn then
			-- ran off the end of the token list: the match succeeded
			break;
		elseif ctkn == "ACCEPT" then
			-- look for an enclosing lookaround close; without one, ACCEPT ends the whole match
			local not_lookaround = true;
			local close_i = tkn_i;
			repeat
				close_i += 1;
				local is_table = type(token[close_i]) == "table";
				local close_i_tkn = token[close_i];
				if is_table and (close_i_tkn[1] == 0x28 or close_i_tkn[1] == "quantifier" and type(close_i_tkn[5]) == "table" and close_i_tkn[5][1] == 0x28) then
					close_i = close_i_tkn[1] == "quantifier" and close_i_tkn[5][3] or close_i_tkn[3];
				elseif is_table and close_i_tkn[1] == 0x29 and (close_i_tkn[4] == 0x21 or close_i_tkn[4] == 0x3D) then
					not_lookaround = false;
					tkn_i = close_i;
					break;
				end;
			until not close_i_tkn;
			if not_lookaround then
				break;
			end;
		elseif ctkn == "PRUNE" or ctkn == "SKIP" then
			table.insert(states, 1, { ctkn, str_i });
			tkn_i += 1;
		elseif tkn_type == 0x28 then
			-- opening parenthesis
			table.insert(states, 1, { "group", tkn_i, str_i, nil, ctkn[2], ctkn[3], ctkn[4] });
			tkn_i += 1;
			-- a width is only accumulated for lookarounds (kind 0x21 or 0x3D) whose fifth field is set;
			-- the string index is then moved back by that width
			local next_alt, count = find_alternation(token, tkn_i, (ctkn[4] == 0x21 or ctkn[4] == 0x3D) and ctkn[5] and 0);
			if next_alt then
				table.insert(states, 1, { "alternation", next_alt, str_i });
			end;
			if count then
				str_i -= count;
			end;
		elseif tkn_type == 0x29 and ctkn[4] ~= 0x21 then
			-- closing parenthesis of anything but a kind-0x21 group (those are handled in the failure path below)
			if ctkn[4] == 0x21 or ctkn[4] == 0x3D then
				-- Kept outside the loop so that a \K state popped on the way down survives until the
				-- group's own state is reached and can be pushed back.
				local selected_match_start;
				while true do
					local selected_state = table.remove(states, 1);
					if selected_state[1] == "group" and selected_state[2] == ctkn[3] then
						if (ctkn[4] == 0x21 or ctkn[4] == 0x3D) and not ctkn[5] then
							-- the assertion consumes nothing: go back to where the group started
							str_i = selected_state[3];
						end;
						if selected_match_start then
							table.insert(states, 1, selected_match_start);
						end;
						break;
					elseif selected_state[1] == "matchStart" and not selected_match_start and ctkn[4] == 0x3D then
						selected_match_start = selected_state;
					end;
				end;
			elseif ctkn[4] == 0x3E then
				-- kind 0x3E: drop every state recorded since the group was opened, so nothing inside can be retried
				repeat
					local selected_state = table.remove(states, 1);
				until not selected_state or selected_state[1] == "group" and selected_state[2] == ctkn[3];
			else
				for i, v in ipairs(states) do
					if v[1] == "group" and v[2] == ctkn[3] then
						if v.jmp then
							-- recursive match
							tkn_i = v.jmp;
						end;
						-- record where the group ended
						v[4] = str_i;
						if v[7] == "quantifier" and v[10] + 1 < v[9] then
							-- more iterations are allowed: loop back unless lazy and the minimum is already met
							if token[ctkn[3]][4] ~= "lazy" or v[10] + 1 < v[8] then
								tkn_i = ctkn[3];
							end;
							local ctkn1 = token[ctkn[3]];
							local new_group = { "group", v[2], str_i, nil, ctkn1[5][2], ctkn1[5][3], "quantifier", ctkn1[2], ctkn1[3], v[10] + 1, v[11], ctkn1[4] };
							table.insert(states, 1, new_group);
							if v[11] then
								table.insert(states, 1, { "alternation", v[11], str_i });
							end;
						end;
						break;
					end;
				end;
			end;
			tkn_i += 1;
		elseif tkn_type == 0x4B then
			-- \K: remember where the reported match should start
			table.insert(states, 1, { "matchStart", str_i });
			tkn_i += 1;
		elseif tkn_type == 0x7C then
			-- reached '|' after a successful alternative: skip ahead to the end of the enclosing group
			local close_i = tkn_i;
			repeat
				close_i += 1;
				local is_table = type(token[close_i]) == "table";
				local close_i_tkn = token[close_i];
				if is_table and (close_i_tkn[1] == 0x28 or close_i_tkn[1] == "quantifier" and type(close_i_tkn[5]) == "table" and close_i_tkn[5][1] == 0x28) then
					close_i = close_i_tkn[1] == "quantifier" and close_i_tkn[5][3] or close_i_tkn[3];
				end;
			until is_table and close_i_tkn[1] == 0x29 or not close_i_tkn;
			if token[close_i] then
				for _, v in ipairs(states) do
					if v[1] == "group" and v[6] == close_i then
						tkn_i = v[6];
						break;
					end;
				end;
			else
				tkn_i = close_i;
			end;
		elseif tkn_type == "recurmatch" then
			-- enter the referenced group again; `jmp` records where to continue once it closes
			table.insert(states, 1, { "group", ctkn[3], str_i, nil, nil, token[ctkn[3]][3], nil, jmp = tkn_i });
			tkn_i = ctkn[3] + 1;
			local next_alt, count = find_alternation(token, tkn_i);
			if next_alt then
				table.insert(states, 1, { "alternation", next_alt, str_i });
			end;
		else
			local match;
			if ctkn == "FAIL" then
				match = false;
			elseif tkn_type == 0x29 then
				-- close of a kind-0x21 group: its body matched, so the assertion fails (match stays nil)
				repeat
					local selected_state = table.remove(states, 1);
				until selected_state[1] == "group" and selected_state[2] == ctkn[3];
			elseif tkn_type == "quantifier" then
				-- quantifier token: [2] = minimum, [3] = maximum, [4] = mode, [5] = quantified token
				if type(ctkn[5]) == "table" and ctkn[5][1] == 0x28 then
					local next_alt = find_alternation(token, tkn_i + 1);
					if next_alt then
						table.insert(states, 1, { "alternation", next_alt, str_i });
					end;
					table.insert(states, next_alt and 2 or 1, { "group", tkn_i, str_i, nil, ctkn[5][2], ctkn[5][3], "quantifier", ctkn[2], ctkn[3], 0, next_alt, ctkn[4] });
					if ctkn[4] == "lazy" and ctkn[2] == 0 then
						-- lazy with a zero minimum: try skipping the group first
						tkn_i = ctkn[5][3];
					end;
					match = true;
				else
					local start_i, end_i;
					local pattern_count = 1;
					local is_backref = type(ctkn[5]) == "table" and ctkn[5][1] == "backref";
					if is_backref then
						pattern_count = 0;
						local group_n = ctkn[5][2];
						for _, v in ipairs(states) do
							if v[1] == "group" and v[5] == group_n then
								start_i, end_i = v[3], v[4];
								pattern_count = end_i - start_i;
								break;
							end;
						end;
					end;
					local min_max_i = str_i + ctkn[2] * pattern_count;
					local mcount = 0;
					-- consume as many repetitions as possible, up to the maximum
					while mcount < ctkn[3] do
						if is_backref then
							if start_i and end_i then
								local org_i = str_i;
								if utf8_sub(str_arr.s, start_i, end_i) ~= utf8_sub(str_arr.s, org_i, str_i + pattern_count) then
									break;
								end;
							else
								break;
							end;
						elseif not tkn_char_match(ctkn[5], str_arr, str_i, flags, verb_flags) then
							break;
						end;
						str_i += pattern_count;
						mcount += 1;
					end;
					match = mcount >= ctkn[2];
					if match and ctkn[4] ~= "possessive" then
						if ctkn[4] == "lazy" then
							-- lazy: start from the minimum and grow on backtracking
							min_max_i, str_i = str_i, min_max_i;
						end;
						table.insert(states, 1, { "quantifier", tkn_i, str_i, math.min(min_max_i, str_arr.n + 1), (ctkn[4] == "lazy" and 1 or -1) * pattern_count });
					end;
				end;
			elseif tkn_type == "backref" then
				local start_i, end_i;
				local group_n = ctkn[2];
				for _, v in ipairs(states) do
					if v[1] == "group" and v[5] == group_n then
						start_i, end_i = v[3], v[4];
						break;
					end;
				end;
				if start_i and end_i then
					local org_i = str_i;
					str_i += end_i - start_i;
					match = utf8_sub(str_arr.s, start_i, end_i) == utf8_sub(str_arr.s, org_i, str_i);
				end;
			else
				if tkn_type == 0x24 or tkn_type == 0x5A or tkn_type == 0x7A then
					-- '$', \Z and \z. In multiline mode '$' also matches right before a newline: the newline
					-- is the character at str_i, except under the CRLF convention where is_newline reports
					-- the pair at its LF, one position further on.
					local newline_i = verb_flags.newline == 2 and str_i + 1 or str_i;
					match = str_i == str_arr.n + 1 or tkn_type == 0x24 and flags.multiline and is_newline(str_arr, newline_i, verb_flags) or tkn_type == 0x5A and str_i == str_arr.n and is_newline(str_arr, str_i, verb_flags);
				elseif tkn_type == 0x5E or tkn_type == 0x41 or tkn_type == 0x47 then
					-- '^', \A and \G
					match = str_i == 1 or tkn_type == 0x5E and flags.multiline and is_newline(str_arr, str_i - 1, verb_flags) or tkn_type == 0x47 and str_i == init;
				elseif tkn_type == 0x42 or tkn_type == 0x62 then
					-- \B and \b: ctkn[2] is the "word" class token; classify the characters on both sides
					local start_m = str_i == 1 or flags.multiline and is_newline(str_arr, str_i - 1, verb_flags);
					local end_m = str_i == str_arr.n + 1 or flags.multiline and is_newline(str_arr, str_i, verb_flags);
					local prev_is_word = tkn_char_match(ctkn[2], str_arr, str_i - 1, flags, verb_flags);
					local cur_is_word = tkn_char_match(ctkn[2], str_arr, str_i, flags, verb_flags);
					local w_m = prev_is_word and 0 or cur_is_word and 1;
					if w_m == 0 then
						match = end_m or not cur_is_word;
					elseif w_m then
						match = start_m or not prev_is_word;
					end;
					if tkn_type == 0x42 then
						match = not match;
					end;
				else
					-- ordinary single-character token
					match = tkn_char_match(ctkn, str_arr, str_i, flags, verb_flags);
					str_i += 1;
				end;
			end;
			if not match then
				-- backtrack: pop states until one offers another path
				while true do
					local prev_type, prev_state = states[1] and states[1][1], states[1];
					if not prev_type or prev_type == "PRUNE" or prev_type == "SKIP" then
						if prev_type then
							table.clear(states);
						end;
						if start_i > str_arr.n then
							-- no start position left to try
							if as_bool then
								return false;
							end;
							return nil;
						end;
						start_i = prev_type == "SKIP" and prev_state[2] or start_i + 1;
						tkn_i, str_i = 0, start_i;
						break;
					elseif prev_type == "alternation" then
						tkn_i, str_i = prev_state[2], prev_state[3];
						local next_alt, count = find_alternation(token, tkn_i + 1);
						if next_alt then
							prev_state[2] = next_alt;
						else
							table.remove(states, 1);
						end;
						if count then
							str_i -= count;
						end;
						break;
					elseif prev_type == "group" then
						if prev_state[7] == "quantifier" then
							if prev_state[12] == "greedy" and prev_state[10] >= prev_state[8]
								or prev_state[12] == "lazy" and prev_state[10] < prev_state[9] and not prev_state[13] then
								tkn_i, str_i = prev_state[12] == "greedy" and prev_state[6] or prev_state[2], prev_state[3];
								if prev_state[12] == "greedy" then
									table.remove(states, 1);
									break;
								elseif prev_state[10] >= prev_state[8] then
									prev_state[13] = true;
									break;
								end;
							end;
						elseif prev_state[7] == 0x21 then
							-- the body of a kind-0x21 group failed, so the assertion holds: continue after it
							table.remove(states, 1);
							tkn_i, str_i = prev_state[6], prev_state[3];
							break;
						end;
					elseif prev_type == "quantifier" then
						-- give back (greedy) or take (lazy) one repetition while the limit allows it
						if math.sign(prev_state[4] - prev_state[3]) == math.sign(prev_state[5]) then
							prev_state[3] += prev_state[5];
							tkn_i, str_i = prev_state[2], prev_state[3];
							break;
						end;
					end;
					-- keep match out state and recursive state, can be safely removed
					-- prevents infinite loop
					table.remove(states, 1);
				end;
			end;
			tkn_i += 1;
		end;
	end;
	if as_bool then
		return true;
	end;
	-- build the span table from the states that survived; the newest state wins for each group
	local match_start_ran = false;
	local span = table.create(token.group_n);
	span[0], span.n = { start_i, str_i }, token.group_n;
	for _, v in ipairs(states) do
		if v[1] == "matchStart" and not match_start_ran then
			span[0][1], match_start_ran = v[2], true;
		elseif v[1] == "group" and v[5] and not span[v[5]] then
			span[v[5]] = { v[3], v[4] };
		end;
	end;
	return span;
end;

--[[ Methods ]]--
-- Reports whether the pattern matches anywhere in `str`, optionally starting at character `init`
re_m.test = check_re('RegEx', 'test', function(self, str, init)
	local subject = to_str_arr(str, init);
	return re_rawfind(self.token, subject, 1, self.flags, self.verb_flags, true);
end);

--[[
	Finds the first match of the pattern in `str`, optionally starting at character `init`.
	source: supplied by check_re (the first argument of the call); stored on the Match
	Returns a Match object, or nil when there is no match.
	The Match is created with the string that was actually searched (`str` with everything before
	`init` removed), because the spans index into that string. Previously the untruncated `str` was
	stored, so group() returned the wrong text whenever init was greater than 1. This is also what
	matchall stores.
]]
re_m.match = check_re('RegEx', 'match', function(self, str, init, source)
	local str_arr = to_str_arr(str, init);
	local span = re_rawfind(self.token, str_arr, 1, self.flags, self.verb_flags, false);
	if not span then
		return nil;
	end;
	return new_match(span, self.group_id, source, str_arr.s);
end);

--[[
	Returns an iterator over successive matches of the pattern in `str`, optionally starting at
	character `init`. Each call yields the next Match, or nil once the string is exhausted or no
	further match is found. After an empty match the search resumes one position further on.
]]
re_m.matchall = check_re('RegEx', 'matchall', function(self, str, init, source)
	local subject = to_str_arr(str, init);
	local next_start = 1;
	return function()
		if next_start > subject.n + 1 then
			return nil;
		end;
		local span = re_rawfind(self.token, subject, next_start, self.flags, self.verb_flags, false);
		if not span then
			return nil;
		end;
		local whole = span[0];
		next_start = whole[2];
		if whole[1] >= whole[2] then
			-- empty match: step forward so the same position is not matched again
			next_start += 1;
		end;
		return new_match(span, self.group_id, source, subject.s);
	end;
end);

--[[
	Expands a tokenized replacement into code points and appends them to `repl_r`.
	repl_r: output array of code points (appended to in place; `n` is refreshed before returning)
	str: code point array of the subject string
	span: span table of the current match ([k] = { start, exclusive end })
	tkn: replacement tokens. A number is a group reference; a table is either a literal run of
	  code points or { "condition", group, tokens used if the group matched, tokens used otherwise }
	Returns repl_r.
]]
local function insert_tokenized_sub(repl_r, str, span, tkn)
	-- appends the code points captured by `group`, which must have a span
	local function append_group(group)
		local bounds = span[group];
		table.move(str, bounds[1], bounds[2] - 1, #repl_r + 1, repl_r);
	end;
	for _, part in ipairs(tkn) do
		if type(part) ~= "table" then
			-- plain group reference; groups that did not take part contribute nothing
			if span[part] then
				append_group(part);
			end;
		elseif part[1] ~= "condition" then
			-- literal run of code points
			table.move(part, 1, #part, #repl_r + 1, repl_r);
		elseif span[part[2]] then
			if part[3] then
				insert_tokenized_sub(repl_r, str, span, part[3]);
			else
				-- no explicit replacement for the matched case: use the group's own text
				append_group(part[2]);
			end;
		elseif part[4] then
			insert_tokenized_sub(repl_r, str, span, part[4]);
		end;
	end;
	repl_r.n = #repl_r;
	return repl_r;
end;

re_m.sub = check_re('RegEx', 'sub', function(self, repl, str, n, repl_flag_str, source)
	if repl_flag_str ~= nil and type(repl_flag_str) ~= "number" and type(repl_flag_str) ~= "string" then
		error(string.format("invalid argument #5 to 'sub' (string expected, got %s)", typeof(repl_flag_str)), 3);
	end
	local repl_flags = {
		l = false, o = false, u = false,
	};
	for f in string.gmatch(repl_flag_str or '', utf8.charpattern) do
		if repl_flags[f] ~= false then
			error("invalid regular expression substitution flag " .. f, 3);
		end;
		repl_flags[f] = true;
	end;
	local repl_type = type(repl);
	if repl_type == "number" then
		repl ..= '';
	elseif repl_type ~= "string" and repl_type ~= "function" and (not repl_flags.o or repl_type ~= "table") then
		error(string.format("invalid argument #2 to 'sub' (string/function%s expected, got %s)", repl_flags.o and "/table" or '', typeof(repl)), 3);
	end;
	if tonumber(n) then
		n = tonumber(n);
		if n <= -1 or n ~= n then
			n = math.huge;
		end;
	elseif n ~= nil then
		error(string.format("invalid argument #4 to 'sub' (number expected, got %s)", typeof(n)), 3);
	else
		n = math.huge;
	end;
	if n < 1 then
		return str, 0;
	end;
	local min_repl_n = 0;
	if repl_type == "string" then
		repl = to_str_arr(repl);
		if not repl_flags.l then
			local i1 = 0;
			local repl_r = table.create(3);
			local group_n = self.token.group_n;
			local conditional_c = { };
			while i1 < repl.n do
				local i2 = i1;
				repeat
					i2 += 1;
				until not repl[i2] or repl[i2] == 0x24 or repl[i2] == 0x5C or (repl[i2] == 0x3A or repl[i2] == 0x7D) and conditional_c[1];
				min_repl_n += i2 - i1 - 1;
				if i2 - i1 > 1 then
					table.insert(repl_r, table.move(repl, i1 + 1, i2 - 1, 1, table.create(i2 - i1 - 1)));
				end;
				if repl[i2] == 0x3A then
					local current_conditional_c = conditional_c[1];
					if current_conditional_c[2] then
						error("malformed substitution pattern", 3);
					end;
					current_conditional_c[2] = table.move(repl_r, current_conditional_c[3], #repl_r, 1, table.create(#repl_r + 1 - current_conditional_c[3]));
					for i3 = #repl_r, current_conditional_c[3], -1 do
						repl_r[i3] = nil;
					end;
				elseif repl[i2] == 0x7D then
					local current_conditional_c = table.remove(conditional_c, 1);
					local second_c = table.move(repl_r, current_conditional_c[3], #repl_r, 1, table.create(#repl_r + 1 - current_conditional_c[3]));
					for i3 = #repl_r, current_conditional_c[3], -1 do
						repl_r[i3] = nil;
					end;
					table.insert(repl_r, { "condition", current_conditional_c[1], current_conditional_c[2] ~= true and (current_conditional_c[2] or second_c), current_conditional_c[2] and second_c });
				elseif repl[i2] then
					i2 += 1;
					local subst_c = repl[i2];
					if not subst_c then
						if repl[i2 - 1] == 0x5C then
							error("replacement string must not end with a trailing backslash", 3);
						end;
						local prev_repl_f = repl_r[#repl_r];
						if type(prev_repl_f) == "table" then
							table.insert(prev_repl_f, repl[i2 - 1]);
						else
							table.insert(repl_r, { repl[i2 - 1] });
						end;
					elseif subst_c == 0x5C and repl[i2 - 1] == 0x24 then
						local prev_repl_f = repl_r[#repl_r];
						if type(prev_repl_f) == "table" then
							table.insert(prev_repl_f, 0x24);
						else
							table.insert(repl_r, { 0x24 });
						end;
						i2 -= 1;
						min_repl_n += 1;
					elseif subst_c == 0x30 then
						table.insert(repl_r, 0);
					elseif subst_c > 0x30 and subst_c <= 0x39 then
						local start_i2 = i2;
						local group_i = subst_c - 0x30;
						while repl[i2 + 1] and repl[i2 + 1] >= 0x30 and repl[i2 + 1] <= 0x39 do
							group_i ..= repl[i2 + 1] - 0x30;
							i2 += 1;
						end;
						group_i = tonumber(group_i);
						if not repl_flags.u and group_i > group_n then
							error("reference to non-existent subpattern", 3);
						end;
						table.insert(repl_r, group_i);
					elseif subst_c == 0x7B and repl[i2 - 1] == 0x24 then
						i2 += 1;
						local start_i2 = i2;
						while repl[i2] and
							(repl[i2] >= 0x30 and repl[i2] <= 0x39
								or repl[i2] >= 0x41 and repl[i2] <= 0x5A
								or repl[i2] >= 0x61 and repl[i2] <= 0x7A
								or repl[i2] == 0x5F) do
							i2 += 1;
						end;
						if (repl[i2] == 0x7D or repl[i2] == 0x3A and (repl[i2 + 1] == 0x2B or repl[i2 + 1] == 0x2D)) and i2 ~= start_i2 then
							local group_k = utf8_sub(repl.s, start_i2, i2);
							if repl[start_i2] >= 0x30 and repl[start_i2] <= 0x39 then
								group_k = tonumber(group_k);
								if not repl_flags.u and group_k > group_n then
									error("reference to non-existent subpattern", 3);
								end;
							else
								group_k = self.group_id[group_k];
								if not repl_flags.u and (not group_k or group_k > group_n) then
									error("reference to non-existent subpattern", 3);
								end;
							end;
							if repl[i2] == 0x3A then
								i2 += 1;
								table.insert(conditional_c, { group_k, repl[i2] == 0x2D, #repl_r + 1 });
							else
								table.insert(repl_r, group_k);
							end;
						else
							error("malformed substitution pattern", 3);
						end;
					else
						local c_escape_char;
						if repl[i2 - 1] == 0x24 then
							if subst_c ~= 0x24 then
								local prev_repl_f = repl_r[#repl_r];
								if type(prev_repl_f) == "table" then
									table.insert(prev_repl_f, 0x24);
								else
									table.insert(repl_r, { 0x24 });
								end;
							end;
						else
							c_escape_char = escape_chars[repl[i2]];
							if type(c_escape_char) ~= "number" then
								c_escape_char = nil;
							end;
						end;
						local prev_repl_f = repl_r[#repl_r];
						if type(prev_repl_f) == "table" then
							table.insert(prev_repl_f, c_escape_char or repl[i2]);
						else
							table.insert(repl_r, { c_escape_char or repl[i2] });
						end;
						min_repl_n += 1;
					end;
				end;
				i1 = i2;
			end;
			if conditional_c[1] then
				error("malformed substitution pattern", 3);
			end;
			if not repl_r[2] and type(repl_r[1]) == "table" and repl_r[1][1] ~= "condition" then
				repl, repl.n = repl_r[1], #repl_r[1];
			else
				repl, repl_type = repl_r, "subst_string";
			end;
		end;
	end;
	str = to_str_arr(str);
	local incr, i0, count = 0, 1, 0;
	while i0 <= str.n + incr + 1 do
		local span = re_rawfind(self.token, str, i0, self.flags, self.verb_flags, false);
		if not span then
			break;
		end;
		local repl_r;
		if repl_type == "string" then
			repl_r = repl;
		elseif repl_type == "subst_string" then
			repl_r = insert_tokenized_sub(table.create(min_repl_n), str, span, repl);
		else
			local re_match;
			local repl_c;
			if repl_type == "table" then
				re_match = utf8_sub(str.s, span[0][1], span[0][2]);
				repl_c = repl[re_match];
			else
				re_match = new_match(span, self.group_id, source, str.s);
				repl_c = repl(re_match);
			end;
			if repl_c == re_match or repl_flags.o and not repl_c then
				local repl_n = span[0][2] - span[0][1];
				repl_r = table.move(str, span[0][1], span[0][2] - 1, 1, table.create(repl_n));
				repl_r.n = repl_n;
			elseif type(repl_c) == "string" then
				repl_r = to_str_arr(repl_c);
			elseif type(repl_c) == "number" then
				repl_r = to_str_arr(repl_c .. '');
			elseif repl_flags.o then
				error(string.format("invalid replacement value (a %s)", type(repl_c)), 3);
			else
				repl_r = { n = 0 };
			end;
		end;
		local match_len = span[0][2] - span[0][1];
		local repl_len = math.min(repl_r.n, match_len);
		for i1 = 0, repl_len - 1 do
			str[span[0][1] + i1] = repl_r[i1 + 1];
		end;
		local i1 = span[0][1] + repl_len;
		i0 = span[0][2];
		if match_len > repl_r.n then
			for i2 = 1, match_len - repl_r.n do
				table.remove(str, i1);
				incr -= 1;
				i0 -= 1;
			end;
		elseif repl_r.n > match_len then
			for i2 = 1, repl_r.n - match_len do
				table.insert(str, i1 + i2 - 1, repl_r[repl_len + i2]);
				incr += 1;
				i0 += 1;
			end;
		end;
		if match_len <= 0 then
			i0 += 1;
		end;
		count += 1;
		if n < count + 1 then
			break;
		end;
	end;
	return from_str_arr(str), count;
end);

-- RegEx:split(str, n)
-- Cuts `str` at every match of the pattern and returns the pieces as an array.
-- `n` caps the number of cuts; nil, NaN and values <= -1 mean "no cap".
-- Anything else that cannot be converted to a number raises an argument error.
re_m.split = check_re('RegEx', 'split', function(self, str, n)
	local limit = tonumber(n);
	if limit then
		if limit <= -1 or limit ~= limit then
			limit = math.huge;
		end;
	elseif n == nil then
		limit = math.huge;
	else
		error(string.format("invalid argument #3 to 'split' (number expected, got %s)", typeof(n)), 3);
	end;

	local subject = to_str_arr(str);
	local pieces = { };
	-- `cursor` is where the next search starts; `empty_shift` is 1 right after an
	-- empty match (the cursor was pushed one position forward to guarantee
	-- progress, so the next piece has to start one position earlier).
	local cursor, attempts, empty_shift = 1, 0, 0;
	while cursor <= subject.n + 1 do
		attempts += 1;
		if limit < attempts then
			break;
		end;
		local span = re_rawfind(self.token, subject, cursor, self.flags, self.verb_flags, false);
		if not span then
			break;
		end;
		local whole = span[0];
		table.insert(pieces, utf8_sub(subject.s, cursor - empty_shift, whole[1]));
		if whole[1] >= whole[2] then
			empty_shift = 1;
		else
			empty_shift = 0;
		end;
		cursor = whole[2] + empty_shift;
	end;
	-- Whatever follows the last cut becomes the final piece.
	table.insert(pieces, string.sub(subject.s, utf8.offset(subject.s, cursor - empty_shift)));
	return pieces;
end);

--
-- Index handler: methods stored in re_m take precedence; any other key is
-- looked up in the flags table recorded for this object in `proxy`.
local function re_index(self, index)
	local method = re_m[index];
	if method then
		return method;
	end;
	return proxy[self].flags[index];
end;

-- String conversion handler: the stored pattern representation immediately
-- followed by the stored flag representation.
local function re_tostr(self)
	local data = proxy[self];
	return data.pattern_repr .. data.flag_repr;
end;
--

-- Code points that may directly follow "(?" without any further parsing by the
-- tokenizer (everything else after "(?" is handled by a dedicated branch).
local other_valid_group_char = {
	[0x21] = true, -- '!'  negative lookaround
	[0x3A] = true, -- ':'  non-capturing group
	[0x3D] = true, -- '='  positive lookaround
	[0x3E] = true, -- '>'  atomic group
	[0x7C] = true, -- '|'  branch reset
};

--[[
	Converts a pattern into the flat token list ("outln") used by the matcher.

	Parameters:
		codes - the pattern as a code point array (numeric entries 1..codes.n, plus the
		        original string in codes.s)
		flags - the flag table of the RegEx being built

	Returns:
		on success: outln, group_id, verb_flags
			outln      - array of tokens; outln.group_n is the highest capture group number
			group_id   - map of group name -> group number
			verb_flags - { newline, newline_seq, not_empty } as set by leading (*VERB)s
		on failure: a single string holding the error message (nothing is thrown)

	Token shapes produced here:
		number                                   literal code point
		dot / alternation / beginning_str / end_str (file-level sentinels)
		string                                   backtracking verb ("ACCEPT", "FAIL", "PRUNE", "SKIP")
		{ 0x28, group_n, close_index, kind, extra }              group open
		{ 0x29, group_n, open_index, kind, extra, count = w }    group close (w = fixed width or nil)
		{ "quantifier", min, max, "greedy"|"lazy"|"possessive", token }
		{ "charset", negate, members } / { "range", lo, hi } / { "class", name, negate }
		{ "category", negate, name }
		{ "backref", number_or_name } / { "recurmatch", number, open_index }

	Throughout the main loop the convention is that a branch leaves `i` on the LAST
	code point it consumed; the shared `i += 1` at the bottom then moves on.
]]
local function tokenize_ptn(codes, flags)
	if flags.unicode and not options.unicodeData then
		return "options.unicodeData cannot be turned off while having unicode flag";
	end;
	local i, len = 1, codes.n;
	local group_n = 0;
	local outln, group_id, verb_flags = { }, { }, {
		newline = 1, newline_seq = 1, not_empty = 0,
	};
	while i <= len do
		local c = codes[i];
		if c == 0x28 then
			-- Match
			local ret;
			if codes[i + 1] == 0x2A then
				-- (*VERB) or (*name:...) syntax
				i += 2;
				local start_i = i;
				-- A verb name is [0-9A-Za-z_]*, optionally closed by ONE ':'.
				-- The ':' must end the scan, otherwise the group body (e.g. "foo" in
				-- "(*pla:foo)") would be swallowed into the verb name.
				while codes[i]
					and (codes[i] >= 0x30 and codes[i] <= 0x39
					or codes[i] >= 0x41 and codes[i] <= 0x5A
					or codes[i] >= 0x61 and codes[i] <= 0x7A
					or codes[i] == 0x5F) do
					i += 1;
				end;
				if codes[i] == 0x3A then
					i += 1;
				end;
				if codes[i] ~= 0x29 and codes[i - 1] ~= 0x3A then
					-- fallback as normal and ( can't be repeated
					return "quantifier doesn't follow a repeatable pattern";
				end;
				local selected_verb = utf8_sub(codes.s, start_i, i);
				-- "negative_lookhead:" is the historical misspelling; it is still accepted
				-- so that patterns written against it keep working.
				if selected_verb == "positive_lookahead:" or selected_verb == "negative_lookahead:" or selected_verb == "negative_lookhead:"
					or selected_verb == "positive_lookbehind:" or selected_verb == "negative_lookbehind:"
					or selected_verb:find("^[pn]l[ab]:$") then
					ret = { 0x28, nil, nil, selected_verb:find('^n') and 0x21 or 0x3D, selected_verb:find('b', 3, true) and 1 };
					-- i is one past the ':'; step back so the shared increment lands on the
					-- first code point of the group body instead of skipping it.
					i -= 1;
				elseif selected_verb == "atomic:" then
					ret = { 0x28, nil, nil, 0x3E, nil };
					-- same reasoning as above
					i -= 1;
				elseif selected_verb == "ACCEPT" or selected_verb == "FAIL" or selected_verb == 'F' or selected_verb == "PRUNE" or selected_verb == "SKIP" then
					ret = selected_verb == 'F' and "FAIL" or selected_verb;
				else
					if line_verbs[selected_verb] then
						verb_flags.newline = selected_verb;
					elseif selected_verb == "BSR_ANYCRLF" or selected_verb == "BSR_UNICODE" then
						verb_flags.newline_seq = selected_verb == "BSR_UNICODE" and 1 or 0;
					elseif selected_verb == "NOTEMPTY" or selected_verb == "NOTEMPTY_ATSTART" then
						verb_flags.not_empty = selected_verb == "NOTEMPTY" and 1 or 2;
					else
						return "unknown or malformed verb";
					end;
					if outln[1] then
						return "this verb must be placed at the beginning of the regex";
					end;
				end;
			elseif codes[i + 1] == 0x3F then
				-- ? syntax
				i += 2;
				if codes[i] == 0x23 then
					-- comments
					i = table.find(codes, 0x29, i);
					if not i then
						return "unterminated parenthetical";
					end;
					i += 1;
					continue;
				elseif not codes[i] then
					return "unterminated parenthetical";
				end;
				ret = { 0x28, nil, nil, codes[i], nil };
				if codes[i] == 0x30 and codes[i + 1] == 0x29 then
					-- recursive match entire pattern
					ret[1], ret[2], ret[3], ret[5] = "recurmatch", 0, 0, nil;
					-- consume the ')' so it is not mistaken for the end of an enclosing group
					i += 1;
				elseif codes[i] > 0x30 and codes[i] <= 0x39 then
					-- recursive match
					local org_i = i;
					i += 1;
					while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x39 do
						i += 1;
					end;
					if codes[i] ~= 0x29 then
						return "invalid group structure";
					end;
					ret[1], ret[2], ret[4] = "recurmatch", tonumber(utf8_sub(codes.s, org_i, i)), nil;
				elseif codes[i] == 0x3C and (codes[i + 1] == 0x21 or codes[i + 1] == 0x3D) then
					-- lookbehinds; the parentheses matter, without them any "(?x=" sequence
					-- (including "(?P=name)" and "(?:=...)") was treated as a lookbehind
					i += 1;
					ret[4], ret[5] = codes[i], 1;
				elseif codes[i] == 0x7C then
					-- branch reset
					ret[5] = group_n;
				elseif codes[i] == 0x50 or codes[i] == 0x3C or codes[i] == 0x27 then
					if codes[i] == 0x50 then
						i += 1;
					end;
					if codes[i] == 0x3D then
						-- backref
						local start_i = i + 1;
						-- step over the '=' before scanning the name
						i += 1;
						while codes[i] and
							(codes[i] >= 0x30 and codes[i] <= 0x39
								or codes[i] >= 0x41 and codes[i] <= 0x5A
								or codes[i] >= 0x61 and codes[i] <= 0x7A
								or codes[i] == 0x5F) do
							i += 1;
						end;
						if not codes[i] then
							return "unterminated parenthetical";
						elseif codes[i] ~= 0x29 or i == start_i then
							return "invalid group structure";
						end;
						-- the name is resolved to a group number in the final pass below
						ret = { "backref", utf8_sub(codes.s, start_i, i) };
					elseif codes[i] == 0x3C or codes[i - 1] ~= 0x50 and codes[i] == 0x27 then
						-- named capture
						local delimiter = codes[i] == 0x27 and 0x27 or 0x3E;
						local start_i = i + 1;
						i += 1;
						if not codes[i] then
							return "unterminated parenthetical";
						elseif codes[i] == 0x29 then
							return "missing character in subpattern";
						elseif codes[i] >= 0x30 and codes[i] <= 0x39 then
							return "subpattern name must not begin with a digit";
						elseif not (codes[i] >= 0x41 and codes[i] <= 0x5A or codes[i] >= 0x61 and codes[i] <= 0x7A or codes[i] == 0x5F) then
							return "invalid character in subpattern";
						end;
						i += 1;
						while codes[i] and
							(codes[i] >= 0x30 and codes[i] <= 0x39
								or codes[i] >= 0x41 and codes[i] <= 0x5A
								or codes[i] >= 0x61 and codes[i] <= 0x7A
								or codes[i] == 0x5F) do
							i += 1;
						end;
						if not codes[i] then
							return "unterminated parenthetical";
						elseif codes[i] ~= delimiter then
							return "invalid character in subpattern";
						end;
						local name = utf8_sub(codes.s, start_i, i);
						group_n += 1;
						-- a name may only be reused for the same group number (branch reset)
						if (group_id[name] or group_n) ~= group_n then
							return "subpattern name already exists";
						end;
						for name1, group_n1 in pairs(group_id) do
							if name ~= name1 and group_n == group_n1 then
								return "different names for subpatterns of the same number aren't permitted";
							end;
						end;
						group_id[name] = group_n;
						ret[2], ret[4] = group_n, nil;
					else
						return "invalid group structure";
					end;
				elseif not other_valid_group_char[codes[i]] then
					return "invalid group structure";
				end;
			else
				-- plain capturing group
				group_n += 1;
				ret = { 0x28, group_n, nil, nil };
			end;
			if ret then
				table.insert(outln, ret);
			end;
		elseif c == 0x29 then
			-- Close parenthesis
			-- Walk backwards to the nearest group open that has no close index yet,
			-- accumulating along the way:
			--   * the width of each alternative (lookbehind_c becomes nil as soon as the
			--     alternatives differ in width or contain a variable-width quantifier)
			--   * the number of groups opened per alternative (for branch reset numbering)
			local i1 = #outln + 1;
			local lookbehind_c = -1;
			local current_lookbehind_c = 0;
			local max_c, group_c = 0, 0;
			repeat
				i1 -= 1;
				local v, is_table = outln[i1], type(outln[i1]) == "table";
				if is_table and v[1] == 0x28 then
					group_c += 1;
					if current_lookbehind_c and v.count then
						current_lookbehind_c += v.count;
					end;
					if not v[3] then
						if v[4] == 0x7C then
							group_n = v[5] + math.max(max_c, group_c);
						end;
						if current_lookbehind_c ~= lookbehind_c and lookbehind_c ~= -1 then
							lookbehind_c = nil;
						else
							lookbehind_c = current_lookbehind_c;
						end;
						break;
					end;
				elseif v == alternation then
					if current_lookbehind_c ~= lookbehind_c and lookbehind_c ~= -1 then
						lookbehind_c, current_lookbehind_c = nil, nil;
					else
						lookbehind_c, current_lookbehind_c = current_lookbehind_c, 0;
					end;
					max_c, group_c = math.max(max_c, group_c), 0;
				elseif current_lookbehind_c then
					if is_table and v[1] == "quantifier" then
						if v[2] == v[3] then
							current_lookbehind_c += v[2];
						else
							current_lookbehind_c = nil;
						end;
					else
						current_lookbehind_c += 1;
					end;
				end;
			until i1 < 1;
			if i1 < 1 then
				return "unmatched ) in regular expression";
			end;
			local v = outln[i1];
			local outln_len_p_1 = #outln + 1;
			local ret = { 0x29, v[2], i1, v[4], v[5], count = lookbehind_c };
			if (v[4] == 0x21 or v[4] == 0x3D) and v[5] and not lookbehind_c then
				return "lookbehind assertion is not fixed width";
			end;
			-- link the open token to the index its close token is about to occupy
			v[3] = outln_len_p_1;
			table.insert(outln, ret);
		elseif c == 0x2E then
			table.insert(outln, dot);
		elseif c == 0x5B then
			-- Character set
			-- Members are PREPENDED to `ret`, so ret[1] is always the most recent member;
			-- the range handling below relies on that.
			local negate, char_class = false, nil;
			i += 1;
			local start_i = i;
			if codes[i] == 0x5E then
				negate = true;
				i += 1;
			elseif codes[i] == 0x2E or codes[i] == 0x3A or codes[i] == 0x3D then
				-- POSIX character classes
				char_class = codes[i];
			end;
			local ret;
			if codes[i] == 0x5B or codes[i] == 0x5C then
				ret = { };
			else
				-- the first member is always literal (this is what makes "[]abc]" work)
				ret = { codes[i] };
				i += 1;
			end;
			while codes[i] ~= 0x5D do
				if not codes[i] then
					return "unterminated character class";
				elseif codes[i] == 0x2D and ret[1] and type(ret[1]) == "number" then
					if codes[i + 1] == 0x5D then
						table.insert(ret, 1, 0x2D);
					else
						i += 1;
						local ret_c = codes[i];
						if not ret_c then
							return "unterminated character class";
						elseif ret_c == 0x5B then
							if codes[i + 1] == 0x2E or codes[i + 1] == 0x3A or codes[i + 1] == 0x3D then
								-- Check for POSIX character class, name does not matter
								local i1 = i + 2;
								repeat
									i1 = table.find(codes, 0x5D, i1);
								until not i1 or codes[i1 - 1] ~= 0x5C;
								if not i1 then
									return "unterminated character class";
								elseif codes[i1 - 1] == codes[i + 1] and i1 - 1 ~= i + 1 then
									return "invalid range in character class";
								end;
							end;
							if ret[1] > 0x5B then
								return "invalid range in character class";
							end;
						elseif ret_c == 0x5C then
							i += 1;
							if not codes[i] then
								return "unterminated character class";
							elseif codes[i] == 0x78 then
								local radix0, radix1;
								i += 1;
								if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
									radix0 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
									i += 1;
									if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
										radix1 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
									else
										i -= 1;
									end;
								else
									i -= 1;
								end;
								ret_c = radix0 and (radix1 and 16 * radix0 + radix1 or radix0) or 0;
							elseif codes[i] >= 0x30 and codes[i] <= 0x37 then
								local radix0, radix1, radix2 = codes[i] - 0x30, nil, nil;
								i += 1;
								if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
									radix1 = codes[i] - 0x30;
									i += 1;
									if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
										radix2 = codes[i] - 0x30;
									else
										i -= 1;
									end;
								else
									i -= 1;
								end;
								ret_c = radix1 and (radix2 and 64 * radix0 + 8 * radix1 + radix2 or 8 * radix0 + radix1) or radix0;
							else
								ret_c = escape_chars[codes[i]] or codes[i];
								if type(ret_c) ~= "number" then
									return "invalid range in character class";
								end;
							end;
						elseif ret[1] > ret_c then
							return "invalid range in character class";
						end;
						ret[1] = { "range", ret[1], ret_c };
					end;
				elseif codes[i] == 0x5B then
					if codes[i + 1] == 0x2E or codes[i + 1] == 0x3A or codes[i + 1] == 0x3D then
						local i1 = i + 2;
						repeat
							i1 = table.find(codes, 0x5D, i1);
						until not i1 or codes[i1 - 1] ~= 0x5C;
						if not i1 then
							return "unterminated character class";
						elseif codes[i1 - 1] ~= codes[i + 1] or i1 - 1 == i + 1 then
							table.insert(ret, 1, 0x5B);
						elseif codes[i1 - 1] == 0x2E or codes[i1 - 1] == 0x3D then
							return "POSIX collating elements aren't supported";
						elseif codes[i1 - 1] == 0x3A then
							-- I have no plans to support escape codes (\) in character class names
							-- In "[:^name:]" the '^' sits right after "[:", i.e. at i + 2.
							local negate = codes[i + 2] == 0x5E;
							local class_name = utf8_sub(codes.s, i + (negate and 3 or 2), i1 - 1);
							--  If not valid then throw an error
							if not posix_class_names[class_name] then
								return "unknown POSIX class name";
							end;
							table.insert(ret, 1, { "class", class_name, negate });
							i = i1;
						end;
					else
						table.insert(ret, 1, 0x5B);
					end;
				elseif codes[i] == 0x5C then
					i += 1;
					if not codes[i] then
						return "unterminated character class";
					elseif codes[i] == 0x78 then
						local radix0, radix1;
						i += 1;
						if codes[i] == 0x7B then
							i += 1;
							local org_i = i;
							while codes[i] and
								(codes[i] >= 0x30 and codes[i] <= 0x39
									or codes[i] >= 0x41 and codes[i] <= 0x46
									or codes[i] >= 0x61 and codes[i] <= 0x66) do
								i += 1;
							end;
							if codes[i] ~= 0x7D or i == org_i then
								return "malformed hexadecimal character";
							elseif i - org_i > 4 then
								return "character offset too large";
							end;
							table.insert(ret, 1, tonumber(utf8_sub(codes.s, org_i, i), 16));
						else
							if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
								radix0 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
								i += 1;
								if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
									radix1 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
								else
									i -= 1;
								end;
							else
								i -= 1;
							end;
							table.insert(ret, 1, radix0 and (radix1 and 16 * radix0 + radix1 or radix0) or 0);
						end;
					elseif codes[i] >= 0x30 and codes[i] <= 0x37 then
						local radix0, radix1, radix2 = codes[i] - 0x30, nil, nil;
						i += 1;
						if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
							radix1 = codes[i] - 0x30;
							i += 1;
							if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
								radix2 = codes[i] - 0x30;
							else
								i -= 1;
							end;
						else
							i -= 1;
						end;
						table.insert(ret, 1, radix1 and (radix2 and 64 * radix0 + 8 * radix1 + radix2 or 8 * radix0 + radix1) or radix0);
					elseif codes[i] == 0x45 then
						-- intentionally left blank, \E that's not preceded \Q is ignored
					elseif codes[i] == 0x51 then
						-- \Q...\E inside a class: the quoted code points become members of THIS
						-- class (they must not leak into outln as top-level literals).
						local start_i = i + 1;
						repeat
							i = table.find(codes, 0x5C, i + 1);
						until not i or codes[i + 1] == 0x45;
						if not i then
							-- no \E means the closing ']' can never be reached either
							return "unterminated character class";
						end;
						for i1 = start_i, i - 1 do
							table.insert(ret, 1, codes[i1]);
						end;
						i += 1;
					elseif codes[i] == 0x4E then
						if codes[i + 1] == 0x7B and codes[i + 2] == 0x55 and codes[i + 3] == 0x2B and flags.unicode then
							i += 4;
							local start_i = i;
							while codes[i] and
								(codes[i] >= 0x30 and codes[i] <= 0x39
									or codes[i] >= 0x41 and codes[i] <= 0x46
									or codes[i] >= 0x61 and codes[i] <= 0x66) do
								i += 1;
							end;
							if codes[i] ~= 0x7D or i == start_i then
								return "malformed Unicode code point";
							end;
							-- the digits of \N{U+hhhh} are hexadecimal
							local code_point = tonumber(utf8_sub(codes.s, start_i, i), 16);
							table.insert(ret, 1, code_point);
						else
							return "invalid escape sequence";
						end;
					elseif codes[i] == 0x50 or codes[i] == 0x70 then
						if not options.unicodeData then
							return "options.unicodeData cannot be turned off when using \\p";
						end;
						-- remember \P vs \p now; i moves off the letter on the next line
						local is_negated_prop = codes[i] == 0x50;
						i += 1;
						if codes[i] ~= 0x7B then
							local c_name = utf8.char(codes[i] or 0);
							if not valid_categories[c_name] then
								return "unknown or malformed script name";
							end;
							table.insert(ret, 1, { "category", is_negated_prop, c_name });
						else
							local negate = is_negated_prop;
							i += 1;
							if codes[i] == 0x5E then
								i += 1;
								negate = not negate;
							end;
							local start_i = i;
							while codes[i] and
								(codes[i] >= 0x30 and codes[i] <= 0x39
									or codes[i] >= 0x41 and codes[i] <= 0x5A
									or codes[i] >= 0x61 and codes[i] <= 0x7A
									or codes[i] == 0x5F) do
								i += 1;
							end;
							if codes[i] ~= 0x7D then
								return "unknown or malformed script name";
							end;
							local c_name = utf8_sub(codes.s, start_i, i);
							local script_set = chr_scripts[c_name];
							if script_set then
								table.insert(ret, 1, { "charset", negate, script_set });
							elseif not valid_categories[c_name] then
								return "unknown or malformed script name";
							else
								table.insert(ret, 1, { "category", negate, c_name });
							end;
						end;
					elseif codes[i] == 0x6F then
						i += 1;
						if codes[i] ~= 0x7B then
							return "malformed octal code";
						end;
						i += 1;
						local org_i = i;
						while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 do
							i += 1;
						end;
						if codes[i] ~= 0x7D or i == org_i then
							return "malformed octal code";
						end;
						local ret_chr = tonumber(utf8_sub(codes.s, org_i, i), 8);
						if ret_chr > 0xFFFF then
							return "character offset too large";
						end;
						table.insert(ret, 1, ret_chr);
					else
						local esc_char = escape_chars[codes[i]];
						table.insert(ret, 1, type(esc_char) == "string" and { "class", esc_char, false } or esc_char or codes[i]);
					end;
				-- NOTE(review): re.new builds the flags table with a `caseless` key and no
				-- `ignoreCase` key; confirm this test is reachable as intended.
				elseif flags.ignoreCase and codes[i] >= 0x61 and codes[i] <= 0x7A then
					table.insert(ret, 1, codes[i] - 0x20);
				else
					table.insert(ret, 1, codes[i]);
				end;
				i += 1;
			end;
			if codes[i - 1] == char_class and i - 1 ~= start_i then
				return char_class == 0x3A and "POSIX named classes are only support within a character set" or "POSIX collating elements aren't supported";
			end;
			if not ret[2] and not negate then
				-- a non-negated class with a single member is emitted as that member
				table.insert(outln, ret[1]);
			else
				table.insert(outln, { "charset", negate, ret });
			end;
		elseif c == 0x5C then
			-- Escape char
			i += 1;
			local escape_c = codes[i];
			if not escape_c then
				return "pattern may not end with a trailing backslash";
			elseif escape_c >= 0x30 and escape_c <= 0x39 then
				local org_i = i;
				while codes[i + 1] and codes[i + 1] >= 0x30 and codes[i + 1] <= 0x39 do
					i += 1;
				end;
				local escape_d = tonumber(utf8_sub(codes.s, org_i, i + 1));
				-- a multi-digit number larger than the number of groups opened so far is
				-- read as an octal escape (up to three octal digits) rather than a backref
				if escape_d > group_n and i ~= org_i then
					i = org_i;
					local radix0, radix1, radix2;
					if codes[i] <= 0x37 then
						radix0 = codes[i] - 0x30;
						i += 1;
						if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
							radix1 = codes[i] - 0x30;
							i += 1;
							if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
								radix2 = codes[i] - 0x30;
							else
								i -= 1;
							end;
						else
							i -= 1;
						end;
					end;
					table.insert(outln, radix0 and (radix1 and (radix2 and 64 * radix0 + 8 * radix1 + radix2 or 8 * radix0 + radix1) or radix0) or codes[org_i]);
				else
					table.insert(outln, { "backref", escape_d });
				end;
			elseif escape_c == 0x45 then
				-- intentionally left blank, \E that's not preceded \Q is ignored
			elseif escape_c == 0x51 then
				-- \Q...\E: copy everything up to \E (or the end of the pattern) verbatim
				local start_i = i + 1;
				repeat
					i = table.find(codes, 0x5C, i + 1);
				until not i or codes[i + 1] == 0x45;
				table.move(codes, start_i, i and i - 1 or #codes, #outln + 1, outln);
				if not i then
					break;
				end;
				i += 1;
			elseif escape_c == 0x4E then
				if codes[i + 1] == 0x7B and codes[i + 2] == 0x55 and codes[i + 3] == 0x2B and flags.unicode then
					i += 4;
					local start_i = i;
					while codes[i] and
						(codes[i] >= 0x30 and codes[i] <= 0x39
							or codes[i] >= 0x41 and codes[i] <= 0x46
							or codes[i] >= 0x61 and codes[i] <= 0x66) do
						i += 1;
					end;
					if codes[i] ~= 0x7D or i == start_i then
						return "malformed Unicode code point";
					end;
					-- the digits of \N{U+hhhh} are hexadecimal
					local code_point = tonumber(utf8_sub(codes.s, start_i, i), 16);
					table.insert(outln, code_point);
				else
					table.insert(outln, escape_chars[0x4E]);
				end;
			elseif escape_c == 0x50 or escape_c == 0x70 then
				if not options.unicodeData then
					return "options.unicodeData cannot be turned off when using \\p";
				end;
				i += 1;
				if codes[i] ~= 0x7B then
					local c_name = utf8.char(codes[i] or 0);
					if not valid_categories[c_name] then
						return "unknown or malformed script name";
					end;
					-- \PX is the negation of \pX
					table.insert(outln, { "category", escape_c == 0x50, c_name });
				else
					local negate = escape_c == 0x50;
					i += 1;
					if codes[i] == 0x5E then
						i += 1;
						negate = not negate;
					end;
					local start_i = i;
					while codes[i] and
						(codes[i] >= 0x30 and codes[i] <= 0x39
							or codes[i] >= 0x41 and codes[i] <= 0x5A
							or codes[i] >= 0x61 and codes[i] <= 0x7A
							or codes[i] == 0x5F) do
						i += 1;
					end;
					if codes[i] ~= 0x7D then
						return "unknown or malformed script name";
					end;
					local c_name = utf8_sub(codes.s, start_i, i);
					local script_set = chr_scripts[c_name];
					if script_set then
						table.insert(outln, { "charset", negate, script_set });
					elseif not valid_categories[c_name] then
						return "unknown or malformed script name";
					else
						table.insert(outln, { "category", negate, c_name });
					end;
				end;
			elseif escape_c == 0x67 and codes[i + 1] and (codes[i + 1] == 0x7B or codes[i + 1] >= 0x30 and codes[i + 1] <= 0x39) then
				-- \gN or \g{N}
				local is_grouped = false;
				i += 1;
				if codes[i] == 0x7B then
					i += 1;
					is_grouped = true;
				elseif codes[i] < 0x30 or codes[i] > 0x39 then
					return "malformed reference code";
				end;
				local org_i = i;
				-- the text is converted with tonumber (base 10), so only decimal digits
				-- can ever form a valid reference
				while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x39 do
					i += 1;
				end;
				if is_grouped and codes[i] ~= 0x7D then
					return "malformed reference code";
				end;
				-- i is one past the last digit in both forms and utf8_sub's end is exclusive
				local ref_name = tonumber(utf8_sub(codes.s, org_i, i));
				table.insert(outln, { "backref", ref_name });
				if not is_grouped then
					-- leave i on the last digit for the shared increment
					i -= 1;
				end;
			elseif escape_c == 0x6F then
				i += 1;
				-- i is now on the code point right after 'o', which must be '{'
				if codes[i] ~= 0x7B then
					return "malformed octal code";
				end
				i += 1;
				local org_i = i;
				while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 do
					i += 1;
				end;
				if codes[i] ~= 0x7D or i == org_i then
					return "malformed octal code";
				end;
				local ret_chr = tonumber(utf8_sub(codes.s, org_i, i), 8);
				if ret_chr > 0xFFFF then
					return "character offset too large";
				end;
				table.insert(outln, ret_chr);
			elseif escape_c == 0x78 then
				local radix0, radix1;
				i += 1;
				if codes[i] == 0x7B then
					i += 1;
					local org_i = i;
					while codes[i] and
						(codes[i] >= 0x30 and codes[i] <= 0x39
							or codes[i] >= 0x41 and codes[i] <= 0x46
							or codes[i] >= 0x61 and codes[i] <= 0x66) do
						i += 1;
					end;
					if codes[i] ~= 0x7D or i == org_i then
						return "malformed hexadecimal code";
					elseif i - org_i > 4 then
						return "character offset too large";
					end;
					table.insert(outln, tonumber(utf8_sub(codes.s, org_i, i), 16));
				else
					if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
						radix0 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
						i += 1;
						if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
							radix1 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
						else
							i -= 1;
						end;
					else
						i -= 1;
					end;
					table.insert(outln, radix0 and (radix1 and 16 * radix0 + radix1 or radix0) or 0);
				end;
			else
				local esc_char = b_escape_chars[escape_c] or escape_chars[escape_c];
				table.insert(outln, esc_char or escape_c);
			end;
		elseif c == 0x2A or c == 0x2B or c == 0x3F or c == 0x7B then
			-- Quantifier
			local start_q, end_q;
			if c == 0x7B then
				-- {n}, {n,} or {n,m}; anything else is copied through as literal text
				local org_i = i + 1;
				local start_i;
				while codes[i + 1] and (codes[i + 1] >= 0x30 and codes[i + 1] <= 0x39 or codes[i + 1] == 0x2C and not start_i and i + 1 ~= org_i) do
					i += 1;
					if codes[i] == 0x2C then
						start_i = i;
					end;
				end;
				if codes[i + 1] == 0x7D then
					i += 1;
					if not start_i then
						start_q = tonumber(utf8_sub(codes.s, org_i, i));
						end_q = start_q;
					else
						start_q, end_q = tonumber(utf8_sub(codes.s, org_i, start_i)), start_i + 1 == i and math.huge or tonumber(utf8_sub(codes.s, start_i + 1, i));
						if end_q < start_q then
							return "numbers out of order in {} quantifier";
						end;
					end;
				else
					table.move(codes, org_i - 1, i, #outln + 1, outln);
				end;
			else
				start_q, end_q = c == 0x2B and 1 or 0, c == 0x3F and 1 or math.huge;
			end;
			if start_q then
				-- a trailing '+' makes the quantifier possessive, a trailing '?' flips
				-- greediness relative to the ungreedy flag
				local quantifier_type = flags.ungreedy and "lazy" or "greedy";
				if codes[i + 1] == 0x2B or codes[i + 1] == 0x3F then
					i += 1;
					quantifier_type = codes[i] == 0x2B and "possessive" or flags.ungreedy and "greedy" or "lazy";
				end;
				local outln_len = #outln;
				local last_outln_value = outln[outln_len];
				if not last_outln_value or type(last_outln_value) == "table" and (last_outln_value[1] == "quantifier" or last_outln_value[1] == 0x28 or b_escape_chars[last_outln_value[1]])
					or last_outln_value == alternation or type(last_outln_value) == "string" then
					return "quantifier doesn't follow a repeatable pattern";
				end;
				if end_q == 0 then
					table.remove(outln);
				elseif start_q ~= 1 or end_q ~= 1 then
					if type(last_outln_value) == "table" and last_outln_value[1] == 0x29 then
						-- quantifying a group: wrap its OPEN token (index stored in the close token)
						outln_len = last_outln_value[3];
					end;
					outln[outln_len] = { "quantifier", start_q, end_q, quantifier_type, outln[outln_len] };
				end;
			end;
		elseif c == 0x7C then
			-- Alternation
			table.insert(outln, alternation);
			-- Inside a branch reset group every alternative restarts numbering at the
			-- value saved when the group was opened; closed groups are skipped whole.
			local i1 = #outln;
			repeat
				i1 -= 1;
				local v1, is_table = outln[i1], type(outln[i1]) == "table";
				if is_table and v1[1] == 0x29 then
					i1 = outln[i1][3];
				elseif is_table and v1[1] == 0x28 then
					if v1[4] == 0x7C then
						group_n = v1[5];
					end;
					break;
				end;
			until not v1;
		elseif c == 0x24 or c == 0x5E then
			table.insert(outln, c == 0x5E and beginning_str or end_str);
		-- NOTE(review): see the matching note in the character set branch about `ignoreCase`.
		elseif flags.ignoreCase and c >= 0x61 and c <= 0x7A then
			table.insert(outln, c - 0x20);
		elseif flags.extended and (c >= 0x09 and c <= 0x0D or c == 0x20 or c == 0x23) then
			-- extended mode: whitespace is dropped and '#' starts a comment to end of line
			if c == 0x23 then
				repeat
					i += 1;
				until not codes[i] or codes[i] == 0x0A or codes[i] == 0x0D;
			end;
		else
			table.insert(outln, c);
		end;
		i += 1;
	end;
	-- Final pass: every group must have been closed, every reference must point at
	-- an existing group, and named references are resolved to numbers.
	local max_group_n = 0;
	for i, v in ipairs(outln) do
		if type(v) == "table" and (v[1] == 0x28 or v[1] == "quantifier" and type(v[5]) == "table" and v[5][1] == 0x28) then
			if v[1] == "quantifier" then
				v = v[5];
			end;
			if not v[3] then
				return "unterminated parenthetical";
			elseif v[2] then
				max_group_n = math.max(max_group_n, v[2]);
			end;
		elseif type(v) == "table" and (v[1] == "backref" or v[1] == "recurmatch") then
			if not group_id[v[2]] and (type(v[2]) ~= "number" or v[2] > group_n) then
				return "reference to a non-existent or invalid subpattern";
			elseif v[1] == "recurmatch" and v[2] ~= 0 then
				for i1, v1 in ipairs(outln) do
					if type(v1) == "table" and v1[1] == 0x28 and v1[2] == v[2] then
						v[3] = i1;
						break;
					end;
				end;
			elseif type(v[2]) == "string" then
				v[2] = group_id[v[2]];
			end;
		end;
	end;
	outln.group_n = max_group_n;
	return outln, group_id, verb_flags;
end;

-- Cache configuration.
-- options.cacheSize may be a number >= 0, a string that converts to one, false or nil.
-- false, nil and any value that floors to 0 disable the cache (cacheSize becomes false).
-- Only a value that is set AND not convertible to a number is rejected here; the
-- previous unconditional tonumber check also rejected the documented false / nil.
if options.cacheSize and not tonumber(options.cacheSize) then
	error(string.format("expected number for options.cacheSize, got %s", typeof(options.cacheSize)), 2);
end;
local cacheSize = math.floor(options.cacheSize or 0) ~= 0 and tonumber(options.cacheSize);
-- Parallel arrays, most recently compiled pattern first:
-- cache_pattern_names[k] is the key of the entry stored in cache_pattern[k].
local cache_pattern, cache_pattern_names;
if not cacheSize then
	-- caching disabled, both arrays stay nil
elseif cacheSize < 0 or cacheSize ~= cacheSize then
	error("cache size cannot be a negative number or a NaN", 2);
elseif cacheSize == math.huge then
	-- unbounded cache, nothing to preallocate
	cache_pattern, cache_pattern_names = { nil }, { nil };
elseif cacheSize >= 2 ^ 32 then
	error("cache size too large", 2);
else
	cache_pattern, cache_pattern_names = table.create(options.cacheSize), table.create(options.cacheSize);
end;
if cacheSize then
	-- Empties the pattern cache. Only defined while caching is enabled.
	function re.pruge()
		table.clear(cache_pattern_names);
		table.clear(cache_pattern);
	end;
	-- Correctly spelled alias; `pruge` is kept so existing callers keep working.
	re.purge = re.pruge;
end;

-- Creates a RegEx object for an already decoded pattern.
--   str_arr      - pattern as a code point array (str_arr.s is the source string)
--   flags        - flag table
--   flag_repr    - flags as a string (also part of the cache key)
--   pattern_repr - printable form of the pattern
-- The tokenizer result is reused from the cache when the same pattern/flag pair
-- has been compiled before; a tokenizer error message is raised as an error.
local function new_re(str_arr, flags, flag_repr, pattern_repr)
	local cache_key = cacheSize and string.format("%s|%s", str_arr.s, flag_repr);
	local cache_entry = cacheSize and cache_pattern[table.find(cache_pattern_names, cache_key)];

	local tokens, names, verbs;
	if cache_entry then
		tokens, names, verbs = cache_entry[1], cache_entry[2], cache_entry[3];
	else
		tokens, names, verbs = tokenize_ptn(str_arr, flags);
		if type(tokens) == "string" then
			error(tokens, 2);
		end;
		-- patterns that produce no tokens at all are never cached
		if cacheSize and tokens[1] then
			table.insert(cache_pattern_names, 1, cache_key);
			table.insert(cache_pattern, 1, { tokens, names, verbs });
			if cacheSize ~= math.huge then
				-- drop the entry that was pushed past the capacity, if there is one
				table.remove(cache_pattern_names, cacheSize + 1);
				table.remove(cache_pattern, cacheSize + 1);
			end;
		end;
	end;

	local object = newproxy(true);
	local object_mt = getmetatable(object);
	proxy[object] = {
		name = "RegEx",
		flags = flags,
		flag_repr = flag_repr,
		pattern_repr = pattern_repr,
		token = tokens,
		group_id = names,
		verb_flags = verbs,
	};
	object_mt.__index = setmetatable(flags, re_m);
	object_mt.__tostring = re_tostr;
	object_mt.__metatable = lockmsg;

	return object;
end;

-- gsub callback for the pattern "(\\*)/": `pre` is the run of backslashes found
-- directly in front of a forward slash.
-- An even count (including zero) means the slash itself is unescaped, so one
-- backslash is added; an odd count means it is already escaped.
-- The slash is then put back. The previous version appended '.' here, which
-- turned every '/' of the pattern into '.' in the printable form.
local function escape_fslash(pre)
	return (#pre % 2 == 0 and '\\' or '') .. pre .. '/';
end;

-- table.sort comparator for flag characters: orders them by their lower-case form.
local function sort_flag_chr(a, b)
	local lower_a, lower_b = string.lower(a), string.lower(b);
	return lower_a < lower_b;
end;

-- re.new(ptn [, flags_str])
-- Creates a RegEx object from a bare pattern (no delimiters) and an optional string of flag characters.
--   ptn        string, or a number which is converted to its string form
--   flags_str  string or number of flag characters, or nil for no flags
-- Raises when an argument has the wrong type, or when a flag character is unknown or given twice.
-- All errors are reported at the caller (level 2).
function re.new(...)
	if select('#', ...) == 0 then
		error("missing argument #1 (string expected)", 2);
	end;
	local ptn, flags_str = ...;
	if type(ptn) == "number" then
		ptn ..= '';
	elseif type(ptn) ~= "string" then
		error(string.format("invalid argument #1 (string expected, got %s)", typeof(ptn)), 2);
	end;
	if type(flags_str) ~= "string" and type(flags_str) ~= "number" and flags_str ~= nil then
		error(string.format("invalid argument #2 (string expected, got %s)", typeof(flags_str)), 2);
	end;

	local flags = {
		anchored = false, caseless = false, multiline = false, dotall = false, unicode = false, ungreedy = false, extended = false,
	};
	local flag_repr = { };
	for f in string.gmatch(flags_str or '', utf8.charpattern) do
		-- flag_map translates a flag character to a key of `flags`; anything other than `false` here
		-- means the character is unknown (nil) or was already seen (true)
		if flags[flag_map[f]] ~= false then
			-- level 2 like every other error in this function, so the caller of re.new is blamed
			error("invalid regular expression flag " .. f, 2);
		end;
		flags[flag_map[f]] = true;
		table.insert(flag_repr, f);
	end;
	table.sort(flag_repr, sort_flag_chr);
	flag_repr = table.concat(flag_repr);
	-- the parentheses drop gsub's substitution count so only the string reaches string.format
	local escaped_ptn = (ptn:gsub("(\\*)/", escape_fslash));
	return new_re(to_str_arr(ptn), flags, flag_repr, string.format("/%s/", escaped_ptn));
end;

-- re.fromstring(ptn)
-- Creates a RegEx object from a delimited literal such as "/pattern/flags".
-- The first character is the delimiter; it must not be alphanumeric or a backslash. The pattern ends at
-- the first later delimiter that is preceded by an even number of backslashes, and everything after
-- that delimiter is read as flag characters.
--   ptn  string, or a number which is converted to its string form
-- Raises when the argument has the wrong type, the literal is empty, the delimiter is invalid or
-- unterminated, or a flag character is unknown or given twice. Errors are reported at the caller (level 2).
function re.fromstring(...)
	if select('#', ...) == 0 then
		error("missing argument #1 (string expected)", 2);
	end;
	local ptn = ...;
	if type(ptn) == "number" then
		ptn ..= '';
	elseif type(ptn) ~= "string" then
		-- the level belongs to error(), not to string.format()
		error(string.format("invalid argument #1 (string expected, got %s)", typeof(ptn)), 2);
	end;
	local str_arr = to_str_arr(ptn);
	local delimiter = str_arr[1];
	if not delimiter then
		error("empty regex", 2);
	elseif delimiter == 0x5C or (delimiter >= 0x30 and delimiter <= 0x39) or (delimiter >= 0x41 and delimiter <= 0x5A) or (delimiter >= 0x61 and delimiter <= 0x7A) then
		error("delimiter must not be alphanumeric or a backslash", 2);
	end;

	-- Find the closing delimiter: skip occurrences escaped by an odd number of backslashes
	local i0 = 1;
	repeat
		i0 = table.find(str_arr, delimiter, i0 + 1);
		if not i0 then
			error(string.format("no ending delimiter ('%s') found", utf8.char(delimiter)), 2);
		end;
		-- escape_count ends up as 1 + the number of backslashes directly before the delimiter
		local escape_count = 1;
		while str_arr[i0 - escape_count] == 0x5C do
			escape_count += 1;
		end;
	until escape_count % 2 == 1;

	local flags = {
		anchored = false, caseless = false, multiline = false, dotall = false, unicode = false, ungreedy = false, extended = false,
	};
	local flag_repr = { };
	-- Pop the flag characters off the end until the closing delimiter is the last element
	while str_arr.n > i0 do
		local f = utf8.char(table.remove(str_arr));
		str_arr.n -= 1;
		if flags[flag_map[f]] ~= false then
			-- level 2 like every other error in this function, so the caller of re.fromstring is blamed
			error("invalid regular expression flag " .. f, 2);
		end;
		flags[flag_map[f]] = true;
		table.insert(flag_repr, f);
	end;
	table.sort(flag_repr, sort_flag_chr);
	flag_repr = table.concat(flag_repr);

	-- Elements of str_arr are code points (they are passed to utf8.char above) whereas string.sub counts
	-- bytes, so element indices are converted to byte offsets before slicing. Using the element count
	-- directly cuts non-ASCII patterns short.
	-- NOTE(review): assumes str_arr.s is the UTF-8 text the elements were decoded from - confirm against to_str_arr.
	local body_first_byte = utf8.offset(str_arr.s, 2);
	local closing_first_byte = utf8.offset(str_arr.s, i0);
	local repr_last_byte = utf8.offset(ptn, i0 + 1) - 1;

	-- Strip both delimiters so only the pattern body is left
	table.remove(str_arr, 1);
	table.remove(str_arr);
	str_arr.n -= 2;
	str_arr.s = string.sub(str_arr.s, body_first_byte, closing_first_byte - 1);
	-- The representation keeps the delimiters but not the flags
	return new_re(str_arr, flags, flag_repr, string.sub(ptn, 1, repr_last_byte));
end;

-- Control characters that are written as a symbolic escape instead of being prefixed with a backslash
local re_escape_line_chrs = {
	['\0'] = '\\x00', ['\n'] = '\\n', ['\t'] = '\\t', ['\r'] = '\\r', ['\f'] = '\\f',
};

-- re.escape(str [, extended [, delimiter]])
-- Returns str with regex metacharacters escaped so that it can be embedded in a pattern literally.
--   str        string, or a number which is converted to its string form
--   extended   when truthy, whitespace is escaped as well
--   delimiter  optional single character that is escaped too; must not be alphanumeric or a backslash
-- Raises when an argument has the wrong type or the delimiter is not acceptable.
function re.escape(...)
	if select('#', ...) == 0 then
		error("missing argument #1 (string expected)", 2);
	end;
	local str, extended, delimiter = ...;
	if type(str) == "number" then
		str ..= '';
	elseif type(str) ~= "string" then
		error(string.format("invalid argument #1 to 'escape' (string expected, got %s)", typeof(str)), 2);
	end;
	if delimiter == nil then
		delimiter = '';
	elseif type(delimiter) == "number" then
		delimiter ..= '';
	elseif type(delimiter) ~= "string" then
		error(string.format("invalid argument #3 to 'escape' (string expected, got %s)", typeof(delimiter)), 2);
	end;
	-- utf8.len yields nil for invalid UTF-8, which must be rejected rather than compared.
	-- Digits are rejected too (%w, not %a): escaping one would produce a backreference such as \1.
	local delimiter_len = utf8.len(delimiter);
	if not delimiter_len or delimiter_len > 1 or delimiter:match("^[%w\\]$") then
		error("delimiter must be a single character that is not alphanumeric or a backslash", 2);
	end;

	-- '%' and ']' are magic inside a Lua character class and need a '%' prefix there
	-- NOTE(review): Lua classes work on bytes, so a multi-byte delimiter is matched byte by byte -
	-- confirm whether non-ASCII delimiters have to be supported
	local delimiter_class = (delimiter:find'^[%%%]]$' and '%' or '') .. delimiter;
	local escape_class = "[\0\f\n\r\t\\" .. (extended and '%s' or '') .. "#$()%%*+.?[%]^{|" .. delimiter_class .. "]";

	-- Everything is escaped in one pass. Escaping the control characters first and the metacharacters
	-- afterwards would escape the freshly inserted backslashes again (a newline would end up as \\n).
	return (string.gsub(str, escape_class, function(chr)
		return re_escape_line_chrs[chr] or ('\\' .. chr);
	end));
end;

-- re.type(value)
-- Returns the name recorded in `proxy` for value, or a falsy value when value has no entry there.
-- Raises when called without any argument.
function re.type(...)
	local argc = select('#', ...);
	if argc == 0 then
		error("missing argument #1", 2);
	end;
	local entry = proxy[(...)];
	if not entry then
		return entry;
	end;
	return entry.name;
end;

-- TODO: table.foreach is currently used as top-level loops needlessly increase native code size for this module
-- Copies every entry of re_m onto re, so they can also be reached through the re table
table.foreach(re_m, function(k, f) re[k] = f end)

-- From here on re_m is a metatable whose __index is the previous re_m table;
-- new_re hands it to setmetatable for each object's flags table
re_m = { __index = re_m };

-- lockmsg is itself a RegEx object and is the value stored in __metatable by new_re.
-- Its own metatable is patched right after creation so that it points at lockmsg as well
-- (presumably lockmsg was still unset while new_re was building this very object - confirm)
lockmsg = re.fromstring([[/The\s*metatable\s*is\s*(?:locked|inaccessible)(?#Nice try :])/i]]);
getmetatable(lockmsg).__metatable = lockmsg;

-- __newindex handler for the read-only tables below: always raises, blaming the code that attempted the write.
local function readonly_table()
	local message = "Attempt to modify a readonly table";
	error(message, 2);
end;

-- From here on match_m is a metatable: lookups fall through to the previous match_m table,
-- writes raise through readonly_table and getmetatable() yields lockmsg
match_m = {
	__index = match_m,
	__metatable = lockmsg,
	__newindex = readonly_table,
};

-- Empty table using match_m as its metatable, exposed as re.Match
re.Match = setmetatable({ }, match_m);

-- The module value is an empty table that reads through to re, rejects writes
-- and reports lockmsg instead of its real metatable
return setmetatable({ }, {
	__index = re,
	__metatable = lockmsg,
	__newindex = readonly_table,
});