2021-10-29 21:25:12 +01:00
|
|
|
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
|
|
|
|
#pragma once
|
|
|
|
|
2024-11-08 21:41:45 +00:00
|
|
|
#include "Luau/Allocator.h"
|
2021-10-29 21:25:12 +01:00
|
|
|
#include "Luau/Ast.h"
|
|
|
|
#include "Luau/Location.h"
|
|
|
|
#include "Luau/DenseHash.h"
|
|
|
|
#include "Luau/Common.h"
|
|
|
|
|
2022-09-23 20:17:25 +01:00
|
|
|
#include <vector>
|
|
|
|
|
2021-10-29 21:25:12 +01:00
|
|
|
namespace Luau
|
|
|
|
{
|
|
|
|
|
|
|
|
struct Lexeme
|
|
|
|
{
|
|
|
|
enum Type
|
|
|
|
{
|
|
|
|
Eof = 0,
|
|
|
|
|
|
|
|
// 1..255 means actual character values
|
|
|
|
Char_END = 256,
|
|
|
|
|
|
|
|
Equal,
|
|
|
|
LessEqual,
|
|
|
|
GreaterEqual,
|
|
|
|
NotEqual,
|
|
|
|
Dot2,
|
|
|
|
Dot3,
|
|
|
|
SkinnyArrow,
|
|
|
|
DoubleColon,
|
2023-09-01 18:58:27 +01:00
|
|
|
FloorDiv,
|
2021-10-29 21:25:12 +01:00
|
|
|
|
2022-08-24 20:01:00 +01:00
|
|
|
InterpStringBegin,
|
|
|
|
InterpStringMid,
|
|
|
|
InterpStringEnd,
|
|
|
|
// An interpolated string with no expressions (like `x`)
|
|
|
|
InterpStringSimple,
|
|
|
|
|
2021-10-29 21:25:12 +01:00
|
|
|
AddAssign,
|
|
|
|
SubAssign,
|
|
|
|
MulAssign,
|
|
|
|
DivAssign,
|
2023-09-01 18:58:27 +01:00
|
|
|
FloorDivAssign,
|
2021-10-29 21:25:12 +01:00
|
|
|
ModAssign,
|
|
|
|
PowAssign,
|
|
|
|
ConcatAssign,
|
|
|
|
|
|
|
|
RawString,
|
|
|
|
QuotedString,
|
|
|
|
Number,
|
|
|
|
Name,
|
|
|
|
|
|
|
|
Comment,
|
|
|
|
BlockComment,
|
|
|
|
|
2024-06-07 18:51:12 +01:00
|
|
|
Attribute,
|
|
|
|
|
2021-10-29 21:25:12 +01:00
|
|
|
BrokenString,
|
|
|
|
BrokenComment,
|
|
|
|
BrokenUnicode,
|
2022-08-24 20:01:00 +01:00
|
|
|
BrokenInterpDoubleBrace,
|
2021-10-29 21:25:12 +01:00
|
|
|
Error,
|
|
|
|
|
|
|
|
Reserved_BEGIN,
|
|
|
|
ReservedAnd = Reserved_BEGIN,
|
|
|
|
ReservedBreak,
|
|
|
|
ReservedDo,
|
|
|
|
ReservedElse,
|
|
|
|
ReservedElseif,
|
|
|
|
ReservedEnd,
|
|
|
|
ReservedFalse,
|
|
|
|
ReservedFor,
|
|
|
|
ReservedFunction,
|
|
|
|
ReservedIf,
|
|
|
|
ReservedIn,
|
|
|
|
ReservedLocal,
|
|
|
|
ReservedNil,
|
|
|
|
ReservedNot,
|
|
|
|
ReservedOr,
|
|
|
|
ReservedRepeat,
|
|
|
|
ReservedReturn,
|
|
|
|
ReservedThen,
|
|
|
|
ReservedTrue,
|
|
|
|
ReservedUntil,
|
|
|
|
ReservedWhile,
|
|
|
|
Reserved_END
|
|
|
|
};
|
|
|
|
|
|
|
|
Type type;
|
|
|
|
Location location;
|
2024-06-07 18:51:12 +01:00
|
|
|
|
|
|
|
// Field declared here, before the union, to ensure that Lexeme size is 32 bytes.
|
|
|
|
private:
|
|
|
|
// length is used to extract a slice from the input buffer.
|
|
|
|
// This field is only valid for certain lexeme types which don't duplicate portions of input
|
|
|
|
// but instead store a pointer to a location in the input buffer and the length of lexeme.
|
2021-10-29 21:25:12 +01:00
|
|
|
unsigned int length;
|
|
|
|
|
2024-06-07 18:51:12 +01:00
|
|
|
public:
|
2021-10-29 21:25:12 +01:00
|
|
|
union
|
|
|
|
{
|
|
|
|
const char* data; // String, Number, Comment
|
|
|
|
const char* name; // Name
|
|
|
|
unsigned int codepoint; // BrokenUnicode
|
|
|
|
};
|
|
|
|
|
|
|
|
Lexeme(const Location& location, Type type);
|
|
|
|
Lexeme(const Location& location, char character);
|
|
|
|
Lexeme(const Location& location, Type type, const char* data, size_t size);
|
|
|
|
Lexeme(const Location& location, Type type, const char* name);
|
|
|
|
|
2024-06-07 18:51:12 +01:00
|
|
|
unsigned int getLength() const;
|
|
|
|
|
2021-10-29 21:25:12 +01:00
|
|
|
std::string toString() const;
|
|
|
|
};
|
|
|
|
|
2024-06-07 18:51:12 +01:00
|
|
|
// Compile-time guard on the Lexeme footprint: the build fails if the struct grows past 32 bytes.
static_assert(sizeof(Lexeme) <= 32, "Size of `Lexeme` struct should be up to 32 bytes.");
|
|
|
|
|
2021-10-29 21:25:12 +01:00
|
|
|
class AstNameTable
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
AstNameTable(Allocator& allocator);
|
|
|
|
|
|
|
|
AstName addStatic(const char* name, Lexeme::Type type = Lexeme::Name);
|
|
|
|
|
|
|
|
std::pair<AstName, Lexeme::Type> getOrAddWithType(const char* name, size_t length);
|
|
|
|
std::pair<AstName, Lexeme::Type> getWithType(const char* name, size_t length) const;
|
|
|
|
|
|
|
|
AstName getOrAdd(const char* name);
|
|
|
|
AstName get(const char* name) const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
struct Entry
|
|
|
|
{
|
|
|
|
AstName value;
|
|
|
|
uint32_t length;
|
|
|
|
Lexeme::Type type;
|
|
|
|
|
|
|
|
bool operator==(const Entry& other) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct EntryHash
|
|
|
|
{
|
|
|
|
size_t operator()(const Entry& e) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
DenseHashSet<Entry, EntryHash> data;
|
|
|
|
|
|
|
|
Allocator& allocator;
|
|
|
|
};
|
|
|
|
|
|
|
|
class Lexer
|
|
|
|
{
|
|
|
|
public:
|
2024-12-03 00:16:33 +00:00
|
|
|
Lexer(const char* buffer, std::size_t bufferSize, AstNameTable& names, Position startPosition = {0, 0});
|
2021-10-29 21:25:12 +01:00
|
|
|
|
|
|
|
void setSkipComments(bool skip);
|
|
|
|
void setReadNames(bool read);
|
|
|
|
|
|
|
|
const Location& previousLocation() const
|
|
|
|
{
|
|
|
|
return prevLocation;
|
|
|
|
}
|
|
|
|
|
|
|
|
const Lexeme& next();
|
2022-04-15 00:57:43 +01:00
|
|
|
const Lexeme& next(bool skipComments, bool updatePrevLocation);
|
2021-10-29 21:25:12 +01:00
|
|
|
void nextline();
|
|
|
|
|
|
|
|
Lexeme lookahead();
|
|
|
|
|
|
|
|
const Lexeme& current() const
|
|
|
|
{
|
|
|
|
return lexeme;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isReserved(const std::string& word);
|
|
|
|
|
|
|
|
static bool fixupQuotedString(std::string& data);
|
|
|
|
static void fixupMultilineString(std::string& data);
|
|
|
|
|
|
|
|
private:
|
|
|
|
char peekch() const;
|
|
|
|
char peekch(unsigned int lookahead) const;
|
|
|
|
|
|
|
|
Position position() const;
|
|
|
|
|
2023-08-25 18:23:55 +01:00
|
|
|
// consume() assumes current character is not a newline for performance; when that is not known, consumeAny() should be used instead.
|
2021-10-29 21:25:12 +01:00
|
|
|
void consume();
|
2023-08-25 18:23:55 +01:00
|
|
|
void consumeAny();
|
2021-10-29 21:25:12 +01:00
|
|
|
|
|
|
|
Lexeme readCommentBody();
|
|
|
|
|
|
|
|
// Given a sequence [===[ or ]===], returns:
|
|
|
|
// 1. number of equal signs (or 0 if none present) between the brackets
|
|
|
|
// 2. -1 if this is not a long comment/string separator
|
|
|
|
// 3. -N if this is a malformed separator
|
|
|
|
// Does *not* consume the closing brace.
|
|
|
|
int skipLongSeparator();
|
|
|
|
|
|
|
|
Lexeme readLongString(const Position& start, int sep, Lexeme::Type ok, Lexeme::Type broken);
|
|
|
|
Lexeme readQuotedString();
|
|
|
|
|
2022-08-24 20:01:00 +01:00
|
|
|
Lexeme readInterpolatedStringBegin();
|
|
|
|
Lexeme readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType);
|
|
|
|
|
|
|
|
void readBackslashInString();
|
|
|
|
|
2021-10-29 21:25:12 +01:00
|
|
|
std::pair<AstName, Lexeme::Type> readName();
|
|
|
|
|
|
|
|
Lexeme readNumber(const Position& start, unsigned int startOffset);
|
|
|
|
|
|
|
|
Lexeme readUtf8Error();
|
|
|
|
Lexeme readNext();
|
|
|
|
|
|
|
|
const char* buffer;
|
|
|
|
std::size_t bufferSize;
|
|
|
|
|
|
|
|
unsigned int offset;
|
|
|
|
|
|
|
|
unsigned int line;
|
|
|
|
unsigned int lineOffset;
|
|
|
|
|
|
|
|
Lexeme lexeme;
|
|
|
|
|
|
|
|
Location prevLocation;
|
|
|
|
|
|
|
|
AstNameTable& names;
|
|
|
|
|
|
|
|
bool skipComments;
|
|
|
|
bool readNames;
|
2022-08-24 20:01:00 +01:00
|
|
|
|
|
|
|
enum class BraceType
|
|
|
|
{
|
|
|
|
InterpolatedString,
|
|
|
|
Normal
|
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<BraceType> braceStack;
|
2021-10-29 21:25:12 +01:00
|
|
|
};
|
|
|
|
|
2022-02-18 01:18:01 +00:00
|
|
|
// Returns true when `ch` is one of the six whitespace characters: space, horizontal tab,
// carriage return, line feed, vertical tab or form feed. Any other character yields false.
// Declared constexpr (which also makes it implicitly inline) so it can be evaluated at compile time.
constexpr bool isSpace(char ch)
{
    return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\v' || ch == '\f';
}
|
|
|
|
|
2021-10-29 21:25:12 +01:00
|
|
|
} // namespace Luau
|