// luau/Ast/include/Luau/Lexer.h

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Allocator.h"
#include "Luau/Ast.h"
#include "Luau/Location.h"
#include "Luau/DenseHash.h"
#include "Luau/Common.h"
#include <vector>
namespace Luau
{
struct Lexeme
{
enum Type
{
Eof = 0,
// 1..255 means actual character values
Char_END = 256,
Equal,
LessEqual,
GreaterEqual,
NotEqual,
Dot2,
Dot3,
SkinnyArrow,
DoubleColon,
FloorDiv,
InterpStringBegin,
InterpStringMid,
InterpStringEnd,
// An interpolated string with no expressions (like `x`)
InterpStringSimple,
AddAssign,
SubAssign,
MulAssign,
DivAssign,
FloorDivAssign,
ModAssign,
PowAssign,
ConcatAssign,
RawString,
QuotedString,
Number,
Name,
Comment,
BlockComment,
Attribute,
BrokenString,
BrokenComment,
BrokenUnicode,
BrokenInterpDoubleBrace,
Error,
Reserved_BEGIN,
ReservedAnd = Reserved_BEGIN,
ReservedBreak,
ReservedDo,
ReservedElse,
ReservedElseif,
ReservedEnd,
ReservedFalse,
ReservedFor,
ReservedFunction,
ReservedIf,
ReservedIn,
ReservedLocal,
ReservedNil,
ReservedNot,
ReservedOr,
ReservedRepeat,
ReservedReturn,
ReservedThen,
ReservedTrue,
ReservedUntil,
ReservedWhile,
Reserved_END
};
Sync to upstream/release/660 (#1643) # General This release introduces initial work on a Roundtrippable AST for Luau, and numerous fixes to the new type solver, runtime, and fragment autocomplete. ## Roundtrippable AST To support tooling around source code transformations, we are extending the parser to retain source information so that we can re-emit the initial source code exactly as the author wrote it. We have made numerous changes to the Transpiler, added new AST types such as `AstTypeGroup`, and added source information to AST nodes such as `AstExprInterpString`, `AstExprIfElse`, `AstTypeTable`, `AstTypeReference`, `AstTypeSingletonString`, and `AstTypeTypeof`. ## New Type Solver * Implement `setmetatable` and `getmetatable` type functions. * Fix handling of nested and recursive union type functions to prevent the solver from getting stuck. * Free types in both old and new solver now have an upper and lower bound to resolve mixed mode usage of the solvers in fragment autocomplete. * Fix infinite recursion during normalization of cyclic tables. * Add normalization support for intersections of subclasses with negated superclasses. ## Runtime * Fix compilation error in Luau buffer bit operations for big-endian machines. ## Miscellaneous * Add test and bugfixes to fragment autocomplete. * Fixed `clang-tidy` warnings in `Simplify.cpp`. **Full Changelog**: https://github.com/luau-lang/luau/compare/0.659...0.660 --- Co-authored-by: Ariel Weiss <aaronweiss@roblox.com> Co-authored-by: Hunter Goldstein <hgoldstein@roblox.com> Co-authored-by: Talha Pathan <tpathan@roblox.com> Co-authored-by: Varun Saini <vsaini@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Hunter Goldstein <hgoldstein@roblox.com> Co-authored-by: Varun Saini <61795485+vrn-sn@users.noreply.github.com> Co-authored-by: Alexander Youngblood <ayoungblood@roblox.com> Co-authored-by: Menarul Alam <malam@roblox.com>
2025-02-07 16:17:11 -08:00
enum struct QuoteStyle
{
Single,
Double,
};
Type type;
Location location;
// Field declared here, before the union, to ensure that Lexeme size is 32 bytes.
private:
// length is used to extract a slice from the input buffer.
// This field is only valid for certain lexeme types which don't duplicate portions of input
// but instead store a pointer to a location in the input buffer and the length of lexeme.
unsigned int length;
public:
union
{
const char* data; // String, Number, Comment
const char* name; // Name
unsigned int codepoint; // BrokenUnicode
};
Lexeme(const Location& location, Type type);
Lexeme(const Location& location, char character);
Lexeme(const Location& location, Type type, const char* data, size_t size);
Lexeme(const Location& location, Type type, const char* name);
unsigned int getLength() const;
Sync to upstream/release/660 (#1643) # General This release introduces initial work on a Roundtrippable AST for Luau, and numerous fixes to the new type solver, runtime, and fragment autocomplete. ## Roundtrippable AST To support tooling around source code transformations, we are extending the parser to retain source information so that we can re-emit the initial source code exactly as the author wrote it. We have made numerous changes to the Transpiler, added new AST types such as `AstTypeGroup`, and added source information to AST nodes such as `AstExprInterpString`, `AstExprIfElse`, `AstTypeTable`, `AstTypeReference`, `AstTypeSingletonString`, and `AstTypeTypeof`. ## New Type Solver * Implement `setmetatable` and `getmetatable` type functions. * Fix handling of nested and recursive union type functions to prevent the solver from getting stuck. * Free types in both old and new solver now have an upper and lower bound to resolve mixed mode usage of the solvers in fragment autocomplete. * Fix infinite recursion during normalization of cyclic tables. * Add normalization support for intersections of subclasses with negated superclasses. ## Runtime * Fix compilation error in Luau buffer bit operations for big-endian machines. ## Miscellaneous * Add test and bugfixes to fragment autocomplete. * Fixed `clang-tidy` warnings in `Simplify.cpp`. **Full Changelog**: https://github.com/luau-lang/luau/compare/0.659...0.660 --- Co-authored-by: Ariel Weiss <aaronweiss@roblox.com> Co-authored-by: Hunter Goldstein <hgoldstein@roblox.com> Co-authored-by: Talha Pathan <tpathan@roblox.com> Co-authored-by: Varun Saini <vsaini@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Hunter Goldstein <hgoldstein@roblox.com> Co-authored-by: Varun Saini <61795485+vrn-sn@users.noreply.github.com> Co-authored-by: Alexander Youngblood <ayoungblood@roblox.com> Co-authored-by: Menarul Alam <malam@roblox.com>
2025-02-07 16:17:11 -08:00
unsigned int getBlockDepth() const;
QuoteStyle getQuoteStyle() const;
std::string toString() const;
};
// Compile-time guard on Lexeme's footprint: the build fails if the struct ever grows beyond 32 bytes.
static_assert(sizeof(Lexeme) <= 32, "Size of `Lexeme` struct should be up to 32 bytes.");
// A set of names, each stored as an Entry that pairs an AstName with its length and the
// Lexeme::Type associated with it. The table keeps a reference to the Allocator it was built with.
// NOTE(review): presumably this interns identifier/keyword text so that equal names map to one AstName - confirm in Lexer.cpp.
class AstNameTable
{
public:
// The allocator is held by reference (see `allocator` below), so it must outlive the table.
AstNameTable(Allocator& allocator);
// Adds `name` with the given lexeme type (Lexeme::Name unless specified) and returns its AstName.
// NOTE(review): "static" suggests the string is referenced rather than copied - verify before passing temporaries.
AstName addStatic(const char* name, Lexeme::Type type = Lexeme::Name);
// Lookup by explicit (pointer, length); returns the AstName together with its lexeme type.
// NOTE(review): presumably inserts the name when it is not present yet - confirm.
std::pair<AstName, Lexeme::Type> getOrAddWithType(const char* name, size_t length);
// Const lookup by (pointer, length); cannot modify the table.
// NOTE(review): the result for a missing name is not visible from this header - check the implementation.
std::pair<AstName, Lexeme::Type> getWithType(const char* name, size_t length) const;
// Variants without an explicit length or type in the result.
// NOTE(review): no length parameter, so `name` is presumably NUL-terminated - confirm.
AstName getOrAdd(const char* name);
AstName get(const char* name) const;
private:
// One element of the set: the name, its length, and the lexeme type it maps to.
struct Entry
{
AstName value;
uint32_t length;
Lexeme::Type type;
bool operator==(const Entry& other) const;
};
// Hash functor used by the DenseHashSet below.
struct EntryHash
{
size_t operator()(const Entry& e) const;
};
DenseHashSet<Entry, EntryHash> data;
Allocator& allocator;
};
// Tokenizer over an in-memory character buffer. It keeps the most recently produced token
// (`current()`), the location recorded for the previous one (`previousLocation()`), and a
// stack of open-brace kinds.
// NOTE(review): `buffer` is a raw pointer and `names` a reference, so both presumably must outlive the lexer - confirm.
class Lexer
{
public:
    // `buffer`/`bufferSize` describe the input text; `names` is the name table the lexer works against;
    // `startPosition` defaults to {0, 0}.
    Lexer(const char* buffer, std::size_t bufferSize, AstNameTable& names, Position startPosition = {0, 0});

    // Setters for the `skipComments` / `readNames` flags declared below.
    void setSkipComments(bool skip);
    void setReadNames(bool read);

    // Location stored in `prevLocation`.
    const Location& previousLocation() const
    {
        return prevLocation;
    }

    // Advance to the next token and return a reference to it. The two-argument overload lets the
    // caller choose comment skipping and whether `prevLocation` is updated for this one call.
    // NOTE(review): the returned reference presumably aliases `lexeme` and is overwritten by the next advance - confirm.
    const Lexeme& next();
    const Lexeme& next(bool skipComments, bool updatePrevLocation);
    void nextline();

    // Returns a token by value.
    // NOTE(review): presumably peeks at the following token without changing lexer state - confirm.
    Lexeme lookahead();

    // The token currently held in `lexeme`.
    const Lexeme& current() const
    {
        return lexeme;
    }

    static bool isReserved(const std::string& word);

    // In-place rewrites of string-literal contents.
    // NOTE(review): the bool result of fixupQuotedString presumably signals success - confirm.
    static bool fixupQuotedString(std::string& data);
    static void fixupMultilineString(std::string& data);

private:
    char peekch() const;
    char peekch(unsigned int lookahead) const;

    Position position() const;

    // consume() assumes current character is not a newline for performance; when that is not known, consumeAny() should be used instead.
    void consume();
    void consumeAny();

    Lexeme readCommentBody();

    // Given a sequence [===[ or ]===], returns:
    // 1. number of equal signs (or 0 if none present) between the brackets
    // 2. -1 if this is not a long comment/string separator
    // 3. -N if this is a malformed separator
    // Does *not* consume the closing brace.
    int skipLongSeparator();

    Lexeme readLongString(const Position& start, int sep, Lexeme::Type ok, Lexeme::Type broken);
    Lexeme readQuotedString();

    Lexeme readInterpolatedStringBegin();
    Lexeme readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType);

    void readBackslashInString();

    std::pair<AstName, Lexeme::Type> readName();

    Lexeme readNumber(const Position& start, unsigned int startOffset);

    Lexeme readUtf8Error();
    Lexeme readNext();

    // Input text and its size in bytes.
    const char* buffer;
    std::size_t bufferSize;

    // Cursor state.
    // NOTE(review): `offset` is unsigned int while `bufferSize` is std::size_t; check how inputs larger than UINT_MAX are handled.
    unsigned int offset;

    unsigned int line;
    unsigned int lineOffset;

    Lexeme lexeme;

    Location prevLocation;

    AstNameTable& names;

    bool skipComments;
    bool readNames;

    // Kind of each currently open brace.
    // NOTE(review): presumably used to tell a `}` that resumes an interpolated string from an ordinary one - confirm.
    enum class BraceType
    {
        InterpolatedString,
        Normal
    };

    std::vector<BraceType> braceStack;
};
// True when `ch` is one of: space, horizontal tab, carriage return, line feed, vertical tab, form feed.
inline bool isSpace(char ch)
{
    switch (ch)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
    case '\v':
    case '\f':
        return true;
    default:
        return false;
    }
}
} // namespace Luau