mirror of
https://github.com/luau-lang/luau.git
synced 2025-04-10 22:00:54 +01:00
Tokenize whitespace in Lexer
This commit is contained in:
parent
2e6fdd90a0
commit
00a5e2f2e9
4 changed files with 84 additions and 24 deletions
|
@ -53,6 +53,7 @@ struct Lexeme
|
||||||
|
|
||||||
Comment,
|
Comment,
|
||||||
BlockComment,
|
BlockComment,
|
||||||
|
Whitespace,
|
||||||
|
|
||||||
Attribute,
|
Attribute,
|
||||||
|
|
||||||
|
@ -100,7 +101,7 @@ private:
|
||||||
public:
|
public:
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
const char* data; // String, Number, Comment
|
const char* data; // String, Number, Comment, Whitespace
|
||||||
const char* name; // Name
|
const char* name; // Name
|
||||||
unsigned int codepoint; // BrokenUnicode
|
unsigned int codepoint; // BrokenUnicode
|
||||||
};
|
};
|
||||||
|
@ -155,7 +156,7 @@ class Lexer
|
||||||
public:
|
public:
|
||||||
Lexer(const char* buffer, std::size_t bufferSize, AstNameTable& names, Position startPosition = {0, 0});
|
Lexer(const char* buffer, std::size_t bufferSize, AstNameTable& names, Position startPosition = {0, 0});
|
||||||
|
|
||||||
void setSkipComments(bool skip);
|
void setSkipTrivia(bool skip);
|
||||||
void setReadNames(bool read);
|
void setReadNames(bool read);
|
||||||
|
|
||||||
const Location& previousLocation() const
|
const Location& previousLocation() const
|
||||||
|
@ -164,7 +165,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
const Lexeme& next();
|
const Lexeme& next();
|
||||||
const Lexeme& next(bool skipComments, bool updatePrevLocation);
|
const Lexeme& next(bool skipTrivia, bool updatePrevLocation);
|
||||||
void nextline();
|
void nextline();
|
||||||
|
|
||||||
Lexeme lookahead();
|
Lexeme lookahead();
|
||||||
|
@ -227,7 +228,7 @@ private:
|
||||||
|
|
||||||
AstNameTable& names;
|
AstNameTable& names;
|
||||||
|
|
||||||
bool skipComments;
|
bool skipTrivia;
|
||||||
bool readNames;
|
bool readNames;
|
||||||
|
|
||||||
enum class BraceType
|
enum class BraceType
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
LUAU_FASTFLAGVARIABLE(LexerResumesFromPosition2)
|
LUAU_FASTFLAGVARIABLE(LexerResumesFromPosition2)
|
||||||
|
LUAU_FASTFLAGVARIABLE(LuauLexerTokenizesWhitespace)
|
||||||
namespace Luau
|
namespace Luau
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -36,7 +37,7 @@ Lexeme::Lexeme(const Location& location, Type type, const char* data, size_t siz
|
||||||
{
|
{
|
||||||
LUAU_ASSERT(
|
LUAU_ASSERT(
|
||||||
type == RawString || type == QuotedString || type == InterpStringBegin || type == InterpStringMid || type == InterpStringEnd ||
|
type == RawString || type == QuotedString || type == InterpStringBegin || type == InterpStringMid || type == InterpStringEnd ||
|
||||||
type == InterpStringSimple || type == BrokenInterpDoubleBrace || type == Number || type == Comment || type == BlockComment
|
type == InterpStringSimple || type == BrokenInterpDoubleBrace || type == Number || type == Comment || type == BlockComment || type == Whitespace
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,7 +54,7 @@ unsigned int Lexeme::getLength() const
|
||||||
{
|
{
|
||||||
LUAU_ASSERT(
|
LUAU_ASSERT(
|
||||||
type == RawString || type == QuotedString || type == InterpStringBegin || type == InterpStringMid || type == InterpStringEnd ||
|
type == RawString || type == QuotedString || type == InterpStringBegin || type == InterpStringMid || type == InterpStringEnd ||
|
||||||
type == InterpStringSimple || type == BrokenInterpDoubleBrace || type == Number || type == Comment || type == BlockComment
|
type == InterpStringSimple || type == BrokenInterpDoubleBrace || type == Number || type == Comment || type == BlockComment || type == Whitespace
|
||||||
);
|
);
|
||||||
|
|
||||||
return length;
|
return length;
|
||||||
|
@ -315,14 +316,14 @@ Lexer::Lexer(const char* buffer, size_t bufferSize, AstNameTable& names, Positio
|
||||||
Lexeme::Eof
|
Lexeme::Eof
|
||||||
)
|
)
|
||||||
, names(names)
|
, names(names)
|
||||||
, skipComments(false)
|
, skipTrivia(false)
|
||||||
, readNames(true)
|
, readNames(true)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void Lexer::setSkipComments(bool skip)
|
void Lexer::setSkipTrivia(bool skip)
|
||||||
{
|
{
|
||||||
skipComments = skip;
|
skipTrivia = skip;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Lexer::setReadNames(bool read)
|
void Lexer::setReadNames(bool read)
|
||||||
|
@ -332,24 +333,27 @@ void Lexer::setReadNames(bool read)
|
||||||
|
|
||||||
const Lexeme& Lexer::next()
|
const Lexeme& Lexer::next()
|
||||||
{
|
{
|
||||||
return next(this->skipComments, true);
|
return next(this->skipTrivia, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
const Lexeme& Lexer::next(bool skipComments, bool updatePrevLocation)
|
const Lexeme& Lexer::next(bool skipTrivia, bool updatePrevLocation)
|
||||||
{
|
{
|
||||||
// in skipComments mode we reject valid comments
|
// in skipTrivia mode we reject valid comments and whitespace
|
||||||
do
|
do
|
||||||
|
{
|
||||||
|
if (!FFlag::LuauLexerTokenizesWhitespace)
|
||||||
{
|
{
|
||||||
// consume whitespace before the token
|
// consume whitespace before the token
|
||||||
while (isSpace(peekch()))
|
while (isSpace(peekch()))
|
||||||
consumeAny();
|
consumeAny();
|
||||||
|
}
|
||||||
|
|
||||||
if (updatePrevLocation)
|
if (updatePrevLocation)
|
||||||
prevLocation = lexeme.location;
|
prevLocation = lexeme.location;
|
||||||
|
|
||||||
lexeme = readNext();
|
lexeme = readNext();
|
||||||
updatePrevLocation = false;
|
updatePrevLocation = false;
|
||||||
} while (skipComments && (lexeme.type == Lexeme::Comment || lexeme.type == Lexeme::BlockComment));
|
} while (skipTrivia && (lexeme.type == Lexeme::Comment || lexeme.type == Lexeme::BlockComment || lexeme.type == Lexeme::Whitespace));
|
||||||
|
|
||||||
return lexeme;
|
return lexeme;
|
||||||
}
|
}
|
||||||
|
@ -967,6 +971,15 @@ Lexeme Lexer::readNext()
|
||||||
|
|
||||||
return Lexeme(Location(start, position()), name.second, name.first.value);
|
return Lexeme(Location(start, position()), name.second, name.first.value);
|
||||||
}
|
}
|
||||||
|
else if (FFlag::LuauLexerTokenizesWhitespace && isSpace(peekch()))
|
||||||
|
{
|
||||||
|
size_t startOffset = offset;
|
||||||
|
|
||||||
|
while (isSpace(peekch()))
|
||||||
|
consumeAny();
|
||||||
|
|
||||||
|
return Lexeme(Location(start, position()), Lexeme::Whitespace, &buffer[startOffset], offset - startOffset);
|
||||||
|
}
|
||||||
else if (peekch() & 0x80)
|
else if (peekch() & 0x80)
|
||||||
{
|
{
|
||||||
return readUtf8Error();
|
return readUtf8Error();
|
||||||
|
|
|
@ -208,7 +208,7 @@ Parser::Parser(const char* buffer, size_t bufferSize, AstNameTable& names, Alloc
|
||||||
matchRecoveryStopOnToken[Lexeme::Type::Eof] = 1;
|
matchRecoveryStopOnToken[Lexeme::Type::Eof] = 1;
|
||||||
|
|
||||||
// required for lookahead() to work across a comment boundary and for nextLexeme() to work when captureComments is false
|
// required for lookahead() to work across a comment boundary and for nextLexeme() to work when captureComments is false
|
||||||
lexer.setSkipComments(true);
|
lexer.setSkipTrivia(true);
|
||||||
|
|
||||||
// read first lexeme (any hot comments get .header = true)
|
// read first lexeme (any hot comments get .header = true)
|
||||||
LUAU_ASSERT(hotcommentHeader);
|
LUAU_ASSERT(hotcommentHeader);
|
||||||
|
@ -3572,13 +3572,13 @@ AstTypeError* Parser::reportMissingTypeError(const Location& parseErrorLocation,
|
||||||
|
|
||||||
void Parser::nextLexeme()
|
void Parser::nextLexeme()
|
||||||
{
|
{
|
||||||
Lexeme::Type type = lexer.next(/* skipComments= */ false, true).type;
|
Lexeme::Type type = lexer.next(/* skipTrivia= */ false, true).type;
|
||||||
|
|
||||||
while (type == Lexeme::BrokenComment || type == Lexeme::Comment || type == Lexeme::BlockComment)
|
while (type == Lexeme::BrokenComment || type == Lexeme::Comment || type == Lexeme::BlockComment || type == Lexeme::Whitespace)
|
||||||
{
|
{
|
||||||
const Lexeme& lexeme = lexer.current();
|
const Lexeme& lexeme = lexer.current();
|
||||||
|
|
||||||
if (options.captureComments)
|
if (options.captureComments && type != Lexeme::Whitespace)
|
||||||
commentLocations.push_back(Comment{lexeme.type, lexeme.location});
|
commentLocations.push_back(Comment{lexeme.type, lexeme.location});
|
||||||
|
|
||||||
// Subtlety: Broken comments are weird because we record them as comments AND pass them to the parser as a lexeme.
|
// Subtlety: Broken comments are weird because we record them as comments AND pass them to the parser as a lexeme.
|
||||||
|
@ -3598,7 +3598,7 @@ void Parser::nextLexeme()
|
||||||
hotcomments.push_back({hotcommentHeader, lexeme.location, std::string(text + 1, text + end)});
|
hotcomments.push_back({hotcommentHeader, lexeme.location, std::string(text + 1, text + end)});
|
||||||
}
|
}
|
||||||
|
|
||||||
type = lexer.next(/* skipComments= */ false, /* updatePrevLocation= */ false).type;
|
type = lexer.next(/* skipTrivia= */ false, /* updatePrevLocation= */ false).type;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,8 @@
|
||||||
|
|
||||||
using namespace Luau;
|
using namespace Luau;
|
||||||
|
|
||||||
|
LUAU_FASTFLAG(LuauLexerTokenizesWhitespace)
|
||||||
|
|
||||||
TEST_SUITE_BEGIN("LexerTests");
|
TEST_SUITE_BEGIN("LexerTests");
|
||||||
|
|
||||||
TEST_CASE("broken_string_works")
|
TEST_CASE("broken_string_works")
|
||||||
|
@ -38,7 +40,7 @@ TEST_CASE("broken_comment_kept")
|
||||||
Luau::Allocator alloc;
|
Luau::Allocator alloc;
|
||||||
AstNameTable table(alloc);
|
AstNameTable table(alloc);
|
||||||
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
||||||
lexer.setSkipComments(true);
|
lexer.setSkipTrivia(true);
|
||||||
CHECK_EQ(lexer.next().type, Lexeme::Type::BrokenComment);
|
CHECK_EQ(lexer.next().type, Lexeme::Type::BrokenComment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,7 +50,7 @@ TEST_CASE("comment_skipped")
|
||||||
Luau::Allocator alloc;
|
Luau::Allocator alloc;
|
||||||
AstNameTable table(alloc);
|
AstNameTable table(alloc);
|
||||||
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
||||||
lexer.setSkipComments(true);
|
lexer.setSkipTrivia(true);
|
||||||
CHECK_EQ(lexer.next().type, Lexeme::Type::Eof);
|
CHECK_EQ(lexer.next().type, Lexeme::Type::Eof);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,7 +105,7 @@ TEST_CASE("lookahead")
|
||||||
Luau::Allocator alloc;
|
Luau::Allocator alloc;
|
||||||
AstNameTable table(alloc);
|
AstNameTable table(alloc);
|
||||||
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
||||||
lexer.setSkipComments(true);
|
lexer.setSkipTrivia(true);
|
||||||
lexer.next(); // must call next() before reading data from lexer at least once
|
lexer.next(); // must call next() before reading data from lexer at least once
|
||||||
|
|
||||||
CHECK_EQ(lexer.current().type, Lexeme::Name);
|
CHECK_EQ(lexer.current().type, Lexeme::Name);
|
||||||
|
@ -242,4 +244,48 @@ TEST_CASE("string_interpolation_with_unicode_escape")
|
||||||
CHECK_EQ(lexer.next().type, Lexeme::Eof);
|
CHECK_EQ(lexer.next().type, Lexeme::Eof);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE("lexer_tokenizes_whitespace")
|
||||||
|
{
|
||||||
|
ScopedFastFlag sff{FFlag::LuauLexerTokenizesWhitespace, true};
|
||||||
|
|
||||||
|
const std::string testInput = "local x = 1";
|
||||||
|
Luau::Allocator alloc;
|
||||||
|
AstNameTable table(alloc);
|
||||||
|
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
||||||
|
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::ReservedLocal);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Name);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, '=');
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Number);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Eof);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("lexer_tokenizes_multiline_whitespace")
|
||||||
|
{
|
||||||
|
ScopedFastFlag sff{FFlag::LuauLexerTokenizesWhitespace, true};
|
||||||
|
|
||||||
|
const std::string testInput = R"(local x
|
||||||
|
|
||||||
|
y = 2
|
||||||
|
)";
|
||||||
|
Luau::Allocator alloc;
|
||||||
|
AstNameTable table(alloc);
|
||||||
|
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
||||||
|
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::ReservedLocal);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Name);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Name);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, '=');
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Number);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Whitespace);
|
||||||
|
CHECK_EQ(lexer.next().type, Lexeme::Eof);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_SUITE_END();
|
TEST_SUITE_END();
|
||||||
|
|
Loading…
Add table
Reference in a new issue