mirror of
https://github.com/luau-lang/luau.git
synced 2025-05-04 10:33:46 +01:00
New lexer method
This commit is contained in:
parent
7a437aad17
commit
8bb8f47325
5 changed files with 95 additions and 34 deletions
|
@ -64,6 +64,8 @@ struct Lexeme
|
||||||
InterpStringBegin,
|
InterpStringBegin,
|
||||||
InterpStringMid,
|
InterpStringMid,
|
||||||
InterpStringEnd,
|
InterpStringEnd,
|
||||||
|
// An interpolated string with no expressions (like `x`)
|
||||||
|
InterpStringSimple,
|
||||||
|
|
||||||
AddAssign,
|
AddAssign,
|
||||||
SubAssign,
|
SubAssign,
|
||||||
|
@ -173,8 +175,6 @@ public:
|
||||||
void setSkipComments(bool skip);
|
void setSkipComments(bool skip);
|
||||||
void setReadNames(bool read);
|
void setReadNames(bool read);
|
||||||
|
|
||||||
const Lexeme& nextInterpolatedString();
|
|
||||||
|
|
||||||
const Location& previousLocation() const
|
const Location& previousLocation() const
|
||||||
{
|
{
|
||||||
return prevLocation;
|
return prevLocation;
|
||||||
|
@ -244,6 +244,14 @@ private:
|
||||||
|
|
||||||
bool skipComments;
|
bool skipComments;
|
||||||
bool readNames;
|
bool readNames;
|
||||||
|
|
||||||
|
enum BraceType
|
||||||
|
{
|
||||||
|
InterpolatedString,
|
||||||
|
Normal
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<BraceType> braceStack;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline bool isSpace(char ch)
|
inline bool isSpace(char ch)
|
||||||
|
|
|
@ -97,6 +97,7 @@ Lexeme::Lexeme(const Location& location, Type type, const char* data, size_t siz
|
||||||
|| type == InterpStringBegin
|
|| type == InterpStringBegin
|
||||||
|| type == InterpStringMid
|
|| type == InterpStringMid
|
||||||
|| type == InterpStringEnd
|
|| type == InterpStringEnd
|
||||||
|
|| type == InterpStringSimple
|
||||||
|| type == Number
|
|| type == Number
|
||||||
|| type == Comment
|
|| type == Comment
|
||||||
|| type == BlockComment
|
|| type == BlockComment
|
||||||
|
@ -171,6 +172,18 @@ std::string Lexeme::toString() const
|
||||||
case QuotedString:
|
case QuotedString:
|
||||||
return data ? format("\"%.*s\"", length, data) : "string";
|
return data ? format("\"%.*s\"", length, data) : "string";
|
||||||
|
|
||||||
|
case InterpStringBegin:
|
||||||
|
return data ? format("the beginning of an interpolated string (`%.*s`)", length, data) : "the beginning of an interpolated string";
|
||||||
|
|
||||||
|
case InterpStringMid:
|
||||||
|
return data ? format("the middle of an interpolated string (`%.*s`)", length, data) : "the middle of an interpolated string";
|
||||||
|
|
||||||
|
case InterpStringEnd:
|
||||||
|
return data ? format("the end of an interpolated string (`%.*s`)", length, data) : "the end of an interpolated string";
|
||||||
|
|
||||||
|
case InterpStringSimple:
|
||||||
|
return data ? format("`%.*s`", length, data) : "interpolated string";
|
||||||
|
|
||||||
case Number:
|
case Number:
|
||||||
return data ? format("'%.*s'", length, data) : "number";
|
return data ? format("'%.*s'", length, data) : "number";
|
||||||
|
|
||||||
|
@ -587,14 +600,6 @@ Lexeme Lexer::readQuotedString()
|
||||||
return Lexeme(Location(start, position()), Lexeme::QuotedString, &buffer[startOffset], offset - startOffset - 1);
|
return Lexeme(Location(start, position()), Lexeme::QuotedString, &buffer[startOffset], offset - startOffset - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const Lexeme& Lexer::nextInterpolatedString()
|
|
||||||
{
|
|
||||||
Position start = position();
|
|
||||||
|
|
||||||
lexeme = readInterpolatedStringSection(start, Lexeme::InterpStringMid, Lexeme::InterpStringEnd);
|
|
||||||
return lexeme;
|
|
||||||
}
|
|
||||||
|
|
||||||
Lexeme Lexer::readInterpolatedStringBegin()
|
Lexeme Lexer::readInterpolatedStringBegin()
|
||||||
{
|
{
|
||||||
LUAU_ASSERT(peekch() == '`');
|
LUAU_ASSERT(peekch() == '`');
|
||||||
|
@ -602,7 +607,7 @@ Lexeme Lexer::readInterpolatedStringBegin()
|
||||||
Position start = position();
|
Position start = position();
|
||||||
consume();
|
consume();
|
||||||
|
|
||||||
return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringEnd);
|
return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringSimple);
|
||||||
}
|
}
|
||||||
|
|
||||||
Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType)
|
Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType)
|
||||||
|
@ -616,6 +621,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
|
||||||
case 0:
|
case 0:
|
||||||
case '\r':
|
case '\r':
|
||||||
case '\n':
|
case '\n':
|
||||||
|
// INTERP TODO: Clear anything we've added to the brace stack, and write a test to see what happens if we don't
|
||||||
return Lexeme(Location(start, position()), Lexeme::BrokenString);
|
return Lexeme(Location(start, position()), Lexeme::BrokenString);
|
||||||
|
|
||||||
case '\\':
|
case '\\':
|
||||||
|
@ -629,6 +635,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
|
||||||
return Lexeme(Location(start, position()), Lexeme::BrokenInterpDoubleBrace);
|
return Lexeme(Location(start, position()), Lexeme::BrokenInterpDoubleBrace);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
braceStack.push_back(BraceType::InterpolatedString);
|
||||||
Lexeme lexemeOutput(Location(start, position()), Lexeme::InterpStringBegin, &buffer[startOffset], offset - startOffset);
|
Lexeme lexemeOutput(Location(start, position()), Lexeme::InterpStringBegin, &buffer[startOffset], offset - startOffset);
|
||||||
consume();
|
consume();
|
||||||
return lexemeOutput;
|
return lexemeOutput;
|
||||||
|
@ -735,6 +742,34 @@ Lexeme Lexer::readNext()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case '{':
|
||||||
|
{
|
||||||
|
consume();
|
||||||
|
|
||||||
|
braceStack.push_back(BraceType::Normal);
|
||||||
|
return Lexeme(Location(start, 1), '{');
|
||||||
|
}
|
||||||
|
|
||||||
|
case '}':
|
||||||
|
{
|
||||||
|
consume();
|
||||||
|
|
||||||
|
if (braceStack.empty())
|
||||||
|
{
|
||||||
|
return Lexeme(Location(start, 1), '}');
|
||||||
|
}
|
||||||
|
|
||||||
|
const BraceType braceStackTop = braceStack.back();
|
||||||
|
braceStack.pop_back();
|
||||||
|
|
||||||
|
if (braceStackTop != BraceType::InterpolatedString)
|
||||||
|
{
|
||||||
|
return Lexeme(Location(start, 1), '}');
|
||||||
|
}
|
||||||
|
|
||||||
|
return readInterpolatedStringSection(position(), Lexeme::InterpStringMid, Lexeme::InterpStringEnd);
|
||||||
|
}
|
||||||
|
|
||||||
case '=':
|
case '=':
|
||||||
{
|
{
|
||||||
consume();
|
consume();
|
||||||
|
@ -901,8 +936,6 @@ Lexeme Lexer::readNext()
|
||||||
|
|
||||||
case '(':
|
case '(':
|
||||||
case ')':
|
case ')':
|
||||||
case '{':
|
|
||||||
case '}':
|
|
||||||
case ']':
|
case ']':
|
||||||
case ';':
|
case ';':
|
||||||
case ',':
|
case ',':
|
||||||
|
|
|
@ -1573,7 +1573,7 @@ AstTypeOrPack Parser::parseSimpleTypeAnnotation(bool allowPack)
|
||||||
else
|
else
|
||||||
return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "String literal contains malformed escape sequence")};
|
return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "String literal contains malformed escape sequence")};
|
||||||
}
|
}
|
||||||
else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd)
|
else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple)
|
||||||
{
|
{
|
||||||
parseInterpString();
|
parseInterpString();
|
||||||
|
|
||||||
|
@ -2014,7 +2014,7 @@ AstExpr* Parser::parsePrimaryExpr(bool asStatement)
|
||||||
|| lexer.current().type == Lexeme::RawString
|
|| lexer.current().type == Lexeme::RawString
|
||||||
|| lexer.current().type == Lexeme::QuotedString
|
|| lexer.current().type == Lexeme::QuotedString
|
||||||
|| lexer.current().type == Lexeme::InterpStringBegin
|
|| lexer.current().type == Lexeme::InterpStringBegin
|
||||||
|| lexer.current().type == Lexeme::InterpStringEnd
|
|| lexer.current().type == Lexeme::InterpStringSimple
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
expr = parseFunctionArgs(expr, false, Location());
|
expr = parseFunctionArgs(expr, false, Location());
|
||||||
|
@ -2286,7 +2286,7 @@ AstExpr* Parser::parseSimpleExpr()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringEnd))
|
else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringSimple))
|
||||||
{
|
{
|
||||||
return parseString();
|
return parseString();
|
||||||
}
|
}
|
||||||
|
@ -2375,7 +2375,7 @@ AstExpr* Parser::parseFunctionArgs(AstExpr* func, bool self, const Location& sel
|
||||||
|
|
||||||
return allocator.alloc<AstExprCall>(Location(func->location, expr->location), func, copy(&expr, 1), self, argLocation);
|
return allocator.alloc<AstExprCall>(Location(func->location, expr->location), func, copy(&expr, 1), self, argLocation);
|
||||||
}
|
}
|
||||||
else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd))
|
else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple))
|
||||||
{
|
{
|
||||||
Position argStart = lexer.current().location.end;
|
Position argStart = lexer.current().location.end;
|
||||||
AstExpr* expr = parseInterpString();
|
AstExpr* expr = parseInterpString();
|
||||||
|
@ -2694,11 +2694,11 @@ AstArray<AstTypeOrPack> Parser::parseTypeParams()
|
||||||
|
|
||||||
std::optional<AstArray<char>> Parser::parseCharArray()
|
std::optional<AstArray<char>> Parser::parseCharArray()
|
||||||
{
|
{
|
||||||
LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringEnd);
|
LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringSimple);
|
||||||
|
|
||||||
scratchData.assign(lexer.current().data, lexer.current().length);
|
scratchData.assign(lexer.current().data, lexer.current().length);
|
||||||
|
|
||||||
if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringEnd)
|
if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringSimple)
|
||||||
{
|
{
|
||||||
if (!Lexer::fixupQuotedString(scratchData))
|
if (!Lexer::fixupQuotedString(scratchData))
|
||||||
{
|
{
|
||||||
|
@ -2734,7 +2734,12 @@ AstExpr* Parser::parseInterpString()
|
||||||
|
|
||||||
do {
|
do {
|
||||||
Lexeme currentLexeme = lexer.current();
|
Lexeme currentLexeme = lexer.current();
|
||||||
LUAU_ASSERT(currentLexeme.type == Lexeme::InterpStringBegin || currentLexeme.type == Lexeme::InterpStringMid || currentLexeme.type == Lexeme::InterpStringEnd);
|
LUAU_ASSERT(
|
||||||
|
currentLexeme.type == Lexeme::InterpStringBegin
|
||||||
|
|| currentLexeme.type == Lexeme::InterpStringMid
|
||||||
|
|| currentLexeme.type == Lexeme::InterpStringEnd
|
||||||
|
|| currentLexeme.type == Lexeme::InterpStringSimple
|
||||||
|
);
|
||||||
|
|
||||||
Location location = currentLexeme.location;
|
Location location = currentLexeme.location;
|
||||||
|
|
||||||
|
@ -2754,7 +2759,7 @@ AstExpr* Parser::parseInterpString()
|
||||||
|
|
||||||
strings.push_back(chars);
|
strings.push_back(chars);
|
||||||
|
|
||||||
if (currentLexeme.type == Lexeme::InterpStringEnd)
|
if (currentLexeme.type == Lexeme::InterpStringEnd || currentLexeme.type == Lexeme::InterpStringSimple)
|
||||||
{
|
{
|
||||||
AstArray<AstArray<char>> stringsArray = copy(strings);
|
AstArray<AstArray<char>> stringsArray = copy(strings);
|
||||||
AstArray<AstExpr*> expressionsArray = copy(expressions);
|
AstArray<AstExpr*> expressionsArray = copy(expressions);
|
||||||
|
@ -2764,24 +2769,20 @@ AstExpr* Parser::parseInterpString()
|
||||||
|
|
||||||
AstExpr* expression = parseExpr();
|
AstExpr* expression = parseExpr();
|
||||||
|
|
||||||
// INTERP CODE REVIEW: I want to use expectMatchAndConsume, but using that
|
|
||||||
// consumes the rest of the string, not the `}`
|
|
||||||
if (lexer.current().type != static_cast<Lexeme::Type>(static_cast<unsigned char>('}'))) {
|
|
||||||
return reportExprError(startOfBrace, {}, "Expected '}' after interpolated string expression");
|
|
||||||
}
|
|
||||||
|
|
||||||
expressions.push_back(expression);
|
expressions.push_back(expression);
|
||||||
|
|
||||||
Lexeme next = lexer.nextInterpolatedString();
|
switch (lexer.current().type)
|
||||||
|
|
||||||
switch (next.type)
|
|
||||||
{
|
{
|
||||||
case Lexeme::BrokenString:
|
case Lexeme::InterpStringBegin:
|
||||||
return reportExprError(location, {}, "Malformed interpolated string");
|
case Lexeme::InterpStringMid:
|
||||||
|
case Lexeme::InterpStringEnd:
|
||||||
|
break;
|
||||||
case Lexeme::BrokenInterpDoubleBrace:
|
case Lexeme::BrokenInterpDoubleBrace:
|
||||||
return reportExprError(location, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE);
|
return reportExprError(location, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE);
|
||||||
|
case Lexeme::BrokenString:
|
||||||
|
return reportExprError(location, {}, "Malformed interpolated string, did you forget to add a '}'?");
|
||||||
default:
|
default:
|
||||||
break;
|
return reportExprError(location, {}, "Malformed interpolated string, got %s", lexer.current().toString().c_str());
|
||||||
}
|
}
|
||||||
} while (true);
|
} while (true);
|
||||||
}
|
}
|
||||||
|
|
|
@ -138,4 +138,23 @@ TEST_CASE("lookahead")
|
||||||
CHECK_EQ(lexer.lookahead().type, Lexeme::Eof);
|
CHECK_EQ(lexer.lookahead().type, Lexeme::Eof);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE("stringInterpolation")
|
||||||
|
{
|
||||||
|
ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true};
|
||||||
|
|
||||||
|
const std::string testInput = R"(`foo {"bar"}`)";
|
||||||
|
Luau::Allocator alloc;
|
||||||
|
AstNameTable table(alloc);
|
||||||
|
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
||||||
|
|
||||||
|
Lexeme interpBegin = lexer.next();
|
||||||
|
CHECK_EQ(interpBegin.type, Lexeme::InterpStringBegin);
|
||||||
|
|
||||||
|
Lexeme quote = lexer.next();
|
||||||
|
CHECK_EQ(quote.type, Lexeme::QuotedString);
|
||||||
|
|
||||||
|
Lexeme interpEnd = lexer.next();
|
||||||
|
CHECK_EQ(interpEnd.type, Lexeme::InterpStringEnd);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_SUITE_END();
|
TEST_SUITE_END();
|
||||||
|
|
|
@ -954,7 +954,7 @@ TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_end_brace")
|
||||||
catch (const ParseErrors& e)
|
catch (const ParseErrors& e)
|
||||||
{
|
{
|
||||||
auto error = e.getErrors().front();
|
auto error = e.getErrors().front();
|
||||||
CHECK_EQ("Expected '}' after interpolated string expression", error.getMessage());
|
CHECK_EQ("Malformed interpolated string, did you forget to add a '}'?", error.getMessage());
|
||||||
return error.getLocation().begin.column;
|
return error.getLocation().begin.column;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Reference in a new issue