mirror of
https://github.com/luau-lang/luau.git
synced 2025-05-04 10:33:46 +01:00
New lexer method
This commit is contained in:
parent
7a437aad17
commit
8bb8f47325
5 changed files with 95 additions and 34 deletions
|
@ -64,6 +64,8 @@ struct Lexeme
|
|||
InterpStringBegin,
|
||||
InterpStringMid,
|
||||
InterpStringEnd,
|
||||
// An interpolated string with no expressions (like `x`)
|
||||
InterpStringSimple,
|
||||
|
||||
AddAssign,
|
||||
SubAssign,
|
||||
|
@ -173,8 +175,6 @@ public:
|
|||
void setSkipComments(bool skip);
|
||||
void setReadNames(bool read);
|
||||
|
||||
const Lexeme& nextInterpolatedString();
|
||||
|
||||
const Location& previousLocation() const
|
||||
{
|
||||
return prevLocation;
|
||||
|
@ -244,6 +244,14 @@ private:
|
|||
|
||||
bool skipComments;
|
||||
bool readNames;
|
||||
|
||||
// Kind of an open brace the lexer is currently inside. One entry is pushed onto
// braceStack per opening brace and popped again when readNext lexes a }.
enum BraceType
|
||||
{
|
||||
// Pushed in readInterpolatedStringSection just before it returns an
// InterpStringBegin lexeme. When readNext pops this entry on a }, it resumes
// reading the string (InterpStringMid / InterpStringEnd) instead of
// returning a plain } token.
InterpolatedString,
|
||||
// Pushed by readNext for an ordinary { token; popping it yields a plain } token.
Normal
|
||||
};
|
||||
|
||||
std::vector<BraceType> braceStack;
|
||||
};
|
||||
|
||||
inline bool isSpace(char ch)
|
||||
|
|
|
@ -97,6 +97,7 @@ Lexeme::Lexeme(const Location& location, Type type, const char* data, size_t siz
|
|||
|| type == InterpStringBegin
|
||||
|| type == InterpStringMid
|
||||
|| type == InterpStringEnd
|
||||
|| type == InterpStringSimple
|
||||
|| type == Number
|
||||
|| type == Comment
|
||||
|| type == BlockComment
|
||||
|
@ -171,6 +172,18 @@ std::string Lexeme::toString() const
|
|||
case QuotedString:
|
||||
return data ? format("\"%.*s\"", length, data) : "string";
|
||||
|
||||
case InterpStringBegin:
|
||||
return data ? format("the beginning of an interpolated string (`%.*s`)", length, data) : "the beginning of an interpolated string";
|
||||
|
||||
case InterpStringMid:
|
||||
return data ? format("the middle of an interpolated string (`%.*s`)", length, data) : "the middle of an interpolated string";
|
||||
|
||||
case InterpStringEnd:
|
||||
return data ? format("the end of an interpolated string (`%.*s`)", length, data) : "the end of an interpolated string";
|
||||
|
||||
case InterpStringSimple:
|
||||
return data ? format("`%.*s`", length, data) : "interpolated string";
|
||||
|
||||
case Number:
|
||||
return data ? format("'%.*s'", length, data) : "number";
|
||||
|
||||
|
@ -587,14 +600,6 @@ Lexeme Lexer::readQuotedString()
|
|||
return Lexeme(Location(start, position()), Lexeme::QuotedString, &buffer[startOffset], offset - startOffset - 1);
|
||||
}
|
||||
|
||||
// Reads one interpolated string section beginning at the current position
// (InterpStringMid if another expression follows, InterpStringEnd otherwise, as
// requested from readInterpolatedStringSection), stores it in the lexeme member
// and returns a reference to that member.
// NOTE(review): the enclosing hunk shrinks from 14 to 6 lines, which suggests this
// function is removed by the commit being displayed -- confirm before relying on it.
const Lexeme& Lexer::nextInterpolatedString()
|
||||
{
|
||||
// Remember where the section starts so the resulting lexeme gets a correct location.
Position start = position();
|
||||
|
||||
lexeme = readInterpolatedStringSection(start, Lexeme::InterpStringMid, Lexeme::InterpStringEnd);
|
||||
return lexeme;
|
||||
}
|
||||
|
||||
Lexeme Lexer::readInterpolatedStringBegin()
|
||||
{
|
||||
LUAU_ASSERT(peekch() == '`');
|
||||
|
@ -602,7 +607,7 @@ Lexeme Lexer::readInterpolatedStringBegin()
|
|||
Position start = position();
|
||||
consume();
|
||||
|
||||
return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringEnd);
|
||||
return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringSimple);
|
||||
}
|
||||
|
||||
Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType)
|
||||
|
@ -616,6 +621,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
|
|||
case 0:
|
||||
case '\r':
|
||||
case '\n':
|
||||
// INTERP TODO: Clear anything we've added to the brace stack, and write a test to see what happens if we don't
|
||||
return Lexeme(Location(start, position()), Lexeme::BrokenString);
|
||||
|
||||
case '\\':
|
||||
|
@ -629,6 +635,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
|
|||
return Lexeme(Location(start, position()), Lexeme::BrokenInterpDoubleBrace);
|
||||
}
|
||||
|
||||
braceStack.push_back(BraceType::InterpolatedString);
|
||||
Lexeme lexemeOutput(Location(start, position()), Lexeme::InterpStringBegin, &buffer[startOffset], offset - startOffset);
|
||||
consume();
|
||||
return lexemeOutput;
|
||||
|
@ -735,6 +742,34 @@ Lexeme Lexer::readNext()
|
|||
}
|
||||
}
|
||||
|
||||
case '{':
|
||||
{
|
||||
consume();
|
||||
|
||||
braceStack.push_back(BraceType::Normal);
|
||||
return Lexeme(Location(start, 1), '{');
|
||||
}
|
||||
|
||||
case '}':
|
||||
{
|
||||
consume();
|
||||
|
||||
if (braceStack.empty())
|
||||
{
|
||||
return Lexeme(Location(start, 1), '}');
|
||||
}
|
||||
|
||||
const BraceType braceStackTop = braceStack.back();
|
||||
braceStack.pop_back();
|
||||
|
||||
if (braceStackTop != BraceType::InterpolatedString)
|
||||
{
|
||||
return Lexeme(Location(start, 1), '}');
|
||||
}
|
||||
|
||||
return readInterpolatedStringSection(position(), Lexeme::InterpStringMid, Lexeme::InterpStringEnd);
|
||||
}
|
||||
|
||||
case '=':
|
||||
{
|
||||
consume();
|
||||
|
@ -901,8 +936,6 @@ Lexeme Lexer::readNext()
|
|||
|
||||
case '(':
|
||||
case ')':
|
||||
case '{':
|
||||
case '}':
|
||||
case ']':
|
||||
case ';':
|
||||
case ',':
|
||||
|
|
|
@ -1573,7 +1573,7 @@ AstTypeOrPack Parser::parseSimpleTypeAnnotation(bool allowPack)
|
|||
else
|
||||
return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "String literal contains malformed escape sequence")};
|
||||
}
|
||||
else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd)
|
||||
else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple)
|
||||
{
|
||||
parseInterpString();
|
||||
|
||||
|
@ -2014,7 +2014,7 @@ AstExpr* Parser::parsePrimaryExpr(bool asStatement)
|
|||
|| lexer.current().type == Lexeme::RawString
|
||||
|| lexer.current().type == Lexeme::QuotedString
|
||||
|| lexer.current().type == Lexeme::InterpStringBegin
|
||||
|| lexer.current().type == Lexeme::InterpStringEnd
|
||||
|| lexer.current().type == Lexeme::InterpStringSimple
|
||||
)
|
||||
{
|
||||
expr = parseFunctionArgs(expr, false, Location());
|
||||
|
@ -2286,7 +2286,7 @@ AstExpr* Parser::parseSimpleExpr()
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringEnd))
|
||||
else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringSimple))
|
||||
{
|
||||
return parseString();
|
||||
}
|
||||
|
@ -2375,7 +2375,7 @@ AstExpr* Parser::parseFunctionArgs(AstExpr* func, bool self, const Location& sel
|
|||
|
||||
return allocator.alloc<AstExprCall>(Location(func->location, expr->location), func, copy(&expr, 1), self, argLocation);
|
||||
}
|
||||
else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd))
|
||||
else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple))
|
||||
{
|
||||
Position argStart = lexer.current().location.end;
|
||||
AstExpr* expr = parseInterpString();
|
||||
|
@ -2694,11 +2694,11 @@ AstArray<AstTypeOrPack> Parser::parseTypeParams()
|
|||
|
||||
std::optional<AstArray<char>> Parser::parseCharArray()
|
||||
{
|
||||
LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringEnd);
|
||||
LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringSimple);
|
||||
|
||||
scratchData.assign(lexer.current().data, lexer.current().length);
|
||||
|
||||
if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringEnd)
|
||||
if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringSimple)
|
||||
{
|
||||
if (!Lexer::fixupQuotedString(scratchData))
|
||||
{
|
||||
|
@ -2734,7 +2734,12 @@ AstExpr* Parser::parseInterpString()
|
|||
|
||||
do {
|
||||
Lexeme currentLexeme = lexer.current();
|
||||
LUAU_ASSERT(currentLexeme.type == Lexeme::InterpStringBegin || currentLexeme.type == Lexeme::InterpStringMid || currentLexeme.type == Lexeme::InterpStringEnd);
|
||||
LUAU_ASSERT(
|
||||
currentLexeme.type == Lexeme::InterpStringBegin
|
||||
|| currentLexeme.type == Lexeme::InterpStringMid
|
||||
|| currentLexeme.type == Lexeme::InterpStringEnd
|
||||
|| currentLexeme.type == Lexeme::InterpStringSimple
|
||||
);
|
||||
|
||||
Location location = currentLexeme.location;
|
||||
|
||||
|
@ -2754,7 +2759,7 @@ AstExpr* Parser::parseInterpString()
|
|||
|
||||
strings.push_back(chars);
|
||||
|
||||
if (currentLexeme.type == Lexeme::InterpStringEnd)
|
||||
if (currentLexeme.type == Lexeme::InterpStringEnd || currentLexeme.type == Lexeme::InterpStringSimple)
|
||||
{
|
||||
AstArray<AstArray<char>> stringsArray = copy(strings);
|
||||
AstArray<AstExpr*> expressionsArray = copy(expressions);
|
||||
|
@ -2764,24 +2769,20 @@ AstExpr* Parser::parseInterpString()
|
|||
|
||||
AstExpr* expression = parseExpr();
|
||||
|
||||
// INTERP CODE REVIEW: I want to use expectMatchAndConsume, but using that
|
||||
// consumes the rest of the string, not the `}`
|
||||
if (lexer.current().type != static_cast<Lexeme::Type>(static_cast<unsigned char>('}'))) {
|
||||
return reportExprError(startOfBrace, {}, "Expected '}' after interpolated string expression");
|
||||
}
|
||||
|
||||
expressions.push_back(expression);
|
||||
|
||||
Lexeme next = lexer.nextInterpolatedString();
|
||||
|
||||
switch (next.type)
|
||||
switch (lexer.current().type)
|
||||
{
|
||||
case Lexeme::BrokenString:
|
||||
return reportExprError(location, {}, "Malformed interpolated string");
|
||||
case Lexeme::InterpStringBegin:
|
||||
case Lexeme::InterpStringMid:
|
||||
case Lexeme::InterpStringEnd:
|
||||
break;
|
||||
case Lexeme::BrokenInterpDoubleBrace:
|
||||
return reportExprError(location, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE);
|
||||
case Lexeme::BrokenString:
|
||||
return reportExprError(location, {}, "Malformed interpolated string, did you forget to add a '}'?");
|
||||
default:
|
||||
break;
|
||||
return reportExprError(location, {}, "Malformed interpolated string, got %s", lexer.current().toString().c_str());
|
||||
}
|
||||
} while (true);
|
||||
}
|
||||
|
|
|
@ -138,4 +138,23 @@ TEST_CASE("lookahead")
|
|||
CHECK_EQ(lexer.lookahead().type, Lexeme::Eof);
|
||||
}
|
||||
|
||||
// Lexes an interpolated string that contains a single embedded expression using
// only Lexer::next() and checks the resulting token types in order:
// InterpStringBegin, QuotedString, InterpStringEnd.
// Only the lexeme types are checked; lexeme text and locations are not.
TEST_CASE("stringInterpolation")
|
||||
{
|
||||
// Turn the interpolated string feature flag on for the duration of this test.
ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true};
|
||||
|
||||
// Raw string literal so the backticks, braces and inner quotes need no escaping.
const std::string testInput = R"(`foo {"bar"}`)";
|
||||
Luau::Allocator alloc;
|
||||
AstNameTable table(alloc);
|
||||
Lexer lexer(testInput.c_str(), testInput.size(), table);
|
||||
|
||||
// First token: the string text up to the opening brace of the expression.
Lexeme interpBegin = lexer.next();
|
||||
CHECK_EQ(interpBegin.type, Lexeme::InterpStringBegin);
|
||||
|
||||
// Second token: the quoted string literal inside the braces.
Lexeme quote = lexer.next();
|
||||
CHECK_EQ(quote.type, Lexeme::QuotedString);
|
||||
|
||||
// Third token: the closing brace is not reported on its own; the lexer is
// expected to continue with the remainder of the interpolated string.
Lexeme interpEnd = lexer.next();
|
||||
CHECK_EQ(interpEnd.type, Lexeme::InterpStringEnd);
|
||||
}
|
||||
|
||||
TEST_SUITE_END();
|
||||
|
|
|
@ -954,7 +954,7 @@ TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_end_brace")
|
|||
catch (const ParseErrors& e)
|
||||
{
|
||||
auto error = e.getErrors().front();
|
||||
CHECK_EQ("Expected '}' after interpolated string expression", error.getMessage());
|
||||
CHECK_EQ("Malformed interpolated string, did you forget to add a '}'?", error.getMessage());
|
||||
return error.getLocation().begin.column;
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue