New lexer method

This commit is contained in:
Kampfkarren 2022-08-17 09:45:12 -07:00
parent 7a437aad17
commit 8bb8f47325
5 changed files with 95 additions and 34 deletions

View file

@ -64,6 +64,8 @@ struct Lexeme
InterpStringBegin, InterpStringBegin,
InterpStringMid, InterpStringMid,
InterpStringEnd, InterpStringEnd,
// An interpolated string with no expressions (like `x`)
InterpStringSimple,
AddAssign, AddAssign,
SubAssign, SubAssign,
@ -173,8 +175,6 @@ public:
void setSkipComments(bool skip); void setSkipComments(bool skip);
void setReadNames(bool read); void setReadNames(bool read);
const Lexeme& nextInterpolatedString();
const Location& previousLocation() const const Location& previousLocation() const
{ {
return prevLocation; return prevLocation;
@ -244,6 +244,14 @@ private:
bool skipComments; bool skipComments;
bool readNames; bool readNames;
// Records which kind of construct pushed an entry onto braceStack.
// Scoped (enum class) so the enumerators do not leak into the enclosing class
// scope and cannot be implicitly converted to integers.
enum class BraceType
{
    // pushed by readInterpolatedStringSection when a `{` inside an interpolated string is reached
    InterpolatedString,
    // pushed by readNext for a `{` it lexes as an ordinary token
    Normal
};

// One entry per brace pushed and not yet popped, innermost last. readNext pops
// the top entry on each `}` to decide whether that brace is an ordinary token
// or resumes lexing of an interpolated string.
std::vector<BraceType> braceStack;
}; };
inline bool isSpace(char ch) inline bool isSpace(char ch)

View file

@ -97,6 +97,7 @@ Lexeme::Lexeme(const Location& location, Type type, const char* data, size_t siz
|| type == InterpStringBegin || type == InterpStringBegin
|| type == InterpStringMid || type == InterpStringMid
|| type == InterpStringEnd || type == InterpStringEnd
|| type == InterpStringSimple
|| type == Number || type == Number
|| type == Comment || type == Comment
|| type == BlockComment || type == BlockComment
@ -171,6 +172,18 @@ std::string Lexeme::toString() const
case QuotedString: case QuotedString:
return data ? format("\"%.*s\"", length, data) : "string"; return data ? format("\"%.*s\"", length, data) : "string";
case InterpStringBegin:
return data ? format("the beginning of an interpolated string (`%.*s`)", length, data) : "the beginning of an interpolated string";
case InterpStringMid:
return data ? format("the middle of an interpolated string (`%.*s`)", length, data) : "the middle of an interpolated string";
case InterpStringEnd:
return data ? format("the end of an interpolated string (`%.*s`)", length, data) : "the end of an interpolated string";
case InterpStringSimple:
return data ? format("`%.*s`", length, data) : "interpolated string";
case Number: case Number:
return data ? format("'%.*s'", length, data) : "number"; return data ? format("'%.*s'", length, data) : "number";
@ -587,14 +600,6 @@ Lexeme Lexer::readQuotedString()
return Lexeme(Location(start, position()), Lexeme::QuotedString, &buffer[startOffset], offset - startOffset - 1); return Lexeme(Location(start, position()), Lexeme::QuotedString, &buffer[startOffset], offset - startOffset - 1);
} }
const Lexeme& Lexer::nextInterpolatedString()
{
Position start = position();
lexeme = readInterpolatedStringSection(start, Lexeme::InterpStringMid, Lexeme::InterpStringEnd);
return lexeme;
}
Lexeme Lexer::readInterpolatedStringBegin() Lexeme Lexer::readInterpolatedStringBegin()
{ {
LUAU_ASSERT(peekch() == '`'); LUAU_ASSERT(peekch() == '`');
@ -602,7 +607,7 @@ Lexeme Lexer::readInterpolatedStringBegin()
Position start = position(); Position start = position();
consume(); consume();
return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringEnd); return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringSimple);
} }
Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType) Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType)
@ -616,6 +621,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
case 0: case 0:
case '\r': case '\r':
case '\n': case '\n':
// INTERP TODO: Clear anything we've added to the brace stack, and write a test to see what happens if we don't
return Lexeme(Location(start, position()), Lexeme::BrokenString); return Lexeme(Location(start, position()), Lexeme::BrokenString);
case '\\': case '\\':
@ -629,6 +635,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
return Lexeme(Location(start, position()), Lexeme::BrokenInterpDoubleBrace); return Lexeme(Location(start, position()), Lexeme::BrokenInterpDoubleBrace);
} }
braceStack.push_back(BraceType::InterpolatedString);
Lexeme lexemeOutput(Location(start, position()), Lexeme::InterpStringBegin, &buffer[startOffset], offset - startOffset); Lexeme lexemeOutput(Location(start, position()), Lexeme::InterpStringBegin, &buffer[startOffset], offset - startOffset);
consume(); consume();
return lexemeOutput; return lexemeOutput;
@ -735,6 +742,34 @@ Lexeme Lexer::readNext()
} }
} }
// Ordinary `{`: record it on braceStack as Normal so that the `}` which pops
// this entry is emitted as a plain brace token.
case '{':
{
consume();
braceStack.push_back(BraceType::Normal);
return Lexeme(Location(start, 1), '{');
}
// `}`: the top of braceStack decides whether this is a plain brace token or
// the point where lexing of an interpolated string resumes.
case '}':
{
consume();
// Nothing on the stack: emit a plain `}` token.
if (braceStack.empty())
{
return Lexeme(Location(start, 1), '}');
}
const BraceType braceStackTop = braceStack.back();
braceStack.pop_back();
// The popped entry was not InterpolatedString: emit a plain `}` token.
if (braceStackTop != BraceType::InterpolatedString)
{
return Lexeme(Location(start, 1), '}');
}
// The popped entry was InterpolatedString: hand off to
// readInterpolatedStringSection, starting at the current position (the `}`
// has already been consumed), with InterpStringMid as the format type and
// InterpStringEnd as the end type.
// NOTE(review): the visible part of readInterpolatedStringSection builds its
// `{`-terminated lexeme with Lexeme::InterpStringBegin rather than formatType —
// confirm that a middle section is actually reported as InterpStringMid.
return readInterpolatedStringSection(position(), Lexeme::InterpStringMid, Lexeme::InterpStringEnd);
}
case '=': case '=':
{ {
consume(); consume();
@ -901,8 +936,6 @@ Lexeme Lexer::readNext()
case '(': case '(':
case ')': case ')':
case '{':
case '}':
case ']': case ']':
case ';': case ';':
case ',': case ',':

View file

@ -1573,7 +1573,7 @@ AstTypeOrPack Parser::parseSimpleTypeAnnotation(bool allowPack)
else else
return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "String literal contains malformed escape sequence")}; return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "String literal contains malformed escape sequence")};
} }
else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd) else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple)
{ {
parseInterpString(); parseInterpString();
@ -2014,7 +2014,7 @@ AstExpr* Parser::parsePrimaryExpr(bool asStatement)
|| lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::RawString
|| lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::QuotedString
|| lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringBegin
|| lexer.current().type == Lexeme::InterpStringEnd || lexer.current().type == Lexeme::InterpStringSimple
) )
{ {
expr = parseFunctionArgs(expr, false, Location()); expr = parseFunctionArgs(expr, false, Location());
@ -2286,7 +2286,7 @@ AstExpr* Parser::parseSimpleExpr()
} }
} }
} }
else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringEnd)) else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringSimple))
{ {
return parseString(); return parseString();
} }
@ -2375,7 +2375,7 @@ AstExpr* Parser::parseFunctionArgs(AstExpr* func, bool self, const Location& sel
return allocator.alloc<AstExprCall>(Location(func->location, expr->location), func, copy(&expr, 1), self, argLocation); return allocator.alloc<AstExprCall>(Location(func->location, expr->location), func, copy(&expr, 1), self, argLocation);
} }
else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd)) else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple))
{ {
Position argStart = lexer.current().location.end; Position argStart = lexer.current().location.end;
AstExpr* expr = parseInterpString(); AstExpr* expr = parseInterpString();
@ -2694,11 +2694,11 @@ AstArray<AstTypeOrPack> Parser::parseTypeParams()
std::optional<AstArray<char>> Parser::parseCharArray() std::optional<AstArray<char>> Parser::parseCharArray()
{ {
LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringEnd); LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringSimple);
scratchData.assign(lexer.current().data, lexer.current().length); scratchData.assign(lexer.current().data, lexer.current().length);
if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringEnd) if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringSimple)
{ {
if (!Lexer::fixupQuotedString(scratchData)) if (!Lexer::fixupQuotedString(scratchData))
{ {
@ -2734,7 +2734,12 @@ AstExpr* Parser::parseInterpString()
do { do {
Lexeme currentLexeme = lexer.current(); Lexeme currentLexeme = lexer.current();
LUAU_ASSERT(currentLexeme.type == Lexeme::InterpStringBegin || currentLexeme.type == Lexeme::InterpStringMid || currentLexeme.type == Lexeme::InterpStringEnd); LUAU_ASSERT(
currentLexeme.type == Lexeme::InterpStringBegin
|| currentLexeme.type == Lexeme::InterpStringMid
|| currentLexeme.type == Lexeme::InterpStringEnd
|| currentLexeme.type == Lexeme::InterpStringSimple
);
Location location = currentLexeme.location; Location location = currentLexeme.location;
@ -2754,7 +2759,7 @@ AstExpr* Parser::parseInterpString()
strings.push_back(chars); strings.push_back(chars);
if (currentLexeme.type == Lexeme::InterpStringEnd) if (currentLexeme.type == Lexeme::InterpStringEnd || currentLexeme.type == Lexeme::InterpStringSimple)
{ {
AstArray<AstArray<char>> stringsArray = copy(strings); AstArray<AstArray<char>> stringsArray = copy(strings);
AstArray<AstExpr*> expressionsArray = copy(expressions); AstArray<AstExpr*> expressionsArray = copy(expressions);
@ -2764,24 +2769,20 @@ AstExpr* Parser::parseInterpString()
AstExpr* expression = parseExpr(); AstExpr* expression = parseExpr();
// INTERP CODE REVIEW: I want to use expectMatchAndConsume, but using that
// consumes the rest of the string, not the `}`
if (lexer.current().type != static_cast<Lexeme::Type>(static_cast<unsigned char>('}'))) {
return reportExprError(startOfBrace, {}, "Expected '}' after interpolated string expression");
}
expressions.push_back(expression); expressions.push_back(expression);
Lexeme next = lexer.nextInterpolatedString(); switch (lexer.current().type)
switch (next.type)
{ {
case Lexeme::BrokenString: case Lexeme::InterpStringBegin:
return reportExprError(location, {}, "Malformed interpolated string"); case Lexeme::InterpStringMid:
case Lexeme::InterpStringEnd:
break;
case Lexeme::BrokenInterpDoubleBrace: case Lexeme::BrokenInterpDoubleBrace:
return reportExprError(location, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE); return reportExprError(location, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE);
case Lexeme::BrokenString:
return reportExprError(location, {}, "Malformed interpolated string, did you forget to add a '}'?");
default: default:
break; return reportExprError(location, {}, "Malformed interpolated string, got %s", lexer.current().toString().c_str());
} }
} while (true); } while (true);
} }

View file

@ -138,4 +138,23 @@ TEST_CASE("lookahead")
CHECK_EQ(lexer.lookahead().type, Lexeme::Eof); CHECK_EQ(lexer.lookahead().type, Lexeme::Eof);
} }
// Lexes one interpolated string containing a single embedded expression and
// checks the type of each lexeme returned by three successive next() calls.
TEST_CASE("stringInterpolation")
{
    ScopedFastFlag sff{"LuauInterpolatedStringBaseSupport", true};

    const std::string source = R"(`foo {"bar"}`)";

    Luau::Allocator allocator;
    AstNameTable names(allocator);
    Lexer lexer(source.c_str(), source.size(), names);

    // First lexeme: expected to be the opening section of the interpolated string.
    const Lexeme::Type firstType = lexer.next().type;
    CHECK_EQ(firstType, Lexeme::InterpStringBegin);

    // Second lexeme: expected to be the quoted string inside the braces.
    const Lexeme::Type secondType = lexer.next().type;
    CHECK_EQ(secondType, Lexeme::QuotedString);

    // Third lexeme: expected to be the closing section of the interpolated string.
    const Lexeme::Type thirdType = lexer.next().type;
    CHECK_EQ(thirdType, Lexeme::InterpStringEnd);
}
TEST_SUITE_END(); TEST_SUITE_END();

View file

@ -954,7 +954,7 @@ TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_end_brace")
catch (const ParseErrors& e) catch (const ParseErrors& e)
{ {
auto error = e.getErrors().front(); auto error = e.getErrors().front();
CHECK_EQ("Expected '}' after interpolated string expression", error.getMessage()); CHECK_EQ("Malformed interpolated string, did you forget to add a '}'?", error.getMessage());
return error.getLocation().begin.column; return error.getLocation().begin.column;
} }
}; };