New lexer method

This commit is contained in:
Kampfkarren 2022-08-17 09:45:12 -07:00
parent 7a437aad17
commit 8bb8f47325
5 changed files with 95 additions and 34 deletions

View file

@ -64,6 +64,8 @@ struct Lexeme
InterpStringBegin,
InterpStringMid,
InterpStringEnd,
// An interpolated string with no expressions (like `x`)
InterpStringSimple,
AddAssign,
SubAssign,
@ -173,8 +175,6 @@ public:
void setSkipComments(bool skip);
void setReadNames(bool read);
const Lexeme& nextInterpolatedString();
const Location& previousLocation() const
{
return prevLocation;
@ -244,6 +244,14 @@ private:
bool skipComments;
bool readNames;
// Kind of '{' that is currently open while lexing. One entry is pushed for
// every opening brace and popped again on the matching '}', which lets the
// lexer decide whether a '}' is a plain brace token or the point where reading
// of an interpolated string has to resume.
//
// Scoped (enum class) so the enumerators do not leak into the enclosing scope;
// use sites spell them as BraceType::InterpolatedString / BraceType::Normal.
enum class BraceType
{
    // Pushed when a '{' is met while reading a section of an interpolated string
    InterpolatedString,
    // Pushed for a '{' read as a regular token
    Normal
};
std::vector<BraceType> braceStack;
};
inline bool isSpace(char ch)

View file

@ -97,6 +97,7 @@ Lexeme::Lexeme(const Location& location, Type type, const char* data, size_t siz
|| type == InterpStringBegin
|| type == InterpStringMid
|| type == InterpStringEnd
|| type == InterpStringSimple
|| type == Number
|| type == Comment
|| type == BlockComment
@ -171,6 +172,18 @@ std::string Lexeme::toString() const
case QuotedString:
return data ? format("\"%.*s\"", length, data) : "string";
case InterpStringBegin:
return data ? format("the beginning of an interpolated string (`%.*s`)", length, data) : "the beginning of an interpolated string";
case InterpStringMid:
return data ? format("the middle of an interpolated string (`%.*s`)", length, data) : "the middle of an interpolated string";
case InterpStringEnd:
return data ? format("the end of an interpolated string (`%.*s`)", length, data) : "the end of an interpolated string";
case InterpStringSimple:
return data ? format("`%.*s`", length, data) : "interpolated string";
case Number:
return data ? format("'%.*s'", length, data) : "number";
@ -587,14 +600,6 @@ Lexeme Lexer::readQuotedString()
return Lexeme(Location(start, position()), Lexeme::QuotedString, &buffer[startOffset], offset - startOffset - 1);
}
// Reads one more section of an interpolated string, beginning at the lexer's
// current position, with InterpStringMid as the section type and
// InterpStringEnd as the end type. The result is stored in `lexeme` and a
// reference to that member is returned.
const Lexeme& Lexer::nextInterpolatedString()
{
    lexeme = readInterpolatedStringSection(position(), Lexeme::InterpStringMid, Lexeme::InterpStringEnd);

    return lexeme;
}
Lexeme Lexer::readInterpolatedStringBegin()
{
LUAU_ASSERT(peekch() == '`');
@ -602,7 +607,7 @@ Lexeme Lexer::readInterpolatedStringBegin()
Position start = position();
consume();
return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringEnd);
return readInterpolatedStringSection(start, Lexeme::InterpStringBegin, Lexeme::InterpStringSimple);
}
Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType)
@ -616,6 +621,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
case 0:
case '\r':
case '\n':
// INTERP TODO: Clear anything we've added to the brace stack, and write a test to see what happens if we don't
return Lexeme(Location(start, position()), Lexeme::BrokenString);
case '\\':
@ -629,6 +635,7 @@ Lexeme Lexer::readInterpolatedStringSection(Position start, Lexeme::Type formatT
return Lexeme(Location(start, position()), Lexeme::BrokenInterpDoubleBrace);
}
braceStack.push_back(BraceType::InterpolatedString);
Lexeme lexemeOutput(Location(start, position()), Lexeme::InterpStringBegin, &buffer[startOffset], offset - startOffset);
consume();
return lexemeOutput;
@ -735,6 +742,34 @@ Lexeme Lexer::readNext()
}
}
case '{':
{
consume();
braceStack.push_back(BraceType::Normal);
return Lexeme(Location(start, 1), '{');
}
case '}':
{
consume();
if (braceStack.empty())
{
return Lexeme(Location(start, 1), '}');
}
const BraceType braceStackTop = braceStack.back();
braceStack.pop_back();
if (braceStackTop != BraceType::InterpolatedString)
{
return Lexeme(Location(start, 1), '}');
}
return readInterpolatedStringSection(position(), Lexeme::InterpStringMid, Lexeme::InterpStringEnd);
}
case '=':
{
consume();
@ -901,8 +936,6 @@ Lexeme Lexer::readNext()
case '(':
case ')':
case '{':
case '}':
case ']':
case ';':
case ',':

View file

@ -1573,7 +1573,7 @@ AstTypeOrPack Parser::parseSimpleTypeAnnotation(bool allowPack)
else
return {reportTypeAnnotationError(begin, {}, /*isMissing*/ false, "String literal contains malformed escape sequence")};
}
else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd)
else if (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple)
{
parseInterpString();
@ -2014,7 +2014,7 @@ AstExpr* Parser::parsePrimaryExpr(bool asStatement)
|| lexer.current().type == Lexeme::RawString
|| lexer.current().type == Lexeme::QuotedString
|| lexer.current().type == Lexeme::InterpStringBegin
|| lexer.current().type == Lexeme::InterpStringEnd
|| lexer.current().type == Lexeme::InterpStringSimple
)
{
expr = parseFunctionArgs(expr, false, Location());
@ -2286,7 +2286,7 @@ AstExpr* Parser::parseSimpleExpr()
}
}
}
else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringEnd))
else if (lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::QuotedString || (FFlag::LuauInterpolatedStringBaseSupport && lexer.current().type == Lexeme::InterpStringSimple))
{
return parseString();
}
@ -2375,7 +2375,7 @@ AstExpr* Parser::parseFunctionArgs(AstExpr* func, bool self, const Location& sel
return allocator.alloc<AstExprCall>(Location(func->location, expr->location), func, copy(&expr, 1), self, argLocation);
}
else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringEnd))
else if (FFlag::LuauInterpolatedStringBaseSupport && (lexer.current().type == Lexeme::InterpStringBegin || lexer.current().type == Lexeme::InterpStringSimple))
{
Position argStart = lexer.current().location.end;
AstExpr* expr = parseInterpString();
@ -2694,11 +2694,11 @@ AstArray<AstTypeOrPack> Parser::parseTypeParams()
std::optional<AstArray<char>> Parser::parseCharArray()
{
LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringEnd);
LUAU_ASSERT(lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::RawString || lexer.current().type == Lexeme::InterpStringSimple);
scratchData.assign(lexer.current().data, lexer.current().length);
if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringEnd)
if (lexer.current().type == Lexeme::QuotedString || lexer.current().type == Lexeme::InterpStringSimple)
{
if (!Lexer::fixupQuotedString(scratchData))
{
@ -2734,7 +2734,12 @@ AstExpr* Parser::parseInterpString()
do {
Lexeme currentLexeme = lexer.current();
LUAU_ASSERT(currentLexeme.type == Lexeme::InterpStringBegin || currentLexeme.type == Lexeme::InterpStringMid || currentLexeme.type == Lexeme::InterpStringEnd);
LUAU_ASSERT(
currentLexeme.type == Lexeme::InterpStringBegin
|| currentLexeme.type == Lexeme::InterpStringMid
|| currentLexeme.type == Lexeme::InterpStringEnd
|| currentLexeme.type == Lexeme::InterpStringSimple
);
Location location = currentLexeme.location;
@ -2754,7 +2759,7 @@ AstExpr* Parser::parseInterpString()
strings.push_back(chars);
if (currentLexeme.type == Lexeme::InterpStringEnd)
if (currentLexeme.type == Lexeme::InterpStringEnd || currentLexeme.type == Lexeme::InterpStringSimple)
{
AstArray<AstArray<char>> stringsArray = copy(strings);
AstArray<AstExpr*> expressionsArray = copy(expressions);
@ -2764,24 +2769,20 @@ AstExpr* Parser::parseInterpString()
AstExpr* expression = parseExpr();
// INTERP CODE REVIEW: I want to use expectMatchAndConsume, but using that
// consumes the rest of the string, not the `}`
if (lexer.current().type != static_cast<Lexeme::Type>(static_cast<unsigned char>('}'))) {
return reportExprError(startOfBrace, {}, "Expected '}' after interpolated string expression");
}
expressions.push_back(expression);
Lexeme next = lexer.nextInterpolatedString();
switch (next.type)
switch (lexer.current().type)
{
case Lexeme::BrokenString:
return reportExprError(location, {}, "Malformed interpolated string");
case Lexeme::InterpStringBegin:
case Lexeme::InterpStringMid:
case Lexeme::InterpStringEnd:
break;
case Lexeme::BrokenInterpDoubleBrace:
return reportExprError(location, {}, ERROR_INVALID_INTERP_DOUBLE_BRACE);
case Lexeme::BrokenString:
return reportExprError(location, {}, "Malformed interpolated string, did you forget to add a '}'?");
default:
break;
return reportExprError(location, {}, "Malformed interpolated string, got %s", lexer.current().toString().c_str());
}
} while (true);
}

View file

@ -138,4 +138,23 @@ TEST_CASE("lookahead")
CHECK_EQ(lexer.lookahead().type, Lexeme::Eof);
}
// Lexes `foo {"bar"}` with interpolated string support enabled and checks the
// types of the first three tokens: the opening section of the interpolated
// string, the quoted string inside the braces, and the closing section.
TEST_CASE("stringInterpolation")
{
    ScopedFastFlag interpolationFlag{"LuauInterpolatedStringBaseSupport", true};

    const std::string source = R"(`foo {"bar"}`)";

    Luau::Allocator allocator;
    AstNameTable names(allocator);
    Lexer lexer(source.c_str(), source.size(), names);

    // Opening section of the interpolated string
    const Lexeme::Type openingType = lexer.next().type;
    CHECK_EQ(openingType, Lexeme::InterpStringBegin);

    // The "bar" expression between the braces
    const Lexeme::Type expressionType = lexer.next().type;
    CHECK_EQ(expressionType, Lexeme::QuotedString);

    // Closing section, produced once the '}' has been read
    const Lexeme::Type closingType = lexer.next().type;
    CHECK_EQ(closingType, Lexeme::InterpStringEnd);
}
TEST_SUITE_END();

View file

@ -954,7 +954,7 @@ TEST_CASE_FIXTURE(Fixture, "parse_interpolated_string_without_end_brace")
catch (const ParseErrors& e)
{
auto error = e.getErrors().front();
CHECK_EQ("Expected '}' after interpolated string expression", error.getMessage());
CHECK_EQ("Malformed interpolated string, did you forget to add a '}'?", error.getMessage());
return error.getLocation().begin.column;
}
};