XPlor/libs/dsl/parser.cpp

570 lines
18 KiB
C++
Raw Normal View History

2026-01-01 22:18:25 -05:00
#include "parser.h"
#include <stdexcept>
// Constructs a parser over the given lexer; the lexer is moved into m_lex.
Parser::Parser(Lexer lex)
    : m_lex(std::move(lex))
{
}
// Parses a whole module: type definitions are read one after another until
// the End token is reached. Each definition is stored in Module::types under
// its own name.
//
// Throws std::runtime_error ("Duplicate type: <name>") when a type name is
// defined more than once.
Module Parser::parseModule() {
    Module result;
    for (;;) {
        if (peek().kind == TokenKind::End) {
            break;
        }

        TypeDef def = parseTypeDef();
        const bool alreadyDefined = result.types.contains(def.name);
        if (alreadyDefined) {
            throw std::runtime_error(("Duplicate type: " + def.name).toStdString());
        }
        result.types.insert(def.name, std::move(def));
    }
    return result;
}
// Parses an optional bracketed, comma-separated attribute list, for example
//   [ui, readonly, display="Name", format_assetPtr="hex"]
//
// defaultUi  initial value of UiFlags::ui; it is only overridden (to true)
//            when the list contains the "ui" attribute.
//
// Attribute names are compared case-insensitively. Returns the collected
// flags; when the next token is not '[' the defaults are returned untouched.
// Throws std::runtime_error ("Unknown attribute: <name>") for an unrecognised
// attribute name. Token mismatches are handled by expect().
UiFlags Parser::parseUiFlags(bool defaultUi)
{
    UiFlags flags;
    flags.ui = defaultUi;

    // No opening bracket means there is no attribute list at all.
    if (!match(TokenKind::LBracket)) {
        return flags;
    }

    do {
        const Token attrTok = expect(TokenKind::Identifier, "Expected attribute name");
        const QString original = attrTok.text;
        const QString lowered = original.toLower();

        if (lowered == "ui") {
            flags.ui = true;
        } else if (lowered == "readonly") {
            flags.readOnly = true;
        } else if (lowered == "hidden") {
            flags.hidden = true;
        } else if (lowered == "display") {
            expect(TokenKind::Assign, "Expected '=' after display");
            const Token valueTok = expect(TokenKind::String, "Expected string after display=");
            flags.display = valueTok.text;
        } else if (lowered == "table") {
            expect(TokenKind::Assign, "Expected '=' after table");
            const Token valueTok = expect(TokenKind::String, "Expected string after table=");
            flags.tableTitle = valueTok.text;
        } else if (lowered == "columns") {
            expect(TokenKind::Assign, "Expected '=' after columns");
            const Token valueTok = expect(TokenKind::String, "Expected string after columns=");
            // Stored verbatim as CSV here; splitting into a list happens later.
            flags.columnsCsv = valueTok.text;
        } else if (lowered.startsWith("format_")) {
            // Form: format_<column>="<format>", e.g. format_assetPtr="hex".
            // The column key is cut from the original spelling, so its case
            // is preserved; the format value itself is lower-cased.
            const QString columnKey = original.mid(QString("format_").size());
            expect(TokenKind::Assign, "Expected '=' after format_<col>");
            const Token valueTok = expect(TokenKind::String, "Expected string after format_<col>=");
            flags.formats.insert(columnKey, valueTok.text.toLower());
        } else {
            throw std::runtime_error(("Unknown attribute: " + attrTok.text).toStdString());
        }
    } while (match(TokenKind::Comma)); // another attribute follows each comma

    expect(TokenKind::RBracket, "Expected ']'");
    return flags;
}
// Parses the optional bracketed attribute list that may follow a type name,
// for example [root, display="Zone File"].
//
// Recognised attributes (case-insensitive): "root" and display="<string>".
// Returns default-constructed attributes when the next token is not '['.
// Throws std::runtime_error ("Unknown type attribute: <name>") for any other
// attribute name. Token mismatches are handled by expect().
TypeAttrs Parser::parseTypeAttrs()
{
    TypeAttrs attrs;

    if (!match(TokenKind::LBracket)) {
        return attrs;
    }

    do {
        const Token attrTok = expect(TokenKind::Identifier, "Expected type attribute");
        const QString lowered = attrTok.text.toLower();

        if (lowered == "root") {
            attrs.root = true;
        } else if (lowered == "display") {
            expect(TokenKind::Assign, "Expected '=' after display");
            const Token valueTok = expect(TokenKind::String, "Expected string after display=");
            attrs.display = valueTok.text;
        } else {
            throw std::runtime_error(("Unknown type attribute: " + attrTok.text).toStdString());
        }
    } while (match(TokenKind::Comma)); // another attribute follows each comma

    expect(TokenKind::RBracket, "Expected ']'");
    return attrs;
}
// Parses one type definition:
//
//   type <Name> [attrs]? (byteorder LE|BE)? { (criteria { ... })? <statements> }
//
// Returns the populated TypeDef (name, root/display attributes, optional
// explicit byte order, criteria statements and body statements).
//
// Throws std::runtime_error when the byteorder keyword is not followed by
// LE or BE, or when more than one criteria block is present. Token mismatches
// are handled by expect().
TypeDef Parser::parseTypeDef() {
    expect(TokenKind::KwType, "Expected 'type'");
    const Token nameTok = expect(TokenKind::Identifier, "Expected type name");

    TypeDef t;
    t.name = nameTok.text;

    const TypeAttrs attrs = parseTypeAttrs();
    t.isRoot = attrs.root;
    t.display = attrs.display;

    // optional: byteorder LE|BE
    if (match(TokenKind::KwByteOrder)) {
        t.hasExplicitByteOrder = true;
        if (match(TokenKind::KwLE)) {
            t.order = ByteOrder::LE;
        } else if (match(TokenKind::KwBE)) {
            t.order = ByteOrder::BE;
        } else {
            throw std::runtime_error("Expected LE or BE after byteorder");
        }
    }

    expect(TokenKind::LBrace, "Expected '{' after type header");

    // Only one criteria block is allowed. Track that with an explicit flag
    // rather than checking whether t.criteria is non-empty: an empty
    // "criteria {}" block leaves the vector empty, which would otherwise let a
    // second criteria block slip through undetected.
    bool sawCriteria = false;
    while (peek().kind == TokenKind::KwCriteria) {
        if (sawCriteria) {
            throw std::runtime_error("Duplicate criteria block");
        }
        t.criteria = parseCriteriaBlock();
        sawCriteria = true;
    }

    t.body = parseBlock();
    expect(TokenKind::RBrace, "Expected '}' after type body");
    return t;
}
// Parses statements until the next token is '}' or End. The terminating token
// is left unconsumed so the caller can check for the closing brace itself.
QVector<StmtPtr> Parser::parseBlock() {
    QVector<StmtPtr> statements;
    for (;;) {
        const TokenKind upcoming = peek().kind;
        if (upcoming == TokenKind::RBrace || upcoming == TokenKind::End) {
            break;
        }
        statements.push_back(parseStatement());
    }
    return statements;
}
// Dispatches to the statement parser selected by the kind of the next token.
// The token itself is not consumed here; each sub-parser consumes it.
//
// Throws std::runtime_error ("Unexpected token in statement: <text>") when
// the next token cannot start a statement.
StmtPtr Parser::parseStatement() {
    switch (peek().kind) {
    // Scalar field declarations.
    case TokenKind::KwBool:
    case TokenKind::KwU8:
    case TokenKind::KwI8:
    case TokenKind::KwU16:
    case TokenKind::KwI16:
    case TokenKind::KwU32:
    case TokenKind::KwI32:
    case TokenKind::KwU64:
    case TokenKind::KwI64:
        return parseScalarStmt();

    // Keyword-introduced simple statements.
    case TokenKind::KwSkip:
        return parseSkipStmt();
    case TokenKind::KwAlign:
        return parseAlignStmt();
    case TokenKind::KwSeek:
        return parseSeekStmt();
    case TokenKind::KwRequire:
        return parseRequireStmt();
    case TokenKind::KwByteOrder:
        return parseByteOrderStmt();

    // Control flow.
    case TokenKind::KwIf:
        return parseIfStmt();
    case TokenKind::KwWhile:
        return parseWhileStmt();
    case TokenKind::KwRepeat:
        return parseRepeatStmt();
    case TokenKind::KwFor:
        return parseForStmt();

    // Assignment or call statement, both start with an identifier.
    case TokenKind::Identifier:
        return parseAssignStmt();

    default:
        throw std::runtime_error(("Unexpected token in statement: " + peek().text).toStdString());
    }
}
// Maps a scalar keyword token kind to the matching ScalarType.
// Throws std::runtime_error ("Not a scalar token") for any other kind.
ScalarType Parser::scalarFrom(TokenKind k) const {
    switch (k) {
    case TokenKind::KwBool:
        return ScalarType::Bool;
    case TokenKind::KwU8:
        return ScalarType::U8;
    case TokenKind::KwU16:
        return ScalarType::U16;
    case TokenKind::KwU32:
        return ScalarType::U32;
    case TokenKind::KwU64:
        return ScalarType::U64;
    case TokenKind::KwI8:
        return ScalarType::I8;
    case TokenKind::KwI16:
        return ScalarType::I16;
    case TokenKind::KwI32:
        return ScalarType::I32;
    case TokenKind::KwI64:
        return ScalarType::I64;
    default:
        throw std::runtime_error("Not a scalar token");
    }
}
// Parses a scalar read statement:  <scalar-keyword> <name> [attrs]? ;
//
// The scalar keyword is taken unconditionally from the lexer; the caller
// (parseStatement) only dispatches here when the next token is one of the
// scalar keywords. scalarFrom() throws if it is not.
//
// Returns a Stmt::ReadScalar node carrying the scalar type, the variable name
// and the optional UI flags (UiFlags::ui defaults to false). The statement is
// stamped with the source position of the scalar keyword, matching what the
// other statement parsers do with their leading keyword.
StmtPtr Parser::parseScalarStmt() {
    const Token typeTok = m_lex.next();
    const Token nameTok = expect(TokenKind::Identifier, "Expected variable name after scalar");
    const UiFlags flags = parseUiFlags(false);
    expect(TokenKind::Semicolon, "Expected ';' after scalar statement");

    auto s = QSharedPointer<Stmt>::create();
    s->line = typeTok.line;
    s->col = typeTok.col;
    s->node = Stmt::ReadScalar{ scalarFrom(typeTok.kind), nameTok.text, flags };
    return s;
}
// Parses:  skip <expr> ;
// Returns a Stmt::Skip node positioned at the 'skip' keyword.
StmtPtr Parser::parseSkipStmt() {
    const Token keyword = expect(TokenKind::KwSkip, "Expected skip");
    ExprPtr amount = parseExpr();
    expect(TokenKind::Semicolon, "Expected ';' after skip");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::Skip{ std::move(amount) };
    return stmt;
}
// Parses:  align ( <expr> ) ;
// Returns a Stmt::Align node positioned at the 'align' keyword.
StmtPtr Parser::parseAlignStmt() {
    const Token keyword = expect(TokenKind::KwAlign, "Expected align");

    expect(TokenKind::LParen, "Expected '(' after align");
    ExprPtr boundary = parseExpr();
    expect(TokenKind::RParen, "Expected ')' after align(arg)");
    expect(TokenKind::Semicolon, "Expected ';' after align");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::Align{ std::move(boundary) };
    return stmt;
}
// Parses:  seek ( <expr> ) ;
// Returns a Stmt::Seek node positioned at the 'seek' keyword.
StmtPtr Parser::parseSeekStmt() {
    const Token keyword = expect(TokenKind::KwSeek, "Expected seek");

    expect(TokenKind::LParen, "Expected '(' after seek");
    ExprPtr target = parseExpr();
    expect(TokenKind::RParen, "Expected ')' after seek(arg)");
    expect(TokenKind::Semicolon, "Expected ';' after seek");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::Seek{ std::move(target) };
    return stmt;
}
// Parses a statement that starts with an identifier. Two forms are accepted:
//
//   <name> ( <arg>, ... ) ;        -> Stmt::CallStmt wrapping an Expr::Call
//   <name> = <expr> [attrs]? ;     -> Stmt::Assign (UiFlags::ui defaults to false)
//
// Both the statement and, for the call form, the call expression are stamped
// with the source position of the leading identifier, in line with the other
// statement parsers and with parsePostfix().
StmtPtr Parser::parseAssignStmt() {
    const Token nameTok = expect(TokenKind::Identifier, "Expected identifier");

    // A '(' right after the name makes this a call statement.
    if (peek().kind == TokenKind::LParen) {
        expect(TokenKind::LParen, "Expected '('");

        QVector<ExprPtr> args;
        if (peek().kind != TokenKind::RParen) {
            // One argument, then one more after each comma.
            do {
                args.push_back(parseExpr());
            } while (match(TokenKind::Comma));
        }

        expect(TokenKind::RParen, "Expected ')'");
        expect(TokenKind::Semicolon, "Expected ';' after function call");

        auto callExpr = QSharedPointer<Expr>::create();
        callExpr->line = nameTok.line;
        callExpr->col = nameTok.col;
        callExpr->node = Expr::Call{ nameTok.text, std::move(args) };

        auto s = QSharedPointer<Stmt>::create();
        s->line = nameTok.line;
        s->col = nameTok.col;
        s->node = Stmt::CallStmt{ std::move(callExpr) };
        return s;
    }

    // Regular assignment: name = expr [attrs]? ;
    expect(TokenKind::Assign, "Expected '='");
    ExprPtr e = parseExpr();
    const UiFlags flags = parseUiFlags(false);
    expect(TokenKind::Semicolon, "Expected ';' after assignment");

    auto s = QSharedPointer<Stmt>::create();
    s->line = nameTok.line;
    s->col = nameTok.col;
    s->node = Stmt::Assign{ nameTok.text, std::move(e), flags };
    return s;
}
// Parses:  criteria { <statements> }
// The body goes through parseBlock(), so any ordinary statement is accepted
// inside it. Returns the statements of the block.
QVector<StmtPtr> Parser::parseCriteriaBlock()
{
    expect(TokenKind::KwCriteria, "Expected criteria");
    expect(TokenKind::LBrace, "Expected '{' after criteria");

    QVector<StmtPtr> statements = parseBlock();

    expect(TokenKind::RBrace, "Expected '}' after criteria block");
    return statements;
}
// Parses:  require <expr> ;
// Returns a Stmt::Require node positioned at the 'require' keyword.
StmtPtr Parser::parseRequireStmt()
{
    const Token keyword = expect(TokenKind::KwRequire, "Expected require");
    ExprPtr condition = parseExpr();
    expect(TokenKind::Semicolon, "Expected ';' after require");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::Require{ std::move(condition) };
    return stmt;
}
// Parses:  if ( <expr> ) { <statements> } [ else { <statements> } ]
// The else branch is optional and, when present, must be a braced block.
// Returns a Stmt::If node positioned at the 'if' keyword; the else body is
// empty when no else branch was written.
StmtPtr Parser::parseIfStmt() {
    const Token keyword = expect(TokenKind::KwIf, "Expected if");

    expect(TokenKind::LParen, "Expected '(' after if");
    ExprPtr condition = parseExpr();
    expect(TokenKind::RParen, "Expected ')' after if condition");

    expect(TokenKind::LBrace, "Expected '{' after if");
    QVector<StmtPtr> whenTrue = parseBlock();
    expect(TokenKind::RBrace, "Expected '}' after if body");

    QVector<StmtPtr> whenFalse;
    if (match(TokenKind::KwElse)) {
        expect(TokenKind::LBrace, "Expected '{' after else");
        whenFalse = parseBlock();
        expect(TokenKind::RBrace, "Expected '}' after else body");
    }

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::If{ std::move(condition), std::move(whenTrue), std::move(whenFalse) };
    return stmt;
}
// Parses:  while ( <expr> ) { <statements> }
// Returns a Stmt::While node positioned at the 'while' keyword.
StmtPtr Parser::parseWhileStmt() {
    const Token keyword = expect(TokenKind::KwWhile, "Expected while");

    expect(TokenKind::LParen, "Expected '(' after while");
    ExprPtr condition = parseExpr();
    expect(TokenKind::RParen, "Expected ')' after while condition");

    expect(TokenKind::LBrace, "Expected '{' after while");
    QVector<StmtPtr> loopBody = parseBlock();
    expect(TokenKind::RBrace, "Expected '}' after while body");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::While{ std::move(condition), std::move(loopBody) };
    return stmt;
}
// Parses:  repeat ( <expr> ) { <statements> }
// Returns a Stmt::Repeat node positioned at the 'repeat' keyword.
StmtPtr Parser::parseRepeatStmt() {
    const Token keyword = expect(TokenKind::KwRepeat, "Expected repeat");

    expect(TokenKind::LParen, "Expected '(' after repeat");
    ExprPtr times = parseExpr();
    expect(TokenKind::RParen, "Expected ')' after repeat(count)");

    expect(TokenKind::LBrace, "Expected '{' after repeat");
    QVector<StmtPtr> loopBody = parseBlock();
    expect(TokenKind::RBrace, "Expected '}' after repeat body");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::Repeat{ std::move(times), std::move(loopBody) };
    return stmt;
}
// Parses:  for <name> in <expr> .. <expr> { <statements> }
// Returns a Stmt::ForRange node positioned at the 'for' keyword, holding the
// loop variable name, the two range expressions and the body.
StmtPtr Parser::parseForStmt() {
    const Token keyword = expect(TokenKind::KwFor, "Expected for");
    const Token loopVar = expect(TokenKind::Identifier, "Expected loop variable name after for");
    expect(TokenKind::KwIn, "Expected 'in'");

    // Range: <first> .. <last>
    ExprPtr first = parseExpr();
    expect(TokenKind::DotDot, "Expected '..' in for range");
    ExprPtr last = parseExpr();

    expect(TokenKind::LBrace, "Expected '{' after for range");
    QVector<StmtPtr> loopBody = parseBlock();
    expect(TokenKind::RBrace, "Expected '}' after for body");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::ForRange{ loopVar.text, std::move(first), std::move(last), std::move(loopBody) };
    return stmt;
}
// --- Expression parsing (Pratt) ---
// Returns the binding strength of a binary operator token. parseExpr() treats
// larger values as binding tighter. Any token that is not listed here yields
// 0, which parseExpr() takes to mean "not a binary operator".
int Parser::precedence(const Token& t) const {
    switch (t.kind) {
    // Logical
    case TokenKind::OrOr:
        return 1;
    case TokenKind::AndAnd:
        return 2;

    // Equality
    case TokenKind::EqEq:
    case TokenKind::NotEq:
        return 3;

    // Relational
    case TokenKind::Lt:
    case TokenKind::Lte:
    case TokenKind::Gt:
    case TokenKind::Gte:
        return 4;

    // Bitwise
    case TokenKind::Bar:
        return 5;
    case TokenKind::Caret:
        return 6;
    case TokenKind::Amp:
        return 7;

    // Shifts
    case TokenKind::LShift:
    case TokenKind::RShift:
        return 8;

    // Additive
    case TokenKind::Plus:
    case TokenKind::Minus:
        return 9;

    // Multiplicative
    case TokenKind::Star:
    case TokenKind::Slash:
    case TokenKind::Percent:
        return 10;

    default:
        return 0;
    }
}
// Returns the operator spelling for a token, which is simply the token text.
QString Parser::tokenOpText(const Token& t) const
{
    return t.text;
}
// Parses an expression made of binary operators whose precedence() is at
// least minPrec.
//
// An operand is a primary expression, followed by any call postfix, followed
// by any pipeline; the pipeline is attached to the operand before binary
// operators are looked at, so it binds tighter than every binary operator.
// The right operand of each operator is parsed with a minimum precedence one
// above the operator's own, so operators of equal precedence group to the
// left.
ExprPtr Parser::parseExpr(int minPrec) {
    ExprPtr left = parsePipeIfAny(parsePostfix(parsePrimary()));

    for (;;) {
        const int opPrec = precedence(peek());
        // Stop on a non-operator (0) or an operator that belongs to a caller.
        if (opPrec == 0 || opPrec < minPrec) {
            break;
        }

        const Token opTok = m_lex.next();
        ExprPtr right = parseExpr(opPrec + 1);

        auto binary = QSharedPointer<Expr>::create();
        binary->line = opTok.line;
        binary->col = opTok.col;
        binary->node = Expr::Binary{ opTok.text, std::move(left), std::move(right) };
        left = std::move(binary);

        // Allow a pipeline after a binary expression as well.
        // NOTE(review): the recursive parseExpr() call above already attaches
        // any '|' stages to the right operand, so this call looks like it can
        // never actually see a pipe token - confirm before relying on it.
        left = parsePipeIfAny(std::move(left));
    }

    return left;
}
// Parses a primary expression: a number, a string, the EOF keyword, an
// identifier, a parenthesised expression, or a unary operator (!, -, +)
// applied to an operand.
//
// Every node created here is stamped with the position of the first token of
// the primary. A parenthesised expression returns the inner node unchanged.
// Throws std::runtime_error ("Unexpected token in expression: <text>") when
// the next token cannot start an expression.
ExprPtr Parser::parsePrimary() {
    // Snapshot of the first token, taken before match() consumes it.
    const Token first = peek();

    // Allocates an expression node positioned at the first token.
    const auto newNode = [&first]() {
        auto node = QSharedPointer<Expr>::create();
        node->line = first.line;
        node->col = first.col;
        return node;
    };

    if (match(TokenKind::Number)) {
        auto literal = newNode();
        literal->node = Expr::Int{ first.number };
        return literal;
    }

    if (match(TokenKind::String)) {
        auto literal = newNode();
        literal->node = Expr::String{ first.text };
        return literal;
    }

    if (match(TokenKind::KwEOF)) {
        // EOF is represented as a variable named "EOF"; the original author's
        // note says it is meant to be used only inside read(...).
        auto eofVar = newNode();
        eofVar->node = Expr::Var{ "EOF" };
        return eofVar;
    }

    if (match(TokenKind::Identifier)) {
        auto variable = newNode();
        variable->node = Expr::Var{ first.text };
        return variable;
    }

    if (match(TokenKind::LParen)) {
        ExprPtr inner = parseExpr();
        expect(TokenKind::RParen, "Expected ')'");
        return inner;
    }

    // Unary operators. The operand is parsed with minimum precedence 11,
    // which is above every value precedence() returns, so no binary operator
    // is absorbed into the operand.
    const bool isUnary = match(TokenKind::Bang) || match(TokenKind::Minus) || match(TokenKind::Plus);
    if (isUnary) {
        ExprPtr operand = parseExpr(11);
        auto unary = newNode();
        unary->node = Expr::Unary{ first.text, std::move(operand) };
        return unary;
    }

    throw std::runtime_error(("Unexpected token in expression: " + first.text).toStdString());
}
// Applies call postfixes to an already parsed expression:  <name> ( <args> )
//
// While the next token is '(', lhs must hold an Expr::Var; it is replaced by
// an Expr::Call with that variable's name and the parsed arguments, keeping
// the position of the expression it replaces. Returns lhs unchanged when no
// '(' follows.
//
// Throws std::runtime_error ("Only identifiers can be called as functions")
// when '(' follows an expression that is not a plain variable.
ExprPtr Parser::parsePostfix(ExprPtr lhs) {
    while (peek().kind == TokenKind::LParen) {
        // Only a bare variable may be the callee.
        const Expr::Var* callee = std::get_if<Expr::Var>(&lhs->node);
        if (callee == nullptr) {
            throw std::runtime_error("Only identifiers can be called as functions");
        }
        // Copy the name now: lhs is replaced further down.
        const QString fnName = callee->name;

        expect(TokenKind::LParen, "Expected '('");

        QVector<ExprPtr> callArgs;
        if (peek().kind != TokenKind::RParen) {
            // One argument, then one more after each comma.
            do {
                callArgs.push_back(parseExpr());
            } while (match(TokenKind::Comma));
        }

        expect(TokenKind::RParen, "Expected ')' after args");

        auto callNode = QSharedPointer<Expr>::create();
        callNode->line = lhs->line;
        callNode->col = lhs->col;
        callNode->node = Expr::Call{ fnName, std::move(callArgs) };
        lhs = std::move(callNode);
    }
    return lhs;
}
// Wraps base in an Expr::Pipe when a pipeline follows it:
//
//   <base> | <stage> | <stage> ...
//
// Each stage is an identifier and becomes an Expr::Call with that name. The
// stage named "parse" must be followed by a second identifier (a type name),
// which is stored as the call's single string argument. Returns base
// unchanged when the next token is not a pipe; otherwise returns the pipe
// node, positioned where base is.
//
// Throws std::runtime_error ("Expected stage identifier after '|'") when a
// pipe token is not followed by an identifier.
ExprPtr Parser::parsePipeIfAny(ExprPtr base) {
    if (peek().kind != TokenKind::Pipe) {
        return base;
    }

    auto pipeNode = QSharedPointer<Expr>::create();
    // Read the position before base is moved into the pipe below.
    pipeNode->line = base->line;
    pipeNode->col = base->col;

    Expr::Pipe pipeline;
    pipeline.base = std::move(base);

    while (match(TokenKind::Pipe)) {
        // Snapshot of the stage token, taken before match() consumes it.
        const Token stageTok = peek();
        if (!match(TokenKind::Identifier)) {
            throw std::runtime_error("Expected stage identifier after '|'");
        }

        Expr::Call stageCall;
        stageCall.fn = stageTok.text;

        // Special case: "parse <TypeName>" carries the type name as a string
        // argument.
        if (stageCall.fn == "parse") {
            Token typeTok = expect(TokenKind::Identifier, "Expected type name after 'parse' in pipeline");
            stageCall.args.push_back(QSharedPointer<Expr>::create(Expr{ Expr::String{ typeTok.text }, typeTok.line, typeTok.col }));
        }

        pipeline.stages.push_back(std::move(stageCall));
    }

    pipeNode->node = std::move(pipeline);
    return pipeNode;
}
// Parses:  byteorder LE ;   or   byteorder BE ;
// Returns a Stmt::SetByteOrder node positioned at the 'byteorder' keyword.
// Throws std::runtime_error ("Expected LE or BE after byteorder") when
// neither LE nor BE follows the keyword.
StmtPtr Parser::parseByteOrderStmt() {
    const Token keyword = expect(TokenKind::KwByteOrder, "Expected byteorder");

    // Every path below either assigns this or throws.
    ByteOrder selected;
    if (match(TokenKind::KwLE)) {
        selected = ByteOrder::LE;
    } else if (match(TokenKind::KwBE)) {
        selected = ByteOrder::BE;
    } else {
        throw std::runtime_error("Expected LE or BE after byteorder");
    }

    expect(TokenKind::Semicolon, "Expected ';' after byteorder");

    auto stmt = QSharedPointer<Stmt>::create();
    stmt->line = keyword.line;
    stmt->col = keyword.col;
    stmt->node = Stmt::SetByteOrder{selected};
    return stmt;
}