module vayne.source.parser;


import std.algorithm;
import std.array;
import std.conv;
import std.format;
import std.string;

import vayne.ast.node;
import vayne.source.compress;
import vayne.source.context;
import vayne.source.lexer;
import vayne.source.source;
import vayne.source.token;


/// Options controlling how template text is processed while parsing.
struct ParserOptions {
    /// Passed to `compress` for literal text between tags (see `Parser.escaper`).
    CompressOptions compress;
}


/// Thrown by `parse` when one or more errors were collected.
/// The exception message is the first error; all of them are in `errors`.
class ParserErrorsException : Exception {
    this(string[] errors) {
        assert(!errors.empty);
        super(errors[0]);

        this.errors = errors;
    }

    string[] errors;
}


/// Internal error carrying the source location at which it was raised.
private class ParserException : Exception {
    this(SourceLoc loc, string msg) {
        super(msg);

        this.loc = loc;
    }

    SourceLoc loc;
}


/// Parses the source registered under `id` in `mgr` and returns the root node.
/// Throws: ParserErrorsException when any error was recorded.
Node parse(ref SourceManager mgr, uint id, ParserOptions options) {
    return Parser(mgr, id, options)();
}


/// Template-level parser: splits the buffer into literal text and `{{ }}` /
/// `{{{ }}}` tags and builds the statement tree.
private struct Parser {
    this(ref SourceManager mgr, uint id, ParserOptions options) {
        source_ = mgr.get(id);
        mgr_ = &mgr;
        options_ = options;
    }

    Node opCall() {
        return parse();
    }

private:
    /// Runs the parser over `source_`. Any exception is converted into an entry
    /// of `errors_`; the collected errors are then thrown as one exception.
    Node parse() {
        try {
            if (auto root = parse(source_))
                return root;
        } catch(Exception error) {
            if (auto ctxError = cast(ContextException)error) {
                errors_ ~= format("%s: %s", mgr_.loc(ctxError.loc), error.msg);
            } else if (auto parserError = cast(ParserException)error) {
                errors_ ~= format("%s: %s", mgr_.loc(parserError.loc), error.msg);
            } else {
                errors_ ~= error.msg;
            }
        }

        throw new ParserErrorsException(errors_);
    }

    /// Scans `source` for tags, emitting literal text through `escaper` and tag
    /// contents through `compile`. Returns null when errors were recorded.
    Node parse(Source source) {
        auto context = new Context(source);

        insert_ = new StatementBlock(Token(context.loc));

        // Clamp instead of letting the unsigned subtraction wrap around for
        // buffers shorter than the shortest possible tag opener.
        const size_t limit = (source.buffer.length >= 2) ? source.buffer.length - 2 : 0;

        while (context.cursor < limit) {
            auto remaining = context.remaining();
            auto indexOpen = remaining.indexOf("{{");
            if (indexOpen == -1)
                break;

            escaper(context, remaining[0..indexOpen]);
            context.advance(indexOpen);

            // The third brace lives at indexOpen + 2, so that is the index that
            // must be inside the buffer before it is read.
            const triple = ((indexOpen + 2 < remaining.length) && (remaining[indexOpen + 2] == '{'));
            const tagOpen = triple ? "{{{" : "{{";
            const contentStart = indexOpen + tagOpen.length;
            const tagClose = triple ? "}}}" : "}}";

            // Skip closers that sit inside an unbalanced quoted string.
            auto indexClose = remaining.indexOf(tagClose, contentStart);
            while (indexClose != -1) {
                if (balancedQuotes(remaining[contentStart..indexClose]))
                    break;

                indexClose = remaining.indexOf(tagClose, indexClose + tagClose.length);
            }

            if (indexClose == -1)
                throw new ParserException(context.loc, format("missing '%s' to close tag '%s'", tagClose, tagOpen));

            // Step over the opening braces; indexClose becomes the content length.
            context.advance(tagOpen.length);
            indexClose -= contentStart;

            try {
                compile(context, source.buffer[context.cursor..context.cursor + indexClose], tagOpen, tagClose);
            } catch (Exception error) {
                // Record the error and keep going so several errors can be reported at once.
                errors_ ~= format("%s: %s", mgr_.loc(context.loc), error.msg);
            }

            context.advance(indexClose + tagClose.length);
        }
        context.expectClosed();

        if (context.cursor > 0) {
            escaper(context, context.remaining());
        } else {
            escaper(context, source.buffer);
        }

        context.advance(context.remaining.length);

        assert(insertStack_.empty);

        if (!errors_.empty)
            return null;

        return new Module(Token(context.loc), [ insert_ ]);
    }

    /// Dispatches a tag on its first character. Tags other than `~`, `!` and
    /// plain interpolation are only accepted with the two-brace form.
    void compile(Context context, string content, string tagOpen, string tagClose) {
        void ensureSimpleTag(string tag) {
            if (tagOpen.length != 2)
                throw new ParserException(context.loc, format("'%s' not supported for tag '%s'", tagOpen, tag));
        }

        if (content.length == 0)
            return;

        const autoEscape = (tagOpen.length == 2);
        auto tag = content[0..1];

        switch(tag) {
        case "*":
            ensureSimpleTag(tag);
            iterate(context, content);
            break;
        case "/":
            ensureSimpleTag(tag);
            close(context, content);
            break;
        case "?":
            ensureSimpleTag(tag);
            conditional(context, content);
            break;
        case ":":
            ensureSimpleTag(tag);
            orElse(context, content);
            break;
        case "~":
            translate(context, content, autoEscape);
            break;
        case ";":
            ensureSimpleTag(tag);
            meta(context, content);
            break;
        case "!":
            // Produces no node.
            break;
        case "#":
            ensureSimpleTag(tag);
            define(context, content);
            break;
        case "@":
            ensureSimpleTag(tag);
            withs(context, content);
            break;
        case "&":
            ensureSimpleTag(tag);
            break;
        default:
            interpolate(context, content, autoEscape);
            break;
        }
    }

    /// `{{* ... }}`: opens a loop and makes its body the insertion point.
    void iterate(Context context, string content) {
        context.open(content[0..1], content[1..$]);
        content = content[1..$].strip();

        auto loopStmt = parseLoop(Source(source_.id, source_.parent, content), context.loc);
        auto bodyBlock = create!StatementBlock(Token(context.loc));
        loopStmt.children[2] = bodyBlock;
        insert_.children ~= loopStmt;

        insertStack_ ~= insert_;
        insert_ = bodyBlock;
    }

    /// `{{/}}`: closes the innermost open tag and restores the previous insertion point.
    void close(Context context, string content) {
        context.close();

        if (insertStack_.length) {
            insert_ = insertStack_.back;
            insertStack_.popBack;
        } else {
            // Only reachable when the opening tag failed to parse earlier.
            assert(errors_.length);
        }
    }

    /// `{{? expr }}`: opens an if statement and makes its true block the insertion point.
    void conditional(Context context, string content) {
        context.open(content[0..1], content[1..$]);
        content = content[1..$].strip;

        auto ifStmt = create!IfStatement(Token(context.loc), parseExpr(Source(source_.id, source_.parent, content), context.loc), create!StatementBlock(Token(context.loc)), null);
        insert_.children ~= ifStmt;

        insertStack_ ~= insert_;
        insert_ = cast(StatementBlock)ifStmt.children[1];
    }

    /// `{{:}}` / `{{: expr }}`: attaches an else or else-if branch to the end of
    /// the current if chain and makes that branch the insertion point.
    void orElse(Context context, string content) {
        context.expectOpen("?", ":");
        content = content[1..$].strip;

        if (insertStack_.length) {
            insert_ = insertStack_.back;
            insertStack_.popBack;
        } else {
            // Error recovery: the matching `?` tag failed to parse. Make sure
            // there is an if statement to attach to; `children` may be empty.
            assert(errors_.length);
            IfStatement lastIf = insert_.children.empty ? null : cast(IfStatement)insert_.children.back;
            if (!lastIf)
                insert_.children ~= create!IfStatement(Token(context.loc), null, create!StatementBlock(Token(context.loc)), null);
        }

        auto ifStmt = cast(IfStatement)insert_.children.back;
        assert(ifStmt !is null);

        // Walk to the last if statement in the else-if chain.
        while (ifStmt.children[2] !is null) {
            ifStmt = cast(IfStatement)ifStmt.children[2];
            if (ifStmt is null)
                throw new ParserException(context.loc, "unexpected else statement");
        }

        assert(ifStmt.children[2] is null);

        if (content.length) {
            auto elseIfStmt = create!IfStatement(Token(context.loc), parseExpr(Source(source_.id, source_.parent, content), context.loc), create!StatementBlock(Token(context.loc)), null);
            ifStmt.children[2] = elseIfStmt;

            insertStack_ ~= insert_;
            insert_ = cast(StatementBlock)elseIfStmt.children[1];
        } else {
            auto elseBlock = create!StatementBlock(Token(context.loc));
            ifStmt.children[2] = elseBlock;

            insertStack_ ~= insert_;
            insert_ = cast(StatementBlock)ifStmt.children[2];
        }
    }

    /// `{{; type:... }}`: meta tag. Only `src:id:line:column` is handled; it
    /// replaces the current source location of `context`.
    /// Throws: ParserException for an unknown type or a missing field.
    void meta(Context context, string content) {
        content = content[1..$].strip;
        auto values = content.splitter(':');

        string type;
        if (!values.empty) {
            type = values.front.strip;
            values.popFront;
        }

        // Returns the next ':'-separated field, or reports which one is missing
        // instead of reading `front` on an empty range.
        string nextField(string what) {
            if (values.empty)
                throw new ParserException(context.loc, format("missing %s in meta tag '%s'", what, type));
            auto value = values.front;
            values.popFront;
            return value;
        }

        switch (type) {
        case "src":
            auto id = nextField("id").to!uint;
            auto line = nextField("line").to!uint;

            // Only the first space-separated word of the column field is used.
            auto columnWords = nextField("column").splitter(' ');
            if (columnWords.empty)
                throw new ParserException(context.loc, format("missing %s in meta tag '%s'", "column", type));
            auto column = columnWords.front.to!uint;

            context.loc = SourceLoc(id, line, column);
            break;
        default:
            throw new ParserException(context.loc, format("unknown meta type '%s'", type));
        }
    }

    /// `{{# ... }}`: currently produces no node.
    void define(Context context, string content) {
        //insert_.children ~= new Output(Token(context.loc), parseExpr(Source(source_.id, source_.parent, content), context.loc));
    }

    /// `{{@ expr [as name], ... }}`: opens a with statement and makes its body
    /// the insertion point.
    /// Throws: ParserException when the tag contains no expression.
    void withs(Context context, string content) {
        context.open(content[0..1], content[1..$]);
        content = content[1..$].strip;

        auto withStmt = parseWith(Source(source_.id, source_.parent, content), context.loc);
        // parseWith yields null for an empty tag; report it rather than dereference it.
        if (withStmt is null)
            throw new ParserException(context.loc, "expected a with expression");

        auto bodyBlock = create!StatementBlock(Token(context.loc));
        withStmt.children[$-1] = bodyBlock;
        insert_.children ~= withStmt;

        insertStack_ ~= insert_;
        insert_ = bodyBlock;
    }

    /// Wraps `text` in a call `__escape(text, "html")`.
    Node escapeHTML(Node text, SourceLoc loc) {
        auto escape = create!Identifier(Token(Token.Kind.Identifier, "__escape", loc));
        auto html = cast(Node)create!Constant(Token("html", Token.LiteralKind.String, 0, 0, loc));

        return cast(Node)create!FunctionCall(Token(loc), escape, [ text, html ]);
    }

    /// `{{~ args }}`: outputs `__translate(args...)`, HTML-escaped when `autoEscape` is set.
    void translate(Context context, string content, bool autoEscape) {
        content = content[1..$].strip;

        auto args = cast(Node[])parseExprList(Source(source_.id, source_.parent, content), context.loc);
        auto translateFunc = create!Identifier(Token(Token.Kind.Identifier, "__translate", context.loc));

        auto translated = cast(Node)create!FunctionCall(Token(context.loc), translateFunc, args);
        if (autoEscape)
            translated = escapeHTML(translated, context.loc);
        insert_.children ~= create!Output(Token(context.loc), translated);
    }

    /// Plain `{{ a, b }}`: outputs each expression, HTML-escaped when `autoEscape` is set.
    void interpolate(Context context, string content, bool autoEscape) {
        auto args = cast(Node[])parseExprList(Source(source_.id, source_.parent, content), context.loc);
        foreach (arg; args) {
            if (autoEscape)
                arg = escapeHTML(arg, context.loc);
            insert_.children ~= create!Output(Token(context.loc), arg);
        }
    }

    /// Emits literal template text as a string constant output, after optional
    /// compression and removal of carriage returns. Empty text emits nothing.
    void escaper(Context context, string content) {
        if (content.length) {
            if (options_.compress)
                content = compress(content, options_.compress);
            content = content.replace("\r", "");
        }

        if (content.length)
            insert_.children ~= create!Output(Token(context.loc), create!Constant(Token(content, Token.LiteralKind.String, 0, 0, context.loc)));
    }

    Source source_;

    StatementBlock insert_;         // block that new statements are appended to
    StatementBlock[] insertStack_;  // enclosing insertion points, innermost last

    SourceManager* mgr_;
    ParserOptions options_;

    string[] errors_;
}


/// Error raised by the expression parser, carrying the offending token.
private class ExprParserException : Exception {
    this(Token tok, string msg) {
        super(msg);
        this.tok = tok;
    }

    Token tok;
}


/// Parses a single expression; input left over after it is an error.
auto parseExpr(Source source, SourceLoc loc) {
    auto parser = ExprParser(source, loc);
    scope (success) parser.ensureEndOfInput();
    return parser.parseExpression();
}


/// Parses a comma-separated expression list; input left over after it is an error.
auto parseExprList(Source source, SourceLoc loc) {
    auto parser = ExprParser(source, loc);
    scope (success) parser.ensureEndOfInput();
    return parser.parseExpressionList();
}


/// Parses the contents of a loop tag; input left over after it is an error.
auto parseLoop(Source source, SourceLoc loc) {
    auto parser = ExprParser(source, loc);
    scope (success) parser.ensureEndOfInput();
    return parser.parseLoopStatement(loc);
}


/// Parses the contents of a with tag; input left over after it is an error.
auto parseWith(Source source, SourceLoc loc) {
    auto parser = ExprParser(source, loc);
    scope (success) parser.ensureEndOfInput();
    return parser.parseWithStatement(loc);
}


/// Recursive-descent parser for the expressions found inside tags.
private struct ExprParser {
    this(Source source, SourceLoc loc) {
        lexer_ = Lexer(source, loc);

        warmUp();
    }

    /// Parses primary, binary operators and an optional `?:`.
    /// Returns null when the current token cannot start an expression.
    Expression parseExpression() {
        auto start = tok_;

        if (auto left = parseExpressionPrimary()) {
            auto expr = parseBinaryOp(left, 0);
            if (!expr)
                expr = left;

            if (auto cond = parseConditional(expr))
                return create!Expression(start, cond);
            return create!Expression(start, expr);
        }
        return null;
    }

    /// Parses `expr (',' expr)*`. Returns null when there is no first expression.
    Expression[] parseExpressionList() {
        if (auto expr = parseExpression) {
            Expression[] exprs;
            exprs ~= expr;

            while (tok_.sep(',')) {
                eat();
                if (auto next = parseExpression()) {
                    exprs ~= next;
                    continue;
                }
                throw new ExprParserException(tok_, format("expected an expression following ',', not '%s'", tok_));
            }
            return exprs;
        }
        return null;
    }

    /// Parses `withExpr (',' withExpr)*`. The last child (the body) is left null
    /// for the caller to fill in. Returns null when there is no first expression.
    WithStatement parseWithStatement(SourceLoc loc) {
        if (auto expr = parseWithExpression) {
            WithExpression[] exprs;
            exprs ~= expr;

            while (tok_.sep(',')) {
                eat();
                if (auto next = parseWithExpression()) {
                    exprs ~= next;
                    continue;
                }
                throw new ExprParserException(tok_, format("expected a with expression following ',', not '%s'", tok_));
            }
            return create!WithStatement(Token(loc), cast(Node[])exprs, null);
        }
        return null;
    }

    /// Parses `expr ['as' identifier]`.
    WithExpression parseWithExpression() {
        if (auto expr = parseExpression()) {
            Token name;
            if (tok_.keyword(Token.KeywordKind.As)) {
                eat();
                if (!tok_.ident())
                    throw new ExprParserException(tok_, format("expected an identifier following 'as', not '%s'", tok_));
                name = eat();
            }
            return create!WithExpression(expr.tok, expr, name);
        }
        return null;
    }

    /// Parses `[key ','] name ';' expr ['..' expr]`.
    LoopStatement parseLoopStatement(SourceLoc loc) {
        if (!tok_.ident)
            throw new ExprParserException(tok_, format("expected an identifier, not '%s'", tok_));

        Token key = eat();
        Token name;
        if (tok_.sep(',')) {
            eat();
            if (!tok_.ident)
                throw new ExprParserException(tok_, format("expected an identifier, not '%s'", tok_));
            name = eat();
            if (!tok_.sep(';'))
                throw new ExprParserException(tok_, format("expected ';', not '%s'", tok_));
            eat();
        } else if (tok_.sep(';')) {
            eat();
            // Single identifier form: it is the value name and the key stays empty.
            swap(key, name);
        } else {
            throw new ExprParserException(tok_, format("expected ';' or ',' followed by an identifier, not '%s'", tok_));
        }

        Expression obj;
        Expression end;

        obj = parseExpression();
        if (!obj)
            throw new ExprParserException(tok_, format("expected an expression, not '%s'", tok_));

        if (tok_.name == "..") {
            eat();
            end = parseExpression();
            if (!end)
                throw new ExprParserException(tok_, format("expected an expression, not '%s'", tok_));
        }

        return create!LoopStatement(Token(loc), key, name, obj, end, null);
    }

    /// Parses a simple primary followed by any number of suffix operators.
    Node parseExpressionPrimary() {
        if (auto lexpr = parseExpressionPrimarySimple()) {
            if (tok_.sep()) {
                if (auto primary = parseSuffixOp(lexpr)) {
                    while (!tok_.eoi() && tok_.sep()) {
                        if (auto suffix = parseSuffixOp(primary)) {
                            primary = suffix;
                            continue;
                        }
                        return primary;
                    }
                    return primary;
                }
            }
            return lexpr;
        }
        return null;
    }

    /// Parses an identifier, literal, keyword constant, parenthesised expression,
    /// or a unary/prefix operator application. Returns null otherwise.
    Node parseExpressionPrimarySimple() {
        switch (tok_.kind) with (Token.Kind) {
        case Identifier:
            return parseIdentifierExpr();
        case Separator:
            if (tok_.sep('(')) {
                auto start = eat();
                auto expr = parseExpression();
                if (!expr)
                    throw new ExprParserException(tok_, format("expected an expression, not '%s'", tok_));

                close(')');
                return create!Expression(start, expr, true);
            }

            if (auto expr = parseUnaryOp())
                return expr;
            if (auto expr = parsePrefixOp())
                return expr;
            break;
        case Literal:
            return parseLiteralExpr();
        case Keyword:
            switch (tok_.kindKeyword) with (Token.KeywordKind) {
            case True:
                return create!Constant(eat());
            case False:
                return create!Constant(eat());
            case Null:
                return create!Constant(eat());
            default:
                throw new ExprParserException(tok_, format("unexpected '%s'", tok_));
            }
        case Undefined:
        case EndOfInput:
        default:
            break;
        }

        return null;
    }

    /// Placeholder; always returns null.
    Node parseArrayConstructor() {

        return null;
    }

    Node parseIdentifierExpr() {
        assert(tok_.ident());
        return create!Identifier(eat());
    }

    /// True for the single-character separators `- + ! ~ ^ *`.
    bool isUnaryOp(Token tok) const {
        if (tok.sep()) {
            switch (tok.length) {
            case 1:
                switch(tok.front) {
                case '-':
                case '+':
                case '!':
                case '~':
                case '^':
                case '*':
                    return true;
                default:
                    break;
                }
                break;
            default:
                break;
            }
        }
        return false;
    }

    /// Parses `op primary` for a unary operator; null when the token is not one.
    UnaryOp parseUnaryOp() {
        assert(tok_.sep());
        if (isUnaryOp(tok_)) {
            auto op = eat();
            auto expr = parseExpressionPrimary();
            if (!expr)
                throw new ExprParserException(tok_, format("expected an expression, not '%s'", tok_));
            return create!UnaryOp(op, expr);
        }
        return null;
    }

    /// Parses `++primary` / `--primary`; null when the token is neither.
    Node parsePrefixOp() {
        switch(tok_.name) {
        case "--":
        case "++":
            auto op = eat();
            auto expr = parseExpressionPrimary();
            if (!expr)
                throw new ExprParserException(tok_, format("expected an expression, not '%s'", tok_));
            return create!PrefixOp(op, expr);
        default:
            break;
        }
        return null;
    }

    /// Parses one suffix applied to `expr`: index/slice `[..]`, member `.name`,
    /// call `(args)`, single-argument call `!arg`, or `++` / `--`.
    /// Returns null when the current token is none of these.
    Node parseSuffixOp(Node expr) {
        switch(tok_.name) {
        case "[":
            auto op = eat();
            auto index = parseExpression();
            Expression end;
            if (!index)
                throw new ExprParserException(tok_, format("expected an index expression, not '%s'", tok_));

            if (tok_.sep("..")) {
                eat();
                end = parseExpression;
                if (!end)
                    throw new ExprParserException(tok_, format("expected an expression following '..', not '%s'", tok_));
            }
            close(']');

            if (end is null)
                return create!IndexOp(op, expr, index);
            return create!SliceOp(op, expr, index, end);
        case ".":
            auto op = eat();
            if (!tok_.ident())
                throw new ExprParserException(tok_, format("expected an identifier following '.', not '%s'", tok_));
            auto ident = eat();
            return create!DispatchOp(op, expr, ident);
        case "(":
            auto op = eat();

            Node[] args;

            if (!tok_.sep(')')) {
                while (true) {
                    if (auto arg = parseExpression()) {
                        args ~= arg;
                        if (tok_.sep(')'))
                            break;
                        if (tok_.sep(',')) {
                            eat();
                            continue;
                        }

                        throw new ExprParserException(tok_, format("expected ')' or ',' not '%s'", tok_));
                    }
                    break;
                }
            }

            close(')');
            return create!FunctionCall(op, expr, args);
        case "!":
            auto op = eat();

            Node[] args;
            if (tok_.literal()) {
                args ~= parseLiteralExpr();
            } else if (tok_.ident()) {
                args ~= parseIdentifierExpr();
            } else {
                throw new ExprParserException(tok_, format("expected a literal or an identifier following '!', not '%s'", tok_));
            }

            return create!FunctionCall(op, expr, args);
        case "--":
        case "++":
            auto op = eat();
            return create!SuffixOp(op, expr);
        default:
            break;
        }
        return null;
    }

    Constant parseLiteralExpr() {
        assert(tok_.literal());
        return create!Constant(eat());
    }

    /// Placeholder; always returns null.
    Node parseUnaryExpr() {
        return null;
    }

    /// Binding strength of binary operators; larger binds tighter.
    enum OperatorPriority : size_t {
        Logic = 10,
        Compare = 20,
        Arithmetic = 30,
        Algebraic = 40,
        Bitwise = 50,
    }

    /// Returns the priority of `name` as a binary operator, or 0 when it is not
    /// a supported one. An empty name is never an operator.
    size_t isBinaryOp(string name) const {
        if (name.empty)
            return 0;

        switch(name.front) with (OperatorPriority) {
        case '&':
        case '|':
            return (name.length == 1) ? 0/*Bitwise*/ : Logic;
        case '>': // >, >=, >>, >>=
        case '<': // <, <=, <<, <<=
            return ((name.length == 1) || (name[1] == '=')) ? Compare : 0/*Bitwise*/;
        case '^': // ^, ^=, ^^, ^^=
            return ((name.length == 1) || (name[1] == '=')) ? 0/*Bitwise*/ : Algebraic;
        case '+':
        case '-':
            return (name.length == 1) ? Arithmetic : 0;
        case '~':
        case '*':
        case '/':
        case '%':
            return (name.length == 1) ? Algebraic : 0;
        case '=':
            return (name.length == 2) ? Compare : 0;
        case '!':
            return (name.length == 2) ? Compare : 0;
        default:
            if ((name.length == 2) && (name == "in"))
                return Compare/*Bitwise*/;
            break;
        }
        return 0;
    }

    /// Precedence-climbing loop: folds binary operators of priority at least
    /// `prioExpr` onto `left`.
    /// Throws: ExprParserException when an operator has no right-hand operand.
    Node parseBinaryOp(Node left, size_t prioExpr) {
        while (true) {
            auto prio = isBinaryOp(tok_.name);
            if (!prio || (prio < prioExpr))
                return left;
            auto op = eat();
            if (prio == OperatorPriority.Logic) {
                // Logic operators take a whole expression as right operand.
                auto right = parseExpression();
                if (!right)
                    throw new ExprParserException(tok_, format("expected an expression following '%s', not '%s'", op, tok_));
                left = create!BinaryOp(op, left, right);
            } else {
                auto right = parseExpressionPrimary();
                // A dangling operator must be an error, not silently dropped.
                if (!right)
                    throw new ExprParserException(tok_, format("expected an expression following '%s', not '%s'", op, tok_));
                auto prioNext = isBinaryOp(tok_.name);
                if (prio < prioNext)
                    right = parseBinaryOp(right, prio + 1);
                left = create!BinaryOp(op, left, right);
            }
        }
    }

    /// Parses `? trueCase : falseCase` following `expr`; null when there is no `?`.
    Node parseConditional(Node expr) {
        if (!tok_.sep('?'))
            return null;

        auto op = eat();
        Node trueCase = parseExpression();
        if (!trueCase)
            throw new ExprParserException(tok_, format("expected an expression, not '%s'", tok_));

        if (!tok_.sep(':'))
            throw new ExprParserException(tok_, format("expected ':', not '%s'", tok_));
        eat();

        Node falseCase = parseExpression();
        if (!falseCase)
            throw new ExprParserException(tok_, format("expected an expression, not '%s'", tok_));

        return create!ConditionalExpression(op, expr, trueCase, falseCase);
    }

    /// Loads the current token and fills the look-ahead buffer from the lexer.
    void warmUp() {
        tok_ = lexer_.front;
        lexer_.popFront;

        if (!lexer_.empty) {
            foreach (ref atok; ahead_) {
                atok = lexer_.front;
                lexer_.popFront;
                if (lexer_.empty)
                    return;
            }
        }
    }

    /// Consumes and returns the current token, advancing to the next one.
    /// At end of input the current token is returned without advancing.
    Token eat() {
        if (tok_.eoi)
            return tok_;

        // Shift history towards the back, starting from the last slot so each
        // entry is moved before it is overwritten.
        foreach_reverse (i; 1..behind_.length)
            behind_[i] = behind_[i - 1];

        behind_[0] = tok_;
        tok_ = ahead_[0];
        foreach (i; 1..ahead_.length)
            ahead_[i - 1] = ahead_[i];

        if (!lexer_.empty) {
            ahead_[$ - 1] = lexer_.front;
            lexer_.popFront;
        }

        return behind_[0];
    }

    /// Consumes `separator`, or throws naming the token `by` that required it.
    void open(char separator, Token by) {
        if (!tok_.sep(separator))
            throw new ExprParserException(tok_, format("expected '%s' following '%s', not '%s'", separator, by, tok_));
        eat();
    }

    /// Consumes `separator`, or throws.
    void close(char separator) {
        if (!tok_.sep(separator))
            throw new ExprParserException(tok_, format("expected '%s', not '%s'", separator, tok_));
        eat();
    }

    /// Throws when unconsumed input remains.
    void ensureEndOfInput() {
        if (!tok_.eoi)
            throw new ExprParserException(tok_, format("unexpected '%s'", tok_));
    }

private:
    Token tok_;         // current token
    Token[1] behind_;   // most recently consumed tokens
    Token[1] ahead_;    // look-ahead tokens
    Lexer lexer_;
}