module vayne.source.preparser;


import std.array;
import std.format;
import std.string;
import std.traits;

import vayne.source.context;
import vayne.source.lexer;
import vayne.source.mime;
import vayne.source.source;
import vayne.source.token;


/// Options controlling the output produced by the pre-parser.
struct PreParserOptions {
    /// When set, `{{;src:...}}` markers are emitted next to non-blank output
    /// so later stages can map text back to its originating source location.
    bool lineNumbers;
}


/// Error raised by the pre-parser; carries the source location it refers to.
class PreParserException : Exception {
    this(SourceLoc loc, string msg) {
        super(msg);

        this.loc = loc;
    }

    /// Location the error was detected at.
    SourceLoc loc;
}


/**
 * Pre-parses the source registered under `id` in `mgr`.
 *
 * Handles the `{{!...}}` (comment), `{{&...}}` (include), `{{%...}}` (embed)
 * and `{{#...}}` (macro) tags and returns the resulting text.
 *
 * Throws: Exception with a formatted "location: message" text (followed by
 *         one "> location" line per enclosing context) on any failure.
 */
string preparse(ref SourceManager mgr, uint id, PreParserOptions options) {
    return PreParser(mgr, id, options)();
}


private struct PreParser {
    this(ref SourceManager mgr, uint id, PreParserOptions options) {
        mgr_ = &mgr;
        source_ = mgr_.get(id);
        options_ = options;
    }

    /// Runs the pre-parser on the root source and returns the expanded text.
    string opCall() {
        return parse();
    }

private:
    /**
     * Entry point: pushes the root macro/environment scopes, parses the root
     * source and converts any failure into a single formatted Exception.
     */
    string parse() {
        string errors;

        try {
            ++envs_.length;
            ++defs_.length;

            return parse(source_, SourceLoc(source_.id, 1, 0));
        } catch (Exception error) {
            if (auto ctxError = cast(ContextException)error) {
                errors = format("%s: %s", mgr_.loc(ctxError.loc), error.msg);
            } else if (auto preError = cast(PreParserException)error) {
                errors = format("%s: %s", mgr_.loc(preError.loc), error.msg);
            } else if (!contexts_.empty) {
                errors = format("%s: %s", mgr_.loc(contexts_.back.loc), error.msg);
            } else {
                // No context was pushed before the failure, so there is no
                // location to report; `contexts_.back` would be out of range.
                errors = error.msg;
            }

            // Contexts are only popped on success, so the stack still holds
            // the chain of sources that led to the failure.
            if (!contexts_.empty) {
                foreach_reverse (context; contexts_[0..$-1])
                    errors ~= format("\n> %s", mgr_.loc(context.loc));
            }
        }

        throw new Exception(errors);
    }

    /**
     * Parses one source buffer, replacing every recognised tag with the text
     * returned by `replacer` and copying everything else verbatim.
     *
     * Text and replacements are dropped while a macro body is being recorded
     * (`def_` is set) or while inside a `!!` block comment.
     */
    string parse(Source source, SourceLoc loc) {
        auto context = new Context(source, loc);
        contexts_ ~= context;
        scope (success) --contexts_.length;

        Appender!string app;
        app.reserve(16 * 1024);

        // A tag needs at least "{{" plus a tag character; avoid the unsigned
        // underflow of `length - 2` for buffers shorter than two characters.
        const total = context.remaining.length;
        const end = (total >= 2) ? total - 2 : 0;
        while (context.cursor < end) {
            auto remaining = context.remaining;
            auto indexOpen = remaining.indexOf("{{");
            while (indexOpen != -1) {
                const tagAt = cast(size_t)indexOpen + 2;
                if (tagAt >= remaining.length) {
                    // "{{" sits at the very end of the buffer: there is no
                    // tag character to inspect, so treat it as plain text
                    // instead of slicing past the end.
                    indexOpen = -1;
                    break;
                }

                auto tag = remaining[tagAt..tagAt + 1];
                if ((tag == "#") || (tag == "&") || (tag == "%") || (tag == "!"))
                    break;
                indexOpen = remaining.indexOf("{{", tagAt);
            }
            if (indexOpen == -1)
                break;

            // Evaluated before the tag is processed: the tag that opens or
            // closes a comment is judged by the state in front of it.
            auto inComment = context.isOpen && (context.open().tag == "!!");
            if (!def_ && !inComment)
                app.put(remaining[0..indexOpen]);
            context.advance(indexOpen);

            const contentStart = indexOpen + 2;
            auto indexClose = remaining.indexOf("}}", contentStart);
            if (indexClose == -1)
                throw new PreParserException(context.loc, "missing '}}' to close tag '{{'");

            context.advance(2);
            // From here on `indexClose` is the length of the tag content.
            indexClose -= contentStart;

            auto replaced = replacer(context.source.buffer[context.cursor..context.cursor + indexClose], context);
            if (!def_ && !inComment)
                app.put(replaced);
            context.advance(indexClose + 2);
        }
        context.expectClosed();

        app.put((context.cursor > 0) ? context.remaining() : context.source.buffer);

        if (needsLineNumbers(app.data))
            return sourceInfo(loc) ~ app.data;
        return app.data;
    }

    /**
     * Handles `{{&name}}`: opens the named source, records the dependency and
     * returns its pre-parsed text.
     */
    string include(string content, Context context) {
        // NOTE(review): the `false` flag presumably selects text (non-binary)
        // mode - confirm against SourceManager.open.
        auto source = mgr_.open(content[1..$].strip, false, context.source.id);
        mgr_.dependency(context.source.id, source.id, context.loc);

        auto result = parse(source, SourceLoc(source.id, 1, 0));

        // Restore the including file's location after the included text.
        if (needsLineNumbers(result))
            result ~= sourceInfo(context.loc);
        return result;
    }

    /**
     * Handles `{{%name}}` and `{{%%name}}`: embeds the named source verbatim,
     * or as a base64 `data:` URI when the tag character is doubled. The
     * result is prefixed with a `{{;raw:N}}` marker giving its length.
     */
    string embed(string content, Context context) {
        // 1 when the tag is doubled ("%%"), which also skips the extra char.
        auto mime = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
        // NOTE(review): the `true` flag presumably selects binary mode -
        // confirm against SourceManager.open.
        auto source = mgr_.open(content[1 + mime..$].strip, true, context.source.id);
        mgr_.dependency(context.source.id, source.id, context.loc);

        string result;
        if (mime) {
            result = format("data:%s;base64,%s", mgr_.name(source.id).mimeType, encode(source.buffer));
        } else {
            result = cast(string)source.buffer;
        }

        result = rawInfo(result.length) ~ result;

        if (needsLineNumbers(result))
            result ~= sourceInfo(context.loc);
        return result;
    }

    /**
     * Handles `{{!...}}` comments. A single `!` is a self-contained comment;
     * `{{!!...}}` opens a block comment and `{{!!/...}}` closes it.
     * Always returns null - comments produce no output.
     */
    string comment(string content, Context context) {
        const block = (content.length > 1) && (content[1] == content[0]);
        if (block) {
            if ((content.length == 2) || (content[2] != '/')) {
                context.open(content[0..2], content[2..$]);
            } else {
                context.expectOpen("!!", "!!/");
                context.close();
            }
        }
        return null;
    }

    /**
     * Handles `{{#...}}` tags: macro definition (`def`), removal (`undef`),
     * pass-through of `set` / `push` / `pop`, macro expansion (identifier)
     * and the closing `{{#/}}` of a block definition.
     *
     * Returns: the replacement text, or null when the tag emits nothing.
     */
    string define(string content, Context context) {
        auto lex = Lexer(Source(source_.id, source_.parent, content[1..$].strip));
        auto tok = expect(lex, context, Token.KeywordKind.Def, Token.KeywordKind.Undef, Token.KeywordKind.Push, Token.KeywordKind.Pop, Token.KeywordKind.Set, Token.Kind.Identifier, "/");

        if (tok.keyword(Token.KeywordKind.Def)) {
            auto def = parseDef(lex, context);

            // Re-parsing the very same definition (same location) is allowed.
            if (auto pdef = def.name in defs_.back) {
                if (pdef.loc != def.loc)
                    throw new PreParserException(context.loc, format("redefinition of macro '%s' - first defined in '%s' - if this is intended undefine first", def.name, mgr_.loc(pdef.loc)));
            }

            if (def.flags & Def.Flags.Inline) {
                defs_.back[def.name] = def;
            } else {
                context.open(content[0..1], content[1..$]);
                def.flags |= Def.Flags.NotYetDefined;

                // Only the outermost definition is recorded; nested ones are
                // part of its body and are parsed when it is expanded.
                if (depth_ == 0)
                    def_ = &(defs_.back[def.name] = def);
                ++depth_;
            }
        } else if (tok.keyword(Token.KeywordKind.Undef)) {
            tok = expect(lex, context, Token.Kind.Identifier);

            auto name = tok.value;
            if (auto pdef = name in defs_.back) {
                if (pdef.flags & Def.Flags.NotYetDefined)
                    throw new PreParserException(context.loc, format("trying to undefine macro '%s' inside it's own definition", name));

                defs_.back.remove(name);
            } else {
                throw new PreParserException(context.loc, format("trying to undefine unknown macro '%s'", name));
            }
        } else if (tok.keyword(Token.KeywordKind.Set) || tok.keyword(Token.KeywordKind.Push)) {
            // Only validated here; the tag itself is passed through untouched.
            expect(lex, context, Token.Kind.Identifier);
            expect(lex, context, Token.Kind.Literal, Token.KeywordKind.True, Token.KeywordKind.False);
            expect(lex, context, Token.Kind.EndOfInput);

            return "{{" ~ content ~ "}}";
        } else if (tok.keyword(Token.KeywordKind.Pop)) {
            // Only validated here; the tag itself is passed through untouched.
            expect(lex, context, Token.Kind.Identifier);
            expect(lex, context, Token.Kind.EndOfInput);

            return "{{" ~ content ~ "}}";
        } else if (tok.ident) {
            // No expansion while a macro body is being recorded.
            if (!def_)
                return expand(lex, context, tok.value);
        } else {
            close(lex, context);
        }

        return null;
    }

    /**
     * Handles the closing `{{#/}}` tag. When it closes the outermost
     * definition, the text between the opening and closing tags becomes the
     * macro's value.
     */
    void close(ref Lexer lex, Context context) {
        expect(lex, context, Token.Kind.EndOfInput);

        auto tag = context.close();

        // Without this check a `{{#/}}` that closes a non-macro tag would
        // wrap `depth_` around to size_t.max.
        if (depth_ == 0)
            throw new PreParserException(context.loc, "unexpected '/' - no macro definition is open");

        if (depth_ == 1) {
            // Skip "{{" + content + "}}" of the opening tag; the tag content
            // is presumably stored without the leading '#', hence 3 rather
            // than 4. The "{{" of the closing tag has already been consumed.
            auto start = tag.cursor + tag.content.length + 3;
            auto end = context.cursor - 2;
            auto value = context.source.buffer[start..end];

            assert(def_);
            assert(def_.flags & Def.Flags.NotYetDefined);

            def_.value = value;
            def_.flags &= ~Def.Flags.NotYetDefined;
            def_ = null;
        }

        --depth_;
    }

    /**
     * Expands `name`, searching the scopes from innermost to outermost.
     * Macro arguments (environment entries) take precedence over macro
     * definitions of the same scope.
     *
     * A trailing `?` makes the expansion optional: an unknown name then
     * yields null instead of an error.
     */
    string expand(ref Lexer lex, Context context, string name) {
        auto optional = (lex.front.kind == Token.Kind.Separator) && (lex.front.name == "?");
        if (optional)
            lex.popFront;

        foreach (k; 0..envs_.length) {
            auto top = envs_.length - k - 1;
            auto env = envs_[top];

            if (auto penv = name in env) {
                if (needsLineNumbers(*penv))
                    return *penv ~ sourceInfo(context.loc);
                return *penv;
            }

            auto defs = defs_[top];
            if (auto pdef = name in defs) {
                auto tok = lex.front;

                string[] args;

                if (!tok.eoi && tok.sep('(')) {
                    lex.popFront;

                    tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);

                    if (!tok.sep(')')) {
                        while (true) {
                            // Identifiers are expanded themselves, literals
                            // are passed as their unescaped text.
                            if (tok.ident) {
                                args ~= expand(lex, context, tok.value);
                            } else {
                                args ~= tok.unescaped;
                            }

                            tok = expect(lex, context, ",", ")");
                            if (tok.sep(',')) {
                                tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
                                continue;
                            }
                            if (tok.sep(')'))
                                break;
                        }
                    }

                    if (pdef.args.length < args.length)
                        throw new PreParserException(context.loc, format("too many parameters for macro '%s'", pdef.pretty(name)));
                }

                // Missing trailing arguments are bound to null.
                string[string] envArgs;

                foreach (i, param; pdef.args)
                    envArgs[param] = (i < args.length) ? args[i] : null;

                {
                    // The macro body is parsed in fresh scopes so its
                    // definitions and arguments do not leak to the caller.
                    ++defs_.length;
                    scope(exit) --defs_.length;

                    envs_ ~= envArgs;
                    scope(exit) --envs_.length;

                    auto source = mgr_.add(format("#%s at %s", pdef.name, mgr_.name(pdef.loc.id)), pdef.value, pdef.loc.id);
                    auto result = parse(source, SourceLoc(source.id, pdef.loc.line, pdef.loc.column));
                    if (needsLineNumbers(result))
                        result ~= sourceInfo(context.loc);
                    return result;
                }
            }
        }

        if (!optional)
            throw new PreParserException(context.loc, format("unknown macro '%s'", name));
        return null;
    }

    /**
     * Parses a formal parameter list after its opening `(` has been consumed
     * and returns the parameter names. A trailing comma is accepted.
     */
    string[] parseFormalArgList(ref Lexer lex, Context context) {
        string[] args;

        auto tok = expect(lex, context, ")", Token.Kind.Identifier);

        if (!tok.sep(')')) {
            while (true) {
                args ~= tok.value;

                tok = expect(lex, context, ",", ")");
                if (tok.sep(',')) {
                    tok = expect(lex, context, ")", Token.Kind.Identifier);
                    continue;
                }
                if (tok.sep(')'))
                    break;
            }
        }

        return args;
    }


    /**
     * Parses the header of a `def` tag: the macro name followed by either an
     * argument list, a `:` (inline definition) or nothing.
     */
    Def parseDef(ref Lexer lex, Context context) {
        auto tok = expect(lex, context, Token.Kind.Identifier);

        Def def;
        def.name = tok.value;
        def.loc = context.loc;

        tok = expect(lex, context, "(", ":", Token.Kind.EndOfInput);

        if (tok.kind != Token.Kind.EndOfInput) {
            if (tok.sep("("))
                def.args = parseFormalArgList(lex, context);

            // NOTE(review): `tok` still holds "(" after an argument list, so
            // a ':' following the list is rejected by the `expect` below, and
            // the inline branch discards one token without storing a value.
            // Looks unfinished - confirm the intended inline syntax.
            if (tok.sep(":")) {
                def.flags |= Def.Flags.Inline;
                lex.popFront;
            } else {
                expect(lex, context, Token.Kind.EndOfInput);
            }
        }

        return def;
    }

    /**
     * Dispatches a tag's content (the text between `{{` and `}}`) to its
     * handler, selected by the first character.
     */
    string replacer(string content, Context context) {
        if (content.length > 0) {
            auto tag = content[0];
            switch(tag) {
            case '!':
                return comment(content, context);
            case '&':
                return include(content, context).strip;
            case '%':
                return embed(content, context);
            case '#':
                return define(content, context).strip;
            default:
                // parse() only forwards the four tag characters above.
                assert(0);
            }
        }
        return null;
    }

    /// True when line-number markers are enabled and `content` is not blank.
    auto needsLineNumbers(string content) const {
        return options_.lineNumbers && !isAllWhite(content);
    }

    /// Formats the `{{;src:id:line:column name}}` location marker for `loc`.
    string sourceInfo(SourceLoc loc) {
        return format("{{;src:%d:%d:%d %s}}", loc.id, loc.line, loc.column, mgr_.name(loc.id));
    }

    /// Formats the `{{;raw:length}}` marker that precedes embedded content.
    string rawInfo(size_t length) {
        return format("{{;raw:%s}}", length);
    }

    /**
     * Consumes the next token and throws if it matches any of `args`.
     *
     * Each argument may be a Token, a separator string, a Token.Kind or a
     * Token.KeywordKind.
     *
     * Returns: the consumed token.
     */
    Token expectNot(Args...)(ref Lexer lex, Context context, auto ref Args args) {
        auto tok = lex.front;
        lex.popFront;

        foreach(i, Arg; Args) {
            static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
            // Each branch skips to the next candidate when it does NOT match.
            static if (is(Arg == Token)) {
                if (tok != args[i])
                    continue;
            } else static if (isSomeString!(OriginalType!Arg)) {
                if ((tok.kind != Token.Kind.Separator) || (tok.value != args[i]))
                    continue;
            } else static if (is(Arg == Token.Kind)) {
                if (tok.kind != args[i])
                    continue;
            } else static if (is(Arg == Token.KeywordKind)) {
                if (!tok.keyword(args[i]))
                    continue;
            }

            {
                static if (is(Arg == Token)) {
                    throw new PreParserException(context.loc, format("unexpected '%s'", args[i]));
                } else {
                    throw new PreParserException(context.loc, format("unexpected '%s'", tok.value));
                }
            }
        }

        return tok;
    }

    /**
     * Consumes the next token and returns it if it matches any of `args`;
     * otherwise throws a PreParserException listing what was expected.
     *
     * Each argument may be a Token, a separator string, a Token.Kind or a
     * Token.KeywordKind.
     */
    Token expect(Args...)(ref Lexer lex, Context context, auto ref Args args) {
        auto tok = lex.front;
        lex.popFront;

        foreach(i, Arg; Args) {
            static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
            static if (is(Arg == Token)) {
                if (tok == args[i])
                    return tok;
            } else static if (isSomeString!(OriginalType!Arg)) {
                if ((tok.kind == Token.Kind.Separator) && (tok.value == args[i]))
                    return tok;
            } else static if (is(Arg == Token.Kind)) {
                if (tok.kind == args[i])
                    return tok;
            } else static if (is(Arg == Token.KeywordKind)) {
                if (tok.keyword(args[i]))
                    return tok;
            }
        }

        // Nothing matched: build "expected 'a', 'b', or 'c' but found 'x'".
        auto exception = appender!string;

        foreach(i, arg; args) {
            if (i == 0) {
                formattedWrite(&exception, "expected '%s'", arg);
            } else if (i + 1 == args.length) {
                formattedWrite(&exception, ", or '%s'", arg);
            } else {
                formattedWrite(&exception, ", '%s'", arg);
            }
        }

        formattedWrite(&exception, " but found '%s'", tok);

        throw new PreParserException(context.loc, exception.data);
    }

    /// A macro definition.
    static struct Def {
        enum Flags : uint {
            /// Set while the macro's body is still being recorded.
            NotYetDefined = 1 << 0,
            /// Set for definitions declared with a ':' in their header.
            Inline = 1 << 1,
        }

        string name;
        string value;
        string[] args;
        uint flags;

        SourceLoc loc;

        /// Returns "name(arg1, arg2, ...)" for use in error messages.
        string pretty(string name) const {
            Appender!string app;
            app.reserve(1024);

            app.put(name);

            app.put("(");

            foreach(i, arg; args) {
                app.put(arg);
                if (i + 1 != args.length)
                    app.put(", ");
            }
            app.put(")");

            return app.data;
        }
    }

    // Definition currently being recorded (null when none) and the nesting
    // depth of open `def` blocks.
    Def* def_;
    size_t depth_;

    // Scope stacks: macro definitions and macro-argument environments.
    Def[string][] defs_;
    string[string][] envs_;

    Source source_;

    PreParserOptions options_;

    SourceManager* mgr_;
    // Stack of sources being parsed; kept intact on failure for reporting.
    Context[] contexts_;
}


/// True when every element of `range` is whitespace (also true when empty).
private @property bool isAllWhite(R)(R range) {
    import std.uni : isWhite;
    // Iterates elements as-is (code units for a string) without decoding, so
    // it never throws on the raw buffers that `embed` may pass through here.
    foreach (ch; range) {
        if (!isWhite(ch))
            return false;
    }
    return true;
}