/**
 * Template pre-parser.
 *
 * Scans a source buffer for `{{#...}}` (macro definition / expansion) and
 * `{{&...}}` (include / embed) tags, resolves them, and returns the resulting
 * text. All other `{{...}}` tags are left untouched.
 */
module vayne.source.preparser;


import std.array;
import std.format;
import std.string;
import std.traits;

import vayne.source.context;
import vayne.source.lexer;
import vayne.source.mime;
import vayne.source.source;
import vayne.source.token;


/// Options controlling the output of the pre-parser.
struct PreParserOptions {
    /// When set, `{{;src:...}}` source-location markers are emitted around
    /// non-whitespace output (see `PreParser.sourceInfo`).
    bool lineNumbers;
}


/// Exception raised for pre-parser errors; carries the location of the failure.
class PreParserException : Exception {
    this(SourceLoc loc, string msg) {
        super(msg);

        this.loc = loc;
    }

    /// Location at which the error was detected.
    SourceLoc loc;
}


/**
 * Pre-parses the source registered under `id` in `mgr`.
 *
 * Params:
 *   mgr     = source manager used to look up, open and register sources
 *   id      = identifier of the root source
 *   options = pre-parser options
 *
 * Returns: the source text with all `{{#...}}` and `{{&...}}` tags resolved.
 *
 * Throws: `Exception` whose message holds the formatted error location, the
 *         error message and one line per enclosing parse context.
 */
string preparse(ref SourceManager mgr, uint id, PreParserOptions options) {
    return PreParser(mgr, id, options)();
}


/// Single-use worker that holds the state of one pre-parse run.
private struct PreParser {
    this(ref SourceManager mgr, uint id, PreParserOptions options) {
        mgr_ = &mgr;
        source_ = mgr_.get(id);
        options_ = options;
    }

    /// Runs the pre-parser on the root source.
    string opCall() {
        return parse();
    }

private:
    /**
     * Parses the root source and converts any `Exception` into a plain
     * `Exception` whose message is prefixed with the formatted location and
     * followed by the chain of enclosing contexts.
     */
    string parse() {
        string errors;

        try {
            // one environment / definition scope for the root source
            ++envs_.length;
            ++defs_.length;

            return parse(source_, SourceLoc(source_.id, 1, 0));
        } catch(Exception error) {
            if (auto ctxError = cast(ContextException)error) {
                errors = format("%s: %s", mgr_.loc(ctxError.loc), error.msg);
            } else if (auto preError = cast(PreParserException)error) {
                errors = format("%s: %s", mgr_.loc(preError.loc), error.msg);
            } else if (!contexts_.empty) {
                errors = format("%s: %s", mgr_.loc(contexts_.back.loc), error.msg);
            } else {
                // the failure happened before any context was pushed, so there
                // is no location to report
                errors = error.msg;
            }

            // contexts are only popped on success, so whatever is left is the
            // chain of sources that led to the failure
            if (!contexts_.empty) {
                foreach_reverse (context; contexts_[0..$-1])
                    errors ~= format("\n> %s", mgr_.loc(context.loc));
            }
        }

        throw new Exception(errors);
    }

    /**
     * Parses a single source, resolving every `{{#...}}` and `{{&...}}` tag.
     *
     * Params:
     *   source = source to parse
     *   loc    = location used for the new context and for the leading
     *            source-info marker
     *
     * Returns: the processed text, prefixed with a source-info marker when
     *          line numbers are enabled and the text is not all whitespace.
     *
     * Throws: `PreParserException` when a tag is not closed with `}}`.
     */
    string parse(Source source, SourceLoc loc) {
        auto context = new Context(source, loc);
        contexts_ ~= context;
        scope (success) --contexts_.length;

        Appender!string app;
        app.reserve(16 * 1024);

        // a tag needs at least the two characters of its opener; clamp so that
        // buffers shorter than that do not wrap around
        const total = context.remaining.length;
        const end = (total > 2) ? total - 2 : 0;
        while (context.cursor < end) {
            auto remaining = context.remaining;
            auto indexOpen = remaining.indexOf("{{");
            while (indexOpen != -1) {
                const tagStart = indexOpen + 2;
                // "{{" at the very end of the buffer has no tag character
                if (cast(size_t)tagStart >= remaining.length) {
                    indexOpen = -1;
                    break;
                }

                const tag = remaining[tagStart];
                if ((tag == '#') || (tag == '&'))
                    break;
                indexOpen = remaining.indexOf("{{", tagStart);
            }
            if (indexOpen == -1)
                break;

            // text is dropped while inside a (non-inline) macro definition
            if (!def_)
                app.put(remaining[0..indexOpen]);
            context.advance(indexOpen);

            // find the closing "}}" that is not inside a quoted string
            const contentStart = indexOpen + 2;
            auto indexClose = remaining.indexOf("}}", contentStart);
            while (indexClose != -1) {
                if (balancedQuotes(remaining[contentStart..indexClose]))
                    break;

                indexClose = remaining.indexOf("}}", indexClose + 2);
            }

            if (indexClose == -1)
                throw new PreParserException(context.loc, "missing '}}' to close tag '{{'");

            context.advance(2);
            indexClose -= contentStart; // now the length of the tag content

            auto replaced = replacer(context.source.buffer[context.cursor..context.cursor + indexClose], context);
            if (!def_)
                app.put(replaced);
            context.advance(indexClose + 2);
        }
        context.expectClosed();

        app.put((context.cursor > 0) ? context.remaining() : context.source.buffer);

        if (needsLineNumbers(app.data))
            return sourceInfo(loc) ~ app.data;
        return app.data;
    }

    /**
     * Handles an `{{&name}}` (include) or `{{&&name}}` (embed) tag.
     *
     * An include is parsed recursively; an embed is turned into a
     * `data:<mime>;base64,<payload>` string.
     *
     * Params:
     *   content = tag content starting at the `&` character
     *   context = context of the source containing the tag
     */
    string include(string content, Context context) {
        // a doubled tag character ("&&") selects embedding
        auto embed = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
        auto source = mgr_.open(content[1 + embed..$].strip, embed != 0, context.source.id);
        mgr_.dependency(context.source.id, source.id, context.loc);

        string result;
        if (!embed) {
            result = parse(source, SourceLoc(source.id, 1, 0));
        } else {
            auto name = mgr_.name(source.id);
            result = format("data:%s;base64,%s", name.mimeType, encode(source.buffer));
        }

        // switch the location back to the including source
        if (needsLineNumbers(result))
            result ~= sourceInfo(context.loc);
        return result;
    }

    /**
     * Handles a `{{#...}}` tag: `def`, `undef`, a macro expansion, or the
     * `/` that closes a block definition.
     *
     * Params:
     *   content = tag content starting at the `#` character
     *   context = context of the source containing the tag
     *
     * Returns: the expansion for a macro use outside of a definition,
     *          `null` otherwise.
     *
     * Throws: `PreParserException` on redefinition, or when undefining an
     *         unknown macro or one that is still being defined.
     */
    string define(string content, Context context) {
        auto lex = Lexer(Source(source_.id, source_.parent, content[1..$].strip));
        auto tok = expect(lex, context, Token.KeywordKind.Def, Token.KeywordKind.Undef, Token.Kind.Identifier, "/");

        if (tok.keyword(Token.KeywordKind.Def)) {
            auto def = parseDef(lex, context);

            // the same definition seen again at the same location is accepted
            if (auto pdef = def.name in defs_.back) {
                if (pdef.loc != def.loc)
                    throw new PreParserException(context.loc, format("redefinition of macro '%s' - first defined in '%s' - if this is intended undefine first", def.name, mgr_.loc(pdef.loc)));
            }

            if (def.flags & Def.Flags.Inline) {
                defs_.back[def.name] = def;
            } else {
                context.open(content[0..1], content[1..$]);
                def.flags |= Def.Flags.NotYetDefined;

                // only the outermost definition is recorded; nested ones are
                // just counted so the matching close can be found
                if (depth_ == 0)
                    def_ = &(defs_.back[def.name] = def);
                ++depth_;
            }
        } else if (tok.keyword(Token.KeywordKind.Undef)) {
            tok = expect(lex, context, Token.Kind.Identifier);

            auto name = tok.value;
            if (auto pdef = name in defs_.back) {
                if (pdef.flags & Def.Flags.NotYetDefined)
                    throw new PreParserException(context.loc, format("trying to undefine macro '%s' inside it's own definition", name));

                defs_.back.remove(name);
            } else {
                throw new PreParserException(context.loc, format("trying to undefine unknown macro '%s'", name));
            }
        } else if (tok.ident) {
            // macro uses inside a definition body are expanded later, when the
            // body itself is parsed
            if (!def_)
                return expand(lex, context, tok.value);
        } else {
            close(lex, context);
        }

        return null;
    }

    /**
     * Handles the closing tag of a block definition. When it closes the
     * outermost definition, the text between the opening and the closing tag
     * becomes the value of the macro.
     */
    void close(ref Lexer lex, Context context) {
        expect(lex, context, Token.Kind.EndOfInput);

        auto tag = context.close();

        if (depth_ == 1) {
            // NOTE(review): assumes tag.cursor is the position of the tag
            // character recorded by context.open, so that +3 skips the tag
            // character and the closing "}}" - confirm against Context
            auto start = tag.cursor + tag.content.length + 3;
            // the cursor sits just after the "{{" of the closing tag
            auto end = context.cursor - 2;
            auto value = context.source.buffer[start..end];

            assert(def_);
            assert(def_.flags & Def.Flags.NotYetDefined);

            def_.value = value;
            def_.flags &= ~Def.Flags.NotYetDefined;
            def_ = null;
        }

        --depth_;
    }

    /**
     * Expands the macro or macro argument called `name`.
     *
     * Scopes are searched from the innermost to the outermost; within each
     * scope, arguments (`envs_`) take precedence over definitions (`defs_`).
     * A `?` following the name makes the expansion optional.
     *
     * Returns: the expanded text, or `null` for an unknown optional name.
     *
     * Throws: `PreParserException` for an unknown non-optional name or when
     *         more arguments are passed than the macro declares.
     */
    string expand(ref Lexer lex, Context context, string name) {
        auto optional = (lex.front.kind == Token.Kind.Separator) && (lex.front.name == "?");
        if (optional)
            lex.popFront;

        foreach (k; 0..envs_.length) {
            auto top = envs_.length - k - 1;
            auto env = envs_[top];

            if (auto penv = name in env) {
                if (needsLineNumbers(*penv))
                    return *penv ~ sourceInfo(context.loc);
                return *penv;
            }

            auto defs = defs_[top];
            if (auto pdef = name in defs) {
                auto tok = lex.front;

                string[] args;

                if (!tok.eoi && tok.sep('(')) {
                    lex.popFront;

                    tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);

                    if (!tok.sep(')')) {
                        while (true) {
                            // identifiers are expanded, literals are taken as-is
                            if (tok.ident) {
                                args ~= expand(lex, context, tok.value);
                            } else {
                                args ~= tok.unescaped;
                            }

                            tok = expect(lex, context, ",", ")");
                            if (tok.sep(',')) {
                                tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
                                continue;
                            }
                            if (tok.sep(')'))
                                break;
                        }
                    }

                    if (pdef.args.length < args.length)
                        throw new PreParserException(context.loc, format("too many parameters for macro '%s'", pdef.pretty(name)));
                }

                string[string] envArgs;

                // missing trailing arguments are bound to null
                foreach(i, arg; pdef.args)
                    envArgs[pdef.args[i]] = (i < args.length) ? args[i] : null;

                {
                    // the macro body gets its own definition and argument scope
                    ++defs_.length;
                    scope(exit) --defs_.length;

                    envs_ ~= envArgs;
                    scope(exit) --envs_.length;

                    auto source = mgr_.add(format("#%s at %s", pdef.name, mgr_.name(pdef.loc.id)), pdef.value, pdef.loc.id);
                    auto result = parse(source, SourceLoc(source.id, pdef.loc.line, pdef.loc.column));
                    if (needsLineNumbers(result))
                        result ~= sourceInfo(context.loc);
                    return result;
                }
            }
        }

        if (!optional)
            throw new PreParserException(context.loc, format("unknown macro '%s'", name));
        return null;
    }

    /**
     * Parses a comma separated list of identifiers up to and including the
     * closing `)`; the opening `(` has already been consumed by the caller.
     */
    string[] parseFormalArgList(ref Lexer lex, Context context) {
        string[] args;

        auto tok = expect(lex, context, ")", Token.Kind.Identifier);

        if (!tok.sep(')')) {
            while (true) {
                args ~= tok.value;

                tok = expect(lex, context, ",", ")");
                if (tok.sep(',')) {
                    tok = expect(lex, context, ")", Token.Kind.Identifier);
                    continue;
                }
                if (tok.sep(')'))
                    break;
            }
        }

        return args;
    }


    /**
     * Parses the part of a `def` tag that follows the keyword: the macro
     * name, followed by either a formal argument list, a `:` (which marks the
     * definition as inline) or the end of the tag.
     */
    Def parseDef(ref Lexer lex, Context context) {
        auto tok = expect(lex, context, Token.Kind.Identifier);

        Def def;
        def.name = tok.value;
        def.loc = context.loc;

        tok = expect(lex, context, "(", ":", Token.Kind.EndOfInput);

        if (tok.kind != Token.Kind.EndOfInput) {
            if (tok.sep("("))
                def.args = parseFormalArgList(lex, context);

            if (tok.sep(":")) {
                def.flags |= Def.Flags.Inline;
                lex.popFront;
            } else {
                expect(lex, context, Token.Kind.EndOfInput);
            }
        }

        return def;
    }

    /**
     * Dispatches a tag on its first character: `&` to `include`, `#` to
     * `define`. The result is stripped of surrounding whitespace. Empty
     * content yields `null`.
     */
    string replacer(string content, Context context) {
        if (content.length > 0) {
            auto tag = content[0];
            switch(tag) {
            case '&':
                return include(content, context).strip;
            case '#':
                return define(content, context).strip;
            default:
                // parse() only forwards '#' and '&' tags
                assert(0);
            }
        }
        return null;
    }

    /// True when line numbers are enabled and `content` is not all whitespace.
    auto needsLineNumbers(string content) const {
        return options_.lineNumbers && !isAllWhite(content);
    }

    /// Formats a `{{;src:<id>:<line>:<column> <name>}}` marker for `loc`.
    string sourceInfo(SourceLoc loc) {
        return format("{{;src:%d:%d:%d %s}}", loc.id, loc.line, loc.column, mgr_.name(loc.id));
    }

private:
    /**
     * Consumes the next token and throws a `PreParserException` if it matches
     * any of `args` (a `Token`, a separator string, a `Token.Kind` or a
     * `Token.KeywordKind`).
     *
     * Returns: the consumed token.
     */
    Token expectNot(Args...)(ref Lexer lex, Context context, auto ref Args args) {
        auto tok = lex.front;
        lex.popFront;

        foreach(i, Arg; Args) {
            static assert(is(Arg == Token) || isSomeString!Arg || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
            static if (is(Arg == Token)) {
                if (tok != args[i])
                    continue;
            } else static if (isSomeString!Arg) {
                if ((tok.kind != Token.Kind.Separator) || (tok.value != args[i]))
                    continue;
            } else static if (is(Arg == Token.Kind)) {
                if (tok.kind != args[i])
                    continue;
            } else static if (is(Arg == Token.KeywordKind)) {
                if (!tok.keyword(args[i]))
                    continue;
            }

            {
                static if (is(Arg == Token)) {
                    throw new PreParserException(context.loc, format("unexpected '%s'", args[i]));
                } else {
                    throw new PreParserException(context.loc, format("unexpected '%s'", tok.value));
                }
            }
        }

        return tok;
    }

    /**
     * Consumes the next token and returns it if it matches any of `args`
     * (a `Token`, a separator string, a `Token.Kind` or a
     * `Token.KeywordKind`).
     *
     * Throws: `PreParserException` listing the expected alternatives and the
     *         token that was found instead.
     */
    Token expect(Args...)(ref Lexer lex, Context context, auto ref Args args) {
        auto tok = lex.front;
        lex.popFront;

        foreach(i, Arg; Args) {
            static assert(is(Arg == Token) || isSomeString!Arg || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
            static if (is(Arg == Token)) {
                if (tok == args[i])
                    return tok;
            } else static if (isSomeString!Arg) {
                if ((tok.kind == Token.Kind.Separator) && (tok.value == args[i]))
                    return tok;
            } else static if (is(Arg == Token.Kind)) {
                if (tok.kind == args[i])
                    return tok;
            } else static if (is(Arg == Token.KeywordKind)) {
                if (tok.keyword(args[i]))
                    return tok;
            }
        }

        auto exception = appender!string;

        // "expected 'a', 'b', or 'c'"
        foreach(i, arg; args) {
            if (i == 0) {
                formattedWrite(&exception, "expected '%s'", arg);
            } else if (i + 1 == args.length) {
                formattedWrite(&exception, ", or '%s'", arg);
            } else {
                formattedWrite(&exception, ", '%s'", arg);
            }
        }

        formattedWrite(&exception, " but found '%s'", tok);

        throw new PreParserException(context.loc, exception.data);
    }

    /// A macro definition.
    static struct Def {
        enum Flags : uint {
            NotYetDefined   = 1 << 0,   /// body is still being collected
            Inline          = 1 << 1,   /// declared with a trailing ':'
        }

        string name;
        string value;
        string[] args;
        uint flags;

        SourceLoc loc;

        /// Formats the definition as `name(arg1, arg2, ...)`.
        string pretty(string name) const {
            Appender!string app;
            app.reserve(1024);

            app.put(name);

            app.put("(");

            foreach(i, arg; args) {
                app.put(arg);
                if (i != args.length - 1)
                    app.put(", ");
            }
            app.put(")");

            return app.data;
        }
    }

    Def* def_;              // outermost block definition currently being collected
    size_t depth_;          // nesting depth of open block definitions

    Def[string][] defs_;    // stack of macro definition scopes
    string[string][] envs_; // stack of macro argument scopes

    Source source_;         // root source

    PreParserOptions options_;

    SourceManager* mgr_;
    Context[] contexts_;    // stack of sources currently being parsed
}


/// Returns true when every element of `range` is whitespace (also true for an
/// empty range).
private @property bool isAllWhite(R)(R range) {
    // imported here so the function does not rely on std.uni being reachable
    // through another module's imports
    import std.uni : isWhite;

    foreach (ch; range) {
        if (!isWhite(ch))
            return false;
    }
    return true;
}