1 module vayne.source.preparser;
2 
3 
4 import std.array;
5 import std.format;
6 import std.string;
7 import std.traits;
8 
9 import vayne.source.context;
10 import vayne.source.lexer;
11 import vayne.source.mime;
12 import vayne.source.source;
13 import vayne.source.token;
14 
15 
/// Switches that control which extra annotations the pre-parser emits.
struct PreParserOptions {
	/// Annotate non-blank output with `{{;src:...}}` source location markers.
	bool lineNumbers = false;

	/// Wrap include and macro expansions in begin/end HTML comments.
	bool verboseIncludes = false;
}
20 
21 
/// Error raised by the pre-parser; remembers the source location it refers to.
class PreParserException : Exception {
	/// Location the error message applies to.
	SourceLoc loc;

	/**
	 * Params:
	 *   loc = location the error refers to
	 *   msg = human readable description of the problem
	 */
	this(SourceLoc loc, string msg) {
		super(msg);
		this.loc = loc;
	}
}
31 
32 
/**
 * Pre-parses the source registered under `id` in `mgr`.
 *
 * Params:
 *   mgr     = source manager used to look up, open and register sources
 *   id      = id of the root source to process
 *   options = output annotation switches
 *
 * Returns: the pre-parsed text.
 * Throws: `Exception` carrying a formatted, location-prefixed message when
 *         pre-parsing fails.
 */
string preparse(ref SourceManager mgr, uint id, PreParserOptions options) {
	auto parser = PreParser(mgr, id, options);
	return parser();
}
36 
37 
38 private struct PreParser {
39 	this(ref SourceManager mgr, uint id, PreParserOptions options) {
40 		mgr_ = &mgr;
41 		source_ = mgr_.get(id);
42 		options_ = options;
43 	}
44 
45 	string opCall() {
46 		return parse();
47 	}
48 
49 private:
50 	string parse() {
51 		string errors;
52 
53 		try {
54 			++envs_.length;
55 			++defs_.length;
56 
57 			return parse(source_, SourceLoc(source_.id, 1, 0));
58 		} catch(Exception error) {
59 			if (auto ctxError = cast(ContextException)error) {
60 				errors = format("%s: %s", mgr_.loc(ctxError.loc), error.msg);
61 			} else if (auto preError = cast(PreParserException)error) {
62 				errors = format("%s: %s", mgr_.loc(preError.loc), error.msg);
63 			} else {
64 				errors = format("%s: %s", mgr_.loc(contexts_.back.loc), error.msg);
65 			}
66 
67 			if (!contexts_.empty) {
68 				foreach_reverse (context; contexts_[0..$-1])
69 					errors ~= format("\n> %s", mgr_.loc(context.loc));
70 			}
71 		}
72 
73 		throw new Exception(errors);
74 	}
75 
76 	string parse(Source source, SourceLoc loc) {
77 		auto context = new Context(source, loc);
78 		contexts_ ~= context;
79 		scope (success) --contexts_.length;
80 
81 		Appender!string app;
82 		app.reserve(16 * 1024);
83 
84 		const end = context.remaining.length - 2;
85 		while (context.cursor < end) {
86 			auto remaining = context.remaining;
87 			auto indexOpen = remaining.indexOf("{{");
88 			while (indexOpen != -1) {
89 				auto tag = remaining[indexOpen + 2..indexOpen + 2 + 1];
90 				if ((tag == "#") || (tag == "&") || (tag == "!"))
91 					break;
92 				indexOpen = remaining.indexOf("{{", indexOpen + 2);
93 			}
94 			if (indexOpen == -1)
95 				break;
96 
97 			auto inComment = context.isOpen && (context.open().tag == "!!");
98 			if (!def_ && !inComment)
99 				app.put(remaining[0..indexOpen]);
100 			context.advance(indexOpen);
101 
102 			const contentStart = indexOpen + 2;
103 			auto indexClose = remaining.indexOf("}}", contentStart);
104 			if (indexClose == -1)
105 				throw new PreParserException(context.loc, "missing '}}' to close tag '{{'");
106 
107 			context.advance(2);
108 			indexClose -= contentStart;
109 
110 			auto replaced = replacer(context.source.buffer[context.cursor..context.cursor + indexClose], context);
111 			if (!def_ && !inComment)
112 				app.put(replaced);
113 			context.advance(indexClose + 2);
114 		}
115 		context.expectClosed();
116 
117 		app.put((context.cursor > 0) ? context.remaining() : context.source.buffer);
118 
119 		if (needsLineNumbers(app.data))
120 			return sourceInfo(context.loc) ~ app.data;
121 		return app.data;
122 	}
123 
124 	string include(string content, Context context) {
125 		auto embed = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
126 		auto source = mgr_.open(content[1 + embed..$].strip, embed != 0, context.source.id);
127 		mgr_.dependency(context.source.id, source.id, context.loc);
128 
129 		string result;
130 		if (!embed) {
131 			result = parse(source, SourceLoc(source.id, 1, 0));
132 		} else {
133 			auto name = mgr_.name(source.id);
134 			result = format("data:%s;base64,%s", name.mimeType, encode(source.buffer));
135 		}
136 
137 		if (needsLineNumbers(result))
138 			result ~= sourceInfo(context.loc);
139 
140 		if (needsIncludeNames())
141 			result = format("<!-- begin include %s -->%s<!-- end include %s -->", content, result, content);
142 		return result;
143 	}
144 
145 	string comment(string content, Context context) {
146 		auto block = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
147 		if (block) {
148 			if ((content.length == 2) || (content[2] != '/')) {
149 				context.open(content[0..2], content[2..$]);
150 			} else if ((content.length > 2) && (content[2] == '/')) {
151 				context.expectOpen("!!", "!!/");
152 				context.close();
153 			}
154 		}
155 		return null;
156 	}
157 
158 	string define(string content, Context context) {
159 		auto lex = Lexer(Source(source_.id, source_.parent, content[1..$].strip));
160 		auto tok = expect(lex, context, Token.KeywordKind.Def, Token.KeywordKind.Undef, Token.KeywordKind.Push, Token.KeywordKind.Pop, Token.KeywordKind.Set, Token.Kind.Identifier, "/");
161 
162 		if (tok.keyword(Token.KeywordKind.Def)) {
163 			auto def = parseDef(lex, context);
164 
165 			if (auto pdef = def.name in defs_.back) {
166 				if (pdef.loc != def.loc)
167 					throw new PreParserException(context.loc, format("redefinition of macro '%s' - first defined in '%s' - if this is intended undefine first", def.name, mgr_.loc(pdef.loc)));
168 			}
169 
170 			if (def.flags & Def.Flags.Inline) {
171 				defs_.back[def.name] = def;
172 			} else {
173 				context.open(content[0..1], content[1..$]);
174 				def.flags |= Def.Flags.NotYetDefined;
175 
176 				if (depth_ == 0)
177 					def_ = &(defs_.back[def.name] = def);
178 				++depth_;
179 			}
180 		} else if (tok.keyword(Token.KeywordKind.Undef)) {
181 			tok = expect(lex, context, Token.Kind.Identifier);
182 
183 			auto name = tok.value;
184 			if (auto pdef = name in defs_.back) {
185 				if (pdef.flags & Def.Flags.NotYetDefined)
186 					throw new PreParserException(context.loc, format("trying to undefine macro '%s' inside it's own definition", name));
187 
188 				defs_.back.remove(name);
189 			} else {
190 				throw new PreParserException(context.loc, format("trying to undefine unknown macro '%s'", name));
191 			}
192 		} else if (tok.keyword(Token.KeywordKind.Set) || tok.keyword(Token.KeywordKind.Push)) {
193 			auto name = expect(lex, context, Token.Kind.Identifier);
194 			auto value = expect(lex, context, Token.Kind.Literal, Token.KeywordKind.True, Token.KeywordKind.False);
195 			expect(lex, context, Token.Kind.EndOfInput);
196 
197 			return "{{" ~ content ~ "}}";
198 		} else if (tok.keyword(Token.KeywordKind.Pop)) {
199 			auto name = expect(lex, context, Token.Kind.Identifier);
200 			expect(lex, context, Token.Kind.EndOfInput);
201 
202 			return "{{" ~ content ~ "}}";
203 		} else if (tok.ident) {
204 			if (!def_)
205 				return expand(lex, context, tok.value);
206 		} else {
207 			close(lex, context);
208 		}
209 
210 		return null;
211 	}
212 
213 	void close(ref Lexer lex, Context context) {
214 		expect(lex, context, Token.Kind.EndOfInput);
215 
216 		auto tag = context.close();
217 
218 		if (depth_ == 1) {
219 			auto start = tag.cursor + tag.content.length + 3;
220 			auto end = context.cursor - 2;
221 			auto value = context.source.buffer[start..end];
222 
223 			assert(def_);
224 			assert(def_.flags & Def.Flags.NotYetDefined);
225 
226 			def_.value = value;
227 			def_.flags &= ~Def.Flags.NotYetDefined;
228 			def_ = null;
229 		}
230 
231 		--depth_;
232 	}
233 
234 	string expand(ref Lexer lex, Context context, string name) {
235 		auto optional = (lex.front.kind == Token.Kind.Separator) && (lex.front.name == "?");
236 		if (optional)
237 			lex.popFront;
238 
239 		foreach (k; 0..envs_.length) {
240 			auto top = envs_.length - k - 1;
241 			auto env = envs_[top];
242 
243 			if (auto penv = name in env) {
244 				if (needsLineNumbers(*penv))
245 					return *penv ~ sourceInfo(context.loc);
246 				return *penv;
247 			}
248 
249 			auto defs = defs_[top];
250 			if (auto pdef = name in defs) {
251 				auto tok = lex.front;
252 
253 				string[] args;
254 
255 				if (!tok.eoi && tok.sep('(')) {
256 					lex.popFront;
257 
258 					tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
259 
260 					if (!tok.sep(')')) {
261 						while (true) {
262 							if (tok.ident) {
263 								args ~= expand(lex, context, tok.value);
264 							} else {
265 								args ~= tok.unescaped;
266 							}
267 
268 							tok = expect(lex, context, ",", ")");
269 							if (tok.sep(',')) {
270 								tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
271 								continue;
272 							}
273 							if (tok.sep(')'))
274 								break;
275 						}
276 					}
277 
278 					if (pdef.args.length < args.length)
279 						throw new PreParserException(context.loc, format("too many parameters for macro '%s'", pdef.pretty(name)));
280 				}
281 
282 				string[string] envArgs;
283 
284 				foreach(i, arg; pdef.args)
285 					envArgs[pdef.args[i]] = (i < args.length) ? args[i] : null;
286 
287 				{
288 					++defs_.length;
289 					scope(exit) --defs_.length;
290 
291 					envs_ ~= envArgs;
292 					scope(exit) --envs_.length;
293 
294 					auto source = mgr_.add(format("#%s at %s", pdef.name, mgr_.name(pdef.loc.id)), pdef.value, pdef.loc.id);
295 					auto result = parse(source, SourceLoc(source.id, pdef.loc.line, pdef.loc.column));
296 					if (needsLineNumbers(result))
297 						result ~= sourceInfo(context.loc);
298 
299 					if (needsIncludeNames())
300 						result = format("<!-- begin macro #%s -->%s<!-- end macro #%s -->", name, result, name);
301 					return result;
302 				}
303 			}
304 		}
305 
306 		if (!optional)
307 			throw new PreParserException(context.loc, format("unknown macro '%s'", name));
308 		return null;
309 	}
310 
311 	string[] parseFormalArgList(ref Lexer lex, Context context) {
312 		string[] args;
313 
314 		auto tok = expect(lex, context, ")", Token.Kind.Identifier);
315 
316 		if (!tok.sep(')')) {
317 			while (true) {
318 				args ~= tok.value;
319 
320 				tok = expect(lex, context, ",", ")");
321 				if (tok.sep(',')) {
322 					tok = expect(lex, context, ")", Token.Kind.Identifier);
323 					continue;
324 				}
325 				if (tok.sep(')'))
326 					break;
327 			}
328 		}
329 
330 		return args;
331 	}
332 
333 
334 	Def parseDef(ref Lexer lex, Context context) {
335 		auto tok = expect(lex, context, Token.Kind.Identifier);
336 
337 		Def def;
338 		def.name = tok.value;
339 		def.loc = context.loc;
340 
341 		tok = expect(lex, context, "(", ":", Token.Kind.EndOfInput);
342 
343 		if (tok.kind != Token.Kind.EndOfInput) {
344 			if (tok.sep("("))
345 				def.args = parseFormalArgList(lex, context);
346 
347 			if (tok.sep(":")) {
348 				def.flags |= Def.Flags.Inline;
349 				lex.popFront;
350 			} else {
351 				expect(lex, context, Token.Kind.EndOfInput);
352 			}
353 		}
354 
355 		return def;
356 	}
357 
358 	string replacer(string content, Context context) {
359 		if (content.length > 0) {
360 			auto tag = content[0];
361 			switch(tag) {
362 			case '!':
363 				return comment(content, context);
364 			case '&':
365 				return include(content, context).strip;
366 			case '#':
367 				return define(content, context).strip;
368 			default:
369 				assert(0);
370 			}
371 		}
372 		return null;
373 	}
374 
375 	auto needsLineNumbers(string content) const {
376 		return options_.lineNumbers && !isAllWhite(content);
377 	}
378 
379 	auto needsIncludeNames() {
380 		return options_.verboseIncludes;
381 	}
382 
383 	string sourceInfo(SourceLoc loc) {
384 		return format("{{;src:%d:%d:%d %s}}", loc.id, loc.line, loc.column, mgr_.name(loc.id));
385 	}
386 
387 private:
388 	Token expectNot(Args...)(ref Lexer lex, Context context, auto ref Args args) {
389 		auto tok = lex.front;
390 		lex.popFront;
391 
392 		foreach(i, Arg; Args) {
393 			static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
394 			static if (is(Arg == Token)) {
395 				if (tok != args[i])
396 					continue;
397 			} else static if (isSomeString!(OriginalType!Arg)) {
398 				if ((tok.kind != Token.Kind.Separator) || (tok.value != args[i]))
399 					continue;
400 			} else static if (is(Arg == Token.Kind)) {
401 				if (tok.kind != args[i])
402 					continue;
403 			} else static if (is(Arg == Token.KeywordKind)) {
404 				if (!tok.keyword(args[i]))
405 					continue;
406 			}
407 
408 			{
409 				static if (is(Arg == Token)) {
410 					throw new PreParserException(context.loc, format("unexpected '%s'", args[i]));
411 				} else {
412 					throw new PreParserException(context.loc, format("unexpected '%s'", tok.value));
413 				}
414 			}
415 		}
416 
417 		return tok;
418 	}
419 
420 	Token expect(Args...)(ref Lexer lex, Context context, auto ref Args args) {
421 		auto tok = lex.front;
422 		lex.popFront;
423 
424 		foreach(i, Arg; Args) {
425 			static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
426 			static if (is(Arg == Token)) {
427 				if (tok == args[i])
428 					return tok;
429 			} else static if (isSomeString!(OriginalType!Arg)) {
430 				if ((tok.kind == Token.Kind.Separator) && (tok.value == args[i]))
431 					return tok;
432 			} else static if (is(Arg == Token.Kind)) {
433 				if (tok.kind == args[i])
434 					return tok;
435 			} else static if (is(Arg == Token.KeywordKind)) {
436 				if (tok.keyword(args[i]))
437 					return tok;
438 			}
439 		}
440 
441 		auto exception = appender!string;
442 
443 		foreach(i, arg; args) {
444 			if (i == 0) {
445 				formattedWrite(&exception, "expected '%s'", arg);
446 			} else if (i + 1 == args.length) {
447 				formattedWrite(&exception, ", or '%s'", arg);
448 			} else {
449 				formattedWrite(&exception, ", '%s'", arg);
450 			}
451 		}
452 
453 		if (tok.kind == Token.Kind.Separator) {
454 			formattedWrite(&exception, " but found '%s'", tok);
455 		} else {
456 			formattedWrite(&exception, " but found '%s'", tok);
457 		}
458 
459 		throw new PreParserException(context.loc, exception.data);
460 	}
461 
462 	static struct Def {
463 		enum Flags : uint {
464 			NotYetDefined   = 1 << 0,
465 			Inline          = 1 << 1,
466 		}
467 
468 		string name;
469 		string value;
470 		string[] args;
471 		uint flags;
472 
473 		SourceLoc loc;
474 
475 		string pretty(string name) const {
476 			Appender!string app;
477 			app.reserve(1024);
478 
479 			app.put(name);
480 
481 			app.put("(");
482 
483 			foreach(i, arg; args) {
484 				app.put(arg);
485 				if (i != args.length - 1)
486 					app.put(", ");
487 			}
488 			app.put(")");
489 
490 			return app.data;
491 		}
492 	}
493 
494 	Def* def_;
495 	size_t depth_;
496 
497 	Def[string][] defs_;
498 	string[string][] envs_;
499 
500 	Source source_;
501 
502 	PreParserOptions options_;
503 
504 	SourceManager* mgr_;
505 	Context[] contexts_;
506 }
507 
508 
/**
 * Tells whether `range` consists of white space only (as defined by
 * `std.uni.isWhite`). An empty range counts as all white.
 *
 * Strings are decoded to code points first: iterating a narrow string by
 * code unit reports multi-byte white space such as U+00A0 or U+2028 as
 * non-white. Invalid sequences decode to the replacement character, which
 * is not white, so malformed input never throws.
 */
private @property bool isAllWhite(R)(R range) {
	import std.traits : isSomeString;
	import std.uni : isWhite;
	import std.utf : byDchar;

	static if (isSomeString!R) {
		auto chars = range.byDchar;
	} else {
		auto chars = range;
	}

	foreach (ch; chars) {
		if (!isWhite(ch))
			return false;
	}
	return true;
}