1 module vayne.source.preparser;
2 
3 
import std.array;
import std.format;
import std.string;
import std.traits;
8 
9 import vayne.source.context;
10 import vayne.source.lexer;
11 import vayne.source.mime;
12 import vayne.source.source;
13 import vayne.source.token;
14 
15 
/// Settings that control what the pre-parser emits.
struct PreParserOptions {
	/// When true, source-location markers are added around output that is not
	/// entirely whitespace. Off by default.
	bool lineNumbers = false;
}
19 
20 
/// Error raised by the pre-parser; remembers the source location it refers to.
class PreParserException : Exception {
	/// Location in the template source that the message is about.
	SourceLoc loc;

	/**
	 * Params:
	 *   loc = location the error refers to
	 *   msg = human readable description, forwarded to `Exception`
	 */
	this(SourceLoc loc, string msg) {
		super(msg);
		this.loc = loc;
	}
}
30 
31 
/**
 * Pre-parses the source registered under `id` in `mgr`.
 *
 * Params:
 *   mgr     = source manager that owns the source and resolves further files
 *   id      = id of the source to process
 *   options = pre-parser settings
 *
 * Returns: the pre-parsed text.
 * Throws: `Exception` with a formatted message when pre-parsing fails.
 */
string preparse(ref SourceManager mgr, uint id, PreParserOptions options) {
	auto parser = PreParser(mgr, id, options);
	return parser();
}
35 
36 
/**
 * Pre-parsing pass over a template source.
 *
 * Scans the source for `{{` tags whose first character is `!` (comment),
 * `&` (include), `%` (embed) or `#` (macro directive), replaces each of them
 * with the text produced by the matching handler and copies everything else
 * through unchanged.
 */
private struct PreParser {
	/// Binds the parser to `mgr`, looks up source `id` in it and stores `options`.
	this(ref SourceManager mgr, uint id, PreParserOptions options) {
		mgr_ = &mgr;
		source_ = mgr_.get(id);
		options_ = options;
	}

	/// Runs the pre-parser on the bound source and returns the produced text.
	string opCall() {
		return parse();
	}

private:
	/**
	 * Entry point: pushes the root macro/argument scopes and parses the bound source.
	 *
	 * Any `Exception` raised while parsing is converted into a plain `Exception`
	 * whose message starts with the formatted location of the error followed by
	 * one `> location` line per enclosing context still on the stack.
	 */
	string parse() {
		string errors;

		try {
			++envs_.length;
			++defs_.length;

			return parse(source_, SourceLoc(source_.id, 1, 0));
		} catch(Exception error) {
			if (auto ctxError = cast(ContextException)error) {
				errors = format("%s: %s", mgr_.loc(ctxError.loc), error.msg);
			} else if (auto preError = cast(PreParserException)error) {
				errors = format("%s: %s", mgr_.loc(preError.loc), error.msg);
			} else if (!contexts_.empty) {
				errors = format("%s: %s", mgr_.loc(contexts_.back.loc), error.msg);
			} else {
				// The failure happened before any context was pushed, so there is no
				// current location to report; fall back to the start of the root source.
				errors = format("%s: %s", mgr_.loc(SourceLoc(source_.id, 1, 0)), error.msg);
			}

			if (!contexts_.empty) {
				foreach_reverse (context; contexts_[0..$-1])
					errors ~= format("\n> %s", mgr_.loc(context.loc));
			}
		}

		throw new Exception(errors);
	}

	/**
	 * Pre-parses one source and returns the resulting text.
	 *
	 * Params:
	 *   source = source whose buffer is scanned
	 *   loc    = location used for the context and for the leading source marker
	 *
	 * Throws: `PreParserException` when a recognised tag is never closed with `}}`.
	 */
	string parse(Source source, SourceLoc loc) {
		auto context = new Context(source, loc);
		contexts_ ~= context;
		// Popped on success only: after a failure the stack is kept so that the
		// top-level parse() can print the chain of enclosing contexts.
		scope (success) --contexts_.length;

		Appender!string app;
		app.reserve(16 * 1024);

		// Clamp instead of letting the unsigned subtraction wrap on tiny buffers.
		const total = context.remaining.length;
		const end = (total > 2) ? total - 2 : 0;
		while (context.cursor < end) {
			auto remaining = context.remaining;
			auto indexOpen = remaining.indexOf("{{");
			while (indexOpen != -1) {
				const size_t tagStart = indexOpen + 2;
				if (tagStart >= remaining.length) {
					// "{{" is the very end of the buffer: there is no tag character
					// to inspect, so treat it as ordinary text.
					indexOpen = -1;
					break;
				}

				auto tag = remaining[tagStart..tagStart + 1];
				if ((tag == "#") || (tag == "&") || (tag == "%") || (tag == "!"))
					break;
				indexOpen = remaining.indexOf("{{", tagStart);
			}
			if (indexOpen == -1)
				break;

			// Output is suppressed while a block macro is being recorded (def_ set)
			// or while the innermost open tag is a "!!" block comment.
			auto inComment = context.isOpen && (context.open().tag == "!!");
			if (!def_ && !inComment)
				app.put(remaining[0..indexOpen]);
			context.advance(indexOpen);

			const contentStart = indexOpen + 2;
			auto indexClose = remaining.indexOf("}}", contentStart);
			if (indexClose == -1)
				throw new PreParserException(context.loc, "missing '}}' to close tag '{{'");

			context.advance(2);
			// From here on indexClose is the length of the tag content.
			indexClose -= contentStart;

			auto replaced = replacer(context.source.buffer[context.cursor..context.cursor + indexClose], context);
			if (!def_ && !inComment)
				app.put(replaced);
			context.advance(indexClose + 2);
		}
		context.expectClosed();

		app.put((context.cursor > 0) ? context.remaining() : context.source.buffer);

		if (needsLineNumbers(app.data))
			return sourceInfo(loc) ~ app.data;
		return app.data;
	}

	/**
	 * Handles `{{&name}}`: opens the named source through the manager, records it
	 * as a dependency of the current source and returns its pre-parsed text.
	 */
	string include(string content, Context context) {
		auto source = mgr_.open(content[1..$].strip, false, context.source.id);
		mgr_.dependency(context.source.id, source.id, context.loc);

		auto result = parse(source, SourceLoc(source.id, 1, 0));

		// Re-establish the location of the including source after the included text.
		if (needsLineNumbers(result))
			result ~= sourceInfo(context.loc);
		return result;
	}

	/**
	 * Handles `{{%name}}` and `{{%%name}}`: inserts the content of the named source
	 * prefixed with a raw-length marker. The doubled form produces a
	 * `data:<mime>;base64,<payload>` string instead of the plain buffer.
	 */
	string embed(string content, Context context) {
		auto mime = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
		// NOTE(review): the `true` argument differs from include()'s `false`;
		// presumably it selects a raw/binary open mode - confirm in SourceManager.
		auto source = mgr_.open(content[1 + mime..$].strip, true, context.source.id);
		mgr_.dependency(context.source.id, source.id, context.loc);

		string result;
		if (mime) {
			result = format("data:%s;base64,%s", mgr_.name(source.id).mimeType, encode(source.buffer));
		} else {
			result = cast(string)source.buffer;
		}

		result = rawInfo(result.length) ~ result;

		if (needsLineNumbers(result))
			result ~= sourceInfo(context.loc);
		return result;
	}

	/**
	 * Handles `{{!...}}`. A single `!` tag produces nothing. `{{!!...}}` opens a
	 * block comment on the context and `{{!!/...}}` closes it.
	 *
	 * Returns: always `null`.
	 */
	string comment(string content, Context context) {
		auto block = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
		if (block) {
			if ((content.length == 2) || (content[2] != '/')) {
				context.open(content[0..2], content[2..$]);
			} else if ((content.length > 2) && (content[2] == '/')) {
				context.expectOpen("!!", "!!/");
				context.close();
			}
		}
		return null;
	}

	/**
	 * Handles `{{#...}}` directives: `def`, `undef`, `set`, `push`, `pop`,
	 * a macro name (expansion) or `/` (end of a block definition).
	 *
	 * Returns: the replacement text; `set`, `push` and `pop` tags are validated
	 * and passed through verbatim, definitions yield `null`.
	 * Throws: `PreParserException` on malformed directives, redefinition of a
	 * macro from a different location, or undefining an unknown macro.
	 */
	string define(string content, Context context) {
		auto lex = Lexer(Source(source_.id, source_.parent, content[1..$].strip));
		auto tok = expect(lex, context, Token.KeywordKind.Def, Token.KeywordKind.Undef, Token.KeywordKind.Push, Token.KeywordKind.Pop, Token.KeywordKind.Set, Token.Kind.Identifier, "/");

		if (tok.keyword(Token.KeywordKind.Def)) {
			auto def = parseDef(lex, context);

			if (auto pdef = def.name in defs_.back) {
				// The same definition seen again at the same location is tolerated.
				if (pdef.loc != def.loc)
					throw new PreParserException(context.loc, format("redefinition of macro '%s' - first defined in '%s' - if this is intended undefine first", def.name, mgr_.loc(pdef.loc)));
			}

			if (def.flags & Def.Flags.Inline) {
				defs_.back[def.name] = def;
			} else {
				context.open(content[0..1], content[1..$]);
				def.flags |= Def.Flags.NotYetDefined;

				// Only the outermost block definition is recorded; nested ones are
				// just counted so that their closing tags can be matched.
				if (depth_ == 0)
					def_ = &(defs_.back[def.name] = def);
				++depth_;
			}
		} else if (tok.keyword(Token.KeywordKind.Undef)) {
			tok = expect(lex, context, Token.Kind.Identifier);

			auto name = tok.value;
			if (auto pdef = name in defs_.back) {
				if (pdef.flags & Def.Flags.NotYetDefined)
					throw new PreParserException(context.loc, format("trying to undefine macro '%s' inside it's own definition", name));

				defs_.back.remove(name);
			} else {
				throw new PreParserException(context.loc, format("trying to undefine unknown macro '%s'", name));
			}
		} else if (tok.keyword(Token.KeywordKind.Set) || tok.keyword(Token.KeywordKind.Push)) {
			// Syntax check only; the tag itself is emitted unchanged.
			expect(lex, context, Token.Kind.Identifier);
			expect(lex, context, Token.Kind.Literal, Token.KeywordKind.True, Token.KeywordKind.False);
			expect(lex, context, Token.Kind.EndOfInput);

			return "{{" ~ content ~ "}}";
		} else if (tok.keyword(Token.KeywordKind.Pop)) {
			// Syntax check only; the tag itself is emitted unchanged.
			expect(lex, context, Token.Kind.Identifier);
			expect(lex, context, Token.Kind.EndOfInput);

			return "{{" ~ content ~ "}}";
		} else if (tok.ident) {
			// Macro uses inside a block definition are expanded later, when the
			// recorded body itself is parsed.
			if (!def_)
				return expand(lex, context, tok.value);
		} else {
			close(lex, context);
		}

		return null;
	}

	/**
	 * Handles `{{#/}}`: closes the innermost open tag and, when this ends the
	 * outermost block definition, stores the text between the opening and the
	 * closing tag as the macro's value.
	 *
	 * Throws: `PreParserException` when no macro definition is open.
	 */
	void close(ref Lexer lex, Context context) {
		expect(lex, context, Token.Kind.EndOfInput);

		auto tag = context.close();

		// Without this check the unsigned counter below would wrap around and
		// every later block definition would silently stop being recorded.
		if (depth_ == 0)
			throw new PreParserException(context.loc, "unexpected '{{#/}}' - no macro definition is open");

		if (depth_ == 1) {
			// Skip the opening tag: its content plus one tag character and "}}".
			auto start = tag.cursor + tag.content.length + 3;
			// The cursor sits just behind the "{{" of the closing tag.
			auto end = context.cursor - 2;
			auto value = context.source.buffer[start..end];

			assert(def_);
			assert(def_.flags & Def.Flags.NotYetDefined);

			def_.value = value;
			def_.flags &= ~Def.Flags.NotYetDefined;
			def_ = null;
		}

		--depth_;
	}

	/**
	 * Expands `name`, searching the scopes from innermost to outermost. A macro
	 * argument binding wins over a macro definition of the same scope.
	 *
	 * Params:
	 *   lex     = lexer positioned right behind the name
	 *   context = context of the tag being expanded
	 *   name    = macro or argument name
	 *
	 * Returns: the expansion, or `null` for an unknown name followed by `?`.
	 * Throws: `PreParserException` for an unknown name without `?`, for malformed
	 * argument lists and when more arguments are passed than the macro declares.
	 */
	string expand(ref Lexer lex, Context context, string name) {
		auto optional = (lex.front.kind == Token.Kind.Separator) && (lex.front.name == "?");
		if (optional)
			lex.popFront;

		foreach (k; 0..envs_.length) {
			auto top = envs_.length - k - 1;
			auto env = envs_[top];

			if (auto penv = name in env) {
				if (needsLineNumbers(*penv))
					return *penv ~ sourceInfo(context.loc);
				return *penv;
			}

			auto defs = defs_[top];
			if (auto pdef = name in defs) {
				auto tok = lex.front;

				string[] args;

				if (!tok.eoi && tok.sep('(')) {
					lex.popFront;

					tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);

					// A ')' directly after '(' or after a ',' ends the list, so a
					// trailing comma no longer turns the ')' into an argument.
					while (!tok.sep(')')) {
						if (tok.ident) {
							args ~= expand(lex, context, tok.value);
						} else {
							args ~= tok.unescaped;
						}

						tok = expect(lex, context, ",", ")");
						if (tok.sep(','))
							tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
					}

					if (pdef.args.length < args.length)
						throw new PreParserException(context.loc, format("too many parameters for macro '%s'", pdef.pretty(name)));
				}

				string[string] envArgs;

				// Formal parameters without a matching actual argument are bound to null.
				foreach(i, arg; pdef.args)
					envArgs[arg] = (i < args.length) ? args[i] : null;

				{
					++defs_.length;
					scope(exit) --defs_.length;

					envs_ ~= envArgs;
					scope(exit) --envs_.length;

					auto source = mgr_.add(format("#%s at %s", pdef.name, mgr_.name(pdef.loc.id)), pdef.value, pdef.loc.id);
					auto result = parse(source, SourceLoc(source.id, pdef.loc.line, pdef.loc.column));
					if (needsLineNumbers(result))
						result ~= sourceInfo(context.loc);
					return result;
				}
			}
		}

		if (!optional)
			throw new PreParserException(context.loc, format("unknown macro '%s'", name));
		return null;
	}

	/**
	 * Parses the formal parameter names of a macro definition; the opening `(`
	 * has already been consumed.
	 *
	 * Returns: the parameter names in declaration order.
	 */
	string[] parseFormalArgList(ref Lexer lex, Context context) {
		string[] args;

		auto tok = expect(lex, context, ")", Token.Kind.Identifier);

		// A ')' directly after '(' or after a ',' ends the list, so a trailing
		// comma no longer registers ")" as a parameter name.
		while (!tok.sep(')')) {
			args ~= tok.value;

			tok = expect(lex, context, ",", ")");
			if (tok.sep(','))
				tok = expect(lex, context, ")", Token.Kind.Identifier);
		}

		return args;
	}


	/**
	 * Parses the part of a `def` directive that follows the keyword: the macro
	 * name, an optional formal parameter list and an optional `:` marking an
	 * inline definition.
	 *
	 * Returns: a `Def` with name, location, arguments and flags filled in.
	 */
	Def parseDef(ref Lexer lex, Context context) {
		auto tok = expect(lex, context, Token.Kind.Identifier);

		Def def;
		def.name = tok.value;
		def.loc = context.loc;

		tok = expect(lex, context, "(", ":", Token.Kind.EndOfInput);

		if (tok.kind != Token.Kind.EndOfInput) {
			if (tok.sep("("))
				def.args = parseFormalArgList(lex, context);

			// NOTE(review): `tok` is not refreshed after the argument list, so a ':'
			// following the list is never recognised here and only end-of-input is
			// accepted in that case - confirm whether that is intended.
			// NOTE(review): nothing in this file assigns `value` for inline
			// definitions - confirm how their body is meant to be captured.
			if (tok.sep(":")) {
				def.flags |= Def.Flags.Inline;
				lex.popFront;
			} else {
				expect(lex, context, Token.Kind.EndOfInput);
			}
		}

		return def;
	}

	/**
	 * Dispatches the content of one tag to its handler, chosen by the first
	 * character. Include and macro results are stripped of surrounding whitespace.
	 *
	 * Returns: the replacement text, `null` for empty content.
	 */
	string replacer(string content, Context context) {
		if (content.length > 0) {
			auto tag = content[0];
			switch(tag) {
			case '!':
				return comment(content, context);
			case '&':
				return include(content, context).strip;
			case '%':
				return embed(content, context);
			case '#':
				return define(content, context).strip;
			default:
				// parse() only forwards tags starting with one of the characters above.
				assert(0);
			}
		}
		return null;
	}

	/// True when line numbers are enabled and `content` is not whitespace only.
	auto needsLineNumbers(string content) const {
		return options_.lineNumbers && !isAllWhite(content);
	}

	/// Builds the `{{;src:id:line:column name}}` marker for `loc`.
	string sourceInfo(SourceLoc loc) {
		return format("{{;src:%d:%d:%d %s}}", loc.id, loc.line, loc.column, mgr_.name(loc.id));
	}

	/// Builds the `{{;raw:length}}` marker that precedes embedded content.
	string rawInfo(size_t length) {
		return format("{{;raw:%s}}", length);
	}

private:
	/**
	 * Consumes the next token and throws a `PreParserException` when it matches
	 * any of `args` (tokens, separator strings, token kinds or keyword kinds).
	 *
	 * Returns: the consumed token.
	 */
	Token expectNot(Args...)(ref Lexer lex, Context context, auto ref Args args) {
		auto tok = lex.front;
		lex.popFront;

		foreach(i, Arg; Args) {
			static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
			static if (is(Arg == Token)) {
				if (tok != args[i])
					continue;
			} else static if (isSomeString!(OriginalType!Arg)) {
				if ((tok.kind != Token.Kind.Separator) || (tok.value != args[i]))
					continue;
			} else static if (is(Arg == Token.Kind)) {
				if (tok.kind != args[i])
					continue;
			} else static if (is(Arg == Token.KeywordKind)) {
				if (!tok.keyword(args[i]))
					continue;
			}

			{
				static if (is(Arg == Token)) {
					throw new PreParserException(context.loc, format("unexpected '%s'", args[i]));
				} else {
					throw new PreParserException(context.loc, format("unexpected '%s'", tok.value));
				}
			}
		}

		return tok;
	}

	/**
	 * Consumes the next token and returns it when it matches any of `args`
	 * (tokens, separator strings, token kinds or keyword kinds).
	 *
	 * Throws: `PreParserException` listing the accepted alternatives and the
	 * token that was found instead.
	 */
	Token expect(Args...)(ref Lexer lex, Context context, auto ref Args args) {
		auto tok = lex.front;
		lex.popFront;

		foreach(i, Arg; Args) {
			static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
			static if (is(Arg == Token)) {
				if (tok == args[i])
					return tok;
			} else static if (isSomeString!(OriginalType!Arg)) {
				if ((tok.kind == Token.Kind.Separator) && (tok.value == args[i]))
					return tok;
			} else static if (is(Arg == Token.Kind)) {
				if (tok.kind == args[i])
					return tok;
			} else static if (is(Arg == Token.KeywordKind)) {
				if (tok.keyword(args[i]))
					return tok;
			}
		}

		auto exception = appender!string;

		foreach(i, arg; args) {
			if (i == 0) {
				formattedWrite(&exception, "expected '%s'", arg);
			} else if (i + 1 == args.length) {
				formattedWrite(&exception, ", or '%s'", arg);
			} else {
				formattedWrite(&exception, ", '%s'", arg);
			}
		}

		formattedWrite(&exception, " but found '%s'", tok);

		throw new PreParserException(context.loc, exception.data);
	}

	/// A macro definition.
	static struct Def {
		enum Flags : uint {
			NotYetDefined   = 1 << 0,	/// block definition whose closing tag has not been seen yet
			Inline          = 1 << 1,	/// definition introduced with ':'
		}

		string name;	/// macro name
		string value;	/// macro body, parsed again on every expansion
		string[] args;	/// formal parameter names
		uint flags;	/// combination of `Flags`

		SourceLoc loc;	/// location of the defining tag

		/// Formats `name` followed by the parameter list, e.g. `name(a, b)`.
		string pretty(string name) const {
			Appender!string app;
			app.reserve(1024);

			app.put(name);

			app.put("(");

			foreach(i, arg; args) {
				app.put(arg);
				if (i != args.length - 1)
					app.put(", ");
			}
			app.put(")");

			return app.data;
		}
	}

	Def* def_;		// block definition currently being recorded, null otherwise
	size_t depth_;		// nesting depth of open block definitions

	Def[string][] defs_;	// stack of macro definition scopes
	string[string][] envs_;	// stack of macro argument bindings, parallel to defs_

	Source source_;		// root source being pre-parsed

	PreParserOptions options_;

	SourceManager* mgr_;
	Context[] contexts_;	// contexts currently being parsed, innermost last
}
514 
515 
/// Returns true when every element of `range` is whitespace according to
/// `std.uni.isWhite`; an empty range counts as all white.
private @property bool isAllWhite(R)(R range) {
	import std.uni : isWhite;

	bool onlyWhite = true;
	foreach (unit; range) {
		if (isWhite(unit))
			continue;

		// First non-white element decides the answer.
		onlyWhite = false;
		break;
	}
	return onlyWhite;
}