1 module vayne.source.preparser;
2 
3 
4 import std.array;
5 import std.format;
6 import std.string;
7 import std.traits;
8 
9 import vayne.source.context;
10 import vayne.source.lexer;
11 import vayne.source.mime;
12 import vayne.source.source;
13 import vayne.source.token;
14 
15 
/// Options controlling what the pre-parser emits.
struct PreParserOptions {
	/// When set, `{{;src:...}}` source-location markers are added to
	/// non-whitespace output produced by the pre-parser.
	bool lineNumbers = false;
}
19 
20 
/// Error raised by the pre-parser; remembers the source location it refers to.
class PreParserException : Exception {
	/// Location the error was reported at.
	SourceLoc loc;

	/**
	 * Params:
	 *   loc = source location associated with the error
	 *   msg = human readable description of the error
	 */
	this(SourceLoc loc, string msg) {
		super(msg);
		this.loc = loc;
	}
}
30 
31 
/**
 * Runs the pre-parser over the source registered under `id` in `mgr`.
 *
 * Params:
 *   mgr     = source manager used to look up the root source
 *   id      = identifier of the root source inside `mgr`
 *   options = pre-parser options
 *
 * Returns: the pre-parsed text.
 */
string preparse(ref SourceManager mgr, uint id, PreParserOptions options) {
	auto parser = PreParser(mgr, id, options);
	return parser();
}
35 
36 
37 private struct PreParser {
38 	this(ref SourceManager mgr, uint id, PreParserOptions options) {
39 		mgr_ = &mgr;
40 		source_ = mgr_.get(id);
41 		options_ = options;
42 	}
43 
44 	string opCall() {
45 		return parse();
46 	}
47 
48 private:
49 	string parse() {
50 		string errors;
51 
52 		try {
53 			++envs_.length;
54 			++defs_.length;
55 
56 			return parse(source_, SourceLoc(source_.id, 1, 0));
57 		} catch(Exception error) {
58 			if (auto ctxError = cast(ContextException)error) {
59 				errors = format("%s: %s", mgr_.loc(ctxError.loc), error.msg);
60 			} else if (auto preError = cast(PreParserException)error) {
61 				errors = format("%s: %s", mgr_.loc(preError.loc), error.msg);
62 			} else {
63 				errors = format("%s: %s", mgr_.loc(contexts_.back.loc), error.msg);
64 			}
65 
66 			if (!contexts_.empty) {
67 				foreach_reverse (context; contexts_[0..$-1])
68 					errors ~= format("\n> %s", mgr_.loc(context.loc));
69 			}
70 		}
71 
72 		throw new Exception(errors);
73 	}
74 
75 	string parse(Source source, SourceLoc loc) {
76 		auto context = new Context(source, loc);
77 		contexts_ ~= context;
78 		scope (success) --contexts_.length;
79 
80 		Appender!string app;
81 		app.reserve(16 * 1024);
82 
83 		const end = context.remaining.length - 2;
84 		while (context.cursor < end) {
85 			auto remaining = context.remaining;
86 			auto indexOpen = remaining.indexOf("{{");
87 			while (indexOpen != -1) {
88 				auto tag = remaining[indexOpen + 2..indexOpen + 2 + 1];
89 				if ((tag == "#") || (tag == "&") || (tag == "!"))
90 					break;
91 				indexOpen = remaining.indexOf("{{", indexOpen + 2);
92 			}
93 			if (indexOpen == -1)
94 				break;
95 
96 			if (!def_)
97 				app.put(remaining[0..indexOpen]);
98 			context.advance(indexOpen);
99 
100 			const contentStart = indexOpen + 2;
101 			auto indexClose = remaining.indexOf("}}", contentStart);
102 			if (indexClose == -1)
103 				throw new PreParserException(context.loc, "missing '}}' to close tag '{{'");
104 
105 			context.advance(2);
106 			indexClose -= contentStart;
107 
108 			auto replaced = replacer(context.source.buffer[context.cursor..context.cursor + indexClose], context);
109 			if (!def_)
110 				app.put(replaced);
111 			context.advance(indexClose + 2);
112 		}
113 		context.expectClosed();
114 
115 		app.put((context.cursor > 0) ? context.remaining() : context.source.buffer);
116 
117 		if (needsLineNumbers(app.data))
118 			return sourceInfo(loc) ~ app.data;
119 		return app.data;
120 	}
121 
122 	string include(string content, Context context) {
123 		auto embed = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
124 		auto source = mgr_.open(content[1 + embed..$].strip, embed != 0, context.source.id);
125 		mgr_.dependency(context.source.id, source.id, context.loc);
126 
127 		string result;
128 		if (!embed) {
129 			result = parse(source, SourceLoc(source.id, 1, 0));
130 		} else {
131 			auto name = mgr_.name(source.id);
132 			result = format("data:%s;base64,%s", name.mimeType, encode(source.buffer));
133 		}
134 
135 		if (needsLineNumbers(result))
136 			result ~= sourceInfo(context.loc);
137 		return result;
138 	}
139 
140 	string define(string content, Context context) {
141 		auto lex = Lexer(Source(source_.id, source_.parent, content[1..$].strip));
142 		auto tok = expect(lex, context, Token.KeywordKind.Def, Token.KeywordKind.Undef, Token.KeywordKind.Push, Token.KeywordKind.Pop, Token.KeywordKind.Set, Token.Kind.Identifier, "/");
143 
144 		if (tok.keyword(Token.KeywordKind.Def)) {
145 			auto def = parseDef(lex, context);
146 
147 			if (auto pdef = def.name in defs_.back) {
148 				if (pdef.loc != def.loc)
149 					throw new PreParserException(context.loc, format("redefinition of macro '%s' - first defined in '%s' - if this is intended undefine first", def.name, mgr_.loc(pdef.loc)));
150 			}
151 
152 			if (def.flags & Def.Flags.Inline) {
153 				defs_.back[def.name] = def;
154 			} else {
155 				context.open(content[0..1], content[1..$]);
156 				def.flags |= Def.Flags.NotYetDefined;
157 
158 				if (depth_ == 0)
159 					def_ = &(defs_.back[def.name] = def);
160 				++depth_;
161 			}
162 		} else if (tok.keyword(Token.KeywordKind.Undef)) {
163 			tok = expect(lex, context, Token.Kind.Identifier);
164 
165 			auto name = tok.value;
166 			if (auto pdef = name in defs_.back) {
167 				if (pdef.flags & Def.Flags.NotYetDefined)
168 					throw new PreParserException(context.loc, format("trying to undefine macro '%s' inside it's own definition", name));
169 
170 				defs_.back.remove(name);
171 			} else {
172 				throw new PreParserException(context.loc, format("trying to undefine unknown macro '%s'", name));
173 			}
174 		} else if (tok.keyword(Token.KeywordKind.Set) || tok.keyword(Token.KeywordKind.Push)) {
175 			auto name = expect(lex, context, Token.Kind.Identifier);
176 			auto value = expect(lex, context, Token.Kind.Literal, Token.KeywordKind.True, Token.KeywordKind.False);
177 			expect(lex, context, Token.Kind.EndOfInput);
178 
179 			return "{{" ~ content ~ "}}";
180 		} else if (tok.keyword(Token.KeywordKind.Pop)) {
181 			auto name = expect(lex, context, Token.Kind.Identifier);
182 			expect(lex, context, Token.Kind.EndOfInput);
183 
184 			return "{{" ~ content ~ "}}";
185 		} else if (tok.ident) {
186 			if (!def_)
187 				return expand(lex, context, tok.value);
188 		} else {
189 			close(lex, context);
190 		}
191 
192 		return null;
193 	}
194 
195 	void close(ref Lexer lex, Context context) {
196 		expect(lex, context, Token.Kind.EndOfInput);
197 
198 		auto tag = context.close();
199 
200 		if (depth_ == 1) {
201 			auto start = tag.cursor + tag.content.length + 3;
202 			auto end = context.cursor - 2;
203 			auto value = context.source.buffer[start..end];
204 
205 			assert(def_);
206 			assert(def_.flags & Def.Flags.NotYetDefined);
207 
208 			def_.value = value;
209 			def_.flags &= ~Def.Flags.NotYetDefined;
210 			def_ = null;
211 		}
212 
213 		--depth_;
214 	}
215 
216 	string expand(ref Lexer lex, Context context, string name) {
217 		auto optional = (lex.front.kind == Token.Kind.Separator) && (lex.front.name == "?");
218 		if (optional)
219 			lex.popFront;
220 
221 		foreach (k; 0..envs_.length) {
222 			auto top = envs_.length - k - 1;
223 			auto env = envs_[top];
224 
225 			if (auto penv = name in env) {
226 				if (needsLineNumbers(*penv))
227 					return *penv ~ sourceInfo(context.loc);
228 				return *penv;
229 			}
230 
231 			auto defs = defs_[top];
232 			if (auto pdef = name in defs) {
233 				auto tok = lex.front;
234 
235 				string[] args;
236 
237 				if (!tok.eoi && tok.sep('(')) {
238 					lex.popFront;
239 
240 					tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
241 
242 					if (!tok.sep(')')) {
243 						while (true) {
244 							if (tok.ident) {
245 								args ~= expand(lex, context, tok.value);
246 							} else {
247 								args ~= tok.unescaped;
248 							}
249 
250 							tok = expect(lex, context, ",", ")");
251 							if (tok.sep(',')) {
252 								tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
253 								continue;
254 							}
255 							if (tok.sep(')'))
256 								break;
257 						}
258 					}
259 
260 					if (pdef.args.length < args.length)
261 						throw new PreParserException(context.loc, format("too many parameters for macro '%s'", pdef.pretty(name)));
262 				}
263 
264 				string[string] envArgs;
265 
266 				foreach(i, arg; pdef.args)
267 					envArgs[pdef.args[i]] = (i < args.length) ? args[i] : null;
268 
269 				{
270 					++defs_.length;
271 					scope(exit) --defs_.length;
272 
273 					envs_ ~= envArgs;
274 					scope(exit) --envs_.length;
275 
276 					auto source = mgr_.add(format("#%s at %s", pdef.name, mgr_.name(pdef.loc.id)), pdef.value, pdef.loc.id);
277 					auto result = parse(source, SourceLoc(source.id, pdef.loc.line, pdef.loc.column));
278 					if (needsLineNumbers(result))
279 						result ~= sourceInfo(context.loc);
280 					return result;
281 				}
282 			}
283 		}
284 
285 		if (!optional)
286 			throw new PreParserException(context.loc, format("unknown macro '%s'", name));
287 		return null;
288 	}
289 
290 	string[] parseFormalArgList(ref Lexer lex, Context context) {
291 		string[] args;
292 
293 		auto tok = expect(lex, context, ")", Token.Kind.Identifier);
294 
295 		if (!tok.sep(')')) {
296 			while (true) {
297 				args ~= tok.value;
298 
299 				tok = expect(lex, context, ",", ")");
300 				if (tok.sep(',')) {
301 					tok = expect(lex, context, ")", Token.Kind.Identifier);
302 					continue;
303 				}
304 				if (tok.sep(')'))
305 					break;
306 			}
307 		}
308 
309 		return args;
310 	}
311 
312 
313 	Def parseDef(ref Lexer lex, Context context) {
314 		auto tok = expect(lex, context, Token.Kind.Identifier);
315 
316 		Def def;
317 		def.name = tok.value;
318 		def.loc = context.loc;
319 
320 		tok = expect(lex, context, "(", ":", Token.Kind.EndOfInput);
321 
322 		if (tok.kind != Token.Kind.EndOfInput) {
323 			if (tok.sep("("))
324 				def.args = parseFormalArgList(lex, context);
325 
326 			if (tok.sep(":")) {
327 				def.flags |= Def.Flags.Inline;
328 				lex.popFront;
329 			} else {
330 				expect(lex, context, Token.Kind.EndOfInput);
331 			}
332 		}
333 
334 		return def;
335 	}
336 
337 	string replacer(string content, Context context) {
338 		if (content.length > 0) {
339 			auto tag = content[0];
340 			switch(tag) {
341 			case '!':
342 				return null;
343 			case '&':
344 				return include(content, context).strip;
345 			case '#':
346 				return define(content, context).strip;
347 			default:
348 				assert(0);
349 			}
350 		}
351 		return null;
352 	}
353 
354 	auto needsLineNumbers(string content) const {
355 		return options_.lineNumbers && !isAllWhite(content);
356 	}
357 
358 	string sourceInfo(SourceLoc loc) {
359 		return format("{{;src:%d:%d:%d %s}}", loc.id, loc.line, loc.column, mgr_.name(loc.id));
360 	}
361 
362 private:
363 	Token expectNot(Args...)(ref Lexer lex, Context context, auto ref Args args) {
364 		auto tok = lex.front;
365 		lex.popFront;
366 
367 		foreach(i, Arg; Args) {
368 			static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
369 			static if (is(Arg == Token)) {
370 				if (tok != args[i])
371 					continue;
372 			} else static if (isSomeString!(OriginalType!Arg)) {
373 				if ((tok.kind != Token.Kind.Separator) || (tok.value != args[i]))
374 					continue;
375 			} else static if (is(Arg == Token.Kind)) {
376 				if (tok.kind != args[i])
377 					continue;
378 			} else static if (is(Arg == Token.KeywordKind)) {
379 				if (!tok.keyword(args[i]))
380 					continue;
381 			}
382 
383 			{
384 				static if (is(Arg == Token)) {
385 					throw new PreParserException(context.loc, format("unexpected '%s'", args[i]));
386 				} else {
387 					throw new PreParserException(context.loc, format("unexpected '%s'", tok.value));
388 				}
389 			}
390 		}
391 
392 		return tok;
393 	}
394 
395 	Token expect(Args...)(ref Lexer lex, Context context, auto ref Args args) {
396 		auto tok = lex.front;
397 		lex.popFront;
398 
399 		foreach(i, Arg; Args) {
400 			static assert(is(Arg == Token) || isSomeString!(OriginalType!Arg) || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
401 			static if (is(Arg == Token)) {
402 				if (tok == args[i])
403 					return tok;
404 			} else static if (isSomeString!(OriginalType!Arg)) {
405 				if ((tok.kind == Token.Kind.Separator) && (tok.value == args[i]))
406 					return tok;
407 			} else static if (is(Arg == Token.Kind)) {
408 				if (tok.kind == args[i])
409 					return tok;
410 			} else static if (is(Arg == Token.KeywordKind)) {
411 				if (tok.keyword(args[i]))
412 					return tok;
413 			}
414 		}
415 
416 		auto exception = appender!string;
417 
418 		foreach(i, arg; args) {
419 			if (i == 0) {
420 				formattedWrite(&exception, "expected '%s'", arg);
421 			} else if (i + 1 == args.length) {
422 				formattedWrite(&exception, ", or '%s'", arg);
423 			} else {
424 				formattedWrite(&exception, ", '%s'", arg);
425 			}
426 		}
427 
428 		if (tok.kind == Token.Kind.Separator) {
429 			formattedWrite(&exception, " but found '%s'", tok);
430 		} else {
431 			formattedWrite(&exception, " but found '%s'", tok);
432 		}
433 
434 		throw new PreParserException(context.loc, exception.data);
435 	}
436 
437 	static struct Def {
438 		enum Flags : uint {
439 			NotYetDefined   = 1 << 0,
440 			Inline          = 1 << 1,
441 		}
442 
443 		string name;
444 		string value;
445 		string[] args;
446 		uint flags;
447 
448 		SourceLoc loc;
449 
450 		string pretty(string name) const {
451 			Appender!string app;
452 			app.reserve(1024);
453 
454 			app.put(name);
455 
456 			app.put("(");
457 
458 			foreach(i, arg; args) {
459 				app.put(arg);
460 				if (i != args.length - 1)
461 					app.put(", ");
462 			}
463 			app.put(")");
464 
465 			return app.data;
466 		}
467 	}
468 
469 	Def* def_;
470 	size_t depth_;
471 
472 	Def[string][] defs_;
473 	string[string][] envs_;
474 
475 	Source source_;
476 
477 	PreParserOptions options_;
478 
479 	SourceManager* mgr_;
480 	Context[] contexts_;
481 }
482 
483 
/// Returns `true` when every element of `range` is whitespace; an empty range counts as all-white.
private @property bool isAllWhite(R)(R range) {
	import std.uni : isWhite;

	bool sawOther = false;
	foreach (ch; range) {
		if (isWhite(ch))
			continue;
		sawOther = true;
		break;
	}
	return !sawOther;
}