1 module vayne.source.preparser;
2 
3 
4 import std.array;
5 import std.format;
6 import std.string;
7 import std.traits;
8 
9 import vayne.source.context;
10 import vayne.source.lexer;
11 import vayne.source.mime;
12 import vayne.source.source;
13 import vayne.source.token;
14 
15 
/// Settings that control what the pre-parser emits.
struct PreParserOptions {
	/// When set, `{{;src:...}}` source markers are added around non-blank output.
	bool lineNumbers = false;
}
19 
20 
/// Error raised by the pre-parser, tagged with the template location it refers to.
class PreParserException : Exception {
	/**
	 * Params:
	 *   loc  = location inside the template source the error is about
	 *   msg  = human readable description
	 *   file = D source file of the throw site (filled in automatically)
	 *   line = D source line of the throw site (filled in automatically)
	 */
	this(SourceLoc loc, string msg, string file = __FILE__, size_t line = __LINE__) {
		// forward file/line so the exception points at the throw site instead of this constructor
		super(msg, file, line);

		this.loc = loc;
	}

	/// Template location passed to the constructor.
	SourceLoc loc;
}
30 
31 
/**
 * Pre-parses the source registered under `id` in `mgr`.
 *
 * Builds a `PreParser` for the source and invokes it; the returned string is
 * whatever that invocation produces.
 */
string preparse(ref SourceManager mgr, uint id, PreParserOptions options) {
	auto parser = PreParser(mgr, id, options);
	return parser.opCall();
}
35 
36 
/**
 * Expands the pre-processing tags of a source: `{{&...}}` (include / embed) and
 * `{{#...}}` (macro definition, removal, expansion and closing). Any other
 * `{{...}}` tag is copied to the output untouched.
 *
 * Created by `preparse` and run through `opCall`.
 */
private struct PreParser {
	/// Binds the parser to `mgr` and fetches the source registered under `id`.
	this(ref SourceManager mgr, uint id, PreParserOptions options) {
		mgr_ = &mgr;
		source_ = mgr_.get(id);
		options_ = options;
	}

	/// Runs the pre-parser over the bound source and returns the expanded text.
	string opCall() {
		return parse();
	}

private:
	/**
	 * Parses the root source.
	 *
	 * Any exception raised while parsing is converted into a plain `Exception`
	 * whose message starts with the formatted location of the error followed by
	 * one `> location` line per enclosing context still on the stack.
	 */
	string parse() {
		string errors;

		try {
			// the root source gets its own (initially empty) environment and macro table
			++envs_.length;
			++defs_.length;

			return parse(source_, SourceLoc(source_.id, 1, 0));
		} catch(Exception error) {
			if (auto ctxError = cast(ContextException)error) {
				errors = format("%s: %s", mgr_.loc(ctxError.loc), error.msg);
			} else if (auto preError = cast(PreParserException)error) {
				errors = format("%s: %s", mgr_.loc(preError.loc), error.msg);
			} else {
				// the context stack may still be empty if the failure happened before the
				// first context was pushed - fall back to the start of the root source
				SourceLoc loc = SourceLoc(source_.id, 1, 0);
				if (!contexts_.empty)
					loc = contexts_.back.loc;

				errors = format("%s: %s", mgr_.loc(loc), error.msg);
			}

			if (!contexts_.empty) {
				foreach_reverse (context; contexts_[0..$-1])
					errors ~= format("\n> %s", mgr_.loc(context.loc));
			}
		}

		throw new Exception(errors);
	}

	/**
	 * Parses `source`, replacing every `{{#...}}` and `{{&...}}` tag by the result of
	 * `replacer` and copying everything else verbatim.
	 *
	 * While a block macro definition is open (`def_` is set) nothing is written to
	 * the output. When line numbers are requested and the result is not blank, the
	 * result is prefixed with a source marker for `loc`.
	 *
	 * Throws: `PreParserException` when a tag is not closed by `}}`.
	 */
	string parse(Source source, SourceLoc loc) {
		auto context = new Context(source, loc);
		contexts_ ~= context;
		// only popped on success: on failure parse() reads the stack to build the error trace
		scope (success) --contexts_.length;

		Appender!string app;
		app.reserve(16 * 1024);

		// clamp instead of letting the unsigned subtraction wrap for buffers shorter than "{{"
		const total = context.remaining.length;
		const end = (total >= 2) ? total - 2 : 0;
		while (context.cursor < end) {
			auto remaining = context.remaining;

			// find the next "{{" that is directly followed by '#' or '&'
			auto indexOpen = remaining.indexOf("{{");
			while (indexOpen != -1) {
				const tagAt = cast(size_t)indexOpen + 2;
				if (tagAt >= remaining.length) {
					// "{{" is the very last thing in the buffer - no tag character follows,
					// so it is plain text and not the start of a pre-processing tag
					indexOpen = -1;
					break;
				}

				const tag = remaining[tagAt];
				if ((tag == '#') || (tag == '&'))
					break;
				indexOpen = remaining.indexOf("{{", tagAt);
			}
			if (indexOpen == -1)
				break;

			if (!def_)
				app.put(remaining[0..indexOpen]);
			context.advance(indexOpen);

			// find the matching "}}", skipping any that sit inside an unbalanced quote
			const contentStart = indexOpen + 2;
			auto indexClose = remaining.indexOf("}}", contentStart);
			while (indexClose != -1) {
				if (balancedQuotes(remaining[contentStart..indexClose]))
					break;

				indexClose = remaining.indexOf("}}", indexClose + 2);
			}

			if (indexClose == -1)
				throw new PreParserException(context.loc, "missing '}}' to close tag '{{'");

			// step over "{{"; from here on indexClose is the length of the tag content
			context.advance(2);
			indexClose -= contentStart;

			auto replaced = replacer(context.source.buffer[context.cursor..context.cursor + indexClose], context);
			if (!def_)
				app.put(replaced);
			// step over the content and the closing "}}"
			context.advance(indexClose + 2);
		}
		context.expectClosed();

		app.put((context.cursor > 0) ? context.remaining() : context.source.buffer);

		if (needsLineNumbers(app.data))
			return sourceInfo(loc) ~ app.data;
		return app.data;
	}

	/**
	 * Handles an `{{&...}}` tag.
	 *
	 * A single `&` opens the named source and pre-parses it recursively. A doubled
	 * `&&` opens it as an embed and returns a `data:<mime>;base64,<payload>` string
	 * built from its buffer. The opened source is registered as a dependency of the
	 * including source in both cases.
	 */
	string include(string content, Context context) {
		// 1 when the tag starts with "&&" (embed), 0 otherwise - also used as extra slice offset
		auto embed = ((content.length > 1) && (content[1] == content[0])) ? 1 : 0;
		auto source = mgr_.open(content[1 + embed..$].strip, embed != 0, context.source.id);
		mgr_.dependency(context.source.id, source.id, context.loc);

		string result;
		if (!embed) {
			result = parse(source, SourceLoc(source.id, 1, 0));
		} else {
			auto name = mgr_.name(source.id);
			result = format("data:%s;base64,%s", name.mimeType, encode(source.buffer));
		}

		// switch the source marker back to the including file after the inserted text
		if (needsLineNumbers(result))
			result ~= sourceInfo(context.loc);
		return result;
	}

	/**
	 * Handles a `{{#...}}` tag. Depending on the first token the tag
	 * - `def`: registers a macro (inline macros immediately, block macros once closed),
	 * - `undef`: removes a macro from the innermost macro table,
	 * - identifier: expands the macro / argument of that name (skipped while a block
	 *   definition is being recorded),
	 * - `/`: closes the innermost open block definition.
	 *
	 * Returns: the expansion for the identifier form, `null` otherwise.
	 * Throws: `PreParserException` on redefinition, on undefining an unknown macro or a
	 *         macro from inside its own definition, and on unexpected tokens.
	 */
	string define(string content, Context context) {
		auto lex = Lexer(Source(source_.id, source_.parent, content[1..$].strip));
		auto tok = expect(lex, context, Token.KeywordKind.Def, Token.KeywordKind.Undef, Token.Kind.Identifier, "/");

		if (tok.keyword(Token.KeywordKind.Def)) {
			auto def = parseDef(lex, context);

			// seeing the same definition again (same location) is accepted, a different one is not
			if (auto pdef = def.name in defs_.back) {
				if (pdef.loc != def.loc)
					throw new PreParserException(context.loc, format("redefinition of macro '%s' - first defined in '%s' - if this is intended undefine first", def.name, mgr_.loc(pdef.loc)));
			}

			if (def.flags & Def.Flags.Inline) {
				defs_.back[def.name] = def;
			} else {
				context.open(content[0..1], content[1..$]);
				def.flags |= Def.Flags.NotYetDefined;

				// only the outermost open definition is recorded; nested ones just bump the depth
				if (depth_ == 0)
					def_ = &(defs_.back[def.name] = def);
				++depth_;
			}
		} else if (tok.keyword(Token.KeywordKind.Undef)) {
			tok = expect(lex, context, Token.Kind.Identifier);

			auto name = tok.value;
			if (auto pdef = name in defs_.back) {
				if (pdef.flags & Def.Flags.NotYetDefined)
					throw new PreParserException(context.loc, format("trying to undefine macro '%s' inside it's own definition", name));

				defs_.back.remove(name);
			} else {
				throw new PreParserException(context.loc, format("trying to undefine unknown macro '%s'", name));
			}
		} else if (tok.ident) {
			if (!def_)
				return expand(lex, context, tok.value);
		} else {
			close(lex, context);
		}

		return null;
	}

	/**
	 * Handles the closing `{{#/}}` tag. When it closes the outermost open block
	 * definition, the text between the opening and the closing tag is stored as the
	 * macro value and the macro becomes usable.
	 */
	void close(ref Lexer lex, Context context) {
		expect(lex, context, Token.Kind.EndOfInput);

		auto tag = context.close();

		if (depth_ == 1) {
			// NOTE(review): the offsets presumably skip the "}}" of the opening tag and the
			// "{{" of the closing tag - confirm against Context's cursor bookkeeping
			auto start = tag.cursor + tag.content.length + 3;
			auto end = context.cursor - 2;
			auto value = context.source.buffer[start..end];

			assert(def_);
			assert(def_.flags & Def.Flags.NotYetDefined);

			def_.value = value;
			def_.flags &= ~Def.Flags.NotYetDefined;
			def_ = null;
		}

		--depth_;
	}

	/**
	 * Expands `name`, searching the scopes from innermost to outermost. In each scope
	 * the argument environment is checked first, then the macro table.
	 *
	 * A macro may be followed by a parenthesised, comma separated argument list of
	 * identifiers (expanded recursively) and literals; a trailing comma is allowed.
	 * Missing arguments are bound to `null`. The macro body is parsed as a source of
	 * its own with a fresh scope.
	 *
	 * A `?` after the name makes the expansion optional: unknown names then yield
	 * `null` instead of an error.
	 *
	 * Throws: `PreParserException` for unknown names (unless optional) and when more
	 *         arguments are passed than the macro declares.
	 */
	string expand(ref Lexer lex, Context context, string name) {
		auto optional = (lex.front.kind == Token.Kind.Separator) && (lex.front.name == "?");
		if (optional)
			lex.popFront;

		foreach (k; 0..envs_.length) {
			auto top = envs_.length - k - 1;
			auto env = envs_[top];

			if (auto penv = name in env) {
				if (needsLineNumbers(*penv))
					return *penv ~ sourceInfo(context.loc);
				return *penv;
			}

			auto defs = defs_[top];
			if (auto pdef = name in defs) {
				auto tok = lex.front;

				string[] args;

				if (!tok.eoi && tok.sep('(')) {
					lex.popFront;

					tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);

					// ')' ends the list wherever it shows up: right after '(' (no arguments),
					// after an argument, or after a trailing ',' - it is never an argument itself
					while (!tok.sep(')')) {
						if (tok.ident) {
							args ~= expand(lex, context, tok.value);
						} else {
							args ~= tok.unescaped;
						}

						tok = expect(lex, context, ",", ")");
						if (tok.sep(','))
							tok = expect(lex, context, ")", Token.Kind.Identifier, Token.Kind.Literal);
					}

					if (pdef.args.length < args.length)
						throw new PreParserException(context.loc, format("too many parameters for macro '%s'", pdef.pretty(name)));
				}

				string[string] envArgs;

				foreach(i, arg; pdef.args)
					envArgs[arg] = (i < args.length) ? args[i] : null;

				{
					// the macro body runs in its own scope, dropped again when done
					++defs_.length;
					scope(exit) --defs_.length;

					envs_ ~= envArgs;
					scope(exit) --envs_.length;

					auto source = mgr_.add(format("#%s at %s", pdef.name, mgr_.name(pdef.loc.id)), pdef.value, pdef.loc.id);
					auto result = parse(source, SourceLoc(source.id, pdef.loc.line, pdef.loc.column));
					if (needsLineNumbers(result))
						result ~= sourceInfo(context.loc);
					return result;
				}
			}
		}

		if (!optional)
			throw new PreParserException(context.loc, format("unknown macro '%s'", name));
		return null;
	}

	/**
	 * Parses the formal parameter names of a macro definition; the opening `(` has
	 * already been consumed. Reads up to and including the closing `)`. A trailing
	 * comma before `)` is allowed.
	 */
	string[] parseFormalArgList(ref Lexer lex, Context context) {
		string[] args;

		auto tok = expect(lex, context, ")", Token.Kind.Identifier);

		// ')' always terminates the list and is never taken as a parameter name
		while (!tok.sep(')')) {
			args ~= tok.value;

			tok = expect(lex, context, ",", ")");
			if (tok.sep(','))
				tok = expect(lex, context, ")", Token.Kind.Identifier);
		}

		return args;
	}


	/**
	 * Parses what follows the `def` keyword: the macro name, then either nothing,
	 * a parameter list, or `:` which marks the macro as inline.
	 *
	 * The definition's location is the location of the tag being parsed.
	 */
	Def parseDef(ref Lexer lex, Context context) {
		auto tok = expect(lex, context, Token.Kind.Identifier);

		Def def;
		def.name = tok.value;
		def.loc = context.loc;

		tok = expect(lex, context, "(", ":", Token.Kind.EndOfInput);

		if (tok.kind != Token.Kind.EndOfInput) {
			if (tok.sep("("))
				def.args = parseFormalArgList(lex, context);

			// NOTE(review): `tok` still holds '(' after a parameter list, so the ':' branch is
			// only reachable for macros without parameters; also nothing visible here stores a
			// value for inline macros - confirm whether that is intended
			if (tok.sep(":")) {
				def.flags |= Def.Flags.Inline;
				lex.popFront;
			} else {
				expect(lex, context, Token.Kind.EndOfInput);
			}
		}

		return def;
	}

	/**
	 * Dispatches the content of a tag (without the surrounding braces) on its first
	 * character: `&` to `include`, `#` to `define`. The result is stripped of
	 * surrounding whitespace. Empty content yields `null`.
	 */
	string replacer(string content, Context context) {
		if (content.length > 0) {
			auto tag = content[0];
			switch(tag) {
			case '&':
				return include(content, context).strip;
			case '#':
				return define(content, context).strip;
			default:
				// parse() only hands over tags starting with '#' or '&'
				assert(0);
			}
		}
		return null;
	}

	/// True when line numbers are enabled and `content` is not blank.
	auto needsLineNumbers(string content) const {
		return options_.lineNumbers && !isAllWhite(content);
	}

	/// Builds the `{{;src:id:line:column name}}` marker for `loc`.
	string sourceInfo(SourceLoc loc) {
		return format("{{;src:%d:%d:%d %s}}", loc.id, loc.line, loc.column, mgr_.name(loc.id));
	}

private:
	/**
	 * Consumes the next token and throws a `PreParserException` if it matches any of
	 * `args`. Each argument is a `Token` (compared for equality), a string (matched
	 * against separators), a `Token.Kind` or a `Token.KeywordKind`.
	 *
	 * Returns: the consumed token.
	 */
	Token expectNot(Args...)(ref Lexer lex, Context context, auto ref Args args) {
		auto tok = lex.front;
		lex.popFront;

		foreach(i, Arg; Args) {
			static assert(is(Arg == Token) || isSomeString!Arg || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
			// each branch skips to the next candidate when the token does not match it
			static if (is(Arg == Token)) {
				if (tok != args[i])
					continue;
			} else static if (isSomeString!Arg) {
				if ((tok.kind != Token.Kind.Separator) || (tok.value != args[i]))
					continue;
			} else static if (is(Arg == Token.Kind)) {
				if (tok.kind != args[i])
					continue;
			} else static if (is(Arg == Token.KeywordKind)) {
				if (!tok.keyword(args[i]))
					continue;
			}

			{
				static if (is(Arg == Token)) {
					throw new PreParserException(context.loc, format("unexpected '%s'", args[i]));
				} else {
					throw new PreParserException(context.loc, format("unexpected '%s'", tok.value));
				}
			}
		}

		return tok;
	}

	/**
	 * Consumes the next token and returns it if it matches any of `args`. Each
	 * argument is a `Token` (compared for equality), a string (matched against
	 * separators), a `Token.Kind` or a `Token.KeywordKind`.
	 *
	 * Throws: `PreParserException` listing the accepted alternatives and the token
	 *         that was found instead.
	 */
	Token expect(Args...)(ref Lexer lex, Context context, auto ref Args args) {
		auto tok = lex.front;
		lex.popFront;

		foreach(i, Arg; Args) {
			static assert(is(Arg == Token) || isSomeString!Arg || is(Arg == Token.Kind) || is(Arg == Token.KeywordKind));
			static if (is(Arg == Token)) {
				if (tok == args[i])
					return tok;
			} else static if (isSomeString!Arg) {
				if ((tok.kind == Token.Kind.Separator) && (tok.value == args[i]))
					return tok;
			} else static if (is(Arg == Token.Kind)) {
				if (tok.kind == args[i])
					return tok;
			} else static if (is(Arg == Token.KeywordKind)) {
				if (tok.keyword(args[i]))
					return tok;
			}
		}

		// nothing matched: "expected 'a', 'b', or 'c' but found 'x'"
		auto exception = appender!string;

		foreach(i, arg; args) {
			if (i == 0) {
				formattedWrite(&exception, "expected '%s'", arg);
			} else if (i + 1 == args.length) {
				formattedWrite(&exception, ", or '%s'", arg);
			} else {
				formattedWrite(&exception, ", '%s'", arg);
			}
		}

		formattedWrite(&exception, " but found '%s'", tok);

		throw new PreParserException(context.loc, exception.data);
	}

	/// A macro definition.
	static struct Def {
		enum Flags : uint {
			NotYetDefined   = 1 << 0,	/// block macro whose closing tag has not been seen yet
			Inline          = 1 << 1,	/// declared with ':' instead of a block body
		}

		string name;	/// macro name
		string value;	/// macro body, set when a block definition is closed
		string[] args;	/// formal parameter names
		uint flags;	/// combination of `Flags`

		SourceLoc loc;	/// location of the defining tag

		/// Renders `name(arg1, arg2, ...)` using the given `name` and this definition's parameters.
		string pretty(string name) const {
			Appender!string app;
			app.reserve(1024);

			app.put(name);

			app.put("(");

			foreach(i, arg; args) {
				app.put(arg);
				if (i != args.length - 1)
					app.put(", ");
			}
			app.put(")");

			return app.data;
		}
	}

	Def* def_;		// outermost block definition currently being recorded, null if none
	size_t depth_;		// nesting depth of open block definitions

	Def[string][] defs_;	// macro tables, one per scope, innermost last
	string[string][] envs_;	// argument environments, one per scope, innermost last

	Source source_;		// root source being pre-parsed

	PreParserOptions options_;

	SourceManager* mgr_;
	Context[] contexts_;	// stack of sources currently being parsed, innermost last
}
476 
477 
/**
 * Returns `true` when every character of `range` is Unicode whitespace; an
 * empty range counts as all white.
 *
 * The input is decoded to code points first, so multi-byte whitespace in UTF-8
 * text (e.g. U+00A0) is recognised. Invalid sequences decode to the replacement
 * character and therefore count as non-white.
 */
private @property bool isAllWhite(R)(R range) {
	// local imports: the module does not import these itself
	import std.algorithm.searching : all;
	import std.uni : isWhite;
	import std.utf : byDchar;

	return range.byDchar.all!isWhite;
}