1 module vayne.source.compress;
2 
3 
4 import std.array;
5 import std.regex;
6 import std.string;
7 
8 
/// Bit flags selecting which passes compress() applies; combine them with `|`.
enum CompressOptions : uint {
	/// no flag set
	none = 0x00,

	/// replace multiple consecutive spaces with a single space
	removeMultiSpaces = 0x01,

	/// replace line breaks with a space
	removeLineBreaks = 0x02,

	/// remove any html comments - preserving conditional comments
	removeHTMLComments = 0x04,

	/// remove unnecessary spaces around = in tags i.e. <div id = "foo"> -> <div id="foo">
	removeTagSpaces = 0x08,

	/// remove unnecessary quotes around attribute values <div id="foo"> -> <div id=foo>
	removeTagQuotes = 0x10,

	/// remove spaces around some tags i.e <ul> <li>
	removeTagSurroundSpaces = 0x20,

	/// every bit set, i.e. all of the flags above at once
	defaults = uint.max,
}
20 
21 
/**
 * Compresses HTML markup by running the passes selected in `options`.
 *
 * Passes run in a fixed order: line-break handling, whitespace collapsing,
 * comment removal, attribute spacing, attribute quote removal and finally
 * whitespace removal around selected tags.
 *
 * Params:
 *   content = markup to compress
 *   options = bitwise combination of CompressOptions flags
 *
 * Returns: the compressed markup. When line breaks are kept, every "\r\n" or
 * "\n" of the input comes back as a single "\n".
 */
auto compress(string content, CompressOptions options) {
	// Placeholder that shields line breaks from the whitespace passes below.
	// Input that already contains this literal text gets it turned into "\n".
	enum lineBreakMarker = "%%~LB~%%";

	immutable keepLineBreaks = (options & CompressOptions.removeLineBreaks) == 0;

	if (keepLineBreaks) {
		content = content.replaceAll(linebreaks, lineBreakMarker);
	} else {
		// Replace line breaks with a space even when removeMultiSpaces is off;
		// previously they were only dropped as a side effect of that pass.
		// This also lets the comment pass see multi-line comments on one line.
		content = content.replaceAll(linebreaks, " ");
	}

	if (options & CompressOptions.removeMultiSpaces)
		content = content.replaceAll(multispaces, " ");

	if (options & CompressOptions.removeHTMLComments)
		content = content.replaceAll(htmlComments, "");

	if (options & CompressOptions.removeTagSpaces) {
		// <div id = "foo"> -> <div id="foo">
		content = content.replaceAll(tagSpaces, "$1=");

		// capture[1] is the tag text up to the trailing whitespace,
		// capture[2] is the closer, either ">" or "/>".
		static string removeEndSpaces(Captures!(string) capture) {
			// keep space if attribute is unquoted before trailing slash
			return ((capture[2][0] == '/') && (!matchAll(capture[1], tagSpacesEndLastQuote).empty)) ? (capture[1] ~ " " ~ capture[2]) : (capture[1] ~ capture[2]);
		}

		content = content.replaceAll!removeEndSpaces(tagSpacesEnd);
	}

	if (options & CompressOptions.removeTagQuotes) {
		// capture[2] is the attribute value without its quotes,
		// capture[3] is an optional "/" that directly followed the closing quote.
		static string removeQuotes(Captures!(string) capture) {
			// re-insert a space before a trailing slash so it does not fuse with the now unquoted value
			return (capture[3].strip.empty) ? ("=" ~ capture[2]) : format("=%s %s", capture[2], capture[3]);
		}

		content = content.replaceAll!removeQuotes(tagQuotes);
	}

	if (options & CompressOptions.removeTagSurroundSpaces)
		content = content.replaceAll(tagSurround, "$1");

	// Put the protected line breaks back.
	if (keepLineBreaks)
		content = content.replace(lineBreakMarker, "\n");

	return content;
}
59 
60 
// Patterns used by compress(). All of them are compiled case-insensitively ("i").
private __gshared {
	// one or more whitespace characters
	auto multispaces = regex(`\s+`, "i");

	// a single line break, either "\r\n" or "\n"
	auto linebreaks = regex(`(?:\r\n)|(?:\n)`, "i");

	// an empty comment, or a comment whose first character after "<!--" is not '['
	// (comments starting with "<!--[" are therefore left alone)
	auto htmlComments = regex(`<!---->|<!--[^\[].*?-->`, "i");

	// whitespace around '=' after an attribute name (capture 1), only when a '>' follows before the next '<'
	auto tagSpaces = regex(`(\s\w+)\s*=\s*(?=[^<]*?>)`, "i");

	// whitespace between the last attribute of a tag (capture 1 holds the tag text so far)
	// and its closer (capture 2, ">" or "/>")
	auto tagSpacesEnd = regex(`(<\w+(?:\s+[a-z0-9-_]+(?:\s*=\s*(?:(?:[a-z0-9-_]+)|(?:"[^"]*")|(?:'[^']*')))?)*)(?:\s+?)(/?>)`, "i");

	// text ending in an unquoted attribute value, e.g. `<br class=foo`.
	// The pattern used to be wrapped in literal '"' characters, which required a
	// quote after the end-of-input anchor and so could never match.
	auto tagSpacesEndLastQuote = regex(`=\s*[a-z0-9-_]+$`, "i");

	// a quoted attribute value made only of [a-z0-9-_] (capture 2), with an optional
	// '/' right after the closing quote (capture 3), inside a tag
	auto tagQuotes = regex(`\s*=\s*(["'])([a-z0-9-_]+?)\1(/?)(?=[^<]*?>)`, "i");

	// optional whitespace on both sides of an opening or closing tag from the listed set (capture 1 is the tag)
	auto tagSurround = regex(`\s*(</?(?:html|head|link|script|style|body|br|p|div|center|dl|form|hr|ol|ul|table|tbody|tr|td|th|tfoot|thead)(?:>|[\s/][^>]*>))\s*`, "i");

	// whitespace between a '>' and the following '<'
	auto tagInterSpace = regex(`>\s+<`, "i");
}