1 module vayne.source.token;
2 
3 
4 import std.array;
5 import std.ascii;
6 import std.format;
7 
8 
9 import vayne.source.source;
10 
11 
/**
 * A single lexical token: its kind, its text and where it was found.
 *
 * The kind, the literal/keyword sub-kind, the literal suffix length and the
 * flags are each stored in one byte and overlaid with a 32-bit word
 * (`bits_`) so that they can be compared in one operation.
 */
struct Token {
	/// Broad category of a token.
	enum Kind : ubyte {
		Undefined = 0,
		EndOfInput,
		Separator,
		Identifier,
		Keyword,
		Literal,
	}

	/// Sub-kind stored for tokens of kind `Kind.Keyword`.
	enum KeywordKind : ubyte {
		False,
		Null,
		True,
		In,
		Def,
		Undef,
		As,
	}

	/// Sub-kind stored for tokens of kind `Kind.Literal`.
	/// `numeric` relies on every numeric kind being declared after `String`.
	enum LiteralKind : ubyte {
		Char = 0,
		String,
		Bin,
		Oct,
		Dec,
		Hex,
		Float,
	}

	/// Bit flags stored alongside the kind.
	enum Flags : ubyte {
		None            = 0,
		NeedsUnescaping = 1 << 0,
	}

	/// Creates a token of kind `Undefined` at `loc`.
	this(SourceLoc loc) {
		kind_ = Kind.Undefined;
		loc_ = loc;
	}

	/// Creates an identifier token named `name` at `loc`.
	this(string name, SourceLoc loc) {
		name_ = name;
		kind_ = Kind.Identifier;
		loc_ = loc;
	}

	/// Creates a token of an explicit `kind` with text `name` at `loc`.
	this(Kind kind, string name, SourceLoc loc) {
		name_ = name;
		kind_ = kind;
		loc_ = loc;
	}

	/**
	 * Creates a literal token.
	 *
	 * Params:
	 *   name        = text of the literal
	 *   kindLiteral = which kind of literal this is
	 *   suffixSize  = number of suffix characters; must fit in a `ubyte`
	 *   flags       = combination of `Flags` bits; must fit in a `ubyte`
	 *   loc         = source location of the token
	 */
	this(string name, LiteralKind kindLiteral, size_t suffixSize, size_t flags, SourceLoc loc) {
		// Both values are narrowed to a single byte below, so the guard must
		// check the ubyte range, not the range of size_t.
		assert(flags <= ubyte.max);
		assert(suffixSize <= ubyte.max);

		kind_ = Kind.Literal;
		kindLiteralOrKeyword_ = kindLiteral;
		literalSuffixSize_ = cast(ubyte)suffixSize;
		flags_ = cast(ubyte)flags;
		name_ = name;
		loc_ = loc;
	}

	/**
	 * Creates a keyword token.
	 *
	 * Params:
	 *   name        = text of the keyword
	 *   kindKeyword = which keyword this is
	 *   flags       = combination of `Flags` bits; must fit in a `ubyte`
	 *   loc         = source location of the token
	 */
	this(string name, KeywordKind kindKeyword, size_t flags, SourceLoc loc) {
		// flags is narrowed to a single byte below.
		assert(flags <= ubyte.max);

		kind_ = Kind.Keyword;
		kindLiteralOrKeyword_ = kindKeyword;
		literalSuffixSize_ = 0;
		flags_ = cast(ubyte)flags;
		name_ = name;
		loc_ = loc;
	}

	/// Two tokens are equal when their packed kind/sub-kind/suffix/flags bytes
	/// and their text match. The source location is not compared.
	auto opEquals(in Token other) const {
		return (bits_ == other.bits_) && (name_ == other.name_);
	}

	/// True when this token marks the end of the input.
	auto eoi() const {
		return kind_ == Kind.EndOfInput;
	}

	/// True for an identifier; when `name` is non-empty the text must match too.
	auto ident(string name = null) const {
		return isa(Kind.Identifier, name);
	}

	/// True for a separator; when `name` is non-empty the text must match too.
	auto sep(string name = null) const {
		return isa(Kind.Separator, name);
	}

	/// True for a separator whose text is exactly the single character `ch`.
	auto sep(char ch) const {
		// The kind is checked as in sep(string), so that a one-character
		// literal or identifier is never mistaken for a separator.
		return (kind_ == Kind.Separator) && (name_.length == 1) && (name_[0] == ch);
	}

	/// True for a literal; when `name` is non-empty the text must match too.
	auto literal(string name = null) const {
		return isa(Kind.Literal, name);
	}

	/// True for any keyword.
	auto keyword() const {
		return (kind_ == Kind.Keyword);
	}

	/// True when this token is the specific keyword `keyword`.
	auto keyword(KeywordKind keyword) const {
		return (kind_ == Kind.Keyword) && (kindLiteralOrKeyword_ == keyword);
	}

	/// True for a literal whose kind is declared after `LiteralKind.String`
	/// (Bin, Oct, Dec, Hex, Float).
	auto numeric() const {
		return (kind_ == Kind.Literal) && (kindLiteralOrKeyword_ > LiteralKind.String);
	}

	/// True when the token has the given `kind` and, if `name` is non-empty,
	/// also exactly that text.
	auto isa(Kind kind, string name = null) const {
		return (kind_ == kind) && (name.empty || (name == name_));
	}

	/// Source location passed to the constructor.
	SourceLoc loc() const {
		return loc_;
	}

	/// True when the `NeedsUnescaping` flag is set. Only valid for literals.
	auto needsUnescaping() const {
		assert(kind_ == Kind.Literal);
		return (flags_ & Flags.NeedsUnescaping) != 0;
	}

	/// Raw text of the token, exactly as stored.
	string value() const {
		return name_;
	}

	/**
	 * Display name of the token: the stored text for identifiers, separators
	 * and keywords, and a fixed description for the other kinds.
	 */
	string name() const {
		// Switching on the enum-typed property (rather than the raw byte)
		// lets the compiler verify that every Kind is handled.
		final switch (kind) with (Kind) {
		case Undefined:
			// Previously unhandled: a token built from a location only would
			// fall through the final switch and abort.
			return "undefined";
		case Literal:
			return "literal";
		case Identifier:
		case Separator:
		case Keyword:
			return name_;
		case EndOfInput:
			return "end of source";
		}
	}

	/// Pointer to the first character of the token text.
	@property auto ptr() const {
		return name_.ptr;
	}

	/// Length of the token text in code units.
	@property auto length() const {
		return name_.length;
	}

	/// True when the token text is empty.
	@property auto empty() const {
		return name_.empty;
	}

	/// Pointer one past the last character of the token text.
	@property auto tail() const {
		return name_.ptr + name_.length;
	}

	/// First character of the token text.
	@property auto front() const {
		return name_.front;
	}

	/// Last character of the token text.
	@property auto back() const {
		return name_.back;
	}

	/// Code unit of the token text at `index`.
	@property auto opIndex(size_t index) const {
		return name_[index];
	}

	/// Keyword sub-kind. Only valid for keyword tokens.
	@property KeywordKind kindKeyword() const {
		assert(kind_ == Kind.Keyword);
		return cast(KeywordKind)(kindLiteralOrKeyword_);
	}

	/// Literal sub-kind. Only valid for literal tokens.
	@property LiteralKind kindLiteral() const {
		assert(kind_ == Kind.Literal);
		return cast(LiteralKind)(kindLiteralOrKeyword_);
	}

	/**
	 * The `literalSuffixSize_` characters located directly after the token
	 * text in memory. Only valid for literal tokens.
	 */
	@property string suffix() const {
		assert(kind_ == Kind.Literal);
		// NOTE(review): this slices past the end of name_, so it presumably
		// relies on name_ being a slice of a larger source buffer - confirm
		// against the lexer.
		return tail[0..literalSuffixSize_];
	}

	/// Kind of the token.
	@property Kind kind() const {
		return cast(Kind)(kind_);
	}

	/**
	 * Text of a literal with backslash escape sequences replaced.
	 *
	 * Handles the single-character escapes n r t \ " ' ? a b f v, `\x`
	 * followed by one or two hex digits, and a backslash followed by one to
	 * three octal digits. When the `NeedsUnescaping` flag is not set the raw
	 * text is returned unchanged. Any other escape sequence, or a backslash
	 * at the very end of the text, halts via `assert(0)`.
	 */
	@property auto unescaped() const {
		static int hexValue(char x) {
			assert(isHexDigit(x));
			return (x <= '9') ? (x - '0') : (10 + (toLower(x) - 'a'));
		}

		assert(kind == Kind.Literal);

		if (!needsUnescaping)
			return value;

		auto cur = name_.ptr;
		auto end = cur + name_.length;

		auto app = appender!string;

		// `run` marks the start of the pending stretch of ordinary characters.
		auto run = cur;
		while (cur != end) {
			auto ch = *cur++;
			if (ch != '\\')
				continue;

			// Flush everything before the backslash.
			app.put(run[0..(cur - run) - 1]);

			// A backslash as the very last character escapes nothing; do not
			// read past the end of the text.
			if (cur == end)
				assert(0); // invalid escape sequence - nothing after backslash

			switch (*cur) {
			case 'n': app.put('\n'); ++cur; break;
			case 'r': app.put('\r'); ++cur; break;
			case 't': app.put('\t'); ++cur; break;
			case '\\':app.put('\\'); ++cur; break;
			case '\"':app.put('"');  ++cur; break;
			case '\'':app.put('\''); ++cur; break;
			case '?': app.put(cast(char)0x3f); ++cur; break;
			case 'a': app.put('\a'); ++cur; break;
			case 'b': app.put('\b'); ++cur; break;
			case 'f': app.put('\f'); ++cur; break;
			case 'v': app.put('\v'); ++cur; break;
			case 'x':
				++cur;
				if ((cur != end) && isHexDigit(*cur)) {
					int code = hexValue(*cur);
					++cur;
					// Optional second hex digit.
					if ((cur != end) && isHexDigit(*cur)) {
						code = (code << 4) | hexValue(*cur);
						++cur;
					}
					app.put(cast(dchar)code);
				} else {
					assert(0); // invalid escape sequence - expected digit after \x
				}
				break;
			default:
				if (isOctalDigit(*cur)) {
					int code = *cur - '0';
					++cur;
					// Up to two further octal digits.
					foreach (_; 0 .. 2) {
						if ((cur == end) || !isOctalDigit(*cur))
							break;
						code = (code << 3) | (*cur - '0');
						++cur;
					}
					// Emit for one, two or three digits alike; a one-digit
					// escape such as \0 used to be dropped.
					app.put(cast(dchar)code);
				} else {
					assert(0); // invalid escape sequence
				}
				break;
			}
			run = cur;
		}

		// Flush whatever followed the last escape sequence.
		app.put(run[0.. end - run]);
		return app.data;
	}

	/// Token text; string literals are wrapped in double quotes.
	string toString() const {
		if ((kind == Kind.Literal) && (kindLiteral == LiteralKind.String))
			return format("\"%s\"", name_);
		return name_;
	}

private:
	// The four one-byte fields share storage with bits_, which opEquals uses
	// to compare all of them at once.
	union {
		uint bits_;
		struct {
			ubyte kind_;
			ubyte kindLiteralOrKeyword_;
			ubyte literalSuffixSize_;
			ubyte flags_;
		}
	}

	string name_;
	SourceLoc loc_;
}