module vayne.source.token;


import std.array;
import std.ascii;
import std.format;


import vayne.source.source;


/// A single lexical token: its kind, its text (`name_`) and its source location.
///
/// Kind, literal/keyword sub-kind, literal suffix size and flags are each stored
/// in one byte and overlaid with a single `uint` (see the private union at the
/// bottom), so equality can compare all four at once.
struct Token {
	/// Broad token category.
	enum Kind : ubyte {
		Undefined = 0,
		EndOfInput,
		Separator,
		Identifier,
		Keyword,
		Literal,
	}

	/// Sub-kind stored for tokens of kind `Kind.Keyword`.
	enum KeywordKind : ubyte {
		False,
		Null,
		True,
		In,
		Def,
		Undef,
		As,
	}

	/// Sub-kind stored for tokens of kind `Kind.Literal`.
	/// Every member ordered after `String` is treated as numeric by `numeric()`.
	enum LiteralKind : ubyte {
		Char = 0,
		String,
		Bin,
		Oct,
		Dec,
		Hex,
		Float,
	}

	/// Bit flags stored in `flags_`.
	enum Flags : ubyte {
		None = 0,
		/// The literal text contains backslash escapes; see `unescaped`.
		NeedsUnescaping = 1 << 0,
	}

	/// Creates an `Undefined` token that only carries a location.
	this(SourceLoc loc) {
		kind_ = Kind.Undefined;
		loc_ = loc;
	}

	/// Creates an `Identifier` token with the given text.
	this(string name, SourceLoc loc) {
		name_ = name;
		kind_ = Kind.Identifier;
		loc_ = loc;
	}

	/// Creates a token of an arbitrary kind with the given text.
	this(Kind kind, string name, SourceLoc loc) {
		name_ = name;
		kind_ = kind;
		loc_ = loc;
	}

	/// Creates a `Literal` token.
	///
	/// Params:
	///   name        = literal text
	///   kindLiteral = literal sub-kind
	///   suffixSize  = length of the literal suffix; must fit into a byte
	///   flags       = combination of `Flags`; must fit into a byte
	///   loc         = source location
	this(string name, LiteralKind kindLiteral, size_t suffixSize, size_t flags, SourceLoc loc) {
		// Both values are narrowed to ubyte below, so that is the range to validate.
		assert(flags <= ubyte.max, "token flags do not fit into a byte");
		assert(suffixSize <= ubyte.max, "literal suffix size does not fit into a byte");

		kind_ = Kind.Literal;
		kindLiteralOrKeyword_ = kindLiteral;
		literalSuffixSize_ = cast(ubyte)suffixSize;
		flags_ = cast(ubyte)flags;
		name_ = name;
		loc_ = loc;
	}

	/// Creates a `Keyword` token.
	///
	/// Params:
	///   name        = keyword text
	///   kindKeyword = keyword sub-kind
	///   flags       = combination of `Flags`; must fit into a byte
	///   loc         = source location
	this(string name, KeywordKind kindKeyword, size_t flags, SourceLoc loc) {
		// flags is narrowed to ubyte below, so that is the range to validate.
		assert(flags <= ubyte.max, "token flags do not fit into a byte");

		kind_ = Kind.Keyword;
		kindLiteralOrKeyword_ = kindKeyword;
		literalSuffixSize_ = 0;
		flags_ = cast(ubyte)flags;
		name_ = name;
		loc_ = loc;
	}

	/// Two tokens are equal when kind, sub-kind, suffix size, flags and text match.
	/// The source location is not part of the comparison.
	auto opEquals(in Token other) const {
		return (bits_ == other.bits_) && (name_ == other.name_);
	}

	/// True for the end-of-input token.
	auto eoi() const {
		return kind_ == Kind.EndOfInput;
	}

	/// True for an identifier; when `name` is non-empty the text must match as well.
	auto ident(string name = null) const {
		return isa(Kind.Identifier, name);
	}

	/// True for a separator; when `name` is non-empty the text must match as well.
	auto sep(string name = null) const {
		return isa(Kind.Separator, name);
	}

	/// True for a separator whose text is exactly the single character `ch`.
	auto sep(char ch) const {
		// The kind check keeps one-character identifiers and literals (e.g. the
		// string literal "(") from being mistaken for a separator.
		return (kind_ == Kind.Separator) && (name_.length == 1) && (name_[0] == ch);
	}

	/// True for a literal; when `name` is non-empty the text must match as well.
	auto literal(string name = null) const {
		return isa(Kind.Literal, name);
	}

	/// True for any keyword.
	auto keyword() const {
		return (kind_ == Kind.Keyword);
	}

	/// True for the specific keyword `keyword`.
	auto keyword(KeywordKind keyword) const {
		return (kind_ == Kind.Keyword) && (kindLiteralOrKeyword_ == keyword);
	}

	/// True for a literal whose sub-kind is ordered after `LiteralKind.String`.
	auto numeric() const {
		return (kind_ == Kind.Literal) && (kindLiteralOrKeyword_ > LiteralKind.String);
	}

	/// True when the token has the given kind and, if `name` is non-empty, that text.
	auto isa(Kind kind, string name = null) const {
		return (kind_ == kind) && (name.empty || (name == name_));
	}

	/// Source location the token was created with.
	SourceLoc loc() const {
		return loc_;
	}

	/// True when the literal was flagged with `Flags.NeedsUnescaping`.
	/// Only valid for literal tokens.
	auto needsUnescaping() const {
		assert(kind_ == Kind.Literal);
		return (flags_ & Flags.NeedsUnescaping) != 0;
	}

	/// Raw token text.
	string value() const {
		return name_;
	}

	/// Descriptive name of the token: the token text for identifiers, separators
	/// and keywords, and a fixed description for the remaining kinds.
	string name() const {
		// Switching on the typed `kind` property lets the compiler verify that
		// every member of Kind is handled.
		final switch (kind) with (Kind) {
		case Undefined:
			return "undefined";
		case Literal:
			return "literal";
		case Identifier:
		case Separator:
		case Keyword:
			return name_;
		case EndOfInput:
			return "end of source";
		}
	}

	/// Pointer to the first character of the token text.
	@property auto ptr() const {
		return name_.ptr;
	}

	/// Length of the token text in code units.
	@property auto length() const {
		return name_.length;
	}

	/// True when the token text is empty.
	@property auto empty() const {
		return name_.empty;
	}

	/// Pointer one past the last character of the token text.
	@property auto tail() const {
		return name_.ptr + name_.length;
	}

	/// First character of the token text.
	@property auto front() const {
		return name_.front;
	}

	/// Last character of the token text.
	@property auto back() const {
		return name_.back;
	}

	/// Code unit of the token text at `index`.
	@property auto opIndex(size_t index) const {
		return name_[index];
	}

	/// Keyword sub-kind. Only valid for keyword tokens.
	@property KeywordKind kindKeyword() const {
		assert(kind_ == Kind.Keyword);
		return cast(KeywordKind)(kindLiteralOrKeyword_);
	}

	/// Literal sub-kind. Only valid for literal tokens.
	@property LiteralKind kindLiteral() const {
		assert(kind_ == Kind.Literal);
		return cast(LiteralKind)(kindLiteralOrKeyword_);
	}

	/// Literal suffix. Only valid for literal tokens.
	///
	/// The suffix is read from the memory directly following the token text.
	/// NOTE(review): this presumably relies on the text being a slice of a larger
	/// source buffer in which the suffix immediately follows - confirm against the
	/// code that creates literal tokens.
	@property string suffix() const {
		assert(kind_ == Kind.Literal);
		return tail[0..literalSuffixSize_];
	}

	/// Token kind.
	@property Kind kind() const {
		return cast(Kind)(kind_);
	}

	/// Literal text with backslash escape sequences replaced by the characters
	/// they denote. Only valid for literal tokens.
	///
	/// Handles the single-character escapes n, r, t, \, ", ', ?, a, b, f and v,
	/// hexadecimal escapes with one or two digits (\xH, \xHH) and octal escapes
	/// with one to three digits. Any other sequence, including a backslash at the
	/// very end of the text, is treated as an internal error (`assert(0)`).
	///
	/// Returns: the raw text when the token is not flagged as needing unescaping,
	/// otherwise a newly built string.
	@property auto unescaped() const {
		static int hexValue(char x) {
			assert(isHexDigit(x));
			return (x <= '9') ? (x - '0') : (10 + (toLower(x) - 'a'));
		}

		assert(kind == Kind.Literal);

		if (!needsUnescaping)
			return value;

		string text = name_;
		auto app = appender!string;

		size_t run = 0; // start of the pending run of verbatim characters
		size_t pos = 0; // current read position

		while (pos < text.length) {
			if (text[pos] != '\\') {
				++pos;
				continue;
			}

			// flush everything before the backslash, then step over it
			app.put(text[run..pos]);
			++pos;

			if (pos == text.length)
				assert(0, "invalid escape sequence - unexpected end of literal after backslash");

			immutable ch = text[pos];
			switch (ch) {
			case 'n':  app.put('\n'); ++pos; break;
			case 'r':  app.put('\r'); ++pos; break;
			case 't':  app.put('\t'); ++pos; break;
			case '\\': app.put('\\'); ++pos; break;
			case '\"': app.put('"');  ++pos; break;
			case '\'': app.put('\''); ++pos; break;
			case '?':  app.put('?');  ++pos; break;
			case 'a':  app.put('\a'); ++pos; break;
			case 'b':  app.put('\b'); ++pos; break;
			case 'f':  app.put('\f'); ++pos; break;
			case 'v':  app.put('\v'); ++pos; break;
			case 'x': {
				++pos;
				if ((pos == text.length) || !isHexDigit(text[pos]))
					assert(0, "invalid escape sequence - expected hex digit after \\x");

				int code = hexValue(text[pos++]);
				if ((pos < text.length) && isHexDigit(text[pos]))
					code = (code << 4) | hexValue(text[pos++]);

				app.put(cast(dchar)code);
				break;
			}
			default: {
				if (!isOctalDigit(ch))
					assert(0, "invalid escape sequence");

				// one to three octal digits; the value is emitted no matter how
				// many digits were present
				int code = 0;
				for (size_t digits = 0; (digits < 3) && (pos < text.length) && isOctalDigit(text[pos]); ++digits)
					code = (code << 3) | (text[pos++] - '0');

				app.put(cast(dchar)code);
				break;
			}
			}

			run = pos;
		}

		// trailing verbatim characters after the last escape
		app.put(text[run..$]);
		return app.data;
	}

	/// Token text; string literals are wrapped in double quotes.
	string toString() const {
		if ((kind == Kind.Literal) && (kindLiteral == LiteralKind.String))
			return format("\"%s\"", name_);
		return name_;
	}

private:
	// bits_ overlays the four byte-sized fields so that opEquals can compare
	// them with a single integer comparison.
	union {
		uint bits_;
		struct {
			ubyte kind_;
			ubyte kindLiteralOrKeyword_;
			ubyte literalSuffixSize_;
			ubyte flags_;
		}
	}

	string name_;
	SourceLoc loc_;
}