module vayne.source.token;


import std.array;
import std.ascii;
import std.format;


import vayne.source.source;


/**
 * A single lexical token: its text, its classification and the source
 * location it was read from.
 *
 * The classification (kind, literal/keyword sub-kind, literal suffix size and
 * flags) is packed into four bytes that are overlaid with `bits_`, so that the
 * whole classification can be compared with a single integer comparison.
 */
struct Token {
    /// Top-level classification of a token.
    enum Kind : ubyte {
        Undefined = 0,
        EndOfInput,
        Separator,
        Identifier,
        Keyword,
        Literal,
    }

    /// Sub-kind stored for tokens whose kind is `Kind.Keyword`.
    enum KeywordKind : ubyte {
        False,
        Null,
        True,
        In,
        Def,
        Undef,
        Set,
        Push,
        Pop,
        As,
    }

    /// Sub-kind stored for tokens whose kind is `Kind.Literal`.
    /// Every member after `String` is treated as numeric by `numeric()`.
    enum LiteralKind : ubyte {
        Char = 0,
        String,
        Bin,
        Oct,
        Dec,
        Hex,
        Float,
    }

    /// Bit flags stored in `flags_`.
    enum Flags : ubyte {
        None = 0,
        NeedsUnescaping = 1 << 0,
    }

    /// Creates a token of kind `Undefined` carrying only a location.
    this(SourceLoc loc) {
        kind_ = Kind.Undefined;
        loc_ = loc;
    }

    /// Creates an `Identifier` token with the given text.
    this(string name, SourceLoc loc) {
        name_ = name;
        kind_ = Kind.Identifier;
        loc_ = loc;
    }

    /// Creates a token of an explicitly given kind.
    this(Kind kind, string name, SourceLoc loc) {
        name_ = name;
        kind_ = kind;
        loc_ = loc;
    }

    /**
     * Creates a `Literal` token.
     *
     * Params:
     *   name        = text of the literal
     *   kindLiteral = which kind of literal this is
     *   suffixSize  = number of suffix characters (see `suffix`); must fit in a `ubyte`
     *   flags       = combination of `Flags`; must fit in a `ubyte`
     *   loc         = source location of the token
     */
    this(string name, LiteralKind kindLiteral, size_t suffixSize, size_t flags, SourceLoc loc) {
        // Both values are stored in a single byte each; reject anything the
        // casts below would silently truncate.
        assert(flags <= ubyte.max);
        assert(suffixSize <= ubyte.max);

        kind_ = Kind.Literal;
        kindLiteralOrKeyword_ = kindLiteral;
        literalSuffixSize_ = cast(ubyte)suffixSize;
        flags_ = cast(ubyte)flags;
        name_ = name;
        loc_ = loc;
    }

    /**
     * Creates a `Keyword` token.
     *
     * Params:
     *   name        = text of the keyword
     *   kindKeyword = which keyword this is
     *   flags       = combination of `Flags`; must fit in a `ubyte`
     *   loc         = source location of the token
     */
    this(string name, KeywordKind kindKeyword, size_t flags, SourceLoc loc) {
        // Stored in a single byte; reject anything the cast would truncate.
        assert(flags <= ubyte.max);

        kind_ = Kind.Keyword;
        kindLiteralOrKeyword_ = kindKeyword;
        literalSuffixSize_ = 0;
        flags_ = cast(ubyte)flags;
        name_ = name;
        loc_ = loc;
    }

    /// Two tokens are equal when their packed classification and their text
    /// match. The source location is not part of the comparison.
    auto opEquals(in Token other) const {
        return (bits_ == other.bits_) && (name_ == other.name_);
    }

    /// True when this token marks the end of the input.
    auto eoi() const {
        return kind_ == Kind.EndOfInput;
    }

    /// True for an identifier; when `name` is non-empty the text must match too.
    auto ident(string name = null) const {
        return isa(Kind.Identifier, name);
    }

    /// True for a separator; when `name` is non-empty the text must match too.
    auto sep(string name = null) const {
        return isa(Kind.Separator, name);
    }

    /// True for a separator whose text is exactly the single character `ch`.
    auto sep(char ch) const {
        // The kind check keeps one-character identifiers and literals from
        // being mistaken for separators. Indexing (rather than `front`)
        // compares the raw code unit without UTF decoding.
        return (kind_ == Kind.Separator) && (name_.length == 1) && (name_[0] == ch);
    }

    /// True for a literal; when `name` is non-empty the text must match too.
    auto literal(string name = null) const {
        return isa(Kind.Literal, name);
    }

    /// True for any keyword.
    auto keyword() const {
        return (kind_ == Kind.Keyword);
    }

    /// True for the specific keyword `keyword`.
    auto keyword(KeywordKind keyword) const {
        return (kind_ == Kind.Keyword) && (kindLiteralOrKeyword_ == keyword);
    }

    /// True for a literal whose literal kind comes after `LiteralKind.String`
    /// (`Bin`, `Oct`, `Dec`, `Hex` or `Float`).
    auto numeric() const {
        return (kind_ == Kind.Literal) && (kindLiteralOrKeyword_ > LiteralKind.String);
    }

    /// True when the token has the given kind and, if `name` is non-empty,
    /// the given text.
    auto isa(Kind kind, string name = null) const {
        return (kind_ == kind) && (name.empty || (name == name_));
    }

    /// Source location this token was created with.
    SourceLoc loc() const {
        return loc_;
    }

    /// Whether the `NeedsUnescaping` flag is set. Only valid for literals.
    auto needsUnescaping() const {
        assert(kind_ == Kind.Literal);
        return (flags_ & Flags.NeedsUnescaping) != 0;
    }

    /// Raw text of the token, exactly as stored.
    string value() const {
        return name_;
    }

    /// Display name of the token: the text for identifiers, separators and
    /// keywords, and a fixed description for the other kinds.
    string name() const {
        // Switching over the enum-typed `kind` lets the compiler verify that
        // every member of `Kind` is handled.
        final switch (kind) with (Kind) {
        case Undefined:
            return "undefined";
        case Literal:
            return "literal";
        case Identifier:
        case Separator:
        case Keyword:
            return name_;
        case EndOfInput:
            return "end of source";
        }
    }

    /// Pointer to the first character of the token text.
    @property auto ptr() const {
        return name_.ptr;
    }

    /// Length of the token text in code units.
    @property auto length() const {
        return name_.length;
    }

    /// True when the token text is empty.
    @property auto empty() const {
        return name_.empty;
    }

    /// Pointer one past the last character of the token text.
    @property auto tail() const {
        return name_.ptr + name_.length;
    }

    /// First element of the token text (via the range primitive `front`).
    @property auto front() const {
        return name_.front;
    }

    /// Last element of the token text (via the range primitive `back`).
    @property auto back() const {
        return name_.back;
    }

    /// Code unit of the token text at `index`.
    @property auto opIndex(size_t index) const {
        return name_[index];
    }

    /// Keyword sub-kind. Only valid for keywords.
    @property KeywordKind kindKeyword() const {
        assert(kind_ == Kind.Keyword);
        return cast(KeywordKind)(kindLiteralOrKeyword_);
    }

    /// Literal sub-kind. Only valid for literals.
    @property LiteralKind kindLiteral() const {
        assert(kind_ == Kind.Literal);
        return cast(LiteralKind)(kindLiteralOrKeyword_);
    }

    /// Suffix of a literal: the `literalSuffixSize_` characters located
    /// directly after the token text in memory.
    /// NOTE(review): this reads beyond `name_`, so it presumably relies on
    /// `name_` being a slice of a larger buffer that also contains the
    /// suffix - confirm against the lexer.
    @property string suffix() const {
        assert(kind_ == Kind.Literal);
        return tail[0..literalSuffixSize_];
    }

    /// Top-level kind of the token.
    @property Kind kind() const {
        return cast(Kind)(kind_);
    }

    /**
     * Text of a literal with backslash escape sequences resolved.
     *
     * Supported escapes: `\n \r \t \\ \" \' \? \a \b \f \v`, `\x` followed by
     * one or two hexadecimal digits, and a backslash followed by one to three
     * octal digits. Any other sequence (including a backslash at the very end
     * of the text) is treated as invalid and trips `assert(0)`.
     *
     * Returns: the stored text unchanged when the `NeedsUnescaping` flag is
     * not set, otherwise a newly built string.
     */
    @property auto unescaped() const {
        static int hexValue(char x) {
            assert(isHexDigit(x));
            return (x <= '9') ? (x - '0') : (10 + (toLower(x) - 'a'));
        }

        assert(kind == Kind.Literal);

        if (!needsUnescaping)
            return value;

        auto cur = name_.ptr;
        auto end = cur + name_.length;

        auto app = appender!string;

        // `run` marks the start of the pending stretch of ordinary characters
        // that has not been copied to the output yet.
        auto run = cur;
        while (cur != end) {
            auto ch = *cur++;
            if (ch != '\\')
                continue;

            // Flush everything before the backslash (cur is one past it).
            app.put(run[0..(cur - run) - 1]);

            // A backslash as the last character has nothing to escape.
            // Without this check the code would read past the text and step
            // `cur` beyond `end`, so the loop condition would never be met.
            if (cur == end)
                assert(0); // invalid escape sequence - nothing after backslash

            switch (*cur) {
            case 'n': app.put('\n'); ++cur; break;
            case 'r': app.put('\r'); ++cur; break;
            case 't': app.put('\t'); ++cur; break;
            case '\\':app.put('\\'); ++cur; break;
            case '\"':app.put('"'); ++cur; break;
            case '\'':app.put('\''); ++cur; break;
            case '?': app.put(cast(char)0x3f); ++cur; break;
            case 'a': app.put('\a'); ++cur; break;
            case 'b': app.put('\b'); ++cur; break;
            case 'f': app.put('\f'); ++cur; break;
            case 'v': app.put('\v'); ++cur; break;
            case 'x':
                ++cur;
                if ((cur != end) && isHexDigit(*cur)) {
                    int code = hexValue(*cur);
                    ++cur;
                    // Optional second hex digit.
                    if ((cur != end) && isHexDigit(*cur)) {
                        code <<= 4;
                        code |= hexValue(*cur);
                        ++cur;
                    }
                    app.put(cast(dchar)code);
                } else {
                    assert(0); // invalid escape sequence - expected digit after \x
                }
                break;
            default:
                if (isOctalDigit(*cur)) {
                    int code = *cur - '0';
                    ++cur;
                    // Up to two further octal digits.
                    for (int digits = 1; (digits < 3) && (cur != end) && isOctalDigit(*cur); ++digits) {
                        code = (code << 3) | (*cur - '0');
                        ++cur;
                    }
                    // Emit for one-, two- and three-digit escapes alike.
                    app.put(cast(dchar)code);
                } else {
                    assert(0); // invalid escape sequence
                }
                break;
            }
            run = cur;
        }

        // Copy whatever ordinary text follows the last escape sequence.
        app.put(run[0..end - run]);
        return app.data;
    }

    /// Text of the token; string literals are wrapped in double quotes.
    string toString() const {
        if ((kind == Kind.Literal) && (kindLiteral == LiteralKind.String))
            return format("\"%s\"", name_);
        return name_;
    }

private:
    // The four classification bytes share storage with `bits_`, which
    // `opEquals` uses to compare them all at once.
    union {
        uint bits_;
        struct {
            ubyte kind_;
            ubyte kindLiteralOrKeyword_;
            ubyte literalSuffixSize_;
            ubyte flags_;
        }
    }

    string name_;
    SourceLoc loc_;
}