1 module zua.parser.lexer;
2 import zua.diagnostic;
3 import std.variant;
4 import std.utf;
5 import std.typecons;
6 import std.string;
7 import std.container.rbtree;
8 import std.range.primitives;
9 import std.format;
10 import std.conv;
11 
12 /** Describes the type of a token */
/** Describes the type of a token */
enum TokenType {
	Eof,          /// end of input; produced once the lexer runs out of characters
	Identifier,   /// a word-like name (e.g. `foo`, `ident0__`) that is not a reserved keyword
	Keyword,      /// a reserved Lua word such as `and`, `function`, `end`
	String,       /// a quoted (`"…"`/`'…'`) or long-bracket (`[[…]]`, `[=[…]=]`, …) string literal
	Number,       /// a numeric literal: decimal, decimal with exponent, or `0x` hexadecimal
	Symbol,       /// an operator or punctuation mark such as `==`, `..`, `{`
	Comment,      /// a single-line comment beginning with `--`
	BlockComment  /// a long-bracket comment, e.g. `--[[ … ]]`
}
23 
24 /** A single token in a source file */
/** A single token in a source file */
struct Token {

	/** Describes the type of this token */
	TokenType type;

	/** The raw value of this token, exactly as it appears in the source (quotes, brackets and escapes intact) */
	string rawValue;

	/** The parsed value of this token (escape sequences resolved, string delimiters stripped) */
	string value;

	/** Only used for numbers; the numeric value of this token */
	double numValue;

	/** The index at which this token is located (byte offset into the source) */
	size_t index;

}
43 
/** Whether comment tokens should be skipped when reading the token stream */
alias IgnoreCommentTokens = Flag!"ignoreCommentTokens";
/** Whether `consume` should still consume a token that fails to match the requested pattern */
alias ToConsume = Flag!"toConsume";
46 
/** A snapshot of lexer state captured by save() and reinstated by restore() */
private struct LexerState {
	size_t diagnosticsLength; // diagnostics count at save time; restore() truncates back to this
	size_t index;             // byte offset into the source code
	Nullable!Token last;      // the most recently consumed token at save time
}
52 
53 /** A lexer for Lua */
/** A lexer for Lua */
final class Lexer {

	private string code;
	private size_t index;
	private Diagnostic[]* diagnostics;

	private LexerState[] stateStack;

	private RedBlackTree!string keywords;

	private string[] symbols;

	/** The last consumed token */
	Nullable!Token last;

	/** Create a new lexer from a piece of code */
	this(string code, ref Diagnostic[] diagnostics) {
		this.code = code;
		this.diagnostics = &diagnostics;

		keywords = redBlackTree!string(
			"and", "break", "do", "else", "elseif",
			"end", "false", "for", "function", "if",
			"in", "local", "nil", "not", "or",
			"repeat", "return", "then", "true", "until", "while"
		);

		// Matched in declaration order, so longer symbols must precede their
		// prefixes ("..." before ".." before ".").
		symbols = [
			"...",

			"..",
			"==", "~=", "<=", ">=",

			"+", "-", "*", "/", "%", "^", "#",
			"<", ">", "=",
			"(", ")", "{", "}", "[", "]",
			";", ":", ",", ".",
		];
	}

	/** Get the source code that this Lexer is lexing */
	string source() {
		return code;
	}

	/** Save the current state of the lexer */
	void save() {
		const LexerState state = {
			diagnosticsLength: (*diagnostics).length,
			index: index,
			last: last
		};

		stateStack ~= state;
	}

	/** Restore the previously-saved lexer state */
	void restore() {
		const LexerState state = stateStack.back;
		// drop any diagnostics emitted since the matching save()
		(*diagnostics) = (*diagnostics)[0..state.diagnosticsLength];
		index = state.index;
		last = state.last;

		discard();
	}

	/** Discard the previously-saved lexer state */
	void discard() {
		stateStack.popBack();
	}

	/** Peek a segment of characters; returns "" if fewer than `length` characters remain */
	private dstring peekChars(size_t length) {
		dstring result;

		size_t prevIndex = index;

		foreach (i; 0..length) {
			if (prevIndex >= code.length) return "";
			result ~= code.decode!(Yes.useReplacementDchar)(prevIndex);
		}

		return result;
	}

	/** Peek the next char; returns 0 at EOF */
	private dchar peekChar() {
		if (charEof) return 0;
		size_t prevIndex = index;
		return code.decode!(Yes.useReplacementDchar)(prevIndex);
	}

	/** Consume a segment of characters */
	private dstring nextChars(size_t length) {
		dstring result;

		foreach (i; 0..length) {
			if (charEof) return "";
			result ~= code.decode!(Yes.useReplacementDchar)(index);
		}

		return result;
	}

	/** Consume the next char in the lexer; returns 0 at EOF */
	private dchar nextChar() {
		if (charEof) return 0;
		return code.decode!(Yes.useReplacementDchar)(index);
	}

	/** Check if we're at the end of the file */
	private bool charEof() {
		return index >= code.length;
	}

	/** Read a string for as long as the given predicate holds true */
	private string readWhile(bool delegate(dchar c) predicate) {
		string result;
		while (!charEof && predicate(peekChar())) {
			result ~= nextChar();
		}
		return result;
	}

	private void skipWhitespace() {
		readWhile(c => " \t\r\n"d.indexOf(c) != -1);
	}

	private bool isIdentifierCharacter(dchar c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
	}

	private bool isNumeralCharacter(dchar c) {
		return (c >= '0' && c <= '9') || c == '.';
	}

	private bool isHexadecimalCharacter(dchar c) {
		return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
	}

	/**
	 * Try to read a long-bracket literal ("[[…]]", "[=[…]=]", …) starting at the
	 * current '['. Returns null (and rewinds) if this was not a long bracket.
	 */
	private Token* readLongString(string tokenName) {
		// assume char = '['
		const size_t prevIndex = index;
		nextChar();
		string open = "[";
		string closeSeq = "]";
		uint level = 0;
		while (peekChar() == '=') {
			level++;
			open ~= '=';
			closeSeq ~= '=';
			nextChar();
		}
		closeSeq ~= ']';
		if (peekChar() == '[') {
			int closeLevel = -1; // level of the closing sequence being scanned; -1 = not in one
			bool closed = false;
			size_t closeBeginP = -1;
			size_t closeBegin = -1; // span of the last complete (possibly wrong-level) closer
			size_t closeEnd = -1;
			string rawValue = open ~ readWhile(delegate(ic) {
				if (ic == ']') {
					if (closeBeginP != -1) {
						closeBegin = closeBeginP;
						closeEnd = index + 1;
					}

					if (closeLevel == level) {
						closed = true;
						return false;
					}
					else {
						closeLevel = 0;
						closeBeginP = index;
					}
				}
				else if (ic == '=' && closeLevel != -1) {
					closeLevel++;
				}
				else {
					closeLevel = -1;
				}
				return true;
			});

			if (!closed) {
				Diagnostic err;
				err.type = DiagnosticType.Error;
				err.message = "unclosed " ~ tokenName;
				err.add(index);
				const Quickfix close = {
					message: "add closing sequence to " ~ tokenName,
					ranges: [QuickfixRange(index, index, closeSeq)]
				};
				err.quickfix ~= close;
				if (closeBegin != -1) {
					const Quickfix fixExistingClose = {
						message: "fix existing closing sequence to match level of " ~ tokenName,
						ranges: [QuickfixRange(closeBegin, closeEnd, closeSeq)]
					};
					err.quickfix ~= fixExistingClose;
				}
				*diagnostics ~= err;
			}
			else {
				nextChar();
				rawValue ~= "]";
			}

			// Strip the opening bracket sequence, and the closing one only when it
			// was actually present. Previously an unclosed literal had 2 + level
			// characters of real content chopped off its end.
			string value = closed
				? rawValue[2 + level .. $ - (2 + level)]
				: rawValue[2 + level .. $];

			// Lua drops a newline immediately following the opening bracket
			if (value.length > 0 && value[0] == '\n') {
				value = value[1..$];
			}

			Token* result = new Token;
			result.value = value;
			result.rawValue = rawValue;
			result.type = TokenType.String;
			result.index = prevIndex;
			return result;
		}
		else { // turns out it wasn't a string...
			index = prevIndex;
			return null;
		}
	}

	/** Lex a single raw token; no comment filtering, no `last` bookkeeping */
	private Token nextInternal() {
		skipWhitespace();

		Token result;
		result.index = index;

		if (charEof) {
			result.type = TokenType.Eof;
			return result;
		}

		const dchar c = peekChar();

		if (peekChars(2) == "--"d) {
			nextChars(2);

			if (peekChar() == '[') {
				const auto res = readLongString("block comment");
				if (res !is null) {
					result = *res;
					result.type = TokenType.BlockComment;
					return result;
				}
			}

			result.rawValue = result.value = readWhile(delegate(c) {
				return c != '\n';
			});

			result.type = TokenType.Comment;
			return result;
		}

		const dstring hexPrefix = peekChars(2);
		if (hexPrefix == "0x"d || hexPrefix == "0X"d) { // Lua accepts both 0x and 0X
			nextChars(2);
			const string value = readWhile(delegate(c) {
				return isHexadecimalCharacter(c);
			});

			if (value == "") { // if it's just 0x, it's clearly not a number
				index -= 2; // put it back and continue
			}
			else {
				result.value = value.to!ulong(16).to!string;
				result.numValue = value.to!ulong(16).to!double;
				result.rawValue = (hexPrefix == "0x"d ? "0x" : "0X") ~ value;
				result.type = TokenType.Number;
				return result;
			}
		}
		else if (isNumeralCharacter(c)) {
			bool dot = false;
			string value = readWhile(delegate(c) {
				if (c == '.') {
					if (dot) return false; // if we already have a dot, don't read two dots
					dot = true;
				}

				return isNumeralCharacter(c);
			});

			if (value == ".") { // if it's just a dot, it's clearly not a number
				index--; // put it back and continue
			}
			else {
				double numValue = value.to!double;
				const dchar expChar = peekChar();
				if (expChar == 'e' || expChar == 'E') { // Lua accepts both exponent markers
					const size_t preExponent = index;
					string suffix;
					suffix ~= expChar;
					nextChar();
					bool pos = true;
					if (peekChar() == '+') {
						suffix ~= '+';
						nextChar();
					}
					else if (peekChar() == '-') {
						suffix ~= '-';
						pos = false;
						nextChar();
					}
					const string exp = readWhile(delegate(c) {
						return isNumeralCharacter(c) && c != '.';
					});
					if (exp == "") {
						// No digits followed the exponent marker (e.g. "2e" or "2e+x").
						// Rewind so the marker is lexed as its own token instead of
						// crashing with a ConvException on "".to!double.
						index = preExponent;
					}
					else {
						value ~= suffix ~ exp;
						numValue *= 10.0 ^^ ((!pos ? -1 : 1) * exp.to!double);
					}
				}
				result.numValue = numValue;
				result.value = value;
				result.rawValue = value;
				result.type = TokenType.Number;
				return result;
			}
		}

		if (isIdentifierCharacter(c)) {
			const string value = readWhile(&isIdentifierCharacter);
			result.value = value;
			result.rawValue = value;
			result.type = value in keywords ? TokenType.Keyword : TokenType.Identifier;
			return result;
		}

		if (c == '"' || c == '\'') {
			bool escape = false;
			int digits = 0;    // number of decimal-escape digits read so far (max 3)
			int codepoint = 0; // accumulated decimal-escape value
			string value;
			bool closed = false;
			string rawValue = [nextChar()].toUTF8 ~ readWhile(delegate(ic) {
				if (escape) {
					if (ic == 'a') value ~= '\a';
					else if (ic == 'b') value ~= '\b';
					else if (ic == 'f') value ~= '\f';
					else if (ic == 'n') value ~= '\n';
					else if (ic == 'r') value ~= '\r';
					else if (ic == 't') value ~= '\t';
					else if (ic == 'v') value ~= '\v';
					else if (ic >= '0' && ic <= '9') {
						digits = 1;
						codepoint = ic - '0';
					}
					else value ~= ic;
					escape = false;
				}
				else if (ic == '\n') {
					return false;
				}
				else if (ic >= '0' && ic <= '9' && digits > 0 && digits < 3) {
					digits++;
					codepoint *= 10;
					codepoint += ic - '0';
				}
				else {
					if (digits > 0) { // flush a pending decimal escape such as \97
						value ~= cast(char)codepoint;
						digits = 0;
					}

					if (ic == '\\') escape = true;
					else if (ic == c) {
						closed = true;
						return false;
					}
					else value ~= ic;
				}
				return true;
			});

			if (digits > 0) value ~= cast(char)codepoint;

			if (!closed) {
				Diagnostic err;
				err.type = DiagnosticType.Error;
				err.message = "unclosed string";
				err.add(index);
				if (escape) {
					const Quickfix close = {
						message: "remove extraneous '\\' and add closing quote to string",
						ranges: [QuickfixRange(index - 1, index, [c].toUTF8)]
					};
					err.quickfix ~= close;
				}
				else {
					const Quickfix close = {
						message: "add closing quote to string",
						ranges: [QuickfixRange(index, index, [c].toUTF8)]
					};
					err.quickfix ~= close;
				}
				*diagnostics ~= err;
			}
			else {
				rawValue ~= [nextChar()].toUTF8;
			}

			result.value = value;
			result.rawValue = rawValue;
			result.type = TokenType.String;
			return result;
		}

		if (c == '[') {
			const auto res = readLongString("string");
			if (res !is null) return *res;
		}

		foreach (symbol; symbols) {
			const auto utf32sym = symbol.toUTF32;
			if (peekChars(utf32sym.length) == utf32sym) {
				result.value = symbol;
				result.rawValue = symbol;
				result.type = TokenType.Symbol;
				nextChars(utf32sym.length);
				return result;
			}
		}

		// nothing matched: report the character and skip it
		nextChar();
		Diagnostic err;
		err.type = DiagnosticType.Error;
		err.message = "unknown character";
		err.add(result.index, index);
		const Quickfix remove = {
			message: "remove unknown character",
			ranges: [QuickfixRange(result.index, index, "")]
		};
		err.quickfix ~= remove;
		*diagnostics ~= err;

		return nextInternal();
	}

	/** Consume the next token and return it */
	Token next(IgnoreCommentTokens ignoreCommentTokens = Yes.ignoreCommentTokens)() {
		static if (ignoreCommentTokens == Yes.ignoreCommentTokens) {
			Token result = next!(No.ignoreCommentTokens)();
			if (result.type == TokenType.Comment || result.type == TokenType.BlockComment) return next();
			else return result;
		}
		else {
			const Token result = nextInternal();
			last = result;
			return result;
		}
	}

	/** Peek the next token and return it, leaving the lexer in the same state as it was before */
	Token peek(IgnoreCommentTokens ignoreCommentTokens = Yes.ignoreCommentTokens)() {
		save();
		const Token result = next!ignoreCommentTokens();
		restore();
		return result;
	}

	/**
	 * Attempt to consume the next token.
	 * If the token is missing and toConsume is Yes, the following token will be consumed regardless of whether it matches the given pattern.
	 * Returns (false, Token) if the token is not of the required type; returns (true, Token) otherwise
	 */
	Tuple!(bool, Token) consume(ToConsume toConsume = Yes.toConsume, string message = "")
		(TokenType type, string value = "") {
		save();

		Token token;
		if (type == TokenType.Comment || type == TokenType.BlockComment) token = next!(No.ignoreCommentTokens);
		else token = next();

		if (token.type == type && (value == "" || token.value == value)) {
			return tuple(true, token);
		}
		else {
			static if (toConsume == No.toConsume) restore();
			else discard();

			string finalMessage = "expected " ~ type.to!string;

			if (value != "") finalMessage ~= " '" ~ value ~  "'";
			if (message != "") finalMessage ~= " " ~ message;

			finalMessage ~= ", got " ~ token.type.to!string;

			Diagnostic err;
			err.type = DiagnosticType.Error;
			err.message = finalMessage;
			err.add(token);
			*diagnostics ~= err;

			return tuple(false, token);
		}
	}

	/** Check if the next token matches the given pattern */
	bool isNext(TokenType type, string value = "") {
		Token token;
		if (type == TokenType.Comment || type == TokenType.BlockComment) token = peek!(No.ignoreCommentTokens);
		else token = peek();

		return token.type == type && (value == "" || token.value == value);
	}

	/**
	 * Attempt to consume the next token if it matches the given pattern.
	 * Has no effect on the state of the lexer if the token is not found.
	 */
	bool tryConsume(TokenType type, string value = "") {
		if (isNext(type, value)) {
			consume(type, value);
			return true;
		}
		else return false;
	}

	/** Check if the next token is EOF */
	bool eof() {
		return isNext(TokenType.Eof);
	}

}
579 
// A block comment is visible via isNext(BlockComment, ...) while the default
// comment-skipping isNext(Identifier) sees the identifier behind it; peeking
// never consumes, so both checks succeed on the same lexer.
unittest {
	Diagnostic[] diagnostics;
	Lexer lex = new Lexer(q"(--[[ hello
world ]] ident0__)", diagnostics);

	assert(lex.isNext(TokenType.BlockComment, " hello\nworld "));
	assert(lex.isNext(TokenType.Identifier));
	assert(!lex.isNext(TokenType.Identifier, "a"));

	const Token t = lex.next!(No.ignoreCommentTokens);
	assert(t.type == TokenType.BlockComment);
	assert(t.value == " hello\nworld ");

	assert(diagnostics.length == 0);
}

// next() with the default Yes.ignoreCommentTokens skips the leading block
// comment and returns the identifier directly.
unittest {
	Diagnostic[] diagnostics;
	Lexer lex = new Lexer(q"(--[[ hello
world ]] ident0__)", diagnostics);

	const Token t = lex.next();
	assert(t.type == TokenType.Identifier);
	assert(t.value == "ident0__");

	assert(diagnostics.length == 0);
}
607 
// consume(type) with no expected value matches any token of that type.
unittest {
	Diagnostic[] diagnostics;
	Lexer lex = new Lexer(q"(--[[ hello
world ]] ident0__)", diagnostics);

	const Tuple!(bool, Token) t = lex.consume(TokenType.Identifier);
	assert(t[0] == true);
	assert(t[1].type == TokenType.Identifier);
	assert(t[1].value == "ident0__");

	assert(diagnostics.length == 0);
}

// consume(type, value) succeeds when both the type and the exact value match.
unittest {
	Diagnostic[] diagnostics;
	Lexer lex = new Lexer(q"(--[[ hello
world ]] ident0__)", diagnostics);

	const Tuple!(bool, Token) t = lex.consume(TokenType.Identifier, "ident0__");
	assert(t[0] == true);
	assert(t[1].type == TokenType.Identifier);
	assert(t[1].value == "ident0__");

	assert(diagnostics.length == 0);
}
633 
// A value mismatch makes consume return (false, token) and emit one diagnostic;
// the mismatched token is still returned for inspection.
unittest {
	Diagnostic[] diagnostics;
	Lexer lex = new Lexer(q"(--[[ hello
world ]] ident0__)", diagnostics);

	const Tuple!(bool, Token) t = lex.consume(TokenType.Identifier, "asd");
	assert(t[0] == false);
	assert(t[1].type == TokenType.Identifier);
	assert(t[1].value == "ident0__");

	assert(diagnostics.length == 1);
}

// With the default Yes.toConsume, a type mismatch still consumes the offending
// token ("2."), so the lexer has advanced to the following identifier.
unittest {
	Diagnostic[] diagnostics;
	Lexer lex = new Lexer(q"(--[[ hello
world ]] 2. ident0__)", diagnostics);

	const Tuple!(bool, Token) t = lex.consume(TokenType.Identifier);
	assert(t[0] == false);
	assert(t[1].type == TokenType.Number);
	assert(t[1].value == "2.");

	assert(lex.peek().type == TokenType.Identifier);

	assert(diagnostics.length == 1);
}
661 
// With No.toConsume, a mismatch restores the lexer state: the number token is
// still the next token afterwards, but the diagnostic remains recorded.
unittest {
	Diagnostic[] diagnostics;
	Lexer lex = new Lexer(q"(--[[ hello
world ]] 2. ident0__)", diagnostics);

	const Tuple!(bool, Token) t = lex.consume!(No.toConsume)(TokenType.Identifier);
	assert(t[0] == false);
	assert(t[1].type == TokenType.Number);
	assert(t[1].value == "2.");

	assert(lex.peek().type == TokenType.Number);

	assert(diagnostics.length == 1);
}
676 
// String lexing (examples from the Lua reference manual): quoted strings with
// mnemonic escapes, decimal escapes (\97, \049), and long-bracket strings of
// various levels all parse to the same value; a leading newline after the
// opening long bracket is dropped; nested/mismatched closers behave as in Lua.
// Unclosed strings produce diagnostics with working quickfixes.
unittest {
	Diagnostic[] diagnostics;
	const Token a = new Lexer(q"('alo\n123"')", diagnostics).nextInternal();
	const Token b = new Lexer(q"("alo\n123\"")", diagnostics).nextInternal();
	const Token c = new Lexer(q"('\97lo\10\04923"')", diagnostics).nextInternal();
	const Token d = new Lexer(q"([[alo
123"]])", diagnostics).nextInternal();
	const Token e = new Lexer(q"([==[
alo
123"]==])", diagnostics).nextInternal();

	const Token f = new Lexer(q"([==[]==])", diagnostics).nextInternal();
	const Token g = new Lexer(q"([==[]]]==])", diagnostics).nextInternal();
	const Token h = new Lexer(q"([=[[==[]==]]=])", diagnostics).nextInternal();

	assert(a.type == TokenType.String);
	assert(b.type == TokenType.String);
	assert(c.type == TokenType.String);
	assert(d.type == TokenType.String);
	assert(e.type == TokenType.String);
	assert(f.type == TokenType.String);
	assert(g.type == TokenType.String);
	assert(h.type == TokenType.String);

	assert(a.value == "alo\n123\"");
	assert(a.value == b.value);
	assert(a.value == c.value);
	assert(a.value == d.value);
	assert(a.value == e.value);

	assert(f.value == "");
	assert(g.value == "]]");
	assert(h.value == "[==[]==]");

	assert(diagnostics.length == 0);

	// unclosed literals: one quickfix for quoted strings, two for long strings
	// (append a closer / repair the wrong-level closer)
	Diagnostic[] da;

	new Lexer(q"("x\")", da).nextInternal();
	new Lexer(q"([==[x]=])", da).nextInternal();
	new Lexer(q"([==[x]=]])", da).nextInternal();

	assert(da.length == 3);

	assert(da[0].quickfix.length == 1);
	assert(da[1].quickfix.length == 2);
	assert(da[2].quickfix.length == 2);

	assert(da[0].quickfix[0].apply(q"("x\")") == q"("x\"")");

	assert(da[1].quickfix[0].apply(q"([==[x]=])") == q"([==[x]=]]==])");
	assert(da[1].quickfix[1].apply(q"([==[x]=])") == q"([==[x]==])");

	assert(da[2].quickfix[0].apply(q"([==[x]=]])") == q"([==[x]=]]]==])");
	assert(da[2].quickfix[1].apply(q"([==[x]=]])") == q"([==[x]=]==])");
}
733 
// skipWhitespace consumes spaces, tabs, CR and LF; peekChar/nextChar return
// the sentinel 0 once the input is exhausted.
unittest {
	Diagnostic[] diagnostics;
	Lexer l = new Lexer(" \r\r\n    \thi", diagnostics);
	assert(l.index == 0);
	l.skipWhitespace();
	assert(l.index == 9);
	assert(l.peekChar() == 'h');
	assert(l.nextChar() == 'h');
	assert(l.nextChar() == 'i');
	assert(l.nextChar() == 0);

	assert(diagnostics.length == 0);
}

// Whitespace-only input: skipWhitespace runs to EOF without diagnostics.
unittest {
	Diagnostic[] diagnostics;
	Lexer l = new Lexer(" \r\r\n   \t", diagnostics);
	assert(l.index == 0);
	l.skipWhitespace();
	assert(l.index == 8);
	assert(l.peekChar() == 0);

	assert(diagnostics.length == 0);
}
758 
// Reserved words lex as Keyword while other names lex as Identifier, and each
// token's index points at its first character in the source.
unittest {
	Diagnostic[] diagnostics;
	Lexer l = new Lexer(" hi and bye", diagnostics);

	const Token hi = l.nextInternal();
	const Token and = l.nextInternal();
	const Token bye = l.nextInternal();

	assert(hi.type == TokenType.Identifier);
	assert(and.type == TokenType.Keyword);
	assert(bye.type == TokenType.Identifier);

	assert(hi.value == "hi");
	assert(and.value == "and");
	assert(bye.value == "bye");

	assert(hi.index == 1);
	assert(and.index == 4);
	assert(bye.index == 8);

	assert(diagnostics.length == 0);
}

// Keywords are case-sensitive: "And" is an ordinary identifier, not a keyword.
unittest {
	Diagnostic[] diagnostics;
	Lexer l = new Lexer(" hi And bye", diagnostics);

	const Token hi = l.nextInternal();
	const Token and = l.nextInternal();
	const Token bye = l.nextInternal();

	assert(hi.type == TokenType.Identifier);
	assert(and.type == TokenType.Identifier);
	assert(bye.type == TokenType.Identifier);

	assert(hi.value == "hi");
	assert(and.value == "And");
	assert(bye.value == "bye");

	assert(hi.index == 1);
	assert(and.index == 4);
	assert(bye.index == 8);

	assert(diagnostics.length == 0);
}