aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--users/fcuny/exp/monkey/pkg/lexer/lexer.go114
-rw-r--r--users/fcuny/exp/monkey/pkg/lexer/lexer_test.go104
2 files changed, 218 insertions, 0 deletions
diff --git a/users/fcuny/exp/monkey/pkg/lexer/lexer.go b/users/fcuny/exp/monkey/pkg/lexer/lexer.go
new file mode 100644
index 0000000..fc29371
--- /dev/null
+++ b/users/fcuny/exp/monkey/pkg/lexer/lexer.go
@@ -0,0 +1,114 @@
+package lexer
+
+import "monkey/pkg/token"
+
+// Lexer performs lexical analysis over an input string, producing one
+// token at a time via NextToken. It reads bytes, not runes, so only
+// ASCII source is supported.
+type Lexer struct {
+ input string
+ // current position in input (index of the byte held in ch)
+ position int
+ // current reading position in input (after a char)
+ readPosition int
+ // current character under examination
+ ch byte
+}
+
+// New returns a new lexer for the given input, primed so that the
+// first call to NextToken sees the first character of the input.
+func New(input string) *Lexer {
+ l := &Lexer{input: input}
+ l.readChar()
+ return l
+}
+
+// readChar advances the lexer by one byte: ch receives the byte at
+// readPosition, position is moved to the byte just read, and
+// readPosition moves one past it.
+func (l *Lexer) readChar() {
+ // if we've reached the end of the input, we set the current character to 0,
+ // which is the ASCII code for NUL. NextToken treats this sentinel as EOF.
+ if l.readPosition >= len(l.input) {
+ l.ch = 0
+ } else {
+ l.ch = l.input[l.readPosition]
+ }
+ l.position = l.readPosition
+ l.readPosition++
+}
+
+// readIdentifier consumes a maximal run of letter characters (as
+// defined by isLetter, which includes '_') starting at the current
+// position and returns the identifier that was read. On return, ch is
+// the first character after the identifier.
+func (l *Lexer) readIdentifier() string {
+ position := l.position
+ for isLetter(l.ch) {
+ l.readChar()
+ }
+ return l.input[position:l.position]
+}
+
+// readNumber consumes a maximal run of ASCII digits starting at the
+// current position and returns the literal that was read (integers
+// only — no sign, no decimal point). On return, ch is the first
+// character after the number.
+func (l *Lexer) readNumber() string {
+ position := l.position
+ for isDigit(l.ch) {
+ l.readChar()
+ }
+ return l.input[position:l.position]
+}
+
+// skipWhitespace advances past spaces, tabs, newlines and carriage
+// returns: whitespace only separates tokens and is never tokenized.
+func (l *Lexer) skipWhitespace() {
+ for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+ l.readChar()
+ }
+}
+
+// NextToken skips any leading whitespace, then returns the next token
+// in the input. At end of input it returns a token.EOF token; a byte
+// that matches no known token class yields token.ILLEGAL.
+func (l *Lexer) NextToken() token.Token {
+ var tok token.Token
+
+ l.skipWhitespace()
+
+ switch l.ch {
+ case '=':
+ tok = newToken(token.ASSIGN, l.ch)
+ case '+':
+ tok = newToken(token.PLUS, l.ch)
+ case ';':
+ tok = newToken(token.SEMICOLON, l.ch)
+ case ',':
+ tok = newToken(token.COMMA, l.ch)
+ case '(':
+ tok = newToken(token.LPAREN, l.ch)
+ case ')':
+ tok = newToken(token.RPAREN, l.ch)
+ case '{':
+ tok = newToken(token.LBRACE, l.ch)
+ case '}':
+ tok = newToken(token.RBRACE, l.ch)
+ case 0:
+ // NUL sentinel set by readChar means we consumed the whole input.
+ tok.Literal = ""
+ tok.Type = token.EOF
+ default:
+ if isLetter(l.ch) {
+ tok.Literal = l.readIdentifier()
+ // keywords (let, fn, ...) get their own type; everything else is IDENT.
+ tok.Type = token.LookupIdent(tok.Literal)
+ // readIdentifier already advanced past the last character, so we
+ // must return here instead of falling through to readChar below.
+ return tok
+ } else if isDigit(l.ch) {
+ tok.Type = token.INT
+ tok.Literal = l.readNumber()
+ // same as above: readNumber left ch on the next character.
+ return tok
+ } else {
+ tok = newToken(token.ILLEGAL, l.ch)
+ }
+
+ }
+
+ l.readChar()
+ return tok
+}
+
+// newToken builds a single-character token of the given type whose
+// literal is the one-byte string for ch.
+func newToken(tokenType token.TokenType, ch byte) token.Token {
+ return token.Token{Type: tokenType, Literal: string(ch)}
+}
+
+// isLetter reports whether ch may appear in an identifier: ASCII
+// letters plus '_' (so snake_case names lex as one identifier).
+func isLetter(ch byte) bool {
+ return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+
+// isDigit reports whether ch is an ASCII decimal digit.
+func isDigit(ch byte) bool {
+ return '0' <= ch && ch <= '9'
+}
diff --git a/users/fcuny/exp/monkey/pkg/lexer/lexer_test.go b/users/fcuny/exp/monkey/pkg/lexer/lexer_test.go
new file mode 100644
index 0000000..73b27fb
--- /dev/null
+++ b/users/fcuny/exp/monkey/pkg/lexer/lexer_test.go
@@ -0,0 +1,104 @@
+package lexer
+
+import (
+ "monkey/pkg/token"
+ "testing"
+)
+
+// TestNextTokenBasic lexes a string containing only single-character
+// tokens and checks that each token's type and literal come back in
+// order.
+func TestNextTokenBasic(t *testing.T) {
+ input := `=+(){},;`
+
+ tests := []struct {
+ expectedType token.TokenType
+ expectedLiteral string
+ }{
+ {token.ASSIGN, "="},
+ {token.PLUS, "+"},
+ {token.LPAREN, "("},
+ {token.RPAREN, ")"},
+ {token.LBRACE, "{"},
+ {token.RBRACE, "}"},
+ {token.COMMA, ","},
+ {token.SEMICOLON, ";"},
+ }
+
+ l := New(input)
+
+ for i, tt := range tests {
+ tok := l.NextToken()
+ if tok.Type != tt.expectedType {
+ t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", i, tt.expectedType, tok.Type)
+ }
+
+ if tok.Literal != tt.expectedLiteral {
+ t.Fatalf("tests[%d] - tokenliteral wrong. expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal)
+ }
+ }
+}
+
+// TestNextTokenMonkey lexes a small but realistic Monkey program and
+// checks the full expected token stream: keywords, identifiers,
+// integer literals, and punctuation, with whitespace skipped.
+func TestNextTokenMonkey(t *testing.T) {
+ input := `let five = 5;
+let ten = 10;
+
+let add = fn(x, y) {
+ x + y
+};
+
+let result = add(five, ten);`
+
+ tests := []struct {
+ expectedType token.TokenType
+ expectedLiteral string
+ }{
+ {token.LET, "let"},
+ {token.IDENT, "five"},
+ {token.ASSIGN, "="},
+ {token.INT, "5"},
+ {token.SEMICOLON, ";"},
+
+ {token.LET, "let"},
+ {token.IDENT, "ten"},
+ {token.ASSIGN, "="},
+ {token.INT, "10"},
+ {token.SEMICOLON, ";"},
+
+ {token.LET, "let"},
+ {token.IDENT, "add"},
+ {token.ASSIGN, "="},
+ {token.FUNCTION, "fn"},
+ {token.LPAREN, "("},
+ {token.IDENT, "x"},
+ {token.COMMA, ","},
+ {token.IDENT, "y"},
+ {token.RPAREN, ")"},
+ {token.LBRACE, "{"},
+ {token.IDENT, "x"},
+ {token.PLUS, "+"},
+ {token.IDENT, "y"},
+ {token.RBRACE, "}"},
+ {token.SEMICOLON, ";"},
+
+ {token.LET, "let"},
+ {token.IDENT, "result"},
+ {token.ASSIGN, "="},
+ {token.IDENT, "add"},
+ {token.LPAREN, "("},
+ {token.IDENT, "five"},
+ {token.COMMA, ","},
+ {token.IDENT, "ten"},
+ {token.RPAREN, ")"},
+ {token.SEMICOLON, ";"},
+ }
+
+ l := New(input)
+ for i, tt := range tests {
+ tok := l.NextToken()
+ if tok.Type != tt.expectedType {
+ t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", i, tt.expectedType, tok.Type)
+ }
+
+ if tok.Literal != tt.expectedLiteral {
+ t.Fatalf("tests[%d] - tokenliteral wrong. expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal)
+ }
+ }
+}