lexer.*Lexer.peekChar   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 5
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 4
nop 0
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
package lexer
2
3
import (
4
	"github.com/Permify/permify/pkg/dsl/token"
5
)
6
7
// Lexer scans a source string one byte at a time and tracks the
// line/column counters that are stamped onto the tokens it produces.
type Lexer struct {
	input          string // full source text being scanned
	position       int    // index of the byte currently held in ch
	readPosition   int    // index of the byte the next read will load
	linePosition   int    // line counter reported in token positions
	columnPosition int    // column counter reported in token positions
	ch             byte   // byte under examination; 0 once the input is exhausted
}
22
23
// NewLexer - creates a new Lexer instance with the given input source code.
24
func NewLexer(input string) (l *Lexer) {
25
	l = &Lexer{input: input, linePosition: 1, columnPosition: 1}
26
	l.readChar()
27
	return
28
}
29
30
// GetLinePosition - returns the current line position of the Lexer in the input source code.
31
func (l *Lexer) GetLinePosition() int {
32
	return l.linePosition
33
}
34
35
// GetColumnPosition - returns the current column position of the Lexer in the input source code.
36
func (l *Lexer) GetColumnPosition() int {
37
	return l.columnPosition
38
}
39
40
// readChar - reads the next character from the input source code and updates the Lexer's position and column position.
41
func (l *Lexer) readChar() {
42
	if l.readPosition >= len(l.input) {
43
		l.ch = 0
44
	} else {
45
		l.ch = l.input[l.readPosition]
46
	}
47
	l.position = l.readPosition
48
	l.readPosition++
49
	l.columnPosition++
50
}
51
52
// peekChar - peeks the next character from the input source code without advancing the Lexer's position.
53
func (l *Lexer) peekChar() byte {
54
	if l.readPosition >= len(l.input) {
55
		return 0
56
	}
57
	return l.input[l.readPosition]
58
}
59
60
// NextToken returns the next token from the input string
61
func (l *Lexer) NextToken() (tok token.Token) {
62
	// switch statement to determine the type of token based on the current character
63
	switch l.ch {
64
	case '\t':
65
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.TAB, l.ch)
66
	case ' ':
67
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.SPACE, l.ch)
68
	case '\n':
69
		l.newLine()
70
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
71
	case '\r':
72
		l.newLine()
73
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
74
	case ';':
75
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
76
	case ':':
77
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.COLON, l.ch)
78
	case '=':
79
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.ASSIGN, l.ch)
80
	case '@':
81
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.SIGN, l.ch)
82
	case '(':
83
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LP, l.ch)
84
	case ')':
85
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.RP, l.ch)
86
	case '{':
87
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LCB, l.ch)
88
	case '}':
89
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.RCB, l.ch)
90
	case '[':
91
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LSB, l.ch)
92
	case ']':
93
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.RSB, l.ch)
94
	case '+':
95
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.PLUS, l.ch)
96
	case '-':
97
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.MINUS, l.ch)
98
	case '*':
99
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.TIMES, l.ch)
100
	case '%':
101
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.MOD, l.ch)
102
	case '^':
103
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.POW, l.ch)
104
	case '>':
105
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.GT, l.ch)
106
	case '<':
107
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LT, l.ch)
108
	case '!':
109
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.EXCL, l.ch)
110
	case '?':
111
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.QM, l.ch)
112
	case ',':
113
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.COMMA, l.ch)
114
	case '#':
115
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.HASH, l.ch)
116
	case '.':
117
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.DOT, l.ch)
118
	case '\'':
119
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.APOS, l.ch)
120
	case '&':
121
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.AMPERSAND, l.ch)
122
	case 0:
123
		tok = token.Token{PositionInfo: positionInfo(l.linePosition, l.columnPosition), Type: token.EOF, Literal: ""}
124
	case '/':
125
		switch l.peekChar() {
126
		case '/':
127
			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
128
			tok.Literal = l.lexSingleLineComment()
129
			tok.Type = token.SINGLE_LINE_COMMENT
130
			return
131
		case '*':
132
			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
133
			tok.Literal = l.lexMultiLineComment()
134
			tok.Type = token.MULTI_LINE_COMMENT
135
			return
136
		default:
137
			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.DIVIDE, l.ch)
138
		}
139
	case '"':
140
		// check if the character is a double quote, indicating a string
141
		tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
142
		tok.Literal = l.lexString()
143
		tok.Type = token.STRING
144
		return
145
	default:
146
		// check if the character is a letter, and if so, lex the identifier and look up the keyword
147
		if isLetter(l.ch) {
148
			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
149
			tok.Literal = l.lexIdent()
150
			if tok.Literal == "true" || tok.Literal == "false" {
151
				tok.Type = token.BOOLEAN
152
				return
153
			}
154
			tok.Type = token.LookupKeywords(tok.Literal)
155
			return
156
		} else if isDigit(l.ch) {
157
			var isDouble bool
158
			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
159
			tok.Literal, isDouble = l.lexNumber()
160
			if isDouble {
161
				tok.Type = token.DOUBLE
162
			} else {
163
				tok.Type = token.INTEGER
164
			}
165
			return
166
		} else {
167
			// if none of the above cases match, create an illegal token with the current character
168
			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.ILLEGAL, l.ch)
169
		}
170
	}
171
	// read the next character and return the token
172
	l.readChar()
173
	return
174
}
175
176
// newLine - increments the line position and resets the column position to 1.
177
func (l *Lexer) newLine() {
178
	l.linePosition++
179
	l.columnPosition = 1
180
}
181
182
// lexIdent - reads and returns an identifier.
183
// An identifier is a sequence of letters (upper and lowercase) and underscores.
184
func (l *Lexer) lexIdent() string {
185
	position := l.position
186
	for isLetter(l.ch) {
187
		l.readChar()
188
	}
189
	return l.input[position:l.position]
190
}
191
192
// lexNumber - reads and returns a number.
193
func (l *Lexer) lexNumber() (string, bool) {
194
	position := l.position
195
	seenDot := false
196
	for isDigit(l.ch) || (!seenDot && l.ch == '.') {
197
		if l.ch == '.' {
198
			seenDot = true
199
		}
200
		l.readChar()
201
	}
202
	return l.input[position:l.position], seenDot
203
}
204
205
// lexString lex a string literal. It does not support escape sequences or multi-line strings.
206
func (l *Lexer) lexString() string {
207
	// Skip the initial quotation mark.
208
	l.readChar()
209
	position := l.position
210
	var str string
211
	for {
212
		if l.ch == '\\' {
213
			str += l.input[position:l.position]
214
			l.readChar() // Skip the backslash
215
			switch l.ch {
216
			case 'n':
217
				str += "\n"
218
			case 't':
219
				str += "\t"
220
			case '"':
221
				str += "\""
222
			case '\\':
223
				str += "\\"
224
			}
225
			position = l.position + 1
226
		} else if l.ch == '"' || l.ch == 0 {
227
			break
228
		}
229
		l.readChar()
230
	}
231
	str += l.input[position:l.position]
232
	if l.ch == '"' {
233
		l.readChar()
234
	}
235
	return str
236
}
237
238
// lexSingleLineComment - reads and returns a single line comment.
239
// A single line comment starts with "//" and ends at the end of the line.
240
func (l *Lexer) lexSingleLineComment() string {
241
	l.readChar()
242
	l.readChar()
243
	position := l.position
244
	for !isNewline(l.ch) {
245
		if l.ch == 0 {
246
			return l.input[position:l.position]
247
		}
248
		l.readChar()
249
	}
250
	return l.input[position:l.position]
251
}
252
253
// lexMultiLineComment - reads and returns a multi-line comment.
254
// A multi-line comment starts with "/" and ends with "/".
255
func (l *Lexer) lexMultiLineComment() string {
256
	l.readChar()
257
	l.readChar()
258
	position := l.position
259
	for !(l.ch == '*' && l.peekChar() == '/') {
260
		if l.ch == 0 {
261
			return l.input[position:l.position]
262
		}
263
		l.readChar()
264
	}
265
	l.readChar()
266
	l.readChar()
267
	return l.input[position : l.position-2]
268
}
269
270
// isNewline reports whether r is a carriage return or a line feed.
func isNewline(r byte) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
274
275
// isLetter reports whether ch is an ASCII letter (either case) or an
// underscore.
func isLetter(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return false
	}
}
279
280
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
284
285
// positionInfo - returns a token.PositionInfo struct with the current line and column position.
286
func positionInfo(line, column int) token.PositionInfo {
287
	return token.PositionInfo{
288
		LinePosition:   line,
289
		ColumnPosition: column,
290
	}
291
}
292