lexer.*Lexer.lexIdent - Code Metrics - Permify/permify - Measure and Improve Code Quality continuously with Scrutinizer

lexer.*Lexer.lexIdent A
last analyzed 2025-06-13 13:06 UTC

↳ Parent: pkg/dsl/lexer/lexer.go

Complexity

Conditions

Size

Total Lines	6
Code Lines	5

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	5
nop	0
dl	0
loc	6
rs	10
c	0
b	0
f	0

package lexer

import (
	"github.com/Permify/permify/pkg/dsl/token"
)

// Lexer - holds the scanning state of a lexical analyzer over a source string.
type Lexer struct {
	input          string // source text being analyzed
	position       int    // index in input of the character held in ch
	readPosition   int    // index in input of the next character to read
	linePosition   int    // current line counter
	columnPosition int    // current column counter
	ch             byte   // character currently under examination
}

// NewLexer - builds a Lexer over input with the line and column counters
// initialised to 1, then reads the first character so the lexer is ready
// for NextToken.
func NewLexer(input string) (l *Lexer) {
	l = new(Lexer)
	l.input = input
	l.linePosition, l.columnPosition = 1, 1
	// Prime ch/position/readPosition with the first character of the input.
	l.readChar()
	return l
}

// GetLinePosition - reports the line counter the Lexer currently holds.
func (l *Lexer) GetLinePosition() int {
	line := l.linePosition
	return line
}

// GetColumnPosition - reports the column counter the Lexer currently holds.
func (l *Lexer) GetColumnPosition() int {
	column := l.columnPosition
	return column
}

// readChar - advances the Lexer by one character: ch becomes the byte at
// readPosition (or 0 when readPosition is past the end of the input),
// position moves onto that byte, and readPosition and the column counter
// each grow by one.
func (l *Lexer) readChar() {
	next := l.readPosition
	// 0 stands in for "no more input".
	l.ch = 0
	if next < len(l.input) {
		l.ch = l.input[next]
	}
	l.position = next
	l.readPosition = next + 1
	l.columnPosition++
}

// peekChar - returns the byte at readPosition without changing any Lexer
// state, or 0 when readPosition is past the end of the input.
func (l *Lexer) peekChar() byte {
	if next := l.readPosition; next < len(l.input) {
		return l.input[next]
	}
	return 0
}

// NextToken - returns the token that starts at the current character and
// advances the Lexer past it.
//
// Single-character tokens are built from the current byte and consumed with
// one readChar. Comments, strings, identifiers and numbers are delegated to
// the lex* helpers, which already leave the Lexer on the first character
// after the token, so those branches return without the trailing readChar.
//
// Line handling: '\n' and a '\r' that is not followed by '\n' advance the line
// counter before the NEWLINE token is built, so that token carries the
// position produced by newLine. A '\r' directly followed by '\n' still yields
// its own NEWLINE token but leaves the line counter alone, so a CRLF pair
// advances the line exactly once (when the '\n' is lexed). ';' is also
// emitted as NEWLINE but never changes the line counter.
//
// When the current character is 0 (end of input) an EOF token with an empty
// literal is returned. Any byte that starts no known token yields ILLEGAL.
func (l *Lexer) NextToken() (tok token.Token) {
	// Position of the current character, captured up front because the lex*
	// helpers move the Lexer while they scan.
	pos := positionInfo(l.linePosition, l.columnPosition)

	switch l.ch {
	case '\t':
		tok = token.New(pos, token.TAB, l.ch)
	case ' ':
		tok = token.New(pos, token.SPACE, l.ch)
	case '\n':
		l.newLine()
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
	case '\r':
		// In a CRLF pair the following '\n' advances the line; counting the
		// '\r' as well would double every line number of a CRLF file.
		if l.peekChar() != '\n' {
			l.newLine()
		}
		tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
	case ';':
		tok = token.New(pos, token.NEWLINE, l.ch)
	case ':':
		tok = token.New(pos, token.COLON, l.ch)
	case '=':
		tok = token.New(pos, token.ASSIGN, l.ch)
	case '@':
		tok = token.New(pos, token.SIGN, l.ch)
	case '(':
		tok = token.New(pos, token.LP, l.ch)
	case ')':
		tok = token.New(pos, token.RP, l.ch)
	case '{':
		tok = token.New(pos, token.LCB, l.ch)
	case '}':
		tok = token.New(pos, token.RCB, l.ch)
	case '[':
		tok = token.New(pos, token.LSB, l.ch)
	case ']':
		tok = token.New(pos, token.RSB, l.ch)
	case '+':
		tok = token.New(pos, token.PLUS, l.ch)
	case '-':
		tok = token.New(pos, token.MINUS, l.ch)
	case '*':
		tok = token.New(pos, token.TIMES, l.ch)
	case '%':
		tok = token.New(pos, token.MOD, l.ch)
	case '^':
		tok = token.New(pos, token.POW, l.ch)
	case '>':
		tok = token.New(pos, token.GT, l.ch)
	case '<':
		tok = token.New(pos, token.LT, l.ch)
	case '!':
		tok = token.New(pos, token.EXCL, l.ch)
	case '?':
		tok = token.New(pos, token.QM, l.ch)
	case ',':
		tok = token.New(pos, token.COMMA, l.ch)
	case '#':
		tok = token.New(pos, token.HASH, l.ch)
	case '.':
		tok = token.New(pos, token.DOT, l.ch)
	case '\'':
		tok = token.New(pos, token.APOS, l.ch)
	case '&':
		tok = token.New(pos, token.AMPERSAND, l.ch)
	case 0:
		// End of input.
		tok = token.Token{PositionInfo: pos, Type: token.EOF, Literal: ""}
	case '/':
		// A slash opens a comment when followed by '/' or '*'; otherwise it is
		// the division operator.
		switch l.peekChar() {
		case '/':
			tok.PositionInfo = pos
			tok.Literal = l.lexSingleLineComment()
			tok.Type = token.SINGLE_LINE_COMMENT
			return tok
		case '*':
			tok.PositionInfo = pos
			tok.Literal = l.lexMultiLineComment()
			tok.Type = token.MULTI_LINE_COMMENT
			return tok
		default:
			tok = token.New(pos, token.DIVIDE, l.ch)
		}
	case '"':
		// A double quote opens a string literal.
		tok.PositionInfo = pos
		tok.Literal = l.lexString()
		tok.Type = token.STRING
		return tok
	default:
		switch {
		case isLetter(l.ch):
			// Identifier, keyword or boolean literal.
			tok.PositionInfo = pos
			tok.Literal = l.lexIdent()
			if tok.Literal == "true" || tok.Literal == "false" {
				tok.Type = token.BOOLEAN
			} else {
				tok.Type = token.LookupKeywords(tok.Literal)
			}
			return tok
		case isDigit(l.ch):
			var isDouble bool
			tok.PositionInfo = pos
			tok.Literal, isDouble = l.lexNumber()
			if isDouble {
				tok.Type = token.DOUBLE
			} else {
				tok.Type = token.INTEGER
			}
			return tok
		default:
			// No rule matched: report the byte itself as an illegal token.
			tok = token.New(pos, token.ILLEGAL, l.ch)
		}
	}

	// Consume the single character the token was built from.
	l.readChar()
	return tok
}

// newLine - moves the Lexer to the next line: the line counter grows by one
// and the column counter restarts at 1.
func (l *Lexer) newLine() {
	l.linePosition, l.columnPosition = l.linePosition+1, 1
}

// lexIdent - consumes the run of isLetter characters starting at the current
// character and returns it. The Lexer is left on the first character that is
// not part of the identifier; if the current character is not a letter the
// result is empty and nothing is consumed.
func (l *Lexer) lexIdent() string {
	start := l.position
	for ; isLetter(l.ch); l.readChar() {
	}
	end := l.position
	return l.input[start:end]
}

// lexNumber - consumes a numeric literal starting at the current character
// and returns its text together with a flag telling whether it contains a
// decimal point. Digits are always accepted; a '.' is accepted only once, so
// scanning stops in front of a second '.'.
func (l *Lexer) lexNumber() (string, bool) {
	start := l.position
	hasDot := false
	for {
		switch {
		case isDigit(l.ch):
			// Plain digit: keep scanning.
		case l.ch == '.' && !hasDot:
			hasDot = true
		default:
			return l.input[start:l.position], hasDot
		}
		l.readChar()
	}
}

// lexString - reads a double-quoted string literal and returns its decoded
// contents without the surrounding quotes.
//
// It must be called with the Lexer on the opening '"'. The escape sequences
// \n, \t, \" and \\ are decoded; any other backslash sequence is dropped from
// the result (both the backslash and the character after it). Scanning stops
// at the closing '"', which is consumed, or at a 0 byte (end of input), in
// which case the text read so far is returned. A backslash that is the last
// byte of the input is dropped as well instead of causing an out-of-range
// slice.
func (l *Lexer) lexString() string {
	// Skip the initial quotation mark.
	l.readChar()
	// position marks the start of the pending run of literal (unescaped) text.
	position := l.position
	var str string
	for {
		if l.ch == '\\' {
			// Flush the literal text that precedes the backslash.
			str += l.input[position:l.position]
			l.readChar() // Skip the backslash
			if l.ch == 0 {
				// The input ended right after the backslash, so there is no
				// character to decode. Leave an empty pending run; moving the
				// run start past l.position would slice beyond the input.
				position = l.position
				break
			}
			switch l.ch {
			case 'n':
				str += "\n"
			case 't':
				str += "\t"
			case '"':
				str += "\""
			case '\\':
				str += "\\"
			}
			// The next literal run starts after the escaped character.
			position = l.position + 1
		} else if l.ch == '"' || l.ch == 0 {
			break
		}
		l.readChar()
	}
	// Flush the trailing literal run.
	str += l.input[position:l.position]
	if l.ch == '"' {
		// Consume the closing quotation mark.
		l.readChar()
	}
	return str
}

// lexSingleLineComment - reads a "//" comment and returns the text after the
// two slashes. It must be called with the Lexer on the first '/'. Scanning
// stops in front of the first '\r' or '\n', or at a 0 byte (end of input);
// the terminating character is not consumed.
func (l *Lexer) lexSingleLineComment() string {
	// Step over both slashes of the comment marker.
	l.readChar()
	l.readChar()
	start := l.position
	for l.ch != 0 && l.ch != '\r' && l.ch != '\n' {
		l.readChar()
	}
	return l.input[start:l.position]
}

// lexMultiLineComment - reads a multi-line comment and returns the text
// between the delimiters. A multi-line comment starts with "/*" and ends with
// "*/". It must be called with the Lexer on the opening '/'.
//
// Line breaks inside the comment advance the line counter (a "\r\n" pair
// counts once), so tokens that follow the comment report the correct line.
// If a 0 byte (end of input) is reached before "*/", the text read so far is
// returned.
func (l *Lexer) lexMultiLineComment() string {
	// Skip the opening "/*".
	l.readChar()
	l.readChar()
	start := l.position
	for !(l.ch == '*' && l.peekChar() == '/') {
		switch l.ch {
		case 0:
			// Unterminated comment: return what was read.
			return l.input[start:l.position]
		case '\n':
			l.newLine()
		case '\r':
			// In a CRLF pair the '\n' that follows does the counting.
			if l.peekChar() != '\n' {
				l.newLine()
			}
		}
		l.readChar()
	}
	// The Lexer is on the '*' of the closing "*/"; the comment text ends here.
	end := l.position
	l.readChar()
	l.readChar()
	return l.input[start:end]
}

// isNewline - reports whether r is one of the line-break bytes '\r' or '\n'.
func isNewline(r byte) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}

// isLetter - reports whether ch is an ASCII letter (either case) or an
// underscore.
func isLetter(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return false
	}
}

// isDigit - reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	if ch < '0' || ch > '9' {
		return false
	}
	return true
}

// positionInfo - packs the given line and column into a token.PositionInfo.
func positionInfo(line, column int) token.PositionInfo {
	var info token.PositionInfo
	info.LinePosition = line
	info.ColumnPosition = column
	return info
}


1			package lexer
2
3			import (
4			"github.com/Permify/permify/pkg/dsl/token"
5			)
6
7			// Lexer - represents a lexical analyzer for the input source code.
8			type Lexer struct {
9			// The input source code to be analyzed.
10			input string
11			// The current position in the input source code.
12			position int
13			// The next position to read in the input source code.
14			readPosition int
15			// The current line position in the input source code.
16			linePosition int
17			// The current column position in the input source code.
18			columnPosition int
19			// The current character being read from the input source code.
20			ch byte
21			}
22
23			// NewLexer - creates a new Lexer instance with the given input source code.
24			func NewLexer(input string) (l *Lexer) {
25			l = &Lexer{input: input, linePosition: 1, columnPosition: 1}
26			l.readChar()
27			return
28			}
29
30			// GetLinePosition - returns the current line position of the Lexer in the input source code.
31			func (l *Lexer) GetLinePosition() int {
32			return l.linePosition
33			}
34
35			// GetColumnPosition - returns the current column position of the Lexer in the input source code.
36			func (l *Lexer) GetColumnPosition() int {
37			return l.columnPosition
38			}
39
40			// readChar - reads the next character from the input source code and updates the Lexer's position and column position.
41			func (l *Lexer) readChar() {
42			if l.readPosition >= len(l.input) {
43			l.ch = 0
44			} else {
45			l.ch = l.input[l.readPosition]
46			}
47			l.position = l.readPosition
48			l.readPosition++
49			l.columnPosition++
50			}
51
52			// peekChar - peeks the next character from the input source code without advancing the Lexer's position.
53			func (l *Lexer) peekChar() byte {
54			if l.readPosition >= len(l.input) {
55			return 0
56			}
57			return l.input[l.readPosition]
58			}
59
60			// NextToken returns the next token from the input string
61			func (l *Lexer) NextToken() (tok token.Token) {
62			// switch statement to determine the type of token based on the current character
63			switch l.ch {
64			case '\t':
65			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.TAB, l.ch)
66			case ' ':
67			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.SPACE, l.ch)
68			case '\n':
69			l.newLine()
70			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
71			case '\r':
72			l.newLine()
73			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
74			case ';':
75			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.NEWLINE, l.ch)
76			case ':':
77			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.COLON, l.ch)
78			case '=':
79			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.ASSIGN, l.ch)
80			case '@':
81			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.SIGN, l.ch)
82			case '(':
83			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LP, l.ch)
84			case ')':
85			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.RP, l.ch)
86			case '{':
87			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LCB, l.ch)
88			case '}':
89			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.RCB, l.ch)
90			case '[':
91			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LSB, l.ch)
92			case ']':
93			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.RSB, l.ch)
94			case '+':
95			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.PLUS, l.ch)
96			case '-':
97			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.MINUS, l.ch)
98			case '*':
99			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.TIMES, l.ch)
100			case '%':
101			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.MOD, l.ch)
102			case '^':
103			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.POW, l.ch)
104			case '>':
105			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.GT, l.ch)
106			case '<':
107			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.LT, l.ch)
108			case '!':
109			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.EXCL, l.ch)
110			case '?':
111			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.QM, l.ch)
112			case ',':
113			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.COMMA, l.ch)
114			case '#':
115			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.HASH, l.ch)
116			case '.':
117			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.DOT, l.ch)
118			case '\'':
119			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.APOS, l.ch)
120			case '&':
121			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.AMPERSAND, l.ch)
122			case 0:
123			tok = token.Token{PositionInfo: positionInfo(l.linePosition, l.columnPosition), Type: token.EOF, Literal: ""}
124			case '/':
125			switch l.peekChar() {
126			case '/':
127			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
128			tok.Literal = l.lexSingleLineComment()
129			tok.Type = token.SINGLE_LINE_COMMENT
130			return
131			case '*':
132			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
133			tok.Literal = l.lexMultiLineComment()
134			tok.Type = token.MULTI_LINE_COMMENT
135			return
136			default:
137			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.DIVIDE, l.ch)
138			}
139			case '"':
140			// check if the character is a double quote, indicating a string
141			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
142			tok.Literal = l.lexString()
143			tok.Type = token.STRING
144			return
145			default:
146			// check if the character is a letter, and if so, lex the identifier and look up the keyword
147			if isLetter(l.ch) {
148			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
149			tok.Literal = l.lexIdent()
150			if tok.Literal == "true" || tok.Literal == "false" {
151			tok.Type = token.BOOLEAN
152			return
153			}
154			tok.Type = token.LookupKeywords(tok.Literal)
155			return
156			} else if isDigit(l.ch) {
157			var isDouble bool
158			tok.PositionInfo = positionInfo(l.linePosition, l.columnPosition)
159			tok.Literal, isDouble = l.lexNumber()
160			if isDouble {
161			tok.Type = token.DOUBLE
162			} else {
163			tok.Type = token.INTEGER
164			}
165			return
166			} else {
167			// if none of the above cases match, create an illegal token with the current character
168			tok = token.New(positionInfo(l.linePosition, l.columnPosition), token.ILLEGAL, l.ch)
169			}
170			}
171			// read the next character and return the token
172			l.readChar()
173			return
174			}
175
176			// newLine - increments the line position and resets the column position to 1.
177			func (l *Lexer) newLine() {
178			l.linePosition++
179			l.columnPosition = 1
180			}
181
182			// lexIdent - reads and returns an identifier.
183			// An identifier is a sequence of letters (upper and lowercase) and underscores.
184			func (l *Lexer) lexIdent() string {
185			position := l.position
186			for isLetter(l.ch) {
187			l.readChar()
188			}
189			return l.input[position:l.position]
190			}
191
192			// lexNumber - reads and returns a number.
193			func (l *Lexer) lexNumber() (string, bool) {
194			position := l.position
195			seenDot := false
196			for isDigit(l.ch) || (!seenDot && l.ch == '.') {
197			if l.ch == '.' {
198			seenDot = true
199			}
200			l.readChar()
201			}
202			return l.input[position:l.position], seenDot
203			}
204
205			// lexString lex a string literal. It does not support escape sequences or multi-line strings.
206			func (l *Lexer) lexString() string {
207			// Skip the initial quotation mark.
208			l.readChar()
209			position := l.position
210			var str string
211			for {
212			if l.ch == '\\' {
213			str += l.input[position:l.position]
214			l.readChar() // Skip the backslash
215			switch l.ch {
216			case 'n':
217			str += "\n"
218			case 't':
219			str += "\t"
220			case '"':
221			str += "\""
222			case '\\':
223			str += "\\"
224			}
225			position = l.position + 1
226			} else if l.ch == '"' || l.ch == 0 {
227			break
228			}
229			l.readChar()
230			}
231			str += l.input[position:l.position]
232			if l.ch == '"' {
233			l.readChar()
234			}
235			return str
236			}
237
238			// lexSingleLineComment - reads and returns a single line comment.
239			// A single line comment starts with "//" and ends at the end of the line.
240			func (l *Lexer) lexSingleLineComment() string {
241			l.readChar()
242			l.readChar()
243			position := l.position
244			for !isNewline(l.ch) {
245			if l.ch == 0 {
246			return l.input[position:l.position]
247			}
248			l.readChar()
249			}
250			return l.input[position:l.position]
251			}
252
253			// lexMultiLineComment - reads and returns a multi-line comment.
254			// A multi-line comment starts with "/*" and ends with "*/".
255			func (l *Lexer) lexMultiLineComment() string {
256			l.readChar()
257			l.readChar()
258			position := l.position
259			for !(l.ch == '*' && l.peekChar() == '/') {
260			if l.ch == 0 {
261			return l.input[position:l.position]
262			}
263			l.readChar()
264			}
265			l.readChar()
266			l.readChar()
267			return l.input[position : l.position-2]
268			}
269
270			// isNewline - returns true if the given byte is a newline character (\r or \n).
271			func isNewline(r byte) bool {
272			return r == '\r' || r == '\n'
273			}
274
275			// isLetter - returns true if the given byte is a letter (upper or lowercase) or an underscore.
276			func isLetter(ch byte) bool {
277			return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
278			}
279
280			// isDigit - returns true if the given byte is a digit.
281			func isDigit(ch byte) bool {
282			return '0' <= ch && ch <= '9'
283			}
284
285			// positionInfo - returns a token.PositionInfo struct with the current line and column position.
286			func positionInfo(line, column int) token.PositionInfo {
287			return token.PositionInfo{
288			LinePosition: line,
289			ColumnPosition: column,
290			}
291			}
292

Permify / permify

lexer.*Lexer.lexIdent A last analyzed 2025-06-13 13:06 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

lexer.*Lexer.lexIdent A
last analyzed 2025-06-13 13:06 UTC