Completed
Push — master ( 69499d...0c05fb )
by Richard
03:08
created

Tokenizer::doSimpleTokens()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 7
rs 9.4285
cc 2
eloc 5
nc 2
nop 2
1
<?php
2
/* @description     Transformation Style Sheets - Revolutionising PHP templating    *
3
 * @author          Tom Butler [email protected]                                             *
4
 * @copyright       2015 Tom Butler <[email protected]> | https://r.je/                      *
5
 * @license         http://www.opensource.org/licenses/bsd-license.php  BSD License *
6
 * @version         1.0                                                             */
7
namespace Transphporm\Parser;
8
class Tokenizer {
9
	private $str;
10
	// Token type identifiers. Each value is the string stored under the 'type' key
	// of the token arrays built by this class.
	// NAME is what identifyChar() returns for any character missing from $chars;
	// note that its value is 'LITERAL', not 'NAME'.
	const NAME = 'LITERAL';
11
	const STRING = 'STRING';
12
	const OPEN_BRACKET = 'OPEN_BRACKET';
13
	const CLOSE_BRACKET = 'CLOSE_BRACKET';
14
	// NOTE(review): the value lacks the OPEN_ prefix used by the other opening
	// bracket types - confirm whether anything depends on the literal 'SQUARE_BRACKET'.
	const OPEN_SQUARE_BRACKET = 'SQUARE_BRACKET';
15
	const CLOSE_SQUARE_BRACKET = 'CLOSE_SQUARE_BRACKET';
16
	const CONCAT = 'CONCAT';
17
	const ARG = 'ARG';
18
	const WHITESPACE = 'WHITESPACE';
19
	const NEW_LINE = 'NEW_LINE';
20
	const DOT = 'DOT';
21
	// NUMERIC and BOOL are only produced by processLiterals(), never by a single character.
	const NUMERIC = 'NUMERIC';
22
	const EQUALS = 'EQUALS';
23
	const NOT = 'NOT';
24
	const OPEN_BRACE = 'OPEN_BRACE';
25
	const CLOSE_BRACE = 'CLOSE_BRACE';
26
	const BOOL = 'BOOL';
27
	const COLON = 'COLON';
28
	const SEMI_COLON = 'SEMI_COLON';
29
	const NUM_SIGN = 'NUM_SIGN';
30
	const GREATER_THAN = 'GREATER_THAN';
31
	const AT_SIGN = 'AT_SIGN';
32
	const SUBTRACT = 'SUBTRACT';
33
	const MULTIPLY = 'MULTIPLY';
34
	const DIVIDE = 'DIVIDE';
35
36
	private $lineNo = 1;
37
38
	// Maps a single input character to its token type. identifyChar() consults this
	// table and falls back to self::NAME for any character that is not listed.
	// Both quote characters map to STRING; "\n" is the only NEW_LINE character,
	// while space, "\r" and "\t" are all classified as WHITESPACE.
	private $chars = [
39
		'"' => self::STRING,
40
		'\'' => self::STRING,
41
		'(' => self::OPEN_BRACKET,
42
		')' => self::CLOSE_BRACKET,
43
		'[' => self::OPEN_SQUARE_BRACKET,
44
		']' => self::CLOSE_SQUARE_BRACKET,
45
		// '+' is tokenized as CONCAT; there is no separate addition type in this table
		'+' => self::CONCAT,
46
		',' => self::ARG,
47
		'.' => self::DOT,
48
		'!' => self::NOT,
49
		'=' => self::EQUALS,
50
		'{' => self::OPEN_BRACE,
51
		'}' => self::CLOSE_BRACE,
52
		':' => self::COLON,
53
		';' => self::SEMI_COLON,
54
		'#' => self::NUM_SIGN,
55
		'>' => self::GREATER_THAN,
56
		'@' => self::AT_SIGN,
57
		'-' => self::SUBTRACT,
58
		'*' => self::MULTIPLY,
59
		'/' => self::DIVIDE,
60
		' ' => self::WHITESPACE,
61
		"\n" => self::NEW_LINE,
62
		"\r" => self::WHITESPACE,
63
		"\t" => self::WHITESPACE
64
	];
65
66
	/**
	 * Stores the source text that getTokens() will scan.
	 *
	 * @param string $str Raw text to tokenize
	 */
	public function __construct($str)
	{
		$this->str = $str;
	}
69
70
	/**
	 * Scans the stored string one character at a time and builds the token list.
	 *
	 * Each character is classified with identifyChar() and offered to the handlers in a
	 * fixed order: comments first (a hit skips every other handler), then new lines,
	 * simple single-character tokens, literals, strings and brackets. Handlers that
	 * consume more than one character report how far the cursor must advance.
	 *
	 * @param bool $returnObj When true (default) the tokens are wrapped in a Tokens
	 *                        instance, otherwise the plain array is returned
	 * @return Tokens|array
	 */
	public function getTokens($returnObj = true) {
		$tokens = [];
		// The source string is never modified while scanning, so measure it once
		$length = strlen($this->str);

		for ($i = 0; $i < $length; $i++) {
			$char = $this->identifyChar($this->str[$i]);

			// A non-zero result means a comment starts here: jump over it and skip the other handlers
			$commentLength = $this->doComments($tokens, $char, $i);
			if ($commentLength) {
				$i += $commentLength;
				continue;
			}

			$this->doNewLine($tokens, $char);
			$this->doSimpleTokens($tokens, $char);
			// doLiterals() takes $i by reference and moves it to the last character of the literal
			$this->doLiterals($tokens, $char, $i);
			// These two return nothing when they do not apply, which adds zero to the cursor
			$i += $this->doStrings($tokens, $char, $i);
			$i += $this->doBrackets($tokens, $char, $i);
		}

		return $returnObj ? new Tokens($tokens) : $tokens;
	}
90
91
    /**
     * Asks both comment handlers whether a comment starts at offset $i.
     *
     * @param array  $tokens Token list (passed through to the handlers)
     * @param string $char   Token type of the character at $i
     * @param int    $i      Current offset in the source string
     * @return int Sum of the two handlers' results; 0 when neither matched
     */
    private function doComments(&$tokens, $char, $i) {
        $singleLine = $this->doSingleLineComments($tokens, $char, $i);
        $multiLine = $this->doMultiLineComments($tokens, $char, $i);

        // A handler that did not match contributes nothing to the sum
        return $singleLine + $multiLine;
    }
95
96
	/**
	 * Detects a `//` comment starting at offset $i and reports how far to skip.
	 *
	 * The result is relative to $i and is sized for the caller's
	 * `$i += $result; continue;` inside a `for (...; $i++)` loop: after the loop's own
	 * increment the cursor sits on the terminating "\n", so the new line is still
	 * processed and the line counter stays correct. When the comment runs to the end
	 * of the input the cursor ends up past the last character.
	 *
	 * A `//` comment always spans at least two characters, so a match never returns 0.
	 *
	 * @param array  $tokens Unused (flagged by static analysis); kept so the signature
	 *                       matches the other handlers
	 * @param string $char   Token type of the character at $i
	 * @param int    $i      Current offset in the source string
	 * @return int Number of characters to add to $i, or 0 when no comment starts here
	 */
	private function doSingleLineComments(&$tokens, $char, $i) {
		if ($char !== self::DIVIDE || !isset($this->str[$i+1]) || $this->identifyChar($this->str[$i+1]) !== self::DIVIDE) {
			return 0;
		}

		$end = strpos($this->str, "\n", $i);
		// No newline left: the comment extends to the end of the input
		if ($end === false) $end = strlen($this->str);

		// Stop one short of $end because the caller's loop increments once more
		return $end - $i - 1;
	}
102
103
	/**
	 * Detects a slash-star comment starting at offset $i and reports how far to skip.
	 *
	 * The result is relative to $i and is sized for the caller's
	 * `$i += $result; continue;` inside a `for (...; $i++)` loop: after the loop's own
	 * increment the cursor sits on the first character after the closing star-slash.
	 * A "\n" immediately following the comment is swallowed as well. An unterminated
	 * comment consumes the rest of the input.
	 *
	 * Every "\n" that is skipped is added to the line counter here, because the skipped
	 * characters never reach doNewLine().
	 *
	 * @param array  $tokens Unused (flagged by static analysis); kept so the signature
	 *                       matches the other handlers
	 * @param string $char   Token type of the character at $i
	 * @param int    $i      Current offset in the source string
	 * @return int Number of characters to add to $i, or 0 when no comment starts here
	 */
	private function doMultiLineComments(&$tokens, $char, $i) {
		if ($char !== self::DIVIDE || !isset($this->str[$i+1]) || $this->identifyChar($this->str[$i+1]) !== self::MULTIPLY) {
			return 0;
		}

		// Search after the two opening characters so the opener's own "*" cannot
		// be mistaken for the start of the terminator
		$close = strpos($this->str, '*/', $i+2);
		// $end is the offset of the first character that is NOT part of the comment
		$end = $close === false ? strlen($this->str) : $close + 2;

		if (isset($this->str[$end]) && $this->str[$end] === "\n") $end++;

		$this->lineNo += substr_count($this->str, "\n", $i, $end - $i);

		// Stop one short of $end because the caller's loop increments once more
		return $end - $i - 1;
	}
110
111
	/**
	 * Emits a token for character types that need no value and no look-ahead.
	 *
	 * @param array  $tokens Token list to append to
	 * @param string $char   Token type of the current character
	 */
	private function doSimpleTokens(&$tokens, $char) {
		// Types that are recorded as-is, with only the current line number attached
		$simpleTypes = [
			self::ARG, self::CONCAT, self::DOT, self::NOT, self::EQUALS,
			self::COLON, self::SEMI_COLON, self::WHITESPACE, self::NUM_SIGN,
			self::GREATER_THAN, self::AT_SIGN, self::SUBTRACT, self::MULTIPLY, self::DIVIDE
		];

		if (!in_array($char, $simpleTypes)) return;

		$tokens[] = ['type' => $char, 'line' => $this->lineNo];
	}
118
119
	/**
	 * Handles a new-line character: advances the line counter and records a token.
	 *
	 * The counter is incremented before the token is built, so the NEW_LINE token
	 * carries the number of the line that is about to begin.
	 *
	 * @param array  $tokens Token list to append to
	 * @param string $char   Token type of the current character
	 */
	private function doNewLine(&$tokens, $char) {
		if ($char != self::NEW_LINE) return;

		$this->lineNo += 1;
		$tokens[] = ['type' => $char, 'line' => $this->lineNo];
	}
125
126
	/**
	 * Tells whether the character at offset $n can continue a literal.
	 *
	 * @param int $n Offset in the source string
	 * @return bool True for a NAME character, or for a "-" whose preceding character
	 *              is not numeric
	 */
	private function isLiteral($n) {
		if (!isset($this->str[$n])) return false;

		$type = $this->identifyChar($this->str[$n]);

		//A normal literal character always continues the literal
		if ($type == self::NAME) return true;

		//A subtract can be part of a class name or a mathematical operation:
		//it only counts as part of the literal when it does not follow a digit
		return $type == self::SUBTRACT && !is_numeric($this->str[$n-1]);
	}
132
133
	/**
	 * Collects a run of literal characters starting at $i and hands it to processLiterals().
	 *
	 * @param array  $tokens Token list to append to
	 * @param string $char   Token type of the character at $i
	 * @param int    $i      Current offset, by reference; left on the last character
	 *                       of the literal so the caller's loop resumes after it
	 */
	private function doLiterals(&$tokens, $char, &$i) {
		if ($char !== self::NAME) return;

		$name = $this->str[$i];
		for ($next = $i + 1; $this->isLiteral($next); $next++) {
			$name .= $this->str[$next];
			$i = $next;
		}

		$this->processLiterals($tokens, $name);
	}
143
144
	/**
	 * Converts a collected literal into a NUMERIC, BOOL or NAME token.
	 *
	 * Every token carries the current line number, the same as the tokens produced by
	 * the other handlers in this class.
	 *
	 * @param array  $tokens Token list to append to
	 * @param string $name   The literal text
	 */
	private function processLiterals(&$tokens, $name) {
		// Numeric literals keep their original string form as the value
		if (is_numeric($name)) $tokens[] = ['type' => self::NUMERIC, 'value' => $name, 'line' => $this->lineNo];
		else if ($name == 'true') $tokens[] = ['type' => self::BOOL, 'value' => true, 'line' => $this->lineNo];
		else if ($name == 'false') $tokens[] = ['type' => self::BOOL, 'value' => false, 'line' => $this->lineNo];
		else $tokens[] = ['type' => self::NAME, 'value' => $name, 'line' => $this->lineNo];
	}
150
151
	/**
	 * Handles an opening bracket, brace or square bracket at offset $i.
	 *
	 * The text between the bracket and its partner is extracted, tokenized by a fresh
	 * Tokenizer, and stored as a single token holding both the nested tokens and the
	 * raw text.
	 *
	 * @param array  $tokens Token list to append to
	 * @param string $char   Token type of the character at $i
	 * @param int    $i      Offset of the opening bracket
	 * @return int|null Length of the extracted contents, or null when $char is not an
	 *                  opening bracket type
	 */
	private function doBrackets(&$tokens, $char, $i) {
		// Opening token type => [opening character, closing character]
		$pairs = [
			self::OPEN_BRACKET => ['(', ')'],
			self::OPEN_BRACE => ['{', '}'],
			self::OPEN_SQUARE_BRACKET => ['[', ']']
		];

		if (!isset($pairs[$char])) return;

		list($opening, $closing) = $pairs[$char];
		$contents = $this->extractBrackets($i, $opening, $closing);
		$nested = new self($contents);

		$tokens[] = [
			'type' => $char,
			'value' => $nested->getTokens(),
			'string' => $contents,
			'line' => $this->lineNo
		];

		return strlen($contents);
	}
167
168
	/**
	 * Handles a quoted string whose opening quote is at offset $i.
	 *
	 * @param array  $tokens Token list to append to
	 * @param string $char   Token type of the character at $i
	 * @param int    $i      Offset of the opening quote
	 * @return int|null Length of the raw string contents plus one, or null when $char
	 *                  is not a quote
	 */
	private function doStrings(&$tokens, $char, $i) {
		if ($char !== self::STRING) return;

		$quote = $this->str[$i];
		$raw = $this->extractString($i);

		$tokens[] = [
			'type' => self::STRING,
			// Backslash-escaped copies of the delimiter become the plain quote character
			'value' => str_replace('\\' . $quote, $quote, $raw),
			'line' => $this->lineNo
		];

		// Measured on the raw (still escaped) text, because that is what occupies the source
		return strlen($raw)+1;
	}
177
178
	/**
	 * Returns the raw text between the quote at $pos and its matching closing quote.
	 *
	 * A closing-quote candidate that directly follows a backslash is treated as escaped
	 * and skipped. Escape sequences are returned untouched. If no closing quote exists,
	 * everything after the opening quote is returned instead of a truncated substring.
	 *
	 * @param int $pos Offset of the opening quote character
	 * @return string Text between the quotes, without the quotes themselves
	 */
	private function extractString($pos) {
		$quote = $this->str[$pos];
		$end = strpos($this->str, $quote, $pos+1);
		while ($end !== false && $this->str[$end-1] === '\\') $end = strpos($this->str, $quote, $end+1);

		// Unterminated string: take the rest of the input. The cast covers older PHP
		// versions where substr() returns false for an empty tail.
		if ($end === false) return (string) substr($this->str, $pos+1);

		return substr($this->str, $pos+1, $end-$pos-1);
	}
185
186
	/**
	 * Returns the text between the opening bracket at $open and its matching partner.
	 *
	 * Nesting is tracked with a depth counter, so an opener that directly follows the
	 * outer one (as in "((a)b)") is matched correctly. If the brackets are unbalanced,
	 * everything after the opening bracket is returned.
	 *
	 * Both delimiters are compared one character at a time, which matches how the
	 * method is called within this class (single-character brackets).
	 *
	 * @param int    $open         Offset of the opening bracket
	 * @param string $startBracket Opening bracket character
	 * @param string $closeBracket Closing bracket character
	 * @return string Text between the brackets, without the brackets themselves
	 */
	private function extractBrackets($open, $startBracket = '(', $closeBracket = ')') {
		$length = strlen($this->str);
		// The bracket at $open is already open
		$depth = 1;

		for ($pos = $open+1; $pos < $length; $pos++) {
			if ($this->str[$pos] === $startBracket) $depth++;
			else if ($this->str[$pos] === $closeBracket && --$depth === 0) {
				return substr($this->str, $open+1, $pos-$open-1);
			}
		}

		// No matching close bracket. The cast covers older PHP versions where substr()
		// returns false for an empty tail.
		return (string) substr($this->str, $open+1);
	}
193
194
	/**
	 * Classifies a single character.
	 *
	 * @param string $chr The character to look up
	 * @return string The token type from $chars, or self::NAME when the character is not listed
	 */
	private function identifyChar($chr) {
		return isset($this->chars[$chr]) ? $this->chars[$chr] : self::NAME;
	}
198
199
	/**
	 * Looks $num up in a reversed copy of the $chars table.
	 *
	 * Static analysis reports that this method is not called anywhere and could be removed.
	 *
	 * NOTE(review): array_reverse() changes the order of the entries but keeps the
	 * string keys, so this lookup is still keyed by character. If the intent was to
	 * map a token type back to its character, array_flip() may have been meant - confirm
	 * before relying on this method.
	 *
	 * @param string $num Key to look up
	 * @return string|false The stored value, or false when the key is absent
	 */
	private function getChar($num) {
		$reversed = array_reverse($this->chars);

		return isset($reversed[$num]) ? $reversed[$num] : false;
	}
204
}
205