1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* Linna Filter |
5
|
|
|
* |
6
|
|
|
* @author Sebastian Rapetti <[email protected]> |
7
|
|
|
* @copyright (c) 2018, Sebastian Rapetti |
8
|
|
|
* @license http://opensource.org/licenses/MIT MIT License |
9
|
|
|
*/ |
10
|
|
|
declare(strict_types=1); |
11
|
|
|
|
12
|
|
|
namespace Linna\Filter; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* Lexer. |
16
|
|
|
*/ |
17
|
|
|
class Lexer
{
    /**
     * Split a period (a rule string) into tokens.
     *
     * The period is scanned byte by byte. Space, comma, colon and semicolon
     * act as token separators. Text enclosed between two identical delimiter
     * characters (", ', #, / or ~) is collected as a single chunk, so that
     * separators appearing inside it do not split the token.
     *
     * Empty and whitespace-only fragments are discarded; every other token,
     * including the literal string "0", is preserved.
     *
     * @param string $period String to tokenize.
     *
     * @return array List of tokens, indexed from zero, in order of appearance.
     */
    public function tokenize(string $period): array
    {
        $chars = \str_split(\trim($period));
        $count = \count($chars);
        $words = $temp = [];

        for ($i = 0; $i < $count; $i++) {
            $char = $chars[$i];
            $ord = \ord($char);

            // Delimited strings are handled separately, this avoids splitting
            // a regex or a quoted value on the separators it may contain.
            // 34 => ", 35 => #, 39 => ', 47 => /, 126 => ~
            if (\in_array($ord, [34, 35, 39, 47, 126], true)) {
                // $i is advanced by the helper up to the closing delimiter.
                $temp[] = $this->mergeDelimitedString($count, $ord, $i, $chars);
                continue;
            }

            // Separators close the current token.
            // 32 => space, 44 => comma, 58 => colon, 59 => semicolon
            if (\in_array($ord, [32, 44, 58, 59], true)) {
                $words[] = \implode('', $temp);
                $temp = [];
                continue;
            }

            $temp[] = $char;
        }

        // Flush the token still pending when the input ends.
        $words[] = \implode('', $temp);

        // Consecutive separators produce empty fragments, drop them.
        // The comparison is explicit because a plain truthiness check
        // would also discard the valid token "0".
        $tokens = \array_filter($words, static function (string $word): bool {
            return \trim($word) !== '';
        });

        return \array_values($tokens);
    }

    /**
     * Merge a delimited string separately from the main lexer cycle.
     *
     * Starting from the char after the opening delimiter, chars are collected
     * until the same delimiter is found again or the chars are exhausted.
     * For the delimiters #, / and ~ the delimiter itself is put back on both
     * sides of the result; for " and ' the delimiters are stripped.
     *
     * @param int   $count Size of chars array.
     * @param int   $ord   ASCII code of the delimiter that triggered this method.
     * @param int   $i     Main cycle counter, passed by reference: on return
     *                     it is the index of the closing delimiter, or $count
     *                     when no closing delimiter was found.
     * @param array $chars Chars of period.
     *
     * @return string
     */
    private function mergeDelimitedString(int $count, int $ord, int &$i, array $chars): string
    {
        $tmp = [];

        while (++$i < $count) {
            $char = $chars[$i];

            // Closing delimiter reached.
            if ($ord === \ord($char)) {
                break;
            }

            $tmp[] = $char;
        }

        // Fix for regex, add the delimiter back on both sides.
        // 35 => #, 47 => /, 126 => ~
        if (\in_array($ord, [35, 47, 126], true)) {
            \array_unshift($tmp, \chr($ord));
            \array_push($tmp, \chr($ord));
        }

        return \implode('', $tmp);
    }
}
90
|
|
|
|