|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* Linna Filter |
|
5
|
|
|
* |
|
6
|
|
|
* @author Sebastian Rapetti <[email protected]> |
|
7
|
|
|
* @copyright (c) 2018, Sebastian Rapetti |
|
8
|
|
|
* @license http://opensource.org/licenses/MIT MIT License |
|
9
|
|
|
*/ |
|
10
|
|
|
declare(strict_types=1); |
|
11
|
|
|
|
|
12
|
|
|
namespace Linna\Filter; |
|
13
|
|
|
|
|
14
|
|
|
/**
 * Lexer.
 *
 * Breaks an input string (a "period") into a flat list of word tokens.
 * Words are separated by space, comma, colon or semicolon. Text enclosed in
 * one of the delimiters ", ', #, / or ~ is read as a single chunk, so it may
 * contain separator characters.
 */
class Lexer
{
    /**
     * Split period in tokens.
     *
     * The period is trimmed and scanned byte by byte:
     * - a delimiter (", ', #, / or ~) starts a delimited string, which is
     *   read up to the matching closing delimiter and appended to the word
     *   currently being built;
     * - a separator (space, comma, colon or semicolon) closes the current
     *   word;
     * - any other byte is appended to the current word.
     *
     * Words that are empty or contain only whitespace are discarded; every
     * other word, including the literal "0", is preserved.
     *
     * @param string $period String to split.
     *
     * @return array List of tokens, indexed from 0.
     */
    public function tokenize(string $period): array
    {
        $period = trim($period);

        // Nothing to scan; also avoids depending on what str_split() returns
        // for an empty string.
        if ($period === '') {
            return [];
        }

        $chars = str_split($period);
        $count = count($chars);
        $words = $temp = [];

        for ($i = 0; $i < $count; $i++) {
            $char = $chars[$i];
            $ord = ord($char);

            //treat delimited strings separately,
            //this fixes some problems with the regex rule
            //34 = ", 35 = #, 39 = ', 47 = /, 126 = ~
            if (in_array($ord, [34, 35, 39, 47, 126], true)) {
                //$i is advanced by reference up to the closing delimiter
                $temp[] = $this->mergeDelimitedString($count, $ord, $i, $chars);
                continue;
            }

            //32 = space, 44 = comma, 58 = colon, 59 = semicolon
            if (in_array($ord, [32, 44, 58, 59], true)) {
                $words[] = implode('', $temp);
                $temp = [];
                continue;
            }

            $temp[] = $char;
        }

        //flush the last word, the period does not end with a separator
        $words[] = implode('', $temp);

        //compare against the empty string explicitly: relying on the
        //truthiness of trim() would also discard the valid token "0"
        $nonBlank = array_filter($words, static function (string $word): bool {
            return trim($word) !== '';
        });

        return array_values($nonBlank);
    }

    /**
     * Merge delimited string separately from main lexer cycle.
     *
     * Starting right after the opening delimiter at position $i, collects
     * bytes until the same delimiter is met again or the input ends. On
     * return $i points at the closing delimiter (or equals $count when no
     * closing delimiter exists), so the caller's loop resumes after it.
     *
     * For the delimiters #, / and ~ the delimiter is put back around the
     * collected text; for quotes (" and ') it is stripped. Escaped
     * delimiters inside the string are not recognized: the first matching
     * byte always ends the string.
     *
     * @param int   $count Size of chars array.
     * @param int   $ord   Delimiter that triggered this method.
     * @param int   $i     Main cycle counter, advanced by reference.
     * @param array $chars Chars of period.
     *
     * @return string
     */
    private function mergeDelimitedString(int $count, int $ord, int &$i, array $chars): string
    {
        $tmp = [];

        while (++$i < $count) {
            $char = $chars[$i];

            if ($ord === ord($char)) {
                break;
            }

            $tmp[] = $char;
        }

        //fix for regex, add delimiter
        //35 = #, 47 = /, 126 = ~
        if (in_array($ord, [35, 47, 126], true)) {
            array_unshift($tmp, chr($ord));
            array_push($tmp, chr($ord));
        }

        return implode('', $tmp);
    }
}
|
89
|
|
|
|