1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* Linna Filter |
5
|
|
|
* |
6
|
|
|
* @author Sebastian Rapetti <[email protected]> |
7
|
|
|
* @copyright (c) 2018, Sebastian Rapetti |
8
|
|
|
* @license http://opensource.org/licenses/MIT MIT License |
9
|
|
|
*/ |
10
|
|
|
declare(strict_types=1); |
11
|
|
|
|
12
|
|
|
namespace Linna\Filter; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* Lexer. |
16
|
|
|
*/ |
17
|
|
|
class Lexer
{
    /**
     * Split period in tokens.
     *
     * The period is trimmed and then scanned one byte at a time. Space,
     * comma, colon and semicolon close the current token. Text enclosed
     * between two equal delimiters (", ', #, / or ~) is handled by
     * mergeDelimitedString() so separators inside it do not split the token.
     * Empty and whitespace-only tokens are discarded; every other token,
     * the string "0" included, is returned.
     *
     * @param string $period
     *
     * @return array List of tokens, indexed from zero.
     */
    public function tokenize(string $period): array
    {
        $period = trim($period);

        //nothing to tokenize
        if ($period === '') {
            return [];
        }

        $chars = str_split($period);
        $count = count($chars);
        $words = [];
        $token = '';

        for ($i = 0; $i < $count; $i++) {
            $char = $chars[$i];
            $ord = ord($char);

            //treat delimited string separately
            //this fix some problems with regex rule
            //34 = ", 35 = #, 39 = ', 47 = /, 126 = ~
            if (in_array($ord, [34, 35, 39, 47, 126], true)) {
                //$i is moved forward by the callee
                $token .= $this->mergeDelimitedString($count, $ord, $i, $chars);
                continue;
            }

            //token separators
            //32 = space, 44 = comma, 58 = colon, 59 = semicolon
            if (in_array($ord, [32, 44, 58, 59], true)) {
                $words[] = $token;
                $token = '';
                continue;
            }

            $token .= $char;
        }

        //flush the token still open when the period ends
        $words[] = $token;

        //compare against the empty string instead of relying on truthiness,
        //otherwise the token "0" is considered empty and get lost
        $filtered = array_filter($words, static function (string $word): bool {
            return trim($word) !== '';
        });

        return array_values($filtered);
    }

    /**
     * Merge delimited string separately from main lexer cycle.
     *
     * Collects every char after the opening delimiter until the same
     * delimiter is found again or the chars are finished. Quotes (" and ')
     * are removed from the result, regex delimiters (#, / and ~) are put
     * back on both sides of the collected text.
     *
     * @param int   $count Size of chars array.
     * @param int   $ord   Delimiter that triggered this method.
     * @param int   $i     Main cycle counter, passed by reference: on entry
     *                     it is the position of the opening delimiter, on
     *                     return it is the position of the closing delimiter
     *                     or $count when no closing delimiter was found.
     * @param array $chars Chars of period.
     *
     * @return string
     */
    private function mergeDelimitedString(int $count, int $ord, int &$i, array &$chars): string
    {
        $content = '';

        while (++$i < $count) {
            $char = $chars[$i];

            //closing delimiter reached
            if ($ord === ord($char)) {
                break;
            }

            $content .= $char;
        }

        //fix for regex, add delimiter
        if (in_array($ord, [35, 47, 126], true)) {
            $delimiter = chr($ord);

            return $delimiter.$content.$delimiter;
        }

        return $content;
    }
}
89
|
|
|
|