1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* This file is part of Railt package. |
4
|
|
|
* |
5
|
|
|
* For the full copyright and license information, please view the LICENSE |
6
|
|
|
* file that was distributed with this source code. |
7
|
|
|
*/ |
8
|
|
|
declare(strict_types=1); |
9
|
|
|
|
10
|
|
|
namespace Railt\Lexer\Iterator; |
11
|
|
|
|
12
|
|
|
/**
 * An iterator which returns a list of regex groups.
 *
 * Every match of the pattern inside the subject is collected as one array
 * of groups (the array PCRE passes to a replace callback), in match order.
 */
class RegexIterator implements \IteratorAggregate
{
    public const PREG_PARSING_ERROR = 'The error occurs while compiling PCRE';
    public const PREG_INTERNAL_ERROR = 'There was an internal PCRE error';
    public const PREG_BACKTRACK_LIMIT_ERROR = 'Backtrack limit was exhausted';
    public const PREG_RECURSION_LIMIT_ERROR = 'Recursion limit was exhausted';
    // PREG_BAD_UTF8_ERROR is reported for malformed UTF-8 data, while
    // PREG_BAD_UTF8_OFFSET_ERROR is reported for an offset that does not
    // point at the start of a code point.
    public const PREG_BAD_UTF8_ERROR = 'Malformed UTF-8 data';
    public const PREG_BAD_UTF8_OFFSET_ERROR = 'The offset didn\'t correspond to the begin of a valid UTF-8 code point';
    public const PREG_JIT_STACKLIMIT_ERROR = 'JIT stack limit was exhausted';

    /**
     * @var string
     */
    private $pattern;

    /**
     * @var string
     */
    private $subject;

    /**
     * RegexIterator constructor.
     *
     * @param string $pattern PCRE pattern (with delimiters) to apply
     * @param string $subject Text the pattern is applied to
     */
    public function __construct(string $pattern, string $subject)
    {
        $this->pattern = $pattern;
        $this->subject = $subject;
    }

    /**
     * Runs the pattern over the subject and returns all collected matches.
     *
     * @return \Traversable|array[]
     * @throws \InvalidArgumentException
     * @throws \RuntimeException
     */
    public function getIterator(): \Traversable
    {
        $result = new \SplQueue();

        // Forget any earlier, unrelated error so that validate() can only
        // ever report a message produced by the PCRE call below.
        \error_clear_last();

        // The replacement value is irrelevant: the callback is used purely to
        // observe every match. Warnings are silenced here because failures
        // are turned into exceptions by validate().
        $status = @\preg_replace_callback($this->pattern, static function (array $matches) use ($result): void {
            $result->push($matches);
        }, $this->subject);

        $this->validate($status);

        return $result;
    }

    /**
     * Converts a failed PCRE call into an exception.
     *
     * @param string|null $status Value returned by preg_replace_callback()
     * @return void
     * @throws \InvalidArgumentException
     * @throws \RuntimeException
     */
    private function validate($status): void
    {
        $code = \preg_last_error();

        // NOTE(review): newer PHP versions appear to report pattern
        // compilation failures as PREG_INTERNAL_ERROR, in which case this
        // branch fires before the more descriptive one below - confirm.
        if ($code !== \PREG_NO_ERROR) {
            throw new \RuntimeException($this->getErrorMessage($code), $code);
        }

        if ($status === null) {
            // Keep only the text after the last colon of the recorded error.
            $parts = \explode(':', \error_get_last()['message'] ?? '');
            $error = \sprintf('%s, %s', self::PREG_PARSING_ERROR, \trim(\end($parts)));

            throw new \InvalidArgumentException($error);
        }
    }

    /**
     * Maps a preg_last_error() code to a human readable message.
     *
     * @param int $code
     * @return string
     */
    private function getErrorMessage(int $code): string
    {
        switch ($code) {
            case \PREG_INTERNAL_ERROR:
                return self::PREG_INTERNAL_ERROR;

            case \PREG_BACKTRACK_LIMIT_ERROR:
                return self::PREG_BACKTRACK_LIMIT_ERROR;

            case \PREG_RECURSION_LIMIT_ERROR:
                return self::PREG_RECURSION_LIMIT_ERROR;

            case \PREG_BAD_UTF8_ERROR:
                return self::PREG_BAD_UTF8_ERROR;

            case \PREG_BAD_UTF8_OFFSET_ERROR:
                return self::PREG_BAD_UTF8_OFFSET_ERROR;

            case \PREG_JIT_STACKLIMIT_ERROR:
                return self::PREG_JIT_STACKLIMIT_ERROR;
        }

        return 'Unexpected PCRE error (Code ' . $code . ')';
    }

    /**
     * Destroy current body
     */
    public function __destruct()
    {
        unset($this->pattern, $this->subject);
    }
}
117
|
|
|
|
As per the PSR-2 coding standard, case statements should not be wrapped in curly braces. There is no need for braces, since each case is terminated by the next `break`. There is also the option to use a semicolon instead of a colon; this is discouraged because many programmers do not even know it works, and the colon is universal between programming languages.
To learn more about the PSR-2 coding standard, please refer to the PHP-Fig.