@@ -11,32 +11,32 @@ |
||
11 | 11 | |
class Utf8 implements OutputInterface
{
    /**
     * Encode a Unicode codepoint as a UTF-8 byte sequence
     *
     * Produces 1 byte for values below 0x80, 2 bytes below 0x800, 3 bytes
     * below 0x10000 and 4 bytes below 0x110000.
     *
     * @param  integer $value Unicode codepoint
     * @return string         UTF-8 encoding of the codepoint
     *
     * @throws InvalidArgumentException if the value is negative, is a UTF-16
     *                                  surrogate, or is greater than 0x10FFFF
     */
    public function output($value)
    {
        // chr() wraps out-of-range input modulo 256, so a negative value would
        // silently come back as an unrelated single byte instead of failing
        if ($value < 0)
        {
            throw new InvalidArgumentException('Invalid UTF-8 codepoint ' . $value);
        }
        if ($value < 0x80)
        {
            return chr($value);
        }
        if ($value < 0x800)
        {
            return chr(0xC0 | ($value >> 6)) . chr(0x80 | ($value & 0x3F));
        }
        if ($value < 0x10000)
        {
            // Surrogates are reserved for UTF-16 and have no valid UTF-8 encoding
            if ($value >= 0xD800 && $value <= 0xDFFF)
            {
                throw new InvalidArgumentException('Invalid UTF-8 codepoint 0x' . dechex($value));
            }

            return chr(0xE0 | ($value >> 12))
                 . chr(0x80 | (($value >> 6) & 0x3F))
                 . chr(0x80 | ($value & 0x3F));
        }
        if ($value < 0x110000)
        {
            return chr(0xF0 | ($value >> 18))
                 . chr(0x80 | (($value >> 12) & 0x3F))
                 . chr(0x80 | (($value >> 6) & 0x3F))
                 . chr(0x80 | ($value & 0x3F));
        }

        throw new InvalidArgumentException('Invalid UTF-8 codepoint 0x' . dechex($value));
    }
}
43 | 43 | \ No newline at end of file |
@@ -11,189 +11,189 @@ |
||
11 | 11 | |
class Serializer
{
    /**
     * @var Escaper Escapes literals and character class members
     */
    protected $escaper;

    /**
     * @var OutputInterface Converts a value into its string representation
     */
    protected $output;

    /**
     * @param OutputInterface $output
     * @param Escaper         $escaper
     */
    public function __construct(OutputInterface $output, Escaper $escaper)
    {
        $this->escaper = $escaper;
        $this->output  = $output;
    }

    /**
     * Serialize given strings into a regular expression
     *
     * Each string is a list of elements. An element is either a value passed
     * to the output/escaper pair, or a nested list of strings that is
     * serialized recursively. An empty string makes the whole expression
     * optional.
     *
     * @param  array[] $strings
     * @return string
     */
    public function serializeStrings(array $strings)
    {
        $info         = $this->analyzeStrings($strings);
        $alternations = $this->buildAlternations($info);
        $expr         = implode('|', $alternations);

        // A group is needed to scope the "|" operator, or to make the
        // quantifier apply to a whole multi-element string
        if (count($alternations) > 1 || $this->isOneOptionalString($info))
        {
            $expr = '(?:' . $expr . ')';
        }

        return $expr . $info['quantifier'];
    }

    /**
     * Analyze given strings to determine how to serialize them
     *
     * The returned array may contain any of the following elements:
     *
     *  - (string) quantifier Either '' or '?'
     *  - (array)  chars      List of values from single-char strings, only
     *                        set if there is more than one of them
     *  - (array)  strings    List of all the other non-empty strings
     *
     * @param  array[] $strings
     * @return array
     */
    protected function analyzeStrings(array $strings)
    {
        $info  = ['quantifier' => ''];
        $chars = [];
        foreach ($strings as $k => $string)
        {
            if (empty($string))
            {
                // An empty string means the whole expression is optional
                $info['quantifier'] = '?';
                unset($strings[$k]);
            }
            elseif (!isset($string[1]) && !is_array($string[0]))
            {
                // Only plain values can go in a character class. A lone nested
                // alternation must remain a string so that serializeString()
                // handles it, otherwise getRanges() would do arithmetic on an
                // array
                $chars[$k] = $string[0];
            }
        }

        // A character class is only worth it for two chars or more
        if (count($chars) > 1)
        {
            $info['chars'] = array_values($chars);
            $strings       = array_diff_key($strings, $chars);
        }

        $info['strings'] = array_values($strings);

        return $info;
    }

    /**
     * Build the list of alternations based on given info
     *
     * The character class, if any, comes first and is followed by each
     * serialized string in order.
     *
     * @param  array    $info Array returned by analyzeStrings()
     * @return string[]
     */
    protected function buildAlternations(array $info)
    {
        $alternations = [];
        if (!empty($info['chars']))
        {
            $alternations[] = $this->serializeCharacterClass($info['chars']);
        }
        foreach ($info['strings'] as $string)
        {
            $alternations[] = $this->serializeString($string);
        }

        return $alternations;
    }

    /**
     * Get the list of ranges that cover all given values
     *
     * A range is only extended when the next value is exactly one more than
     * its current end, anything else starts a new range.
     *
     * @param  integer[] $values Ordered list of values, indexed from 0
     * @return array[]           List of ranges in the form [start, end]
     */
    protected function getRanges(array $values)
    {
        if (empty($values))
        {
            // Nothing to cover, and $values[0] would be undefined
            return [];
        }

        $i      = 0;
        $cnt    = count($values);
        $start  = $values[0];
        $end    = $start;
        $ranges = [];
        while (++$i < $cnt)
        {
            if ($values[$i] === $end + 1)
            {
                ++$end;
            }
            else
            {
                $ranges[] = [$start, $end];
                $start = $end = $values[$i];
            }
        }
        $ranges[] = [$start, $end];

        return $ranges;
    }

    /**
     * Test whether the expression is optional and its first string has more than one element
     *
     * NOTE(review): a lone optional string whose only element is a nested list
     * of strings is not covered by this test. If the nested expression is more
     * than one atom, the quantifier would only apply to its last atom. Confirm
     * whether callers can produce such input.
     *
     * @param  array $info Array returned by analyzeStrings()
     * @return bool
     */
    protected function isOneOptionalString(array $info)
    {
        // Test whether the first string has a quantifier and more than one element
        return (!empty($info['quantifier']) && isset($info['strings'][0][1]));
    }

    /**
     * Serialize a given list of values into a character class
     *
     * @param  integer[] $values
     * @return string
     */
    protected function serializeCharacterClass(array $values)
    {
        $expr = '[';
        foreach ($this->getRanges($values) as list($start, $end))
        {
            $expr .= $this->escaper->escapeCharacterClass($this->output->output($start));
            if ($end > $start)
            {
                // Two consecutive values are listed side by side, the dash is
                // only used for ranges of three values or more
                if ($end > $start + 1)
                {
                    $expr .= '-';
                }
                $expr .= $this->escaper->escapeCharacterClass($this->output->output($end));
            }
        }
        $expr .= ']';

        return $expr;
    }

    /**
     * Serialize a given string into a regular expression
     *
     * Array elements are serialized recursively as a list of strings, other
     * elements are output then escaped as literals.
     *
     * @param  array  $string
     * @return string
     */
    protected function serializeString(array $string)
    {
        $expr = '';
        foreach ($string as $element)
        {
            $expr .= (is_array($element))
                   ? $this->serializeStrings($element)
                   : $this->escaper->escapeLiteral($this->output->output($element));
        }

        return $expr;
    }
}
200 | 200 | \ No newline at end of file |