1 | <?php |
||
2 | |||
3 | namespace Sepia\PoParser; |
||
4 | |||
5 | use Sepia\PoParser\Catalog\Catalog; |
||
6 | use Sepia\PoParser\Catalog\CatalogArray; |
||
7 | use Sepia\PoParser\Catalog\EntryFactory; |
||
8 | use Sepia\PoParser\Catalog\Header; |
||
9 | use Sepia\PoParser\Exception\ParseException; |
||
10 | use Sepia\PoParser\SourceHandler\FileSystem; |
||
11 | use Sepia\PoParser\SourceHandler\SourceHandler; |
||
12 | use Sepia\PoParser\SourceHandler\StringSource; |
||
13 | |||
14 | /** |
||
15 | * Copyright (c) 2012 Raúl Ferràs [email protected] |
||
16 | * All rights reserved. |
||
17 | * |
||
18 | * Redistribution and use in source and binary forms, with or without |
||
19 | * modification, are permitted provided that the following conditions |
||
20 | * are met: |
||
21 | * 1. Redistributions of source code must retain the above copyright |
||
22 | * notice, this list of conditions and the following disclaimer. |
||
23 | * 2. Redistributions in binary form must reproduce the above copyright |
||
24 | * notice, this list of conditions and the following disclaimer in the |
||
25 | * documentation and/or other materials provided with the distribution. |
||
26 | * 3. Neither the name of copyright holders nor the names of its |
||
27 | * contributors may be used to endorse or promote products derived |
||
28 | * from this software without specific prior written permission. |
||
29 | * |
||
30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||
31 | * ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
||
32 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
||
33 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS |
||
34 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
||
35 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
||
36 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
||
37 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||
38 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||
39 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||
40 | * POSSIBILITY OF SUCH DAMAGE. |
||
41 | * |
||
42 | * https://github.com/raulferras/PHP-po-parser |
||
43 | * |
||
44 | * Class to parse .po file and extract its strings. |
||
45 | * |
||
46 | * @version 5.0 |
||
47 | */ |
||
class Parser
{
    /** @var SourceHandler Source the PO content is read from, one line at a time. */
    protected $sourceHandler;

    /** @var int Zero-based index of the line currently being processed by parse(). */
    protected $lineNumber;

    /** @var string|null Key written by the last keyword line; quoted continuation lines append to it. */
    protected $property;

    /**
     * Parses PO content held in a string.
     *
     * @param string $string po content
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseString($string)
    {
        $parser = new Parser(new StringSource($string));

        return $parser->parse();
    }

    /**
     * Parses the PO file found at the given path.
     *
     * @param string $filePath
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseFile($filePath)
    {
        $parser = new Parser(new FileSystem($filePath));

        return $parser->parse();
    }

    /**
     * @param SourceHandler $sourceHandler Source the lines are pulled from.
     */
    public function __construct(SourceHandler $sourceHandler)
    {
        $this->sourceHandler = $sourceHandler;
    }

    /**
     * Reads the source line by line and fills a catalog with its headers and entries.
     *
     * An entry is closed by an empty line or by a "msgid" keyword appearing when the
     * current entry already has one. The first closed entry recognised by isHeader()
     * is stored as the catalog headers; every other entry is added through EntryFactory.
     *
     * The source handler is closed both on success and when parsing fails.
     *
     * NOTE: the implicit-nullable "Catalog $catalog = null" signature is kept on purpose
     * so the file keeps working on the old PHP versions its syntax targets.
     *
     * @param Catalog|null $catalog Catalog to fill. A new CatalogArray is used when omitted.
     *
     * @throws ParseException
     * @throws \Exception
     * @return Catalog
     */
    public function parse(Catalog $catalog = null)
    {
        if ($catalog === null) {
            $catalog = new CatalogArray();
        }

        $this->lineNumber = 0;
        $this->property = null;
        $entry = array();
        $headersFound = false;

        try {
            while (!$this->sourceHandler->ended()) {
                $line = \trim($this->sourceHandler->getNextLine());

                if ($this->shouldIgnoreLine($line, $entry)) {
                    $this->lineNumber++;
                    continue;
                }

                if ($this->shouldCloseEntry($line, $entry)) {
                    $headersFound = $this->storeEntry($catalog, $entry, $headersFound);
                    $entry = array();
                    $this->property = null;

                    // An empty separator line carries no data of its own.
                    if ($line === '') {
                        $this->lineNumber++;
                        continue;
                    }
                }

                $entry = $this->parseLine($line, $entry);
                $this->lineNumber++;
            }
        } catch (\Exception $e) {
            // Do not leak the source when a line cannot be parsed.
            $this->sourceHandler->close();

            throw $e;
        }

        $this->sourceHandler->close();

        // The last entry is not followed by a separator, so flush it here.
        if (\count($entry) > 0) {
            $this->storeEntry($catalog, $entry, $headersFound);
        }

        return $catalog;
    }

    /**
     * Stores a completed entry in the catalog, as headers or as a regular entry.
     *
     * @param Catalog $catalog
     * @param array   $entry
     * @param bool    $headersFound Whether the headers were already stored.
     *
     * @return bool Updated value for the "headers found" flag.
     */
    private function storeEntry(Catalog $catalog, array $entry, $headersFound)
    {
        if (!$headersFound && $this->isHeader($entry)) {
            $catalog->addHeaders($this->parseHeaders($entry['msgstr']));

            return true;
        }

        $catalog->addEntry(EntryFactory::createFromArray($entry));

        return $headersFound;
    }

    /**
     * Tells whether a key holds a quoted string that may span several lines.
     *
     * @param mixed $name
     *
     * @return bool
     */
    private function isStringProperty($name)
    {
        if (!\is_string($name)) {
            return false;
        }

        return \in_array($name, array('msgctxt', 'msgid', 'msgid_plural', 'msgstr'), true)
            || \strpos($name, 'msgstr[') === 0;
    }

    /**
     * Dispatches a line to the comment, keyword or continuation parser by its first character.
     * Lines starting with any other character leave the entry untouched.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseLine($line, $entry)
    {
        $firstChar = \strlen($line) > 0 ? $line[0] : '';

        if ($firstChar === '#') {
            return $this->parseComment($line, $entry);
        }

        if ($firstChar === 'm') {
            return $this->parseProperty($line, $entry);
        }

        if ($firstChar === '"') {
            return $this->parseMultiline($line, $entry);
        }

        return $entry;
    }

    /**
     * Parses a keyword line such as: msgid "text".
     *
     * The unquoted value is appended to the entry under the keyword, and the keyword is
     * remembered in $this->property so following quoted lines continue the same value.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When the keyword is not a known message property.
     */
    protected function parseProperty($line, array $entry)
    {
        // A bare keyword (no value after it) yields a single token; treat its value as empty.
        list($key, $value) = \array_pad($this->getProperty($line), 2, '');

        if (!$this->isStringProperty($key)) {
            // lineNumber is zero-based; report the line the way editors number it.
            throw new ParseException(\sprintf('Could not parse %s at line %d', $key, $this->lineNumber + 1));
        }

        if (!isset($entry[$key])) {
            $entry[$key] = '';
        }

        $entry[$key] .= $this->unquote($value);
        $this->property = $key;

        return $entry;
    }

    /**
     * Appends a quoted continuation line to the property set by the last keyword line.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When no string property is currently open.
     */
    protected function parseMultiline($line, $entry)
    {
        if (!$this->isStringProperty($this->property)) {
            throw new ParseException(
                \sprintf('Error parsing property %s as multiline.', $this->property)
            );
        }

        // The open property may have been set while filling a different (sub-)entry.
        if (!isset($entry[$this->property])) {
            $entry[$this->property] = '';
        }

        $entry[$this->property] .= $this->unquote($line);

        return $entry;
    }

    /**
     * Parses a line starting with "#".
     *
     * Markers handled:
     *  - "#,"  flags, split on commas
     *  - "#."  stored under "ccomment"
     *  - "#:"  stored under "reference"
     *  - "#|"  remainder parsed into the "previous" sub-entry
     *  - "#~"  remainder parsed into the entry, which is flagged "obsolete"
     *  - "#~|" remainder parsed into the "previous" sub-entry, entry flagged "obsolete"
     *  - anything else is stored under "tcomment"
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseComment($line, $entry)
    {
        // "#~|" is the only three-character marker, so it must be tested before the
        // two-character lookup below, which would otherwise read it as "#~".
        if (\strpos($line, '#~|') === 0) {
            $previous = isset($entry['previous']) ? $entry['previous'] : array();
            $entry['previous'] = $this->parseLine(\trim(\substr($line, 3)), $previous);
            $entry['obsolete'] = true;

            return $entry;
        }

        $comment = \trim(\substr($line, 0, 2));

        switch ($comment) {
            case '#,':
                $entry['flags'] = \preg_split('/,\s*/', \trim(\substr($line, 2)));
                break;

            case '#.':
                if (!isset($entry['ccomment'])) {
                    $entry['ccomment'] = array();
                }
                $entry['ccomment'][] = \trim(\substr($line, 2));
                break;

            case '#|': // Previous string
                $previous = isset($entry['previous']) ? $entry['previous'] : array();
                $entry['previous'] = $this->parseLine(\trim(\substr($line, 2)), $previous);
                break;

            case '#~': // Old entry
                $entry = $this->parseLine(\trim(\substr($line, 2)), $entry);
                $entry['obsolete'] = true;
                break;

            case '#:': // Reference
                $entry['reference'][] = \trim(\substr($line, 2));
                break;

            case '#':
            default:
                if (!isset($entry['tcomment'])) {
                    $entry['tcomment'] = array();
                }
                $entry['tcomment'][] = \trim(\substr($line, 1));
                break;
        }

        return $entry;
    }

    /**
     * Builds the Header object from the msgstr of the header entry.
     * The value is split on newlines and empty lines are discarded.
     *
     * @param string $msgstr
     *
     * @return Header
     */
    protected function parseHeaders($msgstr)
    {
        $headers = \array_filter(\explode("\n", $msgstr));

        return new Header($headers);
    }

    /**
     * An empty line is skipped when no entry has been started yet.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldIgnoreLine($line, array $entry)
    {
        return empty($line) && \count($entry) === 0;
    }

    /**
     * The current entry ends on an empty line, or when a new "msgid" keyword
     * shows up while the entry already has a msgid.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldCloseEntry($line, array $entry)
    {
        if ($line === '') {
            return true;
        }

        $tokens = $this->getProperty($line);

        return $tokens[0] === 'msgid' && isset($entry['msgid']);
    }

    /**
     * Strips one leading and one trailing double quote, then resolves C-style escapes.
     *
     * @param string $value
     * @return string
     */
    protected function unquote($value)
    {
        return \stripcslashes(\preg_replace('/^\"|\"$/', '', $value));
    }

    /**
     * Checks if an entry is the header block: its msgid must be set and empty, and its
     * msgstr must contain at least one standard header name or one custom "X-" header.
     *
     * @param array $entry
     *
     * @return bool
     */
    protected function isHeader(array $entry)
    {
        if (empty($entry) || !isset($entry['msgstr'])) {
            return false;
        }

        if (!isset($entry['msgid']) || !empty($entry['msgid'])) {
            return false;
        }

        $standardHeaders = array(
            'Project-Id-Version:',
            'Report-Msgid-Bugs-To:',
            'POT-Creation-Date:',
            'PO-Revision-Date:',
            'Last-Translator:',
            'Language-Team:',
            'MIME-Version:',
            'Content-Type:',
            'Content-Transfer-Encoding:',
            'Plural-Forms:',
        );

        // Keep only the "Name:" part of every msgstr line (text after the first colon is dropped).
        $headers = \array_map(
            function ($header) {
                return \preg_replace('/(.*?:)(.*)/i', '${1}', $header);
            },
            \explode("\n", $entry['msgstr'])
        );

        if (\count(\array_intersect($standardHeaders, $headers)) > 0) {
            return true;
        }

        // No standard header present: accept the entry if it carries a custom "X-" header.
        $customHeaders = \array_filter(
            $headers,
            function ($header) {
                return \preg_match('/^X\-(.*):/i', $header) === 1;
            }
        );

        return \count($customHeaders) > 0;
    }

    /**
     * Splits a line on its first run of whitespace into keyword and remainder.
     * A line without whitespace yields a single-element array.
     *
     * @param string $line
     *
     * @return array
     */
    protected function getProperty($line)
    {
        return \preg_split('/\s+/', $line, 2);
    }
}
||
431 |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.