Issues (10)

src/Parser.php (1 issue)

1
<?php
2
3
namespace Sepia\PoParser;
4
5
use Sepia\PoParser\Catalog\Catalog;
6
use Sepia\PoParser\Catalog\CatalogArray;
7
use Sepia\PoParser\Catalog\EntryFactory;
8
use Sepia\PoParser\Catalog\Header;
9
use Sepia\PoParser\Exception\ParseException;
10
use Sepia\PoParser\SourceHandler\FileSystem;
11
use Sepia\PoParser\SourceHandler\SourceHandler;
12
use Sepia\PoParser\SourceHandler\StringSource;
13
14
/**
15
 *    Copyright (c) 2012 Raúl Ferràs [email protected]
16
 *    All rights reserved.
17
 *
18
 *    Redistribution and use in source and binary forms, with or without
19
 *    modification, are permitted provided that the following conditions
20
 *    are met:
21
 *    1. Redistributions of source code must retain the above copyright
22
 *       notice, this list of conditions and the following disclaimer.
23
 *    2. Redistributions in binary form must reproduce the above copyright
24
 *       notice, this list of conditions and the following disclaimer in the
25
 *       documentation and/or other materials provided with the distribution.
26
 *    3. Neither the name of copyright holders nor the names of its
27
 *       contributors may be used to endorse or promote products derived
28
 *       from this software without specific prior written permission.
29
 *
30
 *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31
 *    ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32
 *    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33
 *    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
34
 *    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37
 *    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38
 *    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39
 *    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40
 *    POSSIBILITY OF SUCH DAMAGE.
41
 *
42
 * https://github.com/raulferras/PHP-po-parser
43
 *
44
 * Class to parse .po file and extract its strings.
45
 *
46
 * @version 5.0
47
 */
48
class Parser
{
    /** @var SourceHandler Line source the parser reads from. */
    protected $sourceHandler;

    /** @var int Zero-based index of the line currently being processed. */
    protected $lineNumber;

    /** @var string|null Last property keyword seen; continuation lines are appended to it. */
    protected $property;

    /**
     * Parses po content held in a string.
     *
     * @param string $string po content
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseString($string)
    {
        $parser = new Parser(new StringSource($string));

        return $parser->parse();
    }

    /**
     * Parses the po file found at the given path.
     *
     * @param string $filePath
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseFile($filePath)
    {
        $parser = new Parser(new FileSystem($filePath));

        return $parser->parse();
    }

    /**
     * @param SourceHandler $sourceHandler Source the lines are read from.
     */
    public function __construct(SourceHandler $sourceHandler)
    {
        $this->sourceHandler = $sourceHandler;
    }

    /**
     * Reads every line from the source handler and collects the entries and
     * headers found into a catalog.
     *
     * The source handler is closed once reading finishes. It is also closed
     * when parsing fails, so that a failed parse does not leave it open.
     *
     * @param Catalog|null $catalog Catalog to fill; a new CatalogArray is created when omitted.
     *
     * @throws \Exception
     * @throws ParseException
     * @return Catalog
     */
    public function parse(Catalog $catalog = null)
    {
        $catalog = $catalog === null ? new CatalogArray() : $catalog;
        $this->lineNumber = 0;
        $this->property = null; // current property
        $entry = array();

        // Only the first header-looking entry is stored as headers while reading.
        $headersFound = false;

        try {
            while (!$this->sourceHandler->ended()) {
                $line = \trim($this->sourceHandler->getNextLine());

                if ($this->shouldIgnoreLine($line, $entry)) {
                    $this->lineNumber++;
                    continue;
                }

                if ($this->shouldCloseEntry($line, $entry)) {
                    if (!$headersFound && $this->isHeader($entry)) {
                        $headersFound = true;
                        $catalog->addHeaders(
                            $this->parseHeaders($entry['msgstr'])
                        );
                    } else {
                        $catalog->addEntry(EntryFactory::createFromArray($entry));
                    }

                    $entry = array();
                    $this->property = null;

                    // A blank separator carries no data; a "msgid" line that
                    // closed the previous entry still has to be parsed below.
                    if ($line === '') {
                        $this->lineNumber++;
                        continue;
                    }
                }

                $entry = $this->parseLine($line, $entry);
                $this->lineNumber++;
            }
        } catch (\Exception $exception) {
            $this->closeSourceQuietly();

            throw $exception;
        }

        $this->sourceHandler->close();

        // add final entry
        if (\count($entry)) {
            if ($this->isHeader($entry)) {
                $catalog->addHeaders(
                    $this->parseHeaders($entry['msgstr'])
                );
            } else {
                $catalog->addEntry(EntryFactory::createFromArray($entry));
            }
        }

        return $catalog;
    }

    /**
     * Dispatches a line to the matching parser based on its first character:
     * '#' comment, 'm' property keyword, '"' continuation string.
     * Lines starting with anything else leave the entry untouched.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseLine($line, $entry)
    {
        $firstChar = \strlen($line) > 0 ? $line[0] : '';

        switch ($firstChar) {
            case '#':
                $entry = $this->parseComment($line, $entry);
                break;

            case 'm':
                $entry = $this->parseProperty($line, $entry);
                break;

            case '"':
                $entry = $this->parseMultiline($line, $entry);
                break;
        }

        return $entry;
    }

    /**
     * Parses a "keyword value" line (msgctxt, msgid, msgid_plural, msgstr or
     * msgstr[N]) and appends the unquoted value to the entry.
     *
     * The keyword is remembered in $this->property so that following
     * continuation lines are appended to the same key.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When the keyword is not a known property.
     */
    protected function parseProperty($line, array $entry)
    {
        $tokens = $this->getProperty($line);
        $key = $tokens[0];
        // A keyword without a value yields a single token; treat it as empty.
        $value = isset($tokens[1]) ? $tokens[1] : '';

        if (!$this->isTranslationProperty($key)) {
            throw new ParseException(\sprintf('Could not parse %s at line %d', $key, $this->lineNumber));
        }

        if (!isset($entry[$key])) {
            $entry[$key] = '';
        }

        $entry[$key] .= $this->unquote($value);
        $this->property = $key;

        return $entry;
    }

    /**
     * Appends a continuation string line to the property parsed last.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When no property is currently open.
     */
    protected function parseMultiline($line, $entry)
    {
        if (!$this->isTranslationProperty($this->property)) {
            throw new ParseException(
                \sprintf('Error parsing property %s as multiline.', $this->property)
            );
        }

        // The open property may have been set while filling another array
        // (for example the "previous" sub-entry), so the key can be missing here.
        if (!isset($entry[$this->property])) {
            $entry[$this->property] = '';
        }

        $entry[$this->property] .= $this->unquote($line);

        return $entry;
    }

    /**
     * Parses a line starting with '#'.
     *
     * Recognised markers:
     *  - '#,'  flags, split on commas
     *  - '#.'  extracted comment, appended to 'ccomment'
     *  - '#:'  reference, appended to 'reference'
     *  - '#|'  previous string, parsed into the 'previous' sub-entry
     *  - '#~'  obsolete entry, parsed into the entry itself and flagged 'obsolete'
     *  - '#~|' previous string of an obsolete entry, parsed into 'previous' and flagged 'obsolete'
     *  - anything else is a translator comment, appended to 'tcomment'
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseComment($line, $entry)
    {
        // The three-character marker has to be tested first: the two-character
        // lookup below would see it as '#~' and lose the content after the '|'.
        if (\strpos($line, '#~|') === 0) {
            $entry = $this->parsePreviousLine(\trim(\substr($line, 3)), $entry);
            $entry['obsolete'] = true;

            return $entry;
        }

        $comment = \trim(\substr($line, 0, 2));

        switch ($comment) {
            case '#,':
                $entry['flags'] = \preg_split('/,\s*/', \trim(\substr($line, 2)));
                break;

            case '#.':
                $entry['ccomment'] = !isset($entry['ccomment']) ? array() : $entry['ccomment'];
                $entry['ccomment'][] = \trim(\substr($line, 2));
                break;

            case '#|': // Previous string
                $entry = $this->parsePreviousLine(\trim(\substr($line, 2)), $entry);
                break;

            case '#~': // Old entry
                $entry = $this->parseLine(\trim(\substr($line, 2)), $entry);
                $entry['obsolete'] = true;
                break;

            // Reference
            case '#:':
                $entry['reference'][] = \trim(\substr($line, 2));
                break;

            case '#':
            default:
                $entry['tcomment'] = !isset($entry['tcomment']) ? array() : $entry['tcomment'];
                $entry['tcomment'][] = \trim(\substr($line, 1));
                break;
        }

        return $entry;
    }

    /**
     * Builds the header object from the msgstr of the header entry,
     * one header per non-empty line.
     *
     * @param string $msgstr
     *
     * @return Header
     */
    protected function parseHeaders($msgstr)
    {
        $headers = \array_filter(\explode("\n", $msgstr));

        return new Header($headers);
    }

    /**
     * Tells whether a line can be skipped: it is empty and no entry has been started.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldIgnoreLine($line, array $entry)
    {
        return empty($line) && \count($entry) === 0;
    }

    /**
     * Tells whether the current entry is complete: either the line is blank
     * or it starts a new msgid while the entry already holds one.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldCloseEntry($line, array $entry)
    {
        if ($line === '') {
            return true;
        }

        $tokens = $this->getProperty($line);

        return $tokens[0] === 'msgid' && isset($entry['msgid']);
    }

    /**
     * Removes one leading and one trailing double quote and resolves
     * C-style backslash escapes.
     *
     * @param string $value
     * @return string
     */
    protected function unquote($value)
    {
        return \stripcslashes(\preg_replace('/^\"|\"$/', '', $value));
    }

    /**
     * Checks if an entry is the header entry: it has an empty msgid and its
     * msgstr contains at least one standard header or one custom "X-" header.
     *
     * @param array $entry
     *
     * @return bool
     */
    protected function isHeader(array $entry)
    {
        if (empty($entry) || !isset($entry['msgstr'])) {
            return false;
        }

        // Strict comparison: a msgid such as "0" is a regular entry, not a header.
        if (!isset($entry['msgid']) || $entry['msgid'] !== '') {
            return false;
        }

        $standardHeaders = array(
            'Project-Id-Version:',
            'Report-Msgid-Bugs-To:',
            'POT-Creation-Date:',
            'PO-Revision-Date:',
            'Last-Translator:',
            'Language-Team:',
            'MIME-Version:',
            'Content-Type:',
            'Content-Transfer-Encoding:',
            'Plural-Forms:',
        );

        // Keep only the header name, up to and including the first colon.
        $headers = \array_map(
            function ($header) {
                return \preg_replace('/(.*?:)(.*)/i', '${1}', $header);
            },
            \explode("\n", $entry['msgstr'])
        );

        if (\count(\array_intersect($standardHeaders, $headers)) > 0) {
            return true;
        }

        // If it does not contain any of the standard headers
        // Let's see if it contains any custom header.
        $customHeaders = \array_filter(
            $headers,
            function ($header) {
                return \preg_match('/^X\-(.*):/i', $header) === 1;
            }
        );

        return \count($customHeaders) > 0;
    }

    /**
     * Splits a line on its first run of whitespace into keyword and remainder.
     *
     * @param string $line
     *
     * @return array One element when the line holds no whitespace, two otherwise.
     */
    protected function getProperty($line)
    {
        return \preg_split('/\s+/', $line, 2);
    }

    /**
     * Parses a line into the 'previous' sub-entry of the given entry.
     *
     * @param string $line  Line content with the comment marker already removed.
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    private function parsePreviousLine($line, array $entry)
    {
        $subEntry = isset($entry['previous']) ? $entry['previous'] : array();
        $entry['previous'] = $this->parseLine($line, $subEntry);

        return $entry;
    }

    /**
     * Tells whether a keyword names a property that holds a string value.
     *
     * @param mixed $property
     *
     * @return bool
     */
    private function isTranslationProperty($property)
    {
        if (!\is_string($property)) {
            return false;
        }

        if (\in_array($property, array('msgctxt', 'msgid', 'msgid_plural', 'msgstr'), true)) {
            return true;
        }

        // Plural forms: msgstr[0], msgstr[1], ...
        return \strpos($property, 'msgstr[') === 0;
    }

    /**
     * Closes the source handler while a parsing failure is being propagated.
     * An error raised by the close itself is dropped so that it does not
     * replace the original exception.
     *
     * @return void
     */
    private function closeSourceQuietly()
    {
        try {
            $this->sourceHandler->close();
        } catch (\Exception $closeError) {
            // Intentionally ignored: the parsing failure is the relevant error.
        }
    }
}
431