Completed
Pull Request — master (#71)
by
unknown
01:22
created

Parser::parseMultiline()   B

Complexity

Conditions 6
Paths 6

Size

Total Lines 19
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 19
rs 8.8571
c 0
b 0
f 0
cc 6
eloc 13
nc 6
nop 2
1
<?php
2
3
namespace Sepia\PoParser;
4
5
use Sepia\PoParser\Catalog\EntryFactory;
6
use Sepia\PoParser\Catalog\Header;
7
use Sepia\PoParser\Exception\ParseException;
8
use Sepia\PoParser\SourceHandler\FileSystem;
9
use Sepia\PoParser\SourceHandler\SourceHandler;
10
use Sepia\PoParser\SourceHandler\StringSource;
11
12
/**
13
 *    Copyright (c) 2012 Raúl Ferràs [email protected]
14
 *    All rights reserved.
15
 *
16
 *    Redistribution and use in source and binary forms, with or without
17
 *    modification, are permitted provided that the following conditions
18
 *    are met:
19
 *    1. Redistributions of source code must retain the above copyright
20
 *       notice, this list of conditions and the following disclaimer.
21
 *    2. Redistributions in binary form must reproduce the above copyright
22
 *       notice, this list of conditions and the following disclaimer in the
23
 *       documentation and/or other materials provided with the distribution.
24
 *    3. Neither the name of copyright holders nor the names of its
25
 *       contributors may be used to endorse or promote products derived
26
 *       from this software without specific prior written permission.
27
 *
28
 *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29
 *    ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30
 *    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31
 *    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
32
 *    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
 *    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
 *    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
 *    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
 *    POSSIBILITY OF SUCH DAMAGE.
39
 *
40
 * https://github.com/raulferras/PHP-po-parser
41
 *
42
 * Class to parse .po file and extract its strings.
43
 *
44
 * @version 5.0
45
 */
46
class Parser
47
{
48
    /**
     * Source the parser pulls lines from; it is closed once parse() has
     * consumed it.
     *
     * @var SourceHandler
     */
49
    protected $sourceHandler;
50
51
    /**
     * Zero-based counter of the line currently being processed. It is reset
     * at the start of parse() and reported in ParseException messages.
     *
     * @var int
     */
52
    protected $lineNumber;
53
54
    /**
     * Name of the property (msgid, msgstr, msgstr[n], ...) that quoted
     * continuation lines are appended to; null while no property is active.
     *
     * @var string|null
     */
55
    protected $property;
56
57
    /**
     * Builds a parser over an in-memory .po string and runs it once.
     *
     * NOTE(review): the Catalog produced by parse() is discarded here and the
     * Parser instance is returned instead, whereas parseFile() returns the
     * Catalog. Confirm whether callers depend on receiving the Parser before
     * aligning the two factories.
     *
     * @param string $string po content
     *
     * @throws \Exception
     * @return Parser
     */
    public static function parseString($string)
    {
        $source = new StringSource($string);
        $parser = new self($source);
        $parser->parse();

        return $parser;
    }
72
73
    /**
     * Parses the .po file found at the given path and returns its catalog.
     *
     * @param string $filePath path handed to the FileSystem source handler
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseFile($filePath)
    {
        $source = new FileSystem($filePath);
        $parser = new self($source);
        $catalog = $parser->parse();

        return $catalog;
    }
87
88
    /**
     * Stores the source handler that parse() will read lines from.
     *
     * @param SourceHandler $sourceHandler line provider for the .po content
     */
    public function __construct(SourceHandler $sourceHandler)
    {
        $this->sourceHandler = $sourceHandler;
    }
92
93
    /**
     * Reads every line from the source handler and builds a Catalog.
     *
     * Lines are trimmed and accumulated into an entry array. An entry is
     * flushed into the catalog when a blank line, or a new `msgid` while the
     * current entry already has one, is met. The first flushed entry that
     * looks like a header block becomes the catalog headers; everything else
     * becomes a regular entry.
     *
     * The source handler is closed both on success and when parsing a line
     * throws, so a malformed file does not leave the source open.
     *
     * @throws \Exception
     * @throws \InvalidArgumentException
     * @throws ParseException
     * @return Catalog
     */
    public function parse()
    {
        $catalog = new Catalog();
        $this->lineNumber = 0;
        $this->property = null; // property that continuation lines append to
        $entry = array();
        $headersFound = false;

        while (!$this->sourceHandler->ended()) {
            $line = trim($this->sourceHandler->getNextLine());

            // Only the parsing step is guarded: by now the source has
            // delivered a line, so closing it on failure is meaningful.
            try {
                if (!$this->shouldIgnoreLine($line, $entry)) {
                    if ($this->shouldCloseEntry($line, $entry)) {
                        $headersFound = $this->storeEntry($catalog, $entry, $headersFound);
                        $entry = array();
                        $this->property = null;
                    }

                    // A blank separator only closes the entry; a `msgid` that
                    // closed the previous entry also opens the next one.
                    if ($line !== '') {
                        $entry = $this->parseLine($line, $entry);
                    }
                }
            } catch (\Exception $exception) {
                $this->sourceHandler->close();

                throw $exception;
            }

            $this->lineNumber++;
        }

        $this->sourceHandler->close();

        // The source may end without a trailing blank line.
        if (count($entry) > 0) {
            $this->storeEntry($catalog, $entry, $headersFound);
        }

        return $catalog;
    }

    /**
     * Adds a completed entry to the catalog, as headers or as a regular entry.
     *
     * @param Catalog $catalog      catalog being populated
     * @param array   $entry        accumulated entry data
     * @param bool    $headersFound whether headers were already stored
     *
     * @return bool updated "headers already stored" flag
     */
    private function storeEntry(Catalog $catalog, array $entry, $headersFound)
    {
        if (!$headersFound && $this->isHeader($entry)) {
            $catalog->addHeaders($this->parseHeaders($entry['msgstr']));

            return true;
        }

        $catalog->addEntry(EntryFactory::createFromArray($entry));

        return $headersFound;
    }
158
159
    /**
     * Dispatches a trimmed line to the matching handler based on its first
     * character: `#` comments, `m` properties (msgid, msgstr, ...) and `"`
     * quoted continuation lines. Any other line leaves the entry untouched.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array the entry, updated by the handler if one applied
     * @throws ParseException
     */
    protected function parseLine($line, $entry)
    {
        if (strlen($line) === 0) {
            return $entry;
        }

        $marker = $line[0];

        if ($marker === '#') {
            return $this->parseComment($line, $entry);
        }

        if ($marker === 'm') {
            return $this->parseProperty($line, $entry);
        }

        if ($marker === '"') {
            return $this->parseMultiline($line, $entry);
        }

        return $entry;
    }
186
187
    /**
     * Parses a `keyword "value"` line and appends the unquoted value to the
     * entry under that keyword.
     *
     * Accepted keywords are msgctxt, msgid, msgid_plural, msgstr and any key
     * starting with `msgstr[` (plural forms). The keyword is remembered in
     * $this->property so that following quoted lines are appended to it.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array the entry with the property value appended
     * @throws ParseException when the keyword is not a known property
     */
    protected function parseProperty($line, array $entry)
    {
        $tokens = $this->getProperty($line);
        $key = $tokens[0];
        // A bare keyword (e.g. `msgstr` with nothing after it) yields a single
        // token; treat its value as empty instead of reading a missing offset.
        $value = isset($tokens[1]) ? $tokens[1] : '';

        $knownKeys = array('msgctxt', 'msgid', 'msgid_plural', 'msgstr');
        // Plural forms must *start* with `msgstr[`, not merely contain it.
        $isPluralForm = strpos($key, 'msgstr[') === 0;

        if (!$isPluralForm && !in_array($key, $knownKeys, true)) {
            throw new ParseException(sprintf('Could not parse %s at line %d', $key, $this->lineNumber));
        }

        if (!isset($entry[$key])) {
            $entry[$key] = '';
        }

        $entry[$key] .= $this->unquote($value);
        $this->property = $key;

        return $entry;
    }
222
223
    /**
     * Appends a quoted continuation line to the property most recently set
     * by parseProperty().
     *
     * @param string $line  quoted continuation line
     * @param array  $entry entry (or sub-entry) receiving the text
     *
     * @return array the entry with the continuation appended
     * @throws ParseException when no text property is currently active
     */
    protected function parseMultiline($line, $entry)
    {
        $property = $this->property;
        $textProperties = array('msgctxt', 'msgid', 'msgid_plural', 'msgstr');

        // $this->property is null until a property line has been parsed, so
        // guard the string functions against a non-string value.
        $isTextProperty = is_string($property)
            && (in_array($property, $textProperties, true) || strpos($property, 'msgstr[') === 0);

        if (!$isTextProperty) {
            throw new ParseException(
                sprintf('Error parsing property %s as multiline.', $this->property)
            );
        }

        // The active property may have been set while filling a different
        // array (main entry vs. `previous` sub-entry), so the key can be
        // missing here; start from an empty string rather than an undefined index.
        if (!isset($entry[$property])) {
            $entry[$property] = '';
        }

        $entry[$property] .= $this->unquote($line);

        return $entry;
    }
249
250
    /**
     * Parses a line starting with `#` and stores its content in the entry.
     *
     * Recognised markers:
     *  - `#,`  flags, split on commas into $entry['flags']
     *  - `#.`  extracted comment, appended to $entry['ccomment']
     *  - `#:`  reference, appended to $entry['reference']
     *  - `#|`  previous string, parsed into the $entry['previous'] sub-entry
     *  - `#~`  obsolete entry line, parsed into the entry and flagged obsolete
     *  - `#~|` previous string of an obsolete entry, parsed into
     *          $entry['previous'] and flagged obsolete
     *  - anything else is a translator comment, appended to $entry['tcomment']
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array the updated entry
     * @throws ParseException
     */
    protected function parseComment($line, $entry)
    {
        // `#~|` is three characters long, so it must be detected before the
        // two-character markers; otherwise it is mistaken for `#~` and the
        // remaining `| ...` text is silently dropped.
        if (strncmp($line, '#~|', 3) === 0) {
            $previous = isset($entry['previous']) ? $entry['previous'] : array();
            $entry['previous'] = $this->parseLine(trim(substr($line, 3)), $previous);
            $entry['obsolete'] = true;

            return $entry;
        }

        $marker = trim(substr($line, 0, 2));
        $content = trim(substr($line, 2));

        switch ($marker) {
            case '#,':
                $entry['flags'] = preg_split('/,\s*/', $content);
                break;

            case '#.':
                if (!isset($entry['ccomment'])) {
                    $entry['ccomment'] = array();
                }
                $entry['ccomment'][] = $content;
                break;

            case '#|': // Previous string
                $previous = isset($entry['previous']) ? $entry['previous'] : array();
                $entry['previous'] = $this->parseLine($content, $previous);
                break;

            case '#~': // Obsolete entry
                $entry = $this->parseLine($content, $entry);
                $entry['obsolete'] = true;
                break;

            case '#:': // Reference
                $entry['reference'][] = $content;
                break;

            case '#':
            default:
                if (!isset($entry['tcomment'])) {
                    $entry['tcomment'] = array();
                }
                // Only the leading `#` is stripped for translator comments.
                $entry['tcomment'][] = trim(substr($line, 1));
                break;
        }

        return $entry;
    }
314
315
    /**
     * Turns the header entry's msgstr into a Header object.
     *
     * The text is split on the literal two-character sequence backslash + n
     * (not on real newlines), and pieces that PHP considers empty are dropped
     * before being handed to Header.
     *
     * @param string $msgstr msgstr of the header entry
     *
     * @return Header
     */
    protected function parseHeaders($msgstr)
    {
        $pieces = explode('\\n', $msgstr);
        $nonEmptyPieces = array_filter($pieces);

        return new Header($nonEmptyPieces);
    }
326
327
    /**
     * Tells whether a line can be skipped: it is empty and no entry has been
     * started yet, so there is nothing to close.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldIgnoreLine($line, array $entry)
    {
        if (!empty($line)) {
            return false;
        }

        return count($entry) === 0;
    }
337
338
    /**
     * Tells whether the current entry is complete: either the line is blank,
     * or it opens a new `msgid` while the entry already holds one.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldCloseEntry($line, array $entry)
    {
        if ($line === '') {
            return true;
        }

        $tokens = $this->getProperty($line);

        return $tokens[0] === 'msgid' && isset($entry['msgid']);
    }
351
352
    /**
     * Strips one double quote from the start and one from the end of the
     * value, when present. Quotes inside the value are left as they are.
     *
     * @param string $value
     * @return string
     */
    protected function unquote($value)
    {
        $edgeQuotes = '/^\"|\"$/';
        $stripped = preg_replace($edgeQuotes, '', $value);

        return $stripped;
    }
360
361
    /**
     * Checks if an entry is the header block of the .po file.
     *
     * An entry qualifies when it has a msgstr, an empty msgid, and its msgstr
     * (split on the literal backslash + n sequence) contains at least one
     * standard header name or a custom `X-...:` header.
     *
     * @param array $entry
     *
     * @return bool
     */
    protected function isHeader(array $entry)
    {
        // Both keys must be present, and the msgid must be empty.
        if (!isset($entry['msgstr'], $entry['msgid'])) {
            return false;
        }

        if (!empty($entry['msgid'])) {
            return false;
        }

        $standardHeaders = array(
            'Project-Id-Version:',
            'Report-Msgid-Bugs-To:',
            'POT-Creation-Date:',
            'PO-Revision-Date:',
            'Last-Translator:',
            'Language-Team:',
            'MIME-Version:',
            'Content-Type:',
            'Content-Transfer-Encoding:',
            'Plural-Forms:',
        );

        foreach (explode('\n', $entry['msgstr']) as $header) {
            // Keep only the header name, up to and including the first colon.
            $name = preg_replace('/(.*?:)(.*)/i', '${1}', $header);

            if (in_array($name, $standardHeaders, true)) {
                return true;
            }

            // Not a standard header: accept custom `X-` headers as well.
            if (preg_match('/^X\-(.*):/i', $name) === 1) {
                return true;
            }
        }

        return false;
    }
417
418
    /**
     * Splits a line at its first run of whitespace into at most two tokens:
     * the leading keyword and, when present, the rest of the line.
     *
     * @param string $line
     *
     * @return array one or two string tokens
     */
    protected function getProperty($line)
    {
        return preg_split('/\s+/ ', $line, 2);
    }
429
}
430