Completed
Pull Request — master (#69)
by Raúl
02:27 queued 01:13
created

Parser::isHeader()   B

Complexity

Conditions 6
Paths 4

Size

Total Lines 49
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 49
rs 8.5906
c 0
b 0
f 0
cc 6
eloc 30
nc 4
nop 1
1
<?php
2
3
namespace Sepia\PoParser;
4
5
use Sepia\PoParser\Catalog\EntryFactory;
6
use Sepia\PoParser\Exception\ParseException;
7
use Sepia\PoParser\SourceHandler\FileSystem;
8
use Sepia\PoParser\SourceHandler\SourceHandler;
9
use Sepia\PoParser\SourceHandler\StringSource;
10
11
/**
12
 *    Copyright (c) 2012 Raúl Ferràs [email protected]
13
 *    All rights reserved.
14
 *
15
 *    Redistribution and use in source and binary forms, with or without
16
 *    modification, are permitted provided that the following conditions
17
 *    are met:
18
 *    1. Redistributions of source code must retain the above copyright
19
 *       notice, this list of conditions and the following disclaimer.
20
 *    2. Redistributions in binary form must reproduce the above copyright
21
 *       notice, this list of conditions and the following disclaimer in the
22
 *       documentation and/or other materials provided with the distribution.
23
 *    3. Neither the name of copyright holders nor the names of its
24
 *       contributors may be used to endorse or promote products derived
25
 *       from this software without specific prior written permission.
26
 *
27
 *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28
 *    ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29
 *    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30
 *    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
31
 *    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
 *    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
 *    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
 *    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
 *    POSSIBILITY OF SUCH DAMAGE.
38
 *
39
 * https://github.com/raulferras/PHP-po-parser
40
 *
41
 * Class to parse .po file and extract its strings.
42
 *
43
 * @version 5.0
44
 */
45
class Parser
46
{
47
    /**
     * Source the parser reads lines from; assigned in the constructor.
     *
     * @var SourceHandler
     */
48
    protected $sourceHandler;
49
50
    /**
     * Line counter: reset to 0 at the start of parse(), incremented once for
     * every line read, and included in the ParseException message raised by
     * parseProperty().
     *
     * @var int
     */
51
    protected $lineNumber;
52
53
    /**
     * Name of the property (msgid, msgstr, msgstr[N], ...) most recently
     * handled by parseProperty(); parseMultiline() appends continuation
     * lines to it. Reset to null by parse() whenever an entry is closed.
     *
     * @var string|null
     */
54
    protected $property;
55
56
    /**
     * Builds a parser over an in-memory .po string and runs it.
     *
     * NOTE(review): the value returned by parse() is not used here; the
     * Parser instance itself is handed back, whereas parseFile() returns the
     * result of parse(). Confirm that this difference is intended.
     *
     * @param string $string po content
     *
     * @throws \Exception
     * @return Parser
     */
    public static function parseString($string)
    {
        $source = new StringSource($string);
        $instance = new self($source);
        $instance->parse();

        return $instance;
    }
71
72
    /**
     * Builds a parser over the file at the given path and returns the
     * result of parsing it.
     *
     * @param string $filePath Path handed to the FileSystem source handler
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseFile($filePath)
    {
        $instance = new self(new FileSystem($filePath));
        $catalog = $instance->parse();

        return $catalog;
    }
86
87
    /**
     * @param SourceHandler $sourceHandler Source the parser reads its lines from
     */
    public function __construct(SourceHandler $sourceHandler)
    {
        $this->sourceHandler = $sourceHandler;
    }
91
92
    /**
     * Reads every line from the source handler and builds a Catalog.
     *
     * An entry is closed by a blank line, or by a `msgid` line when the
     * current entry already has a msgid. The first closed entry recognised
     * by isHeader() is stored as the catalog headers; every other entry is
     * added as a regular entry.
     *
     * $this->lineNumber is advanced as soon as a line is read, so while a
     * line is being parsed it holds that line's 1-based position (this is
     * the number reported in ParseException messages).
     *
     * @throws ParseException When a line cannot be parsed
     * @return Catalog
     */
    public function parse()
    {
        $catalog = new Catalog();
        $this->lineNumber = 0;
        $this->property = null; // property that continuation lines extend
        $entry = array();
        $headersFound = false;

        while (!$this->sourceHandler->ended()) {
            $line = trim($this->sourceHandler->getNextLine());
            $this->lineNumber++;

            if ($this->shouldIgnoreLine($line, $entry)) {
                continue;
            }

            if ($this->shouldCloseEntry($line, $entry)) {
                $headersFound = $this->storeEntry($catalog, $entry, $headersFound);
                $entry = array();
                $this->property = null;

                // A blank separator carries no data of its own; a closing
                // `msgid` line, however, already belongs to the next entry.
                if ($line === '') {
                    continue;
                }
            }

            $entry = $this->parseLine($line, $entry);
        }
        $this->sourceHandler->close();

        // The source may end without a trailing blank line: flush what is left.
        if (count($entry) > 0) {
            $this->storeEntry($catalog, $entry, $headersFound);
        }

        return $catalog;
    }

    /**
     * Adds a finished entry to the catalog, either as headers or as a
     * regular entry.
     *
     * Only the first header-like entry is stored as headers; once headers
     * have been found, later header-like entries are added as regular entries.
     *
     * @param Catalog $catalog      Catalog being built
     * @param array   $entry        Raw entry data collected by parseLine()
     * @param bool    $headersFound Whether headers were already stored
     *
     * @return bool Updated value of the "headers found" flag
     */
    private function storeEntry(Catalog $catalog, array $entry, $headersFound)
    {
        if (!$headersFound && $this->isHeader($entry)) {
            $catalog->addHeaders($this->parseHeaders($entry['msgstr']));

            return true;
        }

        $catalog->addEntry(EntryFactory::createFromArray($entry));

        return $headersFound;
    }
157
158
    /**
     * Dispatches a line to the matching parser according to its first
     * character: `#` goes to parseComment(), `m` to parseProperty() and `"`
     * to parseMultiline(). Any other line, including an empty one, leaves
     * the entry untouched.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseLine($line, $entry)
    {
        if (strlen($line) === 0) {
            return $entry;
        }

        $marker = $line[0];

        if ($marker === '#') {
            return $this->parseComment($line, $entry);
        }

        if ($marker === 'm') {
            return $this->parseProperty($line, $entry);
        }

        if ($marker === '"') {
            return $this->parseMultiline($line, $entry);
        }

        return $entry;
    }
185
186
    /**
     * Parses a `keyword "value"` line and appends the unquoted value to the
     * entry under that keyword.
     *
     * Accepted keywords are msgctxt, msgid, msgid_plural, msgstr and any
     * keyword starting with `msgstr[`. The keyword is remembered in
     * $this->property so that following continuation lines extend it.
     * A keyword with nothing after it is treated as an empty value.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When the keyword is not one of the accepted ones
     */
    protected function parseProperty($line, array $entry)
    {
        $tokens = $this->getProperty($line);
        $key = $tokens[0];
        // A keyword without a value yields a single token.
        $value = isset($tokens[1]) ? $tokens[1] : '';

        $isKnownKey = in_array($key, array('msgctxt', 'msgid', 'msgid_plural', 'msgstr'), true)
            || strpos($key, 'msgstr[') === 0; // plural forms: msgstr[0], msgstr[1], ...

        if (!$isKnownKey) {
            throw new ParseException(sprintf('Could not parse %s at line %d', $key, $this->lineNumber));
        }

        if (!isset($entry[$key])) {
            $entry[$key] = '';
        }

        $entry[$key] .= $this->unquote($value);
        $this->property = $key;

        return $entry;
    }
221
222
    /**
     * Appends a quoted continuation line to the property that was parsed
     * last (held in $this->property).
     *
     * @param string $line  Continuation line, including its surrounding quotes
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When no appendable property is currently active
     */
    protected function parseMultiline($line, $entry)
    {
        // $this->property is null until a property line has been seen; cast
        // it so the string functions below never receive null.
        $property = (string) $this->property;

        $isAppendable = in_array($property, array('msgctxt', 'msgid', 'msgid_plural', 'msgstr'), true)
            || strpos($property, 'msgstr[') === 0;

        if (!$isAppendable) {
            throw new ParseException(
                sprintf('Error parsing property %s as multiline.', $this->property)
            );
        }

        // The active property may have been set while filling a different
        // array (e.g. a "previous" sub-entry), so the key can be missing here.
        if (!isset($entry[$property])) {
            $entry[$property] = '';
        }

        $entry[$property] .= $this->unquote($line);

        return $entry;
    }
248
249
    /**
     * Parses a line starting with `#` and stores its content in the entry.
     *
     * Recognised markers:
     *  - `#,`  flags, split on commas into $entry['flags']
     *  - `#.`  appended to $entry['ccomment']
     *  - `#:`  appended to $entry['reference']
     *  - `#|`  rest of the line parsed into the $entry['previous'] sub-entry
     *  - `#~`  rest of the line parsed into the entry itself, which is then
     *          flagged with $entry['obsolete'] = true
     *  - `#~|` rest of the line parsed into $entry['previous'] and the entry
     *          flagged as obsolete
     *  - anything else is appended to $entry['tcomment']
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseComment($line, $entry)
    {
        // '#~|' has to be detected before cutting the marker down to two
        // characters, otherwise it cannot be told apart from '#~'.
        if (strncmp($line, '#~|', 3) === 0) {
            $marker = '#~|';
        } else {
            $marker = trim(substr($line, 0, 2));
        }

        switch ($marker) {
            case '#,':
                $entry['flags'] = preg_split('/,\s*/', trim((string) substr($line, 2)));
                break;

            case '#.':
                $entry['ccomment'][] = trim((string) substr($line, 2));
                break;

            case '#|':  // Previous string
            case '#~|': // Previous string of an obsolete entry
                $previous = isset($entry['previous']) ? $entry['previous'] : array();
                $content = trim((string) substr($line, strlen($marker)));
                $entry['previous'] = $this->parseLine($content, $previous);

                if ($marker === '#~|') {
                    $entry['obsolete'] = true;
                }
                break;

            case '#~': // Obsolete entry
                $entry = $this->parseLine(trim((string) substr($line, 2)), $entry);
                $entry['obsolete'] = true;
                break;

            case '#:': // Reference
                $entry['reference'][] = trim((string) substr($line, 2));
                break;

            default: // plain '#' and any unrecognised marker
                $entry['tcomment'][] = trim((string) substr($line, 1));
                break;
        }

        return $entry;
    }
313
314
    /**
     * Splits the header msgstr into individual header lines.
     *
     * The separator is the literal two-character sequence backslash + "n"
     * (not a newline character). Chunks that evaluate to false, such as the
     * empty string, are dropped; the remaining chunks keep their original
     * positions as keys.
     *
     * @param string $msgstr
     *
     * @return array
     */
    protected function parseHeaders($msgstr)
    {
        $headers = array();

        foreach (explode('\\n', $msgstr) as $position => $chunk) {
            if ($chunk) {
                $headers[$position] = $chunk;
            }
        }

        return $headers;
    }
325
326
    /**
     * Tells whether a line can be skipped: it must be empty (in the sense
     * of PHP's empty()) and no entry data may have been collected yet.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldIgnoreLine($line, array $entry)
    {
        if (count($entry) !== 0) {
            return false;
        }

        return empty($line);
    }
336
337
    /**
     * Tells whether the entry being collected ends at this line: either the
     * line is empty, or it starts a new `msgid` while the entry already has
     * one.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldCloseEntry($line, array $entry)
    {
        list($keyword) = $this->getProperty($line);

        if ($line === '') {
            return true;
        }

        return $keyword === 'msgid' && isset($entry['msgid']);
    }
350
351
    /**
     * Strips a double quote from the start and from the end of the value,
     * when present. Quotes elsewhere in the value are left alone.
     *
     * @param string $value
     * @return string
     */
    protected function unquote($value)
    {
        $pattern = '/^\"|\"$/';
        $stripped = preg_replace($pattern, '', $value);

        return $stripped;
    }
359
360
    /**
     * Checks whether an entry is the catalog header.
     *
     * An entry qualifies when it has a msgstr, its msgid is the empty
     * string, and its msgstr contains at least one of the standard header
     * names listed below or a custom header whose name starts with `X-`.
     *
     * @param array $entry
     *
     * @return bool
     */
    protected function isHeader(array $entry)
    {
        if (!isset($entry['msgstr'], $entry['msgid'])) {
            return false;
        }

        // Strict comparison on purpose: empty() would also accept the
        // perfectly valid msgid "0" as a header candidate.
        if ($entry['msgid'] !== '') {
            return false;
        }

        $standardHeaders = array(
            'Project-Id-Version:',
            'Report-Msgid-Bugs-To:',
            'POT-Creation-Date:',
            'PO-Revision-Date:',
            'Last-Translator:',
            'Language-Team:',
            'MIME-Version:',
            'Content-Type:',
            'Content-Transfer-Encoding:',
            'Plural-Forms:',
        );

        // Headers are separated by the literal two-character sequence
        // backslash + "n" (single-quoted string, not a newline character).
        $names = array();
        foreach (explode('\n', $entry['msgstr']) as $header) {
            // Keep only the header name, up to and including the first colon.
            $names[] = preg_replace('/(.*?:)(.*)/i', '${1}', $header);
        }

        if (count(array_intersect($standardHeaders, $names)) > 0) {
            return true;
        }

        // No standard header present: accept any custom "X-..." header.
        foreach ($names as $name) {
            if (preg_match('/^X\-(.*):/i', $name) === 1) {
                return true;
            }
        }

        return false;
    }
416
417
    /**
     * Splits a line at its first run of whitespace into at most two tokens:
     * the leading keyword and, when present, the remainder of the line.
     *
     * @param string $line
     *
     * @return array
     */
    protected function getProperty($line)
    {
        return preg_split('/\s+/ ', $line, 2);
    }
428
}
429