Completed
Push — feature/catalog_interface ( 8c3abf )
by Raúl
04:01
created

Parser::parseHeaders()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 6
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
1
<?php
2
3
namespace Sepia\PoParser;
4
5
use Sepia\PoParser\Catalog\Catalog;
6
use Sepia\PoParser\Catalog\CatalogArray;
7
use Sepia\PoParser\Catalog\EntryFactory;
8
use Sepia\PoParser\Catalog\Header;
9
use Sepia\PoParser\Exception\ParseException;
10
use Sepia\PoParser\SourceHandler\FileSystem;
11
use Sepia\PoParser\SourceHandler\SourceHandler;
12
use Sepia\PoParser\SourceHandler\StringSource;
13
14
/**
15
 *    Copyright (c) 2012 Raúl Ferràs [email protected]
16
 *    All rights reserved.
17
 *
18
 *    Redistribution and use in source and binary forms, with or without
19
 *    modification, are permitted provided that the following conditions
20
 *    are met:
21
 *    1. Redistributions of source code must retain the above copyright
22
 *       notice, this list of conditions and the following disclaimer.
23
 *    2. Redistributions in binary form must reproduce the above copyright
24
 *       notice, this list of conditions and the following disclaimer in the
25
 *       documentation and/or other materials provided with the distribution.
26
 *    3. Neither the name of copyright holders nor the names of its
27
 *       contributors may be used to endorse or promote products derived
28
 *       from this software without specific prior written permission.
29
 *
30
 *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31
 *    ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32
 *    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33
 *    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
34
 *    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37
 *    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38
 *    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39
 *    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40
 *    POSSIBILITY OF SUCH DAMAGE.
41
 *
42
 * https://github.com/raulferras/PHP-po-parser
43
 *
44
 * Class to parse .po file and extract its strings.
45
 *
46
 * @version 5.0
47
 */
48
class Parser
49
{
50
    /** @var SourceHandler */
51
    protected $sourceHandler;
52
53
    /** @var int */
54
    protected $lineNumber;
55
56
    /** @var string */
57
    protected $property;
58
59
    /**
     * Parses .po content supplied as a string.
     *
     * The content is wrapped in a StringSource and parsed immediately.
     *
     * NOTE(review): the catalog returned by parse() is discarded here and the
     * Parser instance itself is returned, whereas parseFile() returns the
     * catalog. Confirm which of the two callers actually expect.
     *
     * @param string $string po content
     *
     * @throws \Exception
     * @return Parser
     */
    public static function parseString($string)
    {
        $source = new StringSource($string);
        $instance = new Parser($source);
        $instance->parse();

        return $instance;
    }
74
75
    /**
     * Parses the .po file found at the given path.
     *
     * The path is wrapped in a FileSystem source handler and the result of
     * parse() is handed back to the caller.
     *
     * @param string $filePath
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseFile($filePath)
    {
        $source = new FileSystem($filePath);
        $instance = new Parser($source);
        $catalog = $instance->parse();

        return $catalog;
    }
89
90
    /**
     * Stores the source handler that parse() reads lines from.
     *
     * @param SourceHandler $sourceHandler
     */
    public function __construct(SourceHandler $sourceHandler)
    {
        $this->sourceHandler = $sourceHandler;
    }
94
95
    /**
     * Reads every line from the source handler and fills a catalog with it.
     *
     * Lines are accumulated into an entry array until a blank line, or a new
     * "msgid" for an entry that already has one, closes the entry. The first
     * closed entry recognised by isHeader() is stored through
     * Catalog::addHeaders(); every other closed entry is converted with
     * EntryFactory::createFromArray() and added to the catalog.
     *
     * The source handler is closed once parsing is over, and also when
     * parsing is aborted by an exception, so the source is never left open.
     *
     * @param Catalog|null $catalog Catalog to fill. A new CatalogArray is
     *                              created when none is given.
     *
     * @throws \Exception
     * @throws \InvalidArgumentException
     * @throws ParseException
     * @return Catalog
     */
    public function parse(Catalog $catalog = null)
    {
        if ($catalog === null) {
            $catalog = new CatalogArray();
        }

        $this->lineNumber = 0;
        $this->property = null; // property currently being filled
        $entry = array();
        $headersFound = false;

        try {
            while (!$this->sourceHandler->ended()) {
                $line = trim($this->sourceHandler->getNextLine());

                // Blank lines seen before an entry has started carry nothing.
                if (!$this->shouldIgnoreLine($line, $entry)) {
                    if ($this->shouldCloseEntry($line, $entry)) {
                        if (!$headersFound && $this->isHeader($entry)) {
                            $headersFound = true;
                            $catalog->addHeaders(
                                $this->parseHeaders($entry['msgstr'])
                            );
                        } else {
                            $catalog->addEntry(EntryFactory::createFromArray($entry));
                        }

                        $entry = array();
                        $this->property = null;
                    }

                    // A blank line only closes the entry; anything else
                    // belongs to the current (possibly brand new) entry.
                    if ($line !== '') {
                        $entry = $this->parseLine($line, $entry);
                    }
                }

                $this->lineNumber++;
            }
        } catch (\Exception $e) {
            // Release the source before propagating the failure.
            $this->sourceHandler->close();

            throw $e;
        }

        $this->sourceHandler->close();

        // The last entry is not followed by a closing line, so flush it here.
        // NOTE(review): unlike the loop above, this branch does not consult
        // $headersFound before calling addHeaders().
        if (count($entry)) {
            if ($this->isHeader($entry)) {
                $catalog->addHeaders(
                    $this->parseHeaders($entry['msgstr'])
                );
            } else {
                $catalog->addEntry(EntryFactory::createFromArray($entry));
            }
        }

        return $catalog;
    }
160
161
    /**
     * Dispatches a line to the matching parser based on its first character.
     *
     * "#" lines go to parseComment(), lines starting with "m" go to
     * parseProperty() and lines starting with a double quote go to
     * parseMultiline(). Any other line leaves the entry untouched.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseLine($line, $entry)
    {
        if (strlen($line) === 0) {
            return $entry;
        }

        $marker = $line[0];

        if ($marker === '#') {
            return $this->parseComment($line, $entry);
        }

        if ($marker === 'm') {
            return $this->parseProperty($line, $entry);
        }

        if ($marker === '"') {
            return $this->parseMultiline($line, $entry);
        }

        return $entry;
    }
188
189
    /**
     * Parses a "keyword value" line and appends the value to the entry.
     *
     * Accepted keywords are msgctxt, msgid, msgid_plural, msgstr and the
     * indexed plural forms that start with "msgstr[". The unquoted value is
     * appended to whatever the entry already holds under that keyword, and
     * the keyword is remembered as the current property.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When the keyword is not one of the accepted ones.
     */
    protected function parseProperty($line, array $entry)
    {
        $tokens = $this->getProperty($line);
        $key = $tokens[0];
        // A keyword with nothing after it yields a single token; treat the
        // missing value as an empty string instead of reading an undefined
        // offset.
        $value = isset($tokens[1]) ? $tokens[1] : '';

        $plainKeys = array('msgctxt', 'msgid', 'msgid_plural', 'msgstr');
        // Plural forms must *start* with "msgstr[", not merely contain it.
        $isKnown = in_array($key, $plainKeys, true) || strpos($key, 'msgstr[') === 0;

        if (!$isKnown) {
            throw new ParseException(sprintf('Could not parse %s at line %d', $key, $this->lineNumber));
        }

        if (!isset($entry[$key])) {
            $entry[$key] = '';
        }

        $entry[$key] .= $this->unquote($value);
        $this->property = $key;

        return $entry;
    }
224
225
    /**
     * Appends a continuation line to the property currently being filled.
     *
     * The current property must be msgctxt, msgid, msgid_plural, msgstr or
     * contain "msgstr["; otherwise the line cannot be attached to anything.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When the current property cannot take a continuation.
     */
    protected function parseMultiline($line, $entry)
    {
        $target = $this->property;

        $accepts = $target === 'msgctxt'
            || $target === 'msgid'
            || $target === 'msgid_plural'
            || $target === 'msgstr'
            || strpos($target, 'msgstr[') !== false;

        if (!$accepts) {
            throw new ParseException(
                sprintf('Error parsing property %s as multiline.', $this->property)
            );
        }

        $entry[$target] .= $this->unquote($line);

        return $entry;
    }
251
252
    /**
     * Parses a line starting with "#" and stores its content in the entry.
     *
     * Recognised markers:
     *  - "#,"  flags, split on commas into $entry['flags']
     *  - "#."  appended to $entry['ccomment']
     *  - "#:"  appended to $entry['reference']
     *  - "#|"  previous string, parsed into the $entry['previous'] sub-entry
     *  - "#~"  obsolete entry: the rest of the line is parsed into the entry
     *          itself and $entry['obsolete'] is set
     *  - "#~|" previous string of an obsolete entry: parsed into
     *          $entry['previous'] and $entry['obsolete'] is set
     *  - anything else is appended to $entry['tcomment']
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseComment($line, $entry)
    {
        // "#~|" has to be recognised before the two-character markers below,
        // otherwise it is mistaken for a plain "#~" line and the data that
        // follows the "|" is silently dropped.
        if (substr($line, 0, 3) === '#~|') {
            $previous = isset($entry['previous']) ? $entry['previous'] : array();
            $entry['previous'] = $this->parseLine(trim(substr($line, 3)), $previous);
            $entry['obsolete'] = true;

            return $entry;
        }

        $comment = trim(substr($line, 0, 2));

        switch ($comment) {
            case '#,':
                $entry['flags'] = preg_split('/,\s*/', trim(substr($line, 2)));
                break;

            case '#.':
                if (!isset($entry['ccomment'])) {
                    $entry['ccomment'] = array();
                }
                $entry['ccomment'][] = trim(substr($line, 2));
                break;

            case '#|': // Previous string
                $previous = isset($entry['previous']) ? $entry['previous'] : array();
                $entry['previous'] = $this->parseLine(trim(substr($line, 2)), $previous);
                break;

            case '#~': // Old entry
                $entry = $this->parseLine(trim(substr($line, 2)), $entry);
                $entry['obsolete'] = true;
                break;

            case '#:': // Reference
                $entry['reference'][] = trim(substr($line, 2));
                break;

            case '#':
            default:
                if (!isset($entry['tcomment'])) {
                    $entry['tcomment'] = array();
                }
                $entry['tcomment'][] = trim(substr($line, 1));
                break;
        }

        return $entry;
    }
316
317
    /**
     * Builds a Header object from the msgstr of the header entry.
     *
     * The msgstr is split on the literal two-character sequence backslash + n
     * and the pieces that array_filter() considers empty are dropped.
     *
     * @param string $msgstr
     *
     * @return Header
     */
    protected function parseHeaders($msgstr)
    {
        $pieces = explode('\\n', $msgstr);
        $nonEmpty = array_filter($pieces);

        return new Header($nonEmpty);
    }
328
329
    /**
     * Tells whether a line can be skipped: it is blank and no entry has been
     * started yet.
     *
     * The blank check compares against the empty string rather than using
     * empty(), so a line consisting of "0" is not mistaken for a blank one.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldIgnoreLine($line, array $entry)
    {
        return (string) $line === '' && count($entry) === 0;
    }
339
340
    /**
     * Tells whether the given line ends the entry being built.
     *
     * That is the case for an empty line, and for a line whose first token is
     * "msgid" while the entry already holds a msgid.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldCloseEntry($line, array $entry)
    {
        if ($line === '') {
            return true;
        }

        $tokens = $this->getProperty($line);

        return $tokens[0] === 'msgid' && isset($entry['msgid']);
    }
353
354
    /**
     * Strips one double quote from the start and one from the end of a value,
     * when present.
     *
     * @param string $value
     *
     * @return string
     */
    protected function unquote($value)
    {
        $edgeQuotes = '/^\"|\"$/';

        return preg_replace($edgeQuotes, '', $value);
    }
362
363
    /**
     * Checks whether an entry is the header entry of the file.
     *
     * An entry qualifies when it has a msgstr, its msgid is the empty string,
     * and at least one line of the msgstr names either a standard header
     * field or a custom "X-..." field.
     *
     * @param array $entry
     *
     * @return bool
     */
    protected function isHeader(array $entry)
    {
        if (!isset($entry['msgstr'], $entry['msgid'])) {
            return false;
        }

        // Compare against '' instead of using empty(): a msgid of "0" is a
        // real message id and must not be taken for the header entry.
        if ($entry['msgid'] !== '') {
            return false;
        }

        $standardHeaders = array(
            'Project-Id-Version:',
            'Report-Msgid-Bugs-To:',
            'POT-Creation-Date:',
            'PO-Revision-Date:',
            'Last-Translator:',
            'Language-Team:',
            'MIME-Version:',
            'Content-Type:',
            'Content-Transfer-Encoding:',
            'Plural-Forms:',
        );

        // Keep only the field name of every line, i.e. drop the text that
        // follows the first colon.
        $fieldNames = array_map(
            function ($header) {
                return preg_replace('/(.*?:)(.*)/i', '${1}', $header);
            },
            explode('\n', $entry['msgstr'])
        );

        foreach ($fieldNames as $fieldName) {
            if (in_array($fieldName, $standardHeaders, true)) {
                return true;
            }

            // Not a standard field: accept custom "X-..." fields as well.
            if (preg_match('/^X\-(.*):/i', $fieldName) === 1) {
                return true;
            }
        }

        return false;
    }
419
420
    /**
     * Splits a line on its first run of whitespace.
     *
     * The result holds at most two elements: the leading token and, when the
     * line contains whitespace, everything that follows it.
     *
     * @param string $line
     *
     * @return array
     */
    protected function getProperty($line)
    {
        // The pattern previously carried a stray space after its closing
        // delimiter, which only worked because PHP skips whitespace in the
        // modifier list.
        return preg_split('/\s+/', $line, 2);
    }
431
}
432