Completed
Pull Request — master (#68)
by Raúl
02:32 queued 01:17
created

Parser::isHeader()   B

Complexity

Conditions 5
Paths 4

Size

Total Lines 37
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 37
rs 8.439
c 0
b 0
f 0
cc 5
eloc 19
nc 4
nop 1
1
<?php
2
3
namespace Sepia\PoParser;
4
5
use Sepia\PoParser\Catalog\EntryFactory;
6
use Sepia\PoParser\SourceHandler\FileSystem;
7
use Sepia\PoParser\SourceHandler\SourceHandler;
8
use Sepia\PoParser\SourceHandler\StringSource;
9
10
/**
11
 *    Copyright (c) 2012 Raúl Ferràs [email protected]
12
 *    All rights reserved.
13
 *
14
 *    Redistribution and use in source and binary forms, with or without
15
 *    modification, are permitted provided that the following conditions
16
 *    are met:
17
 *    1. Redistributions of source code must retain the above copyright
18
 *       notice, this list of conditions and the following disclaimer.
19
 *    2. Redistributions in binary form must reproduce the above copyright
20
 *       notice, this list of conditions and the following disclaimer in the
21
 *       documentation and/or other materials provided with the distribution.
22
 *    3. Neither the name of copyright holders nor the names of its
23
 *       contributors may be used to endorse or promote products derived
24
 *       from this software without specific prior written permission.
25
 *
26
 *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27
 *    ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28
 *    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
 *    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
30
 *    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
 *    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
 *    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
 *    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
 *    POSSIBILITY OF SUCH DAMAGE.
37
 *
38
 * https://github.com/raulferras/PHP-po-parser
39
 *
40
 * Class to parse .po file and extract its strings.
41
 *
42
 * @version 5.0
43
 */
44
class Parser
45
{
46
    /** @var SourceHandler */
47
    protected $sourceHandler;
48
49
    /**
     * Parses PO content held in a string.
     *
     * Wraps the content in a StringSource, runs parse() on a freshly created
     * Parser and hands that Parser instance back to the caller.
     *
     * NOTE(review): the Catalog returned by parse() is discarded here, whereas
     * parseFile() returns it. Confirm whether callers really need the Parser
     * instance rather than the parsed Catalog.
     *
     * @param string $string po content
     *
     * @throws \Exception
     * @return Parser
     */
    public static function parseString($string)
    {
        $source = new StringSource($string);
        $instance = new self($source);
        $instance->parse();

        return $instance;
    }
64
65
    /**
     * Parses the PO file found at the given path.
     *
     * The path is handed to a FileSystem source handler and the result of
     * parse() on a new Parser is returned.
     *
     * @param string $filePath path passed to the FileSystem source handler
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseFile($filePath)
    {
        $source = new FileSystem($filePath);
        $instance = new self($source);
        $catalog = $instance->parse();

        return $catalog;
    }
79
80
    /**
     * Creates a parser bound to the given source handler.
     *
     * @param SourceHandler $sourceHandler supplies the lines consumed by parse()
     */
    public function __construct(SourceHandler $sourceHandler)
    {
        $this->sourceHandler = $sourceHandler;
    }
84
85
    /**
     * Parses the content supplied by the source handler into a Catalog.
     *
     * Lines are read one at a time and accumulated into an entry array. A
     * blank line, or a new `msgid` while one is already pending, flushes the
     * pending entry into the catalog. The first flushed entry is added as
     * headers when isHeader() recognises it. The source handler is closed
     * once the input is exhausted, and an entry still pending at that point
     * is added if it ended on a translation or is an obsolete entry.
     *
     * @throws \Exception If a line neither starts with a known key nor
     *                    continues a multi-line string.
     * @return Catalog
     */
    public function parse()
    {
        $catalog = new Catalog();
        $entry = array();

        // True right after an entry has been flushed, so that a run of blank
        // lines does not flush again.
        $justNewEntry = false;
        // True until the first entry has been flushed; only that entry is
        // checked for being the header.
        $firstLine = true;

        // Used to remember last key in a multiline previous entry.
        $lastPreviousKey = null;
        // Key whose string is currently open for "multiline" continuation.
        $state = null;
        $lineNumber = 0;

        // Comment markers whose payload is itself a keyword line.
        $markerNames = array(
            '#|'  => 'previous',            // #| Previous untranslated string
            '#~'  => 'obsolete',            // #~ Old entry
            '#~|' => 'previous-obsolete',   // #~| Previous-Old untranslated string. Reported by @Cellard
        );

        while (!$this->sourceHandler->ended()) {
            $line = trim($this->sourceHandler->getNextLine());
            $split = preg_split('/\s+/', $line, 2);
            $key = $split[0];

            if (empty($line) && count($entry) === 0) {
                $lineNumber++;
                continue;
            }

            // If a blank line is found, or a new msgid when already got one
            if ($line === '' || ($key === 'msgid' && isset($entry['msgid']))) {
                // Two consecutive blank lines
                if ($justNewEntry) {
                    $lineNumber++;
                    continue;
                }

                if ($firstLine) {
                    $firstLine = false;
                    if (self::isHeader($entry)) {
                        // Header fields are separated by the literal two-character "\n" sequence.
                        $catalog->addHeaders(array_filter(explode('\\n', $entry['msgstr'])));
                    } else {
                        $catalog->addEntry(EntryFactory::createFromArray($entry));
                    }
                } else {
                    // A new entry is found!
                    $catalog->addEntry(EntryFactory::createFromArray($entry));
                }

                $entry = array();
                $state = null;
                $justNewEntry = true;
                $lastPreviousKey = null;
                if ($line === '') {
                    $lineNumber++;
                    continue;
                }
            }

            // Any line that reaches this point carries content for the pending entry.
            $justNewEntry = false;
            $data = isset($split[1]) ? $split[1] : null;

            switch ($key) {
                // Flagged translation
                case '#,':
                    $entry['flags'] = preg_split('/,\s*/', $data);
                    break;

                // # Translator comments
                case '#':
                    $entry['tcomment'] = !isset($entry['tcomment']) ? array() : $entry['tcomment'];
                    $entry['tcomment'][] = $data;
                    break;

                // #. Comments extracted from source code
                case '#.':
                    $entry['ccomment'] = !isset($entry['ccomment']) ? array() : $entry['ccomment'];
                    $entry['ccomment'][] = $data;
                    break;

                // Reference
                case '#:':
                    $entry['reference'][] = addslashes($data);
                    break;

                case '#|':
                case '#~':
                case '#~|':
                    $key = $markerNames[$key];

                    $tmpParts = explode(' ', $data);
                    $tmpKey = $tmpParts[0];

                    if (!in_array($tmpKey, array('msgid', 'msgid_plural', 'msgstr', 'msgctxt'), true)) {
                        // If there is a multi-line previous string we must remember
                        // what key was first line.
                        $tmpKey = $lastPreviousKey;
                        $str = $data;
                    } else {
                        $str = implode(' ', array_slice($tmpParts, 1));
                    }

                    if (strpos($key, 'obsolete') !== false) {
                        // Obsolete data is stored on the entry itself, flagged as obsolete.
                        $entry['obsolete'] = true;
                        switch ($tmpKey) {
                            case 'msgid':
                                if (!isset($entry['msgid'])) {
                                    $entry['msgid'] = '';
                                }
                                $entry['msgid'] .= trim($str, '"');
                                $lastPreviousKey = $tmpKey;
                                break;

                            case 'msgstr':
                                if (!isset($entry['msgstr'])) {
                                    $entry['msgstr'] = '';
                                }
                                $entry['msgstr'] .= trim($str, '"');
                                $lastPreviousKey = $tmpKey;
                                break;

                            case 'msgctxt':
                                $entry['msgctxt'] = trim($str, '"');
                                $lastPreviousKey = $tmpKey;
                                break;

                            default:
                                break;
                        }
                    } else {
                        $entry[$key] = isset($entry[$key]) ? $entry[$key] : array('msgid' => '', 'msgstr' => '');
                    }

                    if ($key !== 'obsolete') {
                        // "previous" data is kept in a nested array under its marker name.
                        switch ($tmpKey) {
                            case 'msgid':
                            case 'msgid_plural':
                            case 'msgstr':
                                if (!isset($entry[$key][$tmpKey])) {
                                    $entry[$key][$tmpKey] = '';
                                }
                                $entry[$key][$tmpKey] .= trim($str, '"');
                                $lastPreviousKey = $tmpKey;
                                break;

                            default:
                                $entry[$key][$tmpKey] = $str;
                                break;
                        }
                    }
                    break;

                // context
                // Allows disambiguations of different messages that have same msgid.
                // Example:
                //
                // #: tools/observinglist.cpp:700
                // msgctxt "First letter in 'Scope'"
                // msgid "S"
                // msgstr ""
                //
                // #: skycomponents/horizoncomponent.cpp:429
                // msgctxt "South"
                // msgid "S"
                // msgstr ""
                case 'msgctxt':
                    // untranslated-string
                case 'msgid':
                    // untranslated-string-plural
                case 'msgid_plural':
                    $state = $key;
                    if (!isset($entry[$state])) {
                        $entry[$state] = '';
                    }

                    $entry[$state] .= trim($data, '"');
                    break;

                // translated-string
                case 'msgstr':
                    $state = 'msgstr';
                    $entry[$state] = trim($data, '"');
                    break;

                default:
                    if (strpos($key, 'msgstr[') !== false) {
                        // translated-string-case-n
                        $state = $key;
                        $entry[$state] = trim($data, '"');
                    } else {
                        // "multiline" lines: the line continues the string opened by $state.
                        // Strict checks are used on purpose: a loose switch on $state lets a
                        // null state match a boolean case label, which would swallow unknown
                        // lines instead of reporting them.
                        $continuesPlural = is_string($state) && strpos($state, 'msgstr[') !== false;
                        $continuesSource = in_array($state, array('msgctxt', 'msgid', 'msgid_plural'), true);

                        if ($continuesSource || $continuesPlural) {
                            if (!isset($entry[$state])) {
                                $entry[$state] = '';
                            }

                            if (is_string($entry[$state])) {
                                $entry[$state] = trim($entry[$state], '"');
                            }
                            $entry[$state] .= trim($line, '"');
                        } elseif ($state === 'msgstr') {
                            $entry['msgstr'] .= trim($line, '"');
                        } else {
                            throw new \Exception(
                                'Parser: Parse error! Unknown key "'.$key.'" on line '.($lineNumber + 1)
                            );
                        }
                    }
                    break;
            }

            $lineNumber++;
        }
        $this->sourceHandler->close();

        // Add the final entry when the input ends without a blank line. This covers an
        // entry ending on msgstr, on a plural form (msgstr[n]) or an obsolete (#~) entry.
        $endsOnTranslation = $state === 'msgstr'
            || (is_string($state) && strpos($state, 'msgstr[') !== false);
        if ($endsOnTranslation || !empty($entry['obsolete'])) {
            $catalog->addEntry(EntryFactory::createFromArray($entry));
        }

        return $catalog;
    }
339
340
341
    /**
     * Tells whether an entry is the catalog header.
     *
     * The entry's msgstr is split on the literal two-character "\n" sequence.
     * It counts as a header only when every required field name
     * (Project-Id-Version, PO-Revision-Date, MIME-Version) starts at least
     * one of the resulting lines.
     *
     * @param array $entry
     *
     * @return bool
     */
    protected static function isHeader(array $entry)
    {
        if (!isset($entry['msgstr'])) {
            return false;
        }

        // Required field names still waiting to be seen, used as a set.
        $pending = array(
            'Project-Id-Version:' => true,
            'PO-Revision-Date:'   => true,
            'MIME-Version:'       => true,
        );

        foreach (explode("\\n", $entry['msgstr']) as $headerLine) {
            // Field name is everything before the first colon, without surrounding quotes.
            $pieces = explode(':', $headerLine);
            $fieldName = trim($pieces[0], '"').':';

            unset($pending[$fieldName]);
        }

        return count($pending) === 0;
    }
385
}
386