Completed
Push — feature/5.0.1 ( 805125...db298f )
by Raúl
01:18
created

Parser::parseString()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 7
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 4
nc 1
nop 1
1
<?php
2
3
namespace Sepia\PoParser;
4
5
use Sepia\PoParser\Catalog\EntryFactory;
6
use Sepia\PoParser\Exception\ParseException;
7
use Sepia\PoParser\SourceHandler\FileSystem;
8
use Sepia\PoParser\SourceHandler\SourceHandler;
9
use Sepia\PoParser\SourceHandler\StringSource;
10
11
/**
12
 *    Copyright (c) 2012 Raúl Ferràs [email protected]
13
 *    All rights reserved.
14
 *
15
 *    Redistribution and use in source and binary forms, with or without
16
 *    modification, are permitted provided that the following conditions
17
 *    are met:
18
 *    1. Redistributions of source code must retain the above copyright
19
 *       notice, this list of conditions and the following disclaimer.
20
 *    2. Redistributions in binary form must reproduce the above copyright
21
 *       notice, this list of conditions and the following disclaimer in the
22
 *       documentation and/or other materials provided with the distribution.
23
 *    3. Neither the name of copyright holders nor the names of its
24
 *       contributors may be used to endorse or promote products derived
25
 *       from this software without specific prior written permission.
26
 *
27
 *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28
 *    ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29
 *    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30
 *    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
31
 *    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
 *    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
 *    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
 *    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
 *    POSSIBILITY OF SUCH DAMAGE.
38
 *
39
 * https://github.com/raulferras/PHP-po-parser
40
 *
41
 * Class to parse .po file and extract its strings.
42
 *
43
 * @version 5.0
44
 */
45
class Parser
{
    /** @var SourceHandler Source the .po lines are read from. */
    protected $sourceHandler;

    /** @var int Zero-based index of the line currently being processed by parse(). */
    protected $lineNumber;

    /** @var string|null Current mode; parse() only ever resets it to null. */
    protected $mode;

    /** @var string|null Key of the last property parsed; continuation lines are appended to it. */
    protected $property;

    /**
     * Reads and parses a string.
     *
     * NOTE(review): the Catalog produced by parse() is discarded here and the
     * Parser instance is returned instead, unlike parseFile(). Kept as-is so
     * existing callers are not broken; confirm whether a Catalog was intended.
     *
     * @param string $string po content
     *
     * @throws \Exception
     * @return Parser
     */
    public static function parseString($string)
    {
        $parser = new self(new StringSource($string));
        $parser->parse();

        return $parser;
    }

    /**
     * Reads and parses a file.
     *
     * @param string $filePath
     *
     * @throws \Exception
     * @return Catalog
     */
    public static function parseFile($filePath)
    {
        $parser = new self(new FileSystem($filePath));

        return $parser->parse();
    }

    /**
     * @param SourceHandler $sourceHandler Source the lines will be pulled from.
     */
    public function __construct(SourceHandler $sourceHandler)
    {
        $this->sourceHandler = $sourceHandler;
    }

    /**
     * Reads every line from the source handler and builds a Catalog from it.
     *
     * An entry is closed by a blank line or by a new "msgid" line once the
     * current entry already has one. The first closed entry that isHeader()
     * recognises is stored as the catalog headers; all others become entries.
     *
     * @throws ParseException When a property or a continuation line cannot be parsed.
     * @throws \Exception
     * @return Catalog
     */
    public function parse()
    {
        $catalog = new Catalog();
        $this->lineNumber = 0;
        $this->mode = null;
        $this->property = null;

        $entry = array();
        $headersFound = false;

        while (!$this->sourceHandler->ended()) {
            $line = trim($this->sourceHandler->getNextLine());

            if ($this->shouldIgnoreLine($line, $entry)) {
                $this->lineNumber++;
                continue;
            }

            if ($this->shouldCloseEntry($line, $entry)) {
                $headersFound = $this->storeEntry($catalog, $entry, $headersFound);

                $entry = array();
                $this->mode = null;
                $this->property = null;

                // A blank separator carries no data; any other closing line
                // (a new msgid) is the first line of the next entry.
                if ($line === '') {
                    $this->lineNumber++;
                    continue;
                }
            }

            $entry = $this->parseLine($line, $entry);
            $this->lineNumber++;
        }
        $this->sourceHandler->close();

        // The input may end without a trailing blank line: flush what is left,
        // applying the same header detection as for entries closed in the loop.
        if (count($entry) > 0) {
            $this->storeEntry($catalog, $entry, $headersFound);
        }

        return $catalog;
    }

    /**
     * Stores a finished entry in the catalog, either as headers or as an entry.
     *
     * @param Catalog $catalog
     * @param array   $entry
     * @param bool    $headersFound Whether headers were already stored.
     *
     * @return bool Updated "headers found" flag.
     */
    private function storeEntry(Catalog $catalog, array $entry, $headersFound)
    {
        if (!$headersFound && $this->isHeader($entry)) {
            $catalog->addHeaders($this->parseHeaders($entry['msgstr']));

            return true;
        }

        $catalog->addEntry(EntryFactory::createFromArray($entry));

        return $headersFound;
    }

    /**
     * Dispatches a line to the matching parser based on its first character.
     * Lines starting with any other character are ignored.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseLine($line, $entry)
    {
        $firstChar = strlen($line) > 0 ? $line[0] : '';

        if ($firstChar === '#') {
            return $this->parseComment($line, $entry);
        }

        if ($firstChar === 'm') {
            return $this->parseProperty($line, $entry);
        }

        if ($firstChar === '"') {
            return $this->parseMultiline($line, $entry);
        }

        return $entry;
    }

    /**
     * Parses a "keyword value" line (msgctxt, msgid, msgid_plural, msgstr,
     * msgstr[N]) and appends the unquoted value to the entry.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When the keyword is not a known property.
     */
    protected function parseProperty($line, array $entry)
    {
        $tokens = $this->getProperty($line);
        $key = $tokens[0];
        // A keyword without a value yields a single token.
        $value = isset($tokens[1]) ? $tokens[1] : '';

        if (!$this->isTextProperty($key)) {
            throw new ParseException(sprintf('Could not parse %s at line %d', $key, $this->lineNumber));
        }

        if (!isset($entry[$key])) {
            $entry[$key] = '';
        }

        $entry[$key] .= $this->unquote($value);
        // Remember the key so following quoted lines are appended to it.
        $this->property = $key;

        return $entry;
    }

    /**
     * Appends a quoted continuation line to the last parsed property.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException When no suitable property precedes the line.
     */
    protected function parseMultiline($line, $entry)
    {
        $key = $this->property;

        if (!is_string($key) || !$this->isTextProperty($key)) {
            throw new ParseException(
                sprintf('Error parsing property %s as multiline.', $this->property)
            );
        }

        // The remembered property may have been set while parsing a different
        // (sub)entry, so the key is not guaranteed to exist in this one.
        if (!isset($entry[$key])) {
            $entry[$key] = '';
        }

        $entry[$key] .= $this->unquote($line);

        return $entry;
    }

    /**
     * Tells whether a key is one of the text properties handled by the parser.
     *
     * @param string $key
     *
     * @return bool
     */
    private function isTextProperty($key)
    {
        $plain = array('msgctxt', 'msgid', 'msgid_plural', 'msgstr');

        // Plural translations are keyed "msgstr[N]": match on the prefix only.
        return in_array($key, $plain, true) || strpos($key, 'msgstr[') === 0;
    }

    /**
     * Parses a line starting with "#".
     *
     * "#," fills 'flags', "#." appends to 'ccomment', "#|", "#~" and "#~|" are
     * parsed recursively into the 'previous', 'obsolete' and
     * 'previous-obsolete' sub-entries, anything else is appended to 'tcomment'.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return array
     * @throws ParseException
     */
    protected function parseComment($line, $entry)
    {
        // "#~|" is the only three-character marker; test it first, otherwise
        // it would be read as "#~" followed by a stray "|".
        if (strpos($line, '#~|') === 0) {
            $comment = '#~|';
        } else {
            $comment = trim(substr($line, 0, 2));
        }

        switch ($comment) {
            case '#,':
                $entry['flags'] = preg_split('/,\s*/', trim(substr($line, 2)));
                break;

            case '#.':
                if (!isset($entry['ccomment'])) {
                    $entry['ccomment'] = array();
                }
                $entry['ccomment'][] = trim(substr($line, 2));
                break;

            case '#|':  // Previous string
            case '#~':  // Old entry
            case '#~|': // Previous string old
                $mode = array(
                    '#|' => 'previous',
                    '#~' => 'obsolete',
                    '#~|' => 'previous-obsolete'
                );

                $subKey = $mode[$comment];
                $subEntry = isset($entry[$subKey]) ? $entry[$subKey] : array();

                // Strip the marker and parse the remainder as a regular line.
                $entry[$subKey] = $this->parseLine(trim(substr($line, strlen($comment))), $subEntry);
                break;

            case '#':
            default:
                if (!isset($entry['tcomment'])) {
                    $entry['tcomment'] = array();
                }
                $entry['tcomment'][] = trim(substr($line, 1));
                break;
        }

        return $entry;
    }

    /**
     * Splits the header msgstr on the literal two-character sequence "\n" and
     * drops empty chunks.
     *
     * @param string $msgstr
     *
     * @return array
     */
    protected function parseHeaders($msgstr)
    {
        return array_filter(explode('\\n', $msgstr));
    }

    /**
     * A blank line is ignored while no entry is being built.
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldIgnoreLine($line, array $entry)
    {
        return $line === '' && count($entry) === 0;
    }

    /**
     * An entry ends on a blank line, or when a new "msgid" shows up while the
     * current entry already has one (entries not separated by a blank line).
     *
     * @param string $line
     * @param array  $entry
     *
     * @return bool
     */
    protected function shouldCloseEntry($line, array $entry)
    {
        if ($line === '') {
            return true;
        }

        $tokens = $this->getProperty($line);

        return $tokens[0] === 'msgid' && isset($entry['msgid']);
    }

    /**
     * Removes one leading and one trailing double quote, when present.
     *
     * @param string $value
     * @return string
     */
    protected function unquote($value)
    {
        return preg_replace('/^\"|\"$/', '', $value);
    }

    /**
     * Checks if an entry is the header block: it must have an empty msgid and
     * a msgstr naming at least one standard header or one custom "X-" header.
     *
     * @param array $entry
     *
     * @return bool
     */
    protected function isHeader(array $entry)
    {
        if (!isset($entry['msgstr'], $entry['msgid'])) {
            return false;
        }

        if ($entry['msgid'] !== '') {
            return false;
        }

        $standardHeaders = array(
            'Project-Id-Version:',
            'Report-Msgid-Bugs-To:',
            'POT-Creation-Date:',
            'PO-Revision-Date:',
            'Last-Translator:',
            'Language-Team:',
            'MIME-Version:',
            'Content-Type:',
            'Content-Transfer-Encoding:',
            'Plural-Forms:',
        );

        // Keep only the header name, i.e. everything up to the first colon.
        $names = array_map(
            function ($header) {
                return preg_replace('/(.*?:)(.*)/i', '${1}', $header);
            },
            explode('\n', $entry['msgstr'])
        );

        if (count(array_intersect($standardHeaders, $names)) > 0) {
            return true;
        }

        // No standard header: accept the block if it holds a custom one.
        foreach ($names as $name) {
            if (preg_match('/^X\-(.*):/i', $name) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Splits a line on its first run of whitespace.
     *
     * @param string $line
     *
     * @return array One or two tokens: the keyword and, when present, the rest of the line.
     */
    protected function getProperty($line)
    {
        return preg_split('/\s+/ ', $line, 2);
    }
}
416