Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php namespace Sepia\PoParser; |
||
44 | class Parser |
||
45 | { |
||
46 | const OPTION_EOL_KEY = 'multiline-glue'; |
||
47 | const OPTION_EOC_KEY = 'context-glue'; |
||
48 | |||
49 | const OPTION_EOL_VALUE = '<##EOL##>'; // End of Line token. |
||
50 | const OPTION_EOC_VALUE = '<##EOC##>'; // End of Context token. |
||
51 | |||
52 | /** |
||
53 | * @var array |
||
54 | */ |
||
55 | protected $entries = array(); |
||
56 | |||
57 | /** |
||
58 | * @var string[] |
||
59 | */ |
||
60 | protected $headers = array(); |
||
61 | |||
62 | /** |
||
63 | * @var null|HandlerInterface |
||
64 | */ |
||
65 | protected $sourceHandle = null; |
||
66 | |||
67 | /** |
||
68 | * @var array |
||
69 | */ |
||
70 | protected $options = array(); |
||
71 | |||
72 | /** |
||
73 | * Reads and parses a string |
||
74 | * |
||
75 | * @param string $string po content |
||
76 | * @param array $options |
||
77 | * |
||
78 | * @throws \Exception. |
||
79 | * @return $this |
||
80 | */ |
||
81 | public static function parseString($string, $options = array()) |
||
88 | |||
89 | |||
90 | |||
91 | /** |
||
92 | * Reads and parses a file |
||
93 | * |
||
94 | * @param string $filepath |
||
95 | * @param array $options |
||
96 | * |
||
97 | * @return $this |
||
98 | * @throws \Exception. |
||
99 | */ |
||
100 | 39 | public static function parseFile($filepath, $options = array()) |
|
107 | |||
108 | |||
109 | 48 | public function __construct(HandlerInterface $handler, $options = array()) |
|
114 | |||
115 | /** |
||
116 | * Sets options. |
||
117 | * Options that are not set will take their default value. |
||
118 | * |
||
119 | * @param $options |
||
120 | * |
||
121 | * @return $this |
||
122 | */ |
||
123 | 48 | public function setOptions($options) |
|
136 | |||
137 | /** |
||
138 | * Get parser options. |
||
139 | * |
||
140 | * @return array |
||
141 | */ |
||
142 | 9 | public function getOptions() |
|
146 | |||
147 | /** |
||
148 | * Gets source Handler. |
||
149 | * |
||
150 | * @return null|HandlerInterface |
||
151 | */ |
||
152 | 3 | public function getSourceHandle() |
|
156 | |||
157 | /** |
||
158 | * @param null|HandlerInterface $sourceHandle |
||
159 | * |
||
160 | * @return $this |
||
161 | */ |
||
162 | 48 | public function setSourceHandle(HandlerInterface $sourceHandle) |
|
168 | |||
169 | /** |
||
170 | * Get headers from .po file |
||
171 | * |
||
172 | * @return string[] |
||
173 | */ |
||
174 | 12 | public function getHeaders() |
|
178 | |||
179 | /** |
||
180 | * Set new headers. |
||
181 | * |
||
182 | * @param array $newHeaders |
||
183 | * |
||
184 | * @return $this |
||
185 | */ |
||
186 | 3 | public function setHeaders(array $newHeaders) |
|
192 | |||
193 | /** |
||
194 | * Gets entries. |
||
195 | * |
||
196 | * @return array |
||
197 | */ |
||
198 | 30 | public function getEntries() |
|
202 | |||
/**
 * Reads the attached source handler line by line and parses it as a .po file.
 *
 * Lines are grouped into entries; a blank line, or a `msgid` arriving while the
 * current entry already has one, closes the current entry. If the first closed
 * entry is recognised by `isHeader()`, its `msgstr` lines (minus the first one)
 * become the file headers instead of a regular entry.
 *
 * After reading, the handler is closed, every collected value is passed through
 * `clean()`, and entries that have a `msgid` plus at least one key starting with
 * `msgstr` are stored in `$this->entries`, indexed by `getEntryId()`.
 *
 * @return array List of entries found in the .po file.
 * @throws \Exception When `clean()` reports a failure (returns false) for a value.
 * @throws \InvalidArgumentException Declared by the original docblock; not raised
 *                                   directly by this method.
 */
public function parse()
{
    $handle = $this->sourceHandle;

    $headers = array();
    $hash = array();
    $entry = array();
    $justNewEntry = false; // A new entry has been just inserted.
    $firstLine = true;     // True until the first entry (possibly the header) is closed.
    $lastPreviousKey = null; // Used to remember last key in a multiline previous entry.
    $state = null;         // Key that continuation ("multiline") lines are appended to.
    $lineNumber = 0;

    while (!$handle->ended()) {
        $line = trim($handle->getNextLine());
        $split = preg_split('/\s+/', $line, 2);
        $key = $split[0];

        // If a blank line is found, or a new msgid when already got one
        if ($line === '' || ($key === 'msgid' && isset($entry['msgid']))) {
            // Two consecutive blank lines
            if ($justNewEntry) {
                $lineNumber++;
                continue;
            }

            if ($firstLine) {
                $firstLine = false;
                if (self::isHeader($entry)) {
                    // Drop the first msgstr line and keep the rest as raw header lines.
                    array_shift($entry['msgstr']);
                    $headers = $entry['msgstr'];
                } else {
                    $hash[] = $entry;
                }
            } else {
                // A new entry is found!
                $hash[] = $entry;
            }

            $entry = array();
            $state = null;
            $justNewEntry = true;
            $lastPreviousKey = null;
            if ($line === '') {
                $lineNumber++;
                continue;
            }
            // Otherwise the line is a msgid that starts the next entry: fall through.
        }

        $justNewEntry = false;
        $data = isset($split[1]) ? $split[1] : null;

        switch ($key) {
            // Flagged translation
            case '#,':
                // Cast: $data is null when the key has no payload.
                $entry['flags'] = preg_split('/,\s*/', (string) $data);
                break;

            // # Translator comments
            case '#':
                $entry['tcomment'][] = $data;
                break;

            // #. Comments extracted from source code
            case '#.':
                $entry['ccomment'][] = $data;
                break;

            // Reference
            case '#:':
                $entry['reference'][] = addslashes((string) $data);
                break;

            case '#|':  // #| Previous untranslated string
            case '#~':  // #~ Old entry
            case '#~|': // #~| Previous-Old untranslated string. Reported by @Cellard
                switch ($key) {
                    case '#|':
                        $key = 'previous';
                        break;

                    case '#~':
                        $key = 'obsolete';
                        break;

                    case '#~|':
                        $key = 'previous-obsolete';
                        break;
                }

                $tmpParts = explode(' ', (string) $data);
                $tmpKey = $tmpParts[0];

                if (!in_array($tmpKey, array('msgid', 'msgid_plural', 'msgstr', 'msgctxt'), true)) {
                    // If there is a multiline previous string we must remember what key was first line.
                    $tmpKey = $lastPreviousKey;
                    $str = $data;
                } else {
                    $str = implode(' ', array_slice($tmpParts, 1));
                }

                if (!isset($entry[$key])) {
                    $entry[$key] = array('msgid' => array(), 'msgstr' => array());
                }

                if (strpos($key, 'obsolete') !== false) {
                    // Obsolete lines are flagged and their msgid/msgstr go into the entry itself.
                    $entry['obsolete'] = true;
                    switch ($tmpKey) {
                        case 'msgid':
                            $entry['msgid'][] = $str;
                            $lastPreviousKey = $tmpKey;
                            break;

                        case 'msgstr':
                            if ($str === "\"\"") {
                                $entry['msgstr'][] = trim($str, '"');
                            } else {
                                $entry['msgstr'][] = $str;
                            }
                            $lastPreviousKey = $tmpKey;
                            break;

                        default:
                            break;
                    }
                }

                if ($key !== 'obsolete') {
                    // 'previous' and 'previous-obsolete' keep their own nested data.
                    switch ($tmpKey) {
                        case 'msgid':
                        case 'msgid_plural':
                        case 'msgstr':
                            $entry[$key][$tmpKey][] = $str;
                            $lastPreviousKey = $tmpKey;
                            break;

                        default:
                            $entry[$key][$tmpKey] = $str;
                            break;
                    }
                }
                break;

            // context
            // Allows disambiguations of different messages that have same msgid.
            // Example:
            //
            // #: tools/observinglist.cpp:700
            // msgctxt "First letter in 'Scope'"
            // msgid "S"
            // msgstr ""
            //
            // #: skycomponents/horizoncomponent.cpp:429
            // msgctxt "South"
            // msgid "S"
            // msgstr ""
            case 'msgctxt':
            // untranslated-string
            case 'msgid':
            // untranslated-string-plural
            case 'msgid_plural':
                $state = $key;
                $entry[$state][] = $data;
                break;

            // translated-string
            case 'msgstr':
                $state = 'msgstr';
                $entry[$state][] = $data;
                break;

            default:
                if (strpos($key, 'msgstr[') !== false) {
                    // translated-string-case-n
                    $state = $key;
                    $entry[$state][] = $data;
                } elseif ($state === 'msgstr') {
                    // "multiline" continuation of a msgstr.
                    // Special fix where msgid is ""
                    // NOTE(review): msgid is only ever appended to as an array in this
                    // method, so this comparison appears never to be true - confirm
                    // the intent before relying on or removing it.
                    if ($entry['msgid'] == "\"\"") {
                        $entry['msgstr'][] = trim($line, '"');
                    } else {
                        $entry['msgstr'][] = $line;
                    }
                } elseif ($state === null
                    || $state === 'msgctxt'
                    || $state === 'msgid'
                    || $state === 'msgid_plural'
                    || strpos($state, 'msgstr[') !== false
                ) {
                    // "multiline" continuation of the current key.
                    // NOTE(review): a null state is accepted here because the original
                    // `case (strpos($state, 'msgstr[') !== false)` arm loosely matched
                    // null; such lines end up under the '' key and the "Unknown key"
                    // exception below is effectively unreachable. Kept as-is so that
                    // existing callers see no new exceptions - confirm whether
                    // unknown keys should be rejected instead.
                    if (is_string($entry[$state])) {
                        // Convert it to array
                        $entry[$state] = array($entry[$state]);
                    }
                    $entry[$state][] = $line;
                } else {
                    throw new \Exception(
                        'PoParser: Parse error! Unknown key "' . $key . '" on line ' . ($lineNumber+1)
                    );
                }
                break;
        }

        $lineNumber++;
    }
    $handle->close();

    // Add the final entry. Any pending, non-empty entry is flushed: checking only
    // for the 'msgstr' state dropped a last entry that ended in a plural form
    // (state 'msgstr[n]') or was obsolete (state stays null) when the source did
    // not end with a blank line.
    if (!empty($entry)) {
        $hash[] = $entry;
    }

    // - Cleanup header data
    $this->headers = array();
    foreach ($headers as $header) {
        $header = $this->clean($header);
        $this->headers[] = "\"" . preg_replace("/\\n/", '\n', $header) . "\"";
    }

    // - Cleanup data,
    // - merge multiline entries
    // - Reindex hash for ksort
    $this->entries = array();
    foreach ($hash as $entry) {
        foreach ($entry as &$v) {
            $or = $v;
            $v = $this->clean($v);
            if ($v === false) {
                // parse error
                // Entry values are usually arrays, which htmlspecialchars() cannot take.
                $printable = is_string($or) ? $or : print_r($or, true);
                throw new \Exception(
                    'PoParser: Parse error! poparser::clean returned false on "' . htmlspecialchars($printable) . '"'
                );
            }
        }
        // Break the reference so later writes to $v cannot touch the stored entry.
        unset($v);

        // check if msgid and a key starting with msgstr exists
        if (isset($entry['msgid']) && count(preg_grep('/^msgstr/', array_keys($entry)))) {
            $id = $this->getEntryId($entry);
            $this->entries[$id] = $entry;
        }
    }

    return $this->entries;
}
||
461 | |||
462 | /** |
||
463 | * Updates an entry. |
||
464 | * If entry not found returns false. If $createNew is true, a new entry will be created. |
||
465 | * $entry is an array that can contain following indexes: |
||
466 | * - msgid: string[]. Required. |
||
467 | * - msgstr: string[]. Required. |
||
468 | * - reference: string[]. |
||
469 | * - msgctxt: string. Disambiguating context. |
||
470 | * - tcomment: string[]. Translator comments. |
||
471 | * - ccomment: string[]. Source comments. |
||
472 | * - msgid_plural: string[]. |
||
473 | * - flags: array. List of entry flags. Example: ['fuzzy', 'php-format'] |
||
474 | * - previous: array. Contains previous untranslated strings in a sub array with msgid and msgstr. |
||
475 | * |
||
476 | * @param string $msgid Id of entry. Be aware that some entries have a multiline msgid. |
||
477 | * In that case \n must be replaced by the value of 'multiline-glue' |
||
478 | * option (by default "<##EOL##>"). |
||
479 | * @param array $entry Array with all entry data. Fields that are not set will be removed. |
||
480 | * @param bool $createNew If msgid not found, it will create a new entry. By default true. |
||
481 | * You want to set this to false if need to change the msgid of an entry. |
||
482 | */ |
||
483 | 12 | public function setEntry($msgid, $entry, $createNew = true) |
|
503 | |||
504 | /** |
||
505 | * @param string $msgid Message Id. |
||
506 | * @param bool $plural |
||
507 | */ |
||
508 | public function setEntryPlural($msgid, $plural = false) |
||
516 | |||
517 | /** |
||
518 | * @param string $msgid Message Id. |
||
519 | * @param bool $context |
||
520 | */ |
||
521 | public function setEntryContext($msgid, $context = false) |
||
529 | |||
530 | /** |
||
531 | * Saves current translation back into source. |
||
532 | * |
||
533 | * @param mixed $params Parameters to pass to the source handler. |
||
534 | * |
||
535 | * @return $this |
||
536 | * @throws \Exception |
||
537 | */ |
||
538 | 15 | public function save($params) |
|
545 | |||
546 | |||
547 | |||
548 | |||
549 | /** |
||
550 | * Compiles entries into a string |
||
551 | * |
||
552 | * @return string |
||
553 | * @throws \Exception |
||
554 | */ |
||
555 | 15 | public function compile() |
|
690 | |||
691 | |||
692 | /** |
||
693 | * Prepares a string to be output into a file. |
||
694 | * |
||
695 | * @param string $string The string to be converted. |
||
696 | * @return string |
||
697 | */ |
||
698 | 15 | protected function cleanExport($string) |
|
717 | |||
718 | |||
719 | /** |
||
720 | * Generates the internal key for a msgid. |
||
721 | * |
||
722 | * @param array $entry |
||
723 | * |
||
724 | * @return string |
||
725 | */ |
||
726 | protected function getEntryId(array $entry) |
||
736 | |||
737 | |||
738 | /** |
||
739 | * Undo `cleanExport` actions on a string. |
||
740 | * |
||
741 | * @param string|array $x |
||
742 | * |
||
743 | * @return string|array |
||
744 | */ |
||
745 | protected function clean($x) |
||
767 | |||
768 | |||
769 | /** |
||
770 | * Checks if entry is a header. |
||
771 | * |
||
772 | * @param array $entry |
||
773 | * @return bool |
||
774 | */ |
||
775 | protected static function isHeader(array $entry) |
||
808 | } |
||
809 |
According to PSR-2, the body of a case statement must start on the line immediately following the case statement.
}
To learn more about the PSR-2 coding standard, please refer to the PHP-Fig.