Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
21 | class Parser |
||
22 | { |
||
23 | const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?'; |
||
24 | |||
25 | private $offset = 0; |
||
26 | private $lines = array(); |
||
27 | private $currentLineNb = -1; |
||
28 | private $currentLine = ''; |
||
29 | private $refs = array(); |
||
30 | |||
/**
 * Creates a parser.
 *
 * @param int $offset Number of lines preceding the parsed YAML; it is added to
 *                    the internal line counter when line numbers are reported
 */
public function __construct($offset = 0)
{
    // Remember where this (possibly nested) document starts.
    $this->offset = $offset;
}
||
40 | |||
/**
 * Parses a YAML string to a PHP value.
 *
 * When mbstring function overloading is active, the internal encoding is
 * switched to UTF-8 for the duration of the parse and restored afterwards,
 * including when parsing fails with an exception.
 *
 * @param string $value                  A YAML string
 * @param bool   $exceptionOnInvalidType true if an exception must be thrown on invalid types (a PHP resource or object), false otherwise
 * @param bool   $objectSupport          true if object support is enabled, false otherwise
 * @param bool   $objectForMap           true if maps should return a stdClass instead of array()
 *
 * @return mixed A PHP value
 *
 * @throws ParseException If the YAML is not valid
 */
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
{
    if (!preg_match('//u', $value)) {
        throw new ParseException('The YAML value does not appear to be valid UTF-8.');
    }
    $this->currentLineNb = -1;
    $this->currentLine = '';
    $value = $this->cleanup($value);
    $this->lines = explode("\n", $value);

    $mbEncoding = null;
    if (2 /* MB_OVERLOAD_STRING */ & (int) ini_get('mbstring.func_overload')) {
        $mbEncoding = mb_internal_encoding();
        mb_internal_encoding('UTF-8');
    }

    // Capture any failure so that the encoding is restored on every exit path
    // ("finally" is avoided to stay compatible with the PHP version this file targets).
    $data = null;
    $e = null;
    try {
        $data = $this->doParse($value, $exceptionOnInvalidType, $objectSupport, $objectForMap);
    } catch (\Exception $e) {
    } catch (\Throwable $e) {
    }

    if (null !== $mbEncoding) {
        mb_internal_encoding($mbEncoding);
    }

    if (null !== $e) {
        throw $e;
    }

    return $data;
}

/**
 * Walks the lines prepared by parse() and builds the resulting PHP value.
 *
 * @param string $value                  The cleaned-up YAML string
 * @param bool   $exceptionOnInvalidType true if an exception must be thrown on invalid types, false otherwise
 * @param bool   $objectSupport          true if object support is enabled, false otherwise
 * @param bool   $objectForMap           true if maps should return a stdClass instead of array()
 *
 * @return mixed A PHP value
 *
 * @throws ParseException If the YAML is not valid
 */
private function doParse($value, $exceptionOnInvalidType, $objectSupport, $objectForMap)
{
    $data = array();
    $context = null;
    $allowOverwrite = false;
    while ($this->moveToNextLine()) {
        if ($this->isCurrentLineEmpty()) {
            continue;
        }

        // tab?
        if ("\t" === $this->currentLine[0]) {
            throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }

        $isRef = $mergeNode = false;
        if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
            // sequence item ("- ...")
            if ($context && 'mapping' == $context) {
                throw new ParseException('You cannot define a sequence item when in a mapping');
            }
            $context = 'sequence';

            // "&name" anchor in front of the item value
            if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                $isRef = $matches['ref'];
                $values['value'] = $matches['value'];
            }

            // array
            if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                // no inline value: the item is the following embedded block, parsed by a nested parser sharing our refs
                $c = $this->getRealCurrentLineNb() + 1;
                $parser = new self($c);
                $parser->refs = &$this->refs;
                $data[] = $parser->parse($this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
            } else {
                if (isset($values['leadspaces'])
                    && preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
                ) {
                    // this is a compact notation element, add to next block and parse
                    $c = $this->getRealCurrentLineNb();
                    $parser = new self($c);
                    $parser->refs = &$this->refs;

                    $block = $values['value'];
                    if ($this->isNextLineIndented()) {
                        $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + strlen($values['leadspaces']) + 1);
                    }

                    $data[] = $parser->parse($block, $exceptionOnInvalidType, $objectSupport, $objectForMap);
                } else {
                    $data[] = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport, $objectForMap, $context);
                }
            }
            if ($isRef) {
                $this->refs[$isRef] = end($data);
            }
        } elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) {
            // mapping item ("key: ...")
            if ($context && 'sequence' == $context) {
                throw new ParseException('You cannot define a mapping item when in a sequence');
            }
            $context = 'mapping';

            // force correct settings
            Inline::parse(null, $exceptionOnInvalidType, $objectSupport, $objectForMap, $this->refs);
            try {
                $key = Inline::parseScalar($values['key']);
            } catch (ParseException $e) {
                $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                $e->setSnippet($this->currentLine);

                throw $e;
            }

            // Convert float keys to strings, to avoid being converted to integers by PHP
            if (is_float($key)) {
                $key = (string) $key;
            }

            if ('<<' === $key) {
                $mergeNode = true;
                $allowOverwrite = true;
                if (isset($values['value']) && 0 === strpos($values['value'], '*')) {
                    // merge key pointing at an alias
                    $refName = substr($values['value'], 1);
                    if (!array_key_exists($refName, $this->refs)) {
                        throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine);
                    }

                    $refValue = $this->refs[$refName];

                    if (!is_array($refValue)) {
                        throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
                    }

                    foreach ($refValue as $key => $value) {
                        if (!isset($data[$key])) {
                            $data[$key] = $value;
                        }
                    }
                } else {
                    if (isset($values['value']) && $values['value'] !== '') {
                        $value = $values['value'];
                    } else {
                        $value = $this->getNextEmbedBlock();
                    }
                    $c = $this->getRealCurrentLineNb() + 1;
                    $parser = new self($c);
                    $parser->refs = &$this->refs;
                    $parsed = $parser->parse($value, $exceptionOnInvalidType, $objectSupport, $objectForMap);

                    if (!is_array($parsed)) {
                        throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
                    }

                    if (isset($parsed[0])) {
                        // If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes
                        // and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier
                        // in the sequence override keys specified in later mapping nodes.
                        foreach ($parsed as $parsedItem) {
                            if (!is_array($parsedItem)) {
                                throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem);
                            }

                            foreach ($parsedItem as $key => $value) {
                                if (!isset($data[$key])) {
                                    $data[$key] = $value;
                                }
                            }
                        }
                    } else {
                        // If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the
                        // current mapping, unless the key already exists in it.
                        foreach ($parsed as $key => $value) {
                            if (!isset($data[$key])) {
                                $data[$key] = $value;
                            }
                        }
                    }
                }
            } elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                // "&name" anchor in front of the mapping value
                $isRef = $matches['ref'];
                $values['value'] = $matches['value'];
            }

            if ($mergeNode) {
                // Merge keys
            } elseif (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                // hash
                // if next line is less indented or equal, then it means that the current value is null
                if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
                    // Spec: Keys MUST be unique; first one wins.
                    // But overwriting is allowed when a merge node is used in current block.
                    if ($allowOverwrite || !isset($data[$key])) {
                        $data[$key] = null;
                    }
                } else {
                    $c = $this->getRealCurrentLineNb() + 1;
                    $parser = new self($c);
                    $parser->refs = &$this->refs;
                    $value = $parser->parse($this->getNextEmbedBlock(), $exceptionOnInvalidType, $objectSupport, $objectForMap);
                    // Spec: Keys MUST be unique; first one wins.
                    // But overwriting is allowed when a merge node is used in current block.
                    if ($allowOverwrite || !isset($data[$key])) {
                        $data[$key] = $value;
                    }
                }
            } else {
                $value = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport, $objectForMap, $context);
                // Spec: Keys MUST be unique; first one wins.
                // But overwriting is allowed when a merge node is used in current block.
                if ($allowOverwrite || !isset($data[$key])) {
                    $data[$key] = $value;
                }
            }
            if ($isRef) {
                $this->refs[$isRef] = $data[$key];
            }
        } else {
            // multiple documents are not supported
            if ('---' === $this->currentLine) {
                throw new ParseException('Multiple documents are not supported.');
            }

            // 1-liner optionally followed by newline(s)
            if (is_string($value) && $this->lines[0] === trim($value)) {
                try {
                    $value = Inline::parse($this->lines[0], $exceptionOnInvalidType, $objectSupport, $objectForMap, $this->refs);
                } catch (ParseException $e) {
                    $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                    $e->setSnippet($this->currentLine);

                    throw $e;
                }

                if (is_array($value)) {
                    $first = reset($value);
                    if (is_string($first) && 0 === strpos($first, '*')) {
                        // the inline value is a list of aliases: replace each one by the value it refers to
                        $data = array();
                        foreach ($value as $alias) {
                            $data[] = $this->refs[substr($alias, 1)];
                        }
                        $value = $data;
                    }
                }

                return $value;
            }

            // the line matched none of the patterns: report the PCRE error, if any
            switch (preg_last_error()) {
                case PREG_INTERNAL_ERROR:
                    $error = 'Internal PCRE error.';
                    break;
                case PREG_BACKTRACK_LIMIT_ERROR:
                    $error = 'pcre.backtrack_limit reached.';
                    break;
                case PREG_RECURSION_LIMIT_ERROR:
                    $error = 'pcre.recursion_limit reached.';
                    break;
                case PREG_BAD_UTF8_ERROR:
                    $error = 'Malformed UTF-8 data.';
                    break;
                case PREG_BAD_UTF8_OFFSET_ERROR:
                    $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
                    break;
                default:
                    $error = 'Unable to parse.';
            }

            throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }
    }

    if ($objectForMap && !is_object($data)) {
        $data = (object) $data;
    }

    return empty($data) ? null : $data;
}
||
310 | |||
/**
 * Gives the index of the current line with the document offset added.
 *
 * @return int The offset-adjusted current line index
 */
private function getRealCurrentLineNb()
{
    $realLineNb = $this->offset + $this->currentLineNb;

    return $realLineNb;
}
||
320 | |||
/**
 * Counts the space characters at the start of the current line.
 *
 * @return int The number of leading spaces of the current line
 */
private function getCurrentLineIndentation()
{
    // Only the space character counts as indentation (tabs do not).
    return strspn($this->currentLine, ' ');
}
||
330 | |||
/**
 * Collects the lines of the block nested under the current line.
 *
 * The parser is advanced over the block; each collected line has the first
 * $newIndent characters removed. When the block ends on a line that belongs
 * to the parent, the parser is moved back one line.
 *
 * @param int|null $indentation The indent level at which the block is to be read, or null to use the indentation of the next line
 * @param bool     $inSequence  True if the enclosing data structure is a sequence
 *
 * @return string|null The block as a YAML string, or null when there is no nested block
 *
 * @throws ParseException When indentation problem are detected
 */
private function getNextEmbedBlock($indentation = null, $inSequence = false)
{
    $parentIndent = $this->getCurrentLineIndentation();

    // Indentation levels of the block scalar headers ("|", ">") that are currently open.
    $openScalarIndents = array();
    if ($this->isBlockScalarHeader()) {
        $openScalarIndents[] = $parentIndent;
    }

    if (!$this->moveToNextLine()) {
        return;
    }

    if (null !== $indentation) {
        $newIndent = $indentation;
    } else {
        $newIndent = $this->getCurrentLineIndentation();
        $startsUnindentedCollection = $this->isStringUnIndentedCollectionItem();

        if (0 === $newIndent && !$startsUnindentedCollection && !$this->isCurrentLineEmpty()) {
            throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }
    }

    // The first line is less indented than required: there is no nested block.
    if ($this->getCurrentLineIndentation() < $newIndent) {
        $this->moveToPreviousLine();

        return;
    }

    $data = array(substr($this->currentLine, $newIndent));

    if ($inSequence && $parentIndent === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) {
        // the previous line contained a dash but no item content, this line is a sequence item with the same indentation
        // and therefore no nested list or mapping
        $this->moveToPreviousLine();

        return;
    }

    $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();

    if (empty($openScalarIndents) && $this->isBlockScalarHeader()) {
        $openScalarIndents[] = $this->getCurrentLineIndentation();
    }

    $previousLineIndentation = $this->getCurrentLineIndentation();

    while ($this->moveToNextLine()) {
        $indent = $this->getCurrentLineIndentation();

        // terminate all block scalars that are more indented than the current line
        if (!empty($openScalarIndents) && $indent < $previousLineIndentation && trim($this->currentLine) !== '') {
            foreach ($openScalarIndents as $position => $scalarIndent) {
                if ($scalarIndent >= $indent) {
                    unset($openScalarIndents[$position]);
                }
            }
        }

        if (empty($openScalarIndents) && !$this->isCurrentLineComment() && $this->isBlockScalarHeader()) {
            $openScalarIndents[] = $indent;
        }

        $previousLineIndentation = $indent;

        // an unindented collection ends at the first line of the same level that is not a "- " item
        if ($isItUnindentedCollection && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) {
            $this->moveToPreviousLine();
            break;
        }

        if ($this->isCurrentLineBlank()) {
            $data[] = substr($this->currentLine, $newIndent);
            continue;
        }

        // we ignore "comment" lines only when we are not inside a scalar block
        if (empty($openScalarIndents) && $this->isCurrentLineComment()) {
            continue;
        }

        if ($indent >= $newIndent) {
            $data[] = substr($this->currentLine, $newIndent);
            continue;
        }

        if (0 == $indent) {
            // back at the top level: the block is over, give the line back
            $this->moveToPreviousLine();

            break;
        }

        throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
    }

    return implode("\n", $data);
}
||
437 | |||
/**
 * Advances the parser by one line.
 *
 * @return bool False when the current line is already the last one (nothing changes), true otherwise
 */
private function moveToNextLine()
{
    $nextLineNb = $this->currentLineNb + 1;

    if ($nextLineNb >= count($this->lines)) {
        return false;
    }

    $this->currentLineNb = $nextLineNb;
    $this->currentLine = $this->lines[$nextLineNb];

    return true;
}
||
453 | |||
/**
 * Steps the parser back by one line.
 *
 * No bounds check is performed: callers only step back after a successful
 * move forward.
 */
private function moveToPreviousLine()
{
    $this->currentLineNb -= 1;
    $this->currentLine = $this->lines[$this->currentLineNb];
}
||
461 | |||
/**
 * Parses a YAML value.
 *
 * Handles, in order: aliases ("*name"), block scalar headers ("|", ">" with
 * optional modifiers) and finally inline values, which are delegated to Inline::parse().
 *
 * @param string $value                  A YAML value
 * @param bool   $exceptionOnInvalidType True if an exception must be thrown on invalid types false otherwise
 * @param bool   $objectSupport          True if object support is enabled, false otherwise
 * @param bool   $objectForMap           true if maps should return a stdClass instead of array()
 * @param string $context                The parser context (either sequence or mapping)
 *
 * @return mixed A PHP value
 *
 * @throws ParseException When reference does not exist
 */
private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $objectForMap, $context)
{
    if (0 === strpos($value, '*')) {
        // alias: drop the leading "*" and, when present, everything from the "#" on
        if (false !== $pos = strpos($value, '#')) {
            $value = substr($value, 1, $pos - 2);
        } else {
            $value = substr($value, 1);
        }

        if (!array_key_exists($value, $this->refs)) {
            // line number and snippet are passed the same way as for the other "Reference ... does not exist" error of this class
            throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }

        return $this->refs[$value];
    }

    if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
        $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

        // $modifiers may be '', '+', '-', or digits combined with a sign; cast it to int before
        // calling abs(), which rejects non-numeric strings on PHP 8
        return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs((int) $modifiers));
    }

    try {
        $parsedValue = Inline::parse($value, $exceptionOnInvalidType, $objectSupport, $objectForMap, $this->refs);

        // only a string result can contain ": "; other types are never passed to strpos()
        if ('mapping' === $context && '"' !== $value[0] && "'" !== $value[0] && '[' !== $value[0] && '{' !== $value[0] && '!' !== $value[0] && is_string($parsedValue) && false !== strpos($parsedValue, ': ')) {
            throw new ParseException('A colon cannot be used in an unquoted mapping value.');
        }

        return $parsedValue;
    } catch (ParseException $e) {
        // add the location to exceptions raised above or by the inline parser
        $e->setParsedLine($this->getRealCurrentLineNb() + 1);
        $e->setSnippet($this->currentLine);

        throw $e;
    }
}
||
512 | |||
/**
 * Reads the lines of a block scalar and assembles its text.
 *
 * @param string $style       The style indicator that was used to begin this block scalar (| or >)
 * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
 * @param int    $indentation The indentation indicator that was used to begin this block scalar
 *
 * @return string The text value
 */
private function parseBlockScalar($style, $chomping = '', $indentation = 0)
{
    $hasLine = $this->moveToNextLine();
    if (!$hasLine) {
        return '';
    }

    $lineIsBlank = $this->isCurrentLineBlank();
    $collected = array();

    // leading blank lines are consumed before determining indentation
    while ($hasLine && $lineIsBlank) {
        $hasLine = $this->moveToNextLine();
        // newline only if not EOF
        if ($hasLine) {
            $collected[] = '';
            $lineIsBlank = $this->isCurrentLineBlank();
        }
    }

    // determine indentation if not specified
    if (0 === $indentation && preg_match('/^ +/', $this->currentLine, $matches)) {
        $indentation = strlen($matches[0]);
    }

    if ($indentation > 0) {
        $pattern = sprintf('/^ {%d}(.*)$/', $indentation);

        // the block continues as long as lines are blank or carry at least the block indentation
        while ($hasLine && ($lineIsBlank || preg_match($pattern, $this->currentLine, $matches))) {
            if (!$lineIsBlank) {
                $collected[] = $matches[1];
            } elseif (strlen($this->currentLine) > $indentation) {
                // blank line longer than the indentation: keep the extra spaces
                $collected[] = substr($this->currentLine, $indentation);
            } else {
                $collected[] = '';
            }

            // newline only if not EOF
            $hasLine = $this->moveToNextLine();
            if ($hasLine) {
                $lineIsBlank = $this->isCurrentLineBlank();
            }
        }
    } elseif ($hasLine) {
        $collected[] = '';
    }

    if ($hasLine) {
        // the current line is not part of the scalar: give it back to the caller
        $collected[] = '';
        $this->moveToPreviousLine();
    }

    if ('>' !== $style) {
        $text = implode("\n", $collected);
    } else {
        // folded style
        $text = '';
        $previousLineIndented = false;
        $previousLineBlank = false;

        foreach ($collected as $index => $line) {
            $blank = '' === $line;
            $indented = !$blank && ' ' === $line[0];

            if ($blank) {
                $text .= "\n";
            } elseif ($indented || $previousLineIndented) {
                // lines next to more-indented text keep their line break
                $text .= "\n".$line;
            } elseif ($previousLineBlank || 0 === $index) {
                $text .= $line;
            } else {
                // consecutive plain lines are folded into one, separated by a space
                $text .= ' '.$line;
            }

            $previousLineIndented = $indented;
            $previousLineBlank = $blank;
        }
    }

    // deal with trailing newlines
    if ('-' === $chomping) {
        $text = preg_replace('/\n+$/', '', $text);
    } elseif ('' === $chomping) {
        $text = preg_replace('/\n+$/', "\n", $text);
    }

    return $text;
}
||
621 | |||
/**
 * Tells whether the next non-empty line is more indented than the current one.
 *
 * Blank and comment lines are skipped while looking ahead. When a candidate
 * line is found the parser is moved back by one line; when the end of the
 * input is reached the parser is left where the look-ahead stopped.
 *
 * @return bool Returns true if the next line is indented, false otherwise
 */
private function isNextLineIndented()
{
    $referenceIndent = $this->getCurrentLineIndentation();

    do {
        $hasLine = $this->moveToNextLine();
    } while ($hasLine && $this->isCurrentLineEmpty());

    if (!$hasLine) {
        return false;
    }

    $isDeeper = $this->getCurrentLineIndentation() > $referenceIndent;

    $this->moveToPreviousLine();

    return $isDeeper;
}
||
649 | |||
/**
 * Tells whether the current line carries no content, i.e. is blank or a comment.
 *
 * @return bool Returns true if the current line is empty or if it is a comment line, false otherwise
 */
private function isCurrentLineEmpty()
{
    if ($this->isCurrentLineBlank()) {
        return true;
    }

    return $this->isCurrentLineComment();
}
||
659 | |||
/**
 * Tells whether the current line contains nothing but space characters.
 *
 * @return bool Returns true if the current line is blank, false otherwise
 */
private function isCurrentLineBlank()
{
    $withoutSpaces = trim($this->currentLine, ' ');

    return '' === $withoutSpaces;
}
||
669 | |||
/**
 * Tells whether the first non-space character of the current line is "#".
 *
 * @return bool Returns true if the current line is a comment line, false otherwise
 */
private function isCurrentLineComment()
{
    // looking at a single character of the left-trimmed line avoids scanning the whole line
    $content = ltrim($this->currentLine, ' ');

    return isset($content[0]) && '#' === $content[0];
}
||
682 | |||
/**
 * Normalizes a YAML string before it is split into lines.
 *
 * Line endings are converted to "\n"; a leading %YAML directive, leading
 * comment lines and a leading "---" marker are removed (together with the
 * "..." end marker when a "---" marker was found). The parser offset is
 * increased by the number of removed lines.
 *
 * @param string $value The input YAML string
 *
 * @return string A cleaned up YAML string
 */
private function cleanup($value)
{
    $value = str_replace(array("\r\n", "\r"), "\n", $value);

    // strip YAML header
    $replacements = 0;
    $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $replacements);
    $this->offset += $replacements;

    // remove leading comments
    $withoutComments = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $replacements);
    if (1 == $replacements) {
        // items have been removed, update the offset
        $removedLines = substr_count($value, "\n") - substr_count($withoutComments, "\n");
        $this->offset += $removedLines;
        $value = $withoutComments;
    }

    // remove start of the document marker (---)
    $withoutMarker = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $replacements);
    if (1 != $replacements) {
        return $value;
    }

    // items have been removed, update the offset
    $removedLines = substr_count($value, "\n") - substr_count($withoutMarker, "\n");
    $this->offset += $removedLines;

    // remove end of the document marker (...)
    return preg_replace('#\.\.\.\s*$#', '', $withoutMarker);
}
||
720 | |||
/**
 * Tells whether the next non-empty line is a "- " item at the same indentation as the current line.
 *
 * Blank and comment lines are skipped while looking ahead. When a candidate
 * line is found the parser is moved back by one line; when the end of the
 * input is reached the parser is left where the look-ahead stopped.
 *
 * @return bool Returns true if the next line starts unindented collection, false otherwise
 */
private function isNextLineUnIndentedCollection()
{
    $referenceIndent = $this->getCurrentLineIndentation();

    do {
        $hasLine = $this->moveToNextLine();
    } while ($hasLine && $this->isCurrentLineEmpty());

    if (!$hasLine) {
        return false;
    }

    $startsCollection = $this->getCurrentLineIndentation() == $referenceIndent
        && $this->isStringUnIndentedCollectionItem();

    $this->moveToPreviousLine();

    return $startsCollection;
}
||
752 | |||
/**
 * Tells whether the current line begins with "- " (a dash followed by a space) at column zero.
 *
 * @return bool Returns true if the string is un-indented collection item, false otherwise
 */
private function isStringUnIndentedCollectionItem()
{
    return 0 === strncmp($this->currentLine, '- ', 2);
}
||
762 | |||
/**
 * Tells whether the current line ends with a block scalar header
 * (as described by BLOCK_SCALAR_HEADER_PATTERN).
 *
 * @return bool
 */
private function isBlockScalarHeader()
{
    $pattern = '~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~';

    return 1 === preg_match($pattern, $this->currentLine);
}
||
772 | } |
||
773 |