Total Complexity | 82 |
Total Lines | 443 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like XliffParserV2 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use XliffParserV2, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
14 | class XliffParserV2 extends AbstractXliffParser |
||
15 | { |
||
/**
 * Parse an XLIFF 2.x document into the nested output array.
 *
 * For each <file> element, in document order, an entry is written to
 * $output['files'][N] (N starts at 1) holding the file metadata under
 * 'attr' and the file notes under 'notes'. Every child node of the <file>
 * is then passed to extractTuFromNode(), which is not defined in this
 * class (presumably inherited from AbstractXliffParser — confirm).
 *
 * @inheritDoc
 *
 * @param DOMDocument $dom
 * @param array       $output
 *
 * @return array
 * @throws DuplicateTransUnitIdInXliff when the ids collected for one <file> are not unique
 * @throws \Exception
 */
public function parse(DOMDocument $dom, $output = [])
{
    $fileIndex = 1;

    /** @var DOMElement $file */
    foreach ($dom->getElementsByTagName('file') as $file) {

        // metadata
        // extractMetadata() reads 'original' from the FIRST <file> of the document;
        // re-read it from the <file> being processed so that every file entry
        // carries its own value instead of a copy of the first one.
        $fileAttr = $this->extractMetadata($dom);
        $originalAttribute = $file->attributes->getNamedItem('original');
        $fileAttr['original'] = (null !== $originalAttribute) ? $originalAttribute->nodeValue : 'no-name';
        $output['files'][$fileIndex]['attr'] = $fileAttr;

        // notes
        $output['files'][$fileIndex]['notes'] = $this->extractNotes($file);

        // trans-units: ids are collected per <file>, and the unit counter restarts at 1
        $collectedUnitIds = [];
        $unitIndex = 1;
        foreach ($file->childNodes as $childNode) {
            $this->extractTuFromNode($childNode, $collectedUnitIds, $dom, $output, $fileIndex, $unitIndex);
        }

        // uniqueness check: array_unique() drops repeated ids, so a shorter
        // result means at least one id occurred twice in this <file>
        if (count($collectedUnitIds) !== count(array_unique($collectedUnitIds))) {
            throw new DuplicateTransUnitIdInXliff("Invalid trans-unit id, duplicate found.", 400);
        }

        $fileIndex++;
    }

    return $output;
}
||
52 | |||
/**
 * Build the file-level metadata array from the document.
 *
 * Reads 'original' from the first <file> element and 'srcLang' / 'trgLang'
 * from the first <xliff> element. A missing attribute — or a missing
 * element — yields the default: 'no-name' for 'original', 'en-US' for both
 * languages.
 *
 * @param DOMDocument $dom
 *
 * @return array with the keys 'original', 'source-language', 'target-language'
 */
private function extractMetadata(DOMDocument $dom)
{
    // item(0) returns null when the document has no such element, so the
    // reader must tolerate a null node instead of dereferencing it.
    $readAttribute = static function ($node, $name, $default) {
        if (null === $node || null === $node->attributes) {
            return $default;
        }

        $attribute = $node->attributes->getNamedItem($name);

        return (null !== $attribute) ? $attribute->nodeValue : $default;
    };

    $xliffNode = $dom->getElementsByTagName('xliff')->item(0);
    $fileNode = $dom->getElementsByTagName('file')->item(0);

    $metadata = [];

    // original
    $metadata['original'] = $readAttribute($fileNode, 'original', 'no-name');

    // source-language
    $metadata['source-language'] = $readAttribute($xliffNode, 'srcLang', 'en-US');

    // datatype
    // @TODO to be implemented

    // target-language
    $metadata['target-language'] = $readAttribute($xliffNode, 'trgLang', 'en-US');

    // custom MateCat x-attribute
    // @TODO to be implemented

    return $metadata;
}
||
82 | |||
83 | /** |
||
84 | * @param DOMElement $file |
||
85 | * |
||
86 | * @return array |
||
87 | * @throws \Exception |
||
88 | */ |
||
89 | private function extractNotes( DOMElement $file) |
||
106 | } |
||
107 | |||
/**
 * Extract one unit and store it in $output['files'][$i]['trans-units'][$j].
 *
 * Keys written: 'attr', 'notes', optionally 'original-data' and
 * 'additional-tag-data' (only when non-empty), then 'source', 'target',
 * 'seg-source' and 'seg-target'. $j is incremented once the unit is stored.
 *
 * @param $transUnit                          node whose <segment> children are read
 * @param $transUnitIdArrayForUniquenessCheck passed on to extractTransUnitMetadata(), which appends the unit id
 * @param $dom
 * @param $output
 * @param $i                                  index of the current file inside $output['files']
 * @param $j                                  index of the current unit; incremented by this method
 *
 * @throws \Exception
 */
protected function extractTransUnit($transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j)
{
    // metadata
    $output['files'][$i]['trans-units'][$j]['attr'] = $this->extractTransUnitMetadata($transUnit, $transUnitIdArrayForUniquenessCheck);

    // notes
    // merge <notes> with key and key-note contained in metadata <mda:metaGroup>
    $notes = $this->extractTransUnitNotes($transUnit);
    $output['files'][$i]['trans-units'][$j]['notes'] = $notes;

    // uuid: a note whose raw content is a valid UUID is promoted to attr['uuid']
    // (when several notes qualify, the last one wins)
    foreach ($notes as $note) {
        if (isset($note['raw-content']) && Strings::isAValidUuid($note['raw-content'])) {
            $output['files'][$i]['trans-units'][$j]['attr']['uuid'] = $note['raw-content'];
        }
    }

    // original-data (exclusive for V2)
    // http://docs.oasis-open.org/xliff/xliff-core/v2.0/xliff-core-v2.0.html#originaldata
    // $dataRefMap is initialised up front so it is always defined; it is only
    // consulted when $originalData is non-empty.
    $dataRefMap = [];
    $originalData = $this->extractTransUnitOriginalData($transUnit);
    if (!empty($originalData)) {
        $output['files'][$i]['trans-units'][$j]['original-data'] = $originalData;
        $dataRefMap = $this->getDataRefMap($originalData);
    }

    // additionalTagData (exclusive for V2)
    $additionalTagData = $this->extractTransUnitAdditionalTagData($transUnit);
    if (!empty($additionalTagData)) {
        $output['files'][$i]['trans-units'][$j]['additional-tag-data'] = $additionalTagData;
    }

    // content
    $source = [
        'attr'        => [],
        'raw-content' => [],
    ];

    $target = [
        'attr'        => [],
        'raw-content' => [],
    ];

    $segSource = [];
    $segTarget = [];

    // the unit attributes do not change while the segments are read
    $unitAttr = $output['files'][$i]['trans-units'][$j]['attr'];

    // $c counts the <segment> elements seen so far and is used as the index
    // inside 'raw-content', 'replaced-content' and 'attr'
    $c = 0;

    /** @var DOMElement $segment */
    foreach ($transUnit->childNodes as $segment) {
        if ($segment->nodeName !== 'segment') {
            continue;
        }

        // check segment id consistency
        $this->checkSegmentIdConsistency($segment, $unitAttr);

        // loop <segment> to get nested <source> and <target> tag
        foreach ($segment->childNodes as $childNode) {
            if ($childNode->nodeName === 'source') {
                $this->collectSegmentContent($dom, $childNode, $c, $originalData, $dataRefMap, $source, $segSource);
            }

            if ($childNode->nodeName === 'target') {
                $this->collectSegmentContent($dom, $childNode, $c, $originalData, $dataRefMap, $target, $segTarget);
            }
        }

        $c++;
    }

    $output['files'][$i]['trans-units'][$j]['source'] = $source;
    $output['files'][$i]['trans-units'][$j]['target'] = $target;
    $output['files'][$i]['trans-units'][$j]['seg-source'] = $segSource;
    $output['files'][$i]['trans-units'][$j]['seg-target'] = $segTarget;

    $j++;
}

/**
 * Read one <source> or <target> node of a segment and record its content.
 *
 * Shared by the source and the target branch of extractTransUnit(), which
 * perform exactly the same steps on different accumulators.
 *
 * @param       $dom
 * @param       $node         the <source> or <target> node
 * @param int   $c            index of the enclosing <segment>
 * @param array $originalData original data of the unit; when empty no 'replaced-content' is computed
 * @param array $dataRefMap   map handed to DataRefReplacer
 * @param array $content      accumulator with the keys 'attr', 'raw-content' and (optionally) 'replaced-content'
 * @param array $segContent   accumulator for the 'seg-source' / 'seg-target' list
 *
 * @throws \Exception
 */
private function collectSegmentContent($dom, $node, $c, $originalData, $dataRefMap, array &$content, array &$segContent)
{
    $hasOriginalData = !empty($originalData);

    $extracted = $this->extractContent($dom, $node);
    $content['raw-content'][$c] = $extracted['raw-content'];

    if ($hasOriginalData) {
        $content['replaced-content'][$c] = (new DataRefReplacer($dataRefMap))->replace($content['raw-content'][$c]);
    }

    if (!empty($extracted['attr'])) {
        $content['attr'][$c] = $extracted['attr'];
    }

    // content containing marks REPLACES the list collected so far;
    // plain content is appended as a new entry
    if ($this->stringContainsMarks($extracted['raw-content'])) {
        $segContent = $this->extractContentWithMarksAndExtTags($dom, $node, $extracted['raw-content'], $originalData);
    } else {
        $segContent[] = [
            'mid'              => count($segContent),
            'ext-prec-tags'    => '',
            'raw-content'      => $extracted['raw-content'],
            'replaced-content' => $hasOriginalData ? (new DataRefReplacer($dataRefMap))->replace($extracted['raw-content']) : null,
            'ext-succ-tags'    => '',
        ];
    }
}
||
240 | |||
/**
 * Collect the attributes of a unit element.
 *
 * 'id' is mandatory and is also appended to the uniqueness-check array.
 * 'translate', 'tGroupBegin' and 'tGroupEnd' are copied when present;
 * 'sizeRestriction' is copied as an integer when present and not empty.
 *
 * @param DOMElement $transUnit
 * @param            $transUnitIdArrayForUniquenessCheck
 *
 * @return array
 * @throws NotFoundIdInTransUnit     when the element has no 'id' attribute
 * @throws SegmentIdTooLongException when strlen() of the id exceeds 100
 */
private function extractTransUnitMetadata(DOMElement $transUnit, &$transUnitIdArrayForUniquenessCheck)
{
    $attributes = $transUnit->attributes;

    // id
    $idAttribute = $attributes->getNamedItem('id');
    if (null === $idAttribute) {
        throw new NotFoundIdInTransUnit('Invalid trans-unit id found. EMPTY value', 400);
    }

    $unitId = $idAttribute->nodeValue;
    if (strlen($unitId) > 100) {
        throw new SegmentIdTooLongException('Segment-id too long. Max 100 characters allowed', 400);
    }

    // the id is registered only after it passed both checks
    $transUnitIdArrayForUniquenessCheck[] = $unitId;
    $collected = ['id' => $unitId];

    // translate, tGroupBegin, tGroupEnd: copied verbatim when the attribute exists
    foreach (['translate', 'tGroupBegin', 'tGroupEnd'] as $attributeName) {
        $attribute = $attributes->getNamedItem($attributeName);
        if (null !== $attribute) {
            $collected[$attributeName] = $attribute->nodeValue;
        }
    }

    // sizeRestriction: an empty string is skipped rather than cast to 0
    $sizeRestriction = $attributes->getNamedItem('sizeRestriction');
    if (null !== $sizeRestriction && '' !== $sizeRestriction->nodeValue) {
        $collected['sizeRestriction'] = (int)$sizeRestriction->nodeValue;
    }

    return $collected;
}
||
287 | |||
/**
 * Collect the entries nested inside the <originalData> children of a unit.
 *
 * Only nodes carrying an 'id' attribute and a non-empty value (after the
 * placeholder substitution) are returned. Each entry is the array returned
 * by JSONOrRawContentArray() merged with ['attr' => ['id' => <id>]].
 *
 * @param DOMElement $transUnit
 *
 * @return array
 * @throws \Exception
 */
private function extractTransUnitOriginalData(DOMElement $transUnit)
{
    $entries = [];

    foreach ($transUnit->childNodes as $container) {
        if ($container->nodeName !== 'originalData') {
            continue;
        }

        // loop <originalData> to get nested content
        foreach ($container->childNodes as $dataNode) {
            // nodes without an attribute map or without an id are ignored
            if (null === $dataNode->attributes) {
                continue;
            }

            $idAttribute = $dataNode->attributes->getNamedItem('id');
            if (null === $idAttribute) {
                continue;
            }

            // the replacements are applied in array order; the single-quoted
            // '\n' and '\t' are the two-character sequences, not control characters
            $value = str_replace(
                [
                    Placeholder::WHITE_SPACE_PLACEHOLDER,
                    Placeholder::NEW_LINE_PLACEHOLDER,
                    Placeholder::TAB_PLACEHOLDER,
                ],
                [' ', '\n', '\t'],
                $dataNode->nodeValue
            );

            if ('' === $value) {
                continue;
            }

            $content = $this->JSONOrRawContentArray($value, false);

            // restore xliff tags in whichever of the two keys is set
            foreach (['json', 'raw-content'] as $key) {
                if (isset($content[$key])) {
                    $content[$key] = str_replace(
                        [Placeholder::LT_PLACEHOLDER, Placeholder::GT_PLACEHOLDER],
                        ['<', '>'],
                        $content[$key]
                    );
                }
            }

            $entries[] = array_merge(
                $content,
                [
                    'attr' => [
                        'id' => $idAttribute->nodeValue,
                    ],
                ]
            );
        }
    }

    return $entries;
}
||
338 | |||
/**
 * Collect the entries nested inside <memsource:additionalTagData> children
 * of a unit.
 *
 * For every child of such a container an entry is built with
 * - ['attr']['id']: the 'id' attribute, read only from <memsource:tag> nodes;
 * - ['raw-content']['tagId'] / ['raw-content']['type']: the values of nested
 *   <memsource:tagId> / <memsource:type> nodes.
 * Entries that end up empty are dropped.
 *
 * @param DOMElement $transUnit
 *
 * @return array
 */
private function extractTransUnitAdditionalTagData(DOMElement $transUnit)
{
    // nested node name => key used inside 'raw-content'
    $rawContentKeys = [
        'memsource:tagId' => 'tagId',
        'memsource:type'  => 'type',
    ];

    $collected = [];

    // loop <memsource:additionalTagData> to get nested content
    foreach ($transUnit->childNodes as $container) {
        if ('memsource:additionalTagData' !== $container->nodeName) {
            continue;
        }

        foreach ($container->childNodes as $tagNode) {
            $entry = [];

            // id
            if ('memsource:tag' === $tagNode->nodeName && null !== $tagNode->attributes) {
                $idAttribute = $tagNode->attributes->getNamedItem('id');
                if (null !== $idAttribute) {
                    $entry['attr']['id'] = $idAttribute->nodeValue;
                }
            }

            // in PHP 7.4 childNodes is an empty DomNodeList, it is iterable with size 0
            // PHP 5.6 check: in php 5.6 childNodes can be null
            if ($tagNode->childNodes != null) {
                // content
                foreach ($tagNode->childNodes as $detailNode) {
                    if (isset($rawContentKeys[$detailNode->nodeName])) {
                        $entry['raw-content'][$rawContentKeys[$detailNode->nodeName]] = $detailNode->nodeValue;
                    }
                }
            }

            if (!empty($entry)) {
                $collected[] = $entry;
            }
        }
    }

    return $collected;
}
||
388 | |||
/**
 * Log a warning when the id of a segment lies outside the range given by
 * the 'tGroupBegin' and 'tGroupEnd' attributes of its unit.
 *
 * Nothing happens unless both attributes are set and the segment has an
 * 'id' attribute. The method never throws and returns nothing; a warning
 * is emitted only when a logger is available.
 *
 * @param DOMElement $segment
 * @param array      $attr attributes of the enclosing unit
 */
private function checkSegmentIdConsistency(DOMElement $segment, array $attr)
{
    if (!isset($attr['tGroupBegin'], $attr['tGroupEnd'])) {
        return;
    }

    $idAttribute = $segment->attributes->getNamedItem('id');
    if (!$idAttribute) {
        return;
    }

    $segmentId = $idAttribute->nodeValue;
    $lowerBound = (int)$attr['tGroupBegin'];
    $upperBound = (int)$attr['tGroupEnd'];

    // the id is the raw attribute string; it is compared against the integer bounds as-is
    $insideGroup = ($lowerBound <= $segmentId) && ($segmentId <= $upperBound);

    if (!$insideGroup && $this->logger) {
        $this->logger->warning('Segment #' . $segmentId . ' is not included within tGroupBegin and tGroupEnd');
    }
}
||
409 | |||
410 | /** |
||
411 | * @param DOMElement $transUnit |
||
412 | * |
||
413 | * @return array |
||
414 | * @throws \Exception |
||
415 | */ |
||
416 | private function extractTransUnitNotes( DOMElement $transUnit) |
||
457 | } |
||
458 | } |
||
459 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely caused by a typographical error, or by the method having been renamed.