Total Complexity | 78 |
Total Lines | 425 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like XliffParserV2 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use XliffParserV2, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
14 | class XliffParserV2 extends AbstractXliffParser { |
||
15 | /** |
||
16 | * @inheritDoc |
||
17 | * @throws Exception |
||
18 | */ |
||
19 | public function parse( DOMDocument $dom, ?array $output = [] ): array { |
||
20 | $i = 1; |
||
21 | /** @var DOMElement $file */ |
||
22 | foreach ( $dom->getElementsByTagName( 'file' ) as $file ) { |
||
23 | |||
24 | // metadata |
||
25 | $output[ 'files' ][ $i ][ 'attr' ] = $this->extractMetadata( $dom ); |
||
26 | |||
27 | // notes |
||
28 | $output[ 'files' ][ $i ][ 'notes' ] = $this->extractNotes( $file ); |
||
29 | |||
30 | // trans-units |
||
31 | $transUnitIdArrayForUniquenessCheck = []; |
||
32 | $j = 1; |
||
33 | /** @var DOMElement $transUnit */ |
||
34 | foreach ( $file->childNodes as $childNode ) { |
||
35 | $this->extractTuFromNode( $childNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j ); |
||
36 | } |
||
37 | |||
38 | // trans-unit re-count check |
||
39 | $totalTransUnitsId = count( $transUnitIdArrayForUniquenessCheck ); |
||
40 | $transUnitsUniqueId = count( array_unique( $transUnitIdArrayForUniquenessCheck ) ); |
||
41 | if ( $totalTransUnitsId != $transUnitsUniqueId ) { |
||
42 | throw new DuplicateTransUnitIdInXliff( "Invalid trans-unit id, duplicate found.", 400 ); |
||
43 | } |
||
44 | |||
45 | $i++; |
||
46 | } |
||
47 | |||
48 | return $output; |
||
49 | } |
||
50 | |||
51 | /** |
||
52 | * @param DOMDocument $dom |
||
53 | * |
||
54 | * @return array |
||
55 | */ |
||
56 | private function extractMetadata( DOMDocument $dom ): array { |
||
57 | $metadata = []; |
||
58 | |||
59 | $xliffNode = $dom->getElementsByTagName( 'xliff' )->item( 0 ); |
||
60 | $fileNode = $dom->getElementsByTagName( 'file' )->item( 0 ); |
||
61 | |||
62 | // original |
||
63 | $metadata[ 'original' ] = ( null !== $fileNode->attributes->getNamedItem( 'original' ) ) ? $fileNode->attributes->getNamedItem( 'original' )->nodeValue : 'no-name'; |
||
|
|||
64 | |||
65 | // source-language |
||
66 | $metadata[ 'source-language' ] = ( null !== $xliffNode->attributes->getNamedItem( 'srcLang' ) ) ? $xliffNode->attributes->getNamedItem( 'srcLang' )->nodeValue : 'en-US'; |
||
67 | |||
68 | // datatype |
||
69 | // @TODO to be implemented |
||
70 | |||
71 | // target-language |
||
72 | $metadata[ 'target-language' ] = ( null !== $xliffNode->attributes->getNamedItem( 'trgLang' ) ) ? $xliffNode->attributes->getNamedItem( 'trgLang' )->nodeValue : 'en-US'; |
||
73 | |||
74 | // custom MateCat x-attribute |
||
75 | // @TODO to be implemented |
||
76 | |||
77 | return $metadata; |
||
78 | } |
||
79 | |||
80 | /** |
||
81 | * @param DOMElement $file |
||
82 | * |
||
83 | * @return array |
||
84 | * @throws Exception |
||
85 | */ |
||
86 | private function extractNotes( DOMElement $file ): array { |
||
102 | } |
||
103 | |||
104 | /** |
||
105 | * Extract and populate 'trans-units' array |
||
106 | * |
||
107 | * @param DOMElement $transUnit |
||
108 | * @param array $transUnitIdArrayForUniquenessCheck |
||
109 | * @param DOMDocument $dom |
||
110 | * @param array $output |
||
111 | * @param int $i |
||
112 | * @param int $j |
||
113 | * @param array|null $contextGroups |
||
114 | * |
||
115 | * @throws Exception |
||
116 | */ |
||
117 | protected function extractTransUnit( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck, DomDocument $dom, array &$output, int &$i, int &$j, ?array $contextGroups = [] ) { |
||
118 | // metadata |
||
119 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'attr' ] = $this->extractTransUnitMetadata( $transUnit, $transUnitIdArrayForUniquenessCheck ); |
||
120 | |||
121 | // notes |
||
122 | // merge <notes> with key and key-note contained in metadata <mda:metaGroup> |
||
123 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'notes' ] = $this->extractTransUnitNotes( $transUnit ); |
||
124 | |||
125 | // uuid |
||
126 | foreach ( $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'notes' ] as $note ) { |
||
127 | if ( isset( $note[ 'raw-content' ] ) && Strings::isAValidUuid( $note[ 'raw-content' ] ) ) { |
||
128 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'attr' ][ 'uuid' ] = $note[ 'raw-content' ]; |
||
129 | } |
||
130 | } |
||
131 | |||
132 | // original-data (exclusive for V2) |
||
133 | // http://docs.oasis-open.org/xliff/xliff-core/v2.0/xliff-core-v2.0.html#originaldata |
||
134 | $originalData = $this->extractTransUnitOriginalData( $transUnit ); |
||
135 | if ( !empty( $originalData ) ) { |
||
136 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'original-data' ] = $originalData; |
||
137 | } |
||
138 | |||
139 | // additionalTagData (exclusive for V2) |
||
140 | $additionalTagData = $this->extractTransUnitAdditionalTagData( $transUnit ); |
||
141 | if ( !empty( $additionalTagData ) ) { |
||
142 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'additional-tag-data' ] = $additionalTagData; |
||
143 | } |
||
144 | |||
145 | // content |
||
146 | |||
147 | $source = [ |
||
148 | 'attr' => [], |
||
149 | 'raw-content' => [], |
||
150 | ]; |
||
151 | |||
152 | $target = [ |
||
153 | 'attr' => [], |
||
154 | 'raw-content' => [], |
||
155 | ]; |
||
156 | |||
157 | $segSource = []; |
||
158 | $segTarget = []; |
||
159 | |||
160 | /** @var DOMElement $segment */ |
||
161 | $c = 0; |
||
162 | foreach ( $transUnit->childNodes as $segment ) { |
||
163 | if ( $segment->nodeName === 'segment' ) { |
||
164 | |||
165 | // check segment id consistency |
||
166 | $attr = $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'attr' ]; |
||
167 | $this->checkSegmentIdConsistency( $segment, $attr ); |
||
168 | |||
169 | // loop <segment> to get nested <source> and <target> tag |
||
170 | foreach ( $segment->childNodes as $childNode ) { |
||
171 | if ( $childNode->nodeName === 'source' ) { |
||
172 | $extractedSource = $this->extractContent( $dom, $childNode ); |
||
173 | $source[ 'raw-content' ][ $c ] = $extractedSource[ 'raw-content' ]; |
||
174 | |||
175 | if ( !empty( $extractedSource[ 'attr' ] ) ) { |
||
176 | $source[ 'attr' ][ $c ] = $extractedSource[ 'attr' ]; |
||
177 | } |
||
178 | |||
179 | // append value to 'seg-source' |
||
180 | if ( $this->stringContainsMarks( $extractedSource[ 'raw-content' ] ) ) { |
||
181 | $segSource = $this->extractContentWithMarksAndExtTags( $dom, $childNode ); |
||
182 | } else { |
||
183 | $segSource[] = [ |
||
184 | 'attr' => $this->extractTagAttributes( $segment ), |
||
185 | 'mid' => count( $segSource ) > 0 ? count( $segSource ) : 0, |
||
186 | 'ext-prec-tags' => '', |
||
187 | 'raw-content' => $extractedSource[ 'raw-content' ], |
||
188 | 'ext-succ-tags' => '', |
||
189 | ]; |
||
190 | } |
||
191 | } |
||
192 | |||
193 | if ( $childNode->nodeName === 'target' ) { |
||
194 | $extractedTarget = $this->extractContent( $dom, $childNode ); |
||
195 | $target[ 'raw-content' ][ $c ] = $extractedTarget[ 'raw-content' ]; |
||
196 | |||
197 | if ( !empty( $extractedTarget[ 'attr' ] ) ) { |
||
198 | $target[ 'attr' ][ $c ] = $extractedTarget[ 'attr' ]; |
||
199 | } |
||
200 | |||
201 | // append value to 'seg-target' |
||
202 | if ( $this->stringContainsMarks( $extractedTarget[ 'raw-content' ] ) ) { |
||
203 | $segTarget = $this->extractContentWithMarksAndExtTags( $dom, $childNode ); |
||
204 | } else { |
||
205 | $segTarget[] = [ |
||
206 | 'attr' => $this->extractTagAttributes( $segment ), |
||
207 | 'mid' => count( $segTarget ) > 0 ? count( $segTarget ) : 0, |
||
208 | 'ext-prec-tags' => '', |
||
209 | 'raw-content' => $extractedTarget[ 'raw-content' ], |
||
210 | 'ext-succ-tags' => '', |
||
211 | ]; |
||
212 | } |
||
213 | } |
||
214 | } |
||
215 | |||
216 | $c++; |
||
217 | } |
||
218 | } |
||
219 | |||
220 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'source' ] = $source; |
||
221 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'target' ] = $target; |
||
222 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-source' ] = $segSource; |
||
223 | $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-target' ] = $segTarget; |
||
224 | |||
225 | $j++; |
||
226 | } |
||
227 | |||
228 | /** |
||
229 | * @param DOMElement $transUnit |
||
230 | * @param array|null $transUnitIdArrayForUniquenessCheck |
||
231 | * |
||
232 | * @return array |
||
233 | */ |
||
234 | private function extractTransUnitMetadata( DOMElement $transUnit, ?array &$transUnitIdArrayForUniquenessCheck = [] ): array { |
||
235 | $metadata = []; |
||
236 | |||
237 | // id |
||
238 | if ( null === $transUnit->attributes->getNamedItem( 'id' ) ) { |
||
239 | throw new NotFoundIdInTransUnit( 'Invalid trans-unit id found. EMPTY value', 400 ); |
||
240 | } |
||
241 | |||
242 | $id = $transUnit->attributes->getNamedItem( 'id' )->nodeValue; |
||
243 | |||
244 | if ( strlen( $id ) > 100 ) { |
||
245 | throw new SegmentIdTooLongException( 'Segment-id too long. Max 100 characters allowed', 400 ); |
||
246 | } |
||
247 | |||
248 | $transUnitIdArrayForUniquenessCheck[] = $id; |
||
249 | $metadata[ 'id' ] = $id; |
||
250 | |||
251 | // translate |
||
252 | if ( null !== $transUnit->attributes->getNamedItem( 'translate' ) ) { |
||
253 | $metadata[ 'translate' ] = $transUnit->attributes->getNamedItem( 'translate' )->nodeValue; |
||
254 | } |
||
255 | |||
256 | // tGroupBegin |
||
257 | if ( null !== $transUnit->attributes->getNamedItem( 'tGroupBegin' ) ) { |
||
258 | $metadata[ 'tGroupBegin' ] = $transUnit->attributes->getNamedItem( 'tGroupBegin' )->nodeValue; |
||
259 | } |
||
260 | |||
261 | // tGroupEnd |
||
262 | if ( null !== $transUnit->attributes->getNamedItem( 'tGroupEnd' ) ) { |
||
263 | $metadata[ 'tGroupEnd' ] = $transUnit->attributes->getNamedItem( 'tGroupEnd' )->nodeValue; |
||
264 | } |
||
265 | |||
266 | // sizeRestriction |
||
267 | if ( null !== $transUnit->attributes->getNamedItem( 'sizeRestriction' ) && '' !== $transUnit->attributes->getNamedItem( 'sizeRestriction' )->nodeValue ) { |
||
268 | $metadata[ 'sizeRestriction' ] = (int)$transUnit->attributes->getNamedItem( 'sizeRestriction' )->nodeValue; |
||
269 | } |
||
270 | |||
271 | return $metadata; |
||
272 | } |
||
273 | |||
274 | /** |
||
275 | * @param DOMElement $transUnit |
||
276 | * |
||
277 | * @return array |
||
278 | * @throws Exception |
||
279 | */ |
||
280 | private function extractTransUnitOriginalData( DOMElement $transUnit ): array { |
||
281 | $originalData = []; |
||
282 | |||
283 | // loop <originalData> to get nested content |
||
284 | foreach ( $transUnit->childNodes as $childNode ) { |
||
285 | if ( $childNode->nodeName === 'originalData' ) { |
||
286 | foreach ( $childNode->childNodes as $data ) { |
||
287 | if ( null !== $data->attributes && null !== $data->attributes->getNamedItem( 'id' ) ) { |
||
288 | $dataId = $data->attributes->getNamedItem( 'id' )->nodeValue; |
||
289 | |||
290 | $dataValue = str_replace( Placeholder::WHITE_SPACE_PLACEHOLDER, ' ', $data->nodeValue ); |
||
291 | $dataValue = str_replace( Placeholder::NEW_LINE_PLACEHOLDER, '\n', $dataValue ); |
||
292 | $dataValue = str_replace( Placeholder::TAB_PLACEHOLDER, '\t', $dataValue ); |
||
293 | |||
294 | if ( '' !== $dataValue ) { |
||
295 | |||
296 | $jsonOrRawContentArray = $this->JSONOrRawContentArray( $dataValue, false ); |
||
297 | |||
298 | // restore xliff tags |
||
299 | if ( isset( $jsonOrRawContentArray[ 'json' ] ) ) { |
||
300 | $jsonOrRawContentArray[ 'json' ] = str_replace( [ Placeholder::LT_PLACEHOLDER, Placeholder::GT_PLACEHOLDER ], [ '<', '>' ], $jsonOrRawContentArray[ 'json' ] ); |
||
301 | } |
||
302 | |||
303 | if ( isset( $jsonOrRawContentArray[ 'raw-content' ] ) ) { |
||
304 | $jsonOrRawContentArray[ 'raw-content' ] = str_replace( [ Placeholder::LT_PLACEHOLDER, Placeholder::GT_PLACEHOLDER ], [ '<', '>' ], $jsonOrRawContentArray[ 'raw-content' ] ); |
||
305 | } |
||
306 | |||
307 | $originalData[] = array_merge( |
||
308 | $jsonOrRawContentArray, |
||
309 | [ |
||
310 | 'attr' => [ |
||
311 | 'id' => $dataId |
||
312 | ] |
||
313 | ] |
||
314 | ); |
||
315 | } |
||
316 | } |
||
317 | } |
||
318 | } |
||
319 | } |
||
320 | |||
321 | return $originalData; |
||
322 | } |
||
323 | |||
324 | /** |
||
325 | * @param DOMElement $transUnit |
||
326 | * |
||
327 | * @return array |
||
328 | */ |
||
329 | private function extractTransUnitAdditionalTagData( DOMElement $transUnit ): array { |
||
330 | $additionalTagData = []; |
||
331 | |||
332 | // loop <originalData> to get nested content |
||
333 | foreach ( $transUnit->childNodes as $childNode ) { |
||
334 | if ( $childNode->nodeName === 'memsource:additionalTagData' ) { |
||
335 | foreach ( $childNode->childNodes as $data ) { |
||
336 | $dataArray = []; |
||
337 | |||
338 | // id |
||
339 | if ( $data->nodeName === 'memsource:tag' ) { |
||
340 | if ( null !== $data->attributes && null !== $data->attributes->getNamedItem( 'id' ) ) { |
||
341 | $dataId = $data->attributes->getNamedItem( 'id' )->nodeValue; |
||
342 | $dataArray[ 'attr' ][ 'id' ] = $dataId; |
||
343 | } |
||
344 | } |
||
345 | |||
346 | // in PHP 7.4 $data->childNodes is an empty DomNodeList, it is iterable with size 0 |
||
347 | // PHP 5.6 check: in php 5.6 $data->childNodes can be null |
||
348 | if ( $data->childNodes != null ) { |
||
349 | |||
350 | // content |
||
351 | foreach ( $data->childNodes as $datum ) { |
||
352 | if ( $datum->nodeName === 'memsource:tagId' ) { |
||
353 | $dataArray[ 'raw-content' ][ 'tagId' ] = $datum->nodeValue; |
||
354 | } |
||
355 | |||
356 | if ( $datum->nodeName === 'memsource:type' ) { |
||
357 | $dataArray[ 'raw-content' ][ 'type' ] = $datum->nodeValue; |
||
358 | } |
||
359 | } |
||
360 | |||
361 | } |
||
362 | |||
363 | if ( !empty( $dataArray ) ) { |
||
364 | $additionalTagData[] = $dataArray; |
||
365 | } |
||
366 | } |
||
367 | } |
||
368 | } |
||
369 | |||
370 | return $additionalTagData; |
||
371 | } |
||
372 | |||
373 | /** |
||
374 | * Check if segment id is present within tGroupBegin and tGroupEnd attributes |
||
375 | * |
||
376 | * @param DOMElement $segment |
||
377 | * @param array $attr |
||
378 | */ |
||
379 | private function checkSegmentIdConsistency( DOMElement $segment, array $attr ) { |
||
380 | if ( isset( $attr[ 'tGroupBegin' ] ) && isset( $attr[ 'tGroupEnd' ] ) && $segment->attributes->getNamedItem( 'id' ) ) { |
||
381 | $id = $segment->attributes->getNamedItem( 'id' )->nodeValue; |
||
382 | $min = (int)$attr[ 'tGroupBegin' ]; |
||
383 | $max = (int)$attr[ 'tGroupEnd' ]; |
||
384 | |||
385 | if ( false === ( ( $min <= $id ) && ( $id <= $max ) ) ) { |
||
386 | if ( $this->logger ) { |
||
387 | $this->logger->warning( 'Segment #' . $id . ' is not included within tGroupBegin and tGroupEnd' ); |
||
388 | } |
||
389 | } |
||
390 | } |
||
391 | } |
||
392 | |||
393 | /** |
||
394 | * @param DOMElement $transUnit |
||
395 | * |
||
396 | * @return array |
||
397 | * @throws Exception |
||
398 | */ |
||
399 | private function extractTransUnitNotes( DOMElement $transUnit ): array { |
||
439 | } |
||
440 | } |
||
441 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.