Total Complexity | 63 |
Total Lines | 357 |
Duplicated Lines | 0 % |
Changes | 3 | ||
Bugs | 1 | Features | 0 |
Complex classes like XliffParserV1 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use XliffParserV1, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
14 | class XliffParserV1 extends AbstractXliffParser { |
||
/**
 * Walks every <file> element of the document and fills $output[ 'files' ]
 * with its metadata, its internal references, the external-file href found
 * in its <header> and the trans-units found in its <body>.
 *
 * Trans-unit ids are collected per <file>; a DuplicateTransUnitIdInXliff
 * (code 400) is thrown as soon as a <body> contains a repeated id.
 *
 * @inheritDoc
 * @throws Exception
 */
public function parse( DOMDocument $dom, ?array $output = [] ): array {
    // $output is nullable, the return type is not: without this, a null
    // $output combined with a document that has no <file> would be returned as null.
    $output = $output ?? [];

    $i = 1;
    /** @var DOMElement $file */
    foreach ( $dom->getElementsByTagName( 'file' ) as $file ) {

        // metadata
        $output[ 'files' ][ $i ][ 'attr' ] = $this->extractMetadata( $file );

        // reference: extracted once, stored only when something was found
        $reference = $this->extractReference( $file );
        if ( !empty( $reference ) ) {
            $output[ 'files' ][ $i ][ 'reference' ] = $reference;
        }

        // trans-units: the id list and the trans-unit counter restart for every <file>
        $transUnitIdArrayForUniquenessCheck = [];
        $j                                  = 1;
        foreach ( $file->childNodes as $body ) {

            // external-file
            if ( $body->nodeName === 'header' ) {
                foreach ( $body->childNodes as $header ) {
                    $this->extractExternalFile( $header, $i, $output );
                }
            }

            if ( $body->nodeName === 'body' ) {
                foreach ( $body->childNodes as $childNode ) {
                    $this->extractTuFromNode( $childNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j );
                }

                // trans-unit re-count check
                $totalTransUnitsId  = count( $transUnitIdArrayForUniquenessCheck );
                $transUnitsUniqueId = count( array_unique( $transUnitIdArrayForUniquenessCheck ) );
                if ( $totalTransUnitsId !== $transUnitsUniqueId ) {
                    throw new DuplicateTransUnitIdInXliff( "Invalid trans-unit id, duplicate found.", 400 );
                }

                // The file index only advances once a <body> has been processed.
                // NOTE(review): a <file> without a <body> therefore shares its index
                // with the next <file> - confirm this is intended.
                $i++;
            }
        }
    }

    return $output;
}
||
63 | |||
/**
 * Stores the href of an <external-file> found under a header child node
 * in $output[ 'files' ][ $i ][ 'attr' ][ 'external-file' ].
 *
 * Recognised layouts:
 *  - <skl><external-file/></skl>
 *  - <skl><reference><external-file/></reference></skl>
 *  - <reference><external-file/></reference>
 *
 * Any other node is ignored. When several <external-file> nodes are found,
 * the last one visited wins because each one overwrites the same key.
 *
 * @param DOMNode $header child node of the <header> element
 * @param         $i      index of the current file inside $output[ 'files' ]
 * @param         $output parser output, modified in place
 */
private function extractExternalFile( DOMNode $header, $i, &$output ) {

    $wrapperName = $header->nodeName;
    if ( $wrapperName !== "skl" && $wrapperName !== "reference" ) {
        return;
    }

    foreach ( $header->childNodes as $node ) {

        // direct <external-file> child, valid under both <skl> and <reference>
        if ( $node->nodeName === "external-file" ) {
            $output[ 'files' ][ $i ][ 'attr' ][ 'external-file' ] = $node->getAttribute( "href" );
            continue;
        }

        // only <skl> is searched one level deeper, through an intermediate <reference>
        if ( $wrapperName === "skl" && $node->nodeName === "reference" ) {
            foreach ( $node->childNodes as $nestedNode ) {
                if ( $nestedNode->nodeName === "external-file" ) {
                    $output[ 'files' ][ $i ][ 'attr' ][ 'external-file' ] = $nestedNode->getAttribute( "href" );
                }
            }
        }
    }
}
||
94 | |||
/**
 * Collects the attributes of a <file> element that the parser keeps.
 *
 * original, source-language, datatype (stored under the 'data-type' key)
 * and target-language are copied by name. Attributes whose local name
 * matches the x- pattern, or whose qualified name matches the mtc: pattern,
 * are gathered under the 'custom' key.
 *
 * @param DOMElement $file
 *
 * @return array
 */
private function extractMetadata( DOMElement $file ): array {
    // attribute local name => key used in the returned array
    $knownAttributes = [
            'original'        => 'original',
            'source-language' => 'source-language',
            'datatype'        => 'data-type',
            'target-language' => 'target-language',
    ];

    $collected = [];
    $custom    = [];

    /** @var DOMAttr $attr */
    foreach ( $file->attributes as $attr ) {
        $localName = $attr->localName;

        if ( isset( $knownAttributes[ $localName ] ) ) {
            $collected[ $knownAttributes[ $localName ] ] = $attr->value;
        }

        // Custom MateCat x-Attribute
        // NOTE(review): the pattern is not anchored, so "x-" anywhere in the name matches - confirm intended.
        if ( preg_match( '|x-(.*?)|si', $localName ) === 1 ) {
            $custom[ $localName ] = $attr->value;
        }

        // Custom MateCat namespace Attribute mtc:
        if ( preg_match( '|mtc:(.*?)|si', $attr->nodeName ) === 1 ) {
            $custom[ $attr->nodeName ] = $attr->value;
        }

        // Refreshed on every iteration: 'custom' keeps the position it had
        // when the first custom attribute was met.
        if ( $custom !== [] ) {
            $collected[ 'custom' ] = $custom;
        }
    }

    return $collected;
}
||
149 | |||
/**
 * Collects every <internal-file> that is a direct child of a <reference>
 * element found under the given <file>.
 *
 * Each entry of the returned list holds:
 *  - 'form-type': value of the form attribute, or null when the attribute is absent
 *  - 'base64':    trimmed node value of the <internal-file>
 *
 * @param DOMElement $file
 *
 * @return array zero-based list, empty when no <internal-file> is found
 */
private function extractReference( DOMElement $file ): array {
    $reference = [];

    foreach ( $file->getElementsByTagName( 'reference' ) as $ref ) {
        /** @var DOMNode $childNode */
        foreach ( $ref->childNodes as $childNode ) {
            if ( $childNode->nodeName !== 'internal-file' ) {
                continue;
            }

            // getNamedItem() yields null when the attribute is missing; reading
            // ->nodeValue on it directly raises a warning, so it is guarded here.
            $formAttribute = $childNode->attributes->getNamedItem( 'form' );

            $reference[] = [
                    'form-type' => $formAttribute !== null ? $formAttribute->nodeValue : null,
                    'base64'    => trim( $childNode->nodeValue ),
            ];
        }
    }

    return $reference;
}
||
172 | |||
/**
 * Extract and populate 'trans-units' array
 *
 * Writes one entry at $output[ 'files' ][ $i ][ 'trans-units' ][ $j ] holding,
 * when available: 'attr', 'notes', 'source', 'seg-source', 'target',
 * 'seg-target', 'locked', 'context-group' and 'alt-trans'.
 * $j is incremented before returning.
 *
 * @param DOMElement  $transUnit
 * @param array       $transUnitIdArrayForUniquenessCheck passed by reference to the metadata extractor
 * @param DOMDocument $dom
 * @param array       $output                             parser output, modified in place
 * @param int         $i                                  index of the current file
 * @param int         $j                                  index of the current trans-unit, incremented on exit
 * @param array|null  $contextGroups                      context-group elements supplied by the caller,
 *                                                        stored before the ones found under $transUnit
 *
 * @throws Exception
 */
protected function extractTransUnit( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck, DomDocument $dom, array &$output, int &$i, int &$j, ?array $contextGroups = [] ) {
    // metadata
    $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'attr' ] = $this->extractTransUnitMetadata( $transUnit, $transUnitIdArrayForUniquenessCheck );

    // notes
    $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'notes' ] = $this->extractTransUnitNotes( $dom, $transUnit );

    // content
    /** @var DOMElement $childNode */
    foreach ( $transUnit->childNodes as $childNode ) {
        // source
        if ( $childNode->nodeName === 'source' ) {
            $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'source' ] = $this->extractContent( $dom, $childNode );
        }

        // seg-source
        if ( $childNode->nodeName === 'seg-source' ) {
            // 'source' is only set if a <source> child was met earlier in this loop;
            // fall back to null instead of reading an undefined key.
            $rawSegment = $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'source' ][ 'raw-content' ] ?? null;

            // NOTE(review): static analysis reports that this call passes more arguments
            // than the callee declares - confirm whether $rawSegment is still used.
            $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-source' ] = $this->extractContentWithMarksAndExtTags( $dom, $childNode, $rawSegment );
        }

        // target
        if ( $childNode->nodeName === 'target' ) {
            $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'target' ] = $this->extractContent( $dom, $childNode );

            // seg-target: only built when the target has content and a seg-source was already recorded
            $targetRawContent = $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'target' ][ 'raw-content' ] ?? null;
            $segSource        = $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-source' ] ?? null;

            if ( !empty( $targetRawContent ) && isset( $segSource ) && count( $segSource ) > 0 ) {
                $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-target' ] = $this->extractContentWithMarksAndExtTags( $dom, $childNode );
                // the attributes of <target> are stored on the first seg-target entry
                $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-target' ][ 0 ][ 'attr' ] = $this->extractTagAttributes( $childNode );
            }
        }

        // locked
        if ( $childNode->nodeName === 'sdl:seg' ) {
            $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'locked' ] = $this->extractLocked( $childNode );
        }
    }

    // context-group: the ones handed in by the caller first...
    if ( !empty( $contextGroups ) ) {
        foreach ( $contextGroups as $contextGroup ) {
            $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
        }
    }

    // ...then the ones found under the trans-unit itself
    foreach ( $transUnit->getElementsByTagName( 'context-group' ) as $contextGroup ) {
        $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
    }

    // alt-trans
    foreach ( $transUnit->getElementsByTagName( 'alt-trans' ) as $altTrans ) {
        $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'alt-trans' ][] = $this->extractTransUnitAltTrans( $altTrans );
    }

    $j++;
}
||
245 | |||
246 | /** |
||
247 | * @param DOMElement $transUnit |
||
248 | * @param array $transUnitIdArrayForUniquenessCheck |
||
249 | * |
||
250 | * @return array |
||
251 | * @throws Exception |
||
252 | */ |
||
253 | private function extractTransUnitMetadata( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck ): array { |
||
293 | } |
||
294 | |||
/**
 * Builds the list of notes attached to a trans-unit.
 *
 * Every <note> found under $transUnit whose extracted content is not the
 * empty string becomes one entry: the array produced by
 * JSONOrRawContentArray() for that content, with each attribute of the
 * <note> written on top of it under the attribute's name.
 *
 * @param DOMDocument $dom
 * @param DOMElement  $transUnit
 *
 * @return array
 * @throws Exception
 */
private function extractTransUnitNotes( DOMDocument $dom, DOMElement $transUnit ): array {
    $collected = [];

    foreach ( $transUnit->getElementsByTagName( 'note' ) as $noteNode ) {

        $content = $this->extractTagContent( $dom, $noteNode );

        // notes without content are skipped
        if ( '' === $content ) {
            continue;
        }

        $entry = $this->JSONOrRawContentArray( $content );

        // copy all the attributes of the note, overwriting same-named keys
        foreach ( $noteNode->attributes as $noteAttribute ) {
            $entry[ $noteAttribute->name ] = $noteAttribute->value;
        }

        $collected[] = $entry;
    }

    return $collected;
}
||
323 | |||
/**
 * Converts a <context-group> element to an array.
 *
 * 'attr' always holds the attributes of the group. 'contexts' lists the
 * extracted content of each direct <context> child and is only present
 * when at least one such child exists.
 *
 * @param DOMDocument $dom
 * @param DOMElement  $contextGroup
 *
 * @return array
 */
private function extractTransUnitContextGroup( DOMDocument $dom, DOMElement $contextGroup ): array {
    $group = [ 'attr' => $this->extractTagAttributes( $contextGroup ) ];

    /** @var DOMNode $child */
    foreach ( $contextGroup->childNodes as $child ) {
        if ( $child->nodeName !== 'context' ) {
            continue;
        }

        $group[ 'contexts' ][] = $this->extractContent( $dom, $child );
    }

    return $group;
}
||
343 | |||
344 | /** |
||
345 | * @param DOMElement $altTrans |
||
346 | * |
||
347 | * @return array |
||
348 | */ |
||
349 | private function extractTransUnitAltTrans( DOMElement $altTrans ) { |
||
362 | } |
||
363 | |||
364 | /** |
||
365 | * @param DOMElement $locked |
||
366 | * |
||
367 | * @return bool |
||
368 | */ |
||
369 | private function extractLocked( DOMElement $locked ) { |
||
371 | } |
||
372 | } |
||
373 |
This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.
If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is WordPress. Please note the @ignore annotation hint above.