1 | <?php |
||||
2 | |||||
3 | namespace Matecat\XliffParser\XliffParser; |
||||
4 | |||||
5 | use DOMAttr; |
||||
6 | use DOMDocument; |
||||
7 | use DOMElement; |
||||
8 | use DOMNode; |
||||
9 | use Exception; |
||||
10 | use Matecat\XliffParser\Exception\DuplicateTransUnitIdInXliff; |
||||
11 | use Matecat\XliffParser\Exception\NotFoundIdInTransUnit; |
||||
12 | use Matecat\XliffParser\Exception\SegmentIdTooLongException; |
||||
13 | |||||
14 | class XliffParserV1 extends AbstractXliffParser { |
||||
/**
 * Walk every <file> element of the document and collect its metadata,
 * references and trans-units into $output['files'][<index>].
 *
 * File indexes start at 1. The index only advances after a <body> child of
 * the <file> has been processed.
 *
 * @inheritDoc
 *
 * @throws DuplicateTransUnitIdInXliff when two trans-units collected for the same body share an id
 * @throws Exception
 */
public function parse( DOMDocument $dom, ?array $output = [] ): array {
    // The signature accepts null: normalise it so the declared array return
    // type also holds when the document contains no <file> element.
    $output = $output ?? [];

    $i = 1;
    /** @var DOMElement $file */
    foreach ( $dom->getElementsByTagName( 'file' ) as $file ) {

        // metadata
        $output[ 'files' ][ $i ][ 'attr' ] = $this->extractMetadata( $file );

        // reference: extracted once, stored only when something was found
        $reference = $this->extractReference( $file );
        if ( !empty( $reference ) ) {
            $output[ 'files' ][ $i ][ 'reference' ] = $reference;
        }

        // trans-units
        $transUnitIdArrayForUniquenessCheck = [];
        $j                                  = 1;
        foreach ( $file->childNodes as $body ) {
            if ( $body->nodeName !== 'body' ) {
                continue;
            }

            foreach ( $body->childNodes as $childNode ) {
                $this->extractTuFromNode( $childNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j );
            }

            // trans-unit re-count check: every collected id must be unique
            $totalTransUnitsId  = count( $transUnitIdArrayForUniquenessCheck );
            $transUnitsUniqueId = count( array_unique( $transUnitIdArrayForUniquenessCheck ) );
            if ( $totalTransUnitsId !== $transUnitsUniqueId ) {
                throw new DuplicateTransUnitIdInXliff( "Invalid trans-unit id, duplicate found.", 400 );
            }

            $i++;
        }
    }

    return $output;
}
||||
55 | |||||
/**
 * Collect the attributes of a <file> element.
 *
 * The attributes `original`, `source-language`, `datatype` (stored as
 * `data-type`) and `target-language` are copied into the result. Attributes
 * whose local name contains `x-` or whose qualified name contains `mtc:`
 * (both case-insensitive) are gathered under the `custom` key.
 *
 * @param DOMElement $file
 *
 * @return array
 */
private function extractMetadata( DOMElement $file ): array {
    // XLIFF attribute name => key used in the returned metadata
    $knownAttributes = [
            'original'        => 'original',
            'source-language' => 'source-language',
            'datatype'        => 'data-type',
            'target-language' => 'target-language',
    ];

    $metadata = [];

    /** @var DOMAttr $attribute */
    foreach ( $file->attributes as $attribute ) {
        $localName = $attribute->localName;

        if ( isset( $knownAttributes[ $localName ] ) ) {
            $metadata[ $knownAttributes[ $localName ] ] = $attribute->value;
        }

        // Custom MateCat x-Attribute
        // NOTE(review): this is a substring test (a name such as "max-width" also
        // matches); presumably a prefix match was intended - confirm before tightening.
        if ( stripos( $localName, 'x-' ) !== false ) {
            $metadata[ 'custom' ][ $localName ] = $attribute->value;
        }

        // Custom MateCat namespace Attribute mtc:
        if ( stripos( $attribute->nodeName, 'mtc:' ) !== false ) {
            $metadata[ 'custom' ][ $attribute->nodeName ] = $attribute->value;
        }
    }

    return $metadata;
}
||||
110 | |||||
/**
 * Collect the <internal-file> children of every <reference> element found
 * below the given <file>.
 *
 * Each entry holds the `form` attribute under `form-type` (null when the
 * attribute is absent) and the trimmed node value under `base64`.
 *
 * @param DOMElement $file
 *
 * @return array zero-indexed list of references, empty when none is found
 */
private function extractReference( DOMElement $file ): array {
    $reference = [];

    foreach ( $file->getElementsByTagName( 'reference' ) as $ref ) {
        /** @var DOMNode $childNode */
        foreach ( $ref->childNodes as $childNode ) {
            if ( $childNode->nodeName !== 'internal-file' ) {
                continue;
            }

            // getNamedItem() returns null for a missing attribute: guard it
            // instead of reading a property on null.
            $form = $childNode->attributes->getNamedItem( 'form' );

            $reference[] = [
                    'form-type' => null !== $form ? $form->nodeValue : null,
                    'base64'    => trim( $childNode->nodeValue ),
            ];
        }
    }

    return $reference;
}
||||
133 | |||||
/**
 * Extract and populate 'trans-units' array
 *
 * Fills $output['files'][$i]['trans-units'][$j] with the attributes, notes,
 * source, seg-source, target, seg-target, locked flag, context-groups and
 * alt-trans of the given trans-unit, then increments $j.
 *
 * @param DOMElement  $transUnit
 * @param array       $transUnitIdArrayForUniquenessCheck receives the id of this trans-unit
 * @param DOMDocument $dom
 * @param array       $output
 * @param int         $i index of the current file
 * @param int         $j index of the current trans-unit, incremented on exit
 * @param array|null  $contextGroups context-group elements supplied by the caller; they are
 *                                   stored before the ones found inside the trans-unit
 *
 * @throws Exception
 */
protected function extractTransUnit( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck, DOMDocument $dom, array &$output, int &$i, int &$j, ?array $contextGroups = [] ) {
    // metadata (evaluated before $output is touched, so a failure leaves it unchanged)
    $attr = $this->extractTransUnitMetadata( $transUnit, $transUnitIdArrayForUniquenessCheck );

    // Work on the current slot through a reference instead of repeating the full path.
    $unit           = &$output[ 'files' ][ $i ][ 'trans-units' ][ $j ];
    $unit[ 'attr' ] = $attr;

    // notes
    $unit[ 'notes' ] = $this->extractTransUnitNotes( $dom, $transUnit );

    // content
    /** @var DOMElement $childNode */
    foreach ( $transUnit->childNodes as $childNode ) {
        switch ( $childNode->nodeName ) {
            case 'source':
                $unit[ 'source' ] = $this->extractContent( $dom, $childNode );
                break;

            case 'seg-source':
                // <source> may be missing or may not have been visited yet
                $rawSegment = $unit[ 'source' ][ 'raw-content' ] ?? null;
                // NOTE(review): static analysis reported that the callee may declare fewer
                // parameters than are passed here - confirm whether $rawSegment is still used.
                $unit[ 'seg-source' ] = $this->extractContentWithMarksAndExtTags( $dom, $childNode, $rawSegment );
                break;

            case 'target':
                $unit[ 'target' ] = $this->extractContent( $dom, $childNode );

                // seg-target: only when the target has content and a seg-source was collected
                $targetRawContent = $unit[ 'target' ][ 'raw-content' ] ?? null;
                $segSource        = $unit[ 'seg-source' ] ?? null;
                if ( !empty( $targetRawContent ) && !empty( $segSource ) ) {
                    $unit[ 'seg-target' ]                = $this->extractContentWithMarksAndExtTags( $dom, $childNode );
                    $unit[ 'seg-target' ][ 0 ][ 'attr' ] = $this->extractTagAttributes( $childNode );
                }
                break;

            case 'sdl:seg':
                // locked
                $unit[ 'locked' ] = $this->extractLocked( $childNode );
                break;
        }
    }

    // context-group: the ones handed over by the caller first ...
    if ( !empty( $contextGroups ) ) {
        foreach ( $contextGroups as $contextGroup ) {
            $unit[ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
        }
    }

    // ... then every context-group found below the trans-unit itself
    foreach ( $transUnit->getElementsByTagName( 'context-group' ) as $contextGroup ) {
        $unit[ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
    }

    // alt-trans
    foreach ( $transUnit->getElementsByTagName( 'alt-trans' ) as $altTrans ) {
        $unit[ 'alt-trans' ][] = $this->extractTransUnitAltTrans( $altTrans );
    }

    // drop the reference before the index it was bound with changes
    unset( $unit );

    $j++;
}
||||
205 | |||||
/**
 * Collect the attributes of a trans-unit.
 *
 * The `id` is mandatory; it is appended to $transUnitIdArrayForUniquenessCheck.
 * `approved` is converted to a boolean, `maxwidth` is reduced to its integer
 * characters and duplicated under `sizeRestriction`; every other attribute is
 * copied as is under its node name.
 *
 * @param DOMElement $transUnit
 * @param array      $transUnitIdArrayForUniquenessCheck
 *
 * @return array
 * @throws NotFoundIdInTransUnit when the trans-unit has no id attribute
 * @throws SegmentIdTooLongException when strlen() of the id exceeds 100
 * @throws Exception
 */
private function extractTransUnitMetadata( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck ): array {
    // id MUST NOT be null
    if ( !$transUnit->hasAttribute( 'id' ) ) {
        throw new NotFoundIdInTransUnit( 'Invalid trans-unit id found. EMPTY value', 400 );
    }

    $metadata = [];

    /**
     * @var DOMAttr $element
     */
    foreach ( $transUnit->attributes as $element ) {
        $name  = $element->nodeName;
        $value = $element->nodeValue;

        switch ( $name ) {
            case 'id':
                // NOTE(review): strlen() counts bytes, so multi-byte ids reach the limit
                // before 100 characters - confirm whether bytes or characters are meant.
                if ( strlen( $value ) > 100 ) {
                    throw new SegmentIdTooLongException( 'Segment-id too long. Max 100 characters allowed', 400 );
                }

                $transUnitIdArrayForUniquenessCheck[] = $value;
                $metadata[ 'id' ]                     = $value;
                break;

            case 'approved':
                // approved as BOOLEAN
                // http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html#approved
                $metadata[ $name ] = filter_var( $value, FILTER_VALIDATE_BOOLEAN );
                break;

            case 'maxwidth':
                // we ignore ( but we get ) the attribute size-unit="char" assuming that a restriction is always expressed in characters
                // we duplicate the info to allow Xliff V1 and V2 to work the same
                $size                          = filter_var( $value, FILTER_SANITIZE_NUMBER_INT );
                $metadata[ 'sizeRestriction' ] = $size;
                $metadata[ $name ]             = $size;
                break;

            default:
                $metadata[ $name ] = $value;
        }
    }

    return $metadata;
}
||||
254 | |||||
/**
 * Gather every <note> element found below the trans-unit.
 *
 * Notes whose extracted content is an empty string are skipped. For the
 * others, the array returned by JSONOrRawContentArray() is completed with
 * all the attributes of the <note> element (attribute name => value).
 *
 * @param DOMDocument $dom
 * @param DOMElement  $transUnit
 *
 * @return array
 * @throws Exception
 */
private function extractTransUnitNotes( DOMDocument $dom, DOMElement $transUnit ): array {
    $collected = [];

    foreach ( $transUnit->getElementsByTagName( 'note' ) as $noteNode ) {
        $content = $this->extractTagContent( $dom, $noteNode );

        // nothing to store for an empty note
        if ( '' === $content ) {
            continue;
        }

        $entry = $this->JSONOrRawContentArray( $content );

        // an attribute overrides a content key carrying the same name
        foreach ( $noteNode->attributes as $noteAttribute ) {
            $entry[ $noteAttribute->name ] = $noteAttribute->value;
        }

        $collected[] = $entry;
    }

    return $collected;
}
||||
283 | |||||
/**
 * Describe a <context-group> element: its attributes under `attr` and the
 * content of each direct <context> child under `contexts`.
 *
 * The `contexts` key is only present when at least one <context> child exists.
 *
 * @param DOMDocument $dom
 * @param DOMElement  $contextGroup
 *
 * @return array
 */
private function extractTransUnitContextGroup( DOMDocument $dom, DOMElement $contextGroup ): array {
    $group = [ 'attr' => $this->extractTagAttributes( $contextGroup ) ];

    /** @var DOMNode $node */
    foreach ( $contextGroup->childNodes as $node ) {
        // only direct <context> children are of interest
        if ( $node->nodeName !== 'context' ) {
            continue;
        }

        $group[ 'contexts' ][] = $this->extractContent( $dom, $node );
    }

    return $group;
}
||||
303 | |||||
/**
 * Describe an <alt-trans> element: its attributes under `attr`, the value of
 * its first <source> under `source` and of its first <target> under `target`.
 *
 * `source` is only set when a <source> exists. `target` is always set and is
 * null when no <target> exists, which keeps the shape callers received so far.
 *
 * @param DOMElement $altTrans
 *
 * @return array
 */
private function extractTransUnitAltTrans( DOMElement $altTrans ): array {
    $at = [ 'attr' => $this->extractTagAttributes( $altTrans ) ];

    $source = $altTrans->getElementsByTagName( 'source' )->item( 0 );
    if ( null !== $source ) {
        $at[ 'source' ] = $source->nodeValue;
    }

    // A DOMNodeList is always truthy, so test the node itself: item( 0 ) is
    // null when the list is empty and must not be dereferenced.
    $target         = $altTrans->getElementsByTagName( 'target' )->item( 0 );
    $at[ 'target' ] = null !== $target ? $target->nodeValue : null;

    return $at;
}
||||
323 | |||||
/**
 * Tell whether the given element is flagged as locked.
 *
 * DOMElement::getAttribute() returns an empty string, never null, for a
 * missing attribute, so the value is parsed as a boolean: "true", "1", "on"
 * and "yes" give true; anything else, including a missing attribute, gives false.
 *
 * @param DOMElement $locked
 *
 * @return bool
 */
private function extractLocked( DOMElement $locked ): bool {
    return filter_var( $locked->getAttribute( 'locked' ), FILTER_VALIDATE_BOOLEAN );
}
||||
332 | } |
||||
333 |
This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.
If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is WordPress. Please note the @ignore annotation hint above.