XliffParserV1::extractTransUnitMetadata()   B
last analyzed

Complexity

Conditions 7
Paths 7

Size

Total Lines 40
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 18
nc 7
nop 2
dl 0
loc 40
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
namespace Matecat\XliffParser\XliffParser;
4
5
use DOMAttr;
6
use DOMDocument;
7
use DOMElement;
8
use DOMNode;
9
use Exception;
10
use Matecat\XliffParser\Exception\DuplicateTransUnitIdInXliff;
11
use Matecat\XliffParser\Exception\NotFoundIdInTransUnit;
12
use Matecat\XliffParser\Exception\SegmentIdTooLongException;
13
14
class XliffParserV1 extends AbstractXliffParser {
15
    /**
     * Walk every <file> element of the document and fill $output['files'][n] with:
     *  - 'attr'      the attributes returned by extractMetadata(),
     *  - 'reference' the list returned by extractReference() (only when not empty),
     *  - whatever extractTuFromNode() writes for each child node of <body>.
     *
     * @inheritDoc
     *
     * @throws DuplicateTransUnitIdInXliff when the trans-unit ids collected for a <file> are not unique
     * @throws Exception
     */
    public function parse( DOMDocument $dom, ?array $output = [] ): array {
        // The parameter is nullable: normalise an explicit null so that the declared
        // array return type is honoured even when the document contains no <file> element.
        $output = $output ?? [];

        $i = 1;
        /** @var DOMElement $file */
        foreach ( $dom->getElementsByTagName( 'file' ) as $file ) {

            // metadata
            $output[ 'files' ][ $i ][ 'attr' ] = $this->extractMetadata( $file );

            // reference: extracted once and reused instead of walking the DOM twice
            $reference = $this->extractReference( $file );
            if ( !empty( $reference ) ) {
                $output[ 'files' ][ $i ][ 'reference' ] = $reference;
            }

            // trans-units
            $transUnitIdArrayForUniquenessCheck = [];
            $j                                  = 1;
            foreach ( $file->childNodes as $body ) {
                if ( $body->nodeName !== 'body' ) {
                    continue;
                }

                foreach ( $body->childNodes as $childNode ) {
                    $this->extractTuFromNode( $childNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j );
                }

                // trans-unit re-count check: any id seen twice makes the two counts differ
                $totalTransUnitsId  = count( $transUnitIdArrayForUniquenessCheck );
                $transUnitsUniqueId = count( array_unique( $transUnitIdArrayForUniquenessCheck ) );
                if ( $totalTransUnitsId !== $transUnitsUniqueId ) {
                    throw new DuplicateTransUnitIdInXliff( "Invalid trans-unit id, duplicate found.", 400 );
                }

                // the file index advances only after a <body> has been processed
                $i++;
            }
        }

        return $output;
    }
55
56
    /**
     * Collect the attributes of a <file> element.
     *
     * The attributes original, source-language, datatype (returned under the key
     * 'data-type') and target-language are copied to the result. Attributes whose
     * local name starts with "x-", or whose qualified name starts with "mtc:", are
     * gathered under the 'custom' key, indexed by attribute name.
     *
     * @param DOMElement $file
     *
     * @return array
     */
    private function extractMetadata( DOMElement $file ): array {
        // attribute local name => key used in the returned array
        $knownAttributes = [
                'original'        => 'original',
                'source-language' => 'source-language',
                'datatype'        => 'data-type',
                'target-language' => 'target-language',
        ];

        $metadata   = [];
        $customAttr = [];

        /** @var DOMAttr $attribute */
        foreach ( $file->attributes as $attribute ) {
            if ( isset( $knownAttributes[ $attribute->localName ] ) ) {
                $metadata[ $knownAttributes[ $attribute->localName ] ] = $attribute->value;
            }

            // Custom MateCat x-Attribute.
            // The pattern is anchored to the start of the name: an unanchored "x-" also
            // matched names that merely contain it (for example "max-bytes").
            if ( preg_match( '|^x-|i', $attribute->localName ) === 1 ) {
                $customAttr[ $attribute->localName ] = $attribute->value;
            }

            // Custom MateCat namespace Attribute mtc: (matched on the qualified name, prefix included)
            if ( preg_match( '|^mtc:|i', $attribute->nodeName ) === 1 ) {
                $customAttr[ $attribute->nodeName ] = $attribute->value;
            }

            // Assigned inside the loop so that 'custom' keeps the position it had
            // in the returned array (right after the first custom attribute is met).
            if ( !empty( $customAttr ) ) {
                $metadata[ 'custom' ] = $customAttr;
            }
        }

        return $metadata;
    }
110
111
    /**
     * Collect the <internal-file> children of every <reference> element found under $file.
     *
     * Each entry of the returned list has two keys:
     *  - 'form-type' value of the "form" attribute, or null when the attribute is missing,
     *  - 'base64'    trimmed node value of the <internal-file> element.
     *
     * @param DOMElement $file
     *
     * @return array
     */
    private function extractReference( DOMElement $file ): array {
        $reference = [];

        foreach ( $file->getElementsByTagName( 'reference' ) as $ref ) {
            /** @var DOMNode $childNode */
            foreach ( $ref->childNodes as $childNode ) {
                if ( $childNode->nodeName !== 'internal-file' ) {
                    continue;
                }

                // Look the attribute up defensively: getNamedItem() returns null for a
                // missing "form" attribute and must not be dereferenced blindly.
                $form = ( null !== $childNode->attributes ) ? $childNode->attributes->getNamedItem( 'form' ) : null;

                // Appending keeps the same 0-based sequential keys as an explicit counter
                $reference[] = [
                        'form-type' => ( null !== $form ) ? $form->nodeValue : null,
                        'base64'    => trim( $childNode->nodeValue ),
                ];
            }
        }

        return $reference;
    }
133
134
    /**
     * Extract and populate 'trans-units' array
     *
     * Fills $output['files'][$i]['trans-units'][$j] with the keys 'attr' and 'notes',
     * plus 'source', 'seg-source', 'target', 'seg-target', 'locked', 'context-group'
     * and 'alt-trans' when the matching nodes exist, then increments $j.
     *
     * @param DOMElement  $transUnit
     * @param array       $transUnitIdArrayForUniquenessCheck receives the id of this trans-unit
     * @param DOMDocument $dom
     * @param array       $output
     * @param int         $i                                  index of the current file in $output
     * @param int         $j                                  index of the current trans-unit, incremented on exit
     * @param array|null  $contextGroups                      extra context-group elements, added before the ones found inside the trans-unit
     *
     * @throws Exception
     */
    protected function extractTransUnit( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck, DOMDocument $dom, array &$output, int &$i, int &$j, ?array $contextGroups = [] ) {
        // metadata
        $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'attr' ] = $this->extractTransUnitMetadata( $transUnit, $transUnitIdArrayForUniquenessCheck );

        // notes
        $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'notes' ] = $this->extractTransUnitNotes( $dom, $transUnit );

        // content
        /** @var DOMElement $childNode */
        foreach ( $transUnit->childNodes as $childNode ) {
            // source
            if ( $childNode->nodeName === 'source' ) {
                $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'source' ] = $this->extractContent( $dom, $childNode );
            }

            // seg-source
            if ( $childNode->nodeName === 'seg-source' ) {
                // Read with ?? so that a <seg-source> not preceded by a <source> yields
                // null instead of raising an undefined-index notice.
                $rawSegment = $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'source' ][ 'raw-content' ] ?? null;

                // NOTE(review): static analysis reported that extractContentWithMarksAndExtTags()
                // is declared with fewer parameters than are passed here. The third argument
                // is kept exactly as in the original call - confirm against the parent class.
                $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-source' ] = $this->extractContentWithMarksAndExtTags( $dom, $childNode, $rawSegment );
            }

            // target
            if ( $childNode->nodeName === 'target' ) {
                $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'target' ] = $this->extractContent( $dom, $childNode );

                // seg-target: only built when the target has content and a seg-source was extracted.
                // ?? replaces the former @-suppressed reads of possibly missing keys.
                $targetRawContent = $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'target' ][ 'raw-content' ] ?? null;
                $segSource        = $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-source' ] ?? null;
                if ( !empty( $targetRawContent ) && isset( $segSource ) && count( $segSource ) > 0 ) {
                    $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-target' ]                = $this->extractContentWithMarksAndExtTags( $dom, $childNode );
                    $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'seg-target' ][ 0 ][ 'attr' ] = $this->extractTagAttributes( $childNode );
                }
            }

            // locked
            if ( $childNode->nodeName === 'sdl:seg' ) {
                $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'locked' ] = $this->extractLocked( $childNode );
            }
        }

        // context-group: the ones handed in by the caller come first
        if ( !empty( $contextGroups ) ) {
            foreach ( $contextGroups as $contextGroup ) {
                $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
            }
        }

        foreach ( $transUnit->getElementsByTagName( 'context-group' ) as $contextGroup ) {
            $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
        }

        // alt-trans
        foreach ( $transUnit->getElementsByTagName( 'alt-trans' ) as $altTrans ) {
            $output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'alt-trans' ][] = $this->extractTransUnitAltTrans( $altTrans );
        }

        $j++;
    }
205
206
    /**
     * Copy the attributes of a trans-unit into an array keyed by attribute name.
     *
     * The id is also appended to $transUnitIdArrayForUniquenessCheck; "approved" is
     * converted to a boolean and "maxwidth" is additionally exposed as 'sizeRestriction'.
     *
     * @param DOMElement $transUnit
     * @param array      $transUnitIdArrayForUniquenessCheck
     *
     * @return array
     * @throws NotFoundIdInTransUnit     when the trans-unit has no id attribute
     * @throws SegmentIdTooLongException when strlen() of the id exceeds 100
     * @throws Exception
     */
    private function extractTransUnitMetadata( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck ): array {
        // id MUST NOT be null
        if ( null === $transUnit->attributes->getNamedItem( 'id' ) ) {
            throw new NotFoundIdInTransUnit( 'Invalid trans-unit id found. EMPTY value', 400 );
        }

        $metadata = [];

        /** @var DOMAttr $element */
        foreach ( $transUnit->attributes as $element ) {
            $name  = $element->nodeName;
            $value = $element->nodeValue;

            switch ( $name ) {
                case 'id':
                    // strlen() measures bytes, so the limit applies to the byte length of the id
                    if ( strlen( $value ) > 100 ) {
                        throw new SegmentIdTooLongException( 'Segment-id too long. Max 100 characters allowed', 400 );
                    }

                    $transUnitIdArrayForUniquenessCheck[] = $value;
                    $metadata[ 'id' ]                     = $value;
                    break;

                case 'approved':
                    // approved as BOOLEAN
                    // http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html#approved
                    $metadata[ $name ] = filter_var( $value, FILTER_VALIDATE_BOOLEAN );
                    break;

                case 'maxwidth':
                    // size-unit="char" is not interpreted here: the restriction is assumed to be by character.
                    // The value is stored twice so that Xliff V1 and V2 expose the same key.
                    $width                         = filter_var( $value, FILTER_SANITIZE_NUMBER_INT );
                    $metadata[ 'sizeRestriction' ] = $width;
                    $metadata[ $name ]             = $width;
                    break;

                default:
                    $metadata[ $name ] = $value;
            }
        }

        return $metadata;
    }
254
255
    /**
     * Build the list of notes attached to a trans-unit.
     *
     * Every <note> with non-empty content is converted through JSONOrRawContentArray()
     * and then enriched with all the attributes of the note element.
     *
     * @param DOMDocument $dom
     * @param DOMElement  $transUnit
     *
     * @return array
     * @throws Exception
     */
    private function extractTransUnitNotes( DOMDocument $dom, DOMElement $transUnit ): array {
        $collected = [];

        foreach ( $transUnit->getElementsByTagName( 'note' ) as $noteNode ) {
            $content = $this->extractTagContent( $dom, $noteNode );

            // notes without content are skipped
            if ( '' === $content ) {
                continue;
            }

            $entry = $this->JSONOrRawContentArray( $content );

            // every attribute of the note is copied next to its content
            foreach ( $noteNode->attributes as $noteAttribute ) {
                $entry[ $noteAttribute->name ] = $noteAttribute->value;
            }

            $collected[] = $entry;
        }

        return $collected;
    }
283
284
    /**
     * Describe a <context-group> element: its attributes under 'attr' and the
     * content of each direct <context> child under 'contexts'.
     *
     * The 'contexts' key is only present when at least one <context> child exists.
     *
     * @param DOMDocument $dom
     * @param DOMElement  $contextGroup
     *
     * @return array
     */
    private function extractTransUnitContextGroup( DOMDocument $dom, DOMElement $contextGroup ): array {
        $group = [ 'attr' => $this->extractTagAttributes( $contextGroup ) ];

        /** @var DOMNode $node */
        foreach ( $contextGroup->childNodes as $node ) {
            if ( $node->nodeName !== 'context' ) {
                continue;
            }

            $group[ 'contexts' ][] = $this->extractContent( $dom, $node );
        }

        return $group;
    }
303
304
    /**
     * Describe an <alt-trans> element.
     *
     * The result holds the element attributes under 'attr', the value of the first
     * <source> descendant under 'source' (only when one exists) and the value of the
     * first <target> descendant under 'target' (null when there is none).
     *
     * @param DOMElement $altTrans
     *
     * @return array
     */
    private function extractTransUnitAltTrans( DOMElement $altTrans ): array {
        $at = [ 'attr' => $this->extractTagAttributes( $altTrans ) ];

        // item( 0 ) returns null when the list is empty
        $source = $altTrans->getElementsByTagName( 'source' )->item( 0 );
        if ( null !== $source ) {
            $at[ 'source' ] = $source->nodeValue;
        }

        // A DOMNodeList object is always truthy, so testing the list itself never
        // protected the ->item( 0 ) dereference. The node is checked explicitly and
        // the 'target' key stays always present, so the returned shape is unchanged.
        $target         = $altTrans->getElementsByTagName( 'target' )->item( 0 );
        $at[ 'target' ] = ( null !== $target ) ? $target->nodeValue : null;

        return $at;
    }
323
324
    /**
     * Tell whether the given element carries a "locked" attribute.
     *
     * DOMElement::getAttribute() returns an empty string, never null, for a missing
     * attribute, so comparing its result with null was always true. Presence is now
     * tested with hasAttribute(); the attribute value itself is not inspected.
     *
     * @param DOMElement $locked
     *
     * @return bool
     */
    private function extractLocked( DOMElement $locked ): bool {
        return $locked->hasAttribute( 'locked' );
    }
332
}
333