Passed
Push — master ( e0573b...63423a )
by Domenico
03:07
created

XliffParserV1::extractTransUnitMetadata()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 41
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 18
nc 7
nop 2
dl 0
loc 41
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
namespace Matecat\XliffParser\XliffParser;
4
5
use DOMAttr;
6
use DOMDocument;
7
use DOMElement;
8
use DOMNode;
9
use Exception;
10
use Matecat\XliffParser\Exception\DuplicateTransUnitIdInXliff;
11
use Matecat\XliffParser\Exception\NotFoundIdInTransUnit;
12
use Matecat\XliffParser\Exception\SegmentIdTooLongException;
13
14
class XliffParserV1 extends AbstractXliffParser {
15
    /**
     * Walk every <file> element of the document and collect its metadata,
     * embedded references and trans-units into $output[ 'files' ].
     *
     * Entries are keyed by a counter that starts at 1. The counter is advanced
     * once for every <body> child found under a <file>.
     *
     * NOTE(review): because the counter only moves when a <body> is seen, a
     * <file> without a <body> has its 'attr' (and 'reference') entry overwritten
     * by the next <file>. This is the pre-existing behaviour and is kept as is.
     *
     * @inheritDoc
     * @throws DuplicateTransUnitIdInXliff when the ids collected for a <file> are not unique
     * @throws Exception
     */
    public function parse( DOMDocument $dom, $output = [] ) {
        $fileIndex = 1;

        /** @var DOMElement $file */
        foreach ( $dom->getElementsByTagName( 'file' ) as $file ) {

            // metadata
            $output[ 'files' ][ $fileIndex ][ 'attr' ] = $this->extractMetadata( $file );

            // reference: extracted once, the lookup walks the whole <file> subtree
            $reference = $this->extractReference( $file );
            if ( !empty( $reference ) ) {
                $output[ 'files' ][ $fileIndex ][ 'reference' ] = $reference;
            }

            // trans-units
            $transUnitIdArrayForUniquenessCheck = [];
            $transUnitIndex                     = 1;
            foreach ( $file->childNodes as $body ) {
                if ( $body->nodeName !== 'body' ) {
                    continue;
                }

                // extractTuFromNode() is not defined in this class (presumably inherited);
                // it is handed the id list, the output array and both counters.
                foreach ( $body->childNodes as $childNode ) {
                    $this->extractTuFromNode( $childNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $fileIndex, $transUnitIndex );
                }

                // trans-unit re-count check: every collected id must be unique
                $totalTransUnitsId  = count( $transUnitIdArrayForUniquenessCheck );
                $transUnitsUniqueId = count( array_unique( $transUnitIdArrayForUniquenessCheck ) );
                if ( $totalTransUnitsId !== $transUnitsUniqueId ) {
                    throw new DuplicateTransUnitIdInXliff( "Invalid trans-unit id, duplicate found.", 400 );
                }

                $fileIndex++;
            }
        }

        return $output;
    }
55
56
    /**
     * Collect the attributes of a <file> element.
     *
     * The attributes 'original', 'source-language', 'datatype' (stored under the
     * key 'data-type') and 'target-language' are copied to the result. Attributes
     * whose local name starts with "x-" or whose qualified name starts with
     * "mtc:" (both case-insensitive) are gathered under the 'custom' key, indexed
     * by that name. The 'custom' key is only present when at least one such
     * attribute exists.
     *
     * @param DOMElement $file
     *
     * @return array
     */
    private function extractMetadata( DOMElement $file ) {
        // attribute local name => key used in the returned array
        $knownAttributes = [
                'original'        => 'original',
                'source-language' => 'source-language',
                'datatype'        => 'data-type',
                'target-language' => 'target-language',
        ];

        $metadata = [];

        /** @var DOMAttr $attribute */
        foreach ( $file->attributes as $attribute ) {
            $localName = $attribute->localName;

            if ( isset( $knownAttributes[ $localName ] ) ) {
                $metadata[ $knownAttributes[ $localName ] ] = $attribute->value;
            }

            // Custom MateCat x-Attribute.
            // A prefix test is used on purpose: an unanchored pattern would also
            // accept names that merely contain "x-" (e.g. "max-bytes").
            if ( stripos( $localName, 'x-' ) === 0 ) {
                $metadata[ 'custom' ][ $localName ] = $attribute->value;
            }

            // Custom MateCat namespace Attribute mtc:
            if ( stripos( $attribute->nodeName, 'mtc:' ) === 0 ) {
                $metadata[ 'custom' ][ $attribute->nodeName ] = $attribute->value;
            }
        }

        return $metadata;
    }
110
111
    /**
     * Collect the <internal-file> children of every <reference> element found
     * below the given <file>.
     *
     * Each entry of the returned list has two keys:
     *  - 'form-type': value of the 'form' attribute, or null when the attribute is missing
     *  - 'base64':    trimmed text content of the <internal-file> element
     *
     * @param DOMElement $file
     *
     * @return array zero-indexed list, empty when no <internal-file> is found
     */
    private function extractReference( DOMElement $file ) {
        $reference = [];

        foreach ( $file->getElementsByTagName( 'reference' ) as $ref ) {
            /** @var DOMNode $childNode */
            foreach ( $ref->childNodes as $childNode ) {
                if ( $childNode->nodeName !== 'internal-file' ) {
                    continue;
                }

                // getNamedItem() yields null for a missing attribute: do not dereference it blindly
                $form = $childNode->attributes->getNamedItem( 'form' );

                $reference[] = [
                        'form-type' => ( null !== $form ) ? $form->nodeValue : null,
                        'base64'    => trim( $childNode->nodeValue ),
                ];
            }
        }

        return $reference;
    }
133
134
    /**
     * Extract one trans-unit and store it in
     * $output[ 'files' ][ $i ][ 'trans-units' ][ $j ], then increment $j.
     *
     * The entry always receives 'attr' and 'notes'. Depending on the child
     * elements found it may also receive 'source', 'seg-source', 'target',
     * 'seg-target', 'locked', 'context-group' and 'alt-trans'.
     *
     * @param DOMElement  $transUnit                          the trans-unit node
     * @param array       $transUnitIdArrayForUniquenessCheck receives the id of this trans-unit
     * @param DOMDocument $dom
     * @param array       $output                             result array, modified in place
     * @param int         $i                                  index of the current file inside $output
     * @param int         $j                                  index of the current trans-unit, incremented on exit
     *
     * @throws Exception
     */
    protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j ) {
        // metadata: extracted before touching $output, since the extraction may throw
        $attr = $this->extractTransUnitMetadata( $transUnit, $transUnitIdArrayForUniquenessCheck );

        // alias of the entry under construction, to avoid repeating the full array path
        $unit = &$output[ 'files' ][ $i ][ 'trans-units' ][ $j ];

        $unit[ 'attr' ] = $attr;

        // notes
        $unit[ 'notes' ] = $this->extractTransUnitNotes( $dom, $transUnit );

        // content
        /** @var DOMElement $childNode */
        foreach ( $transUnit->childNodes as $childNode ) {
            switch ( $childNode->nodeName ) {
                // source
                case 'source':
                    $unit[ 'source' ] = $this->extractContent( $dom, $childNode );
                    break;

                // seg-source
                case 'seg-source':
                    // no <source> may have been seen yet: fall back to null instead of reading a missing key
                    $rawSegment           = isset( $unit[ 'source' ][ 'raw-content' ] ) ? $unit[ 'source' ][ 'raw-content' ] : null;
                    $unit[ 'seg-source' ] = $this->extractContentWithMarksAndExtTags( $dom, $childNode, $rawSegment );
                    break;

                // target
                case 'target':
                    $unit[ 'target' ] = $this->extractContent( $dom, $childNode );

                    // seg-target: only built when the target has content and a non-empty seg-source exists
                    $targetRawContent = isset( $unit[ 'target' ][ 'raw-content' ] ) ? $unit[ 'target' ][ 'raw-content' ] : null;
                    $segSource        = isset( $unit[ 'seg-source' ] ) ? $unit[ 'seg-source' ] : null;
                    if ( !empty( $targetRawContent ) && isset( $segSource ) && count( $segSource ) > 0 ) {
                        $unit[ 'seg-target' ] = $this->extractContentWithMarksAndExtTags( $dom, $childNode, $targetRawContent );
                    }
                    break;

                // locked
                case 'sdl:seg':
                    $unit[ 'locked' ] = $this->extractLocked( $childNode );
                    break;
            }
        }

        // context-group
        foreach ( $transUnit->getElementsByTagName( 'context-group' ) as $contextGroup ) {
            $unit[ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
        }

        // alt-trans
        foreach ( $transUnit->getElementsByTagName( 'alt-trans' ) as $altTrans ) {
            $unit[ 'alt-trans' ][] = $this->extractTransUnitAltTrans( $altTrans );
        }

        // drop the alias so no reference into $output outlives this call
        unset( $unit );

        $j++;
    }
197
198
    /**
     * Read the attributes of a trans-unit into an associative array.
     *
     * Special handling:
     *  - 'id':       required; limited to 100 (measured with strlen()); also
     *                appended to $transUnitIdArrayForUniquenessCheck
     *  - 'approved': converted to boolean
     *                (http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html#approved)
     *  - 'maxwidth': reduced to its numeric characters and stored twice, as
     *                'maxwidth' and as 'sizeRestriction'
     * Every other attribute is copied verbatim under its node name.
     *
     * @param DOMElement $transUnit
     * @param array      $transUnitIdArrayForUniquenessCheck list the id is appended to
     *
     * @return array
     * @throws NotFoundIdInTransUnit     when the trans-unit carries no id attribute
     * @throws SegmentIdTooLongException when the id is longer than 100
     * @throws Exception
     */
    private function extractTransUnitMetadata( DOMElement $transUnit, array &$transUnitIdArrayForUniquenessCheck ) {
        // $transUnit is a DOMElement, so its attribute map is always available
        $attributes = $transUnit->attributes;

        // id MUST NOT be null
        if ( $attributes->getNamedItem( 'id' ) === null ) {
            throw new NotFoundIdInTransUnit( 'Invalid trans-unit id found. EMPTY value', 400 );
        }

        $metadata = [];

        /** @var DOMAttr $attribute */
        foreach ( $attributes as $attribute ) {
            $name  = $attribute->nodeName;
            $value = $attribute->nodeValue;

            switch ( $name ) {
                case 'id':
                    if ( strlen( $value ) > 100 ) {
                        throw new SegmentIdTooLongException( 'Segment-id too long. Max 100 characters allowed', 400 );
                    }

                    $transUnitIdArrayForUniquenessCheck[] = $value;
                    $metadata[ 'id' ]                     = $value;
                    break;

                case 'approved':
                    // approved as BOOLEAN
                    $metadata[ $name ] = filter_var( $value, FILTER_VALIDATE_BOOLEAN );
                    break;

                case 'maxwidth':
                    // the attribute size-unit="char" is read like any other attribute but not interpreted:
                    // a restriction is assumed to be always expressed in characters.
                    // The value is duplicated to allow Xliff V1 and V2 to work the same.
                    $width                         = filter_var( $value, FILTER_SANITIZE_NUMBER_INT );
                    $metadata[ 'sizeRestriction' ] = $width;
                    $metadata[ $name ]             = $width;
                    break;

                default:
                    $metadata[ $name ] = $value;
            }
        }

        return $metadata;
    }
247
248
    /**
     * Build the list of notes attached to a trans-unit.
     *
     * Each <note> with non-empty content becomes one entry: the array returned
     * by JSONOrRawContentArray() for that content, extended with every
     * attribute of the <note> element (name => value). Notes whose extracted
     * content is the empty string are skipped.
     *
     * @param DOMDocument $dom
     * @param DOMElement  $transUnit
     *
     * @return array
     * @throws Exception
     */
    private function extractTransUnitNotes( DOMDocument $dom, DOMElement $transUnit ) {
        $collected = [];

        foreach ( $transUnit->getElementsByTagName( 'note' ) as $noteNode ) {
            $content = $this->extractTagContent( $dom, $noteNode );

            // nothing to record for an empty note
            if ( '' === $content ) {
                continue;
            }

            $entry = $this->JSONOrRawContentArray( $content );

            // attributes are merged last, so they win over same-named content keys
            foreach ( $noteNode->attributes as $noteAttribute ) {
                $entry[ $noteAttribute->name ] = $noteAttribute->value;
            }

            $collected[] = $entry;
        }

        return $collected;
    }
275
276
    /**
     * Describe a <context-group> element.
     *
     * The result holds the attributes of the group under 'attr' and, when the
     * group has direct <context> children, their extracted content as a list
     * under 'contexts'.
     *
     * @param DOMDocument $dom
     * @param DOMElement  $contextGroup
     *
     * @return array
     */
    private function extractTransUnitContextGroup( DOMDocument $dom, DOMElement $contextGroup ) {
        $group = [
                'attr' => $this->extractTagAttributes( $contextGroup ),
        ];

        /** @var DOMNode $node */
        foreach ( $contextGroup->childNodes as $node ) {
            // only direct <context> children are of interest
            if ( $node->nodeName !== 'context' ) {
                continue;
            }

            $group[ 'contexts' ][] = $this->extractContent( $dom, $node );
        }

        return $group;
    }
294
295
    /**
     * Describe an <alt-trans> element.
     *
     * Returned keys:
     *  - 'attr':   attributes of the <alt-trans> element
     *  - 'source': text of the first <source> descendant; key omitted when there is none
     *  - 'target': text of the first <target> descendant; always present, null when there is none
     *
     * @param DOMElement $altTrans
     *
     * @return array
     */
    private function extractTransUnitAltTrans( DOMElement $altTrans ) {
        $at = [
                'attr' => $this->extractTagAttributes( $altTrans ),
        ];

        $sources = $altTrans->getElementsByTagName( 'source' );
        if ( $sources->length > 0 ) {
            $at[ 'source' ] = $sources->item( 0 )->nodeValue;
        }

        // getElementsByTagName() always returns a node list object, so its length
        // must be inspected: item( 0 ) is null when no <target> exists.
        $targets        = $altTrans->getElementsByTagName( 'target' );
        $at[ 'target' ] = ( $targets->length > 0 ) ? $targets->item( 0 )->nodeValue : null;

        return $at;
    }
314
315
    /**
     * Tell whether the given element carries a 'locked' attribute.
     *
     * Only the presence of the attribute is checked, its value is not inspected.
     * hasAttribute() is required here: getAttribute() returns an empty string,
     * never null, for a missing attribute, so a null comparison is always true.
     *
     * @param DOMElement $locked
     *
     * @return bool true when the 'locked' attribute is present
     */
    private function extractLocked( DOMElement $locked ) {
        return $locked->hasAttribute( 'locked' );
    }
323
}
324