DataRefReplacer::sanitizeMap()   A
last analyzed

Complexity

Conditions 4
Paths 3

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 4
c 0
b 0
f 0
nc 3
nop 1
dl 0
loc 9
ccs 5
cts 5
cp 1
crap 4
rs 10
1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * @author hashashiyyin [email protected] / [email protected]
5
 * Date: 22/04/24
6
 * Time: 15:13
7
 *
8
 */
9
10
namespace Matecat\SubFiltering\Utils;
11
12
use DOMException;
13
use Exception;
14
use Matecat\SubFiltering\Enum\CTypeEnum;
15
use Matecat\XmlParser\Exception\InvalidXmlException;
16
use Matecat\XmlParser\Exception\XmlParsingException;
17
use Matecat\XmlParser\XmlParser;
18
19
class DataRefReplacer {
20
    /**
21
     * @var Map
22
     */
23
    private $map;
24
25
    /**
     * Builds a replacer bound to the given dataRef map.
     *
     * The raw map is first passed through sanitizeMap() and the result is
     * wrapped into a Map instance that every other method reads from.
     *
     * @param array $map dataRef identifier => value
     */
    public function __construct( array $map = [] ) {
        $sanitized = $this->sanitizeMap( $map );
        $this->map = Map::instance( $sanitized );
    }
33
34
    /**
     * Converts <ph>, <sc>, <ec> and <pc> tags carrying a dataRef / dataRefStart / dataRefEnd
     * attribute into <ph> tags enriched with `ctype`, a base64 `equiv-text` and the original
     * tag base64-encoded in `x-orig`, resolving the references against the map given to the constructor.
     *
     * For a complete reference see:
     *
     * http://docs.oasis-open.org/xliff/xliff-core/v2.1/os/xliff-core-v2.1-os.html#dataref
     *
     * This method never throws. When anything fails (typically a segment with an opening tag
     * and no closing one, which the XML parser rejects) the input string is returned untouched.
     *
     * @param string $string
     *
     * @return string the converted string, or the original $string when nothing has to be done or the conversion fails
     */
    public function replace( $string ) {

        // if the map is empty
        // or the string has not a dataRef attribute
        // return string as is
        if ( $this->map->isEmpty() || !$this->hasAnyDataRefAttribute( $string ) ) {
            return $string;
        }

        // Work on a copy: a failure halfway through the pipeline must not leak a partially converted string.
        $converted = $string;

        try {

            $html          = XmlParser::parse( $converted, true );
            $dataRefEndMap = new ArrayList();

            foreach ( $html as $node ) {

                // 1. Replace <ph>|<sc>|<ec> tags
                $converted = $this->recursiveTransformDataRefToPhTag( $node, $converted );

                // 2. Replace self-closed <pc dataRefStart="xyz" /> tags
                $converted = $this->recursiveReplaceSelfClosedPcTags( $node, $converted );

                // 3. Build the DataRefEndMap needed by replaceClosingPcTags
                // (needed for correct handling of </pc> closing tags)
                $this->extractDataRefMapRecursively( $node, $converted, $dataRefEndMap );

            }

            // 4. replace pc tags
            $converted = $this->replaceOpeningPcTags( $converted );

            return $this->replaceClosingPcTags( $converted, $dataRefEndMap );

        } catch ( \Throwable $ignore ) {
            // The previous implementation returned from a `finally` block, which discards every
            // throwable; that "never throws" contract is kept here on purpose, but the ORIGINAL
            // string is returned instead of whatever intermediate state was reached.
            return $string;
        }

    }
88
89
    /**
     * Tells whether the string contains at least one quoted dataRef, dataRefStart
     * or dataRefEnd attribute assignment.
     *
     * @param string $string
     *
     * @return bool
     */
    private function hasAnyDataRefAttribute( $string ) {
        $found = preg_match( '/(dataRef|dataRefStart|dataRefEnd)=[\'"].*?[\'"]/', $string );

        // preg_match yields 1 on match, 0 on no match and false on error: only 1 means "found"
        return $found === 1;
    }
97
98
    /**
     * Walks the node tree and rewrites childless <ph>, <sc> and <ec> nodes whose dataRef
     * resolves to a value in the map.
     *
     * Every accepted tag is emitted as a <ph> tag; the ctype attribute records which of
     * the three tags it came from.
     *
     * When the node has no id attribute, the dataRef value is used as id. The id is emitted
     * without any suffix (null is passed as up-count value).
     *
     * @param object $node
     * @param string $string
     *
     * @return string
     */
    private function recursiveTransformDataRefToPhTag( $node, $string ) {

        // container node: only its descendants are candidates
        if ( $node->has_children ) {

            foreach ( $node->inner_html as $descendant ) {
                $string = $this->recursiveTransformDataRefToPhTag( $descendant, $string );
            }

            return $string;
        }

        // accept only those tags
        $tagName = $node->tagName;
        if ( $tagName == 'ph' ) {
            $ctype = CTypeEnum::PH_DATA_REF;
        } elseif ( $tagName == 'sc' ) {
            $ctype = CTypeEnum::SC_DATA_REF;
        } elseif ( $tagName == 'ec' ) {
            $ctype = CTypeEnum::EC_DATA_REF;
        } else {
            return $string;
        }

        // proceed with the conversion only when the map holds a (truthy) value for this dataRef
        $attributesMap = Map::instance( $node->attributes );
        if ( !$this->map->get( $attributesMap->get( 'dataRef' ) ) ) {
            return $string;
        }

        $dataRefName = $node->attributes[ 'dataRef' ];   // map identifier. Eg: source1
        $tagId       = $attributesMap->getOrDefault( 'id', $dataRefName );
        $equivText   = $this->map->getOrDefault( $dataRefName, 'NULL' );

        return $this->replaceNewTagString( $node->node, $tagId, $equivText, $ctype, $string, null );

    }
157
158
    /**
     * Returns a copy of the map where every null or empty-string value is replaced
     * by the literal string 'NULL'; keys and all other values are kept as they are.
     *
     * @param $map
     *
     * @return array
     */
    private function sanitizeMap( $map ) {

        // array_map with a single array preserves the original keys
        return array_map(
                function ( $value ) {
                    return ( $value === null || $value === '' ) ? 'NULL' : $value;
                },
                $map
        );

    }
175
176
    /**
     * Walks the node tree and rewrites childless, self-closed <pc dataRefStart="xyz"/> nodes
     * whose dataRefStart resolves to a (truthy) value in the map.
     *
     * Nodes without a dataRefStart attribute are left untouched.
     *
     * @param object $node
     * @param string $string
     *
     * @return string
     */
    private function recursiveReplaceSelfClosedPcTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveReplaceSelfClosedPcTags( $childNode, $string );
            }

            return $string;
        }

        if ( $node->tagName != 'pc' || $node->self_closed !== true ) {
            return $string;
        }

        // A self-closed <pc/> may legitimately lack dataRefStart: check before indexing,
        // otherwise an "undefined array key" warning is raised for every such tag.
        if ( !isset( $node->attributes[ 'dataRefStart' ] ) ) {
            return $string;
        }

        $dataRefStartValue = $this->map->get( $node->attributes[ 'dataRefStart' ] );
        if ( !$dataRefStartValue ) {
            return $string;
        }

        $attributesMap = Map::instance( $node->attributes );

        return $this->replaceNewTagString(
                $node->node,
                $attributesMap->get( 'id' ),
                $dataRefStartValue,
                CTypeEnum::PC_SELF_CLOSE_DATA_REF,
                $string
        );

    }
214
215
    /**
     * Extract (recursively) the dataRefEnd map from a single node.
     *
     * One entry ( 'id', 'dataRefEnd' ) is appended to $dataRefEndMap for every <pc> node that
     * owns a closing tag. Children are visited before their parent, so the entries are appended
     * in the order in which the closing tags of nested <pc> pairs are written in the string.
     * When dataRefEnd is missing, dataRefStart is used in its place.
     *
     * @param object    $node
     * @param string    $completeString
     * @param ArrayList $dataRefEndMap
     */
    private function extractDataRefMapRecursively( $node, $completeString, ArrayList $dataRefEndMap ) {

        // we have to build the map for the closing pc tag, so get the children first
        if ( $node->has_children ) {
            foreach ( $node->inner_html as $nestedNode ) {
                $this->extractDataRefMapRecursively( $nestedNode, $completeString, $dataRefEndMap );
            }
        }

        if ( $node->tagName !== 'pc' ) {
            return;
        }

        // EXCLUDE self-closing <pc id='xx'/> by checking for `$node->self_closed === false`,
        // BUT there is an ambiguity when the inner text node is empty: such a pair is reported as self-closed too.
        //
        // Remove '/>' from the node, add '></pc>' and look the result up in the complete string:
        // if it is found, the tag was an empty `<pc id='yy'></pc>` pair.
        // EX:
        // <pc id="source5" dataRefStart="source5"/>
        //   becomes
        // <pc id="source5" dataRefStart="source5"></pc>
        //
        // The lookup scans the whole string, so it is performed only for the nodes that actually need it.
        if ( $node->self_closed !== false ) {
            $asEmptyPair = substr( $node->node, 0, -2 ) . '></pc>';
            if ( strpos( $completeString, $asEmptyPair ) === false ) {
                return;
            }
        }

        $attributesMap = Map::instance( $node->attributes );
        $dataRefEnd    = $attributesMap->getOrDefault( 'dataRefEnd', $attributesMap->get( 'dataRefStart' ) );

        $dataRefEndMap[] = [
                'id'         => $attributesMap->get( 'id' ),
                'dataRefEnd' => $dataRefEnd,
        ];

    }
255
256
    /**
     * Replace every opening <pc ...> tag found in $string that carries a dataRefStart
     * (or, failing that, a dataRefEnd) attribute.
     *
     * The referenced value is looked up in the map; the string 'NULL' is used when the map has no such key.
     *
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    private function replaceOpeningPcTags( $string ) {

        preg_match_all( '|<pc ([^>/]+?)>|iu', $string, $openingPcMatches );

        foreach ( $openingPcMatches[ 0 ] as $openingTag ) {

            // add a closing tag to not break xml integrity
            $parsed     = XmlParser::parse( $openingTag . '</pc>', true );
            $attributes = $parsed[ 0 ]->attributes;

            // the start reference falls back on dataRefEnd when dataRefStart is missing
            if ( isset( $attributes[ 'dataRefStart' ] ) ) {
                $dataRefStart = $attributes[ 'dataRefStart' ];
            } elseif ( isset( $attributes[ 'dataRefEnd' ] ) ) {
                $dataRefStart = $attributes[ 'dataRefEnd' ];
            } else {
                // no reference at all: leave the tag as it is
                continue;
            }

            $string = $this->replaceNewTagString(
                    $openingTag,
                    Map::instance( $attributes )->get( 'id' ),
                    $this->map->getOrDefault( $dataRefStart, 'NULL' ),
                    CTypeEnum::PC_OPEN_DATA_REF,
                    $string
            );

        }

        return $string;
    }
300
301
    /**
     * Replace closing </pc> tags in $string using the references collected in $dataRefEndMap.
     *
     * The n-th closing tag found in the string is paired with the n-th entry of the map;
     * closing tags without a usable entry are left untouched.
     *
     * @param string    $string
     * @param ArrayList $dataRefEndMap
     *
     * @return string
     */
    private function replaceClosingPcTags( $string, ArrayList $dataRefEndMap ) {

        preg_match_all( '|</pc>|iu', $string, $closingPcMatches, PREG_OFFSET_CAPTURE );

        $closingTagLength = 5; // strlen of '</pc>'
        $rebuilt          = '';
        $cursor           = 0; // byte position in $string up to which the text has already been copied

        foreach ( $closingPcMatches[ 0 ] as $index => $match ) {

            $attr = $dataRefEndMap->get( $index );
            if ( empty( $attr ) || !isset( $attr[ 'dataRefEnd' ] ) ) {
                continue;
            }

            $offset = $match[ 1 ];

            // copy the text preceding this closing tag, then append its replacement
            $rebuilt .= substr( $string, $cursor, $offset - $cursor );
            $rebuilt .= $this->getNewTagString(
                    '</pc>',
                    $attr[ 'id' ],
                    $this->map->getOrDefault( $attr[ 'dataRefEnd' ], 'NULL' ),
                    CTypeEnum::PC_CLOSE_DATA_REF,
                    '_2'
            );

            $cursor = $offset + $closingTagLength;

        }

        // append whatever follows the last replaced tag
        return $rebuilt . substr( $string, $cursor );

    }
343
344
    /**
     * Restores the original tags in place of the <ph> tags that carry them in the x-orig attribute.
     *
     * Unlike replace(), parsing errors are not swallowed here.
     *
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    public function restore( $string ) {

        // if the map is empty return string as is
        // ($this->map is a Map object: empty() on an object is always false, so the emptiness
        // must be asked to the Map itself, as replace() does)
        if ( $this->map->isEmpty() ) {
            return $string;
        }

        $html = XmlParser::parse( $string, true );

        foreach ( $html as $node ) {
            $string = $this->recursiveRestoreOriginalTags( $node, $string );
        }

        return $string;
    }
367
368
    /**
     * Walks the node tree and, for every childless node that has an x-orig attribute and a ctype
     * accepted by CTypeEnum::isLayer2Constant(), replaces the first occurrence of the node in
     * $string with the base64-decoded content of x-orig.
     *
     * @param object $node
     * @param string $string
     *
     * @return string
     */
    private function recursiveRestoreOriginalTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveRestoreOriginalTags( $childNode, $string );
            }

            return $string;
        }

        $nodeAttributesMap = Map::instance( $node->attributes );

        $encodedOriginal = $nodeAttributesMap->get( 'x-orig' );
        if ( !$encodedOriginal ) {
            return $string;
        }

        if ( !CTypeEnum::isLayer2Constant( $nodeAttributesMap->get( 'ctype' ) ) ) {
            return $string;
        }

        $position = strpos( $string, $node->node );
        if ( $position === false ) {
            return $string;
        }

        // Literal replacement of the first occurrence only. A regex replacement would interpret
        // sequences such as `$1` or `\0` contained in the original tag as back-references
        // and corrupt the restored markup.
        return substr_replace( $string, base64_decode( $encodedOriginal ), $position, strlen( $node->node ) );

    }
401
402
    /**
     * Builds the self-closed <ph> tag that stands in for an original tag.
     *
     * Attributes are written in this order: id (only when $id is not null, followed by
     * $upCountIdValue), ctype, equiv-text (the base64-encoded $dataRefValue prefixed by "base64:")
     * and x-orig (the base64-encoded original tag).
     *
     * @param string      $actualNodeString the original tag, stored in x-orig
     * @param string      $id
     * @param string      $dataRefValue
     * @param string      $ctype
     * @param string|null $upCountIdValue   suffix appended to the id
     *
     * @return string
     */
    private function getNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $upCountIdValue = null ) {

        $idAttribute = ( $id !== null ) ? ' id="' . $id . $upCountIdValue . '"' : '';

        return '<ph'
                . $idAttribute
                . ' ctype="' . $ctype . '"'
                . ' equiv-text="base64:' . base64_encode( $dataRefValue ) . '"'
                . ' x-orig="' . base64_encode( $actualNodeString ) . '"'
                . '/>';

    }
426
427 45
    /**
     * Replaces every occurrence of $actualNodeString inside $originalString with the
     * <ph> tag built by getNewTagString() from the same arguments.
     *
     * @param string      $actualNodeString
     * @param string      $id
     * @param string      $dataRefValue
     * @param string      $ctype
     * @param string      $originalString
     * @param string|null $upCountIdValue suffix appended to the id, '_1' by default
     *
     * @return string
     */
    private function replaceNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $originalString, $upCountIdValue = '_1' ) {
        $replacement = $this->getNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $upCountIdValue );

        return str_replace( $actualNodeString, $replacement, $originalString );
    }
430
431
}