Passed
Branch master (41e2a6)
by Domenico
02:28
created

DataRefReplacer::extractDataRefMapRecursively()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 5

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 5
eloc 9
c 1
b 0
f 0
nc 4
nop 2
dl 0
loc 18
ccs 10
cts 10
cp 1
crap 5
rs 9.6111
1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * @author hashashiyyin [email protected] / [email protected]
5
 * Date: 22/04/24
6
 * Time: 15:13
7
 *
8
 */
9
10
namespace Matecat\SubFiltering\Utils;
11
12
use DOMException;
13
use Exception;
14
use Matecat\SubFiltering\Enum\CTypeEnum;
15
use Matecat\XmlParser\Exception\InvalidXmlException;
16
use Matecat\XmlParser\Exception\XmlParsingException;
17
use Matecat\XmlParser\XmlParser;
18
19
class DataRefReplacer {
20
    /**
21
     * @var Map
22
     */
23
    private $map;
24
25
    /**
     * DataRefReplacer constructor.
     *
     * The incoming array is first normalised by sanitizeMap() (null / empty-string
     * values become the literal string 'NULL') and then wrapped with Map::instance().
     *
     * @param array $map dataRef identifier => replacement value; defaults to an empty map
     */
    public function __construct( array $map = [] ) {
        $sanitizedMap = $this->sanitizeMap( $map );
        $this->map    = Map::instance( $sanitizedMap );
    }
33
34
    /**
     * Convert the dataRef-carrying tags (<ph>, <sc>, <ec>, <pc>) found in $string into
     * <ph> placeholders carrying `ctype`, `equiv-text` and `x-orig` attributes, using the
     * values stored in the map given to the constructor.
     *
     * For a complete reference see:
     *
     * http://docs.oasis-open.org/xliff/xliff-core/v2.1/os/xliff-core-v2.1-os.html#dataref
     *
     * The conversion is all-or-nothing: every step works on a copy, so when any step
     * throws an Exception (e.g. a segment with an opening tag and no closing tag) the
     * caller receives the untouched input and never a half-converted string.
     * Throwables that are not Exceptions (PHP Errors) are not swallowed.
     *
     * @param string $string
     *
     * @return string the converted string, or $string unchanged when the map is empty,
     *                when no dataRef* attribute is present, or when the conversion fails
     */
    public function replace( $string ) {

        // nothing to do without a map or without any dataRef / dataRefStart / dataRefEnd attribute
        if ( $this->map->isEmpty() || !$this->hasAnyDataRefAttribute( $string ) ) {
            return $string;
        }

        try {

            $html          = XmlParser::parse( $string, true );
            $dataRefEndMap = new ArrayList();

            // keep the input intact so that it can be handed back as-is on failure
            $converted = $string;

            foreach ( $html as $node ) {

                // 1. Replace <ph>|<sc>|<ec> tags
                $converted = $this->recursiveTransformDataRefToPhTag( $node, $converted );

                // 2. Replace self-closed <pc dataRefStart="xyz" /> tags
                $converted = $this->recursiveReplaceSelfClosedPcTags( $node, $converted );

                // 3. Collect the id / dataRefEnd pairs needed to convert the closing </pc> tags
                $this->extractDataRefMapRecursively( $node, $dataRefEndMap );

            }

            // 4. Replace opening and closing <pc> tags
            $converted = $this->replaceOpeningPcTags( $converted );

            return $this->replaceClosingPcTags( $converted, $dataRefEndMap );

        } catch ( Exception $ignore ) {
            // best effort: a segment that cannot be processed is returned exactly as received
            return $string;
        }

    }
88
89
    /**
     * Tell whether $string contains at least one quoted dataRef, dataRefStart or
     * dataRefEnd attribute.
     *
     * @param string $string
     *
     * @return bool true when the pattern matches at least once, false otherwise
     *              (including when preg_match reports an error)
     */
    private function hasAnyDataRefAttribute( $string ) {
        $dataRefAttributePattern = '/(dataRef|dataRefStart|dataRefEnd)=[\'"].*?[\'"]/';
        $matchCount              = preg_match( $dataRefAttributePattern, $string );

        // preg_match yields 1 on match, 0 on no match and false on failure
        return $matchCount === 1;
    }
97
98
    /**
     * Convert childless <ph>, <sc> and <ec> nodes that carry a known dataRef into
     * <ph> placeholders (see getNewTagString() for the produced attributes).
     *
     * Nodes with children are only traversed; any other childless tag is ignored.
     * When the node has no `id` attribute, the dataRef name is used as id.
     *
     * A node is converted only when the map holds a usable value for its dataRef.
     * Numeric zero values ('0', 0) are legitimate content and are converted; missing
     * or empty values leave the string untouched.
     *
     * @param object $node   parsed node exposing has_children, inner_html, tagName, attributes and node
     * @param string $string the string being converted
     *
     * @return string
     */
    private function recursiveTransformDataRefToPhTag( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveTransformDataRefToPhTag( $childNode, $string );
            }

            return $string;
        }

        // accept only those tags
        switch ( $node->tagName ) {
            case 'ph':
                $ctype = CTypeEnum::PH_DATA_REF;
                break;
            case 'sc':
                $ctype = CTypeEnum::SC_DATA_REF;
                break;
            case 'ec':
                $ctype = CTypeEnum::EC_DATA_REF;
                break;
            default:
                return $string;
        }

        $attributesMap = Map::instance( $node->attributes );
        $dataRefName   = $attributesMap->get( 'dataRef' );   // map identifier. Eg: source1
        $dataRefValue  = $this->map->get( $dataRefName );

        // skip when the map has nothing for this dataRef; a plain truthiness test would
        // also discard '0', which is valid content, hence the is_numeric() exception
        if ( !$dataRefValue && !is_numeric( $dataRefValue ) ) {
            return $string;
        }

        return $this->replaceNewTagString(
                $node->node,
                $attributesMap->getOrDefault( 'id', $dataRefName ),
                $dataRefValue,
                $ctype,
                $string,
                null // no suffix is appended to the id for these tags
        );

    }
157
158
    /**
     * Return a copy of the map in which every null or empty-string value is replaced
     * by the literal string 'NULL'. Keys, order and all other values are kept as they are.
     *
     * @param $map
     *
     * @return array
     */
    private function sanitizeMap( $map ) {

        foreach ( array_keys( $map ) as $key ) {

            $current = $map[ $key ];

            if ( $current === null || $current === '' ) {
                $map[ $key ] = 'NULL';
            }
        }

        return $map;

    }
175
176
    /**
     * Convert self-closed <pc dataRefStart="xyz"/> nodes into <ph> placeholders.
     *
     * Nodes with children are only traversed. A self-closed <pc/> is converted when it
     * has a `dataRefStart` attribute and the map holds a usable value for it; numeric
     * zero values ('0', 0) count as usable. In every other case the string is returned
     * unchanged.
     *
     * @param object $node   parsed node exposing has_children, inner_html, tagName, self_closed, attributes and node
     * @param string $string the string being converted
     *
     * @return string
     */
    private function recursiveReplaceSelfClosedPcTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveReplaceSelfClosedPcTags( $childNode, $string );
            }

            return $string;
        }

        if ( $node->tagName !== 'pc' || $node->self_closed !== true ) {
            return $string;
        }

        $attributesMap    = Map::instance( $node->attributes );
        $dataRefStartName = $attributesMap->getOrDefault( 'dataRefStart', null );

        // a <pc/> without dataRefStart has nothing to look up; reading the attribute through
        // the Map avoids an undefined-index access on the raw attributes array
        if ( $dataRefStartName === null ) {
            return $string;
        }

        $dataRefStartValue = $this->map->get( $dataRefStartName );

        // skip when the map has nothing for this reference, but keep '0' as valid content
        if ( !$dataRefStartValue && !is_numeric( $dataRefStartValue ) ) {
            return $string;
        }

        return $this->replaceNewTagString(
                $node->node,
                $attributesMap->get( 'id' ),
                $dataRefStartValue,
                CTypeEnum::PC_SELF_CLOSE_DATA_REF,
                $string
        );

    }
214
215
    /**
     * Walk the node tree and append one entry to $dataRefEndMap for every <pc> node
     * that is not self-closed.
     *
     * Children are visited before the node itself, so entries are appended in the
     * order in which the matching closing </pc> tags occur. Each entry holds the node
     * `id` and its `dataRefEnd`, falling back to `dataRefStart` when `dataRefEnd` is absent.
     *
     * @param object    $node
     * @param ArrayList $dataRefEndMap collector, filled in place
     */
    private function extractDataRefMapRecursively( $node, ArrayList $dataRefEndMap ) {

        // descend first: the closing tag of a nested <pc> precedes the one of its parent
        if ( $node->has_children ) {
            foreach ( $node->inner_html as $child ) {
                $this->extractDataRefMapRecursively( $child, $dataRefEndMap );
            }
        }

        // only paired <pc>...</pc> nodes have a closing tag to convert
        $isPairedPc = ( $node->tagName === 'pc' && $node->self_closed === false );
        if ( !$isPairedPc ) {
            return;
        }

        $attributes        = Map::instance( $node->attributes );
        $fallbackReference = $attributes->get( 'dataRefStart' );

        $dataRefEndMap[] = [
                'id'         => $attributes->get( 'id' ),
                'dataRefEnd' => $attributes->getOrDefault( 'dataRefEnd', $fallbackReference ),
        ];

    }
244
245
    /**
     * Replace every opening <pc ...> tag found in $string with a <ph> placeholder.
     *
     * The reference used for the lookup is `dataRefStart`, or `dataRefEnd` when
     * `dataRefStart` is not set. Tags carrying neither attribute are left untouched.
     * When the map has no entry for the reference, the string 'NULL' is used as value.
     *
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    private function replaceOpeningPcTags( $string ) {

        preg_match_all( '|<pc ([^>/]+?)>|iu', $string, $openingPcMatches );

        foreach ( $openingPcMatches[ 0 ] as $openingTag ) {

            // a closing tag is appended so that the fragment is well-formed for the parser
            $parsedFragment = XmlParser::parse( $openingTag . '</pc>', true );
            $pcNode         = $parsedFragment[ 0 ];
            $attributes     = $pcNode->attributes;

            if ( isset( $attributes[ 'dataRefStart' ] ) ) {
                $reference = $attributes[ 'dataRefStart' ];
            } elseif ( isset( $attributes[ 'dataRefEnd' ] ) ) {
                // missing `dataRefStart`: the end reference stands in for it
                $reference = $attributes[ 'dataRefEnd' ];
            } else {
                // no reference at all, nothing to convert
                continue;
            }

            $placeholderId = Map::instance( $attributes )->get( 'id' );
            $originalValue = $this->map->getOrDefault( $reference, 'NULL' );

            $string = $this->replaceNewTagString(
                    $openingTag,
                    $placeholderId,
                    $originalValue,
                    CTypeEnum::PC_OPEN_DATA_REF,
                    $string
            );

        }

        return $string;

    }
289
290
    /**
     * Replace the closing </pc> tags found in $string with <ph> placeholders.
     *
     * The n-th closing tag is paired with the n-th entry of $dataRefEndMap. Tags whose
     * entry is empty or has no `dataRefEnd` are left as they are. The placeholder id is
     * the entry id followed by '_2'; when the map has no value for the reference the
     * string 'NULL' is used.
     *
     * @param string    $string
     * @param ArrayList $dataRefEndMap entries with the keys `id` and `dataRefEnd`
     *
     * @return string
     */
    private function replaceClosingPcTags( $string, ArrayList $dataRefEndMap ) {

        preg_match_all( '|</pc>|iu', $string, $closingPcMatches, PREG_OFFSET_CAPTURE );

        $closingTagLength = 5; // strlen of '</pc>'
        $shift            = 0; // how far earlier replacements have moved the remaining offsets

        foreach ( $closingPcMatches[ 0 ] as $position => $capture ) {

            $entry = $dataRefEndMap->get( $position );

            if ( empty( $entry ) || !isset( $entry[ 'dataRefEnd' ] ) ) {
                continue;
            }

            $placeholder = $this->getNewTagString(
                    '</pc>',
                    $entry[ 'id' ],
                    $this->map->getOrDefault( $entry[ 'dataRefEnd' ], 'NULL' ),
                    CTypeEnum::PC_CLOSE_DATA_REF,
                    '_2'
            );

            // offsets were captured on the unmodified string, so realign them before splicing
            $string = substr_replace( $string, $placeholder, $capture[ 1 ] + $shift, $closingTagLength );
            $shift  += strlen( $placeholder ) - $closingTagLength;

        }

        return $string;

    }
332
333
    /**
     * Revert the placeholders produced by replace() back to their original tags.
     *
     * The string is parsed and every node is handed to recursiveRestoreOriginalTags(),
     * which rebuilds the original markup from the `x-orig` attribute of each placeholder.
     * The dataRef map is not consulted, so restoring works whatever the map contains.
     *
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    public function restore( $string ) {

        // NOTE: the previous `empty( $this->map )` guard was removed on purpose: `empty()` on an
        // object is always false, so that branch could never run

        $html = XmlParser::parse( $string, true );

        foreach ( $html as $node ) {
            $string = $this->recursiveRestoreOriginalTags( $node, $string );
        }

        return $string;
    }
356
357
    /**
     * Restore the original tag of a placeholder node inside $string.
     *
     * Nodes with children are only traversed. A childless node is restored when its
     * `ctype` is accepted by CTypeEnum::isLayer2Constant() and it carries an `x-orig`
     * attribute: the first occurrence of the node markup in $string is replaced by the
     * base64-decoded `x-orig` value. Anything else leaves $string unchanged.
     *
     * @param object $node   parsed node exposing has_children, inner_html, attributes and node
     * @param string $string the string being restored
     *
     * @return string
     */
    private function recursiveRestoreOriginalTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveRestoreOriginalTags( $childNode, $string );
            }

            return $string;
        }

        $nodeAttributesMap = Map::instance( $node->attributes );

        if ( !CTypeEnum::isLayer2Constant( $nodeAttributesMap->get( 'ctype' ) ) ) {
            return $string;
        }

        // without the encoded original there is nothing to restore: keep the placeholder
        // instead of replacing it with an empty string
        $encodedOriginal = $nodeAttributesMap->getOrDefault( 'x-orig', null );
        if ( $encodedOriginal === null ) {
            return $string;
        }

        $position = strpos( $string, $node->node );
        if ( $position === false ) {
            return $string;
        }

        // literal splice of the first occurrence: a regex replacement would interpret
        // `$1` / `\1` sequences contained in the original tag as back-references
        return substr_replace( $string, base64_decode( $encodedOriginal ), $position, strlen( $node->node ) );

    }
385
386
    /**
     * Build the self-closed <ph .../> placeholder that stands in for an original tag.
     *
     * Attributes are emitted in this order: `id` (only when $id is not null, with
     * $upCountIdValue appended), `ctype`, `equiv-text` (prefix "base64:" followed by the
     * base64 of $dataRefValue) and `x-orig` (base64 of $actualNodeString).
     *
     * @param string      $actualNodeString the original tag, stored encoded in x-orig
     * @param string      $id
     * @param string      $dataRefValue
     * @param string      $ctype
     * @param string|null $upCountIdValue   suffix appended to the id
     *
     * @return string
     */
    private function getNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $upCountIdValue = null ) {

        $attributes = [];

        if ( $id !== null ) {
            $attributes[] = 'id="' . $id . $upCountIdValue . '"';
        }

        array_push(
                $attributes,
                'ctype="' . $ctype . '"',
                'equiv-text="base64:' . base64_encode( $dataRefValue ) . '"',
                'x-orig="' . base64_encode( $actualNodeString ) . '"'
        );

        return '<ph ' . implode( " ", $attributes ) . '/>';

    }
410
411 43
    /**
     * Replace every occurrence of $actualNodeString inside $originalString with the
     * placeholder built by getNewTagString() from the same arguments.
     *
     * @param string      $actualNodeString the tag to look for
     * @param string      $id
     * @param string      $dataRefValue
     * @param string      $ctype
     * @param string      $originalString   the string in which the replacement happens
     * @param string|null $upCountIdValue   suffix appended to the id, '_1' by default
     *
     * @return string
     */
    private function replaceNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $originalString, $upCountIdValue = '_1' ) {
        $placeholder = $this->getNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $upCountIdValue );

        return str_replace( $actualNodeString, $placeholder, $originalString );
    }
414
415
}