Passed
Push — master ( a95f40...077740 )
by Domenico
02:21
created

DataRefReplacer::sanitizeMap()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 4
c 0
b 0
f 0
nc 3
nop 1
dl 0
loc 9
rs 10
1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * @author hashashiyyin [email protected] / [email protected]
5
 * Date: 22/04/24
6
 * Time: 15:13
7
 *
8
 */
9
10
namespace Matecat\SubFiltering\Utils;
11
12
use DOMException;
13
use Exception;
14
use Matecat\SubFiltering\Enum\CTypeEnum;
15
use Matecat\XmlParser\Exception\InvalidXmlException;
16
use Matecat\XmlParser\Exception\XmlParsingException;
17
use Matecat\XmlParser\XmlParser;
18
19
class DataRefReplacer {

    /**
     * Lookup table from dataRef identifiers (e.g. "source1") to the original data they stand for.
     * Built in the constructor from the sanitized input array.
     *
     * @var Map
     */
    private $map;

    /**
     * DataRefReplacer constructor.
     *
     * @param array $map dataRef identifier => original data; null and empty-string values are
     *                   stored as the literal string 'NULL' (see sanitizeMap())
     */
    public function __construct( array $map = [] ) {
        $this->map = Map::instance( $this->sanitizeMap( $map ) );
    }

    /**
     * Converts <ph>, <sc>, <ec> and <pc> tags carrying a dataRef / dataRefStart / dataRefEnd attribute
     * into <ph> tags that expose the referenced value (taken from the map) in an 'equiv-text' attribute
     * and keep the original markup, base64-encoded, in 'x-orig'.
     *
     * For a complete reference see:
     *
     * http://docs.oasis-open.org/xliff/xliff-core/v2.1/os/xliff-core-v2.1-os.html#dataref
     *
     * This method never throws: if anything fails while transforming, the ORIGINAL string is returned
     * untouched (never a half-transformed one).
     *
     * @param string $string
     *
     * @return string
     */
    public function replace( $string ) {

        // nothing to do if the map is empty or the string carries no dataRef-like attribute
        if ( $this->map->isEmpty() || !$this->hasAnyDataRefAttribute( $string ) ) {
            return $string;
        }

        // work on a copy, so that a failure half-way through cannot leak a partially converted string
        $replaced  = $string;
        $completed = false;

        // try not to throw exception for wrong segments with opening tags and no closing
        try {

            $html          = XmlParser::parse( $replaced, true );
            $dataRefEndMap = new ArrayList();

            foreach ( $html as $node ) {

                // 1. Replace <ph>|<sc>|<ec> tags
                $replaced = $this->recursiveTransformDataRefToPhTag( $node, $replaced );

                // 2. Replace self-closed <pc dataRefStart="xyz" /> tags
                $replaced = $this->recursiveReplaceSelfClosedPcTags( $node, $replaced );

                // 3. Collect, in the same pass, the data needed by replaceClosingPcTags()
                // (a bare </pc> carries no attributes, so they must come from its opening tag)
                $this->extractDataRefMapRecursively( $node, $dataRefEndMap );

            }

            // 4. Replace the remaining (paired) <pc> tags
            $replaced = $this->replaceOpeningPcTags( $replaced );
            $replaced = $this->replaceClosingPcTags( $replaced, $dataRefEndMap );

            $completed = true;

        } catch ( Exception $ignore ) {
            // deliberately best-effort: fall through and hand back the original string
        } finally {
            // Returning from `finally` is intentional: it keeps the historical guarantee that this
            // method never propagates a failure to the caller.
            return $completed ? $replaced : $string;
        }

    }

    /**
     * Cheap pre-check: tells whether the string contains a quoted dataRef, dataRefStart
     * or dataRefEnd attribute at all.
     *
     * @param string $string
     *
     * @return bool
     */
    private function hasAnyDataRefAttribute( $string ) {
        return (bool)preg_match( '/(dataRef|dataRefStart|dataRefEnd)=[\'"].*?[\'"]/', $string );
    }

    /**
     * This function adds equiv-text attribute to <ph>, <ec>, and <sc> tags.
     *
     * Please note that <ec> and <sc> tags are converted to <ph> tags (needed by Matecat);
     * the kind of the original tag is recorded in the 'ctype' attribute.
     *
     * If the tag has no id attribute, the dataRef identifier is used as id.
     *
     * Only leaf nodes are converted; nodes with children are just traversed.
     *
     * @param object $node   parsed node (reads has_children, inner_html, tagName, attributes, node)
     * @param string $string the string being transformed
     *
     * @return string
     */
    private function recursiveTransformDataRefToPhTag( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveTransformDataRefToPhTag( $childNode, $string );
            }

            return $string;
        }

        // accept only those tags
        switch ( $node->tagName ) {
            case 'ph':
                $ctype = CTypeEnum::PH_DATA_REF;
                break;
            case 'sc':
                $ctype = CTypeEnum::SC_DATA_REF;
                break;
            case 'ec':
                $ctype = CTypeEnum::EC_DATA_REF;
                break;
            default:
                return $string;
        }

        $attributesMap = Map::instance( $node->attributes );
        $dataRefName   = $attributesMap->get( 'dataRef' ); // map identifier. Eg: source1
        $dataRefValue  = $this->map->get( $dataRefName );

        // convert only when the map holds a value for this dataRef, otherwise skip.
        // NOTE(review): this is a truthiness test, so a mapped value of "0" is skipped too — confirm
        // whether that is intended before tightening it.
        if ( !$dataRefValue ) {
            return $string;
        }

        return $this->replaceNewTagString(
                $node->node,
                $attributesMap->getOrDefault( 'id', $dataRefName ),
                $dataRefValue,
                $ctype,
                $string,
                null // no suffix is appended to the id for these tags
        );

    }

    /**
     * Check if values in the map are null or an empty string, in that case, convert them to NULL string
     *
     * @param array $map
     *
     * @return array the same keys, with null / '' values replaced by the string 'NULL'
     */
    private function sanitizeMap( $map ) {

        foreach ( $map as $name => $value ) {
            if ( $value === null || $value === '' ) {
                $map[ $name ] = 'NULL';
            }
        }

        return $map;
    }

    /**
     * Replaces self-closed <pc dataRefStart="xyz"/> tags whose dataRefStart is present in the map.
     * Nodes with children are only traversed.
     *
     * @param object $node   parsed node (reads has_children, inner_html, tagName, self_closed, attributes, node)
     * @param string $string the string being transformed
     *
     * @return string
     */
    private function recursiveReplaceSelfClosedPcTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveReplaceSelfClosedPcTags( $childNode, $string );
            }

        } elseif ( $node->tagName === 'pc' && $node->self_closed === true ) {

            $attributesMap = Map::instance( $node->attributes );

            // read through the Map: a <pc id="1"/> without dataRefStart must not raise an undefined-index error
            $dataRefStartValue = $this->map->get( $attributesMap->get( 'dataRefStart' ) );

            if ( $dataRefStartValue ) {

                $string = $this->replaceNewTagString(
                        $node->node,
                        $attributesMap->get( 'id' ),
                        $dataRefStartValue,
                        CTypeEnum::PC_SELF_CLOSE_DATA_REF,
                        $string
                );

            }

        }

        return $string;

    }

    /**
     * Extract (recursively) the dataRefEnd map from single nodes.
     *
     * One entry is appended for every non self-closed <pc>, children before their parent, i.e. in the
     * same order in which the corresponding </pc> closing tags appear in the string.
     * When dataRefEnd is missing, dataRefStart is used in its place.
     *
     * @param object    $node
     * @param ArrayList $dataRefEndMap collector, filled in place
     */
    private function extractDataRefMapRecursively( $node, ArrayList $dataRefEndMap ) {

        // we have to build the map for the closing pc tag, so get the children first
        if ( $node->has_children ) {
            foreach ( $node->inner_html as $nestedNode ) {
                $this->extractDataRefMapRecursively( $nestedNode, $dataRefEndMap );
            }
        }

        // EXCLUDE self closed <pc/>
        if ( $node->tagName === 'pc' && $node->self_closed === false ) {

            $attributesMap = Map::instance( $node->attributes );
            $dataRefEnd    = $attributesMap->getOrDefault( 'dataRefEnd', $attributesMap->get( 'dataRefStart' ) );

            $dataRefEndMap[] = [
                    'id'         => $attributesMap->get( 'id' ),
                    'dataRefEnd' => $dataRefEnd,
            ];

        }

    }

    /**
     * Replace opening <pc> tags with correct reference in the $string.
     *
     * The value is looked up by dataRefStart; when only dataRefEnd is present it is used instead.
     * Tags with neither attribute are left untouched. Identifiers missing from the map yield 'NULL'.
     *
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    private function replaceOpeningPcTags( $string ) {

        // 0 matches or a PCRE failure (e.g. invalid UTF-8 with the /u flag): nothing to replace
        if ( !preg_match_all( '|<pc ([^>/]+?)>|iu', $string, $openingPcMatches ) ) {
            return $string;
        }

        // identical tags are all replaced at once by str_replace(), so each distinct tag is handled once
        foreach ( array_unique( $openingPcMatches[ 0 ] ) as $match ) {

            $node       = XmlParser::parse( $match . '</pc>', true )[ 0 ]; // add a closing tag to not break xml integrity
            $attributes = $node->attributes;

            if ( isset( $attributes[ 'dataRefStart' ] ) ) {
                $dataRefStart = $attributes[ 'dataRefStart' ];
            } elseif ( isset( $attributes[ 'dataRefEnd' ] ) ) {
                // missing `dataRefStart`: fall back to `dataRefEnd`
                $dataRefStart = $attributes[ 'dataRefEnd' ];
            } else {
                continue;
            }

            $string = $this->replaceNewTagString(
                    $match,
                    Map::instance( $attributes )->get( 'id' ),
                    $this->map->getOrDefault( $dataRefStart, 'NULL' ),
                    CTypeEnum::PC_OPEN_DATA_REF,
                    $string
            );

        }

        return $string;
    }

    /**
     * Replace closing </pc> tags with correct reference in the $string
     * thanks to $dataRefEndMap
     *
     * The n-th </pc> found in the string is paired with the n-th entry of $dataRefEndMap; closing tags
     * whose entry is missing or has no dataRefEnd are left as they are. The generated id gets a '_2' suffix.
     *
     * @param string    $string
     * @param ArrayList $dataRefEndMap as filled by extractDataRefMapRecursively()
     *
     * @return string
     */
    private function replaceClosingPcTags( $string, ArrayList $dataRefEndMap ) {

        // 0 matches or a PCRE failure: nothing to replace
        if ( !preg_match_all( '|</pc>|iu', $string, $closingPcMatches, PREG_OFFSET_CAPTURE ) ) {
            return $string;
        }

        $length = strlen( '</pc>' );
        $delta  = 0; // how much the string has grown so far because of the previous replacements

        foreach ( $closingPcMatches[ 0 ] as $index => $match ) {

            $attr = $dataRefEndMap->get( $index );
            if ( empty( $attr ) || !isset( $attr[ 'dataRefEnd' ] ) ) {
                continue;
            }

            // conversion for the closing </pc> tag
            $completeTag = $this->getNewTagString(
                    '</pc>',
                    $attr[ 'id' ],
                    $this->map->getOrDefault( $attr[ 'dataRefEnd' ], 'NULL' ),
                    CTypeEnum::PC_CLOSE_DATA_REF,
                    '_2'
            );

            // offsets were captured (in bytes) on the unmodified string: shift them by the accumulated growth
            $string = substr_replace( $string, $completeTag, $match[ 1 ] + $delta, $length );
            $delta  += strlen( $completeTag ) - $length;

        }

        return $string;

    }

    /**
     * Reverts replace(): every converted <ph> tag is swapped back with the original markup
     * stored (base64-encoded) in its 'x-orig' attribute.
     *
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    public function restore( $string ) {

        // if the map is empty return string as is
        // (empty() on the Map object would always be false, so ask the Map itself)
        if ( $this->map->isEmpty() ) {
            return $string;
        }

        $html = XmlParser::parse( $string, true );

        foreach ( $html as $node ) {
            $string = $this->recursiveRestoreOriginalTags( $node, $string );
        }

        return $string;
    }

    /**
     * Restores a single leaf node (or, recursively, the leaves below it) when its ctype is recognised
     * by CTypeEnum::isLayer2Constant(). Only the first occurrence of the node in the string is replaced.
     *
     * @param object $node
     * @param string $string
     *
     * @return string
     */
    private function recursiveRestoreOriginalTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveRestoreOriginalTags( $childNode, $string );
            }

            return $string;
        }

        $nodeAttributesMap = Map::instance( $node->attributes );

        if ( !CTypeEnum::isLayer2Constant( $nodeAttributesMap->get( 'ctype' ) ) ) {
            return $string;
        }

        $encodedOriginal = $nodeAttributesMap->get( 'x-orig' );
        if ( $encodedOriginal === null ) {
            // nothing to restore from: leave the tag in place instead of wiping it out
            return $string;
        }

        $position = strpos( $string, $node->node );
        if ( $position === false ) {
            return $string;
        }

        // Literal splice instead of preg_replace(): the original markup is data, and sequences such as
        // "$1" or "\0" inside it must not be interpreted as back-references.
        return substr_replace( $string, base64_decode( $encodedOriginal ), $position, strlen( $node->node ) );

    }

    /**
     * Builds the self-closed <ph> tag that takes the place of an original tag.
     *
     * Attributes, in order: id (only when $id is not null, followed by $upCountIdValue), ctype,
     * equiv-text ("base64:" + base64 of the referenced value) and x-orig (base64 of the original markup).
     *
     * @param string      $actualNodeString the original markup being replaced
     * @param string|null $id
     * @param string      $dataRefValue
     * @param string      $ctype
     * @param string|null $upCountIdValue   suffix appended to the id, if any
     *
     * @return string
     */
    private function getNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $upCountIdValue = null ) {

        $newTag = [ '<ph' ];

        if ( isset( $id ) ) {
            $newTag[] = 'id="' . $id . $upCountIdValue . '"';
        }

        $newTag[] = 'ctype="' . $ctype . '"';
        $newTag[] = 'equiv-text="base64:' . base64_encode( $dataRefValue ) . '"';
        $newTag[] = 'x-orig="' . base64_encode( $actualNodeString ) . '"';

        return implode( " ", $newTag ) . '/>';

    }

    /**
     * Replaces EVERY occurrence of $actualNodeString inside $originalString with the <ph> tag built
     * by getNewTagString().
     *
     * @param string      $actualNodeString the original markup to look for
     * @param string|null $id
     * @param string      $dataRefValue
     * @param string      $ctype
     * @param string      $originalString   the string being transformed
     * @param string|null $upCountIdValue   suffix appended to the id; defaults to '_1', pass null for none
     *
     * @return string
     */
    private function replaceNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $originalString, $upCountIdValue = '_1' ) {
        $newTag = $this->getNewTagString( $actualNodeString, $id, $dataRefValue, $ctype, $upCountIdValue );

        return str_replace( $actualNodeString, $newTag, $originalString );
    }

}