Passed
Push — master ( f5dc65...3a8597 )
by Domenico
02:13
created

DataRefReplacer::recursiveCleanFromEquivText()   A

Complexity

Conditions 6
Paths 4

Size

Total Lines 18
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
eloc 10
c 1
b 0
f 0
nc 4
nop 2
dl 0
loc 18
rs 9.2222
1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * @author hashashiyyin [email protected] / [email protected]
5
 * Date: 22/04/24
6
 * Time: 15:13
7
 *
8
 */
9
10
namespace Matecat\SubFiltering\Utils;
11
12
use DOMException;
13
use Exception;
14
use Matecat\SubFiltering\Enum\CTypeEnum;
15
use Matecat\XmlParser\Exception\InvalidXmlException;
16
use Matecat\XmlParser\Exception\XmlParsingException;
17
use Matecat\XmlParser\XmlParser;
18
19
class DataRefReplacer {
20
    /**
21
     * @var array
22
     */
23
    private $map;
24
25
    /**
     * DataRefReplacer constructor.
     *
     * @param array|null $map Values keyed by the dataRef identifiers used in the tags;
     *                        a missing (null) map is stored as an empty array.
     */
    public function __construct( array $map = null ) {
        // Normalise null to [] so the property always honours its `@var array` contract.
        $this->map = ( $map === null ) ? [] : $map;
    }
33
34
    /**
     * Rewrites the tags carrying a dataRef reference (<ph>, <sc>, <ec>, <pc>) into <ph> tags
     * holding an 'equiv-text' attribute whose value is taken from the provided map.
     *
     * For a complete reference see:
     *
     * http://docs.oasis-open.org/xliff/xliff-core/v2.1/os/xliff-core-v2.1-os.html#dataref
     *
     * The string is returned untouched when the map is empty or when it carries no
     * dataRef/dataRefStart/dataRefEnd attribute.
     *
     * Any Exception raised while parsing or rewriting is swallowed on purpose: in that case
     * the string is returned with whatever replacements were completed before the failure.
     *
     * @param string $string
     *
     * @return string
     */
    public function replace( $string ) {

        // nothing to do without a map or without any dataRef attribute in the string
        if ( empty( $this->map ) || !$this->hasAnyDataRefAttribute( $string ) ) {
            return $string;
        }

        try {

            $html = XmlParser::parse( $string, true );

            $dataRefEndMap = [];

            foreach ( $html as $node ) {

                // 1. Replace <ph>|<sc>|<ec> tags
                $string = $this->recursiveTransformDataRefToPhTag( $node, $string );

                // 2. Replace self-closed <pc dataRefStart="xyz" /> tags
                $string = $this->recursiveReplaceSelfClosedPcTags( $node, $string );

                // 3. Build the DataRefEndMap needed by replaceClosingPcTags
                //    (needed for correct handling of </pc> closing tags)
                $this->extractDataRefMapRecursively( $node, $dataRefEndMap );

            }

            // 4. Replace opening and closing <pc> tags
            $string = $this->replaceOpeningPcTags( null, $string );
            $string = $this->replaceClosingPcTags( $string, $dataRefEndMap );

        } catch ( Exception $ignore ) {
            // Deliberately ignored: malformed segments (e.g. an opening tag with no closing one)
            // must not break the caller. Only Exception is silenced here; the previous
            // `return` inside `finally` also hid every other throwable.
        }

        return $string;
    }
86
87
    /**
     * Tells whether the string contains at least one quoted dataRef, dataRefStart
     * or dataRefEnd attribute.
     *
     * @param string $string
     *
     * @return bool true only when the pattern matches; false on no match or on a regex failure
     */
    private function hasAnyDataRefAttribute( $string ) {
        $dataRefAttributePattern = '/(dataRef|dataRefStart|dataRefEnd)=[\'"].*?[\'"]/';

        return preg_match( $dataRefAttributePattern, $string ) === 1;
    }
95
96
    /**
     * Adds the equiv-text attribute to childless <ph>, <ec> and <sc> tags whose dataRef
     * attribute is a key of the map.
     *
     * Please note that <ec> and <sc> tags are converted to <ph> tags (needed by Matecat);
     * a ctype attribute records which kind of tag the <ph> was generated from, and the
     * original tag is kept base64-encoded in the x-orig attribute.
     *
     * If the tag has no id attribute, the id is copied from the dataRef attribute and
     * x-removeId="true" is added next to it.
     *
     * A null or empty-string map value is replaced by the literal string 'NULL', both in
     * the generated tag and in the map itself.
     *
     * @param object $node
     * @param string $string
     *
     * @return string
     */
    private function recursiveTransformDataRefToPhTag( $node, $string ) {

        // nodes with children are only traversed, never replaced themselves
        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveTransformDataRefToPhTag( $childNode, $string );
            }

            return $string;
        }

        // maps each handled tag name to the ctype written on the generated <ph>
        $cTypeByTagName = [
                'ph' => CTypeEnum::PH_DATA_REF,
                'sc' => CTypeEnum::SC_DATA_REF,
                'ec' => CTypeEnum::EC_DATA_REF,
        ];

        if ( !is_string( $node->tagName ) || !isset( $cTypeByTagName[ $node->tagName ] ) ) {
            return $string;
        }

        if ( !isset( $node->attributes[ 'dataRef' ] ) ) {
            return $string;
        }

        $dataRefName = $node->attributes[ 'dataRef' ];   // map identifier. Eg: source1

        // skip references which are not defined in the map
        // (array_key_exists: a key holding a null value still counts as defined)
        if ( !array_key_exists( $dataRefName, $this->map ) ) {
            return $string;
        }

        $dataRefValue = $this->map[ $dataRefName ];

        // null or empty string: use the 'NULL' placeholder for THIS tag too,
        // and store it in the map as the original code did
        if ( is_null( $dataRefValue ) || $dataRefValue === '' ) {
            $dataRefValue              = 'NULL';
            $this->map[ $dataRefName ] = $dataRefValue;
        }

        $newTag = [ '<ph' ];

        // if there is no id copy it from dataRef
        if ( !isset( $node->attributes[ 'id' ] ) ) {
            $newTag[] = 'id="' . $dataRefName . '"';
            $newTag[] = 'x-removeId="true"';
        } else {
            $newTag[] = 'id="' . $node->attributes[ 'id' ] . '"';
        }

        $newTag[] = 'ctype="' . $cTypeByTagName[ $node->tagName ] . '"';
        $newTag[] = 'equiv-text="base64:' . base64_encode( $dataRefValue ) . '"';
        $newTag[] = 'x-orig="' . base64_encode( $node->node ) . '"';

        return str_replace( $node->node, implode( " ", $newTag ) . '/>', $string );
    }
175
176
    /**
     * Replaces self-closed <pc dataRefStart="xyz"/> tags whose dataRefStart is a key of the
     * map with a <ph> tag carrying the mapped value in equiv-text (base64 encoded) and the
     * original tag in x-orig (base64 encoded).
     *
     * Nodes with children are only traversed. A null or empty-string map value is rendered
     * as the literal string 'NULL', consistently with the other replacement methods of this class.
     *
     * @param object $node
     * @param string $string
     *
     * @return string
     */
    private function recursiveReplaceSelfClosedPcTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveReplaceSelfClosedPcTags( $childNode, $string );
            }

            return $string;
        }

        // only self-closed <pc/> tags are handled here
        if ( $node->tagName !== 'pc' || $node->self_closed !== true ) {
            return $string;
        }

        // the reference must be present and defined in the map
        if ( !isset( $node->attributes[ 'dataRefStart' ] ) || !array_key_exists( $node->attributes[ 'dataRefStart' ], $this->map ) ) {
            return $string;
        }

        $startValue = $this->map[ $node->attributes[ 'dataRefStart' ] ];

        // never hand null to base64_encode; use the same placeholder as the sibling methods
        if ( is_null( $startValue ) || $startValue === '' ) {
            $startValue = 'NULL';
        }

        $newTag = [ '<ph' ];

        if ( isset( $node->attributes[ 'id' ] ) ) {
            $newTag[] = 'id="' . $node->attributes[ 'id' ] . '_1"';
        }

        $newTag[] = 'ctype="' . CTypeEnum::PC_SELF_CLOSE_DATA_REF . '"';
        $newTag[] = 'equiv-text="base64:' . base64_encode( $startValue ) . '"';
        $newTag[] = 'x-orig="' . base64_encode( $node->node ) . '"';

        return str_replace( $node->node, implode( " ", $newTag ) . '/>', $string );
    }
215
216
    /**
     * Collects (recursively) one entry per non self-closed <pc> node into $dataRefEndMap.
     *
     * Each entry holds the node 'id' (or null) and its 'dataRefEnd', which falls back to
     * the dataRefStart attribute and finally to null. Children are visited before their
     * parent, so nested <pc> nodes are appended before the node that contains them.
     *
     * @param object $node
     * @param array  $dataRefEndMap list filled by reference
     */
    private function extractDataRefMapRecursively( $node, &$dataRefEndMap ) {

        // the map is consumed for the closing pc tags, so descend into the children first
        if ( $node->has_children ) {
            foreach ( $node->inner_html as $descendant ) {
                $this->extractDataRefMapRecursively( $descendant, $dataRefEndMap );
            }
        }

        // anything but a NOT self closed <pc> is ignored
        if ( $node->tagName !== 'pc' || $node->self_closed !== false ) {
            return;
        }

        // first available reference wins: dataRefEnd, then dataRefStart
        $endReference = null;
        foreach ( [ 'dataRefEnd', 'dataRefStart' ] as $attributeName ) {
            if ( isset( $node->attributes[ $attributeName ] ) ) {
                $endReference = $node->attributes[ $attributeName ];
                break;
            }
        }

        $pcId = null;
        if ( isset( $node->attributes[ 'id' ] ) ) {
            $pcId = $node->attributes[ 'id' ];
        }

        $dataRefEndMap[] = [
                'id'         => $pcId,
                'dataRefEnd' => $endReference,
        ];
    }
249
250
    /**
     * Replaces the opening <pc ...> tags carrying a dataRefStart (or, failing that, a
     * dataRefEnd) attribute with a <ph> tag holding the mapped value in equiv-text
     * (base64 encoded) and the original opening tag in x-orig (base64 encoded).
     *
     * A reference that is missing from the map, or mapped to null or to an empty string,
     * is rendered as the literal string 'NULL'.
     *
     * @param mixed  $node   Unused; kept only so that the signature stays unchanged.
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    private function replaceOpeningPcTags( $node, $string ) {

        // preg_match_all yields 0 on no match and false on failure: nothing to replace either way
        if ( !preg_match_all( '|<pc ([^>/]+?)>|iu', $string, $openingPcMatches ) ) {
            return $string;
        }

        foreach ( $openingPcMatches[ 0 ] as $match ) {

            $pcNode = XmlParser::parse( $match . '</pc>', true )[ 0 ]; // add a closing tag to not break xml integrity

            // use dataRefStart; when it is missing fall back to dataRefEnd
            if ( isset( $pcNode->attributes[ 'dataRefStart' ] ) ) {
                $startReference = $pcNode->attributes[ 'dataRefStart' ];
            } elseif ( isset( $pcNode->attributes[ 'dataRefEnd' ] ) ) {
                $startReference = $pcNode->attributes[ 'dataRefEnd' ];
            } else {
                // no reference at all: this <pc> is left as it is
                continue;
            }

            // handling null values in original data map; a plain "0" is a legitimate value and is kept
            $startValue = 'NULL';
            if ( isset( $this->map[ $startReference ] ) && $this->map[ $startReference ] !== '' ) {
                $startValue = $this->map[ $startReference ];
            }

            $newTag = [ '<ph' ];

            if ( isset( $pcNode->attributes[ 'id' ] ) ) {
                $newTag[] = 'id="' . $pcNode->attributes[ 'id' ] . '_1"';
            }

            $newTag[] = 'ctype="' . CTypeEnum::PC_OPEN_DATA_REF . '"';
            $newTag[] = 'equiv-text="base64:' . base64_encode( $startValue ) . '"';
            $newTag[] = 'x-orig="' . base64_encode( $match ) . '"';

            // conversion for opening <pc> tag
            $string = str_replace( $match, implode( " ", $newTag ) . '/>', $string );
        }

        return $string;
    }
300
301
    /**
     * Replaces the closing </pc> tags in $string with <ph> tags, using $dataRefEndMap.
     *
     * The n-th </pc> found in the string is paired with the n-th entry of $dataRefEndMap;
     * it is replaced only when that entry has a non-null 'dataRefEnd'. A reference that is
     * missing from the map, or mapped to null or to an empty string, is rendered as the
     * literal string 'NULL'.
     *
     * @param string $string
     * @param array  $dataRefEndMap list of [ 'id' => ..., 'dataRefEnd' => ... ] entries
     *
     * @return string
     */
    private function replaceClosingPcTags( $string, $dataRefEndMap = [] ) {

        // preg_match_all yields 0 on no match and false on failure: nothing to replace either way
        if ( !preg_match_all( '|</pc>|iu', $string, $closingPcMatches, PREG_OFFSET_CAPTURE ) ) {
            return $string;
        }

        // offsets were captured on the original string: $delta tracks how many bytes
        // the replacements done so far have added
        $delta = 0;

        foreach ( $closingPcMatches[ 0 ] as $index => $match ) {

            if ( !isset( $dataRefEndMap[ $index ][ 'dataRefEnd' ] ) ) {
                continue;
            }

            $attr         = $dataRefEndMap[ $index ];
            $endReference = $attr[ 'dataRefEnd' ];

            // a plain "0" is a legitimate value and is kept
            $endValue = 'NULL';
            if ( isset( $this->map[ $endReference ] ) && $this->map[ $endReference ] !== '' ) {
                $endValue = $this->map[ $endReference ];
            }

            $newTag = [ '<ph' ];

            if ( isset( $attr[ 'id' ] ) ) {
                $newTag[] = 'id="' . $attr[ 'id' ] . '_2"';
            }

            $newTag[] = 'ctype="' . CTypeEnum::PC_CLOSE_DATA_REF . '"';
            $newTag[] = 'equiv-text="base64:' . base64_encode( $endValue ) . '"';
            $newTag[] = 'x-orig="' . base64_encode( '</pc>' ) . '"';

            // conversion for closing </pc> tag
            $completeTag = implode( " ", $newTag ) . '/>';
            $length      = strlen( $match[ 0 ] ); // byte length of the matched '</pc>'
            $string      = substr_replace( $string, $completeTag, $match[ 1 ] + $delta, $length );
            $delta       += strlen( $completeTag ) - $length;
        }

        return $string;
    }
349
350
    /**
     * Walks every parsed node of the string and hands it to recursiveRestoreOriginalTags,
     * which puts back the original tags. With an empty map the string is returned as is.
     *
     * @param string $string
     *
     * @return string
     * @throws DOMException
     * @throws InvalidXmlException
     * @throws XmlParsingException
     */
    public function restore( $string ) {

        // without a map there is nothing to restore
        if ( empty( $this->map ) ) {
            return $string;
        }

        foreach ( XmlParser::parse( $string, true ) as $parsedNode ) {
            $string = $this->recursiveRestoreOriginalTags( $parsedNode, $string );
        }

        return $string;
    }
375
376
    /**
     * Restores the original tag of a childless node whose ctype is one of the handled
     * CTypeEnum values: the first occurrence of the node in $string is replaced with the
     * base64-decoded content of its x-orig attribute. Nodes with children are only traversed.
     *
     * The replacement is a literal splice: characters such as '$1' or '\1' inside the
     * original tag are copied verbatim. A node without x-orig is left untouched.
     *
     * @param object $node
     * @param string $string
     *
     * @return string
     */
    private function recursiveRestoreOriginalTags( $node, $string ) {

        if ( $node->has_children ) {

            foreach ( $node->inner_html as $childNode ) {
                $string = $this->recursiveRestoreOriginalTags( $childNode, $string );
            }

            return $string;
        }

        $cType = isset( $node->attributes[ 'ctype' ] ) ? $node->attributes[ 'ctype' ] : null;

        if ( !$cType ) {
            return $string;
        }

        switch ( $cType ) {
            case CTypeEnum::ORIGINAL_PC_OPEN:
            case CTypeEnum::ORIGINAL_PC_CLOSE:
            case CTypeEnum::ORIGINAL_PH_OR_NOT_DATA_REF:
            case CTypeEnum::PH_DATA_REF:
            case CTypeEnum::PC_OPEN_DATA_REF:
            case CTypeEnum::PC_CLOSE_DATA_REF:
            case CTypeEnum::PC_SELF_CLOSE_DATA_REF:
            case CTypeEnum::SC_DATA_REF:
            case CTypeEnum::EC_DATA_REF:
                break;
            default:
                return $string;
        }

        // nothing to restore from: keep the tag instead of replacing it with an empty string
        if ( !isset( $node->attributes[ 'x-orig' ] ) ) {
            return $string;
        }

        $position = strpos( $string, $node->node );

        if ( $position === false ) {
            return $string;
        }

        // literal replacement of the first occurrence only; unlike a preg_replace
        // replacement string, this never interprets '$n' / '\n' sequences of the original tag
        return substr_replace( $string, base64_decode( $node->attributes[ 'x-orig' ] ), $position, strlen( $node->node ) );
    }
416
417
}