Issues (15)

src/XliffReplacer/Xliff20.php (2 issues)

1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * @author hashashiyyin [email protected] / [email protected]
5
 * Date: 02/08/24
6
 * Time: 17:51
7
 *
8
 */
9
10
namespace Matecat\XliffParser\XliffReplacer;
11
12
use Matecat\XliffParser\Utils\Strings;
13
14
class Xliff20 extends AbstractXliffReplacer {
15
16
    /**
     * Incremented each time getWordCountGroupForXliffV2() builds a word-count group.
     *
     * @var int
     */
    private int $mdaGroupCounter = 0;

    /**
     * True once the current <unit> is known to contain (or has been given)
     * an <mda:metadata> element. XLIFF 2.* only; reset when the unit closes.
     *
     * @var bool
     */
    protected bool $unitContainsMda = false;

    /**
     * Tag name used for alternative matches.
     * NOTE(review): not referenced in this class; presumably consumed by the parent replacer.
     *
     * @var string
     */
    protected string $alternativeMatchesTag = 'mtc:matches';

    /**
     * Name of the translation-unit element for XLIFF 2.* (<unit>).
     *
     * @var string
     */
    protected string $tuTagName = 'unit';

    /**
     * Custom namespace prefix.
     *
     * @var string
     */
    protected string $namespace = 'matecat';

    /**
     * Element names that tagClose() treats as buffered nodes: when one of them
     * closes, the accumulated CDATA buffer is flushed together with the closing tag.
     *
     * @var string[]
     */
    protected array $nodesToBuffer = [
        'source',
        'mda:metadata',
        'memsource:additionalTagData',
        'originalData',
        'note',
    ];
50
51
    /**
     * @inheritDoc
     *
     * Rebuilds the opening tag of every element that is not nested inside a
     * <target> and hands it to checkForSelfClosedTagAndFlush().
     *
     * While rebuilding it:
     *  - injects the word-count <mda:metadata> block before the first
     *    <notes>/<originalData>/<segment>/<ignorable> of a unit that has none;
     *  - drops any incoming `state` attribute and appends the state computed
     *    from the current segment status on <segment>;
     *  - adds `matecat:segment-id` to <unit> when the current segment has a `sid`.
     *
     * @param resource|object $parser the XML parser instance driving the callbacks
     * @param string          $name   name of the element being opened
     * @param array           $attr   attributes of the element, name => value
     */
    protected function tagOpen( $parser, string $name, array $attr ) {

        $this->handleOpenUnit( $name, $attr );

        if ( 'mda:metadata' === $name ) {
            $this->unitContainsMda = true;
        }

        $this->trySetAltTrans( $name );
        $this->checkSetInTarget( $name );

        // open buffer
        $this->setInBuffer( $name );

        // Check if we are inside a <target>; this happens only if there are targets inside the unit.
        // <target> must be stripped to be replaced, so this guard avoids <target> reconstruction.
        if ( $this->inTarget ) {
            return;
        }

        $tag = '';

        //
        // ============================================
        // only for Xliff 2.*
        // ============================================
        //
        // In xliff v2 we MUST add <mda:metadata> BEFORE <notes>/<originalData>/<segment>/<ignorable>
        //
        // As documentation says, <unit> contains:
        //
        // - elements from other namespaces, OPTIONAL
        // - Zero or one <notes> elements followed by
        // - Zero or one <originalData> element followed by
        // - One or more <segment> or <ignorable> elements in any order.
        //
        // For more info please refer to:
        //
        // http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html#unit
        //
        if ( in_array( $name, [ 'notes', 'originalData', 'segment', 'ignorable' ], true ) &&
                $this->unitContainsMda === false &&
                !empty( $this->transUnits[ $this->currentTransUnitId ] ) &&
                !$this->hasWrittenCounts
        ) {
            // we need to update counts here
            $this->updateCounts();
            $this->hasWrittenCounts = true;
            $tag                    .= $this->getWordCountGroupForXliffV2();
            $this->unitContainsMda  = true;
        }

        // construct tag
        $tag .= "<$name ";

        foreach ( $attr as $k => $v ) {
            // normal tag flow: copy every attribute except `state`, which is recomputed below
            if ( $k !== 'state' ) {
                $tag .= "$k=\"$v\" ";
            }
        }

        $seg = $this->getCurrentSegment();

        if ( $name === $this->tuTagName && !empty( $seg ) && isset( $seg[ 'sid' ] ) ) {

            // add `matecat:segment-id` to xliff v.2* unless the unit already carries it
            if ( strpos( $tag, 'matecat:segment-id' ) === false ) {
                $tag .= "matecat:segment-id=\"{$seg[ 'sid' ]}\" ";
            }

        }

        // replace state for xliff v2
        if ( 'segment' === $name ) {
            // The current segment may be missing: read the status defensively so that no
            // undefined-index / null-offset notice is raised (the value passed on is still null).
            [ $stateProp, ] = StatusToStateAttribute::getState( $this->xliffVersion, $seg[ 'status' ] ?? null );
            $tag .= $stateProp;
        }

        $tag = $this->handleOpenXliffTag( $name, $attr, $tag );

        $this->checkForSelfClosedTagAndFlush( $parser, $tag );

    }
138
139
    /**
     * Runs the parent handling of the opening tag and, for the root <xliff>
     * element only, appends the OASIS metadata namespace declaration when the
     * source attributes do not already declare `xmlns:mda`.
     *
     * @param string $name name of the element being opened
     * @param array  $attr attributes of the element, name => value
     * @param string $tag  the opening tag built so far
     *
     * @return string the opening tag, possibly extended with the namespace declaration
     */
    protected function handleOpenXliffTag( string $name, array $attr, string $tag ): string {
        $tag = parent::handleOpenXliffTag( $name, $attr, $tag );

        $isRootElement        = ( $name === 'xliff' );
        $alreadyDeclaresMdaNs = array_key_exists( 'xmlns:mda', $attr );

        if ( !$isRootElement || $alreadyDeclaresMdaNs ) {
            return $tag;
        }

        // add oasis xliff 20 namespace
        return $tag . 'xmlns:mda="urn:oasis:names:tc:xliff:metadata:2.0"';
    }
155
156
    /**
     * @inheritDoc
     *
     * Writes the closing part of an element. Depending on the element:
     *  - </target> (outside alt-trans): the whole <target> is replaced with the
     *    prepared translation, or, for non-translatable units, the buffered
     *    content is written back unchanged;
     *  - a buffered node (see $nodesToBuffer): the buffer is flushed; when
     *    closing an existing <mda:metadata> the word-count group is appended first;
     *  - </segment>: a <target> is added if none was written for this segment;
     *  - any other element: its closing tag is buffered or flushed.
     *
     * Per-unit state is reset when the <unit> itself closes.
     *
     * @param resource|object $parser the XML parser instance driving the callbacks
     * @param string          $name   name of the element being closed
     */
    protected function tagClose( $parser, string $name ) {
        $tag = '';

        /**
         * if is a tag within <target> or
         * if it is an empty tag, do not add closing tag because we have already closed it in
         *
         * self::tagOpen method
         */
        if ( !$this->isEmpty ) {

            // write closing tag if is not a target
            // EXCLUDE the target nodes with currentTransUnitIsTranslatable = 'NO'
            // (boolean `&&` is used instead of the low-precedence `and` keyword)
            if ( !$this->inTarget && $this->currentTransUnitIsTranslatable !== 'no' ) {
                $tag = "</$name>";
            }

            if ( 'target' === $name && !$this->inAltTrans ) {

                if ( isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {

                    $seg = $this->getCurrentSegment();

                    // update counts
                    if ( !$this->hasWrittenCounts && !empty( $seg ) ) {
                        $this->updateSegmentCounts( $seg );
                    }

                    // delete translations so the prepareSegment
                    // will put source content in target tag
                    if ( $this->sourceInTarget ) {
                        $seg[ 'translation' ] = '';
                        $this->resetCounts();
                    }

                    // append $translation
                    $translation = $this->prepareTranslation( $seg );

                    //append translation
                    $tag = "<target>$translation</target>";

                } elseif ( !empty( $this->CDATABuffer ) && $this->currentTransUnitIsTranslatable === 'no' ) {

                    // These are target nodes with currentTransUnitIsTranslatable = 'NO'
                    $this->bufferIsActive = false;
                    $tag                  = $this->CDATABuffer . "</$name>";
                    $this->CDATABuffer    = "";
                }

                // signal we are leaving a target
                $this->targetWasWritten = true;
                $this->inTarget         = false;
                $this->postProcAndFlush( $this->outputFP, $tag, true );

            } elseif ( in_array( $name, $this->nodesToBuffer, true ) ) { // we are closing a critical CDATA section

                $this->bufferIsActive = false;

                // only for Xliff 2.*
                // write here <mda:metaGroup> and <mda:meta> if already present in the <unit>
                if ( 'mda:metadata' === $name && $this->unitContainsMda && !$this->hasWrittenCounts ) {

                    // we need to update counts here
                    $this->updateCounts();
                    $this->hasWrittenCounts = true;

                    $tag = $this->CDATABuffer;
                    $tag .= $this->getWordCountGroupForXliffV2( false );
                    $tag .= "    </mda:metadata>";

                } else {
                    $tag = $this->CDATABuffer . "</$name>";
                }

                $this->CDATABuffer = "";

                //flush to the pointer
                $this->postProcAndFlush( $this->outputFP, $tag );

            } elseif ( 'segment' === $name ) {

                // only for Xliff 2.*
                // if segment has no <target> add it BEFORE </segment>
                if ( !$this->targetWasWritten ) {

                    $seg = $this->getCurrentSegment();

                    if ( isset( $seg[ 'translation' ] ) ) {

                        $translation = $this->prepareTranslation( $seg );
                        // replace the tag
                        $tag = "<target>$translation</target>";

                        $tag .= '</segment>';

                    }

                }

                // update segmentPositionInTu
                $this->segmentInUnitPosition++;

                $this->postProcAndFlush( $this->outputFP, $tag );

                // we are leaving <segment>, reset $segmentHasTarget
                $this->targetWasWritten = false;

            } elseif ( $this->bufferIsActive ) { // this is a tag ( <g | <mrk ) inside a seg or seg-source tag
                $this->CDATABuffer .= "</$name>";
                // Do NOT Flush
            } else { //generic tag closure do Nothing
                // flush to pointer
                $this->postProcAndFlush( $this->outputFP, $tag );
            }
        } elseif ( in_array( $name, $this->nodesToBuffer, true ) ) {

            $this->isEmpty        = false;
            $this->bufferIsActive = false;
            $tag                  = $this->CDATABuffer;
            $this->CDATABuffer    = "";

            //flush to the pointer
            $this->postProcAndFlush( $this->outputFP, $tag );

        } else {
            //ok, nothing to be done; reset flag for next coming tag
            $this->isEmpty = false;
        }

        // try to signal that we are leaving a target
        $this->tryUnsetAltTrans( $name );

        // check if we are leaving a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
        if ( $this->tuTagName === $name ) {
            $this->currentTransUnitIsTranslatable = null;
            $this->inTU                           = false;
            $this->unitContainsMda                = false;
            $this->hasWrittenCounts               = false;

            $this->resetCounts();
        }
    }
301
302
    /**
     * Feeds the current segment, when there is one, to updateSegmentCounts().
     * Does nothing when getCurrentSegment() yields an empty value.
     */
    private function updateCounts() {

        $currentSegment = $this->getCurrentSegment();

        if ( empty( $currentSegment ) ) {
            return;
        }

        $this->updateSegmentCounts( $currentSegment );

    }
313
314
    /**
     * Builds one <mda:metaGroup> per entry of $this->counts['segments_count_array'],
     * each holding the raw and the weighted word count of that entry. Group ids
     * follow the pattern `word_count_tu.<current unit id>.<zero-based position>`.
     *
     * @param bool $withMetadataTag when true the groups are wrapped in
     *                              <mda:metadata>...</mda:metadata>; pass false when
     *                              the caller writes the wrapper itself
     *
     * @return string the XML fragment
     */
    private function getWordCountGroupForXliffV2( bool $withMetadataTag = true ): string {

        $this->mdaGroupCounter++;

        $xml = $withMetadataTag ? '<mda:metadata>' : '';

        $position = 0;
        foreach ( $this->counts[ 'segments_count_array' ] as $countItem ) {

            $groupId = 'word_count_tu.' . $this->currentTransUnitId . '.' . $position;
            $position++;

            $xml .= "    <mda:metaGroup id=\"" . $groupId . "\" category=\"row_xml_attribute\">
                                <mda:meta type=\"x-matecat-raw\">" . $countItem[ 'raw_word_count' ] . "</mda:meta>
                                <mda:meta type=\"x-matecat-weighted\">" . $countItem[ 'eq_word_count' ] . "</mda:meta>
                            </mda:metaGroup>";
        }

        if ( $withMetadataTag ) {
            $xml .= '</mda:metadata>';
        }

        return $xml;

    }
349
350
    /**
     * Prepares the text to be written inside <target> for a segment.
     *
     * The cleaned source text is returned when the translation is empty.
     * Otherwise the cleaned translation is returned, unless the configured
     * callback reports errors for it: in that case the source text wrapped in
     * the UNTRANSLATED_CONTENT markers is returned instead.
     *
     * Missing `segment`, `translation` or `sid` keys are tolerated (treated as
     * empty / null) because callers may pass a segment array that was not found.
     *
     * @param array $seg segment data; reads the keys `segment`, `translation`,
     *                   `data_ref_map`, `error` and `sid`
     *
     * @return string
     */
    protected function prepareTranslation( array $seg ): string {

        // Read defensively: an empty segment array must not raise undefined-index notices.
        $rawSegment     = (string)( $seg[ 'segment' ] ?? '' );
        $rawTranslation = (string)( $seg[ 'translation' ] ?? '' );

        $segment     = Strings::removeDangerousChars( $rawSegment );
        $translation = Strings::removeDangerousChars( $rawTranslation );
        $dataRefMap  = ( isset( $seg[ 'data_ref_map' ] ) ) ? Strings::jsonToArray( $seg[ 'data_ref_map' ] ) : [];

        if ( $rawTranslation === '' ) {
            // nothing translated: fall back to the source text
            $translation = $segment;
        } else {
            if ( $this->callback instanceof XliffReplacerCallbackInterface ) {
                $error = ( !empty( $seg[ 'error' ] ) ) ? $seg[ 'error' ] : null;
                if ( $this->callback->thereAreErrors( $seg[ 'sid' ] ?? null, $segment, $translation, $dataRefMap, $error ) ) {
                    $translation = '|||UNTRANSLATED_CONTENT_START|||' . $segment . '|||UNTRANSLATED_CONTENT_END|||';
                }
            }
        }

        return $translation;

    }
377
378
}