Passed
Pull Request — master (#90)
by Domenico
03:04
created

XliffSAXTranslationReplacer::prepareTranslation()   B

Complexity

Conditions 10
Paths 48

Size

Total Lines 34
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 10
eloc 19
c 4
b 0
f 0
nc 48
nop 2
dl 0
loc 34
rs 7.6666

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Matecat\XliffParser\XliffReplacer;
4
5
use Matecat\XliffParser\Constants\TranslationStatus;
6
use Matecat\XliffParser\Utils\Strings;
7
use RuntimeException;
8
9
class XliffSAXTranslationReplacer extends AbstractXliffReplacer {
10
    /**
11
     * @var int
12
     */
13
    private $mdaGroupCounter = 0;
14
15
    /**
16
     * @var array
17
     */
18
    private $nodesToCopy = [
19
            'source',
20
            'mda:metadata',
21
            'memsource:additionalTagData',
22
            'originalData',
23
            'seg-source',
24
            'value',
25
            'bpt',
26
            'ept',
27
            'ph',
28
            'st',
29
            'note',
30
            'context',
31
            'context-group'
32
    ];
33
34
    /**
     * Stream the original XLIFF through the SAX parser and write the result to the output pointer.
     *
     * The input is read in 4096-byte chunks. Before a chunk is handed to the parser every
     * `&...;` sequence is wrapped in self::$INTERNAL_TAG_PLACEHOLDER markers, because the SAX
     * parser would otherwise decode the entities. Any `&` left over after that step is not part
     * of an entity and is replaced by the placeholder form of `amp`.
     *
     * @throws RuntimeException when the parser reports an XML error for a chunk
     */
    public function replaceTranslation() {
        fwrite( $this->outputFP, '<?xml version="1.0" encoding="UTF-8"?>' );

        // create Sax parser
        $xmlParser = $this->initSaxParser();

        // obfuscation of entities: the same pattern/replacement pair is used for every pass
        $entityPattern     = "/&(.*?);/";
        $entityReplacement = self::$INTERNAL_TAG_PLACEHOLDER . '$1' . self::$INTERNAL_TAG_PLACEHOLDER;

        while ( true ) {
            $this->currentBuffer = fread( $this->originalFP, 4096 );

            // stop on EOF / read failure only: a plain truthiness test would also stop on a chunk equal to "0"
            if ( $this->currentBuffer === false || $this->currentBuffer === '' ) {
                break;
            }

            // obfuscate entities because sax automatically does html_entity_decode
            $temporary_check_buffer = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );

            // Avoid cutting entities in half: the last fread could have truncated an entity
            // (say, '&lt;' read as '&l'), thus invalidating the escaping.
            // 9 is the max length of an entity: while an unescaped '&' sits within the last 9 bytes
            // of the buffer, fetch 9 more bytes and escape again.
            while ( true ) {
                $_ampPos = strpos( $temporary_check_buffer, '&' );

                // no '&' left, or more than 9 bytes follow it: it cannot be a truncated entity
                if ( $_ampPos === false || strlen( $temporary_check_buffer ) - $_ampPos > 9 ) {
                    break;
                }

                $extraBytes = fread( $this->originalFP, 9 );

                // nothing more to read: without this exit a stray '&' near the end of the file
                // would keep this loop spinning forever
                if ( $extraBytes === false || $extraBytes === '' ) {
                    break;
                }

                // if an entity is still present, fetch some more and repeat the escaping
                $this->currentBuffer    .= $extraBytes;
                $temporary_check_buffer = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );
            }

            // $temporary_check_buffer is always the escaped form of the current buffer, so reuse it
            $this->currentBuffer = $temporary_check_buffer;
            unset( $temporary_check_buffer );

            // every '&' still present is not an entity: escape it
            $this->currentBuffer = str_replace( "&", self::$INTERNAL_TAG_PLACEHOLDER . 'amp' . self::$INTERNAL_TAG_PLACEHOLDER, $this->currentBuffer );

            // get length of chunk
            $this->len = strlen( $this->currentBuffer );

            // parse chunk of text
            // NOTE(review): the "final" flag relies on feof() being true right after the last read — confirm
            // this holds when the file size is an exact multiple of the chunk size.
            if ( !xml_parse( $xmlParser, $this->currentBuffer, feof( $this->originalFP ) ) ) {
                // if unable, raise an exception
                throw new RuntimeException( sprintf(
                        "XML error: %s at line %d",
                        xml_error_string( xml_get_error_code( $xmlParser ) ),
                        xml_get_current_line_number( $xmlParser )
                ) );
            }

            // Accumulated offset in document: as long as the SAX pointer advances, we keep track of the
            // total bytes it has seen so far; this way its global pointer can be translated into an
            // address local to the current buffer to retrieve the last char of a tag.
            $this->offset += $this->len;
        }

        // close Sax parser
        $this->closeSaxParser( $xmlParser );
    }
99
100
    /**
     * Handle an opening tag reported by the SAX parser.
     *
     * Updates the parser state (trans-unit / target / metadata flags) and, unless the tag lies
     * inside a <target> that is going to be replaced, rebuilds the opening tag and either
     * appends it to the CDATA buffer or flushes it to the output pointer.
     *
     * @inheritDoc
     */
    protected function tagOpen( $parser, $name, $attr ) {
        // check if we are entering into a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
        if ( $this->tuTagName === $name ) {
            $this->inTU = true;

            // get id
            // trim to first 100 characters because this is the limit on Matecat's DB
            // (a missing `id` attribute is treated as an empty id)
            $this->currentTransUnitId = substr( $attr[ 'id' ] ?? '', 0, 100 );

            // `translate` attribute can be only yes or no
            $attr[ 'translate' ] = ( isset( $attr[ 'translate' ] ) && $attr[ 'translate' ] === 'no' ) ? 'no' : 'yes';

            // current 'translate' attribute of the current trans-unit
            $this->currentTransUnitTranslate = $attr[ 'translate' ];
        }

        if ( 'source' === $name ) {
            $this->sourceAttributes = $attr;
        }

        if ( 'mda:metadata' === $name ) {
            $this->unitContainsMda = true;
        }

        // check if we are entering into a <target>:
        // a target of a non-translatable unit is copied as-is, so it is not flagged as "in target"
        if ( 'target' === $name ) {
            $this->inTarget = ( $this->currentTransUnitTranslate !== 'no' );
        }

        // check if we are inside a <target>, obviously this happens only if there are targets inside the trans-unit
        // <target> must be stripped to be replaced, so this check avoids <target> reconstruction
        if ( !$this->inTarget ) {

            $tag = '';

            //
            // ============================================
            // only for Xliff 2.*
            // ============================================
            //
            // In xliff v2 we MUST add <mda:metadata> BEFORE <notes>/<originalData>/<segment>/<ignorable>
            //
            // As documentation says, <unit> contains:
            //
            // - elements from other namespaces, OPTIONAL
            // - Zero or one <notes> elements followed by
            // - Zero or one <originalData> element followed by
            // - One or more <segment> or <ignorable> elements in any order.
            //
            // For more info please refer to:
            //
            // http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html#unit
            //
            $isUnitChild = ( $name === 'notes' || $name === 'originalData' || $name === 'segment' || $name === 'ignorable' );

            if (
                    $this->xliffVersion === 2 &&
                    $isUnitChild &&
                    $this->unitContainsMda === false &&
                    !empty( $this->transUnits[ $this->currentTransUnitId ] ) &&
                    !$this->hasWrittenCounts
            ) {
                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag                   .= $this->getWordCountGroupForXliffV2();
                $this->unitContainsMda = true;
            }

            // construct tag
            $tag .= "<$name ";

            $lastMrkState = null;
            $stateProp    = '';

            foreach ( $attr as $k => $v ) {

                // if tag name is file, we must replace the target-language attribute
                if ( $name === 'file' && $k === 'target-language' && !empty( $this->targetLang ) ) {
                    // replace Target language with job language provided from constructor
                    $tag .= "$k=\"$this->targetLang\" ";
                } else {
                    $pos = 0;
                    if ( $this->currentTransUnitId && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {
                        $pos = current( $this->transUnits[ $this->currentTransUnitId ] );
                    }

                    if ( $name === $this->tuTagName && isset( $this->segments[ $pos ] ) && isset( $this->segments[ $pos ][ 'sid' ] ) ) {

                        $sid = $this->segments[ $pos ][ 'sid' ];

                        // add `help-id` to xliff v.1*
                        // add `mtc:segment-id` to xliff v.2*
                        if ( $this->xliffVersion === 1 && strpos( $tag, 'help-id' ) === false ) {
                            if ( !empty( $sid ) ) {
                                $tag .= "help-id=\"$sid\" ";
                            }
                        } elseif ( $this->xliffVersion === 2 && strpos( $tag, 'mtc:segment-id' ) === false ) {
                            if ( !empty( $sid ) ) {
                                $tag .= "mtc:segment-id=\"$sid\" ";
                            }
                        }

                    } elseif ( 'segment' === $name && $this->xliffVersion === 2 ) { // add state to segment in Xliff v2
                        // NOTE(review): $this->segments[ $pos ] is not checked for existence here — confirm
                        // that setTransUnitState() tolerates a missing segment.
                        [ $stateProp, $lastMrkState ] = $this->setTransUnitState( $this->segments[ $pos ], $stateProp, $lastMrkState );
                    }

                    // normal tag flux, put attributes in it but skip the translation state:
                    // the right value for that attribute is added after the loop
                    if ( $k !== 'state' ) {
                        $tag .= "$k=\"$v\" ";
                    }
                }
            }

            // replace state for xliff v2
            if ( $stateProp ) {
                $tag .= $stateProp;
            }

            // add oasis xliff 20 namespace
            if ( $this->xliffVersion === 2 && $name === 'xliff' && !array_key_exists( 'xmlns:mda', $attr ) ) {
                $tag .= 'xmlns:mda="urn:oasis:names:tc:xliff:metadata:2.0"';
            }

            // add MateCat specific namespace, we want maybe add non-XLIFF attributes
            if ( $name === 'xliff' && !array_key_exists( 'xmlns:mtc', $attr ) ) {
                $tag .= ' xmlns:mtc="https://www.matecat.com" ';
            }

            // trgLang: replace it only when a target language was provided, like it is done for
            // `target-language` above; otherwise the original value would be blanked
            if ( $name === 'xliff' && !empty( $this->targetLang ) ) {
                $tag = preg_replace( '/trgLang="(.*?)"/', 'trgLang="' . $this->targetLang . '"', $tag );
            }

            // this logic helps detecting empty tags
            // get current position of SAX pointer in all the stream of data it has read so far:
            // it points at the end of current tag
            $idx = xml_get_current_byte_index( $parser );

            // check whether the bounds of current tag are entirely in current buffer or the end of the current tag
            // is outside current buffer (in the latter case, it's in next buffer to be read by the while loop);
            // this check is necessary because we may have truncated a tag in half with current read,
            // and the other half may be encountered in the next buffer it will be passed
            if ( isset( $this->currentBuffer[ $idx - $this->offset ] ) ) {
                // if this tag entire length fitted in the buffer, the last char must be the last
                // symbol before the '>'; if it's an empty tag, it is assumed that it's a '/'
                $lastChar = $this->currentBuffer[ $idx - $this->offset ];
            } else {
                // if it's out, simply use the last character of the chunk
                $lastChar = $this->currentBuffer[ $this->len - 1 ];
            }

            // trim last space
            $tag = rtrim( $tag );

            // detect empty tag
            $this->isEmpty = ( $lastChar === '/' || $name === 'x' );
            if ( $this->isEmpty ) {
                $tag .= '/';
            }

            // add tag ending
            $tag .= ">";

            // set a Buffer for the segSource Source tag
            if ( $this->bufferIsActive || in_array( $name, $this->nodesToCopy, true ) ) { // we are opening a critical CDATA section

                // WARNING BECAUSE SOURCE AND SEG-SOURCE TAGS CAN BE EMPTY IN SOME CASES!!!!!
                // so check for isEmpty also in conjunction with name
                if ( $this->isEmpty && ( 'source' === $name || 'seg-source' === $name ) ) {
                    $this->postProcAndFlush( $this->outputFP, $tag );
                } else {
                    // these are NOT source/seg-source/value empty tags, THERE IS A CONTENT, write it in buffer
                    $this->bufferIsActive = true;
                    $this->CDATABuffer    .= $tag;
                }
            } else {
                $this->postProcAndFlush( $this->outputFP, $tag );
            }
        }

        // update segmentPositionInTu: one step per <source> (xliff v1.*) or per <segment> (xliff v2.*)
        $opensSegment = ( $this->xliffVersion === 1 && $name === 'source' ) || ( $this->xliffVersion === 2 && $name === 'segment' );
        if ( $this->inTU && $opensSegment ) {
            $this->segmentPositionInTu++;
        }
    }
301
302
    /**
     * Handle a closing tag reported by the SAX parser.
     *
     * Depending on the tag this writes the replaced <target>, flushes a buffered CDATA section,
     * adds a missing <target> before </segment> / </trans-unit>, or simply emits the closing tag.
     * When the trans-unit (xliff v1.*) or unit (xliff v2.*) is left, the per-unit state is reset.
     *
     * @inheritDoc
     */
    protected function tagClose( $parser, $name ) {
        $tag = '';

        /**
         * if it is a tag within <target> or
         * if it is an empty tag, do not add closing tag because we have already closed it in
         *
         * self::tagOpen method
         */
        if ( !$this->isEmpty && !( $this->inTarget && $name !== 'target' ) ) {

            if ( !$this->inTarget ) {
                $tag = "</$name>";
            }

            if ( 'target' === $name ) {

                // nothing has to be replaced when the unit is not translatable
                if ( $this->currentTransUnitTranslate !== 'no' && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {

                    // get translation of current segment, by indirect indexing: id -> positional index -> segment
                    // actually there may be more than one segment to that ID if there are two mrk of the same source segment
                    $listOfSegmentsIds = $this->transUnits[ $this->currentTransUnitId ];

                    // $currentSegmentId
                    if ( !empty( $listOfSegmentsIds ) ) {
                        $this->setCurrentSegmentArray( $listOfSegmentsIds );
                    }

                    /*
                     * At the end of every cycle the segment grouping information is lost: unset( 'matecat|' . $this->currentId )
                     *
                     * We need to take the info about the last segment parsed
                     *          ( normally more than 1 db row because of mrk tags )
                     *
                     * So, copy the current segment data group to another structure to take the last one segment
                     * for the next tagOpen ( possible sdl:seg-defs )
                     *
                     */
                    $this->lastTransUnit = [];

                    $last_value    = null;
                    $segmentsCount = count( $listOfSegmentsIds );
                    for ( $i = 0; $i < $segmentsCount; $i++ ) {
                        // the list can have holes (positions are unset below for xliff v1.*): skip them
                        if ( !isset( $listOfSegmentsIds[ $i ] ) ) {
                            continue;
                        }

                        $id = $listOfSegmentsIds[ $i ];
                        if ( isset( $this->segments[ $id ] ) && ( $i == 0 || $last_value + 1 == $id ) ) {
                            $last_value            = $id;
                            $this->lastTransUnit[] = $this->segments[ $id ];
                        }
                    }

                    // init translation and state
                    $translation  = '';
                    $lastMrkState = null;
                    $stateProp    = '';

                    // we must reset the lastMrkId found because this is a new segment.
                    $lastMrkId = -1;

                    if ( $this->xliffVersion === 2 ) {
                        $seg = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];

                        // update counts
                        if ( !$this->hasWrittenCounts && !empty( $seg ) ) {
                            $this->updateSegmentCounts( $seg );
                        }

                        // delete translations so the prepareSegment
                        // will put source content in target tag
                        if ( $this->sourceInTarget ) {
                            $seg[ 'translation' ] = '';
                            $this->resetCounts();
                        }

                        // append $translation
                        $translation = $this->prepareTranslation( $seg, $translation );

                        [ $stateProp ] = $this->setTransUnitState( $seg, $stateProp, null );
                    } else {
                        foreach ( $listOfSegmentsIds as $pos => $id ) {

                            /*
                             * This routine works to respect the positional orders of markers.
                             * In every cycle we check if the mrk of the segment is below or equal the last one.
                             * When this is true, means that the mrk id belongs to the next segment with the same internal_id
                             * so we MUST stop to apply markers and translations
                             * and stop to add eq_word_count
                             *
                             * Begin:
                             * pre-assign zero to the new mrk if this is the first one ( in this segment )
                             * If it is null leave it NULL
                             */
                            if ( (int)$this->segments[ $id ][ "mrk_id" ] < 0 && $this->segments[ $id ][ "mrk_id" ] !== null ) {
                                $this->segments[ $id ][ "mrk_id" ] = 0;
                            }

                            /*
                             * WARNING:
                             * For those seg-source that doesn't have a mrk ( having a mrk id === null )
                             * ( null <= -1 ) === true
                             * so, cast to int
                             */
                            if ( (int)$this->segments[ $id ][ "mrk_id" ] <= $lastMrkId ) {
                                break;
                            }

                            // the segment currently processed
                            $seg = $this->segments[ $id ];

                            // update counts
                            if ( !empty( $seg ) ) {
                                $this->updateSegmentCounts( $seg );
                            }

                            // delete translations so the prepareSegment
                            // will put source content in target tag
                            if ( $this->sourceInTarget ) {
                                $seg[ 'translation' ] = '';
                                $this->resetCounts();
                            }

                            // append $translation
                            $translation = $this->prepareTranslation( $seg, $translation );

                            // this position has been consumed (xliff v2.* keeps the list, it is needed for populating metadata)
                            unset( $this->transUnits[ $this->currentTransUnitId ] [ $pos ] );

                            $lastMrkId = $this->segments[ $id ][ "mrk_id" ];

                            [ $stateProp, $lastMrkState ] = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                        }
                    }

                    // append translation
                    $tag = $this->createTargetTag( $translation, $stateProp );
                }

                // signal we are leaving a target
                $this->targetWasWritten = true;
                $this->inTarget         = false;
                // third argument: treat the tag as CDATA
                $this->postProcAndFlush( $this->outputFP, $tag, true );

            } elseif ( in_array( $name, $this->nodesToCopy, true ) ) { // we are closing a critical CDATA section

                $this->bufferIsActive = false;

                // only for Xliff 2.*
                // write here <mda:metaGroup> and <mda:meta> if already present in the <unit>
                if ( 'mda:metadata' === $name && $this->unitContainsMda && $this->xliffVersion === 2 && !$this->hasWrittenCounts ) {

                    // we need to update counts here
                    $this->updateCounts();
                    $this->hasWrittenCounts = true;

                    $tag = $this->CDATABuffer;
                    $tag .= $this->getWordCountGroupForXliffV2( false );
                    $tag .= "    </mda:metadata>";

                } else {
                    $tag = $this->CDATABuffer . "</$name>";
                }

                $this->CDATABuffer = "";

                // flush to pointer
                $this->postProcAndFlush( $this->outputFP, $tag );

            } elseif ( 'segment' === $name ) {

                // only for Xliff 2.*
                // if segment has no <target> add it BEFORE </segment>
                if ( $this->xliffVersion === 2 && !$this->targetWasWritten ) {

                    $seg = $this->getCurrentSegment();

                    if ( isset( $seg[ 'translation' ] ) ) {

                        $translation   = $this->prepareTranslation( $seg );
                        [ $stateProp ] = $this->setTransUnitState( $seg, '', null );

                        // replace the tag
                        $tag = $this->createTargetTag( $translation, $stateProp );

                        $tag .= '</segment>';
                    }
                }

                $this->postProcAndFlush( $this->outputFP, $tag );

                // we are leaving <segment>, reset the "target written" flag
                $this->targetWasWritten = false;

            } elseif ( $name === 'trans-unit' ) {

                // only for Xliff 1.*
                // handling </trans-unit> closure
                if ( !$this->targetWasWritten ) {

                    $seg = $this->getCurrentSegment();

                    if ( isset( $seg[ 'translation' ] ) ) {
                        $translation   = $this->prepareTranslation( $seg );
                        [ $stateProp ] = $this->setTransUnitState( $seg, '', null );

                        // replace the tag
                        $tag = $this->createTargetTag( $translation, $stateProp );
                        $tag .= '</trans-unit>';
                    }

                    $this->postProcAndFlush( $this->outputFP, $tag );

                } else {
                    $this->postProcAndFlush( $this->outputFP, '</trans-unit>' );
                    $this->targetWasWritten = false;
                }

            } elseif ( $this->bufferIsActive ) { // this is a tag ( <g | <mrk ) inside a seg or seg-source tag
                $this->CDATABuffer .= "</$name>";
                // Do NOT Flush
            } else { // generic tag closure
                // flush to pointer
                $this->postProcAndFlush( $this->outputFP, $tag );
            }

        } elseif ( $this->CDATABuffer === '<note/>' && $this->bufferIsActive === true ) {
            // an empty <note/> was buffered by tagOpen: write it out and release the buffer
            $this->postProcAndFlush( $this->outputFP, '<note/>' );
            $this->bufferIsActive = false;
            $this->CDATABuffer    = '';
            $this->isEmpty        = false;
        } else {
            // ok, nothing to be done; reset flag for next coming tag
            $this->isEmpty = false;
        }

        // check if we are leaving a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
        if ( $this->tuTagName === $name ) {
            $this->currentTransUnitTranslate = null;
            $this->inTU                      = false;
            $this->segmentPositionInTu       = -1;
            $this->unitContainsMda           = false;
            $this->hasWrittenCounts          = false;
            $this->sourceAttributes          = [];

            $this->resetCounts();
        }
    }
557
558
    /**
     * Set the current segment array (with segment id and trans-unit id).
     *
     * When the stored trans-unit id equals the current one, the pointer moves to the element
     * that follows the stored segment id in the list; otherwise it starts from the first
     * element of the list. The segment id becomes null when there is no such element.
     *
     * @param array $listOfSegmentsIds
     */
    private function setCurrentSegmentArray( array $listOfSegmentsIds = [] ) {
        $isSameTransUnit = !empty( $this->currentSegmentArray ) &&
                $this->currentSegmentArray[ 'tid' ] === $this->currentTransUnitId;

        if ( !$isSameTransUnit ) {
            // first call, or a new trans-unit: start from the first segment of the list
            $this->currentSegmentArray = [
                    'sid' => $listOfSegmentsIds[ 0 ] ?? null,
                    'tid' => $this->currentTransUnitId,
            ];

            return;
        }

        // same trans-unit: move to the segment following the current one
        $key = array_search( $this->currentSegmentArray[ 'sid' ], $listOfSegmentsIds );

        // array_search() returns false when the current id is not in the list: `false + 1` would
        // silently select index 1, so treat "not found" and "no next element" as "no segment"
        $nextSid = null;
        if ( $key !== false && isset( $listOfSegmentsIds[ $key + 1 ] ) ) {
            $nextSid = $listOfSegmentsIds[ $key + 1 ];
        }

        $this->currentSegmentArray[ 'sid' ] = $nextSid;
        $this->currentSegmentArray[ 'tid' ] = $this->currentTransUnitId;
    }
583
584
    /**
     * Update the word counts for the current trans-unit.
     *
     * For xliff v2 only the segment pointed to by the current segment array is
     * counted; for every other version all the segments listed for the current
     * trans-unit are counted. The current segment array is cleared afterwards.
     *
     * Missing trans-units, segment pointers or segments are skipped instead of
     * being read blindly.
     */
    private function updateCounts() {
        // segment ids registered for the current trans-unit (none when the unit is unknown)
        $listOfSegmentsIds = isset( $this->transUnits[ $this->currentTransUnitId ] )
                ? $this->transUnits[ $this->currentTransUnitId ]
                : [];

        if ( !empty( $listOfSegmentsIds ) ) {
            $this->setCurrentSegmentArray( $listOfSegmentsIds );
        }

        if ( $this->xliffVersion === 2 ) {
            // v2: count only the segment currently pointed to, if any
            $idsToCount = isset( $this->currentSegmentArray[ 'sid' ] ) ? [ $this->currentSegmentArray[ 'sid' ] ] : [];
        } else {
            $idsToCount = $listOfSegmentsIds;
        }

        foreach ( $idsToCount as $id ) {
            // empty() does not warn on a missing offset
            if ( !empty( $this->segments[ $id ] ) ) {
                $this->updateSegmentCounts( $this->segments[ $id ] );
            }
        }

        $this->currentSegmentArray = [];
    }
612
613
    /**
     * Record the word counts of a single segment and add them to the running totals.
     *
     * The equivalent word count is truncated (floor) to two decimal places
     * before being stored and summed.
     *
     * @param array $seg segment data; the keys 'sid', 'raw_word_count' and 'eq_word_count' are read
     */
    private function updateSegmentCounts( array $seg = [] ) {

        $raw_word_count = $seg[ 'raw_word_count' ];
        $eq_word_count  = ( floor( $seg[ 'eq_word_count' ] * 100 ) / 100 );

        // per-segment entry, indexed by segment id
        $this->counts[ 'segments_count_array' ][ $seg[ 'sid' ] ] = [
                'raw_word_count' => $raw_word_count,
                'eq_word_count'  => $eq_word_count,
        ];

        // running totals
        $this->counts[ 'raw_word_count' ] += $raw_word_count;
        $this->counts[ 'eq_word_count' ]  += $eq_word_count;
    }
632
633
    /**
     * Reset the count accumulators: the per-segment list is emptied and both
     * word-count totals go back to zero.
     */
    private function resetCounts() {
        $initialValues = [
                'segments_count_array' => [],
                'raw_word_count'       => 0,
                'eq_word_count'        => 0,
        ];

        foreach ( $initialValues as $name => $initialValue ) {
            $this->counts[ $name ] = $initialValue;
        }
    }
638
639
    /**
     * Prepare segment tagging for xliff insertion.
     *
     * The translation falls back to the source segment when it is empty. When a
     * callback is registered and reports errors for a non-empty translation, the
     * source segment wrapped in UNTRANSLATED_CONTENT markers is used instead.
     *
     * For xliff v2 the bare translation is returned: marks, surrounding tags and
     * $transUnitTranslation are ignored on purpose.
     *
     * @param array  $seg
     * @param string $transUnitTranslation text the prepared translation is appended to (not used for xliff v2)
     *
     * @return string
     */
    protected function prepareTranslation( $seg, $transUnitTranslation = "" ) {
        $segment     = Strings::removeDangerousChars( $seg[ 'segment' ] );
        $translation = Strings::removeDangerousChars( $seg[ 'translation' ] );

        $dataRefMap = [];
        if ( isset( $seg[ 'data_ref_map' ] ) ) {
            $dataRefMap = Strings::jsonToArray( $seg[ 'data_ref_map' ] );
        }

        if ( $seg[ 'translation' ] == '' ) {
            // nothing translated: emit the source text
            $translation = $segment;
        } elseif ( $this->callback instanceof XliffReplacerCallbackInterface ) {
            $error     = empty( $seg[ 'error' ] ) ? null : $seg[ 'error' ];
            $hasErrors = $this->callback->thereAreErrors( $seg[ 'sid' ], $segment, $translation, $dataRefMap, $error );

            if ( $hasErrors ) {
                $translation = '|||UNTRANSLATED_CONTENT_START|||' . $segment . '|||UNTRANSLATED_CONTENT_END|||';
            }
        }

        // for xliff v2 we ignore the marks on purpose
        if ( $this->xliffVersion === 2 ) {
            return $translation;
        }

        $hasMrk = ( $seg[ 'mrk_id' ] !== null && $seg[ 'mrk_id' ] != '' );
        if ( $hasMrk ) {
            $mrkSuccTags = $seg[ 'mrk_succ_tags' ];
            if ( $this->targetLang === 'ja-JP' ) {
                // leading whitespace of the closing tags is dropped for Japanese targets
                $mrkSuccTags = ltrim( $mrkSuccTags );
            }

            $openingMrk  = '<mrk mid="' . $seg[ 'mrk_id' ] . '" mtype="seg">';
            $translation = $openingMrk . $seg[ 'mrk_prev_tags' ] . $translation . $mrkSuccTags . '</mrk>';
        }

        return $transUnitTranslation . $seg[ 'prev_tags' ] . $translation . $seg[ 'succ_tags' ];
    }
682
683
684
    /**
     * Build the <count-group> element carrying the raw and weighted word
     * counts; the group is named after the current trans-unit id and is
     * preceded by a newline.
     *
     * @param $raw_word_count
     * @param $eq_word_count
     *
     * @return string
     */
    private function getWordCountGroup( $raw_word_count, $eq_word_count ) {
        $rawCount      = '<count count-type="x-matecat-raw">' . $raw_word_count . '</count>';
        $weightedCount = '<count count-type="x-matecat-weighted">' . $eq_word_count . '</count>';

        return "\n" . '<count-group name="' . $this->currentTransUnitId . '">' . $rawCount . $weightedCount . '</count-group>';
    }
693
694
    /**
     * Return the segment found at the current position of the current trans-unit.
     *
     * An empty array is returned when the trans-unit is not marked as
     * translatable ('yes'), when the trans-unit or the current position is
     * unknown, or when the referenced segment does not exist.
     *
     * @return array
     */
    private function getCurrentSegment() {
        if ( $this->currentTransUnitTranslate !== 'yes' ) {
            return [];
        }

        // isset() on the nested offset covers both a missing trans-unit and a
        // position outside the list (the position is reset to -1 when a unit is closed)
        if ( !isset( $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ] ) ) {
            return [];
        }

        $index = $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ];

        return isset( $this->segments[ $index ] ) ? $this->segments[ $index ] : [];
    }
708
709
    /**
     * This function creates a <target>.
     *
     * For xliff v2 the tag only wraps the translation. For any other version
     * the tag also carries $stateProp (plus xml:lang when the version is
     * exactly 1) and is followed by the word count group, unless this instance
     * is exactly a SdlXliffSAXTranslationReplacer.
     *
     * @param $translation
     * @param $stateProp
     *
     * @return string
     */
    private function createTargetTag( $translation, $stateProp ) {

        $version = $this->xliffVersion;

        // loose comparisons on purpose: same matching rules as a switch that tests `case 1` before `case 2`
        if ( $version != 1 && $version == 2 ) {
            return '<target>' . $translation . '</target>';
        }

        $langAttribute = ( $version === 1 ) ? ' xml:lang="' . $this->targetLang . '"' : '';
        $tag           = '<target ' . $langAttribute . ' ' . $stateProp . '>' . $translation . '</target>';

        // if it's a Trados file don't append count group
        if ( get_class( $this ) === SdlXliffSAXTranslationReplacer::class ) {
            return $tag;
        }

        return $tag . $this->getWordCountGroup( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
    }
741
742
    /**
     * Build the <mda:metaGroup> elements holding the word counts collected for
     * the current unit: one group per counted segment, identified as
     * word_count_tu[<trans-unit id>][<progressive index>].
     *
     * Every call also increments the mda group counter.
     *
     * @param bool $withMetadataTag when true the groups are wrapped in <mda:metadata>
     *
     * @return string
     */
    private function getWordCountGroupForXliffV2( $withMetadataTag = true ) {

        $this->mdaGroupCounter++;

        $return = ( $withMetadataTag === true ) ? '<mda:metadata>' : '';

        $index = 0;
        foreach ( $this->counts[ 'segments_count_array' ] as $segments_count_item ) {

            $id = 'word_count_tu[' . $this->currentTransUnitId . '][' . $index . ']';
            $index++;

            // the line breaks and the indentation inside this literal are part of the emitted output
            $return .= "    <mda:metaGroup id=\"" . $id . "\" category=\"row_xml_attribute\">
                                <mda:meta type=\"x-matecat-raw\">" . $segments_count_item[ 'raw_word_count' ] . "</mda:meta>
                                <mda:meta type=\"x-matecat-weighted\">" . $segments_count_item[ 'eq_word_count' ] . "</mda:meta>
                            </mda:metaGroup>";
        }

        if ( $withMetadataTag === true ) {
            $return .= '</mda:metadata>';
        }

        return $return;
    }
780
781
    /**
     * Compute the state attribute of a trans-unit from the status of one of its
     * segments, taking into account the state derived from the previous segments.
     *
     * When the rule for the segment status does not apply, both values are
     * returned unchanged.
     *
     * @param $seg          segment data; only 'status' is read
     * @param $state_prop   state attribute computed so far
     * @param $lastMrkState status derived from the previous segments (null on the first one)
     *
     * @return array [ $state_prop, $lastMrkState ]
     */
    private function setTransUnitState( $seg, $state_prop, $lastMrkState ) {
        switch ( $seg[ 'status' ] ) {

            case TranslationStatus::STATUS_FIXED:
            case TranslationStatus::STATUS_APPROVED2:
                if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_APPROVED2 ) {
                    $state_prop   = "state=\"final\"";
                    $lastMrkState = TranslationStatus::STATUS_APPROVED2;
                }
                break;

            case TranslationStatus::STATUS_APPROVED:
                if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                    $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"reviewed\"" : "state=\"signed-off\"";
                    $lastMrkState = TranslationStatus::STATUS_APPROVED;
                }
                break;

            case TranslationStatus::STATUS_TRANSLATED:
                if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_TRANSLATED || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                    $state_prop   = "state=\"translated\"";
                    $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
                }
                break;

            // if there is a mark REJECTED and there is neither a NEW nor a DRAFT one,
            // the whole trans-unit is REJECTED. In V2 there is no way to mark
            case TranslationStatus::STATUS_REJECTED:
            case TranslationStatus::STATUS_REBUTTED:
                // both inequalities must hold: joined with "||" the test is always true
                // and REJECTED would override a previous NEW/DRAFT state
                if ( ( $lastMrkState == null ) || ( $lastMrkState != TranslationStatus::STATUS_NEW && $lastMrkState != TranslationStatus::STATUS_DRAFT ) ) {
                    $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"needs-review-translation\"";
                    $lastMrkState = TranslationStatus::STATUS_REJECTED;
                }
                break;

            case TranslationStatus::STATUS_NEW:
                if ( ( $lastMrkState == null ) || $lastMrkState != TranslationStatus::STATUS_NEW ) {
                    $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"new\"";
                    $lastMrkState = TranslationStatus::STATUS_NEW;
                }
                break;

            case TranslationStatus::STATUS_DRAFT:
                if ( ( $lastMrkState == null ) || $lastMrkState != TranslationStatus::STATUS_DRAFT ) {
                    $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"new\"";
                    $lastMrkState = TranslationStatus::STATUS_DRAFT;
                }
                break;

            default:
                // this is the case when a segment is not shown in the cattool, so the row in
                // segment_translations does not exist and
                // ---> $seg[ 'status' ] is NULL
                if ( $lastMrkState == null ) { //this is the first MRK ID
                    $state_prop   = "state=\"translated\"";
                    $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
                }
                // otherwise do nothing and preserve the last state
                break;
        }

        return [ $state_prop, $lastMrkState ];
    }
849
850
    /**
     * Character data handler: the data is appended to the CDATA buffer while
     * the buffer is active; otherwise it is flushed to the output, unless it
     * belongs to a <target>.
     *
     * @inheritDoc
     */
    protected function characterData( $parser, $data ) {
        if ( $this->bufferIsActive ) {
            $this->CDATABuffer .= $data;

            return;
        }

        // don't write <target> data
        if ( !$this->inTarget ) {
            $this->postProcAndFlush( $this->outputFP, $data );
        }
    }
861
}
862