Test Failed
Pull Request — master (#70)
by Mauro
08:18
created

XliffSAXTranslationReplacer::prepareTranslation()   C

Complexity

Conditions 12
Paths 96

Size

Total Lines 34
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 12
eloc 19
nc 96
nop 2
dl 0
loc 34
rs 6.9666
c 3
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Matecat\XliffParser\XliffReplacer;
4
5
use Matecat\XliffParser\Constants\TranslationStatus;
6
use Matecat\XliffParser\Utils\Strings;
7
use RuntimeException;
8
9
class XliffSAXTranslationReplacer extends AbstractXliffReplacer {
10
    /**
11
     * @var int
12
     */
13
    private $mdaGroupCounter = 0;
14
15
    /**
16
     * @var array
17
     */
18
    private $nodesToCopy = [
19
            'source',
20
            'mda:metadata',
21
            'memsource:additionalTagData',
22
            'originalData',
23
            'seg-source',
24
            'value',
25
            'bpt',
26
            'ept',
27
            'ph',
28
            'st',
29
            'note',
30
            'context',
31
    ];
32
33
    /**
     * Stream the original XLIFF through a SAX parser, chunk by chunk, so that the
     * tagOpen/tagClose callbacks can write the rewritten document to the output file pointer.
     *
     * Entities are obfuscated with the internal placeholder before parsing because the
     * SAX parser would otherwise decode them.
     *
     * @throws RuntimeException when the XML parser reports an error on a chunk
     */
    public function replaceTranslation() {
        fwrite( $this->outputFP, '<?xml version="1.0" encoding="UTF-8"?>' );

        //create Sax parser
        $xmlParser = $this->initSaxParser();

        $entityPattern     = "/&(.*?);/";
        $entityReplacement = self::$INTERNAL_TAG_PLACEHOLDER . '$1' . self::$INTERNAL_TAG_PLACEHOLDER;
        $escapedAmpersand  = self::$INTERNAL_TAG_PLACEHOLDER . 'amp' . self::$INTERNAL_TAG_PLACEHOLDER;

        // Compare explicitly against false/'' instead of relying on truthiness:
        // a chunk consisting of the single character "0" is falsy and would end the loop early.
        while ( ( $this->currentBuffer = fread( $this->originalFP, 4096 ) ) !== false && $this->currentBuffer !== '' ) {

            // obfuscate entities because sax automatically does html_entity_decode
            $obfuscated = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );

            // Avoid cutting entities in half: the last fread could have truncated an entity
            // (say, '&lt;' read as '&l'), thus invalidating the escaping.
            // 9 is taken as the max length of an entity: while a leftover '&' sits within the
            // last 9 bytes of the chunk, read 9 more bytes and obfuscate again.
            while ( true ) {
                $ampPos = strpos( $obfuscated, '&' );

                // no leftover '&', or it is too far from the end to be a truncated entity
                if ( $ampPos === false || strlen( $obfuscated ) - $ampPos > 9 ) {
                    break;
                }

                $extra = fread( $this->originalFP, 9 );

                // Nothing more to read (EOF or read error): the '&' can not be completed.
                // Without this exit the loop would spin forever on a bare '&' near the end of the file.
                if ( $extra === false || $extra === '' ) {
                    break;
                }

                $this->currentBuffer .= $extra;
                $obfuscated          = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );
            }

            // every '&' still present is not part of an entity: escape it
            $this->currentBuffer = str_replace( "&", $escapedAmpersand, $obfuscated );
            unset( $obfuscated );

            //get length of chunk
            $this->len = strlen( $this->currentBuffer );

            //parse chunk of text
            if ( !xml_parse( $xmlParser, $this->currentBuffer, feof( $this->originalFP ) ) ) {
                //if unable, raise an exception
                throw new RuntimeException( sprintf(
                        "XML error: %s at line %d",
                        xml_error_string( xml_get_error_code( $xmlParser ) ),
                        xml_get_current_line_number( $xmlParser )
                ) );
            }

            // Accumulated offset in the document: the SAX byte index is global, so the total number of
            // bytes already parsed is tracked to translate it into an address local to the current buffer.
            $this->offset += $this->len;
        }

        // close Sax parser
        $this->closeSaxParser( $xmlParser );
    }
98
99
    /**
100
     * @inheritDoc
101
     */
102
    protected function tagOpen( $parser, $name, $attr ) {
        // check if we are entering into a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
        if ( $this->tuTagName === $name ) {
            $this->inTU = true;

            // get id
            // trim to first 100 characters because this is the limit on Matecat's DB
            $this->currentTransUnitId = substr( $attr[ 'id' ], 0, 100 );

            // `translate` attribute can be only yes or no: anything but an explicit 'no' becomes 'yes'.
            // The normalised value is stored back in $attr, so it is also written on the output tag below.
            $attr[ 'translate' ] = ( isset( $attr[ 'translate' ] ) && $attr[ 'translate' ] === 'no' ) ? 'no' : 'yes';

            // current 'translate' attribute of the current trans-unit
            $this->currentTransUnitTranslate = $attr[ 'translate' ];
        }

        if ( 'source' === $name ) {
            $this->sourceAttributes = $attr;
        }

        if ( 'mda:metadata' === $name ) {
            $this->unitContainsMda = true;
        }

        // check if we are entering into a <target>; targets of non-translatable units are copied as they are
        if ( 'target' === $name ) {
            $this->inTarget = ( $this->currentTransUnitTranslate !== 'no' );
        }

        // check if we are inside a <target>, obviously this happen only if there are targets inside the trans-unit
        // <target> must be stripped to be replaced, so this check avoids <target> reconstruction
        if ( !$this->inTarget ) {

            $tag = '';

            //
            // ============================================
            // only for Xliff 2.*
            // ============================================
            //
            // In xliff v2 we MUST add <mda:metadata> BEFORE <notes>/<originalData>/<segment>/<ignorable>
            //
            // As documentation says, <unit> contains:
            //
            // - elements from other namespaces, OPTIONAL
            // - Zero or one <notes> elements followed by
            // - Zero or one <originalData> element followed by
            // - One or more <segment> or <ignorable> elements in any order.
            //
            // For more info please refer to:
            //
            // http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html#unit
            //
            $metadataMustComeFirst = $this->xliffVersion === 2
                    && in_array( $name, [ 'notes', 'originalData', 'segment', 'ignorable' ], true )
                    && $this->unitContainsMda === false;

            if ( $metadataMustComeFirst && !empty( $this->transUnits[ $this->currentTransUnitId ] ) && !$this->hasWrittenCounts ) {

                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag                   .= $this->getWordCountGroupForXliffV2( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
                $this->unitContainsMda = true;
            }

            // construct tag
            $tag .= "<$name ";

            $lastMrkState = null;
            $stateProp    = '';

            foreach ( $attr as $k => $v ) {

                //if tag name is file, we must replace the target-language attribute
                if ( $name === 'file' && $k === 'target-language' && !empty( $this->targetLang ) ) {
                    //replace Target language with job language provided from constructor
                    $tag .= "$k=\"$this->targetLang\" ";
                    continue;
                }

                $pos = 0;
                if ( $this->currentTransUnitId && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {
                    $pos = current( $this->transUnits[ $this->currentTransUnitId ] );
                }

                if ( $name === $this->tuTagName && isset( $this->segments[ $pos ][ 'sid' ] ) ) {

                    $sid = $this->segments[ $pos ][ 'sid' ];

                    // add `help-id` to xliff v.1*
                    // add `mtc:segment-id` to xliff v.2*
                    // (the strpos checks keep the attribute from being written once per loop iteration)
                    if ( $this->xliffVersion === 1 && strpos( $tag, 'help-id' ) === false ) {
                        if ( !empty( $sid ) ) {
                            $tag .= "help-id=\"$sid\" ";
                        }
                    } elseif ( $this->xliffVersion === 2 && strpos( $tag, 'mtc:segment-id' ) === false ) {
                        if ( !empty( $sid ) ) {
                            $tag .= "mtc:segment-id=\"$sid\" ";
                        }
                    }

                } elseif ( 'segment' === $name && $this->xliffVersion === 2 ) { // add state to segment in Xliff v2
                    list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $this->segments[ $pos ], $stateProp, $lastMrkState );
                }

                //normal tag flux, put attributes in it
                $tag .= "$k=\"$v\" ";

                // replace state for xliff v2
                if ( $stateProp ) {
                    // Match one single `state` attribute only: the previous greedy `(.*)` ran up to the LAST
                    // double quote of the tag, so every attribute written after `state` was swallowed on the
                    // following iterations; the word boundary keeps `subState` from being matched.
                    $pattern = '/\bstate="[^"]*"/i';
                    $tag     = preg_replace( $pattern, $stateProp, $tag );
                }
            }

            // add oasis xliff 20 namespace
            if ( $this->xliffVersion === 2 && $name === 'xliff' && !array_key_exists( 'xmlns:mda', $attr ) ) {
                $tag .= 'xmlns:mda="urn:oasis:names:tc:xliff:metadata:2.0"';
            }

            // add MateCat specific namespace, we want maybe add non-XLIFF attributes
            if ( $name === 'xliff' && !array_key_exists( 'xmlns:mtc', $attr ) ) {
                $tag .= ' xmlns:mtc="https://www.matecat.com" ';
            }

            // trgLang
            if ( $name === 'xliff' ) {
                $tag = preg_replace( '/trgLang="(.*?)"/', 'trgLang="' . $this->targetLang . '"', $tag );
            }

            //this logic helps detecting empty tags
            //get current position of SAX pointer in all the stream of data is has read so far:
            //it points at the end of current tag
            $idx = xml_get_current_byte_index( $parser );

            //check whether the bounds of current tag are entirely in current buffer or the end of the current tag
            //is outside current buffer (in the latter case, it's in next buffer to be read by the while loop);
            //this check is necessary because we may have truncated a tag in half with current read,
            //and the other half may be encountered in the next buffer it will be passed
            if ( isset( $this->currentBuffer[ $idx - $this->offset ] ) ) {
                //if this tag entire length fitted in the buffer, the last char must be the last
                //symbol before the '>'; if it's an empty tag, it is assumed that it's a '/'
                $lastChar = $this->currentBuffer[ $idx - $this->offset ];
            } else {
                //if it's out, simple use the last character of the chunk
                $lastChar = $this->currentBuffer[ $this->len - 1 ];
            }

            //trim last space
            $tag = rtrim( $tag );

            //detect empty tag (<x> is always treated as empty)
            $this->isEmpty = ( $lastChar === '/' || $name === 'x' );
            if ( $this->isEmpty ) {
                $tag .= '/';
            }

            //add tag ending
            $tag .= ">";

            //set a Buffer for the segSource Source tag
            if ( $this->bufferIsActive || in_array( $name, $this->nodesToCopy, true ) ) { // we are opening a critical CDATA section

                //WARNING BECAUSE SOURCE AND SEG-SOURCE TAGS CAN BE EMPTY IN SOME CASES!!!!!
                //so check for isEmpty also in conjunction with name
                if ( $this->isEmpty && ( 'source' === $name || 'seg-source' === $name ) ) {
                    $this->postProcAndFlush( $this->outputFP, $tag );
                } else {
                    //these are NOT source/seg-source/value empty tags, THERE IS A CONTENT, write it in buffer
                    $this->bufferIsActive = true;
                    $this->CDATABuffer    .= $tag;
                }
            } else {
                $this->postProcAndFlush( $this->outputFP, $tag );
            }
        }

        // update segmentPositionInTu: one step per <source> (xliff v1) or per <segment> (xliff v2) inside the unit
        if ( $this->xliffVersion === 1 && $this->inTU && $name === 'source' ) {
            $this->segmentPositionInTu++;
        }

        if ( $this->xliffVersion === 2 && $this->inTU && $name === 'segment' ) {
            $this->segmentPositionInTu++;
        }
    }
297
298
    /**
299
     * @inheritDoc
300
     */
301
    protected function tagClose( $parser, $name ) {
        $tag = '';

        /**
         * if is a tag within <target> or
         * if it is an empty tag, do not add closing tag because we have already closed it in
         *
         * self::tagOpen method
         */
        if ( !$this->isEmpty && !( $this->inTarget && $name !== 'target' ) ) {

            if ( !$this->inTarget ) {
                $tag = "</$name>";
            }

            if ( 'target' === $name ) {

                // When the unit is marked translate="no" nothing is rebuilt: $tag keeps the plain closing tag
                // set above (inTarget is false for such units, see tagOpen).
                if ( $this->currentTransUnitTranslate !== 'no' && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {

                    // get translation of current segment, by indirect indexing: id -> positional index -> segment
                    // actually there may be more that one segment to that ID if there are two mrk of the same source segment
                    $listOfSegmentsIds = $this->transUnits[ $this->currentTransUnitId ];

                    // $currentSegmentId
                    if ( !empty( $listOfSegmentsIds ) ) {
                        $this->setCurrentSegmentArray( $listOfSegmentsIds );
                    }

                    /*
                     * At the end of every cycle the segment grouping information is lost: unset( 'matecat|' . $this->currentId )
                     *
                     * We need to take the info about the last segment parsed
                     *          ( normally more than 1 db row because of mrk tags )
                     *
                     * So, copy the current segment data group to an another structure to take the last one segment
                     * for the next tagOpen ( possible sdl:seg-defs )
                     *
                     */
                    $this->lastTransUnit = [];

                    // only the leading run of consecutive segment ids is copied
                    $last_value    = null;
                    $segmentsCount = count( $listOfSegmentsIds );
                    for ( $i = 0; $i < $segmentsCount; $i++ ) {
                        $id = $listOfSegmentsIds[ $i ];
                        if ( isset( $this->segments[ $id ] ) && ( $i == 0 || $last_value + 1 == $listOfSegmentsIds[ $i ] ) ) {
                            $last_value            = $listOfSegmentsIds[ $i ];
                            $this->lastTransUnit[] = $this->segments[ $id ];
                        }
                    }

                    // init translation and state
                    $translation  = '';
                    $lastMrkState = null;
                    $stateProp    = '';

                    // we must reset the lastMrkId found because this is a new segment.
                    $lastMrkId = -1;

                    if ( $this->xliffVersion === 2 ) {
                        $seg = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];

                        // update counts
                        if ( !$this->hasWrittenCounts && !empty( $seg ) ) {
                            $this->updateSegmentCounts( $seg );
                        }

                        // delete translations so the prepareSegment
                        // will put source content in target tag
                        if ( $this->sourceInTarget ) {
                            $seg[ 'translation' ] = '';
                            $this->resetCounts();
                        }

                        // append $translation
                        $translation = $this->prepareTranslation( $seg, $translation );

                        list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                    } else {
                        foreach ( $listOfSegmentsIds as $pos => $id ) {

                            /*
                             * This routine works to respect the positional orders of markers.
                             * In every cycle we check if the mrk of the segment is below or equal the last one.
                             * When this is true, means that the mrk id belongs to the next segment with the same internal_id
                             * so we MUST stop to apply markers and translations
                             * and stop to add eq_word_count
                             *
                             * Begin:
                             * pre-assign zero to the new mrk if this is the first one ( in this segment )
                             * If it is null leave it NULL
                             */
                            if ( (int)$this->segments[ $id ][ "mrk_id" ] < 0 && $this->segments[ $id ][ "mrk_id" ] !== null ) {
                                $this->segments[ $id ][ "mrk_id" ] = 0;
                            }

                            /*
                             * WARNING:
                             * For those seg-source that doesn't have a mrk ( having a mrk id === null )
                             * ( null <= -1 ) === true
                             * so, cast to int
                             */
                            if ( (int)$this->segments[ $id ][ "mrk_id" ] <= $lastMrkId ) {
                                break;
                            }

                            // the segment handled in this iteration
                            $seg = $this->segments[ $id ];

                            // update counts
                            if ( !empty( $seg ) ) {
                                $this->updateSegmentCounts( $seg );
                            }

                            // delete translations so the prepareSegment
                            // will put source content in target tag
                            if ( $this->sourceInTarget ) {
                                $seg[ 'translation' ] = '';
                                $this->resetCounts();
                            }

                            // append $translation
                            $translation = $this->prepareTranslation( $seg, $translation );

                            // consume the position, so it is not processed again for this trans-unit id
                            // (the xliff v2 branch keeps the positions because it needs them to populate metadata)
                            unset( $this->transUnits[ $this->currentTransUnitId ] [ $pos ] );

                            $lastMrkId = $this->segments[ $id ][ "mrk_id" ];

                            list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                        }
                    }

                    //append translation
                    $targetLang = '';
                    if ( $this->xliffVersion === 1 ) {
                        $targetLang = ' xml:lang="' . $this->targetLang . '"';
                    }

                    $tag = $this->buildTranslateTag( $targetLang, $stateProp, $translation, $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
                }

                // signal we are leaving a target
                $this->targetWasWritten = true;
                $this->inTarget         = false;
                // third argument: treat the tag as CDATA
                $this->postProcAndFlush( $this->outputFP, $tag, true );
            } elseif ( in_array( $name, $this->nodesToCopy, true ) ) { // we are closing a critical CDATA section

                $this->bufferIsActive = false;

                // only for Xliff 2.*
                // write here <mda:metaGroup> and <mda:meta> if already present in the <unit>
                if ( 'mda:metadata' === $name && $this->unitContainsMda && $this->xliffVersion === 2 && !$this->hasWrittenCounts ) {

                    // we need to update counts here
                    $this->updateCounts();
                    $this->hasWrittenCounts = true;

                    $tag = $this->CDATABuffer;
                    $tag .= $this->getWordCountGroupForXliffV2( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ], false );
                    $tag .= "    </mda:metadata>";

                } else {
                    $tag = $this->CDATABuffer . "</$name>";
                }

                $this->CDATABuffer = "";

                //flush to pointer
                $this->postProcAndFlush( $this->outputFP, $tag );
            } elseif ( 'segment' === $name ) {

                // only for Xliff 2.*
                // if segment has no <target> add it BEFORE </segment>
                if ( $this->xliffVersion === 2 && !$this->targetWasWritten ) {

                    $seg = $this->getCurrentSegment();

                    // copy attr from <source>
                    $tag = '<target';
                    foreach ( $this->sourceAttributes as $k => $v ) {
                        $tag .= " $k=\"$v\"";
                    }

                    $tag .= '>' . $seg[ 'translation' ] . '</target></segment>';
                }

                $this->postProcAndFlush( $this->outputFP, $tag );

                // we are leaving <segment>, reset the "target was written" flag
                $this->targetWasWritten = false;

            } elseif ( $name === 'trans-unit' ) {

                // only for Xliff 1.*
                // handling </trans-unit> closure
                if ( !$this->targetWasWritten ) {
                    $seg          = $this->getCurrentSegment();
                    $lastMrkState = null;
                    $stateProp    = '';
                    $tag          = '';

                    // if there is translation available insert <target> BEFORE </trans-unit>
                    if ( isset( $seg[ 'translation' ] ) ) {
                        list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                        $tag .= $this->createTargetTag( $seg[ 'translation' ], $stateProp );
                    }

                    $tag .= '</trans-unit>';
                    $this->postProcAndFlush( $this->outputFP, $tag );
                } else {
                    $this->postProcAndFlush( $this->outputFP, '</trans-unit>' );
                }
            } elseif ( $this->bufferIsActive ) { // this is a tag ( <g | <mrk ) inside a seg or seg-source tag
                $this->CDATABuffer .= "</$name>";
                // Do NOT Flush
            } else { //generic tag closure do Nothing
                // flush to pointer
                $this->postProcAndFlush( $this->outputFP, $tag );
            }
        } elseif ( $this->CDATABuffer === '<note/>' && $this->bufferIsActive === true ) {
            // an empty <note/> was buffered by tagOpen: write it out and leave the buffered state
            $this->postProcAndFlush( $this->outputFP, '<note/>' );
            $this->bufferIsActive = false;
            $this->CDATABuffer    = '';
            $this->isEmpty        = false;
        } else {
            //ok, nothing to be done; reset flag for next coming tag
            $this->isEmpty = false;
        }

        // check if we are leaving a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
        if ( $this->tuTagName === $name ) {
            $this->currentTransUnitTranslate = null;
            $this->inTU                      = false;
            $this->segmentPositionInTu       = -1;
            $this->unitContainsMda           = false;
            $this->hasWrittenCounts          = false;
            $this->sourceAttributes          = [];
        }
    }
545
546
    /**
547
     * Set the current segment array (with segment id and trans-unit id)
548
     *
549
     * @param array $listOfSegmentsIds
550
     */
551
    private function setCurrentSegmentArray( array $listOfSegmentsIds = [] ) {
        // No current segment yet, or we moved to a different trans-unit: start from the first segment id.
        // The isset guard stores null, without an undefined-offset notice, when the list has no element 0.
        if ( empty( $this->currentSegmentArray ) || $this->currentSegmentArray[ 'tid' ] !== $this->currentTransUnitId ) {
            $this->currentSegmentArray = [
                    'sid' => isset( $listOfSegmentsIds[ 0 ] ) ? $listOfSegmentsIds[ 0 ] : null,
                    'tid' => $this->currentTransUnitId,
            ];

            return;
        }

        // Same trans-unit as before: advance to the segment id that follows the current one.
        // When the current sid is not in the list array_search() returns false, which counts as
        // position 0 here (the next position is then 1), exactly as `false + 1` did before.
        $key          = array_search( $this->currentSegmentArray[ 'sid' ], $listOfSegmentsIds );
        $nextPosition = ( $key === false ? 0 : $key ) + 1;

        // past the end of the list there is no next segment: store null without reading an undefined offset
        $this->currentSegmentArray[ 'sid' ] = isset( $listOfSegmentsIds[ $nextPosition ] ) ? $listOfSegmentsIds[ $nextPosition ] : null;
        $this->currentSegmentArray[ 'tid' ] = $this->currentTransUnitId;
    }
571
572
    /**
     * Add the word counts of the current trans-unit to $this->counts.
     *
     * The segment ids are looked up in $this->transUnits by $this->currentTransUnitId.
     *  - XLIFF v2: only the segment selected by setCurrentSegmentArray() is counted.
     *  - any other version: every segment id listed for the unit is counted.
     *
     * Ids with no (or an empty) entry in $this->segments are skipped.
     * $this->currentSegmentArray is cleared before returning.
     */
    private function updateCounts() {
        // An unknown trans-unit id yields an empty list instead of a warning followed by foreach over null.
        $listOfSegmentsIds = isset( $this->transUnits[ $this->currentTransUnitId ] )
                ? $this->transUnits[ $this->currentTransUnitId ]
                : [];

        if ( !empty( $listOfSegmentsIds ) ) {
            $this->setCurrentSegmentArray( $listOfSegmentsIds );
        }

        if ( $this->xliffVersion === 2 ) {
            // currentSegmentArray may still be empty here (no ids for this unit)
            $idsToCount = isset( $this->currentSegmentArray[ 'sid' ] ) ? [ $this->currentSegmentArray[ 'sid' ] ] : [];
        } else {
            $idsToCount = $listOfSegmentsIds;
        }

        foreach ( $idsToCount as $id ) {
            // empty() also covers a missing key, so no undefined-index warning is raised
            if ( !empty( $this->segments[ $id ] ) ) {
                $this->updateSegmentCounts( $this->segments[ $id ] );
            }
        }

        $this->currentSegmentArray = [];
    }
600
601
    /**
     * Add one segment's word counts to the running totals in $this->counts.
     *
     * The raw count is added as-is; the equivalent count is first cut down to
     * two decimals (floor of value * 100, divided by 100).
     *
     * @param array $seg segment row; 'raw_word_count' and 'eq_word_count' are read
     */
    private function updateSegmentCounts( array $seg = [] ) {
        $this->counts[ 'raw_word_count' ] = $this->counts[ 'raw_word_count' ] + $seg[ 'raw_word_count' ];

        $truncatedEqCount                = floor( $seg[ 'eq_word_count' ] * 100 ) / 100;
        $this->counts[ 'eq_word_count' ] = $this->counts[ 'eq_word_count' ] + $truncatedEqCount;
    }
608
609
    /**
     * Set both accumulated word counters ('raw_word_count' and 'eq_word_count')
     * in $this->counts back to 0.
     */
    private function resetCounts() {
        foreach ( [ 'raw_word_count', 'eq_word_count' ] as $counterName ) {
            $this->counts[ $counterName ] = 0;
        }
    }
613
614
    /**
     * Build the text to be written into the target for one segment.
     *
     * Steps:
     *  1. source and translation are passed through Strings::removeDangerousChars();
     *  2. a null/empty translation is replaced by the source text;
     *  3. otherwise, when a callback is set and it reports errors for the segment,
     *     the source text wrapped in the UNTRANSLATED_CONTENT markers is used;
     *  4. for XLIFF v2 that text is returned as-is (marks are ignored on purpose and
     *     $transUnitTranslation is not used);
     *  5. for other versions the text is wrapped in a <mrk> when the segment has a
     *     'mrk_id', surrounded by 'prev_tags'/'succ_tags', and appended to
     *     $transUnitTranslation.
     *
     * @param array  $seg                  segment row
     * @param string $transUnitTranslation translation accumulated so far for the trans-unit
     *
     * @return string
     */
    protected function prepareTranslation( $seg, $transUnitTranslation = "" ) {
        $cleanSource = Strings::removeDangerousChars( $seg [ 'segment' ] );
        $targetText  = Strings::removeDangerousChars( $seg [ 'translation' ] );

        $dataRefMap = [];
        if ( isset( $seg[ 'data_ref_map' ] ) ) {
            $dataRefMap = Strings::jsonToArray( $seg[ 'data_ref_map' ] );
        }

        $translationIsMissing = is_null( $seg [ 'translation' ] ) || $seg [ 'translation' ] == '';

        if ( $translationIsMissing ) {
            // nothing translated: fall back to the source text
            $targetText = $cleanSource;
        } elseif ( $this->callback ) {
            $error = isset( $seg[ 'error' ] ) ? $seg[ 'error' ] : null;

            if ( $this->callback->thereAreErrors( $seg[ 'sid' ], $cleanSource, $targetText, $dataRefMap, $error ) ) {
                $targetText = '|||UNTRANSLATED_CONTENT_START|||' . $cleanSource . '|||UNTRANSLATED_CONTENT_END|||';
            }
        }

        // for xliff v2 we ignore the marks on purpose
        if ( $this->xliffVersion === 2 ) {
            return $targetText;
        }

        $hasMrkId = $seg[ 'mrk_id' ] !== null && $seg[ 'mrk_id' ] != '';

        if ( $hasMrkId ) {
            $mrkSuccTags = $seg[ 'mrk_succ_tags' ];

            // Japanese target: leading whitespace of the trailing mrk tags is dropped
            if ( $this->targetLang === 'ja-JP' ) {
                $mrkSuccTags = ltrim( $mrkSuccTags );
            }

            $targetText = '<mrk mid="' . $seg[ 'mrk_id' ] . '" mtype="seg">' . $seg[ 'mrk_prev_tags' ] . $targetText . $mrkSuccTags . '</mrk>';
        }

        return $transUnitTranslation . $seg[ 'prev_tags' ] . $targetText . $seg[ 'succ_tags' ];
    }
657
658
    /**
     * Render the <target> element for the current XLIFF version.
     *
     * XLIFF v2 gets a bare <target>; $targetLang, $stateProp and the counts are not used.
     * Every other version gets <target> with the language and state attributes,
     * followed by the word-count group unless this object is exactly an
     * SdlXliffSAXTranslationReplacer.
     *
     * @param string $targetLang   ready-made language attribute
     * @param string $stateProp    ready-made state attribute
     * @param string $translation  content of the target
     * @param mixed  $rawWordCount raw word count written into the count group
     * @param mixed  $eqWordCount  weighted word count written into the count group
     *
     * @return string
     */
    private function buildTranslateTag( $targetLang, $stateProp, $translation, $rawWordCount, $eqWordCount ) {
        // Same outcome as a switch listing `case 1` before `case 2` (loose comparison, first match wins).
        $isVersion2 = $this->xliffVersion != 1 && $this->xliffVersion == 2;

        if ( $isVersion2 ) {
            return '<target>' . $translation . '</target>';
        }

        $targetTag = '<target ' . $targetLang . ' ' . $stateProp . '>' . $translation . '</target>';

        // if it's a Trados file don't append count group
        if ( get_class( $this ) === SdlXliffSAXTranslationReplacer::class ) {
            return $targetTag;
        }

        return $targetTag . $this->getWordCountGroup( $rawWordCount, $eqWordCount );
    }
684
685
    /**
     * Render the <count-group> element carrying the raw and weighted word counts.
     *
     * The group is named after $this->currentTransUnitId and is preceded by a newline.
     *
     * @param mixed $raw_word_count value of the "x-matecat-raw" count
     * @param mixed $eq_word_count  value of the "x-matecat-weighted" count
     *
     * @return string
     */
    private function getWordCountGroup( $raw_word_count, $eq_word_count ) {
        $rawCountTag      = '<count count-type="x-matecat-raw">' . $raw_word_count . '</count>';
        $weightedCountTag = '<count count-type="x-matecat-weighted">' . $eq_word_count . '</count>';

        return "\n" . '<count-group name="' . $this->currentTransUnitId . '">' . $rawCountTag . $weightedCountTag . '</count-group>';
    }
694
695
    /**
     * Return the segment row for the current position inside the current trans-unit.
     *
     * The segment id is taken from
     * $this->transUnits[ currentTransUnitId ][ segmentPositionInTu ] and then
     * resolved through $this->segments.
     *
     * @return array the segment row, or an empty array when the unit is not flagged
     *               translate="yes", the unit/position is unknown (the position is
     *               reset to -1 when a unit is closed), or no segment matches the id
     */
    private function getCurrentSegment() {
        if ( $this->currentTransUnitTranslate !== 'yes' ) {
            return [];
        }

        $unitId   = $this->currentTransUnitId;
        $position = $this->segmentPositionInTu;

        // One isset() covers both the unit and the position, so an out-of-range
        // position no longer raises an undefined-offset warning.
        if ( !isset( $this->transUnits[ $unitId ][ $position ] ) ) {
            return [];
        }

        $index = $this->transUnits[ $unitId ][ $position ];

        return isset( $this->segments[ $index ] ) ? $this->segments[ $index ] : [];
    }
709
710
    /**
     * Create the <target> element for the current trans-unit.
     *
     * Builds the xml:lang attribute from $this->targetLang and delegates to
     * buildTranslateTag(), passing the word counts accumulated in $this->counts.
     *
     * @param string $translation content of the target
     * @param string $stateProp   ready-made state attribute
     *
     * @return string
     */
    private function createTargetTag( $translation, $stateProp ) {
        $langAttribute = 'xml:lang="' . $this->targetLang . '"';
        $rawCount      = $this->counts[ 'raw_word_count' ];
        $weightedCount = $this->counts[ 'eq_word_count' ];

        return $this->buildTranslateTag( $langAttribute, $stateProp, $translation, $rawCount, $weightedCount );
    }
723
724
    /**
     * Render the word counts as an XLIFF v2 metadata group.
     *
     * Every call increments $this->mdaGroupCounter and uses it to build the group
     * id ("word_count_tu_<n>").
     *
     * @param mixed $raw_word_count  value of the "x-matecat-raw" meta
     * @param mixed $eq_word_count   value of the "x-matecat-weighted" meta
     * @param bool  $withMetadataTag false (strictly) returns only the <mda:metaGroup>;
     *                               any other value wraps it in <mda:metadata>
     *
     * @return string
     */
    private function getWordCountGroupForXliffV2( $raw_word_count, $eq_word_count, $withMetadataTag = true ) {
        $id = 'word_count_tu_' . ++$this->mdaGroupCounter;

        // The layout whitespace inside both literals is part of the emitted XML; keep it untouched.
        if ( $withMetadataTag !== false ) {
            return "<mda:metadata>
                <mda:metaGroup id=\"$id\" category=\"row_xml_attribute\">
                    <mda:meta type=\"x-matecat-raw\">$raw_word_count</mda:meta>
                    <mda:meta type=\"x-matecat-weighted\">$eq_word_count</mda:meta>
                </mda:metaGroup>
            </mda:metadata>";
        }

        return "    <mda:metaGroup id=\"$id\" category=\"row_xml_attribute\">
                                <mda:meta type=\"x-matecat-raw\">$raw_word_count</mda:meta>
                                <mda:meta type=\"x-matecat-weighted\">$eq_word_count</mda:meta>
                            </mda:metaGroup>
                    ";
    }
750
751
    /**
     * Work out the state attribute of a trans-unit from the status of one of its
     * segments, taking into account the status recorded for the previous one.
     *
     * The guards below make a "less finished" status win over a "more finished" one
     * across successive calls: DRAFT always applies; NEW applies unless a DRAFT was
     * recorded; REJECTED/REBUTTED applies unless a NEW or DRAFT was recorded;
     * TRANSLATED applies only over nothing, TRANSLATED or APPROVED; APPROVED/FIXED
     * applies only over nothing or APPROVED.
     *
     * @param array       $seg          segment row; only 'status' is read
     * @param string      $state_prop   state attribute computed so far; returned unchanged when no rule applies
     * @param string|null $lastMrkState status recorded by the previous call, null on the first one
     *
     * @return array two items: the state attribute string and the status to pass to the next call
     */
    private function setTransUnitState( $seg, $state_prop, $lastMrkState ) {
        $isVersion2 = ( $this->xliffVersion === 2 );

        switch ( $seg[ 'status' ] ) {

            case TranslationStatus::STATUS_FIXED:
            case TranslationStatus::STATUS_APPROVED:
                if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                    $state_prop   = $isVersion2 ? "state=\"reviewed\"" : "state=\"signed-off\"";
                    $lastMrkState = TranslationStatus::STATUS_APPROVED;
                }
                break;

            case TranslationStatus::STATUS_TRANSLATED:
                if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_TRANSLATED || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                    $state_prop   = "state=\"translated\"";
                    $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
                }
                break;

            case TranslationStatus::STATUS_REJECTED:  // if there is a mark REJECTED and there is not a DRAFT, all the trans-unit is REJECTED. In V2 there is no way to mark
            case TranslationStatus::STATUS_REBUTTED:
                // Both inequalities must hold. The previous `||` was true for every value,
                // so a REJECTED mark also overwrote an earlier NEW or DRAFT.
                if ( ( $lastMrkState == null ) || ( $lastMrkState != TranslationStatus::STATUS_NEW && $lastMrkState != TranslationStatus::STATUS_DRAFT ) ) {
                    $state_prop   = $isVersion2 ? "state=\"initial\"" : "state=\"needs-review-translation\"";
                    $lastMrkState = TranslationStatus::STATUS_REJECTED;
                }
                break;

            case TranslationStatus::STATUS_NEW:
                if ( ( $lastMrkState == null ) || $lastMrkState != TranslationStatus::STATUS_DRAFT ) {
                    $state_prop   = $isVersion2 ? "state=\"initial\"" : "state=\"new\"";
                    $lastMrkState = TranslationStatus::STATUS_NEW;
                }
                break;

            case TranslationStatus::STATUS_DRAFT:
                // NOTE(review): the same value is emitted for XLIFF v2 as well — confirm it is intended there.
                $state_prop   = "state=\"needs-translation\"";
                $lastMrkState = TranslationStatus::STATUS_DRAFT;
                break;

            default:
                // this is the case when a segment is not showed in cattool, so the row in
                // segment_translations does not exists and
                // ---> $seg[ 'status' ] is NULL
                if ( $lastMrkState == null ) { //this is the first MRK ID
                    $state_prop   = "state=\"translated\"";
                    $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
                }
                // otherwise: nothing to do, the last state is preserved
                break;
        }

        return [ $state_prop, $lastMrkState ];
    }
810
811
    /**
     * @inheritDoc
     *
     * Character data is appended to $this->CDATABuffer while buffering is active.
     * Otherwise it is flushed to the output, except inside a <target>, whose
     * original content is dropped.
     */
    protected function characterData( $parser, $data ) {
        if ( $this->bufferIsActive ) {
            $this->CDATABuffer .= $data;

            return;
        }

        // don't write <target> data
        if ( !$this->inTarget ) {
            $this->postProcAndFlush( $this->outputFP, $data );
        }
    }
822
}
823