1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Matecat\XliffParser\XliffReplacer; |
4
|
|
|
|
5
|
|
|
use Matecat\XliffParser\Constants\TranslationStatus; |
6
|
|
|
use Matecat\XliffParser\Utils\Strings; |
7
|
|
|
use RuntimeException; |
8
|
|
|
|
9
|
|
|
class XliffSAXTranslationReplacer extends AbstractXliffReplacer { |
10
|
|
|
/** |
11
|
|
|
* @var int |
12
|
|
|
*/ |
13
|
|
|
private $mdaGroupCounter = 0; |
14
|
|
|
|
15
|
|
|
/** |
16
|
|
|
* @var array |
17
|
|
|
*/ |
18
|
|
|
private $nodesToCopy = [ |
19
|
|
|
'source', |
20
|
|
|
'mda:metadata', |
21
|
|
|
'memsource:additionalTagData', |
22
|
|
|
'originalData', |
23
|
|
|
'seg-source', |
24
|
|
|
'value', |
25
|
|
|
'bpt', |
26
|
|
|
'ept', |
27
|
|
|
'ph', |
28
|
|
|
'st', |
29
|
|
|
'note', |
30
|
|
|
'context', |
31
|
|
|
'context-group' |
32
|
|
|
]; |
33
|
|
|
|
34
|
|
|
/**
 * Stream the original XLIFF through the SAX parser and write the rebuilt document to the output pointer.
 *
 * The input is read in 4096-byte chunks. Before a chunk is handed to the parser every `&...;`
 * sequence is wrapped in self::$INTERNAL_TAG_PLACEHOLDER (sax would otherwise decode the entity),
 * and any `&` that is left over is obfuscated as an `amp` placeholder.
 *
 * @throws RuntimeException when xml_parse() reports an error on a chunk
 */
public function replaceTranslation() {
    fwrite( $this->outputFP, '<?xml version="1.0" encoding="UTF-8"?>' );

    //create Sax parser
    $xmlParser = $this->initSaxParser();

    $entityPattern     = "/&(.*?);/";
    $entityReplacement = self::$INTERNAL_TAG_PLACEHOLDER . '$1' . self::$INTERNAL_TAG_PLACEHOLDER;

    while ( true ) {
        $this->currentBuffer = fread( $this->originalFP, 4096 );

        // stop on EOF / read failure only: a chunk such as "0" is falsy but is still data
        if ( $this->currentBuffer === false || $this->currentBuffer === '' ) {
            break;
        }

        // obfuscate entities because sax automatically does html_entity_decode
        $escapedBuffer = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );

        // Avoid cutting entities in half: the last fread could have truncated an entity
        // (say, '&lt;' cut after '&l'), thus invalidating the escaping.
        // 9 is taken as the max length of an entity: while an unescaped '&' sits in the last
        // 9 bytes of the buffer, fetch 9 more bytes and escape again. An '&' followed by more
        // than 9 bytes can't be a truncated entity, so the loop stops there.
        while ( true ) {
            $_ampPos = strpos( $escapedBuffer, '&' );

            if ( $_ampPos === false || ( strlen( $escapedBuffer ) - $_ampPos ) > 9 ) {
                break;
            }

            $extraBytes = fread( $this->originalFP, 9 );

            // Nothing more to read: the dangling '&' can never be completed.
            // Without this exit the loop would spin forever at end of file.
            if ( $extraBytes === false || $extraBytes === '' ) {
                break;
            }

            $this->currentBuffer .= $extraBytes;
            $escapedBuffer       = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );
        }

        // every '&' still present is not part of a complete entity: escape it as well
        $this->currentBuffer = str_replace( "&", self::$INTERNAL_TAG_PLACEHOLDER . 'amp' . self::$INTERNAL_TAG_PLACEHOLDER, $escapedBuffer );
        unset( $escapedBuffer );

        //get length of chunk
        $this->len = strlen( $this->currentBuffer );

        //parse chunk of text
        if ( !xml_parse( $xmlParser, $this->currentBuffer, feof( $this->originalFP ) ) ) {
            //if unable, raise an exception
            throw new RuntimeException( sprintf(
                    "XML error: %s at line %d",
                    xml_error_string( xml_get_error_code( $xmlParser ) ),
                    xml_get_current_line_number( $xmlParser )
            ) );
        }

        // Accumulate the total bytes handed to the parser so far: tagOpen() subtracts this offset
        // from the parser's global byte index to get an address local to the current buffer.
        $this->offset += $this->len;
    }

    // close Sax parser
    $this->closeSaxParser( $xmlParser );
}
99
|
|
|
|
100
|
|
|
/**
 * SAX "start element" handler: rebuilds the opening tag and either flushes it to the output
 * pointer or appends it to the CDATA buffer. Nothing is emitted while inside a <target>,
 * because the <target> is rebuilt from scratch in tagClose().
 *
 * @inheritDoc
 */
protected function tagOpen( $parser, $name, $attr ) {
    // check if we are entering into a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
    if ( $this->tuTagName === $name ) {
        $this->inTU = true;

        // get id
        // trim to first 100 characters because this is the limit on Matecat's DB
        // (a unit without an `id` attribute no longer triggers an undefined index)
        $this->currentTransUnitId = substr( (string)( $attr[ 'id' ] ?? '' ), 0, 100 );

        // `translate` attribute can be only yes or no
        $attr[ 'translate' ] = ( isset( $attr[ 'translate' ] ) && $attr[ 'translate' ] === 'no' ) ? 'no' : 'yes';

        // current 'translate' attribute of the current trans-unit
        $this->currentTransUnitTranslate = $attr[ 'translate' ];
    }

    if ( 'source' === $name ) {
        $this->sourceAttributes = $attr;
    }

    if ( 'mda:metadata' === $name ) {
        $this->unitContainsMda = true;
    }

    // check if we are entering into a <target>
    if ( 'target' === $name ) {
        // targets of non translatable units are copied as they are
        $this->inTarget = ( $this->currentTransUnitTranslate !== 'no' );
    }

    // check if we are inside a <target>, obviously this happen only if there are targets inside the trans-unit
    // <target> must be stripped to be replaced, so this check avoids <target> reconstruction
    if ( !$this->inTarget ) {

        $tag = '';

        //
        // ============================================
        // only for Xliff 2.*
        // ============================================
        //
        // In xliff v2 we MUST add <mda:metadata> BEFORE <notes>/<originalData>/<segment>/<ignorable>
        //
        // As documentation says, <unit> contains:
        //
        // - elements from other namespaces, OPTIONAL
        // - Zero or one <notes> elements followed by
        // - Zero or one <originalData> element followed by
        // - One or more <segment> or <ignorable> elements in any order.
        //
        // For more info please refer to:
        //
        // http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html#unit
        //
        if ( $this->xliffVersion === 2 && ( $name === 'notes' || $name === 'originalData' || $name === 'segment' || $name === 'ignorable' ) && $this->unitContainsMda === false ) {
            if ( isset( $this->transUnits[ $this->currentTransUnitId ] ) && !empty( $this->transUnits[ $this->currentTransUnitId ] ) && !$this->hasWrittenCounts ) {

                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag                   .= $this->getWordCountGroupForXliffV2();
                $this->unitContainsMda = true;
            }
        }

        // construct tag
        $tag .= "<$name ";

        $lastMrkState = null;
        $stateProp    = '';

        foreach ( $attr as $k => $v ) {

            //if tag name is file, we must replace the target-language attribute
            if ( $name === 'file' && $k === 'target-language' && !empty( $this->targetLang ) ) {
                //replace Target language with job language provided from constructor
                $tag .= "$k=\"$this->targetLang\" ";
            } else {
                $pos = 0;
                if ( $this->currentTransUnitId && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {
                    $pos = current( $this->transUnits[ $this->currentTransUnitId ] );
                }

                if ( $name === $this->tuTagName && isset( $this->segments[ $pos ] ) && isset( $this->segments[ $pos ][ 'sid' ] ) ) {

                    $sid = $this->segments[ $pos ][ 'sid' ];

                    // add `help-id` to xliff v.1*
                    // add `mtc:segment-id` to xliff v.2*
                    if ( $this->xliffVersion === 1 && strpos( $tag, 'help-id' ) === false ) {
                        if ( !empty( $sid ) ) {
                            $tag .= "help-id=\"$sid\" ";
                        }
                    } elseif ( $this->xliffVersion === 2 && strpos( $tag, 'mtc:segment-id' ) === false ) {
                        if ( !empty( $sid ) ) {
                            $tag .= "mtc:segment-id=\"$sid\" ";
                        }
                    }

                } elseif ( 'segment' === $name && $this->xliffVersion === 2 ) { // add state to segment in Xliff v2
                    // an unknown position is passed as an empty segment instead of an undefined offset
                    [ $stateProp, $lastMrkState ] = $this->setTransUnitState( $this->segments[ $pos ] ?? [], $stateProp, $lastMrkState );
                }

                //normal tag flux, put attributes in it but skip for translation state and set the right value for the attribute
                if ( $k != 'state' ) {
                    $tag .= "$k=\"$v\" ";
                }
            }
        }

        // replace state for xliff v2
        if ( $stateProp ) {
            $tag .= $stateProp;
        }

        // add oasis xliff 20 namespace
        if ( $this->xliffVersion === 2 && $name === 'xliff' && !array_key_exists( 'xmlns:mda', $attr ) ) {
            $tag .= 'xmlns:mda="urn:oasis:names:tc:xliff:metadata:2.0"';
        }

        // add MateCat specific namespace, we want maybe add non-XLIFF attributes
        if ( $name === 'xliff' && !array_key_exists( 'xmlns:mtc', $attr ) ) {
            $tag .= ' xmlns:mtc="https://www.matecat.com" ';
        }

        // trgLang
        if ( $name === 'xliff' ) {
            $tag = preg_replace( '/trgLang="(.*?)"/', 'trgLang="' . $this->targetLang . '"', $tag );
        }

        //this logic helps detecting empty tags
        //get current position of SAX pointer in all the stream of data is has read so far:
        //it points at the end of current tag
        $idx = xml_get_current_byte_index( $parser );

        //check whether the bounds of current tag are entirely in current buffer or the end of the current tag
        //is outside current buffer (in the latter case, it's in next buffer to be read by the while loop);
        //this check is necessary because we may have truncated a tag in half with current read,
        //and the other half may be encountered in the next buffer it will be passed
        if ( isset( $this->currentBuffer[ $idx - $this->offset ] ) ) {
            //if this tag entire length fitted in the buffer, the last char must be the last
            //symbol before the '>'; if it's an empty tag, it is assumed that it's a '/'
            $lastChar = $this->currentBuffer[ $idx - $this->offset ];
        } else {
            //if it's out, simple use the last character of the chunk
            $lastChar = $this->currentBuffer[ $this->len - 1 ];
        }

        //trim last space
        $tag = rtrim( $tag );

        //detect empty tag
        $this->isEmpty = ( $lastChar == '/' || $name == 'x' );
        if ( $this->isEmpty ) {
            $tag .= '/';
        }

        //add tag ending
        $tag .= ">";

        //set a Buffer for the segSource Source tag
        if ( $this->bufferIsActive || in_array( $name, $this->nodesToCopy ) ) { // we are opening a critical CDATA section

            //WARNING BECAUSE SOURCE AND SEG-SOURCE TAGS CAN BE EMPTY IN SOME CASES!!!!!
            //so check for isEmpty also in conjunction with name
            if ( $this->isEmpty && ( 'source' === $name || 'seg-source' === $name ) ) {
                $this->postProcAndFlush( $this->outputFP, $tag );
            } else {
                //these are NOT source/seg-source/value empty tags, THERE IS A CONTENT, write it in buffer
                $this->bufferIsActive = true;
                $this->CDATABuffer    .= $tag;
            }
        } else {
            $this->postProcAndFlush( $this->outputFP, $tag );
        }
    }

    // update segmentPositionInTu

    if ( $this->xliffVersion === 1 && $this->inTU && $name === 'source' ) {
        $this->segmentPositionInTu++;
    }

    if ( $this->xliffVersion === 2 && $this->inTU && $name === 'segment' ) {
        $this->segmentPositionInTu++;
    }
}
301
|
|
|
|
302
|
|
|
/**
 * SAX "end element" handler: writes the closing markup of the element, replacing the original
 * <target> with the one built from the stored segments, and resets the per-unit state when the
 * trans-unit / unit element is left.
 *
 * @inheritDoc
 */
protected function tagClose( $parser, $name ) {
    $closingMarkup = '';

    // Nothing has to be written for a tag nested inside a <target> or for an empty tag:
    // the empty tag was already closed by self::tagOpen
    $mustEmit = !$this->isEmpty && ( !$this->inTarget || $name === 'target' );

    if ( $mustEmit ) {

        if ( !$this->inTarget ) {
            $closingMarkup = "</$name>";
        }

        if ( 'target' == $name ) {

            // units flagged translate="no" keep the markup computed above
            if ( $this->currentTransUnitTranslate !== 'no' && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {

                // get translation of current segment, by indirect indexing: id -> positional index -> segment
                // actually there may be more that one segment to that ID if there are two mrk of the same source segment
                $segmentIds = $this->transUnits[ $this->currentTransUnitId ];

                if ( !empty( $segmentIds ) ) {
                    $this->setCurrentSegmentArray( $segmentIds );
                }

                /*
                 * The segment grouping information is lost at the end of every cycle, so the
                 * consecutive run of segments of this unit is copied into lastTransUnit to keep
                 * it available for the next tagOpen ( possible sdl:seg-defs )
                 */
                $this->lastTransUnit = [];

                $previousId = null;
                $howMany    = count( $segmentIds );
                for ( $i = 0; $i < $howMany; $i++ ) {
                    $candidate = $segmentIds[ $i ];
                    if ( isset( $this->segments[ $candidate ] ) && ( $i == 0 || $previousId + 1 == $segmentIds[ $i ] ) ) {
                        $previousId            = $segmentIds[ $i ];
                        $this->lastTransUnit[] = $this->segments[ $candidate ];
                    }
                }

                // init translation and state
                $translation  = '';
                $lastMrkState = null;
                $stateProp    = '';

                // we must reset the lastMrkId found because this is a new segment.
                $lastMrkId = -1;

                if ( $this->xliffVersion === 2 ) {
                    $seg = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];

                    // update counts
                    if ( !$this->hasWrittenCounts && !empty( $seg ) ) {
                        $this->updateSegmentCounts( $seg );
                    }

                    // delete translations so the prepareSegment
                    // will put source content in target tag
                    if ( $this->sourceInTarget ) {
                        $seg[ 'translation' ] = '';
                        $this->resetCounts();
                    }

                    // append $translation
                    $translation = $this->prepareTranslation( $seg, $translation );

                    [ $stateProp, ] = $this->setTransUnitState( $seg, $stateProp, null );
                } else {
                    foreach ( $segmentIds as $pos => $id ) {

                        /*
                         * Markers must be applied respecting their positional order.
                         * A mrk id lower than or equal to the previous one belongs to the next
                         * segment sharing the same internal_id: stop applying markers,
                         * translations and eq_word_count there.
                         *
                         * A negative mrk id is first normalized to zero; a NULL one is left NULL.
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] < 0 && $this->segments[ $id ][ "mrk_id" ] !== null ) {
                            $this->segments[ $id ][ "mrk_id" ] = 0;
                        }

                        /*
                         * WARNING:
                         * a seg-source without mrk has a mrk id === null and
                         * ( null <= -1 ) === true, hence the cast to int
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] <= $lastMrkId ) {
                            break;
                        }

                        $seg = $this->segments[ $id ];

                        // update counts
                        if ( !empty( $seg ) ) {
                            $this->updateSegmentCounts( $seg );
                        }

                        // delete translations so the prepareSegment
                        // will put source content in target tag
                        if ( $this->sourceInTarget ) {
                            $seg[ 'translation' ] = '';
                            $this->resetCounts();
                        }

                        // append $translation
                        $translation = $this->prepareTranslation( $seg, $translation );

                        // this position has been consumed
                        unset( $this->transUnits[ $this->currentTransUnitId ] [ $pos ] );

                        $lastMrkId = $this->segments[ $id ][ "mrk_id" ];

                        [ $stateProp, $lastMrkState ] = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                    }
                }

                //append translation
                $closingMarkup = $this->createTargetTag( $translation, $stateProp );
            }

            // signal we are leaving a target
            $this->targetWasWritten = true;
            $this->inTarget         = false;
            $this->postProcAndFlush( $this->outputFP, $closingMarkup, true );

        } elseif ( in_array( $name, $this->nodesToCopy ) ) { // we are closing a critical CDATA section

            $this->bufferIsActive = false;

            // only for Xliff 2.*
            // write here <mda:metaGroup> and <mda:meta> if already present in the <unit>
            $appendCounts = 'mda:metadata' === $name && $this->unitContainsMda && $this->xliffVersion === 2 && !$this->hasWrittenCounts;

            if ( $appendCounts ) {
                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $closingMarkup = $this->CDATABuffer;
                $closingMarkup .= $this->getWordCountGroupForXliffV2( false );
                $closingMarkup .= " </mda:metadata>";
            } else {
                $closingMarkup = $this->CDATABuffer . "</$name>";
            }

            $this->CDATABuffer = "";

            //flush to pointer
            $this->postProcAndFlush( $this->outputFP, $closingMarkup );

        } elseif ( 'segment' === $name ) {

            // only for Xliff 2.*
            // if segment has no <target> add it BEFORE </segment>
            if ( $this->xliffVersion === 2 && !$this->targetWasWritten ) {
                $seg = $this->getCurrentSegment();

                if ( isset( $seg[ 'translation' ] ) ) {
                    $translation    = $this->prepareTranslation( $seg );
                    [ $stateProp, ] = $this->setTransUnitState( $seg, '', null );

                    // replace the tag
                    $closingMarkup = $this->createTargetTag( $translation, $stateProp ) . '</segment>';
                }
            }

            $this->postProcAndFlush( $this->outputFP, $closingMarkup );

            // we are leaving <segment>, reset the target flag
            $this->targetWasWritten = false;

        } elseif ( $name === 'trans-unit' ) {

            // only for Xliff 1.*
            // handling </trans-unit> closure
            if ( $this->targetWasWritten ) {
                $this->postProcAndFlush( $this->outputFP, '</trans-unit>' );
                $this->targetWasWritten = false;
            } else {
                $seg = $this->getCurrentSegment();

                if ( isset( $seg[ 'translation' ] ) ) {
                    $translation    = $this->prepareTranslation( $seg );
                    [ $stateProp, ] = $this->setTransUnitState( $seg, '', null );

                    // replace the tag
                    $closingMarkup = $this->createTargetTag( $translation, $stateProp ) . '</trans-unit>';
                }

                $this->postProcAndFlush( $this->outputFP, $closingMarkup );
            }

        } elseif ( $this->bufferIsActive ) { // this is a tag ( <g | <mrk ) inside a seg or seg-source tag
            // Do NOT Flush
            $this->CDATABuffer .= "</$name>";
        } else {
            // generic tag closure: flush to pointer
            $this->postProcAndFlush( $this->outputFP, $closingMarkup );
        }

    } elseif ( $this->CDATABuffer === '<note/>' && $this->bufferIsActive === true ) {
        $this->postProcAndFlush( $this->outputFP, '<note/>' );
        $this->bufferIsActive = false;
        $this->CDATABuffer    = '';
        $this->isEmpty        = false;
    } else {
        //ok, nothing to be done; reset flag for next coming tag
        $this->isEmpty = false;
    }

    // check if we are leaving a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
    if ( $this->tuTagName === $name ) {
        $this->currentTransUnitTranslate = null;
        $this->inTU                      = false;
        $this->segmentPositionInTu       = -1;
        $this->unitContainsMda           = false;
        $this->hasWrittenCounts          = false;
        $this->sourceAttributes          = [];

        $this->resetCounts();
    }
}
557
|
|
|
|
558
|
|
|
/**
 * Set the current segment array (with segment id and trans-unit id).
 *
 * On the first call for a trans-unit the first id of the list is selected; every further call
 * for the same trans-unit moves to the id that follows the current one in the list.
 * The 'sid' entry is null when the list has no element at the selected position.
 *
 * @param array $listOfSegmentsIds
 */
private function setCurrentSegmentArray( array $listOfSegmentsIds = [] ) {
    $nextIndex = 0;

    $isSameUnit = !empty( $this->currentSegmentArray ) && $this->currentSegmentArray[ 'tid' ] === $this->currentTransUnitId;

    if ( $isSameUnit ) {
        $key = array_search( $this->currentSegmentArray[ 'sid' ], $listOfSegmentsIds );

        // array_search() returns false on a miss and `false + 1` would silently select
        // the second element: in that case restart from the first one instead
        if ( $key !== false ) {
            $nextIndex = $key + 1;
        }
    }

    $this->currentSegmentArray = [
            // the position may be past the end of the list (or the list may be empty)
            'sid' => $listOfSegmentsIds[ $nextIndex ] ?? null,
            'tid' => $this->currentTransUnitId,
    ];
}
583
|
|
|
|
584
|
|
|
/**
 * Update counts.
 *
 * Feeds updateSegmentCounts() with the segments of the current trans-unit: only the currently
 * selected segment for xliff v2, every segment in the unit's list otherwise.
 * The current segment array is emptied once done.
 */
private function updateCounts() {
    // populate counts; a trans-unit unknown to the segment map has no segments to count
    $listOfSegmentsIds = $this->transUnits[ $this->currentTransUnitId ] ?? [];

    if ( !empty( $listOfSegmentsIds ) ) {
        $this->setCurrentSegmentArray( $listOfSegmentsIds );
    }

    if ( $this->xliffVersion === 2 ) {
        $sid = $this->currentSegmentArray[ 'sid' ] ?? null;
        $seg = ( $sid !== null && isset( $this->segments[ $sid ] ) ) ? $this->segments[ $sid ] : null;

        if ( !empty( $seg ) ) {
            $this->updateSegmentCounts( $seg );
        }
    } else {
        foreach ( $listOfSegmentsIds as $id ) {
            $seg = $this->segments[ $id ] ?? null;

            if ( !empty( $seg ) ) {
                $this->updateSegmentCounts( $seg );
            }
        }
    }

    $this->currentSegmentArray = [];
}
612
|
|
|
|
613
|
|
|
/**
 * Add the word counts of a segment to the counters of the current trans-unit.
 *
 * The per-segment entry is stored under the segment 'sid'; the equivalent word count is
 * truncated (not rounded) to two decimals before being stored and summed.
 *
 * @param array $seg
 */
private function updateSegmentCounts( array $seg = [] ) {
    $raw_word_count = $seg[ 'raw_word_count' ];
    $eq_word_count  = ( floor( $seg[ 'eq_word_count' ] * 100 ) / 100 );

    $this->counts[ 'segments_count_array' ][ $seg[ 'sid' ] ] = [
            'raw_word_count' => $raw_word_count,
            'eq_word_count'  => $eq_word_count,
    ];

    $this->counts[ 'raw_word_count' ] += $raw_word_count;
    $this->counts[ 'eq_word_count' ]  += $eq_word_count;
}
632
|
|
|
|
633
|
|
|
/**
 * Bring the word counters back to their empty state.
 */
private function resetCounts() {
    $blankCounters = [
            'segments_count_array' => [],
            'raw_word_count'       => 0,
            'eq_word_count'        => 0,
    ];

    foreach ( $blankCounters as $counterName => $blankValue ) {
        $this->counts[ $counterName ] = $blankValue;
    }
}
638
|
|
|
|
639
|
|
|
/**
 * Build the text to be placed in the <target> for a segment and append it to the
 * translation accumulated so far for the trans-unit.
 *
 * The source text is used when the segment has no translation, or (wrapped in the
 * UNTRANSLATED_CONTENT markers) when the callback reports errors on the translation.
 * For xliff v2 the bare text is returned: marks and surrounding tags are ignored on purpose.
 *
 * @param array  $seg
 * @param string $transUnitTranslation
 *
 * @return string
 */
protected function prepareTranslation( $seg, $transUnitTranslation = "" ) {
    $sourceText = Strings::removeDangerousChars( $seg [ 'segment' ] );
    $targetText = Strings::removeDangerousChars( $seg [ 'translation' ] );

    $dataRefMap = [];
    if ( isset( $seg[ 'data_ref_map' ] ) ) {
        $dataRefMap = Strings::jsonToArray( $seg[ 'data_ref_map' ] );
    }

    if ( $seg [ 'translation' ] == '' ) {
        // nothing translated: fall back to the source
        $targetText = $sourceText;
    } elseif ( $this->callback instanceof XliffReplacerCallbackInterface ) {
        $error = empty( $seg[ 'error' ] ) ? null : $seg[ 'error' ];

        if ( $this->callback->thereAreErrors( $seg[ 'sid' ], $sourceText, $targetText, $dataRefMap, $error ) ) {
            $targetText = '|||UNTRANSLATED_CONTENT_START|||' . $sourceText . '|||UNTRANSLATED_CONTENT_END|||';
        }
    }

    // for xliff v2 we ignore the marks on purpose
    if ( $this->xliffVersion === 2 ) {
        return $targetText;
    }

    $hasMrk = $seg[ 'mrk_id' ] !== null && $seg[ 'mrk_id' ] != '';

    if ( $hasMrk ) {
        $trailingMrkTags = $seg[ 'mrk_succ_tags' ];
        if ( $this->targetLang === 'ja-JP' ) {
            $trailingMrkTags = ltrim( $trailingMrkTags );
        }

        $targetText = "<mrk mid=\"" . $seg[ 'mrk_id' ] . "\" mtype=\"seg\">" . $seg[ 'mrk_prev_tags' ] . $targetText . $trailingMrkTags . "</mrk>";
    }

    return $transUnitTranslation . $seg[ 'prev_tags' ] . $targetText . $seg[ 'succ_tags' ];
}
682
|
|
|
|
683
|
|
|
|
684
|
|
|
/**
 * Render the <count-group> element carrying the raw and the weighted word counts;
 * the group is named after the current trans-unit id.
 *
 * @param $raw_word_count
 * @param $eq_word_count
 *
 * @return string
 */
private function getWordCountGroup( $raw_word_count, $eq_word_count ) {
    $rawCount      = '<count count-type="x-matecat-raw">' . $raw_word_count . '</count>';
    $weightedCount = '<count count-type="x-matecat-weighted">' . $eq_word_count . '</count>';

    return "\n" . '<count-group name="' . $this->currentTransUnitId . '">' . $rawCount . $weightedCount . '</count-group>';
}
693
|
|
|
|
694
|
|
|
/**
 * Get the segment found at the current position inside the current trans-unit.
 *
 * @return array the segment, or an empty array when the unit is not translatable or
 *               no segment is mapped to the current position
 */
private function getCurrentSegment() {
    if ( $this->currentTransUnitTranslate !== 'yes' ) {
        return [];
    }

    // the position may have no entry for this unit (or the unit may be unknown at all)
    $index = $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ] ?? null;

    if ( $index === null || !isset( $this->segments[ $index ] ) ) {
        return [];
    }

    return $this->segments[ $index ];
}
708
|
|
|
|
709
|
|
|
/**
 * This function creates a <target>.
 *
 * For xliff v2 the element carries no attribute. For any other version the element gets
 * the state property (and the xml:lang attribute when the version is 1), followed by the
 * word count group, unless this object is exactly a SdlXliffSAXTranslationReplacer.
 *
 * @param $translation
 * @param $stateProp
 *
 * @return string
 */
private function createTargetTag( $translation, $stateProp ) {
    // loose comparisons on purpose: same matching order of a switch with `case 1` listed before `case 2`
    $isVersionTwo = $this->xliffVersion != 1 && $this->xliffVersion == 2;

    if ( $isVersionTwo ) {
        return "<target>$translation</target>";
    }

    $langAttribute = ( $this->xliffVersion === 1 ) ? ' xml:lang="' . $this->targetLang . '"' : '';

    $targetTag = '<target ' . $langAttribute . ' ' . $stateProp . '>' . $translation . '</target>';

    // if it's a Trados file don't append count group
    $isTradosFile = get_class( $this ) === SdlXliffSAXTranslationReplacer::class;
    if ( !$isTradosFile ) {
        $targetTag .= $this->getWordCountGroup( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
    }

    return $targetTag;
}
741
|
|
|
|
742
|
|
|
/**
 * Render one <mda:metaGroup> per counted segment of the current trans-unit, each one
 * holding the raw and the weighted word count of the segment.
 *
 * NOTE(review): the whitespace between the generated elements is written here as a single
 * line feed; confirm it against the upstream literal if the exact layout matters.
 *
 * @param bool $withMetadataTag when true the groups are wrapped in <mda:metadata>
 *
 * @return string
 */
private function getWordCountGroupForXliffV2( $withMetadataTag = true ) {

    $this->mdaGroupCounter++;
    $segments_count_array = $this->counts[ 'segments_count_array' ];

    $return = '';

    if ( $withMetadataTag === true ) {
        $return .= '<mda:metadata>';
    }

    // groups are numbered by their position inside the unit, not by segment id
    $index = 0;
    foreach ( $segments_count_array as $segments_count_item ) {

        $id = 'word_count_tu[' . $this->currentTransUnitId . '][' . $index . ']';
        $index++;

        $return .= " <mda:metaGroup id=\"" . $id . "\" category=\"row_xml_attribute\">\n"
                . "<mda:meta type=\"x-matecat-raw\">" . $segments_count_item[ 'raw_word_count' ] . "</mda:meta>\n"
                . "<mda:meta type=\"x-matecat-weighted\">" . $segments_count_item[ 'eq_word_count' ] . "</mda:meta>\n"
                . "</mda:metaGroup>";
    }

    if ( $withMetadataTag === true ) {
        $return .= '</mda:metadata>';
    }

    return $return;
}
780
|
|
|
|
781
|
|
|
/**
 * Compute the `state` property for a segment, taking into account the state already chosen
 * for the previous marks of the same trans-unit.
 *
 * @param $seg          segment data; its 'status' entry drives the choice (a missing one is handled as NULL)
 * @param $state_prop   the state property computed so far, returned untouched when the status does not override it
 * @param $lastMrkState the translation status that produced $state_prop, null for the first mark
 *
 * @return array [ state property, translation status that produced it ]
 */
private function setTransUnitState( $seg, $state_prop, $lastMrkState ) {
    switch ( $seg[ 'status' ] ?? null ) {

        case TranslationStatus::STATUS_FIXED:
        case TranslationStatus::STATUS_APPROVED2:
            if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_APPROVED2 ) {
                $state_prop   = "state=\"final\"";
                $lastMrkState = TranslationStatus::STATUS_APPROVED2;
            }
            break;

        case TranslationStatus::STATUS_APPROVED:
            if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"reviewed\"" : "state=\"signed-off\"";
                $lastMrkState = TranslationStatus::STATUS_APPROVED;
            }
            break;

        case TranslationStatus::STATUS_TRANSLATED:
            if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_TRANSLATED || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                $state_prop   = "state=\"translated\"";
                $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
            }
            break;

        case TranslationStatus::STATUS_REJECTED: // if there is a mark REJECTED and there is not a DRAFT, all the trans-unit is REJECTED. In V2 there is no way to mark
        case TranslationStatus::STATUS_REBUTTED:
            // The two inequalities must hold together: joined by `||` the test was always true,
            // so a rejected mark used to override a previous NEW / DRAFT one
            if ( ( $lastMrkState == null ) || ( $lastMrkState != TranslationStatus::STATUS_NEW && $lastMrkState != TranslationStatus::STATUS_DRAFT ) ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"needs-review-translation\"";
                $lastMrkState = TranslationStatus::STATUS_REJECTED;
            }
            break;

        case TranslationStatus::STATUS_NEW:
            if ( ( $lastMrkState == null ) || $lastMrkState != TranslationStatus::STATUS_NEW ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"new\"";
                $lastMrkState = TranslationStatus::STATUS_NEW;
            }
            break;

        case TranslationStatus::STATUS_DRAFT:
            if ( ( $lastMrkState == null ) || $lastMrkState != TranslationStatus::STATUS_DRAFT ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"new\"";
                $lastMrkState = TranslationStatus::STATUS_DRAFT;
            }
            break;

        default:
            // this is the case when a segment is not showed in cattool, so the row in
            // segment_translations does not exists and
            // ---> $seg[ 'status' ] is NULL
            if ( $lastMrkState == null ) { //this is the first MRK ID
                $state_prop   = "state=\"translated\"";
                $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
            }
            // otherwise do nothing and preserve the last state
            break;
    }

    return [ $state_prop, $lastMrkState ];
}
849
|
|
|
|
850
|
|
|
/**
 * SAX character data handler: the text goes to the CDATA buffer while the buffer is
 * active, straight to the output pointer otherwise. Text found inside a <target> is dropped.
 *
 * @inheritDoc
 */
protected function characterData( $parser, $data ) {
    if ( $this->bufferIsActive ) {
        $this->CDATABuffer .= $data;

        return;
    }

    // don't write <target> data
    if ( !$this->inTarget ) {
        $this->postProcAndFlush( $this->outputFP, $data );
    }
}
861
|
|
|
} |
862
|
|
|
|
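PHP has two types of connecting operators (logical operators and boolean operators): `and` / `&&` and `or` / `||`.
The difference between them is their precedence, that is, the order in which they are evaluated relative to other operators such as assignment. In most cases, you would want to use a boolean operator like `&&` or `||`.
Let's take a look at a few examples: `$f = false or true;` is evaluated as `($f = false) or true;`, whereas `$f = false || true;` is evaluated as `$f = (false || true);`.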
Logical Operators are used for Control-Flow
One case where you explicitly want to use logical operators is for control-flow such as this: `$x === 5 or die('$x must be 5.');`
Since `die` introduces problems of its own (for example, it makes our code hard to test and prevents any kind of more sophisticated error handling), you probably do not want to use this in real-world code. Unfortunately, logical operators cannot be combined with `throw` at this point: `$x === 5 or throw new RuntimeException('$x must be 5.');` is a parse error before PHP 8.0, where `throw` became an expression.
These limitations lead to logical operators rarely being of use in current PHP code.