1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Matecat\XliffParser\XliffReplacer; |
4
|
|
|
|
5
|
|
|
use Matecat\XliffParser\Constants\TranslationStatus; |
6
|
|
|
use Matecat\XliffParser\Utils\Strings; |
7
|
|
|
use RuntimeException; |
8
|
|
|
|
9
|
|
|
class XliffSAXTranslationReplacer extends AbstractXliffReplacer { |
10
|
|
|
/**
 * Sequence number used by getWordCountGroupForXliffV2() to build a unique
 * "word_count_tu_N" id for every <mda:metaGroup> it emits.
 *
 * @var int
 */
private $mdaGroupCounter = 0;

/**
 * Element names whose markup and text are accumulated in the CDATA buffer
 * (see tagOpen()) and written out in one piece when the element is closed
 * (see tagClose()).
 *
 * @var array
 */
private $nodesToCopy = [
    'source', 'mda:metadata', 'memsource:additionalTagData', 'originalData',
    'seg-source', 'value', 'bpt', 'ept',
    'ph', 'st', 'note', 'context',
];
32
|
|
|
|
33
|
|
|
/**
 * Stream the original XLIFF through a SAX parser and write the rewritten document to the output pointer.
 *
 * The input is read in 4096-byte chunks. Before a chunk is handed to the parser every `&name;` entity is
 * obfuscated with the internal placeholder (the SAX parser would otherwise decode it), and every remaining
 * bare `&` is rewritten as an obfuscated `amp` entity.
 *
 * @throws RuntimeException when the SAX parser reports an XML error
 */
public function replaceTranslation() {
    fwrite( $this->outputFP, '<?xml version="1.0" encoding="UTF-8"?>' );

    //create Sax parser
    $xmlParser = $this->initSaxParser();

    $entityPattern     = "/&(.*?);/";
    $entityReplacement = self::$INTERNAL_TAG_PLACEHOLDER . '$1' . self::$INTERNAL_TAG_PLACEHOLDER;

    while ( true ) {
        $this->currentBuffer = fread( $this->originalFP, 4096 );

        // stop only on a real end of input: a truthiness test would also stop on a chunk equal to "0"
        if ( $this->currentBuffer === false || $this->currentBuffer === '' ) {
            break;
        }

        // obfuscate entities because sax automatically does html_entity_decode
        $escapedBuffer = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );

        // avoid cutting entities in half:
        // the last fread could have truncated an entity (say, '&lt;' in '&l'), thus invalidating the escaping.
        // 9 is the max length of an entity: while a '&' survives the escaping within the last 9 bytes of the
        // buffer, fetch 9 more bytes and escape again. A '&' followed by more than 9 bytes can't be an entity.
        while ( true ) {
            $_ampPos = strpos( $escapedBuffer, '&' );

            if ( $_ampPos === false || strlen( $escapedBuffer ) - $_ampPos > 9 ) {
                break;
            }

            $extraBytes = fread( $this->originalFP, 9 );

            // end of file: the buffer can not grow anymore, so the '&' is not an entity.
            // Without this exit the loop would spin forever on a trailing bare '&'.
            if ( $extraBytes === false || $extraBytes === '' ) {
                break;
            }

            $this->currentBuffer .= $extraBytes;
            $escapedBuffer       = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );
        }

        // whatever '&' is still there is not an entity: escape it as an (obfuscated) &amp;
        $this->currentBuffer = str_replace(
                "&",
                self::$INTERNAL_TAG_PLACEHOLDER . 'amp' . self::$INTERNAL_TAG_PLACEHOLDER,
                $escapedBuffer
        );

        //free stuff outside the loop
        unset( $escapedBuffer );

        //get length of chunk
        $this->len = strlen( $this->currentBuffer );

        //parse chunk of text
        if ( !xml_parse( $xmlParser, $this->currentBuffer, feof( $this->originalFP ) ) ) {
            //if unable, raise an exception
            throw new RuntimeException( sprintf(
                    "XML error: %s at line %d",
                    xml_error_string( xml_get_error_code( $xmlParser ) ),
                    xml_get_current_line_number( $xmlParser )
            ) );
        }

        // accumulate the offset in the document: as long as the SAX pointer advances, we keep track of the
        // total bytes it has seen so far; this way tagOpen() can translate the parser's global byte index
        // into an address local to the current buffer to retrieve the last char of a tag
        $this->offset += $this->len;
    }

    // close Sax parser
    $this->closeSaxParser( $xmlParser );
}
98
|
|
|
|
99
|
|
|
/**
 * @inheritDoc
 *
 * SAX start-element handler. Tracks the unit/target state, rebuilds the opening tag with its attributes
 * (replacing target language and, for xliff v2 segments, the `state` attribute) and either flushes it to
 * the output pointer or appends it to the CDATA buffer. Nothing is emitted while inside a <target> that
 * is going to be replaced.
 */
protected function tagOpen( $parser, $name, $attr ) {
    // check if we are entering into a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
    if ( $this->tuTagName === $name ) {
        $this->inTU = true;

        // get id
        // trim to first 100 characters because this is the limit on Matecat's DB
        $this->currentTransUnitId = substr( $attr[ 'id' ], 0, 100 );

        // `translate` attribute can be only yes or no: anything but an explicit "no" is treated as "yes"
        $attr[ 'translate' ] = ( isset( $attr[ 'translate' ] ) && $attr[ 'translate' ] === 'no' ) ? 'no' : 'yes';

        // current 'translate' attribute of the current trans-unit
        $this->currentTransUnitTranslate = $attr[ 'translate' ];
    }

    if ( 'source' === $name ) {
        $this->sourceAttributes = $attr;
    }

    if ( 'mda:metadata' === $name ) {
        $this->unitContainsMda = true;
    }

    // check if we are entering into a <target>; a unit marked translate="no" keeps its original target
    if ( 'target' === $name ) {
        $this->inTarget = ( $this->currentTransUnitTranslate !== 'no' );
    }

    // check if we are inside a <target>, obviously this happens only if there are targets inside the trans-unit
    // <target> must be stripped to be replaced, so this check avoids <target> reconstruction
    if ( !$this->inTarget ) {

        $tag = '';

        //
        // ============================================
        // only for Xliff 2.*
        // ============================================
        //
        // In xliff v2 we MUST add <mda:metadata> BEFORE <notes>/<originalData>/<segment>/<ignorable>
        //
        // As documentation says, <unit> contains:
        //
        // - elements from other namespaces, OPTIONAL
        // - Zero or one <notes> elements followed by
        // - Zero or one <originalData> element followed by
        // - One or more <segment> or <ignorable> elements in any order.
        //
        // For more info please refer to:
        //
        // http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html#unit
        //
        if ( $this->xliffVersion === 2 && ( $name === 'notes' || $name === 'originalData' || $name === 'segment' || $name === 'ignorable' ) && $this->unitContainsMda === false ) {
            if ( !empty( $this->transUnits[ $this->currentTransUnitId ] ) && !$this->hasWrittenCounts ) {

                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag                   .= $this->getWordCountGroupForXliffV2( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
                $this->unitContainsMda = true;
            }
        }

        // construct tag
        $tag .= "<$name ";

        $lastMrkState = null;
        $stateProp    = '';

        foreach ( $attr as $k => $v ) {

            //if tag name is file, we must replace the target-language attribute
            if ( $name === 'file' && $k === 'target-language' && !empty( $this->targetLang ) ) {
                //replace Target language with job language provided from constructor
                $tag .= "$k=\"$this->targetLang\" ";
            } else {
                $pos = 0;
                if ( $this->currentTransUnitId && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {
                    $pos = current( $this->transUnits[ $this->currentTransUnitId ] );
                }

                if ( $name === $this->tuTagName && isset( $this->segments[ $pos ] ) && isset( $this->segments[ $pos ][ 'sid' ] ) ) {

                    $sid = $this->segments[ $pos ][ 'sid' ];

                    // add `help-id` to xliff v.1*
                    // add `mtc:segment-id` to xliff v.2*
                    // (the strpos check makes sure the attribute is added only once per tag)
                    if ( $this->xliffVersion === 1 && strpos( $tag, 'help-id' ) === false ) {
                        if ( !empty( $sid ) ) {
                            $tag .= "help-id=\"$sid\" ";
                        }
                    } elseif ( $this->xliffVersion === 2 && strpos( $tag, 'mtc:segment-id' ) === false ) {
                        if ( !empty( $sid ) ) {
                            $tag .= "mtc:segment-id=\"$sid\" ";
                        }
                    }

                } elseif ( 'segment' === $name && $this->xliffVersion === 2 ) { // add state to segment in Xliff v2
                    list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $this->segments[ $pos ], $stateProp, $lastMrkState );
                }

                //normal tag flux, put attributes in it
                $tag .= "$k=\"$v\" ";

                // replace state for xliff v2
                if ( $stateProp ) {
                    // Match exactly one `state="..."` attribute. The value is matched with [^"]* rather than a
                    // greedy .* so the attributes written after `state` are not swallowed, and the lookbehind
                    // keeps `subState` (or any other name merely ending in "state") untouched.
                    $pattern = '/(?<![\w:.-])state="[^"]*"/i';
                    $tag     = preg_replace( $pattern, $stateProp, $tag );
                }
            }
        }

        // add oasis xliff 20 namespace
        if ( $this->xliffVersion === 2 && $name === 'xliff' && !array_key_exists( 'xmlns:mda', $attr ) ) {
            $tag .= 'xmlns:mda="urn:oasis:names:tc:xliff:metadata:2.0"';
        }

        // add MateCat specific namespace, we want maybe add non-XLIFF attributes
        if ( $name === 'xliff' && !array_key_exists( 'xmlns:mtc', $attr ) ) {
            $tag .= ' xmlns:mtc="https://www.matecat.com" ';
        }

        // trgLang
        if ( $name === 'xliff' ) {
            $tag = preg_replace( '/trgLang="(.*?)"/', 'trgLang="' . $this->targetLang . '"', $tag );
        }

        //this logic helps detecting empty tags
        //get current position of SAX pointer in all the stream of data it has read so far:
        //it points at the end of current tag
        $idx = xml_get_current_byte_index( $parser );

        //check whether the bounds of current tag are entirely in current buffer or the end of the current tag
        //is outside current buffer (in the latter case, it's in next buffer to be read by the while loop);
        //this check is necessary because we may have truncated a tag in half with current read,
        //and the other half may be encountered in the next buffer it will be passed
        if ( isset( $this->currentBuffer[ $idx - $this->offset ] ) ) {
            //if this tag entire length fitted in the buffer, the last char must be the last
            //symbol before the '>'; if it's an empty tag, it is assumed that it's a '/'
            $lastChar = $this->currentBuffer[ $idx - $this->offset ];
        } else {
            //if it's out, simply use the last character of the chunk
            $lastChar = $this->currentBuffer[ $this->len - 1 ];
        }

        //trim last space
        $tag = rtrim( $tag );

        //detect empty tag
        $this->isEmpty = ( $lastChar == '/' || $name == 'x' );
        if ( $this->isEmpty ) {
            $tag .= '/';
        }

        //add tag ending
        $tag .= ">";

        //set a Buffer for the segSource Source tag
        if ( $this->bufferIsActive || in_array( $name, $this->nodesToCopy, true ) ) { // we are opening a critical CDATA section

            //WARNING BECAUSE SOURCE AND SEG-SOURCE TAGS CAN BE EMPTY IN SOME CASES!!!!!
            //so check for isEmpty also in conjunction with name
            if ( $this->isEmpty && ( 'source' === $name || 'seg-source' === $name ) ) {
                $this->postProcAndFlush( $this->outputFP, $tag );
            } else {
                //these are NOT source/seg-source/value empty tags, THERE IS A CONTENT, write it in buffer
                $this->bufferIsActive = true;
                $this->CDATABuffer    .= $tag;
            }
        } else {
            $this->postProcAndFlush( $this->outputFP, $tag );
        }
    }

    // update segmentPositionInTu: one step per <source> in xliff v1, one step per <segment> in xliff v2
    if ( $this->inTU && ( ( $this->xliffVersion === 1 && $name === 'source' ) || ( $this->xliffVersion === 2 && $name === 'segment' ) ) ) {
        $this->segmentPositionInTu++;
    }
}
297
|
|
|
|
298
|
|
|
/**
 * @inheritDoc
 *
 * SAX end-element handler. On </target> it writes the replacement <target> built from the stored
 * segments; on the buffered elements listed in $nodesToCopy it flushes the CDATA buffer; on </segment>
 * (xliff v2) and </trans-unit> (xliff v1) it adds a <target> when the unit had none. Leaving the unit
 * element resets the per-unit state.
 */
protected function tagClose( $parser, $name ) {
    $tag = '';

    /**
     * if is a tag within <target> or
     * if it is an empty tag, do not add closing tag because we have already closed it in
     *
     * self::tagOpen method
     */
    if ( !$this->isEmpty && !( $this->inTarget && $name !== 'target' ) ) {

        if ( !$this->inTarget ) {
            $tag = "</$name>";
        }

        if ( 'target' === $name ) {

            // units marked translate="no" are left alone
            if ( $this->currentTransUnitTranslate !== 'no' && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {

                // get translation of current segment, by indirect indexing: id -> positional index -> segment
                // actually there may be more than one segment to that ID if there are two mrk of the same source segment
                $listOfSegmentsIds = $this->transUnits[ $this->currentTransUnitId ];

                // $currentSegmentId
                if ( !empty( $listOfSegmentsIds ) ) {
                    $this->setCurrentSegmentArray( $listOfSegmentsIds );
                }

                /*
                 * At the end of every cycle the segment grouping information is lost: unset( 'matecat|' . $this->currentId )
                 *
                 * We need to take the info about the last segment parsed
                 *          ( normally more than 1 db row because of mrk tags )
                 *
                 * So, copy the current segment data group to an another structure to take the last one segment
                 * for the next tagOpen ( possible sdl:seg-defs )
                 *
                 */
                $this->lastTransUnit = [];

                $last_value    = null;
                $segmentsCount = count( $listOfSegmentsIds );
                for ( $i = 0; $i < $segmentsCount; $i++ ) {
                    $id = $listOfSegmentsIds[ $i ];
                    if ( isset( $this->segments[ $id ] ) && ( $i == 0 || $last_value + 1 == $listOfSegmentsIds[ $i ] ) ) {
                        $last_value            = $listOfSegmentsIds[ $i ];
                        $this->lastTransUnit[] = $this->segments[ $id ];
                    }
                }

                // init translation and state
                $translation  = '';
                $lastMrkState = null;
                $stateProp    = '';

                // we must reset the lastMrkId found because this is a new segment.
                $lastMrkId = -1;

                if ( $this->xliffVersion === 2 ) {
                    $seg = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];

                    // update counts
                    if ( !$this->hasWrittenCounts && !empty( $seg ) ) {
                        $this->updateSegmentCounts( $seg );
                    }

                    // delete translations so the prepareSegment
                    // will put source content in target tag
                    if ( $this->sourceInTarget ) {
                        $seg[ 'translation' ] = '';
                        $this->resetCounts();
                    }

                    // append $translation
                    $translation = $this->prepareTranslation( $seg, $translation );

                    list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                } else {
                    foreach ( $listOfSegmentsIds as $pos => $id ) {

                        /*
                         * This routine works to respect the positional orders of markers.
                         * In every cycle we check if the mrk of the segment is below or equal the last one.
                         * When this is true, means that the mrk id belongs to the next segment with the same internal_id
                         * so we MUST stop to apply markers and translations
                         * and stop to add eq_word_count
                         *
                         * Begin:
                         * pre-assign zero to the new mrk if this is the first one ( in this segment )
                         * If it is null leave it NULL
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] < 0 && $this->segments[ $id ][ "mrk_id" ] !== null ) {
                            $this->segments[ $id ][ "mrk_id" ] = 0;
                        }

                        /*
                         * WARNING:
                         * For those seg-source that doesn't have a mrk ( having a mrk id === null )
                         * ( null <= -1 ) === true
                         * so, cast to int
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] <= $lastMrkId ) {
                            break;
                        }

                        // set $this->currentSegment
                        $seg = $this->segments[ $id ];

                        // update counts
                        if ( !empty( $seg ) ) {
                            $this->updateSegmentCounts( $seg );
                        }

                        // delete translations so the prepareSegment
                        // will put source content in target tag
                        if ( $this->sourceInTarget ) {
                            $seg[ 'translation' ] = '';
                            $this->resetCounts();
                        }

                        // append $translation
                        $translation = $this->prepareTranslation( $seg, $translation );

                        // for xliff 2 we need $this->transUnits[ $this->currentId ] [ $pos ] for populating metadata
                        unset( $this->transUnits[ $this->currentTransUnitId ] [ $pos ] );

                        $lastMrkId = $this->segments[ $id ][ "mrk_id" ];

                        list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                    }
                }

                //append translation
                $targetLang = '';
                if ( $this->xliffVersion === 1 ) {
                    $targetLang = ' xml:lang="' . $this->targetLang . '"';
                }

                $tag = $this->buildTranslateTag( $targetLang, $stateProp, $translation, $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
            }

            // signal we are leaving a target
            $this->targetWasWritten = true;
            $this->inTarget         = false;
            // third argument: treat the tag as CDATA
            $this->postProcAndFlush( $this->outputFP, $tag, true );
        } elseif ( in_array( $name, $this->nodesToCopy, true ) ) { // we are closing a critical CDATA section

            $this->bufferIsActive = false;

            // only for Xliff 2.*
            // write here <mda:metaGroup> and <mda:meta> if already present in the <unit>
            if ( 'mda:metadata' === $name && $this->unitContainsMda && $this->xliffVersion === 2 && !$this->hasWrittenCounts ) {

                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag = $this->CDATABuffer;
                $tag .= $this->getWordCountGroupForXliffV2( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ], false );
                $tag .= " </mda:metadata>";

            } else {
                $tag = $this->CDATABuffer . "</$name>";
            }

            $this->CDATABuffer = "";

            //flush to pointer
            $this->postProcAndFlush( $this->outputFP, $tag );
        } elseif ( 'segment' === $name ) {

            // only for Xliff 2.*
            // if segment has no <target> add it BEFORE </segment>
            if ( $this->xliffVersion === 2 && !$this->targetWasWritten ) {

                $seg = $this->getCurrentSegment();

                // getCurrentSegment() returns an empty array when no segment matches: write an empty target then
                $translation = isset( $seg[ 'translation' ] ) ? $seg[ 'translation' ] : '';

                // copy attr from <source>
                $tag = '<target';
                foreach ( $this->sourceAttributes as $k => $v ) {
                    $tag .= " $k=\"$v\"";
                }

                $tag .= '>' . $translation . '</target></segment>';
            }

            $this->postProcAndFlush( $this->outputFP, $tag );

            // we are leaving <segment>, reset $segmentHasTarget
            $this->targetWasWritten = false;

        } elseif ( $name === 'trans-unit' ) {

            // only for Xliff 1.*
            // handling </trans-unit> closure
            $tag = '';

            if ( !$this->targetWasWritten ) {
                $seg          = $this->getCurrentSegment();
                $lastMrkState = null;
                $stateProp    = '';

                // if there is translation available insert <target> BEFORE </trans-unit>
                if ( isset( $seg[ 'translation' ] ) ) {
                    list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                    $tag .= $this->createTargetTag( $seg[ 'translation' ], $stateProp );
                }
            }

            $tag .= '</trans-unit>';
            $this->postProcAndFlush( $this->outputFP, $tag );
        } elseif ( $this->bufferIsActive ) { // this is a tag ( <g | <mrk ) inside a seg or seg-source tag
            $this->CDATABuffer .= "</$name>";
            // Do NOT Flush
        } else { //generic tag closure do Nothing
            // flush to pointer
            $this->postProcAndFlush( $this->outputFP, $tag );
        }
    } elseif ( $this->CDATABuffer === '<note/>' && $this->bufferIsActive === true ) {
        // an empty <note/> was buffered by tagOpen: write it out as it is and leave the buffered mode
        $this->postProcAndFlush( $this->outputFP, '<note/>' );
        $this->bufferIsActive = false;
        $this->CDATABuffer    = '';
        $this->isEmpty        = false;
    } else {
        //ok, nothing to be done; reset flag for next coming tag
        $this->isEmpty = false;
    }

    // check if we are leaving a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
    if ( $this->tuTagName === $name ) {
        $this->currentTransUnitTranslate = null;
        $this->inTU                      = false;
        $this->segmentPositionInTu       = -1;
        $this->unitContainsMda           = false;
        $this->hasWrittenCounts          = false;
        $this->sourceAttributes          = [];
    }
}
545
|
|
|
|
546
|
|
|
/**
 * Point $this->currentSegmentArray ('sid' => segment id, 'tid' => trans-unit id) at the segment to work on.
 *
 * When nothing is tracked yet, or the tracked trans-unit differs from the current one, the first id of the
 * list is selected. Otherwise the id that follows the tracked one in the list is selected.
 *
 * @param array $listOfSegmentsIds
 */
private function setCurrentSegmentArray( array $listOfSegmentsIds = [] ) {
    $isSameTransUnit = !empty( $this->currentSegmentArray )
            && $this->currentSegmentArray[ 'tid' ] === $this->currentTransUnitId;

    if ( !$isSameTransUnit ) {
        // first visit of this trans-unit: start from the first segment id
        $this->currentSegmentArray = [
                'sid' => $listOfSegmentsIds[ 0 ],
                'tid' => $this->currentTransUnitId,
        ];

        return;
    }

    // same trans-unit as before: advance to the id next to the tracked one
    $trackedKey                         = array_search( $this->currentSegmentArray[ 'sid' ], $listOfSegmentsIds );
    $this->currentSegmentArray[ 'sid' ] = $listOfSegmentsIds[ $trackedKey + 1 ];
    $this->currentSegmentArray[ 'tid' ] = $this->currentTransUnitId;
}
571
|
|
|
|
572
|
|
|
/**
 * Add the word counts of the current trans-unit to $this->counts.
 *
 * For xliff v2 only the segment selected by setCurrentSegmentArray() is counted; for any other version
 * every segment listed for the trans-unit is counted. The current segment array is cleared afterwards.
 */
private function updateCounts() {
    $segmentIds = $this->transUnits[ $this->currentTransUnitId ];

    if ( !empty( $segmentIds ) ) {
        $this->setCurrentSegmentArray( $segmentIds );
    }

    if ( $this->xliffVersion !== 2 ) {
        foreach ( $segmentIds as $segmentId ) {
            $segmentRow = $this->segments[ $segmentId ];
            if ( !empty( $segmentRow ) ) {
                $this->updateSegmentCounts( $segmentRow );
            }
        }
    } else {
        $segmentRow = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];
        if ( !empty( $segmentRow ) ) {
            $this->updateSegmentCounts( $segmentRow );
        }
    }

    $this->currentSegmentArray = [];
}
600
|
|
|
|
601
|
|
|
/**
 * Add one segment's word counts to the running totals in $this->counts.
 * The equivalent word count is truncated (not rounded) to two decimals before being added.
 *
 * @param array $seg
 */
private function updateSegmentCounts( array $seg = [] ) {
    $this->counts[ 'raw_word_count' ] = $this->counts[ 'raw_word_count' ] + $seg[ 'raw_word_count' ];

    $truncatedEqWordCount            = floor( $seg[ 'eq_word_count' ] * 100 ) / 100;
    $this->counts[ 'eq_word_count' ] = $this->counts[ 'eq_word_count' ] + $truncatedEqWordCount;
}
608
|
|
|
|
609
|
|
|
/**
 * Set both running word counters (raw and equivalent) back to zero.
 */
private function resetCounts() {
    foreach ( [ 'raw_word_count', 'eq_word_count' ] as $counterName ) {
        $this->counts[ $counterName ] = 0;
    }
}
613
|
|
|
|
614
|
|
|
/**
 * Build the text to put inside <target> for one segment and append it to what has been built so far.
 *
 * The source text is used when the segment has no translation. When a callback is set and it reports
 * errors for the translation, the source text wrapped in UNTRANSLATED_CONTENT markers is used instead.
 * For xliff v2 the bare text is returned (marks are ignored on purpose and $transUnitTranslation is not
 * prepended); for xliff v1 the text is wrapped in its <mrk> (when the segment has one) and in the
 * segment's prev/succ tags.
 *
 * @param array  $seg
 * @param string $transUnitTranslation
 *
 * @return string
 */
protected function prepareTranslation( $seg, $transUnitTranslation = "" ) {
    $sourceText = Strings::removeDangerousChars( $seg [ 'segment' ] );
    $targetText = Strings::removeDangerousChars( $seg [ 'translation' ] );
    $dataRefMap = isset( $seg[ 'data_ref_map' ] ) ? Strings::jsonToArray( $seg[ 'data_ref_map' ] ) : [];

    $isUntranslated = is_null( $seg [ 'translation' ] ) || $seg [ 'translation' ] == '';

    if ( $isUntranslated ) {
        $targetText = $sourceText;
    } elseif ( $this->callback ) {
        $error = isset( $seg[ 'error' ] ) ? $seg[ 'error' ] : null;
        if ( $this->callback->thereAreErrors( $seg[ 'sid' ], $sourceText, $targetText, $dataRefMap, $error ) ) {
            $targetText = '|||UNTRANSLATED_CONTENT_START|||' . $sourceText . '|||UNTRANSLATED_CONTENT_END|||';
        }
    }

    // for xliff v2 we ignore the marks on purpose
    if ( $this->xliffVersion === 2 ) {
        return $targetText;
    }

    $hasMrk = $seg[ 'mrk_id' ] !== null && $seg[ 'mrk_id' ] != '';
    if ( $hasMrk ) {
        $mrkSuccTags = $seg[ 'mrk_succ_tags' ];
        if ( $this->targetLang === 'ja-JP' ) {
            // for Japanese targets the leading whitespace of the trailing tags is dropped
            $mrkSuccTags = ltrim( $mrkSuccTags );
        }

        $targetText = "<mrk mid=\"" . $seg[ 'mrk_id' ] . "\" mtype=\"seg\">" . $seg[ 'mrk_prev_tags' ] . $targetText . $mrkSuccTags . "</mrk>";
    }

    return $transUnitTranslation . $seg[ 'prev_tags' ] . $targetText . $seg[ 'succ_tags' ];
}
657
|
|
|
|
658
|
|
|
/**
 * Build the <target> element for the given translation.
 *
 * xliff v2 gets a bare <target>. Every other version gets a <target> carrying the language and state
 * attributes, followed by the word count group unless this object is exactly a SdlXliffSAXTranslationReplacer.
 *
 * @param $targetLang
 * @param $stateProp
 * @param $translation
 * @param $rawWordCount
 * @param $eqWordCount
 *
 * @return string
 */
private function buildTranslateTag( $targetLang, $stateProp, $translation, $rawWordCount, $eqWordCount ) {
    switch ( $this->xliffVersion ) {
        case 1:
        default:
            $targetTag = '<target ' . $targetLang . ' ' . $stateProp . '>' . $translation . '</target>';

            // if it's a Trados file don't append count group
            $isTradosFile = ( get_class( $this ) === SdlXliffSAXTranslationReplacer::class );

            return $isTradosFile ? $targetTag : $targetTag . $this->getWordCountGroup( $rawWordCount, $eqWordCount );

        case 2:
            return '<target>' . $translation . '</target>';
    }
}
684
|
|
|
|
685
|
|
|
/**
 * Build the <count-group> element (named after the current trans-unit id) holding the raw and the
 * weighted word counts. The returned string starts with a newline.
 *
 * @param $raw_word_count
 * @param $eq_word_count
 *
 * @return string
 */
private function getWordCountGroup( $raw_word_count, $eq_word_count ) {
    $rawCount      = '<count count-type="x-matecat-raw">' . $raw_word_count . '</count>';
    $weightedCount = '<count count-type="x-matecat-weighted">' . $eq_word_count . '</count>';

    return "\n" . '<count-group name="' . $this->currentTransUnitId . '">' . $rawCount . $weightedCount . '</count-group>';
}
694
|
|
|
|
695
|
|
|
/**
 * Return the segment row found at the current position ($this->segmentPositionInTu) of the current
 * trans-unit.
 *
 * @return array the segment row, or an empty array when the unit is not translatable, when the
 *               trans-unit has no segment id at the current position, or when that id is unknown
 */
private function getCurrentSegment() {
    if ( $this->currentTransUnitTranslate !== 'yes' ) {
        return [];
    }

    // The position is -1 until the first <source>/<segment> of the unit has been opened and it may
    // point past the listed ids: check the offset before reading it instead of hitting an undefined index.
    if ( !isset( $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ] ) ) {
        return [];
    }

    $index = $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ];

    return isset( $this->segments[ $index ] ) ? $this->segments[ $index ] : [];
}
709
|
|
|
|
710
|
|
|
/**
 * Create a <target> for the given translation, tagged with the job target language and carrying the
 * current running word counts.
 *
 * @param $translation
 * @param $stateProp
 *
 * @return string
 */
private function createTargetTag( $translation, $stateProp ) {
    $rawWordCount  = $this->counts[ 'raw_word_count' ];
    $eqWordCount   = $this->counts[ 'eq_word_count' ];
    $langAttribute = 'xml:lang="' . $this->targetLang . '"';

    return $this->buildTranslateTag( $langAttribute, $stateProp, $translation, $rawWordCount, $eqWordCount );
}
723
|
|
|
|
724
|
|
|
/**
 * Build the xliff v2 metadata holding the raw and weighted word counts.
 *
 * Every call increments $this->mdaGroupCounter and uses it for the "word_count_tu_N" id of the group.
 *
 * @param      $raw_word_count
 * @param      $eq_word_count
 * @param bool $withMetadataTag when exactly false only the <mda:metaGroup> is returned (to be appended to
 *                              an already open <mda:metadata>), otherwise the group is wrapped in its own
 *                              <mda:metadata> element
 *
 * @return string
 */
private function getWordCountGroupForXliffV2( $raw_word_count, $eq_word_count, $withMetadataTag = true ) {
    $this->mdaGroupCounter++;
    $groupId = 'word_count_tu_' . $this->mdaGroupCounter;

    // the group is the same in both flavours, elements are separated by newlines
    $metaGroup = '<mda:metaGroup id="' . $groupId . '" category="row_xml_attribute">' . "\n"
            . '<mda:meta type="x-matecat-raw">' . $raw_word_count . '</mda:meta>' . "\n"
            . '<mda:meta type="x-matecat-weighted">' . $eq_word_count . '</mda:meta>' . "\n"
            . '</mda:metaGroup>' . "\n";

    if ( false === $withMetadataTag ) {
        return ' ' . $metaGroup;
    }

    return '<mda:metadata>' . "\n" . $metaGroup . '</mda:metadata>';
}
750
|
|
|
|
751
|
|
|
/**
 * Compute the `state="..."` attribute of a trans-unit/segment from the status of one of its segments,
 * taking into account the state already chosen for the previous segments (mrk) of the same unit.
 *
 * A status only overrides the previous one when the guards below allow it:
 *  - approved/fixed:    only over nothing or approved
 *  - translated:        only over nothing, translated or approved
 *  - rejected/rebutted: over anything but new and draft
 *  - new:               over anything but draft
 *  - draft:             always
 *  - anything else (e.g. no status at all): "translated", only when nothing was chosen before
 *
 * @param $seg
 * @param $state_prop
 * @param $lastMrkState
 *
 * @return array [ state attribute, last state ], unchanged when the status does not override
 */
private function setTransUnitState( $seg, $state_prop, $lastMrkState ) {
    switch ( $seg[ 'status' ] ) {

        case TranslationStatus::STATUS_FIXED:
        case TranslationStatus::STATUS_APPROVED:
            if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"reviewed\"" : "state=\"signed-off\"";
                $lastMrkState = TranslationStatus::STATUS_APPROVED;
            }
            break;

        case TranslationStatus::STATUS_TRANSLATED:
            if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_TRANSLATED || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                $state_prop   = "state=\"translated\"";
                $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
            }
            break;

        case TranslationStatus::STATUS_REJECTED: // if there is a mark REJECTED and there is not a DRAFT, all the trans-unit is REJECTED. In V2 there is no way to mark
        case TranslationStatus::STATUS_REBUTTED:
            // both inequalities must hold: joined with "||" the test is always true and a rejected mark
            // would override a previous NEW/DRAFT state
            if ( ( $lastMrkState == null ) || ( $lastMrkState != TranslationStatus::STATUS_NEW && $lastMrkState != TranslationStatus::STATUS_DRAFT ) ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"needs-review-translation\"";
                $lastMrkState = TranslationStatus::STATUS_REJECTED;
            }
            break;

        case TranslationStatus::STATUS_NEW:
            if ( ( $lastMrkState == null ) || $lastMrkState != TranslationStatus::STATUS_DRAFT ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"new\"";
                $lastMrkState = TranslationStatus::STATUS_NEW;
            }
            break;

        case TranslationStatus::STATUS_DRAFT:
            $state_prop   = "state=\"needs-translation\"";
            $lastMrkState = TranslationStatus::STATUS_DRAFT;
            break;

        default:
            // this is the case when a segment is not showed in cattool, so the row in
            // segment_translations does not exists and
            // ---> $seg[ 'status' ] is NULL
            if ( $lastMrkState == null ) { //this is the first MRK ID
                $state_prop   = "state=\"translated\"";
                $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
            }
            // otherwise do nothing and preserve the last state
            break;
    }

    return [ $state_prop, $lastMrkState ];
}
810
|
|
|
|
811
|
|
|
/**
 * @inheritDoc
 *
 * SAX character-data handler: text goes to the CDATA buffer while buffering is active, is written
 * straight to the output otherwise, and is dropped when it belongs to a <target> being replaced.
 */
protected function characterData( $parser, $data ) {
    if ( $this->bufferIsActive ) {
        $this->CDATABuffer .= $data;
    } elseif ( !$this->inTarget ) {
        // don't write <target> data
        $this->postProcAndFlush( $this->outputFP, $data );
    }
}
822
|
|
|
} |
823
|
|
|
|
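PHP has two types of connecting operators: logical operators (`and`, `or`) and boolean operators (`&&`, `||`).
The difference between these is their precedence, i.e. the order in which they are evaluated relative to other operators: `and` and `or` bind more loosely than the assignment operator `=`, whereas `&&` and `||` bind more tightly. For example, `$ok = true and false;` assigns `true` to `$ok`, while `$ok = true && false;` assigns `false`. In most cases, you would want to use a boolean operator like `&&` or `||`.
Let’s take a look at a few examples: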
Logical Operators are used for Control-Flow
One case where you explicitly want to use logical operators is for control-flow such as this: `$handle = fopen( $path, 'r' ) or die( 'Cannot open file' );`
Since `die` introduces problems of its own (for example, it makes our code hardly testable and prevents any kind of more sophisticated error handling), you probably do not want to use this in real-world code. Unfortunately, logical operators cannot be combined with `throw` at this point (`throw` only became an expression, and therefore usable after `or`, in PHP 8.0).
These limitations lead to logical operators rarely being of use in current PHP code.