1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Matecat\XliffParser\XliffReplacer; |
4
|
|
|
|
5
|
|
|
use Matecat\XliffParser\Constants\TranslationStatus; |
6
|
|
|
use Matecat\XliffParser\Utils\Strings; |
7
|
|
|
|
8
|
|
|
class oldXliffSAXTranslationReplacer extends AbstractXliffReplacer { |
9
|
|
|
/** |
10
|
|
|
* @var int |
11
|
|
|
*/ |
12
|
|
|
private $mdaGroupCounter = 0; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* @var array |
16
|
|
|
*/ |
17
|
|
|
private $nodesToCopy = [ |
18
|
|
|
'source', |
19
|
|
|
'mda:metadata', |
20
|
|
|
'memsource:additionalTagData', |
21
|
|
|
'originalData', |
22
|
|
|
'seg-source', |
23
|
|
|
'value', |
24
|
|
|
'bpt', |
25
|
|
|
'ept', |
26
|
|
|
'ph', |
27
|
|
|
'st', |
28
|
|
|
'note', |
29
|
|
|
'context', |
30
|
|
|
'context-group' |
31
|
|
|
]; |
32
|
|
|
|
33
|
|
|
/**
 * @inheritDoc
 *
 * Handles an opening tag: tracks trans-unit / target state and, unless we are
 * inside a <target> that is going to be replaced, rebuilds the tag text and
 * either buffers it or flushes it to the output pointer.
 */
protected function tagOpen( $parser, $name, $attr ) {
    // Entering a <trans-unit> (xliff v1.*) or a <unit> (xliff v2.*)
    if ( $this->tuTagName === $name ) {
        $this->inTU = true;

        // keep only the first 100 characters of the id because this is the limit on Matecat's DB
        $this->currentTransUnitId = substr( $attr[ 'id' ], 0, 100 );

        // `translate` can only be yes or no: anything that is not an explicit 'no' becomes 'yes'
        $translate = ( isset( $attr[ 'translate' ] ) && $attr[ 'translate' ] === 'no' ) ? 'no' : 'yes';

        $attr[ 'translate' ]                  = $translate;
        $this->currentTransUnitIsTranslatable = $translate;
    }

    if ( 'source' === $name ) {
        $this->sourceAttributes = $attr;
    }

    if ( 'mda:metadata' === $name ) {
        $this->unitContainsMda = true;
    }

    // Entering a <target>: it is treated as "in target" only when the unit is translatable
    if ( 'target' === $name ) {
        $this->inTarget = ( $this->currentTransUnitIsTranslatable !== 'no' );
    }

    // Tags inside a <target> are not rebuilt: the <target> is stripped and replaced on close
    if ( !$this->inTarget ) {

        $tag = '';

        //
        // ============================================
        // only for Xliff 2.*
        // ============================================
        //
        // In xliff v2 <mda:metadata> MUST be added BEFORE <notes>/<originalData>/<segment>/<ignorable>
        //
        // As the documentation says, <unit> contains:
        //
        // - elements from other namespaces, OPTIONAL
        // - Zero or one <notes> elements followed by
        // - Zero or one <originalData> element followed by
        // - One or more <segment> or <ignorable> elements in any order.
        //
        // http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html#unit
        //
        $followsMetadata = in_array( $name, [ 'notes', 'originalData', 'segment', 'ignorable' ], true );

        if ( $this->xliffVersion === 2 && $followsMetadata && $this->unitContainsMda === false ) {
            if ( !empty( $this->transUnits[ $this->currentTransUnitId ] ) && !$this->hasWrittenCounts ) {

                // counts must be refreshed before they are written
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag                   .= $this->getWordCountGroupForXliffV2();
                $this->unitContainsMda = true;
            }
        }

        // start the tag
        $tag .= "<$name ";

        $stateProp = null;

        foreach ( $attr as $attrName => $attrValue ) {

            // on <file> the target-language attribute is replaced with the job language
            if ( $name === 'file' && $attrName === 'target-language' && !empty( $this->targetLang ) ) {
                $tag .= "$attrName=\"$this->targetLang\" ";
                continue;
            }

            $pos = 0;
            if ( $this->currentTransUnitId && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {
                $pos = current( $this->transUnits[ $this->currentTransUnitId ] );
            }

            if ( $name === $this->tuTagName && isset( $this->segments[ $pos ] ) && isset( $this->segments[ $pos ][ 'sid' ] ) ) {

                $sid = $this->segments[ $pos ][ 'sid' ];

                // `help-id` for xliff v.1*, `mtc:segment-id` for xliff v.2*
                $idAttribute = null;
                if ( $this->xliffVersion === 1 ) {
                    $idAttribute = 'help-id';
                } elseif ( $this->xliffVersion === 2 ) {
                    $idAttribute = 'mtc:segment-id';
                }

                // written at most once per tag, and only when there is a segment id
                if ( $idAttribute !== null && strpos( $tag, $idAttribute ) === false && !empty( $sid ) ) {
                    $tag .= "$idAttribute=\"$sid\" ";
                }

            } elseif ( 'segment' === $name && $this->xliffVersion === 2 ) { // state of the segment in Xliff v2
                [ $stateProp, ] = StatusToStateAttribute::getState( $this->segments[ $pos ][ 'status' ], $this->xliffVersion );
            }

            // regular attribute: copy it, except `state` which is re-generated
            if ( $attrName != 'state' ) {
                $tag .= "$attrName=\"$attrValue\" ";
            }
        }

        // re-generated state for xliff v2
        if ( $stateProp ) {
            $tag .= $stateProp;
        }

        if ( $name === 'xliff' ) {
            // oasis xliff 2.0 metadata namespace
            if ( $this->xliffVersion === 2 && !array_key_exists( 'xmlns:mda', $attr ) ) {
                $tag .= 'xmlns:mda="urn:oasis:names:tc:xliff:metadata:2.0"';
            }

            // MateCat specific namespace, used for non-XLIFF attributes
            if ( !array_key_exists( 'xmlns:mtc', $attr ) ) {
                $tag .= ' xmlns:mtc="https://www.matecat.com" ';
            }

            // trgLang
            $tag = preg_replace( '/trgLang="(.*?)"/', 'trgLang="' . $this->targetLang . '"', $tag );
        }

        $lastChar = $this->getLastCharacter( $parser );

        // drop the trailing space
        $tag = rtrim( $tag );

        // self-closing tag detection
        $this->isEmpty = ( $lastChar == '/' || $name == 'x' );
        if ( $this->isEmpty ) {
            $tag .= '/';
        }

        // close the tag
        $tag .= ">";

        // Tags in $nodesToCopy (or anything met while the buffer is active) go to the buffer,
        // BUT <source> and <seg-source> can be empty: in that case they are flushed directly.
        $isEmptySourceTag = $this->isEmpty && ( 'source' === $name || 'seg-source' === $name );
        $mustBuffer       = ( $this->bufferIsActive || in_array( $name, $this->nodesToCopy ) ) && !$isEmptySourceTag;

        if ( $mustBuffer ) {
            $this->bufferIsActive = true;
            $this->CDATABuffer    .= $tag;
        } else {
            $this->postProcAndFlush( $this->outputFP, $tag );
        }
    }

    // update segmentPositionInTu: <source> counts in v1, <segment> counts in v2
    $opensSegment = ( $this->xliffVersion === 1 && $name === 'source' )
            || ( $this->xliffVersion === 2 && $name === 'segment' );

    if ( $this->inTU && $opensSegment ) {
        $this->segmentPositionInTu++;
    }
}
217
|
|
|
|
218
|
|
|
/**
 * @inheritDoc
 *
 * Handles a closing tag: writes the replaced <target>, flushes buffered nodes,
 * adds a missing <target> before </segment> / </trans-unit>, and resets the
 * per-unit state when the trans-unit / unit is closed.
 */
protected function tagClose( $parser, $name ) {
    $tag = '';

    if ( $this->isEmpty ) {

        // Empty tags were already closed in self::tagOpen, so no closing tag is added here.
        // A buffered empty <note/> still has to be written out.
        if ( $this->CDATABuffer === '<note/>' && $this->bufferIsActive === true ) {
            $this->postProcAndFlush( $this->outputFP, '<note/>' );
            $this->bufferIsActive = false;
            $this->CDATABuffer    = '';
        }

        // reset the flag for the next coming tag
        $this->isEmpty = false;

    } else {

        // tags within <target> get no closing tag
        if ( !$this->inTarget ) {
            $tag = "</$name>";
        }

        if ( 'target' == $name ) {

            if ( $this->currentTransUnitIsTranslatable === 'no' ) {
                // do nothing
            } elseif ( isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {

                // translation lookup is indirect: id -> positional index -> segment
                // there may be more than one segment for that id (two mrk of the same source segment)
                $listOfSegmentsIds = $this->transUnits[ $this->currentTransUnitId ];

                if ( !empty( $listOfSegmentsIds ) ) {
                    $this->setCurrentSegmentArray( $listOfSegmentsIds );
                }

                /*
                 * The segment grouping information is lost at the end of every cycle,
                 * so the current group of segments ( normally more than 1 db row because of mrk tags )
                 * is copied to another structure, available to the next tagOpen ( possible sdl:seg-defs ).
                 *
                 * Index-based loop on purpose: the list is read by position, not by existing key.
                 */
                $this->lastTransUnit = [];

                $previousId    = null;
                $segmentsCount = count( $listOfSegmentsIds );
                for ( $i = 0; $i < $segmentsCount; $i++ ) {
                    $id = $listOfSegmentsIds[ $i ];
                    if ( isset( $this->segments[ $id ] ) && ( $i == 0 || $previousId + 1 == $listOfSegmentsIds[ $i ] ) ) {
                        $previousId            = $listOfSegmentsIds[ $i ];
                        $this->lastTransUnit[] = $this->segments[ $id ];
                    }
                }

                // translation and state start empty
                $translation  = '';
                $lastMrkState = null;
                $stateProp    = '';

                // new segment: forget the last mrk id found
                $lastMrkId = -1;

                if ( $this->xliffVersion === 2 ) {

                    $seg = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];

                    // update counts
                    if ( !$this->hasWrittenCounts && !empty( $seg ) ) {
                        $this->updateSegmentCounts( $seg );
                    }

                    // with an empty translation prepareTranslation puts the source content in the target
                    if ( $this->sourceInTarget ) {
                        $seg[ 'translation' ] = '';
                        $this->resetCounts();
                    }

                    $translation = $this->prepareTranslation( $seg, $translation );

                    [ $stateProp, ] = StatusToStateAttribute::getState( $seg[ 'status' ], $this->xliffVersion );

                } else {

                    foreach ( $listOfSegmentsIds as $pos => $id ) {

                        /*
                         * Markers must keep their positional order.
                         * When the mrk id of a segment is below or equal to the last one, it belongs
                         * to the next segment with the same internal_id, so we MUST stop applying
                         * markers and translations and stop adding eq_word_count.
                         *
                         * A negative mrk id is first normalized to zero; NULL is left NULL.
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] < 0 && $this->segments[ $id ][ "mrk_id" ] !== null ) {
                            $this->segments[ $id ][ "mrk_id" ] = 0;
                        }

                        /*
                         * WARNING: a seg-source without mrk has mrk id === null and
                         * ( null <= -1 ) === true, hence the cast to int.
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] <= $lastMrkId ) {
                            break;
                        }

                        $seg = $this->segments[ $id ];

                        // update counts
                        if ( !empty( $seg ) ) {
                            $this->updateSegmentCounts( $seg );
                        }

                        // with an empty translation prepareTranslation puts the source content in the target
                        if ( $this->sourceInTarget ) {
                            $seg[ 'translation' ] = '';
                            $this->resetCounts();
                        }

                        $translation = $this->prepareTranslation( $seg, $translation );

                        // this position has been consumed
                        unset( $this->transUnits[ $this->currentTransUnitId ] [ $pos ] );

                        $lastMrkId = $this->segments[ $id ][ "mrk_id" ];

                        [ $stateProp, $lastMrkState ] = StatusToStateAttribute::getState( $seg[ 'status' ], $this->xliffVersion, $stateProp, $lastMrkState );
                    }
                }

                // the whole <target> is replaced
                $tag = $this->createTargetTag( $translation, $stateProp );
            }

            // leaving the target
            $this->targetWasWritten = true;
            $this->inTarget         = false;
            $this->postProcAndFlush( $this->outputFP, $tag, true );

        } elseif ( in_array( $name, $this->nodesToCopy ) && !$this->inTarget ) { // closing a buffered (critical CDATA) section

            $this->bufferIsActive = false;

            // only for Xliff 2.*
            // <mda:metaGroup> and <mda:meta> are written here when <mda:metadata> is already present in the <unit>
            if ( 'mda:metadata' === $name && $this->unitContainsMda && $this->xliffVersion === 2 && !$this->hasWrittenCounts ) {

                // counts must be refreshed before they are written
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag = $this->CDATABuffer
                        . $this->getWordCountGroupForXliffV2( false )
                        . " </mda:metadata>";

            } else {
                $tag = $this->CDATABuffer . "</$name>";
            }

            $this->CDATABuffer = "";

            // flush to pointer
            $this->postProcAndFlush( $this->outputFP, $tag );

        } elseif ( 'segment' === $name ) {

            // only for Xliff 2.*
            // a segment without <target> gets one BEFORE </segment>
            if ( !$this->targetWasWritten ) {

                $seg = $this->getCurrentSegment();

                if ( isset( $seg[ 'translation' ] ) ) {
                    $translation    = $this->prepareTranslation( $seg );
                    [ $stateProp, ] = StatusToStateAttribute::getState( $seg[ 'status' ], $this->xliffVersion );

                    // replace the tag
                    $tag = $this->createTargetTag( $translation, $stateProp ) . '</segment>';
                }
            }

            $this->postProcAndFlush( $this->outputFP, $tag );

            // leaving <segment>: reset targetWasWritten
            $this->targetWasWritten = false;

        } elseif ( $name === 'trans-unit' ) {

            // only for Xliff 1.*
            // </trans-unit> closure
            if ( $this->targetWasWritten ) {
                $this->postProcAndFlush( $this->outputFP, '</trans-unit>' );
                $this->targetWasWritten = false;
            } else {

                $seg = $this->getCurrentSegment();

                if ( isset( $seg[ 'translation' ] ) ) {
                    $translation    = $this->prepareTranslation( $seg );
                    [ $stateProp, ] = StatusToStateAttribute::getState( $seg[ 'status' ], $this->xliffVersion );

                    // replace the tag
                    $tag = $this->createTargetTag( $translation, $stateProp ) . '</trans-unit>';
                }

                $this->postProcAndFlush( $this->outputFP, $tag );
            }

        } elseif ( $this->bufferIsActive ) { // a tag ( <g | <mrk ) inside a seg or seg-source tag
            // Do NOT flush: keep accumulating in the buffer
            $this->CDATABuffer .= "</$name>";
        } else {
            // generic tag closure: flush to pointer
            $this->postProcAndFlush( $this->outputFP, $tag );
        }
    }

    // Leaving a <trans-unit> (xliff v1.*) or a <unit> (xliff v2.*): reset the per-unit state
    if ( $this->tuTagName === $name ) {
        $this->currentTransUnitIsTranslatable = null;
        $this->inTU                           = false;
        $this->segmentPositionInTu            = -1;
        $this->unitContainsMda                = false;
        $this->hasWrittenCounts               = false;
        $this->sourceAttributes               = [];

        $this->resetCounts();
    }
}
475
|
|
|
|
476
|
|
|
/**
 * Set the current segment array (with segment id and trans-unit id).
 *
 * While the trans-unit stays the same, the pointer moves to the id that follows
 * the current one in the list; otherwise it restarts from the first id of the list.
 *
 * @param array $listOfSegmentsIds
 */
private function setCurrentSegmentArray( array $listOfSegmentsIds = [] ) {
    $isSameTransUnit = !empty( $this->currentSegmentArray )
            && $this->currentSegmentArray[ 'tid' ] === $this->currentTransUnitId;

    if ( $isSameTransUnit ) {
        // advance to the next segment id of the same trans-unit
        $currentKey                         = array_search( $this->currentSegmentArray[ 'sid' ], $listOfSegmentsIds );
        $this->currentSegmentArray[ 'sid' ] = $listOfSegmentsIds[ $currentKey + 1 ];

        return;
    }

    // first call, or a different trans-unit: start from the first segment id
    $this->currentSegmentArray = [
            'sid' => $listOfSegmentsIds[ 0 ],
            'tid' => $this->currentTransUnitId,
    ];
}
500
|
|
|
|
501
|
|
|
/**
 * Update counts.
 *
 * Feeds the word counts of the current trans-unit into $this->counts:
 * only the current segment for xliff v2, every listed segment otherwise.
 * The current segment pointer is cleared at the end.
 */
private function updateCounts() {
    $segmentIds = $this->transUnits[ $this->currentTransUnitId ];

    if ( !empty( $segmentIds ) ) {
        $this->setCurrentSegmentArray( $segmentIds );
    }

    if ( $this->xliffVersion === 2 ) {
        $current = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];
        if ( !empty( $current ) ) {
            $this->updateSegmentCounts( $current );
        }
    } else {
        foreach ( $segmentIds as $segmentId ) {
            $current = $this->segments[ $segmentId ];
            if ( !empty( $current ) ) {
                $this->updateSegmentCounts( $current );
            }
        }
    }

    $this->currentSegmentArray = [];
}
529
|
|
|
|
530
|
|
|
/**
 * Record the word counts of one segment and add them to the running totals.
 *
 * The equivalent word count is truncated (floor) to two decimals before use.
 *
 * @param array $seg
 */
private function updateSegmentCounts( array $seg = [] ) {
    $rawCount      = $seg[ 'raw_word_count' ];
    $weightedCount = floor( $seg[ 'eq_word_count' ] * 100 ) / 100;

    // per-segment entry, indexed by segment id
    $this->counts[ 'segments_count_array' ][ $seg[ 'sid' ] ] = [
            'raw_word_count' => $rawCount,
            'eq_word_count'  => $weightedCount,
    ];

    // running totals
    $this->counts[ 'raw_word_count' ] += $rawCount;
    $this->counts[ 'eq_word_count' ]  += $weightedCount;
}
546
|
|
|
|
547
|
|
|
/**
 * Bring the per-segment list and both running totals back to their initial values.
 */
private function resetCounts() {
    $initialValues = [
            'segments_count_array' => [],
            'raw_word_count'       => 0,
            'eq_word_count'        => 0,
    ];

    foreach ( $initialValues as $key => $initialValue ) {
        $this->counts[ $key ] = $initialValue;
    }
}
552
|
|
|
|
553
|
|
|
/**
 * Prepare segment tagging for xliff insertion.
 *
 * Falls back to the source text when the translation is empty, lets the callback
 * (when there is one) flag the segment as untranslated content, and — for xliff v1 —
 * wraps the text in its <mrk> and surrounding tags before appending it to the
 * translation built so far.
 *
 * @param array  $seg
 * @param string $transUnitTranslation
 *
 * @return string
 */
protected function prepareTranslation( $seg, $transUnitTranslation = "" ) {
    $segment     = Strings::removeDangerousChars( $seg [ 'segment' ] );
    $translation = Strings::removeDangerousChars( $seg [ 'translation' ] );
    $dataRefMap  = isset( $seg[ 'data_ref_map' ] ) ? Strings::jsonToArray( $seg[ 'data_ref_map' ] ) : [];

    if ( $seg [ 'translation' ] == '' ) {
        // nothing translated: the source goes into the target
        $translation = $segment;
    } elseif ( $this->callback instanceof XliffReplacerCallbackInterface ) {
        $error = null;
        if ( !empty( $seg[ 'error' ] ) ) {
            $error = $seg[ 'error' ];
        }

        if ( $this->callback->thereAreErrors( $seg[ 'sid' ], $segment, $translation, $dataRefMap, $error ) ) {
            $translation = '|||UNTRANSLATED_CONTENT_START|||' . $segment . '|||UNTRANSLATED_CONTENT_END|||';
        }
    }

    // for xliff v2 the marks are ignored on purpose
    if ( $this->xliffVersion === 2 ) {
        return $translation;
    }

    if ( $seg[ 'mrk_id' ] !== null && $seg[ 'mrk_id' ] != '' ) {
        $mrkSuccTags = $seg[ 'mrk_succ_tags' ];

        // for ja-JP the leading whitespace of the tags following the mrk content is removed
        if ( $this->targetLang === 'ja-JP' ) {
            $mrkSuccTags = ltrim( $mrkSuccTags );
        }

        $translation = "<mrk mid=\"{$seg[ 'mrk_id' ]}\" mtype=\"seg\">{$seg[ 'mrk_prev_tags' ]}{$translation}{$mrkSuccTags}</mrk>";
    }

    return $transUnitTranslation . $seg[ 'prev_tags' ] . $translation . $seg[ 'succ_tags' ];
}
596
|
|
|
|
597
|
|
|
|
598
|
|
|
/**
 * Build the <count-group> element (raw and weighted counts) named after the current trans-unit id.
 *
 * @param $raw_word_count
 * @param $eq_word_count
 *
 * @return string
 */
private function getWordCountGroup( $raw_word_count, $eq_word_count ) {
    $rawCount      = '<count count-type="x-matecat-raw">' . $raw_word_count . '</count>';
    $weightedCount = '<count count-type="x-matecat-weighted">' . $eq_word_count . '</count>';

    return "\n" . '<count-group name="' . $this->currentTransUnitId . '">' . $rawCount . $weightedCount . '</count-group>';
}
607
|
|
|
|
608
|
|
|
/**
 * Return the segment at the current position of the current trans-unit.
 *
 * An empty array is returned when the unit is not translatable, when the unit
 * (or the current position inside it) is unknown, or when no segment is stored
 * for the resolved index.
 *
 * @return array
 */
private function getCurrentSegment() {
    if ( $this->currentTransUnitIsTranslatable !== 'yes' ) {
        return [];
    }

    // `??` covers both a missing trans-unit and a missing position inside it,
    // so no "undefined offset" notice is raised when the position does not exist
    $index = $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ] ?? null;

    if ( $index !== null && isset( $this->segments[ $index ] ) ) {
        return $this->segments[ $index ];
    }

    return [];
}
622
|
|
|
|
623
|
|
|
/**
 * This function creates a <target>.
 *
 * Xliff v2 gets a bare <target>; any other version gets the language (v1 only)
 * and state attributes, followed by the word count group unless the replacer
 * is the Trados (SDL) one.
 *
 * @param $translation
 * @param $stateProp
 *
 * @return string
 */
private function createTargetTag( $translation, $stateProp ) {

    // the case order is kept as is: `switch` compares loosely, first match wins
    switch ( $this->xliffVersion ) {
        case 1:
        default:
            $langAttribute = ( $this->xliffVersion === 1 ) ? ' xml:lang="' . $this->targetLang . '"' : '';

            $target = '<target ' . $langAttribute . ' ' . $stateProp . '>' . $translation . '</target>';

            // a Trados file gets no count group
            if ( get_class( $this ) === oldSdlOldXliffSAXTranslationReplacer::class ) {
                return $target;
            }

            return $target . $this->getWordCountGroup( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );

        case 2:
            return "<target>$translation</target>";
    }

}
655
|
|
|
|
656
|
|
|
/**
 * Build one <mda:metaGroup> (raw and weighted word counts) for every entry
 * of $this->counts[ 'segments_count_array' ].
 *
 * Each group id is `word_count_tu[<current trans-unit id>][<n>]`, where <n> is the
 * zero-based position of the entry in the list. Every call also increments
 * $this->mdaGroupCounter.
 *
 * @param bool $withMetadataTag when true the groups are wrapped in <mda:metadata>...</mda:metadata>
 *
 * @return string
 */
private function getWordCountGroupForXliffV2( $withMetadataTag = true ) {

    $this->mdaGroupCounter++;

    $return = '';

    if ( $withMetadataTag === true ) {
        $return .= '<mda:metadata>';
    }

    // array_values() gives the 0..n-1 positions used in the group id,
    // whatever the keys of the list are
    foreach ( array_values( $this->counts[ 'segments_count_array' ] ) as $index => $segments_count_item ) {

        $id = 'word_count_tu[' . $this->currentTransUnitId . '][' . $index . ']';

        $return .= " <mda:metaGroup id=\"" . $id . "\" category=\"row_xml_attribute\">\n"
                . "<mda:meta type=\"x-matecat-raw\">" . $segments_count_item[ 'raw_word_count' ] . "</mda:meta>\n"
                . "<mda:meta type=\"x-matecat-weighted\">" . $segments_count_item[ 'eq_word_count' ] . "</mda:meta>\n"
                . "</mda:metaGroup>";
    }

    if ( $withMetadataTag === true ) {
        $return .= '</mda:metadata>';
    }

    return $return;

}
694
|
|
|
|
695
|
|
|
} |
696
|
|
|
|