Total Complexity | 164 |
Total Lines | 811 |
Duplicated Lines | 0 % |
Changes | 6 | ||
Bugs | 0 | Features | 0 |
Complex classes like XliffSAXTranslationReplacer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use XliffSAXTranslationReplacer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
9 | class XliffSAXTranslationReplacer extends AbstractXliffReplacer { |
||
10 | /** |
||
11 | * @var int |
||
12 | */ |
||
13 | private $mdaGroupCounter = 0; |
||
14 | |||
15 | /** |
||
16 | * @var array |
||
17 | */ |
||
18 | private $nodesToCopy = [ |
||
19 | 'source', |
||
20 | 'mda:metadata', |
||
21 | 'memsource:additionalTagData', |
||
22 | 'originalData', |
||
23 | 'seg-source', |
||
24 | 'value', |
||
25 | 'bpt', |
||
26 | 'ept', |
||
27 | 'ph', |
||
28 | 'st', |
||
29 | 'note', |
||
30 | 'context', |
||
31 | ]; |
||
32 | |||
/**
 * Streams the original XLIFF through a SAX parser chunk by chunk, writing the
 * rebuilt document to $this->outputFP via the tagOpen/tagClose/characterData handlers.
 *
 * XML entities are replaced with placeholder-wrapped tokens before parsing because
 * the SAX parser would otherwise decode them.
 *
 * @throws RuntimeException when xml_parse() reports an error for a chunk
 */
public function replaceTranslation() {
    fwrite( $this->outputFP, '<?xml version="1.0" encoding="UTF-8"?>' );

    //create Sax parser
    $xmlParser = $this->initSaxParser();

    $entityPattern     = "/&(.*?);/";
    $entityReplacement = self::$INTERNAL_TAG_PLACEHOLDER . '$1' . self::$INTERNAL_TAG_PLACEHOLDER;
    $escapedAmpersand  = self::$INTERNAL_TAG_PLACEHOLDER . 'amp' . self::$INTERNAL_TAG_PLACEHOLDER;

    while ( true ) {
        $this->currentBuffer = fread( $this->originalFP, 4096 );

        // compare strictly: a chunk consisting of the single character "0" is falsy
        // but is still data that must be parsed
        if ( $this->currentBuffer === false || $this->currentBuffer === '' ) {
            break;
        }

        // obfuscate entities because sax automatically does html_entity_decode
        $temporary_check_buffer = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );

        // Avoid cutting entities in half: the last fread could have truncated an entity
        // (say, '&lt;' cut to '&l'). 9 is assumed to be the max length of an entity, so an
        // unmatched '&' more than 9 bytes away from the end of the buffer cannot be one.
        while ( true ) {
            // look at the LAST unmatched '&': only the tail of the buffer can hold a
            // truncated entity, and an earlier bare '&' must not hide it
            $_ampPos = strrpos( $temporary_check_buffer, '&' );

            if ( $_ampPos === false || strlen( $temporary_check_buffer ) - $_ampPos > 9 ) {
                break;
            }

            // an entity may still be incomplete: fetch some more and repeat the escaping
            $extraBytes = fread( $this->originalFP, 9 );

            // end of file (or read error): nothing can complete the entity any more,
            // leaving the loop here prevents spinning forever on a trailing '&'
            if ( $extraBytes === false || $extraBytes === '' ) {
                break;
            }

            $this->currentBuffer    .= $extraBytes;
            $temporary_check_buffer = preg_replace( $entityPattern, $entityReplacement, $this->currentBuffer );
        }

        // $temporary_check_buffer always mirrors the entity-escaped current buffer here;
        // every '&' still present is not an entity and gets escaped as well
        $this->currentBuffer = str_replace( "&", $escapedAmpersand, $temporary_check_buffer );
        unset( $temporary_check_buffer );

        //get length of chunk
        $this->len = strlen( $this->currentBuffer );

        //parse chunk of text
        if ( !xml_parse( $xmlParser, $this->currentBuffer, feof( $this->originalFP ) ) ) {
            //if unable, raise an exception
            throw new RuntimeException( sprintf(
                    "XML error: %s at line %d",
                    xml_error_string( xml_get_error_code( $xmlParser ) ),
                    xml_get_current_line_number( $xmlParser )
            ) );
        }

        // accumulate the number of bytes handed to the parser so far: tagOpen subtracts it from
        // the parser's global byte index to obtain an address local to the current buffer
        $this->offset += $this->len;
    }

    // close Sax parser
    $this->closeSaxParser( $xmlParser );
}
||
98 | |||
99 | /** |
||
100 | * @inheritDoc |
||
101 | */ |
||
protected function tagOpen( $parser, $name, $attr ) {
    // SAX start-element handler. Tracks unit/target state, rebuilds the opening tag with the
    // adjusted attributes and either flushes it to the output or appends it to the CDATA buffer.
    // Tags opened while inside a translatable <target> are dropped (the target is rewritten on close).

    // check if we are entering into a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
    if ( $this->tuTagName === $name ) {
        $this->inTU = true;

        // get id
        // trim to first 100 characters because this is the limit on Matecat's DB
        // (a missing id is treated as an empty id instead of raising an undefined index notice)
        $this->currentTransUnitId = isset( $attr[ 'id' ] ) ? substr( $attr[ 'id' ], 0, 100 ) : '';

        // `translate` attribute can be only yes or no
        $attr[ 'translate' ] = ( isset( $attr[ 'translate' ] ) && $attr[ 'translate' ] === 'no' ) ? 'no' : 'yes';

        // current 'translate' attribute of the current trans-unit
        $this->currentTransUnitTranslate = $attr[ 'translate' ];
    }

    if ( 'source' === $name ) {
        $this->sourceAttributes = $attr;
    }

    if ( 'mda:metadata' === $name ) {
        $this->unitContainsMda = true;
    }

    // check if we are entering into a <target>
    if ( 'target' === $name ) {
        $this->inTarget = ( $this->currentTransUnitTranslate !== 'no' );
    }

    // check if we are inside a <target>, obviously this happen only if there are targets inside the trans-unit
    // <target> must be stripped to be replaced, so this check avoids <target> reconstruction
    if ( !$this->inTarget ) {

        $tag = '';

        // ============================================
        // only for Xliff 2.*
        // ============================================
        //
        // In xliff v2 we MUST add <mda:metadata> BEFORE <notes>/<originalData>/<segment>/<ignorable>
        //
        // As documentation says, <unit> contains:
        //
        // - elements from other namespaces, OPTIONAL
        // - Zero or one <notes> elements followed by
        // - Zero or one <originalData> element followed by
        // - One or more <segment> or <ignorable> elements in any order.
        //
        // For more info please refer to:
        //
        // http://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html#unit
        //
        if (
                $this->xliffVersion === 2 &&
                in_array( $name, [ 'notes', 'originalData', 'segment', 'ignorable' ], true ) &&
                $this->unitContainsMda === false
        ) {
            if ( !empty( $this->transUnits[ $this->currentTransUnitId ] ) && !$this->hasWrittenCounts ) {

                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag                   .= $this->getWordCountGroupForXliffV2( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
                $this->unitContainsMda = true;
            }
        }

        // construct tag
        $tag .= "<$name ";

        $lastMrkState = null;
        $stateProp    = '';

        // positional index of the first segment still registered for the current unit;
        // it does not change while the attributes are written, so compute it once
        $pos = 0;
        if ( $this->currentTransUnitId && isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {
            $pos = current( $this->transUnits[ $this->currentTransUnitId ] );
        }

        foreach ( $attr as $k => $v ) {

            //if tag name is file, we must replace the target-language attribute
            if ( $name === 'file' && $k === 'target-language' && !empty( $this->targetLang ) ) {
                //replace Target language with job language provided from constructor
                $tag .= "$k=\"$this->targetLang\" ";
                continue;
            }

            if ( $name === $this->tuTagName && isset( $this->segments[ $pos ] ) && isset( $this->segments[ $pos ][ 'sid' ] ) ) {

                $sid = $this->segments[ $pos ][ 'sid' ];

                // add `help-id` to xliff v.1*
                // add `mtc:segment-id` to xliff v.2*
                if ( $this->xliffVersion === 1 && strpos( $tag, 'help-id' ) === false ) {
                    if ( !empty( $sid ) ) {
                        $tag .= "help-id=\"$sid\" ";
                    }
                } elseif ( $this->xliffVersion === 2 && strpos( $tag, 'mtc:segment-id' ) === false ) {
                    if ( !empty( $sid ) ) {
                        $tag .= "mtc:segment-id=\"$sid\" ";
                    }
                }

            } elseif ( 'segment' === $name && $this->xliffVersion === 2 ) { // add state to segment in Xliff v2
                // an unknown segment is handled like one without status, without touching an undefined offset
                $segmentForState = isset( $this->segments[ $pos ] ) ? $this->segments[ $pos ] : [ 'status' => null ];

                list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $segmentForState, $stateProp, $lastMrkState );
            }

            //normal tag flux, put attributes in it
            $tag .= "$k=\"$v\" ";

            // replace state for xliff v2
            if ( $stateProp ) {
                // Match exactly one `state="..."` attribute: the value stops at the first closing quote
                // (a greedy `.*` would swallow every attribute appended after it) and the look-behind
                // keeps attributes merely ending in "state" (e.g. subState) untouched.
                $pattern = '/(?<![\w:.\-])state="[^"]*"/i';
                $tag     = preg_replace( $pattern, $stateProp, $tag );
            }
        }

        // add oasis xliff 20 namespace
        if ( $this->xliffVersion === 2 && $name === 'xliff' && !array_key_exists( 'xmlns:mda', $attr ) ) {
            $tag .= 'xmlns:mda="urn:oasis:names:tc:xliff:metadata:2.0"';
        }

        // add MateCat specific namespace, we want maybe add non-XLIFF attributes
        if ( $name === 'xliff' && !array_key_exists( 'xmlns:mtc', $attr ) ) {
            $tag .= ' xmlns:mtc="https://www.matecat.com" ';
        }

        // trgLang
        if ( $name === 'xliff' ) {
            $tag = preg_replace( '/trgLang="(.*?)"/', 'trgLang="' . $this->targetLang . '"', $tag );
        }

        //this logic helps detecting empty tags
        //get current position of SAX pointer in all the stream of data is has read so far:
        //it points at the end of current tag
        $idx = xml_get_current_byte_index( $parser );

        //check whether the bounds of current tag are entirely in current buffer or the end of the current tag
        //is outside current buffer (in the latter case, it's in next buffer to be read by the while loop);
        //this check is necessary because we may have truncated a tag in half with current read,
        //and the other half may be encountered in the next buffer it will be passed
        if ( isset( $this->currentBuffer[ $idx - $this->offset ] ) ) {
            //if this tag entire length fitted in the buffer, the last char must be the last
            //symbol before the '>'; if it's an empty tag, it is assumed that it's a '/'
            $lastChar = $this->currentBuffer[ $idx - $this->offset ];
        } else {
            //if it's out, simple use the last character of the chunk
            $lastChar = $this->currentBuffer[ $this->len - 1 ];
        }

        //trim last space
        $tag = rtrim( $tag );

        //detect empty tag
        $this->isEmpty = ( $lastChar == '/' || $name == 'x' );
        if ( $this->isEmpty ) {
            $tag .= '/';
        }

        //add tag ending
        $tag .= ">";

        //set a Buffer for the segSource Source tag
        if ( $this->bufferIsActive || in_array( $name, $this->nodesToCopy, true ) ) { // we are opening a critical CDATA section

            //WARNING BECAUSE SOURCE AND SEG-SOURCE TAGS CAN BE EMPTY IN SOME CASES!!!!!
            //so check for isEmpty also in conjunction with name
            if ( $this->isEmpty && ( 'source' === $name || 'seg-source' === $name ) ) {
                $this->postProcAndFlush( $this->outputFP, $tag );
            } else {
                //these are NOT source/seg-source/value empty tags, THERE IS A CONTENT, write it in buffer
                $this->bufferIsActive = true;
                $this->CDATABuffer    .= $tag;
            }
        } else {
            $this->postProcAndFlush( $this->outputFP, $tag );
        }
    }

    // update segmentPositionInTu: <source> marks a new segment in v1, <segment> in v2
    if ( $this->xliffVersion === 1 && $this->inTU && $name === 'source' ) {
        $this->segmentPositionInTu++;
    }

    if ( $this->xliffVersion === 2 && $this->inTU && $name === 'segment' ) {
        $this->segmentPositionInTu++;
    }
}
||
297 | |||
298 | /** |
||
299 | * @inheritDoc |
||
300 | */ |
||
protected function tagClose( $parser, $name ) {
    // SAX end-element handler. Writes the closing tag (or the rebuilt <target>) for the element
    // being closed and resets the per-unit state when the unit element itself is closed.
    $tag = '';

    /**
     * if is a tag within <target> or
     * if it is an empty tag, do not add closing tag because we have already closed it in
     *
     * self::tagOpen method
     */
    if ( !$this->isEmpty && !( $this->inTarget && $name !== 'target' ) ) {

        if ( !$this->inTarget ) {
            $tag = "</$name>";
        }

        if ( 'target' == $name ) {

            if ( $this->currentTransUnitTranslate === 'no' ) {
                // do nothing
            } elseif ( isset( $this->transUnits[ $this->currentTransUnitId ] ) ) {

                // get translation of current segment, by indirect indexing: id -> positional index -> segment
                // actually there may be more that one segment to that ID if there are two mrk of the same source segment

                $listOfSegmentsIds = $this->transUnits[ $this->currentTransUnitId ];

                // $currentSegmentId
                if ( !empty( $listOfSegmentsIds ) ) {
                    $this->setCurrentSegmentArray( $listOfSegmentsIds );
                }

                /*
                 * At the end of every cycle the segment grouping information is lost: unset( 'matecat|' . $this->currentId )
                 *
                 * We need to take the info about the last segment parsed
                 * ( normally more than 1 db row because of mrk tags )
                 *
                 * So, copy the current segment data group to an another structure to take the last one segment
                 * for the next tagOpen ( possible sdl:seg-defs )
                 *
                 */

                $this->lastTransUnit = [];

                // The xliff v1 branch below unset()s consumed positions, so on a later <target> with the
                // same unit id the keys no longer start at 0: walk the values instead of assuming 0..n-1.
                $last_value = null;
                foreach ( array_values( $listOfSegmentsIds ) as $i => $id ) {
                    // keep only the leading run of consecutive segment ids
                    if ( isset( $this->segments[ $id ] ) && ( $i === 0 || $last_value + 1 == $id ) ) {
                        $last_value            = $id;
                        $this->lastTransUnit[] = $this->segments[ $id ];
                    }
                }

                // init translation and state
                $translation  = '';
                $lastMrkState = null;
                $stateProp    = '';

                // we must reset the lastMrkId found because this is a new segment.
                $lastMrkId = -1;

                if ( $this->xliffVersion === 2 ) {
                    $seg = $this->segments[ $this->currentSegmentArray[ 'sid' ] ];

                    // update counts
                    if ( !$this->hasWrittenCounts && !empty( $seg ) ) {
                        $this->updateSegmentCounts( $seg );
                    }

                    // delete translations so the prepareSegment
                    // will put source content in target tag
                    if ( $this->sourceInTarget ) {
                        $seg[ 'translation' ] = '';
                        $this->resetCounts();
                    }

                    // append $translation
                    $translation = $this->prepareTranslation( $seg, $translation );

                    list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                } else {
                    foreach ( $listOfSegmentsIds as $pos => $id ) {

                        /*
                         * This routine works to respect the positional orders of markers.
                         * In every cycle we check if the mrk of the segment is below or equal the last one.
                         * When this is true, means that the mrk id belongs to the next segment with the same internal_id
                         * so we MUST stop to apply markers and translations
                         * and stop to add eq_word_count
                         *
                         * Begin:
                         * pre-assign zero to the new mrk if this is the first one ( in this segment )
                         * If it is null leave it NULL
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] < 0 && $this->segments[ $id ][ "mrk_id" ] !== null ) {
                            $this->segments[ $id ][ "mrk_id" ] = 0;
                        }

                        /*
                         * WARNING:
                         * For those seg-source that doesn't have a mrk ( having a mrk id === null )
                         * ( null <= -1 ) === true
                         * so, cast to int
                         */
                        if ( (int)$this->segments[ $id ][ "mrk_id" ] <= $lastMrkId ) {
                            break;
                        }

                        // set $this->currentSegment
                        $seg = $this->segments[ $id ];

                        // update counts
                        if ( !empty( $seg ) ) {
                            $this->updateSegmentCounts( $seg );
                        }

                        // delete translations so the prepareSegment
                        // will put source content in target tag
                        if ( $this->sourceInTarget ) {
                            $seg[ 'translation' ] = '';
                            $this->resetCounts();
                        }

                        // append $translation
                        $translation = $this->prepareTranslation( $seg, $translation );

                        // for xliff 2 we need $this->transUnits[ $this->currentId ] [ $pos ] for populating metadata

                        unset( $this->transUnits[ $this->currentTransUnitId ] [ $pos ] );

                        $lastMrkId = $this->segments[ $id ][ "mrk_id" ];

                        list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                    }
                }

                //append translation
                $targetLang = '';
                if ( $this->xliffVersion === 1 ) {
                    $targetLang = ' xml:lang="' . $this->targetLang . '"';
                }

                $tag = $this->buildTranslateTag( $targetLang, $stateProp, $translation, $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ] );
            }

            // signal we are leaving a target
            $this->targetWasWritten = true;
            $this->inTarget         = false;
            // third argument: treat the payload as CDATA
            $this->postProcAndFlush( $this->outputFP, $tag, true );
        } elseif ( in_array( $name, $this->nodesToCopy, true ) ) { // we are closing a critical CDATA section

            $this->bufferIsActive = false;

            // only for Xliff 2.*
            // write here <mda:metaGroup> and <mda:meta> if already present in the <unit>
            if ( 'mda:metadata' === $name && $this->unitContainsMda && $this->xliffVersion === 2 && !$this->hasWrittenCounts ) {

                // we need to update counts here
                $this->updateCounts();
                $this->hasWrittenCounts = true;

                $tag = $this->CDATABuffer;
                $tag .= $this->getWordCountGroupForXliffV2( $this->counts[ 'raw_word_count' ], $this->counts[ 'eq_word_count' ], false );
                $tag .= "    </mda:metadata>";

            } else {
                $tag = $this->CDATABuffer . "</$name>";
            }

            $this->CDATABuffer = "";

            //flush to pointer
            $this->postProcAndFlush( $this->outputFP, $tag );
        } elseif ( 'segment' === $name ) {

            // only for Xliff 2.*
            // if segment has no <target> add it BEFORE </segment>
            if ( $this->xliffVersion === 2 && !$this->targetWasWritten ) {

                $seg = $this->getCurrentSegment();

                // copy attr from <source>
                $tag = '<target';
                foreach ( $this->sourceAttributes as $k => $v ) {
                    $tag .= " $k=\"$v\"";
                }

                // getCurrentSegment() returns [] when no segment is available: emit an empty
                // target in that case instead of reading an undefined index
                $segmentTranslation = isset( $seg[ 'translation' ] ) ? $seg[ 'translation' ] : '';

                $tag .= '>' . $segmentTranslation . '</target></segment>';
            }

            $this->postProcAndFlush( $this->outputFP, $tag );

            // we are leaving <segment>, reset $segmentHasTarget
            $this->targetWasWritten = false;

        } elseif ( $name === 'trans-unit' ) {

            // only for Xliff 1.*
            // handling </trans-unit> closure
            if ( !$this->targetWasWritten ) {
                $seg          = $this->getCurrentSegment();
                $lastMrkState = null;
                $stateProp    = '';
                $tag          = '';

                // if there is translation available insert <target> BEFORE </trans-unit>
                if ( isset( $seg[ 'translation' ] ) ) {
                    list( $stateProp, $lastMrkState ) = $this->setTransUnitState( $seg, $stateProp, $lastMrkState );
                    $tag .= $this->createTargetTag( $seg[ 'translation' ], $stateProp );
                }

                $tag .= '</trans-unit>';
                $this->postProcAndFlush( $this->outputFP, $tag );
            } else {
                $this->postProcAndFlush( $this->outputFP, '</trans-unit>' );
            }
        } elseif ( $this->bufferIsActive ) { // this is a tag ( <g | <mrk ) inside a seg or seg-source tag
            $this->CDATABuffer .= "</$name>";
            // Do NOT Flush
        } else { //generic tag closure do Nothing
            // flush to pointer
            $this->postProcAndFlush( $this->outputFP, $tag );
        }
    } elseif ( $this->CDATABuffer === '<note/>' && $this->bufferIsActive === true ) {
        $this->postProcAndFlush( $this->outputFP, '<note/>' );
        $this->bufferIsActive = false;
        $this->CDATABuffer    = '';
        $this->isEmpty        = false;
    } else {
        //ok, nothing to be done; reset flag for next coming tag
        $this->isEmpty = false;
    }

    // check if we are leaving a <trans-unit> (xliff v1.*) or <unit> (xliff v2.*)
    if ( $this->tuTagName === $name ) {
        $this->currentTransUnitTranslate = null;
        $this->inTU                      = false;
        $this->segmentPositionInTu       = -1;
        $this->unitContainsMda           = false;
        $this->hasWrittenCounts          = false;
        $this->sourceAttributes          = [];
    }
}
||
545 | |||
546 | /** |
||
547 | * Set the current segment array (with segment id and trans-unit id) |
||
548 | * |
||
549 | * @param array $listOfSegmentsIds |
||
550 | */ |
||
551 | private function setCurrentSegmentArray( array $listOfSegmentsIds = [] ) { |
||
567 | ]; |
||
568 | } |
||
569 | } |
||
570 | } |
||
571 | |||
572 | /** |
||
573 | * Update counts |
||
574 | */ |
||
575 | private function updateCounts() { |
||
599 | } |
||
600 | |||
601 | /** |
||
602 | * @param array $seg |
||
603 | */ |
||
private function updateSegmentCounts( array $seg = [] ) {
    // Adds the word counts of one segment to the running totals in $this->counts.
    // Missing keys count as 0, so the default empty array is a no-op rather than an
    // undefined-index read.
    $rawWordCount = isset( $seg[ 'raw_word_count' ] ) ? $seg[ 'raw_word_count' ] : 0;
    $eqWordCount  = isset( $seg[ 'eq_word_count' ] ) ? $seg[ 'eq_word_count' ] : 0;

    $this->counts[ 'raw_word_count' ] += $rawWordCount;
    // the weighted count is truncated (not rounded) to two decimals before being summed
    $this->counts[ 'eq_word_count' ] += ( floor( $eqWordCount * 100 ) / 100 );
}
||
608 | |||
609 | private function resetCounts() { |
||
612 | } |
||
613 | |||
614 | /** |
||
615 | * prepare segment tagging for xliff insertion |
||
616 | * |
||
617 | * @param array $seg |
||
618 | * @param string $transUnitTranslation |
||
619 | * |
||
620 | * @return string |
||
621 | */ |
||
protected function prepareTranslation( $seg, $transUnitTranslation = "" ) {
    // Builds the target text for one segment and, for xliff v1, appends it (with its
    // surrounding tags and optional <mrk> wrapper) to the text accumulated for the unit.
    $sourceText = Strings::removeDangerousChars( $seg [ 'segment' ] );
    $targetText = Strings::removeDangerousChars( $seg [ 'translation' ] );

    $dataRefMap = [];
    if ( isset( $seg[ 'data_ref_map' ] ) && $seg[ 'data_ref_map' ] !== null ) {
        $dataRefMap = Strings::jsonToArray( $seg[ 'data_ref_map' ] );
    }

    $translationIsMissing = is_null( $seg [ 'translation' ] ) || $seg [ 'translation' ] == '';

    if ( $translationIsMissing ) {
        // no translation: fall back to the source text
        $targetText = $sourceText;
    } elseif ( $this->callback instanceof XliffReplacerCallbackInterface ) {
        $error = null;
        if ( isset( $seg[ 'error' ] ) ) {
            $error = $seg[ 'error' ];
        }

        // the callback vetoes the translation: emit the source wrapped in the untranslated markers
        if ( $this->callback->thereAreErrors( $seg[ 'sid' ], $sourceText, $targetText, $dataRefMap, $error ) ) {
            $targetText = '|||UNTRANSLATED_CONTENT_START|||' . $sourceText . '|||UNTRANSLATED_CONTENT_END|||';
        }
    }

    // for xliff v2 we ignore the marks on purpose
    if ( $this->xliffVersion === 2 ) {
        return $targetText;
    }

    $markerId = $seg[ 'mrk_id' ];
    if ( $markerId !== null && $markerId != '' ) {
        $trailingMarkerTags = $seg[ 'mrk_succ_tags' ];
        if ( $this->targetLang === 'ja-JP' ) {
            $trailingMarkerTags = ltrim( $trailingMarkerTags );
        }

        $targetText = "<mrk mid=\"" . $markerId . "\" mtype=\"seg\">" . $seg[ 'mrk_prev_tags' ] . $targetText . $trailingMarkerTags . "</mrk>";
    }

    return $transUnitTranslation . $seg[ 'prev_tags' ] . $targetText . $seg[ 'succ_tags' ];
}
||
657 | |||
658 | /** |
||
659 | * @param $targetLang |
||
660 | * @param $stateProp |
||
661 | * @param $translation |
||
662 | * @param $rawWordCount |
||
663 | * @param $eqWordCount |
||
664 | * |
||
665 | * @return string |
||
666 | */ |
||
private function buildTranslateTag( $targetLang, $stateProp, $translation, $rawWordCount, $eqWordCount ) {
    // Returns the <target> element for the current xliff version. Every version other than 2
    // gets the v1 form; the loose comparisons mirror a switch that tests `1` before `2`.
    $isVersionTwo = ( $this->xliffVersion != 1 && $this->xliffVersion == 2 );

    if ( $isVersionTwo ) {
        return "<target>$translation</target>";
    }

    $targetTag = "<target $targetLang $stateProp>$translation</target>";

    // if it's a Trados file don't append count group
    $isTradosReplacer = ( get_class( $this ) === SdlXliffSAXTranslationReplacer::class );
    if ( !$isTradosReplacer ) {
        $targetTag .= $this->getWordCountGroup( $rawWordCount, $eqWordCount );
    }

    return $targetTag;
}
||
684 | |||
685 | /** |
||
686 | * @param $raw_word_count |
||
687 | * @param $eq_word_count |
||
688 | * |
||
689 | * @return string |
||
690 | */ |
||
private function getWordCountGroup( $raw_word_count, $eq_word_count ) {
    // Builds the xliff v1 <count-group> (raw and weighted word counts) named after the current unit id.
    $rawCountNode      = "<count count-type=\"x-matecat-raw\">" . $raw_word_count . "</count>";
    $weightedCountNode = "<count count-type=\"x-matecat-weighted\">" . $eq_word_count . "</count>";

    return "\n<count-group name=\"" . $this->currentTransUnitId . "\">" . $rawCountNode . $weightedCountNode . "</count-group>";
}
||
694 | |||
695 | /** |
||
696 | * @return array |
||
697 | */ |
||
private function getCurrentSegment() {
    // Resolves the segment at the current position inside the current unit.
    // Returns an empty array when the unit is not translatable or no segment can be resolved.
    if ( $this->currentTransUnitTranslate !== 'yes' ) {
        return [];
    }

    // the position starts at -1 and positions can be unset while targets are written,
    // so both lookups must be guarded
    if ( !isset( $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ] ) ) {
        return [];
    }

    $index = $this->transUnits[ $this->currentTransUnitId ][ $this->segmentPositionInTu ];

    if ( isset( $this->segments[ $index ] ) ) {
        return $this->segments[ $index ];
    }

    return [];
}
||
709 | |||
710 | /** |
||
711 | * This function create a <target> |
||
712 | * |
||
713 | * @param $translation |
||
714 | * @param $stateProp |
||
715 | * |
||
716 | * @return string |
||
717 | */ |
||
private function createTargetTag( $translation, $stateProp ) {
    // Delegates to buildTranslateTag() using the job target language and the current running counts.
    $languageAttribute = 'xml:lang="' . $this->targetLang . '"';
    $rawWordCount      = $this->counts[ 'raw_word_count' ];
    $eqWordCount       = $this->counts[ 'eq_word_count' ];

    return $this->buildTranslateTag( $languageAttribute, $stateProp, $translation, $rawWordCount, $eqWordCount );
}
||
723 | |||
724 | /** |
||
725 | * @param $raw_word_count |
||
726 | * @param $eq_word_count |
||
727 | * @param bool $withMetadataTag |
||
728 | * |
||
729 | * @return string |
||
730 | */ |
||
731 | private function getWordCountGroupForXliffV2( $raw_word_count, $eq_word_count, $withMetadataTag = true ) { |
||
747 | </mda:metaGroup> |
||
748 | </mda:metadata>"; |
||
749 | } |
||
750 | |||
751 | /** |
||
752 | * @param $seg |
||
753 | * @param $state_prop |
||
754 | * @param $lastMrkState |
||
755 | * |
||
756 | * @return array |
||
757 | */ |
||
private function setTransUnitState( $seg, $state_prop, $lastMrkState ) {
    // Folds the status of one segment into the state of the whole unit.
    // Returns [ state attribute string, last status taken into account ]; both are returned
    // unchanged when the segment status must not override the state computed so far.
    switch ( $seg[ 'status' ] ) {

        case TranslationStatus::STATUS_FIXED:
        case TranslationStatus::STATUS_APPROVED:
            if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"reviewed\"" : "state=\"signed-off\"";
                $lastMrkState = TranslationStatus::STATUS_APPROVED;
            }
            break;

        case TranslationStatus::STATUS_TRANSLATED:
            if ( $lastMrkState == null || $lastMrkState == TranslationStatus::STATUS_TRANSLATED || $lastMrkState == TranslationStatus::STATUS_APPROVED ) {
                $state_prop   = "state=\"translated\"";
                $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
            }
            break;

        case TranslationStatus::STATUS_REJECTED: // if there is a mark REJECTED and there is not a DRAFT, all the trans-unit is REJECTED. In V2 there is no way to mark
        case TranslationStatus::STATUS_REBUTTED:
            // Both inequalities must hold: joined with `||` the test is always true, which would
            // let REJECTED override a previous NEW or DRAFT state.
            if ( $lastMrkState == null || ( $lastMrkState != TranslationStatus::STATUS_NEW && $lastMrkState != TranslationStatus::STATUS_DRAFT ) ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"needs-review-translation\"";
                $lastMrkState = TranslationStatus::STATUS_REJECTED;
            }
            break;

        case TranslationStatus::STATUS_NEW:
            if ( ( $lastMrkState == null ) || $lastMrkState != TranslationStatus::STATUS_DRAFT ) {
                $state_prop   = ( $this->xliffVersion === 2 ) ? "state=\"initial\"" : "state=\"new\"";
                $lastMrkState = TranslationStatus::STATUS_NEW;
            }
            break;

        case TranslationStatus::STATUS_DRAFT:
            // DRAFT always wins over whatever was seen before
            $state_prop   = "state=\"needs-translation\"";
            $lastMrkState = TranslationStatus::STATUS_DRAFT;
            break;

        default:
            // this is the case when a segment is not showed in cattool, so the row in
            // segment_translations does not exists and
            // ---> $seg[ 'status' ] is NULL
            if ( $lastMrkState == null ) { //this is the first MRK ID
                $state_prop   = "state=\"translated\"";
                $lastMrkState = TranslationStatus::STATUS_TRANSLATED;
            }
            // otherwise do nothing and preserve the last state
            break;
    }

    return [ $state_prop, $lastMrkState ];
}
||
810 | |||
811 | /** |
||
812 | * @inheritDoc |
||
813 | */ |
||
814 | protected function characterData( $parser, $data ) { |
||
820 | } |
||
821 | } |
||
822 | } |
||
823 |
PHP has two types of connecting operators (logical operators, and boolean operators):
and
&&
or
||
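The difference between these is their operator precedence: `and` and `or` bind more loosely than `&&` and `||` (more loosely even than assignment), which changes how an expression is grouped. In most cases, you would want to use a boolean operator like `&&` or `||`.
Let’s take a look at a few examples: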
Logical Operators are used for Control-Flow
One case where you explicitly want to use logical operators is for control-flow such as this:
Since `die` introduces problems of its own (for example, it makes our code hardly testable and prevents any kind of more sophisticated error handling), you probably do not want to use this in real-world code. Unfortunately, logical operators cannot be combined with `throw` at this point.
These limitations lead to logical operators rarely being of use in current PHP code.