|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Matecat\SubFiltering; |
|
4
|
|
|
|
|
5
|
|
|
use Exception; |
|
6
|
|
|
use Matecat\SubFiltering\Commons\Pipeline; |
|
7
|
|
|
use Matecat\SubFiltering\Filters\CtrlCharsPlaceHoldToAscii; |
|
8
|
|
|
use Matecat\SubFiltering\Filters\DataRefReplace; |
|
9
|
|
|
use Matecat\SubFiltering\Filters\DataRefRestore; |
|
10
|
|
|
use Matecat\SubFiltering\Filters\EmojiToEntity; |
|
11
|
|
|
use Matecat\SubFiltering\Filters\EncodeControlCharsInXliff; |
|
12
|
|
|
use Matecat\SubFiltering\Filters\EntityToEmoji; |
|
13
|
|
|
use Matecat\SubFiltering\Filters\FromLayer2ToRawXML; |
|
14
|
|
|
use Matecat\SubFiltering\Filters\LtGtEncode; |
|
15
|
|
|
use Matecat\SubFiltering\Filters\PlaceHoldXliffTags; |
|
16
|
|
|
use Matecat\SubFiltering\Filters\RemoveDangerousChars; |
|
17
|
|
|
use Matecat\SubFiltering\Filters\RestorePlaceHoldersToXLIFFLtGt; |
|
18
|
|
|
use Matecat\SubFiltering\Filters\RestoreXliffTagsContent; |
|
19
|
|
|
use Matecat\SubFiltering\Filters\SpecialEntitiesToPlaceholdersForView; |
|
20
|
|
|
|
|
21
|
|
|
/** |
|
22
|
|
|
* Class Filter |
|
23
|
|
|
* |
|
24
|
|
|
* This class is meant to create subfiltering layers to allow data to be safely sent and received from 2 different Layers and real file |
|
25
|
|
|
* |
|
26
|
|
|
* # Definitions |
|
27
|
|
|
* |
|
28
|
|
|
* - Raw file, the real XML file in input, with data in XML |
|
29
|
|
|
* - Layer 0 is defined to be the Database. The data stored in the database should be in the same form (sanitized if needed) they come from Xliff file |
|
30
|
|
|
* - Layer 1 is defined to be external services and resources, for example, MT/TM server. This layer is different from layer 0, HTML subfiltering is applied here |
|
31
|
|
|
* - Layer 2 is defined to be the MateCat UI. |
|
32
|
|
|
* |
|
33
|
|
|
* # Constraints |
|
34
|
|
|
* - We have to maintain the compatibility with PH tags placed inside the XLIff in the form <ph id="[0-9+]" equiv-text="<br/>"/> . |
|
35
|
|
|
* Those tags are placed into the database as XML. |
|
36
|
|
|
* - HTML and other variables like android tags and custom features are placed into the database as encoded HTML &lt;br/&gt; |
|
37
|
|
|
* |
|
38
|
|
|
* - Data sent to the external services like MT/TM are sub-filtered: |
|
39
|
|
|
* -- &lt;br/&gt; becomes <ph id="mtc_[0-9]+" equiv-text="base64:Jmx0O2JyLyZndDs="/> |
|
40
|
|
|
* -- Existent tags in the XLIFF like <ph id="[0-9+]" equiv-text="<br/>"/> will leave as is |
|
41
|
|
|
* |
|
42
|
|
|
* |
|
43
|
|
|
* @package SubFiltering |
|
44
|
|
|
*/ |
|
45
|
|
|
class MateCatFilter extends AbstractFilter { |
|
46
|
|
|
|
|
47
|
|
|
/**
 * Converts a segment from its database form (Layer 0) into the Layer 1 form.
 *
 * A fresh Pipeline is created for the current source, target and dataRefMap, populated by
 * configureFromLayer0ToLayer1Pipeline(), handed to the feature set under the
 * 'fromLayer0ToLayer1' hook and finally run on the segment.
 *
 * @param string      $segment The Layer 0 segment to convert.
 * @param string|null $cid     Optional context identifier; it is not read by this method body.
 *
 * @return string The segment in Layer 1 format.
 * @throws Exception If the transformation process fails.
 */
public function fromLayer0ToLayer1( string $segment, ?string $cid = null ): string {
    $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );
    $this->configureFromLayer0ToLayer1Pipeline( $pipeline );

    // The feature set may alter the pipeline for this specific transformation.
    /** @var Pipeline $pipeline */
    $pipeline = $this->featureSet->filter( 'fromLayer0ToLayer1', $pipeline );

    // Run the handlers on the segment and hand back the result.
    return $pipeline->transform( $segment );
}
|
70
|
|
|
|
|
71
|
|
|
/**
 * Converts database content (Layer 0) into the UI representation (Layer 2).
 *
 * The conversion is done in two hops: Layer 0 to Layer 1 first, then Layer 1 to Layer 2.
 *
 * @param string $segment The Layer 0 segment.
 *
 * @return string The segment in Layer 2 format.
 * @throws Exception
 */
public function fromLayer0ToLayer2( string $segment ): string {
    $layer1Segment = $this->fromLayer0ToLayer1( $segment );

    return $this->fromLayer1ToLayer2( $layer1Segment );
}
|
84
|
|
|
|
|
85
|
|
|
/**
 * Converts a Layer 1 segment into the UI representation (Layer 2).
 *
 * The pipeline runs, in this order, SpecialEntitiesToPlaceholdersForView, EntityToEmoji and
 * DataRefReplace; the feature set can alter it through the 'fromLayer1ToLayer2' hook before
 * the segment is transformed.
 *
 * @param string $segment The Layer 1 segment.
 *
 * @return string The segment in Layer 2 format.
 * @throws Exception
 */
public function fromLayer1ToLayer2( string $segment ): string {
    $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

    // Handlers are appended in execution order.
    $handlers = [
            SpecialEntitiesToPlaceholdersForView::class,
            EntityToEmoji::class,
            DataRefReplace::class,
    ];
    foreach ( $handlers as $handler ) {
        $pipeline->addLast( $handler );
    }

    /** @var Pipeline $pipeline */
    $pipeline = $this->featureSet->filter( 'fromLayer1ToLayer2', $pipeline );

    return $pipeline->transform( $segment );
}
|
104
|
|
|
|
|
105
|
|
|
/**
 * Converts UI data (Layer 2) back into the Layer 1 XML representation.
 *
 * The pipeline runs, in this order: CtrlCharsPlaceHoldToAscii, PlaceHoldXliffTags,
 * FromLayer2ToRawXML, EmojiToEntity, RestoreXliffTagsContent, RestorePlaceHoldersToXLIFFLtGt
 * and DataRefRestore. The feature set can alter it through the 'fromLayer2ToLayer1' hook
 * before the segment is transformed.
 *
 * @param string $segment The Layer 2 segment coming from the UI.
 *
 * @return string The segment in Layer 1 format.
 * @throws Exception
 */
public function fromLayer2ToLayer1( string $segment ): string {
    $channel = new Pipeline( $this->source, $this->target, $this->dataRefMap );
    $channel->addLast( CtrlCharsPlaceHoldToAscii::class );
    $channel->addLast( PlaceHoldXliffTags::class );
    // Spelled exactly like the imported class name: PHP resolves class names
    // case-insensitively, but the reference should match the declaration.
    $channel->addLast( FromLayer2ToRawXML::class );
    $channel->addLast( EmojiToEntity::class );
    $channel->addLast( RestoreXliffTagsContent::class );
    $channel->addLast( RestorePlaceHoldersToXLIFFLtGt::class );
    $channel->addLast( DataRefRestore::class );

    /** @var Pipeline $channel */
    $channel = $this->featureSet->filter( 'fromLayer2ToLayer1', $channel );

    return $channel->transform( $segment );
}
|
128
|
|
|
|
|
129
|
|
|
/**
 * Converts the UI representation (Layer 2) into the form stored in the database (Layer 0).
 *
 * The UI is assumed to send strings whose XLIFF tags are not encoded while HTML is in its
 * XML-encoded representation, for example:
 *  - &lt;b&gt;de <ph id="mtc_1" equiv-text="base64:JTEkcw=="/>, <x id="1" /> &lt;/b&gt;que
 *
 * The conversion is done in two hops: Layer 2 to Layer 1 first, then Layer 1 to Layer 0.
 *
 * @param string $segment The Layer 2 segment coming from the UI.
 *
 * @return string The segment in Layer 0 format.
 * @throws Exception
 */
public function fromLayer2ToLayer0( string $segment ): string {
    $layer1Segment = $this->fromLayer2ToLayer1( $segment );

    return $this->fromLayer1ToLayer0( $layer1Segment );
}
|
146
|
|
|
|
|
147
|
|
|
/**
 * Converts a Layer 1 segment back to the database representation (Layer 0).
 *
 * This override adds nothing of its own: the work is delegated entirely to the parent
 * implementation.
 *
 * @param string $segment The Layer 1 segment to convert.
 *
 * @return string The segment in Layer 0 format.
 * @throws Exception
 */
public function fromLayer1ToLayer0( string $segment ): string {
    $layer0Segment = parent::fromLayer1ToLayer0( $segment );

    return $layer0Segment;
}
|
158
|
|
|
|
|
159
|
|
|
/**
 * Converts raw XLIFF content read from the file into the XML stored in the database (Layer 0).
 *
 * The pipeline runs, in this order: RemoveDangerousChars, PlaceHoldXliffTags,
 * EncodeControlCharsInXliff, RestoreXliffTagsContent and RestorePlaceHoldersToXLIFFLtGt.
 * The feature set can alter it through the 'fromRawXliffToLayer0' hook before the segment
 * is transformed.
 *
 * @param string $segment The raw XLIFF segment.
 *
 * @return string The segment in Layer 0 format.
 * @throws Exception
 */
public function fromRawXliffToLayer0( string $segment ): string {
    $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

    // Handlers are appended in execution order.
    $handlers = [
            RemoveDangerousChars::class,
            PlaceHoldXliffTags::class,
            EncodeControlCharsInXliff::class,
            RestoreXliffTagsContent::class,
            RestorePlaceHoldersToXLIFFLtGt::class,
    ];
    foreach ( $handlers as $handler ) {
        $pipeline->addLast( $handler );
    }

    /** @var Pipeline $pipeline */
    $pipeline = $this->featureSet->filter( 'fromRawXliffToLayer0', $pipeline );

    return $pipeline->transform( $segment );
}
|
180
|
|
|
|
|
181
|
|
|
/**
 * Converts a database XML string (Layer 0) into raw XLIFF content, so it can be exported
 * as valid XML (the original note mentions TMX files as the export target).
 *
 * The pipeline runs, in this order: PlaceHoldXliffTags, RemoveDangerousChars,
 * RestoreXliffTagsContent, RestorePlaceHoldersToXLIFFLtGt and LtGtEncode. The feature set
 * can alter it through the 'fromLayer0ToRawXliff' hook before the segment is transformed.
 *
 * @param string $segment The Layer 0 segment.
 *
 * @return string The segment as raw XLIFF content.
 * @throws Exception
 */
public function fromLayer0ToRawXliff( string $segment ): string {
    $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

    // Handlers are appended in execution order.
    $handlers = [
            PlaceHoldXliffTags::class,
            RemoveDangerousChars::class,
            RestoreXliffTagsContent::class,
            RestorePlaceHoldersToXLIFFLtGt::class,
            LtGtEncode::class,
    ];
    foreach ( $handlers as $handler ) {
        $pipeline->addLast( $handler );
    }

    /** @var Pipeline $pipeline */
    $pipeline = $this->featureSet->filter( 'fromLayer0ToRawXliff', $pipeline );

    return $pipeline->transform( $segment );
}
|
202
|
|
|
|
|
203
|
|
|
/**
 * Realigns the ids of the `mtc_` <ph> placeholders of a Layer 1 target with those of its source.
 *
 * When data coming from the database is converted from Layer 0 to Layer 1, the HTML
 * placeholders can sit in different positions in source and target, and their ids differ
 * because they are plain sequences. The source holds the truth: every target tag whose
 * equiv-text attribute is identical to the one of a source tag is replaced by that source
 * tag, so that it carries the source id.
 *
 * The target is returned unchanged when:
 *  - the pattern matching fails (preg_match_all() returns false), or
 *  - source and target do not hold the same number of placeholders (tag mismatch, the user
 *    has to fix it).
 * Target tags whose equiv-text has no counterpart left in the source are left as they are.
 *
 * @param string $source The Layer 1 source segment.
 * @param string $target The Layer 1 target segment to realign.
 *
 * @return string The target segment with realigned placeholder ids.
 * @see getSegmentsController in matecat
 */
public function realignIDInLayer1( string $source, string $target ): string {
    $pattern = '|<ph id ?= ?["\'](mtc_[0-9]+)["\'] ?(equiv-text=["\'].+?["\'] ?)/>|ui';

    // With PREG_PATTERN_ORDER: index 0 holds the whole tags, 1 the ids, 2 the equiv-text attributes.
    $sourceMatched = preg_match_all( $pattern, $source, $src_tags, PREG_PATTERN_ORDER );
    $targetMatched = preg_match_all( $pattern, $target, $trg_tags, PREG_PATTERN_ORDER );

    if ( $sourceMatched === false || $targetMatched === false ) {
        // The regex engine failed (for example on malformed UTF-8, since the pattern uses the
        // `u` modifier): nothing can be realigned reliably.
        return $target;
    }

    if ( count( $src_tags[ 0 ] ) !== count( $trg_tags[ 0 ] ) ) {
        return $target; // wrong number of tags: there is a tag mismatch in the translation, let the user fix it
    }

    $start_offset = 0;
    foreach ( $trg_tags[ 2 ] as $trg_tag_position => $equivText ) {

        $src_tag_position = array_search( $equivText, $src_tags[ 2 ], true );

        if ( $src_tag_position === false ) {
            // The content of this tag was changed in the translation (or every equal source
            // tag has already been used): there is nothing to realign it with.
            continue;
        }

        $targetTag = $trg_tags[ 0 ][ $trg_tag_position ];
        $sourceTag = $src_tags[ 0 ][ $src_tag_position ];

        // Replace ONLY ONE element AND the EXACT ONE: search from the end of the previous replacement.
        $tag_position_in_string = strpos( $target, $targetTag, $start_offset );
        if ( $tag_position_in_string === false ) {
            // Never use `false` as an offset: it would be read as 0 and corrupt the start of the string.
            continue;
        }

        // Drop the used entry so that array_search() can find the next equal one, if present.
        unset( $src_tags[ 2 ][ $src_tag_position ] );

        $target       = (string)substr_replace( $target, $sourceTag, $tag_position_in_string, strlen( $targetTag ) );
        $start_offset = $tag_position_in_string + strlen( $sourceTag ); // next starting point
    }

    return $target;
}
|
254
|
|
|
} |