MateCatFilter::fromLayer1ToLayer2()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 6
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 10
ccs 7
cts 7
cp 1
crap 1
rs 10
1
<?php
2
3
namespace Matecat\SubFiltering;
4
5
use Exception;
6
use Matecat\SubFiltering\Commons\Pipeline;
7
use Matecat\SubFiltering\Filters\CtrlCharsPlaceHoldToAscii;
8
use Matecat\SubFiltering\Filters\DataRefReplace;
9
use Matecat\SubFiltering\Filters\DataRefRestore;
10
use Matecat\SubFiltering\Filters\EmojiToEntity;
11
use Matecat\SubFiltering\Filters\EncodeControlCharsInXliff;
12
use Matecat\SubFiltering\Filters\EntityToEmoji;
13
use Matecat\SubFiltering\Filters\FromLayer2ToRawXML;
14
use Matecat\SubFiltering\Filters\LtGtEncode;
15
use Matecat\SubFiltering\Filters\PlaceHoldXliffTags;
16
use Matecat\SubFiltering\Filters\RemoveDangerousChars;
17
use Matecat\SubFiltering\Filters\RestorePlaceHoldersToXLIFFLtGt;
18
use Matecat\SubFiltering\Filters\RestoreXliffTagsContent;
19
use Matecat\SubFiltering\Filters\SpecialEntitiesToPlaceholdersForView;
20
21
/**
22
 * Class MateCatFilter
23
 *
24
 * This class creates the sub-filtering layers that allow data to be safely exchanged between the different Layers and the real file.
25
 *
26
 * # Definitions
27
 *
28
 * - Raw file, the real XML file in input, with data in XML
29
 * - Layer 0 is defined to be the Database. The data stored in the database should be in the same form (sanitized if needed) they come from Xliff file
30
 * - Layer 1 is defined to be external services and resources, for example, MT/TM server. This layer is different from layer 0, HTML subfiltering is applied here
31
 * - Layer 2 is defined to be the MateCat UI.
32
 *
33
 * # Constraints
34
 * - We have to maintain the compatibility with PH tags placed inside the XLIFF in the form <ph id="[0-9+]" equiv-text="&lt;br/&gt;"/> .
35
 *     Those tags are placed into the database as XML.
36
 * - HTML and other variables like android tags and custom features are placed into the database as encoded HTML &lt;br/&gt;
37
 *
38
 * - Data sent to the external services like MT/TM are sub-filtered:
39
 * -- &lt;br/&gt; become <ph id="mtc_[0-9]+" equiv-text="base64:Jmx0O2JyLyZndDs="/>
40
 * -- Existing tags in the XLIFF like <ph id="[0-9+]" equiv-text="&lt;br/&gt;"/> are left as is
41
 *
42
 *
43
 * @package SubFiltering
44
 */
45
class MateCatFilter extends AbstractFilter {
46
47
    /**
     * Converts a segment stored in the database (Layer 0) into its Layer 1 representation.
     *
     * A new pipeline is created from $this->source, $this->target and $this->dataRefMap,
     * populated by configureFromLayer0ToLayer1Pipeline(), and then passed through the
     * feature set so that it can be modified before the segment is transformed.
     *
     * @param string      $segment The Layer 0 segment to convert.
     * @param string|null $cid     Optional context identifier; it is not read inside this method.
     *
     * @return string The segment in Layer 1 form.
     * @throws Exception If the transformation process fails.
     */
    public function fromLayer0ToLayer1( string $segment, ?string $cid = null ): string {

        $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );
        $this->configureFromLayer0ToLayer1Pipeline( $pipeline );

        // Give the feature set the chance to modify the pipeline for this specific transformation.
        /** @var Pipeline $pipeline */
        $pipeline = $this->featureSet->filter( 'fromLayer0ToLayer1', $pipeline );

        // Run the segment through the resulting pipeline.
        return $pipeline->transform( $segment );

    }
70
71
    /**
     * Converts database raw XML content (Layer 0) into the UI structures (Layer 2).
     *
     * The conversion is performed in two steps: Layer 0 to Layer 1, then Layer 1 to Layer 2.
     *
     * @param string $segment The Layer 0 segment to convert.
     *
     * @return string The segment in Layer 2 form.
     * @throws Exception
     */
    public function fromLayer0ToLayer2( string $segment ): string {
        $layer1Segment = $this->fromLayer0ToLayer1( $segment );

        return $this->fromLayer1ToLayer2( $layer1Segment );
    }
84
85
    /**
     * Used to transform Layer 1 content to the UI structures (Layer 2)
     *
     * The pipeline applies, in this order: SpecialEntitiesToPlaceholdersForView, EntityToEmoji
     * and DataRefReplace. The feature set may modify the pipeline before it is run.
     *
     * @param string $segment The Layer 1 segment to convert.
     *
     * @return string The segment in Layer 2 form.
     * @throws Exception
     */
    public function fromLayer1ToLayer2( string $segment ): string {
        $channel = new Pipeline( $this->source, $this->target, $this->dataRefMap );
        $channel->addLast( SpecialEntitiesToPlaceholdersForView::class );
        $channel->addLast( EntityToEmoji::class );
        $channel->addLast( DataRefReplace::class );

        // Allow the feature set to modify the pipeline for this specific transformation.
        /** @var Pipeline $channel */
        $channel = $this->featureSet->filter( 'fromLayer1ToLayer2', $channel );

        return $channel->transform( $segment );
    }
104
105
    /**
     * Used to transform UI data (Layer 2) to the XML structures (Layer 1)
     *
     * The pipeline applies, in this order: CtrlCharsPlaceHoldToAscii, PlaceHoldXliffTags,
     * FromLayer2ToRawXML, EmojiToEntity, RestoreXliffTagsContent, RestorePlaceHoldersToXLIFFLtGt
     * and DataRefRestore. The feature set may modify the pipeline before it is run.
     *
     * @param string $segment The Layer 2 segment to convert.
     *
     * @return string The segment in Layer 1 form.
     * @throws Exception
     */
    public function fromLayer2ToLayer1( string $segment ): string {
        $channel = new Pipeline( $this->source, $this->target, $this->dataRefMap );
        $channel->addLast( CtrlCharsPlaceHoldToAscii::class );
        $channel->addLast( PlaceHoldXliffTags::class );
        // Spelled exactly as in the `use` statement of this file.
        $channel->addLast( FromLayer2ToRawXML::class );
        $channel->addLast( EmojiToEntity::class );
        $channel->addLast( RestoreXliffTagsContent::class );
        $channel->addLast( RestorePlaceHoldersToXLIFFLtGt::class );
        $channel->addLast( DataRefRestore::class );

        // Allow the feature set to modify the pipeline for this specific transformation.
        /** @var Pipeline $channel */
        $channel = $this->featureSet->filter( 'fromLayer2ToLayer1', $channel );

        return $channel->transform( $segment );
    }
128
129
    /**
     * Converts the UI structures (Layer 2) into the form stored in the database (Layer 0).
     *
     * The UI is assumed to send strings with XLF tags not encoded and HTML in its XML-encoded representation:
     * - &lt;b&gt;de <ph id="mtc_1" equiv-text="base64:JTEkcw=="/>, <x id="1" /> &lt;/b&gt;que
     *
     * The conversion is performed in two steps: Layer 2 to Layer 1, then Layer 1 to Layer 0.
     *
     * @param string $segment The Layer 2 segment to convert.
     *
     * @return string The segment in Layer 0 form.
     * @throws Exception
     */
    public function fromLayer2ToLayer0( string $segment ): string {
        $layer1Segment = $this->fromLayer2ToLayer1( $segment );

        return $this->fromLayer1ToLayer0( $layer1Segment );
    }
146
147
    /**
     * Transforms content from Layer 1 back to database raw XML content (Layer 0).
     *
     * This method only delegates to the parent implementation.
     *
     * @param string $segment The segment of content to be transformed from Layer 1 to Layer 0.
     *
     * @return string The resulting transformed content in Layer 0 format.
     * @throws Exception
     */
    public function fromLayer1ToLayer0( string $segment ): string {
        return parent::fromLayer1ToLayer0( $segment );
    }
158
159
    /**
     * Converts the raw XLIFF content coming from the file into the XML stored in the database (Layer 0).
     *
     * The pipeline applies, in this order: RemoveDangerousChars, PlaceHoldXliffTags,
     * EncodeControlCharsInXliff, RestoreXliffTagsContent and RestorePlaceHoldersToXLIFFLtGt.
     * The feature set may modify the pipeline before it is run.
     *
     * @param string $segment The raw XLIFF segment to convert.
     *
     * @return string The segment in Layer 0 form.
     * @throws Exception
     */
    public function fromRawXliffToLayer0( string $segment ): string {
        $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

        // Handlers are registered in the order in which they are listed.
        $handlers = [
                RemoveDangerousChars::class,
                PlaceHoldXliffTags::class,
                EncodeControlCharsInXliff::class,
                RestoreXliffTagsContent::class,
                RestorePlaceHoldersToXLIFFLtGt::class,
        ];

        foreach ( $handlers as $handler ) {
            $pipeline->addLast( $handler );
        }

        /** @var Pipeline $pipeline */
        $pipeline = $this->featureSet->filter( 'fromRawXliffToLayer0', $pipeline );

        return $pipeline->transform( $segment );
    }
180
181
    /**
     * Converts the database XML string (Layer 0) into valid XML, used when exporting into TMX files.
     *
     * The pipeline applies, in this order: PlaceHoldXliffTags, RemoveDangerousChars,
     * RestoreXliffTagsContent, RestorePlaceHoldersToXLIFFLtGt and LtGtEncode.
     * The feature set may modify the pipeline before it is run.
     *
     * @param string $segment The Layer 0 segment to convert.
     *
     * @return string The converted segment.
     * @throws Exception
     */
    public function fromLayer0ToRawXliff( string $segment ): string {
        $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

        // Handlers are registered in the order in which they are listed.
        $handlers = [
                PlaceHoldXliffTags::class,
                RemoveDangerousChars::class,
                RestoreXliffTagsContent::class,
                RestorePlaceHoldersToXLIFFLtGt::class,
                LtGtEncode::class,
        ];

        foreach ( $handlers as $handler ) {
            $pipeline->addLast( $handler );
        }

        /** @var Pipeline $pipeline */
        $pipeline = $this->featureSet->filter( 'fromLayer0ToRawXliff', $pipeline );

        return $pipeline->transform( $segment );
    }
202
203
    /**
     * Re-aligns the ids of the `mtc_` placeholder tags found in the target with the ones used in the source.
     *
     * When data are converted from Layer 0 to Layer 1, the HTML placeholders may sit in different positions
     * in source and target, and their ids may differ because they are simple sequences.
     * The source holds the truth: every target tag whose `equiv-text` attribute is identical to the one of a
     * source tag is replaced by that source tag, so that it carries the source id.
     *
     * Details:
     * - the comparison is made on the whole captured `equiv-text="..."` attribute (quotes and optional
     *   trailing space included), not only on the base64 payload;
     * - every source tag is used at most once, so repeated tags are paired in order of appearance;
     * - target tags with no equivalent in the source (content changed in the translation) are left untouched;
     * - when source and target do not contain the same number of tags, the target is returned unchanged
     *   and the tag mismatch is left to the user to fix.
     *
     * @param string $source The Layer 1 source segment, whose tags are taken as the reference.
     * @param string $target The Layer 1 target segment, whose tags have to be re-aligned.
     *
     * @return string The target segment with the re-aligned tags.
     * @see getSegmentsController in matecat
     *
     */
    public function realignIDInLayer1( string $source, string $target ): string {
        $pattern = '|<ph id ?= ?["\'](mtc_[0-9]+)["\'] ?(equiv-text=["\'].+?["\'] ?)/>|ui';
        preg_match_all( $pattern, $source, $src_tags, PREG_PATTERN_ORDER );
        preg_match_all( $pattern, $target, $trg_tags, PREG_PATTERN_ORDER );

        if ( count( $src_tags[ 0 ] ) !== count( $trg_tags[ 0 ] ) ) {
            return $target; // wrong number of tags: there is a tag mismatch in the translation, let the user fix it
        }

        // Byte offset from which the next target tag is searched, so that an already replaced tag is never matched again.
        $start_offset = 0;

        foreach ( $trg_tags[ 2 ] as $trg_tag_position => $equiv_text ) {

            $src_tag_position = array_search( $equiv_text, $src_tags[ 2 ], true );

            if ( $src_tag_position === false ) {
                // The content of this tag was changed in the translation: nothing to align it with.
                continue;
            }

            // Consume the source tag, so that an identical tag found later is paired with the next equal source tag.
            unset( $src_tags[ 2 ][ $src_tag_position ] );

            $trg_tag = $trg_tags[ 0 ][ $trg_tag_position ];
            $src_tag = $src_tags[ 0 ][ $src_tag_position ];

            // Replace ONLY ONE element AND the EXACT ONE.
            $tag_position_in_string = strpos( $target, $trg_tag, $start_offset );

            if ( $tag_position_in_string === false ) {
                // Defensive: never let `false` be coerced to offset 0 by substr_replace().
                continue;
            }

            $target       = (string)substr_replace( $target, $src_tag, $tag_position_in_string, strlen( $trg_tag ) );
            $start_offset = $tag_position_in_string + strlen( $src_tag ); // set the next starting point
        }

        return $target;
    }
254
}