Passed
Pull Request — master (#51)
by Domenico
02:05
created

MateCatFilter::fromLayer0ToLayer1()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 25
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 22
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 21
nc 1
nop 1
dl 0
loc 25
ccs 22
cts 22
cp 1
crap 1
rs 9.584
c 0
b 0
f 0
1
<?php
2
3
namespace Matecat\SubFiltering;
4
5
use Exception;
6
use Matecat\SubFiltering\Commons\Pipeline;
7
use Matecat\SubFiltering\Filters\CtrlCharsPlaceHoldToAscii;
8
use Matecat\SubFiltering\Filters\DataRefReplace;
9
use Matecat\SubFiltering\Filters\DataRefRestore;
10
use Matecat\SubFiltering\Filters\EmojiToEntity;
11
use Matecat\SubFiltering\Filters\EncodeControlCharsInXliff;
12
use Matecat\SubFiltering\Filters\EntityToEmoji;
13
use Matecat\SubFiltering\Filters\FromLayer2ToRawXML;
14
use Matecat\SubFiltering\Filters\LtGtEncode;
15
use Matecat\SubFiltering\Filters\PlaceHoldXliffTags;
16
use Matecat\SubFiltering\Filters\RemoveDangerousChars;
17
use Matecat\SubFiltering\Filters\RestorePlaceHoldersToXLIFFLtGt;
18
use Matecat\SubFiltering\Filters\RestoreXliffTagsContent;
19
use Matecat\SubFiltering\Filters\SpecialEntitiesToPlaceholdersForView;
20
21
/**
22
 * Class MateCatFilter
23
 *
24
 * This class is meant to create subfiltering layers to allow data to be safely sent and received from 2 different Layers and real file
25
 *
26
 * # Definitions
27
 *
28
 * - Raw file, the real XML file in input, with data in XML
29
 * - Layer 0 is defined to be the Database. The data stored in the database should be in the same form (sanitized if needed) they come from Xliff file
30
 * - Layer 1 is defined to be external services and resources, for example, MT/TM server. This layer is different from layer 0, HTML subfiltering is applied here
31
 * - Layer 2 is defined to be the MateCat UI.
32
 *
33
 * # Constraints
34
 * - We have to maintain compatibility with PH tags placed inside the XLIFF in the form <ph id="[0-9]+" equiv-text="&lt;br/&gt;"/>.
35
 *     Those tags are placed into the database as XML.
36
 * - HTML and other variables like android tags and custom features are placed into the database as encoded HTML &lt;br/&gt;
37
 *
38
 * - Data sent to the external services like MT/TM are sub-filtered:
39
 * -- &lt;br/&gt; become <ph id="mtc_[0-9]+" equiv-text="base64:Jmx0O2JyLyZndDs="/>
40
 * -- Existing tags in the XLIFF like <ph id="[0-9]+" equiv-text="&lt;br/&gt;"/> are left as is
41
 *
42
 *
43
 * @package SubFiltering
44
 */
45
class MateCatFilter extends AbstractFilter {
46
    /**
     * Converts a database segment (Layer 0) into its UI representation (Layer 2).
     *
     * The conversion is performed in two steps: Layer 0 -> Layer 1, then Layer 1 -> Layer 2.
     *
     * @param string $segment Segment in Layer 0 form.
     *
     * @return string The segment after both conversions have been applied.
     * @throws Exception
     */
    public function fromLayer0ToLayer2( string $segment ): string {
        // First hop: Layer 0 -> Layer 1.
        $layer1Segment = $this->fromLayer0ToLayer1( $segment );

        // Second hop: Layer 1 -> Layer 2.
        return $this->fromLayer1ToLayer2( $layer1Segment );
    }
59
60
    /**
     * Converts a Layer 1 segment (the form exchanged with external services) into the UI representation (Layer 2).
     *
     * @param string $segment Segment in Layer 1 form.
     *
     * @return string The segment as transformed by the pipeline.
     * @throws Exception
     */
    public function fromLayer1ToLayer2( string $segment ): string {
        $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

        // Handlers are appended to the pipeline in this order.
        $handlers = [
                SpecialEntitiesToPlaceholdersForView::class,
                EntityToEmoji::class,
                DataRefReplace::class,
        ];
        foreach ( $handlers as $handlerClass ) {
            $pipeline->addLast( $handlerClass );
        }

        // The feature set's 'fromLayer1ToLayer2' hook returns the pipeline that is actually run.
        /** @var Pipeline $pipeline */
        $pipeline = $this->featureSet->filter( 'fromLayer1ToLayer2', $pipeline );

        return $pipeline->transform( $segment );
    }
79
80
    /**
     * Converts a UI segment (Layer 2) into the XML structures of Layer 1.
     *
     * @param string $segment Segment in Layer 2 form.
     *
     * @return string The segment as transformed by the pipeline.
     * @throws Exception
     */
    public function fromLayer2ToLayer1( string $segment ): string {
        $channel = new Pipeline( $this->source, $this->target, $this->dataRefMap );
        $channel->addLast( CtrlCharsPlaceHoldToAscii::class );
        $channel->addLast( PlaceHoldXliffTags::class );
        // Spelled exactly as imported (FromLayer2ToRawXML), so the reference matches the declared class name.
        $channel->addLast( FromLayer2ToRawXML::class );
        $channel->addLast( EmojiToEntity::class );
        $channel->addLast( RestoreXliffTagsContent::class );
        $channel->addLast( RestorePlaceHoldersToXLIFFLtGt::class );
        $channel->addLast( DataRefRestore::class );

        // The feature set's 'fromLayer2ToLayer1' hook returns the pipeline that is actually run.
        /** @var Pipeline $channel */
        $channel = $this->featureSet->filter( 'fromLayer2ToLayer1', $channel );

        return $channel->transform( $segment );
    }
103
104
    /**
     * Converts a UI segment (Layer 2) into the form stored in the database (Layer 0).
     *
     * The conversion is performed in two steps: Layer 2 -> Layer 1, then Layer 1 -> Layer 0.
     *
     * It is assumed that the UI sends strings having XLF tags not encoded and HTML in XML encoding representation:
     * - &lt;b&gt;de <ph id="mtc_1" equiv-text="base64:JTEkcw=="/>, <x id="1" /> &lt;/b&gt;que
     *
     * @param string $segment Segment in Layer 2 form.
     *
     * @return string The segment after both conversions have been applied.
     * @throws Exception
     */
    public function fromLayer2ToLayer0( string $segment ): string {
        // First hop: Layer 2 -> Layer 1.
        $layer1Segment = $this->fromLayer2ToLayer1( $segment );

        // Second hop: Layer 1 -> Layer 0.
        return $this->fromLayer1ToLayer0( $layer1Segment );
    }
121
122
    /**
     * Converts a Layer 1 segment (the form exchanged with external services) back to the database form (Layer 0).
     *
     * The work is delegated entirely to the parent implementation.
     *
     * @param string $segment The segment in Layer 1 form.
     *
     * @return string The value returned by the parent conversion.
     * @throws Exception
     */
    public function fromLayer1ToLayer0( string $segment ): string {
        $layer0Segment = parent::fromLayer1ToLayer0( $segment );

        return $layer0Segment;
    }
133
134
    /**
     * Converts raw XLIFF content coming from the file into the XML form stored in the database (Layer 0).
     *
     * @param string $segment Raw XLIFF segment.
     *
     * @return string The segment as transformed by the pipeline.
     * @throws Exception
     */
    public function fromRawXliffToLayer0( string $segment ): string {
        $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

        // Handlers are appended to the pipeline in this order.
        $handlers = [
                RemoveDangerousChars::class,
                PlaceHoldXliffTags::class,
                EncodeControlCharsInXliff::class,
                RestoreXliffTagsContent::class,
                RestorePlaceHoldersToXLIFFLtGt::class,
        ];
        foreach ( $handlers as $handlerClass ) {
            $pipeline->addLast( $handlerClass );
        }

        // The feature set's 'fromRawXliffToLayer0' hook returns the pipeline that is actually run.
        /** @var Pipeline $pipeline */
        $pipeline = $this->featureSet->filter( 'fromRawXliffToLayer0', $pipeline );

        return $pipeline->transform( $segment );
    }
155
156
    /**
     * Converts a database XML string (Layer 0) into raw XLIFF; used to export segments into TMX files as valid XML.
     *
     * @param string $segment Segment in Layer 0 form.
     *
     * @return string The segment as transformed by the pipeline.
     * @throws Exception
     */
    public function fromLayer0ToRawXliff( string $segment ): string {
        $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

        // Handlers are appended to the pipeline in this order.
        $handlers = [
                PlaceHoldXliffTags::class,
                RemoveDangerousChars::class,
                RestoreXliffTagsContent::class,
                RestorePlaceHoldersToXLIFFLtGt::class,
                LtGtEncode::class,
        ];
        foreach ( $handlers as $handlerClass ) {
            $pipeline->addLast( $handlerClass );
        }

        // The feature set's 'fromLayer0ToRawXliff' hook returns the pipeline that is actually run.
        /** @var Pipeline $pipeline */
        $pipeline = $this->featureSet->filter( 'fromLayer0ToRawXliff', $pipeline );

        return $pipeline->transform( $segment );
    }
177
178
    /**
     * Realigns the ids of the `mtc_*` <ph> placeholders of a target string with the ones used in the source.
     *
     * Placeholder ids are simple sequences, so the same placeholder content may carry a different id in the
     * source and in the target. The source holds the truth: every target placeholder whose equiv-text attribute
     * is identical to the one of a source placeholder is replaced by that source placeholder (id included).
     * Each source placeholder is consumed at most once, so repeated contents are paired one by one.
     *
     * The target is returned untouched when the placeholders cannot be extracted or when their number differs
     * between source and target (tag mismatch, left to the user to fix). Target placeholders whose content has
     * no counterpart in the source are left as they are.
     *
     * @param string $source String holding the reference placeholders.
     * @param string $target String whose placeholder ids must be realigned.
     *
     * @return string The target with the realigned placeholders.
     * @see getSegmentsController in matecat
     */
    public function realignIDInLayer1( string $source, string $target ): string {
        $pattern = '|<ph id ?= ?["\'](mtc_[0-9]+)["\'] ?(equiv-text=["\'].+?["\'] ?)/>|ui';

        // preg_match_all() returns false on failure (e.g. malformed UTF-8 with the `u` modifier): nothing to realign.
        if (
                preg_match_all( $pattern, $source, $src_tags, PREG_PATTERN_ORDER ) === false ||
                preg_match_all( $pattern, $target, $trg_tags, PREG_PATTERN_ORDER ) === false
        ) {
            return $target;
        }

        if ( count( $src_tags[ 0 ] ) !== count( $trg_tags[ 0 ] ) ) {
            return $target; // wrong number of tags: there is a tag mismatch in the translation, let the user fix it
        }

        $start_offset = 0;
        foreach ( $trg_tags[ 2 ] as $trg_tag_position => $equiv_text ) {

            $src_tag_position = array_search( $equiv_text, $src_tags[ 2 ], true );

            if ( $src_tag_position === false ) {
                // the content of this tag was changed in the translation: it cannot be paired, leave it as is
                continue;
            }

            // remove the index to allow array_search to find the next equal one, if present
            unset( $src_tags[ 2 ][ $src_tag_position ] );

            $trg_tag = $trg_tags[ 0 ][ $trg_tag_position ];
            $src_tag = $src_tags[ 0 ][ $src_tag_position ];

            // replace ONLY ONE element AND the EXACT ONE: search starts after the previously replaced tag
            $tag_position_in_string = strpos( $target, $trg_tag, $start_offset );
            if ( $tag_position_in_string === false ) {
                // defensive: never let `false` be coerced to offset 0 and overwrite the beginning of the string
                continue;
            }

            $target       = substr_replace( $target, $src_tag, $tag_position_in_string, strlen( $trg_tag ) );
            $start_offset = $tag_position_in_string + strlen( $src_tag ); // set the next starting point
        }

        return $target;
    }
229
}