Passed
Push — master ( 0e1770...775632 )
by Mauro
03:12
created

AbstractXliffParser::extractTuFromNode()   B

Complexity

Conditions 9
Paths 17

Size

Total Lines 27
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 13
nc 17
nop 8
dl 0
loc 27
rs 8.0555
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
namespace Matecat\XliffParser\XliffParser;
4
5
use DOMDocument;
6
use DOMElement;
7
use DOMNode;
8
use Matecat\XliffParser\Constants\Placeholder;
9
use Matecat\XliffParser\Utils\Emoji;
10
use Matecat\XliffParser\Utils\Strings;
11
use Matecat\XliffParser\XliffUtils\DataRefReplacer;
12
use Psr\Log\LoggerInterface;
13
14
/**
 * Base class for the XLIFF parsers.
 *
 * It holds the XLIFF version / proprietary flavour the parser was built for and
 * provides the DOM helpers shared by the concrete parsers: walking (nested)
 * <group> elements, extracting tag content and attributes, splitting content
 * on <mrk> markers and normalising note values.
 */
abstract class AbstractXliffParser {

    /**
     * Limit compared against the nesting depth of <group> elements:
     * a nested group is only visited while its depth is below this value.
     */
    const MAX_GROUP_RECURSION_LEVEL = 5;

    /**
     * @var LoggerInterface|null
     */
    protected $logger;

    /**
     * @var string|null
     */
    protected $xliffProprietary;

    /**
     * @var int
     */
    protected $xliffVersion;

    /**
     * AbstractXliffParser constructor.
     *
     * @param int                  $xliffVersion
     * @param string|null          $xliffProprietary
     * @param LoggerInterface|null $logger
     */
    public function __construct( $xliffVersion, $xliffProprietary = null, LoggerInterface $logger = null ) {
        $this->xliffVersion     = $xliffVersion;
        $this->xliffProprietary = $xliffProprietary;
        $this->logger           = $logger;
    }

    /**
     * Name of the element holding a translation unit:
     * 'trans-unit' when the version is strictly the integer 1, 'unit' otherwise.
     *
     * @return string
     */
    protected function getTuTagName() {
        return ( $this->xliffVersion === 1 ) ? 'trans-unit' : 'unit';
    }

    /**
     * @param DOMDocument $dom
     * @param array       $output
     *
     * @return array
     */
    abstract public function parse( DOMDocument $dom, $output = [] );

    /**
     * Extract trans-unit content from the current node.
     *
     * - If the node is a <group>, its <context-group> children are appended to
     *   $contextGroups, then every translation unit child is handed to
     *   extractTransUnit() and every nested <group> is visited recursively.
     * - If the node is itself a translation unit, it is handed to
     *   extractTransUnit() directly.
     * - Any other node is ignored.
     *
     * @param DOMNode     $childNode
     * @param             $transUnitIdArrayForUniquenessCheck
     * @param DOMDocument $dom
     * @param             $output
     * @param             $i
     * @param             $j
     * @param array       $contextGroups
     * @param int         $recursionLevel nesting depth of $childNode (0 for the outermost call)
     */
    protected function extractTuFromNode( $childNode, &$transUnitIdArrayForUniquenessCheck, DOMDocument $dom, &$output, &$i, &$j, $contextGroups = [], $recursionLevel = 0 ) {
        $tuTagName = $this->getTuTagName();

        if ( $childNode->nodeName === 'group' ) {

            // Collect ALL the context-groups of this group first, so that they are
            // available to every trans-unit regardless of their position in the group.
            foreach ( $childNode->childNodes as $nestedChildNode ) {
                if ( $nestedChildNode->nodeName === 'context-group' ) {
                    $contextGroups[] = $nestedChildNode;
                }
            }

            // Depth of the direct children. It is computed once, outside the loop:
            // incrementing a shared counter per nested group would make sibling groups
            // count as deeper levels and silently drop the fifth sibling onwards.
            $nestedLevel = $recursionLevel + 1;

            foreach ( $childNode->childNodes as $nestedChildNode ) {
                if ( $nestedChildNode->nodeName === 'group' ) {
                    // nested groups: stop descending once the depth limit is reached
                    if ( $nestedLevel < self::MAX_GROUP_RECURSION_LEVEL ) {
                        $this->extractTuFromNode( $nestedChildNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j, $contextGroups, $nestedLevel );
                    }
                } elseif ( $nestedChildNode->nodeName === $tuTagName ) {
                    $this->extractTransUnit( $nestedChildNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j, $contextGroups );
                }
            }
        } elseif ( $childNode->nodeName === $tuTagName ) {
            $this->extractTransUnit( $childNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j, $contextGroups );
        }
    }

    /**
     * Extract and populate 'trans-units' array
     *
     * @param $transUnit
     * @param $transUnitIdArrayForUniquenessCheck
     * @param $dom
     * @param $output
     * @param $i
     * @param $j
     * @param $contextGroups
     *
     * @return mixed
     */
    abstract protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j, $contextGroups = [] );

    /**
     * Build the 'raw-content' / 'attr' pair describing a node.
     *
     * @param DOMDocument $dom
     * @param DOMNode     $node
     *
     * @return array
     */
    protected function extractContent( DOMDocument $dom, DOMNode $node ) {
        return [
                'raw-content' => $this->extractTagContent( $dom, $node ),
                'attr'        => $this->extractTagAttributes( $node )
        ];
    }

    /**
     * Extract attributes if they are present
     *
     * Ex:
     * <p align=center style="font-size: 12px;">some text</p>
     *
     * $attr->nodeName == 'align' :: $attr->nodeValue == 'center'
     * $attr->nodeName == 'style' :: $attr->nodeValue == 'font-size: 12px;'
     *
     * @param DOMNode $element
     *
     * @return array attribute name => attribute value (empty when there are no attributes)
     */
    protected function extractTagAttributes( DOMNode $element ) {
        $tagAttributes = [];

        if ( $element->hasAttributes() ) {
            foreach ( $element->attributes as $attr ) {
                $tagAttributes[ $attr->nodeName ] = $attr->nodeValue;
            }
        }

        return $tagAttributes;
    }

    /**
     * Extract tag content from DOMDocument node
     *
     * Every child node is serialised with DOMDocument::saveXML(), passed through
     * Strings::fixNonWellFormedXml() and Emoji::toEntity(), and concatenated.
     * Occurrences of Placeholder::EMPTY_TAG_PLACEHOLDER are removed from the result.
     *
     * @param DOMDocument $dom
     * @param DOMNode     $element
     *
     * @return string
     */
    protected function extractTagContent( DOMDocument $dom, DOMNode $element ) {
        $extractedContent = '';

        if ( $element->hasChildNodes() ) {
            foreach ( $element->childNodes as $node ) {
                $extractedContent .= Emoji::toEntity( Strings::fixNonWellFormedXml( $dom->saveXML( $node ) ) );
            }
        }

        return str_replace( Placeholder::EMPTY_TAG_PLACEHOLDER, '', $extractedContent );
    }

    /**
     * Used to extract <seg-source> and <seg-target>
     *
     * The content of $childNode is split on every '<mrk ' occurrence; one entry is
     * returned per marker, with the keys:
     *  - 'mid'              the mid attribute found in the chunk, or the marker index
     *  - 'ext-prec-tags'    the text preceding the first marker (first entry only)
     *  - 'raw-content'      the text between the mrk opening tag and the first </mrk>
     *  - 'ext-succ-tags'    the text between the first </mrk> and the next one, if any
     *  - 'replaced-content' only when $originalData is not empty: 'raw-content'
     *                       (without trailing spaces) passed through DataRefReplacer
     *
     * @param DOMDocument $dom
     * @param DOMElement  $childNode
     * @param string      $originalRawContent not used; kept so that existing callers keep working
     * @param array       $originalData
     *
     * @return array
     */
    protected function extractContentWithMarksAndExtTags( DOMDocument $dom, DOMElement $childNode, $originalRawContent, array $originalData = [] ) {
        $source = [];

        // example:
        // <g id="1"><mrk mid="0" mtype="seg">An English string with g tags</mrk></g>
        $raw     = $this->extractTagContent( $dom, $childNode );
        $markers = preg_split( '#<mrk\s#si', $raw, -1 );

        // the map does not depend on the marker, build it only once
        $dataRefMap = !empty( $originalData ) ? $this->getDataRefMap( $originalData ) : null;

        for ( $mi = 0; isset( $markers[ $mi + 1 ] ); $mi++ ) {
            $chunk = $markers[ $mi + 1 ];

            $mid = [];
            preg_match( '|mid\s?=\s?["\'](.*?)["\']|si', $chunk, $mid );

            // if it's a Trados file the trailing spaces after </mrk> are meaningful,
            // so they are appended to the raw content
            $trailingSpaces = '';
            if ( $this->xliffProprietary === 'trados' ) {
                preg_match_all( '/<\/mrk>[\s]+/iu', $chunk, $trailingSpacesMatches );

                if ( !empty( $trailingSpacesMatches[ 0 ] ) ) {
                    // when there are several matches, the last one wins
                    $lastMatch      = end( $trailingSpacesMatches[ 0 ] );
                    $trailingSpaces = str_replace( '</mrk>', '', $lastMatch );
                }
            }

            // re-build the mrk tag after the split
            $originalMark = trim( '<mrk ' . $chunk );

            // strip the mrk opening tag, at this point we have: ---> 'Test </mrk> </g>>'
            $mark_string  = preg_replace( '#^<mrk\s[^>]+>(.*)#', '$1', $originalMark );
            $mark_content = preg_split( '#</mrk>#si', $mark_string );

            $sourceArray = [
                    'mid'           => ( isset( $mid[ 1 ] ) ) ? $mid[ 1 ] : $mi,
                    'ext-prec-tags' => ( $mi === 0 ? $markers[ 0 ] : "" ),
                    'raw-content'   => ( isset( $mark_content[ 0 ] ) ) ? $mark_content[ 0 ] . $trailingSpaces : '',
                    'ext-succ-tags' => ( isset( $mark_content[ 1 ] ) ) ? $mark_content[ 1 ] : '',
            ];

            if ( $dataRefMap !== null ) {
                $sourceArray[ 'replaced-content' ] = ( new DataRefReplacer( $dataRefMap ) )->replace( $mark_content[ 0 ] );
            }

            $source[] = $sourceArray;
        }

        return $source;
    }

    /**
     * Build the dataRef map: id attribute => raw content.
     * Entries without an 'id' attribute are skipped.
     *
     * @param array $originalData
     *
     * @return array
     */
    protected function getDataRefMap( $originalData ) {
        $dataRefMap = [];

        foreach ( $originalData as $datum ) {
            if ( isset( $datum[ 'attr' ][ 'id' ] ) ) {
                $dataRefMap[ $datum[ 'attr' ][ 'id' ] ] = $datum[ 'raw-content' ];
            }
        }

        return $dataRefMap;
    }

    /**
     * Tell whether the string contains at least one '<mrk' opening
     * followed by a whitespace character (case-insensitive).
     *
     * @param $raw
     *
     * @return bool
     */
    protected function stringContainsMarks( $raw ) {
        return preg_match( '#<mrk\s#si', $raw ) === 1;
    }

    /**
     * Wrap a note value either as JSON or as raw content.
     *
     * Returns [ 'json' => ... ] when Strings::isJSON() accepts the value,
     * [ 'raw-content' => ... ] otherwise.
     *
     * @param      $noteValue
     * @param bool $escapeStrings only honoured for double escaped values (see below)
     *
     * @return array
     * @throws \Exception
     */
    protected function JSONOrRawContentArray( $noteValue, $escapeStrings = true ) {
        //
        // convert double escaped entites
        //
        // Example:
        //
        // &amp;#39; ---> &#39;
        // &amp;amp; ---> &amp;
        // &amp;apos ---> &apos;
        //
        if ( Strings::isADoubleEscapedEntity( $noteValue ) ) {
            $noteValue = Strings::htmlspecialchars_decode( $noteValue, true );
        } else {
            // for non escaped entities $escapeStrings is always true for security reasons
            $escapeStrings = true;
        }

        if ( Strings::isJSON( $noteValue ) ) {
            return [ 'json' => Strings::cleanCDATA( $noteValue ) ];
        }

        return [ 'raw-content' => Strings::fixNonWellFormedXml( $noteValue, $escapeStrings ) ];
    }
}
299