Passed
Push — master ( 7096f4...95179a )
by Domenico
09:20
created

Strings::raiseLastJsonException()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 3
nc 2
nop 0
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Matecat\XliffParser\Utils;
4
5
use Exception;
6
use Matecat\XliffParser\Constants\XliffTags;
7
use Matecat\XliffParser\Exception\NotValidJSONException;
8
use SimpleXMLElement;
9
10
class Strings {
11
    // Cached regular expression matching XLIFF tags; stays null until fixNonWellFormedXml() builds it on first use.
    private static $find_xliff_tags_reg = null;
12
    // Matches a double-escaped entity: "&amp;" followed by 1-20 characters among "#", letters and digits, then ";" (e.g. &amp;#39;).
    private static $htmlEntityRegex     = '/&amp;[#a-zA-Z0-9]{1,20};/u';
13
14
    /**
     * Return the text of an XML fragment with its CDATA wrappers removed.
     *
     * The fragment is wrapped in a temporary <rootNoteNode> element and parsed with
     * LIBXML_NOCDATA, which merges CDATA sections into ordinary text nodes. The string
     * value of that temporary root element is what gets returned.
     *
     * @param string $testString XML fragment, possibly containing CDATA sections
     *
     * @return string
     * @throws Exception when the wrapped fragment cannot be parsed as XML
     */
    public static function cleanCDATA( $testString ) {
        $wrappedFragment = '<rootNoteNode>' . $testString . '</rootNoteNode>';
        $rootNode        = new SimpleXMLElement( $wrappedFragment, LIBXML_NOCDATA );

        return (string)$rootNode;
    }
25
26
    /**
     * Tell whether a string holds a JSON object or a JSON array.
     *
     * Steps, in order:
     *  - numeric input is rejected;
     *  - CDATA wrappers are stripped through cleanCDATA(); input that cannot be parsed
     *    as an XML fragment is rejected;
     *  - the result is trimmed and rejected when empty;
     *  - only text starting with "{" or "[" is decoded, so JSON strings, numbers and
     *    other primitives are never accepted;
     *  - the text is valid when json_decode() leaves no error message behind.
     *
     * @param $string
     *
     * @return bool
     */
    public static function isJSON( $string ) {
        if ( is_numeric( $string ) ) {
            return false;
        }

        try {
            $unwrapped = self::cleanCDATA( $string );
        } catch ( Exception $e ) {
            return false;
        }

        $candidate = trim( $unwrapped );
        if ( empty( $candidate ) ) {
            return false;
        }

        // A JSON string is "quoted"; only objects and arrays are accepted here.
        $firstChar = $candidate[ 0 ];
        if ( $firstChar !== "{" && $firstChar !== "[" ) {
            return false; // Not accepted: string or primitive types.
        }

        json_decode( $candidate );
        list( $errorMessage ) = self::getLastJsonError();

        return empty( $errorMessage );
    }
58
59
    /**
     * Decode a JSON string into an associative array.
     *
     * Anything that does not decode to an array (invalid JSON, or a JSON scalar such as
     * a string or a number) yields an empty array.
     *
     * @param string $string
     *
     * @return array
     */
    public static function jsonToArray( $string ) {
        $decoded = json_decode( $string, true );

        if ( !is_array( $decoded ) ) {
            return [];
        }

        return $decoded;
    }
69
70
    /**
     * Throw when the most recent JSON operation left an error behind.
     *
     * Reads the message/code pair from getLastJsonError() and throws if the code is
     * anything other than JSON_ERROR_NONE; otherwise it returns silently.
     *
     * @return void
     * @throws NotValidJSONException carrying the error message and the JSON error code
     */
    private static function raiseLastJsonException() {
        list( $msg, $error ) = self::getLastJsonError();

        // Strict comparison: both sides are integer JSON_ERROR_* codes.
        if ( $error !== JSON_ERROR_NONE ) {
            throw new NotValidJSONException( $msg, $error );
        }
    }
85
86
    /**
     * Describe the outcome of the most recent json_encode()/json_decode() call.
     *
     * @return array two-element list: [ message, code ]. The message is null when the
     *               code is JSON_ERROR_NONE, one of the texts below for the listed
     *               codes, and ' - Unknown error' for any other code.
     */
    private static function getLastJsonError() {
        if ( !function_exists( "json_last_error" ) ) {
            return [ null, JSON_ERROR_NONE ];
        }

        $descriptions = [
                JSON_ERROR_NONE           => null, # - No errors
                JSON_ERROR_DEPTH          => ' - Maximum stack depth exceeded',
                JSON_ERROR_STATE_MISMATCH => ' - Underflow or the modes mismatch',
                JSON_ERROR_CTRL_CHAR      => ' - Unexpected control character found',
                JSON_ERROR_SYNTAX         => ' - Syntax error, malformed JSON',
                JSON_ERROR_UTF8           => ' - Malformed UTF-8 characters, possibly incorrectly encoded',
        ];

        $code = json_last_error();

        // array_key_exists() rather than isset(): the JSON_ERROR_NONE entry is null on purpose.
        $text = array_key_exists( $code, $descriptions ) ? $descriptions[ $code ] : ' - Unknown error';

        return [ $text, $code ];
    }
125
126
    /**
     * Escape everything in a segment except genuine XLIFF inline tags.
     *
     * Background (from the original author's notes): many developers put HTML tags
     * straight into the XLIFF source because
     * 1) XLIFF tag remapping is too complex for them
     * 2) Trados does not lock tags within <source> that are expressed as &lt;b&gt;
     *    but is tolerant to html tags in <source>
     *
     * In short people typed:
     * <source>The <b>red</b> house</source> or worse <source>5 > 3</source>
     * instead of
     * <source>The <g id="1">red</g> house.</source> and <source>5 &gt; 3</source>
     *
     * What this function does:
     * <g id="1">Hello</g>, 4 > 3           -> <g id="1">Hello</g>, 4 &gt; 3
     * <g id="1">Hello</g>, 4 > 3 &gt; 2    -> <g id="1">Hello</g>, 4 &gt; 3 &gt; 2
     *
     * How: every XLIFF tag found is swapped for a placeholder, the remaining text is
     * passed through htmlspecialchars() (quotes untouched, existing entities not
     * double-encoded), then the placeholders are swapped back for the original tags.
     *
     * @param string $content
     * @param bool   $escapeStrings when false the escaping step is skipped
     *
     * @return string
     */
    public static function fixNonWellFormedXml( $content, $escapeStrings = true ) {
        if ( self::$find_xliff_tags_reg === null ) {
            // Tag names joined as a regexp alternation, for example "g|x|bx|ex"
            $tagAlternatives = implode( '|', XliffTags::$tags );

            // Pattern pieces:
            //   </?          -> tag start, opening or closing (optional slash)
            //   (names)      -> one of the XLIFF tag names above
            //   (\s[^>]*)?   -> optional attributes; the mandatory leading whitespace keeps
            //                   a "g" tag from being confused with e.g. "gblabla"
            //   /?>          -> tag end, with the optional slash of self-closing tags
            // About whitespace inside tags see http://www.w3.org/TR/REC-xml/#sec-starttags :
            // none is allowed between '<' (or '</') and the tag name, nor inside '/>',
            // but it may follow the tag name.
            self::$find_xliff_tags_reg = "#</?($tagAlternatives)(\\s[^>]*)?/?>#si";
        }

        // Collect every XLIFF tag occurring in the content
        preg_match_all( self::$find_xliff_tags_reg, $content, $matches );

        // One placeholder per distinct tag text; a repeated tag keeps its first position
        // in the map and takes the index of its last occurrence
        $placeholderByTag = [];
        foreach ( (array)$matches[ 0 ] as $index => $xliffTag ) {
            $placeholderByTag[ $xliffTag ] = "#@!XLIFF-TAG-$index!@#";
        }

        // Shield the XLIFF tags from escaping
        foreach ( $placeholderByTag as $xliffTag => $marker ) {
            $content = str_replace( $xliffTag, $marker, $content );
        }

        // Escape whatever is left, i.e. the non-XLIFF markup and stray < > &
        if ( $escapeStrings ) {
            $content = htmlspecialchars( $content, ENT_NOQUOTES, 'UTF-8', false );
        }

        // Restore the original XLIFF tags
        foreach ( $placeholderByTag as $xliffTag => $marker ) {
            $content = str_replace( $marker, $xliffTag, $content );
        }

        return $content;
    }
198
199
    /**
     * Strip control characters, and hexadecimal character references to them, from a string.
     *
     * Removed: raw characters 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F, plus the matching
     * two-digit hexadecimal references (e.g. &#x0B; or &#x1f;). Tab (0x09), line feed (0x0A)
     * and carriage return (0x0D) are kept.
     *
     * @param $string
     *
     * @return string the cleaned string; "" when nothing is left or a replacement failed
     */
    public static function removeDangerousChars( $string ) {
        // References to characters below 0x20 other than 09, 0A and 0D, plus 7F.
        // Hex digits are accepted in either case, so "&#x1f;" is removed like "&#x1F;".
        $regexpEntity = '/&#x(0[0-8BCEFbcef]|1[\dA-Fa-f]|7[Ff]);/u';

        // Raw binary characters found in some xliff files
        $regexpAscii = '/[\x{00}-\x{08}\x{0B}\x{0C}\x{0E}-\x{1F}\x{7F}]/u';

        $string = preg_replace( $regexpAscii, '', $string );
        $string = preg_replace( $regexpEntity, '', $string );

        // empty() also considers the literal "0" empty, which would wipe a legitimate segment.
        if ( $string === '0' ) {
            return $string;
        }

        return !empty( $string ) ? $string : "";
    }
216
217
    /**
     * Check whether $needle occurs anywhere inside $haystack (multibyte-aware search).
     *
     * Note the argument order: the needle comes first.
     *
     * @param string $needle
     * @param string $haystack
     *
     * @return bool
     */
    public static function contains( $needle, $haystack ) {
        $position = mb_strpos( $haystack, $needle );

        return false !== $position;
    }
226
227
    /**
     * Convert all applicable characters to HTML entities, leaving single and double
     * quotes untouched (ENT_NOQUOTES).
     *
     * @param string $string
     *
     * @return string
     */
    public static function htmlentities( $string ) {
        $encoded = htmlentities( $string, ENT_NOQUOTES );

        return $encoded;
    }
235
236
    /**
     * Decode special HTML entities, leaving quote entities untouched (ENT_NOQUOTES).
     *
     * With $onlyEscapedEntities strictly false the whole string is decoded. With any other
     * value, only the substrings matching self::$htmlEntityRegex (double-escaped entities
     * such as &amp;#39;) are decoded, each one individually, and the rest of the string is
     * left as it is.
     *
     * @param string $string
     * @param bool   $onlyEscapedEntities
     *
     * @return string
     */
    public static function htmlspecialchars_decode( $string, $onlyEscapedEntities = false ) {
        if ( false !== $onlyEscapedEntities ) {
            $decodeMatch = function ( $hit ) {
                return htmlspecialchars_decode( $hit[ 0 ], ENT_NOQUOTES );
            };

            return preg_replace_callback( self::$htmlEntityRegex, $decodeMatch, $string );
        }

        return htmlspecialchars_decode( $string, ENT_NOQUOTES );
    }
252
253
    /**
     * Checks whether a string contains a double encoded entity, i.e. a match for
     * self::$htmlEntityRegex.
     *
     * Example:
     *
     * &amp;#39; ---> true
     * &#39;     ---> false
     *
     * @param string $str
     *
     * @return bool
     */
    public static function isADoubleEscapedEntity( $str ) {
        $found = preg_match( self::$htmlEntityRegex, $str );

        return $found === 1;
    }
268
269
    /**
     * Check whether a string contains what looks like the end of an escaped HTML tag:
     * a slash, optional ASCII letters (any case), then "&gt;" -- e.g. "/b&gt;" or "/&gt;".
     *
     * @param string $str
     *
     * @return bool
     */
    public static function isAnEscapedHTML( $str ) {
        $found = preg_match( '#/[a-z]*&gt;#i', $str );

        return $found === 1;
    }
277
278
    /**
     * Check whether a string is a version-4 UUID written with lowercase hexadecimal digits.
     *
     * Expected layout is 8-4-4-4-12 hex digits, with "4" as the first digit of the third
     * group and one of 8, 9, a, b as the first digit of the fourth group.
     *
     * @param string $uuid
     *
     * @return bool
     */
    public static function isAValidUuid( $uuid ) {
        // The D modifier makes "$" match only at the very end of the subject; without it
        // a UUID followed by a trailing newline would also be accepted.
        return preg_match( '/^[\da-f]{8}-[\da-f]{4}-4[\da-f]{3}-[89ab][\da-f]{3}-[\da-f]{12}$/D', $uuid ) === 1;
    }
286
287
    /**
     * Split a string by a regular expression, with no limit on the number of pieces and
     * with empty pieces dropped (PREG_SPLIT_NO_EMPTY).
     *
     * @param $pattern
     * @param $subject
     *
     * @return array|false|string[]
     */
    public static function preg_split( $pattern, $subject ) {
        $noLimit = -1;
        $pieces  = preg_split( $pattern, $subject, $noLimit, PREG_SPLIT_NO_EMPTY );

        return $pieces;
    }
296
297
    /**
     * Escape ONLY the angle brackets that delimit a tag-like span.
     *
     * Every "<" followed later on the same line by a ">" has both brackets turned into
     * &lt; and &gt; (shortest span wins); brackets that are not part of such a pair are
     * left alone.
     *
     * For example:
     *
     * <a href="#">link</a> < text
     *
     * is converted to:
     *
     * &lt;a href="#"&gt;link&lt;/a&gt; < text
     *
     * @param string $string
     *
     * @return string
     */
    public static function escapeOnlyHTMLTags( $string ) {
        $escapeBrackets = function ( $hit ) {
            return '&lt;' . $hit[ 1 ] . '&gt;';
        };

        return preg_replace_callback( '/<(.*?)>/iu', $escapeBrackets, $string );
    }
315
316
    /**
     * Return the last character of a string, counted as a multibyte character rather
     * than a byte. An empty string yields an empty string.
     *
     * @param $string
     *
     * @return string
     */
    public static function lastChar( $string ) {
        $fromLastCharacter = -1;

        return mb_substr( $string, $fromLastCharacter );
    }
326
327
    /**
     * Count the plain space characters at the end of a segment.
     *
     * Only " " is considered: tabs, newlines and other whitespace are not trimmed, so
     * they are not counted and they stop the count.
     *
     * @param string $segment
     *
     * @return int
     */
    public static function getTheNumberOfTrailingSpaces( $segment ) {
        $withoutTrailingSpaces = rtrim( $segment, ' ' );
        $fullLength            = mb_strlen( $segment );
        $trimmedLength         = mb_strlen( $withoutTrailingSpaces );

        return $fullLength - $trimmedLength;
    }
335
336
    /**
     * Heuristically detect whether a string contains an HTML tag.
     *
     * Backslashes are stripped first (stripslashes). The literal "<>" is rejected. The
     * string is accepted when it contains an opening or closing tag whose name is made of
     * ASCII letters, the digits 1-6 and hyphens, optionally followed by attributes
     * (bare, or with a double-quoted, single-quoted or unquoted value) and an optional
     * self-closing slash.
     *
     * @TODO We need to improve this
     *
     * @param string $string
     *
     * @return bool
     */
    public static function isHtmlString( $string ) {
        $unslashed = stripslashes( $string );

        if ( $unslashed === '<>' ) {
            return false;
        }

        $tagPattern = "#</?[a-zA-Z1-6-]+((\s+[a-zA-Z1-6-]+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)/?>#";

        return preg_match( $tagPattern, $unslashed ) === 1;
    }
354
}
355