Strings::isJSON()   A
last analyzed

Complexity

Conditions 5
Paths 5

Size

Total Lines 24
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 5
eloc 14
nc 5
nop 1
dl 0
loc 24
rs 9.4888
c 2
b 0
f 0
1
<?php
2
3
namespace Matecat\XliffParser\Utils;
4
5
use Exception;
6
use Matecat\XliffParser\Constants\XliffTags;
7
use Matecat\XliffParser\Exception\NotValidJSONException;
8
use SimpleXMLElement;
9
10
class Strings {

    /**
     * Regular expression matching any XLIFF inline tag.
     * Built on first use by fixNonWellFormedXml() and cached for the rest of the request.
     */
    private static ?string $find_xliff_tags_reg = null;

    /**
     * Matches an entity whose ampersand has been escaped one extra time,
     * e.g. "&amp;#39;" or "&amp;lt;".
     */
    private static string $htmlEntityRegex = '/&amp;[#a-zA-Z0-9]{1,20};/u';

    /**
     * Parses the given fragment as XML (wrapped in a synthetic root node) and returns
     * the text directly contained in that root, with CDATA sections merged as plain text.
     *
     * libxml errors are collected internally while parsing, so a malformed fragment
     * only results in the Exception below and not in additional E_WARNING messages.
     * The previous libxml error-handling setting is always restored.
     *
     * @param string $testString
     *
     * @return string
     * @throws Exception when the wrapped fragment cannot be parsed as XML
     */
    public static function cleanCDATA( string $testString ): string {
        $previousSetting = libxml_use_internal_errors( true );

        try {
            $wrapper = new SimpleXMLElement( '<rootNoteNode>' . $testString . '</rootNoteNode>', LIBXML_NOCDATA );
        } finally {
            // Only discard the error buffer when we were the ones who enabled it,
            // so a caller that collects libxml errors does not lose its own entries.
            if ( !$previousSetting ) {
                libxml_clear_errors();
            }
            libxml_use_internal_errors( $previousSetting );
        }

        return $wrapper->__toString();
    }

    /**
     * Tells whether the string holds a JSON object or a JSON array.
     *
     * The string is first passed through cleanCDATA(), so CDATA wrappers are removed
     * and a string that is not a parsable XML fragment is reported as not JSON.
     * JSON primitives (numbers, quoted strings, booleans, null) are rejected on purpose.
     *
     * @param string $string
     *
     * @return bool
     */
    public static function isJSON( string $string ): bool {
        // Cheap early exit: a bare number is valid JSON but not an object/array.
        if ( is_numeric( $string ) ) {
            return false;
        }

        try {
            $candidate = trim( self::cleanCDATA( $string ) );
        } catch ( Exception $e ) {
            return false;
        }

        // Only documents opening with "{" or "[" are accepted;
        // everything else is a string or another primitive type.
        if ( $candidate === '' || !in_array( $candidate[ 0 ], [ '{', '[' ], true ) ) {
            return false;
        }

        json_decode( $candidate );

        return json_last_error() === JSON_ERROR_NONE;
    }

    /**
     * Decodes a JSON document into an associative array.
     *
     * @param string $string
     *
     * @return array the decoded array, or an empty array when the input is not valid JSON
     *               or does not decode to an array
     */
    public static function jsonToArray( string $string ): array {
        $decoded = json_decode( $string, true );

        return is_array( $decoded ) ? $decoded : [];
    }

    /**
     * Throws if the most recent json_encode()/json_decode() call failed.
     *
     * NOTE(review): this private method is not called anywhere inside this class at present
     * (static analysis reports it as unused); it is kept as-is rather than removed.
     *
     * @return void
     * @throws NotValidJSONException
     */
    private static function raiseLastJsonException(): void {
        [ $msg, $error ] = self::getLastJsonError();

        if ( $error !== JSON_ERROR_NONE ) {
            throw new NotValidJSONException( $msg, $error );
        }
    }

    /**
     * Returns the outcome of the most recent JSON operation.
     *
     * @return array a two-element list: [ message (null when there is no error), json_last_error() code ]
     */
    private static function getLastJsonError(): array {
        $messages = [
                JSON_ERROR_NONE           => null, // no errors
                JSON_ERROR_DEPTH          => ' - Maximum stack depth exceeded',
                JSON_ERROR_STATE_MISMATCH => ' - Underflow or the modes mismatch',
                JSON_ERROR_CTRL_CHAR      => ' - Unexpected control character found',
                JSON_ERROR_SYNTAX         => ' - Syntax error, malformed JSON',
                JSON_ERROR_UTF8           => ' - Malformed UTF-8 characters, possibly incorrectly encoded',
        ];

        $error = json_last_error();

        // array_key_exists (not isset) because the "no error" entry maps to null.
        $msg = array_key_exists( $error, $messages ) ? $messages[ $error ] : ' - Unknown error';

        return [ $msg, $error ];
    }

    /**
     * Escapes everything that is not an XLIFF inline tag, leaving the XLIFF tags untouched.
     *
     * This function exists because many developers started adding html tags directly into the XLIFF source since:
     * 1) XLIFF tag remapping is too complex for them
     * 2) Trados does not lock tags within the <source> that are expressed as &lt;b&gt; but is tolerant to html tags in <source>
     *
     * In short, people typed:
     * <source>The <b>red</b> house</source> or, worse, <source>5 > 3</source>
     * instead of
     * <source>The <g id="1">red</g> house.</source> and <source>5 &gt; 3</source>
     *
     * This function will do the following:
     * <g id="1">Hello</g>, 4 > 3          -> <g id="1">Hello</g>, 4 &gt; 3
     * <g id="1">Hello</g>, 4 > 3 &gt; 2   -> <g id="1">Hello</g>, 4 &gt; 3 &gt; 2
     * (entities that are already escaped are not escaped a second time)
     *
     * @param string $content
     * @param bool   $escapeStrings when falsy (false or null) the content is returned without escaping
     *
     * @return string
     */
    public static function fixNonWellFormedXml( string $content, ?bool $escapeStrings = true ): string {
        if ( self::$find_xliff_tags_reg === null ) {
            // Convert the list of tags in a regexp list, for example "g|x|bx|ex"
            $xliff_tags_reg_list = implode( '|', XliffTags::$tags );
            // Regexp to find all the XLIFF tags:
            //   </?               -> matches the tag start, for both opening and
            //                        closure tags (see the optional slash)
            //   ($xliff_tags_reg) -> matches one of the XLIFF tags in the list above
            //   (\s[^>]*)?        -> matches attributes and so on; ensures there's a
            //                        space after the tag, to not confuse for example a
            //                        "g" tag with a "gblabla"; [^>]* matches anything,
            //                        including additional spaces; the entire block is
            //                        optional, to allow tags with no spaces or attrs
            //   /? >              -> matches tag end, with optional slash for
            //                        self-closing ones
            // If you are wondering about spaces inside tags, look at this:
            // http://www.w3.org/TR/REC-xml/#sec-starttags
            // It says that there cannot be any space between the '<' and the tag name,
            // between '</' and the tag name, or inside '/>'. But you can add white
            // space after the tag name, though.
            self::$find_xliff_tags_reg = "#</?($xliff_tags_reg_list)(\\s[^>]*)?/?>#si";
        }

        // Find all the XLIFF tags
        preg_match_all( self::$find_xliff_tags_reg, $content, $matches );
        $tags = (array)( $matches[ 0 ] ?? [] );

        // Map every distinct tag to a placeholder that htmlspecialchars() leaves untouched
        $tags_placeholders = [];
        foreach ( $tags as $i => $tag ) {
            $tags_placeholders[ $tag ] = "#@!XLIFF-TAG-$i!@#";
        }

        $originalTags = array_keys( $tags_placeholders );
        $placeholders = array_values( $tags_placeholders );

        // Replace all XLIFF tags with placeholders that will not be escaped
        // (the array form of str_replace applies the pairs one after the other, in order)
        $content = str_replace( $originalTags, $placeholders, $content );

        // Escape the string with the remaining non-XLIFF tags
        if ( $escapeStrings ) {
            $content = htmlspecialchars( $content, ENT_NOQUOTES, 'UTF-8', false );
        }

        // Put again in place the original XLIFF tags replacing placeholders
        return str_replace( $placeholders, $originalTags, $content );
    }

    /**
     * Strips characters that are not allowed in XML: raw control characters
     * (everything below 0x20 except TAB, LF and CR, plus DEL) and their
     * hexadecimal character references such as "&#x0B;".
     *
     * @param $string
     *
     * @return string the cleaned string; an empty string when the input is null or
     *                when the regular expression engine fails (e.g. malformed UTF-8)
     */
    public static function removeDangerousChars( $string ): string {
        $patterns = [
            // remove binary chars in some xliff files
                '/[\x{00}-\x{08}\x{0B}\x{0C}\x{0E}-\x{1F}\x{7F}]/u',
            // clean invalid xml entities ( characters with ascii < 32 and different from 0A, 0D and 09 )
                '/&#x(0[0-8BCEF]|1[\dA-F]|7F);/u',
        ];

        // Patterns are applied in the listed order; preg_replace yields null on a PCRE error.
        $cleaned = preg_replace( $patterns, '', $string ?? '' );

        return $cleaned ?? '';
    }

    /**
     * Decodes HTML special-character entities, leaving quotes encoded.
     *
     * @param string $string
     * @param ?bool  $onlyEscapedEntities when exactly false the whole string is decoded;
     *                                    otherwise only double-escaped entities (e.g. "&amp;lt;")
     *                                    are decoded by one level (to "&lt;")
     *
     * @return string the decoded string; the input is returned unchanged when the
     *                regular expression engine fails (e.g. malformed UTF-8)
     */
    public static function htmlspecialchars_decode( string $string, ?bool $onlyEscapedEntities = false ): string {
        if ( false === $onlyEscapedEntities ) {
            return htmlspecialchars_decode( $string, ENT_NOQUOTES );
        }

        $decoded = preg_replace_callback(
                self::$htmlEntityRegex,
                static function ( array $match ): string {
                    return htmlspecialchars_decode( $match[ 0 ], ENT_NOQUOTES );
                },
                $string
        );

        // preg_replace_callback returns null on error, which would violate the return type.
        return $decoded ?? $string;
    }

    /**
     * Checks whether a string contains a double encoded entity.
     *
     * Example:
     *
     * &amp;#39; ---> true
     * &#39;     ---> false
     *
     * @param string $str
     *
     * @return bool
     */
    public static function isADoubleEscapedEntity( string $str ): bool {
        return preg_match( self::$htmlEntityRegex, $str ) === 1;
    }

    /**
     * Checks whether the value is a lowercase, hyphenated version-4 UUID.
     *
     * @param string $uuid
     *
     * @return bool
     */
    public static function isAValidUuid( $uuid ): bool {
        return preg_match( '/^[\da-f]{8}-[\da-f]{4}-4[\da-f]{3}-[89ab][\da-f]{3}-[\da-f]{12}$/', $uuid ) === 1;
    }

    /**
     * Splits the subject by a regular expression, dropping empty pieces.
     *
     * @param $pattern
     * @param $subject
     *
     * @return array|false|string[]
     */
    public static function preg_split( $pattern, $subject ) {
        return preg_split( $pattern, $subject, -1, PREG_SPLIT_NO_EMPTY );
    }

    /**
     * Counts the space characters (U+0020 only) at the end of the segment.
     *
     * @param string $segment
     *
     * @return int
     */
    public static function getTheNumberOfTrailingSpaces( $segment ): int {
        $withoutTrailingSpaces = rtrim( $segment, ' ' );

        return mb_strlen( $segment ) - mb_strlen( $withoutTrailingSpaces );
    }

}
278