Completed
Branch master (939199)
by
unknown
39:35
created

includes/json/FormatJson.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Wrapper for json_encode and json_decode.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 */
22
23
/**
24
 * JSON formatter wrapper class
25
 */
26
class FormatJson {
	/**
	 * Skip escaping most characters above U+007F for readability and compactness.
	 * This encoding option saves 3 to 8 bytes (uncompressed) for each such character;
	 * however, it could break compatibility with systems that incorrectly handle UTF-8.
	 *
	 * @since 1.22
	 */
	const UTF8_OK = 1;

	/**
	 * Skip escaping the characters '<', '>', and '&', which have special meanings in
	 * HTML and XML.
	 *
	 * @warning Do not use this option for JSON that could end up in inline scripts.
	 * - HTML5, §4.3.1.2 Restrictions for contents of script elements
	 * - XML 1.0 (5th Ed.), §2.4 Character Data and Markup
	 *
	 * @since 1.22
	 */
	const XMLMETA_OK = 2;

	/**
	 * Skip escaping as many characters as reasonably possible.
	 * This is the bitwise OR of UTF8_OK and XMLMETA_OK.
	 *
	 * @warning When generating inline script blocks, use FormatJson::UTF8_OK instead.
	 *
	 * @since 1.22
	 */
	const ALL_OK = 3;

	/**
	 * If set, treat json objects '{...}' as associative arrays. Without this option,
	 * json objects will be converted to stdClass.
	 * This flag occupies bit 0x100, which does not overlap the *_OK encoding flags above.
	 *
	 * @since 1.24
	 */
	const FORCE_ASSOC = 0x100;

	/**
	 * If set, attempts to fix invalid json.
	 *
	 * @since 1.24
	 */
	const TRY_FIXING = 0x200;

	/**
	 * If set, strip comments from input before parsing as JSON.
	 *
	 * @since 1.25
	 */
	const STRIP_COMMENTS = 0x400;

	/**
	 * Regex that matches whitespace inside empty arrays and objects.
	 *
	 * This doesn't affect regular strings inside the JSON because those can't
	 * have a real line break (\n) in them, at this point they are already escaped
	 * as the string "\n" which this doesn't match.
	 *
	 * @private
	 */
	const WS_CLEANUP_REGEX = '/(?<=[\[{])\n\s*+(?=[\]}])/';

	/**
	 * Characters problematic in JavaScript.
	 *
	 * @note These are listed in ECMA-262 (5.1 Ed.), §7.3 Line Terminators along with U+000A (LF)
	 *       and U+000D (CR). However, PHP already escapes LF and CR according to RFC 4627.
	 */
	private static $badChars = [
		"\xe2\x80\xa8", // U+2028 LINE SEPARATOR
		"\xe2\x80\xa9", // U+2029 PARAGRAPH SEPARATOR
	];

	/**
	 * Escape sequences for characters listed in FormatJson::$badChars.
	 * Entries are in the same order as FormatJson::$badChars so the two arrays
	 * can be passed to str_replace() as search/replace pairs.
	 */
	private static $badCharsEscaped = [
		'\u2028', // U+2028 LINE SEPARATOR
		'\u2029', // U+2029 PARAGRAPH SEPARATOR
	];

	/**
	 * Returns the JSON representation of a value.
	 *
	 * @note Empty arrays are encoded as numeric arrays, not as objects, so cast any associative
	 *       array that might be empty to an object before encoding it.
	 *
	 * @note In pre-1.22 versions of MediaWiki, using this function for generating inline script
	 *       blocks may result in an XSS vulnerability, and quite likely will in XML documents
	 *       (cf. FormatJson::XMLMETA_OK). Use Xml::encodeJsVar() instead in such cases.
	 *
	 * @param mixed $value The value to encode. Can be any type except a resource.
	 * @param string|bool $pretty If a string, add non-significant whitespace to improve
	 *   readability, using that string for indentation. If true, use the default indent
	 *   string (four spaces).
	 * @param int $escaping Bitfield consisting of _OK class constants
	 * @return string|false String if successful; false upon failure
	 */
	public static function encode( $value, $pretty = false, $escaping = 0 ) {
		// Normalise $pretty to either an indent string or false
		if ( !is_string( $pretty ) ) {
			$pretty = $pretty ? '    ' : false;
		}

		// Detect once per request whether this PHP build pretty-prints an empty
		// array with embedded whitespace instead of '[]'
		static $bug66021;
		if ( $pretty !== false && $bug66021 === null ) {
			$bug66021 = json_encode( [], JSON_PRETTY_PRINT ) !== '[]';
		}

		// PHP escapes '/' to prevent breaking out of inline script blocks using '</script>',
		// which is hardly useful when '<' and '>' are escaped (and inadequate), and such
		// escaping negatively impacts the human readability of URLs and similar strings.
		$options = JSON_UNESCAPED_SLASHES;
		$options |= $pretty !== false ? JSON_PRETTY_PRINT : 0;
		$options |= ( $escaping & self::UTF8_OK ) ? JSON_UNESCAPED_UNICODE : 0;
		$options |= ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP );
		$json = json_encode( $value, $options );
		if ( $json === false ) {
			return false;
		}

		if ( $pretty !== false ) {
			// Workaround for <https://bugs.php.net/bug.php?id=66021>
			if ( $bug66021 ) {
				$json = preg_replace( self::WS_CLEANUP_REGEX, '', $json );
			}
			if ( $pretty !== '    ' ) {
				// Change the four-space indent to a tab indent: first the indent unit
				// directly after each newline, then every further unit that follows a tab
				$json = str_replace( "\n    ", "\n\t", $json );
				while ( strpos( $json, "\t    " ) !== false ) {
					$json = str_replace( "\t    ", "\t\t", $json );
				}

				if ( $pretty !== "\t" ) {
					// Change the tab indent to the provided indent
					$json = str_replace( "\t", $pretty, $json );
				}
			}
		}
		if ( $escaping & self::UTF8_OK ) {
			// Make sure U+2028 and U+2029 never appear unescaped in the output
			$json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
		}

		return $json;
	}

	/**
	 * Decodes a JSON string. It is recommended to use FormatJson::parse(),
	 * which returns more comprehensive result in case of an error, and has
	 * more parsing options.
	 *
	 * @param string $value The JSON string being decoded
	 * @param bool $assoc When true, returned objects will be converted into associative arrays.
	 *
	 * @return mixed The value encoded in JSON in appropriate PHP type.
	 * `null` is returned if $value represented `null`, if $value could not be decoded,
	 * or if the encoded data was deeper than the recursion limit.
	 * Use FormatJson::parse() to distinguish between types of `null` and to get proper error code.
	 */
	public static function decode( $value, $assoc = false ) {
		return json_decode( $value, $assoc );
	}

	/**
	 * Decodes a JSON string.
	 * Unlike FormatJson::decode(), if $value represents null value, it will be
	 * properly decoded as valid.
	 *
	 * @param string $value The JSON string being decoded
	 * @param int $options A bit field that allows FORCE_ASSOC, TRY_FIXING,
	 * STRIP_COMMENTS
	 * @return Status If valid JSON, the value is available in $result->getValue()
	 */
	public static function parse( $value, $options = 0 ) {
		if ( $options & self::STRIP_COMMENTS ) {
			$value = self::stripComments( $value );
		}
		$assoc = ( $options & self::FORCE_ASSOC ) !== 0;
		$result = json_decode( $value, $assoc );
		$code = json_last_error();

		if ( $code === JSON_ERROR_SYNTAX && ( $options & self::TRY_FIXING ) !== 0 ) {
			// The most common error is the trailing comma in a list or an object.
			// We cannot simply replace /,\s*[}\]]/ because it could be inside a string value.
			// But we could use the fact that JSON does not allow multi-line string values,
			// and remove trailing commas if they are at the end of a line.
			// JSON only allows 4 control characters: [ \t\r\n].  So we must not use '\s' for matching.
			// Regex match   ,]<any non-quote chars>\n   or   ,\n]   with optional spaces/tabs.
			$count = 0;
			$value =
				preg_replace( '/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/', '$1',
					$value, -1, $count );
			if ( $count > 0 ) {
				$result = json_decode( $value, $assoc );
				if ( JSON_ERROR_NONE === json_last_error() ) {
					// Report warning
					$st = Status::newGood( $result );
					$st->warning( wfMessage( 'json-warn-trailing-comma' )->numParams( $count ) );
					return $st;
				}
			}
			// The repair did not help; fall through and report the original error in $code
		}

		switch ( $code ) {
			case JSON_ERROR_NONE:
				return Status::newGood( $result );
			case JSON_ERROR_DEPTH:
				$msg = 'json-error-depth';
				break;
			case JSON_ERROR_STATE_MISMATCH:
				$msg = 'json-error-state-mismatch';
				break;
			case JSON_ERROR_CTRL_CHAR:
				$msg = 'json-error-ctrl-char';
				break;
			case JSON_ERROR_SYNTAX:
				$msg = 'json-error-syntax';
				break;
			case JSON_ERROR_UTF8:
				$msg = 'json-error-utf8';
				break;
			case JSON_ERROR_RECURSION:
				$msg = 'json-error-recursion';
				break;
			case JSON_ERROR_INF_OR_NAN:
				$msg = 'json-error-inf-or-nan';
				break;
			case JSON_ERROR_UNSUPPORTED_TYPE:
				$msg = 'json-error-unsupported-type';
				break;
			default:
				// Kept last so that every known code above is visibly handled first
				return Status::newFatal( wfMessage( 'json-error-unknown' )->numParams( $code ) );
		}
		return Status::newFatal( $msg );
	}

	/**
	 * Remove multiline and single line comments from an otherwise valid JSON
	 * input string. This can be used as a preprocessor to allow JSON
	 * formatted configuration files to contain comments.
	 *
	 * Comment markers inside string values are left alone. A single line
	 * comment is removed together with the newline that terminates it.
	 *
	 * @param string $json
	 * @return string JSON with comments removed
	 */
	public static function stripComments( $json ) {
		// Ensure we have a string
		$str = (string)$json;
		$buffer = '';
		$maxLen = strlen( $str );
		// Start of the pending chunk of non-comment text not yet copied to $buffer
		$mark = 0;
		// Offset of the '/' that opened the comment currently being skipped
		$commentStart = 0;

		$inString = false;
		$inComment = false;
		$multiline = false;

		for ( $idx = 0; $idx < $maxLen; $idx++ ) {
			switch ( $str[$idx] ) {
				case '"':
					if ( !$inComment ) {
						// A quote is escaped only when preceded by an odd number of
						// backslashes; an even run (e.g. "\\") is itself escaped
						// backslashes followed by a real quote.
						$backslashes = 0;
						for ( $back = $idx - 1; $back >= 0 && $str[$back] === '\\'; $back-- ) {
							$backslashes++;
						}
						if ( $backslashes % 2 === 0 ) {
							// Either started or ended a string
							$inString = !$inString;
						}
					}
					break;

				case '/':
					if ( $inString ) {
						// Slashes inside string values are never comment markers
						break;
					}
					$lookAhead = ( $idx + 1 < $maxLen ) ? $str[$idx + 1] : '';
					$lookBehind = ( $idx - 1 >= 0 ) ? $str[$idx - 1] : '';
					if ( !$inComment &&
						( $lookAhead === '/' || $lookAhead === '*' )
					) {
						// Transition into a comment
						// Add characters seen to buffer
						$buffer .= substr( $str, $mark, $idx - $mark );
						$commentStart = $idx;
						// Consume the look ahead character
						$idx++;
						// Track state
						$inComment = true;
						$multiline = $lookAhead === '*';

					} elseif ( $multiline && $lookBehind === '*' && $idx - $commentStart >= 3 ) {
						// Found the end of the current comment. The distance check makes
						// sure the '*' is not the one from the opening marker ("/*/").
						$mark = $idx + 1;
						$inComment = false;
						$multiline = false;
					}
					break;

				case "\n":
					if ( $inComment && !$multiline ) {
						// Found the end of the current comment
						$mark = $idx + 1;
						$inComment = false;
					}
					break;
			}
		}
		if ( $inComment ) {
			// Comment ends with input
			// Technically we should check to ensure that we aren't in
			// a multiline comment that hasn't been properly ended, but this
			// is a strip filter, not a validating parser.
			$mark = $maxLen;
		}
		// Add final chunk to buffer before returning
		return $buffer . substr( $str, $mark, $maxLen - $mark );
	}
}
339