DateFormatParser::stringParse()   C
last analyzed

Complexity

Conditions 12
Paths 42

Size

Total Lines 53

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 53
rs 6.9666
c 0
b 0
f 0
cc 12
nc 42
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace Wikibase\Repo\Parsers;
4
5
use DataValues\IllegalValueException;
6
use DataValues\TimeValue;
7
use ValueParsers\ParseException;
8
use ValueParsers\ParserOptions;
9
use ValueParsers\StringValueParser;
10
use Wikimedia\AtEase\AtEase;
11
12
/**
13
 * This parser is in essence the inverse operation of MediaWiki's Language::sprintfDate.
14
 *
15
 * @see \Language::sprintfDate
16
 *
17
 * @license GPL-2.0-or-later
18
 * @author Thiemo Kreuz
19
 */
20
class DateFormatParser extends StringValueParser {
21
22
	/**
	 * Name of the expected format, passed along with every ParseException this parser throws
	 * for unparsable input.
	 */
	const FORMAT_NAME = 'date-format';
23
24
	/**
	 * Option for the date format to parse, as described in Language::sprintfDate. The
	 * constructor registers "j F Y" as the default.
	 */
	const OPT_DATE_FORMAT = 'dateFormat';
25
26
	/**
27
	 * Option for unlocalizing non-canonical digits. Must be an array of strings, mapping canonical
28
	 * digit characters ("1", "2" and so on, possibly including "." and ",") to localized
29
	 * characters.
30
	 */
31
	const OPT_DIGIT_TRANSFORM_TABLE = 'digitTransformTable';
32
33
	/**
34
	 * Option for localized month names. Should be a two-dimensional array, the first dimension
35
	 * mapping the month's numbers 1 to 12 to arrays of localized month names, possibly including
36
	 * full month names, genitive names and abbreviations. Can also be a one-dimensional array of
37
	 * strings.
38
	 */
39
	const OPT_MONTH_NAMES = 'monthNames';
40
41
	/**
42
	 * Option to override the precision auto-detection and set a specific precision. Should be an
43
	 * integer or string containing one of the TimeValue::PRECISION_... constants.
44
	 */
45
	const OPT_PRECISION = 'precision';
46
47
	/**
	 * Registers the default value of every option this parser understands.
	 *
	 * @param ParserOptions|null $options
	 */
	public function __construct( ParserOptions $options = null ) {
		parent::__construct( $options );

		$defaults = [
			self::OPT_DATE_FORMAT => 'j F Y',
			// FIXME: Should not be an option. Options should be trivial, never arrays or objects!
			self::OPT_DIGIT_TRANSFORM_TABLE => null,
			self::OPT_MONTH_NAMES => null,
			self::OPT_PRECISION => null,
		];

		foreach ( $defaults as $optionName => $fallback ) {
			$this->defaultOption( $optionName, $fallback );
		}
	}
56
57
	/**
	 * Parses a date string according to the configured date format and turns it into a
	 * Gregorian TimeValue. The precision is derived from the most specific date element found
	 * in the input and may then be adjusted by the "precision" option.
	 *
	 * @see StringValueParser::stringParse
	 *
	 * @param string $value
	 *
	 * @throws ParseException
	 * @return TimeValue
	 */
	protected function stringParse( $value ) {
		$date = $this->parseDate( $value );

		list( $time, $precision ) = $this->extractTimeParts( $date );
		$precision = $this->applyPrecisionOption( $precision );

		$timestamp = vsprintf( '+%04s-%02s-%02sT%02s:%02s:%02sZ', $time );

		try {
			return new TimeValue( $timestamp, 0, 0, 0, $precision, TimeValue::CALENDAR_GREGORIAN );
		} catch ( IllegalValueException $ex ) {
			throw new ParseException( $ex->getMessage(), $value, self::FORMAT_NAME );
		}
	}

	/**
	 * Collects the canonical year, month, day, hour, minute and second from the matched date
	 * elements and detects the precision from the most specific element that is present.
	 *
	 * @param string[] $date Matches as returned by parseDate.
	 *
	 * @return array Two elements: the list of six time components, and the detected precision.
	 */
	private function extractTimeParts( array $date ) {
		$time = [ $this->parseFormattedNumber( $date['year'] ), 0, 0, 0, 0, 0 ];
		$precision = TimeValue::PRECISION_YEAR;

		// Ordered from least to most specific. The position in this list, plus one, is the
		// element's index in $time.
		$precisionByField = [
			'month' => TimeValue::PRECISION_MONTH,
			'day' => TimeValue::PRECISION_DAY,
			'hour' => TimeValue::PRECISION_HOUR,
			'minute' => TimeValue::PRECISION_MINUTE,
			'second' => TimeValue::PRECISION_SECOND,
		];

		$index = 0;
		foreach ( $precisionByField as $field => $fieldPrecision ) {
			$index++;

			// Stop at the first missing element, more specific ones are not considered.
			if ( !isset( $date[$field] ) ) {
				break;
			}

			$precision = $fieldPrecision;
			// Months may be given as a name, everything else is always a number.
			$time[$index] = $field === 'month'
				? $this->findMonthMatch( $date )
				: $this->parseFormattedNumber( $date[$field] );
		}

		return [ $time, $precision ];
	}

	/**
	 * Applies the optional precision override to the auto-detected precision.
	 *
	 * @param int $detectedPrecision
	 *
	 * @throws ParseException if the precision option is neither an integer nor a string of digits
	 * @return int
	 */
	private function applyPrecisionOption( $detectedPrecision ) {
		$option = $this->getOption( self::OPT_PRECISION );

		if ( $option === null ) {
			return $detectedPrecision;
		}

		// ctype_digit must only be called with strings, other types are deprecated there.
		if ( !is_int( $option ) && !( is_string( $option ) && ctype_digit( $option ) ) ) {
			throw new ParseException( 'Precision must be an integer' );
		}

		$option = (int)$option;

		// It's impossible to increase the detected precision via option, e.g. from year to month if
		// no month is given. If a day is given it can be increased, relevant for midnight.
		if ( $option <= $detectedPrecision || $detectedPrecision >= TimeValue::PRECISION_DAY ) {
			return $option;
		}

		return $detectedPrecision;
	}
118
119
	// @codingStandardsIgnoreStart
120
	/**
	 * Translates a date format into a regular expression that matches dates written in that
	 * format. Supported date elements end up in the named groups "year", "month", "day", "hour",
	 * "minute" and "second". The expression uses "<" and ">" as delimiters and is
	 * case-insensitive and Unicode-aware.
	 *
	 * @see Language::sprintfDate
	 *
	 * @param string $format A date format, as described in Language::sprintfDate.
	 *
	 * @throws ParseException if the format contains a code this parser does not support
	 * @return string Regular expression
	 */
	private function parseDateFormat( $format ) {
		$length = strlen( $format );

		$number = $this->getNumberPattern();
		$notFollowedByNumber = '(?!' . $number . ')';
		$optionalPunctuation = '\p{P}*';
		$optionalWhitespace = '\p{Z}*';
		$separation = $notFollowedByNumber . $optionalWhitespace;
		$pattern = '<^' . $optionalWhitespace;

		for ( $p = 0; $p < $length; $p++ ) {
			$code = $format[$p];

			// "x" is used as a prefix for MediaWiki specific, 2- and 3-letter codes.
			if ( $code === 'x' && $p < $length - 1 ) {
				$code .= $format[++$p];

				if ( preg_match( '<^x[ijkmot]$>', $code ) && $p < $length - 1 ) {
					$code .= $format[++$p];
				}
			}

			switch ( $code ) {
				// Year
				case 'o':
				case 'Y':
					$pattern .= '(?P<year>' . $number . '+)' . $separation;
					break;

				// Month, either as a number or as one of the localized month names
				case 'F':
				case 'M':
				case 'm':
				case 'n':
				case 'xg':
					$pattern .= '(?P<month>' . $number . '{1,2}' . $notFollowedByNumber
						. $this->getMonthNamesPattern() . ')' . $optionalPunctuation
						. $optionalWhitespace;
					break;

				// Day
				case 'd':
				case 'j':
					$pattern .= '(?P<day>' . $number . '{1,2})' . $optionalPunctuation
						. $separation;
					break;

				// Hour
				case 'G':
				case 'H':
					$pattern .= '(?P<hour>' . $number . '{1,2})' . $separation;
					break;

				// Minute
				case 'i':
					$pattern .= '(?P<minute>' . $number . '{1,2})' . $separation;
					break;

				// Second
				case 's':
					$pattern .= '(?P<second>' . $number . '{1,2})' . $separation;
					break;

				// Escaped "x"
				case 'xx':
					$pattern .= 'x';
					break;

				// Escaped character or backslash at the end of the sequence
				case '\\':
					$pattern .= preg_quote( $p < $length - 1 ? $format[++$p] : '\\' );
					break;

				// Quoted sequence
				case '"':
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote !== false ) {
						$pattern .= preg_quote( substr( $format, $p + 1, $endQuote - $p - 1 ) );
						// Continue after the closing quote.
						$p = $endQuote;
					} else {
						// A quote that is never closed is taken literally.
						$pattern .= '"';
					}
					break;

				// We can ignore "raw" and "raw toggle" when parsing, because we always accept
				// canonical digits.
				case 'xN':
				case 'xn':
					break;

				// 12-hour format
				case 'A':
				case 'a':
				case 'g':
				case 'h':

				// Full, formatted dates
				case 'c':
				case 'r':
				case 'U':

				// Day of the week
				case 'D':
				case 'l':
				case 'N':
				case 'w':

				// Timezone
				case 'e':
				case 'O':
				case 'P':
				case 'T':
				case 'Z':

				// Daylight saving time ("1" if true)
				case 'I':

				// Leap year ("1" if true)
				case 'L':

				// Number of days in the current month
				case 't':
				case 'xit':
				case 'xjt':

				// Week number
				case 'W':

				// "Hebrew" and "Roman" modifiers
				case 'xh':
				case 'xr':

				// 2-digit year
				case 'y':
				case 'xiy':

				// Day of the year
				case 'z':
				case 'xiz':

				// Day, month and year in incompatible calendar models (Hebrew, Iranian, and others)
				case 'xiF':
				case 'xij':
				case 'xin':
				case 'xiY':
				case 'xjF':
				case 'xjj':
				case 'xjn':
				case 'xjx':
				case 'xjY':
				case 'xkY':
				case 'xmF':
				case 'xmj':
				case 'xmn':
				case 'xmY':
				case 'xoY':
				case 'xtY':
					// All of the codes above share this one statement via fall-through.
					throw new ParseException( 'Unsupported date format "' . $code . '"' );

				// Character with no meaning
				default:
					if ( preg_match( '<^' . $optionalPunctuation . '$>u', $format[$p] ) ) {
						$pattern .= $optionalPunctuation;
					} elseif ( preg_match( '<^' . $optionalWhitespace . '$>u', $format[$p] ) ) {
						$pattern .= $optionalWhitespace;
					} else {
						$pattern .= preg_quote( $format[$p] );
					}
			}
		}

		return $pattern . '$>iu';
	}
301
	// @codingStandardsIgnoreEnd
302
303
	/**
	 * Builds a character class that matches a single digit, either a canonical one or one of
	 * the characters from the digit transform table, if such a table is set.
	 *
	 * @return string Partial regular expression
	 */
	private function getNumberPattern() {
		$table = $this->getDigitTransformTable();

		$localizedCharacters = is_array( $table )
			? preg_quote( implode( '', $table ) )
			: '';

		return '[\d' . $localizedCharacters . ']';
	}
316
317
	/**
	 * Builds one alternative per month that has localized names, each wrapped in a named group
	 * "month" followed by the key the names are stored under in the month names option. Every
	 * alternative starts with "|", so the result can be appended to an existing pattern.
	 *
	 * @return string Partial regular expression, empty if there are no month names
	 */
	private function getMonthNamesPattern() {
		$pattern = '';

		foreach ( $this->getMonthNames() as $i => $monthNames ) {
			// Eliminate empty names. An empty alternative would allow the month to match
			// nothing at all, which is never a valid month.
			$names = array_filter( (array)$monthNames, static function ( $name ) {
				return $name !== null && $name !== '';
			} );

			if ( !$names ) {
				continue;
			}

			$pattern .= '|(?P<month' . $i . '>'
				. implode( '|', array_map( 'preg_quote', $names ) )
				. ')';
		}

		return $pattern;
	}
331
332
	/**
	 * Matches the input against the regular expression derived from the configured date format
	 * and returns the matched date elements.
	 *
	 * @param string $input
	 *
	 * @throws ParseException
	 * @return string[] Guaranteed to have the "year" key, optionally followed by more elements.
	 *  Guaranteed to be continuous, e.g. "year" and "day" with no "month" is illegal.
	 */
	private function parseDate( $input ) {
		$format = $this->getDateFormat();
		$regex = $this->parseDateFormat( $format );

		AtEase::suppressWarnings();
		$result = preg_match( $regex, $input, $groups );
		AtEase::restoreWarnings();

		// False means the regular expression itself could not be executed.
		if ( $result === false ) {
			throw new ParseException(
				'Illegal date format "' . $format . '"',
				$input,
				self::FORMAT_NAME
			);
		}

		if ( !$result ) {
			throw new ParseException(
				'Failed to parse "' . $input . '"',
				$input,
				self::FORMAT_NAME
			);
		}

		// Every element requires the one before it in this list to be present as well.
		$chain = [ 'year', 'month', 'day', 'hour', 'minute', 'second' ];
		$continuous = isset( $groups['year'] );

		for ( $i = 1; $continuous && $i < count( $chain ); $i++ ) {
			if ( isset( $groups[$chain[$i]] ) && !isset( $groups[$chain[$i - 1]] ) ) {
				$continuous = false;
			}
		}

		if ( !$continuous ) {
			throw new ParseException( 'Non-continuous date format', $input, self::FORMAT_NAME );
		}

		return $groups;
	}
367
368
	/**
	 * Resolves the month from the matches. A non-empty "month1" to "month12" group wins,
	 * otherwise the "month" group is unlocalized as a number.
	 *
	 * @param string[] $matches
	 *
	 * @return int|string
	 */
	private function findMonthMatch( $matches ) {
		foreach ( range( 1, 12 ) as $month ) {
			$group = 'month' . $month;

			if ( isset( $matches[$group] ) && $matches[$group] ) {
				return $month;
			}
		}

		return $this->parseFormattedNumber( $matches['month'] );
	}
382
383
	/**
	 * Replaces the localized characters from the digit transform table with their canonical
	 * counterparts. Returns the number as it is if no such table is set.
	 *
	 * @param string $number
	 *
	 * @return string Canonical number
	 */
	private function parseFormattedNumber( $number ) {
		$table = $this->getDigitTransformTable();

		if ( !is_array( $table ) ) {
			return $number;
		}

		// Eliminate empty array values (bug T66347), then invert the table so it maps localized
		// characters to canonical ones.
		$canonicalByLocalized = array_flip( array_filter( $table ) );

		return strtr( $number, $canonicalByLocalized );
	}
399
400
	/**
	 * @return string The value of the date format option
	 */
	private function getDateFormat() {
		$format = $this->getOption( self::OPT_DATE_FORMAT );

		return $format;
	}
406
407
	/**
	 * @return string[]|null The value of the digit transform table option
	 */
	private function getDigitTransformTable() {
		$table = $this->getOption( self::OPT_DIGIT_TRANSFORM_TABLE );

		return $table;
	}
413
414
	/**
	 * @return array[]|string[] The value of the month names option, or an empty array if the
	 *  option holds nothing usable
	 */
	private function getMonthNames() {
		$names = $this->getOption( self::OPT_MONTH_NAMES );

		if ( !$names ) {
			return [];
		}

		return $names;
	}
420
421
}
422