1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Wikibase\Repo\Parsers; |
4
|
|
|
|
5
|
|
|
use DataValues\IllegalValueException; |
6
|
|
|
use DataValues\TimeValue; |
7
|
|
|
use ValueParsers\ParseException; |
8
|
|
|
use ValueParsers\ParserOptions; |
9
|
|
|
use ValueParsers\StringValueParser; |
10
|
|
|
use Wikimedia\AtEase\AtEase; |
11
|
|
|
|
12
|
|
|
/**
 * This parser is in essence the inverse operation of MediaWiki's Language::sprintfDate.
 *
 * The configured date format is translated into a regular expression, the input string is
 * matched against that expression, and the captured components are assembled into a
 * TimeValue in the Gregorian calendar model.
 *
 * @see \Language::sprintfDate
 *
 * @license GPL-2.0-or-later
 * @author Thiemo Kreuz
 */
class DateFormatParser extends StringValueParser {

	/**
	 * Format name handed to every ParseException this parser throws together with the raw input.
	 */
	const FORMAT_NAME = 'date-format';

	/**
	 * Option holding the date format string, as described in Language::sprintfDate. Defaults to
	 * "j F Y".
	 */
	const OPT_DATE_FORMAT = 'dateFormat';

	/**
	 * Option for unlocalizing non-canonical digits. Must be an array of strings, mapping canonical
	 * digit characters ("1", "2" and so on, possibly including "." and ",") to localized
	 * characters.
	 */
	const OPT_DIGIT_TRANSFORM_TABLE = 'digitTransformTable';

	/**
	 * Option for localized month names. Should be a two-dimensional array, the first dimension
	 * mapping the month's numbers 1 to 12 to arrays of localized month names, possibly including
	 * full month names, genitive names and abbreviations. Can also be a one-dimensional array of
	 * strings.
	 */
	const OPT_MONTH_NAMES = 'monthNames';

	/**
	 * Option to override the precision auto-detection and set a specific precision. Should be an
	 * integer or string containing one of the TimeValue::PRECISION_... constants.
	 */
	const OPT_PRECISION = 'precision';

	/**
	 * @param ParserOptions|null $options Options for this parser. Missing options fall back to
	 *  the date format "j F Y" and null for the digit transform table, the month names and the
	 *  precision.
	 */
	public function __construct( ParserOptions $options = null ) {
		parent::__construct( $options );

		$this->defaultOption( self::OPT_DATE_FORMAT, 'j F Y' );
		// FIXME: Should not be an option. Options should be trivial, never arrays or objects!
		$this->defaultOption( self::OPT_DIGIT_TRANSFORM_TABLE, null );
		$this->defaultOption( self::OPT_MONTH_NAMES, null );
		$this->defaultOption( self::OPT_PRECISION, null );
	}

	/**
	 * Parses a date string according to the configured date format.
	 *
	 * The precision of the result is the finest component found in the input (year, month, day,
	 * hour, minute or second), unless it is overridden via the precision option.
	 *
	 * @see StringValueParser::stringParse
	 *
	 * @param string $value
	 *
	 * @throws ParseException if the input does not match the format, the format is unsupported,
	 *  the precision option is not an integer, or TimeValue rejects the resulting timestamp.
	 * @return TimeValue
	 */
	protected function stringParse( $value ) {
		$date = $this->parseDate( $value );
		$precision = TimeValue::PRECISION_YEAR;
		// Year, month, day, hour, minute, second. Components that are not given stay 0.
		$time = [ $this->parseFormattedNumber( $date['year'] ), 0, 0, 0, 0, 0 ];

		// parseDate guarantees a continuous sequence, so each finer component is only looked at
		// when the coarser one exists.
		if ( isset( $date['month'] ) ) {
			$precision = TimeValue::PRECISION_MONTH;
			$time[1] = $this->findMonthMatch( $date );

			if ( isset( $date['day'] ) ) {
				$precision = TimeValue::PRECISION_DAY;
				$time[2] = $this->parseFormattedNumber( $date['day'] );

				if ( isset( $date['hour'] ) ) {
					$precision = TimeValue::PRECISION_HOUR;
					$time[3] = $this->parseFormattedNumber( $date['hour'] );

					if ( isset( $date['minute'] ) ) {
						$precision = TimeValue::PRECISION_MINUTE;
						$time[4] = $this->parseFormattedNumber( $date['minute'] );

						if ( isset( $date['second'] ) ) {
							$precision = TimeValue::PRECISION_SECOND;
							$time[5] = $this->parseFormattedNumber( $date['second'] );
						}
					}
				}
			}
		}

		$option = $this->getOption( self::OPT_PRECISION );
		if ( $option !== null ) {
			// ctype_digit must only ever see strings; passing other types is deprecated since
			// PHP 8.1. Anything that is neither an integer nor a digit-only string is rejected.
			if ( !is_int( $option ) && !( is_string( $option ) && ctype_digit( $option ) ) ) {
				throw new ParseException( 'Precision must be an integer' );
			}

			$option = (int)$option;

			// It's impossible to increase the detected precision via option, e.g. from year to month if
			// no month is given. If a day is given it can be increased, relevant for midnight.
			if ( $option <= $precision || $precision >= TimeValue::PRECISION_DAY ) {
				$precision = $option;
			}
		}

		$timestamp = vsprintf( '+%04s-%02s-%02sT%02s:%02s:%02sZ', $time );

		try {
			return new TimeValue( $timestamp, 0, 0, 0, $precision, TimeValue::CALENDAR_GREGORIAN );
		} catch ( IllegalValueException $ex ) {
			throw new ParseException( $ex->getMessage(), $value, self::FORMAT_NAME );
		}
	}

	// @codingStandardsIgnoreStart
	/**
	 * Translates a date format into a regular expression with the named groups "year", "month",
	 * "day", "hour", "minute" and "second" (as far as the format contains them).
	 *
	 * @see Language::sprintfDate
	 *
	 * @param string $format A date format, as described in Language::sprintfDate.
	 *
	 * @throws ParseException if the format contains a code this parser can not reverse.
	 * @return string Regular expression
	 */
	private function parseDateFormat( $format ) {
		$length = strlen( $format );

		$number = $this->getNumberPattern();
		$notFollowedByNumber = '(?!' . $number . ')';
		$optionalPunctuation = '\p{P}*';
		$optionalWhitespace = '\p{Z}*';
		$separation = $notFollowedByNumber . $optionalWhitespace;
		$pattern = '<^' . $optionalWhitespace;

		for ( $p = 0; $p < $length; $p++ ) {
			$code = $format[$p];

			// "x" is used as a prefix for MediaWiki specific, 2- and 3-letter codes.
			if ( $code === 'x' && $p < $length - 1 ) {
				$code .= $format[++$p];

				if ( preg_match( '<^x[ijkmot]$>', $code ) && $p < $length - 1 ) {
					$code .= $format[++$p];
				}
			}

			switch ( $code ) {
				// Year
				case 'o':
				case 'Y':
					$pattern .= '(?P<year>' . $number . '+)' . $separation;
					break;

				// Month
				case 'F':
				case 'M':
				case 'm':
				case 'n':
				case 'xg':
					// Either one or two digits, or one of the known month names. The month names
					// pattern is a list of alternatives that each start with "|".
					$pattern .= '(?P<month>' . $number . '{1,2}' . $notFollowedByNumber
						. $this->getMonthNamesPattern() . ')' . $optionalPunctuation
						. $optionalWhitespace;
					break;

				// Day
				case 'd':
				case 'j':
					$pattern .= '(?P<day>' . $number . '{1,2})' . $optionalPunctuation
						. $separation;
					break;

				// Hour
				case 'G':
				case 'H':
					$pattern .= '(?P<hour>' . $number . '{1,2})' . $separation;
					break;

				// Minute
				case 'i':
					$pattern .= '(?P<minute>' . $number . '{1,2})' . $separation;
					break;

				// Second
				case 's':
					$pattern .= '(?P<second>' . $number . '{1,2})' . $separation;
					break;

				// Escaped "x"
				case 'xx':
					$pattern .= 'x';
					break;

				// Escaped character or backslash at the end of the sequence
				case '\\':
					$pattern .= preg_quote( $p < $length - 1 ? $format[++$p] : '\\' );
					break;

				// Quoted sequence
				case '"':
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote !== false ) {
						$pattern .= preg_quote( substr( $format, $p + 1, $endQuote - $p - 1 ) );
						$p = $endQuote;
					} else {
						// Unterminated quote, the quotation mark is taken literally.
						$pattern .= '"';
					}
					break;

				// We can ignore "raw" and "raw toggle" when parsing, because we always accept
				// canonical digits.
				case 'xN':
				case 'xn':
					break;

				// 12-hour format
				case 'A':
				case 'a':
				case 'g':
				case 'h':

				// Full, formatted dates
				case 'c':
				case 'r':
				case 'U':

				// Day of the week
				case 'D':
				case 'l':
				case 'N':
				case 'w':

				// Timezone
				case 'e':
				case 'O':
				case 'P':
				case 'T':
				case 'Z':

				// Daylight saving time ("1" if true)
				case 'I':

				// Leap year ("1" if true)
				case 'L':

				// Number of days in the current month
				case 't':
				case 'xit':
				case 'xjt':

				// Week number
				case 'W':

				// "Hebrew" and "Roman" modifiers
				case 'xh':
				case 'xr':

				// 2-digit year
				case 'y':
				case 'xiy':

				// Day of the year
				case 'z':
				case 'xiz':

				// Day, month and year in incompatible calendar models (Hebrew, Iranian, and others)
				case 'xiF':
				case 'xij':
				case 'xin':
				case 'xiY':
				case 'xjF':
				case 'xjj':
				case 'xjn':
				case 'xjx':
				case 'xjY':
				case 'xkY':
				case 'xmF':
				case 'xmj':
				case 'xmn':
				case 'xmY':
				case 'xoY':
				case 'xtY':
					// The throw leaves the method, no break is needed (it would be unreachable).
					throw new ParseException( 'Unsupported date format "' . $code . '"' );

				// Character with no meaning
				default:
					if ( preg_match( '<^' . $optionalPunctuation . '$>u', $format[$p] ) ) {
						$pattern .= $optionalPunctuation;
					} elseif ( preg_match( '<^' . $optionalWhitespace . '$>u', $format[$p] ) ) {
						$pattern .= $optionalWhitespace;
					} else {
						$pattern .= preg_quote( $format[$p] );
					}
			}
		}

		return $pattern . '$>iu';
	}
	// @codingStandardsIgnoreEnd

	/**
	 * Builds a character class that matches a single digit, either canonical (\d) or one of the
	 * characters from the digit transform table, if such a table is set.
	 *
	 * @return string Partial regular expression
	 */
	private function getNumberPattern() {
		$pattern = '[\d';

		$transformTable = $this->getDigitTransformTable();
		if ( is_array( $transformTable ) ) {
			$pattern .= preg_quote( implode( '', $transformTable ) );
		}

		return $pattern . ']';
	}

	/**
	 * Builds one named group "month<key>" per entry of the month names option, each containing
	 * all names given for that entry as alternatives. Every group is prefixed with "|", so the
	 * result is meant to be appended to an existing alternative. Empty if no month names are set.
	 *
	 * @return string Partial regular expression
	 */
	private function getMonthNamesPattern() {
		$pattern = '';

		foreach ( $this->getMonthNames() as $i => $monthNames ) {
			// The cast allows both the two-dimensional and the one-dimensional form of the option.
			$pattern .= '|(?P<month' . $i . '>'
				. implode( '|', array_map( 'preg_quote', (array)$monthNames ) )
				. ')';
		}

		return $pattern;
	}

	/**
	 * Matches the input against the regular expression built from the configured date format.
	 *
	 * @param string $input
	 *
	 * @throws ParseException
	 * @return string[] Guaranteed to have the "year" key, optionally followed by more elements.
	 *  Guaranteed to be continuous, e.g. "year" and "day" with no "month" is illegal.
	 */
	private function parseDate( $input ) {
		$pattern = $this->parseDateFormat( $this->getDateFormat() );

		// A broken pattern makes preg_match emit a warning and return false. The warning is
		// suppressed here, the false is turned into an exception below.
		AtEase::suppressWarnings();
		$success = preg_match( $pattern, $input, $matches );
		AtEase::restoreWarnings();

		if ( !$success ) {
			throw new ParseException(
				$success === false
					? 'Illegal date format "' . $this->getDateFormat() . '"'
					: 'Failed to parse "' . $input . '"',
				$input,
				self::FORMAT_NAME
			);
		}

		if ( !isset( $matches['year'] )
			|| ( isset( $matches['day'] ) && !isset( $matches['month'] ) )
			|| ( isset( $matches['hour'] ) && !isset( $matches['day'] ) )
			|| ( isset( $matches['minute'] ) && !isset( $matches['hour'] ) )
			|| ( isset( $matches['second'] ) && !isset( $matches['minute'] ) )
		) {
			throw new ParseException( 'Non-continuous date format', $input, self::FORMAT_NAME );
		}

		return $matches;
	}

	/**
	 * Determines the month from the matches of the date pattern.
	 *
	 * @param string[] $matches
	 *
	 * @return int|string The number 1 to 12 of the first non-empty "month<number>" group (a
	 *  month name was matched), otherwise the canonical form of the numeric "month" match.
	 */
	private function findMonthMatch( $matches ) {
		for ( $i = 1; $i <= 12; $i++ ) {
			if ( !empty( $matches['month' . $i] ) ) {
				return $i;
			}
		}

		return $this->parseFormattedNumber( $matches['month'] );
	}

	/**
	 * Replaces localized digits with their canonical counterparts, using the digit transform
	 * table in reverse. Returns the input unchanged if no table is set.
	 *
	 * @param string $number
	 *
	 * @return string Canonical number
	 */
	private function parseFormattedNumber( $number ) {
		$transformTable = $this->getDigitTransformTable();

		if ( is_array( $transformTable ) ) {
			// Eliminate empty array values (bug T66347).
			$transformTable = array_filter( $transformTable );
			$number = strtr( $number, array_flip( $transformTable ) );
		}

		return $number;
	}

	/**
	 * @return string The value of the date format option.
	 */
	private function getDateFormat() {
		return $this->getOption( self::OPT_DATE_FORMAT );
	}

	/**
	 * @return string[]|null The value of the digit transform table option.
	 */
	private function getDigitTransformTable() {
		return $this->getOption( self::OPT_DIGIT_TRANSFORM_TABLE );
	}

	/**
	 * @return array[]|string[] The value of the month names option, or an empty array if the
	 *  option is not set or empty.
	 */
	private function getMonthNames() {
		return $this->getOption( self::OPT_MONTH_NAMES ) ?: [];
	}

}
422
|
|
|
|
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes. In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.