|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace ValueParsers; |
|
4
|
|
|
|
|
5
|
|
|
use DataValues\TimeValue; |
|
6
|
|
|
use DateTime; |
|
7
|
|
|
use Exception; |
|
8
|
|
|
|
|
9
|
|
|
/**
 * Time parser using PHP's DateTime object. Since the behavior of PHP's parser can be quite odd
 * (for example, it pads missing elements with the current date and does actual calculations such as
 * parsing "2015-00-00" as "2014-12-30") this parser should only be used as a fallback.
 *
 * This class implements heuristics to guess which sequence of digits in the input represents the
 * year. This is relevant because PHP's parser can only handle 4-digit years as expected. The
 * following criteria are used to identify the year:
 *
 * - The first number longer than 2 digits or bigger than 59.
 * - The first number in the input, if it is bigger than 31.
 * - The third of three space-separated parts at the beginning of the input, if it is a number.
 * - The third number in the input.
 * - The last number in the input otherwise.
 *
 * @since 0.7
 *
 * @license GPL-2.0+
 * @author Addshore
 * @author Thiemo Mättig
 */
class PhpDateTimeParser extends StringValueParser {

	const FORMAT_NAME = 'php-date-time';

	/**
	 * @var MonthNameUnlocalizer
	 */
	private $monthNameUnlocalizer;

	/**
	 * @var ValueParser
	 */
	private $eraParser;

	/**
	 * @var ValueParser
	 */
	private $isoTimestampParser;

	/**
	 * @param MonthNameUnlocalizer $monthNameUnlocalizer Used to translate month names to English,
	 * the language PHP's DateTime parser understands.
	 * @param ValueParser $eraParser String parser that detects signs, "BC" suffixes and such and
	 * returns an array with the detected sign character and the remaining value.
	 * @param ValueParser $isoTimestampParser String parser that gets a language independent
	 * YMD-ordered timestamp and returns a TimeValue object. Used for precision detection.
	 */
	public function __construct(
		MonthNameUnlocalizer $monthNameUnlocalizer,
		ValueParser $eraParser,
		ValueParser $isoTimestampParser
	) {
		parent::__construct();

		$this->monthNameUnlocalizer = $monthNameUnlocalizer;
		$this->eraParser = $eraParser;
		$this->isoTimestampParser = $isoTimestampParser;
	}

	/**
	 * Parses a date/time string by normalizing it and handing it to PHP's DateTime object.
	 *
	 * @param string $value A string in one of the formats PHP's DateTime object accepts. In
	 * addition to that, years with 5 or more digits can be handled.
	 *
	 * @throws ParseException Any exception raised while parsing is rethrown as a ParseException
	 * carrying the original input and this parser's format name.
	 * @return TimeValue
	 */
	protected function stringParse( $value ) {
		// Kept for error reporting, $value is rewritten several times below.
		$rawValue = $value;

		try {
			list( $sign, $value ) = $this->eraParser->parse( $value );

			$value = trim( $value );
			$value = $this->monthNameUnlocalizer->unlocalize( $value );
			// This also cuts the year in $value to exactly 4 digits, $year holds the full year.
			$year = $this->fetchAndNormalizeYear( $value );

			$value = $this->getValueWithFixedSeparators( $value, $year );

			$this->validateDateTimeInput( $value );

			// Parse using the DateTime object (this will allow us to format the date in a nicer way)
			$dateTime = new DateTime( $value );

			// Fail if the DateTime object does calculations like changing 2015-00-00 to 2014-12-30.
			if ( $year !== null && $dateTime->format( 'Y' ) !== substr( $year, -4 ) ) {
				throw new ParseException( $value . ' is not a valid date.' );
			}

			// Input was one, two, or three numbers? Where the heck does a time come from?
			if ( $dateTime->format( 'H:i:s' ) !== '00:00:00'
				&& preg_match( '/^\D*\d+(?:\D+\d+){0,2}\D*$/', $value )
			) {
				throw new ParseException( $value . ' is not a valid date.' );
			}

			if ( $year !== null && strlen( $year ) > 4 ) {
				// DateTime only saw the last 4 digits, so put the full year back in.
				$timestamp = $sign . $year . $dateTime->format( '-m-d\TH:i:s\Z' );
			} else {
				$timestamp = $sign . $dateTime->format( 'Y-m-d\TH:i:s\Z' );
			}

			// Use a common base parser for precision detection and option handling.
			return $this->isoTimestampParser->parse( $timestamp );
		} catch ( Exception $exception ) {
			throw new ParseException( $exception->getMessage(), $rawValue, self::FORMAT_NAME );
		}
	}

	/**
	 * Rejects inputs that should not be handed to PHP's DateTime object.
	 *
	 * @param string $value
	 *
	 * @throws ParseException if the value contains no digit at all, or consists of nothing but a
	 * number of up to 7 digits followed by either a "+" and optional digits, or non-digits only.
	 */
	private function validateDateTimeInput( $value ) {
		// we don't support input of non-digits only, such as 'x'.
		if ( !preg_match( '/\d/', $value ) ) {
			throw new ParseException( $value . ' does not contain a digit.' );
		}

		// @todo i18n support for these exceptions
		// we don't support dates in format of year + timezone
		if ( preg_match( '/^\d{1,7}(\+\d*|\D*)$/', $value ) ) {
			throw new ParseException( $value . ' is not a valid date.' );
		}
	}

	/**
	 * PHP's DateTime object does not accept spaces as separators between year, month and day,
	 * e.g. dates like 20 12 2012, but we want to support them.
	 * See http://de1.php.net/manual/en/datetime.formats.date.php
	 *
	 * Dots, commas and whitespace following a digit or letter are replaced with "-" if the value
	 * consists of the year followed by two more numbers, and with "." otherwise.
	 *
	 * @param string $value A value in which the year was already cut to exactly 4 digits.
	 * @param string|null $year The full year as returned by fetchAndNormalizeYear, possibly longer
	 * than 4 digits.
	 *
	 * @return string
	 */
	private function getValueWithFixedSeparators( $value, $year = null ) {
		// The year in $value was cut to its last 4 digits, while $year may be longer. Comparing
		// with the full year would never detect the YMD order for years with 5 or more digits.
		$isYmd = $year !== null
			&& preg_match( '/^\D*' . substr( $year, -4 ) . '\D+\d+\D+\d+\D*$/', $value );
		$separator = $isYmd ? '-' : '.';
		// Meant to match separator characters after day and month. \p{L} matches letters outside
		// the ASCII range.
		return preg_replace( '/(?<=[\d\p{L}])[.,\s]\s*/', $separator, $value );
	}

	/**
	 * Tries to find and pad the sequence of digits in the input that represents the year.
	 * Refer to the class level documentation for a description of the heuristics used.
	 *
	 * @param string &$value A time value string, possibly containing a year. If found, the year in
	 * the string will be cut and padded to exactly 4 digits.
	 *
	 * @return string|null The full year, if found, not cut but padded to at least 4 digits.
	 */
	private function fetchAndNormalizeYear( &$value ) {
		// NOTE: When changing the regex matching below, keep the class level
		// documentation of the extraction heuristics up to date!
		$patterns = array(
			// Check if the string contains a number longer than 2 digits or bigger than 59.
			'/(?<!\d)(' // can not be prepended by a digit
				. '\d{3,}|' // any number longer than 2 digits, or
				. '[6-9]\d' // any number bigger than 59
				. ')(?!\d)/', // can not be followed by a digit

			// Check if the first number in the string is bigger than 31.
			'/^\D*(3[2-9]|[4-9]\d)/',

			// Check if the string starts with three space-separated parts or three numbers.
			'/^(?:'
				. '\S+\s+\S+\s+|' // e.g. "July<SPACE>4th<SPACE>", or
				. '\d+\D+\d+\D+' // e.g. "4.7."
				. ')(\d+)/', // followed by a number

			// Check if the string ends with a number.
			'/(\d+)\D*$/',
		);

		// The patterns are ordered by priority, the first one that matches wins.
		foreach ( $patterns as $pattern ) {
			if ( preg_match( $pattern, $value, $matches, PREG_OFFSET_CAPTURE ) ) {
				break;
			}
		}

		if ( !isset( $matches[1] ) ) {
			return null;
		}

		// With PREG_OFFSET_CAPTURE each group is a pair of the matched string and its byte offset.
		$year = $matches[1][0];
		$index = $matches[1][1];
		$length = strlen( $year );

		// Trim irrelevant leading zeros.
		$year = ltrim( $year, '0' );

		// Pad to at least 4 digits.
		$year = str_pad( $year, 4, '0', STR_PAD_LEFT );

		// Manipulate the value to have an exactly 4-digit year. Crucial for PHP's DateTime object.
		$value = substr_replace( $value, substr( $year, -4 ), $index, $length );

		return $year;
	}

}
|
213
|
|
|
|