PhpDateTimeParser::stringParse()   B
last analyzed

Complexity

Conditions 8
Paths 18

Size

Total Lines 41

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 8

Importance

Changes 0
Metric Value
dl 0
loc 41
ccs 20
cts 20
cp 1
rs 8.0195
c 0
b 0
f 0
cc 8
nc 18
nop 1
crap 8
1
<?php
2
3
namespace ValueParsers;
4
5
use DataValues\TimeValue;
6
use DateTime;
7
use Exception;
8
9
/**
 * Time parser using PHP's DateTime object. Since the behavior of PHP's parser can be quite odd
 * (for example, it pads missing elements with the current date and does actual calculations such as
 * parsing "2015-00-00" as "2014-12-30") this parser should only be used as a fallback.
 *
 * This class implements heuristics to guess which sequence of digits in the input represents the
 * year. This is relevant because PHP's parser can only handle 4-digit years as expected. The
 * following criteria are used to identify the year:
 *
 * - The first number longer than 2 digits or bigger than 59.
 * - The first number in the input, if it is bigger than 31.
 * - The third of three space-separated parts at the beginning of the input, if it is a number.
 * - The third number in the input.
 * - The last number in the input otherwise.
 *
 * @since 0.7
 *
 * @license GPL-2.0+
 * @author Addshore
 * @author Thiemo Kreuz
 */
class PhpDateTimeParser extends StringValueParser {

	const FORMAT_NAME = 'php-date-time';

	/**
	 * @var MonthNameUnlocalizer
	 */
	private $monthNameUnlocalizer;

	/**
	 * @var ValueParser
	 */
	private $eraParser;

	/**
	 * @var ValueParser
	 */
	private $isoTimestampParser;

	/**
	 * @param MonthNameUnlocalizer $monthNameUnlocalizer Used to translate month names to English,
	 * the language PHP's DateTime parser understands.
	 * @param ValueParser $eraParser String parser that detects signs, "BC" suffixes and such and
	 * returns an array with the detected sign character and the remaining value.
	 * @param ValueParser $isoTimestampParser String parser that gets a language independent
	 * YMD-ordered timestamp and returns a TimeValue object. Used for precision detection.
	 */
	public function __construct(
		MonthNameUnlocalizer $monthNameUnlocalizer,
		ValueParser $eraParser,
		ValueParser $isoTimestampParser
	) {
		parent::__construct();

		$this->monthNameUnlocalizer = $monthNameUnlocalizer;
		$this->eraParser = $eraParser;
		$this->isoTimestampParser = $isoTimestampParser;
	}

	/**
	 * Turns a free-form date/time string into a TimeValue.
	 *
	 * The input is split into sign and remainder by the era parser, month names are unlocalized,
	 * the year is located and cut to 4 digits, separators are normalized, and the result is fed to
	 * PHP's DateTime. The resulting timestamp (with the full, uncut year put back) is finally
	 * handed to the ISO timestamp parser.
	 *
	 * @param string $value in a format as specified by the PHP DateTime object
	 *       there are exceptions as we can handle 5+ digit dates
	 *
	 * @throws ParseException Any exception raised while parsing is rethrown as a ParseException
	 * that carries the original, unmodified input and self::FORMAT_NAME.
	 * @return TimeValue
	 */
	protected function stringParse( $value ) {
		// Kept for error reporting, because $value is rewritten several times below.
		$rawValue = $value;

		try {
			list( $sign, $value ) = $this->eraParser->parse( $value );

			$value = trim( $value );
			$value = $this->monthNameUnlocalizer->unlocalize( $value );
			// Also rewrites $value (by reference) so that it contains an exactly 4-digit year.
			$year = $this->fetchAndNormalizeYear( $value );

			$value = $this->getValueWithFixedSeparators( $value, $year );

			$this->validateDateTimeInput( $value );

			// Parse using the DateTime object (this will allow us to format the date in a nicer way)
			$dateTime = new DateTime( $value );

			// Fail if the DateTime object does calculations like changing 2015-00-00 to 2014-12-30.
			if ( $year !== null && $dateTime->format( 'Y' ) !== substr( $year, -4 ) ) {
				throw new ParseException( $value . ' is not a valid date.' );
			}

			// Input was one, two, or three numbers? Where the heck does a time come from?
			if ( $dateTime->format( 'H:i:s' ) !== '00:00:00'
				&& preg_match( '/^\D*\d+(?:\D+\d+){0,2}\D*$/', $value )
			) {
				throw new ParseException( $value . ' is not a valid date.' );
			}

			if ( $year !== null && strlen( $year ) > 4 ) {
				// DateTime only saw the last 4 digits, so the full year is put back here.
				$timestamp = $sign . $year . $dateTime->format( '-m-d\TH:i:s\Z' );
			} else {
				$timestamp = $sign . $dateTime->format( 'Y-m-d\TH:i:s\Z' );
			}

			// Use a common base parser for precision detection and option handling.
			return $this->isoTimestampParser->parse( $timestamp );
		} catch ( Exception $exception ) {
			throw new ParseException( $exception->getMessage(), $rawValue, self::FORMAT_NAME );
		}
	}

	/**
	 * Rejects inputs this parser does not want to pass on to PHP's DateTime object: strings
	 * without any digit, and strings that start with 1 to 7 digits followed only by either a
	 * plus sign and optional digits, or by non-digit characters.
	 *
	 * @param string $value
	 *
	 * @throws ParseException
	 */
	private function validateDateTimeInput( $value ) {
		// we don't support input of non-digits only, such as 'x'.
		if ( !preg_match( '/\d/', $value ) ) {
			throw new ParseException( $value . ' does not contain a digit.' );
		}

		// @todo i18n support for these exceptions
		// we don't support dates in format of year + timezone
		if ( preg_match( '/^\d{1,7}(\+\d*|\D*)$/', $value ) ) {
			throw new ParseException( $value . ' is not a valid date.' );
		}
	}

	/**
	 * PHP's DateTime object does not accept spaces as separators between year, month and day,
	 * e.g. dates like 20 12 2012, but we want to support them.
	 * See http://de1.php.net/manual/en/datetime.formats.date.php
	 *
	 * Dots, commas and whitespace that follow a digit or letter are replaced with "-" if the
	 * input consists of exactly three numbers with the year first, and with "." otherwise.
	 *
	 * @param string $value A value in which the year has already been cut to 4 digits.
	 * @param string|null $year The full year as returned by fetchAndNormalizeYear, possibly longer
	 * than 4 digits.
	 *
	 * @return string
	 */
	private function getValueWithFixedSeparators( $value, $year = null ) {
		// The year inside $value was cut to its last 4 digits by fetchAndNormalizeYear, while
		// $year may be longer. Searching for the uncut year could never match for such years.
		$isYmd = $year !== null
			&& preg_match( '/^\D*' . substr( $year, -4 ) . '\D+\d+\D+\d+\D*$/', $value );
		$separator = $isYmd ? '-' : '.';
		// Meant to match separator characters after day and month. \p{L} matches letters outside
		// the ASCII range.
		return preg_replace( '/(?<=[\d\p{L}])[.,\s]\s*/', $separator, $value );
	}

	/**
	 * Tries to find and pad the sequence of digits in the input that represents the year.
	 * Refer to the class level documentation for a description of the heuristics used.
	 *
	 * @param string &$value A time value string, possibly containing a year. If found, the year in
	 * the string will be cut and padded to exactly 4 digits.
	 *
	 * @return string|null The full year, if found, not cut but padded to at least 4 digits.
	 */
	private function fetchAndNormalizeYear( &$value ) {
		// NOTE: When changing the regex matching below, keep the class level
		// documentation of the extraction heuristics up to date!
		$patterns = array(
			// Check if the string contains a number longer than 2 digits or bigger than 59.
			'/(?<!\d)('           // can not be prepended by a digit
				. '\d{3,}|'       // any number longer than 2 digits, or
				. '[6-9]\d'       // any number bigger than 59
				. ')(?!\d)/',     // can not be followed by a digit

			// Check if the first number in the string is bigger than 31.
			'/^\D*(3[2-9]|[4-9]\d)/',

			// Check if the string starts with three space-separated parts or three numbers.
			'/^(?:'
				. '\S+\s+\S+\s+|' // e.g. "July<SPACE>4th<SPACE>", or
				. '\d+\D+\d+\D+'  // e.g. "4.7."
				. ')(\d+)/',      // followed by a number

			// Check if the string ends with a number.
			'/(\d+)\D*$/',
		);

		// The patterns are tried in order of priority; the first one that matches wins.
		$matches = array();
		foreach ( $patterns as $pattern ) {
			if ( preg_match( $pattern, $value, $matches, PREG_OFFSET_CAPTURE ) ) {
				break;
			}
		}

		if ( !isset( $matches[1] ) ) {
			return null;
		}

		// With PREG_OFFSET_CAPTURE each group is a pair of matched text and byte offset.
		$year = $matches[1][0];
		$index = $matches[1][1];
		$length = strlen( $year );

		// Trim irrelevant leading zeros.
		$year = ltrim( $year, '0' );

		// Pad to at least 4 digits.
		$year = str_pad( $year, 4, '0', STR_PAD_LEFT );

		// Manipulate the value to have an exactly 4-digit year. Crucial for PHP's DateTime object.
		$value = substr_replace( $value, substr( $year, -4 ), $index, $length );

		return $year;
	}

}
213