1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace ICanBoogie\CLDR\Dates; |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* @link https://www.unicode.org/reports/tr35/tr35-72/tr35-dates.html#Date_Format_Patterns |
7
|
|
|
*/ |
8
|
|
|
final class DateFormatPatternParser
{
    private const QUOTE = "'";

    /**
     * Parses a date format pattern, memoizing the result per pattern string.
     *
     * @param string $pattern
     *     A date format pattern; for example, "hh 'o''clock' a, zzzz".
     *
     * @return array<string|array{ string, int }>
     *     A list of tokens in pattern order. Each token is either a literal string or an array where `0` is
     *     a pattern character and `1` its length (the number of times the character is repeated).
     */
    public static function parse(string $pattern): array
    {
        static $cache = [];

        // `??=` only re-parses when the entry is missing; an empty token list is still a valid cached value.
        return $cache[$pattern] ??= self::do_parse($pattern);
    }

    /**
     * Tokenizes a date format pattern into literals and pattern fields.
     *
     * Rules applied, in order, to each character:
     * - Two adjacent single quotes (`''`) produce a literal single quote, inside or outside quoted text.
     * - A lone single quote toggles quoted (literal) mode and produces no output.
     * - Inside quoted mode every character is literal text.
     * - Outside quoted mode a run of the same ASCII letter becomes a `[ letter, run length ]` token.
     * - Anything else is literal text.
     *
     * Adjacent literal characters (quoted or not) are merged into a single literal token. An unterminated
     * quote is tolerated: whatever literal text was collected is emitted at the end.
     *
     * @param string $pattern
     *     A date format pattern; for example, "hh 'o''clock' a, zzzz".
     *
     * @return array<string|array{ string, int }>
     *     Where _value_ is either a literal or an array where `0` is a pattern character and `1` its length.
     */
    private static function do_parse(string $pattern): array
    {
        // Split once instead of calling mb_substr() for every position.
        $chars = mb_str_split($pattern);
        $count = count($chars);

        $tokens = [];
        $is_literal = false;
        $literal = '';

        for ($i = 0; $i < $count; ++$i) {
            $c = $chars[$i];

            if ($c === self::QUOTE) {
                if (($chars[$i + 1] ?? null) === self::QUOTE) {
                    // Escaped quote: consume both characters, emit one literal quote.
                    $i++;
                    $literal .= self::QUOTE;
                } else {
                    $is_literal = !$is_literal;
                }

                continue;
            }

            if ($is_literal || !self::is_pattern_character($c)) {
                $literal .= $c;

                continue;
            }

            // Compare against '' explicitly: the literal "0" is falsy and would otherwise be dropped.
            if ($literal !== '') {
                $tokens[] = $literal;
                $literal = '';
            }

            // Measure the run of identical pattern characters starting at $i.
            $j = $i + 1;

            while ($j < $count && $chars[$j] === $c) {
                ++$j;
            }

            $tokens[] = [ $c, $j - $i ];
            $i = $j - 1; // the for loop adds 1, landing on the first character after the run
        }

        // The pattern ends with literal text (possibly from a malformed, unterminated quote).
        if ($literal !== '') {
            $tokens[] = $literal;
        }

        return $tokens;
    }

    /**
     * Whether a character is an ASCII letter (`a-z`, `A-Z`).
     *
     * The check is explicit rather than `ctype_alpha()` so that the result does not depend on the
     * current locale.
     */
    private static function is_pattern_character(string $c): bool
    {
        return preg_match('/\A[A-Za-z]\z/', $c) === 1;
    }
}
86
|
|
|
|