1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace ICanBoogie\CLDR; |
4
|
|
|
|
5
|
|
|
/**
 * Tokenizer for date format patterns.
 *
 * @link https://www.unicode.org/reports/tr35/tr35-72/tr35-dates.html#Date_Format_Patterns
 */
final class DateFormatPattern
{
    private const QUOTE = "'";

    /**
     * Splits a date format pattern into literal text and pattern fields.
     *
     * Rules applied while scanning the pattern, one character at a time:
     *
     * - A run of the same ASCII letter outside quoted text is a pattern field, e.g. "yyyy" gives `[ 'y', 4 ]`.
     * - A single quote toggles quoted text; everything inside quoted text is literal.
     * - Two adjacent single quotes are a literal single quote, inside or outside quoted text.
     * - Any other character is literal. Adjacent literal characters are merged into one token.
     * - An unterminated quote is tolerated: the remaining text is kept as literal.
     *
     * Results are memoized per pattern for the lifetime of the process.
     *
     * @param string $pattern
     *     A date format pattern; for example, "yyyy.MM.dd G 'at' HH:mm:ss zzz".
     *
     * @return array<string|array{ string, int }>
     *     Where _value_ is either a literal or an array where `0` is a pattern character and `1` its length.
     */
    public static function tokenize(string $pattern): array
    {
        static $cache = [];

        if (isset($cache[$pattern])) {
            return $cache[$pattern];
        }

        $tokens = [];
        $is_literal = false;
        $literal = '';
        $z = mb_strlen($pattern);

        for ($i = 0; $i < $z; ++$i) {
            $c = mb_substr($pattern, $i, 1);

            if ($c === self::QUOTE) {
                // Two adjacent single vertical quotes (''), which represent a literal single quote,
                // either inside or outside a quoted text.
                if (mb_substr($pattern, $i + 1, 1) === self::QUOTE) {
                    $i++; // consume the second quote of the pair
                    $literal .= self::QUOTE;
                } else {
                    // Toggle literal
                    $is_literal = !$is_literal;
                }
            } elseif ($is_literal) {
                $literal .= $c;
            } elseif (ctype_alpha($c)) {
                // Flush pending literal text before the field. The comparison is against the empty
                // string on purpose: the literal "0" is falsy in PHP and must not be dropped.
                if ($literal !== '') {
                    $tokens[] = $literal;
                    $literal = '';
                }

                // Find the end of the run of identical pattern characters.
                for ($j = $i + 1; $j < $z; ++$j) {
                    if (mb_substr($pattern, $j, 1) !== $c) {
                        break;
                    }
                }

                $tokens[] = [ $c, $j - $i ];
                $i = $j - 1; // because +1 from the for loop
            } else {
                $literal .= $c;
            }
        }

        // If the pattern ends with literal (could also be a malformed quote)
        if ($literal !== '') {
            $tokens[] = $literal;
        }

        return $cache[$pattern] = $tokens;
    }
}
74
|
|
|
|