1 | <?php |
||
2 | |||
3 | namespace ICanBoogie\CLDR\Dates; |
||
4 | |||
5 | /** |
||
6 | * @link https://www.unicode.org/reports/tr35/tr35-72/tr35-dates.html#Date_Format_Patterns |
||
7 | */ |
||
final class DateFormatPatternParser
{
    /**
     * The single vertical quote used to delimit literal text in a pattern.
     */
    private const QUOTE = "'";

    /**
     * Parses a date format pattern.
     *
     * Results are memoized per pattern string for the lifetime of the process, so parsing the
     * same pattern repeatedly only tokenizes it once.
     *
     * @param string $pattern
     *     A date format pattern; for example, "hh 'o''clock' a, zzzz".
     *
     * @return list<string|array{string, int}>
     *     Each entry is either a literal string, or an array where `0` is a pattern character
     *     and `1` the number of times it is repeated. For the example above:
     *     `[ [ 'h', 2 ], " o'clock ", [ 'a', 1 ], ", ", [ 'z', 4 ] ]`.
     */
    public static function parse(string $pattern): array
    {
        static $cache = [];

        return $cache[$pattern] ??= self::do_parse($pattern);
    }

    /**
     * Tokenizes a date format pattern, without caching.
     *
     * Rules applied while scanning the pattern character by character:
     *
     * - Two adjacent quotes (`''`) yield a literal quote, inside or outside quoted text.
     * - A lone quote toggles quoted (literal) mode; quoted characters are appended to the
     *   current literal verbatim.
     * - Outside quoted text, a run of the same ASCII letter becomes a `[ letter, length ]` token.
     * - Any other character is appended to the current literal.
     *
     * An unterminated quote is tolerated: whatever literal was accumulated is emitted as is.
     *
     * @param string $pattern
     *     A date format pattern; for example, "hh 'o''clock' a, zzzz".
     *
     * @return list<string|array{string, int}>
     *     Each entry is either a literal string, or an array where `0` is a pattern character
     *     and `1` the number of times it is repeated.
     */
    private static function do_parse(string $pattern): array
    {
        $tokens = [];
        $is_literal = false;
        $literal = '';
        $z = mb_strlen($pattern);

        for ($i = 0; $i < $z; ++$i) {
            $c = mb_substr($pattern, $i, 1);

            if ($c === self::QUOTE) {
                // Two adjacent single vertical quotes (''), which represent a literal single quote,
                // either inside or outside a quoted text.
                if (mb_substr($pattern, $i + 1, 1) === self::QUOTE) {
                    $i++; // consume the second quote of the pair
                    $literal .= self::QUOTE;
                } else {
                    // A lone quote opens or closes a quoted text.
                    $is_literal = !$is_literal;
                }
            } elseif ($is_literal) {
                $literal .= $c;
            } elseif (ctype_alpha($c)) {
                // Flush the pending literal before emitting the field token.
                // Compared against '' on purpose: the literal "0" is falsy in PHP and must not be lost.
                if ($literal !== '') {
                    $tokens[] = $literal;
                    $literal = '';
                }

                // Find the end of the run of identical pattern characters.
                for ($j = $i + 1; $j < $z; ++$j) {
                    if (mb_substr($pattern, $j, 1) !== $c) {
                        break;
                    }
                }

                $tokens[] = [ $c, $j - $i ];
                $i = $j - 1; // because +1 from the for loop
            } else {
                $literal .= $c;
            }
        }

        // If the pattern ends with literal (could also be a malformed quote)
        if ($literal !== '') {
            $tokens[] = $literal;
        }

        return $tokens;
    }
}
||
86 |