Passed
Push — latest ( 8e76fa...96be75 )
by Mark
02:56
created

Parser   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 140
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 20
eloc 61
c 1
b 0
f 0
dl 0
loc 140
ccs 53
cts 53
cp 1
rs 10

7 Methods

Rating   Name   Duplication   Size   Complexity  
A parseTextToken() 0 17 5
A parseToken() 0 19 4
A loadLocalePreset() 0 15 3
A getConfiguration() 0 3 1
A __construct() 0 12 1
A parse() 0 6 1
A parseTokens() 0 19 5
1
<?php
2
3
declare(strict_types=1);
4
5
namespace UnicornFail\Emoji;
6
7
use UnicornFail\Emoji\Emojibase\ShortcodeInterface;
8
use UnicornFail\Emoji\Exception\LocalePresetException;
9
use UnicornFail\Emoji\Token\EmojiTokenInterface;
10
use UnicornFail\Emoji\Token\Text;
11
use UnicornFail\Emoji\Token\TokenInterface;
12
13
class Parser implements ParserInterface
{
    /**
     * Maps a lexer token type to the token class instantiated for a match.
     *
     * Class names are resolved relative to this file's namespace and never
     * carry a leading backslash.
     */
    public const T_EMOJI_TOKENS = [
        Lexer::T_EMOTICON => Token\Emoticon::class,
        Lexer::T_HTML_ENTITY => Token\HtmlEntity::class,
        Lexer::T_SHORTCODE => Token\Shortcode::class,
        Lexer::T_UNICODE => Token\Unicode::class,
    ];

    /**
     * Maps a lexer token type to the name passed to Dataset::indexBy() when
     * looking up the matched value.
     */
    public const T_DATASETS = [
        Lexer::T_EMOTICON => 'emoticon',
        Lexer::T_HTML_ENTITY => 'htmlEntity',
        Lexer::T_SHORTCODE => 'shortcodes',
        Lexer::T_UNICODE => 'unicode',
    ];

    /** @var ConfigurationInterface */
    private $configuration;

    /** @var Dataset */
    private $dataset;

    /** @var Lexer */
    private $lexer;

    /**
     * Builds the configuration, then uses the supplied dataset and lexer or
     * creates defaults from that configuration.
     *
     * The locale preset is only loaded from disk when no dataset is supplied.
     *
     * @param mixed[]|\Traversable|null $configuration
     *
     * @throws LocalePresetException When no dataset is supplied and none of the configured presets can be loaded.
     */
    public function __construct(?iterable $configuration = null, ?Dataset $dataset = null, ?Lexer $lexer = null)
    {
        $this->configuration = Configuration::create($configuration);

        $locale = $this->configuration->get('locale');
        \assert(\is_string($locale));

        /** @var string[] $preset */
        $preset = $this->configuration->get('preset');

        $this->dataset = $dataset ?? self::loadLocalePreset($locale, $preset);
        $this->lexer   = $lexer ?? new Lexer($this->configuration);
    }

    /**
     * Returns the first dataset archive that can be loaded for the locale.
     *
     * Presets are tried in the order given, skipping empty entries. Each
     * failure is remembered under its preset name so that the final exception
     * can report every attempt.
     *
     * @param string[] $presets
     *
     * @throws LocalePresetException When none of the presets can be unarchived.
     */
    protected static function loadLocalePreset(string $locale = 'en', array $presets = ShortcodeInterface::DEFAULT_PRESETS): Dataset
    {
        $throwables = [];

        foreach (\array_filter($presets) as $preset) {
            try {
                return Dataset::unarchive(\sprintf('%s/%s/%s.gz', Dataset::DIRECTORY, $locale, $preset));
            } catch (\Throwable $throwable) {
                // Deliberately broad: any failure means "try the next preset".
                $throwables[$preset] = $throwable;
            }
        }

        throw new LocalePresetException($locale, $throwables);
    }

    /**
     * Returns the configuration this parser was constructed with.
     */
    public function getConfiguration(): ConfigurationInterface
    {
        return $this->configuration;
    }

    /**
     * Tokenizes the input string.
     *
     * @return TokenInterface[]
     */
    public function parse(string $input): array
    {
        $this->lexer->setInput($input);

        // Prime the lexer so that the lookahead points at the first token.
        $this->lexer->moveNext();

        return $this->parseTokens();
    }

    /**
     * Consumes the lexer until it has no lookahead left.
     *
     * Text tokens are handled by parseTextToken() and every other type by
     * parseToken(); a null result from either is skipped.
     *
     * @return TokenInterface[]
     */
    protected function parseTokens(): array
    {
        $tokens = [];

        while ($this->lexer->lookahead) {
            $this->lexer->moveNext();

            $type  = (int) ($this->lexer->token['type'] ?? Lexer::T_TEXT);
            $value = (string) ($this->lexer->token['value'] ?? '');

            $token = $type === Lexer::T_TEXT
                ? $this->parseTextToken($value)
                : $this->parseToken($type, $value);

            if ($token !== null) {
                $tokens[] = $token;
            }
        }

        return $tokens;
    }

    /**
     * Creates an emoji token for the given lexer type and matched value.
     *
     * @return EmojiTokenInterface|null Null when the type is not mapped in both
     *                                  lookup tables, or when the dataset has
     *                                  no entry for the value.
     */
    protected function parseToken(int $type, string $value): ?EmojiTokenInterface
    {
        // Both tables are indexed below, so the type must be present in both.
        if (! isset(self::T_DATASETS[$type], self::T_EMOJI_TOKENS[$type])) {
            return null;
        }

        $emoji = $this->dataset->indexBy(self::T_DATASETS[$type])->offsetGet($value);
        if (! $emoji) {
            return null;
        }

        $tokenClass = self::T_EMOJI_TOKENS[$type];

        // Clone the configuration here. This is necessary so it can be passed to tokens,
        // which may be rendered at a later time; when the configuration may have changed.
        /** @var EmojiTokenInterface $token */
        $token = new $tokenClass($value, clone $this->configuration, $emoji);

        return $token;
    }

    /**
     * Merges the current text value with all directly following text tokens
     * into a single Text token.
     *
     * @return Text|null Null only when the accumulated text is the empty string.
     */
    protected function parseTextToken(string $value): ?Text
    {
        $text = '';

        while (true) {
            $text .= $value;

            if ($this->lexer->lookahead === null || $this->lexer->lookahead['type'] !== Lexer::T_TEXT) {
                break;
            }

            // Read the upcoming value before advancing; it is appended on the next pass.
            $value = (string) ($this->lexer->lookahead['value'] ?? '');

            $this->lexer->moveNext();
        }

        // Compare against '' explicitly: the string "0" is falsy in PHP and
        // would otherwise be discarded.
        return $text !== ''
            ? new Text($text)
            : null;
    }
}
155