Passed
Push — master ( 7783b2...f247da )
by Kacper
02:40
created

Language::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 1

Importance

Changes 7
Bugs 1 Features 0
Metric Value
c 7
b 1
f 0
dl 0
loc 10
ccs 7
cts 7
cp 1
rs 9.4285
cc 1
eloc 5
nc 1
nop 1
crap 1
1
<?php
2
/**
3
 * Highlighter
4
 *
5
 * Copyright (C) 2015, Some rights reserved.
6
 *
7
 * @author Kacper "Kadet" Donat <[email protected]>
8
 *
9
 * Contact with author:
10
 * Xmpp: [email protected]
11
 * E-mail: [email protected]
12
 *
13
 * From Kadet with love.
14
 */
15
16
namespace Kadet\Highlighter\Language;
17
18
use Kadet\Highlighter\Matcher\WholeMatcher;
19
use Kadet\Highlighter\Parser\LanguageToken;
20
use Kadet\Highlighter\Parser\Rule;
21
use Kadet\Highlighter\Parser\Token;
22
use Kadet\Highlighter\Parser\TokenFactory;
23
use Kadet\Highlighter\Parser\TokenIterator;
24
use Kadet\Highlighter\Parser\TokenList;
25
use Kadet\Highlighter\Utils\ArrayHelper;
26
27
/**
28
 * Class Language
29
 *
30
 * @package Kadet\Highlighter\Language
31
 */
32
/**
 * Base class for language definitions.
 *
 * Keeps the language options and tokenizer rules, tokenizes source with
 * those rules (plus the rules of embedded languages) and turns the token
 * stream into a list of valid, properly closed tokens.
 *
 * @package Kadet\Highlighter\Language
 */
abstract class Language
{
    /**
     * Language options. The constructor guarantees that the 'embedded' key
     * exists; any key can be read and written through __get() / __set().
     *
     * @var array
     */
    protected $_options = [];

    /**
     * Tokenizer rules as returned by getRules() at construction time.
     * Each entry is either a single rule or an array of rules.
     *
     * @var Rule[]|Rule[][]
     */
    private $_rules;

    /**
     * Language constructor.
     *
     * Merges the options in increasing precedence: built-in defaults,
     * options predefined on the class, options passed by the caller.
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
        $this->_options = array_merge(
            [
                'embedded' => [],
            ], $this->_options, $options
        );

        $this->_rules = $this->getRules();
    }

    /**
     * Tokenization rules definition
     *
     * @return array
     */
    abstract public function getRules();

    /**
     * Parses source and removes wrong tokens.
     *
     * A string is tokenized first; a TokenIterator is consumed from its
     * current position. The token at the current position becomes the first
     * element of the result, the remaining tokens are validated one by one.
     *
     * @param TokenIterator|string $tokens     Source code or already tokenized source.
     * @param array                $additional Extra tokens handed to tokenize() (used for string input only).
     * @param bool                 $embedded   Forwarded to tokenize() as its $embedded flag (string input only).
     *
     * @return TokenIterator
     *
     * @throws \InvalidArgumentException When $tokens is neither a string nor a TokenIterator.
     */
    public function parse($tokens = null, $additional = [], $embedded = false)
    {
        if (is_string($tokens)) {
            // tokenize() expects the offset as its third argument, so the
            // embedded flag has to be passed explicitly as the fourth one.
            $tokens = $this->tokenize($tokens, $additional, 0, $embedded);
        } elseif (!$tokens instanceof TokenIterator) {
            // Todo: Own Exceptions
            throw new \InvalidArgumentException('$tokens must be string or TokenIterator');
        }

        // Names of start tokens that have not been closed yet, indexed by iterator key.
        $context = [];

        /** @var Token[] $result */
        $result = [$tokens->current()];

        /** @var Token $token */
        for ($tokens->next(); $tokens->valid(); $tokens->next()) {
            $token = $tokens->current();

            if (!$token->isValid($this, $context)) {
                continue;
            }

            if ($token->isStart()) {
                if ($token instanceof LanguageToken) {
                    // The injected language continues on the very same iterator,
                    // everything it produces is appended to our result.
                    /** @var LanguageToken $token */
                    $result = array_merge(
                        $result,
                        $token->getInjected()->parse($tokens)->getTokens()
                    );
                } else {
                    $result[] = $token;
                    $context[$tokens->key()] = $token->name;
                }
            } else {
                $start = $token->getStart();

                /** @noinspection PhpUndefinedMethodInspection bug */
                if ($token instanceof LanguageToken && $token->getLanguage() === $this) {
                    // End of this language's range: finish up and stop parsing.
                    $result[0]->setEnd($token);

                    if ($result[0]->postProcess) {
                        // Tokenize and parse the language range once more, this time
                        // with the tokens collected so far passed as additional ones.
                        $source = substr($tokens->getSource(), $result[0]->pos, $result[0]->getLength());

                        $tokens = $this->tokenize($source, $result, $result[0]->pos, true);
                        $result = $this->parse($tokens)->getTokens();
                    }

                    # closing unclosed tokens
                    // Keys are used for lookups below, so they must survive the reversal.
                    foreach (array_reverse($context, true) as $hash => $name) {
                        $end = new Token([$name, 'pos' => $token->pos]);
                        $tokens[$hash]->setEnd($end);
                        $result[] = $end;
                    }

                    $result[] = $token;
                    break;
                } else {
                    if ($start) {
                        unset($context[spl_object_hash($start)]);
                    } else {
                        // End token without a known start: pair it with the most
                        // recently opened token of the same name, if there is one.
                        /** @noinspection PhpUnusedParameterInspection */
                        $start = ArrayHelper::find(
                            array_reverse($context, true), function ($k, $v) use ($token) {
                            return $v === $token->name;
                        });

                        if ($start !== false) {
                            $token->setStart($tokens[$start]);
                            unset($context[$start]);
                        }
                    }

                    $result[] = $token;
                }
            }
        }

        return new TokenIterator($result, $tokens->getSource());
    }

    /**
     * Tokenizes source and wraps the sorted tokens in an iterator.
     *
     * @param string $source     Source code to tokenize.
     * @param array  $additional Extra tokens batched into the token list.
     * @param int    $offset     Offset set on every rule's token factory before matching.
     * @param bool   $embedded   When true, this language's own open/close rule is not applied.
     *
     * @return TokenIterator
     */
    public function tokenize($source, $additional = [], $offset = 0, $embedded = false)
    {
        $iterator = new TokenIterator(
            $this->_tokens($source, $offset, $additional, $embedded)->sort()->toArray(), $source
        );

        return $iterator;
    }

    /**
     * Tokenize source
     *
     * Runs every applicable rule against the source and collects the
     * matched tokens together with the additional ones.
     *
     * @param string $source
     * @param int    $offset
     * @param array  $additional
     * @param bool   $embedded
     *
     * @return TokenList
     */
    private function _tokens($source, $offset = 0, $additional = [], $embedded = false)
    {
        $result = new TokenList();

        /** @var Rule $rule */
        foreach ($this->_rules($embedded) as $rule) {
            $rule->factory->setOffset($offset);
            foreach ($rule->match($source) as $token) {
                $result->add($token);
            }
        }

        return $result->batch($additional);
    }

    /**
     * Yields every rule that applies to this language: its own rules, the
     * open/close rule (unless $embedded is true) and then all rules of the
     * embedded languages.
     *
     * @param bool $embedded
     *
     * @return \Generator|Rule[]
     */
    private function _rules($embedded = false)
    {
        $all = $this->_rules;
        if (!$embedded) {
            $all['language.' . $this->getIdentifier()] = $this->getOpenClose();
        }

        // This is done by hand because a RecursiveIterator performed noticeably worse.
        foreach ($all as $name => $rules) {
            if (!is_array($rules)) {
                $rules = [$rules];
            }

            /** @var Rule $rule */
            foreach ($rules as $rule) {
                // Only rules whose language is strictly false get bound to this language.
                if ($rule->language === false) {
                    $rule->language = $this;
                }

                $rule->factory->setBase($name);

                yield $rule;
            }
        }

        foreach ($this->getEmbedded() as $language) {
            foreach ($language->_rules() as $rule) {
                yield $rule;
            }
        }
    }

    /**
     * Unique language identifier, for example 'php'
     *
     * @return string
     */
    abstract public function getIdentifier();

    /**
     * Language range Rule(s)
     *
     * By default a single rule built on WholeMatcher that produces
     * LanguageToken instances and injects this language.
     *
     * @return Rule|Rule[]
     */
    public function getOpenClose()
    {
        return new Rule(
            new WholeMatcher(), [
                'priority' => 1000,
                'factory'  => new TokenFactory(LanguageToken::class),
                'inject'   => $this,
                'language' => null,
                'context'  => Rule::everywhere(),
            ]
        );
    }

    /**
     * Languages embedded into this one (the 'embedded' option).
     *
     * @return Language[]
     */
    public function getEmbedded()
    {
        return $this->_options['embedded'];
    }

    /**
     * Appends a language to the list of embedded languages.
     *
     * @param Language $lang
     */
    public function embed(Language $lang)
    {
        $this->_options['embedded'][] = $lang;
    }

    /**
     * Reads an option as if it was a property.
     *
     * @param string $name Option name.
     *
     * @return mixed Option value, or null when the option is not set.
     */
    public function __get($name)
    {
        return isset($this->_options[$name]) ? $this->_options[$name] : null;
    }

    /**
     * Writes an option as if it was a property.
     *
     * @param string $name  Option name.
     * @param mixed  $value Option value.
     */
    public function __set($name, $value)
    {
        $this->_options[$name] = $value;
    }
}
283