Passed
Branch fuck-54 (18c095)
by Kacper
03:37
created

Language::_tokens()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3

Importance

Changes 9
Bugs 1 Features 0
Metric Value
c 9
b 1
f 0
dl 0
loc 14
ccs 9
cts 9
cp 1
rs 9.4285
cc 3
eloc 7
nc 3
nop 4
crap 3
1
<?php
2
/**
3
 * Highlighter
4
 *
5
 * Copyright (C) 2015, Some rights reserved.
6
 *
7
 * @author Kacper "Kadet" Donat <[email protected]>
8
 *
9
 * Contact the author:
10
 * Xmpp: [email protected]
11
 * E-mail: [email protected]
12
 *
13
 * From Kadet with love.
14
 */
15
16
namespace Kadet\Highlighter\Language;
17
18
use Kadet\Highlighter\Matcher\WholeMatcher;
19
use Kadet\Highlighter\Parser\LanguageToken;
20
use Kadet\Highlighter\Parser\Rule;
21
use Kadet\Highlighter\Parser\Token;
22
use Kadet\Highlighter\Parser\TokenFactory;
23
use Kadet\Highlighter\Parser\TokenIterator;
24
use Kadet\Highlighter\Parser\TokenList;
25
use Kadet\Highlighter\Utils\ArrayHelper;
26
27
/**
28
 * Class Language
29
 *
30
 * @package Kadet\Highlighter\Language
31
 */
32
abstract class Language
33
{
34
    /**
     * Tokenizer rules, as returned by getRules() at construction time.
     *
     * Keys are later used by _rules() as the token factory base name; each
     * value appears to be either a single Rule or a list of Rules, since
     * _rules() wraps non-array entries before iterating them.
     *
     * @var array
     */
    private $_rules;

    /**
     * Language options.
     *
     * Always contains the 'embedded' key (a list of embedded languages)
     * once the constructor has run; other keys are readable and writable
     * through __get() / __set().
     *
     * @var array
     */
    protected $_options = [];
45
46
    /**
     * Language constructor.
     *
     * Merges, in increasing precedence, the built-in defaults, the options
     * declared on the class ($_options) and the options passed by the caller,
     * then stores the rule set returned by getRules().
     *
     * @param array $options Options overriding the class-level ones.
     */
    public function __construct(array $options = [])
    {
        $defaults = ['embedded' => []];

        $this->_options = array_merge($defaults, $this->_options, $options);
        $this->_rules   = $this->getRules();
    }
58
59
    /**
     * Tokenization rules definition.
     *
     * Called once from the constructor; the returned array is stored and
     * later walked by _rules(), which uses every key as the token factory
     * base name and accepts either a single Rule or an array of Rules as
     * the value.
     *
     * @return array
     */
    abstract public function getRules();
0 ignored issues
show
Coding Style introduced by
The abstract declaration must precede the visibility declaration
Loading history...
65
66
    /**
     * Parses source and removes wrong tokens.
     *
     * A string is tokenized first; a TokenIterator is consumed from its
     * current position. The token the iterator currently points at becomes
     * the first entry of the result (presumably this language's opening
     * LanguageToken - TODO confirm), and the remaining tokens are processed
     * until the LanguageToken closing this language is reached.
     *
     * @param TokenIterator|string $tokens     Source code or an already tokenized stream.
     * @param array                $additional Forwarded to tokenize() when $tokens is a string.
     * @param bool                 $embedded   Forwarded to tokenize() when $tokens is a string.
     *
     * @return TokenIterator
     *
     * @throws \InvalidArgumentException When $tokens is neither a string nor a TokenIterator.
     */
    public function parse($tokens = null, $additional = [], $embedded = false)
    {
        if (is_string($tokens)) {
            // tokenize() is ($source, $additional, $offset, $embedded): the offset
            // has to be passed explicitly, otherwise $embedded ends up as $offset
            // and the embedded flag is silently dropped.
            $tokens = $this->tokenize($tokens, $additional, 0, $embedded);
        } elseif (!$tokens instanceof TokenIterator) {
            // Todo: Own Exceptions
            throw new \InvalidArgumentException('$tokens must be string or TokenIterator');
        }

        $start = $tokens->current();

        // Names of currently open tokens, keyed by spl_object_hash() of the start token.
        $context = [];

        /** @var Token[] $result */ $result = [$start];
        // Start tokens seen so far, keyed by the same hash as $context.
        /** @var Token[] $all */    $all    = [];

        /** @var Token $token */
        for ($tokens->next(); $tokens->valid(); $tokens->next()) {
            $token = $tokens->current();

            if (!$token->isValid($this, $context)) {
                continue;
            }

            if ($token->isStart()) {
                if ($token instanceof LanguageToken) {
                    // Another language starts here: let it consume the shared
                    // iterator and append whatever it produced.
                    /** @var LanguageToken $token */
                    $result = array_merge(
                        $result,
                        $token->getInjected()->parse($tokens)->getTokens()
                    );
                } else {
                    $all[spl_object_hash($token)] = $result[] = $token;
                    $context[spl_object_hash($token)] = $token->name;
                }
            } else {
                $start = $token->getStart();

                /** @noinspection PhpUndefinedMethodInspection bug */
                if ($token instanceof LanguageToken && $token->getLanguage() === $this) {
                    // End of this language's range.
                    $result[0]->setEnd($token);

                    if ($result[0]->getRule()->postProcess) {
                        // Re-tokenize only the text covered by this language and
                        // parse it again, replacing the tokens collected so far.
                        $source = substr($tokens->getSource(), $result[0]->pos, $result[0]->getLength());

                        $tokens = $this->tokenize($source, $result, $result[0]->pos, true);
                        $result = $this->parse($tokens)->getTokens();
                    }

                    # closing unclosed tokens
                    foreach (array_reverse($context) as $hash => $name) {
                        $end = new Token([$name, 'pos' => $token->pos]);
                        $all[$hash]->setEnd($end);
                        $result[] = $end;
                    }

                    $result[] = $token;
                    break;
                } else {
                    if ($start) {
                        unset($context[spl_object_hash($start)]);
                    } else {
                        // End token without a known start: pair it with the most
                        // recently opened token of the same name, if there is one.
                        /** @noinspection PhpUnusedParameterInspection */
                        $start = ArrayHelper::find(array_reverse($context), function ($k, $v) use ($token) {
                            return $v === $token->name;
                        });

                        if ($start !== false) {
                            $token->setStart($all[$start]);
                            unset($context[$start]);
                        }
                    }

                    $result[] = $token;
                }
            }
        }

        return new TokenIterator($result, $tokens->getSource());
    }
157
158
    /**
     * Tokenize source.
     *
     * Runs every rule yielded by _rules() against the source, collects the
     * matched tokens in a TokenList and returns the result of batching that
     * list with $additional.
     *
     * @param       $source     Text handed to each rule's match().
     * @param int   $offset     Offset set on each rule's token factory before matching.
     * @param array $additional Passed to TokenList::batch() (presumably extra tokens to
     *                          include - confirm against TokenList).
     * @param bool  $embedded   Forwarded to _rules().
     *
     * @return TokenList
     */
    private function _tokens($source, $offset = 0, $additional = [], $embedded = false)
    {
        $list = new TokenList();

        /** @var Rule $rule */
        foreach ($this->_rules($embedded) as $rule) {
            $rule->factory->setOffset($offset);

            foreach ($rule->match($source) as $matched) {
                $list->add($matched);
            }
        }

        return $list->batch($additional);
    }
184
185 12
    /**
     * Tokenizes the source and wraps the sorted tokens in an iterator.
     *
     * Note that the parameter order differs from _tokens(): $additional comes
     * before $offset here.
     *
     * @param       $source     Text to tokenize; also stored as the iterator's source.
     * @param array $additional Forwarded to _tokens().
     * @param int   $offset     Forwarded to _tokens().
     * @param bool  $embedded   Forwarded to _tokens().
     *
     * @return TokenIterator
     */
    public function tokenize($source, $additional = [], $offset = 0, $embedded = false)
    {
        $tokens = $this->_tokens($source, $offset, $additional, $embedded);

        return new TokenIterator($tokens->sort()->toArray(), $source);
    }
190
191
    /**
     * Lazily yields every rule applicable to this language.
     *
     * First come this language's own rules (plus, unless $embedded is set,
     * the open/close rule registered under 'language.<identifier>'), then
     * the rules of every embedded language.
     *
     * @param bool $embedded When true, the open/close rule is left out.
     *
     * @return Rule[]
     */
    private function _rules($embedded = false)
    {
        $definitions = $this->_rules;
        if (!$embedded) {
            $definitions['language.' . $this->getIdentifier()] = $this->getOpenClose();
        }

        // Plain nested loops on purpose: the original author found a
        // RecursiveIterator based version to perform considerably worse.
        foreach ($definitions as $base => $group) {
            /** @var Rule $rule */
            foreach (is_array($group) ? $group : [$group] as $rule) {
                // Rules without an owner yet are claimed by this language.
                if ($rule->language === false) {
                    $rule->language = $this;
                }

                $rule->factory->setBase($base);

                yield $rule;
            }
        }

        foreach ($this->getEmbedded() as $language) {
            foreach ($language->_rules() as $rule) {
                yield $rule;
            }
        }
    }
226
227
    /**
     * Unique language identifier, for example 'php'.
     *
     * Used by _rules() to build the 'language.<identifier>' key under which
     * the open/close rule is registered.
     *
     * @return string
     */
    abstract public function getIdentifier();
0 ignored issues
show
Coding Style introduced by
The abstract declaration must precede the visibility declaration
Loading history...
233
234
    /**
     * Language range Rule(s).
     *
     * The default implementation builds one rule around a WholeMatcher that
     * produces LanguageToken instances and injects this language.
     *
     * @return Rule|Rule[]
     */
    public function getOpenClose()
    {
        $matcher = new WholeMatcher();
        $options = [
            'priority' => 1000,
            'factory'  => new TokenFactory(LanguageToken::class),
            'inject'   => $this,
            'language' => null,
            'context'  => ['!!'],
        ];

        return new Rule($matcher, $options);
    }
251
252
    /**
     * Returns the languages stored under the 'embedded' option.
     *
     * @return Language[]
     */
    public function getEmbedded()
    {
        return $this->_options['embedded'];
    }
258
259
    /**
     * Appends a language to the 'embedded' option, so that its rules are
     * yielded by _rules() after this language's own.
     *
     * @param Language $lang Language to embed.
     */
    public function embed(Language $lang)
    {
        $this->_options['embedded'][] = $lang;
    }
265
266 1
    /**
     * Magic read access to language options.
     *
     * @param string $name Option name.
     *
     * @return mixed The option value, or null when the option is not set
     *               (or is set to null).
     */
    public function __get($name)
    {
        if (isset($this->_options[$name])) {
            return $this->_options[$name];
        }

        return null;
    }
269
270 1
    /**
     * Magic write access to language options.
     *
     * @param string $name  Option name.
     * @param mixed  $value Value stored under that name, replacing any previous one.
     */
    public function __set($name, $value)
    {
        $this->_options[$name] = $value;
    }
273
}
274