Completed
Push — master ( 5ee129...947530 )
by Kacper
05:05
created

Language   A

Complexity

Total Complexity 26

Size/Duplication

Total Lines 206
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 7

Importance

Changes 24
Bugs 0 Features 1
Metric Value
wmc 26
c 24
b 0
f 1
lcom 1
cbo 7
dl 0
loc 206
rs 10

9 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 1
getRules() 0 1 ?
C parse() 0 77 13
C _tokens() 0 34 8
A tokenize() 0 7 1
getIdentifier() 0 1 ?
A getOpenClose() 0 11 1
A getEmbedded() 0 3 1
A embed() 0 3 1
1
<?php
2
/**
3
 * Highlighter
4
 *
5
 * Copyright (C) 2015, Some rights reserved.
6
 *
7
 * @author Kacper "Kadet" Donat <[email protected]>
8
 *
9
 * Contact with author:
10
 * Xmpp: [email protected]
11
 * E-mail: [email protected]
12
 *
13
 * From Kadet with love.
14
 */
15
16
namespace Kadet\Highlighter\Language;
17
18
use Kadet\Highlighter\Matcher\WholeMatcher;
19
use Kadet\Highlighter\Parser\LanguageToken;
20
use Kadet\Highlighter\Parser\Rule;
21
use Kadet\Highlighter\Parser\Token;
22
use Kadet\Highlighter\Parser\TokenFactory;
23
use Kadet\Highlighter\Parser\TokenIterator;
24
use Kadet\Highlighter\Utils\ArrayHelper;
25
26
/**
 * Base class for language definitions.
 *
 * Stores the tokenizer rules returned by getRules(), turns source text into
 * tokens with them (together with the rules of any embedded languages) and
 * resolves those tokens into a consistent stream in parse().
 *
 * @package Kadet\Highlighter\Language
 */
abstract class Language
{
    /**
     * Tokenizer rules, keyed by rule name.
     *
     * An entry is either a single Rule or an array of Rules; _tokens()
     * normalises both shapes before matching.
     *
     * @var Rule[]
     */
    private $_rules;

    /**
     * Language options; the 'embedded' key holds the embedded languages.
     *
     * @var array
     */
    private $_options = [];

    /**
     * Language constructor.
     *
     * Merges the given options over the defaults and caches the rules
     * returned by getRules().
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
        $this->_options = array_merge([
            'embedded' => [],
        ], $options);
        $this->_rules = $this->getRules();
    }

    /**
     * Tokenization rules definition
     *
     * @return array
     */
    abstract public function getRules();

    /**
     * Parses source and removes wrong tokens.
     *
     * Tokens rejected by Token::isValid() are dropped, end tokens are paired
     * with their start tokens, and every token that is still open when the
     * closing LanguageToken of this language arrives is closed at that
     * token's position.
     *
     * @param TokenIterator|string $tokens     Source text, or an already
     *                                         tokenized iterator positioned
     *                                         on this language's first token.
     * @param array                $additional Extra tokens handed to
     *                                         tokenize(); only used when
     *                                         $tokens is a string.
     *
     * @return TokenIterator
     *
     * @throws \InvalidArgumentException When $tokens is neither a string nor
     *                                   a TokenIterator.
     */
    public function parse($tokens = null, $additional = [])
    {
        if (is_string($tokens)) {
            $tokens = $this->tokenize($tokens, $additional);
        } elseif (!$tokens instanceof TokenIterator) {
            throw new \InvalidArgumentException('$tokens must be string or TokenIterator');
        }

        $start = $tokens->current();

        /** @var Token[] $result */
        $result = [$start];

        // Currently open tokens: object hash => token name.
        $context = [];
        // Every non-language start token accepted so far: object hash => token.
        $all = [];

        /** @var Token $token */
        for ($tokens->next(); $tokens->valid(); $tokens->next()) {
            $token = $tokens->current();

            if (!$token->isValid($this, $context)) {
                continue;
            }

            if ($token->isStart()) {
                if ($token instanceof LanguageToken) {
                    // A nested language starts here: hand the same iterator
                    // over to that language and append whatever it returns.
                    /** @noinspection PhpUndefinedMethodInspection bug */
                    $result = array_merge(
                        $result,
                        $token->getLanguage()->parse($tokens)->getTokens()
                    );
                } else {
                    $all[spl_object_hash($token)] = $result[] = $token;
                    $context[spl_object_hash($token)] = $token->name;
                }
            } else {
                $start = $token->getStart();

                if ($token instanceof LanguageToken && $token->getRule()->getLanguage() === $this) {
                    // Closing token of this language: finish the range opened
                    // by the first token of the result.
                    $result[0]->setEnd($token);

                    if ($result[0]->getRule()->postProcess) {
                        $source = substr($tokens->getSource(), $result[0]->pos, $result[0]->getLength());

                        $tokens = $this->tokenize($source, $result, $result[0]->pos);
                        $result = $this->parse($tokens)->getTokens();
                    }

                    // Closing unclosed tokens. $context is keyed by object
                    // hash, so the tokens have to be looked up in $all
                    // ($result is a plain list and has no such keys).
                    foreach (array_reverse($context) as $hash => $name) {
                        $all[$hash]->setEnd(new Token([$name, 'pos' => $token->pos]));
                    }

                    $result[] = $token;
                    break;
                } else {
                    if ($start !== null) {
                        unset($context[spl_object_hash($start)]);
                    } else {
                        // End token without a known start: search the open
                        // tokens, newest first, for one with the same name.
                        // NOTE(review): ArrayHelper::find presumably returns
                        // the matching key, or false when nothing matches.
                        /** @noinspection PhpUnusedParameterInspection */
                        $start = ArrayHelper::find(array_reverse($context), function ($k, $v) use ($token) {
                            return $v === $token->name;
                        });

                        if ($start !== false) {
                            $token->setStart($all[$start]);
                            unset($context[$start]);
                        }
                    }

                    $result[] = $token;
                }
            }
        }

        return new TokenIterator($result, $tokens->getSource());
    }

    /**
     * Tokenize source
     *
     * Matches every rule of this language (including the open/close rule
     * registered under 'language.<identifier>') and of all embedded
     * languages against the source.
     *
     * @param string $source     Source text to match the rules against.
     * @param array  $additional Tokens appended to the result as they are;
     *                           they are merged after the offset shift, so
     *                           $offset is not applied to them.
     * @param int    $offset     Value added to the position of every matched
     *                           token.
     *
     * @return array
     */
    private function _tokens($source, $additional = [], $offset = 0)
    {
        $this->_rules['language.' . $this->getIdentifier()] = $this->getOpenClose();

        $result = [];
        foreach ($this->_rules as $name => $rules) {
            if (!is_array($rules)) {
                $rules = [$rules];
            }

            /** @var Rule $rule */
            foreach ($rules as $rule) {
                if ($rule->getLanguage() === false) {
                    $rule->setLanguage($this);
                }

                $rule->factory->setBase($name);
                $result = array_merge($result, $rule->match($source));
            }
        }

        // Embedded languages are matched against the same source, without
        // additional tokens or an offset of their own.
        foreach ($this->getEmbedded() as $language) {
            $result = array_merge($result, $language->_tokens($source));
        }

        // Array map would be cool, but is a lot slower
        if ($offset) {
            foreach ($result as $item) {
                $item->pos += $offset;
            }
        }

        return array_merge($result, $additional);
    }

    /**
     * Tokenizes source and returns the tokens as a sorted, rewound iterator.
     *
     * @param string $source     Source text to tokenize.
     * @param array  $additional Extra tokens to include in the iterator.
     * @param int    $offset     Value added to the position of every matched
     *                           token.
     *
     * @return TokenIterator
     */
    public function tokenize($source, $additional = [], $offset = 0)
    {
        $iterator = new TokenIterator($this->_tokens($source, $additional, $offset), $source);
        $iterator->sort();
        $iterator->rewind();

        return $iterator;
    }

    /**
     * Unique language identifier, for example 'php'
     *
     * @return string
     */
    abstract public function getIdentifier();

    /**
     * Language range Rule(s)
     *
     * The default implementation builds one rule around a WholeMatcher that
     * produces LanguageToken instances and injects this language.
     *
     * @return Rule|Rule[]
     */
    public function getOpenClose()
    {
        return new Rule(
            new WholeMatcher(), [
                'priority' => 1000,
                'factory'  => new TokenFactory('Kadet\\Highlighter\\Parser\\LanguageToken'),
                'inject'   => $this,
                'language' => null
            ]
        );
    }

    /**
     * Languages embedded in this one, as stored under the 'embedded' option.
     *
     * @return Language[]
     */
    public function getEmbedded()
    {
        return $this->_options['embedded'];
    }

    /**
     * Appends a language to the list of embedded languages.
     *
     * @param Language $lang
     */
    public function embed(Language $lang)
    {
        $this->_options['embedded'][] = $lang;
    }
}
237