Completed
Push — master ( 51cde5...f3ba84 )
by Kacper
02:21
created

Language/Language.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Highlighter
4
 *
5
 * Copyright (C) 2015, Some right reserved.
6
 *
7
 * @author Kacper "Kadet" Donat <[email protected]>
8
 *
9
 * Contact with author:
10
 * Xmpp: [email protected]
11
 * E-mail: [email protected]
12
 *
13
 * From Kadet with love.
14
 */
15
16
namespace Kadet\Highlighter\Language;
17
18
use Kadet\Highlighter\Matcher\WholeMatcher;
19
use Kadet\Highlighter\Parser\LanguageToken;
20
use Kadet\Highlighter\Parser\Rule;
21
use Kadet\Highlighter\Parser\Token;
22
use Kadet\Highlighter\Parser\TokenFactory;
23
use Kadet\Highlighter\Parser\TokenIterator;
24
use Kadet\Highlighter\Utils\ArrayHelper;
25
26
/**
27
 * Class Language
28
 *
29
 * @package Kadet\Highlighter\Language
30
 */
31
abstract class Language
{
    /**
     * Tokenizer rules, keyed by rule name.
     *
     * Each entry is either a single Rule or an array of Rules. The entry
     * 'language.<identifier>' is (re)written on every call to _tokens().
     *
     * @var Rule[]
     */
    private $_rules;

    /**
     * Languages whose tokens are collected together with this language's own.
     *
     * @var Language[]
     */
    private $_embedded = [];

    /**
     * Language constructor.
     *
     * Stores the embedded languages and caches the result of getRules().
     *
     * @param Language[] $embedded
     */
    public function __construct(array $embedded = [])
    {
        $this->_embedded = $embedded;
        $this->_rules    = $this->getRules();
    }

    /**
     * Tokenization rules definition
     *
     * @return array
     */
    abstract public function getRules();

    /**
     * Parses source and removes wrong tokens.
     *
     * A string argument is tokenized first. The first token of the iterator is
     * taken as the opening token of this language; the remaining tokens are
     * walked in order, invalid ones are skipped, end tokens are paired with
     * their start tokens and nested languages are parsed recursively on the
     * same (shared) iterator.
     *
     * Every return path yields a TokenIterator. The branch that closes this
     * language used to return the raw token array, which contradicted the
     * documented return type and broke the `->getTokens()` call made on the
     * result of the post-processing re-parse.
     *
     * @param TokenIterator|string $tokens
     *
     * @return TokenIterator
     *
     * @throws \InvalidArgumentException when $tokens is neither a string nor a TokenIterator
     */
    public function parse($tokens = null)
    {
        if (is_string($tokens)) {
            $tokens = $this->tokenize($tokens);
        } elseif (!$tokens instanceof TokenIterator) {
            throw new \InvalidArgumentException('$tokens must be string or TokenIterator');
        }

        $start = $tokens->current();

        /** @var Token[] $result */
        $result = [$start];

        // Currently open tokens: object hash => token name.
        $context = [];
        // Every accepted start token: object hash => token.
        $all = [];

        /** @var Token $token */
        for ($tokens->next(); $tokens->valid(); $tokens->next()) {
            $token = $tokens->current();

            if (!$token->isValid($this, $context)) {
                continue;
            }

            if ($token->isStart()) {
                if ($token instanceof LanguageToken) {
                    // The nested parse consumes the shared iterator and hands back a
                    // TokenIterator; unwrap it before merging, since array_merge()
                    // only accepts arrays.
                    // NOTE(review): assumes getTokens() returns the plain token
                    // array, as the post-process branch below already relies on.
                    /** @noinspection PhpUndefinedMethodInspection bug */
                    $nested = $token->getLanguage()->parse($tokens);
                    $result = array_merge($result, $nested->getTokens());
                } else {
                    $hash           = spl_object_hash($token);
                    $all[$hash]     = $result[] = $token;
                    $context[$hash] = $token->name;
                }
            } else {
                $start = $token->getStart();

                if ($token instanceof LanguageToken && $token->getRule()->getLanguage() === $this) {
                    // todo: close unclosed tokens
                    $opening = $result[0];
                    $opening->setEnd($token);

                    if ($opening->getRule()->postProcess) {
                        // Re-parse just the text covered by this language and shift
                        // the resulting token positions back into the outer source.
                        $fragment = substr($tokens->getSource(), $opening->pos, $opening->getLength());
                        $embed    = $this->parse($fragment)->getTokens();

                        foreach ($embed as $etoken) {
                            $etoken->pos += $opening->pos;
                        }

                        $result = array_merge($result, $embed);
                    }

                    $result[] = $token;

                    return new TokenIterator($result, $tokens->getSource());
                } else {
                    if ($start !== null) {
                        // End token already knows its start: just close that context entry.
                        unset($context[spl_object_hash($start)]);
                    } else {
                        // Otherwise pair it with the most recently opened token of the same name.
                        /** @noinspection PhpUnusedParameterInspection */
                        $start = ArrayHelper::find(array_reverse($context), function ($k, $v) use ($token) {
                            return $v === $token->name;
                        });

                        if ($start !== false) {
                            $token->setStart($all[$start]);
                            unset($context[$start]);
                        }
                    }

                    $result[] = $token;
                }
            }
        }

        return new TokenIterator($result, $tokens->getSource());
    }

    /**
     * Tokenize source
     *
     * Registers this language's open/close rule under 'language.<identifier>',
     * runs every rule against the source and appends the tokens produced by
     * each embedded language. The result is not sorted.
     *
     * @param $source
     *
     * @return array
     */
    private function _tokens($source)
    {
        $result = [];
        $this->_rules['language.' . $this->getIdentifier()] = $this->getOpenClose();

        foreach ($this->_rules as $name => $rules) {
            if (!is_array($rules)) {
                $rules = [$rules];
            }

            /** @var Rule $rule */
            foreach ($rules as $rule) {
                // Rules whose name contains 'language.' keep whatever language they already have.
                if (strpos($name, 'language.') === false) {
                    $rule->setLanguage($this);
                }

                $rule->factory->setBase($name);

                $result = array_merge($result, $rule->match($source));
            }
        }

        foreach ($this->_embedded as $language) {
            $result = array_merge($result, $language->_tokens($source));
        }

        return $result;
    }

    /**
     * Builds a rewound TokenIterator over all tokens matched in the source,
     * ordered with Token::compare.
     *
     * @param string $source
     *
     * @return TokenIterator
     */
    public function tokenize($source)
    {
        $iterator = new TokenIterator($this->_tokens($source), $source);
        $iterator->uasort('\Kadet\Highlighter\Parser\Token::compare');
        $iterator->rewind();

        return $iterator;
    }

    /**
     * Unique language identifier, for example 'php'
     *
     * @return string
     */
    abstract public function getIdentifier();

    /**
     * Language range Rule(s)
     *
     * @return Rule|Rule[]
     */
    public function getOpenClose()
    {
        return new Rule(
            new WholeMatcher(), [
            'priority' => 1000,
            'factory' => new TokenFactory('Kadet\\Highlighter\\Parser\\LanguageToken'),
            'inject' => $this
        ]);
    }

    /**
     * Returns the embedded languages.
     *
     * @return Language[]
     */
    public function getEmbedded()
    {
        return $this->_embedded;
    }

    /**
     * Appends a language to the list of embedded languages.
     *
     * @param Language $lang
     */
    public function embed(Language $lang)
    {
        $this->_embedded[] = $lang;
    }
}
219