Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like RegularParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use RegularParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | final class RegularParser implements ParserInterface |
||
14 | { |
||
/** @var string NOTE(review): not referenced in the visible methods; presumably the pattern tokenize() uses - confirm */
private $lexerRegex;

/** @var string pattern a shortcode name must satisfy (checked with preg_match() in shortcode()) */
private $nameRegex;

/** @var array[] tokens returned by tokenize(); element [1] is read as the token text and [2] as its offset */
private $tokens;

/** @var int number of entries in $tokens, cached by parse() */
private $tokensCount;

/** @var int index into $tokens of the token currently being examined */
private $position;

/** @var int[] stack of saved positions, popped by backtrack() and getBacktrack() */
private $backtracks;

/** @var int position popped by the most recent backtrack() call (0 at the start of parse()) */
private $lastBacktrack;

/* Token types passed to match() and lookahead() */

/** Starts an opening or closing tag */
const TOKEN_OPEN = 1;

/** Ends an opening or closing tag */
const TOKEN_CLOSE = 2;

/** Precedes TOKEN_CLOSE in a self-closing tag and follows TOKEN_OPEN in a closing tag */
const TOKEN_MARKER = 3;

/** Sits between a name and its value (bbCode value or parameter value) */
const TOKEN_SEPARATOR = 4;

/** Surrounds a delimited (quoted) value */
const TOKEN_DELIMITER = 5;

/** Shortcode names, parameter names and bare values */
const TOKEN_STRING = 6;

/** Optional whitespace between the parts of a tag */
const TOKEN_WS = 7;
||
31 | |||
32 | 15 | public function __construct(SyntaxInterface $syntax = null) |
|
37 | |||
/**
 * Tokenizes the text and collects every shortcode the shortcode() rule
 * recognizes in it.
 *
 * @param string $text
 *
 * @return ParsedShortcode[]
 */
public function parse($text)
{
    // xdebug.max_nesting_level is set to -1 for the duration of the parse;
    // ini_set() hands back the previous value so it can be restored below
    $previousNestingLevel = ini_set('xdebug.max_nesting_level', -1);

    // reset the parser state for this input
    $this->tokens = $this->tokenize($text);
    $this->tokensCount = \count($this->tokens);
    $this->position = 0;
    $this->lastBacktrack = 0;
    $this->backtracks = array();

    $found = array();
    while($this->position < $this->tokensCount) {
        // skip everything up to the next opening token
        while($this->position < $this->tokensCount && false === $this->lookahead(self::TOKEN_OPEN)) {
            ++$this->position;
        }

        $openNames = array();
        $this->beginBacktrack();
        $result = $this->shortcode($openNames);

        if(false === $result) {
            // not a shortcode: rewind to the saved position, step over one token and try again
            $this->backtrack();
            $this->match(null, true);
        } elseif(\is_array($result)) {
            foreach($result as $parsed) {
                $found[] = $parsed;
            }
        }
    }

    ini_set('xdebug.max_nesting_level', $previousNestingLevel);

    return $found;
}
||
75 | |||
76 | 50 | private function getObject($name, $parameters, $bbCode, $offset, $content, $text) |
|
80 | |||
81 | /* --- RULES ----------------------------------------------------------- */ |
||
82 | |||
/**
 * Tries to match one shortcode (opening tag, optional content, optional
 * closing tag) starting at the current token.
 *
 * @param string[] $names names of the shortcodes that are currently open;
 *                        this shortcode's name is appended while its content
 *                        is scanned
 *
 * @return array|string|false list of objects built by getObject() when a
 *                            shortcode was matched, the name of a closing tag
 *                            that belongs to another entry of $names, or
 *                            false when the tokens do not form a shortcode
 */
private function shortcode(array &$names)
{
    if(!$this->match(self::TOKEN_OPEN, false)) { return false; }
    // element [2] of the opening token that was just consumed
    $offset = $this->tokens[$this->position - 1][2];
    $this->match(self::TOKEN_WS, false);
    if('' === $name = $this->match(self::TOKEN_STRING, false)) { return false; }
    if($this->lookahead(self::TOKEN_STRING)) { return false; }
    if(1 !== preg_match($this->nameRegex, $name)) { return false; }
    $this->match(self::TOKEN_WS, false);
    // bbCode
    $bbCode = $this->match(self::TOKEN_SEPARATOR, true) ? $this->value() : null;
    if(false === $bbCode) { return false; }
    // parameters
    if(false === ($parameters = $this->parameters())) { return false; }

    // self-closing
    if($this->match(self::TOKEN_MARKER, true)) {
        if(!$this->match(self::TOKEN_CLOSE, false)) { return false; }

        return array($this->getObject($name, $parameters, $bbCode, $offset, null, $this->getBacktrack()));
    }

    // just-closed or with-content
    if(!$this->match(self::TOKEN_CLOSE, false)) { return false; }
    $this->beginBacktrack();
    $names[] = $name;

    // begin inlined content()
    // $content only matters as "string vs false" below; its text is replaced by getBacktrack() later
    $content = '';
    $shortcodes = array();
    $closingName = null;

    while($this->position < $this->tokensCount) {
        while($this->position < $this->tokensCount && false === $this->lookahead(self::TOKEN_OPEN)) {
            $content .= $this->match(null, true);
        }

        $this->beginBacktrack();
        $contentMatchedShortcodes = $this->shortcode($names);
        // a string means the nested call ran into a closing tag for one of the open names
        if(\is_string($contentMatchedShortcodes)) {
            $closingName = $contentMatchedShortcodes;
            break;
        }
        if(\is_array($contentMatchedShortcodes)) {
            foreach($contentMatchedShortcodes as $matchedShortcode) {
                $shortcodes[] = $matchedShortcode;
            }
            continue;
        }
        $this->backtrack();

        $this->beginBacktrack();
        if(false !== ($closingName = $this->close($names))) {
            $this->backtrack();
            $shortcodes = array();
            break;
        }
        $closingName = null;
        $this->backtrack();

        $content .= $this->match(null, false);
    }
    // running out of tokens means no closing tag was found
    $content = $this->position < $this->tokensCount ? $content : false;
    // end inlined content()

    // the closing tag belongs to a different open shortcode: hand its name to the caller
    if(null !== $closingName && $closingName !== $name) {
        array_pop($names);
        // NOTE(review): presumably the two positions saved for this shortcode
        // (by the caller and after the opening tag) - confirm against beginBacktrack()
        array_pop($this->backtracks);
        array_pop($this->backtracks);

        return $closingName;
    }
    // no closing tag of its own: report the shortcode without content, followed by the nested ones
    if(false === $content || $closingName !== $name) {
        $this->backtrack(false);
        $text = $this->backtrack(false);
        array_pop($names);

        return array_merge(array($this->getObject($name, $parameters, $bbCode, $offset, null, $text)), $shortcodes);
    }
    $content = $this->getBacktrack();
    // compare against false explicitly: close() returns the matched name, and the name "0" is falsy in PHP
    if(false === $this->close($names)) { return false; }
    array_pop($names);

    return array($this->getObject($name, $parameters, $bbCode, $offset, $content, $this->getBacktrack()));
}

/**
 * Tries to match a closing tag (open, marker, name, close) at the current
 * token.
 *
 * @param string[] $names names of the shortcodes that are currently open
 *
 * @return string|false the closing tag's name when it is one of $names,
 *                      false otherwise
 */
private function close(array &$names)
{
    if(!$this->match(self::TOKEN_OPEN, true)) { return false; }
    if(!$this->match(self::TOKEN_MARKER, true)) { return false; }
    // match() signals "no match" with '' (as shortcode() and value() already assume);
    // a plain truthiness test would also reject the name "0"
    if('' === ($closingName = $this->match(self::TOKEN_STRING, true))) { return false; }
    if(!$this->match(self::TOKEN_CLOSE, false)) { return false; }

    return \in_array($closingName, $names, true) ? $closingName : false;
}
||
179 | |||
/**
 * Reads the parameter list of an opening tag, stopping in front of the
 * marker or close token that ends the tag.
 *
 * @return array|false map of parameter name to value (null for a parameter
 *                     without a value), or false when the tokens do not form
 *                     a valid parameter list
 */
private function parameters()
{
    $parameters = array();

    while(true) {
        $this->match(self::TOKEN_WS, false);
        if($this->lookahead(self::TOKEN_MARKER) || $this->lookahead(self::TOKEN_CLOSE)) { break; }
        // match() signals "no match" with ''; a plain truthiness test would also reject the name "0"
        if('' === ($name = $this->match(self::TOKEN_STRING, true))) { return false; }
        // no separator after the name: the parameter has no value
        if(!$this->match(self::TOKEN_SEPARATOR, true)) { $parameters[$name] = null; continue; }
        if(false === ($value = $this->value())) { return false; }
        $this->match(self::TOKEN_WS, false);

        $parameters[$name] = $value;
    }

    return $parameters;
}
||
197 | |||
/**
 * Reads a value at the current token: either everything between two
 * delimiter tokens, or a run of consecutive string tokens.
 *
 * @return string|false the value text, or false when the closing delimiter
 *                      is missing or no string token is present
 */
private function value()
{
    // delimited form: collect every token up to the closing delimiter
    if($this->match(self::TOKEN_DELIMITER, false)) {
        $delimited = '';
        while($this->position < $this->tokensCount && false === $this->lookahead(self::TOKEN_DELIMITER)) {
            $delimited .= $this->match(null, false);
        }
        if(!$this->match(self::TOKEN_DELIMITER, false)) {
            return false;
        }

        return $delimited;
    }

    // bare form: glue consecutive string tokens together; match() yields '' once the run ends
    $bare = '';
    for($piece = $this->match(self::TOKEN_STRING, false); '' !== $piece; $piece = $this->match(self::TOKEN_STRING, false)) {
        $bare .= $piece;
    }

    return '' === $bare ? false : $bare;
}
||
221 | |||
222 | /* --- PARSER ---------------------------------------------------------- */ |
||
223 | |||
224 | 58 | private function beginBacktrack() |
|
229 | |||
/**
 * Pops the most recently saved position and returns the text of the tokens
 * from that position up to, but not including, the current one. The current
 * position is left untouched.
 *
 * @return string
 */
private function getBacktrack()
{
    return $this->concatTokens(array_pop($this->backtracks), $this->position);
}

/**
 * Pops the most recently saved position, optionally rewinds to it, and
 * records it as the last backtrack position.
 *
 * @param bool $modifyPosition whether to move the current position back to
 *                             the popped one
 *
 * @return string text of the tokens between the popped position and the
 *                previous last backtrack position
 */
private function backtrack($modifyPosition = true)
{
    $position = array_pop($this->backtracks);
    if($modifyPosition) {
        $this->position = $position;
    }

    $backtrack = $this->concatTokens($position, $this->lastBacktrack);
    $this->lastBacktrack = $position;

    return $backtrack;
}

/**
 * Concatenates element [1] of every token in the range [$from, $to).
 * Shared by getBacktrack() and backtrack(), which used to carry identical
 * copies of this loop.
 *
 * @param int $from first token index to include
 * @param int $to   first token index to leave out
 *
 * @return string
 */
private function concatTokens($from, $to)
{
    $text = '';
    for($i = $from; $i < $to; $i++) {
        $text .= $this->tokens[$i][1];
    }

    return $text;
}
||
256 | |||
257 | 58 | private function lookahead($type) |
|
261 | |||
262 | 58 | private function match($type, $ws) |
|
263 | { |
||
264 | 58 | if($this->position >= $this->tokensCount) { |
|
265 | 21 | return ''; |
|
266 | } |
||
267 | |||
268 | 58 | $token = $this->tokens[$this->position]; |
|
280 | |||
281 | /* --- LEXER ----------------------------------------------------------- */ |
||
282 | |||
283 | 59 | private function tokenize($text) |
|
310 | |||
311 | 15 | private function prepareLexer(SyntaxInterface $syntax) |
|
339 | } |
||
340 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.