Duplicate code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Glob often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Glob, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
75 | final class Glob |
||
76 | { |
||
77 | /** |
||
78 | * Flag: Filter the values in {@link Glob::filter()}. |
||
79 | */ |
||
80 | const FILTER_VALUE = 1; |
||
81 | |||
82 | /** |
||
83 | * Flag: Filter the keys in {@link Glob::filter()}. |
||
84 | */ |
||
85 | const FILTER_KEY = 2; |
||
86 | |||
87 | /** |
||
88 | * Globs the file system paths matching the glob. |
||
89 | * |
||
90 | * The glob may contain the wildcard "*". This wildcard matches any number |
||
91 | * of characters, *including* directory separators. |
||
92 | * |
||
93 | * ```php |
||
94 | * foreach (Glob::glob('/project/**.twig') as $path) { |
||
95 | * // do something... |
||
96 | * } |
||
97 | * ``` |
||
98 | * |
||
99 | * @param string $glob The canonical glob. The glob should contain forward |
||
100 | * slashes as directory separators only. It must not |
||
101 | * contain any "." or ".." segments. Use the |
||
102 | * "webmozart/path-util" utility to canonicalize globs |
||
103 | * prior to calling this method. |
||
104 | * @param int $flags A bitwise combination of the flag constants in this |
||
105 | * class. |
||
106 | * |
||
107 | * @return string[] The matching paths. The keys of the array are |
||
108 | * incrementing integers. |
||
109 | */ |
||
110 | 8 | public static function glob($glob, $flags = 0) |
|
118 | |||
119 | /** |
||
120 | * Matches a path against a glob. |
||
121 | * |
||
122 | * ```php |
||
123 | * if (Glob::match('/project/views/index.html.twig', '/project/**.twig')) { |
||
124 | * // path matches |
||
125 | * } |
||
126 | * ``` |
||
127 | * |
||
128 | * @param string $path The path to match. |
||
129 | * @param string $glob The canonical glob. The glob should contain forward |
||
130 | * slashes as directory separators only. It must not |
||
131 | * contain any "." or ".." segments. Use the |
||
132 | * "webmozart/path-util" utility to canonicalize globs |
||
133 | * prior to calling this method. |
||
134 | * @param int $flags A bitwise combination of the flag constants in |
||
135 | * this class. |
||
136 | * |
||
137 | * @return bool Returns `true` if the path is matched by the glob. |
||
138 | */ |
||
139 | 12 | public static function match($path, $glob, $flags = 0) |
|
155 | |||
156 | /** |
||
157 | * Filters an array for paths matching a glob. |
||
158 | * |
||
159 | * The filtered array is returned. This array preserves the keys of the |
||
160 | * passed array. |
||
161 | * |
||
162 | * ```php |
||
163 | * $filteredPaths = Glob::filter($paths, '/project/**.twig'); |
||
164 | * ``` |
||
165 | * |
||
166 | * @param string[] $paths A list of paths. |
||
167 | * @param string $glob The canonical glob. The glob should contain |
||
168 | * forward slashes as directory separators only. It |
||
169 | * must not contain any "." or ".." segments. Use the |
||
170 | * "webmozart/path-util" utility to canonicalize |
||
171 | * globs prior to calling this method. |
||
172 | * @param int $flags A bitwise combination of the flag constants in |
||
173 | * this class. |
||
174 | * |
||
175 | * @return string[] The paths matching the glob indexed by their original |
||
176 | * keys. |
||
177 | */ |
||
178 | 9 | public static function filter(array $paths, $glob, $flags = self::FILTER_VALUE) |
|
221 | |||
222 | /** |
||
223 | * Returns the base path of a glob. |
||
224 | * |
||
225 | * This method returns the most specific directory that contains all files |
||
226 | * matched by the glob. If this directory does not exist on the file system, |
||
227 | * it's not necessary to execute the glob algorithm. |
||
228 | * |
||
229 | * More specifically, the "base path" is the longest path trailed by a "/" |
||
230 | * on the left of the first wildcard "*". If the glob does not contain |
||
231 | * wildcards, the directory name of the glob is returned. |
||
232 | * |
||
233 | * ```php |
||
234 | * Glob::getBasePath('/css/*.css'); |
||
235 | * // => /css |
||
236 | * |
||
237 | * Glob::getBasePath('/css/style.css'); |
||
238 | * // => /css |
||
239 | * |
||
240 | * Glob::getBasePath('/css/st*.css'); |
||
241 | * // => /css |
||
242 | * |
||
243 | * Glob::getBasePath('/*.css'); |
||
244 | * // => / |
||
245 | * ``` |
||
246 | * |
||
247 | * @param string $glob The canonical glob. The glob should contain forward |
||
248 | * slashes as directory separators only. It must not |
||
249 | * contain any "." or ".." segments. Use the |
||
250 | * "webmozart/path-util" utility to canonicalize globs |
||
251 | * prior to calling this method. |
||
252 | * @param int $flags A bitwise combination of the flag constants in this |
||
253 | * class. |
||
254 | * |
||
255 | * @return string The base path of the glob. |
||
256 | */ |
||
257 | 42 | public static function getBasePath($glob, $flags = 0) |
|
281 | |||
/**
 * Converts a glob to a regular expression.
 *
 * Use this method if you need to match many paths against a glob:
 *
 * ```php
 * $staticPrefix = Glob::getStaticPrefix('/project/**' . '/*.twig');
 * $regEx = Glob::toRegEx('/project/**' . '/*.twig');
 *
 * if (0 !== strpos($path, $staticPrefix)) {
 *     // no match
 * }
 *
 * if (!preg_match($regEx, $path)) {
 *     // no match
 * }
 * ```
 *
 * You should always test whether a path contains the static prefix of the
 * glob returned by {@link getStaticPrefix()} to reduce the number of calls
 * to the expensive {@link preg_match()}.
 *
 * Translation rules applied by this method:
 *
 *  - "/**" followed by "/" matches zero or more complete directory levels;
 *    this is the only construct that crosses directory separators.
 *  - "*" matches any run of characters other than "/".
 *  - "?" matches any single character.
 *  - "{a,b}" becomes an alternation group; groups may be nested. A "}"
 *    without an open group and a "," outside a group are literals.
 *  - "[...]" is copied as a character class; a "^" directly after "["
 *    negates the class.
 *  - A backslash followed by one of "*", "?", "{", "}", "[", "]", "-",
 *    "^" or a backslash yields that character escaped. Any other
 *    backslash (including a trailing one) is emitted as a literal
 *    backslash.
 *  - The characters ".", "(", ")", "|", "+", "^", "$" and the delimiter
 *    are escaped so that they match literally.
 *
 * The returned expression is anchored with "^" and "$" and wrapped in the
 * delimiter.
 *
 * @param string $glob      The canonical glob. The glob should contain
 *                          forward slashes as directory separators only.
 *                          It must not contain any "." or ".." segments.
 *                          Use the "webmozart/path-util" utility to
 *                          canonicalize globs prior to calling this method.
 * @param int    $flags     A bitwise combination of the flag constants in
 *                          this class. The value is not read by this
 *                          method.
 * @param string $delimiter The delimiter character that wraps the returned
 *                          expression. Occurrences of the delimiter in the
 *                          generated pattern are escaped.
 *
 * @return string The regular expression for matching the glob.
 *
 * @throws InvalidArgumentException If the glob is neither absolute nor a
 *                                  URI, or if a "[" or "{" is left
 *                                  unclosed.
 */
public static function toRegEx($glob, $flags = 0, $delimiter = '~')
{
    if (!Path::isAbsolute($glob) && false === strpos($glob, '://')) {
        throw new InvalidArgumentException(sprintf(
            'The glob "%s" is not absolute and not a URI.',
            $glob
        ));
    }

    // Several emitted fragments contain a literal "/". When "/" is also the
    // delimiter it has to be escaped, otherwise the pattern would be cut
    // off at the first slash. For every other delimiter this is a plain
    // "/", so the generated expression is unchanged.
    $slash = '/' === $delimiter ? '\\/' : '/';

    $inSquare = false;
    $curlyLevels = 0;
    $regex = '';
    $length = strlen($glob);

    for ($i = 0; $i < $length; ++$i) {
        $c = $glob[$i];

        switch ($c) {
            // Tested before "case $delimiter": the first matching case
            // wins, and the "/**/" look-ahead must also run when "/" is
            // the delimiter.
            case '/':
                if (isset($glob[$i + 3]) && '**/' === $glob[$i + 1].$glob[$i + 2].$glob[$i + 3]) {
                    // "/**/" => zero or more complete directory levels
                    $regex .= $slash.'([^'.$slash.']+'.$slash.')*';
                    $i += 3;
                } else {
                    $regex .= $slash;
                }
                break;

            // Characters with a special meaning in regular expressions but
            // not in globs are matched literally.
            case '.':
            case '(':
            case ')':
            case '|':
            case '+':
            case '^':
            case '$':
            case $delimiter:
                $regex .= "\\$c";
                break;

            case '*':
                // Never crosses a directory separator
                $regex .= '[^'.$slash.']*';
                break;

            case '?':
                $regex .= '.';
                break;

            case '{':
                $regex .= '(';
                ++$curlyLevels;
                break;

            case '}':
                if ($curlyLevels > 0) {
                    $regex .= ')';
                    --$curlyLevels;
                } else {
                    // No open group: "}" is an ordinary character
                    $regex .= '}';
                }
                break;

            case ',':
                // Separates alternatives inside "{...}" only
                $regex .= $curlyLevels > 0 ? '|' : ',';
                break;

            case '[':
                $regex .= '[';
                $inSquare = true;
                if (isset($glob[$i + 1]) && '^' === $glob[$i + 1]) {
                    // Consume the negation here so that the "^" is not
                    // escaped by the generic branch above
                    $regex .= '^';
                    ++$i;
                }
                break;

            case ']':
                $regex .= $inSquare ? ']' : '\\]';
                $inSquare = false;
                break;

            case '-':
                // A range operator inside "[...]", a literal elsewhere
                $regex .= $inSquare ? '-' : '\\-';
                break;

            case '\\':
                if (isset($glob[$i + 1])) {
                    switch ($glob[$i + 1]) {
                        case '*':
                        case '?':
                        case '{':
                        case '}':
                        case '[':
                        case ']':
                        case '-':
                        case '^':
                        case '\\':
                            // Escaped glob character: emit it escaped and
                            // skip it in the outer loop
                            $regex .= '\\'.$glob[$i + 1];
                            ++$i;
                            break;

                        default:
                            // Literal backslash; the following character
                            // is handled by the next iteration
                            $regex .= '\\\\';
                    }
                } else {
                    // Trailing backslash
                    $regex .= '\\\\';
                }
                break;

            default:
                $regex .= $c;
                break;
        }
    }

    if ($inSquare) {
        throw new InvalidArgumentException(sprintf(
            'Invalid glob: missing ] in %s',
            $glob
        ));
    }

    if ($curlyLevels > 0) {
        throw new InvalidArgumentException(sprintf(
            'Invalid glob: missing } in %s',
            $glob
        ));
    }

    return $delimiter.'^'.$regex.'$'.$delimiter;
}
||
442 | |||
443 | /** |
||
444 | * Returns the static prefix of a glob. |
||
445 | * |
||
446 | * The "static prefix" is the part of the glob up to the first wildcard "*". |
||
447 | * If the glob does not contain wildcards, the full glob is returned. |
||
448 | * |
||
449 | * @param string $glob The canonical glob. The glob should contain forward |
||
450 | * slashes as directory separators only. It must not |
||
451 | * contain any "." or ".." segments. Use the |
||
452 | * "webmozart/path-util" utility to canonicalize globs |
||
453 | * prior to calling this method. |
||
454 | * @param int $flags A bitwise combination of the flag constants in this |
||
455 | * class. |
||
456 | * |
||
457 | * @return string The static prefix of the glob. |
||
458 | */ |
||
459 | 81 | public static function getStaticPrefix($glob, $flags = 0) |
|
516 | |||
517 | /** |
||
518 | * Returns whether the glob contains a dynamic part. |
||
519 | * |
||
520 | * The glob contains a dynamic part if it contains an unescaped "*" or |
||
521 | * "{" character. |
||
522 | * |
||
523 | * @param string $glob The glob to test. |
||
524 | * |
||
525 | * @return bool Returns `true` if the glob contains a dynamic part and |
||
526 | * `false` otherwise. |
||
527 | */ |
||
528 | 40 | public static function isDynamic($glob) |
|
532 | |||
533 | private function __construct() |
||
536 | } |
||
537 |