Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like Glob often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefix or suffix. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Glob and, based on these observations, to apply Extract Interface as well.
| 1 | <?php |
||
| 75 | final class Glob |
||
| 76 | { |
||
| 77 | /** |
||
| 78 | * Flag: Filter the values in {@link Glob::filter()}. |
||
| 79 | */ |
||
| 80 | const FILTER_VALUE = 1; |
||
| 81 | |||
| 82 | /** |
||
| 83 | * Flag: Filter the keys in {@link Glob::filter()}. |
||
| 84 | */ |
||
| 85 | const FILTER_KEY = 2; |
||
| 86 | |||
| 87 | /** |
||
| 88 | * Globs the file system paths matching the glob. |
||
| 89 | * |
||
| 90 | * The glob may contain the wildcard "*". This wildcard matches any number |
||
| 91 | * of characters, *including* directory separators. |
||
| 92 | * |
||
| 93 | * ```php |
||
| 94 | * foreach (Glob::glob('/project/**.twig') as $path) { |
||
| 95 | * // do something... |
||
| 96 | * } |
||
| 97 | * ``` |
||
| 98 | * |
||
| 99 | * @param string $glob The canonical glob. The glob should contain forward |
||
| 100 | * slashes as directory separators only. It must not |
||
| 101 | * contain any "." or ".." segments. Use the |
||
| 102 | * "webmozart/path-util" utility to canonicalize globs |
||
| 103 | * prior to calling this method. |
||
| 104 | * @param int $flags A bitwise combination of the flag constants in this |
||
| 105 | * class. |
||
| 106 | * |
||
| 107 | * @return string[] The matching paths. The keys of the array are |
||
| 108 | * incrementing integers. |
||
| 109 | */ |
||
| 110 | 8 | public static function glob($glob, $flags = 0) |
|
| 118 | |||
| 119 | /** |
||
| 120 | * Matches a path against a glob. |
||
| 121 | * |
||
| 122 | * ```php |
||
| 123 | * if (Glob::match('/project/views/index.html.twig', '/project/**.twig')) { |
||
| 124 | * // path matches |
||
| 125 | * } |
||
| 126 | * ``` |
||
| 127 | * |
||
| 128 | * @param string $path The path to match. |
||
| 129 | * @param string $glob The canonical glob. The glob should contain forward |
||
| 130 | * slashes as directory separators only. It must not |
||
| 131 | * contain any "." or ".." segments. Use the |
||
| 132 | * "webmozart/path-util" utility to canonicalize globs |
||
| 133 | * prior to calling this method. |
||
| 134 | * @param int $flags A bitwise combination of the flag constants in |
||
| 135 | * this class. |
||
| 136 | * |
||
| 137 | * @return bool Returns `true` if the path is matched by the glob. |
||
| 138 | */ |
||
| 139 | 12 | public static function match($path, $glob, $flags = 0) |
|
| 155 | |||
| 156 | /** |
||
| 157 | * Filters an array for paths matching a glob. |
||
| 158 | * |
||
| 159 | * The filtered array is returned. This array preserves the keys of the |
||
| 160 | * passed array. |
||
| 161 | * |
||
| 162 | * ```php |
||
| 163 | * $filteredPaths = Glob::filter($paths, '/project/**.twig'); |
||
| 164 | * ``` |
||
| 165 | * |
||
| 166 | * @param string[] $paths A list of paths. |
||
| 167 | * @param string $glob The canonical glob. The glob should contain |
||
| 168 | * forward slashes as directory separators only. It |
||
| 169 | * must not contain any "." or ".." segments. Use the |
||
| 170 | * "webmozart/path-util" utility to canonicalize |
||
| 171 | * globs prior to calling this method. |
||
| 172 | * @param int $flags A bitwise combination of the flag constants in |
||
| 173 | * this class. |
||
| 174 | * |
||
| 175 | * @return string[] The paths matching the glob indexed by their original |
||
| 176 | * keys. |
||
| 177 | */ |
||
| 178 | 9 | public static function filter(array $paths, $glob, $flags = self::FILTER_VALUE) |
|
| 221 | |||
| 222 | /** |
||
| 223 | * Returns the base path of a glob. |
||
| 224 | * |
||
| 225 | * This method returns the most specific directory that contains all files |
||
| 226 | * matched by the glob. If this directory does not exist on the file system, |
||
| 227 | * it's not necessary to execute the glob algorithm. |
||
| 228 | * |
||
| 229 | * More specifically, the "base path" is the longest path trailed by a "/" |
||
| 230 | * on the left of the first wildcard "*". If the glob does not contain |
||
| 231 | * wildcards, the directory name of the glob is returned. |
||
| 232 | * |
||
| 233 | * ```php |
||
| 234 | * Glob::getBasePath('/css/*.css'); |
||
| 235 | * // => /css |
||
| 236 | * |
||
| 237 | * Glob::getBasePath('/css/style.css'); |
||
| 238 | * // => /css |
||
| 239 | * |
||
| 240 | * Glob::getBasePath('/css/st*.css'); |
||
| 241 | * // => /css |
||
| 242 | * |
||
| 243 | * Glob::getBasePath('/*.css'); |
||
| 244 | * // => / |
||
| 245 | * ``` |
||
| 246 | * |
||
| 247 | * @param string $glob The canonical glob. The glob should contain forward |
||
| 248 | * slashes as directory separators only. It must not |
||
| 249 | * contain any "." or ".." segments. Use the |
||
| 250 | * "webmozart/path-util" utility to canonicalize globs |
||
| 251 | * prior to calling this method. |
||
| 252 | * @param int $flags A bitwise combination of the flag constants in this |
||
| 253 | * class. |
||
| 254 | * |
||
| 255 | * @return string The base path of the glob. |
||
| 256 | */ |
||
| 257 | 42 | public static function getBasePath($glob, $flags = 0) |
|
| 281 | |||
/**
 * Translates a glob into an anchored regular expression.
 *
 * This is useful when many paths have to be tested against one glob:
 *
 * ```php
 * $staticPrefix = Glob::getStaticPrefix('/project/**.twig');
 * $regEx = Glob::toRegEx('/project/**.twig');
 *
 * if (0 !== strpos($path, $staticPrefix)) {
 *     // no match
 * }
 *
 * if (!preg_match($regEx, $path)) {
 *     // no match
 * }
 * ```
 *
 * Checking the static prefix returned by {@link getStaticPrefix()} first
 * avoids unnecessary calls to the more expensive {@link preg_match()}.
 *
 * Translation rules applied character by character:
 *
 *  - ".", "(", ")", "|", "+", "^", "$" and the delimiter are escaped with
 *    a backslash;
 *  - a "/" directly followed by "**" and another "/" becomes
 *    "/([^/]+/)*"; any other "/" is copied;
 *  - "*" becomes "[^/]*" and "?" becomes ".";
 *  - "{" opens a group "(", "}" closes it ")" when a group is open and
 *    is copied literally otherwise; "," becomes "|" inside a group;
 *  - "[" opens a character class (a directly following "^" is kept as
 *    negation) and "]" closes it; "]" and "-" outside of a class are
 *    escaped;
 *  - a backslash followed by one of "*", "?", "{", "}", "[", "]", "-",
 *    "^" or a backslash emits that character escaped; any other
 *    backslash is emitted as an escaped (literal) backslash.
 *
 * @param string $glob      The canonical glob. The glob should contain
 *                          forward slashes as directory separators only.
 *                          It must not contain any "." or ".." segments.
 *                          Use the "webmozart/path-util" utility to
 *                          canonicalize globs prior to calling this
 *                          method.
 * @param int    $flags     A bitwise combination of the flag constants in
 *                          this class. Not read by this method.
 * @param string $delimiter The delimiter placed around the expression.
 *                          Occurrences of it inside the glob are escaped.
 *
 * @return string The regular expression for matching the glob, wrapped in
 *                the delimiter and anchored with "^" and "$".
 *
 * @throws InvalidArgumentException If the glob is neither absolute (as
 *                                  decided by Path::isAbsolute()) nor
 *                                  contains "://", or if a "[" or "{" is
 *                                  left unclosed.
 */
public static function toRegEx($glob, $flags = 0, $delimiter = '~')
{
    // Same short-circuit order as a plain "not absolute AND not a URI" test:
    // the URI check only runs when the glob is not absolute.
    if (!(Path::isAbsolute($glob) || false !== strpos($glob, '://'))) {
        throw new InvalidArgumentException(sprintf(
            'The glob "%s" is not absolute and not a URI.',
            $glob
        ));
    }

    $pattern = '';
    $insideClass = false; // true between "[" and the matching "]"
    $braceDepth = 0;      // number of currently open "{" groups
    $globLength = strlen($glob);
    $pos = 0;

    while ($pos < $globLength) {
        $char = $glob[$pos];

        // The switch is kept on purpose: the delimiter must be compared
        // after the fixed regex meta characters and before the glob syntax.
        switch ($char) {
            case '.':
            case '(':
            case ')':
            case '|':
            case '+':
            case '^':
            case '$':
            case $delimiter:
                $pattern .= '\\'.$char;
                break;

            case '/':
                // "/**/" stands for zero or more complete directory names.
                if ('**/' === substr($glob, $pos + 1, 3)) {
                    $pattern .= '/([^/]+/)*';
                    $pos += 3;
                    break;
                }

                $pattern .= '/';
                break;

            case '*':
                $pattern .= '[^/]*';
                break;

            case '?':
                $pattern .= '.';
                break;

            case '{':
                ++$braceDepth;
                $pattern .= '(';
                break;

            case '}':
                // A closing brace without an opening one stays literal.
                if ($braceDepth <= 0) {
                    $pattern .= '}';
                    break;
                }

                --$braceDepth;
                $pattern .= ')';
                break;

            case ',':
                // Commas separate alternatives only inside "{...}".
                if ($braceDepth > 0) {
                    $pattern .= '|';
                } else {
                    $pattern .= ',';
                }
                break;

            case '[':
                $insideClass = true;

                // Keep a leading "^" as the negation of the class.
                if (isset($glob[$pos + 1]) && '^' === $glob[$pos + 1]) {
                    $pattern .= '[^';
                    ++$pos;
                    break;
                }

                $pattern .= '[';
                break;

            case ']':
                if ($insideClass) {
                    $pattern .= ']';
                } else {
                    $pattern .= '\\]';
                }
                $insideClass = false;
                break;

            case '-':
                // "-" denotes a range inside a class and is literal outside.
                if ($insideClass) {
                    $pattern .= '-';
                } else {
                    $pattern .= '\\-';
                }
                break;

            case '\\':
                $next = isset($glob[$pos + 1]) ? $glob[$pos + 1] : null;

                // An escaped glob character is emitted escaped and consumed.
                if (null !== $next && false !== strpos('*?{}[]-^\\', $next)) {
                    $pattern .= '\\'.$next;
                    ++$pos;
                    break;
                }

                // Any other (or a trailing) backslash is a literal backslash.
                $pattern .= '\\\\';
                break;

            default:
                $pattern .= $char;
                break;
        }

        ++$pos;
    }

    if ($insideClass) {
        throw new InvalidArgumentException(sprintf(
            'Invalid glob: missing ] in %s',
            $glob
        ));
    }

    if ($braceDepth > 0) {
        throw new InvalidArgumentException(sprintf(
            'Invalid glob: missing } in %s',
            $glob
        ));
    }

    $anchored = '^'.$pattern.'$';

    return $delimiter.$anchored.$delimiter;
}
||
| 442 | |||
| 443 | /** |
||
| 444 | * Returns the static prefix of a glob. |
||
| 445 | * |
||
| 446 | * The "static prefix" is the part of the glob up to the first wildcard "*". |
||
| 447 | * If the glob does not contain wildcards, the full glob is returned. |
||
| 448 | * |
||
| 449 | * @param string $glob The canonical glob. The glob should contain forward |
||
| 450 | * slashes as directory separators only. It must not |
||
| 451 | * contain any "." or ".." segments. Use the |
||
| 452 | * "webmozart/path-util" utility to canonicalize globs |
||
| 453 | * prior to calling this method. |
||
| 454 | * @param int $flags A bitwise combination of the flag constants in this |
||
| 455 | * class. |
||
| 456 | * |
||
| 457 | * @return string The static prefix of the glob. |
||
| 458 | */ |
||
| 459 | 81 | public static function getStaticPrefix($glob, $flags = 0) |
|
| 516 | |||
| 517 | /** |
||
| 518 | * Returns whether the glob contains a dynamic part. |
||
| 519 | * |
||
| 520 | * The glob contains a dynamic part if it contains an unescaped "*" or |
||
| 521 | * "{" character. |
||
| 522 | * |
||
| 523 | * @param string $glob The glob to test. |
||
| 524 | * |
||
| 525 | * @return bool Returns `true` if the glob contains a dynamic part and |
||
| 526 | * `false` otherwise. |
||
| 527 | */ |
||
| 528 | 40 | public static function isDynamic($glob) |
|
| 532 | |||
| 533 | private function __construct() |
||
| 536 | } |
||
| 537 |