Completed
Push — master ( 410005...efdb94 )
by Bernhard
04:50
created

Glob::toRegEx()   B

Complexity

Conditions 4
Paths 3

Size

Total Lines 36
Code Lines 11

Duplication

Lines 6
Ratio 16.67 %

Code Coverage

Tests 11
CRAP Score 4

Importance

Changes 8
Bugs 2 Features 2
Metric Value
c 8
b 2
f 2
dl 6
loc 36
ccs 11
cts 11
cp 1
rs 8.5806
cc 4
eloc 11
nc 3
nop 2
crap 4
1
<?php
2
3
/*
4
 * This file is part of the webmozart/glob package.
5
 *
6
 * (c) Bernhard Schussek <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace Webmozart\Glob;
13
14
use InvalidArgumentException;
15
use Webmozart\Glob\Iterator\GlobIterator;
16
use Webmozart\PathUtil\Path;
17
18
/**
19
 * Searches and matches file paths using Ant-like globs.
20
 *
21
 * This class implements an Ant-like version of PHP's `glob()` function. The
22
 * wildcard "*" matches any number of characters except directory separators.
23
 * The double wildcard "**" matches any number of characters, including
24
 * directory separators.
25
 *
26
 * Use {@link glob()} to glob the filesystem for paths:
27
 *
28
 * ```php
29
 * foreach (Glob::glob('/project/**.twig') as $path) {
30
 *     // do something...
31
 * }
32
 * ```
33
 *
34
 * Use {@link match()} to match a file path against a glob:
35
 *
36
 * ```php
37
 * if (Glob::match('/project/views/index.html.twig', '/project/**.twig')) {
38
 *     // path matches
39
 * }
40
 * ```
41
 *
42
 * You can also filter an array of paths for all paths that match your glob with
43
 * {@link filter()}:
44
 *
45
 * ```php
46
 * $filteredPaths = Glob::filter($paths, '/project/**.twig');
47
 * ```
48
 *
49
 * Internally, the methods described above convert the glob into a regular
50
 * expression that is then matched against the matched paths. If you need to
51
 * match many paths against the same glob, you should convert the glob manually
52
 * and use {@link preg_match()} to test the paths:
53
 *
54
 * ```php
55
 * $staticPrefix = Glob::getStaticPrefix('/project/**.twig');
56
 * $regEx = Glob::toRegEx('/project/**.twig');
57
 *
58
 * if (0 !== strpos($path, $staticPrefix)) {
59
 *     // no match
60
 * }
61
 *
62
 * if (!preg_match($regEx, $path)) {
63
 *     // no match
64
 * }
65
 * ```
66
 *
67
 * The method {@link getStaticPrefix()} returns the part of the glob up to the
68
 * first wildcard "*". You should always test whether a path has this prefix
69
 * before calling the much more expensive {@link preg_match()}.
70
 *
71
 * @since  1.0
72
 *
73
 * @author Bernhard Schussek <[email protected]>
74
 */
75
final class Glob
76
{
77
    /**
78
     * Flag: Enable escaping of special characters with leading backslashes.
79
     */
80
    const ESCAPE = 1;
81
82
    /**
     * Globs the file system paths matching the glob.
     *
     * The glob may contain the wildcard "*". This wildcard matches any number
     * of characters except directory separators.
     *
     * ```php
     * foreach (Glob::glob('/project/*.twig') as $path) {
     *     // do something...
     * }
     * ```
     *
     * @param string $glob  The canonical glob. The glob should contain forward
     *                      slashes as directory separators only. It must not
     *                      contain any "." or ".." segments. Use the
     *                      "webmozart/path-util" utility to canonicalize globs
     *                      prior to calling this method.
     * @param int    $flags A bitwise combination of the flag constants in this
     *                      class.
     *
     * @return string[] The matching paths in ascending order. The keys of the
     *                  array are incrementing integers.
     */
    public static function glob($glob, $flags = 0)
    {
        // The keys are thrown away by sort() below, so there is no point in
        // preserving them. Not preserving them also keeps entries from
        // overwriting each other should the iterator yield a key twice.
        $paths = iterator_to_array(new GlobIterator($glob, $flags), false);

        sort($paths);

        return $paths;
    }
113
114
    /**
     * Tests whether a path is matched by a glob.
     *
     * ```php
     * if (Glob::match('/project/index.html.twig', '/project/*.twig')) {
     *     // path matches
     * }
     * ```
     *
     * @param string $path  The path to match.
     * @param string $glob  The canonical glob. The glob should contain forward
     *                      slashes as directory separators only. It must not
     *                      contain any "." or ".." segments. Use the
     *                      "webmozart/path-util" utility to canonicalize globs
     *                      prior to calling this method.
     * @param int    $flags A bitwise combination of the flag constants in
     *                      this class.
     *
     * @return bool Returns `true` if the path is matched by the glob.
     */
    public static function match($path, $glob, $flags = 0)
    {
        // A glob without dynamic parts can only match a path identical to it
        if (!self::isDynamic($glob)) {
            return $glob === $path;
        }

        // Cheap test first: the path has to start with the static prefix
        $staticPrefix = self::getStaticPrefix($glob, $flags);

        if (0 !== strpos($path, $staticPrefix)) {
            return false;
        }

        // preg_match() returns 1 on a match, 0 on no match and false on error
        return 1 === preg_match(self::toRegEx($glob, $flags), $path);
    }
150
151
    /**
     * Filters an array for paths matching a glob.
     *
     * The filtered array is returned. This array preserves the keys of the
     * passed array.
     *
     * ```php
     * $filteredPaths = Glob::filter($paths, '/project/*.twig');
     * ```
     *
     * @param string[] $paths A list of paths.
     * @param string   $glob  The canonical glob. The glob should contain
     *                        forward slashes as directory separators only. It
     *                        must not contain any "." or ".." segments. Use the
     *                        "webmozart/path-util" utility to canonicalize
     *                        globs prior to calling this method.
     * @param int      $flags A bitwise combination of the flag constants in
     *                        this class.
     *
     * @return string[] The paths matching the glob indexed by their original
     *                  keys.
     */
    public static function filter(array $paths, $glob, $flags = 0)
    {
        if (!self::isDynamic($glob)) {
            // A static glob matches only paths identical to it. The comparison
            // is strict so that non-string entries never match by type
            // juggling, and every occurrence is kept, just like in the
            // dynamic branch below.
            $keys = array_keys($paths, $glob, true);

            return array_fill_keys($keys, $glob);
        }

        $staticPrefix = self::getStaticPrefix($glob, $flags);
        $regEx = self::toRegEx($glob, $flags);

        return array_filter($paths, function ($path) use ($staticPrefix, $regEx) {
            // Test the static prefix first to avoid the more expensive
            // preg_match() where possible
            return 0 === strpos($path, $staticPrefix) && preg_match($regEx, $path);
        });
    }
190
191
    /**
     * Returns the base path of a glob.
     *
     * The base path is the most specific directory that contains all files
     * matched by the glob. If this directory does not exist on the file system,
     * it's not necessary to execute the glob algorithm.
     *
     * More specifically, the "base path" is the part of the static prefix (see
     * {@link getStaticPrefix()}) up to its last "/". If the glob does not
     * contain wildcards, the directory name of the glob is returned.
     *
     * ```php
     * Glob::getBasePath('/css/*.css');
     * // => /css
     *
     * Glob::getBasePath('/css/style.css');
     * // => /css
     *
     * Glob::getBasePath('/css/st*.css');
     * // => /css
     *
     * Glob::getBasePath('/*.css');
     * // => /
     * ```
     *
     * @param string $glob  The canonical glob. The glob should contain forward
     *                      slashes as directory separators only. It must not
     *                      contain any "." or ".." segments. Use the
     *                      "webmozart/path-util" utility to canonicalize globs
     *                      prior to calling this method.
     * @param int    $flags A bitwise combination of the flag constants in this
     *                      class.
     *
     * @return string The base path of the glob.
     *
     * @throws InvalidArgumentException If the glob is neither absolute nor a
     *                                  URI.
     */
    public static function getBasePath($glob, $flags = 0)
    {
        $staticPrefix = self::getStaticPrefix($glob, $flags);
        $lastSlash = strrpos($staticPrefix, '/');

        // No slash on the left of the first wildcard
        if (false === $lastSlash) {
            return '';
        }

        // The only slash is the leading one: the base path is the root
        if (0 === $lastSlash) {
            return '/';
        }

        // Special case: keep the trailing slash of "scheme:///foo", where the
        // last slash is the third one following the colon
        if ($lastSlash - 3 === strpos($glob, '://')) {
            return substr($staticPrefix, 0, $lastSlash + 1);
        }

        return substr($staticPrefix, 0, $lastSlash);
    }
250
251
    /**
     * Converts a glob to a regular expression.
     *
     * Use this method if you need to match many paths against a glob:
     *
     * ```php
     * $staticPrefix = Glob::getStaticPrefix('/project/*.twig');
     * $regEx = Glob::toRegEx('/project/*.twig');
     *
     * if (0 !== strpos($path, $staticPrefix)) {
     *     // no match
     * }
     *
     * if (!preg_match($regEx, $path)) {
     *     // no match
     * }
     * ```
     *
     * You should always test whether a path contains the static prefix of the
     * glob returned by {@link getStaticPrefix()} to reduce the number of calls
     * to the expensive {@link preg_match()}.
     *
     * @param string $glob  The canonical glob. The glob should contain forward
     *                      slashes as directory separators only. It must not
     *                      contain any "." or ".." segments. Use the
     *                      "webmozart/path-util" utility to canonicalize globs
     *                      prior to calling this method.
     * @param int    $flags A bitwise combination of the flag constants in this
     *                      class.
     *
     * @return string The regular expression for matching the glob, delimited
     *                by "~" and anchored at both ends.
     *
     * @throws InvalidArgumentException If the glob is neither absolute nor a
     *                                  URI.
     */
    public static function toRegEx($glob, $flags = 0)
    {
        if (!Path::isAbsolute($glob) && false === strpos($glob, '://')) {
            throw new InvalidArgumentException(sprintf(
                'The glob "%s" is not absolute and not a URI.',
                $glob
            ));
        }

        // The glob is quoted for use in a regular expression first. The two
        // helpers called below then translate the quoted glob syntax:
        //
        // Wildcards:           "*"       => "[^/]*"   (anything but a separator)
        // Directory wildcards: "/**/"    => "/(.+/)?" (zero or more directories)
        // Single characters:   "?"       => "."
        // Sets:                "{ab,cd}" => "(ab|cd)"
        // Character classes:   "[abc]"   => "[abc]", "[^abc]" => "[^abc]"
        //
        // With the flag Glob::ESCAPE, special characters preceded by an odd
        // number of backslashes are matched literally instead.
        $quoted = preg_quote($glob, '~');

        $regEx = ($flags & self::ESCAPE)
            ? self::toRegExEscaped($quoted)
            : self::toRegExNonEscaped($quoted);

        return '~^'.$regEx.'$~';
    }
319
320
    /**
     * Returns the static prefix of a glob.
     *
     * The "static prefix" is the part of the glob up to the first special
     * character "*", "{", "?" or "[". If the glob does not contain any of
     * these characters, the full glob is returned.
     *
     * With the flag {@link ESCAPE}, special characters preceded by a backslash
     * do not end the prefix, and escape sequences in the returned prefix are
     * replaced by the characters they stand for.
     *
     * @param string $glob  The canonical glob. The glob should contain forward
     *                      slashes as directory separators only. It must not
     *                      contain any "." or ".." segments. Use the
     *                      "webmozart/path-util" utility to canonicalize globs
     *                      prior to calling this method.
     * @param int    $flags A bitwise combination of the flag constants in this
     *                      class.
     *
     * @return string The static prefix of the glob.
     *
     * @throws InvalidArgumentException If the glob is neither absolute nor a
     *                                  URI.
     */
    public static function getStaticPrefix($glob, $flags = 0)
    {
        if (!Path::isAbsolute($glob) && false === strpos($glob, '://')) {
            throw new InvalidArgumentException(sprintf(
                'The glob "%s" is not absolute and not a URI.',
                $glob
            ));
        }

        if (!($flags & self::ESCAPE)) {
            // Length of the leading part that is free of special characters
            $length = strcspn($glob, '*{?[');

            return $length < strlen($glob) ? substr($glob, 0, $length) : $glob;
        }

        $prefix = $glob;

        // Read backslashes together with the next (the escaped) character
        // up to the first non-escaped star/brace/question mark/bracket
        if (preg_match('~^('.Symbol::BACKSLASH.'.|[^'.Symbol::BACKSLASH.Symbol::STAR.Symbol::L_BRACE.Symbol::QUESTION_MARK.Symbol::L_BRACKET.'])*~', $glob, $matches)) {
            $prefix = $matches[0];
        }

        // Replace escaped characters by their unescaped equivalents.
        // strtr() replaces in a single pass and never rescans its own output.
        // Replacing the pairs one after another would first turn "\\}" (an
        // escaped backslash followed by "}") into "\}" and then into "}",
        // dropping the literal backslash.
        return strtr($prefix, array(
            '\\\\' => '\\',
            '\\*' => '*',
            '\\{' => '{',
            '\\}' => '}',
            '\\?' => '?',
            '\\[' => '[',
            '\\]' => ']',
            '\\^' => '^',
        ));
    }
377
378
    /**
     * Returns whether the glob contains a dynamic part.
     *
     * The glob contains a dynamic part if it contains one of the characters
     * "*", "{", "?" or "[". Escaping is not taken into account: an occurrence
     * preceded by a backslash counts as well.
     *
     * @param string $glob The glob to test.
     *
     * @return bool Returns `true` if the glob contains a dynamic part and
     *              `false` otherwise.
     */
    public static function isDynamic($glob)
    {
        // "[" is included because getStaticPrefix() and the regex conversion
        // treat character classes as dynamic, too. strcspn() returns the
        // length of the leading part that is free of these characters.
        return strcspn($glob, '*{?[') < strlen($glob);
    }
393
394
    /**
     * Private constructor: the class offers static methods only and is not
     * meant to be instantiated.
     */
    private function __construct()
    {
    }
397
398 31
    /**
     * Translates a quoted glob into the body of a regular expression without
     * giving backslashes any special meaning.
     *
     * @param string $quoted The glob as returned by preg_quote().
     *
     * @return string The regular expression without delimiters and anchors.
     */
    private static function toRegExNonEscaped($quoted)
    {
        // Replace "{a,b,c}" by "(a|b|c)"
        if (false !== strpos($quoted, Symbol::L_BRACE)) {
            $setPattern = '~'.Symbol::E_L_BRACE.'([^'.Symbol::R_BRACE.']*)'.Symbol::E_R_BRACE.'~';

            $quoted = preg_replace_callback($setPattern, function ($groups) {
                return '('.str_replace(',', '|', $groups[1]).')';
            }, $quoted);
        }

        // Replace "[abc]" by "[abc]" and "[^abc]" by "[^abc]"
        // This is done with a regex rather than by replacing "[" etc. on their
        // own in order to not generate broken regular expressions
        if (false !== strpos($quoted, Symbol::L_BRACKET)) {
            $classPattern = '~'.Symbol::E_L_BRACKET.'('.Symbol::E_CARET.')?([^'.Symbol::R_BRACKET.']*)'.Symbol::E_R_BRACKET.'~';

            $quoted = preg_replace_callback($classPattern, function ($groups) {
                // Group 1 is non-empty for negated classes, group 2 holds the
                // class content whose quoted hyphens are restored
                $negation = $groups[1] ? '^' : '';

                return '['.$negation.str_replace(Symbol::HYPHEN, '-', $groups[2]).']';
            }, $quoted);
        }

        // Replace "/**/" by "/(.+/)?"
        // Replace "*" by "[^/]*"
        // Replace "?" by "."
        $search = array('/'.Symbol::STAR.Symbol::STAR.'/', Symbol::STAR, Symbol::QUESTION_MARK);
        $replace = array('/(.+/)?', '[^/]*', '.');

        return str_replace($search, $replace, $quoted);
    }
433
434 38
    /**
     * Translates a quoted glob into the body of a regular expression while
     * honoring backslash escapes.
     *
     * Sets, character classes and wildcards are translated only when they are
     * preceded by an even number of backslashes. Escaped special characters
     * and escaped backslashes are reduced to their literal form at the end.
     *
     * @param string $quoted The glob as returned by preg_quote().
     *
     * @return string The regular expression without delimiters and anchors.
     */
    private static function toRegExEscaped($quoted)
    {
        // Sub-pattern for "not escaped": an even number of backslashes (first
        // capture group) that is not itself preceded by a backslash
        $noEscaping = '(?<!'.Symbol::E_BACKSLASH.')(('.Symbol::E_BACKSLASH.Symbol::E_BACKSLASH.')*)';

        // Replace "{a,b,c}" by "(a|b|c)", as long as preceded by an even number
        // of backslashes
        if (false !== strpos($quoted, Symbol::L_BRACE)) {
            $setPattern = '~'.$noEscaping.Symbol::E_L_BRACE.'(.*?)'.$noEscaping.Symbol::E_R_BRACE.'~';

            $quoted = preg_replace_callback($setPattern, function ($groups) {
                // 1: backslashes before "{", 3: set content,
                // 4: backslashes before "}"
                return $groups[1].'('.str_replace(',', '|', $groups[3]).$groups[4].')';
            }, $quoted);
        }

        // Replace "[abc]" by "[abc]", as long as preceded by an even number
        // of backslashes
        if (false !== strpos($quoted, Symbol::L_BRACKET)) {
            $classPattern = '~'.$noEscaping.Symbol::E_L_BRACKET.'('.Symbol::E_CARET.')?(.*?)'.$noEscaping.Symbol::E_R_BRACKET.'~';

            $quoted = preg_replace_callback($classPattern, function ($groups) use ($noEscaping) {
                // 1: backslashes before "[", 3: optional caret,
                // 4: class content, 5: backslashes before "]"
                $content = preg_replace('~'.$noEscaping.Symbol::E_HYPHEN.'~', '$1-', $groups[4]);

                return $groups[1].'['.($groups[3] ? '^' : '').$content.$groups[5].']';
            }, $quoted);
        }

        // Replace "/**/" by "/(.+/)?"
        $quoted = str_replace('/'.Symbol::STAR.Symbol::STAR.'/', '/(.+/)?', $quoted);

        // Replace "*" by "[^/]*", as long as preceded by an even number of backslashes
        if (false !== strpos($quoted, Symbol::STAR)) {
            $quoted = preg_replace('~'.$noEscaping.Symbol::E_STAR.'~', '$1[^/]*', $quoted);
        }

        // Replace "?" by ".", as long as preceded by an even number of backslashes
        if (false !== strpos($quoted, Symbol::QUESTION_MARK)) {
            $quoted = preg_replace('~'.$noEscaping.Symbol::E_QUESTION_MARK.'~', '$1.', $quoted);
        }

        // Finally reduce the escape sequences to their literal form. The
        // entries are applied in exactly this order:
        $literals = array(
            Symbol::E_STAR => Symbol::STAR,                   // "\*" by "*"
            Symbol::E_L_BRACE => Symbol::L_BRACE,             // "\{" by "{"
            Symbol::E_R_BRACE => Symbol::R_BRACE,             // "\}" by "}"
            Symbol::E_QUESTION_MARK => Symbol::QUESTION_MARK, // "\?" by "?"
            Symbol::E_L_BRACKET => Symbol::L_BRACKET,         // "\[" by "["
            Symbol::E_R_BRACKET => Symbol::R_BRACKET,         // "\]" by "]"
            Symbol::E_CARET => Symbol::CARET,                 // "\^" by "^"
            Symbol::E_HYPHEN => Symbol::HYPHEN,               // "\-" by "-"
            // "\\\\" by "\\"
            // (escaped backslashes were escaped again by preg_quote())
            Symbol::E_BACKSLASH => Symbol::BACKSLASH,
        );

        return str_replace(array_keys($literals), array_values($literals), $quoted);
    }
521
}
522