Completed
Push — master ( bd32c3...0c99c0 )
by Bernhard
15:35
created

Glob::toRegExNonEscaped()   B

Complexity

Conditions 4
Paths 4

Size

Total Lines 35
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 4

Importance

Changes 5
Bugs 1 Features 3
Metric Value
c 5
b 1
f 3
dl 0
loc 35
ccs 19
cts 19
cp 1
rs 8.5806
cc 4
eloc 17
nc 4
nop 1
crap 4
1
<?php
2
3
/*
4
 * This file is part of the webmozart/glob package.
5
 *
6
 * (c) Bernhard Schussek <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace Webmozart\Glob;
13
14
use InvalidArgumentException;
15
use Webmozart\Glob\Iterator\GlobIterator;
16
use Webmozart\PathUtil\Path;
17
18
/**
19
 * Searches and matches file paths using Ant-like globs.
20
 *
21
 * This class implements an Ant-like version of PHP's `glob()` function. The
22
 * wildcard "*" matches any number of characters except directory separators.
23
 * The double wildcard "**" matches any number of characters, including
24
 * directory separators.
25
 *
26
 * Use {@link glob()} to glob the filesystem for paths:
27
 *
28
 * ```php
29
 * foreach (Glob::glob('/project/**.twig') as $path) {
30
 *     // do something...
31
 * }
32
 * ```
33
 *
34
 * Use {@link match()} to match a file path against a glob:
35
 *
36
 * ```php
37
 * if (Glob::match('/project/views/index.html.twig', '/project/**.twig')) {
38
 *     // path matches
39
 * }
40
 * ```
41
 *
42
 * You can also filter an array of paths for all paths that match your glob with
43
 * {@link filter()}:
44
 *
45
 * ```php
46
 * $filteredPaths = Glob::filter($paths, '/project/**.twig');
47
 * ```
48
 *
49
 * Internally, the methods described above convert the glob into a regular
50
 * expression that is then matched against the matched paths. If you need to
51
 * match many paths against the same glob, you should convert the glob manually
52
 * and use {@link preg_match()} to test the paths:
53
 *
54
 * ```php
55
 * $staticPrefix = Glob::getStaticPrefix('/project/**.twig');
56
 * $regEx = Glob::toRegEx('/project/**.twig');
57
 *
58
 * if (0 !== strpos($path, $staticPrefix)) {
59
 *     // no match
60
 * }
61
 *
62
 * if (!preg_match($regEx, $path)) {
63
 *     // no match
64
 * }
65
 * ```
66
 *
67
 * The method {@link getStaticPrefix()} returns the part of the glob up to the
68
 * first wildcard "*". You should always test whether a path has this prefix
69
 * before calling the much more expensive {@link preg_match()}.
70
 *
71
 * @since  1.0
72
 *
73
 * @author Bernhard Schussek <[email protected]>
74
 */
75
final class Glob
76
{
77
    /**
78
     * Flag: Enable escaping of special characters with leading backslashes.
79
     */
80
    const ESCAPE = 1;
81
82
    /**
83
     * Globs the file system paths matching the glob.
84
     *
85
     * The glob may contain the wildcard "*". This wildcard matches any number
86
     * of characters, *excluding* directory separators.
87
     *
88
     * ```php
89
     * foreach (Glob::glob('/project/**.twig') as $path) {
90
     *     // do something...
91
     * }
92
     * ```
93
     *
94
     * @param string $glob  The canonical glob. The glob should contain forward
95
     *                      slashes as directory separators only. It must not
96
     *                      contain any "." or ".." segments. Use the
97
     *                      "webmozart/path-util" utility to canonicalize globs
98
     *                      prior to calling this method.
99
     * @param int    $flags A bitwise combination of the flag constants in this
100
     *                      class.
101
     *
102
     * @return string[] The matching paths. The keys of the array are
103
     *                  incrementing integers.
104
     */
105 4
    public static function glob($glob, $flags = 0)
    {
        // Drain the iterator into a plain array first ...
        $paths = iterator_to_array(new GlobIterator($glob, $flags));

        // ... then sort it, which also re-indexes the array with
        // incrementing integer keys.
        sort($paths);

        return $paths;
    }
113
114
    /**
115
     * Matches a path against a glob.
116
     *
117
     * ```php
118
     * if (Glob::match('/project/views/index.html.twig', '/project/**.twig')) {
119
     *     // path matches
120
     * }
121
     * ```
122
     *
123
     * @param string $path  The path to match.
124
     * @param string $glob  The canonical glob. The glob should contain forward
125
     *                      slashes as directory separators only. It must not
126
     *                      contain any "." or ".." segments. Use the
127
     *                      "webmozart/path-util" utility to canonicalize globs
128
     *                      prior to calling this method.
129
     * @param int    $flags A bitwise combination of the flag constants in
130
     *                      this class.
131
     *
132
     * @return bool Returns `true` if the path is matched by the glob.
133
     */
134 12
    public static function match($path, $glob, $flags = 0)
    {
        // Without any wildcard the glob can only match the identical path.
        if (!self::isDynamic($glob)) {
            return $glob === $path;
        }

        // Cheap rejection: the path has to start with the static part of
        // the glob before the regular expression is worth running.
        $staticPrefix = self::getStaticPrefix($glob, $flags);

        if (0 !== strpos($path, $staticPrefix)) {
            return false;
        }

        // preg_match() yields 1 on a match and 0/false otherwise; the cast
        // turns that into the boolean result.
        return (bool) preg_match(self::toRegEx($glob, $flags), $path);
    }
150
151
    /**
152
     * Filters an array for paths matching a glob.
153
     *
154
     * The filtered array is returned. This array preserves the keys of the
155
     * passed array.
156
     *
157
     * ```php
158
     * $filteredPaths = Glob::filter($paths, '/project/**.twig');
159
     * ```
160
     *
161
     * @param string[] $paths A list of paths.
162
     * @param string   $glob  The canonical glob. The glob should contain
163
     *                        forward slashes as directory separators only. It
164
     *                        must not contain any "." or ".." segments. Use the
165
     *                        "webmozart/path-util" utility to canonicalize
166
     *                        globs prior to calling this method.
167
     * @param int      $flags A bitwise combination of the flag constants in
168
     *                        this class.
169
     *
170
     * @return string[] The paths matching the glob indexed by their original
171
     *                  keys.
172
     */
173 5
    public static function filter(array $paths, $glob, $flags = 0)
    {
        if (!self::isDynamic($glob)) {
            // A glob without wildcards matches only entries that are exactly
            // the glob string. The search is strict so that non-string
            // entries (e.g. 0 or true) are never reported as matches, and
            // every matching key is kept, mirroring the dynamic branch
            // below which also returns all matching entries.
            $matchingKeys = array_keys($paths, $glob, true);

            return array_fill_keys($matchingKeys, $glob);
        }

        $staticPrefix = self::getStaticPrefix($glob, $flags);
        $regExp = self::toRegEx($glob, $flags);

        // array_filter() preserves the keys of the passed array. The prefix
        // comparison runs first so that the regular expression is only
        // evaluated for paths that can possibly match.
        return array_filter($paths, function ($path) use ($staticPrefix, $regExp) {
            return 0 === strpos($path, $staticPrefix) && preg_match($regExp, $path);
        });
    }
190
191
    /**
192
     * Returns the base path of a glob.
193
     *
194
     * This method returns the most specific directory that contains all files
195
     * matched by the glob. If this directory does not exist on the file system,
196
     * it's not necessary to execute the glob algorithm.
197
     *
198
     * More specifically, the "base path" is the longest path trailed by a "/"
199
     * on the left of the first wildcard "*". If the glob does not contain
200
     * wildcards, the directory name of the glob is returned.
201
     *
202
     * ```php
203
     * Glob::getBasePath('/css/*.css');
204
     * // => /css
205
     *
206
     * Glob::getBasePath('/css/style.css');
207
     * // => /css
208
     *
209
     * Glob::getBasePath('/css/st*.css');
210
     * // => /css
211
     *
212
     * Glob::getBasePath('/*.css');
213
     * // => /
214
     * ```
215
     *
216
     * @param string $glob  The canonical glob. The glob should contain forward
217
     *                      slashes as directory separators only. It must not
218
     *                      contain any "." or ".." segments. Use the
219
     *                      "webmozart/path-util" utility to canonicalize globs
220
     *                      prior to calling this method.
221
     * @param int    $flags A bitwise combination of the flag constants in this
222
     *                      class.
223
     *
224
     * @return string The base path of the glob.
225
     */
226 47
    public static function getBasePath($glob, $flags = 0)
    {
        // The base path ends at the last "/" of the static prefix.
        $staticPrefix = self::getStaticPrefix($glob, $flags);
        $lastSlash = strrpos($staticPrefix, '/');

        // No slash on the left of the first wildcard: there is no base path.
        if (false === $lastSlash) {
            return '';
        }

        // The only slash is the leading one: the base path is the root.
        if (0 === $lastSlash) {
            return '/';
        }

        // For "scheme:///foo" the last slash is the third character after
        // the ":" and belongs to the base path, so it is kept. In every
        // other case the trailing slash is cut off.
        $keepSlash = $lastSlash - 3 === strpos($glob, '://');
        $length = $keepSlash ? $lastSlash + 1 : $lastSlash;

        return substr($staticPrefix, 0, $length);
    }
250
251
    /**
252
     * Converts a glob to a regular expression.
253
     *
254
     * Use this method if you need to match many paths against a glob:
255
     *
256
     * ```php
257
     * $staticPrefix = Glob::getStaticPrefix('/project/**.twig');
258
     * $regEx = Glob::toRegEx('/project/**.twig');
259
     *
260
     * if (0 !== strpos($path, $staticPrefix)) {
261
     *     // no match
262
     * }
263
     *
264
     * if (!preg_match($regEx, $path)) {
265
     *     // no match
266
     * }
267
     * ```
268
     *
269
     * You should always test whether a path contains the static prefix of the
270
     * glob returned by {@link getStaticPrefix()} to reduce the number of calls
271
     * to the expensive {@link preg_match()}.
272
     *
273
     * @param string $glob  The canonical glob. The glob should contain forward
274
     *                      slashes as directory separators only. It must not
275
     *                      contain any "." or ".." segments. Use the
276
     *                      "webmozart/path-util" utility to canonicalize globs
277
     *                      prior to calling this method.
278
     * @param int    $flags A bitwise combination of the flag constants in this
279
     *                      class.
280
     *
281
     * @return string The regular expression for matching the glob.
282
     */
283 77
    public static function toRegEx($glob, $flags = 0)
    {
        // Only absolute paths and URIs ("scheme://...") are accepted.
        if (!(Path::isAbsolute($glob) || false !== strpos($glob, '://'))) {
            throw new InvalidArgumentException(sprintf(
                'The glob "%s" is not absolute and not a URI.',
                $glob
            ));
        }

        // The conversion happens in two steps. First, every character that
        // is special in a regular expression (and the "~" delimiter) is
        // quoted. Second, one of the two helpers turns the quoted glob
        // syntax back into regular expression syntax:
        //
        //   "*"       becomes "[^/]*"   (anything except a separator)
        //   "/**/"    becomes "/(.+/)?" (any number of directories)
        //   "?"       becomes "."       (any single character)
        //   "{ab,cd}" becomes "(ab|cd)" (alternatives)
        //   "[abc]"   stays   "[abc]"   (character class)
        //
        // With Glob::ESCAPE the helper additionally leaves wildcards alone
        // that are escaped with a backslash.
        $quoted = preg_quote($glob, '~');

        $expression = ($flags & self::ESCAPE)
            ? self::toRegExEscaped($quoted)
            : self::toRegExNonEscaped($quoted);

        // Anchor the expression so that the whole path has to match.
        return '~^'.$expression.'$~';
    }
319
320
    /**
321
     * Returns the static prefix of a glob.
322
     *
323
     * The "static prefix" is the part of the glob up to the first wildcard "*".
324
     * If the glob does not contain wildcards, the full glob is returned.
325
     *
326
     * @param string $glob  The canonical glob. The glob should contain forward
327
     *                      slashes as directory separators only. It must not
328
     *                      contain any "." or ".." segments. Use the
329
     *                      "webmozart/path-util" utility to canonicalize globs
330
     *                      prior to calling this method.
331
     * @param int    $flags A bitwise combination of the flag constants in this
332
     *                      class.
333
     *
334
     * @return string The static prefix of the glob.
335
     */
336 96
    public static function getStaticPrefix($glob, $flags = 0)
    {
        // Only absolute paths and URIs ("scheme://...") are accepted.
        if (!(Path::isAbsolute($glob) || false !== strpos($glob, '://'))) {
            throw new InvalidArgumentException(sprintf(
                'The glob "%s" is not absolute and not a URI.',
                $glob
            ));
        }

        if (!($flags & self::ESCAPE)) {
            // Without escaping, the prefix simply ends in front of the first
            // "*", "{", "?" or "[". strcspn() returns the length of the
            // leading part that contains none of these characters.
            $length = strcspn($glob, '*{?[');

            // No wildcard at all: the full glob is the prefix.
            return $length < strlen($glob) ? substr($glob, 0, $length) : $glob;
        }

        $prefix = $glob;

        // Read backslashes together with the next (the escaped) character
        // up to the first non-escaped wildcard character
        $pattern = '~^('.Symbol::BACKSLASH.'.|[^'.Symbol::BACKSLASH.Symbol::STAR.Symbol::L_BRACE.Symbol::QUESTION_MARK.Symbol::L_BRACKET.'])*~';

        if (preg_match($pattern, $glob, $matches)) {
            $prefix = $matches[0];
        }

        // The prefix is a plain string, not a glob: replace the escaped
        // characters by their unescaped equivalents
        return str_replace(
            array('\\\\', '\\*', '\\{', '\\}', '\\?', '\\[', '\\]', '\\^'),
            array('\\', '*', '{', '}', '?', '[', ']', '^'),
            $prefix
        );
    }
377
378
    /**
379
     * Returns whether the glob contains a dynamic part.
380
     *
381
     * The glob contains a dynamic part if it contains an unescaped "*" or
382
     * "{" character.
383
     *
384
     * @param string $glob The glob to test.
385
     *
386
     * @return bool Returns `true` if the glob contains a dynamic part and
387
     *              `false` otherwise.
388
     */
389 34
    public static function isDynamic($glob)
    {
        // The glob is dynamic as soon as one of the wildcard characters
        // "*", "{", "?" or "[" occurs anywhere in it. "[" has to be part of
        // this list: it starts a character class and ends the static prefix
        // just like the other three, so a glob such as "/foo/[ab]" must not
        // be compared literally against paths.
        //
        // Escaping is deliberately not taken into account here. A glob that
        // only contains escaped wildcards is reported as dynamic and is then
        // handled by the regular expression, which honors the escaping.
        //
        // strpbrk() returns false only if none of the characters is found.
        return false !== strpbrk($glob, '*{?[');
    }
393
394
    /**
     * This class only offers static methods and must not be instantiated.
     */
    private function __construct()
    {
    }
397
398 41
    /**
     * Converts a quoted glob to a regular expression body without support
     * for backslash escaping.
     *
     * Used by {@link toRegEx()} when the flag {@link ESCAPE} is not set.
     *
     * @param string $quoted The glob after quoting it with preg_quote().
     *
     * @return string The regular expression without delimiters and anchors.
     */
    private static function toRegExNonEscaped($quoted)
    {
        // Sets: turn "{a,b,c}" into "(a|b|c)"
        if (false !== strpos($quoted, Symbol::L_BRACE)) {
            $bracePattern = '~'.Symbol::E_L_BRACE.'([^'.Symbol::R_BRACE.']*)'.Symbol::E_R_BRACE.'~';

            $toAlternatives = function ($groups) {
                return '('.str_replace(',', '|', $groups[1]).')';
            };

            $quoted = preg_replace_callback($bracePattern, $toAlternatives, $quoted);
        }

        // Character classes: turn the quoted "[abc]" and "[^abc]" back into
        // real classes. Complete bracket pairs are matched with a regex
        // instead of replacing single brackets, so that a lone bracket
        // cannot produce a broken regular expression.
        if (false !== strpos($quoted, Symbol::L_BRACKET)) {
            $bracketPattern = '~'.Symbol::E_L_BRACKET.'('.Symbol::E_CARET.')?'.'([^'.Symbol::E_R_BRACKET.']*)'.Symbol::E_R_BRACKET.'~';

            $toCharacterClass = function ($groups) {
                // Group 1 is the optional caret, group 2 the class content
                $negation = $groups[1] ? '^' : '';

                return '['.$negation.$groups[2].']';
            };

            $quoted = preg_replace_callback($bracketPattern, $toCharacterClass, $quoted);
        }

        // The order of the remaining replacements matters: the double
        // wildcard has to be consumed before the single wildcard.

        // Replace "/**/" by "/(.+/)?"
        $quoted = str_replace('/'.Symbol::STAR.Symbol::STAR.'/', '/(.+/)?', $quoted);

        // Replace "*" by "[^/]*"
        $quoted = str_replace(Symbol::STAR, '[^/]*', $quoted);

        // Replace "?" by "."
        return str_replace(Symbol::QUESTION_MARK, '.', $quoted);
    }
433
434 36
    /**
     * Converts a quoted glob to a regular expression body with support for
     * backslash escaping.
     *
     * Used by {@link toRegEx()} when the flag {@link ESCAPE} is set.
     *
     * @param string $quoted The glob after quoting it with preg_quote().
     *
     * @return string The regular expression without delimiters and anchors.
     */
    private static function toRegExEscaped($quoted)
    {
        // Sub-pattern that captures the backslash pairs in front of a
        // character (capture group 1 of each pattern below). The wildcard
        // patterns use it so that a wildcard is only converted when it is
        // preceded by an even number of backslashes.
        $noEscaping = '(?<!'.Symbol::E_BACKSLASH.')(('.Symbol::E_BACKSLASH.Symbol::E_BACKSLASH.')*)';

        // Sets: turn "{a,b,c}" into "(a|b|c)", as long as preceded by an
        // even number of backslashes
        if (false !== strpos($quoted, Symbol::L_BRACE)) {
            $bracePattern = '~'.$noEscaping.Symbol::E_L_BRACE.'(.*?)'.$noEscaping.Symbol::E_R_BRACE.'~';

            $toAlternatives = function ($groups) {
                // Group 1: backslashes in front of the opening brace
                // Group 3: content of the set
                // Group 4: backslashes in front of the closing brace
                return $groups[1].'('.str_replace(',', '|', $groups[3]).$groups[4].')';
            };

            $quoted = preg_replace_callback($bracePattern, $toAlternatives, $quoted);
        }

        // Character classes: turn the quoted "[abc]" back into "[abc]", as
        // long as preceded by an even number of backslashes
        if (false !== strpos($quoted, Symbol::L_BRACKET)) {
            $bracketPattern = '~'.$noEscaping.Symbol::E_L_BRACKET.'('.Symbol::E_CARET.')?(.*?)'.$noEscaping.Symbol::E_R_BRACKET.'~';

            $toCharacterClass = function ($groups) {
                // Group 1: backslashes in front of the opening bracket
                // Group 3: optional caret
                // Group 4: content of the class
                // Group 5: backslashes in front of the closing bracket
                $negation = $groups[3] ? '^' : '';

                return $groups[1].'['.$negation.$groups[4].$groups[5].']';
            };

            $quoted = preg_replace_callback($bracketPattern, $toCharacterClass, $quoted);
        }

        // Replace "/**/" by "/(.+/)?"
        $quoted = str_replace('/'.Symbol::STAR.Symbol::STAR.'/', '/(.+/)?', $quoted);

        // Replace "*" by "[^/]*", as long as preceded by an even number of
        // backslashes
        if (false !== strpos($quoted, Symbol::STAR)) {
            $quoted = preg_replace('~'.$noEscaping.Symbol::E_STAR.'~', '$1[^/]*', $quoted);
        }

        // Replace "?" by ".", as long as preceded by an even number of
        // backslashes
        if (false !== strpos($quoted, Symbol::QUESTION_MARK)) {
            $quoted = preg_replace('~'.$noEscaping.Symbol::E_QUESTION_MARK.'~', '$1.', $quoted);
        }

        // Whatever is still escaped at this point is meant literally.
        // Replace each escaped symbol by its plain form, one pair after the
        // other and in exactly this order (escaped backslashes were escaped
        // again by preg_quote(), so they come last).
        $unescapeSteps = array(
            array(Symbol::E_STAR, Symbol::STAR),
            array(Symbol::E_L_BRACE, Symbol::L_BRACE),
            array(Symbol::E_R_BRACE, Symbol::R_BRACE),
            array(Symbol::E_QUESTION_MARK, Symbol::QUESTION_MARK),
            array(Symbol::E_L_BRACKET, Symbol::L_BRACKET),
            array(Symbol::E_R_BRACKET, Symbol::R_BRACKET),
            array(Symbol::E_CARET, Symbol::CARET),
            array(Symbol::E_BACKSLASH, Symbol::BACKSLASH),
        );

        foreach ($unescapeSteps as $step) {
            $quoted = str_replace($step[0], $step[1], $quoted);
        }

        return $quoted;
    }
513
}
514