Completed
Pull Request — master (#318)
by
unknown
01:13
created

Minify::extractStrings()   A

Complexity

Conditions 2
Paths 1

Size

Total Lines 37

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 37
rs 9.328
c 0
b 0
f 0
cc 2
nc 1
nop 2
1
<?php
2
/**
3
 * Abstract minifier class
4
 *
5
 * Please report bugs on https://github.com/matthiasmullie/minify/issues
6
 *
7
 * @author Matthias Mullie <[email protected]>
8
 * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
9
 * @license MIT License
10
 */
11
namespace MatthiasMullie\Minify;
12
13
use MatthiasMullie\Minify\Exceptions\IOException;
14
use Psr\Cache\CacheItemInterface;
15
16
/**
17
 * Abstract minifier class.
18
 *
19
 * Please report bugs on https://github.com/matthiasmullie/minify/issues
20
 *
21
 * @package Minify
22
 * @author Matthias Mullie <[email protected]>
23
 * @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
24
 * @license MIT License
25
 */
26
abstract class Minify
27
{
28
    /**
29
     * The data to be minified.
30
     *
31
     * @var string[]
32
     */
33
    protected $data = array();
34
35
    /**
36
     * Array of patterns to match.
37
     *
38
     * @var string[]
39
     */
40
    protected $patterns = array();
41
42
    /**
43
     * This array will hold content of strings and regular expressions that have
44
     * been extracted from the JS source code, so we can reliably match "code",
45
     * without having to worry about potential "code-like" characters inside.
46
     *
47
     * @var string[]
48
     */
49
    // public on purpose: the placeholder callbacks in this class are closures
    // that reach this array through a captured copy of the instance, not $this
    public $extracted = array();
50
51
    /**
     * Create the minifier. Any arguments given are forwarded to add(), so
     * sources can be supplied right away.
     */
    public function __construct(/* $data = null, ... */)
    {
        $sources = func_get_args();

        // nothing passed in: sources will be added later through add()
        if (count($sources) === 0) {
            return;
        }

        call_user_func_array(array($this, 'add'), $sources);
    }
61
62
    /**
     * Add a file or straight-up code to be minified.
     *
     * Accepts any number of arguments; every argument may itself be a
     * (nested) array of sources. Each source is run through load(), has its
     * line endings normalized to "\n" and is then stored in $this->data.
     *
     * @param string|string[] $data
     *
     * @return static
     */
    public function add($data /* $data = null, ... */)
    {
        // $data is only declared to force callers to pass at least one
        // argument; the real (variadic) input is read via func_get_args()
        $args = array($data) + func_get_args();

        foreach ($args as $data) {
            if (is_array($data)) {
                // add the entries one by one instead of spreading the array
                // into a single call: an empty array would otherwise invoke
                // add() without arguments, and string keys would be treated
                // as named arguments on PHP 8
                foreach ($data as $entry) {
                    $this->add($entry);
                }
                continue;
            }

            $data = (string) $data;

            // load() returns the input unchanged unless it was a readable file
            $value = $this->load($data);

            // content that came from a file is keyed by its path, inline code
            // by a running index; compare strictly so numeric-looking strings
            // (e.g. "10" vs "1e1") are not mistaken for identical
            $key = ($data !== $value) ? $data : count($this->data);

            // replace CR linefeeds etc.
            // @see https://github.com/matthiasmullie/minify/pull/139
            $value = str_replace(array("\r\n", "\r"), "\n", $value);

            $this->data[$key] = $value;
        }

        return $this;
    }
101
102
    /**
     * Add a file to be minified.
     *
     * Works like add(), but every entry must be a path that passes
     * canImportFile(); anything else is rejected with an exception.
     *
     * @param string|string[] $data
     *
     * @return static
     *
     * @throws IOException When one of the given paths can not be read
     */
    public function addFile($data /* $data = null, ... */)
    {
        // $data is only declared to force callers to pass at least one
        // argument; the real (variadic) input is read via func_get_args()
        $args = array($data) + func_get_args();

        foreach ($args as $path) {
            if (is_array($path)) {
                // handle the entries one by one instead of spreading the
                // array into a single call: an empty array would otherwise
                // invoke addFile() without arguments, and string keys would
                // be treated as named arguments on PHP 8
                foreach ($path as $entry) {
                    $this->addFile($entry);
                }
                continue;
            }

            $path = (string) $path;

            // check if we can read the file
            if (!$this->canImportFile($path)) {
                throw new IOException('The file "'.$path.'" could not be opened for reading. Check if PHP has enough permissions.');
            }

            $this->add($path);
        }

        return $this;
    }
139
140
    /**
     * Run the minifier and, when a path is given, also write the result there.
     *
     * @param string|null $path Path to write the data to
     *
     * @return string The minified data
     */
    public function minify($path = null)
    {
        $minified = $this->execute($path);

        // without a target path there is nothing to persist
        if ($path === null) {
            return $minified;
        }

        $this->save($minified, $path);

        return $minified;
    }
158
159
    /**
     * Run the minifier, gzip the result and, when a path is given, also write
     * the compressed data there.
     *
     * @param string|null $path  Path to write the data to
     * @param int         $level Compression level, from 0 to 9
     *
     * @return string The minified & gzipped data
     */
    public function gzip($path = null, $level = 9)
    {
        $compressed = gzencode($this->execute($path), $level, FORCE_GZIP);

        // only persist when a target path was provided
        if ($path !== null) {
            $this->save($compressed, $path);
        }

        return $compressed;
    }
179
180
    /**
     * Run the minifier and store the result in the given cache item.
     *
     * @param CacheItemInterface $item Cache item to write the data to
     *
     * @return CacheItemInterface The same cache item, now holding the minified data
     */
    public function cache(CacheItemInterface $item)
    {
        $item->set($this->execute());

        return $item;
    }
194
195
    /**
196
     * Minify the data.
197
     *
198
     * @param string[optional] $path Path to write the data to
199
     *
200
     * @return string The minified data
201
     */
202
    // no body here: concrete minifiers supply the implementation that
    // minify(), gzip() and cache() delegate to
    abstract public function execute($path = null);
203
204
    /**
     * Resolve a source: when $data points at an importable file its contents
     * are returned (minus a leading UTF-8 BOM), otherwise $data is returned
     * untouched.
     *
     * @param string $data Either a path to a file or the content itself
     *
     * @return string
     */
    protected function load($data)
    {
        // not a file we can import: treat the input as the content itself
        if (!$this->canImportFile($data)) {
            return $data;
        }

        $contents = file_get_contents($data);

        // strip BOM, if any
        $bom = "\xef\xbb\xbf";
        if (substr($contents, 0, 3) === $bom) {
            return substr($contents, 3);
        }

        return $contents;
    }
225
226
    /**
     * Save to file.
     *
     * Opens $path for writing, writes $content to it and closes the handle
     * again, also when writing fails.
     *
     * @param string $content The minified data
     * @param string $path    The path to save the minified data to
     *
     * @throws IOException When the file can not be opened or written to
     */
    protected function save($content, $path)
    {
        $handler = $this->openFileForWriting($path);

        try {
            // pass the path along so a write failure names the file involved
            $this->writeToFile($handler, $content, $path);
        } catch (\Exception $e) {
            // don't leak the handle when writing failed
            @fclose($handler);

            throw $e;
        }

        // closing is best effort: the write above has already been verified
        @fclose($handler);
    }
242
243
    /**
     * Queue a pattern (and what to replace its matches with) to be run against
     * the source content.
     *
     * @param string          $pattern     PCRE pattern
     * @param string|callable $replacement Replacement value for matched pattern
     */
    protected function registerPattern($pattern, $replacement = '')
    {
        // the S modifier asks PCRE to study the pattern, since it will be
        // executed more than once
        $this->patterns[] = array($pattern.'S', $replacement);
    }
256
257
    /**
     * Register the patterns dealing with multi-line comments, a syntax shared
     * by JS and CSS: comments to keep are swapped for a placeholder, all other
     * comments are removed.
     */
    protected function stripMultilineComments()
    {
        // closures can only use $this since PHP 5.4, so capture the instance
        $self = $this;

        // stash a comment that has to survive and leave a numbered
        // placeholder comment behind, so it can be restored later
        $preserve = function ($match) use ($self) {
            $placeholder = '/*'.count($self->extracted).'*/';
            $self->extracted[$placeholder] = $match[0];

            return $placeholder;
        };

        // First extract comments we want to keep
        $this->registerPattern('/
            # optional newline
            \n?

            # start comment
            \/\*

            # comment content
            (?:
                # either starts with an !
                !
            |
                # or, after some number of characters which do not end the comment
                (?:(?!\*\/).)*?

                # there is either a @license or @preserve tag
                @(?:license|preserve)
            )

            # then match to the end of the comment
            .*?\*\/\n?

            /ixs', $preserve);

        // Then strip all other comments
        $this->registerPattern('/\/\*.*?\*\//s', '');
    }
299
300
    /**
     * We can't "just" run some regular expressions against JavaScript: it's a
     * complex language. E.g. having an occurrence of // xyz would be a comment,
     * unless it's used within a string. Or you could have something that looks
     * like a 'string', but inside a comment.
     * The only way to accurately replace these pieces is to walk through the
     * content and, at every step, only execute the registered pattern that
     * matches earliest.
     *
     * @param string $content The content to replace patterns in
     *
     * @return string The (manipulated) content
     */
    protected function replace($content)
    {
        $content = (string) $content;
        $processed = '';

        // offset of every pattern's next match in $content; -1 = not matched yet
        $positions = array_fill(0, count($this->patterns), -1);
        $matches = array();

        // compare against '' explicitly: a plain truthiness check would also
        // stop on a remaining "0" and silently drop it from the output
        while ($content !== '') {
            // find first match for all patterns
            foreach ($this->patterns as $i => $pattern) {
                $regex = $pattern[0];

                // we can safely ignore patterns for positions we've unset
                // earlier, because we know these won't show up anymore
                if (!array_key_exists($i, $positions)) {
                    continue;
                }

                // no need to re-run matches that are still in the part of the
                // content that hasn't been processed
                if ($positions[$i] >= 0) {
                    continue;
                }

                $match = null;
                if (preg_match($regex, $content, $match, PREG_OFFSET_CAPTURE)) {
                    $matches[$i] = $match;

                    // we'll store the match position as well; that way, we
                    // don't have to redo all preg_matches after changing only
                    // the first (we'll still know where those others are)
                    $positions[$i] = $match[0][1];
                } else {
                    // if the pattern couldn't be matched, there's no point in
                    // executing it again in later runs on this same content;
                    // ignore this one until we reach end of content
                    unset($matches[$i], $positions[$i]);
                }
            }

            // no more matches to find: everything's been processed, break out
            if (empty($matches)) {
                $processed .= $content;
                break;
            }

            // see which of the patterns actually found the first thing (we'll
            // only want to execute that one, since we're unsure if what the
            // other found was not inside what the first found)
            $discardLength = min($positions);
            $firstPattern = array_search($discardLength, $positions, true);
            $match = $matches[$firstPattern][0][0];

            // execute the pattern that matches earliest in the content string
            list($pattern, $replacement) = $this->patterns[$firstPattern];
            $replacement = $this->replacePattern($pattern, $replacement, $content);

            // figure out which part of the string was unmatched; that's the
            // part we'll execute the patterns on again next
            $content = (string) substr($content, $discardLength);
            $unmatched = (string) substr($content, strpos($content, $match) + strlen($match));

            // move the replaced part to $processed and prepare $content to
            // again match batch of patterns against
            $processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
            $content = $unmatched;

            // first match has been replaced & that content is to be left alone,
            // the next matches will start after this replacement, so we should
            // fix their offsets; offsets that drop below 0 get re-matched
            foreach ($positions as $i => $position) {
                $positions[$i] -= $discardLength + strlen($match);
            }
        }

        return $processed;
    }
388
389
    /**
     * Apply a single pattern to $content, replacing only its first match.
     * Callable replacements are invoked with the match, anything else is used
     * as a plain replacement string.
     *
     * @param string          $pattern     Pattern to match
     * @param string|callable $replacement Replacement value
     * @param string          $content     Content to match pattern against
     *
     * @return string
     */
    protected function replacePattern($pattern, $replacement, $content)
    {
        if (!is_callable($replacement)) {
            return preg_replace($pattern, $replacement, $content, 1);
        }

        return preg_replace_callback($pattern, $replacement, $content, 1);
    }
409
410
    /**
     * Strings have to be matched so that code-like content inside them is
     * ignored, while the string content itself stays untouched.
     *
     * This registers a pattern that swaps every non-empty string for a short
     * numbered placeholder (wrapped in the original quote character). The
     * original string is kept in $this->extracted, from where it can be put
     * back once all other minifying is done.
     *
     * @param string $chars             Characters that delimit a string
     * @param string $placeholderPrefix Text to put in front of the placeholder number
     */
    protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
    {
        // closures can only use $this since PHP 5.4, so capture the instance
        $self = $this;

        $stash = function ($match) use ($self, $placeholderPrefix) {
            $quote = $match[1];
            $inner = $match[2];

            // Look at the inner content rather than the full match, which
            // always contains the quotes. Empty strings can't be confused for
            // anything else, so they need no placeholder - they only had to
            // be matched for the extraction routine to skip over them.
            if ($inner === '') {
                return $match[0];
            }

            $placeholder = $quote.$placeholderPrefix.count($self->extracted).$quote;
            $self->extracted[$placeholder] = $quote.$inner.$quote;

            return $placeholder;
        };

        /*
         * The \\ messiness explained:
         * * A ' or " doesn't end the string if it's escaped (has a backslash
         * in front of it)
         * * Unless... that backslash itself is escaped (another leading
         * backslash), in which case it's no longer escaping the ' or "
         * * So there can be either no backslash, or an even number
         * * All of that is multiplied by 4: once doubled to get a literal
         * backslash into the PHP string, and doubled again to have it escaped
         * inside the regex
         */
        $this->registerPattern('/(['.$chars.'])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $stash);
    }
461
462
    /**
     * Put back everything that was swapped for a placeholder: each key of
     * $this->extracted found in $content is replaced by its stored original,
     * after which the stored data is cleared.
     *
     * @param string $content
     *
     * @return string
     */
    protected function restoreExtractedData($content)
    {
        // only touch the content when something was actually extracted
        if (!empty($this->extracted)) {
            $content = strtr($content, $this->extracted);

            $this->extracted = array();
        }

        return $content;
    }
484
485
    /**
     * Check if the path is a regular file and can be read.
     *
     * The input is not necessarily a path: add() also probes raw source code
     * with this method, so it must cope with arbitrary strings.
     *
     * @param string $path
     *
     * @return bool
     */
    protected function canImportFile($path)
    {
        // a NUL byte can never be part of a real path; reject it up front
        // instead of relying on how the filesystem functions react to it,
        // which is not the same across PHP versions
        if (strpos($path, "\0") !== false) {
            return false;
        }

        $parsed = parse_url($path);
        if (
            // file is elsewhere
            isset($parsed['host']) ||
            // file responds to queries (may change, or need to bypass cache)
            isset($parsed['query'])
        ) {
            return false;
        }

        // the length check comes first so overly long input (likely code,
        // not a path) never reaches the filesystem functions
        return strlen($path) < PHP_MAXPATHLEN && @is_file($path) && is_readable($path);
    }
506
507
    /**
     * Open the file at $path in write mode and hand back the handle.
     *
     * @param string $path The path to the file
     *
     * @return resource Specifier for the target file
     *
     * @throws IOException When the file can not be opened
     */
    protected function openFileForWriting($path)
    {
        // warnings are suppressed; failure is reported via the exception below
        $handler = @fopen($path, 'w');

        if ($handler === false) {
            throw new IOException('The file "'.$path.'" could not be opened for writing. Check if PHP has enough permissions.');
        }

        return $handler;
    }
524
525
    /**
     * Write $content to the given handle and make sure all of it was written.
     * $path is only used to build the exception message.
     *
     * @param resource $handler The resource to write to
     * @param string   $content The content to write
     * @param string   $path    The path to the file (for exception printing only)
     *
     * @throws IOException When writing fails or fewer bytes than expected were written
     */
    protected function writeToFile($handler, $content, $path = '')
    {
        $written = @fwrite($handler, $content);

        // a short write counts as a failure just like an outright error
        $complete = $written !== false && $written >= strlen($content);

        if (!$complete) {
            throw new IOException('The file "'.$path.'" could not be written to. Check your disk space and file permissions.');
        }
    }
540
}
541