Completed
Push — master ( 8538b3...935035 )
by frank
02:00
created

AO_Minify_HTML::_removeDataURICB()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
/**
 * Class AO_Minify_HTML
 * @package Minify
 */
6
7
/**
 * Compress HTML
 *
 * This is a heavy regex-based removal of whitespace, unnecessary comments and
 * tokens. IE conditional comments are preserved. There are also options to have
 * STYLE and SCRIPT blocks compressed by callback functions.
 *
 * A test suite is available.
 *
 * @package Minify
 * @author Stephen Clay <[email protected]>
 */
19
class AO_Minify_HTML {

    /** @var string Markup being minified; process() rewrites it step by step. */
    private $_html;

    /** @var bool|null Treat content as XHTML; null means process() sniffs the doctype. */
    protected $_isXhtml = null;

    /** @var string|null Prefix used to build placeholder tokens, set by process(). */
    protected $_replacementHash = null;

    /** @var array Map of placeholder token => content restored at the end of process(). */
    protected $_placeholders = array();

    /** @var callable|null Callback applied to STYLE contents; 'trim' is used when unset. */
    protected $_cssMinifier = null;

    /** @var callable|null Callback applied to SCRIPT contents; 'trim' is used when unset. */
    protected $_jsMinifier = null;

    /** @var bool Whether HTML comments are kept in the output. */
    protected $_keepComments = false;

    /**
     * "Minify" an HTML page
     *
     * @param string $html
     *
     * @param array $options
     *
     * 'cssMinifier' : (optional) callback function to process content of STYLE
     * elements.
     *
     * 'jsMinifier' : (optional) callback function to process content of SCRIPT
     * elements. Note: the type attribute is ignored.
     *
     * 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If
     * unset, minify will sniff for an XHTML doctype.
     *
     * 'keepComments' : (optional boolean) should the HTML comments be kept
     * in the HTML Code?
     *
     * @return string
     */
    public static function minify($html, $options = array())
    {
        $minifier = new self($html, $options);

        return $minifier->process();
    }

    /**
     * Create a minifier object
     *
     * @param string $html
     *
     * @param array $options
     *
     * 'cssMinifier' : (optional) callback function to process content of STYLE
     * elements.
     *
     * 'jsMinifier' : (optional) callback function to process content of SCRIPT
     * elements. Note: the type attribute is ignored.
     *
     * 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If
     * unset, minify will sniff for an XHTML doctype.
     *
     * 'keepComments' : (optional boolean) should the HTML comments be kept
     * in the HTML Code?
     */
    public function __construct($html, $options = array())
    {
        // Normalise Windows line endings so the line-based regexes only see "\n".
        $this->_html = str_replace("\r\n", "\n", trim((string) $html));

        if (isset($options['xhtml'])) {
            $this->_isXhtml = (bool) $options['xhtml'];
        }
        if (isset($options['cssMinifier'])) {
            $this->_cssMinifier = $options['cssMinifier'];
        }
        if (isset($options['jsMinifier'])) {
            $this->_jsMinifier = $options['jsMinifier'];
        }
        if (isset($options['keepComments'])) {
            // Cast so the flag is a real boolean, like 'xhtml' above.
            $this->_keepComments = (bool) $options['keepComments'];
        }
    }

    /**
     * Minify the markup given in the constructor
     *
     * Content that must not be touched (SCRIPT, STYLE, PRE, TEXTAREA, data:
     * URIs) is swapped for placeholder tokens first, whitespace is collapsed
     * on what remains, and the placeholders are filled back in at the end.
     * If a regex step fails inside PCRE, that step is skipped and the markup
     * is left as it was instead of being replaced by an empty result.
     *
     * @return string
     */
    public function process()
    {
        if ($this->_isXhtml === null) {
            $this->_isXhtml = (false !== strpos($this->_html, '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML'));
        }

        // REQUEST_TIME is normally provided by the SAPI; fall back to time()
        // so a stripped-down $_SERVER does not raise a warning.
        $requestTime = isset($_SERVER['REQUEST_TIME']) ? $_SERVER['REQUEST_TIME'] : time();
        $this->_replacementHash = 'MINIFYHTML' . md5((string) $requestTime);
        $this->_placeholders = array();

        // replace SCRIPTs (and minify) with placeholders
        $this->_html = $this->_safeReplaceCallback(
            '/(\\s*)(<script\\b[^>]*?>)([\\s\\S]*?)<\\/script>(\\s*)/i',
            '_removeScriptCB',
            $this->_html
        );

        // replace STYLEs (and minify) with placeholders
        $this->_html = $this->_safeReplaceCallback(
            '/\\s*(<style\\b[^>]*?>)([\\s\\S]*?)<\\/style>\\s*/i',
            '_removeStyleCB',
            $this->_html
        );

        // remove HTML comments (not containing IE conditional comments).
        if (!$this->_keepComments) {
            $this->_html = $this->_safeReplaceCallback(
                '/<!--([\\s\\S]*?)-->/',
                '_commentCB',
                $this->_html
            );
        }

        // replace PREs with placeholders
        $this->_html = $this->_safeReplaceCallback(
            '/\\s*(<pre\\b[^>]*?>[\\s\\S]*?<\\/pre>)\\s*/i',
            '_removePreCB',
            $this->_html
        );

        // replace TEXTAREAs with placeholders
        $this->_html = $this->_safeReplaceCallback(
            '/\\s*(<textarea\\b[^>]*?>[\\s\\S]*?<\\/textarea>)\\s*/i',
            '_removeTextareaCB',
            $this->_html
        );

        // replace data: URIs with placeholders
        $this->_html = $this->_safeReplaceCallback(
            '/(=("|\')data:.*\\2)/Ui',
            '_removeDataURICB',
            $this->_html
        );

        // trim each line.
        // replace by space instead of '' to avoid newline after opening tag getting zapped
        $this->_html = $this->_safeReplace('/^\s+|\s+$/m', ' ', $this->_html);

        // remove ws around block/undisplayed elements
        $this->_html = $this->_safeReplace(
            '/\\s+(<\\/?(?:area|article|aside|base(?:font)?|blockquote|body'
            . '|canvas|caption|center|col(?:group)?|dd|dir|div|dl|dt|fieldset|figcaption|figure|footer|form'
            . '|frame(?:set)?|h[1-6]|head|header|hgroup|hr|html|legend|li|link|main|map|menu|meta|nav'
            . '|ol|opt(?:group|ion)|output|p|param|section|t(?:able|body|head|d|h|r|foot|itle)'
            . '|ul|video)\\b[^>]*>)/i',
            '$1',
            $this->_html
        );

        // remove ws outside of all elements
        $this->_html = $this->_safeReplaceCallback(
            '/>([^<]+)</',
            '_outsideTagCB',
            $this->_html
        );

        // reverse order while preserving keys to ensure the last replacement is done first, etc ...
        // (a later placeholder's content may itself contain an earlier placeholder token)
        $this->_placeholders = array_reverse($this->_placeholders, true);

        // fill placeholders
        $this->_html = str_replace(
            array_keys($this->_placeholders),
            array_values($this->_placeholders),
            $this->_html
        );

        return $this->_html;
    }

    /**
     * preg_replace() that returns the untouched subject when PCRE reports an error.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $subject
     *
     * @return string
     */
    private function _safeReplace($pattern, $replacement, $subject)
    {
        $result = preg_replace($pattern, $replacement, $subject);

        return (null === $result) ? $subject : $result;
    }

    /**
     * preg_replace_callback() bound to a method of this object; returns the
     * untouched subject when PCRE reports an error.
     *
     * @param string $pattern
     * @param string $method  name of the callback method on $this
     * @param string $subject
     *
     * @return string
     */
    private function _safeReplaceCallback($pattern, $method, $subject)
    {
        $result = preg_replace_callback($pattern, array($this, $method), $subject);

        return (null === $result) ? $subject : $result;
    }

    /**
     * Comment callback: keep the comment when its body starts with '[' or
     * contains '<![' (IE conditional comments), otherwise drop it.
     *
     * @param array $m regex match; $m[1] is the comment body
     *
     * @return string
     */
    protected function _commentCB($m)
    {
        $isConditional = (0 === strpos($m[1], '[')) || (false !== strpos($m[1], '<!['));

        return $isConditional ? $m[0] : '';
    }

    /**
     * Store content under a new placeholder token and return the token.
     *
     * @param string $content
     *
     * @return string
     */
    protected function _reservePlace($content)
    {
        $placeholder = '%' . $this->_replacementHash . count($this->_placeholders) . '%';
        $this->_placeholders[$placeholder] = $content;

        return $placeholder;
    }

    /**
     * Collapse leading and trailing whitespace of a text run between two tags
     * to a single space each.
     *
     * @param array $m regex match; $m[1] is the text between '>' and '<'
     *
     * @return string
     */
    protected function _outsideTagCB($m)
    {
        return '>' . $this->_safeReplace('/^\\s+|\\s+$/', ' ', $m[1]) . '<';
    }

    /**
     * @param array $m regex match; $m[1] is the whole PRE element
     *
     * @return string placeholder token
     */
    protected function _removePreCB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * @param array $m regex match; $m[1] is the whole TEXTAREA element
     *
     * @return string placeholder token
     */
    protected function _removeTextareaCB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * @param array $m regex match; $m[1] is the '="data:..."' attribute fragment
     *
     * @return string placeholder token
     */
    protected function _removeDataURICB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * STYLE callback: strip wrapping HTML comment / CDATA markers, run the
     * CSS minifier and park the rebuilt element behind a placeholder.
     *
     * @param array $m regex match; $m[1] is the opening tag, $m[2] the CSS
     *
     * @return string placeholder token
     */
    protected function _removeStyleCB($m)
    {
        $openStyle = $m[1];

        // remove HTML comments
        $css = $this->_safeReplace('/(?:^\\s*<!--|-->\\s*$)/', '', $m[2]);

        // remove CDATA section markers
        $css = $this->_removeCdata($css);

        // minify
        $minifier = $this->_cssMinifier ? $this->_cssMinifier : 'trim';
        $css = call_user_func($minifier, $css);

        if ($this->_needsCdata($css)) {
            return $this->_reservePlace("{$openStyle}/*<![CDATA[*/{$css}/*]]>*/</style>");
        }

        return $this->_reservePlace("{$openStyle}{$css}</style>");
    }

    /**
     * SCRIPT callback: optionally strip wrapping HTML comment / CDATA markers,
     * run the JS minifier and park the rebuilt element behind a placeholder.
     *
     * @param array $m regex match; $m[1]/$m[4] are the surrounding whitespace,
     *                 $m[2] the opening tag, $m[3] the script body
     *
     * @return string placeholder token
     */
    protected function _removeScriptCB($m)
    {
        $openScript = $m[2];
        $js = $m[3];

        // whitespace surrounding? preserve at least one space
        $ws1 = ($m[1] === '') ? '' : ' ';
        $ws2 = ($m[4] === '') ? '' : ' ';

        if (!$this->_keepComments) {
            // remove HTML comments (and ending "//" if present)
            $js = $this->_safeReplace('/(?:^\\s*<!--\\s*|\\s*(?:\\/\\/)?\\s*-->\\s*$)/', '', $js);

            // remove CDATA section markers
            $js = $this->_removeCdata($js);
        }

        // minify
        $minifier = $this->_jsMinifier ? $this->_jsMinifier : 'trim';
        $js = call_user_func($minifier, $js);

        if ($this->_needsCdata($js)) {
            return $this->_reservePlace("{$ws1}{$openScript}/*<![CDATA[*/{$js}/*]]>*/</script>{$ws2}");
        }

        return $this->_reservePlace("{$ws1}{$openScript}{$js}</script>{$ws2}");
    }

    /**
     * Remove CDATA section markers (plain or wrapped in block comments).
     *
     * @param string $str
     *
     * @return string
     */
    protected function _removeCdata($str)
    {
        if (false === strpos($str, '<![CDATA[')) {
            return $str;
        }

        return str_replace(
            array('/* <![CDATA[ */', '/* ]]> */', '/*<![CDATA[*/', '/*]]>*/', '<![CDATA[', ']]>'),
            '',
            $str
        );
    }

    /**
     * Whether inline content must be wrapped in a CDATA section: only in
     * XHTML mode, and only when it contains '<', '&', '--' or ']]>'.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function _needsCdata($str)
    {
        return ($this->_isXhtml && preg_match('/(?:[<&]|\\-\\-|\\]\\]>)/', $str));
    }
}
277