Completed
Push — master ( 5b581f...5da188 )
by frank
04:46
created

Minify_HTML::_needsCdata()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
nc 2
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * Class Minify_HTML
4
 * @package Minify
5
 */
6
7
/**
8
 * Compress HTML
9
 *
10
 * This is a heavy regex-based removal of whitespace, unnecessary comments and
11
 * tokens. IE conditional comments are preserved. There are also options to have
12
 * STYLE and SCRIPT blocks compressed by callback functions.
13
 *
14
 * A test suite is available.
15
 *
16
 * @package Minify
17
 * @author Stephen Clay <[email protected]>
18
 */
19
class Minify_HTML
{
    /** @var string Markup being minified; rewritten step by step in process(). */
    private $_html;

    /** @var bool|null Treat content as XHTML? null means "sniff the doctype in process()". */
    protected $_isXhtml = null;

    /** @var string|null Prefix used to build placeholder tokens; set in process(). */
    protected $_replacementHash = null;

    /** @var array Map of placeholder token => content held out of whitespace minification. */
    protected $_placeholders = array();

    /** @var callable|null Callback applied to STYLE content ('trim' is used when unset). */
    protected $_cssMinifier = null;

    /** @var callable|null Callback applied to SCRIPT content ('trim' is used when unset). */
    protected $_jsMinifier = null;

    /** @var bool Keep HTML comments instead of stripping them? */
    protected $_keepComments = false;

    /**
     * "Minify" an HTML page
     *
     * Convenience wrapper: builds a minifier for $html and returns the result
     * of process().
     *
     * @param string $html
     *
     * @param array $options
     *
     * 'cssMinifier' : (optional) callback function to process content of STYLE
     * elements.
     *
     * 'jsMinifier' : (optional) callback function to process content of SCRIPT
     * elements. Note: the type attribute is ignored.
     *
     * 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If
     * unset, minify will sniff for an XHTML doctype.
     *
     * 'keepComments' : (optional boolean) should the HTML comments be kept
     * in the HTML Code?
     *
     * @return string
     */
    public static function minify($html, $options = array())
    {
        $min = new Minify_HTML($html, $options);
        return $min->process();
    }

    /**
     * Create a minifier object
     *
     * The markup is trimmed and its "\r\n" line endings are normalised to "\n"
     * before being stored.
     *
     * @param string $html
     *
     * @param array $options
     *
     * 'cssMinifier' : (optional) callback function to process content of STYLE
     * elements.
     *
     * 'jsMinifier' : (optional) callback function to process content of SCRIPT
     * elements. Note: the type attribute is ignored.
     *
     * 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If
     * unset, minify will sniff for an XHTML doctype.
     *
     * 'keepComments' : (optional boolean) should the HTML comments be kept
     * in the HTML Code?
     */
    public function __construct($html, $options = array())
    {
        $this->_html = str_replace("\r\n", "\n", trim($html));
        if (isset($options['xhtml'])) {
            $this->_isXhtml = (bool)$options['xhtml'];
        }
        if (isset($options['cssMinifier'])) {
            $this->_cssMinifier = $options['cssMinifier'];
        }
        if (isset($options['jsMinifier'])) {
            $this->_jsMinifier = $options['jsMinifier'];
        }
        if (isset($options['keepComments'])) {
            // cast like 'xhtml' so later checks always see a real boolean
            $this->_keepComments = (bool)$options['keepComments'];
        }
    }

    /**
     * Minify the markup given in the constructor
     *
     * SCRIPT, STYLE, PRE, TEXTAREA and data: URI contents are swapped for
     * placeholder tokens first, so the whitespace rules below never touch
     * them; the tokens are swapped back at the end.
     *
     * @return string
     */
    public function process()
    {
        if ($this->_isXhtml === null) {
            $this->_isXhtml = (false !== strpos($this->_html, '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML'));
        }

        // fall back to the current time if REQUEST_TIME is not populated
        $requestTime = isset($_SERVER['REQUEST_TIME']) ? $_SERVER['REQUEST_TIME'] : time();
        $this->_replacementHash = 'MINIFYHTML' . md5((string)$requestTime);
        $this->_placeholders = array();

        // replace SCRIPTs (and minify) with placeholders
        $this->_html = preg_replace_callback(
            '/(\\s*)(<script\\b[^>]*?>)([\\s\\S]*?)<\\/script>(\\s*)/i',
            array($this, '_removeScriptCB'),
            $this->_html
        );

        // replace STYLEs (and minify) with placeholders
        $this->_html = preg_replace_callback(
            '/\\s*(<style\\b[^>]*?>)([\\s\\S]*?)<\\/style>\\s*/i',
            array($this, '_removeStyleCB'),
            $this->_html
        );

        // remove HTML comments (not containing IE conditional comments).
        if (!$this->_keepComments) {
            $this->_html = preg_replace_callback(
                '/<!--([\\s\\S]*?)-->/',
                array($this, '_commentCB'),
                $this->_html
            );
        }

        // replace PREs with placeholders
        $this->_html = preg_replace_callback(
            '/\\s*(<pre\\b[^>]*?>[\\s\\S]*?<\\/pre>)\\s*/i',
            array($this, '_removePreCB'),
            $this->_html
        );

        // replace TEXTAREAs with placeholders
        $this->_html = preg_replace_callback(
            '/\\s*(<textarea\\b[^>]*?>[\\s\\S]*?<\\/textarea>)\\s*/i',
            array($this, '_removeTextareaCB'),
            $this->_html
        );

        // replace data: URIs with placeholders
        $this->_html = preg_replace_callback(
            '/(=("|\')data:.*\\2)/Ui',
            array($this, '_removeDataURICB'),
            $this->_html
        );

        // trim each line.
        // replace by space instead of '' to avoid newline after opening tag getting zapped
        $this->_html = preg_replace('/^\s+|\s+$/m', ' ', $this->_html);

        // remove ws around block/undisplayed elements
        $this->_html = preg_replace('/\\s+(<\\/?(?:area|article|aside|base(?:font)?|blockquote|body'
            . '|canvas|caption|center|col(?:group)?|dd|dir|div|dl|dt|fieldset|figcaption|figure|footer|form'
            . '|frame(?:set)?|h[1-6]|head|header|hgroup|hr|html|legend|li|link|main|map|menu|meta|nav'
            . '|ol|opt(?:group|ion)|output|p|param|section|t(?:able|body|head|d|h|r|foot|itle)'
            . '|ul|video)\\b[^>]*>)/i', '$1', $this->_html);

        // remove ws outside of all elements
        $this->_html = preg_replace_callback(
            '/>([^<]+)</',
            array($this, '_outsideTagCB'),
            $this->_html
        );

        // fill placeholders, newest first: a later placeholder (e.g. a TEXTAREA)
        // may hold an earlier one (e.g. a SCRIPT inside it), and str_replace()
        // handles its search terms one after another, so the outer token has to
        // be expanded before the inner one is looked for.
        $pending = array_reverse($this->_placeholders, true);
        $this->_html = str_replace(
            array_keys($pending),
            array_values($pending),
            $this->_html
        );

        return $this->_html;
    }

    /**
     * Callback for HTML comments: decide whether a comment survives.
     *
     * A comment is kept when its content starts with '[' or contains '<!['
     * (the forms used by IE conditional comments); anything else is dropped.
     *
     * @param array $m regex match; $m[0] is the whole comment, $m[1] its content
     *
     * @return string the untouched comment, or '' to remove it
     */
    protected function _commentCB($m)
    {
        $isConditional = (0 === strpos($m[1], '[') || false !== strpos($m[1], '<!['));
        return $isConditional ? $m[0] : '';
    }

    /**
     * Store content under a fresh placeholder token.
     *
     * @param string $content markup to hold back until the end of process()
     *
     * @return string the token that now stands in for $content
     */
    protected function _reservePlace($content)
    {
        $placeholder = '%' . $this->_replacementHash . count($this->_placeholders) . '%';
        $this->_placeholders[$placeholder] = $content;
        return $placeholder;
    }

    /**
     * Callback for text between two tags: collapse leading and trailing
     * whitespace runs to a single space each.
     *
     * @param array $m regex match; $m[1] is the text between '>' and '<'
     *
     * @return string
     */
    protected function _outsideTagCB($m)
    {
        return '>' . preg_replace('/^\\s+|\\s+$/', ' ', $m[1]) . '<';
    }

    /**
     * Callback for PRE elements: swap the element for a placeholder.
     *
     * @param array $m regex match; $m[1] is the complete element
     *
     * @return string placeholder token
     */
    protected function _removePreCB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * Callback for TEXTAREA elements: swap the element for a placeholder.
     *
     * @param array $m regex match; $m[1] is the complete element
     *
     * @return string placeholder token
     */
    protected function _removeTextareaCB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * Callback for data: URI attribute values: swap the value for a placeholder.
     *
     * @param array $m regex match; $m[1] is '=' plus the quoted data: URI
     *
     * @return string placeholder token
     */
    protected function _removeDataURICB($m)
    {
        return $this->_reservePlace($m[1]);
    }

    /**
     * Callback for STYLE elements: clean and minify the CSS, then swap the
     * rebuilt element for a placeholder.
     *
     * @param array $m regex match; $m[1] is the opening tag, $m[2] the CSS
     *
     * @return string placeholder token
     */
    protected function _removeStyleCB($m)
    {
        $openStyle = $m[1];
        $css = $m[2];

        // remove HTML comments
        $css = preg_replace('/(?:^\\s*<!--|-->\\s*$)/', '', $css);

        // remove CDATA section markers
        $css = $this->_removeCdata($css);

        // minify
        $minifier = $this->_cssMinifier
            ? $this->_cssMinifier
            : 'trim';
        $css = call_user_func($minifier, $css);

        if ($this->_needsCdata($css)) {
            return $this->_reservePlace("{$openStyle}/*<![CDATA[*/{$css}/*]]>*/</style>");
        }
        return $this->_reservePlace("{$openStyle}{$css}</style>");
    }

    /**
     * Callback for SCRIPT elements: clean and minify the JS, then swap the
     * rebuilt element for a placeholder.
     *
     * Whitespace around the element is reduced to at most one space per side.
     * HTML comment wrappers and CDATA markers are only stripped when comments
     * are not being kept.
     *
     * @param array $m regex match; $m[1]/$m[4] are the surrounding whitespace,
     *                 $m[2] is the opening tag, $m[3] the JS
     *
     * @return string placeholder token
     */
    protected function _removeScriptCB($m)
    {
        $openScript = $m[2];
        $js = $m[3];

        // whitespace surrounding? preserve at least one space
        $ws1 = ($m[1] === '') ? '' : ' ';
        $ws2 = ($m[4] === '') ? '' : ' ';

        if (!$this->_keepComments) {
            // remove HTML comments (and ending "//" if present)
            $js = preg_replace('/(?:^\\s*<!--\\s*|\\s*(?:\\/\\/)?\\s*-->\\s*$)/', '', $js);

            // remove CDATA section markers
            $js = $this->_removeCdata($js);
        }

        // minify
        $minifier = $this->_jsMinifier
            ? $this->_jsMinifier
            : 'trim';
        $js = call_user_func($minifier, $js);

        if ($this->_needsCdata($js)) {
            return $this->_reservePlace("{$ws1}{$openScript}/*<![CDATA[*/{$js}/*]]>*/</script>{$ws2}");
        }
        return $this->_reservePlace("{$ws1}{$openScript}{$js}</script>{$ws2}");
    }

    /**
     * Strip CDATA section markers (bare or wrapped in block comments).
     *
     * @param string $str
     *
     * @return string $str unchanged when it contains no '<![CDATA['
     */
    protected function _removeCdata($str)
    {
        if (false === strpos($str, '<![CDATA[')) {
            return $str;
        }
        return str_replace(
            array('/* <![CDATA[ */', '/* ]]> */', '/*<![CDATA[*/', '/*]]>*/', '<![CDATA[', ']]>'),
            '',
            $str
        );
    }

    /**
     * Does inline content have to be wrapped in a CDATA section?
     *
     * True only in XHTML mode and when the content contains '<', '&', '--'
     * or ']]>'.
     *
     * @param string $str
     *
     * @return bool
     */
    protected function _needsCdata($str)
    {
        return ($this->_isXhtml && preg_match('/(?:[<&]|\\-\\-|\\]\\]>)/', $str));
    }
}
274