Completed
Push — master ( 535d53...103422 )
by Nazar
04:40
created

Includes_processing::html()   A

Complexity

Conditions 2
Paths 1

Size

Total Lines 13
Code Lines 9

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 2
eloc 9
nc 1
nop 5
dl 0
loc 13
rs 9.4285
1
<?php
2
/**
3
 * @package   CleverStyle Framework
4
 * @author    Nazar Mokrynskyi <[email protected]>
5
 * @copyright Copyright (c) 2014-2016, Nazar Mokrynskyi
6
 * @license   MIT License, see license.txt
7
 */
8
namespace cs\Page;
9
10
/**
 * Class includes few methods used for processing CSS, JS and HTML files before putting them into cache.
 *
 * CSS and HTML files may contain includes of other CSS and JS files, images, fonts and so on with absolute and relative paths.
 * Methods of this class handle all these includes and combine them into single resulting files (combined JS files are written gzip-compressed).
 * This allows to decrease number of HTTP requests on page and to avoid breaking of relative paths for fonts, images and other includes
 * after putting them into cache directory.
 */
class Includes_processing {
	/**
	 * Do not inline files bigger than 4 KiB
	 */
	const MAX_EMBEDDING_SIZE = 4096;
	/**
	 * Extensions of files that may be embedded as data URI, mapped to the MIME type used in that URI
	 *
	 * @var string[]
	 */
	protected static $extension_to_mime = [
		// `image/jpeg` is the registered media type, `image/jpg` is not
		'jpeg' => 'image/jpeg',
		'jpe'  => 'image/jpeg',
		'jpg'  => 'image/jpeg',
		'gif'  => 'image/gif',
		'png'  => 'image/png',
		'svg'  => 'image/svg+xml',
		'svgz' => 'image/svg+xml',
		'woff' => 'application/font-woff',
		//'woff2' => 'application/font-woff2',
		'css'  => 'text/css'
	];
	/**
	 * Minifies CSS and analyses it for images, fonts and css links in order to include their content into single resulting css file.
	 *
	 * Both `url(..)` and `@import ".."` references are processed. Only relative paths pointing to existing files are touched:
	 * files with extension from `static::$extension_to_mime` that are not bigger than `static::MAX_EMBEDDING_SIZE` are embedded as data URI,
	 * all other files are referenced by path relative to current working directory with content hash appended.
	 *
	 * Supports next file extensions for embedding:
	 * jpeg, jpe, jpg, gif, png, svg, svgz, woff, css
	 *
	 * @param string   $data                   Content of processed file
	 * @param string   $file                   Path to file, that includes specified in previous parameter content
	 * @param string[] $not_embedded_resources Some resources like images and fonts might not be embedded into resulting CSS because of their size
	 *
	 * @return string    $data
	 */
	public static function css ($data, $file, &$not_embedded_resources = []) {
		$dir = dirname($file);
		/**
		 * Remove comments, tabs and new lines
		 */
		$data = preg_replace('#(/\*.*?\*/)|\t|\n|\r#s', ' ', $data);
		/**
		 * Remove unnecessary spaces
		 */
		$data = preg_replace('/\s*([,;>{}\(])\s*/', '$1', $data);
		$data = preg_replace('/\s+/', ' ', $data);
		/**
		 * Return spaces required in media queries
		 */
		$data = preg_replace('/\s(and|or)\(/', ' $1 (', $data);
		/**
		 * Duplicated semicolons
		 */
		$data = preg_replace('/;+/m', ';', $data);
		/**
		 * Minify repeated colors declarations
		 */
		$data = preg_replace('/#([0-9a-f])\1([0-9a-f])\2([0-9a-f])\3/i', '#$1$2$3', $data);
		/**
		 * Minify rgb colors declarations
		 */
		$data = preg_replace_callback(
			'/rgb\(([0-9,\.]+)\)/i',
			function ($rgb) {
				$components = explode(',', $rgb[1]);
				/**
				 * Only exactly 3 integer components within 0..255 have exact `#rrggbb` equivalent, anything else is left untouched
				 */
				if (count($components) != 3) {
					return $rgb[0];
				}
				$hex = '#';
				foreach ($components as $component) {
					if (!preg_match('/^\d{1,3}$/', $component) || (int)$component > 255) {
						return $rgb[0];
					}
					$hex .= str_pad(dechex((int)$component), 2, '0', STR_PAD_LEFT);
				}
				return $hex;
			},
			$data
		);
		/**
		 * Remove unnecessary zeros
		 */
		$data = preg_replace('/(\D)0\.(\d+)/i', '$1.$2', $data);
		/**
		 * Includes processing
		 */
		$data = preg_replace_callback(
			'/url\((.*?)\)|@import[\s\t\n\r]*[\'"](.*?)[\'"]/',
			function ($match) use ($dir, &$not_embedded_resources) {
				/**
				 * `url(..)` is captured by the first group, `@import ".."` by the second one
				 */
				$is_import = isset($match[2]) && $match[2] !== '';
				$link      = trim($is_import ? $match[2] : $match[1], '\'" ');
				$link      = explode('?', $link, 2)[0];
				if (!static::is_relative_path_and_exists($link, $dir)) {
					return $match[0];
				}
				$path    = "$dir/$link";
				$content = file_get_contents($path);
				if ($content === false) {
					// File can't be read - leave reference as is
					return $match[0];
				}
				$extension = file_extension($link);
				if (!isset(static::$extension_to_mime[$extension]) || filesize($path) > static::MAX_EMBEDDING_SIZE) {
					$path_relatively_to_the_root = str_replace(getcwd(), '', realpath($path));
					$path_relatively_to_the_root .= '?'.substr(md5($content), 0, 5);
					$not_embedded_resources[] = $path_relatively_to_the_root;
					$quoted_path              = "'".str_replace("'", "\\'", $path_relatively_to_the_root)."'";
					/**
					 * Reference is rebuilt from scratch rather than patched with `str_replace()`,
					 * because link itself might be a substring of `url(`
					 */
					return $is_import ? "@import $quoted_path" : "url($quoted_path)";
				}
				if ($extension == 'css') {
					/**
					 * For recursive includes processing, if CSS file includes others CSS files
					 * (full path is needed, since nested includes are relative to included file's directory)
					 */
					$content = static::css($content, $path, $not_embedded_resources);
				}
				$mime_type = static::$extension_to_mime[$extension];
				$data_uri  = "data:$mime_type;charset=utf-8;base64,".base64_encode($content);
				return $is_import ? "@import '$data_uri'" : "url($data_uri)";
			},
			$data
		);
		return trim($data);
	}
	/**
	 * Simple and fast JS minification
	 *
	 * Works line by line: comments that start at the beginning of the line and empty lines are removed, remaining lines are concatenated,
	 * new line is only preserved where neither the end of the line nor the beginning of the next one is a symbol that is safe for concatenation.
	 *
	 * @param string $data
	 *
	 * @return string
	 */
	public static function js ($data) {
		/**
		 * Set of symbols that are safe to be concatenated without new line with anything else
		 */
		$regexp = '[:;,.+\-*\/{}?><^\'"\[\]=&\(]';
		/**
		 * First pass: remove comments and empty lines, so that they do not hide the line of code that actually follows a statement
		 */
		$lines = [];
		/**
		 * Flag that is `true` when inside comment
		 */
		$comment = false;
		foreach (explode("\n", $data) as $line) {
			/**
			 * Remove starting and trailing spaces
			 */
			$line = trim($line);
			/**
			 * Remove single-line comments
			 */
			if (strpos($line, '//') === 0) {
				continue;
			}
			/**
			 * Starts with multi-line comment
			 */
			if (strpos($line, '/*') === 0) {
				$comment = true;
			}
			if ($comment) {
				$comment_end = strpos($line, '*/');
				if ($comment_end === false) {
					continue;
				}
				/**
				 * End of multi-line comment, whatever follows it on the same line is code
				 */
				$line    = trim(substr($line, $comment_end + 2));
				$comment = false;
			}
			if ($line !== '') {
				$lines[] = $line;
			}
		}
		/**
		 * Second pass: add new line at the end if only needed
		 */
		$last_index = count($lines) - 1;
		foreach ($lines as $index => $line) {
			if (
				$index < $last_index &&
				!preg_match("/$regexp\$/", $line) &&
				!preg_match("/^$regexp/", $lines[$index + 1])
			) {
				$line .= "\n";
			}
			/**
			 * If line contains `//` we are not sure whether it is a comment - just add new line afterwards
			 */
			$lines[$index] = preg_replace('#//.*$#', "\\0\n", $line);
		}
		$data = implode('', $lines);
		$data = str_replace('</script>', '<\/script>', $data);
		return trim($data, ';').';';
	}
	/**
	 * Analyses file for scripts and styles, combines them into resulting files in order to optimize loading process
	 * (files with combined scripts and styles will be created)
	 *
	 * @param string   $data                   Content of processed file
	 * @param string   $file                   Path to file, that includes specified in previous parameter content
	 * @param string   $base_target_file_path  Base filename for resulting combined files
	 * @param bool     $vulcanization          Whether to put combined files separately or to make includes built-in (vulcanization)
	 * @param string[] $not_embedded_resources Resources like images/fonts might not be embedded into resulting CSS because of big size or CSS/JS because of CSP
	 *
	 * @return string
	 */
	public static function html ($data, $file, $base_target_file_path, $vulcanization, &$not_embedded_resources = []) {
		static::html_process_scripts($data, $file, $base_target_file_path, $vulcanization, $not_embedded_resources);
		static::html_process_links_and_styles($data, $file, $base_target_file_path, $vulcanization, $not_embedded_resources);
		// Removing HTML comments (those that are mostly likely comments, to avoid problems)
		$data = preg_replace_callback(
			'/^\s*<!--([^>-].*[^-])?-->/Ums',
			function ($matches) {
				// Group is absent for empty comment `<!---->`
				$comment = isset($matches[1]) ? $matches[1] : '';
				// Comment with `--` inside is suspicious, so it is kept as is
				return strpos($comment, '--') === false ? '' : $matches[0];
			},
			$data
		);
		return preg_replace("/\n+/", "\n", $data);
	}
	/**
	 * Collects inline scripts and scripts with relative `src` pointing to existing file, removes them from `$data` and appends single
	 * combined and minified script to the end of `$data` instead (external gzip-compressed file or inline script in case of vulcanization)
	 *
	 * @param string   $data                   Content of processed file, modified in place
	 * @param string   $file                   Path to file, that includes specified in previous parameter content
	 * @param string   $base_target_file_path  Base filename for resulting combined files
	 * @param bool     $vulcanization          Whether to put combined files separately or to make includes built-in (vulcanization)
	 * @param string[] $not_embedded_resources Resources like images/fonts might not be embedded into resulting CSS because of big size or CSS/JS because of CSP
	 *
	 * @return void
	 */
	protected static function html_process_scripts (&$data, $file, $base_target_file_path, $vulcanization, &$not_embedded_resources) {
		if (!preg_match_all('/<script(.*)<\/script>/Uims', $data, $scripts)) {
			return;
		}
		$scripts_content    = '';
		$scripts_to_replace = [];
		$dir                = dirname($file);
		foreach ($scripts[1] as $index => $script) {
			// Split into attributes of opening tag and script body
			$script = explode('>', $script, 2);
			if (preg_match('/src\s*=\s*[\'"](.*)[\'"]/Uims', $script[0], $url)) {
				$url = $url[1];
				if (!static::is_relative_path_and_exists($url, $dir)) {
					// External or missing script is left in place
					continue;
				}
				$scripts_to_replace[] = $scripts[0][$index];
				$scripts_content .= file_get_contents("$dir/$url").";\n";
			} else {
				$scripts_to_replace[] = $scripts[0][$index];
				// Body is absent when opening tag is malformed and not closed with `>`
				$scripts_content .= (isset($script[1]) ? $script[1] : '').";\n";
			}
		}
		if (!$scripts_to_replace) {
			return;
		}
		$scripts_content = static::js($scripts_content);
		// Remove all scripts
		$data = str_replace($scripts_to_replace, '', $data);
		/**
		 * If vulcanization is not used - put contents into separate file, and put link to it, otherwise put minified content back
		 */
		if (!$vulcanization) {
			/**
			 * md5 to distinguish modifications of the files
			 */
			$content_md5 = substr(md5($scripts_content), 0, 5);
			/**
			 * `FILE_BINARY` flag is not passed: it never had any effect and is deprecated in recent PHP versions
			 */
			file_put_contents(
				"$base_target_file_path.js",
				gzencode($scripts_content, 9),
				LOCK_EX
			);
			$base_target_file_name = basename($base_target_file_path);
			// Add script with combined content file to the end
			$data .= "<script src=\"$base_target_file_name.js?$content_md5\"></script>";
			$not_embedded_resources[] = "$base_target_file_name.js?$content_md5";
		} else {
			// Add combined content inline script to the end
			$data .= "<script>$scripts_content</script>";
		}
	}
	/**
	 * Minifies content of `<style>` tags in place, replaces links to relative CSS files with `<style>` tags with processed content
	 * and replaces relative HTML imports with their processed content appended to the end of `$data`
	 *
	 * @param string   $data                   Content of processed file, modified in place
	 * @param string   $file                   Path to file, that includes specified in previous parameter content
	 * @param string   $base_target_file_path  Base filename for resulting combined files
	 * @param bool     $vulcanization          Whether to put combined files separately or to make includes built-in (vulcanization)
	 * @param string[] $not_embedded_resources Resources like images/fonts might not be embedded into resulting CSS because of big size or CSS/JS because of CSP
	 *
	 * @return void
	 */
	protected static function html_process_links_and_styles (&$data, $file, $base_target_file_path, $vulcanization, &$not_embedded_resources) {
		// Drop Polymer inclusion, since it is already present
		$data = str_replace('<link rel="import" href="../polymer/polymer.html">', '', $data);
		if (!preg_match_all('/<link(.*)>|<style(.*)<\/style>/Uims', $data, $links_and_styles)) {
			return;
		}
		$imports_content             = '';
		$links_and_styles_to_replace = [];
		$dir                         = dirname($file);
		foreach ($links_and_styles[1] as $index => $link) {
			$tag   = $links_and_styles[0][$index];
			$style = $links_and_styles[2][$index];
			/**
			 * Custom styles `is="custom-style"`, styles includes `include=".."` and styles with plain CSS are kept in place,
			 * only their content is processed
			 */
			if (
				preg_match('/^[^>]*(is="custom-style"|include=)[^>]*>/Uim', $style) ||
				strpos($tag, '</style>') > 0
			) {
				$content = explode('>', $style, 2);
				if (isset($content[1]) && $content[1] !== '') {
					$content = $content[1];
					/**
					 * Rebuild this particular tag only: replacing bare content in the whole document would also affect
					 * unrelated places where the same text happens to be present
					 */
					$closing_tag_length = strlen('</style>');
					$opening_tag        = substr($tag, 0, strlen($tag) - strlen($content) - $closing_tag_length);
					$closing_tag        = substr($tag, -$closing_tag_length);
					$data               = str_replace(
						$tag,
						$opening_tag.static::css($content, $file, $not_embedded_resources).$closing_tag,
						$data
					);
				}
				continue;
			}
			if (!static::has_relative_href($link, $url, $dir)) {
				continue;
			}
			$import = preg_match('/rel\s*=\s*[\'"]import[\'"]/Uim', $link);
			/**
			 * CSS imports are available in Polymer alongside with HTML imports
			 */
			$css_import = $import && preg_match('/type\s*=\s*[\'"]css[\'"]/Uim', $link);
			$stylesheet = preg_match('/rel\s*=\s*[\'"]stylesheet[\'"]/Uim', $link);
			// TODO: Polymer only supports `style[is=custom-style]`, but no `link`-based counterpart, so we can't provide CSP-compatibility for CSS anyway
			if ($css_import || $stylesheet) {
				/**
				 * If content is link to CSS file
				 */
				$css  = static::css(
					file_get_contents("$dir/$url"),
					"$dir/$url",
					$not_embedded_resources
				);
				$data = str_replace($tag, "<style>$css</style>", $data);
			} elseif ($import) {
				/**
				 * If content is HTML import
				 */
				$links_and_styles_to_replace[] = $tag;
				$imports_content .= static::html(
					file_get_contents("$dir/$url"),
					"$dir/$url",
					"$base_target_file_path-".basename($url, '.html'),
					$vulcanization,
					$not_embedded_resources
				);
			}
		}
		if (!$links_and_styles_to_replace) {
			return;
		}
		// Remove processed imports, otherwise they will be loaded once again in addition to the content added below
		$data = str_replace($links_and_styles_to_replace, '', $data);
		// Add imports to the end
		$data .= $imports_content;
	}
	/**
	 * Checks whether attributes of the link contain `href` with relative path to existing file
	 *
	 * @param string $link Attributes of `<link>` tag
	 * @param string $url  Will contain value of `href` attribute if `true` is returned (otherwise contents are not meaningful)
	 * @param string $dir  Directory against which relative path is resolved
	 *
	 * @return bool
	 */
	protected static function has_relative_href ($link, &$url, $dir) {
		if (!$link || !preg_match('/href\s*=\s*[\'"](.*)[\'"]/Uims', $link, $url)) {
			return false;
		}
		if (!static::is_relative_path_and_exists($url[1], $dir)) {
			return false;
		}
		$url = $url[1];
		return true;
	}
	/**
	 * Simple check for http[s], ftp and absolute links
	 *
	 * Empty path and path to directory are rejected as well, since callers are going to read the contents of the file.
	 *
	 * @param string $path
	 * @param string $dir
	 *
	 * @return bool
	 */
	protected static function is_relative_path_and_exists ($path, $dir) {
		return
			(string)$path !== '' &&
			!preg_match('#^(http://|https://|ftp://|/)#i', $path) &&
			is_file("$dir/$path");
	}
}
389