Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — master (#44)
by Der Mundschenk
03:05
created

PHP_Typography::process()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 5
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 4
nc 1
nop 3
1
<?php
2
/**
3
 *  This file is part of PHP-Typography.
4
 *
5
 *  Copyright 2014-2017 Peter Putzer.
6
 *  Copyright 2009-2011 KINGdesk, LLC.
7
 *
8
 *  This program is free software; you can redistribute it and/or modify
9
 *  it under the terms of the GNU General Public License as published by
10
 *  the Free Software Foundation; either version 2 of the License, or
11
 *  (at your option) any later version.
12
 *
13
 *  This program is distributed in the hope that it will be useful,
14
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 *  GNU General Public License for more details.
17
 *
18
 *  You should have received a copy of the GNU General Public License along
19
 *  with this program; if not, write to the Free Software Foundation, Inc.,
20
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21
 *
22
 *  ***
23
 *
24
 *  @package mundschenk-at/php-typography
25
 *  @license http://www.gnu.org/licenses/gpl-2.0.html
26
 */
27
28
namespace PHP_Typography;
29
30
use PHP_Typography\Fixes\Registry;
31
use PHP_Typography\Fixes\Default_Registry;
32
33
/**
34
 * Parses HTML5 (or plain text) and applies various typographic fixes to the text.
35
 *
36
 * If used with multibyte language, UTF-8 encoding is required.
37
 *
38
 * Portions of this code have been inspired by:
39
 *  - typogrify (https://code.google.com/p/typogrify/)
40
 *  - WordPress code for wptexturize (https://developer.wordpress.org/reference/functions/wptexturize/)
41
 *  - PHP SmartyPants Typographer (https://michelf.ca/projects/php-smartypants/typographer/)
42
 *
43
 *  @author Jeffrey D. King <[email protected]>
44
 *  @author Peter Putzer <[email protected]>
45
 */
46
class PHP_Typography {

	/**
	 * A DOM-based HTML5 parser.
	 *
	 * @var \Masterminds\HTML5
	 */
	private $html5_parser;

	/**
	 * The hyphenator cache.
	 *
	 * @var Hyphenator\Cache
	 */
	protected $hyphenator_cache;

	/**
	 * The node fixes registry.
	 *
	 * @var Registry|null
	 */
	private $registry;

	/**
	 * Whether the Hyphenator\Cache of the $registry needs to be updated.
	 *
	 * @var bool
	 */
	private $update_registry_cache;

	/**
	 * Sets up a new PHP_Typography object.
	 *
	 * @param Registry|null $registry Optional. A fix registry instance. Default null,
	 *                                meaning the default fixes are used.
	 */
	public function __construct( Registry $registry = null ) {
		$this->registry = $registry;

		// An injected registry was built without our hyphenator cache, so it has
		// to be synchronized on first use (see get_registry()).
		$this->update_registry_cache = ! empty( $registry );
	}

	/**
	 * Modifies $html according to the defined settings.
	 *
	 * @param string   $html     A HTML fragment.
	 * @param Settings $settings A settings object.
	 * @param bool     $is_title Optional. If the HTML fragment is a title. Default false.
	 *
	 * @return string The processed $html.
	 */
	public function process( $html, Settings $settings, $is_title = false ) {
		return $this->process_textnodes( $html, function( $html, $settings, $is_title ) {
			return $this->get_registry()->apply_fixes( $html, $settings, $is_title, false );
		}, $settings, $is_title );
	}

	/**
	 * Modifies $html according to the defined settings, in a way that is appropriate for RSS feeds
	 * (i.e. excluding processes that may not display well with limited character set intelligence).
	 *
	 * @param string   $html     A HTML fragment.
	 * @param Settings $settings A settings object.
	 * @param bool     $is_title Optional. If the HTML fragment is a title. Default false.
	 *
	 * @return string The processed $html.
	 */
	public function process_feed( $html, Settings $settings, $is_title = false ) {
		return $this->process_textnodes( $html, function( $html, $settings, $is_title ) {
			return $this->get_registry()->apply_fixes( $html, $settings, $is_title, true );
		}, $settings, $is_title );
	}

	/**
	 * Applies specific fixes to all textnodes of the HTML fragment.
	 *
	 * The fragment is returned unchanged if it is a title and `h1` or `h2` are
	 * among the ignored tags, or if parse_html() reports unhandled parsing errors.
	 *
	 * @param string   $html     A HTML fragment.
	 * @param callable $fixer    A callback that applies typography fixes to a single textnode.
	 *                           It is called with the textnode, $settings and $is_title.
	 * @param Settings $settings A settings object.
	 * @param bool     $is_title Optional. If the HTML fragment is a title. Default false.
	 *
	 * @return string The processed $html.
	 */
	public function process_textnodes( $html, callable $fixer, Settings $settings, $is_title = false ) {
		if ( isset( $settings['ignoreTags'] ) && $is_title && ( in_array( 'h1', $settings['ignoreTags'], true ) || in_array( 'h2', $settings['ignoreTags'], true ) ) ) {
			return $html;
		}

		// Lazy-load our parser (the text parser is not needed for feeds).
		$html5_parser = $this->get_html5_parser();

		// Parse the HTML.
		$dom = $this->parse_html( $html5_parser, $html, $settings );

		// Abort if there were parsing errors.
		if ( empty( $dom ) ) {
			return $html;
		}

		// Query some nodes in the DOM.
		$xpath          = new \DOMXPath( $dom );
		$body_node      = $xpath->query( '/html/body' )->item( 0 );
		$all_textnodes  = $xpath->query( '//text()', $body_node );
		$tags_to_ignore = $this->query_tags_to_ignore( $xpath, $body_node, $settings );

		// Start processing.
		foreach ( $all_textnodes as $textnode ) {
			if ( self::arrays_intersect( DOM::get_ancestors( $textnode ), $tags_to_ignore ) ) {
				continue;
			}

			// We won't be doing anything with spaces, so we can jump ship if that is all we have.
			if ( $textnode->isWhitespaceInElementContent() ) {
				continue;
			}

			// Re-encode < > & as HTML entities (&lt; &gt; and &amp; respectively); all other
			// characters stay decoded. The node content is parsed as HTML again below.
			$textnode->data = htmlspecialchars( $textnode->data, ENT_NOQUOTES, 'UTF-8' );

			// Apply fixes.
			$fixer( $textnode, $settings, $is_title );

			// Until now, we've only been working on a textnode: HTMLify result.
			$this->replace_node_with_html( $textnode, $textnode->data );
		}

		return $html5_parser->saveHTML( $body_node->childNodes );
	}

	/**
	 * Determines whether two object arrays intersect. The second array is expected
	 * to use the spl_object_hash for its keys.
	 *
	 * @param array $array1 The keys are ignored.
	 * @param array $array2 This array has to be in the form ( $spl_object_hash => $object ).
	 *
	 * @return boolean
	 */
	protected static function arrays_intersect( array $array1, array $array2 ) {
		foreach ( $array1 as $value ) {
			if ( isset( $array2[ spl_object_hash( $value ) ] ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Parse HTML5 fragment while ignoring certain warnings for invalid HTML code (e.g. duplicate IDs).
	 *
	 * @param \Masterminds\HTML5 $parser   An initialized parser object.
	 * @param string             $html     The HTML fragment to parse (not a complete document).
	 * @param Settings           $settings The settings to apply.
	 *
	 * @return \DOMDocument|null The encoding has already been set to UTF-8. Returns null if there were parsing errors.
	 */
	public function parse_html( \Masterminds\HTML5 $parser, $html, Settings $settings ) {
		// Silence some parsing errors for invalid HTML.
		set_error_handler( [ $this, 'handle_parsing_errors' ] ); // @codingStandardsIgnoreLine
		$xml_error_handling = libxml_use_internal_errors( true );

		try {
			// Do the actual parsing.
			$dom           = $parser->loadHTML( '<!DOCTYPE html><html><body>' . $html . '</body></html>' );
			$dom->encoding = 'UTF-8';
		} finally {
			// Restore original error handling, even if the parser threw, so that
			// our temporary handler never leaks into the calling code.
			libxml_clear_errors();
			libxml_use_internal_errors( $xml_error_handling );
			restore_error_handler();
		}

		// Handle any parser errors.
		$errors = $parser->getErrors();
		if ( ! empty( $settings['parserErrorsHandler'] ) && ! empty( $errors ) ) {
			$errors = $settings['parserErrorsHandler']( $errors );
		}

		// Return null if there are still unhandled parsing errors.
		if ( ! empty( $errors ) && ! $settings['parserErrorsIgnore'] ) {
			$dom = null;
		}

		return $dom;
	}

	/**
	 * Silently handle certain HTML parsing errors.
	 *
	 * The parameter list follows the callback signature of set_error_handler().
	 * $errstr, $errline and $errcontext are not used in the method body, but they
	 * cannot be removed without breaking that signature.
	 *
	 * @param int    $errno      Error number.
	 * @param string $errstr     Error message.
	 * @param string $errfile    The file in which the error occurred.
	 * @param int    $errline    The line in which the error occurred.
	 * @param array  $errcontext Optional. Calling context. PHP 8.0 and later do not pass
	 *                           this argument to error handlers. Default empty array.
	 *
	 * @return boolean Returns true if the error was handled, false otherwise.
	 */
	public function handle_parsing_errors( $errno, $errstr, $errfile, $errline, array $errcontext = [] ) {
		if ( ! ( error_reporting() & $errno ) ) { // @codingStandardsIgnoreLine.
			return true; // not interesting.
		}

		// Ignore warnings from parser & let PHP handle the rest.
		return $errno & E_USER_WARNING && 0 === substr_compare( $errfile, 'DOMTreeBuilder.php', -18 );
	}

	/**
	 * Retrieves an array of nodes that should be skipped during processing.
	 *
	 * The `ignoreTags`, `ignoreClasses` and `ignoreIDs` settings are combined
	 * into a single XPath union query.
	 *
	 * @param \DOMXPath $xpath        A valid XPath instance for the DOM to be queried.
	 * @param \DOMNode  $initial_node The starting node of the XPath query.
	 * @param Settings  $settings     The settings to apply.
	 *
	 * @return \DOMNode[] An array of \DOMNode (can be empty).
	 */
	public function query_tags_to_ignore( \DOMXPath $xpath, \DOMNode $initial_node, Settings $settings ) {
		$elements    = [];
		$query_parts = [];
		if ( ! empty( $settings['ignoreTags'] ) ) {
			$query_parts[] = '//' . implode( ' | //', $settings['ignoreTags'] );
		}
		if ( ! empty( $settings['ignoreClasses'] ) ) {
			$query_parts[] = "//*[contains(concat(' ', @class, ' '), ' " . implode( " ') or contains(concat(' ', @class, ' '), ' ", $settings['ignoreClasses'] ) . " ')]";
		}
		if ( ! empty( $settings['ignoreIDs'] ) ) {
			$query_parts[] = '//*[@id=\'' . implode( '\' or @id=\'', $settings['ignoreIDs'] ) . '\']';
		}

		if ( ! empty( $query_parts ) ) {
			$ignore_query = implode( ' | ', $query_parts );

			// A malformed expression makes query() return false; treat that as "nothing to ignore".
			$nodelist = $xpath->query( $ignore_query, $initial_node );
			if ( false !== $nodelist ) {
				$elements = DOM::nodelist_to_array( $nodelist );
			}
		}

		return $elements;
	}

	/**
	 * Replaces the given node with HTML content. Uses the HTML5 parser.
	 *
	 * @param \DOMNode $node    The node to replace.
	 * @param string   $content The HTML fragment used to replace the node.
	 *
	 * @return \DOMNode|array An array of \DOMNode containing the new nodes or the old \DOMNode if the replacement failed.
	 */
	public function replace_node_with_html( \DOMNode $node, $content ) {
		$result = $node;

		$parent = $node->parentNode;
		if ( empty( $parent ) ) {
			return $node; // abort early to save cycles.
		}

		set_error_handler( [ $this, 'handle_parsing_errors' ] ); // @codingStandardsIgnoreLine.

		try {
			$html_fragment = $this->get_html5_parser()->loadHTMLFragment( $content );
			if ( ! empty( $html_fragment ) ) {
				$imported_fragment = $node->ownerDocument->importNode( $html_fragment, true );

				if ( ! empty( $imported_fragment ) ) {
					// Save the children of the imported DOMDocumentFragment before replacement.
					$children = DOM::nodelist_to_array( $imported_fragment->childNodes );

					if ( false !== $parent->replaceChild( $imported_fragment, $node ) ) {
						// Success! We return the saved array of DOMNodes as
						// $imported_fragment is just an empty DOMDocumentFragment now.
						$result = $children;
					}
				}
			}
		} finally {
			// Always remove our temporary handler, even if parsing or the DOM operations threw.
			restore_error_handler();
		}

		return $result;
	}

	/**
	 * Retrieves the fix registry.
	 *
	 * Creates a Default_Registry on first use if none was injected. An injected
	 * registry receives our hyphenator cache the first time it is retrieved.
	 *
	 * @return Registry
	 */
	public function get_registry() {
		if ( ! isset( $this->registry ) ) {
			$this->registry = new Default_Registry( $this->get_hyphenator_cache() );
		} elseif ( $this->update_registry_cache ) {
			$this->registry->update_hyphenator_cache( $this->get_hyphenator_cache() );
			$this->update_registry_cache = false;
		}

		return $this->registry;
	}

	/**
	 * Retrieves the HTML5 parser instance.
	 *
	 * @return \Masterminds\HTML5
	 */
	public function get_html5_parser() {
		// Lazy-load HTML5 parser.
		if ( ! isset( $this->html5_parser ) ) {
			$this->html5_parser = new \Masterminds\HTML5( [
				'disable_html_ns' => true,
			] );
		}

		return $this->html5_parser;
	}

	/**
	 * Retrieves the hyphenator cache.
	 *
	 * @return Hyphenator\Cache
	 */
	public function get_hyphenator_cache() {
		if ( ! isset( $this->hyphenator_cache ) ) {
			$this->hyphenator_cache = new Hyphenator\Cache();
		}

		return $this->hyphenator_cache;
	}

	/**
	 * Injects an existing Hyphenator\Cache (to facilitate persistent language caching).
	 *
	 * @param Hyphenator\Cache $cache A hyphenator cache instance.
	 */
	public function set_hyphenator_cache( Hyphenator\Cache $cache ) {
		$this->hyphenator_cache = $cache;

		// Change hyphenator cache for existing token fixes.
		if ( isset( $this->registry ) ) {
			$this->registry->update_hyphenator_cache( $cache );
		}
	}

	/**
	 * Retrieves the list of valid language plugins in the given directory.
	 *
	 * Only `*.json` files containing a `"language": "<name>",` entry are listed.
	 * A missing or unreadable directory yields an empty list.
	 *
	 * @param string $path The path in which to look for language plugin files.
	 *                     It is concatenated with the file names as-is, so it
	 *                     has to end with a directory separator.
	 *
	 * @return string[] An array in the form ( $language_code => $language_name ).
	 */
	private static function get_language_plugin_list( $path ) {
		$language_name_pattern = '/"language"\s*:\s*((".+")|(\'.+\'))\s*,/';
		$languages             = [];

		// Without a readable directory there is nothing to list.
		if ( ! is_dir( $path ) ) {
			return $languages;
		}
		$handle = opendir( $path );
		if ( false === $handle ) {
			return $languages;
		}

		// Read all files in directory. The strict comparison is required because
		// a directory entry named "0" is falsy and would end the loop early.
		$file = readdir( $handle );
		while ( false !== $file ) {
			// We only want the JSON files.
			if ( '.json' === substr( $file, -5 ) ) {
				$file_content = file_get_contents( $path . $file );
				if ( false !== $file_content && preg_match( $language_name_pattern, $file_content, $matches ) ) {
					// Strip the surrounding quotes from the matched name.
					$language_name = substr( $matches[1], 1, -1 );
					$language_code = substr( $file, 0, -5 );

					$languages[ $language_code ] = $language_name;
				}
			}

			// Read next file.
			$file = readdir( $handle );
		}
		closedir( $handle );

		// Sort language names alphabetically while keeping the language codes as keys.
		asort( $languages );

		return $languages;
	}

	/**
	 * Retrieves the list of valid hyphenation languages.
	 *
	 * Note that this method reads all the language files on disc, so you should
	 * cache the results if possible.
	 *
	 * @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
	 */
	public static function get_hyphenation_languages() {
		return self::get_language_plugin_list( __DIR__ . '/lang/' );
	}

	/**
	 * Retrieves the list of valid diacritic replacement languages.
	 *
	 * Note that this method reads all the language files on disc, so you should
	 * cache the results if possible.
	 *
	 * @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
	 */
	public static function get_diacritic_languages() {
		return self::get_language_plugin_list( __DIR__ . '/diacritics/' );
	}
}
443