We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
1 | <?php |
||
2 | /** |
||
3 | * This file is part of PHP-Typography. |
||
4 | * |
||
5 | * Copyright 2014-2019 Peter Putzer. |
||
6 | * Copyright 2009-2011 KINGdesk, LLC. |
||
7 | * |
||
8 | * This program is free software; you can redistribute it and/or modify |
||
9 | * it under the terms of the GNU General Public License as published by |
||
10 | * the Free Software Foundation; either version 2 of the License, or |
||
11 | * (at your option) any later version. |
||
12 | * |
||
13 | * This program is distributed in the hope that it will be useful, |
||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
16 | * GNU General Public License for more details. |
||
17 | * |
||
18 | * You should have received a copy of the GNU General Public License along |
||
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
21 | * |
||
22 | * *** |
||
23 | * |
||
24 | * @package mundschenk-at/php-typography |
||
25 | * @license http://www.gnu.org/licenses/gpl-2.0.html |
||
26 | */ |
||
27 | |||
28 | namespace PHP_Typography; |
||
29 | |||
30 | use PHP_Typography\Fixes\Registry; |
||
31 | use PHP_Typography\Fixes\Default_Registry; |
||
32 | |||
33 | /** |
||
34 | * Parses HTML5 (or plain text) and applies various typographic fixes to the text. |
||
35 | * |
||
36 | * If used with multibyte language, UTF-8 encoding is required. |
||
37 | * |
||
38 | * Portions of this code have been inspired by: |
||
39 | * - typogrify (https://code.google.com/p/typogrify/) |
||
40 | * - WordPress code for wptexturize (https://developer.wordpress.org/reference/functions/wptexturize/) |
||
41 | * - PHP SmartyPants Typographer (https://michelf.ca/projects/php-smartypants/typographer/) |
||
42 | * |
||
43 | * @author Jeffrey D. King <[email protected]> |
||
44 | * @author Peter Putzer <[email protected]> |
||
45 | */ |
||
class PHP_Typography {

	/**
	 * A DOM-based HTML5 parser.
	 *
	 * Created on first use by get_html5_parser().
	 *
	 * @var \Masterminds\HTML5
	 */
	private $html5_parser;

	/**
	 * The hyphenator cache.
	 *
	 * Created on first use by get_hyphenator_cache(), unless one has been
	 * injected via set_hyphenator_cache().
	 *
	 * @var Hyphenator\Cache
	 */
	protected $hyphenator_cache;

	/**
	 * The node fixes registry.
	 *
	 * @var Registry|null
	 */
	private $registry;

	/**
	 * Whether the Hyphenator\Cache of the $registry needs to be updated.
	 *
	 * @var bool
	 */
	private $update_registry_cache;

	/**
	 * Sets up a new PHP_Typography object.
	 *
	 * @param Registry|null $registry Optional. A fix registry instance. Default null,
	 *                                meaning the default fixes are used.
	 */
	public function __construct( Registry $registry = null ) {
		$this->registry = $registry;

		// An injected registry has not seen our hyphenator cache yet, so
		// get_registry() has to hand it over on first access.
		$this->update_registry_cache = null !== $registry;
	}

	/**
	 * Modifies $html according to the defined settings.
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param string   $html         A HTML fragment.
	 * @param Settings $settings     A settings object.
	 * @param bool     $is_title     Optional. If the HTML fragment is a title. Default false.
	 * @param string[] $body_classes Optional. CSS classes added to the virtual
	 *                               <body> element used for processing. Default [].
	 *
	 * @return string The processed $html.
	 */
	public function process( $html, Settings $settings, $is_title = false, array $body_classes = [] ) {
		return $this->process_textnodes(
			$html,
			// The callback receives a single text node, not the HTML string.
			function( $textnode, $settings, $is_title ) {
				$this->get_registry()->apply_fixes( $textnode, $settings, $is_title, false );
			},
			$settings,
			$is_title,
			$body_classes
		);
	}

	/**
	 * Modifies $html according to the defined settings, in a way that is appropriate for RSS feeds
	 * (i.e. excluding processes that may not display well with limited character set intelligence).
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param string   $html         A HTML fragment.
	 * @param Settings $settings     A settings object.
	 * @param bool     $is_title     Optional. If the HTML fragment is a title. Default false.
	 * @param string[] $body_classes Optional. CSS classes added to the virtual
	 *                               <body> element used for processing. Default [].
	 *
	 * @return string The processed $html.
	 */
	public function process_feed( $html, Settings $settings, $is_title = false, array $body_classes = [] ) {
		return $this->process_textnodes(
			$html,
			// Same as process(), but with the "is feed" flag of apply_fixes() set.
			function( $textnode, $settings, $is_title ) {
				$this->get_registry()->apply_fixes( $textnode, $settings, $is_title, true );
			},
			$settings,
			$is_title,
			$body_classes
		);
	}

	/**
	 * Applies specific fixes to all textnodes of the HTML fragment.
	 *
	 * The fragment is returned unchanged if it is a title and `h1` or `h2` are
	 * among the ignored tags, if parsing fails, or if the virtual <body> node
	 * cannot be found.
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param string   $html         A HTML fragment.
	 * @param callable $fixer        A callback that applies typography fixes to a single textnode.
	 *                               It is called as $fixer( $textnode, $settings, $is_title ).
	 * @param Settings $settings     A settings object.
	 * @param bool     $is_title     Optional. If the HTML fragment is a title. Default false.
	 * @param string[] $body_classes Optional. CSS classes added to the virtual
	 *                               <body> element used for processing. Default [].
	 *
	 * @return string The processed $html.
	 */
	public function process_textnodes( $html, callable $fixer, Settings $settings, $is_title = false, array $body_classes = [] ) {
		if ( isset( $settings['ignoreTags'] ) && $is_title && ( \in_array( 'h1', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'], true ) || \in_array( 'h2', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'], true ) ) ) {
			return $html;
		}

		// Lazy-load our parser (the text parser is not needed for feeds).
		$html5_parser = $this->get_html5_parser();

		// Parse the HTML.
		$dom = $this->parse_html( $html5_parser, $html, $settings, $body_classes );

		// Abort if there were parsing errors.
		if ( ! $dom instanceof \DOMDocument || ! $dom->hasChildNodes() ) {
			return $html;
		}

		// Query some nodes in the DOM.
		$xpath     = new \DOMXPath( $dom );
		$body_node = $xpath->query( '/html/body' )->item( 0 );

		// Abort if we could not retrieve the body node.
		// This should be refactored to use exceptions in a future version.
		if ( ! $body_node instanceof \DOMNode ) {
			return $html;
		}

		// Get the list of tags that should be ignored.
		$tags_to_ignore = $this->query_tags_to_ignore( $xpath, $body_node, $settings );

		// Start processing.
		foreach ( $xpath->query( '//text()', $body_node ) as $textnode ) {
			if (
				// One of the ancestors should be ignored.
				self::arrays_intersect( DOM::get_ancestors( $textnode ), $tags_to_ignore ) ||
				// The node contains only whitespace.
				$textnode->isWhitespaceInElementContent()
			) {
				continue;
			}

			// Store original content.
			$original = $textnode->data;

			// Apply fixes.
			$fixer( $textnode, $settings, $is_title );

			// Until now, we've only been working on a textnode: HTMLify result.
			$new = $textnode->data;

			// Replace original node (if anything was changed).
			if ( $new !== $original ) {
				$this->replace_node_with_html( $textnode, $settings->apply_character_mapping( $new ) );
			}
		}

		// Only the children of the virtual <body> are serialized, not the wrapper itself.
		return $html5_parser->saveHTML( $body_node->childNodes );
	}

	/**
	 * Determines whether two object arrays intersect. The second array is expected
	 * to use the spl_object_hash for its keys.
	 *
	 * @param array $array1 The keys are ignored.
	 * @param array $array2 This array has to be in the form ( $spl_object_hash => $object ).
	 *
	 * @return boolean
	 */
	protected static function arrays_intersect( array $array1, array $array2 ) {
		foreach ( $array1 as $value ) {
			// Hash lookup instead of a nested loop over $array2.
			if ( isset( $array2[ \spl_object_hash( $value ) ] ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Parse HTML5 fragment while ignoring certain warnings for invalid HTML code (e.g. duplicate IDs).
	 *
	 * The fragment is wrapped in a minimal document (doctype, <html>, <body>)
	 * before it is handed to the parser.
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param \Masterminds\HTML5 $parser       An initialized parser object.
	 * @param string             $html         The HTML fragment to parse (not a complete document).
	 * @param Settings           $settings     The settings to apply.
	 * @param string[]           $body_classes Optional. CSS classes added to the virtual
	 *                                         <body> element used for processing. Default [].
	 *
	 * @return \DOMDocument|null The encoding has already been set to UTF-8. Returns null if there were parsing errors.
	 */
	public function parse_html( \Masterminds\HTML5 $parser, $html, Settings $settings, array $body_classes = [] ) {
		// Inject <body> classes. The value is escaped so that quotes or angle
		// brackets in a class name cannot break the wrapper markup; the parser
		// decodes the entities again, so the DOM sees the original string.
		$body = 'body';
		if ( ! empty( $body_classes ) ) {
			$body .= ' class="' . \htmlspecialchars( \implode( ' ', $body_classes ), ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8' ) . '"';
		}

		// Silence some parsing errors for invalid HTML.
		\set_error_handler( [ $this, 'handle_parsing_errors' ] ); // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_set_error_handler
		$xml_error_handling = \libxml_use_internal_errors( true );

		try {
			// Do the actual parsing.
			$dom           = $parser->loadHTML( "<!DOCTYPE html><html><{$body}>{$html}</body></html>" );
			$dom->encoding = 'UTF-8';
		} finally {
			// Restore original error handling, even if the parser threw.
			\libxml_clear_errors();
			\libxml_use_internal_errors( $xml_error_handling );
			\restore_error_handler();
		}

		// Handle any parser errors.
		$errors = $parser->getErrors();
		if ( ! empty( $settings[ Settings::PARSER_ERRORS_HANDLER ] ) && ! empty( $errors ) ) {
			// The handler returns the errors it considers still unhandled.
			$errors = $settings[ Settings::PARSER_ERRORS_HANDLER ]( $errors );
		}

		// Return null if there are still unhandled parsing errors.
		if ( ! empty( $errors ) && ! $settings[ Settings::PARSER_ERRORS_IGNORE ] ) {
			$dom = null;
		}

		return $dom;
	}

	/**
	 * Silently handle certain HTML parsing errors.
	 *
	 * Intended to be installed via set_error_handler() while the HTML5 parser runs.
	 *
	 * @since 6.0.0 Unused parameters $errline and $errcontext removed.
	 *
	 * @param int    $errno   Error number.
	 * @param string $errstr  Error message.
	 * @param string $errfile The file in which the error occurred.
	 *
	 * @return boolean Returns true if the error was handled, false otherwise.
	 */
	public function handle_parsing_errors( $errno, $errstr, $errfile ) {
		if ( ! ( \error_reporting() & $errno ) ) { // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.runtime_configuration_error_reporting,WordPress.PHP.DevelopmentFunctions.prevent_path_disclosure_error_reporting
			return true; // not interesting.
		}

		// Ignore warnings from parser & let PHP handle the rest.
		// A file counts as "the parser" if its path ends in 'DOMTreeBuilder.php' (18 characters).
		return ( $errno & E_USER_WARNING ) && 0 === \substr_compare( $errfile, 'DOMTreeBuilder.php', -18 );
	}

	/**
	 * Retrieves an array of nodes that should be skipped during processing.
	 *
	 * The XPath query is assembled from the 'ignoreTags', 'ignoreClasses' and
	 * 'ignoreIDs' settings. The values are inserted verbatim, without XPath
	 * escaping.
	 *
	 * @param \DOMXPath $xpath        A valid XPath instance for the DOM to be queried.
	 * @param \DOMNode  $initial_node The starting node of the XPath query.
	 * @param Settings  $settings     The settings to apply.
	 *
	 * @return \DOMNode[] An array of \DOMNode (can be empty).
	 */
	public function query_tags_to_ignore( \DOMXPath $xpath, \DOMNode $initial_node, Settings $settings ) {
		$elements    = [];
		$query_parts = [];

		if ( ! empty( $settings['ignoreTags'] ) ) {
			$query_parts[] = '//' . \implode( ' | //', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'] );
		}
		if ( ! empty( $settings['ignoreClasses'] ) ) {
			// Padding @class with spaces makes the match work on whole class names only.
			$query_parts[] = "//*[contains(concat(' ', @class, ' '), ' " . \implode( " ') or contains(concat(' ', @class, ' '), ' ", /** Array. @scrutinizer ignore-type */ $settings['ignoreClasses'] ) . " ')]";
		}
		if ( ! empty( $settings['ignoreIDs'] ) ) {
			$query_parts[] = '//*[@id=\'' . \implode( '\' or @id=\'', /** Array. @scrutinizer ignore-type */ $settings['ignoreIDs'] ) . '\']';
		}

		if ( ! empty( $query_parts ) ) {
			$ignore_query = \implode( ' | ', $query_parts );

			// An invalid expression makes query() return false; treat that as "nothing to ignore".
			$nodelist = $xpath->query( $ignore_query, $initial_node );
			if ( false !== $nodelist ) {
				$elements = DOM::nodelist_to_array( $nodelist );
			}
		}

		return $elements;
	}

	/**
	 * Replaces the given node with HTML content. Uses the HTML5 parser.
	 *
	 * @param \DOMNode $node    The node to replace.
	 * @param string   $content The HTML fragment used to replace the node.
	 *
	 * @return \DOMNode|array An array of \DOMNode containing the new nodes or the old \DOMNode if the replacement failed.
	 */
	public function replace_node_with_html( \DOMNode $node, $content ) {
		$result = $node;

		$parent = $node->parentNode;
		if ( empty( $parent ) ) {
			return $node; // abort early to save cycles.
		}

		// Encode bare < > & and decode escaped HTML tag.
		$content = RE::unescape_tags( \htmlspecialchars( $content, ENT_NOQUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', true ) );

		\set_error_handler( [ $this, 'handle_parsing_errors' ] ); // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_set_error_handler

		try {
			$html_fragment = $this->get_html5_parser()->loadHTMLFragment( $content );
			if ( ! empty( $html_fragment ) ) {
				$imported_fragment = $node->ownerDocument->importNode( $html_fragment, true );

				if ( ! empty( $imported_fragment ) ) {
					// Save the children of the imported DOMDocumentFragment before replacement.
					$children = DOM::nodelist_to_array( $imported_fragment->childNodes );

					if ( false !== $parent->replaceChild( $imported_fragment, $node ) ) {
						// Success! We return the saved array of DOMNodes as
						// $imported_fragment is just an empty DOMDocumentFragment now.
						$result = $children;
					}
				}
			}
		} finally {
			// Always uninstall our handler, even if parsing or the DOM operations threw.
			\restore_error_handler();
		}

		return $result;
	}

	/**
	 * Retrieves the fix registry.
	 *
	 * If no registry was injected, a Default_Registry is created on first use.
	 * An injected registry receives the current hyphenator cache once, on the
	 * first call.
	 *
	 * @return Registry
	 */
	public function get_registry() {
		if ( ! isset( $this->registry ) ) {
			$this->registry = new Default_Registry( $this->get_hyphenator_cache() );
		} elseif ( $this->update_registry_cache ) {
			$this->registry->update_hyphenator_cache( $this->get_hyphenator_cache() );
			$this->update_registry_cache = false;
		}

		return $this->registry;
	}

	/**
	 * Retrieves the HTML5 parser instance.
	 *
	 * @return \Masterminds\HTML5
	 */
	public function get_html5_parser() {
		// Lazy-load HTML5 parser.
		if ( ! isset( $this->html5_parser ) ) {
			$this->html5_parser = new \Masterminds\HTML5( [ 'disable_html_ns' => true ] );
		}

		return $this->html5_parser;
	}

	/**
	 * Retrieves the hyphenator cache.
	 *
	 * A new, empty cache is created on first use if none has been injected.
	 *
	 * @return Hyphenator\Cache
	 */
	public function get_hyphenator_cache() {
		if ( ! isset( $this->hyphenator_cache ) ) {
			$this->hyphenator_cache = new Hyphenator\Cache();
		}

		return $this->hyphenator_cache;
	}

	/**
	 * Injects an existing Hyphenator\Cache (to facilitate persistent language caching).
	 *
	 * @param Hyphenator\Cache $cache A hyphenator cache instance.
	 */
	public function set_hyphenator_cache( Hyphenator\Cache $cache ) {
		$this->hyphenator_cache = $cache;

		// Change hyphenator cache for existing token fixes.
		if ( isset( $this->registry ) ) {
			$this->registry->update_hyphenator_cache( $cache );
		}
	}

	/**
	 * Retrieves the list of valid language plugins in the given directory.
	 *
	 * Only files ending in '.json' are considered. The language name is taken
	 * from the file's `"language": "…",` entry, the language code is the file
	 * name without the extension. Files that cannot be read or that lack such
	 * an entry are skipped.
	 *
	 * @param string $path The path in which to look for language plugin files.
	 *                     It is prepended to the file names as-is, so it has to
	 *                     end with a directory separator.
	 *
	 * @return string[] An array in the form ( $language_code => $language_name ).
	 */
	private static function get_language_plugin_list( $path ) {
		$languages = [];

		// Try to open the given directory.
		$handle = \opendir( $path );
		if ( false === $handle ) {
			// Abort.
			return $languages; // @codeCoverageIgnore
		}

		// Read all files in directory. The comparison against false has to be
		// strict: an entry named "0" is falsy and would end the loop early.
		$file = \readdir( $handle );
		while ( false !== $file ) {
			// We only want the JSON files.
			if ( '.json' === \substr( $file, -5 ) ) {
				$file_content = \file_get_contents( $path . $file );

				// Skip unreadable files instead of matching against false.
				if ( false !== $file_content && \preg_match( '/"language"\s*:\s*((".+")|(\'.+\'))\s*,/', $file_content, $matches ) ) {
					// Strip the surrounding quotes from the captured value.
					$language_name = \substr( $matches[1], 1, -1 );
					$language_code = \substr( $file, 0, -5 );

					$languages[ $language_code ] = $language_name;
				}
			}

			// Read next file.
			$file = \readdir( $handle );
		}
		\closedir( $handle );

		// Sort by language name, keeping the language codes as keys.
		\asort( $languages );

		return $languages;
	}

	/**
	 * Retrieves the list of valid hyphenation languages.
	 *
	 * Note that this method reads all the language files on disc, so you should
	 * cache the results if possible.
	 *
	 * @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
	 */
	public static function get_hyphenation_languages() {
		return self::get_language_plugin_list( __DIR__ . '/lang/' );
	}

	/**
	 * Retrieves the list of valid diacritic replacement languages.
	 *
	 * Note that this method reads all the language files on disc, so you should
	 * cache the results if possible.
	 *
	 * @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
	 */
	public static function get_diacritic_languages() {
		return self::get_language_plugin_list( __DIR__ . '/diacritics/' );
	}
}
||
492 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.