PHP_Typography::get_registry() - Code Metrics - Inspection of "Update Travis and Scrutinizer" - mundschenk-at/php-typography - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#56)

by Der Mundschenk

created 2018-01-03 12:29 UTC

PHP_Typography::get_registry() A

↳ Parent: PHP_Typography

Complexity

Conditions	3
Paths	3

Size

Total Lines	9
Code Lines	6

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	7
CRAP Score	3

Importance

Changes

Metric	Value
dl	0
loc	9
rs	9.6666
c	0
b	0
f	0
ccs	7
cts	7
cp	1
cc	3
eloc	6
nc	3
nop	0
crap	3

<?php
/**
 *  This file is part of PHP-Typography.
 *
 *  Copyright 2014-2017 Peter Putzer.
 *  Copyright 2009-2011 KINGdesk, LLC.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 *  ***
 *
 *  @package mundschenk-at/php-typography
 *  @license http://www.gnu.org/licenses/gpl-2.0.html
 */

namespace PHP_Typography;

use PHP_Typography\Fixes\Registry;
use PHP_Typography\Fixes\Default_Registry;

/**
 * Parses HTML5 (or plain text) and applies various typographic fixes to the text.
 *
 * If used with multibyte language, UTF-8 encoding is required.
 *
 * Portions of this code have been inspired by:
 *  - typogrify (https://code.google.com/p/typogrify/)
 *  - WordPress code for wptexturize (https://developer.wordpress.org/reference/functions/wptexturize/)
 *  - PHP SmartyPants Typographer (https://michelf.ca/projects/php-smartypants/typographer/)
 *
 *  @author Jeffrey D. King <[email protected]>
 *  @author Peter Putzer <[email protected]>
 */
class PHP_Typography {

	/**
	 * A DOM-based HTML5 parser. Created lazily by get_html5_parser().
	 *
	 * @var \Masterminds\HTML5
	 */
	private $html5_parser;

	/**
	 * The hyphenator cache. Created lazily by get_hyphenator_cache() unless
	 * one has been injected via set_hyphenator_cache().
	 *
	 * @var Hyphenator\Cache
	 */
	protected $hyphenator_cache;

	/**
	 * The node fixes registry. Null until either injected through the
	 * constructor or created on demand by get_registry().
	 *
	 * @var Registry|null
	 */
	private $registry;

	/**
	 * Whether the Hyphenator\Cache of the $registry needs to be updated.
	 *
	 * @var bool
	 */
	private $update_registry_cache;

	/**
	 * Sets up a new PHP_Typography object.
	 *
	 * @param Registry|null $registry Optional. A fix registry instance. Default null,
	 *                                meaning the default fixes are used.
	 */
	public function __construct( Registry $registry = null ) {
		$this->registry = $registry;

		// An injected registry has not seen our hyphenator cache yet, so
		// get_registry() has to hand it over on first use.
		$this->update_registry_cache = ! empty( $registry );
	}

	/**
	 * Modifies $html according to the defined settings.
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param string   $html         A HTML fragment.
	 * @param Settings $settings     A settings object.
	 * @param bool     $is_title     Optional. If the HTML fragment is a title. Default false.
	 * @param string[] $body_classes Optional. CSS classes added to the virtual
	 *                               <body> element used for processing. Default [].
	 *
	 * @return string The processed $html.
	 */
	public function process( $html, Settings $settings, $is_title = false, array $body_classes = [] ) {
		// The callback receives a single text node (not the whole fragment).
		return $this->process_textnodes( $html, function( $textnode, $settings, $is_title ) {
			$this->get_registry()->apply_fixes( $textnode, $settings, $is_title, false );
		}, $settings, $is_title, $body_classes );
	}

	/**
	 * Modifies $html according to the defined settings, in a way that is appropriate for RSS feeds
	 * (i.e. excluding processes that may not display well with limited character set intelligence).
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param string   $html         A HTML fragment.
	 * @param Settings $settings     A settings object.
	 * @param bool     $is_title     Optional. If the HTML fragment is a title. Default false.
	 * @param string[] $body_classes Optional. CSS classes added to the virtual
	 *                               <body> element used for processing. Default [].
	 *
	 * @return string The processed $html.
	 */
	public function process_feed( $html, Settings $settings, $is_title = false, array $body_classes = [] ) {
		// Identical to process(), except that the last apply_fixes() argument is true.
		return $this->process_textnodes( $html, function( $textnode, $settings, $is_title ) {
			$this->get_registry()->apply_fixes( $textnode, $settings, $is_title, true );
		}, $settings, $is_title, $body_classes );
	}

	/**
	 * Applies specific fixes to all textnodes of the HTML fragment.
	 *
	 * The fragment is returned unchanged if it is a title while 'h1' or 'h2'
	 * are among the ignored tags, or if it could not be parsed.
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param string   $html         A HTML fragment.
	 * @param callable $fixer        A callback that applies typography fixes to a single textnode.
	 *                               It is invoked as $fixer( $textnode, $settings, $is_title ).
	 * @param Settings $settings     A settings object.
	 * @param bool     $is_title     Optional. If the HTML fragment is a title. Default false.
	 * @param string[] $body_classes Optional. CSS classes added to the virtual
	 *                               <body> element used for processing. Default [].
	 *
	 * @return string The processed $html.
	 */
	public function process_textnodes( $html, callable $fixer, Settings $settings, $is_title = false, array $body_classes = [] ) {
		if ( isset( $settings['ignoreTags'] ) && $is_title && ( \in_array( 'h1', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'], true ) || \in_array( 'h2', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'], true ) ) ) {
			return $html;
		}

		// Lazy-load our parser.
		$html5_parser = $this->get_html5_parser();

		// Parse the HTML.
		$dom = $this->parse_html( $html5_parser, $html, $settings, $body_classes );

		// Abort if there were parsing errors.
		if ( empty( $dom ) ) {
			return $html;
		}

		// Query some nodes in the DOM.
		$xpath     = new \DOMXPath( $dom );
		$body_node = $xpath->query( '/html/body' )->item( 0 );

		// Without a <body> there is nothing to process; bail out instead of
		// passing null to the type-hinted query_tags_to_ignore().
		if ( ! $body_node instanceof \DOMNode ) {
			return $html;
		}

		$tags_to_ignore = $this->query_tags_to_ignore( $xpath, $body_node, $settings );

		// Start processing.
		foreach ( $xpath->query( '//text()', $body_node ) as $textnode ) {
			if (
				// One of the ancestors should be ignored.
				self::arrays_intersect( DOM::get_ancestors( $textnode ), $tags_to_ignore ) ||
				// The node contains only whitespace.
				$textnode->isWhitespaceInElementContent()
			) {
				continue;
			}

			// Apply fixes.
			$fixer( $textnode, $settings, $is_title );

			// Until now, we've only been working on a textnode: HTMLify result.
			$this->replace_node_with_html( $textnode, $textnode->data );
		}

		// Only the children of the virtual <body> are serialized.
		return $html5_parser->saveHTML( $body_node->childNodes );
	}

	/**
	 * Determines whether two object arrays intersect. The second array is expected
	 * to use the spl_object_hash for its keys.
	 *
	 * @param array $array1 The keys are ignored.
	 * @param array $array2 This array has to be in the form ( $spl_object_hash => $object ).
	 *
	 * @return boolean True if at least one object of $array1 is a key of $array2.
	 */
	protected static function arrays_intersect( array $array1, array $array2 ) {
		foreach ( $array1 as $value ) {
			if ( isset( $array2[ \spl_object_hash( $value ) ] ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Parse HTML5 fragment while ignoring certain warnings for invalid HTML code (e.g. duplicate IDs).
	 *
	 * The fragment is wrapped in a virtual <html><body> document before parsing.
	 *
	 * @since 6.0.0 Parameter $body_classes added.
	 *
	 * @param \Masterminds\HTML5 $parser       An intialized parser object.
	 * @param string             $html         The HTML fragment to parse (not a complete document).
	 * @param Settings           $settings     The settings to apply.
	 * @param string[]           $body_classes Optional. CSS classes added to the virtual
	 *                                         <body> element used for processing. Default [].
	 *
	 * @return \DOMDocument|null The encoding has already been set to UTF-8. Returns null if there
	 *                           were parsing errors (unless 'parserErrorsIgnore' is set).
	 */
	public function parse_html( \Masterminds\HTML5 $parser, $html, Settings $settings, array $body_classes = [] ) {
		// Silence some parsing errors for invalid HTML.
		\set_error_handler( [ $this, 'handle_parsing_errors' ] ); // @codingStandardsIgnoreLine
		$xml_error_handling = \libxml_use_internal_errors( true );

		try {
			// Inject <body> classes. The value is escaped so that a class name
			// containing quotes or angle brackets cannot break the wrapper markup.
			$body = empty( $body_classes ) ? 'body' : 'body class="' . \htmlspecialchars( \implode( ' ', $body_classes ), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ) . '"';

			// Do the actual parsing.
			$dom           = $parser->loadHTML( "<!DOCTYPE html><html><{$body}>{$html}</body></html>" );
			$dom->encoding = 'UTF-8';
		} finally {
			// Restore original error handling, even if the parser threw.
			\libxml_clear_errors();
			\libxml_use_internal_errors( $xml_error_handling );
			\restore_error_handler();
		}

		// Handle any parser errors. A custom handler may filter the list.
		$errors = $parser->getErrors();
		if ( ! empty( $settings['parserErrorsHandler'] ) && ! empty( $errors ) ) {
			$errors = $settings['parserErrorsHandler']( $errors );
		}

		// Return null if there are still unhandled parsing errors.
		if ( ! empty( $errors ) && ! $settings['parserErrorsIgnore'] ) {
			$dom = null;
		}

		return $dom;
	}

	/**
	 * Silently handle certain HTML parsing errors.
	 *
	 * Intended to be installed via set_error_handler() while the HTML5 parser runs.
	 *
	 * @since 6.0.0 Unused parameters $errline and $errcontext removed.
	 *
	 * @param int    $errno      Error number.
	 * @param string $errstr     Error message.
	 * @param string $errfile    The file in which the error occurred.
	 *
	 * @return boolean Returns true if the error was handled, false otherwise.
	 */
	public function handle_parsing_errors( $errno, $errstr, $errfile ) {
		if ( ! ( \error_reporting() & $errno ) ) { // @codingStandardsIgnoreLine.
			return true; // not interesting.
		}

		// Ignore warnings from parser & let PHP handle the rest. The file name
		// check compares the last 18 characters with 'DOMTreeBuilder.php'.
		return ( $errno & E_USER_WARNING ) && 0 === \substr_compare( $errfile, 'DOMTreeBuilder.php', -18 );
	}

	/**
	 * Retrieves an array of nodes that should be skipped during processing.
	 *
	 * The XPath query is assembled from the 'ignoreTags', 'ignoreClasses' and
	 * 'ignoreIDs' settings; the values are inserted into the query verbatim.
	 *
	 * @param \DOMXPath $xpath        A valid XPath instance for the DOM to be queried.
	 * @param \DOMNode  $initial_node The starting node of the XPath query.
	 * @param Settings  $settings     The settings to apply.
	 *
	 * @return \DOMNode[] An array of \DOMNode (can be empty).
	 */
	public function query_tags_to_ignore( \DOMXPath $xpath, \DOMNode $initial_node, Settings $settings ) {
		$elements    = [];
		$query_parts = [];

		// Elements matched by tag name.
		if ( ! empty( $settings['ignoreTags'] ) ) {
			$query_parts[] = '//' . \implode( ' | //', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'] );
		}

		// Elements whose space-separated class attribute contains one of the classes.
		if ( ! empty( $settings['ignoreClasses'] ) ) {
			$query_parts[] = "//*[contains(concat(' ', @class, ' '), ' " . \implode( " ') or contains(concat(' ', @class, ' '), ' ", /** Array. @scrutinizer ignore-type */ $settings['ignoreClasses'] ) . " ')]";
		}

		// Elements matched by ID.
		if ( ! empty( $settings['ignoreIDs'] ) ) {
			$query_parts[] = '//*[@id=\'' . \implode( '\' or @id=\'', /** Array. @scrutinizer ignore-type */ $settings['ignoreIDs'] ) . '\']';
		}

		if ( ! empty( $query_parts ) ) {
			$ignore_query = \implode( ' | ', $query_parts );

			// DOMXPath::query() returns false for a malformed expression.
			$nodelist = $xpath->query( $ignore_query, $initial_node );
			if ( false !== $nodelist ) {
				$elements = DOM::nodelist_to_array( $nodelist );
			}
		}

		return $elements;
	}

	/**
	 * Replaces the given node with HTML content. Uses the HTML5 parser.
	 *
	 * @param \DOMNode $node    The node to replace.
	 * @param string   $content The HTML fragment used to replace the node.
	 *
	 * @return \DOMNode|array An array of \DOMNode containing the new nodes or the old \DOMNode if the replacement failed.
	 */
	public function replace_node_with_html( \DOMNode $node, $content ) {
		$parent = $node->parentNode;
		if ( empty( $parent ) ) {
			return $node; // abort early to save cycles.
		}

		$result = $node;

		\set_error_handler( [ $this, 'handle_parsing_errors' ] ); // @codingStandardsIgnoreLine.

		try {
			$html_fragment = $this->get_html5_parser()->loadHTMLFragment( $content );
			if ( ! empty( $html_fragment ) ) {
				$imported_fragment = $node->ownerDocument->importNode( $html_fragment, true );

				if ( ! empty( $imported_fragment ) ) {
					// Save the children of the imported DOMDocumentFragment before replacement.
					$children = DOM::nodelist_to_array( $imported_fragment->childNodes );

					if ( false !== $parent->replaceChild( $imported_fragment, $node ) ) {
						// Success! We return the saved array of DOMNodes as
						// $imported_fragment is just an empty DOMDocumentFragment now.
						$result = $children;
					}
				}
			}
		} finally {
			// Never leave our handler installed, even if parsing or the DOM operations threw.
			\restore_error_handler();
		}

		return $result;
	}

	/**
	 * Retrieves the fix registry.
	 *
	 * A Default_Registry is created on first use if none was injected. An
	 * injected registry receives the hyphenator cache once, on first retrieval.
	 *
	 * @return Registry
	 */
	public function get_registry() {
		if ( ! isset( $this->registry ) ) {
			$this->registry = new Default_Registry( $this->get_hyphenator_cache() );
		} elseif ( $this->update_registry_cache ) {
			$this->registry->update_hyphenator_cache( $this->get_hyphenator_cache() );
			$this->update_registry_cache = false;
		}

		return $this->registry;
	}

	/**
	 * Retrieves the HTML5 parser instance.
	 *
	 * @return \Masterminds\HTML5
	 */
	public function get_html5_parser() {
		// Lazy-load HTML5 parser.
		if ( ! isset( $this->html5_parser ) ) {
			$this->html5_parser = new \Masterminds\HTML5( [
				'disable_html_ns' => true,
			] );
		}

		return $this->html5_parser;
	}

	/**
	 * Retrieves the hyphenator cache, creating an empty one if necessary.
	 *
	 * @return Hyphenator\Cache
	 */
	public function get_hyphenator_cache() {
		if ( ! isset( $this->hyphenator_cache ) ) {
			$this->hyphenator_cache = new Hyphenator\Cache();
		}

		return $this->hyphenator_cache;
	}

	/**
	 * Injects an existing Hyphenator\Cache (to facilitate persistent language caching).
	 *
	 * @param Hyphenator\Cache $cache A hyphenator cache instance.
	 */
	public function set_hyphenator_cache( Hyphenator\Cache $cache ) {
		$this->hyphenator_cache = $cache;

		// Change hyphenator cache for existing token fixes.
		if ( isset( $this->registry ) ) {
			$this->registry->update_hyphenator_cache( $cache );
		}
	}

	/**
	 * Retrieves the list of valid language plugins in the given directory.
	 *
	 * Every "*.json" file containing a `"language": "<name>",` entry is
	 * listed; the file name without its extension is used as the language code.
	 *
	 * @param string $path The path in which to look for language plugin files.
	 *                     It is prepended to the file names as is, so it has to
	 *                     end with a directory separator.
	 *
	 * @return string[] An array in the form ( $language_code => $language_name ),
	 *                  sorted by language name. Empty if the directory cannot be opened.
	 */
	private static function get_language_plugin_list( $path ) {
		$languages = [];

		// Try to open the given directory.
		$handle = \opendir( $path );
		if ( false === $handle ) {
			return $languages; // Abort.
		}

		// Read all files in directory. The result has to be compared against
		// false explicitly: an entry named "0" is falsy and would otherwise
		// end the loop early, dropping all the remaining files.
		$file = \readdir( $handle );
		while ( false !== $file ) {
			// We only want the JSON files.
			if ( '.json' === \substr( $file, -5 ) ) {
				$file_content = \file_get_contents( $path . $file );

				// Skip unreadable files instead of matching against false.
				if ( false !== $file_content && \preg_match( '/"language"\s*:\s*((".+")|(\'.+\'))\s*,/', $file_content, $matches ) ) {
					// Strip the surrounding quotes from the name and the extension from the file.
					$language_name = \substr( $matches[1], 1, -1 );
					$language_code = \substr( $file, 0, -5 );

					$languages[ $language_code ] = $language_name;
				}
			}

			// Read next file.
			$file = \readdir( $handle );
		}
		\closedir( $handle );

		// Sort by language name, keeping the language codes as keys. Note that
		// asort() with its default flags does not take the locale into account.
		\asort( $languages );

		return $languages;
	}

	/**
	 * Retrieves the list of valid hyphenation languages.
	 *
	 * Note that this method reads all the language files on disc, so you should
	 * cache the results if possible.
	 *
	 * @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
	 */
	public static function get_hyphenation_languages() {
		return self::get_language_plugin_list( __DIR__ . '/lang/' );
	}

	/**
	 * Retrieves the list of valid diacritic replacement languages.
	 *
	 * Note that this method reads all the language files on disc, so you should
	 * cache the results if possible.
	 *
	 * @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
	 */
	public static function get_diacritic_languages() {
		return self::get_language_plugin_list( __DIR__ . '/diacritics/' );
	}
}


Scrutinizer GitHub App not installed

GitHub Access Token became invalid

Pull Request — master (#56)

PHP_Typography::get_registry() A

Complexity

Size

Duplication

Code Coverage

Importance

1		<?php
2		/**
3		* This file is part of PHP-Typography.
4		*
5		* Copyright 2014-2017 Peter Putzer.
6		* Copyright 2009-2011 KINGdesk, LLC.
7		*
8		* This program is free software; you can redistribute it and/or modify
9		* it under the terms of the GNU General Public License as published by
10		* the Free Software Foundation; either version 2 of the License, or
11		* (at your option) any later version.
12		*
13		* This program is distributed in the hope that it will be useful,
14		* but WITHOUT ANY WARRANTY; without even the implied warranty of
15		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16		* GNU General Public License for more details.
17		*
18		* You should have received a copy of the GNU General Public License along
19		* with this program; if not, write to the Free Software Foundation, Inc.,
20		* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21		*
22		* ***
23		*
24		* @package mundschenk-at/php-typography
25		* @license http://www.gnu.org/licenses/gpl-2.0.html
26		*/
27
28		namespace PHP_Typography;
29
30		use PHP_Typography\Fixes\Registry;
31		use PHP_Typography\Fixes\Default_Registry;
32
33		/**
34		* Parses HTML5 (or plain text) and applies various typographic fixes to the text.
35		*
36		* If used with multibyte language, UTF-8 encoding is required.
37		*
38		* Portions of this code have been inspired by:
39		* - typogrify (https://code.google.com/p/typogrify/)
40		* - WordPress code for wptexturize (https://developer.wordpress.org/reference/functions/wptexturize/)
41		* - PHP SmartyPants Typographer (https://michelf.ca/projects/php-smartypants/typographer/)
42		*
43		* @author Jeffrey D. King <[email protected]>
44		* @author Peter Putzer <[email protected]>
45		*/
46		class PHP_Typography {
47
48		/**
49		* A DOM-based HTML5 parser.
50		*
51		* @var \Masterminds\HTML5
52		*/
53		private $html5_parser;
54
55		/**
56		* The hyphenator cache.
57		*
58		* @var Hyphenator\Cache
59		*/
60		protected $hyphenator_cache;
61
62		/**
63		* The node fixes registry.
64		*
65		* @var Registry\|null;
66		*/
67		private $registry;
68
69		/**
70		* Whether the Hyphenator\Cache of the $registry needs to be updated.
71		*
72		* @var bool
73		*/
74		private $update_registry_cache;
75
76		/**
77		* Sets up a new PHP_Typography object.
78		*
79		* @param Registry\|null $registry Optional. A fix registry instance. Default null,
80		* meaning the default fixes are used.
81		*/
82	1	public function __construct( Registry $registry = null ) {
83	1	$this->registry = $registry;
84	1	$this->update_registry_cache = ! empty( $registry );
85	1	}
86
87		/**
88		* Modifies $html according to the defined settings.
89		*
90		* @since 6.0.0 Parameter $body_classes added.
91		*
92		* @param string $html A HTML fragment.
93		* @param Settings $settings A settings object.
94		* @param bool $is_title Optional. If the HTML fragment is a title. Default false.
95		* @param string[] $body_classes Optional. CSS classes added to the virtual
96		* <body> element used for processing. Default [].
97		*
98		* @return string The processed $html.
99		*/
100		public function process( $html, Settings $settings, $is_title = false, array $body_classes = [] ) {
101	27	return $this->process_textnodes( $html, function( $html, $settings, $is_title ) {
102	20	$this->get_registry()->apply_fixes( $html, $settings, $is_title, false );
103	27	}, $settings, $is_title, $body_classes );
104		}
105
106		/**
107		* Modifies $html according to the defined settings, in a way that is appropriate for RSS feeds
108		* (i.e. excluding processes that may not display well with limited character set intelligence).
109		*
110		* @since 6.0.0 Parameter $body_classes added.
111		*
112		* @param string $html A HTML fragment.
113		* @param Settings $settings A settings object.
114		* @param bool $is_title Optional. If the HTML fragment is a title. Default false.
115		* @param string[] $body_classes Optional. CSS classes added to the virtual
116		* <body> element used for processing. Default [].
117		*
118		* @return string The processed $html.
119		*/
120		public function process_feed( $html, Settings $settings, $is_title = false, array $body_classes = [] ) {
121	27	return $this->process_textnodes( $html, function( $html, $settings, $is_title ) {
122	20	$this->get_registry()->apply_fixes( $html, $settings, $is_title, true );
123	27	}, $settings, $is_title, $body_classes );
124		}
125
126		/**
127		* Applies specific fixes to all textnodes of the HTML fragment.
128		*
129		* @since 6.0.0 Parameter $body_classes added.
130		*
131		* @param string $html A HTML fragment.
132		* @param callable $fixer A callback that applies typography fixes to a single textnode.
133		* @param Settings $settings A settings object.
134		* @param bool $is_title Optional. If the HTML fragment is a title. Default false.
135		* @param string[] $body_classes Optional. CSS classes added to the virtual
136		* <body> element used for processing. Default [].
137		*
138		* @return string The processed $html.
139		*/
140	52	public function process_textnodes( $html, callable $fixer, Settings $settings, $is_title = false, array $body_classes = [] ) {
141	52	if ( isset( $settings['ignoreTags'] ) && $is_title && ( \in_array( 'h1', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'], true ) \|\| \in_array( 'h2', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'], true ) ) ) {
142	25	return $html;
143		}
144
145		// Lazy-load our parser (the text parser is not needed for feeds).
146	27	$html5_parser = $this->get_html5_parser();
147
148		// Parse the HTML.
149	27	$dom = $this->parse_html( $html5_parser, $html, $settings, $body_classes );
150
151		// Abort if there were parsing errors.
152	27	if ( empty( $dom ) ) {
153	2	return $html;
154		}
155
156		// Query some nodes in the DOM.
157	25	$xpath = new \DOMXPath( $dom );
158	25	$body_node = $xpath->query( '/html/body' )->item( 0 );
159	25	$tags_to_ignore = $this->query_tags_to_ignore( $xpath, $body_node, $settings );
160
161		// Start processing.
162	25	foreach ( $xpath->query( '//text()', $body_node ) as $textnode ) {
163		if (
164		// One of the ancestors should be ignored.
165	22	self::arrays_intersect( DOM::get_ancestors( $textnode ), $tags_to_ignore ) \|\|
166		// The node contains only whitespace.
167	22	$textnode->isWhitespaceInElementContent()
168		) {
169	3	continue;
170		}
171
172		// Apply fixes.
173	19	$fixer( $textnode, $settings, $is_title );
174
175		// Until now, we've only been working on a textnode: HTMLify result.
176	19	$this->replace_node_with_html( $textnode, $textnode->data );
177		}
178
179	25	return $html5_parser->saveHTML( $body_node->childNodes );
180		}
181
182		/**
183		* Determines whether two object arrays intersect. The second array is expected
184		* to use the spl_object_hash for its keys.
185		*
186		* @param array $array1 The keys are ignored.
187		* @param array $array2 This array has to be in the form ( $spl_object_hash => $object ).
188		*
189		* @return boolean
190		*/
191	4	protected static function arrays_intersect( array $array1, array $array2 ) {
192	4	foreach ( $array1 as $value ) {
193	2	if ( isset( $array2[ \spl_object_hash( $value ) ] ) ) {
194	2	return true;
195		}
196		}
197
198	3	return false;
199		}
200
201		/**
202		* Parse HTML5 fragment while ignoring certain warnings for invalid HTML code (e.g. duplicate IDs).
203		*
204		* @since 6.0.0 Parameter $body_classes added.
205		*
206		* @param \Masterminds\HTML5 $parser An intialized parser object.
207		* @param string $html The HTML fragment to parse (not a complete document).
208		* @param Settings $settings The settings to apply.
209		* @param string[] $body_classes Optional. CSS classes added to the virtual
210		* <body> element used for processing. Default [].
211		*
212		* @return \DOMDocument\|null The encoding has already been set to UTF-8. Returns null if there were parsing errors.
213		*/
214	53	public function parse_html( \Masterminds\HTML5 $parser, $html, Settings $settings, array $body_classes = [] ) {
215		// Silence some parsing errors for invalid HTML.
216	53	\set_error_handler( [ $this, 'handle_parsing_errors' ] ); // @codingStandardsIgnoreLine
217	53	$xml_error_handling = \libxml_use_internal_errors( true );
218
219		// Inject <body> classes.
220	53	$body = empty( $body_classes ) ? 'body' : 'body class="' . \implode( ' ', $body_classes ) . '"';
221
222		// Do the actual parsing.
223	53	$dom = $parser->loadHTML( "<!DOCTYPE html><html><{$body}>{$html}</body></html>" );
224	53	$dom->encoding = 'UTF-8';
225
226		// Restore original error handling.
227	53	\libxml_clear_errors();
228	53	\libxml_use_internal_errors( $xml_error_handling );
229	53	\restore_error_handler();
230
231		// Handle any parser errors.
232	53	$errors = $parser->getErrors();
233	53	if ( ! empty( $settings['parserErrorsHandler'] ) && ! empty( $errors ) ) {
234	2	$errors = $settings['parserErrorsHandler']( $errors );
235		}
236
237		// Return null if there are still unhandled parsing errors.
238	53	if ( ! empty( $errors ) && ! $settings['parserErrorsIgnore'] ) {
239	2	$dom = null;
240		}
241
242	53	return $dom;
243		}
244
245		/**
246		* Silently handle certain HTML parsing errors.
247		*
248		* @since 6.0.0 Unused parameters $errline and $errcontext removed.
249		*
250		* @param int $errno Error number.
251		* @param string $errstr Error message.
252		* @param string $errfile The file in which the error occurred.
253		*
254		* @return boolean Returns true if the error was handled, false otherwise.
255		*/
256	4	public function handle_parsing_errors( $errno, $errstr, $errfile ) {
257	4	if ( ! ( \error_reporting() & $errno ) ) { // @codingStandardsIgnoreLine.
258	4	return true; // not interesting.
259		}
260
261		// Ignore warnings from parser & let PHP handle the rest.
262	4	return $errno & E_USER_WARNING && 0 === \substr_compare( $errfile, 'DOMTreeBuilder.php', -18 );
263		}
264
265		/**
266		* Retrieves an array of nodes that should be skipped during processing.
267		*
268		* @param \DOMXPath $xpath A valid XPath instance for the DOM to be queried.
269		* @param \DOMNode $initial_node The starting node of the XPath query.
270		* @param Settings $settings The settings to apply.
271		*
272		* @return \DOMNode[] An array of \DOMNode (can be empty).
273		*/
274	1	public function query_tags_to_ignore( \DOMXPath $xpath, \DOMNode $initial_node, Settings $settings ) {
275	1	$elements = [];
276	1	$query_parts = [];
277	1	if ( ! empty( $settings['ignoreTags'] ) ) {
278	1	$query_parts[] = '//' . \implode( ' \| //', /** Array. @scrutinizer ignore-type */ $settings['ignoreTags'] );
279		}
280	1	if ( ! empty( $settings['ignoreClasses'] ) ) {
281	1	$query_parts[] = "//*[contains(concat(' ', @class, ' '), ' " . \implode( " ') or contains(concat(' ', @class, ' '), ' ", /** Array. @scrutinizer ignore-type */ $settings['ignoreClasses'] ) . " ')]";
282		}
283	1	if ( ! empty( $settings['ignoreIDs'] ) ) {
284	1	$query_parts[] = '//*[@id=\'' . \implode( '\' or @id=\'', /** Array. @scrutinizer ignore-type */ $settings['ignoreIDs'] ) . '\']';
285		}
286
287	1	if ( ! empty( $query_parts ) ) {
288	1	$ignore_query = \implode( ' \| ', $query_parts );
289
290	1	$nodelist = $xpath->query( $ignore_query, $initial_node );
291	1	if ( false !== $nodelist ) {
292	1	$elements = DOM::nodelist_to_array( $nodelist );
293		}
294		}
295
296	1	return $elements;
297		}
298
299		/**
300		* Replaces the given node with HTML content. Uses the HTML5 parser.
301		*
302		* @param \DOMNode $node The node to replace.
303		* @param string $content The HTML fragment used to replace the node.
304		*
305		* @return \DOMNode\|array An array of \DOMNode containing the new nodes or the old \DOMNode if the replacement failed.
306		*/
307	2	public function replace_node_with_html( \DOMNode $node, $content ) {
308	2	$result = $node;
309
310	2	$parent = $node->parentNode;
311	2	if ( empty( $parent ) ) {
312	1	return $node; // abort early to save cycles.
313		}
314
315	1	\set_error_handler( [ $this, 'handle_parsing_errors' ] ); // @codingStandardsIgnoreLine.
316
317	1	$html_fragment = $this->get_html5_parser()->loadHTMLFragment( $content );
318	1	if ( ! empty( $html_fragment ) ) {
319	1	$imported_fragment = $node->ownerDocument->importNode( $html_fragment, true );
320
321	1	if ( ! empty( $imported_fragment ) ) {
322		// Save the children of the imported DOMDocumentFragment before replacement.
323	1	$children = DOM::nodelist_to_array( $imported_fragment->childNodes );
324
325	1	if ( false !== $parent->replaceChild( $imported_fragment, $node ) ) {
326		// Success! We return the saved array of DOMNodes as
327		// $imported_fragment is just an empty DOMDocumentFragment now.
328	1	$result = $children;
329		}
330		}
331		}
332
333	1	\restore_error_handler();
334
335	1	return $result;
336		}
337
338		/**
339		* Retrieves the fix registry.
340		*
341		* @return Registry
342		*/
343	2	public function get_registry() {
344	2	if ( ! isset( $this->registry ) ) {
345	1	$this->registry = new Default_Registry( $this->get_hyphenator_cache() );
346	1	} elseif ( $this->update_registry_cache ) {
347	1	$this->registry->update_hyphenator_cache( $this->get_hyphenator_cache() );
348	1	$this->update_registry_cache = false;
349		}
350
351	2	return $this->registry;
352		}
353
354		/**
355		* Retrieves the HTML5 parser instance.
356		*
357		* @return \Masterminds\HTML5
358		*/
359	1	public function get_html5_parser() {
360		// Lazy-load HTML5 parser.
361	1	if ( ! isset( $this->html5_parser ) ) {
362	1	$this->html5_parser = new \Masterminds\HTML5( [
363	1	'disable_html_ns' => true,
364		] );
365		}
366
367	1	return $this->html5_parser;
368		}
369
370		/**
371		* Retrieves the hyphenator cache.
372		*
373		* @return Hyphenator\Cache
374		*/
375	1	public function get_hyphenator_cache() {
376	1	if ( ! isset( $this->hyphenator_cache ) ) {
377	1	$this->hyphenator_cache = new Hyphenator\Cache();
378		}
379
380	1	return $this->hyphenator_cache;
381		}
382
383		/**
384		* Injects an existing Hyphenator\Cache (to facilitate persistent language caching).
385		*
386		* @param Hyphenator\Cache $cache A hyphenator cache instance.
387		*/
388	2	public function set_hyphenator_cache( Hyphenator\Cache $cache ) {
389	2	$this->hyphenator_cache = $cache;
390
391		// Change hyphenator cache for existing token fixes.
392	2	if ( isset( $this->registry ) ) {
393	1	$this->registry->update_hyphenator_cache( $cache );
394		}
395	2	}
396
397		/**
398		* Retrieves the list of valid language plugins in the given directory.
399		*
400		* @param string $path The path in which to look for language plugin files.
401		*
402		* @return string[] An array in the form ( $language_code => $language_name ).
403		*/
404	2	private static function get_language_plugin_list( $path ) {
405	2	$languages = [];
406
407		// Try to open the given directory.
408	2	$handle = \opendir( $path );
409	2	if ( false === $handle ) {
410		return $languages; // Abort.
411		}
412
413		// Read all files in directory.
414	2	$file = \readdir( $handle );
415	2	while ( $file ) {
416		// We only want the JSON files.
417	2	if ( '.json' === \substr( $file, -5 ) ) {
418	2	$file_content = \file_get_contents( $path . $file );
419	2	if ( \preg_match( '/"language"\s*:\s*((".+")\|(\'.+\'))\s*,/', $file_content, $matches ) ) {
420	2	$language_name = \substr( $matches[1], 1, -1 );
421	2	$language_code = \substr( $file, 0, -5 );
422
423	2	$languages[ $language_code ] = $language_name;
424		}
425		}
426
427		// Read next file.
428	2	$file = \readdir( $handle );
429		}
430	2	\closedir( $handle );
431
432		// Sort translated language names according to current locale.
433	2	\asort( $languages );
434
435	2	return $languages;
436		}
437
438		/**
439		* Retrieves the list of valid hyphenation languages.
440		*
441		* Note that this method reads all the language files on disc, so you should
442		* cache the results if possible.
443		*
444		* @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
445		*/
446	1	public static function get_hyphenation_languages() {
447	1	return self::get_language_plugin_list( __DIR__ . '/lang/' );
448		}
449
450		/**
451		* Retrieves the list of valid diacritic replacement languages.
452		*
453		* Note that this method reads all the language files on disc, so you should
454		* cache the results if possible.
455		*
456		* @return string[] An array in the form of ( LANG_CODE => LANGUAGE ).
457		*/
458	1	public static function get_diacritic_languages() {
459	1	return self::get_language_plugin_list( __DIR__ . '/diacritics/' );
460		}
461		}
462

mundschenk-at / php-typography

Scrutinizer GitHub App not installed

GitHub Access Token became invalid

Pull Request — master (#56)

PHP_Typography::get_registry() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like