HtmlFormatter - Code Metrics - Inspection of "Daily Inspection: Update OOjs UI to v0.16.5" - wikimedia/mediawiki - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Branch — master (d58858)

unknown

created 2016-04-07 17:09 UTC

HtmlFormatter B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	344
Duplicated Lines	0 %

Coupling/Cohesion

Components	1
Dependencies	2

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
wmc	49
c	1
b	0
f	0
lcom	1
cbo	2
dl	0
loc	344
rs	8.5454

14 Methods

Rating	Name	Size	Complexity
A	__construct()	3	1
A	wrapHTML()	3	1
A	onHtmlReady()	3	1
A	getDoc()	22	2
A	setRemoveMedia()	3	1
A	remove()	3	1
A	flatten()	3	1
A	flattenAllTags()	3	1
C	filterContent()	70	13
B	removeElements()	16	5
A	fixLibXML()	20	2
D	getText()	40	9
B	parseSelector()	19	7
B	parseItemsToRemove()	24	4

How to fix Complexity

<?php
/**
 * Performs transformations of HTML by wrapping around libxml2 and working
 * around its countless bugs.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
class HtmlFormatter {
	/**
	 * Lazily created DOM for $html; stays null until getDoc() is first called.
	 * @var DOMDocument
	 */
	private $doc;

	/** @var string Raw HTML passed to the constructor */
	private $html;

	/** @var string[] Selectors registered through remove() */
	private $itemsToRemove = [];

	/** @var string[] Tag names (or undelimited regexes) registered through flatten() */
	private $elementsToFlatten = [];

	/** @var bool Whether img/audio/video tags are added to the removal list */
	protected $removeMedia = false;

	/**
	 * Constructor
	 *
	 * @param string $html Text to process
	 */
	public function __construct( $html ) {
		$this->html = $html;
	}

	/**
	 * Turns a chunk of HTML into a proper document
	 * @param string $html
	 * @return string
	 */
	public static function wrapHTML( $html ) {
		return '<!doctype html><html><head></head><body>' . $html . '</body></html>';
	}

	/**
	 * Override this in descendant class to modify HTML after it has been converted from DOM tree
	 * @param string $html HTML to process
	 * @return string Processed HTML
	 */
	protected function onHtmlReady( $html ) {
		return $html;
	}

	/**
	 * Returns the DOM for the HTML given to the constructor, parsing it on first use.
	 *
	 * @return DOMDocument DOM to manipulate
	 */
	public function getDoc() {
		if ( !$this->doc ) {
			// DOMDocument::loadHTML isn't very good with encodings, so
			// convert input to ASCII by encoding everything above 128 as entities.
			$html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );

			// Workaround for bug that caused spaces before references
			// to disappear during processing: https://phabricator.wikimedia.org/T55086
			// TODO: Please replace with a better fix if one can be found.
			$html = str_replace( ' <', '&#32;<', $html );

			// Collect parser complaints internally instead of emitting PHP warnings,
			// remembering the caller's setting so it can be put back afterwards.
			$previousErrorMode = libxml_use_internal_errors( true );
			$loader = libxml_disable_entity_loader();
			$this->doc = new DOMDocument();
			$this->doc->strictErrorChecking = false;
			$this->doc->loadHTML( $html );
			libxml_disable_entity_loader( $loader );
			// Discard the errors collected during parsing, then restore the previous
			// mode rather than forcing it off for the rest of the request.
			libxml_clear_errors();
			libxml_use_internal_errors( $previousErrorMode );
			$this->doc->encoding = 'UTF-8';
		}
		return $this->doc;
	}

	/**
	 * Sets whether images/videos/sounds should be removed from output
	 * @param bool $flag
	 */
	public function setRemoveMedia( $flag = true ) {
		$this->removeMedia = $flag;
	}

	/**
	 * Adds one or more selector of content to remove. A subset of CSS selector
	 * syntax is supported:
	 *
	 *   <tag>
	 *   <tag>.class
	 *   .<class>
	 *   #<id>
	 *
	 * @param array|string $selectors Selector(s) of stuff to remove
	 */
	public function remove( $selectors ) {
		$this->itemsToRemove = array_merge( $this->itemsToRemove, (array)$selectors );
	}

	/**
	 * Adds one or more element name to the list to flatten (remove tag, but not its content)
	 * Can accept undelimited regexes
	 *
	 * Note this interface may fail in surprising unexpected ways due to usage of regexes,
	 * so should not be relied on for HTML markup security measures.
	 *
	 * @param array|string $elements Name(s) of tag(s) to flatten
	 */
	public function flatten( $elements ) {
		$this->elementsToFlatten = array_merge( $this->elementsToFlatten, (array)$elements );
	}

	/**
	 * Instructs the formatter to flatten all tags
	 */
	public function flattenAllTags() {
		$this->flatten( '[?!]?[a-z0-9]+' );
	}

	/**
	 * Removes content we've chosen to remove.  The text of the removed elements can be
	 * extracted with the getText method.
	 *
	 * Selectors are processed in this order: tags, IDs, classes, tags with classes.
	 * Class selectors match whole, whitespace-separated class names only, so ".foo"
	 * matches class="foo bar" but not class="foo-bar".
	 *
	 * @return array Array of removed DOMElements
	 */
	public function filterContent() {
		$removals = $this->parseItemsToRemove();

		// Bail out early if nothing to do (every selector bucket is empty)
		if ( !array_filter( $removals ) ) {
			return [];
		}

		$doc = $this->getDoc();

		// Remove tags

		// You can't remove DOMNodes from a DOMNodeList as you're iterating
		// over them in a foreach loop. It will seemingly leave the internal
		// iterator on the foreach out of whack and results will be quite
		// strange. Though, making a queue of items to remove seems to work.
		$domElemsToRemove = [];
		foreach ( $removals['TAG'] as $tagToRemove ) {
			foreach ( $doc->getElementsByTagName( $tagToRemove ) as $tagToRemoveNode ) {
				$domElemsToRemove[] = $tagToRemoveNode;
			}
		}
		$removed = $this->removeElements( $domElemsToRemove );

		// Elements with named IDs
		$domElemsToRemove = [];
		foreach ( $removals['ID'] as $itemToRemove ) {
			$itemToRemoveNode = $doc->getElementById( $itemToRemove );
			if ( $itemToRemoveNode ) {
				$domElemsToRemove[] = $itemToRemoveNode;
			}
		}
		$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );

		// CSS Classes
		$domElemsToRemove = [];
		if ( $removals['CLASS'] ) {
			// The query is a constant string: selectors are compared in PHP, so they are
			// never interpolated into XPath or a regular expression.
			$xpath = new DOMXPath( $doc );
			$classedElements = iterator_to_array( $xpath->query( '//*[@class]' ), false );
			foreach ( $removals['CLASS'] as $classToRemove ) {
				/** @var $element DOMElement */
				foreach ( $classedElements as $element ) {
					if ( $element->parentNode && self::hasClass( $element, $classToRemove ) ) {
						$domElemsToRemove[] = $element;
					}
				}
			}
		}
		$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );

		// Tags with CSS Classes
		foreach ( $removals['TAG_CLASS'] as $tagAndClass ) {
			// Only the first class after the tag name is considered ("a.b.c" is treated as "a.b")
			$parts = explode( '.', $tagAndClass );

			$domElemsToRemove = [];
			/** @var $element DOMElement */
			foreach ( $doc->getElementsByTagName( $parts[0] ) as $element ) {
				if ( self::hasClass( $element, $parts[1] ) ) {
					$domElemsToRemove[] = $element;
				}
			}
			$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
		}

		return $removed;
	}

	/**
	 * Checks whether an element's class attribute contains the given class name as a
	 * complete whitespace-separated token. The comparison is case-sensitive.
	 *
	 * @param DOMElement $element
	 * @param string $class Class name to look for; an empty name never matches
	 * @return bool
	 */
	private static function hasClass( DOMElement $element, $class ) {
		if ( $class === '' ) {
			return false;
		}
		$tokens = preg_split( '/\s+/', $element->getAttribute( 'class' ), -1, PREG_SPLIT_NO_EMPTY );
		return in_array( $class, $tokens, true );
	}

	/**
	 * Removes a list of elements from DOMDocument
	 *
	 * Elements that are already detached (no parent node) are skipped, but they are
	 * still part of the returned list.
	 *
	 * @param array|DOMNodeList $elements
	 * @return array Array of removed elements
	 */
	private function removeElements( $elements ) {
		// Copy a DOMNodeList into a plain array first so that removing nodes
		// does not disturb the iteration.
		$list = $elements instanceof DOMNodeList
			? iterator_to_array( $elements, false )
			: $elements;

		/** @var $element DOMElement */
		foreach ( $list as $element ) {
			if ( $element->parentNode ) {
				$element->parentNode->removeChild( $element );
			}
		}
		return $list;
	}

	/**
	 * libxml in its usual pointlessness converts many chars to entities - this function
	 * performs a reverse conversion
	 *
	 * The four markup-significant entities are double-escaped first so that they are
	 * still entities after the HTML-ENTITIES to UTF-8 conversion below.
	 *
	 * @param string $html
	 * @return string
	 */
	private function fixLibXML( $html ) {
		static $replacements;
		if ( !$replacements ) {
			// We don't include rules like '&#34;' => '&amp;quot;' because entities had already been
			// normalized by libxml. Using this function with input not sanitized by libxml is UNSAFE!
			$replacements = new ReplacementArray( [
				'&quot;' => '&amp;quot;',
				'&amp;' => '&amp;amp;',
				'&lt;' => '&amp;lt;',
				'&gt;' => '&amp;gt;',
			] );
		}
		$html = $replacements->replace( $html );

		// Just in case the conversion in getDoc() above used named
		// entities that aren't known to html_entity_decode().
		$html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );

		return $html;
	}

	/**
	 * Performs final transformations and returns resulting HTML.  Note that if you want to call this
	 * both without an element and with an element you should call it without an element first.  If you
	 * specify the $element in the method it'll change the underlying dom and you won't be able to get
	 * it back.
	 *
	 * If getDoc() has never been called, the original HTML string is used as-is and
	 * only the string-level clean-up and flattening steps are applied.
	 *
	 * @param DOMElement|string|null $element Element, or ID of the element, to get HTML from,
	 *   or null to get it from the whole tree. An ID that matches nothing also yields the whole tree.
	 * @return string Processed HTML
	 */
	public function getText( $element = null ) {
		if ( $this->doc ) {
			if ( $element !== null && !( $element instanceof DOMElement ) ) {
				$element = $this->doc->getElementById( $element );
			}
			// Only look for <body> when there is an element to isolate; if the document
			// has no <body>, fall back to serializing the whole tree.
			$body = $element ? $this->doc->getElementsByTagName( 'body' )->item( 0 ) : null;
			if ( $body ) {
				// Empty the body (working on a snapshot of its children), then
				// make the requested element its only child.
				foreach ( iterator_to_array( $body->childNodes, false ) as $child ) {
					$body->removeChild( $child );
				}
				$body->appendChild( $element );
			}
			$html = $this->doc->saveHTML();

			$html = $this->fixLibXML( $html );
			if ( wfIsWindows() ) {
				// Cleanup for CRLF misprocessing of unknown origin on Windows.
				// If this error continues in the future, please track it down in the
				// XML code paths if possible and fix there.
				$html = str_replace( '&#13;', '', $html );
			}
		} else {
			$html = $this->html;
		}
		// Remove stuff added by wrapHTML()
		$html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
		$html = $this->onHtmlReady( $html );

		if ( $this->elementsToFlatten ) {
			// Strip opening and closing tags of the flattened elements, keeping their content
			$elements = implode( '|', $this->elementsToFlatten );
			$html = preg_replace( "#</?($elements)\\b[^>]*>#is", '', $html );
		}

		return $html;
	}

	/**
	 * Helper function for parseItemsToRemove(). This function extracts the selector type
	 * and the raw name of a selector from a CSS-style selector string and assigns those
	 * values to parameters passed by reference. For example, if given '#toc' as the
	 * $selector parameter, it will assign 'ID' as the $type and 'toc' as the $rawName.
	 * @param string $selector CSS selector to parse
	 * @param string $type The type of selector (ID, CLASS, TAG_CLASS, or TAG)
	 * @param string $rawName The raw name of the selector
	 * @return bool Whether the selector was successfully recognised (always true;
	 *   unrecognised selectors raise an exception instead)
	 * @throws MWException If the selector contains square brackets and is not otherwise recognised
	 */
	protected function parseSelector( $selector, &$type, &$rawName ) {
		if ( strpos( $selector, '.' ) === 0 ) {
			$type = 'CLASS';
			$rawName = substr( $selector, 1 );
		} elseif ( strpos( $selector, '#' ) === 0 ) {
			$type = 'ID';
			$rawName = substr( $selector, 1 );
		} elseif ( strpos( $selector, '.' ) !== false ) {
			// A dot somewhere after the first character: "tag.class"
			$type = 'TAG_CLASS';
			$rawName = $selector;
		} elseif ( strpos( $selector, '[' ) === false && strpos( $selector, ']' ) === false ) {
			$type = 'TAG';
			$rawName = $selector;
		} else {
			throw new MWException( __METHOD__ . "(): unrecognized selector '$selector'" );
		}

		return true;
	}

	/**
	 * Transforms CSS-style selectors into an internal representation suitable for
	 * processing by filterContent()
	 * @return array Map with the keys ID, TAG, CLASS and TAG_CLASS, each holding a list of raw names
	 */
	protected function parseItemsToRemove() {
		$removals = [
			'ID' => [],
			'TAG' => [],
			'CLASS' => [],
			'TAG_CLASS' => [],
		];

		foreach ( $this->itemsToRemove as $itemToRemove ) {
			$type = '';
			$rawName = '';
			if ( $this->parseSelector( $itemToRemove, $type, $rawName ) ) {
				$removals[$type][] = $rawName;
			}
		}

		if ( $this->removeMedia ) {
			$removals['TAG'][] = 'img';
			$removals['TAG'][] = 'audio';
			$removals['TAG'][] = 'video';
		}

		return $removals;
	}
}


1			<?php
2			/**
3			* Performs transformations of HTML by wrapping around libxml2 and working
4			* around its countless bugs.
5			*
6			* This program is free software; you can redistribute it and/or modify
7			* it under the terms of the GNU General Public License as published by
8			* the Free Software Foundation; either version 2 of the License, or
9			* (at your option) any later version.
10			*
11			* This program is distributed in the hope that it will be useful,
12			* but WITHOUT ANY WARRANTY; without even the implied warranty of
13			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			* GNU General Public License for more details.
15			*
16			* You should have received a copy of the GNU General Public License along
17			* with this program; if not, write to the Free Software Foundation, Inc.,
18			* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19			* http://www.gnu.org/copyleft/gpl.html
20			*
21			* @file
22			*/
23			class HtmlFormatter {
24			/**
25			* @var DOMDocument
26			*/
27			private $doc;
28
29			private $html;
30			private $itemsToRemove = [];
31			private $elementsToFlatten = [];
32			protected $removeMedia = false;
33
34			/**
35			* Constructor
36			*
37			* @param string $html Text to process
38			*/
39			public function __construct( $html ) {
40			$this->html = $html;
41			}
42
43			/**
44			* Turns a chunk of HTML into a proper document
45			* @param string $html
46			* @return string
47			*/
48			public static function wrapHTML( $html ) {
49			return '<!doctype html><html><head></head><body>' . $html . '</body></html>';
50			}
51
52			/**
53			* Override this in descendant class to modify HTML after it has been converted from DOM tree
54			* @param string $html HTML to process
55			* @return string Processed HTML
56			*/
57			protected function onHtmlReady( $html ) {
58			return $html;
59			}
60
61			/**
62			* @return DOMDocument DOM to manipulate
63			*/
64			public function getDoc() {
65			if ( !$this->doc ) {
66			// DOMDocument::loadHTML isn't very good with encodings, so
67			// convert input to ASCII by encoding everything above 128 as entities.
68			$html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );
69
70			// Workaround for bug that caused spaces before references
71			// to disappear during processing: https://phabricator.wikimedia.org/T55086
72			// TODO: Please replace with a better fix if one can be found.
73			$html = str_replace( ' <', '&#32;<', $html );
74
75			libxml_use_internal_errors( true );
76			$loader = libxml_disable_entity_loader();
77			$this->doc = new DOMDocument();
78			$this->doc->strictErrorChecking = false;
79			$this->doc->loadHTML( $html );
80			libxml_disable_entity_loader( $loader );
81			libxml_use_internal_errors( false );
82			$this->doc->encoding = 'UTF-8';
83			}
84			return $this->doc;
85			}
86
87			/**
88			* Sets whether images/videos/sounds should be removed from output
89			* @param bool $flag
90			*/
91			public function setRemoveMedia( $flag = true ) {
92			$this->removeMedia = $flag;
93			}
94
95			/**
96			* Adds one or more selector of content to remove. A subset of CSS selector
97			* syntax is supported:
98			*
99			* <tag>
100			* <tag>.class
101			* .<class>
102			* #<id>
103			*
104			* @param array|string $selectors Selector(s) of stuff to remove
105			*/
106			public function remove( $selectors ) {
107			$this->itemsToRemove = array_merge( $this->itemsToRemove, (array)$selectors );
108			}
109
110			/**
111			* Adds one or more element name to the list to flatten (remove tag, but not its content)
112			* Can accept undelimited regexes
113			*
114			* Note this interface may fail in surprising unexpected ways due to usage of regexes,
115			* so should not be relied on for HTML markup security measures.
116			*
117			* @param array|string $elements Name(s) of tag(s) to flatten
118			*/
119			public function flatten( $elements ) {
120			$this->elementsToFlatten = array_merge( $this->elementsToFlatten, (array)$elements );
121			}
122
123			/**
124			* Instructs the formatter to flatten all tags
125			*/
126			public function flattenAllTags() {
127			$this->flatten( '[?!]?[a-z0-9]+' );
128			}
129
130			/**
131			* Removes content we've chosen to remove. The text of the removed elements can be
132			* extracted with the getText method.
133			* @return array Array of removed DOMElements
134			*/
135			public function filterContent() {
136			$removals = $this->parseItemsToRemove();
137
138			// Bail out early if nothing to do
139			if ( array_reduce( $removals,
140			function ( $carry, $item ) {
141			return $carry && !$item;
142			},
143			true
144			) ) {
145			return [];
146			}
147
148			$doc = $this->getDoc();
149
150			// Remove tags
151
152			// You can't remove DOMNodes from a DOMNodeList as you're iterating
153			// over them in a foreach loop. It will seemingly leave the internal
154			// iterator on the foreach out of wack and results will be quite
155			// strange. Though, making a queue of items to remove seems to work.
156			$domElemsToRemove = [];
157			foreach ( $removals['TAG'] as $tagToRemove ) {
158			$tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove );
159			foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
160			if ( $tagToRemoveNode ) {
161			$domElemsToRemove[] = $tagToRemoveNode;
162			}
163			}
164			}
165			$removed = $this->removeElements( $domElemsToRemove );
166
167			// Elements with named IDs
168			$domElemsToRemove = [];
169			foreach ( $removals['ID'] as $itemToRemove ) {
170			$itemToRemoveNode = $doc->getElementById( $itemToRemove );
171			if ( $itemToRemoveNode ) {
172			$domElemsToRemove[] = $itemToRemoveNode;
173			}
174			}
175			$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
176
177			// CSS Classes
178			$domElemsToRemove = [];
179			$xpath = new DOMXPath( $doc );
180			foreach ( $removals['CLASS'] as $classToRemove ) {
181			$elements = $xpath->query( '//*[contains(@class, "' . $classToRemove . '")]' );
182
183			/** @var $element DOMElement */
184			foreach ( $elements as $element ) {
185			$classes = $element->getAttribute( 'class' );
186			if ( preg_match( "/\b$classToRemove\b/", $classes ) && $element->parentNode ) {
187			$domElemsToRemove[] = $element;
188			}
189			}
190			}
191			$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
192
193			// Tags with CSS Classes
194			foreach ( $removals['TAG_CLASS'] as $classToRemove ) {
195			$parts = explode( '.', $classToRemove );
196
197			$elements = $xpath->query(
198			'//' . $parts[0] . '[@class="' . $parts[1] . '"]'
199			);
200			$removed = array_merge( $removed, $this->removeElements( $elements ) );
201			}
202
203			return $removed;
204			}
205
206			/**
207			* Removes a list of elements from DOMDocument
208			* @param array|DOMNodeList $elements
209			* @return array Array of removed elements
210			*/
211			private function removeElements( $elements ) {
212			$list = $elements;
213			if ( $elements instanceof DOMNodeList ) {
214			$list = [];
215			foreach ( $elements as $element ) {
216			$list[] = $element;
217			}
218			}
219			/** @var $element DOMElement */
220			foreach ( $list as $element ) {
221			if ( $element->parentNode ) {
222			$element->parentNode->removeChild( $element );
223			}
224			}
225			return $list;
226			}
227
228			/**
229			* libxml in its usual pointlessness converts many chars to entities - this function
230			* performs a reverse conversion
231			* @param string $html
232			* @return string
233			*/
234			private function fixLibXML( $html ) {
235			static $replacements;
236			if ( !$replacements ) {
237			// We don't include rules like '&#34;' => '&amp;quot;' because entities had already been
238			// normalized by libxml. Using this function with input not sanitized by libxml is UNSAFE!
239			$replacements = new ReplacementArray( [
240			'&quot;' => '&amp;quot;',
241			'&amp;' => '&amp;amp;',
242			'&lt;' => '&amp;lt;',
243			'&gt;' => '&amp;gt;',
244			] );
245			}
246			$html = $replacements->replace( $html );
247
248			// Just in case the conversion in getDoc() above used named
249			// entities that aren't known to html_entity_decode().
250			$html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
251
252			return $html;
253			}
254
255			/**
256			* Performs final transformations and returns resulting HTML. Note that if you want to call this
257			* both without an element and with an element you should call it without an element first. If you
258			* specify the $element in the method it'll change the underlying dom and you won't be able to get
259			* it back.
260			*
261			* @param DOMElement|string|null $element ID of element to get HTML from or
262			* null to get it from the whole tree
263			* @return string Processed HTML
264			*/
265			public function getText( $element = null ) {
266
267			if ( $this->doc ) {
268			if ( $element !== null && !( $element instanceof DOMElement ) ) {
269			$element = $this->doc->getElementById( $element );
270			}
271			if ( $element ) {
272			$body = $this->doc->getElementsByTagName( 'body' )->item( 0 );
273			$nodesArray = [];
274			foreach ( $body->childNodes as $node ) {
275			$nodesArray[] = $node;
276			}
277			foreach ( $nodesArray as $nodeArray ) {
278			$body->removeChild( $nodeArray );
279			}
280			$body->appendChild( $element );
281			}
282			$html = $this->doc->saveHTML();
283
284			$html = $this->fixLibXML( $html );
285			if ( wfIsWindows() ) {
286			// Cleanup for CRLF misprocessing of unknown origin on Windows.
287			// If this error continues in the future, please track it down in the
288			// XML code paths if possible and fix there.
289			$html = str_replace( '&#13;', '', $html );
290			}
291			} else {
292			$html = $this->html;
293			}
294			// Remove stuff added by wrapHTML()
295			$html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
296			$html = $this->onHtmlReady( $html );
297
298			if ( $this->elementsToFlatten ) {
			0 ignored issues – show Bug Best Practice introduced 2016-01-16 18:00 UTC by Report Bug Copy Issue Report The expression `$this->elementsToFlatten` of type `array` is implicitly converted to a boolean; are you sure this is intended? If so, consider using `! empty($expr)` instead to make it clear that you intend to check for an array without elements. This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using `empty(..)` or `! empty(...)` instead. Loading history...
299			$elements = implode( '|', $this->elementsToFlatten );
300			$html = preg_replace( "#</?($elements)\\b[^>]*>#is", '', $html );
301			}
302
303			return $html;
304			}
305
306			/**
307			* Helper function for parseItemsToRemove(). This function extracts the selector type
308			* and the raw name of a selector from a CSS-style selector string and assigns those
309			* values to parameters passed by reference. For example, if given '#toc' as the
310			* $selector parameter, it will assign 'ID' as the $type and 'toc' as the $rawName.
311			* @param string $selector CSS selector to parse
312			* @param string $type The type of selector (ID, CLASS, TAG_CLASS, or TAG)
313			* @param string $rawName The raw name of the selector
314			* @return bool Whether the selector was successfully recognised
315			* @throws MWException
316			*/
317			protected function parseSelector( $selector, &$type, &$rawName ) {
318			if ( strpos( $selector, '.' ) === 0 ) {
319			$type = 'CLASS';
320			$rawName = substr( $selector, 1 );
321			} elseif ( strpos( $selector, '#' ) === 0 ) {
322			$type = 'ID';
323			$rawName = substr( $selector, 1 );
324			} elseif ( strpos( $selector, '.' ) !== 0 && strpos( $selector, '.' ) !== false ) {
325			$type = 'TAG_CLASS';
326			$rawName = $selector;
327			} elseif ( strpos( $selector, '[' ) === false && strpos( $selector, ']' ) === false ) {
328			$type = 'TAG';
329			$rawName = $selector;
330			} else {
331			throw new MWException( __METHOD__ . "(): unrecognized selector '$selector'" );
332			}
333
334			return true;
335			}
336
337			/**
338			* Transforms CSS-style selectors into an internal representation suitable for
339			* processing by filterContent()
340			* @return array
341			*/
342			protected function parseItemsToRemove() {
343			$removals = [
344			'ID' => [],
345			'TAG' => [],
346			'CLASS' => [],
347			'TAG_CLASS' => [],
348			];
349
350			foreach ( $this->itemsToRemove as $itemToRemove ) {
351			$type = '';
352			$rawName = '';
353			if ( $this->parseSelector( $itemToRemove, $type, $rawName ) ) {
354			$removals[$type][] = $rawName;
355			}
356			}
357
358			if ( $this->removeMedia ) {
359			$removals['TAG'][] = 'img';
360			$removals['TAG'][] = 'audio';
361			$removals['TAG'][] = 'video';
362			}
363
364			return $removals;
365			}
366			}
367

wikimedia / mediawiki

Branch — master (d58858)

HtmlFormatter B

Complexity

Size/Duplication

Coupling/Cohesion

Importance

14 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like