HtmlPage - Code Metrics - Inspection of "Try to fix UTF-8 problem." - wasinger/htmlpagedom - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#19)

unknown

created 2017-02-27 21:50 UTC

HtmlPage B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	350
Duplicated Lines	0 %

Coupling/Cohesion

Components	1
Dependencies	3

Test Coverage

Coverage

73.28%

Importance

Changes	3
Bugs	1	Features	0

Metric	Value
wmc	40
lcom	1
cbo	3
dl	0
loc	350
ccs	85
cts	116
cp	0.7328
rs	8.2608
c	3
b	1
f	0

24 Methods

Rating	Name	Size	Complexity
A	getCrawler()	4	1
A	getDOMDocument()	4	1
A	getTitle()	9	2
A	removeMeta()	5	1
A	getMeta()	9	2
A	getBaseHref()	9	2
A	setHtmlById()	4	1
A	getHead()	4	1
A	getBody()	4	1
A	getElementById()	4	1
A	filter()	5	1
A	filterXPath()	4	1
A	trimNewlines()	4	1
A	__clone()	5	1
A	minify()	9	2
A	indent()	9	2
B	__construct()	25	4
A	setTitle()	9	2
A	setMeta()	11	2
A	setBaseHref()	9	2
A	getHeadNode()	9	2
A	getBodyNode()	9	2
A	__toString()	8	3
A	save()	9	2

How to fix Complexity

<?php
namespace Wa72\HtmlPageDom;

use Symfony\Component\CssSelector\CssSelector;
use Wa72\HtmlPrettymin\PrettyMin;

/**
 * This class represents a complete HTML document.
 *
 * It offers convenience functions for getting and setting elements of the document
 * such as setTitle(), getTitle(), setMeta($name, $value), getBody().
 *
 * It uses HtmlPageCrawler to navigate and manipulate the DOM tree.
 *
 * @author Christoph Singer
 * @license MIT
 */
class HtmlPage
{
    /**
     * The DOM tree of the HTML document managed by this object
     *
     * @var \DOMDocument
     */
    protected $dom;

    /**
     * Character set passed to the constructor; used when parsing and when serializing
     *
     * @var string
     */
    protected $charset;

    /**
     * URL passed to the constructor (only stored by this class)
     *
     * @var string
     */
    protected $url;

    /**
     * Crawler wrapping $dom; all filter*() calls are delegated to it
     *
     * @var HtmlPageCrawler
     */
    protected $crawler;

    /**
     * Create a HtmlPage from HTML source code
     *
     * If $content is empty, a minimal HTML5 skeleton (doctype, html, head with
     * an empty title, body) is used instead.
     *
     * @param string $content HTML source code of the document
     * @param string $url     URL of the document
     * @param string $charset Character set of $content, also used for output
     */
    public function __construct($content = '', $url = '', $charset = 'UTF-8')
    {
        $this->charset = $charset;
        $this->url = $url;
        if ($content == '') {
            $content = '<!DOCTYPE html><html><head><title></title></head><body></body></html>';
        }
        $current = libxml_use_internal_errors(true);
        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
        // only called (and later restored) on older versions.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        $disableEntities = $toggleEntityLoader ? libxml_disable_entity_loader(true) : null;

        $this->dom = new \DOMDocument('1.0', $charset);
        $this->dom->loadHTML('<meta http-equiv="Content-Type" content="text/html;charset='.$charset.'">');
        $this->dom->validateOnParse = true;

        // Convert non-ASCII characters to HTML entities before parsing;
        // __toString() performs the reverse conversion on output.
        if (self::isMbConvertible($charset)) {
            $content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
        }

        // Parsing is deliberately best-effort: malformed markup must not prevent
        // construction, so warnings are suppressed here.
        @$this->dom->loadHTML($content);

        libxml_use_internal_errors($current);
        if ($toggleEntityLoader) {
            libxml_disable_entity_loader($disableEntities);
        }
        $this->crawler = new HtmlPageCrawler($this->dom);
    }

    /**
     * Get a HtmlPageCrawler object containing the root node of the HTML document
     *
     * @return HtmlPageCrawler
     */
    public function getCrawler()
    {
        return $this->crawler;
    }

    /**
     * Get a DOMDocument object for the HTML document
     *
     * @return \DOMDocument
     */
    public function getDOMDocument()
    {
        return $this->dom;
    }

    /**
     * Sets the page title of the HTML document
     *
     * A title element is created inside the head if the document has none.
     *
     * @param string $title
     */
    public function setTitle($title)
    {
        $t = $this->dom->getElementsByTagName('title')->item(0);
        if ($t === null) {
            $t = $this->dom->createElement('title');
            $this->getHeadNode()->appendChild($t);
        }
        // NOTE(review): escaping is kept exactly as in the original; presumably
        // needed because assigning nodeValue interprets entities - confirm.
        $t->nodeValue = htmlspecialchars($title);
    }

    /**
     * Get the page title of the HTML document
     *
     * @return null|string The title text, or null if there is no title element
     */
    public function getTitle()
    {
        $t = $this->dom->getElementsByTagName('title')->item(0);
        if ($t === null) {
            return null;
        }
        return $t->nodeValue;
    }

    /**
     * Set a META tag with specified 'name' and 'content' attributes
     *
     * If no meta tag with this name exists, one is appended to the head.
     *
     * @TODO: add support for multiple meta tags with the same name but different languages
     *
     * @param string $name
     * @param string $content
     */
    public function setMeta($name, $content)
    {
        $c = $this->filterXPath('descendant-or-self::meta[@name = ' . self::xpathLiteral($name) . ']');
        if (count($c) === 0) {
            $node = $this->dom->createElement('meta');
            $node->setAttribute('name', $name);
            $this->getHeadNode()->appendChild($node);
            $c->addNode($node);
        }
        $c->setAttribute('content', $content);
    }

    /**
     * Remove all meta tags with the specified name attribute
     *
     * @param string $name
     */
    public function removeMeta($name)
    {
        $meta = $this->filterXPath('descendant-or-self::meta[@name = ' . self::xpathLiteral($name) . ']');
        $meta->remove();
    }

    /**
     * Get the content attribute of a meta tag with the specified name attribute
     *
     * @param string $name
     * @return null|string The content attribute, or null if no such meta tag exists
     */
    public function getMeta($name)
    {
        $node = $this->filterXPath('descendant-or-self::meta[@name = ' . self::xpathLiteral($name) . ']')->getNode(0);
        if ($node instanceof \DOMElement) {
            return $node->getAttribute('content');
        }
        return null;
    }

    /**
     * Set the base tag with href attribute set to parameter $url
     *
     * A base element is appended to the head if the document has none.
     *
     * @param string $url
     */
    public function setBaseHref($url)
    {
        $node = $this->filterXPath('descendant-or-self::base')->getNode(0);
        if ($node == null) {
            $node = $this->dom->createElement('base');
            $this->getHeadNode()->appendChild($node);
        }
        $node->setAttribute('href', $url);
    }

    /**
     * Get the href attribute from the base tag, null if not present in document
     *
     * @return null|string
     */
    public function getBaseHref()
    {
        $node = $this->filterXPath('descendant-or-self::base')->getNode(0);
        if ($node instanceof \DOMElement) {
            return $node->getAttribute('href');
        }
        return null;
    }

    /**
     * Sets innerHTML content of an element specified by elementId
     *
     * @param string $elementId
     * @param string $html
     */
    public function setHtmlById($elementId, $html)
    {
        $this->getElementById($elementId)->setInnerHtml($html);
    }

    /**
     * Get the document's HEAD section as DOMElement
     *
     * If the document has no head element, one is created and inserted
     * before the body.
     *
     * @return \DOMElement
     */
    public function getHeadNode()
    {
        $head = $this->dom->getElementsByTagName('head')->item(0);
        if ($head === null) {
            $head = $this->dom->createElement('head');
            $head = $this->dom->documentElement->insertBefore($head, $this->getBodyNode());
        }
        return $head;
    }

    /**
     * Get the document's body as DOMElement
     *
     * If the document has no body element, one is created and appended
     * to the document element.
     *
     * @return \DOMElement
     */
    public function getBodyNode()
    {
        $body = $this->dom->getElementsByTagName('body')->item(0);
        if ($body === null) {
            $body = $this->dom->createElement('body');
            $body = $this->dom->documentElement->appendChild($body);
        }
        return $body;
    }

    /**
     * Get the document's HEAD section wrapped in a HtmlPageCrawler instance
     *
     * @return HtmlPageCrawler
     */
    public function getHead()
    {
        return new HtmlPageCrawler($this->getHeadNode());
    }

    /**
     * Get the document's body wrapped in a HtmlPageCrawler instance
     *
     * @return HtmlPageCrawler
     */
    public function getBody()
    {
        return new HtmlPageCrawler($this->getBodyNode());
    }

    /**
     * Serialize the document to HTML source code
     *
     * HTML entities in the serialized markup are converted back to the charset
     * given to the constructor when mbstring supports that charset.
     *
     * @return string
     */
    public function __toString()
    {
        $html = $this->dom->saveHTML();
        if (self::isMbConvertible($this->charset)) {
            $html = mb_convert_encoding($html, $this->charset, 'HTML-ENTITIES');
        }
        return $html;
    }

    /**
     * Save this document to a HTML file or return HTML code as string
     *
     * @param string $filename If provided, output will be saved to this file, otherwise returned
     * @return string|void
     */
    public function save($filename = '')
    {
        if ($filename != '') {
            file_put_contents($filename, $this->__toString());
            return;
        }
        return $this->__toString();
    }

    /**
     * Get an element in the document by its id attribute
     *
     * @param string $id
     * @return HtmlPageCrawler
     */
    public function getElementById($id)
    {
        return $this->filterXPath('descendant-or-self::*[@id = ' . self::xpathLiteral($id) . ']');
    }

    /**
     * Filter nodes by using a CSS selector
     *
     * @param string $selector CSS selector
     * @return HtmlPageCrawler
     */
    public function filter($selector)
    {
        //echo "\n" . CssSelector::toXPath($selector) . "\n";
        return $this->crawler->filter($selector);
    }

    /**
     * Filter nodes by XPath expression
     *
     * @param string $xpath XPath expression
     * @return HtmlPageCrawler
     */
    public function filterXPath($xpath)
    {
        return $this->crawler->filterXPath($xpath);
    }

    /**
     * remove newlines from string and minimize whitespace (multiple whitespace characters replaced by one space)
     *
     * useful for cleaning up text retrieved by HtmlPageCrawler::text() (nodeValue of a DOMNode)
     *
     * @param string $string
     * @return string
     */
    public static function trimNewlines($string)
    {
        return Helpers::trimNewlines($string);
    }

    /**
     * Deep-copy the DOM tree so the clone does not share nodes with the original,
     * and rebuild the crawler on top of the copied tree
     */
    public function __clone()
    {
        $this->dom = $this->dom->cloneNode(true);
        $this->crawler = new HtmlPageCrawler($this->dom);
    }

    /**
     * minify the HTML document
     *
     * @param array $options Options passed to PrettyMin::__construct()
     * @return HtmlPage
     * @throws \Exception if the PrettyMin class is not available
     */
    public function minify(array $options = array())
    {
        if (!class_exists('Wa72\\HtmlPrettymin\\PrettyMin')) {
            throw new \Exception('Function minify needs composer package wa72/html-pretty-min');
        }
        $pm = new PrettyMin($options);
        $pm->load($this->dom)->minify();
        return $this;
    }

    /**
     * indent the HTML document
     *
     * @param array $options Options passed to PrettyMin::__construct()
     * @return HtmlPage
     * @throws \Exception if the PrettyMin class is not available
     */
    public function indent(array $options = array())
    {
        if (!class_exists('Wa72\\HtmlPrettymin\\PrettyMin')) {
            throw new \Exception('Function indent needs composer package wa72/html-pretty-min');
        }
        $pm = new PrettyMin($options);
        $pm->load($this->dom)->indent();
        return $this;
    }

    /**
     * Check whether mbstring is available and knows the given charset
     *
     * The comparison against mb_list_encodings() is case-insensitive.
     *
     * @param string $charset
     * @return bool
     */
    protected static function isMbConvertible($charset)
    {
        return function_exists('mb_convert_encoding')
            && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()), true);
    }

    /**
     * Quote a string for safe use as a string literal inside an XPath 1.0 expression
     *
     * XPath 1.0 has no escape character inside string literals. A value is
     * therefore wrapped in whichever quote type it does not contain; if it
     * contains both, it is split on single quotes and rebuilt with concat().
     *
     * @param string $value
     * @return string XPath expression evaluating to $value
     */
    protected static function xpathLiteral($value)
    {
        $value = (string) $value;
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }
        $parts = array();
        foreach (explode("'", $value) as $index => $piece) {
            if ($index > 0) {
                // re-insert the single quote that explode() removed
                $parts[] = '"\'"';
            }
            if ($piece !== '') {
                $parts[] = "'" . $piece . "'";
            }
        }
        return 'concat(' . implode(', ', $parts) . ')';
    }
}


GitHub Access Token became invalid

Pull Request — master (#19)

HtmlPage B

Complexity

Size/Duplication

Coupling/Cohesion

Test Coverage

Importance

24 Methods

How to fix Complexity

Complex Class

1		<?php
2		namespace Wa72\HtmlPageDom;
3
4		use Symfony\Component\CssSelector\CssSelector;
5		use Wa72\HtmlPrettymin\PrettyMin;
6
7		/**
8		* This class represents a complete HTML document.
9		*
10		* It offers convenience functions for getting and setting elements of the document
11		* such as setTitle(), getTitle(), setMeta($name, $value), getBody().
12		*
13		* It uses HtmlPageCrawler to navigate and manipulate the DOM tree.
14		*
15		* @author Christoph Singer
16		* @license MIT
17		*/
18		class HtmlPage
19		{
20		/**
21		*
22		* @var \DOMDocument
23		*/
24		protected $dom;
25
26		/**
27		* @var string
28		*/
29		protected $charset;
30
31		/**
32		* @var string
33		*/
34		protected $url;
35
36		/**
37		*
38		* @var HtmlPageCrawler
39		*/
40		protected $crawler;
41
42	5	public function __construct($content = '', $url = '', $charset = 'UTF-8')
43		{
44	5	$this->charset = $charset;
45	5	$this->url = $url;
46	5	if ($content == '') {
47	2	$content = '<!DOCTYPE html><html><head><title></title></head><body></body></html>';
48		}
49	5	$current = libxml_use_internal_errors(true);
50	5	$disableEntities = libxml_disable_entity_loader(true);
51
52	5	$this->dom = new \DOMDocument('1.0', $charset);
53	5	$this->dom->loadHTML('<meta http-equiv="Content-Type" content="text/html;charset='.$charset.'">');
54	5	$this->dom->validateOnParse = true;
55
56
57	5	if (function_exists('mb_convert_encoding') && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()))) {
58	5	$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
59		}
60
61	5	@$this->dom->loadHTML($content);
		0 ignored issues – show Security Best Practice introduced 2016-03-18 20:10 UTC by Report Bug Copy Issue Report It seems like you do not handle an error condition here. This can introduce security issues, and is generally not recommended. If you suppress an error, we recommend checking for the error condition explicitly: // For example instead of @mkdir($dir); // Better use if (@mkdir($dir) === false) { throw new \RuntimeException('The directory '.$dir.' could not be created.'); } Loading history...
62
63	5	libxml_use_internal_errors($current);
64	5	libxml_disable_entity_loader($disableEntities);
65	5	$this->crawler = new HtmlPageCrawler($this->dom);
66	5	}
67
68		/**
69		* Get a HtmlPageCrawler object containing the root node of the HTML document
70		*
71		* @return HtmlPageCrawler
72		*/
73		public function getCrawler()
74		{
75		return $this->crawler;
76		}
77
78		/**
79		* Get a DOMDocument object for the HTML document
80		*
81		* @return \DOMDocument
82		*/
83		public function getDOMDocument()
84		{
85		return $this->dom;
86		}
87
88		/**
89		* Sets the page title of the HTML document
90		*
91		* @param string $title
92		*/
93	2	public function setTitle($title)
94		{
95	2	$t = $this->dom->getElementsByTagName('title')->item(0);
96	2	if ($t == null) {
97		$t = $this->dom->createElement('title');
98		$this->getHeadNode()->appendChild($t);
99		}
100	2	$t->nodeValue = htmlspecialchars($title);
101	2	}
102
103		/**
104		* Get the page title of the HTML document
105		*
106		* @return null\|string
107		*/
108	2	public function getTitle()
109		{
110	2	$t = $this->dom->getElementsByTagName('title')->item(0);
111	2	if ($t == null) {
112		return null;
113		} else {
114	2	return $t->nodeValue;
115		}
116		}
117
118		/**
119		* Set a META tag with specified 'name' and 'content' attributes
120		*
121		* @TODO: add support for multiple meta tags with the same name but different languages
122		*
123		* @param $name
124		* @param $content
125		*/
126	1	public function setMeta($name, $content)
127		{
128	1	$c = $this->filterXPath('descendant-or-self::meta[@name = \'' . $name . '\']');
129	1	if (count($c) == 0) {
130	1	$node = $this->dom->createElement('meta');
131	1	$node->setAttribute('name', $name);
132	1	$this->getHeadNode()->appendChild($node);
133	1	$c->addNode($node);
134		}
135	1	$c->setAttribute('content', $content);
136	1	}
137
138		/**
139		* Remove all meta tags with the specified name attribute
140		*
141		* @param string $name
142		*/
143	1	public function removeMeta($name)
144		{
145	1	$meta = $this->filterXPath('descendant-or-self::meta[@name = \'' . $name . '\']');
146	1	$meta->remove();
147	1	}
148
149		/**
150		* Get the content attribute of a meta tag with the specified name attribute
151		*
152		* @param string $name
153		* @return null\|string
154		*/
155	1	public function getMeta($name)
156		{
157	1	$node = $this->filterXPath('descendant-or-self::meta[@name = \'' . $name . '\']')->getNode(0);
158	1	if ($node instanceof \DOMElement) {
159	1	return $node->getAttribute('content');
160		} else {
161	1	return null;
162		}
163		}
164
165		/**
166		* Set the base tag with href attribute set to parameter $url
167		*
168		* @param string $url
169		*/
170		public function setBaseHref($url)
171		{
172		$node = $this->filterXPath('descendant-or-self::base')->getNode(0);
173		if ($node == null) {
174		$node = $this->dom->createElement('base');
175		$this->getHeadNode()->appendChild($node);
176		}
177		$node->setAttribute('href', $url);
178		}
179
180		/**
181		* Get the href attribute from the base tag, null if not present in document
182		*
183		* @return null\|string
184		*/
185		public function getBaseHref()
186		{
187		$node = $this->filterXPath('descendant-or-self::base')->getNode(0);
188		if ($node instanceof \DOMElement) {
189		return $node->getAttribute('href');
190		} else {
191		return null;
192		}
193		}
194
195		/**
196		* Sets innerHTML content of an element specified by elementId
197		*
198		* @param string $elementId
199		* @param string $html
200		*/
201	1	public function setHtmlById($elementId, $html)
202		{
203	1	$this->getElementById($elementId)->setInnerHtml($html);
204	1	}
205
206		/**
207		* Get the document's HEAD section as DOMElement
208		*
209		* @return \DOMElement
210		*/
211	1	public function getHeadNode()
212		{
213	1	$head = $this->dom->getElementsByTagName('head')->item(0);
214	1	if ($head == null) {
215		$head = $this->dom->createElement('head');
216		$head = $this->dom->documentElement->insertBefore($head, $this->getBodyNode());
217		}
218	1	return $head;
219		}
220
221		/**
222		* Get the document's body as DOMElement
223		*
224		* @return \DOMElement
225		*/
226	1	public function getBodyNode()
227		{
228	1	$body = $this->dom->getElementsByTagName('body')->item(0);
229	1	if ($body == null) {
230		$body = $this->dom->createElement('body');
231		$body = $this->dom->documentElement->appendChild($body);
232		}
233	1	return $body;
234		}
235
236		/**
237		* Get the document's HEAD section wrapped in a HtmlPageCrawler instance
238		*
239		* @return HtmlPageCrawler
240		*/
241		public function getHead()
242		{
243		return new HtmlPageCrawler($this->getHeadNode());
244		}
245
246		/**
247		* Get the document's body wrapped in a HtmlPageCrawler instance
248		*
249		* @return HtmlPageCrawler
250		*/
251	1	public function getBody()
252		{
253	1	return new HtmlPageCrawler($this->getBodyNode());
254		}
255
256	5	public function __toString()
257		{
258	5	$html = $this->dom->saveHTML();
259	5	if (function_exists('mb_convert_encoding') && in_array(strtolower($this->charset), array_map('strtolower', mb_list_encodings()))) {
260	5	$html = mb_convert_encoding($html, $this->charset, 'HTML-ENTITIES');
261		}
262	5	return $html;
263		}
264
265		/**
266		* Save this document to a HTML file or return HTML code as string
267		*
268		* @param string $filename If provided, output will be saved to this file, otherwise returned
269		* @return string\|void
270		*/
271	3	public function save($filename = '')
272		{
273	3	if ($filename != '') {
274		file_put_contents($filename, $this->__toString());
275		return;
276		} else {
277	3	return $this->__toString();
278		}
279		}
280
281		/**
282		* Get an element in the document by its id attribute
283		*
284		* @param string $id
285		* @return HtmlPageCrawler
286		*/
287	1	public function getElementById($id)
288		{
289	1	return $this->filterXPath('descendant-or-self::*[@id = \'' . $id . '\']');
290		}
291
292		/**
293		* Filter nodes by using a CSS selector
294		*
295		* @param string $selector CSS selector
296		* @return HtmlPageCrawler
297		*/
298	1	public function filter($selector)
299		{
300		//echo "\n" . CssSelector::toXPath($selector) . "\n";
301	1	return $this->crawler->filter($selector);
302		}
303
304		/**
305		* Filter nodes by XPath expression
306		*
307		* @param string $xpath XPath expression
308		* @return HtmlPageCrawler
309		*/
310	1	public function filterXPath($xpath)
311		{
312	1	return $this->crawler->filterXPath($xpath);
313		}
314
315		/**
316		* remove newlines from string and minimize whitespace (multiple whitespace characters replaced by one space)
317		*
318		* useful for cleaning up text retrieved by HtmlPageCrawler::text() (nodeValue of a DOMNode)
319		*
320		* @param string $string
321		* @return string
322		*/
323		public static function trimNewlines($string)
324		{
325		return Helpers::trimNewlines($string);
326		}
327
328	1	public function __clone()
329		{
330	1	$this->dom = $this->dom->cloneNode(true);
331	1	$this->crawler = new HtmlPageCrawler($this->dom);
332	1	}
333
334		/**
335		* minify the HTML document
336		*
337		* @param array $options Options passed to PrettyMin::__construct()
338		* @return HtmlPage
339		* @throws \Exception
340		*/
341	1	public function minify(array $options = array())
342		{
343	1	if (!class_exists('Wa72\\HtmlPrettymin\\PrettyMin')) {
344		throw new \Exception('Function minify needs composer package wa72/html-pretty-min');
345		}
346	1	$pm = new PrettyMin($options);
347	1	$pm->load($this->dom)->minify();
348	1	return $this;
349		}
350
351		/**
352		* indent the HTML document
353		*
354		* @param array $options Options passed to PrettyMin::__construct()
355		* @return HtmlPage
356		* @throws \Exception
357		*/
358	1	public function indent(array $options = array())
359		{
360	1	if (!class_exists('Wa72\\HtmlPrettymin\\PrettyMin')) {
361		throw new \Exception('Function indent needs composer package wa72/html-pretty-min');
362		}
363	1	$pm = new PrettyMin($options);
364	1	$pm->load($this->dom)->indent();
365	1	return $this;
366		}
367		}
368

wasinger / htmlpagedom

GitHub Access Token became invalid

Pull Request — master (#19)

HtmlPage B

Complexity

Size/Duplication

Coupling/Cohesion

Test Coverage

Importance

24 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like