Completed
Push — namespace-model ( 303e01...f05869 )
by Sam
14:24 queued 06:36
created

HTML4Value::setContent()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 17
Code Lines 10

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 2
eloc 10
nc 2
nop 1
dl 0
loc 17
rs 9.4285
1
<?php
2
3
namespace SilverStripe\Model;
4
use DOMXPath;
5
use Convert;
6
use DOMDocument;
7
use ViewableData;
8
9
/**
10
 * This class handles the converting of HTML fragments between a string and a DOMDocument based
11
 * representation.
12
 *
13
 * It's designed to allow dependency injection to replace the standard HTML4 version with one that
14
 * handles XHTML or HTML5 instead
15
 *
16
 * @package framework
17
 * @subpackage integration
18
 */
19
abstract class HTMLValue extends ViewableData {
20
21
	/**
	 * Optionally load an initial fragment, then run the parent constructor.
	 *
	 * @param string|null $fragment Handed to setContent() when truthy; otherwise no content is loaded
	 */
	public function __construct($fragment = null) {
		// A falsy fragment (null, empty string, ...) leaves the value without content
		if ($fragment) {
			$this->setContent($fragment);
		}

		parent::__construct();
	}
25
26
	/**
	 * Load the given fragment as the content of this value.
	 *
	 * Concrete subclasses decide how the fragment is parsed; the constructor calls this
	 * whenever it is given a truthy fragment.
	 *
	 * @param string $fragment
	 */
	abstract public function setContent($fragment);
27
28
	/**
	 * Serialise the contents of the document's body element back into an HTML string.
	 *
	 * @return string The markup inside the body element, or an empty string when this value is
	 *                in the invalid state or the document has no body
	 */
	public function getContent() {
		$source = $this->getDocument();

		// getDocument() returns false while in the invalid state; cloning that would be a fatal error
		if (!$source) return '';

		// Work on a copy so the attribute rewriting below never touches the live document
		$doc = clone $source;
		$xp = new DOMXPath($doc);

		// If there's no body, the content is empty string
		if (!$doc->getElementsByTagName('body')->length) return '';

		// saveHTML Percentage-encodes any URI-based attributes. We don't want this, since it interferes with
		// shortcodes. So first, save all the attribute values for later restoration.
		$attrs = array();
		$i = 0;

		foreach ($xp->query('//body//@*') as $attr) {
			$key = "__HTMLVALUE_".($i++);
			$attrs[$key] = $attr->value;
			$attr->value = $key;
		}

		// Then, call saveHTML & extract out the content from the body tag
		$res = preg_replace(
			array(
				'/^(.*?)<body>/is',
				'/<\/body>(.*?)$/isD',
			),
			'',
			$doc->saveHTML()
		);

		// Then replace the saved attributes with their original versions
		$res = preg_replace_callback('/__HTMLVALUE_(\d+)/', function($matches) use ($attrs) {
			// Text that merely looks like a placeholder has no saved value; leave it untouched
			if (!isset($attrs[$matches[0]])) return $matches[0];

			return Convert::raw2att($attrs[$matches[0]]);
		}, $res);

		// Prevent &nbsp; being encoded as literal utf-8 characters
		// Possible alternative solution: http://stackoverflow.com/questions/2142120/php-encoding-with-domdocument
		$from = mb_convert_encoding('&nbsp;', 'utf-8', 'html-entities');
		$res = str_replace($from, '&nbsp;', $res);

		return $res;
	}
70
71
	/**
	 * Returns exactly what getContent() returns.
	 *
	 * @see HTMLValue::getContent()
	 * @return string
	 */
	public function forTemplate() {
		$content = $this->getContent();

		return $content;
	}
75
76
	/** @var DOMDocument */
77
	private $document = null;
78
	/** @var bool */
79
	private $valid = true;
80
81
	/**
82
	 * Get the DOMDocument for the passed content
83
	 * @return DOMDocument | false - Return false if HTML not valid, the DOMDocument instance otherwise
84
	 */
85
	public function getDocument() {
86
		if (!$this->valid) {
87
			return false;
0 ignored issues
show
Bug Best Practice introduced by
The return type of return false; (false) is incompatible with the return type documented by SilverStripe\Model\HTMLValue::getDocument of type DOMDocument.

If you return a value from a function or method, it should be a sub-type of the type that is given by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle also belongs to the SOLID principles for object-oriented design.

Let’s take a look at an example:

class Author {
    private $name;

    public function __construct($name) {
        $this->name = $name;
    }

    public function getName() {
        return $this->name;
    }
}

abstract class Post {
    public function getAuthor() {
        return 'Johannes';
    }
}

class BlogPost extends Post {
    public function getAuthor() {
        return new Author('Johannes');
    }
}

class ForumPost extends Post { /* ... */ }

function my_function(Post $post) {
    echo strtoupper($post->getAuthor());
}

Our function my_function expects a Post object, and outputs the author of the post. The base class Post returns a simple string and outputting a simple string will work just fine. However, the child class BlogPost which is a sub-type of Post instead decided to return an object, and is therefore violating the SOLID principles. If a BlogPost were passed to my_function, PHP would not complain, but ultimately fail when executing the strtoupper call in its body.

Loading history...
88
		}
89
		else if ($this->document) {
90
			return $this->document;
91
		}
92
		else {
93
			$this->document = new DOMDocument('1.0', 'UTF-8');
94
			$this->document->strictErrorChecking = false;
95
			$this->document->formatOutput = false;
96
97
			return $this->document;
98
		}
99
	}
100
101
	/**
	 * Is this HTMLValue in a valid (non-errored) state?
	 *
	 * The flag is cleared by setInvalid() and restored by setDocument().
	 *
	 * @return bool True when valid, false when in the errored state
	 */
	public function isValid() {
		return $this->valid;
	}
108
109
	/**
	 * Replace the wrapped document and mark this value as valid again.
	 *
	 * @param DOMDocument|null $document Null clears the stored document, so that getDocument()
	 *                                   creates a fresh one on its next call
	 */
	public function setDocument($document) {
		// Assigning any document, including null, always leaves the invalid state
		$this->valid = true;
		$this->document = $document;
	}
116
117
	public function setInvalid() {
118
		$this->document = $this->valid = false;
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->valid = false of type false is incompatible with the declared type object<DOMDocument> of property $document.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
119
	}
120
121
	/**
	 * Pass through any missed method calls to DOMDocument (if they exist)
	 * so that HTMLValue can be treated mostly like an instance of DOMDocument.
	 *
	 * Anything the document does not implement, and every call made while this value is in the
	 * invalid state, is forwarded to the parent __call().
	 *
	 * @param string $method Name of the method that was called
	 * @param array $arguments Arguments supplied to the call
	 * @return mixed Whatever the resolved method returns
	 */
	public function __call($method, $arguments) {
		$doc = $this->getDocument();

		// getDocument() returns false in the invalid state; only probe a real document,
		// because method_exists() rejects a boolean first argument on PHP 8
		if ($doc && method_exists($doc, $method)) {
			return call_user_func_array(array($doc, $method), $arguments);
		}

		return parent::__call($method, $arguments);
	}
135
136
	/**
	 * Fetch the first body element of the document.
	 *
	 * @return DOMNode|false The body element, or false when there is none (no content has been
	 *                       loaded, or this instance is in an invalid state)
	 */
	public function getBody() {
		$document = $this->getDocument();

		if ($document) {
			$bodies = $document->getElementsByTagName('body');

			if ($bodies->length) {
				return $bodies->item(0);
			}
		}

		// Either no usable document or no body element inside it
		return false;
	}
149
150
	/**
	 * Make an xpath query against this HTML
	 *
	 * When this value is in the invalid state the query runs against an empty document instead,
	 * so callers still receive a node list rather than triggering an error.
	 *
	 * @param $query string - The xpath query string
	 * @return DOMNodeList
	 */
	public function query($query) {
		// getDocument() returns false when invalid, which DOMXPath cannot accept
		$doc = $this->getDocument() ?: new DOMDocument();
		$xp = new DOMXPath($doc);

		return $xp->query($query);
	}
160
}
161
162
class HTML4Value extends HTMLValue {
163
164
	/**
165
	 * @param string $content
166
	 * @return bool
167
	 */
168
	public function setContent($content) {
169
		// Ensure that \r (carriage return) characters don't get replaced with "&#13;" entity by DOMDocument
170
		// This behaviour is apparently XML spec, but we don't want this because it messes up the HTML
171
		$content = str_replace(chr(13), '', $content);
172
173
		// Reset the document if we're in an invalid state for some reason
174
		if (!$this->isValid()) $this->setDocument(null);
0 ignored issues
show
Documentation introduced by
null is of type null, but the function expects an object<DOMDocument>.

It seems like the type of the argument is not accepted by the function/method which you are calling.

In some cases, in particular if PHP’s automatic type-juggling kicks in, this might be fine. In other cases, however, this might be a bug.

We suggest adding an explicit type cast like in the following example:

function acceptsInteger($int) { }

$x = '123'; // string "123"

// Instead of
acceptsInteger($x);

// we recommend to use
acceptsInteger((integer) $x);
Loading history...
175
176
		$errorState = libxml_use_internal_errors(true);
177
		$result = $this->getDocument()->loadHTML(
178
			'<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>' .
179
			"<body>$content</body></html>"
180
		);
181
		libxml_clear_errors();
182
		libxml_use_internal_errors($errorState);
183
		return $result;
184
	}
185
}
186