1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SilverStripe\Model; |
4
|
|
|
use DOMXPath; |
5
|
|
|
use Convert; |
6
|
|
|
use DOMDocument; |
7
|
|
|
use ViewableData; |
8
|
|
|
|
9
|
|
|
/**
 * This class handles the converting of HTML fragments between a string and a DOMDocument based
 * representation.
 *
 * It's designed to allow dependency injection to replace the standard HTML4 version with one that
 * handles XHTML or HTML5 instead
 *
 * @package framework
 * @subpackage integration
 */
abstract class HTMLValue extends ViewableData {

    /** @var DOMDocument|false|null - null until first requested, false once marked invalid */
    private $document = null;

    /** @var bool */
    private $valid = true;

    /**
     * @param string|null $fragment Optional HTML fragment to load immediately via setContent()
     */
    public function __construct($fragment = null) {
        if ($fragment) $this->setContent($fragment);
        parent::__construct();
    }

    /**
     * Load an HTML fragment into this value. Implemented by subclasses for a specific HTML flavour.
     *
     * @param string $fragment
     */
    abstract public function setContent($fragment);

    /**
     * Serialise the children of the body element back to an HTML string.
     *
     * @return string The body content, or an empty string if this value is invalid or has no body
     */
    public function getContent() {
        $document = $this->getDocument();

        // getDocument() returns false when in an invalid state; cloning false would be a fatal error
        if (!$document) return '';

        // Work on a copy, since attribute values are temporarily overwritten below
        $doc = clone $document;
        $xp = new DOMXPath($doc);

        // If there's no body, the content is empty string
        if (!$doc->getElementsByTagName('body')->length) return '';

        // saveHTML Percentage-encodes any URI-based attributes. We don't want this, since it interferes with
        // shortcodes. So first, save all the attribute values for later restoration.
        $attrs = array();
        $i = 0;

        foreach ($xp->query('//body//@*') as $attr) {
            $key = "__HTMLVALUE_".($i++);
            $attrs[$key] = $attr->value;
            $attr->value = $key;
        }

        // Then, call saveHTML & extract out the content from the body tag
        $res = preg_replace(
            array(
                '/^(.*?)<body>/is',
                '/<\/body>(.*?)$/isD',
            ),
            '',
            $doc->saveHTML()
        );

        // Then replace the saved attributes with their original versions
        $res = preg_replace_callback('/__HTMLVALUE_(\d+)/', function($matches) use ($attrs) {
            // Text that merely looks like a placeholder (no saved attribute) is left untouched
            if (!isset($attrs[$matches[0]])) return $matches[0];
            return Convert::raw2att($attrs[$matches[0]]);
        }, $res);

        // Prevent &nbsp; being encoded as literal utf-8 characters: turn the UTF-8 encoded
        // non-breaking space (U+00A0, bytes C2 A0) back into the named entity.
        // Possible alternative solution: http://stackoverflow.com/questions/2142120/php-encoding-with-domdocument
        $res = str_replace("\xC2\xA0", '&nbsp;', $res);

        return $res;
    }

    /** @see HTMLValue::getContent() */
    public function forTemplate() {
        return $this->getContent();
    }

    /**
     * Get the DOMDocument for the passed content, creating an empty one on first use
     *
     * @return DOMDocument | false - Return false if HTML not valid, the DOMDocument instance otherwise
     */
    public function getDocument() {
        if (!$this->valid) {
            return false;
        }
        else if ($this->document) {
            return $this->document;
        }
        else {
            $this->document = new DOMDocument('1.0', 'UTF-8');
            $this->document->strictErrorChecking = false;
            $this->document->formatOutput = false;

            return $this->document;
        }
    }

    /**
     * Is this HTMLValue in a usable state? Returns false once setInvalid() has been called
     * and no document has been set since.
     *
     * @return bool
     */
    public function isValid() {
        return $this->valid;
    }

    /**
     * Replace the underlying document and mark this value as valid again.
     *
     * @param DOMDocument $document
     */
    public function setDocument($document) {
        $this->document = $document;
        $this->valid = true;
    }

    /**
     * Flag this value as invalid and discard the document; getDocument() returns false afterwards.
     */
    public function setInvalid() {
        $this->document = $this->valid = false;
    }

    /**
     * Pass through any missed method calls to DOMDocument (if they exist)
     * so that HTMLValue can be treated mostly like an instance of DOMDocument
     *
     * @param string $method
     * @param array $arguments
     * @return mixed
     */
    public function __call($method, $arguments) {
        $doc = $this->getDocument();

        // $doc is false when invalid; method_exists() rejects non-object/non-string arguments on PHP 8
        if ($doc && method_exists($doc, $method)) {
            return call_user_func_array(array($doc, $method), $arguments);
        }
        else {
            return parent::__call($method, $arguments);
        }
    }

    /**
     * Get the body element, or false if there isn't one (we haven't loaded any content
     * or this instance is in an invalid state)
     *
     * @return DOMNode|false
     */
    public function getBody() {
        $doc = $this->getDocument();
        if (!$doc) return false;

        $body = $doc->getElementsByTagName('body');
        if (!$body->length) return false;

        return $body->item(0);
    }

    /**
     * Make an xpath query against this HTML
     *
     * Note: getDocument() returns false for an invalid value, which DOMXPath cannot accept;
     * check isValid() before calling.
     *
     * @param $query string - The xpath query string
     * @return DOMNodeList
     */
    public function query($query) {
        $xp = new DOMXPath($this->getDocument());
        return $xp->query($query);
    }
}
161
|
|
|
|
162
|
|
|
class HTML4Value extends HTMLValue {

    /**
     * Load an HTML fragment into this value's DOMDocument. The fragment is wrapped in a minimal
     * HTML page whose meta tag declares a UTF-8 content type before being handed to loadHTML().
     *
     * @param string $content HTML fragment to load
     * @return bool The result of DOMDocument::loadHTML()
     */
    public function setContent($content) {
        // Strip \r (carriage return) characters up front, otherwise DOMDocument replaces them with
        // the "&#13;" entity. That is apparently XML spec behaviour, but it messes up the HTML.
        $content = str_replace("\r", '', $content);

        // A value that was flagged invalid gets its document reset before loading new content
        if (!$this->isValid()) {
            $this->setDocument(null);
        }

        $markup = '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>'
            . '<body>' . $content . '</body></html>';

        // Collect libxml errors internally while parsing, then discard them and restore the
        // previous error-handling setting
        $previousSetting = libxml_use_internal_errors(true);
        $loaded = $this->getDocument()->loadHTML($markup);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        return $loaded;
    }
}
186
|
|
|
|
If you return a value from a function or method, it should be a sub-type of the type declared by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, which guarantees that code depending on the parent type can use any instance of a child type interchangeably. This principle is also one of the SOLID principles of object-oriented design.
Let’s take a look at an example:
Our function `my_function` expects a `Post` object, and outputs the author of the post. The base class `Post` returns a simple string, and outputting a simple string will work just fine. However, the child class `BlogPost`, which is a sub-type of `Post`, instead decided to return an `object`, and is therefore violating the SOLID principles. If a `BlogPost` were passed to `my_function`, PHP would not complain, but would ultimately fail when executing the `strtoupper` call in its body.