<?php

/**
 * Represents a large text field that contains HTML content.
 *
 * This behaves similarly to {@link Text}, but the template processor won't escape any HTML content within it.
 *
 * @see HTMLVarchar
 * @see Text
 * @see Varchar
 *
 * @package framework
 * @subpackage model
 */
class HTMLText extends Text {

    private static $escape_type = 'xml';

    private static $casting = array(
        "AbsoluteLinks" => "HTMLText",
        "BigSummary" => "HTMLText",
        "ContextSummary" => "HTMLText",
        "FirstParagraph" => "HTMLText",
        "FirstSentence" => "HTMLText",
        "LimitCharacters" => "HTMLText",
        "LimitSentences" => "HTMLText",
        "Lower" => "HTMLText",
        "LowerCase" => "HTMLText",
        "Summary" => "HTMLText",
        "Upper" => "HTMLText",
        "UpperCase" => "HTMLText",
        'EscapeXML' => 'HTMLText',
        'LimitWordCount' => 'HTMLText',
        'LimitWordCountXML' => 'HTMLText',
        'NoHTML' => 'Text',
    );

    /**
     * Whether {@link RAW()} runs the value through the shortcode parser.
     *
     * @var bool
     */
    protected $processShortcodes = true;

    /**
     * Check if shortcodes are enabled
     *
     * @return bool
     */
    public function getProcessShortcodes() {
        return $this->processShortcodes;
    }

    /**
     * Set shortcodes on or off by default
     *
     * @param bool $process
     * @return $this
     */
    public function setProcessShortcodes($process) {
        $this->processShortcodes = (bool)$process;
        return $this;
    }

    /**
     * Element names (optionally including the 'text()' directive) allowed by {@link whitelistContent()}.
     * Any falsy value (the default false, or an empty array) disables whitelisting.
     *
     * @var array|bool
     */
    protected $whitelist = false;

    /**
     * @param string $name
     * @param array|string $options Option map as accepted by {@link setOptions()}. A plain string is treated as
     *                              shorthand for the 'whitelist' option.
     */
    public function __construct($name = null, $options = array()) {
        if (is_string($options)) {
            $options = array('whitelist' => $options);
        }

        parent::__construct($name, $options);
    }

    /**
     * @param array $options
     *
     * Options accepted in addition to those provided by Text:
     *
     *   - shortcodes: If true, shortcodes will be turned into the appropriate HTML.
     *                 If false, shortcodes will not be processed.
     *
     *   - whitelist: If provided, an array or a comma-separated list of elements that will be allowed to be stored
     *                (be careful on relying on this for XSS protection - some seemingly-safe elements allow
     *                attributes that can be exploited, for instance <img onload="exploiting_code();" src="..." />)
     *                Text nodes outside of HTML tags are filtered out by default, but may be included by adding
     *                the text() directive. E.g. 'link,meta,text()' will allow only <link /> <meta /> and text at
     *                the root level. An empty list disables the whitelist.
     */
    public function setOptions(array $options = array()) {
        parent::setOptions($options);

        if (array_key_exists("shortcodes", $options)) {
            $this->processShortcodes = (bool)$options["shortcodes"];
        }

        if (array_key_exists("whitelist", $options)) {
            $whitelist = $options['whitelist'];

            if (!is_array($whitelist)) {
                // Tolerate whitespace around entries and drop empty ones: an empty tag name would otherwise
                // end up in the XPath expression built by whitelistContent() and make it invalid
                $whitelist = preg_split('/\s*,\s*/', trim((string)$whitelist), -1, PREG_SPLIT_NO_EMPTY);
            }

            $this->whitelist = $whitelist;
        }
    }

    /**
     * Create a summary of the content. This will be some section of the first paragraph, limited by
     * $maxWords. All internal tags are stripped out - the return value is a string
     *
     * This is sort of the HTML aware equivalent to Text#Summary, although the logic for summarising is not exactly
     * the same
     *
     * @param int $maxWords Maximum number of words to return - may return less, but never more. Pass -1 for no limit
     * @param int $flex Number of words to search through when looking for a nice cut point
     * @param string $add What to add to the end of the summary if we cut at a less-than-ideal cut point
     * @return string A nice(ish) summary with no html tags (but possibly still some html entities)
     *
     * @see framework/core/model/fieldtypes/Text#Summary($maxWords)
     */
    public function Summary($maxWords = 50, $flex = 15, $add = '...') {
        $str = false;

        /* First we need the text of the first paragraph, without tags. Try using DOM + SimpleXML first */
        if (class_exists('DOMDocument') && class_exists('SimpleXMLElement')) {
            $doc = new DOMDocument();

            // Nonbreaking spaces get converted into weird characters, so turn both the entity and the literal
            // UTF-8 character into plain spaces. This is done before the temporary error handler is installed so
            // that a notice raised while rendering the value can't escape with that handler still active.
            $value = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $this->RAW());

            // Catch warnings thrown by loadHTML and turn them into a failure boolean rather than a SilverStripe error
            set_error_handler(function($no, $message) {
                throw new Exception("HTML Parse Error: " . $message);
            }, E_ALL);

            try {
                $res = $doc->loadHTML('<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>' . $value);
            }
            catch (Exception $e) {
                $res = false;
            }
            restore_error_handler();

            if ($res) {
                $xml = simplexml_import_dom($doc);
                $paragraphs = $xml ? $xml->xpath('//p') : false;
                if (!empty($paragraphs)) $str = strip_tags($paragraphs[0]->asXML());
            }
        }

        /* If that failed, most likely the passed HTML is broken. use a simple regex + a custom more brutal strip_tags.
         * We don't use strip_tags because that does very badly on broken HTML */
        if (!$str) {
            /* See if we can pull a paragraph out */

            // Strip out any images in case there's one at the beginning. Not doing this will return a blank paragraph
            $str = preg_replace('{^\s*(<.+?>)*<img[^>]*>}u', '', $this->value);
            if (preg_match('{<p(\s[^<>]*)?>(.*[A-Za-z]+.*)</p>}u', $str, $matches)) $str = $matches[2];

            /* If _that_ failed, just use the whole text */
            if (!$str) $str = $this->value;

            /* Now pull out all the html-alike stuff */
            /* Take out anything that is obviously a tag */
            $str = preg_replace('{</?[a-zA-Z]+[^<>]*>}', '', $str);
            /* Strip out any left over looking bits. Textual < or > should already be encoded to &lt; or &gt; */
            $str = preg_replace('{</|<|>}', '', $str);
        }

        /* Now split into words. If we are under the maxWords limit, just return the whole string (re-implode for
         * whitespace normalization) */
        $words = preg_split('/\s+/u', $str);
        if ($maxWords == -1 || count($words) <= $maxWords) return implode(' ', $words);

        /* Otherwise work backwards looking for a sentence ending (we try to avoid abbreviations, but aren't
         * very good at it). The scan starts at the last word that still fits ($maxWords - 1), so that cutting
         * after word $i never yields more than $maxWords words. */
        for ($i = $maxWords - 1; $i >= $maxWords - $flex && $i >= 0; $i--) {
            if ($this->isSentenceEnd($words[$i], '.')) {
                return implode(' ', array_slice($words, 0, $i + 1));
            }
        }

        // If we didn't find a sentence ending quickly enough, just cut at the maxWords point and add '...' to the end
        return implode(' ', array_slice($words, 0, $maxWords)) . $add;
    }

    /**
     * Returns the first sentence from the first paragraph. If it can't figure out what the first paragraph is (or
     * there isn't one), it returns the same as Summary()
     *
     * This is the HTML aware equivalent to Text#FirstSentence
     *
     * @see framework/core/model/fieldtypes/Text#FirstSentence()
     *
     * @return string
     */
    public function FirstSentence() {
        /* Use summary's html processing logic to get the first paragraph */
        $paragraph = $this->Summary(-1);

        /* Then look for the first sentence ending. We could probably use a nice regex, but for now this will do */
        $words = preg_split('/\s+/u', $paragraph);
        foreach ($words as $i => $word) {
            if ($this->isSentenceEnd($word, '.!?')) {
                return implode(' ', array_slice($words, 0, $i + 1));
            }
        }

        /* If we didn't find a sentence ending, use the summary. We re-call rather than using paragraph so that
         * Summary will limit the result this time */
        return $this->Summary();
    }

    /**
     * Decide whether a single word closes a sentence: it has to end in one of the given punctuation characters
     * and must not be a common title/abbreviation such as "Dr." or "No.".
     *
     * The abbreviation has to start the word (or follow a non-letter, e.g. an opening bracket), so ordinary
     * words that merely end in the same letters ("items.", "piano.") still count as sentence endings.
     *
     * @param string $word A single whitespace-delimited word
     * @param string $terminators Punctuation characters accepted as sentence endings, e.g. '.!?'
     * @return bool
     */
    private function isSentenceEnd($word, $terminators) {
        if (!preg_match('/[' . preg_quote($terminators, '/') . ']$/', $word)) {
            return false;
        }

        return !preg_match('/(?<!\pL)(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/iu', $word);
    }

    /**
     * Return the stored value without escaping, with shortcodes parsed when shortcode processing is enabled.
     *
     * @return string
     */
    public function RAW() {
        if ($this->processShortcodes) {
            return ShortcodeParser::get_active()->parse($this->value);
        }

        return $this->value;
    }

    /**
     * Return the value of the field with relative links converted to absolute urls (with placeholders parsed).
     * @return string
     */
    public function AbsoluteLinks() {
        return HTTP::absoluteURLs($this->forTemplate());
    }

    /**
     * Value used when the field is rendered in a template: identical to {@link RAW()}.
     *
     * @return string
     */
    public function forTemplate() {
        return $this->RAW();
    }

    /**
     * Run the value through the whitelist filter before handing it to the parent implementation.
     *
     * @param string $value
     * @return mixed Whatever the parent prepValueForDB() returns for the filtered value
     */
    public function prepValueForDB($value) {
        return parent::prepValueForDB($this->whitelistContent($value));
    }

    /**
     * Filter the given $value string through the whitelist filter
     *
     * @param string $value Input html content
     * @return string Value with all non-whitelisted content stripped (if applicable)
     */
    public function whitelistContent($value) {
        if (!$this->whitelist) {
            return $value;
        }

        $dom = Injector::inst()->create('HTMLValue', $value);

        $exclusions = array();
        // By default text nodes directly below <body> are removed as well
        $textFilter = ' | //body/text()';
        foreach ($this->whitelist as $tag) {
            if ($tag === 'text()') {
                $textFilter = ''; // Disable text filter if allowed
            } else {
                $exclusions[] = 'not(self::' . $tag . ')';
            }
        }

        // When only text() is whitelisted there are no element names: every element has to go, and the
        // predicate must be left out entirely because an empty "[]" is not valid XPath
        $predicate = $exclusions ? '[' . implode(' and ', $exclusions) . ']' : '';

        foreach ($dom->query('//body//*' . $predicate . $textFilter) as $el) {
            // Nodes nested inside an already removed element no longer need detaching
            if ($el->parentNode) $el->parentNode->removeChild($el);
        }

        return $dom->getContent();
    }

    /**
     * Returns true if the field has meaningful content.
     * Excludes null content like <h1></h1>, <p></p> ,etc
     *
     * @return boolean
     */
    public function exists() {
        $value = $this->value;

        if (!$this->isPopulated($value)) {
            return false;
        }

        // If it's got a content tag
        if (preg_match('/<(img|embed|object|iframe|meta|source|link)[^>]*>/i', $value)) {
            return true;
        }

        // If it's just one or two tags on its own (and not the above) it's empty.
        // This might be <p></p> or <h1></h1> or whatever.
        if (preg_match('/^[\\s]*(<[^>]+>[\\s]*){1,2}$/u', $value)) {
            return false;
        }

        // Otherwise its content is genuine content
        return true;
    }

    /**
     * @param string $title
     * @param array $params Not used by this implementation
     * @return HtmlEditorField
     */
    public function scaffoldFormField($title = null, $params = null) {
        return new HtmlEditorField($this->name, $title);
    }

    /**
     * @param string $title
     * @param array $params Not used by this implementation
     * @return TextField
     */
    public function scaffoldSearchField($title = null, $params = null) {
        return new TextField($this->name, $title);
    }

}