<?php
/**
 * Represents a large text field that contains HTML content.
 *
 * This behaves similarly to {@link Text}, but the template processor won't escape any HTML content within it.
 *
 * @see HTMLVarchar
 * @see Text
 * @see Varchar
 *
 * @package framework
 * @subpackage model
 */
class HTMLText extends Text {

	private static $escape_type = 'xml';

	private static $casting = array(
		"AbsoluteLinks" => "HTMLText",
		"BigSummary" => "HTMLText",
		"ContextSummary" => "HTMLText",
		"FirstParagraph" => "HTMLText",
		"FirstSentence" => "HTMLText",
		"LimitCharacters" => "HTMLText",
		"LimitSentences" => "HTMLText",
		"Lower" => "HTMLText",
		"LowerCase" => "HTMLText",
		"Summary" => "HTMLText",
		"Upper" => "HTMLText",
		"UpperCase" => "HTMLText",
		'EscapeXML' => 'HTMLText',
		'LimitWordCount' => 'HTMLText',
		'LimitWordCountXML' => 'HTMLText',
		'NoHTML' => 'Text',
	);

	/**
	 * Whether {@link RAW()} runs the stored value through the active shortcode parser.
	 *
	 * @var bool
	 */
	protected $processShortcodes = true;

	/**
	 * Check if shortcodes are enabled
	 *
	 * @return bool
	 */
	public function getProcessShortcodes() {
		return $this->processShortcodes;
	}

	/**
	 * Set shortcodes on or off by default
	 *
	 * @param bool $process
	 * @return $this
	 */
	public function setProcessShortcodes($process) {
		$this->processShortcodes = (bool)$process;
		return $this;
	}

	/**
	 * List of allowed element names (optionally including the "text()" directive), or false when no
	 * whitelist filtering is applied. See {@link setOptions()}.
	 *
	 * @var array|bool
	 */
	protected $whitelist = false;

	/**
	 * @param string $name Field name, passed through to the parent constructor
	 * @param array|string $options Options array (see {@link setOptions()}). A plain string is treated as
	 *                              shorthand for array('whitelist' => $options).
	 */
	public function __construct($name = null, $options = array()) {
		if(is_string($options)) {
			$options = array('whitelist' => $options);
		}

		// Constructors have no meaningful return value, so the parent call is not returned.
		parent::__construct($name, $options);
	}

	/**
	 * @param array $options
	 *
	 * Options accepted in addition to those provided by Text:
	 *
	 *   - shortcodes: If true, shortcodes will be turned into the appropriate HTML.
	 *                 If false, shortcodes will not be processed.
	 *
	 *   - whitelist: If provided, a comma-separated list (or array) of elements that will be allowed to be stored
	 *                (be careful on relying on this for XSS protection - some seemingly-safe elements allow
	 *                attributes that can be exploited, for instance <img onload="exploiting_code();" src="..." />)
	 *                Text nodes outside of HTML tags are filtered out by default, but may be included by adding
	 *                the text() directive. E.g. 'link,meta,text()' will allow only <link /> <meta /> and text at
	 *                the root level.
	 *                Entries are trimmed and empty entries are dropped; a whitelist with no entries left
	 *                disables whitelist filtering.
	 */
	public function setOptions(array $options = array()) {
		parent::setOptions($options);

		if(array_key_exists("shortcodes", $options)) {
			$this->processShortcodes = (bool)$options["shortcodes"];
		}

		if(array_key_exists("whitelist", $options)) {
			$whitelist = $options['whitelist'];
			if(!is_array($whitelist)) {
				$whitelist = preg_split('/,\s*/', (string)$whitelist);
			}

			// Empty entries (from '' or a trailing comma) would otherwise end up as "not(self::)" in the
			// XPath built by whitelistContent(), which is not a valid expression.
			$whitelist = array_values(array_filter(array_map('trim', $whitelist), 'strlen'));

			$this->whitelist = $whitelist ? $whitelist : false;
		}
	}

	/**
	 * Create a summary of the content. This will be some section of the first paragraph, limited by
	 * $maxWords. All internal tags are stripped out - the return value is a string
	 *
	 * This is sort of the HTML aware equivalent to Text#Summary, although the logic for summarising is not exactly
	 * the same
	 *
	 * @param int $maxWords Maximum number of words to return - may return less, but never more. Pass -1 for no limit
	 * @param int $flex Number of words to search through when looking for a nice cut point
	 * @param string $add What to add to the end of the summary if we cut at a less-than-ideal cut point
	 * @return string A nice(ish) summary with no html tags (but possibly still some html entities)
	 *
	 * @see framework/core/model/fieldtypes/Text#Summary($maxWords)
	 */
	public function Summary($maxWords = 50, $flex = 15, $add = '...') {
		$str = false;

		/* First we need the text of the first paragraph, without tags. Try using DOM + SimpleXML first */
		if (class_exists('SimpleXMLElement') && class_exists('DOMDocument')) {
			$doc = new DOMDocument();

			// Nonbreaking spaces get converted into weird characters, so strip them.
			// This is evaluated BEFORE the throwing error handler below is installed, so that a notice raised
			// while parsing shortcodes is neither reported as an HTML parse error nor escapes past
			// restore_error_handler().
			$value = str_replace('&nbsp;', ' ', $this->RAW());

			// Catch warnings thrown by loadHTML and turn them into a failure boolean rather than a SilverStripe error
			set_error_handler(function($no, $message) {
				throw new Exception("HTML Parse Error: " . $message);
			}, E_ALL);
			try {
				$res = $doc->loadHTML('<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>' . $value);
			}
			catch (Exception $e) { $res = false; }
			restore_error_handler();

			if ($res) {
				$xml = simplexml_import_dom($doc);
				// instanceof rather than a truthiness check: the import can fail, and SimpleXMLElement has its
				// own boolean-cast rules.
				$paragraphs = ($xml instanceof SimpleXMLElement) ? $xml->xpath('//p') : array();
				if (!empty($paragraphs)) $str = strip_tags($paragraphs[0]->asXML());
			}
		}

		/* If that failed, most likely the passed HTML is broken. use a simple regex + a custom more brutal strip_tags.
		 * We don't use strip_tags because that does very badly on broken HTML */
		if (!$str) {
			/* See if we can pull a paragraph out*/

			// Strip out any images in case there's one at the beginning. Not doing this will return a blank paragraph
			$str = preg_replace('{^\s*(<.+?>)*<img[^>]*>}u', '', $this->value);
			if (preg_match('{<p(\s[^<>]*)?>(.*[A-Za-z]+.*)</p>}u', $str, $matches)) $str = $matches[2];

			/* If _that_ failed, just use the whole text */
			if (!$str) $str = $this->value;

			/* Now pull out all the html-alike stuff */
			/* Take out anything that is obviously a tag */
			$str = preg_replace('{</?[a-zA-Z]+[^<>]*>}', '', $str);
			/* Strip out any left over looking bits. Textual < or > should already be encoded to &lt; or &gt; */
			$str = preg_replace('{</|<|>}', '', $str);
		}

		/* Now split into words. If we are under the maxWords limit, just return the whole string (re-implode for
		 * whitespace normalization) */
		$words = preg_split('/\s+/u', $str);
		if ($maxWords == -1 || count($words) <= $maxWords) return implode(' ', $words);

		/* Otherwise work backwards for a looking for a sentence ending (we try to avoid abbreviations, but aren't
		 * very good at it).
		 * The search starts at the last word that still fits ($maxWords - 1): cutting after $words[$i] returns
		 * $i + 1 words, so starting at $maxWords could return one word more than the documented maximum. */
		for ($i = $maxWords - 1; $i >= $maxWords - $flex && $i >= 0; $i--) {
			if (preg_match('/\.$/', $words[$i]) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $words[$i])) {
				return implode(' ', array_slice($words, 0, $i+1));
			}
		}

		// If we didn't find a sentence ending quickly enough, just cut at the maxWords point and add '...' to the end
		return implode(' ', array_slice($words, 0, $maxWords)) . $add;
	}

	/**
	 * Returns the first sentence from the first paragraph. If it can't figure out what the first paragraph is (or
	 * there isn't one), it returns the same as Summary()
	 *
	 * This is the HTML aware equivalent to Text#FirstSentence
	 *
	 * @return string
	 *
	 * @see framework/core/model/fieldtypes/Text#FirstSentence()
	 */
	public function FirstSentence() {
		/* Use summary's html processing logic to get the first paragraph */
		$paragraph = $this->Summary(-1);

		/* Then look for the first sentence ending. We could probably use a nice regex, but for now this will do */
		$words = preg_split('/\s+/u', $paragraph);
		foreach ($words as $i => $word) {
			if (preg_match('/(!|\?|\.)$/', $word) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $word)) {
				return implode(' ', array_slice($words, 0, $i+1));
			}
		}

		/* If we didn't find a sentence ending, use the summary. We re-call rather than using paragraph so that
		 * Summary will limit the result this time */
		return $this->Summary();
	}

	/**
	 * Return the field value, passed through the active shortcode parser when shortcode processing is enabled.
	 *
	 * @return string
	 */
	public function RAW() {
		if ($this->processShortcodes) {
			return ShortcodeParser::get_active()->parse($this->value);
		}
		else {
			return $this->value;
		}
	}

	/**
	 * Return the value of the field with relative links converted to absolute urls (with placeholders parsed).
	 * @return string
	 */
	public function AbsoluteLinks() {
		return HTTP::absoluteURLs($this->forTemplate());
	}

	/**
	 * Value used when the field is rendered in a template: identical to {@link RAW()}.
	 *
	 * @return string
	 */
	public function forTemplate() {
		return $this->RAW();
	}

	/**
	 * Apply the whitelist filter to the value before handing it to the parent's DB preparation.
	 *
	 * @param string $value
	 * @return mixed Whatever the parent prepValueForDB() returns for the filtered value
	 */
	public function prepValueForDB($value) {
		return parent::prepValueForDB($this->whitelistContent($value));
	}

	/**
	 * Filter the given $value string through the whitelist filter
	 *
	 * @param string $value Input html content
	 * @return string Value with all non-whitelisted content stripped (if applicable)
	 */
	public function whitelistContent($value) {
		if(!$this->whitelist) {
			return $value;
		}

		$dom = Injector::inst()->create('HTMLValue', $value);

		$conditions = array();
		$textFilter = ' | //body/text()';
		foreach ($this->whitelist as $tag) {
			// Tolerate padded / empty entries in case the whitelist was assigned without going through setOptions()
			$tag = trim($tag);
			if($tag === '') {
				continue;
			}

			if($tag === 'text()') {
				$textFilter = ''; // Disable text filter if allowed
			} else {
				$conditions[] = 'not(self::'.$tag.')';
			}
		}

		// With no element names whitelisted (e.g. only "text()") every element is disallowed. An empty
		// predicate "[]" is not valid XPath, so the predicate is left out entirely in that case.
		$predicate = $conditions ? '['.implode(' and ', $conditions).']' : '';

		// NOTE(review): assumes query() yields an iterable node list, or a falsy value on failure - confirm
		// against HTMLValue.
		$disallowed = $dom->query('//body//*'.$predicate.$textFilter);
		if($disallowed) {
			foreach($disallowed as $el) {
				// A node may already be detached because one of its ancestors was removed earlier in this loop
				if ($el->parentNode) $el->parentNode->removeChild($el);
			}
		}

		return $dom->getContent();
	}

	/**
	 * Returns true if the field has meaningful content.
	 * Excludes null content like <h1></h1>, <p></p> ,etc
	 *
	 * @return boolean
	 */
	public function exists() {
		$value = $this->value;

		if (!$this->isPopulated($value)) {
			return false;
		}

		// If it's got a content tag
		if(preg_match('/<(img|embed|object|iframe|meta|source|link)[^>]*>/i', $value)) {
			return true;
		}

		// If it's just one or two tags on its own (and not the above) it's empty.
		// This might be <p></p> or <h1></h1> or whatever.
		if(preg_match('/^[\\s]*(<[^>]+>[\\s]*){1,2}$/u', $value)) {
			return false;
		}

		// Otherwise its content is genuine content
		return true;
	}

	/**
	 * Form field used when scaffolding an edit form for this field.
	 *
	 * @param string $title Field title
	 * @param array $params Not used by this implementation
	 * @return HtmlEditorField
	 */
	public function scaffoldFormField($title = null, $params = null) {
		return new HtmlEditorField($this->name, $title);
	}

	/**
	 * Form field used when scaffolding a search form for this field.
	 *
	 * @param string $title Field title
	 * @param array $params Not used by this implementation
	 * @return TextField
	 */
	public function scaffoldSearchField($title = null, $params = null) {
		return new TextField($this->name, $title);
	}

}
291
|
|
|
|
292
|
|
|
|
293
|
|
|
|