1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Elgg\Views; |
4
|
|
|
|
5
|
|
|
use Elgg\EventsService; |
6
|
|
|
use Elgg\Exceptions\Configuration\RegistrationException; |
7
|
|
|
use Elgg\Exceptions\InvalidArgumentException; |
8
|
|
|
use Elgg\Traits\Loggable; |
9
|
|
|
use Elgg\ViewsService; |
10
|
|
|
use Pelago\Emogrifier\CssInliner; |
11
|
|
|
use Pelago\Emogrifier\HtmlProcessor\CssToAttributeConverter; |
12
|
|
|
|
13
|
|
|
/** |
14
|
|
|
* Various helper methods for formatting and sanitizing output |
15
|
|
|
*/ |
16
|
|
|
class HtmlFormatter { |
17
|
|
|
|
18
|
|
|
use Loggable; |
19
|
|
|
|
20
|
|
|
/**
 * Regular expression used to locate @mentions in HTML
 *
 * The pattern is an alternation. Only its last branch captures a username; the first
 * two branches exist to consume text that must not be rewritten.
 *
 * Skip a complete anchor element, including its attributes and wrapped html,
 * so mentions that have already been wrapped in an anchor are excluded
 * '<a[^>]*?>.*?<\/a>'
 *
 * Skip any other tag together with its attributes,
 * so matches that are found within tag attributes are excluded
 * '<.*?>'
 *
 * Require at least one boundary before the mention: start of input, whitespace,
 * one of the punctuation chars ! . ?, a closing '>' or the end of the previous match
 * '(^|\s|\!|\.|\?|>|\G)+'
 *
 * Match @ followed by the username (everything up to whitespace, '<' or '&')
 * '(@([^\s<&]+))'
 *
 * Capture groups: 1 = preceding boundary, 2 = '@username', 3 = 'username'
 *
 * @see \Elgg\Users\Accounts::assertValidUsername()
 */
public const MENTION_REGEX = '/<a[^>]*?>.*?<\/a>|<.*?>|(^|\s|\!|\.|\?|>|\G)+(@([^\s<&]+))/iu';
39
|
|
|
|
40
|
|
|
/**
 * Create the HTML formatter
 *
 * The three collaborators are stored as protected properties through constructor promotion.
 *
 * @param ViewsService  $views  Views service
 * @param EventsService $events Events service
 * @param AutoParagraph $autop  Paragraph wrapper
 */
public function __construct(
	protected ViewsService $views,
	protected EventsService $events,
	protected AutoParagraph $autop,
) {
}
53
|
|
|
|
54
|
|
|
/**
 * Prepare a block of HTML for output
 *
 * The options are merged over the defaults (all enabled) and passed, together with the
 * HTML, through the 'prepare', 'html' event. Both values are read back from the event
 * result before any formatting step is applied, so the event result decides what is
 * formatted and which steps run.
 *
 * The enabled steps always run in this order: URLs, emails, mentions, sanitize, paragraphs.
 *
 * @param string $html    HTML string
 * @param array  $options Formatting options
 *
 * @option bool $parse_urls     Replace URLs with anchor tags
 * @option bool $parse_emails   Replace email addresses with anchor tags
 * @option bool $parse_mentions Replace @ mentions with anchor tags
 * @option bool $sanitize       Sanitize HTML tags
 * @option bool $autop          Add paragraphs instead of new lines
 *
 * @return string
 */
public function formatBlock(string $html, array $options = []): string {
	$defaults = [
		'parse_urls' => true,
		'parse_emails' => true,
		'parse_mentions' => true,
		'sanitize' => true,
		'autop' => true,
	];

	$prepared = $this->events->triggerResults('prepare', 'html', [], [
		'options' => array_merge($defaults, $options),
		'html' => $html,
	]);

	$html = (string) elgg_extract('html', $prepared);
	$options = (array) elgg_extract('options', $prepared);

	// option name => formatting step, listed in the order the steps have to run
	$steps = [
		'parse_urls' => fn($value) => $this->parseUrls($value),
		'parse_emails' => fn($value) => $this->parseEmails($value),
		'parse_mentions' => fn($value) => $this->parseMentions($value),
		'sanitize' => fn($value) => elgg_sanitize_input($value),
		'autop' => fn($value) => $this->addParagaraphs($value),
	];

	foreach ($steps as $option => $step) {
		if (elgg_extract($option, $options)) {
			$html = $step($html);
		}
	}

	return $html;
}
108
|
|
|
|
109
|
|
|
/**
 * Turn the URLs found in a string into links
 *
 * Delegates to Linkify and asks it to put rel="nofollow" on the generated links.
 *
 * @param string $text The input string
 *
 * @return string The output string with formatted links
 */
public function parseUrls(string $text): string {
	$link_attributes = ['rel' => 'nofollow'];

	return (new \Misd\Linkify\Linkify())->processUrls($text, ['attr' => $link_attributes]);
}
122
|
|
|
|
123
|
|
|
/**
 * Turn the email addresses found in a string into links
 *
 * Delegates to Linkify and asks it to put rel="nofollow" on the generated links.
 *
 * @param string $text The input string
 *
 * @return string The output string with formatted links
 * @since 2.3
 */
public function parseEmails(string $text): string {
	$link_attributes = ['rel' => 'nofollow'];

	return (new \Misd\Linkify\Linkify())->processEmails($text, ['attr' => $link_attributes]);
}
136
|
|
|
|
137
|
|
|
/**
 * Turn the @ mentions found in a string into links to the mentioned user
 *
 * Every match of self::MENTION_REGEX is inspected. A match is returned untouched when
 * it carries no username (existing anchors and other tags), when the username fails
 * validation or when no user can be found for it. Depending on the
 * 'mentions_display_format' config value the link text is either "@username" or the
 * display name of the user.
 *
 * @param string $text The input string
 *
 * @return string
 * @since 5.0
 */
public function parseMentions(string $text): string {
	$linkify = static function (array $matches) {
		$whole_match = elgg_extract(0, $matches);
		$username = elgg_extract(3, $matches);

		// anchors and tags are matched without a username group
		if (empty($username)) {
			return $whole_match;
		}

		try {
			_elgg_services()->accounts->assertValidUsername($username);
		} catch (RegistrationException $e) {
			return $whole_match;
		}

		$user = elgg_get_user_by_username($username);

		// A trailing period may be sentence punctuation instead of part of the username,
		// retry without it and put it back behind the link.
		$trailing = '';
		if (!$user && str_ends_with($username, '.')) {
			$user = elgg_get_user_by_username(substr($username, 0, -1));
			$trailing = '.';
		}

		if (!$user) {
			return $whole_match;
		}

		$show_username = elgg_get_config('mentions_display_format') === 'username';
		$url = $user->getURL();
		$label = $show_username ? "@{$user->username}" : $user->getDisplayName();

		// group 1 is the boundary that preceded the mention, it has to be kept in the output
		return elgg_extract(1, $matches) . elgg_view_url($url, $label) . $trailing;
	};

	$replaced = preg_replace_callback(self::MENTION_REGEX, $linkify, $text);

	// preg_replace_callback() reports failures with null, fall back to the input
	return $replaced ?? $text;
}
185
|
|
|
|
186
|
|
|
/**
 * Create paragraphs from text with line spacing
 *
 * The input is returned unchanged when the paragraph wrapper reports false or throws
 * a \RuntimeException; the exception message is logged as a warning.
 *
 * @param string $string The string
 *
 * @return string
 */
public function addParagaraphs(string $string): string {
	try {
		$processed = $this->autop->process($string);
	} catch (\RuntimeException $e) {
		$this->getLogger()->warning('AutoParagraph failed to process the string: ' . $e->getMessage());

		return $string;
	}

	return $processed === false ? $string : $processed;
}
205
|
|
|
|
206
|
|
|
/**
 * Converts an associative array into a string of HTML/XML attributes
 * Returns a space separated string of name="value" pairs to be inserted into a tag (e.g., <tag $attrs>)
 *
 * Attribute value can be a scalar value, an array of scalar values, or true.
 * An example of the attributes:
 * <code>
 * $attrs = [
 *     'class' => ['elgg-input', 'elgg-input-text'], // will be imploded with spaces
 *     'style' => ['margin-left:10px;', 'color: #666;'], // will be imploded with spaces
 *     'alt' => 'Alt text', // will be left as is
 *     'disabled' => true, // will be converted to disabled="disabled"
 *     'data-options' => json_encode(['foo' => 'bar']), // will be output as an escaped JSON string
 *     'batch' => <\ElggBatch>, // will be ignored
 *     'items' => [<\ElggObject>], // will be ignored
 * ];
 * </code>
 *
 * Also ignored: names containing an underscore (unless they start with 'data-'),
 * null and false values and empty arrays. Names are lowercased, values are HTML-escaped
 * without double encoding existing entities.
 *
 * @param array $attrs An array of attribute => value pairs
 *
 * @return string
 *
 * @see elgg_format_element()
 */
public function formatAttributes(array $attrs = []): string {
	$pairs = [];

	foreach ($attrs as $name => $value) {
		if (str_contains($name, '_') && !str_starts_with($name, 'data-')) {
			// this is probably a view $vars variable not meant for output
			continue;
		}

		if ($value === null || $value === false) {
			continue;
		}

		$name = strtolower($name);

		if ($value === true) {
			// e.g. checked => true ==> checked="checked"
			$parts = [$name];
		} elseif (is_scalar($value)) {
			$parts = [$value];
		} elseif (is_array($value) && $value !== []) {
			$parts = $value;
		} else {
			// empty arrays (e.g. ['class' => []]) and anything that is not a scalar or an array
			continue;
		}

		// a single non-scalar item makes the whole attribute unusable
		foreach ($parts as $part) {
			if (!is_scalar($part)) {
				continue 2;
			}
		}

		$escaped = htmlspecialchars(implode(' ', $parts), ENT_QUOTES, 'UTF-8', false);
		$pairs[] = "{$name}=\"{$escaped}\"";
	}

	return implode(' ', $pairs);
}
283
|
|
|
|
284
|
|
|
/**
 * Format an HTML element
 *
 * @param string $tag_name   The element tagName. e.g. "div". This will not be validated.
 *
 * @param array  $attributes The element attributes.
 *
 * @param string $text       The contents of the element. Assumed to be HTML unless encode_text is true.
 *
 * @param array  $options    Options array with keys:
 *
 *   - encode_text   => (bool, default false) If true, $text will be HTML-escaped. Already-escaped entities
 *                      will not be double-escaped.
 *
 *   - double_encode => (bool, default false) If true, the $text HTML escaping will be allowed to double
 *                      encode HTML entities: '&times;' will become '&amp;times;'
 *
 *   - is_void       => (bool) If given, this determines whether the function will return just the open tag.
 *                      Otherwise this will be determined by the tag name according to this list:
 *                      http://www.w3.org/html/wg/drafts/html/master/single-page.html#void-elements
 *
 *   - is_xml        => (bool, default false) If true, void elements will be formatted like "<tag />"
 *
 * @return string
 * @since 1.9.0
 * @throws InvalidArgumentException When $tag_name is an empty string
 */
public function formatElement(string $tag_name, array $attributes = [], string $text = '', array $options = []): string {
	if ($tag_name === '') {
		throw new InvalidArgumentException('$tag_name is required');
	}

	$is_void = $options['is_void'] ?? null;
	if ($is_void === null) {
		// from http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
		$void_tags = [
			'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem',
			'meta', 'param', 'source', 'track', 'wbr',
		];
		$is_void = in_array(strtolower($tag_name), $void_tags, true);
	}

	if (!empty($options['encode_text'])) {
		$text = htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', !empty($options['double_encode']));
	}

	$attrs = empty($attributes) ? '' : $this->formatAttributes($attributes);
	if ($attrs !== '') {
		// separate the attributes from the tag name
		$attrs = ' ' . $attrs;
	}

	if (!$is_void) {
		return "<{$tag_name}{$attrs}>{$text}</{$tag_name}>";
	}

	// void elements have no content and no closing tag
	$closing = empty($options['is_xml']) ? '>' : ' />';

	return "<{$tag_name}{$attrs}{$closing}";
}
341
|
|
|
|
342
|
|
|
/**
 * Strip tags and offer plugins the chance to alter the result.
 *
 * The string is run through strip_tags() and the stripped result is then passed as the
 * value of the 'format', 'strip_tags' event. The event params hold the unstripped input
 * in 'original_string' and the allowed tags in 'allowable_tags'.
 *
 * @param string      $string         Formatted string
 * @param string|null $allowable_tags Optional parameter to specify tags which should not be stripped
 *
 * @return string String run through strip_tags() and any event.
 */
public function stripTags(string $string, ?string $allowable_tags = null): string {
	$params = [
		'original_string' => $string,
		'allowable_tags' => $allowable_tags,
	];

	$string = strip_tags($string, $allowable_tags);

	// event handlers can return anything, the cast keeps the declared return type intact
	return (string) $this->events->triggerResults('format', 'strip_tags', $params, $string);
}
361
|
|
|
|
362
|
|
|
/**
 * Decode HTML markup into a raw text string
 *
 * This applies html_entity_decode() to a string while re-entitising HTML
 * special char entities to prevent them from being decoded back to their
 * unsafe original forms.
 *
 * The entities for > < & " and ' (&gt; &lt; &amp; &quot; &#039;) are escaped once more
 * before the decode pass, so the pass only brings them back to their entity form instead
 * of the raw character. The escaping is reverted afterwards for any that are still
 * double-escaped. All other entities, e.g. &eacute;, are decoded to the character
 * they stand for.
 *
 * Note: calling html_entity_decode() directly on user input that was already escaped
 * elsewhere would turn &lt;foo&gt; back into a real tag and allow arbitrary HTML to be
 * injected. This method exists to avoid exactly that.
 *
 * @param string $string Encoded HTML
 *
 * @return string
 *
 * @author Pádraic Brady
 * @copyright Copyright (c) 2010 Pádraic Brady (http://blog.astrumfutura.com)
 * @license Released under dual-license GPL2/MIT by explicit permission of Pádraic Brady
 */
public function decode(string $string): string {
	// entities that must survive the decode pass and their escaped counterparts
	$special = ['&gt;', '&lt;', '&amp;', '&quot;', '&#039;'];
	$guarded = ['&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'];

	$string = str_replace($special, $guarded, $string);
	$string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

	return str_replace($guarded, $special, $string);
}
402
|
|
|
|
403
|
|
|
/**
 * Adds inline style to html content
 *
 * The css is inlined with Emogrifier's CssInliner (style blocks in the html are not
 * parsed) and the result is passed through CssToAttributeConverter. The html is returned
 * untouched when either the html or the css is empty.
 *
 * @param string $html      html content
 * @param string $css       style text
 * @param bool   $body_only toggle to return the body contents instead of a full html
 *
 * @return string
 *
 * @since 4.0
 */
public function inlineCss(string $html, string $css, bool $body_only = false): string {
	if (empty($html) || empty($css)) {
		// nothing to inline
		return $html;
	}

	$inlined = CssInliner::fromHtml($html)
		->disableStyleBlocksParsing()
		->inlineCss($css)
		->render();

	$converter = CssToAttributeConverter::fromHtml($inlined)->convertCssToVisualAttributes();

	if ($body_only) {
		return $converter->renderBodyContent();
	}

	return $converter->render();
}
424
|
|
|
|
425
|
|
|
/**
 * Normalizes the urls in href or src attributes in text
 *
 * Every distinct quoted href/src value is passed through elgg_normalize_url() and each
 * occurrence of that quoted value in the text is replaced by the normalized version.
 *
 * @param string $text source content
 *
 * @return string
 *
 * @since 4.0
 */
public function normalizeUrls(string $text): string {
	$found = [];
	preg_match_all('/\s(?:href|src)=([\'"]\S+[\'"])/i', $text, $found);

	// group 1 holds the attribute values including their wrapping quotes
	$quoted_urls = $found[1] ?? [];

	foreach (array_unique($quoted_urls) as $quoted_url) {
		// the url without the wrapping quotes
		$bare_url = substr($quoted_url, 1, -1);
		$normalized_url = elgg_normalize_url($bare_url);

		// swap the url inside the quotes, then swap the quoted value in the content
		$quoted_replacement = str_replace($bare_url, $normalized_url, $quoted_url);
		$text = str_replace($quoted_url, $quoted_replacement, $text);
	}

	return $text;
}
463
|
|
|
} |
464
|
|
|
|