1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Mos\TextFilter; |
4
|
|
|
|
5
|
|
|
/**
 * Filter and format content.
 *
 * Offers a set of named text filters (BBCode, clickable links, shortcodes,
 * Markdown, nl2br, HTML Purifier) together with front matter extraction
 * (JSON and YAML) and title detection from the first H1 element.
 *
 * Use parse() to run a list of filters and get back both the formatted text
 * and any collected front matter. doFilter() is the older, text-only entry
 * point.
 *
 * Exceptions are thrown as `Exception`, which resolves inside this file's
 * namespace; that class is not defined in this file.
 */
class CTextFilter
{
    /**
     * Supported filters, as accepted by parse().
     */
    private $filters = [
        "jsonfrontmatter",
        "yamlfrontmatter",
        "bbcode",
        "clickable",
        "shortcode",
        "markdown",
        "nl2br",
        "purify",
        "titlefromh1",
    ];



    /**
     * Current document parsed, an object with the members `text` and
     * `frontmatter`. Created anew by each call to parse().
     */
    private $current;



    /**
     * Filters that only transform text, mapped to the method implementing
     * them. Shared by doFilter() and parseFactory().
     */
    private $callbacks = [
        "bbcode"    => "bbcode2html",
        "clickable" => "makeClickable",
        "shortcode" => "shortCode",
        "markdown"  => "markdown",
        "nl2br"     => "nl2br",
        "purify"    => "purify",
    ];



    /**
     * Call each filter.
     *
     * Only the text-transforming filters are available here; the front
     * matter and title filters require parse().
     *
     * @deprecated deprecated since version 1.2 in favour of parse().
     *
     * @param string       $text    the text to filter.
     * @param string|array $filters as comma separated list of filter,
     *                              or filters sent in as array. A string is
     *                              lowercased and stripped of whitespace,
     *                              an array is used as is.
     *
     * @throws Exception when a filter name is not valid.
     *
     * @return string the formatted text.
     */
    public function doFilter($text, $filters)
    {
        // Make an array of the comma separated string $filters
        if (is_array($filters)) {
            $filter = $filters;
        } else {
            $filters = strtolower($filters);
            $filter = preg_replace('/\s/', '', explode(',', $filters));
        }

        // Only used in the error message. Interpolating an array directly
        // would render it as "Array" and raise a conversion notice.
        $filterString = is_array($filters) ? implode(',', $filters) : $filters;

        // For each filter, call its function with the $text as parameter.
        foreach ($filter as $key) {
            if (!isset($this->callbacks[$key])) {
                throw new Exception("The filter '$filterString' is not a valid filter string due to '$key'.");
            }

            $method = $this->callbacks[$key];
            $text = $this->{$method}($text);
        }

        return $text;
    }



    /**
     * Return an array of all filters supported.
     *
     * @return array with strings of filters supported.
     */
    public function getFilters()
    {
        return $this->filters;
    }



    /**
     * Check if filter is supported.
     *
     * The comparison is strict, so only a string equal to one of the
     * supported filter names is accepted.
     *
     * @param string $filter to use.
     *
     * @return boolean true if filter exists, false otherwise.
     */
    public function hasFilter($filter)
    {
        return in_array($filter, $this->filters, true);
    }



    /**
     * Add array items to frontmatter.
     *
     * Items are merged using array_merge_recursive(), so a string key
     * present both in the current front matter and in $matter ends up
     * holding an array of both values instead of being overwritten.
     *
     * @param array|null $matter key value array with items to add
     *                           or null if empty.
     *
     * @return $this
     */
    private function addToFrontmatter($matter)
    {
        if (empty($matter)) {
            return $this;
        }

        if (is_null($this->current->frontmatter)) {
            $this->current->frontmatter = [];
        }

        $this->current->frontmatter = array_merge_recursive($this->current->frontmatter, $matter);

        return $this;
    }



    /**
     * Call a specific filter and store its details.
     *
     * The result is stored in $this->current: the text is replaced by the
     * filtered text and front matter, if any, is merged into the front
     * matter already collected.
     *
     * @param string $filter to use.
     *
     * @throws Exception when filter does not exist.
     *
     * @return void
     */
    private function parseFactory($filter)
    {
        $text = $this->current->text;

        switch ($filter) {
            case "jsonfrontmatter":
                $res = $this->jsonFrontMatter($text);
                $this->current->text = $res["text"];
                $this->addToFrontmatter($res["frontmatter"]);
                break;

            case "yamlfrontmatter":
                $res = $this->yamlFrontMatter($text);
                $this->current->text = $res["text"];
                $this->addToFrontmatter($res["frontmatter"]);
                break;

            case "titlefromh1":
                // Leaves the text untouched, only records the title.
                $title = $this->getTitleFromFirstH1($text);
                $this->addToFrontmatter(["title" => $title]);
                break;

            default:
                // All remaining filters are plain text transformations.
                if (!is_string($filter) || !isset($this->callbacks[$filter])) {
                    throw new Exception("The filter '$filter' is not a valid filter string.");
                }

                $method = $this->callbacks[$filter];
                $this->current->text = $this->{$method}($text);
        }
    }



    /**
     * Call each filter and return an object with details of the formatted
     * content.
     *
     * Filters are applied in the order given, each one working on the
     * result of the previous one.
     *
     * @param string $text   the text to filter.
     * @param array  $filter array of filters to use.
     *
     * @throws Exception when filter does not exist.
     *
     * @return \stdClass with the formatted text in `text` and the collected
     *                   front matter in `frontmatter` (null when none).
     */
    public function parse($text, $filter)
    {
        $this->current = new \stdClass();
        $this->current->frontmatter = null;
        $this->current->text = $text;

        foreach ($filter as $key) {
            $this->parseFactory($key);
        }

        return $this->current;
    }



    /**
     * Extract front matter from text.
     *
     * The front matter is whatever lies between the first occurrence of
     * the start token and the first occurrence of the stop token after it.
     * Both tokens and the front matter are removed from the returned text.
     * The text is returned unchanged when either token is missing.
     *
     * @param string $text       the text to be parsed.
     * @param string $startToken the start token.
     * @param string $stopToken  the stop token.
     *
     * @return array with the formatted text and the front matter, in that
     *               order; the front matter is null when not found.
     */
    private function extractFrontMatter($text, $startToken, $stopToken)
    {
        $start = strpos($text, $startToken);
        if ($start === false) {
            return [$text, null];
        }

        // Look for the stop token only after the start token. Otherwise the
        // start token itself matches when both tokens are equal and the
        // front matter does not begin at the very start of the text.
        $contentStart = $start + strlen($startToken);
        $stop = strpos($text, $stopToken, $contentStart);
        if ($stop === false) {
            return [$text, null];
        }

        $frontmatter = substr($text, $contentStart, $stop - $contentStart);
        $text = substr($text, 0, $start) . substr($text, $stop + strlen($stopToken));

        return [$text, $frontmatter];
    }



    /**
     * Extract JSON front matter from text.
     *
     * The front matter is enclosed by a line `{{{` and a line `}}}`.
     *
     * @param string $text the text to be parsed.
     *
     * @throws Exception when the front matter can not be decoded as JSON.
     *
     * @return array with the formatted text in `text` and the decoded
     *               front matter in `frontmatter`.
     */
    public function jsonFrontMatter($text)
    {
        list($text, $frontmatter) = $this->extractFrontMatter($text, "{{{\n", "}}}\n");

        if (!empty($frontmatter)) {
            $frontmatter = json_decode($frontmatter, true);

            if (is_null($frontmatter)) {
                throw new Exception("Failed parsing JSON frontmatter.");
            }
        }

        return [
            "text"        => $text,
            "frontmatter" => $frontmatter
        ];
    }



    /**
     * Extract YAML front matter from text.
     *
     * The front matter is enclosed by two lines `---`. It is parsed with
     * yaml_parse() when that function exists; when it does not, the front
     * matter is returned as the raw, unparsed string.
     *
     * @param string $text the text to be parsed.
     *
     * @throws Exception when the front matter can not be parsed as YAML.
     *
     * @return array with the formatted text in `text` and the front matter
     *               in `frontmatter`.
     */
    public function yamlFrontMatter($text)
    {
        $needle = "---\n";
        list($text, $frontmatter) = $this->extractFrontMatter($text, $needle, $needle);

        if (function_exists("yaml_parse") && !empty($frontmatter)) {
            // Put the document start marker back before parsing.
            $frontmatter = yaml_parse($needle . $frontmatter);

            if ($frontmatter === false) {
                throw new Exception("Failed parsing YAML frontmatter.");
            }
        }

        return [
            "text"        => $text,
            "frontmatter" => $frontmatter
        ];
    }



    /**
     * Get the title from the first H1.
     *
     * Tags inside the heading are stripped from the title. The heading
     * must open and close on the same line.
     *
     * @param string $text the text to be parsed.
     *
     * @return string|null with the title, if its found.
     */
    public function getTitleFromFirstH1($text)
    {
        $matches = [];

        // Lazy match, to stop at the first closing tag even when several
        // headings share a line.
        if (preg_match("#<h1.*?>(.*?)</h1>#", $text, $matches)) {
            return strip_tags($matches[1]);
        }

        return null;
    }



    /**
     * Helper, BBCode formatting converting to HTML.
     *
     * Supports [b], [i], [u], [img], [url] and [url=...]. Images and links
     * are only converted when the address starts with http or https.
     *
     * NOTE(review): the matched address is inserted into the attribute
     * without escaping.
     *
     * @param string $text The text to be converted.
     *
     * @return string the formatted text.
     *
     * @link http://dbwebb.se/coachen/reguljara-uttryck-i-php-ger-bbcode-formattering
     */
    public function bbcode2html($text)
    {
        $search = [
            '/\[b\](.*?)\[\/b\]/is',
            '/\[i\](.*?)\[\/i\]/is',
            '/\[u\](.*?)\[\/u\]/is',
            '/\[img\](https?.*?)\[\/img\]/is',
            '/\[url\](https?.*?)\[\/url\]/is',
            '/\[url=(https?.*?)\](.*?)\[\/url\]/is'
        ];

        $replace = [
            '<strong>$1</strong>',
            '<em>$1</em>',
            '<u>$1</u>',
            '<img src="$1" />',
            '<a href="$1">$1</a>',
            '<a href="$1">$2</a>'
        ];

        return preg_replace($search, $replace, $text);
    }



    /**
     * Make clickable links from URLs in text.
     *
     * NOTE(review): `[href|src]` in the lookbehind is a character class
     * matching one single character, not the words href or src. It does
     * reject URLs directly following `href="` and `src="`, since those end
     * with a character in the class, but also any other attribute ending
     * with one of those characters. The matched URL is inserted into the
     * attribute without escaping.
     *
     * @param string $text the text that should be formatted.
     *
     * @return string with formatted anchors.
     *
     * @link http://dbwebb.se/coachen/lat-php-funktion-make-clickable-automatiskt-skapa-klickbara-lankar
     */
    public function makeClickable($text)
    {
        return preg_replace_callback(
            '#\b(?<![href|src]=[\'"])https?://[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/))#',
            function ($matches) {
                return "<a href='{$matches[0]}'>{$matches[0]}</a>";
            },
            $text
        );
    }



    /**
     * Format text according to HTML Purifier.
     *
     * @param string $text that should be formatted.
     *
     * @return string as the formatted html-text.
     */
    public function purify($text)
    {
        $config = \HTMLPurifier_Config::createDefault();

        // Run without the definition cache.
        $config->set("Cache.DefinitionImpl", null);

        $purifier = new \HTMLPurifier($config);

        return $purifier->purify($text);
    }



    /**
     * Format text according to Markdown syntax.
     *
     * @param string $text the text that should be formatted.
     *
     * @return string as the formatted html-text.
     */
    public function markdown($text)
    {
        return \Michelf\MarkdownExtra::defaultTransform($text);
    }



    /**
     * For convenience access to nl2br
     *
     * @param string $text text to be converted.
     *
     * @return string the formatted text.
     */
    public function nl2br($text)
    {
        return nl2br($text);
    }



    /**
     * Shortcode to quicker format text as HTML.
     *
     * Replaces `[FIGURE ...]` with the HTML from shortCodeFigure().
     *
     * NOTE(review): the options are matched greedily, up to the last `]`
     * on the line.
     *
     * @param string $text text to be converted.
     *
     * @return string the formatted text.
     */
    public function shortCode($text)
    {
        $patterns = [
            '/\[(FIGURE)[\s+](.+)\]/',
        ];

        return preg_replace_callback(
            $patterns,
            function ($matches) {
                switch ($matches[1]) {
                    case 'FIGURE':
                        return self::shortCodeFigure($matches[2]);

                    default:
                        return "{$matches[1]} is unknown shortcode.";
                }
            },
            $text
        );
    }



    /**
     * Init shortcode handling by preparing the option list to an array, for those using arguments.
     *
     * An option written as `key="value"` is stored with its value, without
     * the surrounding quotes. Any other word is stored as a key with the
     * value true.
     *
     * @param string $options for the shortcode.
     *
     * @return array with all the options.
     */
    public static function shortCodeInit($options)
    {
        preg_match_all('/[a-zA-Z0-9]+="[^"]+"|\S+/', $options, $matches);

        $res = [];
        foreach ($matches[0] as $match) {
            $pos = strpos($match, '=');
            if ($pos === false) {
                $res[$match] = true;
                continue;
            }

            $key = substr($match, 0, $pos);
            $res[$key] = trim(substr($match, $pos + 1), '"');
        }

        return $res;
    }



    /**
     * Shortcode for <figure>.
     *
     * Usage example: [FIGURE src="img/home/me.jpg" caption="Me" alt="Bild på mig" nolink="nolink"]
     *
     * The image is wrapped in a link unless `nolink` is set. The link goes
     * to `href` or, when that is not set, to `src` without its query
     * string. The caption is used as alt text when `alt` is not set.
     *
     * NOTE(review): option values are inserted into the HTML without
     * escaping.
     *
     * @param string $options for the shortcode.
     *
     * @return string the HTML for the figure.
     */
    public static function shortCodeFigure($options)
    {
        // Merge incoming options with the defaults.
        $options = array_merge(
            [
                'id' => null,
                'class' => null,
                'src' => null,
                'title' => null,
                'alt' => null,
                'caption' => null,
                'href' => null,
                'nolink' => false,
            ],
            self::shortCodeInit($options)
        );

        // Read only the known options; unknown ones are ignored.
        $src     = $options['src'];
        $alt     = $options['alt'];
        $caption = $options['caption'];
        $href    = $options['href'];

        $id    = $options['id'] ? " id='{$options['id']}'" : null;
        $class = $options['class'] ? " class='figure {$options['class']}'" : " class='figure'";
        $title = $options['title'] ? " title='{$options['title']}'" : null;

        if (!$alt && $caption) {
            $alt = $caption;
        }

        if (!$href) {
            $pos = strpos((string) $src, '?');
            $href = $pos ? substr($src, 0, $pos) : $src;
        }

        $start = null;
        $end = null;
        if (!$options['nolink']) {
            $start = "<a href='{$href}'>";
            $end = "</a>";
        }

        $html = <<<EOD
<figure{$id}{$class}>
{$start}<img src='{$src}' alt='{$alt}'{$title}/>{$end}
<figcaption markdown=1>{$caption}</figcaption>
</figure>
EOD;

        return $html;
    }
}
548
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.