|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/**
 * Injector that converts textual URLs into actual links.
 *
 * A text node is only inspected when it contains "://". Once that quick
 * check passes, every URL-like run matched by the pattern in handleText()
 * is wrapped in an <a> element, including scheme-less forms such as
 * "www.example.com/page" that appear in the same text node.
 */
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'Linkify';

    /**
     * NOTE(review): presumably the elements/attributes this injector relies
     * on, as interpreted by HTMLPurifier_Injector; confirm against the base
     * class.
     *
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Replaces a text token with a list of tokens in which every detected
     * URL is wrapped in an <a href="..."> element.
     *
     * The token is left untouched when <a> is not allowed, when the text
     * contains no "://", or when the split fails. Otherwise $token is
     * overwritten (by reference) with an array of text/start/end tokens.
     *
     * @param HTMLPurifier_Token $token Text token to examine; replaced in
     *                                  place by an array of tokens when at
     *                                  least the "://" heuristic matches.
     */
    public function handleText(&$token)
    {
        if (!$this->allowsElement('a')) {
            return;
        }

        if (strpos($token->data, '://') === false) {
            // our really quick heuristic failed, abort
            // this may not work so well if we want to match things like
            // "google.com", but then again, most people don't
            return;
        }

        // there is/are URL(s). Let's split the string.
        // We use this regex:
        // https://gist.github.com/gruber/249502
        // but with @cscott's backtracking fix and also
        // the Unicode characters un-Unicodified.
        $bits = preg_split(
            '/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
            $token->data,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );

        if ($bits === false) {
            // PCRE failure (e.g. invalid UTF-8 or backtrack limit): keep the
            // original text token rather than losing content.
            return;
        }

        // The pattern has exactly one capturing group, so the split result
        // alternates: even indexes are plain text, odd indexes are URLs.
        $replacement = array();
        foreach ($bits as $index => $bit) {
            if ($index % 2 === 0) {
                // Skip the empty strings produced when a URL sits at the
                // very start/end of the text or two URLs are adjacent.
                if ($bit !== '') {
                    $replacement[] = new HTMLPurifier_Token_Text($bit);
                }
                continue;
            }

            // The "www." and "domain.tld/" alternatives match without a
            // scheme. Used verbatim as an href they would be resolved as a
            // path relative to the current page, so give them an explicit
            // scheme. The visible link text stays exactly as written.
            $href = $bit;
            if (!preg_match('/^[a-z][\w\-]+:/i', $bit)) {
                $href = 'http://' . $bit;
            }

            $replacement[] = new HTMLPurifier_Token_Start('a', array('href' => $href));
            $replacement[] = new HTMLPurifier_Token_Text($bit);
            $replacement[] = new HTMLPurifier_Token_End('a');
        }

        $token = $replacement;
    }
}
|
66
|
|
|
|
|
67
|
|
|
// vim: et sw=4 sts=4
|
68
|
|
|
|