<?php

/**
 * Injector that converts http, https and ftp text URLs to actual links.
 */
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
    /**
     * Identifier of this injector.
     *
     * @type string
     */
    public $name = 'Linkify';

    /**
     * Elements, with their attributes, that this injector needs: <a href>.
     *
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Splits a text token around every URL found in its data and replaces
     * the token (by reference) with an array of tokens in which each URL is
     * wrapped in an <a href="URL"> element. The token is left untouched when
     * <a> is not allowed here, when the data contains no "://", or when the
     * regular expression split fails.
     *
     * @param HTMLPurifier_Token $token Text token to inspect; overwritten
     *                                  with an array of tokens on success.
     */
    public function handleText(&$token)
    {
        // Cheap pre-checks. The "://" test is a deliberately quick heuristic:
        // it will not catch bare hosts such as "google.com", but most people
        // do not expect those to be linkified anyway.
        if (!$this->allowsElement('a') || strpos($token->data, '://') === false) {
            return;
        }

        // URL matcher based on https://gist.github.com/gruber/249502, with
        // @cscott's backtracking fix and the Unicode characters written as
        // \x{...} escapes instead of literal characters.
        $pattern = '/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu';

        $segments = preg_split($pattern, $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($segments === false) {
            // The split itself failed; keep the original token.
            return;
        }

        // The pattern has a single capturing group, so with
        // PREG_SPLIT_DELIM_CAPTURE the pieces alternate strictly:
        // even positions are plain text, odd positions are matched URLs.
        $replacement = array();
        foreach ($segments as $position => $segment) {
            if ($position % 2 === 1) {
                $replacement[] = new HTMLPurifier_Token_Start('a', array('href' => $segment));
                $replacement[] = new HTMLPurifier_Token_Text($segment);
                $replacement[] = new HTMLPurifier_Token_End('a');
            } elseif ($segment !== '') {
                // Empty text pieces (e.g. a URL at the very start) are dropped.
                $replacement[] = new HTMLPurifier_Token_Text($segment);
            }
        }

        $token = $replacement;
    }
}

// vim: et sw=4 sts=4