|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* @file |
|
4
|
|
|
* HTML entity utilities. |
|
5
|
|
|
*/ |
|
6
|
|
|
|
|
7
|
|
|
namespace QueryPath; |
|
8
|
|
|
|
|
9
|
|
|
/** |
|
10
|
|
|
* Perform various tasks on HTML/XML entities. |
|
11
|
|
|
* |
|
12
|
|
|
* @ingroup querypath_util |
|
13
|
|
|
*/ |
|
14
|
|
|
class Entities implements EntitiesContract
{
    /**
     * Pattern used to find entities and stray ampersands.
     *
     * This is four alternatives wrapped into one regex; the | divides them.
     * They are tried left to right at each position:
     * 1: Match any named entity (&name;). The name goes in $matches[1]
     * 2: Match any decimal entity (&#123;). The digits go in $matches[2]
     * 3: Match any hex entity (&#x7B;). "x" plus the hex digits go in $matches[3]
     * 4: Match any ampersand that is not an entity. This goes in $matches[4]
     * The last alternative can only match when none of the previous three
     * matched at that position.
     *
     * Note that only a lowercase "x" is recognised as the hex marker.
     *
     * XXX: Are octal encodings for entities acceptable?
     */
    protected static $regex = '/&([\w]+);|&#([\d]+);|&#(x[0-9a-fA-F]+);|(&)/m';

    /**
     * Replace all entities.
     *
     * Scans a string and rewrites every named entity as its numeric
     * character reference. Existing decimal and hex references are kept
     * as they are, and an ampersand that does not start an entity is
     * rewritten as &#38;.
     *
     * This will not work with specialized entities: named entities are
     * resolved through replaceEntity().
     *
     * @param string $string
     *   The string to perform replacements on.
     * @return string
     *   Returns a string that is similar to the original one, but with
     *   all entity replacements made.
     */
    public static function replaceAllEntities(string $string): string
    {
        // self::class keeps the callback bound to this class without
        // repeating its fully-qualified name as a string.
        return preg_replace_callback(self::$regex, [self::class, 'doReplacement'], $string);
    }

    /**
     * Callback for processing replacements.
     *
     * @param array $matches
     *   The regular expression replacement array, as handed over by
     *   preg_replace_callback() for self::$regex.
     * @return string
     *   The numeric character reference to substitute for the match.
     */
    protected static function doReplacement(array $matches): string
    {
        // This relies on trailing groups that did not participate in the
        // match being left off the end of $matches: the element count then
        // tells us which alternative of self::$regex matched.
        $count = count($matches);
        switch ($count) {
            case 2:
                // Named entity: look up its numeric value.
                return '&#' . self::replaceEntity($matches[1]) . ';';
            case 3:
            case 4:
                // Decimal (3) or hex (4) entity: already numeric, so the
                // captured value is emitted unchanged.
                return '&#' . $matches[$count - 1] . ';';
            case 5:
                // Unescaped ampersand: emit its numeric reference (38)
                // instead of handing the raw character back.
                return '&#38;';
        }

        // Not reachable with self::$regex as defined above; kept as a
        // fallback in case the pattern is replaced.
        return '';
    }

    /**
     * Lookup an entity string's numeric equivalent.
     *
     * The value is read from the ENTITIES constant, which is not declared
     * in this class (presumably it comes from EntitiesContract - verify).
     * NOTE(review): a name that is missing from ENTITIES is not handled
     * here; the lookup would not produce an int, which conflicts with the
     * declared return type.
     *
     * @param string $entity
     *   The entity whose numeric value is needed, without the leading
     *   ampersand and the trailing semicolon.
     * @return int
     *   The integer value corresponding to the entity.
     * @author Matt Butcher
     * @author Ryan Mahoney
     */
    public static function replaceEntity(string $entity): int
    {
        return self::ENTITIES[$entity];
    }
}
|
92
|
|
|
|
|
93
|
|
|
|