1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Limonte; |
4
|
|
|
|
5
|
|
|
/**
 * A single line of an Adblock filter list.
 *
 * On construction the line is classified as a comment, an HTML (element
 * hiding) rule, or a URI rule. Only URI rules are compiled into a regular
 * expression that {@see AdblockRule::matchUrl()} can test URLs against.
 */
class AdblockRule
{
    /**
     * Ordered map of PCRE pattern => replacement, applied one after another
     * to the raw rule text by makeRegex():
     *
     *  1. an option suffix containing "domain=" is replaced by the text
     *     captured after "domain=" (an optional leading "~" is dropped);
     *  2. any remaining option suffix is removed;
     *  3. the characters \ . $ + ? { } ( ) [ ] / are backslash-escaped.
     *
     * "^", "*" and "|" are intentionally not escaped here: makeRegex()
     * gives them their Adblock meaning afterwards.
     *
     * NOTE(review): in patterns 1 and 2 the bracketed option list is a PCRE
     * character class (any run of those characters, "|" included), not an
     * alternation of option names. Left unchanged because tightening it
     * would alter which rules get their suffix stripped.
     */
    const FILTER_REGEXES = [
        '/\$[script|image|stylesheet|object|object\-subrequest|subdocument|xmlhttprequest|websocket|webrtc|popup|generichide|genericblock|document|elemhide|third\-party|domain|rewrite]+.*domain=~?(.*)/i' => '$1',
        '/\$[script|image|stylesheet|object|object\-subrequest|subdocument|xmlhttprequest|websocket|webrtc|popup|generichide|genericblock|document|elemhide|third\-party|domain|rewrite]+.*$/i' => '',
        '/([\\\.\$\+\?\{\}\(\)\[\]\/])/' => '\\\\$1'
    ];

    /**
     * Rule text, with a leading "@@" removed for exception rules.
     *
     * @var string
     */
    private $rule;

    /**
     * Body of the compiled pattern (without delimiters). Stays null for
     * comments and HTML rules, which are never compiled.
     *
     * @var string|null
     */
    private $regex;

    /**
     * @var bool
     */
    private $isComment = false;

    /**
     * @var bool
     */
    private $isHtml = false;

    /**
     * @var bool
     */
    private $isException = false;

    /**
     * Classifies the rule and, for URI rules, compiles it to a regex.
     *
     * @param string $rule one line of a filter list
     *
     * @throws InvalidRuleException when a URI rule is empty or cannot be compiled
     */
    public function __construct(string $rule)
    {
        $this->rule = $rule;

        // "@@" marks an exception rule; the marker itself is not part of the pattern.
        if (Str::startsWith($this->rule, '@@')) {
            $this->isException = true;
            $this->rule = mb_substr($this->rule, 2);
        }

        // Classification is done on the raw input, i.e. before "@@" was stripped.
        if (Str::startsWith($rule, '!') || Str::startsWith($rule, '[Adblock')) {
            // comment
            $this->isComment = true;
        } elseif (Str::contains($rule, '##') || Str::contains($rule, '#@#')) {
            // HTML rule
            $this->isHtml = true;
        } else {
            // URI rule
            $this->makeRegex();
        }
    }

    /**
     * Tests a URL against the compiled rule.
     *
     * Rules without a compiled regex (comments and HTML rules) never match.
     *
     * @param string $url
     *
     * @return bool
     *
     * @throws \Exception when matching raises an exception; the original is
     *                    attached as the previous exception
     */
    public function matchUrl($url)
    {
        $regex = $this->getRegex();

        // Without this guard the pattern would be "//", which matches every URL.
        if (null === $regex) {
            return false;
        }

        try {
            return 1 === preg_match('/' . $regex . '/', $url);
        } catch (\Exception $e) {
            // Keep the exception type callers already catch, but preserve the
            // message and the cause instead of stringifying the whole exception.
            throw new \Exception($e->getMessage(), (int) $e->getCode(), $e);
        }
    }

    /**
     * @return string|null regex body without delimiters, or null when the
     *                     rule is a comment or an HTML rule
     */
    public function getRegex()
    {
        return $this->regex;
    }

    /**
     * @return string the rule text (without a leading "@@")
     */
    public function getRule()
    {
        return $this->rule;
    }

    /**
     * @return bool
     */
    public function isComment()
    {
        return $this->isComment;
    }

    /**
     * @return bool
     */
    public function isHtml()
    {
        return $this->isHtml;
    }

    /**
     * @return bool
     */
    public function isException()
    {
        return $this->isException;
    }

    /**
     * Translates the rule text into a regex body and stores it in $regex.
     *
     * @throws InvalidRuleException when the rule is empty or PCRE fails
     *                              while rewriting it
     */
    private function makeRegex()
    {
        // Strict comparison: empty() would also reject the one-character rule "0".
        if ('' === $this->rule) {
            throw new InvalidRuleException("Empty rule");
        }

        $regex = $this->rule;

        foreach (self::FILTER_REGEXES as $pattern => $replacement) {
            $regex = preg_replace($pattern, $replacement, $regex);

            // preg_replace() signals a PCRE failure by returning null.
            if (null === $regex) {
                throw new InvalidRuleException("Unable to parse rule: " . $this->rule);
            }
        }

        // Separator character ^ matches anything but a letter, a digit, or
        // one of the following: _ - . %. The end of the address is also
        // accepted as separator.
        $regex = str_replace("^", "([^\w\d_\-\.%]|$)", $regex);

        // * symbol
        $regex = str_replace("*", ".*", $regex);

        // | in the end means the end of the address
        if (Str::endsWith($regex, '|')) {
            $regex = mb_substr($regex, 0, mb_strlen($regex) - 1) . '$';
        }

        if (Str::startsWith($regex, '||')) {
            // || in the beginning means beginning of the domain name
            if (mb_strlen($regex) > 2) {
                // http://tools.ietf.org/html/rfc3986#appendix-B
                $regex = "^([^:\/?#]+:)?(\/\/([^\/?#]*\.)?)?" . mb_substr($regex, 2);
            }
        } elseif (Str::startsWith($regex, '|')) {
            // | in the beginning means start of the address
            $regex = '^' . mb_substr($regex, 1);
        }

        // Other | symbols should be escaped. A pipe directly followed by "$"
        // is skipped: that is the alternation inside the separator group
        // inserted above and must stay a regex operator.
        $regex = preg_replace('/\|(?!\$)/', '\\|', $regex);

        $this->regex = $regex;
    }
}
166
|
|
|
|