1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types = 1); |
4
|
|
|
|
5
|
|
|
namespace App\EventSubscriber; |
6
|
|
|
|
7
|
|
|
use App\Controller\XtoolsController; |
8
|
|
|
use App\Helper\I18nHelper; |
9
|
|
|
use DateInterval; |
10
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
11
|
|
|
use Psr\Log\LoggerInterface; |
12
|
|
|
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface; |
13
|
|
|
use Symfony\Component\EventDispatcher\EventSubscriberInterface; |
14
|
|
|
use Symfony\Component\HttpFoundation\Request; |
15
|
|
|
use Symfony\Component\HttpFoundation\RequestStack; |
16
|
|
|
use Symfony\Component\HttpFoundation\Session\SessionInterface; |
17
|
|
|
use Symfony\Component\HttpKernel\Event\ControllerEvent; |
18
|
|
|
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException; |
19
|
|
|
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException; |
20
|
|
|
use Symfony\Component\HttpKernel\KernelEvents; |
21
|
|
|
|
22
|
|
|
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * On every kernel.controller event that targets an XtoolsController it:
 *  1. rejects requests matching the configured `request_denylist` parameter;
 *  2. logs and rejects suspected web crawlers (rapid requests to the same URI with different `uselang` values);
 *  3. enforces a per-client request limit keyed on the X-Forwarded-For header.
 */
class RateLimitSubscriber implements EventSubscriberInterface
{
    /**
     * Rate limiting will not apply to these actions.
     */
    public const ACTION_ALLOWLIST = [
        'aboutAction',
        'indexAction',
        'loginAction',
        'oauthCallbackAction',
        'recordUsageAction',
        'showAction',
    ];

    /**
     * Maximum number of requests to the same URI with different interface languages.
     */
    public const MAX_CRAWLER_COUNT = 3;

    /**
     * Duration in which the max number of requests to the same URI with different interface languages is allowed.
     * This is an interval specification that is passed to the DateInterval constructor.
     */
    public const MAX_CRAWLER_TIME = 'PT1M';

    protected CacheItemPoolInterface $cache;
    protected I18nHelper $i18n;
    protected LoggerInterface $crawlerLogger;
    protected LoggerInterface $denylistLogger;
    protected LoggerInterface $rateLimitLogger;
    protected ParameterBagInterface $parameterBag;
    protected Request $request;
    protected SessionInterface $session;

    /** @var int Number of requests allowed in time period */
    protected int $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected int $rateDuration;

    /** @var string User agent string. */
    protected string $userAgent;

    /** @var string The referer string. */
    protected string $referer;

    /** @var string The URI. */
    protected string $uri;

    /**
     * @param I18nHelper $i18n Used to build the localized error messages.
     * @param CacheItemPoolInterface $cache Stores the request counters.
     * @param ParameterBagInterface $parameterBag Source of the optional `request_denylist` parameter.
     * @param RequestStack $requestStack The session is fetched from it at construction time.
     * @param LoggerInterface $crawlerLogger Receives entries about suspected crawlers.
     * @param LoggerInterface $denylistLogger Receives entries about denylisted requests.
     * @param LoggerInterface $rateLimitLogger Receives entries about rate-limited requests.
     * @param int $rateLimit Number of requests allowed in the time period; 0 disables rate limiting.
     * @param int $rateDuration Length of the time period in minutes; 0 disables rate limiting.
     */
    public function __construct(
        I18nHelper $i18n,
        CacheItemPoolInterface $cache,
        ParameterBagInterface $parameterBag,
        RequestStack $requestStack,
        LoggerInterface $crawlerLogger,
        LoggerInterface $denylistLogger,
        LoggerInterface $rateLimitLogger,
        int $rateLimit,
        int $rateDuration
    ) {
        $this->i18n = $i18n;
        $this->cache = $cache;
        $this->parameterBag = $parameterBag;
        $this->session = $requestStack->getSession();
        $this->crawlerLogger = $crawlerLogger;
        $this->denylistLogger = $denylistLogger;
        $this->rateLimitLogger = $rateLimitLogger;
        $this->rateLimit = $rateLimit;
        $this->rateDuration = $rateDuration;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[]
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The denylist is always checked for XtoolsController requests. Crawler detection and rate limiting
     * are skipped when either $rateLimit or $rateDuration is 0, for allowlisted actions, for logged in
     * users, for subrequests and for actions whose name ends in "ApiAction".
     *
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkDenylist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        // $action is still null when the controller was not an [object, method] pair. Normalise it to a
        // string so that substr() does not raise a TypeError under strict_types.
        $action = is_string($action) ? $action : '';

        $loggedIn = (bool)$this->session->get('logged_in_user');
        $isApi = 'ApiAction' === substr($action, -9);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, self::ACTION_ALLOWLIST, true)
            || $loggedIn
            || false === $event->isMainRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Requests are counted per distinct X-Forwarded-For header value. Once the counter exceeds $rateLimit
     * the request is denied; denyAccess() throws, so a denied request neither increments the stored counter
     * nor extends its expiry.
     *
     * @throws TooManyRequestsHttpException
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheKey = "ratelimit.session.".sha1($xff);
        $cacheItem = $this->cache->getItem($cacheKey);

        // If increment value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every (allowed) request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log the requests, and log them to /var/logs/crawlers.log.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     *
     * Requests are counted per user agent and URI (with the uselang parameter removed). When the counter
     * exceeds self::MAX_CRAWLER_COUNT within self::MAX_CRAWLER_TIME, the request is logged and denied.
     *
     * @throws TooManyRequestsHttpException
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // The value comes straight from the request URI, so it must be escaped before being embedded in a
        // pattern; otherwise regex metacharacters in it could break or alter the expression.
        $quotedLang = preg_quote($useLang, '/');

        // Nothing to do when the requested language is the same as that of the target project.
        if (1 === preg_match("/[=\/]{$quotedLang}.wik/", $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.sha1($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // If increment value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;
        $isCrawler = $count > self::MAX_CRAWLER_COUNT;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($isCrawler) {
            $this->crawlerLogger->info('Possible crawler detected');
        }

        // Reset the clock on every request. This has to happen before denying access, because
        // denyAccess() throws.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval(self::MAX_CRAWLER_TIME));
        $this->cache->save($cacheItem);

        // If we've got a lot of hits, let's go ahead and assume it's a crawler and give a 429.
        if ($isCrawler) {
            $this->denyAccess('Web crawler detected');
        }
    }

    /**
     * Check the request against denylisted URIs and user agents
     *
     * Each entry of the `request_denylist` parameter may define any of the keys `user_agent`, `referer`,
     * `uri` (exact match) and `user_agent_pattern`, `referer_pattern`, `uri_pattern` (regular expression
     * body, wrapped in `/` delimiters here). An entry matches only if ALL of the keys it defines match.
     *
     * @throws AccessDeniedHttpException
     */
    private function checkDenylist(): void
    {
        // First check user agent and URI denylists.
        if (!$this->parameterBag->has('request_denylist')) {
            return;
        }

        $denylist = (array)$this->parameterBag->get('request_denylist');

        foreach ($denylist as $name => $item) {
            // One boolean per criterion that the entry defines.
            $matches = [];

            if (isset($item['user_agent'])) {
                $matches[] = $item['user_agent'] === $this->userAgent;
            }
            if (isset($item['user_agent_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['user_agent_pattern'].'/', $this->userAgent);
            }
            if (isset($item['referer'])) {
                $matches[] = $item['referer'] === $this->referer;
            }
            if (isset($item['referer_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['referer_pattern'].'/', $this->referer);
            }
            if (isset($item['uri'])) {
                $matches[] = $item['uri'] === $this->uri;
            }
            if (isset($item['uri_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['uri_pattern'].'/', $this->uri);
            }

            // Deny only when at least one criterion was defined and none of them failed.
            if (count($matches) > 0 && count($matches) === count(array_filter($matches))) {
                $this->denyAccess("Matched denylist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * This method never returns normally; it always throws one of the exceptions below.
     *
     * @param string $logComment Comment to include with the log entry.
     * @param bool $denylist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $denylist = false): void
    {
        // Log the denied request
        $logger = $denylist ? $this->denylistLogger : $this->rateLimitLogger;
        $logger->info($logComment);

        if ($denylist) {
            // NOTE(review): this literal looks like an e-mail obfuscation placeholder rather than a real
            // contact address - confirm the intended value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be display with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
308
|
|
|
|