|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types = 1); |
|
4
|
|
|
|
|
5
|
|
|
namespace App\EventSubscriber; |
|
6
|
|
|
|
|
7
|
|
|
use App\Controller\XtoolsController; |
|
8
|
|
|
use App\Helper\I18nHelper; |
|
9
|
|
|
use DateInterval; |
|
10
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
|
11
|
|
|
use Psr\Log\LoggerInterface; |
|
12
|
|
|
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface; |
|
13
|
|
|
use Symfony\Component\EventDispatcher\EventSubscriberInterface; |
|
14
|
|
|
use Symfony\Component\HttpFoundation\Request; |
|
15
|
|
|
use Symfony\Component\HttpFoundation\RequestStack; |
|
16
|
|
|
use Symfony\Component\HttpFoundation\Session\SessionInterface; |
|
17
|
|
|
use Symfony\Component\HttpKernel\Event\ControllerEvent; |
|
18
|
|
|
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException; |
|
19
|
|
|
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException; |
|
20
|
|
|
use Symfony\Component\HttpKernel\KernelEvents; |
|
21
|
|
|
|
|
22
|
|
|
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * For every kernel.controller event that targets an XtoolsController it:
 *  1. rejects requests that match an entry of the `request_denylist` parameter;
 *  2. stops if rate limiting is disabled, or the request is for an allow-listed action,
 *     a logged-in user, a subrequest or an API action;
 *  3. logs and rejects suspected crawlers (same URI requested with many interface languages);
 *  4. enforces a request limit keyed on the X-Forwarded-For header.
 */
class RateLimitSubscriber implements EventSubscriberInterface
{
    /**
     * Rate limiting will not apply to these actions.
     */
    public const ACTION_ALLOWLIST = [
        'aboutAction',
        'indexAction',
        'loginAction',
        'oauthCallbackAction',
        'recordUsageAction',
        'showAction',
    ];

    /**
     * Maximum number of requests to the same URI with different interface languages.
     */
    public const MAX_CRAWLER_COUNT = 3;

    /**
     * Duration in which the max number of requests to the same URI with different interface languages is allowed.
     * Expressed as a DateInterval specification (one minute).
     */
    public const MAX_CRAWLER_TIME = 'PT1M';

    protected CacheItemPoolInterface $cache;
    protected I18nHelper $i18n;
    protected LoggerInterface $crawlerLogger;
    protected LoggerInterface $denylistLogger;
    protected LoggerInterface $rateLimitLogger;
    protected ParameterBagInterface $parameterBag;
    protected Request $request;
    protected SessionInterface $session;

    /** @var int Number of requests allowed in time period */
    protected int $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected int $rateDuration;

    /** @var string User agent string. */
    protected string $userAgent;

    /** @var string The referer string. */
    protected string $referer;

    /** @var string The URI. */
    protected string $uri;

    /**
     * @param I18nHelper $i18n Used to build the localized error messages.
     * @param CacheItemPoolInterface $cache Stores the per-client request counters.
     * @param ParameterBagInterface $parameterBag Source of the optional `request_denylist` parameter.
     * @param RequestStack $requestStack Only used to obtain the current session.
     * @param LoggerInterface $crawlerLogger Receives "possible crawler" entries.
     * @param LoggerInterface $denylistLogger Receives entries for requests rejected by the denylist.
     * @param LoggerInterface $rateLimitLogger Receives entries for requests rejected by rate limiting.
     * @param int $rateLimit Number of requests allowed per period; 0 disables rate limiting.
     * @param int $rateDuration Length of the period in minutes; 0 disables rate limiting.
     */
    public function __construct(
        I18nHelper $i18n,
        CacheItemPoolInterface $cache,
        ParameterBagInterface $parameterBag,
        RequestStack $requestStack,
        LoggerInterface $crawlerLogger,
        LoggerInterface $denylistLogger,
        LoggerInterface $rateLimitLogger,
        int $rateLimit,
        int $rateDuration
    ) {
        $this->i18n = $i18n;
        $this->cache = $cache;
        $this->parameterBag = $parameterBag;
        $this->session = $requestStack->getSession();
        $this->crawlerLogger = $crawlerLogger;
        $this->denylistLogger = $denylistLogger;
        $this->rateLimitLogger = $rateLimitLogger;
        $this->rateLimit = $rateLimit;
        $this->rateDuration = $rateDuration;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[] Map of event name to the handling method of this class.
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The denylist is always checked. The crawler and rate-limit checks are skipped when either
     * $rateLimit or $rateDuration is zero, and for allow-listed actions, logged-in users,
     * subrequests and actions whose name ends in "ApiAction".
     *
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException When a crawler is detected or the rate limit is exceeded.
     * @throws AccessDeniedHttpException When the request matches a denylist entry.
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkDenylist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->session->get('logged_in_user');

        // $action stays null when the controller is not an [object, method] pair (e.g. an invokable
        // controller). Under strict_types, substr(null, ...) would throw a TypeError, so guard it.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, self::ACTION_ALLOWLIST, true)
            || $loggedIn
            || false === $event->isMainRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Counts requests per distinct X-Forwarded-For header value and denies access once the count
     * exceeds $rateLimit. Does nothing when the header is absent.
     *
     * @throws TooManyRequestsHttpException When the limit has been exceeded.
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheKey = "ratelimit.session.".sha1($xff);
        $cacheItem = $this->cache->getItem($cacheKey);

        // If increment value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int) $cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // denyAccess() always throws, so a rejected request neither increments the stored
        // counter nor extends its expiry.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers, log them via the crawler logger and deny them access.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     *
     * Only URIs whose query string starts with `?uselang=` are considered; everything after the
     * `=` up to the end of the URI is treated as the requested language.
     *
     * @throws TooManyRequestsHttpException When more than MAX_CRAWLER_COUNT such requests were seen.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Only continue when requesting a language that's different than that of the target project.
        // The value comes straight from the URI, so it must be escaped before being used in a pattern.
        if (1 === preg_match('/[=\/]'.preg_quote($useLang, '/').'.wik/', $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.sha1($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // If increment value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;
        $limitExceeded = $count > self::MAX_CRAWLER_COUNT;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($limitExceeded) {
            $this->crawlerLogger->info('Possible crawler detected');
        }

        // Reset the clock on every request. This is saved before denying access, so the
        // counter keeps growing (and its expiry keeps moving) while the requests continue.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval(self::MAX_CRAWLER_TIME));
        $this->cache->save($cacheItem);

        // If we've got a lot of hits, let's go ahead and assume it's a crawler and give a 429.
        if ($limitExceeded) {
            $this->denyAccess('Web crawler detected');
        }
    }

    /**
     * Check the request against denylisted URIs, referers and user agents.
     *
     * Each entry of the `request_denylist` parameter may define any of `user_agent`, `referer`, `uri`
     * (exact comparison) and `user_agent_pattern`, `referer_pattern`, `uri_pattern` (regular expression
     * body, wrapped in `/` delimiters here). An entry matches only when ALL of the conditions it
     * defines are satisfied; entries that define none of them never match.
     *
     * @throws AccessDeniedHttpException When an entry matches the current request.
     */
    private function checkDenylist(): void
    {
        // First check user agent and URI denylists.
        if (!$this->parameterBag->has('request_denylist')) {
            return;
        }

        $denylist = (array)$this->parameterBag->get('request_denylist');

        foreach ($denylist as $name => $item) {
            // One boolean per condition that the entry defines.
            $matches = [];

            if (isset($item['user_agent'])) {
                $matches[] = $item['user_agent'] === $this->userAgent;
            }
            if (isset($item['user_agent_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['user_agent_pattern'].'/', $this->userAgent);
            }
            if (isset($item['referer'])) {
                $matches[] = $item['referer'] === $this->referer;
            }
            if (isset($item['referer_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['referer_pattern'].'/', $this->referer);
            }
            if (isset($item['uri'])) {
                $matches[] = $item['uri'] === $this->uri;
            }
            if (isset($item['uri_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['uri_pattern'].'/', $this->uri);
            }

            // array_filter() drops the false results, so equal counts mean every condition held.
            if (count($matches) > 0 && count($matches) === count(array_filter($matches))) {
                $this->denyAccess("Matched denylist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * This method never returns normally: it always throws one of the exceptions below.
     *
     * @param string $logComment Comment to include with the log entry.
     * @param bool $denylist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException When $denylist is false.
     * @throws AccessDeniedHttpException When $denylist is true.
     */
    private function denyAccess(string $logComment, bool $denylist = false): void
    {
        // Log the denied request
        $logger = $denylist ? $this->denylistLogger : $this->rateLimitLogger;
        $logger->info($logComment);

        if ($denylist) {
            // NOTE(review): the literal below looks like an obfuscated e-mail placeholder rather than
            // a real contact address - confirm the intended value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         * The first argument (600) is the Retry-After value, in seconds.
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
|
308
|
|
|
|