|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types = 1); |
|
4
|
|
|
|
|
5
|
|
|
namespace App\EventSubscriber; |
|
6
|
|
|
|
|
7
|
|
|
use App\Controller\XtoolsController; |
|
8
|
|
|
use App\Helper\I18nHelper; |
|
9
|
|
|
use DateInterval; |
|
10
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
|
11
|
|
|
use Psr\Container\ContainerInterface; |
|
12
|
|
|
use Symfony\Component\EventDispatcher\EventSubscriberInterface; |
|
13
|
|
|
use Symfony\Component\HttpFoundation\Request; |
|
14
|
|
|
use Symfony\Component\HttpKernel\Event\ControllerEvent; |
|
15
|
|
|
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException; |
|
16
|
|
|
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException; |
|
17
|
|
|
use Symfony\Component\HttpKernel\KernelEvents; |
|
18
|
|
|
|
|
19
|
|
|
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * It subscribes to the kernel.controller event. For actions served by an XtoolsController it
 * first checks the request against the configured denylist, then (unless the action is exempt)
 * logs crawler-like behaviour and enforces a per-client request limit backed by the cache pool.
 */
class RateLimitSubscriber implements EventSubscriberInterface
{
    /**
     * Rate limiting will not apply to these actions.
     */
    public const ACTION_ALLOWLIST = [
        'aboutAction',
        'indexAction',
        'loginAction',
        'oauthCallbackAction',
        'recordUsageAction',
        'showAction',
    ];

    protected CacheItemPoolInterface $cache;
    protected ContainerInterface $container;
    protected I18nHelper $i18n;
    protected Request $request;

    /** @var int Number of requests allowed in time period */
    protected int $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected int $rateDuration;

    /** @var string User agent string. */
    protected string $userAgent;

    /** @var string The referer string. */
    protected string $referer;

    /** @var string The URI. */
    protected string $uri;

    /**
     * Store the collaborators for later use.
     * @param ContainerInterface $container The DI container (parameters, session and loggers are read from it).
     * @param I18nHelper $i18n Used to build the localized error messages.
     * @param CacheItemPoolInterface $cache Stores the per-client request counters.
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n, CacheItemPoolInterface $cache)
    {
        $this->container = $container;
        $this->i18n = $i18n;
        $this->cache = $cache;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[]
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The denylist is always checked for XtoolsController requests, even when rate limiting is
     * disabled or the action is otherwise exempt from it.
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException If the rate limit was exceeded.
     * @throws AccessDeniedHttpException If the request matched a denylist entry.
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkDenylist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');

        // $action is still null when the controller was not given as [instance, method].
        // Guard the string call so strict_types does not turn that case into a TypeError.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);
        $isAllowlisted = in_array($action, self::ACTION_ALLOWLIST, true);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if ($isAllowlisted || $loggedIn || false === $event->isMasterRequest() || $isApi) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Requests are counted per X-Forwarded-For header value. When the header is absent
     * no limit is applied.
     * @throws TooManyRequestsHttpException If the counter exceeds $rateLimit.
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheKey = "ratelimit.session.".md5($xff);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // denyAccess() always throws, so a rejected request does not update the counter.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log their requests through the `monolog.logger.crawler` service
     * (the original note says this ends up in /var/logs/crawlers.log; the destination is configured elsewhere).
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Only requests for a language different than that of the target project are of interest.
        // $useLang comes straight from the request URI, so it must be escaped before being used in a
        // pattern; otherwise metacharacters could break or alter the expression.
        if (1 === preg_match('/[=\/]'.preg_quote($useLang, '/').'.wik/', $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > 3) {
            $logger = $this->container->get('monolog.logger.crawler');
            $logger->info('Possible crawler detected');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT1M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Check the request against denylisted URIs, referers and user agents.
     *
     * Each entry of the `request_blacklist` parameter may define any of `user_agent`, `referer` and `uri`
     * (exact match) and/or their `*_pattern` counterparts (regular expression body, wrapped in `/` delimiters
     * here). An entry denies the request only when every criterion it defines matches.
     * @throws AccessDeniedHttpException If an entry matches.
     */
    private function checkDenylist(): void
    {
        // Nothing to do when no denylist has been configured.
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        $denylist = (array)$this->container->getParameter('request_blacklist');

        // Entry key => request value it is compared against.
        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];

        foreach ($denylist as $name => $item) {
            $matches = [];

            foreach ($subjects as $key => $value) {
                if (isset($item[$key])) {
                    $matches[] = $item[$key] === $value;
                }
                if (isset($item[$key.'_pattern'])) {
                    $matches[] = 1 === preg_match('/'.$item[$key.'_pattern'].'/', $value);
                }
            }

            // At least one criterion must be defined, and all defined criteria must have matched.
            if ([] !== $matches && !in_array(false, $matches, true)) {
                $this->denyAccess("Matched denylist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * This method always throws; it never returns normally.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $denylist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $denylist = false): void
    {
        // Log the denied request
        $logger = $this->container->get($denylist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit');
        $logger->info($logComment);

        if ($denylist) {
            // NOTE(review): the message parameter looks like an obfuscated e-mail placeholder
            // rather than a real contact address; confirm the intended value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
                "</a>",
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         *
         * NOTE(review): the first argument (600) is a fixed retry-after value and is not derived
         * from $rateDuration; confirm whether that is intentional.
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
|
269
|
|
|
|