1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types = 1); |
4
|
|
|
|
5
|
|
|
namespace App\EventSubscriber; |
6
|
|
|
|
7
|
|
|
use App\Controller\XtoolsController; |
8
|
|
|
use App\Helper\I18nHelper; |
9
|
|
|
use DateInterval; |
10
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
11
|
|
|
use Psr\Container\ContainerInterface; |
12
|
|
|
use Symfony\Component\EventDispatcher\EventSubscriberInterface; |
13
|
|
|
use Symfony\Component\HttpFoundation\Request; |
14
|
|
|
use Symfony\Component\HttpKernel\Event\ControllerEvent; |
15
|
|
|
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException; |
16
|
|
|
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException; |
17
|
|
|
use Symfony\Component\HttpKernel\KernelEvents; |
18
|
|
|
|
19
|
|
|
/** |
20
|
|
|
* A RateLimitSubscriber checks to see if users are exceeding usage limitations. |
21
|
|
|
*/ |
22
|
|
|
class RateLimitSubscriber implements EventSubscriberInterface
{
    /**
     * Rate limiting will not apply to these actions.
     */
    public const ACTION_ALLOWLIST = [
        'aboutAction',
        'indexAction',
        'loginAction',
        'oauthCallbackAction',
        'recordUsageAction',
        'showAction',
    ];

    protected CacheItemPoolInterface $cache;
    protected ContainerInterface $container;
    protected I18nHelper $i18n;
    protected Request $request;

    /** @var int Number of requests allowed in time period */
    protected int $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected int $rateDuration;

    /** @var string User agent string. */
    protected string $userAgent;

    /** @var string The referer string. */
    protected string $referer;

    /** @var string The URI. */
    protected string $uri;

    /**
     * Save the injected services for later use.
     * @param ContainerInterface $container The DI container, used to read parameters and fetch loggers/session.
     * @param I18nHelper $i18n Used to build the localized error messages.
     * @param CacheItemPoolInterface $cache Stores the per-client request counters.
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n, CacheItemPoolInterface $cache)
    {
        $this->container = $container;
        $this->i18n = $i18n;
        $this->cache = $cache;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[] Event name => name of the method on this class that handles it.
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The denylist is always checked for XtoolsController requests. Crawler logging and rate limiting
     * are skipped when rate limiting is disabled, and for allowlisted actions, logged in users,
     * subrequests and API actions.
     *
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException If the rate limit has been exceeded.
     * @throws AccessDeniedHttpException If the request matches a denylist entry.
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        // Denylisted requests are rejected even when rate limiting itself is disabled.
        $this->checkDenylist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');

        // $action is still null when the controller was not an [object, method] pair; cast it so
        // that substr() is never handed null, which is a TypeError under strict_types.
        $isApi = 'ApiAction' === substr((string)$action, -9);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, self::ACTION_ALLOWLIST, true)
            || $loggedIn
            || false === $event->isMasterRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Requests are counted per distinct X-Forwarded-For header value. Once the count exceeds
     * $rateLimit the request is denied; denied requests are not written back to the cache.
     *
     * NOTE(review): the header value is used as received from the request; whether it can be
     * trusted depends on the proxy setup in front of the application - confirm.
     *
     * @throws TooManyRequestsHttpException If the rate limit has been exceeded.
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheKey = "ratelimit.session.".md5($xff);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log the requests using the 'monolog.logger.crawler' service.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Only continue when the requested interface language differs from that of the target project.
        // $useLang comes straight from the request URI, so it must be quoted before being embedded in
        // a pattern; otherwise input such as "(" yields an invalid regular expression.
        $projectLangPattern = '/[=\/]'.preg_quote($useLang, '/').'.wik/';
        if (1 === preg_match($projectLangPattern, $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > 3) {
            $logger = $this->container->get('monolog.logger.crawler');
            $logger->info('Possible crawler detected');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT1M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Check the request against denylisted URIs, referers and user agents.
     *
     * Entries are read from the 'request_blacklist' container parameter. An entry matches only when
     * every criterion it defines (exact value or pattern) matches the current request.
     *
     * @throws AccessDeniedHttpException If the request matches a denylist entry.
     */
    private function checkDenylist(): void
    {
        // Nothing to check if no denylist has been configured.
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        $denylist = (array)$this->container->getParameter('request_blacklist');

        foreach ($denylist as $name => $item) {
            // One boolean per criterion that this entry defines.
            $matches = [];

            if (isset($item['user_agent'])) {
                $matches[] = $item['user_agent'] === $this->userAgent;
            }
            if (isset($item['user_agent_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['user_agent_pattern'].'/', $this->userAgent);
            }
            if (isset($item['referer'])) {
                $matches[] = $item['referer'] === $this->referer;
            }
            if (isset($item['referer_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['referer_pattern'].'/', $this->referer);
            }
            if (isset($item['uri'])) {
                $matches[] = $item['uri'] === $this->uri;
            }
            if (isset($item['uri_pattern'])) {
                $matches[] = 1 === preg_match('/'.$item['uri_pattern'].'/', $this->uri);
            }

            // Deny only if the entry defines at least one criterion and all of them matched.
            if (count($matches) > 0 && count($matches) === count(array_filter($matches))) {
                $this->denyAccess("Matched denylist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * This method always throws; it never returns normally.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $denylist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $denylist = false): void
    {
        // Log the denied request
        $logger = $this->container->get($denylist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit');
        $logger->info($logComment);

        if ($denylist) {
            // NOTE(review): the address literal below looks like an e-mail obfuscation placeholder
            // rather than a real contact address - confirm against the canonical source.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
269
|
|
|
|