1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* This file contains only the RateLimitSubscriber class. |
4
|
|
|
*/ |
5
|
|
|
|
6
|
|
|
declare(strict_types = 1); |
7
|
|
|
|
8
|
|
|
namespace App\EventSubscriber; |
9
|
|
|
|
10
|
|
|
use App\Controller\XtoolsController; |
11
|
|
|
use App\Helper\I18nHelper; |
12
|
|
|
use DateInterval; |
13
|
|
|
use Symfony\Component\DependencyInjection\ContainerInterface; |
14
|
|
|
use Symfony\Component\EventDispatcher\EventSubscriberInterface; |
15
|
|
|
use Symfony\Component\HttpFoundation\Request; |
16
|
|
|
use Symfony\Component\HttpKernel\Event\ControllerEvent; |
17
|
|
|
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException; |
18
|
|
|
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException; |
19
|
|
|
use Symfony\Component\HttpKernel\KernelEvents; |
20
|
|
|
|
21
|
|
|
/** |
22
|
|
|
* A RateLimitSubscriber checks to see if users are exceeding usage limitations. |
23
|
|
|
*/ |
24
|
|
|
class RateLimitSubscriber implements EventSubscriberInterface
{
    /** @var ContainerInterface The DI container, used to look up services and parameters lazily. */
    protected $container;

    /** @var I18nHelper For i18n and l10n of the error messages shown to the user. */
    protected $i18n;

    /** @var int Number of requests allowed in the time period (0 disables rate limiting). */
    protected $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted (0 disables rate limiting). */
    protected $rateDuration;

    /** @var \Symfony\Component\Cache\Adapter\TraceableAdapter Cache adapter holding the request counters. */
    protected $cache;

    /** @var Request The Request object of the event being handled. */
    protected $request;

    /** @var string User agent string ('' when the header is absent). */
    protected $userAgent;

    /** @var string The referer string ('' when the header is absent). */
    protected $referer;

    /** @var string The request URI, including the query string. */
    protected $uri;

    /**
     * Save the container and i18n helper for later use.
     * @param ContainerInterface $container The DI container.
     * @param I18nHelper $i18n
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n)
    {
        $this->container = $container;
        $this->i18n = $i18n;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[] Event name => handler method name.
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The blacklist is always enforced for XtoolsController requests. Rate limiting is then skipped when it
     * is disabled by configuration, and for whitelisted actions, logged in users, subrequests and API actions.
     *
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException When the rate limit has been exceeded.
     * @throws AccessDeniedHttpException When the request matches a blacklist entry.
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // When a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName'].
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        // Invokable controllers have no method name. Normalise to a string so the string
        // functions below don't raise a TypeError under strict_types.
        $action = (string)$action;

        $this->cache = $this->container->get('cache.app');
        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkBlacklist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');
        $isApi = 'ApiAction' === substr($action, -9);

        /**
         * Rate limiting will not apply to these actions.
         * @var string[]
         */
        $actionWhitelist = [
            'indexAction', 'showAction', 'aboutAction', 'loginAction', 'recordUsageAction', 'oauthCallbackAction',
        ];

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, $actionWhitelist, true)
            || $loggedIn
            || false === $event->isMasterRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Requests are counted per X-Forwarded-For header value; once the counter exceeds $rateLimit
     * the request is denied.
     *
     * @throws TooManyRequestsHttpException When the rate limit has been exceeded.
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheKey = "ratelimit.session.".md5($xff);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // denyAccess() always throws, so a denied request is not counted or saved.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log the requests through the 'monolog.logger.crawler' service.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Nothing suspicious when the interface language is the same as that of the target project.
        // $useLang comes straight from the request, so it must be escaped before going into a pattern.
        if (1 === preg_match('/[=\/]'.preg_quote($useLang, '/').'.wik/', $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > 3) {
            $logger = $this->container->get('monolog.logger.crawler');
            $logger->info('Possible crawler detected');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT1M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Check the request against blacklisted URIs, referers and user agents.
     *
     * Each entry of the 'request_blacklist' parameter may define exact values ('user_agent', 'referer', 'uri')
     * and/or regular expressions ('user_agent_pattern', 'referer_pattern', 'uri_pattern'). An entry matches
     * only when it defines at least one criterion and every criterion it defines matches the request.
     *
     * @throws AccessDeniedHttpException When the request matches a blacklist entry.
     */
    private function checkBlacklist(): void
    {
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        $blacklist = (array)$this->container->getParameter('request_blacklist');

        // Request attribute to test, keyed by the blacklist option that targets it.
        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];

        foreach ($blacklist as $name => $item) {
            $matches = [];

            foreach ($subjects as $key => $subject) {
                if (isset($item[$key])) {
                    $matches[] = $item[$key] === $subject;
                }
                if (isset($item[$key.'_pattern'])) {
                    $matches[] = 1 === preg_match('/'.$item[$key.'_pattern'].'/', $subject);
                }
            }

            // Deny only if the entry had criteria and none of them failed.
            if (count($matches) > 0 && !in_array(false, $matches, true)) {
                $this->denyAccess("Matched blacklist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * This method always throws; it never returns normally.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $blacklist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $blacklist = false): void
    {
        // Log the denied request to the channel matching the reason for denial.
        $logger = $this->container->get($blacklist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit');
        $logger->info($logComment);

        if ($blacklist) {
            // NOTE(review): this contact address looks like an obfuscation placeholder; confirm the real value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         *
         * NOTE(review): the first argument (Retry-After) is fixed at 600 regardless of $rateDuration;
         * confirm this is intended.
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
274
|
|
|
|