RateLimitSubscriber::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 20
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 9
nc 1
nop 9
dl 0
loc 20
rs 9.9666
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\EventSubscriber;
6
7
use App\Controller\XtoolsController;
8
use App\Helper\I18nHelper;
9
use DateInterval;
10
use Psr\Cache\CacheItemPoolInterface;
11
use Psr\Log\LoggerInterface;
12
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
13
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
14
use Symfony\Component\HttpFoundation\Request;
15
use Symfony\Component\HttpFoundation\RequestStack;
16
use Symfony\Component\HttpFoundation\Session\SessionInterface;
17
use Symfony\Component\HttpKernel\Event\ControllerEvent;
18
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
19
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
20
use Symfony\Component\HttpKernel\KernelEvents;
21
22
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * It listens to the kernel.controller event and, for XtoolsController actions only:
 *  - rejects requests matching an entry of the `request_denylist` parameter;
 *  - counts requests per X-Forwarded-For value and rejects those over the configured limit;
 *  - detects rapid requests to the same URI that differ only in interface language (likely crawlers).
 */
class RateLimitSubscriber implements EventSubscriberInterface
{
    /**
     * Rate limiting will not apply to these actions.
     */
    public const ACTION_ALLOWLIST = [
        'aboutAction',
        'indexAction',
        'loginAction',
        'oauthCallbackAction',
        'recordUsageAction',
        'showAction',
    ];

    /**
     * Maximum number of requests to the same URI with different interface languages.
     */
    public const MAX_CRAWLER_COUNT = 3;

    /**
     * Duration in which the max number of requests to the same URI with different interface languages is allowed.
     */
    public const MAX_CRAWLER_TIME = 'PT1M';

    protected CacheItemPoolInterface $cache;
    protected I18nHelper $i18n;
    protected LoggerInterface $crawlerLogger;
    protected LoggerInterface $denylistLogger;
    protected LoggerInterface $rateLimitLogger;
    protected ParameterBagInterface $parameterBag;
    protected Request $request;
    protected SessionInterface $session;

    /** @var int Number of requests allowed in time period */
    protected int $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected int $rateDuration;

    /** @var string User agent string. */
    protected string $userAgent;

    /** @var string The referer string. */
    protected string $referer;

    /** @var string The URI. */
    protected string $uri;

    /**
     * @param I18nHelper $i18n Used to build the localized error messages.
     * @param CacheItemPoolInterface $cache Stores the per-client and per-URI request counters.
     * @param ParameterBagInterface $parameterBag Source of the optional `request_denylist` parameter.
     * @param RequestStack $requestStack The session is resolved from it at construction time.
     * @param LoggerInterface $crawlerLogger Receives entries about suspected crawlers.
     * @param LoggerInterface $denylistLogger Receives entries about denylisted requests.
     * @param LoggerInterface $rateLimitLogger Receives entries about rate-limited requests.
     * @param int $rateLimit Number of requests allowed in the time period; 0 disables rate limiting.
     * @param int $rateDuration Length of the time period in minutes; 0 disables rate limiting.
     */
    public function __construct(
        I18nHelper $i18n,
        CacheItemPoolInterface $cache,
        ParameterBagInterface $parameterBag,
        RequestStack $requestStack,
        LoggerInterface $crawlerLogger,
        LoggerInterface $denylistLogger,
        LoggerInterface $rateLimitLogger,
        int $rateLimit,
        int $rateDuration
    ) {
        $this->i18n = $i18n;
        $this->cache = $cache;
        $this->parameterBag = $parameterBag;
        $this->session = $requestStack->getSession();
        $this->crawlerLogger = $crawlerLogger;
        $this->denylistLogger = $denylistLogger;
        $this->rateLimitLogger = $rateLimitLogger;
        $this->rateLimit = $rateLimit;
        $this->rateDuration = $rateDuration;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[]
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The denylist is always checked. Rate limiting and crawler detection are skipped when either
     * limit is zero, for allowlisted actions, logged-in users, subrequests and API actions.
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkDenylist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->session->get('logged_in_user');

        // $action stays null for an invokable controller; guard it so that substr()
        // is never handed null, which is a TypeError under strict_types.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, self::ACTION_ALLOWLIST, true)
            || $loggedIn
            || false === $event->isMainRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Requests are counted per X-Forwarded-For header value. A request that pushes the counter
     * over $rateLimit is denied before the counter is saved.
     * @throws TooManyRequestsHttpException
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheItem = $this->cache->getItem('ratelimit.session.'.sha1($xff));

        // Continue from the cached counter if there is one, otherwise this is the first request.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every (permitted) request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers, log the requests via the crawler logger, and deny access.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     * @throws TooManyRequestsHttpException
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];

        // NOTE(review): this only matches when uselang is the first query parameter, and the
        // capture also swallows any parameters that follow it — confirm that is intended.
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Nothing suspicious if the requested interface language also appears in the URI as the
        // prefix of a "wik…" project name. The value comes straight from the request, so it
        // must be quoted before being embedded in a pattern.
        $quotedLang = preg_quote($useLang, '/');
        if (1 === preg_match("/[=\/]$quotedLang.wik/", $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.sha1($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Continue from the cached counter if there is one, otherwise this is the first request.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;
        $limitExceeded = $count > self::MAX_CRAWLER_COUNT;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($limitExceeded) {
            $this->crawlerLogger->info('Possible crawler detected');
        }

        // Reset the clock on every request. This is saved before denying so that
        // a persistent crawler keeps extending its own window.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval(self::MAX_CRAWLER_TIME));
        $this->cache->save($cacheItem);

        // If we've got a lot of hits, let's go ahead and assume it's a crawler and give a 429.
        if ($limitExceeded) {
            $this->denyAccess('Web crawler detected');
        }
    }

    /**
     * Check the request against denylisted URIs, referers and user agents.
     *
     * Each entry of the `request_denylist` parameter may define exact values (`user_agent`, `referer`,
     * `uri`) and/or regular expressions (`user_agent_pattern`, `referer_pattern`, `uri_pattern`).
     * The request is denied when every criterion an entry defines matches.
     * @throws AccessDeniedHttpException
     */
    private function checkDenylist(): void
    {
        if (!$this->parameterBag->has('request_denylist')) {
            return;
        }

        $denylist = (array)$this->parameterBag->get('request_denylist');

        // Criterion name => the request value it is compared against.
        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];

        foreach ($denylist as $name => $item) {
            $matches = [];

            foreach ($subjects as $key => $value) {
                if (isset($item[$key])) {
                    $matches[] = $item[$key] === $value;
                }
                if (isset($item[$key.'_pattern'])) {
                    // Patterns are wrapped in '/' delimiters as-is, so a literal slash
                    // inside a configured pattern has to be escaped by its author.
                    $matches[] = 1 === preg_match('/'.$item[$key.'_pattern'].'/', $value);
                }
            }

            // Deny only when the entry defines at least one criterion and none of them failed.
            if ([] !== $matches && !in_array(false, $matches, true)) {
                $this->denyAccess("Matched denylist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * This method never returns normally; it always throws one of the exceptions below.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $denylist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $denylist = false): void
    {
        // Log the denied request
        $logger = $denylist ? $this->denylistLogger : $this->rateLimitLogger;
        $logger->info($logComment);

        if ($denylist) {
            // NOTE(review): the address literal below looks like an e-mail obfuscation
            // placeholder rather than a real address — confirm against the upstream source.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $loginLink = "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>";
        $apiLink = "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
            $this->i18n->msg('api') .
            "</a>";
        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            $loginLink,
            $apiLink,
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be display with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
308