RateLimitSubscriber::checkDenylist()   B
last analyzed

Complexity

Conditions 11
Paths 130

Size

Total Lines 33
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 19
nc 130
nop 0
dl 0
loc 33
rs 7.0666
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the method body into its own well-named method.

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\EventSubscriber;
6
7
use App\Controller\XtoolsController;
8
use App\Helper\I18nHelper;
9
use DateInterval;
10
use Psr\Cache\CacheItemPoolInterface;
11
use Psr\Log\LoggerInterface;
12
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
13
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
14
use Symfony\Component\HttpFoundation\Request;
15
use Symfony\Component\HttpFoundation\RequestStack;
16
use Symfony\Component\HttpFoundation\Session\SessionInterface;
17
use Symfony\Component\HttpKernel\Event\ControllerEvent;
18
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
19
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
20
use Symfony\Component\HttpKernel\KernelEvents;
21
22
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * It listens for the kernel.controller event and, for XtoolsController actions only, first rejects
 * requests matching the configured denylist, then logs/blocks suspected crawlers and finally applies
 * a per-client request limit keyed on the X-Forwarded-For header.
 */
class RateLimitSubscriber implements EventSubscriberInterface
{
    /**
     * Rate limiting will not apply to these actions.
     */
    public const ACTION_ALLOWLIST = [
        'aboutAction',
        'indexAction',
        'loginAction',
        'oauthCallbackAction',
        'recordUsageAction',
        'showAction',
    ];

    /**
     * Maximum number of requests to the same URI with different interface languages.
     */
    public const MAX_CRAWLER_COUNT = 3;

    /**
     * Duration in which the max number of requests to the same URI with different interface languages is allowed.
     * This is an ISO 8601 duration string, as accepted by DateInterval.
     */
    public const MAX_CRAWLER_TIME = 'PT1M';

    protected CacheItemPoolInterface $cache;
    protected I18nHelper $i18n;
    protected LoggerInterface $crawlerLogger;
    protected LoggerInterface $denylistLogger;
    protected LoggerInterface $rateLimitLogger;
    protected ParameterBagInterface $parameterBag;
    protected Request $request;
    protected SessionInterface $session;

    /** @var int Number of requests allowed in time period */
    protected int $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected int $rateDuration;

    /** @var string User agent string. */
    protected string $userAgent;

    /** @var string The referer string. */
    protected string $referer;

    /** @var string The URI. */
    protected string $uri;

    /**
     * @param I18nHelper $i18n
     * @param CacheItemPoolInterface $cache
     * @param ParameterBagInterface $parameterBag
     * @param RequestStack $requestStack
     * @param LoggerInterface $crawlerLogger
     * @param LoggerInterface $denylistLogger
     * @param LoggerInterface $rateLimitLogger
     * @param int $rateLimit Number of requests allowed per period; 0 disables rate limiting.
     * @param int $rateDuration Length of the period in minutes; 0 disables rate limiting.
     */
    public function __construct(
        I18nHelper $i18n,
        CacheItemPoolInterface $cache,
        ParameterBagInterface $parameterBag,
        RequestStack $requestStack,
        LoggerInterface $crawlerLogger,
        LoggerInterface $denylistLogger,
        LoggerInterface $rateLimitLogger,
        int $rateLimit,
        int $rateDuration
    ) {
        $this->i18n = $i18n;
        $this->cache = $cache;
        $this->parameterBag = $parameterBag;
        $this->session = $requestStack->getSession();
        $this->crawlerLogger = $crawlerLogger;
        $this->denylistLogger = $denylistLogger;
        $this->rateLimitLogger = $rateLimitLogger;
        $this->rateLimit = $rateLimit;
        $this->rateDuration = $rateDuration;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[] Handler method names, keyed by event name.
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        // The denylist applies to every XtoolsController request, even when rate limiting is disabled.
        $this->checkDenylist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->session->get('logged_in_user');

        // $action stays null when the controller is not an [object, method] pair; with strict types
        // enabled, handing null to substr() would raise a TypeError, so only inspect real strings.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, self::ACTION_ALLOWLIST, true)
            || $loggedIn
            || false === $event->isMainRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     * Requests are counted per X-Forwarded-For value; requests without that header are not limited.
     * @throws TooManyRequestsHttpException
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheKey = "ratelimit.session.".sha1($xff);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the cached value if there is one, otherwise start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // Denied requests are not stored, so they neither raise the count nor extend the expiry.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers, log the requests via the crawler logger, and deny access to them.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     * @throws TooManyRequestsHttpException
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];

        // Capture only the language code itself, not any query parameters that follow it.
        if (1 !== preg_match('/\?uselang=([^&]*)/', $this->uri, $useLangMatches)) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Asking for the interface in the target project's own language is not suspicious.
        // The language comes straight from the request, so it must be escaped before use in a pattern.
        if (1 === preg_match('/[=\/]'.preg_quote($useLang, '/').'.wik/', $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.sha1($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the cached value if there is one, otherwise start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;
        $isCrawler = $count > self::MAX_CRAWLER_COUNT;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($isCrawler) {
            $this->crawlerLogger->info('Possible crawler detected');
        }

        // Reset the clock on every request. This happens before denying access so that a crawler
        // which keeps hammering the same URI stays blocked.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval(self::MAX_CRAWLER_TIME));
        $this->cache->save($cacheItem);

        // If we've got a lot of hits, let's go ahead and assume it's a crawler and give a 429.
        if ($isCrawler) {
            $this->denyAccess('Web crawler detected');
        }
    }

    /**
     * Check the request against denylisted URIs, referers and user agents.
     * Entries are read from the optional 'request_denylist' parameter.
     * @throws AccessDeniedHttpException
     */
    private function checkDenylist(): void
    {
        if (!$this->parameterBag->has('request_denylist')) {
            return;
        }

        $denylist = (array)$this->parameterBag->get('request_denylist');

        foreach ($denylist as $name => $item) {
            if ($this->matchesDenylistEntry($item)) {
                $this->denyAccess("Matched denylist entry `$name`", true);
            }
        }
    }

    /**
     * Whether the current request satisfies every criterion of a single denylist entry.
     * An entry may compare the user agent, referer and URI, each either exactly ('user_agent', 'referer',
     * 'uri') or against a regular expression body without delimiters ('user_agent_pattern', etc.).
     * @param mixed $item The denylist entry; anything other than an array never matches.
     * @return bool True only if the entry defines at least one criterion and all of them match.
     */
    private function matchesDenylistEntry($item): bool
    {
        if (!is_array($item)) {
            return false;
        }

        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];
        $criteria = 0;

        foreach ($subjects as $field => $value) {
            if (isset($item[$field])) {
                $criteria++;
                if ($item[$field] !== $value) {
                    return false;
                }
            }

            $patternField = $field.'_pattern';
            if (isset($item[$patternField])) {
                $criteria++;
                if (1 !== preg_match('/'.$item[$patternField].'/', $value)) {
                    return false;
                }
            }
        }

        // An entry without any recognized criterion must not block every request.
        return $criteria > 0;
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * This method always throws; it never returns normally.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $denylist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $denylist = false): void
    {
        // Log the denied request
        $logger = $denylist ? $this->denylistLogger : $this->rateLimitLogger;
        $logger->info($logComment);

        if ($denylist) {
            // NOTE(review): the contact address literal below looks like an obfuscation placeholder — confirm.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
308