Passed
Pull Request — main (#442)
by MusikAnimal
08:15 queued 04:14
created

RateLimitSubscriber::checkDenylist()   B

Complexity

Conditions 11
Paths 130

Size

Total Lines 33
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 19
nc 130
nop 0
dl 0
loc 33
rs 7.0666
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\EventSubscriber;
6
7
use App\Controller\XtoolsController;
8
use App\Helper\I18nHelper;
9
use DateInterval;
10
use Psr\Cache\CacheItemPoolInterface;
11
use Psr\Container\ContainerInterface;
12
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
13
use Symfony\Component\HttpFoundation\Request;
14
use Symfony\Component\HttpKernel\Event\ControllerEvent;
15
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
16
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
17
use Symfony\Component\HttpKernel\KernelEvents;
18
19
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * It listens for the kernel.controller event. For XtoolsController actions it first rejects requests
 * that match a configured denylist entry, then logs likely crawlers and enforces a request limit keyed
 * on the X-Forwarded-For header.
 */
class RateLimitSubscriber implements EventSubscriberInterface
{
    /**
     * Rate limiting will not apply to these actions.
     */
    public const ACTION_ALLOWLIST = [
        'aboutAction',
        'indexAction',
        'loginAction',
        'oauthCallbackAction',
        'recordUsageAction',
        'showAction',
    ];

    protected CacheItemPoolInterface $cache;

    /**
     * NOTE(review): this is typed as the PSR-11 interface, which does not declare getParameter() or
     * hasParameter(). The calls in this class rely on the injected service being a parameter-aware
     * sub-type such as Symfony\Component\DependencyInjection\ContainerInterface.
     */
    protected ContainerInterface $container;

    protected I18nHelper $i18n;

    protected Request $request;

    /** @var int Number of requests allowed in time period */
    protected int $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected int $rateDuration;

    /** @var string User agent string. */
    protected string $userAgent;

    /** @var string The referer string. */
    protected string $referer;

    /** @var string The URI. */
    protected string $uri;

    /**
     * Save the container, i18n helper and cache pool for later use.
     * @param ContainerInterface $container The DI container.
     * @param I18nHelper $i18n
     * @param CacheItemPoolInterface $cache
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n, CacheItemPoolInterface $cache)
    {
        $this->container = $container;
        $this->i18n = $i18n;
        $this->cache = $cache;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[]
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The denylist is always checked for XtoolsController requests. Rate limiting is skipped when either
     * configured limit is zero, and for allowlisted actions, logged in users, subrequests and API actions.
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkDenylist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');

        // $action is still null when the controller was not an [instance, method] pair; substr() must not
        // receive null under strict_types, so only strings are inspected for the "ApiAction" suffix.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);
        $isAllowlisted = in_array($action, self::ACTION_ALLOWLIST, true);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if ($isAllowlisted || $loggedIn || false === $event->isMasterRequest() || $isApi) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Counts requests per X-Forwarded-For header value in the cache and denies access once the count
     * exceeds $rateLimit. Does nothing when the header is absent or empty.
     * @throws TooManyRequestsHttpException
     */
    private function xffRateLimit(): void
    {
        $xff = $this->request->headers->get('x-forwarded-for', '');

        if ('' === $xff) {
            // Happens in local environments, or outside of Cloud Services.
            return;
        }

        $cacheKey = "ratelimit.session.".md5($xff);
        $cacheItem = $this->cache->getItem($cacheKey);

        // If increment value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int) $cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // denyAccess() always throws, so a denied request is not written back to the cache.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log their requests through the `monolog.logger.crawler` service.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        // Captures everything following "?uselang=" in the request URI.
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // The interface language is the same as that of the target project, which is not suspicious.
        // The value comes straight from the request, so it is escaped before being used in a pattern.
        if (1 === preg_match('/[=\/]'.preg_quote($useLang, '/').'.wik/', $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // If increment value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > 3) {
            $logger = $this->container->get('monolog.logger.crawler');
            $logger->info('Possible crawler detected');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT1M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Check the request against denylisted URIs, referers and user agents.
     *
     * Entries are read from the `request_blacklist` container parameter; nothing happens when that
     * parameter is not defined.
     * @throws AccessDeniedHttpException
     */
    private function checkDenylist(): void
    {
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        $denylist = (array)$this->container->getParameter('request_blacklist');

        foreach ($denylist as $name => $item) {
            if ($this->matchesDenylistEntry((array)$item)) {
                $this->denyAccess("Matched denylist entry `$name`", true);
            }
        }
    }

    /**
     * Whether the current request satisfies every criterion of a single denylist entry.
     *
     * Supported keys are `user_agent`, `referer` and `uri` (exact comparison) and the same keys with a
     * `_pattern` suffix (regular expression body, wrapped in `/` delimiters here).
     * @param mixed[] $item The denylist entry.
     * @return bool True if the entry defines at least one criterion and all of its criteria match.
     */
    private function matchesDenylistEntry(array $item): bool
    {
        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];
        $hasCriteria = false;

        foreach ($subjects as $key => $value) {
            if (isset($item[$key])) {
                $hasCriteria = true;
                if ($item[$key] !== $value) {
                    return false;
                }
            }

            $patternKey = $key.'_pattern';
            if (isset($item[$patternKey])) {
                $hasCriteria = true;
                if (1 !== preg_match('/'.$item[$patternKey].'/', $value)) {
                    return false;
                }
            }
        }

        // An entry without any recognised criterion must never match.
        return $hasCriteria;
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $denylist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $denylist = false): void
    {
        // Log the denied request
        $logger = $this->container->get($denylist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit');
        $logger->info($logComment);

        if ($denylist) {
            // NOTE(review): '[email protected]' looks like an e-mail obfuscation placeholder rather than
            // a real contact address; confirm the intended value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         *
         * The first argument (600) is the retry-after value; it is fixed and not derived from $rateDuration.
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
269