Test Setup Failed
Pull Request — main (#426)
by MusikAnimal
17:10 queued 11:44
created

RateLimitSubscriber::checkBlacklist()   B

Complexity

Conditions 11
Paths 130

Size

Total Lines 33
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 19
nc 130
nop 0
dl 0
loc 33
rs 7.0666
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * This file contains only the RateLimitSubscriber class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace App\EventSubscriber;
9
10
use App\Controller\XtoolsController;
11
use App\Helper\I18nHelper;
12
use DateInterval;
13
use Symfony\Component\DependencyInjection\ContainerInterface;
14
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
15
use Symfony\Component\HttpFoundation\Request;
16
use Symfony\Component\HttpKernel\Event\ControllerEvent;
17
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
18
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
19
use Symfony\Component\HttpKernel\KernelEvents;
20
21
/**
22
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
23
 */
24
class RateLimitSubscriber implements EventSubscriberInterface
25
{
26
27
    /** @var ContainerInterface The DI container; used to look up parameters, the cache, the session and loggers. */
28
    protected $container;
29
30
    /** @var I18nHelper Supplies the localized messages shown when access is denied. */
31
    protected $i18n;
32
33
    /** @var int Number of requests allowed in the time period. Zero disables rate limiting. */
34
    protected $rateLimit;
35
36
    /** @var int Number of minutes during which $rateLimit requests are permitted. Zero disables rate limiting. */
37
    protected $rateDuration;
38
39
    /** @var \Symfony\Component\Cache\Adapter\TraceableAdapter Cache adapter, fetched from the `cache.app` service. */
40
    protected $cache;
41
42
    /** @var Request The Request object of the event being handled. */
43
    protected $request;
44
45
    /** @var string User agent string, taken from the `User-Agent` header (empty string if absent). */
46
    protected $userAgent;
47
48
    /** @var string The referer string, taken from the `referer` header (empty string if absent). */
49
    protected $referer;
50
51
    /** @var string The request URI, as returned by Request::getRequestUri(). */
52
    protected $uri;
53
54
    /**
     * Constructor: keep the injected services around for use when a request comes in.
     * @param ContainerInterface $container The DI container.
     * @param I18nHelper $i18n Helper that provides the localized denial messages.
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n)
    {
        [$this->container, $this->i18n] = [$container, $i18n];
    }
64
65
    /**
     * Tell the event dispatcher that this subscriber handles the kernel.controller event.
     * @return string[] Handler method names, keyed by event name.
     */
    public static function getSubscribedEvents(): array
    {
        $subscriptions = [];
        $subscriptions[KernelEvents::CONTROLLER] = 'onKernelController';

        return $subscriptions;
    }
75
76
    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * Only requests handled by an XtoolsController are inspected. The blacklist is always checked;
     * crawler logging and rate limiting are skipped when the feature is disabled (zero count or
     * duration), for whitelisted actions, logged in users, subrequests and API actions.
     *
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException When the rate limit has been exceeded.
     * @throws AccessDeniedHttpException When the request matches a blacklist entry.
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->cache = $this->container->get('cache.app');
        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        // The blacklist applies even when rate limiting itself is disabled.
        $this->checkBlacklist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');

        // $action is still null when the controller was not given as [instance, method].
        // With strict_types enabled, substr(null, ...) would raise a TypeError, so guard it.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);

        // Rate limiting will not apply to these actions.
        $actionWhitelist = [
            'indexAction', 'showAction', 'aboutAction', 'loginAction', 'recordUsageAction', 'oauthCallbackAction',
        ];

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, $actionWhitelist, true)
            || $loggedIn
            || false === $event->isMasterRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->xffRateLimit();
    }
129
130
    /**
     * Don't let individual users hog up all the resources.
     *
     * Requests are counted per X-Forwarded-For header value. Once the count goes above
     * $rateLimit, access is denied; otherwise the counter is stored again with a fresh
     * expiry of $rateDuration minutes.
     *
     * @throws TooManyRequestsHttpException When the limit has been exceeded.
     */
    private function xffRateLimit(): void
    {
        $forwardedFor = $this->request->headers->get('x-forwarded-for', '');

        // No header: happens in local environments, or outside of Cloud Services.
        if ('' === $forwardedFor) {
            return;
        }

        $item = $this->cache->getItem("ratelimit.session.".md5($forwardedFor));

        // Continue from the cached counter when there is one, otherwise this is request #1.
        $hits = 1;
        if ($item->isHit()) {
            $hits += (int)$item->get();
        }

        // Over the limit: denyAccess() throws, so the counter is not saved below.
        if ($hits > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Every counted request restarts the expiry window.
        $item->set($hits);
        $item->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($item);
    }
158
159
    /**
     * Detect possible web crawlers and log the requests via the `monolog.logger.crawler` channel.
     *
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     * Only URIs whose query string starts with `uselang=` are considered. More than three such requests
     * from the same user agent within a minute produce a log entry; the request itself is never blocked here.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];

        // Capture just the uselang value: stop at the next query parameter so that the
        // remaining parameters stay part of the URI being compared.
        if (1 !== preg_match('/\?uselang=([^&]*)/', $this->uri, $useLangMatches)) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Skip requests whose interface language is the same as the language of the target
        // project; only a differing uselang is of interest. The value comes straight from the
        // request, so it must be quoted before being embedded in a pattern.
        $projectPattern = '/[=\/]'.preg_quote($useLang, '/').'.wik/';
        if (1 === preg_match($projectPattern, $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in the cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > 3) {
            $logger = $this->container->get('monolog.logger.crawler');
            $logger->info('Possible crawler detected');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT1M'));
        $this->cache->save($cacheItem);
    }
199
200
    /**
     * Check the request against blacklisted URIs, referers and user agents.
     *
     * Each entry of the `request_blacklist` parameter may define an exact value and/or a
     * regular expression (`*_pattern`) for the user agent, the referer and the URI. An entry
     * denies the request only when it defines at least one criterion and all of them match.
     *
     * @throws AccessDeniedHttpException When an entry matches the current request.
     */
    private function checkBlacklist(): void
    {
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        // Request values to test, keyed by the name of the blacklist criterion.
        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];

        foreach ((array)$this->container->getParameter('request_blacklist') as $name => $item) {
            $results = [];

            foreach ($subjects as $field => $value) {
                // Exact comparison, e.g. `user_agent`.
                if (isset($item[$field])) {
                    $results[] = $item[$field] === $value;
                }

                // Regular expression comparison, e.g. `user_agent_pattern`.
                $patternField = $field.'_pattern';
                if (isset($item[$patternField])) {
                    $results[] = 1 === preg_match('/'.$item[$patternField].'/', $value);
                }
            }

            // Deny only if the entry had criteria and none of them failed.
            if ([] !== $results && !in_array(false, $results, true)) {
                $this->denyAccess("Matched blacklist entry `$name`", true);
            }
        }
    }
239
240
    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     *
     * The denial is logged first, to the blacklist or the rate limit channel. This method never
     * returns normally.
     *
     * @param string $logComment Comment to include with the log entry.
     * @param bool $blacklist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException When $blacklist is false.
     * @throws AccessDeniedHttpException When $blacklist is true.
     */
    private function denyAccess(string $logComment, bool $blacklist = false): void
    {
        // Log the denied request
        $logger = $this->container->get($blacklist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit');
        $logger->info($logComment);

        if ($blacklist) {
            // NOTE(review): the message parameter below looks like an obfuscated/redacted
            // e-mail address; confirm the intended contact address.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://www.mediawiki.org/wiki/Special:MyLanguage/XTools/API' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        // Retry-After is expressed in seconds, while the rate limit window is configured in
        // minutes; derive it from the configuration so the header agrees with the message.
        $retryAfter = $this->rateDuration * 60;

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException($retryAfter, $message, null, 999);
    }
273
}
274