Passed
Push — main ( c369f5...5a9707 )
by MusikAnimal
13:18 queued 03:12
created

RateLimitSubscriber::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 2
c 1
b 0
f 0
nc 1
nop 2
dl 0
loc 4
ccs 3
cts 3
cp 1
crap 1
rs 10
1
<?php
2
/**
3
 * This file contains only the RateLimitSubscriber class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\EventSubscriber;
9
10
use AppBundle\Controller\XtoolsController;
11
use AppBundle\Helper\I18nHelper;
12
use DateInterval;
13
use Symfony\Component\DependencyInjection\ContainerInterface;
14
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
15
use Symfony\Component\HttpFoundation\Request;
16
use Symfony\Component\HttpKernel\Event\ControllerEvent;
17
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
18
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
19
use Symfony\Component\HttpKernel\KernelEvents;
20
21
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * It listens to the kernel.controller event and, for XtoolsController actions, first rejects
 * requests matching the configured blacklist, then applies a per-session request limit and
 * logs request patterns that look like web crawlers.
 */
class RateLimitSubscriber implements EventSubscriberInterface
{

    /** @var ContainerInterface The DI container. */
    protected $container;

    /** @var I18nHelper For i18n and l10n. */
    protected $i18n;

    /** @var int Number of requests allowed in time period */
    protected $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected $rateDuration;

    /** @var \Symfony\Component\Cache\Adapter\TraceableAdapter Cache adapter. */
    protected $cache;

    /** @var Request The Request object. */
    protected $request;

    /** @var string User agent string. */
    protected $userAgent;

    /** @var string The referer string. */
    protected $referer;

    /** @var string The URI. */
    protected $uri;

    /**
     * Save the container and i18n helper for later use.
     * @param ContainerInterface $container The DI container.
     * @param I18nHelper $i18n Used to build the localized error messages.
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n)
    {
        $this->container = $container;
        $this->i18n = $i18n;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[] Event name => handler method name.
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The blacklist is always checked for XtoolsController requests. Rate limiting is then
     * skipped when it is disabled (either parameter is zero), for whitelisted actions,
     * logged in users, subrequests and API actions.
     *
     * @param ControllerEvent $event The event.
     * @throws TooManyRequestsHttpException If the session rate limit was exceeded.
     * @throws AccessDeniedHttpException If the request matches a blacklist entry.
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // when a controller class defines multiple action methods, the controller
        // is returned as [$controllerInstance, 'methodName']
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->cache = $this->container->get('cache.app');
        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkBlacklist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');

        // $action stays null when the controller is not given as [instance, method]; with
        // strict_types enabled, passing null to substr() would raise a TypeError.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);

        /**
         * Rate limiting will not apply to these actions
         * @var array
         */
        $actionWhitelist = [
            'indexAction', 'showAction', 'aboutAction', 'loginAction', 'recordUsageAction', 'oauthCallbackAction',
        ];

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, $actionWhitelist, true)
            || $loggedIn
            || false === $event->isMasterRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->sessionRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Counts requests per session ID in the cache and denies access once the count
     * exceeds $rateLimit.
     *
     * @throws TooManyRequestsHttpException If the limit has been exceeded.
     */
    private function sessionRateLimit(): void
    {
        $sessionId = $this->request->getSession()->getId();
        $cacheKey = "ratelimit.session.$sessionId";
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the cached count if there is one, otherwise start with 1.
        $count = $cacheItem->isHit() ? (int) $cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // denyAccess() throws, so a denied request is not saved to the cache below.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log their requests via the 'monolog.logger.crawler' service.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        $hasMatch = preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches);

        if (1 !== $hasMatch) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Nothing to log when the requested interface language is the same as the language of
        // the project in the URI. The value comes straight from the request URI, so it is
        // escaped before being embedded in the pattern.
        $quotedLang = preg_quote($useLang, '/');
        if (1 === preg_match("/[=\/]{$quotedLang}.wik/", $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the cached count if there is one, otherwise start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > 3) {
            $logger = $this->container->get('monolog.logger.crawler');
            $logger->info('Possible crawler detected');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT1M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Check the request against blacklisted URIs, referers and user agents.
     *
     * Each entry of the 'request_blacklist' parameter may define exact values ('user_agent',
     * 'referer', 'uri') and/or regular expressions ('user_agent_pattern', 'referer_pattern',
     * 'uri_pattern'). Access is denied when an entry defines at least one criterion and every
     * criterion it defines matches the current request.
     *
     * @throws AccessDeniedHttpException If a blacklist entry matches.
     */
    private function checkBlacklist(): void
    {
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        $blacklist = (array)$this->container->getParameter('request_blacklist');

        // Request values keyed by the blacklist field they are compared against.
        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];

        foreach ($blacklist as $name => $item) {
            $matches = [];

            foreach ($subjects as $field => $value) {
                if (isset($item[$field])) {
                    $matches[] = $item[$field] === $value;
                }
                if (isset($item[$field.'_pattern'])) {
                    $matches[] = 1 === preg_match('/'.$item[$field.'_pattern'].'/', $value);
                }
            }

            // Every criterion the entry defines must match, and there must be at least one.
            if ([] !== $matches && !in_array(false, $matches, true)) {
                $this->denyAccess("Matched blacklist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $blacklist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $blacklist = false): void
    {
        // Log the denied request
        $logger = $this->container->get($blacklist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit');
        $logger->info($logComment);

        if ($blacklist) {
            // NOTE(review): this literal looks like an email-obfuscation placeholder rather than
            // a real contact address; confirm the intended value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://xtools.readthedocs.io/en/stable/api' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        // Retry-After is given in seconds; keep it in line with the configured window
        // (in minutes) that is also reported in the message above.
        $retryAfter = $this->rateDuration * 60;

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException($retryAfter, $message, null, 999);
    }
}
268