Passed
Pull Request — master (#376)
by MusikAnimal
10:04 queued 21s
created

RateLimitSubscriber   A

Complexity

Total Complexity 32

Size/Duplication

Total Lines 234
Duplicated Lines 0 %

Test Coverage

Coverage 8.79%

Importance

Changes 6
Bugs 0 Features 0
Metric Value
eloc 92
c 6
b 0
f 0
dl 0
loc 234
ccs 8
cts 91
cp 0.0879
rs 9.84
wmc 32

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A sessionRateLimit() 0 18 3
A logCrawlers() 0 34 5
A getSubscribedEvents() 0 4 1
A denyAccess() 0 25 3
B checkBlacklist() 0 33 11
B onKernelController() 0 40 8
1
<?php
2
/**
3
 * This file contains only the RateLimitSubscriber class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\EventSubscriber;
9
10
use AppBundle\Controller\XtoolsController;
11
use AppBundle\Helper\I18nHelper;
12
use DateInterval;
13
use Symfony\Component\DependencyInjection\ContainerInterface;
14
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
15
use Symfony\Component\HttpFoundation\Request;
16
use Symfony\Component\HttpKernel\Event\ControllerEvent;
17
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
18
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
19
use Symfony\Component\HttpKernel\KernelEvents;
20
21
/**
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
 *
 * On every kernel.controller event that is handled by an XtoolsController it rejects requests matching
 * the `request_blacklist` parameter, logs suspected crawlers, and enforces a per-session request quota.
 */
class RateLimitSubscriber implements EventSubscriberInterface
{
    /** @var ContainerInterface The DI container. */
    protected $container;

    /** @var I18nHelper For i18n and l10n. */
    protected $i18n;

    /** @var int Number of requests allowed in time period */
    protected $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected $rateDuration;

    /** @var \Symfony\Component\Cache\Adapter\TraceableAdapter Cache adapter. */
    protected $cache;

    /** @var Request The Request object. */
    protected $request;

    /** @var string User agent string. */
    protected $userAgent;

    /** @var string The referer string. */
    protected $referer;

    /** @var string The URI. */
    protected $uri;

    /**
     * Save the container for later use.
     * @param ContainerInterface $container The DI container.
     * @param I18nHelper $i18n Used to build the localized error messages.
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n)
    {
        $this->container = $container;
        $this->i18n = $i18n;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[] Event name => name of the handling method.
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     * @param ControllerEvent $event The event.
     * @throws AccessDeniedHttpException If the request matches a blacklist entry.
     * @throws TooManyRequestsHttpException If the session exceeded the rate limit.
     */
    public function onKernelController(ControllerEvent $event): void
    {
        $controller = $event->getController();
        $action = null;

        // A controller that is a method on a class arrives as the callable [$instance, 'methodName'].
        // It must be unpacked first, otherwise the instanceof check below can never succeed.
        if (is_array($controller)) {
            [$controller, $action] = $controller;
        }

        if (!$controller instanceof XtoolsController) {
            return;
        }

        $this->cache = $this->container->get('cache.app');
        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        // The blacklist applies even when rate limiting itself is disabled.
        $this->checkBlacklist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');

        // $action is null for controllers that are not [$instance, 'methodName'] callables.
        $isApi = is_string($action) && 'ApiAction' === substr($action, -9);

        /**
         * Rate limiting will not apply to these actions
         * @var string[]
         */
        $actionWhitelist = [
            'indexAction', 'showAction', 'aboutAction', 'loginAction', 'recordUsageAction', 'oauthCallbackAction',
        ];
        $isWhitelisted = in_array($action, $actionWhitelist, true);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if ($isWhitelisted || $loggedIn || false === $event->isMasterRequest() || $isApi) {
            return;
        }

        $this->logCrawlers();
        $this->sessionRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Counts requests per session ID in the cache and denies access once the count exceeds $rateLimit.
     * @throws TooManyRequestsHttpException If the session exceeded the rate limit.
     */
    private function sessionRateLimit(): void
    {
        $sessionId = $this->request->getSession()->getId();
        $cacheItem = $this->cache->getItem("ratelimit.session.$sessionId");

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // A denied request is not written back, so it does not extend the cache item's lifetime.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every (allowed) request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log their requests through the 'monolog.logger.crawler' service.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     */
    private function logCrawlers(): void
    {
        $useLangMatches = [];
        if (1 !== preg_match('/\?uselang=(.*)/', $this->uri, $useLangMatches)) {
            return;
        }

        $useLang = $useLangMatches[1];

        // $useLang comes straight from the request, so it must be escaped before it is used in a pattern.
        // Otherwise values such as "(" produce an invalid regular expression.
        $quotedLang = preg_quote($useLang, '/');

        // Nothing suspicious if the requested language also appears as the language of the target project.
        if (1 === preg_match("/[=\/]$quotedLang.wik/", $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > 3) {
            $this->container->get('monolog.logger.crawler')->info('Possible crawler detected');
        }

        // Reset the clock on every request; the counter lives for one minute after the latest hit.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT1M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Check the request against blacklisted URIs, referers and user agents.
     *
     * Each entry of the `request_blacklist` parameter may define exact values (`user_agent`, `referer`,
     * `uri`) and/or regular expressions (`*_pattern`). An entry applies when it defines at least one
     * criterion and all of its defined criteria match the current request.
     * @throws AccessDeniedHttpException If the request matches a blacklist entry.
     */
    private function checkBlacklist(): void
    {
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        $blacklist = (array)$this->container->getParameter('request_blacklist');

        // Request values to compare, keyed by the name of the corresponding blacklist criterion.
        $requestValues = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];

        foreach ($blacklist as $name => $item) {
            $matches = [];

            foreach ($requestValues as $criterion => $actual) {
                if (isset($item[$criterion])) {
                    $matches[] = $item[$criterion] === $actual;
                }
                if (isset($item[$criterion.'_pattern'])) {
                    $matches[] = 1 === preg_match('/'.$item[$criterion.'_pattern'].'/', $actual);
                }
            }

            // Deny only if something was checked and none of the checks failed.
            if ([] !== $matches && !in_array(false, $matches, true)) {
                $this->denyAccess("Matched blacklist entry `$name`", true);
            }
        }
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     * @param string $logComment Comment to include with the log entry.
     * @param bool $blacklist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $blacklist = false): void
    {
        // Log the denied request to the channel that corresponds to the reason for the denial.
        $loggerId = $blacklist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit';
        $this->container->get($loggerId)->info($logComment);

        if ($blacklist) {
            // NOTE(review): the parameter below looks like an e-mail obfuscation placeholder rather than
            // a real contact address - confirm the intended value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $loginLink = "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>";
        $apiLink = "<a href='https://xtools.readthedocs.io/en/stable/api' target='_blank'>" .
            $this->i18n->msg('api') .
            "</a>";
        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            $loginLink,
            $apiLink,
        ]);

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be displayed with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException(600, $message, null, 999);
    }
}
260