Passed
Push — master ( 070649...43936c )
by MusikAnimal
12:20
created

RateLimitSubscriber::logCrawlers()   A

Complexity

Conditions 5
Paths 6

Size

Total Lines 34
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 30

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 5
eloc 17
c 1
b 0
f 0
nc 6
nop 0
dl 0
loc 34
ccs 0
cts 18
cp 0
crap 30
rs 9.3888
1
<?php
2
/**
3
 * This file contains only the RateLimitSubscriber class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\EventSubscriber;
9
10
use AppBundle\Helper\I18nHelper;
11
use DateInterval;
12
use Symfony\Component\DependencyInjection\ContainerInterface;
13
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
14
use Symfony\Component\HttpFoundation\Request;
15
use Symfony\Component\HttpKernel\Event\FilterControllerEvent;
16
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
17
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
18
use Symfony\Component\HttpKernel\KernelEvents;
19
20
/**
21
 * A RateLimitSubscriber checks to see if users are exceeding usage limitations.
22
 */
23
class RateLimitSubscriber implements EventSubscriberInterface
{
    /** Controller actions that rate limiting never applies to. */
    private const ACTION_WHITELIST = [
        'indexAction', 'showAction', 'aboutAction', 'loginAction', 'recordUsageAction', 'oauthCallbackAction',
    ];

    /** Number of same-URI/different-language hits (within CRAWLER_WINDOW) tolerated before logging. */
    private const CRAWLER_HIT_THRESHOLD = 3;

    /** ISO 8601 duration of the crawler detection window. */
    private const CRAWLER_WINDOW = 'PT1M';

    /** @var ContainerInterface The DI container. */
    protected $container;

    /** @var I18nHelper For i18n and l10n. */
    protected $i18n;

    /** @var int Number of requests allowed in time period */
    protected $rateLimit;

    /** @var int Number of minutes during which $rateLimit requests are permitted. */
    protected $rateDuration;

    /** @var \Symfony\Component\Cache\Adapter\TraceableAdapter Cache adapter. */
    protected $cache;

    /** @var Request The Request object. */
    protected $request;

    /** @var string User agent string. */
    protected $userAgent;

    /** @var string The referer string. */
    protected $referer;

    /** @var string The URI. */
    protected $uri;

    /**
     * Save the container and i18n helper for later use.
     * @param ContainerInterface $container The DI container.
     * @param I18nHelper $i18n Used to build the localized error messages.
     */
    public function __construct(ContainerInterface $container, I18nHelper $i18n)
    {
        $this->container = $container;
        $this->i18n = $i18n;
    }

    /**
     * Register our interest in the kernel.controller event.
     * @return string[]
     */
    public static function getSubscribedEvents(): array
    {
        return [
            KernelEvents::CONTROLLER => 'onKernelController',
        ];
    }

    /**
     * Check if the current user has exceeded the configured usage limitations.
     *
     * The blacklist is always enforced. Rate limiting is then skipped when it is disabled by configuration,
     * or for whitelisted actions, logged in users, subrequests and API actions.
     *
     * @param FilterControllerEvent $event The event.
     * @throws TooManyRequestsHttpException When the session exceeded the rate limit.
     * @throws AccessDeniedHttpException When the request matches a blacklist entry.
     */
    public function onKernelController(FilterControllerEvent $event): void
    {
        $this->cache = $this->container->get('cache.app');
        $this->rateLimit = (int)$this->container->getParameter('app.rate_limit_count');
        $this->rateDuration = (int)$this->container->getParameter('app.rate_limit_time');
        $this->request = $event->getRequest();
        $this->userAgent = (string)$this->request->headers->get('User-Agent');
        $this->referer = (string)$this->request->headers->get('referer');
        $this->uri = $this->request->getRequestUri();

        $this->checkBlacklist();

        // Zero values indicate the rate limiting feature should be disabled.
        if (0 === $this->rateLimit || 0 === $this->rateDuration) {
            return;
        }

        $controller = $event->getController();
        $loggedIn = (bool)$this->container->get('session')->get('logged_in_user');

        // The controller is a callable and is not guaranteed to be an [object, method] array (it may be a
        // closure or an invokable object), so only read the action name when that form is actually present.
        $action = is_array($controller) && isset($controller[1]) ? (string)$controller[1] : '';
        $isApi = 'ApiAction' === substr($action, -9);

        // No rate limits on lightweight pages, logged in users, subrequests or API requests.
        if (in_array($action, self::ACTION_WHITELIST, true)
            || $loggedIn
            || false === $event->isMasterRequest()
            || $isApi
        ) {
            return;
        }

        $this->logCrawlers();
        $this->sessionRateLimit();
    }

    /**
     * Don't let individual users hog up all the resources.
     *
     * Counts requests per session ID in the cache and denies access once the count exceeds $rateLimit.
     *
     * @throws TooManyRequestsHttpException
     */
    private function sessionRateLimit(): void
    {
        $sessionId = $this->request->getSession()->getId();
        $cacheKey = "ratelimit.session.$sessionId";
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in the cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, throw an error.
        // denyAccess() always throws, so a denied request does not update the counter below.
        if ($count > $this->rateLimit) {
            $this->denyAccess('Exceeded rate limitation');
        }

        // Reset the clock on every (accepted) request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval('PT'.$this->rateDuration.'M'));
        $this->cache->save($cacheItem);
    }

    /**
     * Detect possible web crawlers and log them via the 'monolog.logger.crawler' service.
     * Crawlers typically click on every visible link on the page, so we check for rapid requests to the same URI
     * but with a different interface language, as happens when it is crawling the language dropdown in the UI.
     */
    private function logCrawlers(): void
    {
        // Capture only the uselang value itself, wherever it sits in the query string.
        $useLangMatches = [];
        if (1 !== preg_match('/[?&]uselang=([^&]*)/', $this->uri, $useLangMatches)) {
            return;
        }

        $useLang = $useLangMatches[1];

        // Not suspicious when the requested interface language is the language of the target project
        // (e.g. uselang=fr on a fr.wiki* project). The value comes straight from the request URI, so it
        // must be quoted before being embedded in a pattern.
        $projectPattern = '/[=\/]'.preg_quote($useLang, '/').'\.wik/';
        if (1 === preg_match($projectPattern, $this->uri)) {
            return;
        }

        // We're trying to check if everything BUT the uselang has remained unchanged.
        $cacheUri = str_replace('uselang='.$useLang, '', $this->uri);
        $cacheKey = 'ratelimit.crawler.'.md5($this->userAgent.$cacheUri);
        $cacheItem = $this->cache->getItem($cacheKey);

        // Increment the value already in the cache, or start with 1.
        $count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

        // Check if limit has been exceeded, and if so, add a log entry.
        if ($count > self::CRAWLER_HIT_THRESHOLD) {
            $logger = $this->container->get('monolog.logger.crawler');
            $logger->info('Possible crawler detected');
        }

        // Reset the clock on every request.
        $cacheItem->set($count)
            ->expiresAfter(new DateInterval(self::CRAWLER_WINDOW));
        $this->cache->save($cacheItem);
    }

    /**
     * Check the request against blacklisted URIs, referers and user agents.
     *
     * Reads the optional 'request_blacklist' container parameter; does nothing when it is not defined.
     *
     * @throws AccessDeniedHttpException When an entry matches the current request.
     */
    private function checkBlacklist(): void
    {
        if (!$this->container->hasParameter('request_blacklist')) {
            return;
        }

        $blacklist = (array)$this->container->getParameter('request_blacklist');

        // Request values keyed by the blacklist field that targets them.
        $subjects = [
            'user_agent' => $this->userAgent,
            'referer' => $this->referer,
            'uri' => $this->uri,
        ];

        foreach ($blacklist as $name => $item) {
            if ($this->matchesBlacklistEntry($item, $subjects)) {
                $this->denyAccess("Matched blacklist entry `$name`", true);
            }
        }
    }

    /**
     * Whether a single blacklist entry matches the current request.
     *
     * For each field in $subjects the entry may define an exact value (`<field>`) and/or a regular expression
     * body (`<field>_pattern`, wrapped in `/` delimiters here). The entry matches only when it defines at least
     * one criterion and every criterion it defines is satisfied.
     *
     * @param mixed $item The blacklist entry from configuration.
     * @param string[] $subjects Request values keyed by field name.
     * @return bool
     */
    private function matchesBlacklistEntry($item, array $subjects): bool
    {
        $criteria = 0;

        foreach ($subjects as $field => $value) {
            if (isset($item[$field])) {
                $criteria++;
                if ($item[$field] !== $value) {
                    return false;
                }
            }

            $patternKey = $field.'_pattern';
            if (isset($item[$patternKey])) {
                $criteria++;
                if (1 !== preg_match('/'.$item[$patternKey].'/', $value)) {
                    return false;
                }
            }
        }

        // An entry without any recognized criterion never matches.
        return $criteria > 0;
    }

    /**
     * Throw exception for denied access due to spider crawl or hitting usage limits.
     *
     * Always throws; the request is logged first to the blacklist or rate limit logger.
     *
     * @param string $logComment Comment to include with the log entry.
     * @param bool $blacklist Changes the messaging to say access was denied due to abuse, rather than rate limiting.
     * @throws TooManyRequestsHttpException
     * @throws AccessDeniedHttpException
     */
    private function denyAccess(string $logComment, bool $blacklist = false): void
    {
        // Log the denied request
        $logger = $this->container->get($blacklist ? 'monolog.logger.blacklist' : 'monolog.logger.rate_limit');
        $logger->info($logComment);

        if ($blacklist) {
            // NOTE(review): the message parameter below looks like a contact address that was mangled
            // by e-mail obfuscation; confirm the intended value.
            $message = $this->i18n->msg('error-denied', ['[email protected]']);
            throw new AccessDeniedHttpException($message, null, 999);
        }

        $message = $this->i18n->msg('error-rate-limit', [
            $this->rateDuration,
            "<a href='/login'>".$this->i18n->msg('error-rate-limit-login')."</a>",
            "<a href='https://xtools.readthedocs.io/en/stable/api' target='_blank'>" .
                $this->i18n->msg('api') .
            "</a>",
        ]);

        // Advertise the configured window (minutes converted to seconds) as the Retry-After value.
        $retryAfter = $this->rateDuration * 60;

        /**
         * TODO: Find a better way to do this.
         * 999 is a random, complete hack to tell error.html.twig file to treat these exceptions as having
         * fully safe messages that can be display with |raw. (In this case we authored the message).
         */
        throw new TooManyRequestsHttpException($retryAfter, $message, null, 999);
    }
}
255