Passed
Pull Request — master (#4)
by MusikAnimal
02:34
created

DefaultController::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Controller;
6
7
use App\WikiDomainLookup;
8
use GuzzleHttp\Client;
9
use GuzzleHttp\Exception\ClientException;
10
use Psr\Cache\CacheItemPoolInterface;
11
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
12
use Symfony\Component\HttpFoundation\Request;
13
use Symfony\Component\HttpFoundation\Response;
14
use Symfony\Component\Routing\Annotation\Route;
15
16
class DefaultController extends AbstractController
17
{
18
    /** @var string Marker sent to Elasticsearch as the highlight 'pre_tags' value; swapped for HTML later. */
    private const PRE_TAG = '%**%';

    /** @var string Marker sent to Elasticsearch as the highlight 'post_tags' value; swapped for HTML later. */
    private const POST_TAG = '*%%*';

    /** @var int Result limit: used as the 'size' of the plain query and passed to the view as 'max_results'. */
    private const MAX_RESULTS = 5000;

    /** @var string Duration of cache for main results set, as accepted by DateInterval::createFromDateString() */
    private const CACHE_TIME = '10 minutes';

    /** @var Client HTTP client. Not set by the constructor; makeRequest() assigns it. */
    private $client;

    /** @var CacheItemPoolInterface Cache pool injected through the constructor. */
    private $cache;

    /** @var bool Whether the results were pulled from cache. */
    private $fromCache = false;

    /** @var string[]|null Map from wiki dbname to domain name; null until it is first needed. */
    private $domainLookup;
36
37
    /**
     * Build the controller around the cache pool it uses for result sets.
     *
     * @param CacheItemPoolInterface $cache Cache pool, stored on the instance as-is.
     */
    public function __construct(CacheItemPoolInterface $cache)
    {
        $this->cache = $cache;
    }
45
46
    /**
     * Render the splash page (the same template the main route shows to logged-out users).
     * @Route("/splash")
     * @return Response
     */
    public function splashAction(): Response
    {
        $template = 'jumbotron.html.twig';

        return $this->render($template);
    }
54
55
    /**
     * Main entry point: shows the splash page, the empty search form, or the search results.
     *
     * Reads the 'q', 'regex', 'ignorecase', 'namespaces' and 'purge' query string parameters.
     *
     * @Route("/", name="home")
     * @param Request $request
     * @return Response
     */
    public function indexAction(Request $request): Response
    {
        // Searching is only available to logged-in users; everyone else gets the splash page.
        $loggedInUser = $this->get('session')->get('logged_in_user');
        if (!$loggedInUser) {
            return $this->render('jumbotron.html.twig');
        }

        $input = $request->query;
        $query = $input->get('q');
        $regex = (bool)$input->get('regex');
        $ignoreCase = (bool)$input->get('ignorecase');
        [$namespaces, $namespaceIds] = $this->parseNamespaces($request);
        $purgeCache = (bool)$input->get('purge');

        // Values the templates need to re-populate the search form.
        $viewData = [
            'q' => $query,
            'regex' => $regex,
            'max_results' => self::MAX_RESULTS,
            'namespaces' => $namespaces,
            'ignore_case' => $ignoreCase,
        ];

        // No search term: just show the form.
        if (!$query) {
            return $this->render('default/index.html.twig', $viewData);
        }

        $results = $this->getResults($query, $regex, $ignoreCase, $namespaceIds, $purgeCache);
        $viewData = array_merge($viewData, $results);
        // Must be read after getResults(), which is what flips the flag.
        $viewData['from_cache'] = $this->fromCache;

        return $this->render('default/result.html.twig', $viewData);
    }
88
89
    /**
     * Parse the 'namespaces' parameter of the query string.
     *
     * The parameter is a comma-separated list of namespace IDs. Segments that are not integers
     * (including empty ones, e.g. from "1,,2" or a trailing comma) are dropped rather than being
     * coerced to 0, which would silently add the main namespace to the filter. Duplicates are removed.
     *
     * @param Request $request
     * @return mixed[] [normalized comma-separated list as a string, array of ids as ints]
     */
    private function parseNamespaces(Request $request): array
    {
        $param = $request->query->get('namespaces', '');

        // A missing, empty or non-string value (e.g. array syntax in the URL) means "no namespace filter".
        if (!is_string($param) || '' === $param) {
            return ['', []];
        }

        $ids = [];
        foreach (explode(',', $param) as $segment) {
            $id = filter_var(trim($segment), FILTER_VALIDATE_INT);
            if (false !== $id) {
                $ids[] = $id;
            }
        }

        // array_unique() keeps the original keys; re-index so callers get a plain list.
        $ids = array_values(array_unique($ids));

        return [
            implode(',', $ids),
            $ids,
        ];
    }
112
113
    /**
     * Get the result set for a search, from cache when possible, otherwise from CloudElastic.
     *
     * Sets $this->fromCache to true when the cached result set is returned. Fresh results are stored in
     * the cache for self::CACHE_TIME.
     *
     * @param string $query Search term. A term wrapped in double quotes is run as an exact-match regex.
     * @param bool $regex Whether $query is a regular expression.
     * @param bool $ignoreCase Whether a regex search is case-insensitive (not passed to plain queries).
     * @param int[] $namespaceIds Namespace IDs to restrict the search to; empty means no restriction.
     * @param bool $purgeCache Ignore any cached result set and overwrite it with a fresh one.
     * @return mixed[] Keys 'query', 'regex' and 'ignore_case' (as passed in), plus 'total' and 'hits'.
     */
    public function getResults(
        string $query,
        bool $regex,
        bool $ignoreCase,
        array $namespaceIds,
        bool $purgeCache = false
    ): array {
        // Hash a serialized structure rather than a plain concatenation: booleans stringify to '1'/'' and
        // there were no separators, so e.g. ("foo", regex) and ("foo", ignore case) produced the same key.
        $cacheKey = md5(serialize([$query, $regex, $ignoreCase, array_values($namespaceIds)]));

        // Fetch the item once and ask it whether it is a hit; a separate hasItem() call followed by
        // getItem() can race with expiry and hand back null.
        $cacheItem = $this->cache->getItem($cacheKey);

        if (!$purgeCache && $cacheItem->isHit()) {
            $this->fromCache = true;
            return $cacheItem->get();
        }

        // Setup data structure to be passed to the view. We only set the query and regex here because they
        // are silently changed if the query is wrapped in double-quotes (see below).
        $data = [
            'query' => $query,
            'regex' => $regex,
            'ignore_case' => $ignoreCase,
        ];

        // Silently use regex to do exact match if query is wrapped in double-quotes. At least one character
        // is required between the quotes so that '"' and '""' do not turn into an empty regex.
        $isQuoted = strlen($query) > 2 && '"' === substr($query, 0, 1) && '"' === substr($query, -1, 1);
        if ($isQuoted) {
            $regex = true;
            // NOTE(review): preg_quote() escapes PCRE metacharacters; the regex is evaluated by the search
            // backend, whose syntax may reserve other characters. Confirm this escaping is sufficient.
            $query = preg_quote(substr($query, 1, -1));
        }

        $params = $regex
            ? $this->getParamsForRegexQuery($query, $ignoreCase)
            : $this->getParamsForPlainQuery($query);

        if (!empty($namespaceIds)) {
            $params['query']['bool']['filter'][] = [ 'terms' => [
                'namespace' => $namespaceIds,
            ] ];
        }

        $res = $this->makeRequest($params);
        $data['total'] = $res['hits']['total'];
        $data['hits'] = $this->formatHits($res);

        $cacheItem
            ->set($data)
            ->expiresAfter(\DateInterval::createFromDateString(self::CACHE_TIME));
        $this->cache->save($cacheItem);

        return $data;
    }
170
171
    /**
     * Query the CloudElastic service with the given params.
     *
     * Creates a new Guzzle client, stores it in $this->client, and sends $params as the JSON body of a
     * GET request to the _search endpoint under $_ENV['ELASTIC_HOST'].
     *
     * @param mixed[] $params Search request body.
     * @return mixed[] The decoded JSON response.
     * @throws ClientException Rethrown as-is; in the 'dev' environment the full response body is dumped first.
     * @throws \UnexpectedValueException If the response body does not decode to an array.
     */
    private function makeRequest(array $params): array
    {
        $this->client = new Client([
            // Certificates are verified unless ELASTIC_INSECURE is set to a truthy value. empty() also
            // covers the variable being absent, in which case verification stays on.
            'verify' => empty($_ENV['ELASTIC_INSECURE']),
        ]);

        // FIXME: Eventually will be able to remove _prefer_nodes
        $uri = $_ENV['ELASTIC_HOST'].'/*,*:*/_search?preference=_prefer_nodes:cloudelastic1001-cloudelastic-chi-eqiad,'.
            'cloudelastic1002-cloudelastic-chi-eqiad,cloudelastic1003-cloudelastic-chi-eqiad';

        $request = new \GuzzleHttp\Psr7\Request('GET', $uri, [
            'Content-Type' => 'application/json',
        ], \GuzzleHttp\json_encode($params));

        // FIXME: increase cURL timeout
        try {
            $res = $this->client->send($request);
        } catch (ClientException $e) {
            // Dump the full response in development environments since Guzzle truncates the error messages.
            if ('dev' === ($_ENV['APP_ENV'] ?? null)) {
                dump($e->getResponse()->getBody()->getContents());
            }
            throw $e;
        }

        // json_decode() returns null for a body that is not valid JSON; fail with a clear message instead
        // of letting the array return type reject it.
        $decoded = json_decode($res->getBody()->getContents(), true);
        if (!is_array($decoded)) {
            throw new \UnexpectedValueException('CloudElastic returned a response that is not a JSON object or array.');
        }

        return $decoded;
    }
203
204
    /**
     * Build the data structure for each hit, giving the view what it needs.
     *
     * @param mixed[] $data Decoded search response; the hits are read from $data['hits']['hits'].
     * @return mixed[] One entry per hit with the keys 'wiki' (domain without a trailing ".org"),
     *   'title' (prefixed with the namespace name, if any), 'url' and 'source_text' (highlighted HTML).
     */
    private function formatHits(array $data): array
    {
        $formatted = [];

        foreach ($data['hits']['hits'] as $hit) {
            $source = $hit['_source'];
            $prefix = empty($source['namespace_text']) ? '' : $source['namespace_text'].':';
            $title = $prefix.$source['title'];
            $domain = $this->getWikiDomainFromDbName($source['wiki']);

            // Remove the ".org" suffix only. rtrim($domain, '.org') would treat the argument as a set of
            // characters and keep stripping any trailing '.', 'o', 'r' or 'g' from the rest of the name.
            $wiki = '.org' === substr($domain, -4) ? substr($domain, 0, -4) : $domain;

            $formatted[] = [
                'wiki' => $wiki,
                'title' => $title,
                'url' => $this->getUrlForTitle($domain, $title),
                // Hits without a highlight fragment get an empty snippet.
                'source_text' => $this->highlightQuery(
                    $hit['highlight']['source_text.plain'][0] ?? ''
                ),
            ];
        }

        return $formatted;
    }
230
231
    /**
     * Get the URL to the page with the given title on the given wiki.
     *
     * Spaces in the title become underscores and the title is percent-encoded, so that characters such
     * as '?', '#' and '%' stay part of the path instead of starting a query string or fragment. '/' and
     * ':' are left readable because they separate subpages and namespaces.
     *
     * @param string $domain Domain of the wiki, without scheme.
     * @param string $title Page title, including the namespace prefix if any.
     * @return string
     */
    private function getUrlForTitle(string $domain, string $title): string
    {
        $encodedTitle = strtr(rawurlencode(str_replace(' ', '_', $title)), [
            '%2F' => '/',
            '%3A' => ':',
        ]);

        return 'https://'.$domain.'/wiki/'.$encodedTitle;
    }
241
242
    /**
     * Get the domain of the wiki with the given database name.
     *
     * The dbname-to-domain map is loaded through WikiDomainLookup the first time it is needed and then
     * kept on the instance. The original documentation states that the data comes from the XTools API
     * and is cached for a week; that logic lives in WikiDomainLookup, outside this class.
     *
     * @param string $wiki Database name of the wiki.
     * @return string The domain, or 'WIKINOTFOUND' if the database name is not in the map.
     */
    private function getWikiDomainFromDbName(string $wiki): string
    {
        $lookup = $this->domainLookup;

        if (null === $lookup) {
            $loader = new WikiDomainLookup($this->client, $this->cache);
            $lookup = $loader->load();
            $this->domainLookup = $lookup;
        }

        return $lookup[$wiki] ?? 'WIKINOTFOUND';
    }
254
255
    /**
     * Make the highlight text safe and wrap the search term in a span so that we can style it.
     *
     * The text is HTML-escaped first; only afterwards are the PRE_TAG/POST_TAG markers replaced with the
     * actual span markup, so the span is the only HTML in the result.
     *
     * @param string $text Highlight fragment containing PRE_TAG/POST_TAG markers around each match.
     * @return string HTML-safe fragment with matches wrapped in <span class='highlight'>.
     */
    private function highlightQuery(string $text): string
    {
        // Explicit flags and charset: the defaults differ between PHP versions, and without
        // ENT_SUBSTITUTE an invalid UTF-8 sequence makes htmlspecialchars() return an empty string.
        $escaped = htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        return strtr($escaped, [
            self::PRE_TAG => "<span class='highlight'>",
            self::POST_TAG => "</span>",
        ]);
    }
268
269
    /**
     * Build the request body sent to Cloud Elastic for a plain (non-regex) query.
     *
     * The same match clause on 'source_text.plain' is used both as the search filter and as the
     * highlight query.
     *
     * @param string $query Search term.
     * @return mixed[]
     */
    private function getParamsForPlainQuery(string $query): array
    {
        $matchClause = [
            'match' => [
                'source_text.plain' => $query,
            ],
        ];

        $highlight = [
            'pre_tags' => [self::PRE_TAG],
            'post_tags' => [self::POST_TAG],
            'fields' => [
                'source_text.plain' => [
                    'type' => 'experimental',
                ],
            ],
            'highlight_query' => $matchClause,
        ];

        return [
            'timeout' => '150s',
            'size' => self::MAX_RESULTS,
            '_source' => ['wiki', 'namespace_text', 'title'],
            'query' => [
                'bool' => [
                    'filter' => [$matchClause],
                ],
            ],
            'stats' => ['global-search'],
            'highlight' => $highlight,
        ];
    }
306
307
    /**
     * Params to be passed to Cloud Elastic for a regular expression query.
     *
     * The regex is used twice: in the 'source_regex' filter that selects the pages, and in the
     * highlighter options that mark the matching text in the returned fragment.
     *
     * @param string $query The regular expression.
     * @param bool $ignoreCase Whether matching should be case-insensitive.
     * @return mixed[]
     */
    private function getParamsForRegexQuery(string $query, bool $ignoreCase = false): array
    {
        return [
            'timeout' => '150s',
            // Same limit as the plain query and the 'max_results' value shown by the view.
            'size' => self::MAX_RESULTS,
            '_source' => ['wiki', 'namespace_text', 'title'],
            'query' => [
                'bool' => [
                    'filter' => [
                        [ 'source_regex' => [
                            'regex' => $query,
                            'field' => 'source_text',
                            'ngram_field' => 'source_text.trigram',
                            'max_determinized_states' => 20000,
                            'max_expand' => 10,
                            'case_sensitive' => !$ignoreCase,
                            'locale' => 'en',
                        ] ],
                    ],
                ],
            ],
            'stats' => ['global-search'],
            'highlight' => [
                'pre_tags' => [self::PRE_TAG],
                'post_tags' => [self::POST_TAG],
                'fields' => [
                    'source_text.plain' => [
                        'type' => 'experimental',
                        'number_of_fragments' => 1,
                        'fragmenter' => 'scan',
                        'fragment_size' => 150,
                        'options' => [
                            // FIXME: won't highlight case-insensitive matches
                            'regex' => [$query],
                            'locale' => 'en',
                            'regex_flavor' => 'lucene',
                            'skip_query' => true,
                            'regex_case_sensitive' => !$ignoreCase,
                            'max_determinized_states' => 20000,
                        ],
                    ],
                ],
            ],
        ];
    }
358
}
359