Passed
Push — master ( 5a7268...3e6b84 )
by MusikAnimal
02:02
created

DefaultController::getResults()   B

Complexity

Conditions 7
Paths 9

Size

Total Lines 47
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 47
rs 8.5866
c 0
b 0
f 0
cc 7
nc 9
nop 5
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Controller;
6
7
use App\WikiDomainLookup;
8
use GuzzleHttp\Client;
9
use GuzzleHttp\Exception\ClientException;
10
use Psr\Cache\CacheItemPoolInterface;
11
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
12
use Symfony\Component\HttpFoundation\Request;
13
use Symfony\Component\HttpFoundation\Response;
14
use Symfony\Component\Routing\Annotation\Route;
15
16
class DefaultController extends AbstractController
17
{
18
    /** @var Client Guzzle HTTP client; a fresh instance is assigned on every makeRequest() call. */
18
    private $client;
19
20
    /** @var CacheItemPoolInterface Cache pool; assigned in getResults() from its $cache argument. */
21
    private $cache;
22
23
    /** @var bool Whether the results were pulled from cache. Set to true by getResults() on a cache hit. */
24
    private $fromCache = false;
25
26
    /**
     * Placeholder markers sent to Elasticsearch as the highlight pre/post tags. highlightQuery()
     * swaps them for a <span> after the surrounding text has been HTML-escaped.
     */
    private const PRE_TAG = '%**%';
27
    private const POST_TAG = '*%%*';
    /** Used as the 'size' of plain queries and handed to the view as 'max_results'. */
28
    private const MAX_RESULTS = 5000;
29
30
    /** @var string Duration of cache for main results set, as accepted by DateInterval::createFromDateString() */
31
    private const CACHE_TIME = '10 minutes';
32
33
    /** @var string[]|null Map from wiki dbname to domain name; null until first needed by getWikiDomainFromDbName(). */
34
    private $domainLookup;
36
37
    /**
     * Render the splash page that is shown when the user is logged out.
     * @Route("/splash")
     * @return Response The rendered 'jumbotron.html.twig' template.
     */
    public function splashAction(): Response
    {
        $response = $this->render('jumbotron.html.twig');

        return $response;
    }
45
46
    /**
     * The main route. Renders the splash template when no logged-in user is in the session, the
     * search form when no query was submitted, and the results page otherwise.
     * @Route("/", name="home")
     * @param Request $request
     * @param CacheItemPoolInterface $cache
     * @return Response
     */
    public function indexAction(Request $request, CacheItemPoolInterface $cache): Response
    {
        if (!$this->get('session')->get('logged_in_user')) {
            return $this->render('jumbotron.html.twig');
        }

        $query = $request->query->get('q');
        $regex = (bool)$request->query->get('regex');
        [$namespaces, $namespaceIds] = $this->parseNamespaces($request);
        $purgeCache = (bool)$request->query->get('purge');

        $ret = [
            'q' => $query,
            'regex' => $regex,
            'max_results' => self::MAX_RESULTS,
            'namespaces' => $namespaces,
        ];

        // Compare against the empty string explicitly: a bare truthiness check would treat the
        // legitimate search term "0" as "no query". The is_string() guard keeps non-string input
        // (e.g. ?q[]=x) away from the strictly typed getResults().
        if (!is_string($query) || '' === $query) {
            return $this->render('default/index.html.twig', $ret);
        }

        $ret = array_merge($ret, $this->getResults($query, $regex, $namespaceIds, $cache, $purgeCache));
        // Must be read after getResults(), which is what sets the flag.
        $ret['from_cache'] = $this->fromCache;

        return $this->render('default/result.html.twig', $ret);
    }
78
79
    /**
     * Parse the 'namespaces' parameter of the query string.
     *
     * Only pieces that are integers are kept. Blank or non-numeric pieces are dropped rather than
     * being coerced to 0 by intval(), which would silently add namespace 0 to the filter.
     * Duplicate ids are removed, keeping the first occurrence.
     *
     * @param Request $request
     * @return mixed[] [normalized comma-separated list as a string, array of ids as ints]
     */
    private function parseNamespaces(Request $request): array
    {
        $param = $request->query->get('namespaces', '');

        // Anything that is not a non-empty string (missing value, array input, ...) means "no filter".
        if (!is_string($param) || '' === $param) {
            return ['', []];
        }

        $ids = [];
        foreach (explode(',', $param) as $piece) {
            $piece = trim($piece);
            if (1 !== preg_match('/^-?\d+\z/', $piece)) {
                continue;
            }
            $ids[] = (int)$piece;
        }

        // array_unique() preserves keys, so re-index to keep a plain list.
        $ids = array_values(array_unique($ids));

        return [
            implode(',', $ids),
            $ids,
        ];
    }
102
103
    /**
     * Get the results for the given query, using the cache when possible.
     *
     * Side effects: stores $cache on the instance and sets $this->fromCache to reflect whether
     * the returned data came from the cache.
     *
     * @param string $query Search term. If wrapped in double-quotes it is run as an exact-match regex.
     * @param bool $regex Whether $query is a regular expression.
     * @param int[] $namespaceIds Namespace ids to restrict the search to; empty for no restriction.
     * @param CacheItemPoolInterface $cache
     * @param bool $purgeCache If true, ignore any cached entry and overwrite it with fresh results.
     * @return mixed[] With keys 'query', 'regex', 'total' and 'hits'.
     */
    public function getResults(
        string $query,
        bool $regex,
        array $namespaceIds,
        CacheItemPoolInterface $cache,
        bool $purgeCache = false
    ): array {
        $this->cache = $cache;
        // Reset so that a previous cache hit on this instance does not leak into this call.
        $this->fromCache = false;

        // Hash a structured representation of the inputs. Plain concatenation is ambiguous:
        // ("foo1", regex off, [2]) and ("foo", regex on, [2]) would both hash "foo12".
        $cacheKey = md5(serialize([$query, $regex, array_values($namespaceIds)]));
        $cacheItem = $this->cache->getItem($cacheKey);

        if (!$purgeCache && $cacheItem->isHit()) {
            $this->fromCache = true;
            return $cacheItem->get();
        }

        // Setup data structure to be passed to the view. We only set the query and regex here because they
        // are silently changed if the query is wrapped in double-quotes (see below).
        $data = [
            'query' => $query,
            'regex' => $regex,
        ];

        // Silently use regex to do exact match if query is wrapped in double-quotes. The length check
        // requires at least one character between the quotes, so a lone '"' or '""' stays a plain query.
        // NOTE(review): preg_quote() escapes PCRE metacharacters; confirm that covers everything the
        // regex flavour used by the search backend treats as special.
        if (strlen($query) > 2 && '"' === substr($query, 0, 1) && '"' === substr($query, -1, 1)) {
            $regex = true;
            $query = preg_quote(substr($query, 1, -1));
        }

        $params = $regex
            ? $this->getParamsForRegexQuery($query)
            : $this->getParamsForPlainQuery($query);

        if (!empty($namespaceIds)) {
            $params['query']['bool']['filter'][] = [ 'terms' => [
                'namespace' => $namespaceIds,
            ] ];
        }

        $res = $this->makeRequest($params);
        $data['total'] = $res['hits']['total'];
        $data['hits'] = $this->formatHits($res);

        $cacheItem
            ->set($data)
            ->expiresAfter(\DateInterval::createFromDateString(self::CACHE_TIME));
        $this->cache->save($cacheItem);

        return $data;
    }
160
161
    /**
     * Query the CloudElastic service with the given params.
     *
     * Reads ELASTIC_HOST, ELASTIC_INSECURE and APP_ENV from $_ENV, and assigns a new Guzzle client
     * to $this->client.
     *
     * @param mixed[] $params Request body; sent JSON-encoded.
     * @return mixed[] The decoded JSON response.
     * @throws ClientException Re-thrown after dumping the response body when APP_ENV is 'dev'.
     * @throws \UnexpectedValueException If the response body is not a JSON object or array.
     */
    private function makeRequest(array $params): array
    {
        // Environment values are strings, so parse the flag rather than testing truthiness:
        // the string "false" is truthy in PHP and would silently turn off TLS verification.
        // A missing variable means "verify".
        $insecure = filter_var($_ENV['ELASTIC_INSECURE'] ?? false, FILTER_VALIDATE_BOOLEAN);

        $this->client = new Client([
            'verify' => !$insecure,
        ]);

        // FIXME: Eventually will be able to remove _prefer_nodes
        $uri = $_ENV['ELASTIC_HOST'].'/*,*:*/_search?preference=_prefer_nodes:cloudelastic1001-cloudelastic-chi-eqiad,'.
            'cloudelastic1002-cloudelastic-chi-eqiad,cloudelastic1003-cloudelastic-chi-eqiad';

        $request = new \GuzzleHttp\Psr7\Request('GET', $uri, [
            'Content-Type' => 'application/json',
        ], \GuzzleHttp\json_encode($params));

        // FIXME: increase cURL timeout
        try {
            $res = $this->client->send($request);
        } catch (ClientException $e) {
            // Dump the full response in development environments since Guzzle truncates the error messages.
            if ('dev' === ($_ENV['APP_ENV'] ?? null)) {
                dump($e->getResponse()->getBody()->getContents());
            }
            throw $e;
        }

        $decoded = json_decode((string)$res->getBody(), true);

        // Fail with a descriptive error instead of a TypeError from the array return type.
        if (!is_array($decoded)) {
            throw new \UnexpectedValueException(
                'Unexpected response from search backend: '.json_last_error_msg()
            );
        }

        return $decoded;
    }
193
194
    /**
     * Build the data structure for each hit, giving the view what it needs.
     *
     * @param mixed[] $data Decoded search response; hits are read from $data['hits']['hits'].
     * @return mixed[] One entry per hit with keys 'wiki', 'title', 'url' and 'source_text'.
     */
    private function formatHits(array $data): array
    {
        $hits = $data['hits']['hits'];
        $newData = [];

        foreach ($hits as $hit) {
            $result = $hit['_source'];
            // Prefix the title with its namespace name, if it has one.
            $title = ($result['namespace_text'] ? $result['namespace_text'].':' : '').$result['title'];
            $domain = $this->getWikiDomainFromDbName($result['wiki']);
            $newData[] = [
                // Strip a literal ".org" suffix. rtrim($domain, '.org') would instead strip any
                // trailing run of the characters '.', 'o', 'r', 'g' and could eat into the name.
                'wiki' => preg_replace('/\.org\z/', '', $domain),
                'title' => $title,
                'url' => $this->getUrlForTitle($domain, $title),
                // Not every hit carries a highlight fragment; fall back to an empty string.
                'source_text' => $this->highlightQuery(
                    $hit['highlight']['source_text.plain'][0] ?? ''
                ),
            ];
        }

        return $newData;
    }
220
221
    /**
     * Get the URL to the page with the given title on the given wiki.
     *
     * Spaces in the title become underscores and the result is percent-encoded, so characters
     * such as '?', '#' and '%' cannot be misread as URL syntax. ':' and '/' are left as-is so
     * namespace prefixes and subpage paths stay readable.
     *
     * @param string $domain
     * @param string $title
     * @return string
     */
    private function getUrlForTitle(string $domain, string $title): string
    {
        $encodedTitle = rawurlencode(str_replace(' ', '_', $title));
        $encodedTitle = str_replace(['%3A', '%2F'], [':', '/'], $encodedTitle);

        return 'https://'.$domain.'/wiki/'.$encodedTitle;
    }
231
232
    /**
     * Resolve the domain of the wiki with the given database name.
     *
     * The dbname-to-domain map is loaded through WikiDomainLookup on first use and then kept on
     * the instance. NOTE(review): where WikiDomainLookup gets its data and how long it caches it
     * is not visible here; confirm against that class.
     *
     * @param string $wiki Database name of the wiki.
     * @return string The domain, or 'WIKINOTFOUND' if the database name is not in the map.
     */
    private function getWikiDomainFromDbName(string $wiki): string
    {
        $needsLoading = null === $this->domainLookup;

        if ($needsLoading) {
            $lookup = new WikiDomainLookup($this->client, $this->cache);
            $this->domainLookup = $lookup->load();
        }

        if (isset($this->domainLookup[$wiki])) {
            return $this->domainLookup[$wiki];
        }

        return 'WIKINOTFOUND';
    }
244
245
    /**
     * HTML-escape the highlight text, then turn the PRE_TAG/POST_TAG placeholders into a styled span.
     *
     * Escaping happens first so that only the span markup added here ends up unescaped.
     *
     * @param string $text
     * @return string
     */
    private function highlightQuery(string $text): string
    {
        $replacements = [
            self::PRE_TAG => "<span class='highlight'>",
            self::POST_TAG => "</span>",
        ];

        $escaped = htmlspecialchars($text);

        return strtr($escaped, $replacements);
    }
258
259
    /**
     * Build the request body sent to Cloud Elastic for a plain (non-regex) query.
     *
     * The same 'match' clause on 'source_text.plain' is used both as the filter and as the
     * highlight query.
     *
     * @param string $query
     * @return mixed[]
     */
    private function getParamsForPlainQuery(string $query): array
    {
        $matchClause = [
            'match' => [
                'source_text.plain' => $query,
            ],
        ];

        $highlight = [
            'pre_tags' => [self::PRE_TAG],
            'post_tags' => [self::POST_TAG],
            'fields' => [
                'source_text.plain' => [
                    'type' => 'experimental',
                ],
            ],
            'highlight_query' => $matchClause,
        ];

        $params = [
            'timeout' => '150s',
            'size' => self::MAX_RESULTS,
            '_source' => ['wiki', 'namespace_text', 'title'],
            'query' => [
                'bool' => [
                    'filter' => [$matchClause],
                ],
            ],
            'stats' => ['global-search'],
            'highlight' => $highlight,
        ];

        return $params;
    }
296
297
    /**
     * Build the request body sent to Cloud Elastic for a regular expression query.
     *
     * The regex is passed both to the 'source_regex' filter and to the highlighter options.
     *
     * @param string $query The regular expression.
     * @return mixed[]
     */
    private function getParamsForRegexQuery(string $query): array
    {
        // Values shared by the filter and the highlighter.
        $maxDeterminizedStates = 20000;
        $locale = 'en';

        $sourceRegex = [
            'regex' => $query,
            'field' => 'source_text',
            'ngram_field' => 'source_text.trigram',
            'max_determinized_states' => $maxDeterminizedStates,
            'max_expand' => 10,
            'case_sensitive' => true,
            'locale' => $locale,
        ];

        $highlightField = [
            'type' => 'experimental',
            'number_of_fragments' => 1,
            'fragmenter' => 'scan',
            'fragment_size' => 150,
            'options' => [
                'regex' => [$query],
                'locale' => $locale,
                'regex_flavor' => 'lucene',
                'skip_query' => true,
                'max_determinized_states' => $maxDeterminizedStates,
            ],
        ];

        $params = [
            'timeout' => '150s',
            'size' => 100,
            '_source' => ['wiki', 'namespace_text', 'title'],
            'query' => [
                'bool' => [
                    'filter' => [
                        ['source_regex' => $sourceRegex],
                    ],
                ],
            ],
            'stats' => ['global-search'],
            'highlight' => [
                'pre_tags' => [self::PRE_TAG],
                'post_tags' => [self::POST_TAG],
                'fields' => [
                    'source_text.plain' => $highlightField,
                ],
            ],
        ];

        return $params;
    }
345
}
346