Passed
Push — master ( 854cc1...b77bfa )
by MusikAnimal
07:15
created

Query::getRegexQuery()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 38
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 32
nc 1
nop 0
dl 0
loc 38
rs 9.408
c 1
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Model;
6
7
/**
8
 * A Query produces the parameters needed to be passed to the CloudElastic service.
9
 */
10
class Query
{
    /** Marker sent as the highlighter's "pre_tags" value. */
    public const PRE_TAG = '%**%';

    /** Marker sent as the highlighter's "post_tags" value. */
    public const POST_TAG = '*%%*';

    /** Value sent as the "size" parameter of both the plain and the regex query. */
    public const MAX_RESULTS = 5000;

    /** @var string The query string. */
    protected $query;

    /** @var int[] Array of namespace IDs. */
    protected $namespaces;

    /** @var bool Whether to get params for a regular expression search. */
    protected $regex;

    /** @var bool Whether the params should be for a case-insensitive search. */
    protected $ignoreCase;

    /**
     * Query constructor.
     *
     * A query wrapped in double-quotes is converted into a regex search for the
     * quoted text, with the quotes stripped and the remainder escaped.
     *
     * @param string $query The search string as entered.
     * @param int[] $namespaces Namespace IDs to restrict to; an empty array adds no namespace filter.
     * @param bool $regex Whether $query is a regular expression.
     * @param bool $ignoreCase Whether a regex search should be case-insensitive.
     */
    public function __construct(string $query, array $namespaces, bool $regex = false, bool $ignoreCase = false)
    {
        // Silently use regex to do exact match if query is wrapped in double-quotes.
        // At least two characters are required: a lone '"' would otherwise count as both the
        // opening and the closing quote, leaving nothing valid to pass to preg_quote().
        if (strlen($query) >= 2 && '"' === $query[0] && '"' === substr($query, -1)) {
            $regex = true;
            // NOTE(review): preg_quote() escapes PCRE metacharacters, while the highlighter below is
            // given 'regex_flavor' => 'lucene' -- confirm every character that needs escaping is covered.
            $query = preg_quote(substr($query, 1, -1));
        }

        $this->query = $query;
        $this->namespaces = $namespaces;
        $this->regex = $regex;
        $this->ignoreCase = $ignoreCase;
    }

    /**
     * Get parameters needed to make the CloudElastic query.
     *
     * Builds the regex or plain parameter set depending on the regex flag, then
     * appends a "terms" filter on "namespace" when any namespaces were given.
     *
     * @return array|mixed[]
     */
    public function getParams(): array
    {
        $params = $this->regex ? $this->getRegexQuery() : $this->getPlainQuery();

        if (!empty($this->namespaces)) {
            $params['query']['bool']['filter'][] = [ 'terms' => [
                'namespace' => $this->namespaces,
            ] ];
        }

        return $params;
    }

    /**
     * Params to be passed to CloudElastic for a plain (normal) query.
     *
     * The query string is used both as the "match" filter and as the highlight query
     * on the "source_text.plain" field.
     *
     * @return mixed[]
     */
    private function getPlainQuery(): array
    {
        return [
            'timeout' => '150s',
            'size' => self::MAX_RESULTS,
            '_source' => ['wiki', 'namespace_text', 'title'],
            'query' => [
                'bool' => [
                    'filter' => [
                        [ 'match' => [
                            'source_text.plain' => $this->query,
                        ] ],
                    ],
                ],
            ],
            'stats' => ['global-search'],
            'highlight' => [
                'pre_tags' => [self::PRE_TAG],
                'post_tags' => [self::POST_TAG],
                'fields' => [
                    'source_text.plain' => [
                        'type' => 'experimental',
                    ],
                ],
                'highlight_query' => [
                    'match' => [
                        'source_text.plain' => $this->query,
                    ],
                ],
            ],
        ];
    }

    /**
     * Params to be passed to CloudElastic for a regular expression query.
     *
     * The query string is used as the "source_regex" filter pattern and again as the
     * highlighter's regex option; case sensitivity follows the ignoreCase flag in both places.
     *
     * @return mixed[]
     */
    private function getRegexQuery(): array
    {
        return [
            'timeout' => '150s',
            // Same result cap as the plain query, kept in one place.
            'size' => self::MAX_RESULTS,
            '_source' => ['wiki', 'namespace_text', 'title'],
            'query' => [
                'bool' => [
                    'filter' => [
                        [ 'source_regex' => [
                            'regex' => $this->query,
                            'field' => 'source_text',
                            'ngram_field' => 'source_text.trigram',
                            'max_determinized_states' => 20000,
                            'max_expand' => 10,
                            'case_sensitive' => !$this->ignoreCase,
                            'locale' => 'en',
                        ] ],
                    ],
                ],
            ],
            'stats' => ['global-search'],
            'highlight' => [
                'pre_tags' => [self::PRE_TAG],
                'post_tags' => [self::POST_TAG],
                'fields' => [
                    'source_text.plain' => [
                        'type' => 'experimental',
                        'number_of_fragments' => 1,
                        'fragmenter' => 'scan',
                        'fragment_size' => 150,
                        'options' => [
                            'regex' => [$this->query],
                            'locale' => 'en',
                            'regex_flavor' => 'lucene',
                            'skip_query' => true,
                            'regex_case_insensitive' => $this->ignoreCase,
                            'max_determinized_states' => 20000,
                        ],
                    ],
                ],
            ],
        ];
    }
}
152