Searcher::search()   F
last analyzed

Complexity

Conditions 16
Paths 320

Size

Total Lines 132
Code Lines 77

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 16
eloc 77
c 4
b 0
f 0
nc 320
nop 1
dl 0
loc 132
rs 3.2333

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php declare(strict_types = 1);
2
3
/**
4
 * Created by PhpStorm.
5
 * User: gordon
6
 * Date: 25/3/2561
7
 * Time: 1:35 น.
8
 */
9
10
namespace Suilven\ManticoreSearch\Service;
11
12
use Manticoresearch\Search;
13
use SilverStripe\ORM\ArrayList;
14
use SilverStripe\ORM\DataObject;
15
use Suilven\FreeTextSearch\Container\Facet;
16
use Suilven\FreeTextSearch\Container\SearchResults;
17
use Suilven\FreeTextSearch\Helper\FieldHelper;
18
use Suilven\FreeTextSearch\Helper\SearchHelper;
19
use Suilven\FreeTextSearch\Indexes;
20
use Suilven\FreeTextSearch\Types\SearchParamTypes;
21
22
class Searcher extends \Suilven\FreeTextSearch\Base\Searcher implements \Suilven\FreeTextSearch\Interfaces\Searcher
23
{
24
    /** @var \Suilven\ManticoreSearch\Service\Client */
25
    private $client;
26
27
    /** Build the searcher and keep a Manticore client wrapper on the instance */
    public function __construct()
    {
        $manticoreClientWrapper = new Client();
        $this->client = $manticoreClientWrapper;
    }
31
32
33
    /**
     * Run a search against the Manticore index named by $this->indexName and convert the response into a
     * SearchResults container holding the records, paging details, facets and the time taken
     *
     * @param string|null $q the search query, null is treated as an empty query
     */
    public function search(?string $q): SearchResults
    {
        $q = $q ?? '';
        if ($this->searchType === SearchParamTypes::OR) {
            $q = $this->makeQueryOr($q);
        }

        $startTime = \microtime(true);

        // reuse the client created in the constructor instead of building a second one for every search
        $searcher = new Search($this->client->getConnection());
        $searcher->setIndex($this->indexName);
        $searcher->limit($this->pageSize);

        // clamp so that a page number below 1 cannot produce a negative offset
        $searcher->offset(\max(0, $this->pageSize * ($this->page - 1)));
        $searcher->highlight(
            [],
            ['pre_tags' => '<b>', 'post_tags' => '</b>']
        );

        $indexes = new Indexes();
        $index = $indexes->getIndex($this->indexName);
        $hasManyFields = $index->getHasManyFields();

        $this->applyFiltersToSearch($searcher, $index, \array_keys($hasManyFields));
        $this->applyFacetsToSearch($searcher);

        $manticoreResult = $searcher->search($q)->get();

        // standardize the output returned
        $searchResults = new SearchResults();
        $searchResults->setRecords($this->convertHitsToRecords($manticoreResult, $index));
        $searchResults->setPage($this->page);
        $searchResults->setPageSize($this->pageSize);
        $searchResults->setQuery($q);
        $searchResults->setTotalNumberOfResults($manticoreResult->getTotal());

        $this->addFacetsToResults($searchResults, $manticoreResult->getFacets(), $hasManyFields);

        // elapsed time in seconds, rounded to the nearest millisecond
        $searchResults->setTime(\round(\microtime(true) - $startTime, 3));

        return $searchResults;
    }


    /**
     * Add every entry of $this->filters, other than the reserved keys 'q' and 'start', to the search as a filter
     *
     * @param array<string> $hasManyFieldNames names of the has many fields of the index
     */
    private function applyFiltersToSearch(
        Search $searcher,
        \Suilven\FreeTextSearch\Index $index,
        array $hasManyFieldNames
    ): void {
        $fieldHelper = new FieldHelper();
        foreach ($this->filters as $key => $value) {
            if ($key === 'q' || $key === 'start') {
                continue;
            }

            $typedValue = $fieldHelper->getFieldValueCorrectlyTyped($index, $key, $value);

            // has many fields are matched with 'in', everything else (has one included) with 'equals'
            $operator = \in_array($key, $hasManyFieldNames, true)
                ? 'in'
                : 'equals';
            $searcher->filter($key, $operator, $typedValue);
        }
    }


    /**
     * Request a facet for each of the facetted tokens, followed by each of the has many tokens
     *
     * @todo Deal with subsequent params
     */
    private function applyFacetsToSearch(Search $searcher): void
    {
        foreach ([$this->facettedTokens, $this->hasManyTokens] as $facetNames) {
            foreach ($facetNames as $facetName) {
                // manticore errors out with no error message if the facet name is not lowercase.  The second
                // param is an alias, use the correctly capitalized version of the facet
                $searcher->facet(\strtolower($facetName), $facetName, 1000);
            }
        }
    }


    /**
     * Convert the hits of a Manticore result set into DataObjects carrying the stored fields and highlights
     *
     * @param iterable<mixed> $hits the result set returned by the Manticore search
     */
    private function convertHitsToRecords(iterable $hits, \Suilven\FreeTextSearch\Index $index): ArrayList
    {
        $allFields = $this->getAllFields($index);

        // the same for every hit, so fetched once rather than inside the loop
        $fieldsToHighlight = $index->getHighlightedFields();

        $records = new ArrayList();
        foreach ($hits as $hit) {
            $record = new DataObject();
            $this->populateSearchResult($record, $allFields, $hit->getData());

            // manticore lowercases fields, so as above normalize them back to the SS fieldnames
            $this->addHighlights($record, $allFields, $hit->getHighlight(), $fieldsToHighlight);

            $record->ID = (int)($hit->getId());
            $records->push($record);
        }

        return $records;
    }


    /**
     * Create a Facet for each facet returned by Manticore and add it to the search results
     *
     * @param array<string, array<string, mixed>>|null $manticoreFacets facets as returned by Manticore, if any
     * @param array<string, array<string, string>> $hasManyFields has many field details, keyed by field name
     */
    private function addFacetsToResults(
        SearchResults $searchResults,
        ?array $manticoreFacets,
        array $hasManyFields
    ): void {
        if (\is_null($manticoreFacets)) {
            return;
        }

        foreach ($manticoreFacets as $facetTitle => $facetData) {
            $facet = new Facet($facetTitle);
            $isHasMany = \in_array($facetTitle, $this->hasManyTokens, true);

            foreach ($facetData['buckets'] as $bucket) {
                // by default the bucket key is used as is
                $label = $bucket['key'];

                // the BY functionality of facets has not yet been implemented, as such database calls required
                if ($isHasMany) {
                    $field = $hasManyFields[$facetTitle]['field'];
                    $clazz = $hasManyFields[$facetTitle]['class'];
                    $facetClassInstance = DataObject::get_by_id($clazz, $bucket['key']);

                    // the index can refer to a record that has since been deleted, skip it instead of
                    // reading a property from null
                    if (!$facetClassInstance instanceof DataObject) {
                        continue;
                    }

                    // @phpstan-ignore-next-line
                    $label = $facetClassInstance->$field;
                }

                $facet->addFacetCount($label, $bucket['doc_count']);
            }

            $searchResults->addFacet($facet);
        }
    }
166
167
168
    /**
     * Collect the fields of an index: its fields, tokens, has one fields and stored fields merged together,
     * followed by the names (keys only) of its has many fields
     *
     * @return array<array<string, string>|string>
     */
    public function getAllFields(\Suilven\FreeTextSearch\Index $index): array
    {
        $mergedFields = \array_merge(
            $index->getFields(),
            $index->getTokens(),
            $index->getHasOneFields(),
            $index->getStoredFields()
        );

        // only the names of the has many fields are appended, not their details
        $hasManyNames = \array_keys($index->getHasManyFields());

        return \array_merge($mergedFields, $hasManyNames);
    }
186
187
188
    /**
     * Map a field name to the property name used on the search result object. 'Title' becomes 'ResultTitle',
     * 'link' becomes 'Link', any other name is returned unchanged
     */
    public function refactorKeyName(string $keyname): string
    {
        // @todo The Title entry is a hack as $Title is rendering the ID in the template
        $renames = [
            'Title' => 'ResultTitle',
            'link' => 'Link',
        ];

        return $renames[$keyname] ?? $keyname;
    }
199
200
201
    /**
     * Find the field name whose lowercased form equals the given key. For fields described by an array the
     * 'relationship' entry is used as the name. The first match wins, the key itself is returned when no
     * field matches
     *
     * @param array<array<string, string>|string> $allFields
     */
    public function matchKey(string $key, array $allFields): string
    {
        foreach ($allFields as $candidate) {
            $fieldName = $candidate;
            if (\is_array($candidate)) {
                $fieldName = $candidate['relationship'];
            }

            if ($key === \strtolower($fieldName)) {
                return $fieldName;
            }
        }

        return $key;
    }
219
220
221
    /**
     * Search for records similar to the one provided. The text the record has indexed under the current
     * index is reduced to a handful of its least common terms, which are then used for an OR search
     *
     * @param \SilverStripe\ORM\DataObject $dataObject a dataObject relevant to the index
     */
    public function searchForSimilar(DataObject $dataObject): SearchResults
    {
        $helper = new SearchHelper();
        $indexedTextFields = $helper->getTextFieldPayload($dataObject);

        // the payload may not contain an entry for this index, in which case there is no text to amalgamate
        $textForCurrentIndex = $indexedTextFields[$this->indexName] ?? [];

        // @todo Search by multiple fields?
        $amalgamatedText = '';
        foreach ($textForCurrentIndex as $fieldText) {
            $amalgamatedText .= $fieldText . ' ';
        }

        // the OR search type is only wanted for this one search, so remember the configured type and put it
        // back afterwards, otherwise every later call to search() on this instance would silently become OR
        $previousSearchType = $this->searchType;
        $this->searchType = SearchParamTypes::OR;

        try {
            $text = $this->getLeastCommonTerms($amalgamatedText, 10);

            return $this->search($text);
        } finally {
            $this->searchType = $previousSearchType;
        }
    }
239
240
241
    /**
     * Find terms suitable for similarity searching
     *
     * The text is passed to the keywords call of the Manticore connection, and the distinct terms found in
     * more than one document are returned, least common first
     *
     * @todo Rename this method, or separate into a helper?
     * @param string $text text of a document being searched for
     * @param int $number the maximum number of terms to return
     * @return string the chosen terms separated by a space
     */
    private function getLeastCommonTerms(string $text, int $number = 20): string
    {
        $params = [
            'index' => $this->indexName,
            'body' => [
                'query' => $text,
                'options' => [
                    'stats' => 1,
                    'fold_lemmas' => 1,
                ],
            ],
        ];

        // reuse the client created in the constructor rather than instantiating another one
        $keywords = $this->client->getConnection()->keywords($params);

        // least common terms first.  The spaceship operator returns 0 for equal counts, which a comparator
        // that only ever answers -1 or +1 cannot do, making the resulting order unreliable
        \usort(
            $keywords,
            static function ($a, $b): int {
                return (int) $a['docs'] <=> (int) $b['docs'];
            }
        );

        // keyed by term, so that a term repeated in the text is only used once
        $terms = [];
        foreach ($keywords as $entry) {
            // if a word is unique to the source document, it is useless for finding other similar documents
            if ($entry['docs'] <= 1) {
                continue;
            }

            // @todo this or normalized?
            $terms[$entry['tokenized']] = true;
        }

        $toGlue = \array_slice(\array_keys($terms), 0, $number);

        return \implode(' ', $toGlue);
    }
297
298
299
    /**
     * Turn a query into an OR query, the default being AND
     *
     * @param string $q the search query
     * @return string the same query with its terms separated by a | character instead of whitespace
     */
    private function makeQueryOr(string $q): string
    {
        /** @var array<int, string> $terms */
        $terms = \preg_split('/\s+/', \trim($q));
        $orQuery = \implode('|', $terms);

        return $orQuery;
    }
313
314
315
    /**
     * Copy the values of a Manticore hit onto a search result object. Each key of the hit is mapped back to
     * the matching field name with matchKey(), then adjusted with refactorKeyName(), before being assigned
     *
     * @param \SilverStripe\ORM\DataObject $ssDataObject the search result to populate
     * @param array<array<string, string>|string> $allFields
     * @param array<string, string|int|float|bool> $source
     */
    private function populateSearchResult(DataObject $ssDataObject, array $allFields, array $source): void
    {
        foreach ($source as $key => $value) {
            // PHP turns numeric string array keys into integers, which matchKey() would reject under strict types
            $keyname = $this->matchKey((string) $key, $allFields);
            $keyname = $this->refactorKeyName($keyname);

            /** @phpstan-ignore-next-line */
            $ssDataObject->$keyname = $value;
        }
    }
332
333
334
    /**
     * Store the highlights of a hit on the search result object as its Highlights property. Only fields
     * configured for highlighting are kept, and they are keyed by field name rather than the lowercase
     * key of the hit
     *
     * @param \SilverStripe\ORM\DataObject $ssDataObject the search result to add the highlights to
     * @param array<array<string, string>|string> $allFields
     * @param array<array<string>> $highlights
     * @param array<string> $fieldsToHighlight
     */
    private function addHighlights(
        DataObject $ssDataObject,
        array $allFields,
        array $highlights,
        array $fieldsToHighlight
    ): void {
        // the keys of the highlights are lowercase, so compare against lowercased field names
        $lowercaseFieldsToHighlight = \array_map('strtolower', $fieldsToHighlight);

        $highlightsSS = [];
        foreach ($highlights as $key => $fragments) {
            if (\is_null($fragments) || !\in_array($key, $lowercaseFieldsToHighlight, true)) {
                continue;
            }

            // the first matching field wins, consistent with how the fields of the hit are named
            $keyname = $this->matchKey($key, $allFields);
            if ($key === 'link') {
                $keyname = 'Link';
            }

            $highlightsSS[$keyname] = $fragments;
        }

        /** @phpstan-ignore-next-line */
        $ssDataObject->Highlights = $highlightsSS;
    }
379
}
380