Passed
Push — master ( 0f35aa...8a28a1 )
by Gordon
04:15 queued 01:37
created

Searcher::search()   A

Complexity

Conditions 4
Paths 8

Size

Total Lines 67
Code Lines 43

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 4
eloc 43
c 4
b 0
f 0
nc 8
nop 1
dl 0
loc 67
rs 9.232

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

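Commonly applied refactorings include Extract Method: move a cohesive part of the method body into a new, well-named method and call it from the original.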

1
<?php declare(strict_types = 1);
2
3
/**
4
 * Created by PhpStorm.
5
 * User: gordon
6
 * Date: 25/3/2561
7
 * Time: 1:35 น.
8
 */
9
10
namespace Suilven\ManticoreSearch\Service;
11
12
use Manticoresearch\Search;
13
use SilverStripe\ORM\ArrayList;
14
use SilverStripe\ORM\DataObject;
15
use Suilven\FreeTextSearch\Container\SearchResults;
16
use Suilven\FreeTextSearch\Helper\SearchHelper;
0 ignored issues
show
Bug introduced by
The type Suilven\FreeTextSearch\Helper\SearchHelper was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
17
use Suilven\FreeTextSearch\Indexes;
18
use Suilven\FreeTextSearch\Types\SearchParamTypes;
0 ignored issues
show
Bug introduced by
The type Suilven\FreeTextSearch\Types\SearchParamTypes was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
19
20
class Searcher extends \Suilven\FreeTextSearch\Base\Searcher implements \Suilven\FreeTextSearch\Interfaces\Searcher
21
{
22
    /** @var \Suilven\ManticoreSearch\Service\Client */
23
    private $client;
24
25
    /**
     * Create the searcher together with the Manticore client wrapper it keeps in $client.
     */
    public function __construct()
    {
        $this->client = new Client();
    }
29
30
31
    /**
     * Run a free text search against the index named by $this->indexName.
     *
     * When the search type is SearchParamTypes::OR the query terms are joined with '|' before
     * being sent. Paging is taken from $this->page and $this->pageSize.
     *
     * @param string|null $q the search query; null is treated as an empty string
     * @return \Suilven\FreeTextSearch\Container\SearchResults the records found, the page and page size used,
     *         the query as it was sent, the total number of results and the elapsed time in seconds
     *         (rounded to three decimal places)
     */
    public function search(?string $q): SearchResults
    {
        $q = $q ?? '';
        if ($this->searchType === SearchParamTypes::OR) {
            $q = $this->makeQueryOr($q);
        }

        $startTime = \microtime(true);

        $indexes = new Indexes();
        $index = $indexes->getIndex($this->indexName);

        $manticoreResult = $this->createManticoreSearch()->search($q)->get();
        $records = $this->convertHitsToRecords($manticoreResult, $index);

        // standardize the output returned
        $searchResults = new SearchResults();
        $searchResults->setRecords($records);
        $searchResults->setPage($this->page);
        $searchResults->setPageSize($this->pageSize);
        $searchResults->setQuery($q);
        $searchResults->setTotalNumberOfResults($manticoreResult->getTotal());

        $delta = \microtime(true) - $startTime;
        $searchResults->setTime(\round(1000 * $delta) / 1000);

        return $searchResults;
    }


    /**
     * Build the Manticore search object for the current index, page and page size, with
     * highlighted terms wrapped in <b> tags.
     */
    private function createManticoreSearch(): Search
    {
        $searcher = new Search($this->client->getConnection());
        $searcher->setIndex($this->indexName);
        $searcher->limit($this->pageSize);

        // a page number below 1 must not produce a negative offset
        $searcher->offset(\max(0, $this->pageSize * ($this->page - 1)));

        $searcher->highlight(
            [],
            ['pre_tags' => '<b>', 'post_tags' => '</b>']
        );

        return $searcher;
    }


    /**
     * Convert each Manticore hit into a DataObject carrying the stored fields, the highlights and the hit ID.
     *
     * @param iterable<mixed> $hits the result set returned by Manticore
     */
    private function convertHitsToRecords(iterable $hits, \Suilven\FreeTextSearch\Index $index): ArrayList
    {
        // neither of these changes from hit to hit, so resolve them once
        $allFields = $this->getAllFields($index);
        $fieldsToHighlight = $index->getHighlightedFields();

        $records = new ArrayList();
        foreach ($hits as $hit) {
            $ssDataObject = new DataObject();
            $this->populateSearchResult($ssDataObject, $allFields, $hit->getData());

            // manticore lowercases fields, so normalize them back to the SS fieldnames
            $this->addHighlights($ssDataObject, $allFields, $hit->getHighlight(), $fieldsToHighlight);

            $ssDataObject->ID = $hit->getId();
            $records->push($ssDataObject);
        }

        return $records;
    }
99
100
101
    /**
     * Collect every field name known to the index: fields, tokens, has-one fields and stored
     * fields, followed by the keys of the has-many field configuration.
     *
     * @return array<string>
     */
    public function getAllFields(\Suilven\FreeTextSearch\Index $index): array
    {
        $directFields = \array_merge(
            $index->getFields(),
            $index->getTokens(),
            $index->getHasOneFields(),
            $index->getStoredFields()
        );

        // has-many fields are keyed by name, so only their keys are wanted
        $hasManyNames = \array_keys($index->getHasManyFields());

        return \array_merge($directFields, $hasManyNames);
    }
119
120
121
    /**
     * Map a field name to the property name used on the search result object.
     *
     * 'Title' becomes 'ResultTitle' and 'link' becomes 'Link'; any other name is returned unchanged.
     *
     * @todo The Title rename is a hack as $Title is rendering the ID in the template
     * @param string $keyname the field name to map
     * @return string the property name to use
     */
    public function refactorKeyName(string $keyname): string
    {
        $renames = [
            'Title' => 'ResultTitle',
            'link' => 'Link',
        ];

        return $renames[$keyname] ?? $keyname;
    }
132
133
134
    /**
     * Find the field whose lowercased name equals the given key.
     *
     * @param string $key the key to look up, compared against the lowercased field names
     * @param array<string> $allFields the candidate field names
     * @return string the first matching field name, or $key itself when nothing matches
     */
    public function matchKey(string $key, array $allFields): string
    {
        foreach ($allFields as $candidate) {
            if ($key === \strtolower($candidate)) {
                return $candidate;
            }
        }

        return $key;
    }
148
149
150
    /**
     * Search the current index for records similar to the given one.
     *
     * The indexed text of the object is reduced to at most 10 terms by getLeastCommonTerms(),
     * and those terms are run as an OR search. The search type that was in force beforehand is
     * restored afterwards, so later calls to search() on this instance are not affected.
     *
     * @todo Search by multiple fields?
     * @param \SilverStripe\ORM\DataObject $dataObject a dataObject relevant to the index
     * @return \Suilven\FreeTextSearch\Container\SearchResults the results of the OR search
     */
    public function searchForSimilar(DataObject $dataObject): SearchResults
    {
        $helper = new SearchHelper();
        $indexedTextFields = $helper->getTextFieldPayload($dataObject);

        // the object may have no payload for this index; treat that as no text at all
        $textForCurrentIndex = $indexedTextFields[$this->indexName] ?? [];
        $amalgamatedText = \implode(' ', $textForCurrentIndex);

        // NOTE(review): static analysis reports searchType as undeclared on this class; it is
        // presumably declared on the base Searcher - confirm
        $previousSearchType = $this->searchType;
        $this->searchType = SearchParamTypes::OR;

        try {
            $text = $this->getLeastCommonTerms($amalgamatedText, 10);

            return $this->search($text);
        } finally {
            $this->searchType = $previousSearchType;
        }
    }
168
169
170
    /**
     * Find terms suitable for similarity searching
     *
     * The text is passed to the Manticore keywords call for the current index, the returned
     * keywords are ordered by ascending document count, and the first $number distinct terms
     * that occur in more than one document are returned.
     *
     * @todo Rename this method, or separate into a helper?
     * @param string $text text of a document being searched for
     * @param int $number the maximum number of terms to return
     * @return string the selected terms separated by single spaces
     */
    private function getLeastCommonTerms(string $text, int $number = 20): string
    {
        $params = [
            'index' => $this->indexName,
            'body' => [
                'query' => $text,
                'options' => [
                    'stats' => 1,
                    'fold_lemmas' => 1,
                ],
            ],
        ];

        $keywords = $this->client->getConnection()->keywords($params);

        // rarest terms first; the comparator has to return its result for usort to order anything
        \usort(
            $keywords,
            static function ($a, $b): int {
                return $a['docs'] <=> $b['docs'];
            }
        );

        $terms = [];
        foreach ($keywords as $entry) {
            // if a word is unique to the source document, it is useless for finding other similar documents
            if ($entry['docs'] <= 1) {
                continue;
            }

            // @todo this or normalized?
            // keyed by term so that a repeated term is only kept once, in first-seen order
            $terms[$entry['tokenized']] = true;
        }

        return \implode(' ', \array_slice(\array_keys($terms), 0, $number));
    }
227
228
229
    /**
     * Turn a query into an OR query rather than the default AND
     *
     * Surrounding whitespace is trimmed and every remaining run of whitespace is replaced by
     * a single | character.
     *
     * @param string $q the search query
     * @return string the same query with the terms separated by a | character, to form an OR query
     */
    private function makeQueryOr(string $q): string
    {
        $trimmed = \trim($q);

        return \preg_replace('/\s+/', '|', $trimmed);
    }
243
244
245
    /**
     * Copy every value of a search hit onto the result object.
     *
     * Each source key is first matched back to its field name with matchKey() and then mapped
     * with refactorKeyName(); the value is assigned to the property of that name.
     *
     * @param \SilverStripe\ORM\DataObject $ssDataObject the result object to fill
     * @param array<string> $allFields
     * @param array<string, string|int|float|bool> $source
     */
    private function populateSearchResult(DataObject &$ssDataObject, array $allFields, array $source): void
    {
        foreach ($source as $sourceKey => $value) {
            /** @var string $property */
            $property = $this->refactorKeyName($this->matchKey($sourceKey, $allFields));

            /** @phpstan-ignore-next-line */
            $ssDataObject->$property = $value;
        }
    }
261
262
263
    /**
     * Attach the highlights of a search hit to the result object as its Highlights property.
     *
     * Only highlights whose key is the lowercased name of a field in $fieldsToHighlight are
     * kept. Each key is matched back to its field name with matchKey(), so the same field
     * wins here as when the result itself is populated; the key 'link' is stored as 'Link'.
     *
     * @param \SilverStripe\ORM\DataObject $ssDataObject the result object to annotate
     * @param array<string> $allFields
     * @param array<array<string>> $highlights
     * @param array<string> $fieldsToHighlight
     */
    private function addHighlights(
        DataObject &$ssDataObject,
        array $allFields,
        array $highlights,
        array $fieldsToHighlight
    ): void {
        $lowercaseFieldsToHighlight = \array_map('strtolower', $fieldsToHighlight);

        $highlightsSS = [];
        foreach ($highlights as $key => $fragments) {
            if (!isset($fragments) || !\in_array($key, $lowercaseFieldsToHighlight, true)) {
                continue;
            }

            $keyname = $key === 'link'
                ? 'Link'
                : $this->matchKey($key, $allFields);

            $highlightsSS[$keyname] = $fragments;
        }

        /** @phpstan-ignore-next-line */
        $ssDataObject->Highlights = $highlightsSS;
    }
304
}
305