Passed
Push — master ( 6deaee...64bf45 )
by Gordon
04:46 queued 02:47
created

Searcher::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 3
rs 10
1
<?php declare(strict_types = 1);
2
3
/**
4
 * Created by PhpStorm.
5
 * User: gordon
6
 * Date: 25/3/2561
7
 * Time: 1:35 น.
8
 */
9
10
namespace Suilven\ManticoreSearch\Service;
11
12
use Manticoresearch\Search;
13
use SilverStripe\ORM\ArrayList;
14
use SilverStripe\ORM\DataObject;
15
use Suilven\FreeTextSearch\Container\Facet;
16
use Suilven\FreeTextSearch\Container\SearchResults;
17
use Suilven\FreeTextSearch\Helper\SearchHelper;
18
use Suilven\FreeTextSearch\Indexes;
19
use Suilven\FreeTextSearch\Types\SearchParamTypes;
20
21
class Searcher extends \Suilven\FreeTextSearch\Base\Searcher implements \Suilven\FreeTextSearch\Interfaces\Searcher
22
{
23
    /** @var \Suilven\ManticoreSearch\Service\Client */
24
    private $client;
25
26
    /**
     * Build the searcher and keep hold of a Manticore client wrapper
     */
    public function __construct()
    {
        $manticoreClient = new Client();
        $this->client = $manticoreClient;
    }
30
31
32
    /**
     * Search the configured index and return the results in the standard free text search container
     *
     * The query is turned into an OR query when the search type is SearchParamTypes::OR.  Paging, highlighting and
     * facets are applied to the Manticore search, and each hit is converted into a DataObject.
     *
     * @param string|null $q the search query, a null query is treated as an empty string
     * @return \Suilven\FreeTextSearch\Container\SearchResults the records of the requested page, paging details,
     *         facet counts and the time taken in seconds
     */
    public function search(?string $q): SearchResults
    {
        $q = $q ?? '';
        if ($this->searchType === SearchParamTypes::OR) {
            $q = $this->makeQueryOr($q);
        }
        $startTime = \microtime(true);

        // reuse the client built in the constructor instead of creating a new one for every search
        $searcher = new Search($this->client->getConnection());
        $searcher->setIndex($this->indexName);

        // the offset is derived from a 1 based page number, clamp it so a page below 1 cannot make it negative
        $searcher->limit($this->pageSize);
        $searcher->offset($this->pageSize * \max(0, $this->page - 1));

        $indexes = new Indexes();
        $index = $indexes->getIndex($this->indexName);

        $searcher->highlight(
            [],
            ['pre_tags' => '<b>', 'post_tags' => '</b>']
        );

        // @todo Deal with subsequent params
        foreach ($this->facettedTokens as $facetName) {
            // manticore errors out with no error message if the facet name is not lowercase.  The second param is an
            // alias, use the correctly capitalized version of the facet
            $searcher->facet(\strtolower($facetName), $facetName);
        }

        $manticoreResult = $searcher->search($q)->get();
        $allFields = $this->getAllFields($index);

        // identical for every hit, so it is looked up once rather than inside the loop
        $fieldsToHighlight = $index->getHighlightedFields();

        $ssResult = new ArrayList();
        while ($manticoreResult->valid()) {
            $hit = $manticoreResult->current();
            $ssDataObject = new DataObject();

            $this->populateSearchResult($ssDataObject, $allFields, $hit->getData());

            // manticore lowercases fields, so as above normalize them back to the SS fieldnames
            $this->addHighlights($ssDataObject, $allFields, $hit->getHighlight(), $fieldsToHighlight);

            $ssDataObject->ID = $hit->getId();
            $ssResult->push($ssDataObject);
            $manticoreResult->next();
        }

        // we now need to standardize the output returned
        $searchResults = new SearchResults();
        $searchResults->setRecords($ssResult);
        $searchResults->setPage($this->page);
        $searchResults->setPageSize($this->pageSize);
        $searchResults->setQuery($q);
        $searchResults->setTotalNumberOfResults($manticoreResult->getTotal());

        // create facet result objects
        $manticoreFacets = $manticoreResult->getFacets();
        if (!\is_null($manticoreFacets)) {
            foreach ($manticoreFacets as $facetTitle => $facetData) {
                $facet = new Facet($facetTitle);

                // a facet without any buckets simply ends up with no counts
                foreach ($facetData['buckets'] ?? [] as $count) {
                    $facet->addFacetCount($count['key'], $count['doc_count']);
                }
                $searchResults->addFacet($facet);
            }
        }

        // elapsed time in seconds, rounded to the nearest millisecond
        $searchResults->setTime(\round(\microtime(true) - $startTime, 3));

        return $searchResults;
    }
119
120
121
    /**
     * Gather the names of all the fields held against an index
     *
     * @return array<string> fields, tokens, has one fields and stored fields, followed by the has many field names
     */
    public function getAllFields(\Suilven\FreeTextSearch\Index $index): array
    {
        $fieldNames = \array_merge(
            $index->getFields(),
            $index->getTokens(),
            $index->getHasOneFields(),
            $index->getStoredFields()
        );

        // for has many fields it is the array keys that get appended, not the values
        foreach ($index->getHasManyFields() as $name => $unused) {
            $fieldNames[] = $name;
        }

        return $fieldNames;
    }
139
140
141
    /**
     * Rename the keys that need a different name on the search result object
     *
     * @param string $keyname the name of a field in a search result
     * @return string 'ResultTitle' for 'Title', 'Link' for 'link', otherwise the name unchanged
     */
    public function refactorKeyName(string $keyname): string
    {
        // @todo This is a hack as $Title is rendering the ID in the template
        $renames = [
            'Title' => 'ResultTitle',
            'link' => 'Link',
        ];

        return $renames[$keyname] ?? $keyname;
    }
152
153
154
    /**
     * Map a lowercased key back to the field name it originated from
     *
     * @param string $key a key in lowercase
     * @param array<string> $allFields the candidate field names
     * @return string the first field whose lowercase form is identical to the key, or the key itself if none is
     */
    public function matchKey(string $key, array $allFields): string
    {
        foreach ($allFields as $candidate) {
            if (\strtolower($candidate) !== $key) {
                continue;
            }

            return $candidate;
        }

        return $key;
    }
168
169
170
    /**
     * Search for documents similar to a given one
     *
     * The text held against the current index for the object is joined together, reduced to a handful of terms and
     * then searched for as an OR query.  Note that the search type of this searcher is left set to OR afterwards.
     *
     * @param \SilverStripe\ORM\DataObject $dataObject a dataObject relevant to the index
     */
    public function searchForSimilar(DataObject $dataObject): SearchResults
    {
        $payload = (new SearchHelper())->getTextFieldPayload($dataObject);
        $fieldTexts = $payload[$this->indexName];

        // @todo Search by multiple fields?
        $amalgamatedText = '';
        foreach ($fieldTexts as $fieldText) {
            $amalgamatedText .= $fieldText . ' ';
        }

        $this->searchType = SearchParamTypes::OR;

        return $this->search($this->getLeastCommonTerms($amalgamatedText, 10));
    }
188
189
190
    /**
     * Find terms suitable for similarity searching
     *
     * The text is passed to the keywords call of Manticore with stats enabled, the keywords returned are ordered by
     * the number of documents containing them, fewest first, and the leading ones are joined with spaces.
     *
     * @todo Rename this method, or separate into a helper?
     * @param string $text text of a document being searched for
     * @param int $number the maximum number of terms to return
     * @return string up to $number distinct terms separated by spaces, rarest first
     */
    private function getLeastCommonTerms(string $text, int $number = 20): string
    {
        $params = [
            'index' => $this->indexName,
            'body' => [
                'query' => $text,
                'options' => [
                    'stats' => 1,
                    'fold_lemmas' => 1,
                ],
            ],
        ];

        // reuse the client built in the constructor instead of creating a new one
        $keywords = $this->client->getConnection()->keywords($params);

        // the comparator has to return an int, without one the keywords are never ordered by document count
        \usort(
            $keywords,
            static function ($a, $b): int {
                return $a['docs'] <=> $b['docs'];
            }
        );

        // keyed by term so that a term occurring several times in the text is only used once
        $terms = [];
        foreach ($keywords as $entry) {
            // if a word is unique to the source document, it is useless for finding other similar documents
            if ($entry['docs'] <= 1) {
                continue;
            }

            // @todo this or normalized?
            $terms[$entry['tokenized']] = true;
        }

        return \implode(' ', \array_slice(\array_keys($terms), 0, $number));
    }
247
248
249
    /**
     * Convert a query from the default AND form into an OR query
     *
     * @param string $q the search query
     * @return string the terms of the trimmed query joined by a | character in place of the whitespace between them
     */
    private function makeQueryOr(string $q): string
    {
        /** @var array<int, string> $terms */
        $terms = \preg_split('/\s+/', \trim($q));
        $orQuery = \implode('|', $terms);

        return $orQuery;
    }
263
264
265
    /**
     * Copy the values of a hit onto a DataObject, using the field names of the index as the property names
     *
     * @param array<string> $allFields
     * @param array<string, string|int|float|bool> $source
     */
    private function populateSearchResult(DataObject &$ssDataObject, array $allFields, array $source): void
    {
        foreach ($source as $key => $value) {
            // restore the capitalization of the field name, then apply any renaming required
            $property = $this->refactorKeyName($this->matchKey($key, $allFields));

            /** @phpstan-ignore-next-line */
            $ssDataObject->$property = $value;
        }
    }
281
282
283
    /**
     * Attach the highlights of a hit to a DataObject as its Highlights property
     *
     * Only highlights for the fields configured to be highlighted are kept.  They are keyed by the field name with
     * its capitalization restored, with the key 'link' always becoming 'Link'.
     *
     * @param array<string> $allFields
     * @param array<array<string>> $highlights
     * @param array<string> $fieldsToHighlight
     */
    private function addHighlights(
        DataObject &$ssDataObject,
        array $allFields,
        array $highlights,
        array $fieldsToHighlight
    ): void {
        // the keys of the highlights are lowercase, so the fields to highlight are compared in lowercase also
        $lowercaseFieldsToHighlight = \array_map('strtolower', $fieldsToHighlight);

        $highlightsSS = [];
        foreach ($highlights as $key => $snippets) {
            if (\is_null($snippets) || !\in_array($key, $lowercaseFieldsToHighlight, true)) {
                continue;
            }

            // same lookup as used when populating the result, it stops at the first field that matches
            $keyname = $key === 'link'
                ? 'Link'
                : $this->matchKey($key, $allFields);

            $highlightsSS[$keyname] = $snippets;
        }

        /** @phpstan-ignore-next-line */
        $ssDataObject->Highlights = $highlightsSS;
    }
324
}
325