Total Complexity | 45 |
Total Lines | 357 |
Duplicated Lines | 0 % |
Changes | 5 | ||
Bugs | 0 | Features | 0 |
Complex classes like Searcher often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Searcher, and based on these observations, apply Extract Interface, too.
1 | <?php declare(strict_types = 1); |
||
22 | class Searcher extends \Suilven\FreeTextSearch\Base\Searcher implements \Suilven\FreeTextSearch\Interfaces\Searcher |
||
23 | { |
||
24 | /** @var \Suilven\ManticoreSearch\Service\Client */ |
||
25 | private $client; |
||
26 | |||
/**
 * Build the searcher and store a client instance on it.
 *
 * Note that the parent constructor is not invoked here.
 */
public function __construct()
{
    $serviceClient = new Client();
    $this->client = $serviceClient;
}
||
31 | |||
32 | |||
/**
 * Execute a search against the configured index and return standardized results.
 *
 * The method applies paging, the configured filters and facets, converts each hit into a DataObject
 * (with highlights), and records facet counts and the elapsed time on the returned SearchResults.
 *
 * @param string|null $q the search query, null is treated as an empty query
 * @return \Suilven\FreeTextSearch\Container\SearchResults|SearchResults records, paging info, facets and timing
 */
public function search(?string $q): SearchResults
{
    $q = $q ?? '';
    if ($this->searchType === SearchParamTypes::OR) {
        $q = $this->makeQueryOr($q);
    }

    $startTime = \microtime(true);

    // use the client created in the constructor rather than building a second one per search
    $manticoreClient = $this->client->getConnection();

    $searcher = new Search($manticoreClient);
    $searcher->setIndex($this->indexName);

    $searcher->limit($this->pageSize);
    // guard against a page number below 1 producing a negative offset
    $offset = \max(0, $this->pageSize * ($this->page - 1));
    $searcher->offset($offset);

    $indexes = new Indexes();
    $index = $indexes->getIndex($this->indexName);

    // fetched once, used both for choosing the filter operator and for resolving has many facets
    $hasManyFields = $index->getHasManyFields();

    $searcher->highlight(
        [],
        ['pre_tags' => '<b>', 'post_tags'=>'</b>']
    );

    $fieldHelper = new FieldHelper();
    foreach ($this->filters as $key => $value) {
        if ($key === 'q' || $key === 'start') {
            continue;
        }
        $typedValue = $fieldHelper->getFieldValueCorrectlyTyped($index, $key, $value);

        // has many fields are filtered with 'in', every other field (has one included) with 'equals'
        $operator = \array_key_exists($key, $hasManyFields)
            ? 'in'
            : 'equals';
        $searcher->filter($key, $operator, $typedValue);
    }

    // @todo Deal with subsequent params
    // plain facets first, then the has many ones, registered in exactly the same way
    foreach ([$this->facettedTokens, $this->hasManyTokens] as $facetNames) {
        foreach ($facetNames as $facetName) {
            // manticore errors out with no error message if the facet name is not lowercase. The second param
            // is an alias, use the correctly capitalized version of the facet
            $searcher->facet(\strtolower($facetName), $facetName, 1000);
        }
    }

    $manticoreResult = $searcher->search($q)->get();
    $allFields = $this->getAllFields($index);

    // the same for every hit, so look it up once instead of once per result
    $fieldsToHighlight = $index->getHighlightedFields();

    $ssResult = new ArrayList();
    while ($manticoreResult->valid()) {
        $hit = $manticoreResult->current();
        $source = $hit->getData();
        $ssDataObject = new DataObject();

        $this->populateSearchResult($ssDataObject, $allFields, $source);

        // manticore lowercases fields, so as above normalize them back to the SS fieldnames
        $highlights = $hit->getHighlight();
        $this->addHighlights($ssDataObject, $allFields, $highlights, $fieldsToHighlight);

        $ssDataObject->ID = $hit->getId();
        $ssResult->push($ssDataObject);
        $manticoreResult->next();
    }

    // we now need to standardize the output returned
    $searchResults = new SearchResults();
    $searchResults->setRecords($ssResult);
    $searchResults->setPage($this->page);
    $searchResults->setPageSize($this->pageSize);
    $searchResults->setQuery($q);
    $searchResults->setTotalNumberOfResults($manticoreResult->getTotal());

    // create facet result objects
    $manticoreFacets = $manticoreResult->getFacets();

    if (!\is_null($manticoreFacets)) {
        /** @var string $facetTitle */
        foreach ($manticoreFacets as $facetTitle => $facetData) {
            $facet = new Facet($facetTitle);

            // the BY functionality of facets has not yet been implemented, as such database calls required
            if (\in_array($facetTitle, $this->hasManyTokens, true)) {
                $field = $hasManyFields[$facetTitle]['field'];
                $clazz = $hasManyFields[$facetTitle]['class'];

                foreach ($facetData['buckets'] as $count) {
                    $facetClassInstance = DataObject::get_by_id($clazz, $count['key']);

                    // the index can reference a record that no longer exists in the database, skip that bucket
                    // instead of dereferencing null
                    if (!($facetClassInstance instanceof DataObject)) {
                        continue;
                    }

                    // @phpstan-ignore-next-line
                    $facet->addFacetCount($facetClassInstance->$field, $count['doc_count']);
                }
            } else {
                // use values as is
                foreach ($facetData['buckets'] as $count) {
                    $facet->addFacetCount($count['key'], $count['doc_count']);
                }
            }

            $searchResults->addFacet($facet);
        }
    }

    // elapsed time in seconds, rounded to the nearest millisecond
    $endTime = \microtime(true);
    $delta = $endTime - $startTime;
    $delta = \round(1000*$delta)/1000;
    $searchResults->setTime($delta);

    return $searchResults;
}
||
166 | |||
167 | |||
/**
 * Gather the field names of an index into a single array.
 *
 * The fields, tokens, has one fields and stored fields returned by the index are merged in that order,
 * followed by the keys of the has many fields.
 *
 * @return array<array<string, string>|string>
 */
public function getAllFields(\Suilven\FreeTextSearch\Index $index): array
{
    return \array_merge(
        $index->getFields(),
        $index->getTokens(),
        $index->getHasOneFields(),
        $index->getStoredFields(),
        // only the names (keys) of the has many fields are wanted, not their details
        \array_keys($index->getHasManyFields())
    );
}
||
186 | |||
187 | |||
/**
 * Map an index key name to the name used on the search result object.
 *
 * 'Title' becomes 'ResultTitle' and 'link' becomes 'Link', any other name is returned unchanged.
 *
 * @param string $keyname the key name to translate
 * @return string the translated key name
 */
public function refactorKeyName(string $keyname): string
{
    // @todo This is a hack as $Title is rendering the ID in the template
    $renames = [
        'Title' => 'ResultTitle',
        'link' => 'Link',
    ];

    return $renames[$keyname] ?? $keyname;
}
198 | } |
||
199 | |||
200 | |||
/**
 * Find the field whose lowercased name equals the given key and return it with its original casing.
 *
 * For array entries the value under 'relationship' is used as the field name. When no field matches,
 * the key is returned unchanged.
 *
 * @param string $key the key to look up, compared against the lowercased field names
 * @param array<array<string, string>|string> $allFields
 * @return string the matching field name, or $key if there is no match
 */
public function matchKey(string $key, array $allFields): string
{
    foreach ($allFields as $candidate) {
        $name = $candidate;
        if (\is_array($candidate)) {
            $name = $candidate['relationship'];
        }

        if (\strtolower($name) === $key) {
            // first match wins
            return $name;
        }
    }

    return $key;
}
||
219 | |||
220 | |||
221 | /** @param \SilverStripe\ORM\DataObject $dataObject a dataObject relevant to the index */ |
||
222 | public function searchForSimilar(DataObject $dataObject): SearchResults |
||
238 | } |
||
239 | |||
240 | |||
/**
 * Find terms suitable for similarity searching
 *
 * Keyword statistics for the text are requested from the index, ordered by the number of documents each
 * keyword appears in (rarest first), and the first $number distinct terms that occur in more than one
 * document are returned.
 *
 * @todo Rename this method, or separate into a helper?
 * @param string $text text of a document being searched for
 * @param int $number the maximum number of terms to return
 * @return string the selected terms separated by spaces
 */
private function getLeastCommonTerms(string $text, int $number = 20): string
{
    // use the client created in the constructor rather than building another one
    $connection = $this->client->getConnection();
    $params = [
        'index' => $this->indexName,
        'body' => [
            'query'=>$text,
            'options' => [
                'stats' =>1,
                'fold_lemmas' => 1,
            ],
        ],
    ];

    $keywords = $connection->keywords($params);

    // the comparator has to return an int for usort to order anything; previously it was declared void and
    // returned nothing, which left the keywords in their original order
    \usort(
        $keywords,
        static function ($a, $b): int {
            return (int) $a['docs'] <=> (int) $b['docs'];
        }
    );

    // keyed by term so that each term is kept once, in order of first appearance
    $terms = [];
    foreach ($keywords as $entry) {
        // if a word is unique to the source document, it is useless for finding other similar documents
        if ($entry['docs'] <= 1) {
            continue;
        }

        // @todo this or normalized?
        $terms[$entry['tokenized']] = true;
    }

    $toGlue = \array_slice(\array_keys($terms), 0, $number);

    return \implode(' ', $toGlue);
}
||
298 | |||
299 | |||
/**
 * Make a query OR instead of the default AND
 *
 * @param string $q the search query
 * @return string the same query with the terms separated by a | character, to form an OR query
 */
private function makeQueryOr(string $q): string
{
    // surrounding whitespace is removed first, then every run of whitespace marks a term boundary
    /** @var array<int, string> $terms */
    $terms = \preg_split('/\s+/', \trim($q));

    return \implode('|', $terms);
}
||
314 | |||
315 | |||
316 | /** |
||
317 | * @param array<string> $allFields |
||
318 | * @param array<array<string, string>|string> $allFields |
||
319 | * @param array<string, string|int|float|bool> $source |
||
320 | */ |
||
321 | private function populateSearchResult(DataObject &$ssDataObject, array $allFields, array $source): void |
||
331 | } |
||
332 | } |
||
333 | |||
334 | |||
335 | /** |
||
336 | * @param array<array<string, string>|string> $allFields |
||
337 | * @param array<array<string>> $highlights |
||
338 | * @param array<string> $fieldsToHighlight |
||
339 | */ |
||
340 | private function addHighlights( |
||
379 | } |
||
380 | } |
||
381 |