| Total Complexity | 45 |
| Total Lines | 357 |
| Duplicated Lines | 0 % |
| Changes | 11 | ||
| Bugs | 2 | Features | 0 |
Complex classes like Searcher often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Searcher, and based on these observations, apply Extract Interface, too.
| 1 | <?php declare(strict_types = 1); |
||
| 22 | class Searcher extends \Suilven\FreeTextSearch\Base\Searcher implements \Suilven\FreeTextSearch\Interfaces\Searcher |
||
| 23 | { |
||
| 24 | /** @var \Suilven\ManticoreSearch\Service\Client */ |
||
| 25 | private $client; |
||
| 26 | |||
/**
 * Create the searcher and the Manticore service client it keeps hold of.
 */
public function __construct()
{
    $serviceClient = new Client();
    $this->client = $serviceClient;
}
||
| 31 | |||
| 32 | |||
/**
 * Run a free text search against the index configured for this searcher.
 *
 * The current page, page size, filters and facet tokens are applied to the Manticore query. Each hit is
 * converted into a DataObject carrying the stored field values and highlights, and the records, totals,
 * facets and elapsed time are returned in a SearchResults object.
 *
 * @param string|null $q the search query, null is treated as an empty query
 * @return \Suilven\FreeTextSearch\Container\SearchResults|SearchResults the records for the requested page
 *         along with facets and timing
 */
public function search(?string $q): SearchResults
{
    $q = $q ?? '';
    if ($this->searchType === SearchParamTypes::OR) {
        $q = $this->makeQueryOr($q);
    }
    $startTime = \microtime(true);

    // reuse the client created in the constructor instead of building another one for every search
    $searcher = new Search($this->client->getConnection());
    $searcher->setIndex($this->indexName);

    $searcher->limit($this->pageSize);
    // the offset is derived from a 1-based page number; clamp it so a page below 1 cannot go negative
    $offset = \max(0, $this->pageSize * ($this->page - 1));
    $searcher->offset($offset);

    $indexes = new Indexes();
    $index = $indexes->getIndex($this->indexName);
    $hasManyFields = $index->getHasManyFields();
    $hasManyFieldsNames = \array_keys($hasManyFields);

    $searcher->highlight(
        [],
        ['pre_tags' => '<b>', 'post_tags' => '</b>']
    );

    $fieldHelper = new FieldHelper();
    foreach ($this->filters as $key => $value) {
        if ($key === 'q' || $key === 'start') {
            continue;
        }
        $typedValue = $fieldHelper->getFieldValueCorrectlyTyped($index, $key, $value);

        // has many fields are filtered with 'in', every other field is filtered with 'equals'
        $operator = \in_array($key, $hasManyFieldsNames, true)
            ? 'in'
            : 'equals';
        $searcher->filter($key, $operator, $typedValue);
    }

    // @todo Deal with subsequent params
    // plain facet tokens first, then the has many tokens, both are registered in exactly the same way
    foreach ([$this->facettedTokens, $this->hasManyTokens] as $facetNames) {
        foreach ($facetNames as $facetName) {
            // manticore errors out with no error message if the facet name is not lowercase. The second param
            // is an alias, use the correctly capitalized version of the facet
            $searcher->facet(\strtolower($facetName), $facetName, 1000);
        }
    }

    $manticoreResult = $searcher->search($q)->get();
    $allFields = $this->getAllFields($index);
    // the fields to highlight do not depend on the hit, so look them up once rather than per hit
    $fieldsToHighlight = $index->getHighlightedFields();

    $ssResult = new ArrayList();
    while ($manticoreResult->valid()) {
        $hit = $manticoreResult->current();
        $ssDataObject = new DataObject();

        $this->populateSearchResult($ssDataObject, $allFields, $hit->getData());

        // manticore lowercases fields, so as above normalize them back to the SS fieldnames
        $this->addHighlights($ssDataObject, $allFields, $hit->getHighlight(), $fieldsToHighlight);

        $ssDataObject->ID = $hit->getId();
        $ssResult->push($ssDataObject);
        $manticoreResult->next();
    }

    // we now need to standardize the output returned
    $searchResults = new SearchResults();
    $searchResults->setRecords($ssResult);
    $searchResults->setPage($this->page);
    $searchResults->setPageSize($this->pageSize);
    $searchResults->setQuery($q);
    $searchResults->setTotalNumberOfResults($manticoreResult->getTotal());

    // create facet result objects
    $manticoreFacets = $manticoreResult->getFacets();

    if (!\is_null($manticoreFacets)) {
        foreach ($manticoreFacets as $facetTitle => $facetData) {
            $facet = new Facet($facetTitle);

            // the BY functionality of facets has not yet been implemented, as such database calls are required
            // to turn the bucket keys of has many facets into the value of the configured field
            if (\in_array($facetTitle, $this->hasManyTokens, true)) {
                $field = $hasManyFields[$facetTitle]['field'];
                $clazz = $hasManyFields[$facetTitle]['class'];

                foreach ($facetData['buckets'] as $count) {
                    $facetClassInstance = DataObject::get_by_id($clazz, $count['key']);

                    // the lookup returns null when no record matches the key, e.g. the record was deleted
                    // after indexing; skip such buckets instead of failing on a null property access
                    if (\is_null($facetClassInstance)) {
                        continue;
                    }

                    // @phpstan-ignore-next-line
                    $facet->addFacetCount($facetClassInstance->$field, $count['doc_count']);
                }
            } else {
                // use values as is
                foreach ($facetData['buckets'] as $count) {
                    $facet->addFacetCount($count['key'], $count['doc_count']);
                }
            }

            $searchResults->addFacet($facet);
        }
    }

    // elapsed time in seconds, rounded to the nearest millisecond
    $delta = \microtime(true) - $startTime;
    $searchResults->setTime(\round(1000 * $delta) / 1000);

    return $searchResults;
}
||
| 166 | |||
| 167 | |||
/**
 * Collect every field known to the index into a single array.
 *
 * The fields, tokens, has one fields and stored fields of the index are merged as they are, after which
 * the names (array keys) of the has many fields are appended.
 *
 * @return array<array<string, string>|string>
 */
public function getAllFields(\Suilven\FreeTextSearch\Index $index): array
{
    // only the names of the has many fields are wanted, their details are left out of the merge
    return \array_merge(
        $index->getFields(),
        $index->getTokens(),
        $index->getHasOneFields(),
        $index->getStoredFields(),
        \array_keys($index->getHasManyFields())
    );
}
||
| 186 | |||
| 187 | |||
/**
 * Translate a field name into the name used on the search result object.
 *
 * 'Title' becomes 'ResultTitle' and 'link' becomes 'Link'; any other name is returned untouched.
 *
 * @param string $keyname the field name to translate
 * @return string the name to use on the result object
 */
public function refactorKeyName(string $keyname): string
{
    // @todo This is a hack as $Title is rendering the ID in the template
    $renames = [
        'Title' => 'ResultTitle',
        'link' => 'Link',
    ];

    return $renames[$keyname] ?? $keyname;
}
||
| 199 | |||
| 200 | |||
| 201 | /** @param array<array<string, string>|string> $allFields */ |
||
| 202 | public function matchKey(string $key, array $allFields): string |
||
| 218 | } |
||
| 219 | |||
| 220 | |||
/**
 * Search for records similar to the one provided.
 *
 * The text payload of the data object for this searcher's index is glued together, reduced to at most ten
 * terms by getLeastCommonTerms() and then searched for as an OR query. Note that the search type of this
 * searcher is switched to OR here and is not switched back afterwards.
 *
 * @param \SilverStripe\ORM\DataObject $dataObject a dataObject relevant to the index
 */
public function searchForSimilar(DataObject $dataObject): SearchResults
{
    $payload = (new SearchHelper())->getTextFieldPayload($dataObject);

    // @todo Search by multiple fields?
    // every field's text is followed by a single space, including the last one
    $spaceTerminated = \array_map(
        static function ($fieldText): string {
            return $fieldText . ' ';
        },
        $payload[$this->indexName]
    );
    $combinedText = \implode('', $spaceTerminated);

    $this->searchType = SearchParamTypes::OR;

    return $this->search($this->getLeastCommonTerms($combinedText, 10));
}
||
| 239 | |||
| 240 | |||
/**
 * Find terms suitable for similarity searching
 *
 * Keyword statistics for the text are requested from Manticore, the keywords are ordered by the number of
 * documents they appear in (fewest first), and the first $number distinct terms that appear in more than
 * one document are returned.
 *
 * @todo Rename this method, or separate into a helper?
 * @param string $text text of a document being searched for
 * @param int $number the maximum number of terms to return
 * @return string the selected terms separated by a single space, empty if no term qualifies
 */
private function getLeastCommonTerms(string $text, int $number = 20): string
{
    // reuse the client created in the constructor instead of building a new one
    $connection = $this->client->getConnection();
    $params = [
        'index' => $this->indexName,
        'body' => [
            'query' => $text,
            'options' => [
                'stats' => 1,
                'fold_lemmas' => 1,
            ],
        ],
    ];

    $keywords = $connection->keywords($params);

    // order by document count ascending. The comparator has to return an int: the previous version was
    // declared void and returned nothing, which left the keywords in their original order
    /* @phpstan-ignore-next-line */
    \usort(
        $keywords,
        static function ($a, $b): int {
            return (int) $a['docs'] <=> (int) $b['docs'];
        }
    );

    // keyed by term so that repeated terms collapse to one entry while keeping the sorted order
    $terms = [];
    foreach ($keywords as $entry) {
        // if a word is unique to the source document, it is useless for finding other similar documents
        if ($entry['docs'] > 1) {
            // @todo this or normalized?
            $terms[$entry['tokenized']] = true;
        }
    }

    $toGlue = \array_slice(\array_keys($terms), 0, $number);

    return \implode(' ', $toGlue);
}
||
| 298 | |||
| 299 | |||
/**
 * Turn a query into an OR query, the default being AND
 *
 * @param string $q the search query
 * @return string the same query with its whitespace separated terms joined by a | character instead
 */
private function makeQueryOr(string $q): string
{
    /** @var array<int, string> $terms */
    $terms = \preg_split('/\s+/', \trim($q));
    $orQuery = \implode('|', $terms);

    return $orQuery;
}
||
| 314 | |||
| 315 | |||
/**
 * Copy the values of a search hit onto a data object.
 *
 * Each key of the source is passed through matchKey() and then refactorKeyName() to obtain the property
 * name the value is assigned to.
 *
 * @param \SilverStripe\ORM\DataObject $ssDataObject the object receiving the values
 * @param array<array<string, string>|string> $allFields
 * @param array<string, string|int|float|bool> $source
 */
private function populateSearchResult(DataObject &$ssDataObject, array $allFields, array $source): void
{
    foreach ($source as $sourceKey => $sourceValue) {
        /** @var string $property */
        $property = $this->refactorKeyName($this->matchKey($sourceKey, $allFields));

        /** @phpstan-ignore-next-line */
        $ssDataObject->$property = $sourceValue;
    }
}
||
| 333 | |||
| 334 | |||
| 335 | /** |
||
| 336 | * @param array<array<string, string>|string> $allFields |
||
| 337 | * @param array<array<string>> $highlights |
||
| 338 | * @param array<string> $fieldsToHighlight |
||
| 339 | */ |
||
| 340 | private function addHighlights( |
||
| 379 | } |
||
| 380 | } |
||
| 381 |