Complex classes like QueryGenerator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use QueryGenerator, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 16 | class QueryGenerator { |
||
| 17 | |||
| 18 | /* The term to search for */ |
||
| 19 | private $queryText = ''; |
||
| 20 | |||
| 21 | /* Fields to search for as an array of Name to weighting, otherwise null for all, ie not |
||
| 22 | a multi match query */ |
||
| 23 | private $fields = null; |
||
| 24 | |||
| 25 | /* Aggregations already selected in format array(key => value), e.g. array('ISO' => 400) */ |
||
| 26 | private $selectedFilters = null; |
||
| 27 | |||
| 28 | /* For an empty query, show results or not */ |
||
| 29 | private $showResultsForEmptyQuery = false; |
||
| 30 | |||
| 31 | /* Manipulator to be used for aggregations */ |
||
| 32 | private $manipulator = null; |
||
| 33 | |||
| 34 | /* The length of a page of results */ |
||
| 35 | private $pageLength = 10; |
||
| 36 | |||
| 37 | /* Where to start, normally a multiple of pageLength */ |
||
| 38 | private $start = 0; |
||
| 39 | |||
| 40 | /* Cache hit counter for test purposes */ |
||
| 41 | private static $cacheHitCtr = 0; |
||
| 42 | |||
| 43 | /** |
||
| 44 | * Comma separated list of SilverStripe ClassNames to search. Leave blank for all |
||
| 45 | * @var string |
||
| 46 | */ |
||
| 47 | private $classes = ''; |
||
| 48 | |||
| 49 | |||
/**
 * Set the text to search for.
 *
 * @param string $newQueryText the search term; an empty string is treated as "no query text"
 */
public function setQueryText($newQueryText) {
    $this->queryText = $newQueryText;
}
||
| 53 | |||
| 54 | |||
/**
 * Set the fields to search and their weightings.
 *
 * @param array|null $newFields map of field name to weighting, or null to search all fields
 *                              (in which case a multi match query is not used)
 */
public function setFields($newFields) {
    $this->fields = $newFields;
}
||
| 58 | |||
| 59 | |||
/**
 * Set the aggregations that have already been selected.
 *
 * @param array|null $newSelectedFilters filters in the form array(key => value),
 *                                       e.g. array('ISO' => 400)
 */
public function setSelectedFilters($newSelectedFilters) {
    $this->selectedFilters = $newSelectedFilters;
}
||
| 63 | |||
| 64 | |||
/**
 * Choose whether an empty query should show results or not.
 *
 * @param boolean $newShowResultsForEmptyQuery true to show results when the query text is empty
 */
public function setShowResultsForEmptyQuery($newShowResultsForEmptyQuery) {
    $this->showResultsForEmptyQuery = $newShowResultsForEmptyQuery;
}
||
| 68 | |||
| 69 | |||
| 70 | public function getShowResultsForEmptyQuery() { |
||
| 73 | |||
| 74 | |||
/**
 * Set the length of a page of results; passed to the query as its size.
 *
 * @param int $newPageLength number of results per page
 */
public function setPageLength($newPageLength) {
    $this->pageLength = $newPageLength;
}
||
| 78 | |||
| 79 | |||
/**
 * Set the offset of the first result to return; passed to the query as its "from" value.
 *
 * @param int $newStart where to start, normally a multiple of the page length
 */
public function setStart($newStart) {
    $this->start = $newStart;
}
||
| 83 | |||
| 84 | |||
/**
 * Replace the list of classes to search.
 *
 * @param string $newClasses comma separated list of SilverStripe ClassNames; leave blank
 *                           to search all classes
 */
public function setClasses($newClasses) {
    $this->classes = $newClasses;
}
||
| 92 | |||
| 93 | |||
| 94 | |||
/**
 * Set the manipulator, which is mainly used for aggregation. The value is handed to the
 * Injector when the query is generated in order to create the manipulator instance.
 *
 * @param string $newManipulator manipulator used for aggregation, must implement
 *                               ElasticaSearchHelper
 */
public function setQueryResultManipulator($newManipulator) {
    $this->manipulator = $newManipulator;
}
||
| 102 | |||
| 103 | |||
| 104 | /* |
||
| 105 | Accessor to cache hit counter, for testing purposes |
||
| 106 | */ |
||
| 107 | public static function getCacheHitCounter() { |
||
| 110 | |||
| 111 | |||
| 112 | public static function resetCacheHitCounter() { |
||
| 115 | |||
| 116 | |||
/**
 * Build the Elastica query for the current state of this generator.
 *
 * The shape of the query depends on whether there is query text, whether specific fields
 * were requested (multi match) and whether any filters have been selected. On top of the
 * text/filter query this adds, in order: the original query text (for search helpers),
 * aggregations from the manipulator, pagination, a default sort when there is a
 * manipulator but no query text, and phrase suggestions for poor spellings or typos.
 *
 * NOTE(review): $this->manipulatorInstance is assigned here but is not among the
 * properties declared at the top of the class as far as this listing shows - confirm
 * whether a declaration should be added.
 *
 * @return \Elastica\Query Query object suitable for searching using the Elastica library
 */
public function generateElasticaQuery() {
    // Both flags are captured up front, before any manipulator code runs
    $hasQueryText = ($this->queryText != '');
    $useMultiMatch = ($this->fields != null);

    // Normalise "no filters" to an empty array so it can be iterated and counted later
    if ($this->selectedFilters == null) {
        $this->selectedFilters = array();
    }

    // Create the manipulator, if one was configured, and give it access to this
    // generator and to the original query text
    $this->manipulatorInstance = null;
    if ($this->manipulator) {
        $this->manipulatorInstance = \Injector::inst()->create($this->manipulator);
        $helper = $this->manipulatorInstance;
        $helper->queryGenerator = $this;
        $helper->originalQueryString = $this->queryText;
    }

    // Text part of the query: multi match when fields were given, otherwise all fields
    $textQuery = $useMultiMatch ? $this->multiMatchQuery() : $this->simpleTextQuery();

    // Wrap the text query with any selected filters
    $query = $this->addFilters($textQuery);

    // This ensures that the original query text is available to search helpers
    $query->OriginalQueryText = $this->queryText;

    // Aggregations, if a manipulator is present
    $this->addAggregation($query);

    // Pagination
    $query->setSize($this->pageLength);
    $query->setFrom($this->start);

    // Without query text, fall back to the sort order supplied by the manipulator
    if (!$hasQueryText && $this->manipulatorInstance) {
        $query->setSort($this->manipulatorInstance->getDefaultSort());
    }

    // Add suggestions in case required for poor spellings or typos, highlighted with
    // the tags taken from the Elastica 'Highlights' configuration
    $highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');

    $phrase = new \Elastica\Suggest\Phrase('query-phrase-suggestions', '_all');
    $phrase->setText($this->queryText)->setSize(4);
    $phrase->setHighlight($highlightsCfg['PreTags'], $highlightsCfg['PostTags']);

    $suggest = new \Elastica\Suggest();
    $suggest->addSuggestion($phrase);
    $query->setSuggest($suggest);

    return $query;
}
||
| 181 | |||
| 182 | |||
| 183 | /** |
||
| 184 | * Generate a query for autocomplete |
||
| 185 | * @return \Elastica\Query Autocompletion query for use with Elastica library |
||
| 186 | */ |
||
| 187 | public function generateElasticaAutocompleteQuery() { |
||
| 221 | |||
| 222 | |||
| 223 | |||
| 224 | |||
| 225 | |||
/**
 * Using a query string object, return a suitable filtered or unfiltered query object.
 *
 * Each selected filter becomes either a term filter or, when its key is the title of a
 * ranged aggregation, the filter supplied by that ranged aggregation. A single filter is
 * used as is, several filters are combined with a boolean AND.
 *
 * @param Elastica\Query\QueryString $textQuery A query_string representing the current query
 * @return Query the text query on its own when no filters are selected, otherwise the
 *               text query wrapped in a filtered query
 */
private function addFilters($textQuery) {
    // Guard on the instance that is actually dereferenced, as addAggregation() does
    if ($this->manipulatorInstance) {
        $this->manipulatorInstance->updateFilters($this->selectedFilters);
    }

    $rangeFilterKeys = RangedAggregation::getTitles();
    $elFilters = array();

    foreach ($this->selectedFilters as $key => $value) {
        // NOTE(review): loose in_array() kept on purpose, array keys may have been cast
        // to integers by PHP - confirm before making this comparison strict
        if (in_array($key, $rangeFilterKeys)) {
            // get the selected range filter
            $elFilters[] = RangedAggregation::getByTitle($key)->getFilter($value);
        } else {
            $termFilter = new Term();
            $termFilter->setTerm($key, $value);
            $elFilters[] = $termFilter;
        }
    }

    // No facets selected, so the text query is used unfiltered
    if (count($elFilters) === 0) {
        return new Query($textQuery);
    }

    if (count($elFilters) === 1) {
        $queryFilter = $elFilters[0];
    } else {
        $queryFilter = new BoolAnd();
        foreach ($elFilters as $filter) {
            $queryFilter->addFilter($filter);
        }
    }

    // MatchAll appears not to be allowed inside a filtered query, so pass null instead
    if ($textQuery instanceof MatchAll) {
        $textQuery = null;
    }

    return new Query(new Filtered($textQuery, $queryFilter));
}
||
| 288 | |||
| 289 | |||
/**
 * Let the manipulator, if there is one, augment the query. Aggregations are optional, so
 * without a manipulator the query is left untouched.
 *
 * @param Query $query the query to augment
 */
private function addAggregation(&$query) {
    if (!$this->manipulatorInstance) {
        return;
    }

    $this->manipulatorInstance->augmentQuery($query);
}
||
| 296 | |||
| 297 | |||
/*
Simplest form of search, namely search for a text string against all fields. In Curl terms:

curl -XGET 'http://localhost:9200/elastica_ss_module_test_en_us/_search?pretty' -d '
{
    "query": {
        "query_string": {
            "query": "Image"
        }
    }
}
'

Returns a match all query instead when the query text is empty and results are to be
shown for an empty query.
*/
private function simpleTextQuery() {
    // A query string with no fields specified searches all fields
    $queryString = new QueryString($this->queryText);

    // Setting the lenient flag means that numeric fields can be searched for text values
    $queryString->setParam('lenient', true);

    $matchEverything = $this->showResultsForEmptyQuery && $this->queryText == '';

    return $matchEverything ? new MatchAll() : $queryString;
}
||
| 325 | |||
| 326 | |||
/*
Text query restricted to the weighted fields held in $this->fields.

When the query text is empty and results are to be shown for an empty query, a match all
query is returned instead (work in progress in the original), see
https://www.elastic.co/guide/en/elasticsearch/reference/1.4/query-dsl-match-all-query.html
*/
private function multiMatchQuery() {
    $noQueryText = ($this->queryText == '');

    // Empty query but results wanted: match everything, no fields are needed
    if ($noQueryText && $this->showResultsForEmptyQuery) {
        return new MatchAll();
    }

    // Otherwise search for the text, or for the empty string when there is none
    $multiMatch = new MultiMatch();
    $multiMatch->setQuery($noQueryText ? '' : $this->queryText);

    $elasticaFields = $this->convertWeightedFieldsForElastica($this->fields);
    $multiMatch->setFields($elasticaFields);
    $multiMatch->setType('most_fields');

    // Setting the lenient flag means that numeric fields can be searched for text values
    $multiMatch->setParam('lenient', true);

    return $multiMatch;
}
||
| 357 | |||
| 358 | |||
| 359 | |||
/**
 * Convert a map of field name to weighting into the list of field specifications used by
 * a multimatch query. A weighting other than 1 is appended as a boost, e.g. 'Title^2'.
 * Fields whose mapping is 'string' get a second entry matching their sub fields,
 * e.g. 'Title.*^2'. Call this after having called setClasses, as the mapping is looked
 * up for the classes currently set.
 *
 * @param array|null $fields map of field name to weighting; null or empty gives an empty result
 * @return array list of field specifications, each optionally suffixed with ^weight
 * @throws \Exception if a field has no mapping for the classes being searched
 */
public function convertWeightedFieldsForElastica($fields) {
    // Looked up before the empty check, as the original did, so that the mapping cache
    // and its hit counter behave the same for empty input
    $nameToType = self::getSearchFieldsMappingForClasses($this->classes, $fields);

    $result = array();

    // empty() instead of sizeof(): counting null is an error on PHP 8
    if (empty($fields)) {
        return $result;
    }

    foreach ($fields as $fieldName => $weight) {
        // Loose comparison kept so that weights of 1, '1' and 1.0 all mean "no boost"
        $boost = ($weight != 1) ? '^' . $weight : '';
        $result[] = $fieldName . $boost;

        if (!isset($nameToType[$fieldName])) {
            throw new \Exception("Field $fieldName does not exist");
        }

        if ($nameToType[$fieldName] == 'string') {
            $result[] = "{$fieldName}.*" . $boost;
        }
    }

    return $result;
}
||
| 393 | |||
| 394 | |||
| 395 | |||
/**
 * Get a hash of field name to Elasticsearch mapping, e.g. 'Title' => 'string'.
 * The result is cached as JSON under a key derived from the classes and allowed fields,
 * to save on database hits, as this data only changes at build time.
 *
 * NOTE(review): the class and field names are interpolated into the SQL text; this relies
 * on convertToQuotedCSV for quoting - confirm that it escapes its values.
 *
 * @param string|array $classes CSV or array of ClassNames to search, or empty for
 *                              all classes flagged as being in the SiteTree
 * @param array|null $fieldsAllowed optional hash keyed by field name; when given, only
 *                                  these fields are returned
 * @return array Array hash of fieldname to Elasticsearch mapping
 */
public static function getSearchFieldsMappingForClasses($classes = null, $fieldsAllowed = null) {
    $cache = self::getCache();

    $csvClasses = is_array($classes) ? implode(',', $classes) : $classes;

    // Build the cache key from the classes and, if restricted, the allowed fields
    $key = 'SEARCHABLE_FIELDS_' . str_replace(',', '_', $csvClasses);
    if ($fieldsAllowed) {
        $allowedCSV = self::convertToQuotedCSV(array_keys($fieldsAllowed));
        $key .= '_' . str_replace(array("'", ','), '_', $allowedCSV);
        $key = str_replace(array('.', ' '), '_', $key);
    }

    $cached = $cache->load($key);
    if ($cached) {
        self::$cacheHitCtr++;

        // true is necessary here to decode the array hash back to an array and not a struct
        return json_decode($cached, true);
    }

    // Cache miss: work out the relevant classes to search
    if (empty($csvClasses)) {
        $relevantClasses = array();
        $sql = "SELECT DISTINCT Name from SearchableClass where InSiteTree = 1 order by Name";
        foreach (\DB::query($sql) as $row) {
            $relevantClasses[] = $row['Name'];
        }
    } else {
        $relevantClasses = explode(',', $csvClasses);
    }

    $mapping = array();
    if (count($relevantClasses) > 0) {
        $relevantClassesCSV = self::convertToQuotedCSV($relevantClasses);

        // Perform a database query to get a list of searchable fieldnames to Elasticsearch mapping
        $sql = "SELECT sf.Name,sf.Type FROM SearchableClass sc INNER JOIN SearchableField sf ON "
            . "sc.id = sf.SearchableClassID WHERE sc.name IN ($relevantClassesCSV)";

        if ($fieldsAllowed) {
            $fieldsAllowedCSV = self::convertToQuotedCSV(array_keys($fieldsAllowed));
            if (strlen($fieldsAllowedCSV) > 0) {
                $sql .= " AND sf.Name IN ($fieldsAllowedCSV)";
            }
        }

        foreach (\DB::query($sql) as $row) {
            /**
             * FIXME:
             * This will overwrite duplicate keys such as Content or Title from other Classes.
             * Ideally need to check if the mapping being overwritten changes, e.g. if
             * a field such as BirthDate is date in one class and string in another
             * and throw an exception accordingly
             */
            $mapping[$row['Name']] = $row['Type'];
        }
    }

    $cache->save(json_encode($mapping), $key);

    return $mapping;
}
||
| 472 | |||
| 473 | |||
| 474 | 8 | public static function getCache() { |
|
| 478 | |||
| 479 | |||
| 480 | /** |
||
| 481 | * Convert either a CSV string or an array to a CSV single quoted string, suitable for use in |
||
| 482 | * an SQL IN clause |
||
| 483 | * @param string|array $csvOrArray A string separated by commas or an array |
||
| 484 | * @return string string or array as a CSV, but values quoted with single quotes |
||
| 485 | */ |
||
| 486 | 8 | public static function convertToQuotedCSV($csvOrArray) { |
|
| 505 | } |
||
| 506 |
It seems like the type of the argument is not accepted by the function/method which you are calling.
In some cases, in particular if PHP’s automatic type-juggling kicks in this might be fine. In other cases, however this might be a bug.
We suggest adding an explicit type cast, as in the following example: