Complex classes like ElasticaService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ElasticaService, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | class ElasticaService { |
||
14 | |||
/**
 * Documents queued by index() while buffering is on, grouped by Elastica type name
 * @var array map of type name to a list of \Elastica\Document
 */
protected $buffer = array();


/**
 * @var bool controls whether indexing operations are buffered or not
 */
protected $buffered = false;


/**
 * @var \Elastica\Client Elastica Client object
 */
private $client;


/**
 * @var string index name
 */
private $indexName;


/**
 * The code of the locale being indexed or searched
 * @var string e.g. th_TH, en_US
 */
private $locale;


/**
 * Mapping of DataObject ClassName and whether it is in the SiteTree or not
 * @var array $site_tree_classes;
 */
private static $site_tree_classes = array();


/**
 * Counter used for testing; records indexing requests
 * @var integer
 */
public static $indexing_request_ctr = 0;


/**
 * Array of highlighted fields, e.g. Title, Title.standard. If this is empty then the
 * ShowHighlight field of SearchableField is used to determine which fields to highlight
 * @var array
 */
private $highlightedFields = array();


/**
 * The number of documents to index currently for this locale
 * @var integer
 */
private $nDocumentsToIndexForLocale = 0;


/**
 * Terms extracted from the most recent 'more like this' query validation, keyed by
 * field name. Declared explicitly (and public, as it previously was when created on
 * the fly) so that the class does not rely on dynamic properties.
 * @var array|null
 */
public $MoreLikeThisTerms;


/**
 * microtime() timestamp taken when refresh() started; null until refresh() has run
 * @var float|null
 */
public $StartTime;


/**
 * Running count of documents handed to the bulk indexer during refresh()
 * @var integer
 */
public $nDocumentsIndexed = 0;
||
73 | |||
74 | |||
/**
 * Choose the fields that subsequent searches will highlight.
 *
 * @param array $newHighlightedFields field names, e.g. Title or Title.standard
 */
public function setHighlightedFields($newHighlightedFields) {
    $this->highlightedFields = $newHighlightedFields;
}
||
81 | |||
82 | |||
83 | /* |
||
84 | Enable this so that whitelisted test classes are included when indexing |
||
85 | */ |
||
86 | public $test_mode = false; |
||
87 | |||
88 | |||
/**
 * Store the client and base index name, and start out on the site's default locale.
 *
 * @param \Elastica\Client $client
 * @param string $newIndexName Name of the new index
 */
public function __construct(Client $client, $newIndexName) {
    $this->locale = \i18n::default_locale();
    $this->indexName = $newIndexName;
    $this->client = $client;
}
||
98 | |||
99 | |||
/**
 * Switch test mode on or off.
 *
 * @param bool $newTestMode new value for the test_mode flag
 */
public function setTestMode($newTestMode) {
    $this->test_mode = $newTestMode;
}
||
103 | |||
104 | |||
/**
 * Accessor for the client supplied to the constructor.
 *
 * @return \Elastica\Client
 */
public function getClient() {
    return $this->client;
}
||
111 | |||
112 | |||
/**
 * Fetch the index object for the current locale from the client.
 *
 * @return \Elastica\Index
 */
public function getIndex() {
    return $this->getClient()->getIndex($this->getLocaleIndexName());
}
||
120 | |||
121 | |||
/**
 * Change the locale used for indexing and searching.
 *
 * @param string $newLocale locale code, e.g. th_TH, en_US
 */
public function setLocale($newLocale) {
    $this->locale = $newLocale;
}
||
125 | |||
126 | public function getIndexName() { |
||
129 | |||
/**
 * Build the locale-specific index name: "<index name>-<locale>", lower-cased, with
 * every hyphen (including the separator) turned into an underscore.
 *
 * @return string
 */
private function getLocaleIndexName() {
    $lowered = strtolower($this->indexName . '-' . $this->locale);
    return str_replace('-', '_', $lowered);
}
||
136 | |||
137 | |||
/**
 * Performs a search query and returns a result list.
 *
 * The search runs against the locale-specific index using the client injected into
 * this service. When the query is a 'more like this' query, the terms extracted
 * during query validation are attached to the result as MoreLikeThisTerms.
 *
 * @param \Elastica\Query|string|array $query
 * @param string|array $types List of comma separated SilverStripe classes to search, or blank for all
 * @return \Elastica\ResultList
 */
public function search($query, $types = '') {
    $query = Query::create($query); // may be a string

    if(is_string($types)) {
        $types = explode(',', $types);
    }

    // explode('') yields array(''), so drop blank names: "blank" must mean "all types"
    $typeNames = array();
    if(!empty($types)) {
        foreach($types as $type) {
            $type = trim($type);
            if($type !== '') {
                $typeNames[] = $type;
            }
        }
    }

    $data = $query->toArray();
    $query->MoreLikeThis = isset($data['query']['more_like_this']);

    // forget terms from an earlier search so they cannot leak into this result
    $this->MoreLikeThisTerms = null;

    // use the configured client rather than a default-constructed one
    $search = new Search($this->getClient());

    // get results from all shards, this makes test repeatable
    if($this->test_mode) {
        $search->setOption('search_type', Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH);
    }

    // the index and the types are registered exactly once
    $search->addIndex($this->getLocaleIndexName());
    $this->addTypesToSearch($search, $typeNames, $query);

    $highlights = $this->getHighlightingConfig();
    $this->addExtractedQueryTermsForMoreLikeThis($query, $highlights);
    $query->setHighlight($highlights);

    $searchResults = $search->search($query, $search->getOptions());
    if(isset($this->MoreLikeThisTerms)) {
        $searchResults->MoreLikeThisTerms = $this->MoreLikeThisTerms;
    }

    return $searchResults;
}
||
183 | |||
184 | |||
/**
 * For a 'more like this' query, replace the highlight fields with one highlight_query
 * per field, each a multi_match over the terms previously extracted for that field.
 * Does nothing for any other kind of query.
 *
 * @param Query $query query carrying the MoreLikeThis flag
 * @param array $highlights highlighting configuration, modified in place
 */
private function addExtractedQueryTermsForMoreLikeThis($query, &$highlights) {
    if(!$query->MoreLikeThis) {
        return;
    }

    // the terms are only set when query validation produced an explanation;
    // fall back to an empty list instead of iterating over an unset property
    $extracted = isset($this->MoreLikeThisTerms) ? $this->MoreLikeThisTerms : array();

    $termsMatchingQuery = array();
    foreach($extracted as $field => $terms) {
        $termQuery = array('multi_match' => array(
            'query' => implode(' ', $terms),
            'type' => 'most_fields',
            'fields' => array($field)
        ));
        $termsMatchingQuery[$field] = array('highlight_query' => $termQuery);
    }

    $highlights['fields'] = $termsMatchingQuery;
}
||
202 | |||
203 | |||
/**
 * Run the 'more like this' term check for the query, then register each requested
 * type on the search.
 *
 * @param Search $search
 * @param array $types type names to search; nothing is added when empty
 * @param Query $query
 */
private function addTypesToSearch(&$search, $types, $query) {
    // For a 'more like this' query the search terms are obtained through an extra
    // request: a query validation with explain and rewrite switched on
    $this->checkForTermsMoreLikeThis($query, $search);

    if(empty($types)) {
        return;
    }

    foreach($types as $typeName) {
        $search->addType($typeName);
    }
}
||
219 | |||
220 | |||
/**
 * Assemble the highlight section of a query from the 'Highlights' configuration.
 *
 * Explicitly provided highlight fields are used as they are. When none were provided,
 * the SearchableField records of type string flagged ShowHighlights are used instead,
 * each with '.standard' appended to its name.
 *
 * @return array highlight configuration with pre_tags, post_tags and fields
 */
private function getHighlightingConfig() {
    $config = \Config::inst()->get('Elastica', 'Highlights');

    $fieldNames = $this->highlightedFields;
    $suffix = '';

    if(count($fieldNames) == 0) {
        $fieldNames = \SearchableField::get()
            ->filter(array('Type' => 'string', 'ShowHighlights' => true))
            ->map('Name')
            ->toArray();
        $suffix = '.standard';
    }

    // every field shares the same fragment settings
    $perFieldSettings = array(
        'fragment_size' => $config['Phrase']['FragmentSize'],
        'number_of_fragments' => $config['Phrase']['NumberOfFragments'],
        'no_match_size'=> 200
    );

    $fields = array();
    foreach($fieldNames as $fieldName) {
        $fields[$fieldName . $suffix] = $perFieldSettings;
    }

    return array(
        'pre_tags' => array($config['PreTags']),
        'post_tags' => array($config['PostTags']),
        'fields' => $fields
    );
}
||
260 | |||
261 | |||
/**
 * For a 'more like this' query, ask the server to validate the query with explain and
 * rewrite enabled, and store the terms parsed from the first explanation in
 * MoreLikeThisTerms. Does nothing for any other kind of query.
 *
 * @param Query $elasticaQuery query carrying the MoreLikeThis flag
 * @param Search $search search whose path identifies the index/types to validate against
 */
private function checkForTermsMoreLikeThis($elasticaQuery, $search) {
    if(!$elasticaQuery->MoreLikeThis) {
        return;
    }

    $data = $elasticaQuery->toArray();
    $termData = array('query' => $data['query']);

    // swap only the trailing endpoint; a plain str_replace would also rewrite an
    // index or type name that happens to contain "_search"
    $path = preg_replace('/_search$/', '_validate/query', $search->getPath());

    $params = array('explain' => true, 'rewrite' => true);
    if($this->test_mode) {
        $params['search_type'] = Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
    }

    $response = $this->getClient()->request(
        $path,
        \Elastica\Request::GET,
        $termData,
        $params
    );

    $rData = $response->getData();

    // the explanations list may be empty, or its entry may lack an 'explanation' key
    if(!isset($rData['explanations'][0]['explanation'])) {
        return;
    }

    $terms = ElasticaUtil::parseSuggestionExplanation($rData['explanations'][0]['explanation']);
    if(isset($terms)) {
        $this->MoreLikeThisTerms = $terms;
    }
}
||
299 | |||
300 | |||
/**
 * Create the index for the current locale unless it already exists.
 */
protected function ensureIndex() {
    if($this->getIndex()->exists()) {
        return;
    }

    $this->createIndex();
}
||
310 | |||
311 | |||
/**
 * Make sure the type has a mapping, installing the record's mapping (and creating the
 * index if necessary) when the type currently has none.
 *
 * @param \Elastica\Type $type type to check
 * @param \DataObject $record record providing getElasticaMapping()
 * @return array the mapping in array form
 */
protected function ensureMapping(\Elastica\Type $type, \DataObject $record) {
    $existing = $type->getMapping();
    if($existing != array()) {
        return $existing;
    }

    $this->ensureIndex();
    $recordMapping = $record->getElasticaMapping();
    $type->setMapping($recordMapping);

    return $recordMapping->toArray();
}
||
329 | |||
330 | |||
/**
 * Either creates or updates a record in the index.
 *
 * While buffering is on the document is only queued under its type name. Otherwise it
 * is sent straight away, the index is refreshed and the request counter is incremented.
 *
 * @param Searchable $record
 */
public function index($record) {
    $document = $record->getElasticaDocument();
    $typeName = $record->getElasticaType();

    if(!$this->buffered) {
        $index = $this->getIndex();
        $type = $index->getType($typeName);

        $this->ensureMapping($type, $record);

        $type->addDocument($document);
        $index->refresh();
        self::$indexing_request_ctr++;
        return;
    }

    // buffered: start a queue for this type on first use, then append
    if(!array_key_exists($typeName, $this->buffer)) {
        $this->buffer[$typeName] = array();
    }
    $this->buffer[$typeName][] = $document;
}
||
357 | |||
358 | |||
/**
 * Switch to buffered mode: from now on index() queues documents instead of sending
 * them immediately.
 */
public function startBulkIndex() {
    $this->buffered = true;
}
||
366 | |||
367 | |||
/**
 * Debugging aid: run curl against localhost:9200/_cat/indices and print its output
 * between two marker lines.
 *
 * @param string $trace label placed in the marker lines
 * @return array output lines of the curl command
 */
public function listIndexes($trace) {
    exec("curl 'localhost:9200/_cat/indices?v'", $outputLines);

    ElasticaUtil::message("\n++++ $trace ++++\n");
    ElasticaUtil::message(print_r($outputLines, true));
    ElasticaUtil::message("++++ /{$trace} ++++\n\n");

    return $outputLines;
}
||
376 | |||
377 | |||
/**
 * Ends the current bulk index operation and indexes the buffered documents.
 *
 * Each buffered type is sent as one bulk request, followed by an index refresh and an
 * increment of the request counter. When a refresh() run is in progress an ETA is
 * reported after each request. Buffering is switched off and the buffer emptied at
 * the end.
 */
public function endBulkIndex() {
    $index = $this->getIndex();

    // total over all buffered types; identical for every pass, so computed once
    $amount = 0;
    foreach($this->buffer as $queued) {
        $amount += count($queued);
    }

    foreach($this->buffer as $type => $documents) {
        $index->getType($type)->addDocuments($documents);
        $index->refresh();

        ElasticaUtil::message("\tAdding $amount documents to the index\n");

        // no ETA without a start time, or before anything has been counted
        // (the latter would be a division by zero)
        if(isset($this->StartTime) && !empty($this->nDocumentsIndexed)) {
            $elapsed = microtime(true) - $this->StartTime;
            $timePerDoc = $elapsed / $this->nDocumentsIndexed;
            $documentsRemaining = $this->nDocumentsToIndexForLocale - $this->nDocumentsIndexed;

            // round once up front so the seconds part can never show as 60,
            // and never report a negative time
            $eta = (int) round(max(0, $documentsRemaining) * $timePerDoc);
            $hours = (int) ($eta / 3600);
            $minutes = (int) (($eta % 3600) / 60);
            $seconds = $eta % 60;
            $etaHR = "{$hours}h {$minutes}m {$seconds}s";
            ElasticaUtil::message("ETA to completion of indexing $this->locale ($documentsRemaining documents): $etaHR");
        }

        self::$indexing_request_ctr++;
    }

    $this->buffered = false;
    $this->buffer = array();
}
||
410 | |||
411 | |||
412 | /** |
||
413 | * Deletes a record from the index. |
||
414 | * |
||
415 | * @param Searchable $record |
||
416 | */ |
||
417 | public function remove($record) { |
||
423 | |||
424 | |||
/**
 * Rebuild the index from scratch: drop it if present, create it again, then send the
 * mapping of every indexed class.
 */
public function define() {
    $index = $this->getIndex();

    // always start from a fresh index
    if($index->exists()) {
        $index->delete();
    }
    $this->createIndex();

    foreach($this->getIndexedClasses() as $className) {
        $singleton = singleton($className);
        $mapping = $singleton->getElasticaMapping();
        $type = $index->getType($singleton->getElasticaType());
        $mapping->setType($type);
        $mapping->send();
    }
}
||
444 | |||
445 | |||
/**
 * Index every record in the list that reports it should be shown in search.
 *
 * @param array $records
 */
protected function refreshRecords($records) {
    foreach($records as $record) {
        if(!$record->showRecordInSearch()) {
            continue;
        }
        $this->index($record);
    }
}
||
458 | |||
459 | |||
/**
 * List the records of a class, reading from the Live stage when the class has the
 * "Versioned" extension.
 *
 * @param string $class Class Name
 * @param int $pageSize Optional page size; when greater than zero at most this many records are returned
 * @param int $page Page number to return
 * @return \DataList $records
 */
protected function recordsByClassConsiderVersioned($class, $pageSize = 0, $page = 0) {
    $records = $class::has_extension("Versioned")
        ? \Versioned::get_by_stage($class, 'Live')
        : $class::get();

    // paging only applies when a page size was requested
    if($pageSize > 0) {
        $records = $records->limit($pageSize, $page * $pageSize);
    }

    return $records;
}
||
487 | |||
488 | |||
/**
 * Refresh the records of a given class within the search index
 *
 * Records are indexed in batches of 500, each batch as its own bulk operation.
 *
 * @param string $class Class Name
 */
protected function refreshClass($class) {
    $batchSize = 500;
    $nRecords = $this->recordsByClassConsiderVersioned($class)->count();

    // whole number of batches; the previous "count / size + 1" bound was a float
    // that always caused at least one extra, empty batch to be queried
    $pages = (int) ceil($nRecords / $batchSize);

    for($page = 0; $page < $pages; $page++) {
        $this->startBulkIndex();
        $pagedRecords = $this->recordsByClassConsiderVersioned($class, $batchSize, $page);
        $this->nDocumentsIndexed += $pagedRecords->count();
        $this->refreshRecords($pagedRecords->toArray());
        $this->endBulkIndex();
    }
}
||
508 | |||
509 | |||
/**
 * Re-indexes each record in the index.
 *
 * Classes in the SiteTree are only refreshed through 'SiteTree' itself, which prevents
 * the same item being indexed twice due to class inheritance. All other indexed
 * classes are refreshed individually.
 */
public function refresh() {
    $this->StartTime = microtime(true);

    // discover the indexed classes once and reuse the list below
    $classes = $this->getIndexedClasses();

    // Decide up front which classes are actually refreshed, caching the SiteTree lookup
    $refreshed = array();
    foreach($classes as $classname) {
        if(!isset(self::$site_tree_classes[$classname])) {
            self::$site_tree_classes[$classname] = SearchableHelper::isInSiteTree($classname);
        }

        if(!self::$site_tree_classes[$classname] || $classname === 'SiteTree') {
            $refreshed[$classname] = true;
        }
    }

    // Count the documents for this locale; only classes that are refreshed contribute,
    // so the ETA is not inflated by classes that are skipped further down
    $amount = 0;
    foreach(array_keys($refreshed) as $classname) {
        $amount += $this->recordsByClassConsiderVersioned($classname)->count();
    }

    $this->nDocumentsToIndexForLocale = $amount;
    $this->nDocumentsIndexed = 0;

    foreach($classes as $classname) {
        ElasticaUtil::message("Indexing class $classname");

        if(isset($refreshed[$classname])) {
            $this->refreshClass($classname);
        }
    }

    ElasticaUtil::message("Completed indexing documents for locale $this->locale\n");
}
||
553 | |||
554 | |||
/**
 * Reset the current index: delete it if it exists, then create it again.
 */
public function reset() {
    $index = $this->getIndex();

    // same guard as define(): do not attempt to delete an index that is not there
    if($index->exists()) {
        $index->delete();
    }

    $this->createIndex();
}
||
563 | |||
564 | |||
/**
 * Create the index for the current locale using that locale's generated settings.
 * The second argument passed to create() is true.
 */
private function createIndex() {
    $index = $this->getIndex();
    $index->create($this->getIndexSettingsForCurrentLocale()->generateConfig(), true);
}
||
570 | |||
571 | |||
/**
 * Instantiate the settings class configured under Elastica.indexsettings for the
 * current locale.
 *
 * @return IndexSettings index settings for the current locale
 * @throws \Exception when no settings class is configured for the locale
 */
public function getIndexSettingsForCurrentLocale() {
    $settingsByLocale = \Config::inst()->get('Elastica', 'indexsettings');

    if(!isset($settingsByLocale[$this->locale])) {
        throw new \Exception('ERROR: No index settings are provided for locale ' . $this->locale . "\n");
    }

    return \Injector::inst()->create($settingsByLocale[$this->locale]);
}
||
588 | |||
589 | |||
/**
 * Collect the DataObject subclasses that have the Searchable extension applied.
 *
 * Classes implementing TestOnly are left out unless they are on the test whitelist
 * and test mode is enabled.
 *
 * @return array
 */
public function getIndexedClasses() {
    $whitelist = array('SearchableTestPage', 'SearchableTestFatherPage', 'SearchableTestGrandFatherPage',
        'FlickrPhotoTO', 'FlickrTagTO', 'FlickrPhotoTO', 'FlickrAuthorTO', 'FlickrSetTO');

    $classes = array();

    foreach(\ClassInfo::subclassesFor('DataObject') as $candidate) {
        $instance = singleton($candidate);
        $interfaces = class_implements($candidate);

        if(isset($interfaces['TestOnly'])) {
            // test classes need to be whitelisted, and are only allowed in test mode
            $whitelisted = in_array($candidate, $whitelist);
            if(!$whitelisted || !$this->test_mode) {
                continue;
            }
        }

        if(!$instance->hasExtension('SilverStripe\\Elastica\\Searchable')) {
            continue;
        }

        $classes[] = $candidate;
    }

    return $classes;
}
||
624 | |||
625 | |||
626 | /** |
||
627 | * Get the number of indexing requests made. Used for testing bulk indexing |
||
628 | * @return integer indexing request counter |
||
629 | */ |
||
630 | public function getIndexingRequestCtr() { |
||
633 | |||
634 | |||
635 | /** |
||
636 | * Get the term vectors in the index for the provided Searchable object |
||
637 | * @param Searchable $searchable An object that implements Searchable |
||
638 | * @return array array of field name to terms indexed |
||
639 | */ |
||
640 | public function getTermVectors($searchable) { |
||
681 | } |
||
682 |