Completed
Push — dev2 ( 81b774...143263 )
by Gordon
02:53
created

ElasticaService::search()   B

Complexity

Conditions 9
Paths 64

Size

Total Lines 54
Code Lines 34

Duplication

Lines 0
Ratio 0 %

Importance

Changes 8
Bugs 6 Features 0
Metric Value
c 8
b 6
f 0
dl 0
loc 54
rs 7.2551
cc 9
eloc 34
nc 64
nop 2

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Client;
6
use Elastica\Query;
7
use Elastica\Search;
8
use SilverStripe\Elastica\ElasticaUtil;
9
10
/**
 * A service used to interact with elastic search.
 *
 * Wraps an Elastica client and a base index name. The index actually used is
 * locale specific (see getLocaleIndexName()). The service supports searching,
 * single and bulk indexing, removal of documents and (re)creation of the index
 * and its type mappings.
 */
class ElasticaService {

	/**
	 * Documents waiting for a bulk request, grouped by Elastica type name.
	 * @var array type name => \Elastica\Document[]
	 */
	protected $buffer = array();


	/**
	 * @var bool controls whether indexing operations are buffered or not
	 */
	protected $buffered = false;


	/**
	 * @var \Elastica\Client Elastica Client object
	 */
	private $client;


	/**
	 * @var string index name
	 */
	private $indexName;


	/**
	 * The code of the locale being indexed or searched
	 * @var string e.g. th_TH, en_US
	 */
	private $locale;


	/**
	 * Mapping of DataObject ClassName and whether it is in the SiteTree or not
	 * @var array
	 */
	private static $site_tree_classes = array();


	/**
	 * Counter used for testing, records indexing requests
	 * @var integer
	 */
	public static $indexing_request_ctr = 0;


	/**
	 * Array of highlighted fields, e.g. Title, Title.standard.  If this is empty then the
	 * ShowHighlights field of SearchableField is used to determine which fields to highlight
	 * @var array
	 */
	private $highlightedFields = array();


	/**
	 * The total number of documents to index for the current locale
	 * @var integer
	 */
	private $nDocumentsToIndexForLocale = 0;


	/**
	 * The number of documents handed to the indexer so far during refresh().
	 * Previously created dynamically; kept public so existing external access still works.
	 * @var integer
	 */
	public $nDocumentsIndexed = 0;


	/**
	 * Timestamp (microtime(true)) at which refresh() was started, or null if it never ran.
	 * Previously created dynamically; kept public so existing external access still works.
	 * @var float|null
	 */
	public $StartTime = null;


	/**
	 * Terms found for the last 'more like this' search, keyed by field name, or null.
	 * Previously created dynamically; kept public so existing external access still works.
	 * @var array|null
	 */
	public $MoreLikeThisTerms = null;


	/**
	 * Enable this to allow test classes not to be ignored when indexing
	 * @var bool
	 */
	public $test_mode = false;


	/**
	 * @param \Elastica\Client $client
	 * @param string $newIndexName Name of the new index
	 */
	public function __construct(Client $client, $newIndexName) {
		$this->client = $client;
		$this->indexName = $newIndexName;
		$this->locale = \i18n::default_locale();
	}


	/**
	 * Set the highlight fields for subsequent searches
	 *
	 * @param array $newHighlightedFields field names to highlight, e.g. Title, Title.standard
	 */
	public function setHighlightedFields($newHighlightedFields) {
		$this->highlightedFields = $newHighlightedFields;
	}


	/**
	 * Switch test mode on or off
	 *
	 * @param bool $newTestMode true to allow whitelisted test classes to be indexed
	 */
	public function setTestMode($newTestMode) {
		$this->test_mode = $newTestMode;
	}


	/**
	 * @return \Elastica\Client
	 */
	public function getClient() {
		return $this->client;
	}


	/**
	 * Get the index for the current locale
	 *
	 * @return \Elastica\Index
	 */
	public function getIndex() {
		return $this->getClient()->getIndex($this->getLocaleIndexName());
	}


	/**
	 * @param string $newLocale locale code, e.g. th_TH, en_US
	 */
	public function setLocale($newLocale) {
		$this->locale = $newLocale;
	}


	/**
	 * @return string the base index name, without the locale suffix
	 */
	public function getIndexName() {
		return $this->indexName;
	}


	/**
	 * Build the name of the locale specific index: base name and locale joined,
	 * lower cased, with every hyphen replaced by an underscore.
	 *
	 * @return string e.g. 'elastica_en_us' for index 'elastica' and locale 'en_US'
	 */
	private function getLocaleIndexName() {
		$name = strtolower($this->indexName . '-' . $this->locale);
		return str_replace('-', '_', $name);
	}


	/**
	 * Performs a search query and returns the results.
	 *
	 * If the query is a 'more like this' query, the terms used for it are looked up with
	 * an extra validation request, used for highlighting, and attached to the returned
	 * results as MoreLikeThisTerms.
	 *
	 * @param \Elastica\Query|string|array $query
	 * @param string|array $types List of comma separated SilverStripe classes to search, or blank for all
	 * @return \Elastica\ResultSet
	 */
	public function search($query, $types = '') {
		$query = Query::create($query); // may be a string
		$types = $this->normaliseTypes($types);

		// Flag the query so that later steps know whether it is a 'more like this' query
		$data = $query->toArray();
		$query->MoreLikeThis = isset($data['query']['more_like_this']);

		// Terms from an earlier search must not leak into this one
		$this->MoreLikeThisTerms = null;

		// Use the injected client so that the search hits the configured server
		$search = new Search($this->getClient());

		// get results from all shards, this makes test repeatable
		if($this->test_mode) {
			$search->setOption('search_type', Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH);
		}

		$search->addIndex($this->getLocaleIndexName());
		$this->addTypesToSearch($search, $types);

		// If the query is a 'more like this' we can get the terms used for searching by performing
		// an extra query, in this case a query validation with explain and rewrite turned on.
		// This needs the index and types to be set already as it is based on the search path.
		$this->checkForTermsMoreLikeThis($query, $search);

		$highlights = $this->getHighlightingConfig();
		if($query->MoreLikeThis && is_array($this->MoreLikeThisTerms) && !empty($this->MoreLikeThisTerms)) {
			$highlights['fields'] = $this->getMoreLikeThisHighlightFields($this->MoreLikeThisTerms);
		}
		$query->setHighlight($highlights);

		$searchResults = $search->search($query, $search->getOptions());
		if(isset($this->MoreLikeThisTerms)) {
			$searchResults->MoreLikeThisTerms = $this->MoreLikeThisTerms;
		}

		return $searchResults;
	}


	/**
	 * Convert the $types argument of search() into a clean array of types.
	 * A string is split on commas; string entries are trimmed and blank ones dropped,
	 * so that an empty string means 'no type restriction'.
	 *
	 * @param string|array $types comma separated string or array of types
	 * @return array list of types, possibly empty
	 */
	private function normaliseTypes($types) {
		if(is_string($types)) {
			$types = explode(',', $types);
		}

		if(empty($types)) {
			return array();
		}

		if(!is_array($types)) {
			$types = array($types);
		}

		$result = array();
		foreach($types as $type) {
			if(is_string($type)) {
				$type = trim($type);
				if($type === '') {
					continue;
				}
			}
			$result[] = $type;
		}

		return $result;
	}


	/**
	 * Restrict a search to the given types
	 *
	 * @param \Elastica\Search $search the search to restrict
	 * @param array $types types to add, an empty array leaves the search unrestricted
	 */
	private function addTypesToSearch($search, $types) {
		if(empty($types)) {
			return;
		}

		foreach($types as $type) {
			$search->addType($type);
		}
	}


	/**
	 * Build the highlight field configuration for a 'more like this' search: each field
	 * gets a highlight query matching the terms found for that field.
	 *
	 * @param array $termsByField field name => array of terms
	 * @return array field name => array('highlight_query' => multi match query)
	 */
	private function getMoreLikeThisHighlightFields($termsByField) {
		$fields = array();
		foreach($termsByField as $field => $terms) {
			$termQuery = array('multi_match' => array(
				'query' => implode(' ', $terms),
				'type' => 'most_fields',
				'fields' => array($field)
			));
			$fields[$field] = array('highlight_query' => $termQuery);
		}

		return $fields;
	}


	/**
	 * Build the default highlighting configuration from the 'Highlights' config of 'Elastica'.
	 *
	 * The fields are those set with setHighlightedFields(). If none were set, the string
	 * SearchableFields marked with ShowHighlights are used, with '.standard' appended to
	 * each name.
	 *
	 * @return array with keys pre_tags, post_tags and fields
	 */
	private function getHighlightingConfig() {
		$highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');
		$preTags = $highlightsCfg['PreTags'];
		$postTags = $highlightsCfg['PostTags'];
		$fragmentSize = $highlightsCfg['Phrase']['FragmentSize'];
		$nFragments = $highlightsCfg['Phrase']['NumberOfFragments'];

		$stringFields = $this->highlightedFields;
		$usingProvidedHighlightFields = !empty($stringFields);

		if(!$usingProvidedHighlightFields) {
			$filter = array('Type' => 'string', 'ShowHighlights' => true);
			$stringFields = \SearchableField::get()->filter($filter)->map('Name')->toArray();
		}

		$highlightFields = array();
		foreach($stringFields as $name) {
			// Provided field names are used as is, configured ones get the '.standard' suffix
			$fieldName = $usingProvidedHighlightFields ? $name : $name . '.standard';
			$highlightFields[$fieldName] = array(
				'fragment_size' => $fragmentSize,
				'number_of_fragments' => $nFragments,
				'no_match_size'=> 200
			);
		}

		return array(
			'pre_tags' => array($preTags),
			'post_tags' => array($postTags),
			'fields' => $highlightFields
		);
	}


	/**
	 * For a 'more like this' query, ask the server to validate the query with explain and
	 * rewrite turned on, and store the terms parsed from the explanation in MoreLikeThisTerms.
	 * Does nothing for any other query.
	 *
	 * @param \Elastica\Query $elasticaQuery query flagged with a MoreLikeThis property by search()
	 * @param \Elastica\Search $search search with index and types already set
	 */
	private function checkForTermsMoreLikeThis($elasticaQuery, $search) {
		if(!$elasticaQuery->MoreLikeThis) {
			return;
		}

		$data = $elasticaQuery->toArray();
		$termData = array('query' => $data['query']);

		// Only swap the trailing '_search' segment, the index name itself may contain '_search'
		$path = preg_replace('/_search$/', '_validate/query', $search->getPath());

		$params = array('explain' => true, 'rewrite' => true);
		if($this->test_mode) {
			$params['search_type'] = Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
		}

		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$termData,
			$params
		);

		$rData = $response->getData();
		if(!isset($rData['explanations'][0]['explanation'])) {
			return;
		}

		$terms = ElasticaUtil::parseSuggestionExplanation($rData['explanations'][0]['explanation']);
		if(isset($terms)) {
			$this->MoreLikeThisTerms = $terms;
		}
	}


	/**
	 * Ensure that the index is present
	 */
	protected function ensureIndex() {
		if(!$this->getIndex()->exists()) {
			$this->createIndex();
		}
	}


	/**
	 * Ensure that there is a mapping present
	 *
	 * @param \Elastica\Type $type Type object
	 * @param \DataObject $record DataObject providing getElasticaMapping()
	 * @return array the mapping as an array
	 */
	protected function ensureMapping(\Elastica\Type $type, \DataObject $record) {
		$mapping = $type->getMapping();
		if($mapping == array()) {
			$this->ensureIndex();
			$mapping = $record->getElasticaMapping();
			$type->setMapping($mapping);
			$mapping = $mapping->toArray();
		}
		return $mapping;
	}


	/**
	 * Either creates or updates a record in the index.
	 * In buffered mode the document is only queued until endBulkIndex() is called.
	 *
	 * @param Searchable $record
	 */
	public function index($record) {
		$document = $record->getElasticaDocument();
		$typeName = $record->getElasticaType();

		if($this->buffered) {
			$this->buffer[$typeName][] = $document;
			return;
		}

		$index = $this->getIndex();
		$type = $index->getType($typeName);

		$this->ensureMapping($type, $record);

		$type->addDocument($document);
		$index->refresh();
		self::$indexing_request_ctr++;
	}


	/**
	 * Begins a bulk indexing operation where documents are buffered rather than
	 * indexed immediately.
	 */
	public function startBulkIndex() {
		$this->buffered = true;
	}


	/**
	 * Debugging aid: list the indexes of the server at localhost:9200 using curl and
	 * output them between two markers.
	 *
	 * @param string $trace label used in the markers around the output
	 * @return array lines of output of the curl command
	 */
	public function listIndexes($trace) {
		$command = "curl 'localhost:9200/_cat/indices?v'";
		$op = array();
		exec($command, $op);
		ElasticaUtil::message("\n++++ $trace ++++\n");
		ElasticaUtil::message(print_r($op, 1));
		ElasticaUtil::message("++++ /{$trace} ++++\n\n");
		return $op;
	}


	/**
	 * Ends the current bulk index operation and indexes the buffered documents,
	 * one bulk request per type.
	 */
	public function endBulkIndex() {
		$index = $this->getIndex();
		foreach($this->buffer as $type => $documents) {
			$amount = count($documents);
			$index->getType($type)->addDocuments($documents);
			$index->refresh();

			ElasticaUtil::message("\tAdding $amount documents to the index\n");
			$this->reportIndexingEta();
			self::$indexing_request_ctr++;
		}

		$this->buffered = false;
		$this->buffer = array();
	}


	/**
	 * Output the estimated time needed to index the remaining documents of the current
	 * locale. Only possible once refresh() has started and documents have been counted.
	 */
	private function reportIndexingEta() {
		// Without a start time or any indexed documents there is no rate to extrapolate from
		if(!isset($this->StartTime) || $this->nDocumentsIndexed <= 0) {
			return;
		}

		$elapsed = microtime(true) - $this->StartTime;
		$timePerDoc = $elapsed / $this->nDocumentsIndexed;
		$documentsRemaining = max(0, $this->nDocumentsToIndexForLocale - $this->nDocumentsIndexed);
		$etaHR = $this->formatDuration($documentsRemaining * $timePerDoc);
		ElasticaUtil::message("ETA to completion of indexing $this->locale ($documentsRemaining documents): $etaHR");
	}


	/**
	 * Format a duration as hours, minutes and seconds. The duration is rounded to whole
	 * seconds first so that the seconds part never reaches 60.
	 *
	 * @param float $seconds duration in seconds, negative values are treated as 0
	 * @return string e.g. '1h 2m 5s'
	 */
	private function formatDuration($seconds) {
		$total = max(0, (int) round($seconds));
		$hours = (int) floor($total / 3600);
		$minutes = (int) floor(($total % 3600) / 60);
		$secs = $total % 60;
		return "{$hours}h {$minutes}m {$secs}s";
	}


	/**
	 * Deletes a record from the index.
	 *
	 * @param Searchable $record
	 */
	public function remove($record) {
		$index = $this->getIndex();
		$type = $index->getType($record->getElasticaType());
		$type->deleteDocument($record->getElasticaDocument());
		$index->refresh();
	}


	/**
	 * Creates the index and the type mappings. An existing index is deleted first.
	 */
	public function define() {
		$index = $this->getIndex();

		// Recreate the index
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();

		foreach($this->getIndexedClasses() as $class) {
			$sng = singleton($class);
			$mapping = $sng->getElasticaMapping();
			$mapping->setType($index->getType($sng->getElasticaType()));
			$mapping->send();
		}
	}


	/**
	 * Refresh an array of records in the index. Records whose showRecordInSearch()
	 * is false are skipped.
	 *
	 * @param array $records
	 */
	protected function refreshRecords($records) {
		foreach($records as $record) {
			if($record->showRecordInSearch()) {
				$this->index($record);
			}
		}
	}


	/**
	 * Get a List of all records by class. Get the "Live data" If the class has the "Versioned" extension
	 *
	 * @param string $class Class Name
	 * @param  int $pageSize Optional page size, only a max of this number of records returned
	 * @param  int $page Page number to return, starting at 0
	 * @return \DataList $records
	 */
	protected function recordsByClassConsiderVersioned($class, $pageSize = 0, $page = 0) {
		if($class::has_extension("Versioned")) {
			$records = \Versioned::get_by_stage($class, 'Live');
		} else {
			$records = $class::get();
		}

		// A page size of 0 means 'all records'
		if($pageSize > 0) {
			$records = $records->limit($pageSize, $page * $pageSize);
		}

		return $records;
	}


	/**
	 * Refresh the records of a given class within the search index, in batches of 500
	 * records with one bulk index operation per batch.
	 *
	 * @param string $class Class Name
	 */
	protected function refreshClass($class) {
		$nRecords = $this->recordsByClassConsiderVersioned($class)->count();
		$batchSize = 500;
		// Whole number of batches needed, no trailing empty batch
		$pages = (int) ceil($nRecords / $batchSize);

		for($i = 0; $i < $pages; $i++) {
			$this->startBulkIndex();
			$pagedRecords = $this->recordsByClassConsiderVersioned($class, $batchSize, $i);
			$this->nDocumentsIndexed += $pagedRecords->count();
			$this->refreshRecords($pagedRecords->toArray());
			$this->endBulkIndex();
		}
	}


	/**
	 * Re-indexes each record in the index.
	 */
	public function refresh() {
		$this->StartTime = microtime(true);

		$classes = $this->getIndexedClasses();

		// Count the number of documents for this locale
		$amount = 0;
		echo "CURRENT LOCALE:" . $this->locale;
		foreach($classes as $class) {
			$amount += $this->recordsByClassConsiderVersioned($class)->count();
		}

		$this->nDocumentsToIndexForLocale = $amount;
		$this->nDocumentsIndexed = 0;

		foreach($classes as $classname) {
			ElasticaUtil::message("Indexing class $classname");

			// Whether a class is in the SiteTree is looked up once and then cached
			if(!isset(self::$site_tree_classes[$classname])) {
				self::$site_tree_classes[$classname] = SearchableHelper::isInSiteTree($classname);
			}
			$inSiteTree = self::$site_tree_classes[$classname];

			// SiteTree classes are only refreshed through SiteTree itself, this prevents the
			// same item being indexed twice due to class inheritance. Other data objects are
			// refreshed class by class.
			if(!$inSiteTree || $classname === 'SiteTree') {
				$this->refreshClass($classname);
			}
		}

		echo "Completed indexing documents for locale $this->locale\n";
	}


	/**
	 * Reset the current index: delete it if it exists and create it again
	 */
	public function reset() {
		$index = $this->getIndex();
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();
	}


	/**
	 * Create the index for the current locale using the settings configured for that locale
	 */
	private function createIndex() {
		$index = $this->getIndex();
		$settings = $this->getIndexSettingsForCurrentLocale()->generateConfig();
		$index->create($settings, true);
	}


	/**
	 * Get the index settings for the current locale
	 *
	 * @return IndexSettings index settings for the current locale
	 * @throws \Exception if the 'indexsettings' config of 'Elastica' has no entry for the locale
	 */
	public function getIndexSettingsForCurrentLocale() {
		$indexSettings = \Config::inst()->get('Elastica', 'indexsettings');
		if(!isset($indexSettings[$this->locale])) {
			throw new \Exception('ERROR: No index settings are provided for locale ' . $this->locale . "\n");
		}

		return \Injector::inst()->create($indexSettings[$this->locale]);
	}


	/**
	 * Gets the classes which are indexed (i.e. have the extension applied).
	 * Classes implementing TestOnly are only included when they are whitelisted
	 * and test mode is on.
	 *
	 * @return array
	 */
	public function getIndexedClasses() {
		$classes = array();

		$whitelist = array('SearchableTestPage', 'SearchableTestFatherPage', 'SearchableTestGrandFatherPage',
			'FlickrPhotoTO', 'FlickrTagTO', 'FlickrAuthorTO', 'FlickrSetTO');

		foreach(\ClassInfo::subclassesFor('DataObject') as $candidate) {
			$interfaces = class_implements($candidate);

			// Only allow whitelisted test classes, and only in testing mode
			if(isset($interfaces['TestOnly'])) {
				if(!$this->test_mode || !in_array($candidate, $whitelist, true)) {
					continue;
				}
			}

			// The singleton is only needed for classes that passed the check above
			if(singleton($candidate)->hasExtension('SilverStripe\\Elastica\\Searchable')) {
				$classes[] = $candidate;
			}
		}

		return $classes;
	}


	/**
	 * Get the number of indexing requests made.  Used for testing bulk indexing
	 *
	 * @return integer indexing request counter
	 */
	public function getIndexingRequestCtr() {
		return self::$indexing_request_ctr;
	}


	/**
	 * Get the term vectors in the index for the provided Searchable object
	 *
	 * @param  Searchable $searchable An object that implements Searchable
	 * @return array             array of field name to terms indexed, empty if the
	 *                           response contains no term vectors
	 */
	public function getTermVectors($searchable) {
		$params = array();

		// Request the vectors of every mapped field and of each of its sub fields
		$fieldMappings = $searchable->getElasticaMapping()->getProperties();
		$allFields = array();
		foreach($fieldMappings as $field => $mapping) {
			$allFields[] = $field;

			if(isset($mapping['fields'])) {
				foreach(array_keys($mapping['fields']) as $subField) {
					$allFields[] = $field . '.' . $subField;
				}
			}
		}
		sort($allFields);

		$data = array(
			'fields' => $allFields,
			'offsets' => true,
			'payloads' => true,
			'positions' => true,
			'term_statistics' => true,
			'field_statistics' => true
		);

		// NOTE(review): the type segment uses ClassName whereas index() uses getElasticaType(),
		// presumably these are the same value - confirm
		$path = $this->getIndex()->getName() . '/' . $searchable->ClassName . '/' . $searchable->ID . '/_termvector';
		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$data,
			$params
		);

		$data = $response->getData();
		return isset($data['term_vectors']) ? $data['term_vectors'] : array();
	}
}
}
675