Completed
Push — dev2 ( 2002e2...2692ec )
by Gordon
03:11
created

ElasticaService::search()   B

Complexity

Conditions 5
Paths 8

Size

Total Lines 36
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 10
Bugs 6 Features 0
Metric Value
c 10
b 6
f 0
dl 0
loc 36
rs 8.439
cc 5
eloc 22
nc 8
nop 2
1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Client;
6
use Elastica\Query;
7
use Elastica\Search;
8
use SilverStripe\Elastica\ElasticaUtil;
9
10
/**
 * A service used to interact with Elasticsearch through an Elastica client.
 *
 * Each instance works against one locale-specific index whose name is derived from the
 * configured index name and the current locale (see getLocaleIndexName()).
 */
class ElasticaService {

	/**
	 * Documents waiting for the next bulk request, keyed by Elastica type name; each value
	 * is a list of documents of that type.
	 * @var array
	 */
	protected $buffer = array();


	/**
	 * @var bool controls whether indexing operations are buffered or not
	 */
	protected $buffered = false;


	/**
	 * @var \Elastica\Client Elastica Client object
	 */
	private $client;


	/**
	 * @var string index name (without the locale suffix)
	 */
	private $indexName;


	/**
	 * The code of the locale being indexed or searched
	 * @var string e.g. th_TH, en_US
	 */
	private $locale;


	/**
	 * Mapping of DataObject ClassName and whether it is in the SiteTree or not
	 * @var array
	 */
	private static $site_tree_classes = array();


	/**
	 * Counter used for testing, records indexing requests
	 * @var integer
	 */
	public static $indexing_request_ctr = 0;


	/**
	 * Array of highlighted fields, e.g. Title, Title.standard.  If this is empty then the
	 * ShowHighlights flag of SearchableField is used to determine which fields to highlight
	 * @var array
	 */
	private $highlightedFields = array();


	/**
	 * The number of documents to index for the current locale, counted by refresh()
	 * @var integer
	 */
	private $nDocumentsToIndexForLocale = 0;


	/**
	 * The number of documents handed to the indexer so far during a refresh.
	 * Declared explicitly (it used to be created dynamically); kept public because a
	 * dynamically created property was publicly readable.
	 * @var integer
	 */
	public $nDocumentsIndexed = 0;


	/**
	 * Timestamp (microtime(true)) at which refresh() started, or null if refresh() has not
	 * been called.  Used to estimate the time remaining during bulk indexing.
	 * Declared explicitly; kept public for the same reason as $nDocumentsIndexed.
	 * @var float|null
	 */
	public $StartTime = null;


	/**
	 * Terms extracted for the most recent 'more like this' search, keyed by field name, or
	 * null if the last search was not a 'more like this' query (or no terms were found).
	 * Declared explicitly; kept public for the same reason as $nDocumentsIndexed.
	 * @var array|null
	 */
	public $MoreLikeThisTerms = null;


	/**
	 * Enable this to allow test classes not to be ignored when indexing
	 * @var bool
	 */
	public $test_mode = false;


	/**
	 * Set the highlight fields for subsequent searches
	 *
	 * @param array $newHighlightedFields field names to highlight, e.g. Title, Title.standard
	 */
	public function setHighlightedFields($newHighlightedFields) {
		$this->highlightedFields = $newHighlightedFields;
	}


	/**
	 * @param \Elastica\Client $client
	 * @param string $newIndexName Name of the new index
	 */
	public function __construct(Client $client, $newIndexName) {
		$this->client = $client;
		$this->indexName = $newIndexName;
		$this->locale = \i18n::default_locale();
	}


	/**
	 * @param bool $newTestMode true to include whitelisted test classes when indexing and to
	 *                          request the DFS search type when searching
	 */
	public function setTestMode($newTestMode) {
		$this->test_mode = $newTestMode;
	}


	/**
	 * @return \Elastica\Client
	 */
	public function getClient() {
		return $this->client;
	}


	/**
	 * @return \Elastica\Index the index for the current locale
	 */
	public function getIndex() {
		return $this->getClient()->getIndex($this->getLocaleIndexName());
	}


	/**
	 * @param string $newLocale locale code, e.g. th_TH, en_US
	 */
	public function setLocale($newLocale) {
		$this->locale = $newLocale;
	}


	/**
	 * @return string the configured index name, without the locale suffix
	 */
	public function getIndexName() {
		return $this->indexName;
	}


	/**
	 * Build the name of the locale specific index: index name and locale joined together,
	 * lowercased, with every hyphen replaced by an underscore.
	 *
	 * @return string
	 */
	private function getLocaleIndexName() {
		$name = strtolower($this->indexName . '-' . $this->locale);
		return str_replace('-', '_', $name);
	}


	/**
	 * Performs a search query and returns the results.
	 *
	 * If the query is a 'more like this' query, the terms extracted for it are attached to
	 * the returned object as MoreLikeThisTerms.
	 *
	 * @param \Elastica\Query|string|array $query
	 * @param string|array $types List of comma separated SilverStripe classes to search, or blank for all
	 * @return \Elastica\ResultSet
	 */
	public function search($query, $types = '') {
		$query = Query::create($query); // may be a string

		if(is_string($types)) {
			$types = explode(',', $types);
		}
		// Drop blank entries so that '' really means "all types" instead of registering
		// an empty type name
		$types = array_values(array_filter(array_map('trim', (array) $types), 'strlen'));

		$data = $query->toArray();
		$query->MoreLikeThis = isset($data['query']['more_like_this']);

		// Forget terms extracted by an earlier 'more like this' search so that they are
		// not attached to the results of this one
		$this->MoreLikeThisTerms = null;

		// Search with the client this service was configured with
		$search = new Search($this->getClient());

		// get results from all shards, this makes test repeatable
		$this->checkIfTestMode($search);

		// The index and the types are each registered exactly once
		$search->addIndex($this->getLocaleIndexName());
		$this->addTypesToSearch($search, $types, $query);

		$highlights = $this->getHighlightingConfig();
		$this->addExtractedQueryTermsForMoreLikeThis($query, $highlights);
		$query->setHighlight($highlights);

		$searchResults = $search->search($query, $search->getOptions());
		if(isset($this->MoreLikeThisTerms)) {
			$searchResults->MoreLikeThisTerms = $this->MoreLikeThisTerms;
		}

		return $searchResults;
	}


	/**
	 * For a 'more like this' query, replace the highlighted fields with one entry per field
	 * for which terms were extracted, each carrying its own highlight_query.
	 *
	 * @param Query $query
	 * @param array $highlights highlighting configuration, modified in place
	 */
	private function addExtractedQueryTermsForMoreLikeThis($query, &$highlights) {
		if(!$query->MoreLikeThis) {
			return;
		}

		// No terms may have been extracted (e.g. the validation request returned no
		// explanation); iterate over an empty list rather than an unset property
		$extractedTerms = is_array($this->MoreLikeThisTerms) ? $this->MoreLikeThisTerms : array();

		$termsMatchingQuery = array();
		foreach($extractedTerms as $field => $terms) {
			$termQuery = array('multi_match' => array(
				'query' => implode(' ', $terms),
				'type' => 'most_fields',
				'fields' => array($field)
			));
			$termsMatchingQuery[$field] = array('highlight_query' => $termQuery);
		}
		$highlights['fields'] = $termsMatchingQuery;
	}


	/**
	 * Extract the 'more like this' terms if applicable, then register the types to search.
	 *
	 * @param Search $search
	 * @param array $types type names to restrict the search to, empty for all
	 * @param Query $query
	 */
	private function addTypesToSearch(&$search, $types, $query) {
		// If the query is a 'more like this' we can get the terms used for searching by performing
		// an extra query, in this case a query validation with explain and rewrite turned on
		$this->checkForTermsMoreLikeThis($query, $search);

		if(!empty($types)) {
			foreach($types as $type) {
				$search->addType($type);
			}
		}
	}


	/**
	 * Build the highlighting part of the query from the Elastica.Highlights configuration.
	 *
	 * The fields set via setHighlightedFields() are used as they are.  If none were set, the
	 * string SearchableFields flagged with ShowHighlights are used, with '.standard'
	 * appended to each name.
	 *
	 * @return array highlighting configuration with the keys pre_tags, post_tags and fields
	 */
	private function getHighlightingConfig() {
		$highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');
		$preTags = $highlightsCfg['PreTags'];
		$postTags = $highlightsCfg['PostTags'];
		$fragmentSize = $highlightsCfg['Phrase']['FragmentSize'];
		$nFragments = $highlightsCfg['Phrase']['NumberOfFragments'];

		$stringFields = $this->highlightedFields;
		$usingProvidedHighlightFields = !empty($stringFields);

		if(!$usingProvidedHighlightFields) {
			$filter = array('Type' => 'string', 'ShowHighlights' => true);
			$stringFields = \SearchableField::get()->filter($filter)->map('Name')->toArray();
		}

		$highlightFields = array();
		foreach($stringFields as $name) {
			$fieldName = $usingProvidedHighlightFields ? $name : $name . '.standard';
			$highlightFields[$fieldName] = array(
				'fragment_size' => $fragmentSize,
				'number_of_fragments' => $nFragments,
				'no_match_size'=> 200
			);
		}

		return array(
			'pre_tags' => array($preTags),
			'post_tags' => array($postTags),
			'fields' => $highlightFields
		);
	}


	/**
	 * In test mode, ask for the dfs_query_then_fetch search type so that scoring takes all
	 * shards into account and test results are repeatable.
	 *
	 * The option is stored on the search object itself.  Static analysis had flagged the
	 * previous version as having no side effects: it assigned the value to an uninitialised
	 * local array and never used the $search parameter, so test mode never reached the request.
	 *
	 * @param Search $search the search to configure
	 */
	private function checkIfTestMode($search) {
		if($this->test_mode) {
			$search->setOption(Search::OPTION_SEARCH_TYPE, Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH);
		}
	}


	/**
	 * For a 'more like this' query, run a query validation request with explain and rewrite
	 * turned on and store the terms parsed from its explanation in $this->MoreLikeThisTerms.
	 *
	 * @param Query $elasticaQuery
	 * @param Search $search search whose path (index and types) the validation is sent to
	 */
	private function checkForTermsMoreLikeThis($elasticaQuery, $search) {
		if(!$elasticaQuery->MoreLikeThis) {
			return;
		}

		$data = $elasticaQuery->toArray();
		$termData = array('query' => $data['query']);

		// Swap only the trailing endpoint; an unanchored replace would also rewrite an
		// index name that happens to contain '_search'
		$path = preg_replace('/_search$/', '_validate/query', $search->getPath());
		$params = array('explain' => true, 'rewrite' => true);

		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$termData,
			$params
		);

		$rData = $response->getData();
		if(isset($rData['explanations'][0]['explanation'])) {
			$explanation = $rData['explanations'][0]['explanation'];
			$terms = ElasticaUtil::parseSuggestionExplanation($explanation);
			if(isset($terms)) {
				$this->MoreLikeThisTerms = $terms;
			}
		}
	}


	/**
	 * Ensure that the index is present
	 */
	protected function ensureIndex() {
		if(!$this->getIndex()->exists()) {
			$this->createIndex();
		}
	}


	/**
	 * Ensure that there is a mapping present.  If the type has no mapping yet, the index is
	 * created if necessary and the mapping of the record is applied to the type.
	 *
	 * @param \Elastica\Type $type Type object
	 * @param \DataObject $record DataObject providing getElasticaMapping()
	 * @return array the mapping as an array
	 */
	protected function ensureMapping(\Elastica\Type $type, \DataObject $record) {
		$mapping = $type->getMapping();
		if($mapping == array()) {
			$this->ensureIndex();
			$mapping = $record->getElasticaMapping();
			$type->setMapping($mapping);
			$mapping = $mapping->toArray();
		}
		return $mapping;
	}


	/**
	 * Either creates or updates a record in the index.  While a bulk operation is in
	 * progress the document is only buffered; otherwise it is sent straight away.
	 *
	 * @param Searchable $record
	 */
	public function index($record) {
		$document = $record->getElasticaDocument();
		$typeName = $record->getElasticaType();

		if($this->buffered) {
			// Appending creates the per-type list on first use
			$this->buffer[$typeName][] = $document;
			return;
		}

		$index = $this->getIndex();
		$type = $index->getType($typeName);

		$this->ensureMapping($type, $record);

		$type->addDocument($document);
		$index->refresh();
		self::$indexing_request_ctr++;
	}


	/**
	 * Begins a bulk indexing operation where documents are buffered rather than
	 * indexed immediately.
	 */
	public function startBulkIndex() {
		$this->buffered = true;
	}


	/**
	 * Output the list of indexes, wrapped in a pair of trace markers.
	 *
	 * NOTE(review): this shells out to curl against localhost:9200 rather than using the
	 * configured client, so it only reports on a local Elasticsearch instance.
	 *
	 * @param string $trace label shown in the markers around the listing
	 * @return array lines of output from the curl command
	 */
	public function listIndexes($trace) {
		$command = "curl 'localhost:9200/_cat/indices?v'";
		exec($command, $op);
		ElasticaUtil::message("\n++++ $trace ++++\n");
		ElasticaUtil::message(print_r($op, 1));
		ElasticaUtil::message("++++ /{$trace} ++++\n\n");
		return $op;
	}


	/**
	 * Ends the current bulk index operation and indexes the buffered documents, one bulk
	 * request per type.  During a refresh() an estimated time to completion is reported.
	 */
	public function endBulkIndex() {
		$index = $this->getIndex();

		// Total number of buffered documents over all types; reported with every request
		$amount = 0;
		foreach($this->buffer as $documents) {
			$amount += count($documents);
		}

		foreach($this->buffer as $type => $documents) {
			$index->getType($type)->addDocuments($documents);
			$index->refresh();

			ElasticaUtil::message("\tAdding $amount documents to the index\n");

			// An ETA can only be derived once at least one document has been counted
			if(isset($this->StartTime) && $this->nDocumentsIndexed > 0) {
				$elapsed = microtime(true) - $this->StartTime;
				$timePerDoc = $elapsed / $this->nDocumentsIndexed;
				$documentsRemaining = $this->nDocumentsToIndexForLocale - $this->nDocumentsIndexed;

				// Round once to whole seconds so that the parts cannot show e.g. "60s"
				$eta = max(0, (int) round($documentsRemaining * $timePerDoc));
				$hours = (int) floor($eta / 3600);
				$minutes = (int) floor(($eta % 3600) / 60);
				$seconds = $eta % 60;
				$etaHR = "{$hours}h {$minutes}m {$seconds}s";
				ElasticaUtil::message("ETA to completion of indexing $this->locale ($documentsRemaining documents): $etaHR");
			}
			self::$indexing_request_ctr++;
		}

		$this->buffered = false;
		$this->buffer = array();
	}


	/**
	 * Deletes a record from the index.
	 *
	 * @param Searchable $record
	 */
	public function remove($record) {
		$index = $this->getIndex();
		$type = $index->getType($record->getElasticaType());
		$type->deleteDocument($record->getElasticaDocument());
		$index->refresh();
	}


	/**
	 * Creates the index and the type mappings.  An existing index is deleted first.
	 */
	public function define() {
		$index = $this->getIndex();

		# Recreate the index
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();

		foreach($this->getIndexedClasses() as $class) {
			$sng = singleton($class);
			$mapping = $sng->getElasticaMapping();
			$mapping->setType($index->getType($sng->getElasticaType()));
			$mapping->send();
		}
	}


	/**
	 * Index those of the given records that report they should be shown in search
	 *
	 * @param array $records
	 */
	protected function refreshRecords($records) {
		foreach($records as $record) {
			if($record->showRecordInSearch()) {
				$this->index($record);
			}
		}
	}


	/**
	 * Get a List of all records by class. Get the "Live data" If the class has the "Versioned" extension
	 *
	 * @param string $class Class Name
	 * @param  int $pageSize Optional page size, only a max of this number of records returned
	 * @param  int $page Page number to return (zero based), only used with a page size
	 * @return \DataList $records
	 */
	protected function recordsByClassConsiderVersioned($class, $pageSize = 0, $page = 0) {
		if($class::has_extension("Versioned")) {
			$records = \Versioned::get_by_stage($class, 'Live');
		} else {
			$records = $class::get();
		}

		if($pageSize > 0) {
			$records = $records->limit($pageSize, $page * $pageSize);
		}

		return $records;
	}


	/**
	 * Refresh the records of a given class within the search index, in batches that are
	 * each sent as one bulk operation.
	 *
	 * @param string $class Class Name
	 */
	protected function refreshClass($class) {
		$nRecords = $this->recordsByClassConsiderVersioned($class)->count();
		$batchSize = 500;

		// Exactly as many pages as are needed; no trailing empty batch
		$pages = (int) ceil($nRecords / $batchSize);

		for($i = 0; $i < $pages; $i++) {
			$this->startBulkIndex();
			$pagedRecords = $this->recordsByClassConsiderVersioned($class, $batchSize, $i);
			$this->nDocumentsIndexed += $pagedRecords->count();
			$batch = $pagedRecords->toArray();
			$this->refreshRecords($batch);
			$this->endBulkIndex();
		}
	}


	/**
	 * Re-indexes each record in the index.
	 */
	public function refresh() {
		$this->StartTime = microtime(true);

		// Resolved once; finding the indexed classes inspects every DataObject subclass
		$classes = $this->getIndexedClasses();

		//Count the number of documents for this locale
		$amount = 0;
		echo "CURRENT LOCALE:" . $this->locale;
		foreach($classes as $class) {
			$amount += $this->recordsByClassConsiderVersioned($class)->count();
		}

		$this->nDocumentsToIndexForLocale = $amount;
		$this->nDocumentsIndexed = 0;

		foreach($classes as $classname) {
			ElasticaUtil::message("Indexing class $classname");

			// Whether a class is in the SiteTree is looked up once and then cached
			if(!isset(self::$site_tree_classes[$classname])) {
				self::$site_tree_classes[$classname] = SearchableHelper::isInSiteTree($classname);
			}
			$inSiteTree = self::$site_tree_classes[$classname];

			// For SiteTree classes only SiteTree itself is refreshed, this prevents the
			// same item being indexed twice due to class inheritance.  Other data objects
			// are always refreshed.
			if(!$inSiteTree || $classname === 'SiteTree') {
				$this->refreshClass($classname);
			}
		}

		echo "Completed indexing documents for locale $this->locale\n";
	}


	/**
	 * Reset the current index: delete it if it is present, then create it afresh
	 */
	public function reset() {
		$index = $this->getIndex();
		// As in define(), only delete an index that is actually there
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();
	}


	/**
	 * Create the index for the current locale using the settings configured for that locale
	 */
	private function createIndex() {
		$index = $this->getIndex();
		$settings = $this->getIndexSettingsForCurrentLocale()->generateConfig();
		$index->create($settings, true);
	}


	/**
	 * Get the index settings for the current locale
	 *
	 * @return IndexSettings index settings for the current locale
	 * @throws \Exception if no settings class is configured for the current locale
	 */
	public function getIndexSettingsForCurrentLocale() {
		$indexSettings = \Config::inst()->get('Elastica', 'indexsettings');
		if(!isset($indexSettings[$this->locale])) {
			throw new \Exception('ERROR: No index settings are provided for locale ' . $this->locale . "\n");
		}

		return \Injector::inst()->create($indexSettings[$this->locale]);
	}


	/**
	 * Gets the classes which are indexed (i.e. have the extension applied).  TestOnly
	 * classes are only included if they are whitelisted and test mode is on.
	 *
	 * @return array
	 */
	public function getIndexedClasses() {
		$classes = array();

		$whitelist = array('SearchableTestPage', 'SearchableTestFatherPage', 'SearchableTestGrandFatherPage',
			'FlickrPhotoTO', 'FlickrTagTO', 'FlickrAuthorTO', 'FlickrSetTO');

		foreach(\ClassInfo::subclassesFor('DataObject') as $candidate) {
			$interfaces = class_implements($candidate);

			// Only allow whitelisted test classes, and those only in testing mode
			if(isset($interfaces['TestOnly'])) {
				if(!$this->test_mode || !in_array($candidate, $whitelist, true)) {
					continue;
				}
			}

			// Only instantiate classes that have passed the test class filter
			if(singleton($candidate)->hasExtension('SilverStripe\\Elastica\\Searchable')) {
				$classes[] = $candidate;
			}
		}

		return $classes;
	}


	/**
	 * Get the number of indexing requests made.  Used for testing bulk indexing
	 * @return integer indexing request counter
	 */
	public function getIndexingRequestCtr() {
		return self::$indexing_request_ctr;
	}


	/**
	 * Get the term vectors in the index for the provided Searchable object
	 *
	 * @param  Searchable $searchable An object that implements Searchable
	 * @return array array of field name to terms indexed, empty if the response contains
	 *               no term vectors
	 */
	public function getTermVectors($searchable) {
		$params = array();

		// Collect every mapped field and, where present, its sub fields (field.subfield)
		$fieldMappings = $searchable->getElasticaMapping()->getProperties();
		$allFields = array();
		foreach($fieldMappings as $field => $mapping) {
			$allFields[] = $field;

			if(isset($mapping['fields'])) {
				foreach(array_keys($mapping['fields']) as $subField) {
					$allFields[] = $field . '.' . $subField;
				}
			}
		}
		sort($allFields);

		$data = array(
			'fields' => $allFields,
			'offsets' => true,
			'payloads' => true,
			'positions' => true,
			'term_statistics' => true,
			'field_statistics' => true
		);

		$path = $this->getIndex()->getName() . '/' . $searchable->ClassName . '/' . $searchable->ID . '/_termvector';
		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$data,
			$params
		);

		$data = $response->getData();
		return isset($data['term_vectors']) ? $data['term_vectors'] : array();
	}
}
685