Completed
Push — dev2 ( 143263...a90a23 )
by Gordon
02:54
created

ElasticaService::search()   C

Complexity

Conditions 7
Paths 32

Size

Total Lines 42
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 9
Bugs 6 Features 0
Metric Value
c 9
b 6
f 0
dl 0
loc 42
rs 6.7273
cc 7
eloc 26
nc 32
nop 2
1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Client;
6
use Elastica\Query;
7
use Elastica\Search;
8
use SilverStripe\Elastica\ElasticaUtil;
9
10
/**
 * A service used to interact with elastic search.
 *
 * Wraps an Elastica client together with a locale specific index name.  It performs
 * searches (including highlighting and 'more like this' term extraction) and creates,
 * populates, refreshes and resets the index for classes carrying the Searchable extension.
 */
class ElasticaService {

	/**
	 * Documents queued while a bulk index operation is in progress, grouped by type name
	 * @var array map of Elastica type name to an array of \Elastica\Document
	 */
	protected $buffer = array();


	/**
	 * @var bool controls whether indexing operations are buffered or not
	 */
	protected $buffered = false;


	/**
	 * @var \Elastica\Client Elastica Client object
	 */
	private $client;


	/**
	 * @var string index name, without the locale suffix
	 */
	private $indexName;


	/**
	 * The code of the locale being indexed or searched
	 * @var string e.g. th_TH, en_US
	 */
	private $locale;


	/**
	 * Mapping of DataObject ClassName and whether it is in the SiteTree or not
	 * @var array
	 */
	private static $site_tree_classes = array();


	/**
	 * Counter used for testing, records indexing requests
	 * @var integer
	 */
	public static $indexing_request_ctr = 0;


	/**
	 * Array of highlighted fields, e.g. Title, Title.standard.  If this is empty then the
	 * ShowHighlights field of SearchableField is used to determine which fields to highlight
	 * @var array
	 */
	private $highlightedFields = array();


	/**
	 * The number of documents to index currently for this locale
	 * @var integer
	 */
	private $nDocumentsToIndexForLocale = 0;


	/**
	 * Number of documents handed to the indexer so far during the current refresh().
	 * Public because it used to be created dynamically and may be read from outside.
	 * @var integer
	 */
	public $nDocumentsIndexed = 0;


	/**
	 * Timestamp (microtime) at which the current refresh() started, null until refresh() is called.
	 * Public because it used to be created dynamically and may be read from outside.
	 * @var float|null
	 */
	public $StartTime = null;


	/**
	 * Terms extracted for the most recent 'more like this' search, keyed by field name.
	 * Null when the last search was not a 'more like this' query or no terms were found.
	 * Public because it used to be created dynamically and may be read from outside.
	 * @var array|null
	 */
	public $MoreLikeThisTerms = null;


	/**
	 * Enable this to allow test classes not to be ignored when indexing
	 * @var bool
	 */
	public $test_mode = false;


	/**
	 * @param \Elastica\Client $client
	 * @param string $newIndexName Name of the new index
	 */
	public function __construct(Client $client, $newIndexName) {
		$this->client = $client;
		$this->indexName = $newIndexName;
		$this->locale = \i18n::default_locale();
	}


	/**
	 * Set the highlight fields for subsequent searches
	 *
	 * @param array $newHighlightedFields field names to highlight, e.g. Title, Title.standard
	 */
	public function setHighlightedFields($newHighlightedFields) {
		$this->highlightedFields = $newHighlightedFields;
	}


	/**
	 * @param bool $newTestMode true to include whitelisted test classes when indexing
	 */
	public function setTestMode($newTestMode) {
		$this->test_mode = $newTestMode;
	}


	/**
	 * @return \Elastica\Client
	 */
	public function getClient() {
		return $this->client;
	}


	/**
	 * @return \Elastica\Index the index for the current locale
	 */
	public function getIndex() {
		return $this->getClient()->getIndex($this->getLocaleIndexName());
	}


	/**
	 * @param string $newLocale locale code, e.g. th_TH, en_US
	 */
	public function setLocale($newLocale) {
		$this->locale = $newLocale;
	}


	/**
	 * @return string the index name as passed to the constructor (no locale suffix)
	 */
	public function getIndexName() {
		return $this->indexName;
	}


	/**
	 * Build the locale specific index name: "<index>-<locale>", lower cased, with
	 * every hyphen replaced by an underscore.
	 *
	 * @return string
	 */
	private function getLocaleIndexName() {
		$name = strtolower($this->indexName . '-' . $this->locale);
		return str_replace('-', '_', $name);
	}


	/**
	 * Performs a search query and returns a result list.
	 *
	 * The query object is tagged with a MoreLikeThis flag, and when terms could be extracted
	 * for a 'more like this' query they are attached to the results as MoreLikeThisTerms.
	 *
	 * @param \Elastica\Query|string|array $query
	 * @param string|array $types List of comma separated SilverStripe classes to search, or blank for all
	 * @return \Elastica\ResultSet
	 */
	public function search($query, $types = '') {
		$query = Query::create($query); // may be a string
		$types = $this->normaliseTypes($types);

		$data = $query->toArray();
		$query->MoreLikeThis = isset($data['query']['more_like_this']);

		// Terms from an earlier search must not leak into the results of this one
		$this->MoreLikeThisTerms = null;

		// Search through the injected client so that its connection settings are honoured
		$search = new Search($this->getClient());

		// get results from all shards, this makes test repeatable
		if($this->test_mode) {
			$search->setOption('search_type', Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH);
		}

		$search->addIndex($this->getLocaleIndexName());

		// If the query is a 'more like this' we can get the terms used for searching by performing
		// an extra query, in this case a query validation with explain and rewrite turned on.
		// This happens before the types are added, so the validation path only holds the index.
		$this->checkForTermsMoreLikeThis($query, $search);
		$this->addTypesToSearch($search, $types);

		$highlights = $this->getHighlightingConfig();
		$this->addExtractedQueryTermsForMoreLikeThis($query, $highlights);
		$query->setHighlight($highlights);

		$searchResults = $search->search($query, $search->getOptions());
		if(isset($this->MoreLikeThisTerms)) {
			$searchResults->MoreLikeThisTerms = $this->MoreLikeThisTerms;
		}

		return $searchResults;
	}


	/**
	 * Convert the $types argument of search() into a clean list.
	 *
	 * A comma separated string is split, surrounding whitespace is trimmed and blank names
	 * are dropped, so that '' really means "all types" rather than one type with an empty name.
	 *
	 * @param string|array|null $types
	 * @return array list of type names (or type objects, passed through untouched)
	 */
	private function normaliseTypes($types) {
		if(is_string($types)) {
			$types = explode(',', $types);
		}
		if(empty($types)) {
			return array();
		}
		if(!is_array($types)) {
			$types = array($types);
		}

		$result = array();
		foreach($types as $type) {
			if(is_string($type)) {
				$type = trim($type);
				if($type === '') {
					continue;
				}
			}
			$result[] = $type;
		}
		return $result;
	}


	/**
	 * For a 'more like this' query, replace the highlighted fields with one entry per field
	 * for which terms were extracted, each carrying a multi_match highlight_query of those terms.
	 *
	 * @param \Elastica\Query $query query previously tagged with a MoreLikeThis flag by search()
	 * @param array $highlights highlighting configuration, altered in place
	 */
	private function addExtractedQueryTermsForMoreLikeThis($query, &$highlights) {
		// Without extracted terms there is nothing to build; keep the default highlighting
		if(!$query->MoreLikeThis || !is_array($this->MoreLikeThisTerms)) {
			return;
		}

		$termsMatchingQuery = array();
		foreach($this->MoreLikeThisTerms as $field => $terms) {
			$termQuery = array('multi_match' => array(
				'query' => implode(' ', $terms),
				'type' => 'most_fields',
				'fields' => array($field)
			));
			$termsMatchingQuery[$field] = array('highlight_query' => $termQuery);
		}
		$highlights['fields'] = $termsMatchingQuery;
	}


	/**
	 * Restrict a search to the given types.
	 *
	 * @param \Elastica\Search $search
	 * @param array $types list of types, an empty list leaves the search unrestricted
	 */
	private function addTypesToSearch($search, $types) {
		foreach($types as $type) {
			$search->addType($type);
		}
	}


	/**
	 * Build the highlighting configuration from the 'Elastica' / 'Highlights' config.
	 *
	 * Uses the fields given to setHighlightedFields() as they are.  When none were given, the
	 * string SearchableFields flagged ShowHighlights are used, with '.standard' appended to each.
	 *
	 * @return array configuration with the keys pre_tags, post_tags and fields
	 */
	private function getHighlightingConfig() {
		$highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');
		$preTags = $highlightsCfg['PreTags'];
		$postTags = $highlightsCfg['PostTags'];
		$fragmentSize = $highlightsCfg['Phrase']['FragmentSize'];
		$nFragments = $highlightsCfg['Phrase']['NumberOfFragments'];

		$stringFields = $this->highlightedFields;
		$usingProvidedHighlightFields = count($stringFields) > 0;

		if(!$usingProvidedHighlightFields) {
			$filter = array('Type' => 'string', 'ShowHighlights' => true);
			$stringFields = \SearchableField::get()->filter($filter)->map('Name')->toArray();
		}

		$highlightFields = array();
		foreach($stringFields as $name) {
			$fieldName = $usingProvidedHighlightFields ? $name : $name . '.standard';
			$highlightFields[$fieldName] = array(
				'fragment_size' => $fragmentSize,
				'number_of_fragments' => $nFragments,
				'no_match_size'=> 200
			);
		}

		return array(
			'pre_tags' => array($preTags),
			'post_tags' => array($postTags),
			'fields' => $highlightFields
		);
	}


	/**
	 * For a 'more like this' query, ask the server to validate the query with explain and
	 * rewrite turned on, and store the terms parsed from the explanation in MoreLikeThisTerms.
	 *
	 * @param \Elastica\Query $elasticaQuery query previously tagged with a MoreLikeThis flag
	 * @param \Elastica\Search $search search whose path is reused for the validation request
	 */
	private function checkForTermsMoreLikeThis($elasticaQuery, $search) {
		if(!$elasticaQuery->MoreLikeThis) {
			return;
		}

		$data = $elasticaQuery->toArray();
		$termData = array('query' => $data['query']);

		// Same index as the search, but hitting the validate endpoint instead
		$path = str_replace('_search', '_validate/query', $search->getPath());
		$params = array('explain' => true, 'rewrite' => true);
		if($this->test_mode) {
			$params['search_type'] = Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
		}

		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$termData,
			$params
		);

		$rData = $response->getData();
		if(isset($rData['explanations'][0]['explanation'])) {
			$terms = ElasticaUtil::parseSuggestionExplanation($rData['explanations'][0]['explanation']);
			if(isset($terms)) {
				$this->MoreLikeThisTerms = $terms;
			}
		}
	}


	/**
	 * Ensure that the index is present
	 */
	protected function ensureIndex() {
		if(!$this->getIndex()->exists()) {
			$this->createIndex();
		}
	}


	/**
	 * Ensure that there is a mapping present.  If the type has no mapping yet, the index is
	 * created if needed and the mapping of the record is applied to the type.
	 *
	 * @param \Elastica\Type $type Type object
	 * @param \DataObject $record DataObject carrying the Searchable extension
	 * @return array the mapping in array form
	 */
	protected function ensureMapping(\Elastica\Type $type, \DataObject $record) {
		$mapping = $type->getMapping();
		if($mapping == array()) {
			$this->ensureIndex();
			$mapping = $record->getElasticaMapping();
			$type->setMapping($mapping);
			$mapping = $mapping->toArray();
		}
		return $mapping;
	}


	/**
	 * Either creates or updates a record in the index.  While a bulk index operation is in
	 * progress the document is only buffered, otherwise it is sent and the index refreshed.
	 *
	 * @param Searchable $record
	 */
	public function index($record) {
		$document = $record->getElasticaDocument();
		$typeName = $record->getElasticaType();

		if($this->buffered) {
			// Appending creates the per type list on first use
			$this->buffer[$typeName][] = $document;
			return;
		}

		$index = $this->getIndex();
		$type = $index->getType($typeName);

		$this->ensureMapping($type, $record);

		$type->addDocument($document);
		$index->refresh();
		self::$indexing_request_ctr++;
	}


	/**
	 * Begins a bulk indexing operation where documents are buffered rather than
	 * indexed immediately.
	 */
	public function startBulkIndex() {
		$this->buffered = true;
	}


	/**
	 * Debugging aid: list the indexes by shelling out to curl and log the output between
	 * two markers carrying $trace.
	 *
	 * NOTE(review): this always talks to localhost:9200, not to the configured client.
	 *
	 * @param string $trace label written around the output
	 * @return array lines of output from the command
	 */
	public function listIndexes($trace) {
		$command = "curl 'localhost:9200/_cat/indices?v'";
		$op = array();
		exec($command, $op);
		ElasticaUtil::message("\n++++ $trace ++++\n");
		ElasticaUtil::message(print_r($op, 1));
		ElasticaUtil::message("++++ /{$trace} ++++\n\n");
		return $op;
	}


	/**
	 * Ends the current bulk index operation and indexes the buffered documents.
	 * One request is made, and counted, per buffered type.
	 */
	public function endBulkIndex() {
		$index = $this->getIndex();

		// Total across all buffered types; it is the same for every iteration below
		$amount = 0;
		foreach($this->buffer as $documents) {
			$amount += count($documents);
		}

		foreach($this->buffer as $type => $documents) {
			$index->getType($type)->addDocuments($documents);
			$index->refresh();

			ElasticaUtil::message("\tAdding $amount documents to the index\n");
			$this->reportIndexingEta();
			self::$indexing_request_ctr++;
		}

		$this->buffered = false;
		$this->buffer = array();
	}


	/**
	 * Log an estimate of the time left for the current refresh(), extrapolated from the
	 * average time taken per document so far.  Silent outside of a refresh() or before any
	 * document has been counted, as no average can be computed then.
	 */
	private function reportIndexingEta() {
		if(!isset($this->StartTime) || $this->nDocumentsIndexed <= 0) {
			return;
		}

		$elapsed = microtime(true) - $this->StartTime;
		$timePerDoc = $elapsed / $this->nDocumentsIndexed;
		$documentsRemaining = $this->nDocumentsToIndexForLocale - $this->nDocumentsIndexed;

		// Round once, up front, so the seconds part can never be displayed as 60
		$eta = (int) round($documentsRemaining * $timePerDoc);
		$hours = (int) ($eta / 3600);
		$minutes = (int) (($eta % 3600) / 60);
		$seconds = $eta % 60;
		$etaHR = "{$hours}h {$minutes}m {$seconds}s";
		ElasticaUtil::message("ETA to completion of indexing $this->locale ($documentsRemaining documents): $etaHR");
	}


	/**
	 * Deletes a record from the index.
	 *
	 * @param Searchable $record
	 */
	public function remove($record) {
		$index = $this->getIndex();
		$type = $index->getType($record->getElasticaType());
		$type->deleteDocument($record->getElasticaDocument());
		$index->refresh();
	}


	/**
	 * Creates the index and the type mappings.  An existing index is deleted first.
	 */
	public function define() {
		$index = $this->getIndex();

		# Recreate the index
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();

		foreach($this->getIndexedClasses() as $class) {
			$sng = singleton($class);
			$mapping = $sng->getElasticaMapping();
			$mapping->setType($index->getType($sng->getElasticaType()));
			$mapping->send();
		}
	}


	/**
	 * Refresh an array of records in the index.  Records that say they should not be
	 * shown in search are skipped.
	 *
	 * @param array $records
	 */
	protected function refreshRecords($records) {
		foreach($records as $record) {
			if($record->showRecordInSearch()) {
				$this->index($record);
			}
		}
	}


	/**
	 * Get a List of all records by class. Get the "Live data" If the class has the "Versioned" extension
	 *
	 * @param string $class Class Name
	 * @param  int $pageSize Optional page size, only a max of this number of records returned
	 * @param  int $page Page number to return
	 * @return \DataList $records
	 */
	protected function recordsByClassConsiderVersioned($class, $pageSize = 0, $page = 0) {
		if($class::has_extension("Versioned")) {
			$records = \Versioned::get_by_stage($class, 'Live');
		} else {
			$records = $class::get();
		}

		if($pageSize > 0) {
			$records = $records->limit($pageSize, $page * $pageSize);
		}
		return $records;
	}


	/**
	 * Refresh the records of a given class within the search index, in batches of 500,
	 * each batch being sent as one bulk index operation.
	 *
	 * @param string $class Class Name
	 */
	protected function refreshClass($class) {
		$nRecords = $this->recordsByClassConsiderVersioned($class)->count();
		$batchSize = 500;

		// Round up so a partial last page is included without scheduling an empty extra one
		$pages = (int) ceil($nRecords / $batchSize);

		for($i = 0; $i < $pages; $i++) {
			$this->startBulkIndex();
			$pagedRecords = $this->recordsByClassConsiderVersioned($class, $batchSize, $i);
			$this->nDocumentsIndexed += $pagedRecords->count();
			$this->refreshRecords($pagedRecords->toArray());
			$this->endBulkIndex();
		}
	}


	/**
	 * Re-indexes each record in the index.
	 */
	public function refresh() {
		$this->StartTime = microtime(true);

		$classes = $this->getIndexedClasses();

		//Count the number of documents for this locale
		$amount = 0;
		echo "CURRENT LOCALE:" . $this->locale;
		foreach($classes as $class) {
			$amount += $this->recordsByClassConsiderVersioned($class)->count();
		}

		$this->nDocumentsToIndexForLocale = $amount;
		$this->nDocumentsIndexed = 0;

		foreach($classes as $classname) {
			ElasticaUtil::message("Indexing class $classname");

			// Cache the lookup, it is the same for every refresh
			if(!isset(self::$site_tree_classes[$classname])) {
				self::$site_tree_classes[$classname] = SearchableHelper::isInSiteTree($classname);
			}
			$inSiteTree = self::$site_tree_classes[$classname];

			// Within the site tree only SiteTree itself is refreshed, this prevents the same
			// item being indexed twice due to class inheritance.  Data objects are always refreshed.
			if(!$inSiteTree || $classname === 'SiteTree') {
				$this->refreshClass($classname);
			}
		}

		echo "Completed indexing documents for locale $this->locale\n";
	}


	/**
	 * Reset the current index: delete it if it is present, then create it afresh.
	 */
	public function reset() {
		$index = $this->getIndex();
		// Deleting a missing index fails, so only delete what is there (as define() does)
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();
	}


	/**
	 * Create the index for the current locale from the locale's index settings.
	 */
	private function createIndex() {
		$settings = $this->getIndexSettingsForCurrentLocale()->generateConfig();
		$this->getIndex()->create($settings, true);
	}


	/**
	 * Get the index settings for the current locale
	 *
	 * @return IndexSettings index settings for the current locale
	 * @throws \Exception if the 'Elastica' / 'indexsettings' config has no entry for the locale
	 */
	public function getIndexSettingsForCurrentLocale() {
		$indexSettings = \Config::inst()->get('Elastica', 'indexsettings');
		if(!isset($indexSettings[$this->locale])) {
			throw new \Exception('ERROR: No index settings are provided for locale ' . $this->locale . "\n");
		}
		return \Injector::inst()->create($indexSettings[$this->locale]);
	}


	/**
	 * Gets the classes which are indexed (i.e. have the extension applied).
	 *
	 * Classes implementing TestOnly are only considered in test mode, and then only when
	 * they are on the whitelist of known test classes.
	 *
	 * @return array
	 */
	public function getIndexedClasses() {
		$classes = array();

		$whitelist = array('SearchableTestPage', 'SearchableTestFatherPage', 'SearchableTestGrandFatherPage',
			'FlickrPhotoTO', 'FlickrTagTO', 'FlickrAuthorTO', 'FlickrSetTO');

		foreach(\ClassInfo::subclassesFor('DataObject') as $candidate) {
			$interfaces = class_implements($candidate);

			// Only allow whitelisted test classes, and only in testing mode
			if(isset($interfaces['TestOnly'])) {
				if(!$this->test_mode || !in_array($candidate, $whitelist, true)) {
					continue;
				}
			}

			// Instantiated only now, so that skipped test classes are never created
			if(singleton($candidate)->hasExtension('SilverStripe\\Elastica\\Searchable')) {
				$classes[] = $candidate;
			}
		}

		return $classes;
	}


	/**
	 * Get the number of indexing requests made.  Used for testing bulk indexing
	 * @return integer indexing request counter
	 */
	public function getIndexingRequestCtr() {
		return self::$indexing_request_ctr;
	}


	/**
	 * Get the term vectors in the index for the provided Searchable object
	 *
	 * @param  Searchable $searchable An object that implements Searchable
	 * @return array             array of field name to terms indexed, empty if the response holds none
	 */
	public function getTermVectors($searchable) {
		$fieldMappings = $searchable->getElasticaMapping()->getProperties();

		// Request every mapped field along with each of its sub fields, e.g. Title and Title.standard
		$allFields = array();
		foreach($fieldMappings as $field => $mapping) {
			$allFields[] = $field;
			if(isset($mapping['fields'])) {
				foreach(array_keys($mapping['fields']) as $subField) {
					$allFields[] = $field . '.' . $subField;
				}
			}
		}
		sort($allFields);

		$data = array(
			'fields' => $allFields,
			'offsets' => true,
			'payloads' => true,
			'positions' => true,
			'term_statistics' => true,
			'field_statistics' => true
		);

		$path = $this->getIndex()->getName() . '/' . $searchable->ClassName . '/' . $searchable->ID . '/_termvector';
		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$data,
			array()
		);

		$data = $response->getData();
		return isset($data['term_vectors']) ? $data['term_vectors'] : array();
	}
}
679