Completed
Push — dev2 ( 6eabe6...81b774 )
by Gordon
03:04
created

ElasticaService::refreshClass()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 14
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 14
rs 9.4286
cc 2
eloc 11
nc 2
nop 1
1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Client;
6
use Elastica\Query;
7
use Elastica\Search;
8
use SilverStripe\Elastica\ElasticaUtil;
9
10
/**
11
 * A service used to interact with elastic search.
12
 */
13
class ElasticaService {
14
15
	/**
	 * Documents queued while buffering is enabled, grouped by Elastica type name
	 * @var array map of type name to \Elastica\Document[]
	 */
	protected $buffer = array();


	/**
	 * @var bool controls whether indexing operations are buffered or not
	 */
	protected $buffered = false;


	/**
	 * @var \Elastica\Client Elastica Client object
	 */
	private $client;


	/**
	 * @var string index name
	 */
	private $indexName;


	/**
	 * The code of the locale being indexed or searched
	 * @var string e.g. th_TH, en_US
	 */
	private $locale;


	/**
	 * Mapping of DataObject ClassName and whether it is in the SiteTree or not
	 * @var array
	 */
	private static $site_tree_classes = array();


	/**
	 * Counter used for testing; records the number of indexing requests made
	 * @var integer
	 */
	public static $indexing_request_ctr = 0;


	/**
	 * Array of highlighted fields, e.g. Title, Title.standard.  If this is empty then the
	 * ShowHighlight field of SearchableField is used to determine which fields to highlight
	 * @var array
	 */
	private $highlightedFields = array();


	/**
	 * The total number of documents to index for the current locale; the number still
	 * outstanding is derived from this and $nDocumentsIndexed
	 * @var integer
	 */
	private $nDocumentsToIndexForLocale = 0;


	/**
	 * Running count of documents handed to the indexer during refresh().  Declared
	 * explicitly (it used to be created dynamically) and kept public so existing access
	 * keeps working.
	 * @var integer
	 */
	public $nDocumentsIndexed = 0;


	/**
	 * Timestamp from microtime(true) taken when refresh() starts; null until then
	 * @var float|null
	 */
	public $StartTime;


	/**
	 * Terms extracted for the most recent 'more like this' query; null when there are none
	 * @var array|null
	 */
	public $MoreLikeThisTerms;
73
74
75
	/**
	 * Choose the fields to highlight in subsequent searches.
	 *
	 * @param array $newHighlightedFields field names, e.g. Title or Title.standard
	 */
	public function setHighlightedFields($newHighlightedFields) {
		$this->highlightedFields = $newHighlightedFields;
	}
81
82
83
	/*
84
	Enable this to allow test classes not to be ignored when indexing
85
	 */
86
	public $test_mode = false;
87
88
89
	/**
	 * Store the client and base index name; the locale starts out as the site default.
	 *
	 * @param \Elastica\Client $client
	 * @param string $newIndexName Name of the new index
	 */
	public function __construct(Client $client, $newIndexName) {
		$this->locale = \i18n::default_locale();
		$this->indexName = $newIndexName;
		$this->client = $client;
	}
98
99
100
	/**
	 * Switch test mode on or off.
	 *
	 * @param bool $newTestMode new value for the test_mode flag
	 */
	public function setTestMode($newTestMode) {
		$this->test_mode = $newTestMode;
	}
103
104
105
	/**
	 * Accessor for the client supplied to the constructor.
	 *
	 * @return \Elastica\Client
	 */
	public function getClient() {
		return $this->client;
	}
111
112
113
	/**
	 * Fetch the index for the current locale from the client.
	 *
	 * @return \Elastica\Index
	 */
	public function getIndex() {
		return $this->getClient()->getIndex($this->getLocaleIndexName());
	}
120
121
122
	/**
	 * Change the locale used for indexing and searching.
	 *
	 * @param string $newLocale locale code, e.g. th_TH, en_US
	 */
	public function setLocale($newLocale) {
		$this->locale = $newLocale;
	}
125
126
	/**
	 * @return string the base index name, without any locale suffix
	 */
	public function getIndexName() {
		return $this->indexName;
	}
129
130
	/**
	 * Build the locale specific index name: the base name and the locale joined together,
	 * lower cased, with every hyphen turned into an underscore.
	 *
	 * @return string
	 */
	private function getLocaleIndexName() {
		$combined = $this->indexName . '-' . $this->locale;
		return str_replace('-', '_', strtolower($combined));
	}
136
137
138
	/**
	 * Performs a search query against the index of the current locale and returns the results.
	 *
	 * Highlighting is configured from the 'Highlights' key of the 'Elastica' config.  For a
	 * 'more like this' query an extra validation request is made first in order to find the
	 * terms used, and those terms are attached to the returned result set.
	 *
	 * @param \Elastica\Query|string|array $query
	 * @param string|array $types List of comma separated SilverStripe classes to search, or blank for all
	 * @return \Elastica\ResultSet
	 */
	public function search($query, $types = '') {
		$query = Query::create($query); // may be a string
		if(is_string($types)) {
			// explode() on a blank string yields array(''), which would add an empty type;
			// drop blank entries so that a blank string really means 'all types'
			$types = array_filter(array_map('trim', explode(',', $types)), 'strlen');
		}

		$data = $query->toArray();
		$query->MoreLikeThis = isset($data['query']['more_like_this']);

		// Forget terms left over from an earlier 'more like this' search so that they are
		// not attached to the results of an unrelated query
		$this->MoreLikeThisTerms = null;

		// Search with the client this service was constructed with, not a default one
		$search = new Search($this->getClient());

		if($this->test_mode) {
			$search->setOption('search_type', Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH);
		}

		$search->addIndex($this->getLocaleIndexName());

		// If the query is a 'more like this' we can get the terms used for searching by performing
		// an extra query, in this case a query validation with explain and rewrite turned on.
		// This has to happen before the types are added, as the validation path is derived
		// from the search path.
		$this->checkForTermsMoreLikeThis($query, $search);

		if(!empty($types)) {
			foreach($types as $type) {
				$search->addType($type);
			}
		}

		$highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');
		$preTags = $highlightsCfg['PreTags'];
		$postTags = $highlightsCfg['PostTags'];
		$fragmentSize = $highlightsCfg['Phrase']['FragmentSize'];
		$nFragments = $highlightsCfg['Phrase']['NumberOfFragments'];

		$stringFields = $this->highlightedFields;
		$usingProvidedHighlightFields = true;

		// Nothing provided explicitly, fall back to the fields flagged for highlighting
		if(empty($stringFields)) {
			$filter = array('Type' => 'string', 'ShowHighlights' => true);
			$stringFields = \SearchableField::get()->filter($filter)->map('Name')->toArray();
			$usingProvidedHighlightFields = false;
		}

		$highlightFields = array();
		foreach($stringFields as $name) {
			// Fields taken from SearchableField are highlighted using their .standard sub field
			$fieldName = $name;
			if(!$usingProvidedHighlightFields) {
				$fieldName .= '.standard';
			}
			$highlightFields[$fieldName] = array(
				'fragment_size' => $fragmentSize,
				'number_of_fragments' => $nFragments,
				'no_match_size'=> 200
			);
		}

		$highlights = array(
			'pre_tags' => array($preTags),
			'post_tags' => array($postTags),
			'fields' => $highlightFields
		);

		// For 'more like this' highlight the terms found by the validation query instead.
		// If no terms could be determined the field based highlighting above is kept.
		if($query->MoreLikeThis && isset($this->MoreLikeThisTerms)) {
			$termsMatchingQuery = array();
			foreach($this->MoreLikeThisTerms as $field => $terms) {
				$termQuery = array('multi_match' => array(
					'query' => implode(' ', $terms),
					'type' => 'most_fields',
					'fields' => array($field)
				));
				$termsMatchingQuery[$field] = array('highlight_query' => $termQuery);
			}

			$highlights['fields'] = $termsMatchingQuery;
		}

		$query->setHighlight($highlights);

		$params = $search->getOptions();
		$searchResults = $search->search($query, $params);
		if(isset($this->MoreLikeThisTerms)) {
			$searchResults->MoreLikeThisTerms = $this->MoreLikeThisTerms;
		}

		return $searchResults;
	}
245
246
247
	/**
	 * For a 'more like this' query, ask the server to validate the query with explain and
	 * rewrite turned on, parse the explanation into terms and store them in
	 * $this->MoreLikeThisTerms.  Does nothing for any other kind of query.
	 *
	 * @param \Elastica\Query $elasticaQuery query carrying the MoreLikeThis flag set by search()
	 * @param \Elastica\Search $search search whose path is used to derive the validation path
	 */
	private function checkForTermsMoreLikeThis($elasticaQuery, $search) {
		if(!$elasticaQuery->MoreLikeThis) {
			return;
		}

		$data = $elasticaQuery->toArray();
		$termData = array('query' => $data['query']);

		// Same path as the search, but pointing at the validation endpoint
		$path = str_replace('_search', '_validate/query', $search->getPath());
		$params = array('explain' => true, 'rewrite' => true);
		if($this->test_mode) {
			$params['search_type'] = Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
		}

		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$termData,
			$params
		);

		$rData = $response->getData();

		// Check the full path to the explanation: the list may be empty or the first
		// entry may not carry an 'explanation' key
		if(isset($rData['explanations'][0]['explanation'])) {
			$terms = ElasticaUtil::parseSuggestionExplanation($rData['explanations'][0]['explanation']);
			if(isset($terms)) {
				$this->MoreLikeThisTerms = $terms;
			}
		}
	}
283
284
285
	/**
	 * Create the index for the current locale if it does not exist yet
	 */
	protected function ensureIndex() {
		if(!$this->getIndex()->exists()) {
			$this->createIndex();
		}
	}
294
295
296
	/**
	 * Ensure that there is a mapping present for the type.  When the type has none, the
	 * index is ensured first and the mapping of the record is then applied to the type.
	 *
	 * @param \Elastica\Type $type Type object
	 * @param \DataObject $record DataObject providing getElasticaMapping()
	 * @return array the existing mapping, or the newly applied one in array form
	 */
	protected function ensureMapping(\Elastica\Type $type, \DataObject $record) {
		$existing = $type->getMapping();
		if($existing != array()) {
			return $existing;
		}

		$this->ensureIndex();
		$definition = $record->getElasticaMapping();
		$type->setMapping($definition);
		return $definition->toArray();
	}
313
314
315
	/**
	 * Either creates or updates a record in the index.  While buffering is on the document
	 * is only queued under its type name; otherwise it is sent straight away, the index is
	 * refreshed and the indexing request counter is incremented.
	 *
	 * @param Searchable $record
	 */
	public function index($record) {
		$document = $record->getElasticaDocument();
		$typeName = $record->getElasticaType();

		if($this->buffered) {
			// Appending creates the list for this type on first use
			$this->buffer[$typeName][] = $document;
			return;
		}

		$index = $this->getIndex();
		$type = $index->getType($typeName);

		$this->ensureMapping($type, $record);

		$type->addDocument($document);
		$index->refresh();
		self::$indexing_request_ctr++;
	}
341
342
343
	/**
	 * Turn buffering on: from now on index() queues documents instead of sending them,
	 * until endBulkIndex() is called.
	 */
	public function startBulkIndex() {
		$this->buffered = true;
	}
350
351
352
	/**
	 * Debugging aid: run curl against the _cat/indices endpoint on localhost:9200 and
	 * output the result between two markers carrying the trace label.
	 *
	 * @param string $trace label shown in the opening and closing markers
	 * @return array lines of output captured from the curl command
	 */
	public function listIndexes($trace) {
		$output = array();
		exec("curl 'localhost:9200/_cat/indices?v'", $output);

		ElasticaUtil::message("\n++++ $trace ++++\n");
		ElasticaUtil::message(print_r($output, true));
		ElasticaUtil::message("++++ /{$trace} ++++\n\n");

		return $output;
	}
360
361
362
	/**
	 * Ends the current bulk index operation and indexes the buffered documents, one request
	 * per type.  During a refresh() an estimate of the time remaining is also reported.
	 * Buffering is switched off and the buffer emptied afterwards.
	 */
	public function endBulkIndex() {
		$index = $this->getIndex();

		// The total across all types does not change while sending, so count it only once
		$amount = 0;
		foreach($this->buffer as $documents) {
			$amount += count($documents);
		}

		foreach($this->buffer as $type => $documents) {
			$index->getType($type)->addDocuments($documents);
			$index->refresh();

			ElasticaUtil::message("\tAdding $amount documents to the index\n");

			// An ETA needs a start time and at least one counted document, otherwise the
			// time per document would be a division by zero
			if(isset($this->StartTime) && !empty($this->nDocumentsIndexed)) {
				$elapsed = microtime(true) - $this->StartTime;
				$timePerDoc = $elapsed / $this->nDocumentsIndexed;
				$documentsRemaining = $this->nDocumentsToIndexForLocale - $this->nDocumentsIndexed;

				// Round to whole seconds before splitting, so seconds can never show as 60
				$eta = (int) round(max(0, $documentsRemaining * $timePerDoc));
				$hours = (int)($eta / 3600);
				$minutes = (int)(($eta % 3600) / 60);
				$seconds = $eta % 60;
				$etaHR = "{$hours}h {$minutes}m {$seconds}s";
				ElasticaUtil::message("ETA to completion of indexing $this->locale ($documentsRemaining documents): $etaHR");
			}
			self::$indexing_request_ctr++;
		}

		$this->buffered = false;
		$this->buffer = array();
	}
394
395
396
	/**
	 * Deletes the document of a record from the index, then refreshes the index.
	 *
	 * @param Searchable $record
	 */
	public function remove($record) {
		$index = $this->getIndex();
		$index->getType($record->getElasticaType())->deleteDocument($record->getElasticaDocument());
		$index->refresh();
	}
407
408
409
	/**
	 * Recreates the index from scratch and sends the mapping of every indexed class.
	 */
	public function define() {
		$index = $this->getIndex();

		// Start from a clean slate: drop whatever is there before creating
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();

		foreach($this->getIndexedClasses() as $class) {
			$searchable = singleton($class);
			$mapping = $searchable->getElasticaMapping();
			$type = $index->getType($searchable->getElasticaType());
			$mapping->setType($type);
			$mapping->send();
		}
	}
428
429
430
	/**
	 * Index each of the given records, skipping those that should not show in search
	 *
	 * @param array $records
	 */
	protected function refreshRecords($records) {
		foreach($records as $record) {
			if(!$record->showRecordInSearch()) {
				continue;
			}
			$this->index($record);
		}
	}
442
443
444
	/**
	 * List the records of a class, reading from the Live stage when the class has the
	 * Versioned extension.  A positive page size restricts the list to a single page.
	 *
	 * @param string $class Class Name
	 * @param  int $pageSize Optional page size, only a max of this number of records returned
	 * @param  int $page Page number to return
	 * @return \DataList $records
	 */
	protected function recordsByClassConsiderVersioned($class, $pageSize = 0, $page = 0) {
		$records = $class::has_extension('Versioned')
			? \Versioned::get_by_stage($class, 'Live')
			: $class::get();

		if($pageSize > 0) {
			$records = $records->limit($pageSize, $page * $pageSize);
		}

		return $records;
	}
471
472
473
	/**
	 * Refresh the records of a given class within the search index, in bulk indexed
	 * batches of 500 records.
	 *
	 * @param string $class Class Name
	 */
	protected function refreshClass($class) {
		$nRecords = $this->recordsByClassConsiderVersioned($class)->count();
		$batchSize = 500;

		// ceil() gives exactly the number of batches required.  Dividing and adding one
		// produced a fractional bound that always ran one extra, empty, batch.
		$pages = (int) ceil($nRecords / $batchSize);

		// The counter is normally reset by refresh(); make sure it exists when this
		// method is reached by another route
		if(!isset($this->nDocumentsIndexed)) {
			$this->nDocumentsIndexed = 0;
		}

		for($page = 0; $page < $pages; $page++) {
			$this->startBulkIndex();
			$pagedRecords = $this->recordsByClassConsiderVersioned($class, $batchSize, $page);
			$this->nDocumentsIndexed += $pagedRecords->count();
			$batch = $pagedRecords->toArray();
			$this->refreshRecords($batch);
			$this->endBulkIndex();
		}
	}
492
493
494
	/**
	 * Re-indexes each record in the index for the current locale.
	 *
	 * The documents are counted up front so that progress can be estimated while indexing.
	 * Classes that are in the SiteTree are indexed once, via SiteTree itself; all other
	 * classes are indexed individually.
	 */
	public function refresh() {
		$this->StartTime = microtime(true);

		// Resolve the indexed classes once and reuse the list for counting and indexing
		$classes = $this->getIndexedClasses();

		//Count the number of documents for this locale
		$amount = 0;
		echo "CURRENT LOCALE:" . $this->locale;
		foreach($classes as $class) {
			$amount += $this->recordsByClassConsiderVersioned($class)->count();
		}

		$this->nDocumentsToIndexForLocale = $amount;
		$this->nDocumentsIndexed = 0;

		foreach($classes as $classname) {
			ElasticaUtil::message("Indexing class $classname");

			// Look the answer up once per class and remember it for later calls
			if(!isset(self::$site_tree_classes[$classname])) {
				self::$site_tree_classes[$classname] = SearchableHelper::isInSiteTree($classname);
			}
			$inSiteTree = self::$site_tree_classes[$classname];

			// Within the SiteTree only SiteTree itself is refreshed; this prevents the same
			// item being indexed twice due to class inheritance.  Plain data objects are
			// always refreshed.
			if(!$inSiteTree || $classname === 'SiteTree') {
				$this->refreshClass($classname);
			}
		}

		echo "Completed indexing documents for locale $this->locale\n";
	}
538
539
540
	/**
	 * Reset the current index: remove it if it is there, then create it afresh
	 */
	public function reset() {
		$index = $this->getIndex();

		// Only delete an index that exists, as define() does; deleting a missing index
		// would fail before the new one could be created
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();
	}
548
549
550
	/**
	 * Create the index for the current locale using the configuration generated by that
	 * locale's index settings.
	 */
	private function createIndex() {
		$config = $this->getIndexSettingsForCurrentLocale()->generateConfig();
		$this->getIndex()->create($config, true);
	}
555
556
557
	/**
	 * Instantiate the settings class configured for the current locale under the
	 * 'indexsettings' key of the 'Elastica' config.
	 *
	 * @return IndexSettings index settings for the current locale
	 * @throws \Exception when no settings class is configured for the current locale
	 */
	public function getIndexSettingsForCurrentLocale() {
		$settingsByLocale = \Config::inst()->get('Elastica', 'indexsettings');

		if(!isset($settingsByLocale[$this->locale])) {
			throw new \Exception('ERROR: No index settings are provided for locale ' . $this->locale . "\n");
		}

		return \Injector::inst()->create($settingsByLocale[$this->locale]);
	}
573
574
575
	/**
	 * Gets the classes which are indexed (i.e. have the extension applied).
	 *
	 * Classes implementing TestOnly are only considered in test mode, and then only when
	 * they appear in the whitelist below.
	 *
	 * @return array
	 */
	public function getIndexedClasses() {
		$classes = array();

		$whitelist = array('SearchableTestPage', 'SearchableTestFatherPage', 'SearchableTestGrandFatherPage',
			'FlickrPhotoTO', 'FlickrTagTO', 'FlickrAuthorTO', 'FlickrSetTO');

		foreach(\ClassInfo::subclassesFor('DataObject') as $candidate) {
			$interfaces = class_implements($candidate);

			// Only allow whitelisted test classes, and only in testing mode
			if(isset($interfaces['TestOnly'])) {
				if(!$this->test_mode || !in_array($candidate, $whitelist, true)) {
					continue;
				}
			}

			// The singleton is only fetched for classes that got past the filter above
			if(singleton($candidate)->hasExtension('SilverStripe\\Elastica\\Searchable')) {
				$classes[] = $candidate;
			}
		}

		return $classes;
	}
609
610
611
	/**
	 * Read the shared counter of indexing requests.  Used for testing bulk indexing
	 *
	 * @return integer indexing request counter
	 */
	public function getIndexingRequestCtr() {
		return self::$indexing_request_ctr;
	}
618
619
620
	/**
	 * Get the term vectors in the index for the provided Searchable object.  Every mapped
	 * field is requested, along with each of its sub fields in field.subfield form.
	 *
	 * @param  Searchable $searchable An object that implements Searchable
	 * @return array             array of field name to terms indexed
	 */
	public function getTermVectors($searchable) {
		$properties = $searchable->getElasticaMapping()->getProperties();

		$allFields = array();
		foreach($properties as $field => $mapping) {
			$allFields[] = $field;

			if(!isset($mapping['fields'])) {
				continue;
			}
			foreach(array_keys($mapping['fields']) as $subField) {
				$allFields[] = $field . '.' . $subField;
			}
		}
		sort($allFields);

		$request = array(
			'fields' => $allFields,
			'offsets' => true,
			'payloads' => true,
			'positions' => true,
			'term_statistics' => true,
			'field_statistics' => true
		);

		$path = $this->getIndex()->getName() . '/' . $searchable->ClassName . '/' . $searchable->ID . '/_termvector';
		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$request,
			array()
		);

		$result = $response->getData();
		return $result['term_vectors'];
	}
666
}
667