ElasticaService::getHighlightingConfig()   B
last analyzed

Complexity

Conditions 4
Paths 6

Size

Total Lines 39
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 4.3731
Metric Value
dl 0
loc 39
ccs 20
cts 28
cp 0.7143
rs 8.5806
cc 4
eloc 26
nc 6
nop 0
crap 4.3731
1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Client;
6
use Elastica\Query;
7
use Elastica\Search;
8
use SilverStripe\Elastica\ElasticaUtil;
9
10
/**
11
 * A service used to interact with elastic search.
12
 */
13
class ElasticaService {
14
15
	/**
	 * Documents queued while bulk indexing is active, grouped by Elastica type name
	 * (see index() and endBulkIndex()).
	 * @var array map of type name to \Elastica\Document[]
	 */
	protected $buffer = array();


	/**
	 * @var bool controls whether indexing operations are buffered or not
	 */
	protected $buffered = false;


	/**
	 * @var \Elastica\Client Elastica Client object
	 */
	private $client;


	/**
	 * @var string index name, without the locale suffix
	 */
	private $indexName;


	/**
	 * The code of the locale being indexed or searched
	 * @var string e.g. th_TH, en_US
	 */
	private $locale;


	/**
	 * Mapping of DataObject ClassName and whether it is in the SiteTree or not
	 * @var array
	 */
	private static $site_tree_classes = array();


	/**
	 * Counter used for testing, records indexing requests
	 * @var integer
	 */
	public static $indexing_request_ctr = 0;


	/**
	 * Array of highlighted fields, e.g. Title, Title.standard.  If this is empty then the
	 * ShowHighlight field of SearchableField is used to determine which fields to highlight
	 * @var array
	 */
	private $highlightedFields = array();


	/**
	 * The number of documents to index currently for this locale
	 * @var integer
	 */
	private $nDocumentsToIndexForLocale = 0;


	/**
	 * Terms extracted from the last 'more like this' query validation, keyed by field name.
	 * This used to be created on the fly as a dynamic (hence public) property, so it is
	 * declared public to stay compatible.  Null until a more like this search has run.
	 * @var array|null
	 */
	public $MoreLikeThisTerms;


	/**
	 * Timestamp, as returned by microtime(true), of when refresh() was last started.
	 * Formerly a dynamic property, kept public for compatibility.  Null until refresh() runs.
	 * @var float|null
	 */
	public $StartTime;


	/**
	 * Number of documents processed so far by the current refresh(), used for the ETA.
	 * Formerly a dynamic property, kept public for compatibility.
	 * @var integer
	 */
	public $nDocumentsIndexed = 0;
73
74
75
	/**
	 * Set the highlight fields for subsequent searches.
	 *
	 * @param array $newHighlightedFields field names to highlight, e.g. Title, Title.standard
	 */
	public function setHighlightedFields($newHighlightedFields) {
		$this->highlightedFields = $newHighlightedFields;
	}
81
82
83
	/**
	 * Enable this to allow test classes not to be ignored when indexing
	 * @var bool
	 */
	public $test_mode = false;
87
88
89
	/**
	 * Create a service bound to a client and a base index name.  The locale starts out as
	 * the default locale reported by i18n.
	 *
	 * @param \Elastica\Client $client
	 * @param string $newIndexName Name of the new index
	 */
	public function __construct(Client $client, $newIndexName) {
		$this->indexName = $newIndexName;
		$this->client = $client;
		$this->locale = \i18n::default_locale();
	}
98
99
100 10
	/**
	 * Switch test mode on or off.
	 *
	 * @param bool $newTestMode the new value of the test_mode flag
	 */
	public function setTestMode($newTestMode) {
		$this->test_mode = $newTestMode;
	}
103
104
105
	/**
	 * Get the client this service was constructed with.
	 *
	 * @return \Elastica\Client
	 */
	public function getClient() {
		return $this->client;
	}
111
112
113
	/**
	 * Get the index object for the current locale.
	 *
	 * @return \Elastica\Index
	 */
	public function getIndex() {
		$localeIndexName = $this->getLocaleIndexName();
		return $this->getClient()->getIndex($localeIndexName);
	}
120
121
122 10
	/**
	 * Change the locale used for subsequent indexing and searching.
	 *
	 * @param string $newLocale locale code, e.g. th_TH, en_US
	 */
	public function setLocale($newLocale) {
		$this->locale = $newLocale;
	}
125
126
	/**
	 * Get the base index name, as passed to the constructor (no locale suffix).
	 *
	 * @return string
	 */
	public function getIndexName() {
		return $this->indexName;
	}
129
130 10
	/**
	 * Build the locale specific index name: the base name and the locale joined together,
	 * lower cased, with every hyphen turned into an underscore.
	 *
	 * @return string
	 */
	private function getLocaleIndexName() {
		$combined = strtolower($this->indexName . '-' . $this->locale);
		return str_replace('-', '_', $combined);
	}
136
137
138
	/**
	 * Performs a search query against the index of the current locale and returns the results.
	 *
	 * @param \Elastica\Query|string|array $query anything \Elastica\Query::create() accepts
	 * @param string|array $types Comma separated list (or array) of SilverStripe classes to
	 *                            search, or blank for all
	 * @return \Elastica\ResultSet results; for a 'more like this' query the extracted terms
	 *                             are attached as ->MoreLikeThisTerms
	 */
	public function search($query, $types = '') {
		$query = Query::create($query); // may be a string
		if(is_string($types)) {
			// explode() on a blank string gives array(''), which would add an empty type;
			// drop blank entries so that blank really means "all types"
			$types = array_values(array_filter(array_map('trim', explode(',', $types)), 'strlen'));
		}

		$data = $query->toArray();
		$query->MoreLikeThis = isset($data['query']['more_like_this']);

		// Search through the configured client, like every other request made by this service
		$search = new Search($this->getClient());

		// get results from all shards, this makes test repeatable
		if($this->test_mode) {
			$search->setOption('search_type', Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH);
		}

		// The index and the types are registered exactly once; addTypesToSearch() also
		// extracts the terms of a more like this query before adding the types
		$search->addIndex($this->getLocaleIndexName());
		$this->addTypesToSearch($search, $types, $query);

		$highlights = $this->getHighlightingConfig();
		$this->addExtractedQueryTermsForMoreLikeThis($query, $highlights);
		$query->setHighlight($highlights);

		$params = $search->getOptions();
		$searchResults = $search->search($query, $params);

		// Only a more like this query gets the terms, otherwise terms left over from an
		// earlier more like this search would leak into unrelated results
		if($query->MoreLikeThis && isset($this->MoreLikeThisTerms)) {
			$searchResults->MoreLikeThisTerms = $this->MoreLikeThisTerms;
		}

		return $searchResults;
	}
183
184
185
	/**
	 * For a 'more like this' query, replace the highlighted fields with one entry per field
	 * for which terms were extracted, each carrying a multi_match highlight_query built from
	 * those terms.  Other queries leave the highlight configuration untouched.
	 *
	 * @param Query $query the query being searched, with its MoreLikeThis flag already set
	 * @param array $highlights highlight configuration, modified in place
	 */
	private function addExtractedQueryTermsForMoreLikeThis($query, &$highlights) {
		// Without extracted terms there is nothing to iterate over; keep the configured
		// highlight fields instead of replacing them with an empty set
		if(!$query->MoreLikeThis || empty($this->MoreLikeThisTerms)) {
			return;
		}

		$termsMatchingQuery = array();
		foreach($this->MoreLikeThisTerms as $field => $terms) {
			$termQuery = array('multi_match' => array(
				'query' => implode(' ', $terms),
				'type' => 'most_fields',
				'fields' => array($field)
			));
			$termsMatchingQuery[$field] = array('highlight_query' => $termQuery);
		}
		$highlights['fields'] = $termsMatchingQuery;
	}
202
203
204
	/**
	 * Extract the terms of a 'more like this' query if applicable, then register each of the
	 * requested types with the search.
	 *
	 * @param Search $search
	 * @param array $types types to restrict the search to, may be empty
	 * @param Query $query
	 */
	private function addTypesToSearch(&$search, $types, $query) {
		// A 'more like this' query needs an extra request, a query validation with explain
		// and rewrite turned on, to find out which terms were used for searching
		$this->checkForTermsMoreLikeThis($query, $search);

		if(empty($types)) {
			return;
		}

		foreach($types as $typeName) {
			$search->addType($typeName);
		}
	}
219
220
221 9
	/**
	 * Build the highlight section of a query from the Elastica 'Highlights' configuration.
	 *
	 * The fields are those set through setHighlightedFields(), used as given.  If none were
	 * set, the names of the string SearchableFields flagged ShowHighlights are used instead,
	 * each with '.standard' appended.
	 *
	 * @return array with the keys pre_tags, post_tags and fields
	 */
	private function getHighlightingConfig() {
		$cfg = \Config::inst()->get('Elastica', 'Highlights');
		$preTags = $cfg['PreTags'];
		$postTags = $cfg['PostTags'];

		// Every highlighted field shares the same options
		$perFieldOptions = array(
			'fragment_size' => $cfg['Phrase']['FragmentSize'],
			'number_of_fragments' => $cfg['Phrase']['NumberOfFragments'],
			'no_match_size'=> 200
		);

		$names = $this->highlightedFields;
		$suffix = '';

		if(count($names) == 0) {
			// Nothing provided explicitly, fall back to the fields flagged for highlighting
			$flagged = \SearchableField::get()->filter(array('Type' => 'string', 'ShowHighlights' => true));
			$names = $flagged->map('Name')->toArray();
			$suffix = '.standard';
		}

		$fields = array();
		foreach($names as $name) {
			$fields[$name . $suffix] = $perFieldOptions;
		}

		return array(
			'pre_tags' => array($preTags),
			'post_tags' => array($postTags),
			'fields' => $fields
		);
	}
260
261
262 9
	/**
	 * For a 'more like this' query, find out which terms are used for searching by running
	 * the query through the validate API with explain and rewrite turned on, and store the
	 * parsed terms in $this->MoreLikeThisTerms.  Any other query is ignored.
	 *
	 * @param Query $elasticaQuery query with its MoreLikeThis flag already set
	 * @param Search $search search whose path is used to derive the validation path
	 */
	private function checkForTermsMoreLikeThis($elasticaQuery, $search) {
		if(!$elasticaQuery->MoreLikeThis) {
			return;
		}

		$data = $elasticaQuery->toArray();
		$termData = array('query' => $data['query']);

		// Same indexes and types as the search, but hitting the validation endpoint
		$path = str_replace('_search', '_validate/query', $search->getPath());
		$params = array('explain' => true, 'rewrite' => true);

		if($this->test_mode) {
			$params['search_type'] = Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
		}

		$response = $this->getClient()->request(
			$path,
			\Elastica\Request::GET,
			$termData,
			$params
		);

		$rData = $response->getData();

		// The explanations list can be empty, and an entry for a query that failed
		// validation has no 'explanation' key, so check the complete path
		if(!isset($rData['explanations'][0]['explanation'])) {
			return;
		}

		$terms = ElasticaUtil::parseSuggestionExplanation($rData['explanations'][0]['explanation']);
		if(isset($terms)) {
			$this->MoreLikeThisTerms = $terms;
		}
	}
299
300
301
	/**
	 * Ensure that the index is present, creating it when it does not exist yet
	 */
	protected function ensureIndex() {
		if($this->getIndex()->exists()) {
			return;
		}

		$this->createIndex();
	}
310
311
312
	/**
	 * Ensure that there is a mapping present for a type.  An existing mapping is returned as
	 * is; otherwise the index is ensured, the mapping of the record is set on the type and
	 * its array form is returned.
	 *
	 * NOTE(review): the mapping is read before the index is ensured - confirm that reading
	 * it is safe when the index does not exist yet.
	 *
	 * @param \Elastica\Type $type Type object
	 * @param \DataObject $record DataObject that provides getElasticaMapping()
	 * @return array the mapping in array form
	 */
	protected function ensureMapping(\Elastica\Type $type, \DataObject $record) {
		$existing = $type->getMapping();
		if($existing != array()) {
			return $existing;
		}

		$this->ensureIndex();
		$recordMapping = $record->getElasticaMapping();
		$type->setMapping($recordMapping);

		return $recordMapping->toArray();
	}
329
330
331
	/**
	 * Either creates or updates a record in the index.  While a bulk operation is active the
	 * document is only queued under its type name; otherwise it is sent straight away, the
	 * index is refreshed and the indexing request counter is incremented.
	 *
	 * @param Searchable $record
	 */
	public function index($record) {
		$document = $record->getElasticaDocument();
		$typeName = $record->getElasticaType();

		if(!$this->buffered) {
			$index = $this->getIndex();
			$type = $index->getType($typeName);

			$this->ensureMapping($type, $record);

			$type->addDocument($document);
			$index->refresh();
			self::$indexing_request_ctr++;
			return;
		}

		// Appending creates the list for a type on first use
		$this->buffer[$typeName][] = $document;
	}
357
358
359
	/**
	 * Begins a bulk indexing operation: from now on index() queues documents instead of
	 * sending them immediately.
	 */
	public function startBulkIndex() {
		$this->buffered = true;
	}
366
367
368
	/**
	 * Debugging aid: list the indexes by running curl against localhost:9200 and print the
	 * output between two marker lines.
	 *
	 * @param string $trace label shown in the marker lines
	 * @return array lines of output produced by the curl command
	 */
	public function listIndexes($trace) {
		$lines = array();
		exec("curl 'localhost:9200/_cat/indices?v'", $lines);

		ElasticaUtil::message("\n++++ $trace ++++\n");
		ElasticaUtil::message(print_r($lines, true));
		ElasticaUtil::message("++++ /{$trace} ++++\n\n");

		return $lines;
	}
376
377
378
	/**
	 * Ends the current bulk index operation and indexes the buffered documents, one bulk
	 * request per type.  While a refresh() is in progress an estimated time to completion
	 * is printed after each request.  Afterwards the buffer is emptied and buffering is
	 * switched off.
	 */
	public function endBulkIndex() {
		$index = $this->getIndex();

		// Total over all types; the buffer does not change while it is being flushed, so
		// this is worked out once rather than for every type
		$amount = 0;
		foreach($this->buffer as $documents) {
			$amount += count($documents);
		}

		foreach($this->buffer as $type => $documents) {
			$index->getType($type)->addDocuments($documents);
			$index->refresh();

			ElasticaUtil::message("\tAdding $amount documents to the index\n");

			// No ETA without a running refresh, nor before any document has been counted
			// (which would be a division by zero)
			if(isset($this->StartTime) && !empty($this->nDocumentsIndexed)) {
				$elapsed = microtime(true) - $this->StartTime;
				$timePerDoc = $elapsed / $this->nDocumentsIndexed;
				$documentsRemaining = $this->nDocumentsToIndexForLocale - $this->nDocumentsIndexed;

				// Round once up front so that the parts can never show as 60 seconds
				$eta = (int)round($documentsRemaining * $timePerDoc);
				$hours = (int)($eta / 3600);
				$minutes = (int)(($eta % 3600) / 60);
				$seconds = $eta % 60;
				$etaHR = "{$hours}h {$minutes}m {$seconds}s";
				ElasticaUtil::message("ETA to completion of indexing $this->locale ($documentsRemaining documents): $etaHR");
			}
			self::$indexing_request_ctr++;
		}

		$this->buffered = false;
		$this->buffer = array();
	}
410
411
412
	/**
	 * Deletes a record from the index and refreshes the index.
	 *
	 * @param Searchable $record
	 */
	public function remove($record) {
		$index = $this->getIndex();
		$index->getType($record->getElasticaType())->deleteDocument($record->getElasticaDocument());
		$index->refresh();
	}
423
424
425
	/**
	 * Creates the index and the type mappings.  An existing index is deleted first, then
	 * the mapping of every indexed class is sent to the new index.
	 */
	public function define() {
		$index = $this->getIndex();

		// Start from scratch
		if($index->exists()) {
			$index->delete();
		}
		$this->createIndex();

		foreach($this->getIndexedClasses() as $indexedClass) {
			$instance = singleton($indexedClass);
			$classMapping = $instance->getElasticaMapping();
			$classMapping->setType($index->getType($instance->getElasticaType()));
			$classMapping->send();
		}
	}
444
445
446
	/**
	 * Index each of the given records that reports it should be shown in search
	 *
	 * @param array $records
	 */
	protected function refreshRecords($records) {
		foreach($records as $candidate) {
			if(!$candidate->showRecordInSearch()) {
				continue;
			}
			$this->index($candidate);
		}
	}
458
459
460
	/**
	 * Get a list of all records of a class, taken from the Live stage when the class has
	 * the "Versioned" extension.
	 *
	 * @param string $class Class Name
	 * @param  int $pageSize Optional page size, at most this many records are returned;
	 *                       0 means no paging
	 * @param  int $page Zero based number of the page to return
	 * @return \DataList $records
	 */
	protected function recordsByClassConsiderVersioned($class, $pageSize = 0, $page = 0) {
		$records = $class::has_extension("Versioned")
			? \Versioned::get_by_stage($class, 'Live')
			: $class::get();

		if($pageSize > 0) {
			$records = $records->limit($pageSize, $page * $pageSize);
		}

		return $records;
	}
487
488
489
	/**
	 * Refresh the records of a given class within the search index, in batches of 500,
	 * each batch being sent as one bulk operation.
	 *
	 * @param string $class Class Name
	 */
	protected function refreshClass($class) {
		$nRecords = $this->recordsByClassConsiderVersioned($class)->count();
		$batchSize = 500;

		// Exactly as many pages as are needed; adding one to the plain division always
		// produced a trailing empty batch (and a fractional loop bound)
		$pages = (int)ceil($nRecords / $batchSize);

		// The counter is normally reset by refresh(); make sure it exists otherwise
		if(!isset($this->nDocumentsIndexed)) {
			$this->nDocumentsIndexed = 0;
		}

		for($i = 0; $i < $pages; $i++) {
			$this->startBulkIndex();
			// Load the page once and count what was loaded, rather than a separate count query
			$batch = $this->recordsByClassConsiderVersioned($class, $batchSize, $i)->toArray();
			$this->nDocumentsIndexed += count($batch);
			$this->refreshRecords($batch);
			$this->endBulkIndex();
		}
	}
508
509
510
	/**
	 * Re-indexes each record in the index for the current locale.
	 *
	 * Classes outside the SiteTree are refreshed individually.  Of the classes in the
	 * SiteTree only 'SiteTree' itself is refreshed, which prevents the same item being
	 * indexed twice due to class inheritance.  Progress messages are printed along the way.
	 */
	public function refresh() {
		$this->StartTime = microtime(true);

		// Resolve the list once, it is built by walking every DataObject subclass
		$classes = $this->getIndexedClasses();

		// Work out which classes are refreshed directly, caching the SiteTree lookups
		$refreshDirectly = array();
		foreach($classes as $classname) {
			if(!isset(self::$site_tree_classes[$classname])) {
				self::$site_tree_classes[$classname] = SearchableHelper::isInSiteTree($classname);
			}
			$inSiteTree = self::$site_tree_classes[$classname];
			$refreshDirectly[$classname] = !$inSiteTree || $classname === 'SiteTree';
		}

		// Count the number of documents for this locale.  Only the classes that are
		// refreshed directly are counted, so that the total matches what refreshClass()
		// goes on to process and the ETA is not inflated
		$amount = 0;
		foreach($classes as $classname) {
			if($refreshDirectly[$classname]) {
				$amount += $this->recordsByClassConsiderVersioned($classname)->count();
			}
		}

		$this->nDocumentsToIndexForLocale = $amount;
		$this->nDocumentsIndexed = 0;

		foreach($classes as $classname) {
			ElasticaUtil::message("Indexing class $classname");

			if($refreshDirectly[$classname]) {
				$this->refreshClass($classname);
			}
		}

		ElasticaUtil::message("Completed indexing documents for locale $this->locale\n");
	}
553
554
555
	/**
	 * Reset the current index: delete it if it exists, then create it afresh
	 */
	public function reset() {
		$index = $this->getIndex();

		// Same guard as define(), so resetting an index that was never created works too
		if($index->exists()) {
			$index->delete();
		}

		$this->createIndex();
	}
563
564
565 10
	/**
	 * Create the index for the current locale from the configuration generated by that
	 * locale's index settings.  The second argument passed to create() is true.
	 */
	private function createIndex() {
		$this->getIndex()->create(
			$this->getIndexSettingsForCurrentLocale()->generateConfig(),
			true
		);
	}
570
571
572
	/**
	 * Get the index settings for the current locale.  The class to instantiate is looked up
	 * by locale in the Elastica 'indexsettings' configuration.
	 *
	 * @return IndexSettings index settings for the current locale
	 * @throws \Exception when no settings class is configured for the current locale
	 */
	public function getIndexSettingsForCurrentLocale() {
		$indexSettings = \Config::inst()->get('Elastica', 'indexsettings');

		if(!isset($indexSettings[$this->locale])) {
			throw new \Exception('ERROR: No index settings are provided for locale ' . $this->locale . "\n");
		}

		return \Injector::inst()->create($indexSettings[$this->locale]);
	}
588
589
590
	/**
	 * Gets the classes which are indexed (i.e. have the Searchable extension applied).
	 * Classes implementing TestOnly are only considered when they are on the test
	 * whitelist and test mode is on.
	 *
	 * @return array class names
	 */
	public function getIndexedClasses() {
		$testWhitelist = array(
			'SearchableTestPage', 'SearchableTestFatherPage', 'SearchableTestGrandFatherPage',
			'FlickrPhotoTO', 'FlickrTagTO', 'FlickrAuthorTO', 'FlickrSetTO'
		);

		$indexed = array();

		foreach(\ClassInfo::subclassesFor('DataObject') as $candidate) {
			$instance = singleton($candidate);
			$interfaces = class_implements($candidate);

			// A test class is skipped unless it is whitelisted and test mode is on
			if(isset($interfaces['TestOnly'])) {
				if(!in_array($candidate, $testWhitelist) || !$this->test_mode) {
					continue;
				}
			}

			if($instance->hasExtension('SilverStripe\\Elastica\\Searchable')) {
				$indexed[] = $candidate;
			}
		}

		return $indexed;
	}
624
625
626
	/**
	 * Get the number of indexing requests made so far.  Used for testing bulk indexing
	 *
	 * @return integer indexing request counter
	 */
	public function getIndexingRequestCtr() {
		return self::$indexing_request_ctr;
	}
633
634
635
	/**
	 * Get the term vectors in the index for the provided Searchable object.  Vectors are
	 * requested for every mapped field and for each of its sub fields (as field.subfield).
	 *
	 * NOTE(review): the request path uses $searchable->ClassName as the type, whereas
	 * indexing uses getElasticaType() - confirm that the two always agree.
	 *
	 * @param  Searchable $searchable An object that implements Searchable
	 * @return array             array of field name to terms indexed, empty if the response
	 *                           contains no term vectors
	 */
	public function getTermVectors($searchable) {
		$fieldMappings = $searchable->getElasticaMapping()->getProperties();

		// Collect the name of every field along with its dotted sub field names
		$allFields = array();
		foreach($fieldMappings as $field => $mapping) {
			$allFields[] = $field;

			if(isset($mapping['fields'])) {
				foreach(array_keys($mapping['fields']) as $subField) {
					$allFields[] = $field . '.' . $subField;
				}
			}
		}
		sort($allFields);

		$body = array(
			'fields' => $allFields,
			'offsets' => true,
			'payloads' => true,
			'positions' => true,
			'term_statistics' => true,
			'field_statistics' => true
		);

		$path = $this->getIndex()->getName() . '/' . $searchable->ClassName . '/' . $searchable->ID . '/_termvector';
		$response = $this->getClient()->request($path, \Elastica\Request::GET, $body, array());

		$result = $response->getData();
		if(isset($result['term_vectors'])) {
			return $result['term_vectors'];
		}

		return array();
	}
681
}
682