Searchable::getElasticaFields()   B
last analyzed

Complexity

Conditions 6
Paths 10

Size

Total Lines 42
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 30
CRAP Score 6
Metric Value
dl 0
loc 42
ccs 30
cts 30
cp 1
rs 8.439
cc 6
eloc 26
nc 10
nop 2
crap 6
1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Document;
6
use Elastica\Type\Mapping;
7
use ShortcodeParser;
8
9
/**
10
 * Adds elastic search integration to a data object.
11
 */
12
class Searchable extends \DataExtension {
13
14
	/**
15
	 * Counter used to display progress of indexing
16
	 * @var integer
17
	 */
18
	public static $index_ctr = 0;
19
20
	/**
21
	 * Every time $progressInterval divides $index_ctr exactly, a progress message is displayed
22
	 * @var integer
23
	 */
24
	private static $progressInterval = 0;
25
26
	public static $mappings = array(
27
		'Boolean'     => 'boolean',
28
		'Decimal'     => 'double',
29
		'Currency'    => 'double',
30
		'Double'      => 'double',
31
		'Enum'        => 'string',
32
		'Float'       => 'float',
33
		'HTMLText'    => 'string',
34
		'HTMLVarchar' => 'string',
35
		'Int'         => 'integer',
36
		'Text'        => 'string',
37
		'VarChar'     => 'string',
38
		'Varchar'     => 'string',
39
		'Year'        => 'integer',
40
		'Percentage'  => 'double',
41
		'Time'  => 'date',
42
		// The 2 different date types will be stored with different formats
43
		'Date'        => 'date',
44
		'SS_Datetime' => 'date',
45
		'Datetime' => 'date',
46
		'DBLocale'    => 'string'
47
	);
48
49
50
	/**
51
	 * @var ElasticaService associated elastica search service
52
	 */
53
	protected $service;
54
55
56
	/**
57
	 * Array of fields that need HTML parsed
58
	 * @var array
59
	 */
60
	protected $html_fields = array();
61
62
	/**
63
	 * Store a mapping of relationship name to result type
64
	 */
65
	protected $relationship_methods = array();
66
67
68
	/**
69
	 * If importing a large number of items from a fixtures file, or indeed some other source, then
70
	 * it is quicker to set a flag of value IndexingOff => true.  This has the effect of ensuring
71
	 * no indexing happens, a request is normally made per fixture when loading.  One can then run
72
	 * the reindexing task to bulk index in one HTTP POST request to Elasticsearch
73
	 *
74
	 * @var boolean
75
	 */
76
	private static $IndexingOff = false;
77
78
79
	/**
80
	 * @see getElasticaResult
81
	 * @var \Elastica\Result
82
	 */
83
	protected $elastica_result;
84
85
	/**
	 * Create the extension and remember the search service it works with.
	 *
	 * @param ElasticaService $service service that index, remove and term vector calls are sent to
	 */
	public function __construct(ElasticaService $service) {
		$this->service = $service;
		parent::__construct();
	}
89
90
91
	/**
	 * Name of the elasticsearch type for the owner, which is the owner's PHP class name.
	 *
	 * @return string
	 */
	public function getElasticaType() {
		$ownerClass = get_class($this->owner);
		return $ownerClass;
	}
99
100
101
	/**
	 * Raw Elastica search result attached to the owner.
	 *
	 * Only populated when the owner was produced by a search and setElasticaResult()
	 * has been called for it.
	 *
	 * @return \Elastica\Result
	 */
	public function getElasticaResult() {
		$rawResult = $this->elastica_result;
		return $rawResult;
	}
111
112
113
	/**
	 * Attach the raw Elastica search result to the owner.
	 *
	 * @param \Elastica\Result $result result later returned by getElasticaResult()
	 */
	public function setElasticaResult(\Elastica\Result $result) {
		$this->elastica_result = $result;
	}
121
122
123
	/**
	 * Gets an array of elastic field definitions.
	 *
	 * Every name returned by getAllSearchableFields() (with any '()' method marker removed)
	 * is mapped to a spec array:
	 *  - names present in the owner's db() are handed to
	 *    SearchableHelper::assignSpecForStandardFieldType(), together with the HTML field
	 *    list and the $mappings table
	 *  - names matching a list relationship or a has_one are handed to
	 *    SearchableHelper::assignSpecForRelationship()
	 *  - anything else is typed as "string"
	 *
	 * After the loop the owner may adjust the HTML field list through an optional
	 * updateElasticHTMLFields() method.
	 *
	 * @param boolean $storeMethodName forwarded to SearchableHelper::assignSpecForRelationship()
	 * @param boolean $recurse forwarded to SearchableHelper::assignSpecForRelationship()
	 * @return array field name => elastic field spec
	 */
	public function getElasticaFields($storeMethodName = false, $recurse = true) {
		$db = $this->owner->db();
		$fields = $this->getAllSearchableFields();
		$result = array();

		// The relationship maps do not depend on the field being processed, so they are
		// resolved lazily and at most once per call instead of once per non-db field.
		// NOTE(review): assumes both lookups are side-effect free - confirm in SearchableHelper.
		$has_lists = null;
		$has_ones = null;

		foreach($fields as $name => $params) {
			$spec = array();
			$name = str_replace('()', '', $name);

			if(array_key_exists($name, $db)) {
				$class = $db[$name];
				SearchableHelper::assignSpecForStandardFieldType($name, $class, $spec, $this->html_fields, self::$mappings);
			} else {
				// field name is not in the db, it could be a method
				if($has_lists === null) {
					$has_lists = SearchableHelper::getListRelationshipMethods($this->owner);
				}
				if($has_ones === null) {
					$has_ones = $this->owner->has_one();
				}

				// has_many / many_many relations take precedence over has_one
				$resultType = null;
				if(isset($has_lists[$name])) {
					$resultType = $has_lists[$name];
				} else if(isset($has_ones[$name])) {
					$resultType = $has_ones[$name];
				}

				if($resultType !== null) {
					SearchableHelper::assignSpecForRelationship($name, $resultType, $spec, $storeMethodName, $recurse);
				} else {
					// otherwise fall back to string - Enum is one such category
					$spec["type"] = "string";
				}
			}

			SearchableHelper::addIndexedFields($name, $spec, $this->owner->ClassName);
			$result[$name] = $spec;
		}

		if($this->owner->hasMethod('updateElasticHTMLFields')) {
			$this->html_fields = $this->owner->updateElasticHTMLFields($this->html_fields);
		}

		return $result;
	}
170
171
172
173
	/**
	 * Build the elasticsearch mapping for the owner's document type.
	 *
	 * Starts from getElasticaFields(), adds a not_analyzed Locale property when the owner
	 * has a Locale field, always adds IsInSiteTree and Link, and enables the _all field.
	 * The owner can post-process the mapping via an optional updateElasticsearchMapping() method.
	 *
	 * @return \Elastica\Type\Mapping
	 */
	public function getElasticaMapping() {
		$properties = $this->getElasticaFields(false);

		if($this->owner->hasField('Locale')) {
			// we wish the locale to be stored as is
			$properties['Locale'] = array(
				'type' => 'string',
				'index' => 'not_analyzed'
			);
		}

		// ADD CUSTOM FIELDS HERE THAT ARE INDEXED BY DEFAULT
		// flag recording whether or not the class is in SiteTree
		$properties['IsInSiteTree'] = array('type' => 'boolean');
		$properties['Link'] = array('type' => 'string', 'index' => 'not_analyzed');

		$mapping = new Mapping();
		$mapping->setProperties($properties);

		// The _all field concatenates all the fields together into a single field.
		// Initially added for suggestions compatibility, in that searching
		// the _all field picks up all possible suggestions
		$mapping->enableAllField();

		if(!$this->owner->hasMethod('updateElasticsearchMapping')) {
			return $mapping;
		}

		return $this->owner->updateElasticsearchMapping($mapping);
	}
209
210
211
	/**
	 * Get an elasticsearch document for the owner.
	 *
	 * Increments the shared indexing counter, optionally prints a progress message, builds
	 * the document from getFieldValuesAsArray() and then sets IsInSiteTree, Link (SiteTree
	 * classes only) and Locale (when set on the owner). The owner can replace the document
	 * via an optional updateElasticsearchDocument() method before those values are set.
	 *
	 * @return \Elastica\Document
	 */
	public function getElasticaDocument() {
		self::$index_ctr++;
		$fields = $this->getFieldValuesAsArray();

		// A 'progress' request variable overrides the reporting interval. Controller::curr()
		// cannot be dereferenced when no controller is on the stack, so only consult the
		// request when one is available.
		if(\Controller::has_curr()) {
			$request = \Controller::curr()->request;
			$progress = $request ? $request->getVar('progress') : null;
			if(!empty($progress)) {
				self::$progressInterval = (int)$progress;
			}
		}

		// the > 0 check also protects the modulo from a zero interval
		if(self::$progressInterval > 0 && self::$index_ctr % self::$progressInterval === 0) {
			ElasticaUtil::message("\t" . $this->owner->ClassName . " - Prepared " . self::$index_ctr . " for indexing...");
		}

		// Optionally update the document
		$document = new Document($this->owner->ID, $fields);
		if($this->owner->hasMethod('updateElasticsearchDocument')) {
			$document = $this->owner->updateElasticsearchDocument($document);
		}

		// Check if the current classname is part of the site tree or not
		$classname = $this->owner->ClassName;
		$inSiteTree = SearchableHelper::isInSiteTree($classname);

		$document->set('IsInSiteTree', $inSiteTree);
		if($inSiteTree) {
			$document->set('Link', $this->owner->AbsoluteLink());
		}

		if(isset($this->owner->Locale)) {
			$document->set('Locale', $this->owner->Locale);
		}

		return $document;
	}
252
253
254 10
	/**
	 * Collect the values of the owner's searchable fields into an array.
	 *
	 * For each field from getElasticaFields() one of three SearchableHelper routines
	 * stores the value:
	 *  - storeMethodTextValue() when the owner has no value for the field but a method
	 *    of that name is callable on the owner's class
	 *  - storeFieldHTMLValue() when the field is listed in $html_fields
	 *  - storeRelationshipValue() otherwise
	 *
	 * Note that $recurse is passed as the first argument of getElasticaFields(), i.e. its
	 * $storeMethodName parameter; getElasticaFields()'s own $recurse keeps its default.
	 *
	 * @param boolean $recurse forwarded to SearchableHelper::storeRelationshipValue()
	 * @return array values collected by the helper routines
	 */
	public function getFieldValuesAsArray($recurse = true) {
		$values = array();
		$ownerClass = get_class($this->owner);

		foreach($this->getElasticaFields($recurse) as $field => $config) {
			// a null value plus a callable of the same name means the field is backed by a
			// method rather than a DB column
			$isMethodField = null === $this->owner->$field && is_callable($ownerClass . "::" . $field);

			if($isMethodField) {
				SearchableHelper::storeMethodTextValue($this->owner, $field, $values, $this->html_fields);
			} else if(in_array($field, $this->html_fields)) {
				SearchableHelper::storeFieldHTMLValue($this->owner, $field, $values);
			} else {
				SearchableHelper::storeRelationshipValue($this->owner, $field, $values, $config, $recurse);
			}
		}

		return $values;
	}
271
272
273
	/**
	 * Whether the owner should be included in the search index.
	 *
	 * Records without a "ShowInSearch" field are always included; records with one are
	 * included unless its value loosely equals false.
	 *
	 * @return boolean
	 */
	public function showRecordInSearch() {
		if(!$this->owner->hasField('ShowInSearch')) {
			return true;
		}

		return false != $this->owner->ShowInSearch;
	}
282
283
284
	/**
	 * Delete the record from the search index when ShowInSearch has been switched off.
	 *
	 * Only acts when the owner is a SiteTree instance that has a ShowInSearch field,
	 * isChanged('ShowInSearch', 2) reports a change and the new value is falsy.
	 * NOTE(review): this comment previously read "(non-SiteTree)", which contradicts the
	 * instanceof \SiteTree check below - confirm which one is intended.
	 */
	public function onBeforeWrite() {
		$owner = $this->owner;

		if(!($owner instanceof \SiteTree) || !$owner->hasField('ShowInSearch')) {
			return;
		}

		if($owner->isChanged('ShowInSearch', 2) && false == $owner->ShowInSearch) {
			$this->doDeleteDocument();
		}
	}
297
298
299
	/**
	 * Delete the record from the search index if ShowInSearch is deactivated (SiteTree).
	 *
	 * Runs only for an already published owner whose ShowInSearch is falsy, and removes the
	 * document when the live version's ShowInSearch differs from the owner's value.
	 */
	public function onBeforePublish() {
		if(false == $this->owner->ShowInSearch && $this->owner->isPublished()) {
			$liveRecord = \Versioned::get_by_stage(get_class($this->owner), 'Live')->
				byID($this->owner->ID);

			// The live lookup can come back empty; guard it the same way updateCMSFields()
			// does instead of reading a property from a missing record.
			if(!empty($liveRecord) && $liveRecord->ShowInSearch != $this->owner->ShowInSearch) {
				$this->doDeleteDocument();
			}
		}
	}
311
312
313
	/**
	 * Write hook: (re)index the record once it has been written (non-SiteTree).
	 *
	 * Whether anything is actually sent to the index is decided by doIndexDocument().
	 */
	public function onAfterWrite() {
		$this->doIndexDocument();
	}
319
320
321
	/**
	 * Publish hook: (re)index the record once it has been published (SiteTree).
	 *
	 * Whether anything is actually sent to the index is decided by doIndexDocument().
	 */
	public function onAfterPublish() {
		$this->doIndexDocument();
	}
327
328
329
	/**
	 * Send the owner to the search service for indexing.
	 *
	 * Nothing happens when showRecordInSearch() is false or when the owner's IndexingOff
	 * value is truthy.
	 */
	protected function doIndexDocument() {
		if(!$this->showRecordInSearch()) {
			return;
		}

		if($this->owner->IndexingOff) {
			return;
		}

		$this->service->index($this->owner);
	}
337
338
339
	/**
	 * Delete hook: remove the record from the search index (non-SiteTree).
	 *
	 * Removal only takes place for records that showRecordInSearch() reports as searchable.
	 */
	public function onAfterDelete() {
		$this->doDeleteDocumentIfInSearch();
	}
345
346
347
	/**
	 * Unpublish hook: remove the record from the search index.
	 *
	 * Removal only takes place for records that showRecordInSearch() reports as searchable.
	 * NOTE(review): this was labelled "(non-SiteTree)", presumably copied from
	 * onAfterDelete(); as the counterpart of onAfterPublish() it looks like the SiteTree
	 * case - confirm.
	 */
	public function onAfterUnpublish() {
		$this->doDeleteDocumentIfInSearch();
	}
353
354
355
	/**
	 * Remove the record from the search index, but only when showRecordInSearch() is true
	 * for it.
	 */
	protected function doDeleteDocumentIfInSearch() {
		if(!$this->showRecordInSearch()) {
			return;
		}

		$this->doDeleteDocument();
	}
363
364
365
	/**
	 * Ask the search service to remove the owner's document.
	 *
	 * Skipped entirely when the owner's IndexingOff value is truthy. A document that is
	 * missing from the index is reported as an E_USER_NOTICE rather than an exception.
	 */
	protected function doDeleteDocument() {
		if($this->owner->IndexingOff) {
			return;
		}

		try {
			// this goes to elastica service
			$this->service->remove($this->owner);
		} catch (\Elastica\Exception\NotFoundException $e) {
			$message = "Deleted document " . $this->owner->ClassName . " (" . $this->owner->ID .
				") not found in search index.";
			trigger_error($message, E_USER_NOTICE);
		}
	}
379
380
381
	/**
	 * Return the searchable fields configured for the owner's class as an Elastica config array.
	 *
	 * The 'searchable_fields' config of the owner's class is converted with
	 * SearchableHelper::fieldsToElasticaConfig(). When recursing, every entry of the
	 * 'searchable_relationships' config that names a list relationship or a has_one adds
	 * the related class's searchable fields under the key "<methodName>()".
	 * Missing configuration is reported through user_error().
	 *
	 * @param  boolean $recurse Whether or not to traverse relationships. First time round yes, subsequently no
	 * @return array searchable fields
	 */
	public function getAllSearchableFields($recurse = true) {
		$ownerClass = get_class($this->owner);
		$fields = \Config::inst()->get($ownerClass, 'searchable_fields');

		if(!$fields) {
			user_error('The field $searchable_fields must be set for the class ' . $this->owner->ClassName);
		}

		// get the values of these fields
		$elasticaMapping = SearchableHelper::fieldsToElasticaConfig($fields);

		if(!$recurse) {
			return $elasticaMapping;
		}

		// now for the associated methods and their results
		$methodDescs = \Config::inst()->get($ownerClass, 'searchable_relationships');
		$has_ones = $this->owner->has_one();
		$has_lists = SearchableHelper::getListRelationshipMethods($this->owner);

		if(!is_array($methodDescs)) {
			return $elasticaMapping;
		}

		foreach($methodDescs as $methodDesc) {
			// split before the brackets which can optionally list which fields to index
			$splits = explode('(', $methodDesc);
			$methodName = $splits[0];

			// list relationships are checked before has_one relationships
			if(isset($has_lists[$methodName])) {
				$relClass = $has_lists[$methodName];
			} else if(isset($has_ones[$methodName])) {
				$relClass = $has_ones[$methodName];
			} else {
				user_error('The method ' . $methodName . ' not found in class ' . $this->owner->ClassName .
						', please check configuration');
				continue;
			}

			$relatedFields = \Config::inst()->get($relClass, 'searchable_fields');
			if(!$relatedFields) {
				user_error('The field $searchable_fields must be set for the class ' . $relClass);
			}

			// mark as a method, the resultant fields are correct
			$elasticaMapping[$methodName . '()'] = SearchableHelper::fieldsToElasticaConfig($relatedFields);
		}

		return $elasticaMapping;
	}
441
442
443
444
445 10
	/**
	 * Make sure a searchable class record and one searchable field record per elastic
	 * field exist for the owner's class.
	 *
	 * Field specs without a 'type' key are recorded under the method name stored in
	 * their properties ('__method') instead of the field name.
	 */
	public function requireDefaultRecords() {
		parent::requireDefaultRecords();

		$fieldSpecs = $this->getElasticaFields(true, true);
		$doSC = SearchableHelper::findOrCreateSearchableClass($this->owner->ClassName);

		foreach($fieldSpecs as $name => $spec) {
			// check for existence of methods and if they exist use that as the name
			$recordName = isset($spec['type']) ? $name : $spec['properties']['__method'];

			SearchableHelper::findOrCreateSearchableField($this->owner->ClassName, $recordName, $spec, $doSC);

			// FIXME deal with deletions
		}
	}
466
467
468
	/**
	 * Render the owner as a search result.
	 *
	 * The template <ClassName>ElasticSearchResult is offered first so that a class can
	 * override the default ElasticSearchResult template.
	 *
	 * @param string $linkToContainer exposed to the template as ContainerLink
	 * @return mixed whatever renderWith() returns for the chosen template
	 */
	public function RenderResult($linkToContainer = '') {
		$templateData = new \ArrayData(array(
			'SearchResult' => $this->owner,
			'ContainerLink' => $linkToContainer
		));

		$templateCandidates = array(
			$this->owner->ClassName . 'ElasticSearchResult',
			'ElasticSearchResult'
		);

		return $this->owner->customise($templateData)->renderWith($templateCandidates);
	}
476
477
478
	/**
	 * Fetch the term vectors of the owner from the search service.
	 *
	 * @return array as used by updateCMSFields(): field name => array with a 'terms' entry
	 */
	public function getTermVectors() {
		$termVectors = $this->service->getTermVectors($this->owner);
		return $termVectors;
	}
481
482
483
	/**
	 * Add one grid of term statistics per indexed field to the CMS, under Root.ElasticaTerms.
	 *
	 * Grids are only added when the owner is considered indexed: its table exists and,
	 * for SiteTree classes, a live record with a truthy ShowInSearch exists. Other
	 * classes are treated as indexed as soon as their table exists.
	 *
	 * @param \FieldList $fields CMS fields to extend
	 * @return \FieldList the same field list
	 */
	public function updateCMSFields(\FieldList $fields) {
		$ownerClass = $this->owner->ClassName;
		$isIndexed = false;

		// NOTE: static analysis flags both calls on the next line as deprecated since 4.0 -
		// "Use DB::get_conn instead" and "Use DB::get_schema()->hasTable() instead".
		if(\DB::getConn()->hasTable($ownerClass)) {
			if(SearchableHelper::isInSiteTree($this->owner->ClassName)) {
				// SiteTree object must have a live record, ShowInSearch = true
				$liveRecord = \Versioned::get_by_stage(get_class($this->owner), 'Live')->
					byID($this->owner->ID);
				$isIndexed = !empty($liveRecord) && $liveRecord->ShowInSearch;
			} else {
				// In the case of a DataObject we use the ShowInSearchFlag
				$isIndexed = true;
			}
		}

		if(!$isIndexed) {
			return $fields;
		}

		$termVectors = $this->getTermVectors();
		$termFields = array_keys($termVectors);
		sort($termFields);

		// statistic key in the term vector => property shown in the grid
		$statColumns = array(
			'ttf' => 'TTF',
			'doc_freq' => 'DocFreq',
			'term_freq' => 'TermFreq'
		);

		foreach($termFields as $field) {
			$terms = new \ArrayList();

			foreach($termVectors[$field]['terms'] as $term => $stats) {
				$row = new \DataObject();
				$row->Term = $term;
				foreach($statColumns as $statKey => $property) {
					if(isset($stats[$statKey])) {
						$row->$property = $stats[$statKey];
					}
				}
				$terms->push($row);
			}

			$config = \GridFieldConfig_RecordViewer::create(100);
			$viewer = $config->getComponentByType('GridFieldDataColumns');
			$viewer->setDisplayFields(array(
				'Term' => 'Term',
				'TTF' => 'Total term frequency (how often a term occurs in all documents)',
				'DocFreq' => 'n documents with this term',
				'TermFreq'=> 'n times this term appears in this field'
			));

			$gridName = str_replace('.', '_', $field);

			// a field name without any '_' after the replacement gets the '.stemmed' suffix
			$gridTitle = str_replace('standard', 'unstemmed', $field);
			if(strpos($gridName, '_') === false) {
				$gridTitle .= '.stemmed';
			}

			$gridField = new \GridField(
				'TermsFor' . $gridName, // Field name
				$gridTitle, // Field title
				$terms,
				$config
			);

			$tabName = str_replace('.', '_', $gridTitle);
			$fields->addFieldToTab('Root.ElasticaTerms.' . $tabName, $gridField);
		}

		return $fields;
	}
559
560
561
}
562