Completed
Push — dev2 ( 24a813...6eabe6 )
by Gordon
04:20
created

Searchable::getTermVectors()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 3
rs 10
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Document;
6
use Elastica\Type\Mapping;
7
use ShortcodeParser;
8
9
/**
10
 * Adds elastic search integration to a data object.
11
 */
12
class Searchable extends \DataExtension {
13
14
	/**
	 * Running count of documents prepared for indexing, used when reporting progress.
	 *
	 * @var integer
	 */
	public static $index_ctr = 0;

	/**
	 * Progress is reported every time $index_ctr is an exact multiple of this value.
	 * A value of 0 (the default) switches progress reporting off.
	 *
	 * @var integer
	 */
	private static $progressInterval = 0;

	/**
	 * Lookup table from field type name to the Elasticsearch type used to store it.
	 *
	 * @var array
	 */
	public static $mappings = array(
		'Boolean' => 'boolean',
		'Decimal' => 'double',
		'Currency' => 'double',
		'Double' => 'double',
		'Enum' => 'string',
		'Float' => 'float',
		'HTMLText' => 'string',
		'HTMLVarchar' => 'string',
		'Int' => 'integer',
		'Text' => 'string',
		'VarChar' => 'string',
		'Varchar' => 'string',
		'Year' => 'integer',
		'Percentage' => 'double',
		'Time' => 'date',
		// The 2 different date types will be stored with different formats
		'Date' => 'date',
		'SS_Datetime' => 'date',
		'Datetime' => 'date',
		'DBLocale' => 'string'
	);

	/**
	 * The Elastica search service this extension indexes into and removes from.
	 *
	 * @var ElasticaService
	 */
	protected $service;

	/**
	 * Names of the fields whose values need to be parsed as HTML.
	 *
	 * @var array
	 */
	protected $html_fields = array();

	/**
	 * Mapping of relationship name to the type of result it returns.
	 *
	 * @var array
	 */
	protected $relationship_methods = array();

	/**
	 * When importing a large number of items, from a fixtures file or some other source, it is
	 * quicker to set the flag IndexingOff => true.  No indexing or removal request is then made
	 * (normally one request is made per fixture when loading).  The reindexing task can be run
	 * afterwards to bulk index everything in one HTTP POST request to Elasticsearch.
	 *
	 * @var boolean
	 */
	private static $IndexingOff = false;

	/**
	 * Raw Elastica result this record was created from, if any.
	 *
	 * @see getElasticaResult
	 * @var \Elastica\Result
	 */
	protected $elastica_result;
84
85
	/**
	 * Create the extension and remember the search service it talks to.
	 *
	 * @param ElasticaService $service service used to index and remove documents
	 */
	public function __construct(ElasticaService $service) {
		// the service is stored before the parent constructor runs
		$this->service = $service;

		parent::__construct();
	}
89
90
91
	/**
	 * Name of the Elasticsearch type for the owner, which is the owner's class name.
	 *
	 * @return string
	 */
	public function getElasticaType() {
		$typeName = get_class($this->owner);

		return $typeName;
	}
99
100
101
	/**
	 * The raw Elastica search result previously stored with setElasticaResult().
	 * Nothing is returned if no result has been stored.
	 *
	 * @return \Elastica\Result
	 */
	public function getElasticaResult() {
		$rawResult = $this->elastica_result;

		return $rawResult;
	}
111
112
113
	/**
	 * Store the raw Elastica search result so that it can later be read back
	 * with getElasticaResult().
	 *
	 * @param \Elastica\Result $result the raw search result
	 */
	public function setElasticaResult(\Elastica\Result $result) {
		$this->elastica_result = $result;
	}
121
122
123
	/**
	 * Build the elastic field definition for each searchable field of the owner.
	 *
	 * Fields found in the owner's db() are handed to
	 * SearchableHelper::assignSpecForStandardFieldType(); has_many / many_many / has_one relationships
	 * are handed to SearchableHelper::assignSpecForRelationship(); anything else is typed as a string.
	 *
	 * @param boolean $storeMethodName forwarded to SearchableHelper::assignSpecForRelationship()
	 * @param boolean $recurse forwarded to SearchableHelper::assignSpecForRelationship()
	 * @return array field definitions keyed by field name (any '()' suffix removed)
	 */
	public function getElasticaFields($storeMethodName = false, $recurse = true) {
		$dbFields = $this->owner->db();
		$definitions = array();

		foreach($this->getAllSearchableFields() as $rawName => $unusedParams) {
			// relationship methods are keyed with a trailing '()', strip it
			$name = str_replace('()', '', $rawName);
			$spec = array();

			if(array_key_exists($name, $dbFields)) {
				$class = $dbFields[$name];
				SearchableHelper::assignSpecForStandardFieldType($name, $class, $spec, $this->html_fields, self::$mappings);
			} else {
				// field name is not in the db, it could be a method
				$has_lists = SearchableHelper::getListRelationshipMethods($this->owner);
				$has_ones = $this->owner->has_one();

				// has_many and many_many relations are checked first, then has_one
				$resultType = null;
				if(isset($has_lists[$name])) {
					$resultType = $has_lists[$name];
				} else if(isset($has_ones[$name])) {
					$resultType = $has_ones[$name];
				}

				if($resultType !== null) {
					SearchableHelper::assignSpecForRelationship($name, $resultType, $spec, $storeMethodName, $recurse);
				} else {
					// otherwise fall back to string - Enum is one such category
					$spec["type"] = "string";
				}
			}

			SearchableHelper::addIndexedFields($name, $spec, $this->owner->ClassName);
			$definitions[$name] = $spec;
		}

		// give the owner the chance to alter the list of HTML fields
		if($this->owner->hasMethod('updateElasticHTMLFields')) {
			$this->html_fields = $this->owner->updateElasticHTMLFields($this->html_fields);
		}

		return $definitions;
	}
170
171
172
173
	/**
	 * Build the elasticsearch mapping for the owner's document type.
	 *
	 * The mapping holds the searchable fields plus the fields that are always indexed
	 * (IsInSiteTree, Link and, when the owner has one, Locale).  The owner may alter the
	 * mapping by implementing updateElasticsearchMapping().
	 *
	 * @return \Elastica\Type\Mapping
	 */
	public function getElasticaMapping() {
		$properties = $this->getElasticaFields(false);

		if($this->owner->hasField('Locale')) {
			// we wish the locale to be stored as is
			$properties['Locale'] = array(
				'type' => 'string',
				'index' => 'not_analyzed'
			);
		}

		// ADD CUSTOM FIELDS HERE THAT ARE INDEXED BY DEFAULT
		// flag for whether or not the class is in SiteTree
		$properties['IsInSiteTree'] = array('type' => 'boolean');
		$properties['Link'] = array('type' => 'string', 'index' => 'not_analyzed');

		$mapping = new Mapping();
		$mapping->setProperties($properties);

		// This concatenates all the fields together into a single field.
		// Initially added for suggestions compatibility, in that searching
		// the _all field picks up all possible suggestions
		$mapping->enableAllField();

		if(!$this->owner->hasMethod('updateElasticsearchMapping')) {
			return $mapping;
		}

		return $this->owner->updateElasticsearchMapping($mapping);
	}
209
210
211
	/**
	 * Build the elasticsearch document for the owner.
	 *
	 * Each call increments the static $index_ctr counter.  If the current request carries
	 * a non-empty 'progress' variable it is used as the progress interval, and a message is shown
	 * whenever the counter is an exact multiple of that interval.  The owner may alter the
	 * document by implementing updateElasticsearchDocument().
	 *
	 * @return \Elastica\Document
	 */
	public function getElasticaDocument() {
		self::$index_ctr++;
		$fields = $this->getFieldValuesAsArray();

		// Only consult the request when a controller is actually active, so that documents
		// can also be built where no current controller is available
		if(\Controller::has_curr()) {
			$progress = \Controller::curr()->request->getVar('progress');
			if(!empty($progress)) {
				self::$progressInterval = (int)$progress;
			}
		}

		if(self::$progressInterval > 0) {
			if(self::$index_ctr % self::$progressInterval === 0) {
				ElasticaUtil::message("\t" . $this->owner->ClassName . " - Prepared " . self::$index_ctr . " for indexing...");
			}
		}

		// Optionally update the document
		$document = new Document($this->owner->ID, $fields);
		if($this->owner->hasMethod('updateElasticsearchDocument')) {
			$document = $this->owner->updateElasticsearchDocument($document);
		}

		// Flag whether the current classname is part of the site tree or not;
		// the link is only stored for classes that are
		$classname = $this->owner->ClassName;
		$inSiteTree = SearchableHelper::isInSiteTree($classname);

		$document->set('IsInSiteTree', $inSiteTree);
		if($inSiteTree) {
			$document->set('Link', $this->owner->AbsoluteLink());
		}

		if(isset($this->owner->Locale)) {
			$document->set('Locale', $this->owner->Locale);
		}

		return $document;
	}
252
253
254
	/**
	 * Collect the value of every elastic field of the owner into an array.
	 *
	 * NOTE(review): $recurse is handed to getElasticaFields() as its first argument, which that
	 * method names $storeMethodName - confirm this is intended before changing it.
	 *
	 * @param boolean $recurse forwarded to getElasticaFields() and SearchableHelper::storeRelationshipValue()
	 * @return array the values filled in by the SearchableHelper store* methods
	 */
	public function getFieldValuesAsArray($recurse = true) {
		$values = array();
		$definitions = $this->getElasticaFields($recurse);

		foreach($definitions as $field => $config) {
			// No value on the owner but a callable of that name on its class: the value comes from a method
			$valueFromMethod = null === $this->owner->$field
				&& is_callable(get_class($this->owner) . "::" . $field);

			if($valueFromMethod) {
				SearchableHelper::storeMethodTextValue($this->owner, $field, $values, $this->html_fields);
				continue;
			}

			if(in_array($field, $this->html_fields)) {
				SearchableHelper::storeFieldHTMLValue($this->owner, $field, $values);
				continue;
			}

			SearchableHelper::storeRelationshipValue($this->owner, $field, $values, $config, $recurse);
		}

		return $values;
	}
271
272
273
	/**
	 * Whether the owner should be included in the search index.
	 * Owners without a "ShowInSearch" field are always included; otherwise the owner is
	 * included unless that field loosely equals false.
	 *
	 * @return boolean
	 */
	public function showRecordInSearch() {
		if(!$this->owner->hasField('ShowInSearch')) {
			return true;
		}

		return false != $this->owner->ShowInSearch;
	}
282
283
284
	/**
	 * Delete the record from the search index when ShowInSearch has just been switched off.
	 *
	 * The deletion only happens when the owner is a SiteTree instance.
	 * NOTE(review): the previous comment described this as the non-SiteTree case, which
	 * contradicts the instanceof check - confirm which is intended.
	 */
	public function onBeforeWrite() {
		if(!($this->owner instanceof \SiteTree)) {
			return;
		}

		if(!$this->owner->hasField('ShowInSearch')) {
			return;
		}

		if(!$this->owner->isChanged('ShowInSearch', 2)) {
			return;
		}

		if(false == $this->owner->ShowInSearch) {
			$this->doDeleteDocument();
		}
	}
297
298
299
	/**
	 * Delete the record from the search index if ShowInSearch is deactivated (SiteTree).
	 *
	 * Only acts on an already published owner whose ShowInSearch is off, and only when the
	 * live record's ShowInSearch differs from the owner's.
	 */
	public function onBeforePublish() {
		if(false != $this->owner->ShowInSearch || !$this->owner->isPublished()) {
			return;
		}

		$liveRecords = \Versioned::get_by_stage(get_class($this->owner), 'Live');
		$liveRecord = $liveRecords->byID($this->owner->ID);

		if($liveRecord->ShowInSearch != $this->owner->ShowInSearch) {
			$this->doDeleteDocument();
		}
	}
311
312
313
	/**
	 * Write hook: updates the owner's entry in the search index via doIndexDocument().
	 *
	 * @return void
	 */
	public function onAfterWrite() {
		$this->doIndexDocument();
	}
319
320
321
	/**
	 * Publish hook: updates the owner's entry in the search index via doIndexDocument().
	 *
	 * @return void
	 */
	public function onAfterPublish() {
		$this->doIndexDocument();
	}
327
328
329
	/**
	 * Index the owner, unless it is hidden from search or the owner's IndexingOff flag is set.
	 */
	protected function doIndexDocument() {
		if(!$this->showRecordInSearch() || $this->owner->IndexingOff) {
			return;
		}

		$this->service->index($this->owner);
	}
337
338
339
	/**
	 * Delete hook: removes the owner from the search index via doDeleteDocumentIfInSearch().
	 *
	 * @return void
	 */
	public function onAfterDelete() {
		$this->doDeleteDocumentIfInSearch();
	}
345
346
347
	/**
	 * Unpublish hook: removes the owner from the search index via doDeleteDocumentIfInSearch().
	 *
	 * @return void
	 */
	public function onAfterUnpublish() {
		$this->doDeleteDocumentIfInSearch();
	}
353
354
355
	/**
	 * Remove the owner from the search index, but only when showRecordInSearch() reports
	 * that it is a record that is shown in search.
	 */
	protected function doDeleteDocumentIfInSearch() {
		if(!$this->showRecordInSearch()) {
			return;
		}

		$this->doDeleteDocument();
	}
363
364
365
	/**
	 * Remove the owner from the search index, unless the owner's IndexingOff flag is set.
	 * A document that is missing from the index is reported as a notice instead of an exception.
	 */
	protected function doDeleteDocument() {
		try {
			if($this->owner->IndexingOff) {
				return;
			}

			// this goes to elastica service
			$this->service->remove($this->owner);
		} catch (\Elastica\Exception\NotFoundException $e) {
			$message = "Deleted document " . $this->owner->ClassName . " (" . $this->owner->ID .
				") not found in search index.";
			trigger_error($message, E_USER_NOTICE);
		}
	}
379
380
381
	/**
	 * Return the searchable fields configured as 'searchable_fields' for the owner's class,
	 * converted with SearchableHelper::fieldsToElasticaConfig().
	 *
	 * When recursing, every entry of the 'searchable_relationships' config is added as well, keyed
	 * by the relationship name followed by '()' and holding the converted searchable fields of the
	 * related class.
	 *
	 * @param boolean $recurse Whether or not to traverse relationships. First time round yes, subsequently no
	 * @return array searchable fields
	 */
	public function getAllSearchableFields($recurse = true) {
		$ownerFields = \Config::inst()->get(get_class($this->owner), 'searchable_fields');

		// searchable fields are mandatory, complain if they are missing
		if(!$ownerFields) {
			user_error('The field $searchable_fields must be set for the class ' . $this->owner->ClassName);
		}

		$elasticaMapping = SearchableHelper::fieldsToElasticaConfig($ownerFields);

		if(!$recurse) {
			return $elasticaMapping;
		}

		// now for the associated methods and their results
		$methodDescs = \Config::inst()->get(get_class($this->owner), 'searchable_relationships');
		$has_ones = $this->owner->has_one();
		$has_lists = SearchableHelper::getListRelationshipMethods($this->owner);

		if(!is_array($methodDescs)) {
			return $elasticaMapping;
		}

		foreach($methodDescs as $methodDesc) {
			// split before the brackets which can optionally list which fields to index
			$parts = explode('(', $methodDesc);
			$methodName = $parts[0];

			// list relationships take precedence over has_one relationships
			if(isset($has_lists[$methodName])) {
				$relClass = $has_lists[$methodName];
			} else if(isset($has_ones[$methodName])) {
				$relClass = $has_ones[$methodName];
			} else {
				user_error('The method ' . $methodName . ' not found in class ' . $this->owner->ClassName .
						', please check configuration');
				continue;
			}

			$relFields = \Config::inst()->get($relClass, 'searchable_fields');
			if(!$relFields) {
				user_error('The field $searchable_fields must be set for the class ' . $relClass);
			}

			// mark as a method, the resultant fields are correct
			$elasticaMapping[$methodName . '()'] = SearchableHelper::fieldsToElasticaConfig($relFields);
		}

		return $elasticaMapping;
	}
441
442
443
444
445
	/**
	 * Ensure a searchable class record and one searchable field record per elastic field exist
	 * for the owner's class, using SearchableHelper::findOrCreateSearchableClass() and
	 * SearchableHelper::findOrCreateSearchableField().
	 */
	public function requireDefaultRecords() {
		parent::requireDefaultRecords();

		// method names are requested so that relationship specs carry a '__method' property
		$searchableFields = $this->getElasticaFields(true, true);
		$doSC = SearchableHelper::findOrCreateSearchableClass($this->owner->ClassName);

		foreach($searchableFields as $name => $searchableField) {
			// The field name from the loop key is used as is.  A spec without a 'type' describes a
			// method (relationship), in which case the stored method name is used instead.
			// Previously the name was unconditionally reset to '' here, so every non-method field
			// was registered with an empty name.
			if(!isset($searchableField['type'])) {
				$name = $searchableField['properties']['__method'];
			}

			SearchableHelper::findOrCreateSearchableField(
				$this->owner->ClassName,
				$name,
				$searchableField,
				$doSC
			);

			// FIXME deal with deletions
		}
	}
467
468
469
	/**
	 * Render the owner as a search result.
	 *
	 * The template <ClassName>ElasticSearchResult is listed ahead of the default
	 * ElasticSearchResult template, which allows the default to be overridden per class.
	 *
	 * @param string $linkToContainer made available to the template as ContainerLink
	 * @return mixed whatever renderWith() produces for the chosen template
	 */
	public function RenderResult($linkToContainer = '') {
		$templateData = array(
			'SearchResult' => $this->owner,
			'ContainerLink' => $linkToContainer
		);
		$vars = new \ArrayData($templateData);

		// class specific template first, generic template as the fallback
		$possibleTemplates = array(
			$this->owner->ClassName . 'ElasticSearchResult',
			'ElasticSearchResult'
		);

		return $this->owner->customise($vars)->renderWith($possibleTemplates);
	}
477
478
479
	/**
	 * Ask the search service for the term vectors of the owner.
	 *
	 * @return mixed whatever the service's getTermVectors() returns; updateCMSFields() treats
	 *               it as an array keyed by field name
	 */
	public function getTermVectors() {
		$termVectors = $this->service->getTermVectors($this->owner);

		return $termVectors;
	}
482
483
484
	/**
	 * Add one read only grid of term statistics per indexed field to the CMS, each on its own
	 * tab under Root.ElasticaTerms.
	 *
	 * Nothing is added for a SiteTree object that has no live record or whose live record has
	 * ShowInSearch switched off.  Any other object is always treated as indexed.
	 *
	 * @param \FieldList $fields the CMS fields to add to
	 * @return \FieldList the same field list
	 */
	public function updateCMSFields(\FieldList $fields) {
		// SiteTree object must have a live record, ShowInSearch = true
		if(SearchableHelper::isInSiteTree($this->owner->ClassName)) {
			$liveRecord = \Versioned::get_by_stage(get_class($this->owner), 'Live')->
				byID($this->owner->ID);

			// There is no live record until the object has been published, so guard the
			// property access instead of dereferencing an empty result
			$isIndexed = $liveRecord && $liveRecord->ShowInSearch;
		} else {
			// objects outside of the SiteTree are always treated as indexed here
			$isIndexed = true;
		}

		if(!$isIndexed) {
			return $fields;
		}

		$termVectors = $this->getTermVectors();
		$termFields = array_keys($termVectors);
		sort($termFields);

		// term vector statistic => property it is shown under in the grid
		$statColumns = array(
			'ttf' => 'TTF',
			'doc_freq' => 'DocFreq',
			'term_freq' => 'TermFreq'
		);

		foreach($termFields as $field) {
			$terms = new \ArrayList();

			foreach($termVectors[$field]['terms'] as $term => $stats) {
				$do = new \DataObject();
				$do->Term = $term;

				// only the statistics that are present get copied over
				foreach($statColumns as $statName => $property) {
					if(isset($stats[$statName])) {
						$do->$property = $stats[$statName];
					}
				}

				$terms->push($do);
			}

			$config = \GridFieldConfig_RecordViewer::create(100);
			$config->getComponentByType('GridFieldDataColumns')->setDisplayFields(array(
				'Term' => 'Term',
				'TTF' => 'Total term frequency (how often a term occurs in all documents)',
				'DocFreq' => 'n documents with this term',
				'TermFreq'=> 'n times this term appears in this field'
			));

			// the dots of nested field names are replaced by underscores in field and tab names
			$underscored = str_replace('.', '_', $field);

			// NOTE(review): the 'TITLE' infix in the field title looks like a leftover, left unchanged
			$gridField = new \GridField(
				'TermsFor' . $underscored, // Field name
				$field . 'TITLE' . $field, // Field title
				$terms,
				$config
			);
			$fields->addFieldToTab('Root.ElasticaTerms.' . $underscored, $gridField);
		}

		return $fields;
	}
549
550
551
}
552