Completed
Push — dev2 ( 1ebec9...91be19 )
by Gordon
03:33
created

Searchable::getFormatForDate()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 19
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 19
rs 8.8571
cc 5
eloc 16
nc 5
nop 1
1
<?php
2
3
namespace SilverStripe\Elastica;
4
5
use Elastica\Document;
6
use Elastica\Type\Mapping;
7
use ShortcodeParser;
8
9
/**
10
 * Adds elastic search integration to a data object.
11
 */
12
class Searchable extends \DataExtension {
13
14
	/**
15
	 * Counter used to display progress of indexing
16
	 * @var integer
17
	 */
18
	public static $index_ctr = 0;
19
20
	/**
21
	 * Everytime progressInterval divides $index_ctr exactly display progress
22
	 * @var integer
23
	 */
24
	private static $progressInterval = 0;
25
26
	public static $mappings = array(
27
		'Boolean'     => 'boolean',
28
		'Decimal'     => 'double',
29
		'Currency'    => 'double',
30
		'Double'      => 'double',
31
		'Enum'        => 'string',
32
		'Float'       => 'float',
33
		'HTMLText'    => 'string',
34
		'HTMLVarchar' => 'string',
35
		'Int'         => 'integer',
36
		'Text'        => 'string',
37
		'VarChar'     => 'string',
38
		'Varchar'     => 'string',
39
		'Year'        => 'integer',
40
		'Percentage'  => 'double',
41
		'Time'  => 'date',
42
43
		// The 2 different date types will be stored with different formats
44
		'Date'        => 'date',
45
		'SS_Datetime' => 'date',
46
		'Datetime' => 'date',
47
		'DBLocale'    => 'string'
48
	);
49
50
51
	/**
52
	 * @var ElasticaService associated elastica search service
53
	 */
54
	protected $service;
55
56
57
	/**
58
	 * Array of fields that need HTML parsed
59
	 * @var array
60
	 */
61
	protected $html_fields = array();
62
63
	/**
64
	 * Store a mapping of relationship name to result type
65
	 */
66
	protected $relationship_methods = array();
67
68
69
	/**
70
	 * If importing a large number of items from a fixtures file, or indeed some other source, then
71
	 * it is quicker to set a flag of value IndexingOff => true.  This has the effect of ensuring
72
	 * no indexing happens, a request is normally made per fixture when loading.  One can then run
73
	 * the reindexing task to bulk index in one HTTP POST request to Elasticsearch
74
	 *
75
	 * @var boolean
76
	 */
77
	private static $IndexingOff = false;
78
79
80
	/**
81
	 * @see getElasticaResult
82
	 * @var \Elastica\Result
83
	 */
84
	protected $elastica_result;
85
86
	/**
	 * Create the extension and remember the search service it works with.
	 *
	 * @param ElasticaService $service service that indexing, removal and term vector
	 *                                 requests are delegated to
	 */
	public function __construct(ElasticaService $service) {
		// the service is stored before the parent constructor is run
		$this->service = $service;

		parent::__construct();
	}
90
91
92
	/**
	 * Name of the elasticsearch type for the owner, which is the owner's PHP class name.
	 *
	 * @return string
	 */
	public function getElasticaType() {
		$typeName = get_class($this->owner);

		return $typeName;
	}
100
101
102
	/**
	 * Accessor for the raw Elastica search result attached to the owner.
	 *
	 * The value is whatever was last passed to setElasticaResult(); nothing is
	 * returned (null) when no result has been set.
	 *
	 * @return \Elastica\Result
	 */
	public function getElasticaResult() {
		$rawResult = $this->elastica_result;

		return $rawResult;
	}
112
113
114
	/**
	 * Attach the raw Elastica search result that the owner was created from.
	 *
	 * @param \Elastica\Result $result result later returned by getElasticaResult()
	 */
	public function setElasticaResult(\Elastica\Result $result) {
		$this->elastica_result = $result;
	}
122
123
124
	/**
	 * Gets an array of elastic field definitions, keyed by field or relationship name.
	 *
	 * Every name returned by getAllSearchableFields() has its '()' method marker removed and is
	 * then given a spec:
	 *  - database fields are handled by SearchableHelper::assignSpecForStandardFieldType()
	 *  - has_many, many_many and has_one relationships are handled by
	 *    SearchableHelper::assignSpecForRelationship()
	 *  - anything else is mapped as a plain string
	 *
	 * @param boolean $storeMethodName forwarded to SearchableHelper::assignSpecForRelationship();
	 *                                 presumably makes the helper record the relationship method
	 *                                 under properties.__method - TODO confirm against the helper
	 * @param boolean $recurse forwarded to SearchableHelper::assignSpecForRelationship()
	 * @return array
	 */
	public function getElasticaFields($storeMethodName = false, $recurse = true) {
		$db = $this->owner->db();
		$fields = $this->getAllSearchableFields();
		$result = array();

		// The relationship definitions are the same for every field, so look them up once
		// instead of once per field that is not a database column
		$has_lists = $this->getListRelationshipMethods();
		$has_ones = $this->owner->has_one();

		foreach(array_keys($fields) as $rawName) {
			$spec = array();
			$name = str_replace('()', '', $rawName);

			if(array_key_exists($name, $db)) {
				$class = $db[$name];
				SearchableHelper::assignSpecForStandardFieldType($name, $class, $spec, $this->html_fields, self::$mappings);
			} else if(isset($has_lists[$name])) {
				// has_many and many_many relations: the class returned by the list method
				$resultType = $has_lists[$name];
				SearchableHelper::assignSpecForRelationship($name, $resultType, $spec, $storeMethodName, $recurse);
			} else if(isset($has_ones[$name])) {
				$resultType = $has_ones[$name];
				SearchableHelper::assignSpecForRelationship($name, $resultType, $spec, $storeMethodName, $recurse);
			} else {
				// not in the db and not a relationship, fall back to string - Enum is one such category
				$spec["type"] = "string";
			}

			SearchableHelper::addIndexedFields($name, $spec, $this->owner->ClassName);

			$result[$name] = $spec;
		}

		// Give the owner the chance to adjust the list of fields treated as HTML
		if($this->owner->hasMethod('updateElasticHTMLFields')) {
			$this->html_fields = $this->owner->updateElasticHTMLFields($this->html_fields);
		}

		return $result;
	}
172
173
174
175
	/**
	 * Build the elasticsearch mapping for the owner's document type.
	 *
	 * The searchable fields are extended with a not analyzed 'Locale' (only when the owner
	 * has such a field), plus 'IsInSiteTree' and 'Link', which are always present. The owner
	 * may replace the mapping by implementing updateElasticsearchMapping().
	 *
	 * @return \Elastica\Type\Mapping
	 */
	public function getElasticaMapping() {
		$properties = $this->getElasticaFields(false);

		// the locale is to be stored as is, hence not analyzed
		if($this->owner->hasField('Locale')) {
			$properties['Locale'] = array('type' => 'string', 'index' => 'not_analyzed');
		}

		// fields that are indexed by default: whether the class is in the SiteTree, and its link
		$properties['IsInSiteTree'] = array('type' => 'boolean');
		$properties['Link'] = array('type' => 'string', 'index' => 'not_analyzed');

		$mapping = new Mapping();
		$mapping->setProperties($properties);

		// Concatenate all of the fields into the single _all field. Added for suggestions
		// compatibility, as searching _all picks up every possible suggestion
		$mapping->enableAllField();

		if(!$this->owner->hasMethod('updateElasticsearchMapping')) {
			return $mapping;
		}

		return $this->owner->updateElasticsearchMapping($mapping);
	}
211
212
213
	/**
	 * Get an elasticsearch document for the owner.
	 *
	 * Increments the static indexing counter and, when a progress interval is in force,
	 * reports progress through ElasticaUtil::message(). The document holds the searchable
	 * field values plus 'IsInSiteTree', 'Link' (SiteTree classes only) and 'Locale' (when set
	 * on the owner). The owner may replace the document by implementing
	 * updateElasticsearchDocument().
	 *
	 * @return \Elastica\Document
	 */
	public function getElasticaDocument() {
		self::$index_ctr++;
		$fields = $this->getFieldValuesAsArray();

		// The 'progress' request variable sets how often progress is reported. Only consult the
		// request when a controller is active, as documents can be built without one
		if(\Controller::has_curr()) {
			$progress = \Controller::curr()->request->getVar('progress');
			if(!empty($progress)) {
				self::$progressInterval = (int)$progress;
			}
		}

		if(self::$progressInterval > 0 && self::$index_ctr % self::$progressInterval === 0) {
			ElasticaUtil::message("\t" . $this->owner->ClassName . " - Prepared " . self::$index_ctr . " for indexing...");
		}

		// Optionally update the document
		$document = new Document($this->owner->ID, $fields);
		if($this->owner->hasMethod('updateElasticsearchDocument')) {
			$document = $this->owner->updateElasticsearchDocument($document);
		}

		// Flag whether or not the owner's class is part of the site tree
		$classname = $this->owner->ClassName;
		$inSiteTree = $this->isInSiteTree($classname);

		$document->set('IsInSiteTree', $inSiteTree);

		// only site tree classes are given a link
		if($inSiteTree) {
			$document->set('Link', $this->owner->AbsoluteLink());
		}

		if(isset($this->owner->Locale)) {
			$document->set('Locale', $this->owner->Locale);
		}

		return $document;
	}
255
256
257
	/**
	 * Collect the values of the owner's searchable fields, keyed by field name.
	 *
	 * - A field whose property value is null but which is callable on the owner's class is
	 *   obtained by calling it as a method.
	 * - Fields listed in $this->html_fields have short codes parsed and are converted to text.
	 * - Fields whose spec carries properties.__method are relationships: a has_one becomes the
	 *   related object's field values, any other relationship becomes a list with the field
	 *   values of each related item.
	 *
	 * @param boolean $recurse whether the items of list relationships are included; when false
	 *                         such relationships are stored as an empty array
	 * @return array
	 */
	public function getFieldValuesAsArray($recurse = true) {
		$fields = array();
		$has_ones = $this->owner->has_one();
		$ownerClass = get_class($this->owner);

		// NOTE(review): $recurse lands in the first parameter of getElasticaFields(), which is
		// $storeMethodName, not $recurse. Left as is because the relationship branch below
		// relies on the '__method' key - confirm this is intentional
		foreach($this->getElasticaFields($recurse) as $field => $config) {
			$isHtml = in_array($field, $this->html_fields);
			$value = $this->owner->$field;

			if(null === $value && is_callable($ownerClass . "::" . $field)) {
				// call a method to get a field value
				$value = $this->owner->$field();
				$fields[$field] = $isHtml ? $this->htmlToPlainText($value) : $value;
			} else if($isHtml) {
				// a null value is stored untouched, there is nothing to parse
				$fields[$field] = (null === $value) ? null : $this->htmlToPlainText($value);
			} else if(isset($config['properties']['__method'])) {
				$methodName = $config['properties']['__method'];
				$data = $this->owner->$methodName();
				$relArray = array();

				if(isset($has_ones[$methodName])) {
					// get the fields of a has_one relational object
					if($data->ID > 0) {
						$relArray = $data->getFieldValuesAsArray(false);
					}
				} else if($recurse) {
					// get the fields for a has_many or many_many relational list; populate each
					// subitem but do not recurse any further if there are more relationships
					foreach($data->getIterator() as $item) {
						$relArray[] = $item->getFieldValuesAsArray(false);
					}
				}

				// save the relation as an array (for now)
				$fields[$methodName] = $relArray;
			} else {
				$fields[$field] = $value;
			}
		}

		return $fields;
	}


	/**
	 * Parse short codes in a piece of HTML and then convert the result to text.
	 *
	 * @param string $html
	 * @return string
	 */
	private function htmlToPlainText($html) {
		$parsed = ShortcodeParser::get_active()->parse($html);
		return \Convert::html2raw($parsed);
	}
319
320
321
	/**
	 * Decide whether the owner belongs in the search index.
	 *
	 * An owner without a "ShowInSearch" field is always included; otherwise it is included
	 * unless that field loosely compares equal to false.
	 *
	 * @return boolean
	 */
	public function showRecordInSearch() {
		if(!$this->owner->hasField('ShowInSearch')) {
			return true;
		}

		return false != $this->owner->ShowInSearch;
	}
330
331
332
	/**
	 * Delete the record from the search index when ShowInSearch has been switched off.
	 *
	 * Acts only when the owner is a SiteTree instance that has a ShowInSearch field which
	 * has changed and now loosely equals false.
	 *
	 * NOTE(review): this hook has been described as the "non-SiteTree" handler, yet the
	 * check requires a SiteTree - confirm whether the instanceof test should be negated.
	 */
	public function onBeforeWrite() {
		$owner = $this->owner;

		if(!($owner instanceof \SiteTree)) {
			return;
		}

		$switchedOff = $owner->hasField('ShowInSearch')
			&& $owner->isChanged('ShowInSearch', 2)
			&& false == $owner->ShowInSearch;

		if($switchedOff) {
			$this->doDeleteDocument();
		}
	}
343
344
345
	/**
	 * Delete the record from the search index if ShowInSearch is deactivated (SiteTree).
	 *
	 * Nothing happens unless ShowInSearch is off on the owner and the owner is already
	 * published. The document is then removed when the live record's ShowInSearch differs
	 * from the owner's.
	 */
	public function onBeforePublish() {
		if(false != $this->owner->ShowInSearch) {
			return;
		}

		if(!$this->owner->isPublished()) {
			return;
		}

		$liveRecords = \Versioned::get_by_stage(get_class($this->owner), 'Live');
		$liveRecord = $liveRecords->byID($this->owner->ID);

		if($liveRecord->ShowInSearch != $this->owner->ShowInSearch) {
			$this->doDeleteDocument();
		}
	}
359
360
361
	/**
	 * Write hook: bring the record's entry in the search index up to date.
	 *
	 * Delegates to doIndexDocument(), which decides whether indexing takes place.
	 */
	public function onAfterWrite() {
		$this->doIndexDocument();
	}
367
368
369
	/**
	 * Publish hook (SiteTree): bring the record's entry in the search index up to date.
	 *
	 * Delegates to doIndexDocument(), which decides whether indexing takes place.
	 */
	public function onAfterPublish() {
		$this->doIndexDocument();
	}
375
376
377
	/**
	 * Send the owner to the search service for indexing.
	 *
	 * Skipped when showRecordInSearch() rejects the record, or when the owner's
	 * IndexingOff flag is set.
	 */
	protected function doIndexDocument() {
		if(!$this->showRecordInSearch()) {
			return;
		}

		if($this->owner->IndexingOff) {
			return;
		}

		$this->service->index($this->owner);
	}
387
388
389
	/**
	 * Delete hook: take the record out of the search index.
	 *
	 * Delegates to doDeleteDocumentIfInSearch(), so only records that
	 * showRecordInSearch() accepts are removed.
	 */
	public function onAfterDelete() {
		$this->doDeleteDocumentIfInSearch();
	}
395
396
397
	/**
	 * Unpublish hook: take the record out of the search index.
	 *
	 * Delegates to doDeleteDocumentIfInSearch(), so only records that
	 * showRecordInSearch() accepts are removed.
	 */
	public function onAfterUnpublish() {
		$this->doDeleteDocumentIfInSearch();
	}
403
404
405
	/**
	 * Removes the record from the search index, provided showRecordInSearch() reports
	 * that the record is one that gets indexed.
	 */
	protected function doDeleteDocumentIfInSearch() {
		if(!$this->showRecordInSearch()) {
			return;
		}

		$this->doDeleteDocument();
	}
413
414
415
	/**
	 * Ask the search service to remove the owner's document.
	 *
	 * No request is made when the owner's IndexingOff flag is set. A document that the
	 * service cannot find is reported as an E_USER_NOTICE rather than left as an exception.
	 */
	protected function doDeleteDocument() {
		try {
			$indexingEnabled = !$this->owner->IndexingOff;
			if($indexingEnabled) {
				// this goes to elastica service
				$this->service->remove($this->owner);
			}
		} catch (\Elastica\Exception\NotFoundException $e) {
			$message = "Deleted document " . $this->owner->ClassName . " (" . $this->owner->ID .
				") not found in search index.";
			trigger_error($message, E_USER_NOTICE);
		}
	}
430
431
432
	/**
	 * Return the searchable fields configured for the owner's class as 'searchable_fields',
	 * optionally extended with the searchable fields of each relationship configured as
	 * 'searchable_relationships'. Relationship entries are keyed by method name plus '()'.
	 *
	 * @param  boolean $recurse Whether or not to traverse relationships. First time round yes, subsequently no
	 * @return array searchable fields
	 */
	public function getAllSearchableFields($recurse = true) {
		$ownerClass = get_class($this->owner);
		$ownFields = \Config::inst()->get($ownerClass, 'searchable_fields');

		// the owner has to declare what is searchable
		if(!$ownFields) {
			user_error('The field $searchable_fields must be set for the class ' . $this->owner->ClassName);
		}

		// get the values of these fields
		$elasticaMapping = $this->fieldsToElasticaConfig($ownFields);

		if(!$recurse) {
			return $elasticaMapping;
		}

		// now for the associated methods and their results
		$methodDescs = \Config::inst()->get($ownerClass, 'searchable_relationships');
		$has_ones = $this->owner->has_one();
		$has_lists = $this->getListRelationshipMethods();

		if(!is_array($methodDescs)) {
			return $elasticaMapping;
		}

		foreach($methodDescs as $methodDesc) {
			// split before the brackets which can optionally list which fields to index
			$parts = explode('(', $methodDesc);
			$methodName = $parts[0];

			// list relationships take precedence over has_one relationships
			if(isset($has_lists[$methodName])) {
				$relClass = $has_lists[$methodName];
			} else if(isset($has_ones[$methodName])) {
				$relClass = $has_ones[$methodName];
			} else {
				user_error('The method ' . $methodName . ' not found in class ' . $this->owner->ClassName .
						', please check configuration');
				continue;
			}

			$relFields = \Config::inst()->get($relClass, 'searchable_fields');
			if(!$relFields) {
				user_error('The field $searchable_fields must be set for the class ' . $relClass);
			}

			// mark as a method, the resultant fields are correct
			$elasticaMapping[$methodName . '()'] = $this->fieldsToElasticaConfig($relFields);
		}

		return $elasticaMapping;
	}
492
493
494
	/**
	 * Evaluate each field, e.g. 'Title', 'Member.Name', producing a configuration entry with
	 * a 'title' and a 'filter' for each.
	 *
	 * A field is identified by its value when the array key is an integer, otherwise by its key.
	 * Adapted from DataObject::searchableFields() as there is no separate accessible method.
	 *
	 * @param array $fields searchable field configuration; anything that cannot be iterated
	 *                      (e.g. null when a class has no configuration) gives an empty result
	 * @return array configuration keyed by field identifier
	 */
	private function fieldsToElasticaConfig($fields) {
		$rewrite = array();

		// callers report missing configuration via user_error() and carry on, so do not
		// attempt to iterate over a missing value
		if(!is_array($fields) && !($fields instanceof \Traversable)) {
			return $rewrite;
		}

		foreach($fields as $name => $specOrName) {
			$identifier = is_int($name) ? $specOrName : $name;

			// No field labels are available here, so the title is always derived from the name
			$rewrite[$identifier] = array(
				'title' => \FormField::name_to_label($identifier),
				'filter' => 'PartialMatchFilter'
			);
		}

		return $rewrite;
	}
514
515
516
	/**
	 * Make sure a SearchableClass record exists for the owner's class, and a SearchableField
	 * record for each of its searchable fields. Records that already exist are left alone.
	 *
	 * A field spec without a 'type' is taken to be a relationship, in which case the name
	 * used is the method name found under properties.__method.
	 */
	public function requireDefaultRecords() {
		parent::requireDefaultRecords();

		$searchableFields = $this->getElasticaFields(true, true);
		$ownerClass = $this->owner->ClassName;

		$doSC = \SearchableClass::get()->filter(array('Name' => $ownerClass))->first();
		if(!$doSC) {
			$doSC = new \SearchableClass();
			$doSC->Name = $ownerClass;
			$doSC->InSiteTree = $this->isInSiteTree($ownerClass);
			$doSC->write();
		}

		foreach($searchableFields as $name => $searchableField) {
			// check for existence of methods and if they exist use that as the name
			$isRelationship = !isset($searchableField['type']);
			if($isRelationship) {
				$name = $searchableField['properties']['__method'];
			}

			$filter = array('ClazzName' => $ownerClass, 'Name' => $name);
			if(\SearchableField::get()->filter($filter)->first()) {
				// FIXME deal with deletions
				continue;
			}

			$doSF = new \SearchableField();
			$doSF->ClazzName = $ownerClass;
			$doSF->Name = $name;
			$doSF->Type = $isRelationship ? 'relationship' : $searchableField['type'];
			$doSF->SearchableClassID = $doSC->ID;

			if(isset($searchableField['fields']['autocomplete'])) {
				$doSF->Autocomplete = true;
			}

			$doSF->write();
			\DB::alteration_message("Created new searchable editable field " . $name, "changed");
		}
	}
567
568
569
	/**
	 * Combine the owner's has_many and many_many definitions into a single array of
	 * relationship name to related class name. A many_many entry replaces a has_many
	 * entry of the same name.
	 *
	 * @return array
	 */
	private function getListRelationshipMethods() {
		$listRelations = $this->owner->has_many();
		$manyManyRelations = $this->owner->many_many();

		foreach($manyManyRelations as $relationName => $relatedClass) {
			$listRelations[$relationName] = $relatedClass;
		}

		return $listRelations;
	}
581
582
583
	/**
	 * Determine whether a class is SiteTree itself or one of its descendants.
	 *
	 * @param string $classname name of the class to check
	 * @return boolean true if $classname is, or inherits from, SiteTree
	 */
	private function isInSiteTree($classname) {
		// Check the class that was asked about rather than always the owner's class, and let
		// PHP resolve the ancestry instead of stepping through reflection parents by hand
		return $classname === 'SiteTree' || is_subclass_of($classname, 'SiteTree');
	}
597
598
599
	/**
	 * Render the owner as a search result.
	 *
	 * The template <ClassName>ElasticSearchResult is offered first, which allows the default
	 * ElasticSearchResult template to be overridden per class.
	 *
	 * @param string $linkToContainer made available to the template as ContainerLink
	 * @return mixed whatever renderWith() produces for the chosen template
	 */
	public function RenderResult($linkToContainer = '') {
		$templates = array(
			$this->owner->ClassName . 'ElasticSearchResult',
			'ElasticSearchResult'
		);

		$templateData = new \ArrayData(array(
			'SearchResult' => $this->owner,
			'ContainerLink' => $linkToContainer
		));

		return $this->owner->customise($templateData)->renderWith($templates);
	}
607
608
609
610
	/**
	 * Fetch the term vectors for the owner from the search service.
	 *
	 * @return array presumably keyed by field name, going by how updateCMSFields() reads it
	 */
	public function getTermVectors() {
		$termVectors = $this->service->getTermVectors($this->owner);

		return $termVectors;
	}
613
614
615
	/**
	 * Add a read only grid of indexed terms to the CMS for every field that has term vectors.
	 *
	 * Each grid is placed in its own tab under Root.ElasticaTerms and lists the term along
	 * with whichever of its ttf, doc_freq and term_freq statistics are present.
	 *
	 * @param \FieldList $fields CMS fields of the owner, modified in place
	 * @return \FieldList the same field list
	 */
	public function updateCMSFields(\FieldList $fields) {
		// SiteTree object must have a live record, ShowInSearch = true
		if($this->isInSiteTree($this->owner->ClassName)) {
			$liveRecord = \Versioned::get_by_stage(get_class($this->owner), 'Live')->
				byID($this->owner->ID);

			// there is no live record to read from when the lookup comes back empty,
			// e.g. for a page that has not been published
			$isIndexed = $liveRecord && $liveRecord->ShowInSearch;
		} else {
			// NOTE(review): the intent was to use the ShowInSearch flag for a DataObject, but
			// every DataObject is treated as indexed - confirm whether showRecordInSearch()
			// should be consulted here
			$isIndexed = true;
		}

		if(!$isIndexed) {
			return $fields;
		}

		// term vector statistic => property it is displayed under
		$statColumns = array(
			'ttf' => 'TTF',
			'doc_freq' => 'DocFreq',
			'term_freq' => 'TermFreq'
		);

		$termVectors = $this->getTermVectors();
		$termFields = array_keys($termVectors);
		sort($termFields);

		foreach($termFields as $field) {
			$terms = new \ArrayList();

			foreach($termVectors[$field]['terms'] as $term => $stats) {
				$do = new \DataObject();
				$do->Term = $term;

				// copy across only the statistics that were returned
				foreach($statColumns as $statName => $property) {
					if(isset($stats[$statName])) {
						$do->$property = $stats[$statName];
					}
				}

				$terms->push($do);
			}

			$config = \GridFieldConfig_RecordViewer::create(100);
			$config->getComponentByType('GridFieldDataColumns')->setDisplayFields(array(
				'Term' => 'Term',
				'TTF' => 'Total term frequency (how often a term occurs in all documents)',
				'DocFreq' => 'n documents with this term',
				'TermFreq'=> 'n times this term appears in this field'
			));

			// field names may contain dots, which the tab path below would treat as separators
			$underscored = str_replace('.', '_', $field);

			$gridField = new \GridField(
				'TermsFor' . $underscored, // Field name
				$field . 'TITLE' . $field, // Field title
				$terms,
				$config
			);
			$fields->addFieldToTab('Root.ElasticaTerms.' . $underscored, $gridField);
		}

		return $fields;
	}
680
681
682
}
683