1
|
|
|
<?php |
2
|
|
|
namespace SilverStripe\Elastica; |
3
|
|
|
|
4
|
|
|
//use \SilverStripe\Elastica\ResultList; |
5
|
|
|
use Elastica\Query; |
6
|
|
|
|
7
|
|
|
use Elastica\Query\QueryString; |
8
|
|
|
use Elastica\Aggregation\Filter; |
9
|
|
|
use Elastica\Filter\Term; |
10
|
|
|
use Elastica\Filter\BoolAnd; |
11
|
|
|
use Elastica\Query\Filtered; |
12
|
|
|
use Elastica\Query\MultiMatch; |
13
|
|
|
use Elastica\Query\MoreLikeThis; |
14
|
|
|
|
15
|
|
|
|
16
|
|
|
|
17
|
|
|
/**
 * Front end for searching Elasticsearch from SilverStripe.
 *
 * Search criteria (classes, filters, locale, paging, highlighting and the
 * 'more like this' tuning values) are configured through the setters, after
 * which one of search(), autocomplete_search() or moreLikeThis() is called.
 */
class ElasticSearcher {
    /**
     * Comma separated list of SilverStripe ClassNames to search. Leave blank for all
     * @var string
     */
    private $classes = '';

    /**
     * Array of aggregation selected mapped to the value selected, e.g. 'Aperture' => '11'
     * @var array
     */
    private $filters = array();

    /**
     * The locale to search, is set to current locale or default locale by default
     * but can be overridden. This is the code in the form en_US, th_TH etc
     * @var string|null
     */
    private $locale = null;

    /**
     * Object just to manipulate the query and result, used for aggregations
     * @var ElasticaSearchHelper
     */
    private $manipulator;

    /**
     * Offset from zero to return search results from
     * @var integer
     */
    private $start = 0;

    /**
     * How many search results to return
     * @var integer
     */
    private $pageLength = 10;

    /**
     * After a search is performed aggregations are saved here
     * @var array
     */
    private $aggregations = null;

    /**
     * Array of highlighted fields, e.g. Title, Title.standard. If this is empty then the
     * ShowHighlight field of SearchableField is used to determine which fields to highlight
     * @var array
     */
    private $highlightedFields = array();

    /**
     * Allow an empty search to return either no results (default) or all results, useful for
     * showing some results during aggregation
     * @var boolean
     */
    private $showResultsForEmptySearch = false;

    /**
     * Suggested query copied from the result list of the last search() that provided one
     * @var string|null
     */
    private $SuggestedQuery = null;

    /**
     * Highlighted form of the suggested query, copied from the result list alongside
     * $SuggestedQuery. Declared explicitly so that it is not created as a dynamic property
     * and so that the getter is safe to call before any search has been run.
     * @var string|null
     */
    private $SuggestedQueryHighlighted = null;

    // ---- variables for more like this searching, defaults as per Elasticsearch ----
    private $minTermFreq = 2;

    // NOTE: despite its name this value is sent to Elasticsearch as 'max_query_terms'
    private $maxTermFreq = 25;

    private $minDocFreq = 2;

    // only sent to Elasticsearch ('max_doc_freq') when greater than zero
    private $maxDocFreq = 0;

    private $minWordLength = 0;

    private $maxWordLength = 0;

    private $minShouldMatch = '30%';

    // comma separated list of stop words for more like this searching
    private $similarityStopWords = '';

    /**
     * Show results for an empty search string
     */
    public function showResultsForEmptySearch() {
        $this->showResultsForEmptySearch = true;
    }

    /**
     * Hide results for an empty search
     */
    public function hideResultsForEmptySearch() {
        $this->showResultsForEmptySearch = false;
    }

    /**
     * Accessor the variable to determine whether or not to show results for an empty search
     * @return boolean true to show results for empty search, otherwise false
     */
    public function getShowResultsForEmptySearch() {
        return $this->showResultsForEmptySearch;
    }

    /**
     * Update the list of Classes to search, use SilverStripe ClassName comma separated
     * @param string $newClasses comma separated list of SilverStripe ClassNames
     */
    public function setClasses($newClasses) {
        $this->classes = $newClasses;
    }

    /**
     * Set the manipulator, mainly used for aggregation
     * @param ElasticaSearchHelper $newManipulator manipulator used for aggregation
     */
    public function setQueryResultManipulator($newManipulator) {
        $this->manipulator = $newManipulator;
    }

    /**
     * Update the start variable
     * @param int $newStart Offset for search
     */
    public function setStart($newStart) {
        $this->start = $newStart;
    }

    /**
     * Update the page length variable
     * @param int $newPageLength the number of results to be returned
     */
    public function setPageLength($newPageLength) {
        $this->pageLength = $newPageLength;
    }

    /**
     * Set a new locale
     * @param string $newLocale locale in short form, e.g. th_TH
     */
    public function setLocale($newLocale) {
        $this->locale = $newLocale;
    }

    /**
     * Add a filter to the current query in the form of a key/value pair
     * @param string $field the name of the indexed field to filter on
     * @param string|boolean|integer $value the value of the indexed field to filter on
     */
    public function addFilter($field, $value) {
        $this->filters[$field] = $value;
    }

    /**
     * Accessor to the aggregations, to be used after a search
     * @return array Aggregations returned after a search
     */
    public function getAggregations() {
        return $this->aggregations;
    }

    /**
     * Set the minimum term frequency for term to be considered in input query
     * @param int $newMinTermFreq sent to Elasticsearch as 'min_term_freq'
     */
    public function setMinTermFreq($newMinTermFreq) {
        $this->minTermFreq = $newMinTermFreq;
    }

    /**
     * Set the maximum term frequency for term to be considered in input query
     * @param int $newMaxTermFreq sent to Elasticsearch as 'max_query_terms'
     */
    public function setMaxTermFreq($newMaxTermFreq) {
        $this->maxTermFreq = $newMaxTermFreq;
    }

    /**
     * Set the minimum number of documents a term can reside in for consideration as
     * part of the input query
     * @param int $newMinDocFreq sent to Elasticsearch as 'min_doc_freq'
     */
    public function setMinDocFreq($newMinDocFreq) {
        $this->minDocFreq = $newMinDocFreq;
    }

    /**
     * Set the maximum number of documents a term can reside in for consideration as
     * part of the input query
     * @param int $newMaxDocFreq sent as 'max_doc_freq', but only when greater than zero
     */
    public function setMaxDocFreq($newMaxDocFreq) {
        $this->maxDocFreq = $newMaxDocFreq;
    }

    /**
     * Set the minimum word length for a term to be considered part of the query
     * @param int $newMinWordLength sent to Elasticsearch as 'min_word_length'
     */
    public function setMinWordLength($newMinWordLength) {
        $this->minWordLength = $newMinWordLength;
    }

    /**
     * Set the maximum word length for a term to be considered part of the query
     * @param int $newMaxWordLength sent to Elasticsearch as 'max_word_length'
     */
    public function setMaxWordLength($newMaxWordLength) {
        $this->maxWordLength = $newMaxWordLength;
    }

    /**
     * Number or percentage of chosen terms that match
     * @param string|int $newMinShouldMatch sent to Elasticsearch as 'minimum_should_match'
     */
    public function setMinShouldMatch($newMinShouldMatch) {
        $this->minShouldMatch = $newMinShouldMatch;
    }

    /**
     * Set the stop words used for more like this searching
     * @param string $newSimilarityStopWords comma separated list of stop words
     */
    public function setSimilarityStopWords($newSimilarityStopWords) {
        $this->similarityStopWords = $newSimilarityStopWords;
    }

    /**
     * Set the highlight fields for subsequent searches
     * @param string[] $newHighlightedFields
     */
    public function setHighlightedFields($newHighlightedFields) {
        $this->highlightedFields = $newHighlightedFields;
    }

    /**
     * Search against elastica using the criteria already provided, such as page length, start,
     * and of course the filters
     * @param string $queryText query string, e.g. 'New Zealand'
     * @param array $fieldsToSearch Mapping of name to an array of mapping Weight and Elastic mapping,
     *                              e.g. array('Title' => array('Weight' => 2, 'Type' => 'string'))
     * @param boolean $testMode when true, test mode is switched on for the Elastica service
     * @return \PaginatedList SilverStripe DataObjects returned from the search against ElasticSearch
     */
    public function search($queryText, $fieldsToSearch = null, $testMode = false) {
        $this->resolveLocale();

        $qg = new QueryGenerator();
        $qg->setQueryText($queryText);
        $qg->setFields($fieldsToSearch);
        $qg->setSelectedFilters($this->filters);
        $qg->setClasses($this->classes);
        $qg->setPageLength($this->pageLength);
        $qg->setStart($this->start);
        $qg->setQueryResultManipulator($this->manipulator);
        $qg->setShowResultsForEmptyQuery($this->showResultsForEmptySearch);
        $query = $qg->generateElasticaQuery();

        $elasticService = \Injector::inst()->create('SilverStripe\Elastica\ElasticaService');
        $elasticService->setLocale($this->locale);
        $elasticService->setHighlightedFields($this->highlightedFields);
        if ($testMode) {
            $elasticService->setTestMode(true);
        }

        $resultList = new ResultList($elasticService, $query, $queryText, $this->filters);

        // restrict SilverStripe ClassNames returned
        // elasticsearch uses the notion of a 'type', and here this maps to a SilverStripe class
        $resultList->setTypes($this->classes);

        // set the optional aggregation manipulator
        $resultList->SearchHelper = $this->manipulator;

        $paginated = new \PaginatedList($resultList);
        $paginated->setPageStart($this->start);
        $paginated->setPageLength($this->pageLength);
        $paginated->setTotalItems($resultList->getTotalItems());

        $this->aggregations = $resultList->getAggregations();

        // only overwrite the stored suggestion when this search produced one
        if ($resultList->SuggestedQuery) {
            $this->SuggestedQuery = $resultList->SuggestedQuery;
            $this->SuggestedQueryHighlighted = $resultList->SuggestedQueryHighlighted;
        }

        return $paginated;
    }

    /**
     * Perform an autocomplete search. The configured classes and filters are applied when
     * set, the page length is honoured, and the search always starts from offset zero.
     * @param string $queryText the text typed so far
     * @param string $field name of the single field to search
     * @return ResultList result list for the autocomplete query (not paginated)
     */
    public function autocomplete_search($queryText, $field) {
        $this->resolveLocale();

        $qg = new QueryGenerator();
        $qg->setQueryText($queryText);

        //only one field but must be array
        $qg->setFields(array($field => 1));

        if ($this->classes) {
            $qg->setClasses($this->classes);
        }

        if (!empty($this->filters)) {
            $qg->setSelectedFilters($this->filters);
        }

        $qg->setPageLength($this->pageLength);
        $qg->setStart(0);
        $qg->setShowResultsForEmptyQuery(false);
        $query = $qg->generateElasticaAutocompleteQuery();

        $elasticService = \Injector::inst()->create('SilverStripe\Elastica\ElasticaService');
        $elasticService->setLocale($this->locale);
        $resultList = new ResultList($elasticService, $query, $queryText, $this->filters);

        // restrict SilverStripe ClassNames returned
        // elasticsearch uses the notion of a 'type', and here this maps to a SilverStripe class
        $resultList->setTypes($this->classes);

        return $resultList;
    }

    /**
     * Perform a 'More Like This' search, aka relevance feedback, using the provided indexed DataObject
     * @param \DataObject $indexedItem A DataObject that has been indexed in Elasticsearch
     * @param array $fieldsToSearch array of fieldnames to search, mapped to weighting
     * @param boolean $testMode Use all shards, not just one, for consistent results during unit testing. See
     * https://www.elastic.co/guide/en/elasticsearch/guide/current/relevance-is-broken.html#relevance-is-broken
     * @return \PaginatedList List of results
     * @throws \InvalidArgumentException if the item is null or not searchable, or if the fields are
     *                                   null/empty or not of the form fieldname => weight
     */
    public function moreLikeThis($indexedItem, $fieldsToSearch, $testMode = false) {
        if ($indexedItem == null) {
            throw new \InvalidArgumentException('A searchable item cannot be null');
        }

        if (!$indexedItem->hasExtension('SilverStripe\Elastica\Searchable')) {
            throw new \InvalidArgumentException('Objects of class '.$indexedItem->ClassName.' are not searchable');
        }

        // loose comparison on purpose: an empty array is rejected as well
        if ($fieldsToSearch == null) {
            throw new \InvalidArgumentException('Fields cannot be null');
        }

        $this->resolveLocale();

        $query = new Query();
        $query->setParams(array(
            'query' => array(
                'more_like_this' => $this->buildMoreLikeThisParams($indexedItem, $fieldsToSearch)
            )
        ));

        $elasticService = \Injector::inst()->create('SilverStripe\Elastica\ElasticaService');
        $elasticService->setLocale($this->locale);
        if ($testMode) {
            $elasticService->setTestMode(true);
        }

        // pagination
        $query->setSize($this->pageLength);
        $query->setFrom($this->start);

        $resultList = new ResultList($elasticService, $query, null);

        $paginated = new \PaginatedList($resultList);
        $paginated->setPageStart($this->start);
        $paginated->setPageLength($this->pageLength);
        $paginated->setTotalItems($resultList->getTotalItems());
        $this->aggregations = $resultList->getAggregations();

        return $paginated;
    }

    /**
     * @return boolean true if a suggested query has been stored by a previous search
     */
    public function hasSuggestedQuery() {
        return $this->SuggestedQuery != null;
    }

    /**
     * @return string|null the suggested query, or null if none has been stored
     */
    public function getSuggestedQuery() {
        return $this->SuggestedQuery;
    }

    /**
     * @return string|null the highlighted suggested query, or null if none has been stored
     */
    public function getSuggestedQueryHighlighted() {
        return $this->SuggestedQueryHighlighted;
    }

    /**
     * Ensure a locale is set: if none was provided use the current Translatable locale when
     * Translatable is installed and applied to SiteTree, otherwise the i18n default locale.
     * @return string the locale that will be used for searching
     */
    private function resolveLocale() {
        if ($this->locale == null) {
            if (class_exists('Translatable') && \SiteTree::has_extension('Translatable')) {
                $this->locale = \Translatable::get_current_locale();
            } else {
                // if no translatable we only have the default locale
                $this->locale = \i18n::default_locale();
            }
        }

        return $this->locale;
    }

    /**
     * Convert a fieldname => weight mapping into a list of field names with boost suffix,
     * e.g. array('Title' => 2, 'Content' => 1) becomes array('Title^2', 'Content').
     * @param array $fieldsToSearch array of fieldnames mapped to numeric weighting
     * @return string[] field names, with a '^weight' suffix for every weight other than 1
     * @throws \InvalidArgumentException if a key is not a string or a weight is not numeric
     */
    private function weightedFieldNames($fieldsToSearch) {
        $weightedFields = array();

        foreach ($fieldsToSearch as $field => $weighting) {
            if (!is_string($field) || !is_numeric($weighting)) {
                throw new \InvalidArgumentException('Fields must be of the form fieldname => weight');
            }

            // Only a weight of exactly 1 is dropped. Stripping the text '^1' would also
            // corrupt weights such as 10 ('Title0') or 1.5 ('Title.5').
            $weightedFields[] = ($weighting == 1) ? $field : $field.'^'.$weighting;
        }

        return $weightedFields;
    }

    /**
     * Assemble the parameters of the Elasticsearch more_like_this query
     * see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-mlt-query.html
     * @param object $indexedItem item providing the ClassName and ID of the document to compare against
     * @param array $fieldsToSearch array of fieldnames to search, mapped to weighting
     * @return array parameters for the 'more_like_this' query
     */
    private function buildMoreLikeThisParams($indexedItem, $fieldsToSearch) {
        // split the comma separated stop words, ignoring surrounding whitespace and empty entries
        $stopWords = array_values(array_filter(
            array_map('trim', explode(',', $this->similarityStopWords)),
            'strlen'
        ));

        $mlt = array(
            'fields' => $this->weightedFieldNames($fieldsToSearch),
            'docs' => array(
                array(
                    '_type' => $indexedItem->ClassName,
                    '_id' => $indexedItem->ID
                )
            ),

            // ---- term selection params ----
            'min_term_freq' => $this->minTermFreq,
            'max_query_terms' => $this->maxTermFreq,
            'min_doc_freq' => $this->minDocFreq,
            'min_word_length' => $this->minWordLength,
            'max_word_length' => $this->maxWordLength,

            // ---- query formation params ----
            // see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-minimum-should-match.html
            'minimum_should_match' => $this->minShouldMatch,

            'stop_words' => $stopWords
        );

        if ($this->maxDocFreq > 0) {
            $mlt['max_doc_freq'] = $this->maxDocFreq;
        }

        return $mlt;
    }
}
482
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.