1 | <?php |
||
2 | |||
3 | namespace SilverStripe\FullTextSearch\Solr; |
||
4 | |||
5 | use Exception; |
||
6 | use SilverStripe\Control\Director; |
||
7 | use SilverStripe\Core\Environment; |
||
8 | use SilverStripe\FullTextSearch\Search\Indexes\SearchIndex; |
||
9 | use SilverStripe\FullTextSearch\Search\Variants\SearchVariant_Caller; |
||
10 | use SilverStripe\FullTextSearch\Solr\Services\SolrService; |
||
11 | use SilverStripe\FullTextSearch\Search\Queries\SearchQuery; |
||
12 | use SilverStripe\FullTextSearch\Search\Queries\SearchQuery_Range; |
||
13 | use SilverStripe\FullTextSearch\Search\Variants\SearchVariant; |
||
14 | use SilverStripe\FullTextSearch\Search\SearchIntrospection; |
||
15 | use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore; |
||
16 | use SilverStripe\ORM\ArrayList; |
||
17 | use SilverStripe\ORM\DataObject; |
||
18 | use SilverStripe\ORM\FieldType\DBField; |
||
19 | use SilverStripe\ORM\PaginatedList; |
||
20 | use SilverStripe\View\ArrayData; |
||
21 | |||
22 | abstract class SolrIndex extends SearchIndex |
||
23 | { |
||
/**
 * Solr schema type to use per field type when a field is indexed as fulltext.
 * The '*' entry is the fallback for any type not listed.
 *
 * @var array
 */
public static $fulltextTypeMap = [
    '*' => 'text',
    'HTMLVarchar' => 'htmltext',
    'HTMLText' => 'htmltext',
];

/**
 * Solr schema type to use per field type for filter and sort fields, and
 * when deciding how values are normalised while building documents.
 * The '*' entry is the fallback for any type not listed.
 *
 * @var array
 */
public static $filterTypeMap = [
    '*' => 'string',
    'Boolean' => 'boolean',
    'Date' => 'tdate',
    'SSDatetime' => 'tdate',
    'SS_Datetime' => 'tdate',
    'ForeignKey' => 'tint',
    'Int' => 'tint',
    'Float' => 'tfloat',
    'Double' => 'tdouble',
];

/**
 * NOTE(review): not read by any code visible in this part of the class; confirm usage.
 *
 * @var array
 */
public static $sortTypeMap = [];

/**
 * Analyzer parameters keyed by full field name, then by analyzer type.
 * Filled by addAnalyzer().
 *
 * @var array
 */
protected $analyzerFields = [];

/**
 * Explicit copy field definitions keyed by source field. Filled by addCopyField().
 *
 * @var array
 */
protected $copyFields = [];

/**
 * Index-specific override for the 'extraspath' global Solr option.
 *
 * @var string|null
 */
protected $extrasPath = null;

/**
 * Index-specific override for the 'templatespath' global Solr option.
 *
 * @var string|null
 */
protected $templatesPath = null;

/**
 * Casting for the generated schema fragments.
 * NOTE(review): presumably keeps the generated XML unescaped when rendered in templates; confirm.
 *
 * @var array
 */
private static $casting = [
    'FieldDefinitions' => 'HTMLText',
    'CopyFieldDefinitions' => 'HTMLText',
];

/**
 * Boost value per boosted field name
 *
 * @var array
 */
protected $boostedFields = [];

/**
 * Name of the default field
 *
 * @var string
 * @config
 */
private static $default_field = '_text';

/**
 * Copy fields that every fulltext field should be copied into.
 * When left empty, default_field is used instead.
 *
 * @var array
 */
private static $copy_fields = [];
||
79 | |||
/**
 * Resolve the folder holding the templates used to generate the schema and
 * field definitions.
 *
 * The index-specific $templatesPath wins when set; otherwise the
 * 'templatespath' entry of the global Solr options is used. Trailing
 * slashes are removed.
 *
 * @return string Path to the templates folder, without a trailing slash
 */
public function getTemplatesPath()
{
    $solrOptions = Solr::solr_options();

    if ($this->templatesPath) {
        $folder = $this->templatesPath;
    } else {
        $folder = $solrOptions['templatespath'];
    }

    return rtrim($folder, '/');
}
||
90 | |||
/**
 * Resolve the folder holding the default configuration files,
 * e.g. solrconfig.xml.
 *
 * The index-specific $extrasPath wins when set; otherwise the 'extraspath'
 * entry of the global Solr options is used.
 *
 * @return string Path to the configuration default files
 */
public function getExtrasPath()
{
    $solrOptions = Solr::solr_options();

    if ($this->extrasPath) {
        return $this->extrasPath;
    }

    return $solrOptions['extraspath'];
}
||
100 | |||
/**
 * Render this index with the 'schema.ss' template from the templates folder.
 *
 * @return mixed Whatever renderWith() produces for the schema template
 */
public function generateSchema()
{
    $schemaTemplate = $this->getTemplatesPath() . '/schema.ss';

    return $this->renderWith($schemaTemplate);
}
||
105 | |||
/**
 * Build the name of this index: the class name with namespace separators
 * replaced by '-', wrapped in the optional SS_SOLR_INDEX_PREFIX and
 * SS_SOLR_INDEX_SUFFIX environment values.
 *
 * @return string
 */
public function getIndexName()
{
    $indexName = $this->sanitiseClassName(get_class($this), '-');

    $prefix = Environment::getEnv('SS_SOLR_INDEX_PREFIX');
    if ($prefix) {
        $indexName = $prefix . $indexName;
    }

    $suffix = Environment::getEnv('SS_SOLR_INDEX_SUFFIX');
    if ($suffix) {
        $indexName = $indexName . $suffix;
    }

    return $indexName;
}
||
128 | |||
/**
 * Render this index with the 'types.ss' template from the templates folder.
 *
 * @return mixed Whatever renderWith() produces for the types template
 */
public function getTypes()
{
    $typesTemplate = $this->getTemplatesPath() . '/types.ss';

    return $this->renderWith($typesTemplate);
}
||
133 | |||
/**
 * Register an index-time analyzer for a specific field.
 * Can be used to remove HTML tags, apply stemming, etc.
 *
 * The analyzer is recorded for every full field name that fieldData()
 * resolves for $field; a later call with the same $type replaces the
 * earlier parameters.
 *
 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
 *
 * @param string $field
 * @param string $type
 * @param array $params parameters for the analyzer, usually at least a "class"
 */
public function addAnalyzer($field, $type, $params)
{
    $resolvedFields = $this->fieldData($field);
    if (!$resolvedFields) {
        return;
    }

    foreach ($resolvedFields as $fullFieldName => $unusedSpec) {
        if (!isset($this->analyzerFields[$fullFieldName])) {
            $this->analyzerFields[$fullFieldName] = array();
        }
        $this->analyzerFields[$fullFieldName][$type] = $params;
    }
}
||
156 | |||
/**
 * Read the configured default text field (the 'default_field' config
 * value, '_text' unless overridden).
 *
 * @return string
 */
public function getDefaultField()
{
    $defaultField = $this->config()->default_field;

    return $defaultField;
}
||
166 | |||
/**
 * List the fields every fulltext field should be copied into.
 *
 * Uses the 'copy_fields' config value when it is non-empty, otherwise a
 * single-element list holding the default field.
 *
 * @return array
 */
protected function getCopyDestinations()
{
    $configured = $this->config()->copy_fields;

    // No explicit destinations configured: fall back to the default field
    return $configured ? $configured : array($this->getDefaultField());
}
||
183 | |||
/**
 * Generate the XML <field> definitions for this index.
 *
 * Output order: the hardcoded fields, the default (fulltext collation)
 * field, then the fulltext, filter and sort fields. 'ID' and 'ClassName'
 * are skipped in the filter and sort sets because they are already part
 * of the hardcoded definitions.
 *
 * @return string Definitions joined by a newline and two tabs
 */
public function getFieldDefinitions()
{
    $storedDefault = $this->getStoredDefault();
    $defaultField = $this->getDefaultField();

    $definitions = array(
        // Empty first entry, so the joined output starts with the separator
        "",
        // Hardcoded field definitions
        "<field name='_documentid' type='string' indexed='true' stored='true' required='true' />",
        "<field name='ID' type='tint' indexed='true' stored='true' required='true' />",
        "<field name='ClassName' type='string' indexed='true' stored='true' required='true' />",
        "<field name='ClassHierarchy' type='string' indexed='true' stored='true' required='true' multiValued='true' />",
        // Fulltext collation field
        "<field name='{$defaultField}' type='htmltext' indexed='true' stored='{$storedDefault}' multiValued='true' />",
    );

    // User-specified fulltext fields use the fulltext type map
    foreach ($this->fulltextFields as $fieldName => $spec) {
        $definitions[] = $this->getFieldDefinition($fieldName, $spec, self::$fulltextTypeMap);
    }

    // Filter fields first, then sort fields; both use the default type map
    foreach (array($this->filterFields, $this->sortFields) as $fieldSet) {
        foreach ($fieldSet as $fieldName => $spec) {
            $isHardcoded = $spec['fullfield'] == 'ID' || $spec['fullfield'] == 'ClassName';
            if (!$isHardcoded) {
                $definitions[] = $this->getFieldDefinition($fieldName, $spec);
            }
        }
    }

    return implode("\n\t\t", $definitions);
}
||
226 | |||
/**
 * Extract the first suggestion text from collated values.
 *
 * A string is returned unchanged. For an object exposing
 * misspellingsAndCorrections, the first correction is returned.
 * Anything else yields an empty string.
 *
 * @param mixed $collation
 * @return string
 */
protected function getCollatedSuggestion($collation = '')
{
    if (is_string($collation)) {
        return $collation;
    }

    if (!is_object($collation) || !isset($collation->misspellingsAndCorrections)) {
        return '';
    }

    // Only the first correction is of interest
    foreach ($collation->misspellingsAndCorrections as $correction) {
        return $correction;
    }

    return '';
}
||
247 | |||
/**
 * Turn a Solr spellcheck collation string into a human friendly suggestion
 * by stripping the leading '+' query operators from each space-separated part.
 *
 * @param string $collation
 * @return string
 */
protected function getNiceSuggestion($collation = '')
{
    $cleanedParts = array_map(
        function ($part) {
            // Remove advanced query params from the beginning of the part
            return ltrim($part, '+');
        },
        explode(' ', $collation)
    );

    return implode(' ', $cleanedParts);
}
||
264 | |||
/**
 * Turn a Solr spellcheck collation string into a query string value:
 * the nice suggestion with every space replaced by '+'.
 * Useful for constructing 'Did you mean?' links, for example:
 * <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
 *
 * @param string $collation
 * @return string
 */
protected function getSuggestionQueryString($collation = '')
{
    $niceSuggestion = $this->getNiceSuggestion($collation);

    return str_replace(' ', '+', $niceSuggestion);
}
||
276 | |||
/**
 * Add a fulltext field that should be stored.
 *
 * The 'stored' option is always forced to 'true', overriding any value
 * passed in $extraOptions.
 *
 * @param string $field The field to add
 * @param string $forceType The type to force this field as (required in some cases, when not
 * detectable from metadata)
 * @param array $extraOptions Dependent on search implementation
 */
public function addStoredField($field, $forceType = null, $extraOptions = array())
{
    $storedOption = array('stored' => 'true');

    $this->addFulltextField($field, $forceType, array_merge($extraOptions, $storedOption));
}
||
290 | |||
/**
 * Add a fulltext field with a boosted value.
 *
 * The 'boost' option is always set from $boost, overriding any value
 * passed in $extraOptions.
 *
 * @param string $field The field to add
 * @param string $forceType The type to force this field as (required in some cases, when not
 * detectable from metadata)
 * @param array $extraOptions Dependent on search implementation
 * @param float $boost Numeric boosting value (defaults to 2)
 */
public function addBoostedField($field, $forceType = null, $extraOptions = array(), $boost = 2)
{
    $boostOption = array('boost' => $boost);

    $this->addFulltextField($field, $forceType, array_merge($extraOptions, $boostOption));
}
||
305 | |||
306 | |||
/**
 * Resolve field data through the parent implementation, recording any
 * 'boost' option on this index instead of passing it on.
 *
 * @param string $field
 * @param string $forceType
 * @param array $extraOptions A 'boost' key, if present, is removed before the parent call
 * @return array Field data as returned by the parent, keyed by field name
 */
public function fieldData($field, $forceType = null, $extraOptions = array())
{
    // Pull 'boost' out of the options so it is recorded here without being captured by solr
    $boost = array_key_exists('boost', $extraOptions) ? $extraOptions['boost'] : null;
    unset($extraOptions['boost']);

    $data = parent::fieldData($field, $forceType, $extraOptions);

    // A null boost counts as "no boost"
    if ($boost === null) {
        return $data;
    }

    // Boost all fields with this name
    foreach ($data as $fieldName => $unusedInfo) {
        $this->boostedFields[$fieldName] = $boost;
    }

    return $data;
}
||
325 | |||
/**
 * Set the default boosting level for a specific field.
 * Will control the default value for qf param (Query Fields), but will not
 * override a query-specific value.
 *
 * Fields must be added before having a field boosting specified
 *
 * @param string $field Full field key (Model_Field)
 * @param float|null $level Numeric boosting value. Set to null to clear boost
 * @throws \InvalidArgumentException When $field is not a known fulltext field
 */
public function setFieldBoosting($field, $level)
{
    if (!isset($this->fulltextFields[$field])) {
        throw new \InvalidArgumentException("No fulltext field $field exists on " . $this->getIndexName());
    }

    if ($level !== null) {
        $this->boostedFields[$field] = $level;
        return;
    }

    // Null clears any existing boost
    unset($this->boostedFields[$field]);
}
||
347 | |||
/**
 * Get the boost value recorded for each boosted field.
 *
 * @return array Boost values keyed by field name
 */
public function getBoostedFields()
{
    $boosts = $this->boostedFields;

    return $boosts;
}
||
357 | |||
/**
 * Determine the best default value for the 'qf' parameter.
 *
 * Each boosted field becomes "<field>^<boost>". The default field is
 * appended without a boost unless it is itself boosted.
 *
 * @return array|null List of query fields, or null if not specified
 */
public function getQueryFields()
{
    // Not necessary to specify this unless boosting
    if (empty($this->boostedFields)) {
        return null;
    }

    $boostedTerms = array();
    foreach ($this->boostedFields as $boostedName => $boostLevel) {
        $boostedTerms[] = "{$boostedName}^{$boostLevel}";
    }

    // If any fields are queried, the default field must be included too, otherwise it would be excluded
    $defaultField = $this->getDefaultField();
    $defaultIsBoosted = isset($this->boostedFields[$defaultField]);
    if ($boostedTerms && !$defaultIsBoosted) {
        $boostedTerms[] = $defaultField;
    }

    return $boostedTerms;
}
||
382 | |||
/**
 * Gets the default 'stored' value for fields in this index:
 * 'true' when Director reports dev mode, 'false' otherwise.
 *
 * @return string A default value for the 'stored' field option, either 'true' or 'false'
 */
protected function getStoredDefault()
{
    if (Director::isDev()) {
        return 'true';
    }

    return 'false';
}
||
392 | |||
/**
 * Build the XML <field> definition for a single field.
 *
 * The Solr type is looked up in $typeMap by the spec's 'type', falling back
 * to the map's '*' entry. Any 'extra_options' in the spec override the
 * default attributes. Analyzers registered for $name are nested inside an
 * <analyzer> element.
 *
 * @param string $name
 * @param array $spec Reads 'type', and optionally 'multi_valued' and 'extra_options'
 * @param array $typeMap Defaults to self::$filterTypeMap when empty
 * @return string XML
 */
protected function getFieldDefinition($name, $spec, $typeMap = null)
{
    if (!$typeMap) {
        $typeMap = self::$filterTypeMap;
    }

    $specType = $spec['type'];
    $solrType = isset($typeMap[$specType]) ? $typeMap[$specType] : $typeMap['*'];

    $attributes = array(
        'name' => $name,
        'type' => $solrType,
        'indexed' => 'true',
        'stored' => $this->getStoredDefault(),
        // Empty string (not 'false') when the field is not multi valued
        'multiValued' => !empty($spec['multi_valued']) ? "true" : '',
    );
    if (isset($spec['extra_options'])) {
        $attributes = array_merge($attributes, $spec['extra_options']);
    }

    $analyzers = isset($this->analyzerFields[$name]) ? $this->analyzerFields[$name] : array();
    $analyzerXml = '';
    foreach ($analyzers as $analyzerType => $analyzerParams) {
        $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
    }

    $innerXml = $analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null;

    return $this->toXmlTag("field", $attributes, $innerXml);
}
||
431 | |||
/**
 * Convert a definition to an XML tag.
 *
 * Attribute values are wrapped in single quotes and written as given (no
 * escaping is applied). The tag is self-closing unless $content is truthy.
 *
 * @param string $tag
 * @param array $attrs Map of attribute name to value
 * @param string $content Inner content
 * @return string XML tag
 */
protected function toXmlTag($tag, $attrs, $content = null)
{
    $pairs = array();
    if ($attrs) {
        foreach ($attrs as $attrName => $attrVal) {
            $pairs[] = "$attrName='$attrVal'";
        }
    }

    // The space after the tag name is emitted even when there are no attributes
    $opening = "<$tag " . implode(' ', $pairs);

    if ($content) {
        return $opening . ">$content</$tag>";
    }

    return $opening . '/>';
}
||
453 | |||
/**
 * Register an explicit copy field definition.
 *
 * @param string $source Composite field name (<class>_<fieldname>)
 * @param string $dest
 * @param array $extraOptions Further attributes; these override 'source' and 'dest' on key clashes
 */
public function addCopyField($source, $dest, $extraOptions = array())
{
    if (!isset($this->copyFields[$source])) {
        $this->copyFields[$source] = array();
    }

    $baseAttributes = array('source' => $source, 'dest' => $dest);
    $this->copyFields[$source][] = array_merge($baseAttributes, $extraOptions);
}
||
468 | |||
/**
 * Generate XML for copy field definitions.
 *
 * First every fulltext field is copied into each default destination,
 * then the explicitly registered copy fields follow.
 *
 * @return string Definitions joined by a newline and a tab
 */
public function getCopyFieldDefinitions()
{
    $definitions = array();

    // Default copy fields
    foreach ($this->getCopyDestinations() as $destination) {
        foreach ($this->fulltextFields as $sourceName => $unusedSpec) {
            $definitions[] = "<copyField source='{$sourceName}' dest='{$destination}' />";
        }
    }

    // Explicit copy fields
    foreach ($this->copyFields as $definitionsForSource) {
        foreach ($definitionsForSource as $attributes) {
            $definitions[] = $this->toXmlTag('copyField', $attributes);
        }
    }

    return implode("\n\t", $definitions);
}
||
494 | |||
/**
 * Determine if the given class is, or is a subclass of, one of the given base classes.
 *
 * @param string $class
 * @param array|string $base Class or list of base classes
 * @return bool
 */
protected function classIs($class, $base)
{
    // Single base class: exact match or subclass
    if (!is_array($base)) {
        return $class === $base || is_subclass_of($class, $base);
    }

    // List of base classes: any match is enough
    foreach ($base as $candidate) {
        if ($this->classIs($class, $candidate)) {
            return true;
        }
    }

    return false;
}
||
516 | |||
/**
 * Add the value(s) of a single index field for $object to a Solr document.
 *
 * Nothing is added when the object's class is neither one of the field's
 * origin classes nor a subclass of one. Array values are appended one by
 * one with addField(); a scalar value is written with setField() unless it
 * is null.
 *
 * Values are normalised by the Solr type resolved from self::$filterTypeMap:
 *  - 'tdate': reformatted to the UTC form Solr requires (1995-12-31T23:59:59Z);
 *    empty values and values strtotime() cannot parse are skipped
 *  - 'tint' / 'tfloat' / 'tdouble': non-numeric values are skipped
 *
 * @param \Apache_Solr_Document $doc Document being built
 * @param object $object Record being indexed
 * @param array $field Field spec; reads the 'origin', 'type' and 'name' keys
 */
protected function _addField($doc, $object, $field)
{
    $class = get_class($object);
    if (!$this->classIs($class, $field['origin'])) {
        return;
    }

    $value = $this->_getFieldValue($object, $field);

    $type = isset(self::$filterTypeMap[$field['type']])
        ? self::$filterTypeMap[$field['type']]
        : self::$filterTypeMap['*'];
    $isNumericType = ($type == 'tint' || $type == 'tfloat' || $type == 'tdouble');

    // Handle scalar and multi-valued fields through the same normalisation path
    $isMultiValued = is_array($value);
    $items = $isMultiValued ? $value : array($value);

    foreach ($items as $item) {
        /* Solr requires dates in the form 1995-12-31T23:59:59Z */
        if ($type == 'tdate') {
            if (!$item) {
                continue;
            }
            $timestamp = strtotime($item);
            // An unparsable date would otherwise be indexed as the Unix epoch
            if ($timestamp === false) {
                continue;
            }
            $item = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
        }

        /* Solr requires numbers to be valid if presented, not just empty */
        if ($isNumericType && !is_numeric($item)) {
            continue;
        }

        if ($isMultiValued) {
            $doc->addField($field['name'], $item);
        } elseif ($item !== null) {
            // Only index scalar fields that are not null
            $doc->setField($field['name'], $item);
        }
    }
}
||
565 | |||
/**
 * Build a Solr document for $object as a member of $base and send it to the service.
 *
 * The document always carries '_documentid', 'ID', 'ClassName' and one
 * 'ClassHierarchy' value per class returned by SearchIntrospection::hierarchy().
 * Index fields are added when their 'base' equals $base or, for a list of
 * bases, contains it.
 *
 * @param object $object Record being indexed
 * @param string $base Base class the record is indexed as
 * @param array $options Reads the 'include_children' key
 * @return \Apache_Solr_Document|bool The document, or false when adding it to the service failed
 */
protected function _addAs($object, $base, $options)
{
    $includeChildren = $options['include_children'];

    $document = new \Apache_Solr_Document();

    // Always present fields
    $document->setField('_documentid', $this->getDocumentID($object, $base, $includeChildren));
    $document->setField('ID', $object->ID);
    $document->setField('ClassName', $object->ClassName);

    $hierarchy = SearchIntrospection::hierarchy(get_class($object), false);
    foreach ($hierarchy as $ancestorClass) {
        $document->addField('ClassHierarchy', $ancestorClass);
    }

    // Add the user-specified fields
    foreach ($this->getFieldsIterator() as $fieldSpec) {
        $fieldBase = $fieldSpec['base'];
        $appliesToBase = $fieldBase === $base || (is_array($fieldBase) && in_array($base, $fieldBase));
        if (!$appliesToBase) {
            continue;
        }
        $this->_addField($document, $object, $fieldSpec);
    }

    try {
        $this->getService()->addDocument($document);
    } catch (Exception $e) {
        // Report the failure as a warning rather than propagating it
        static::warn($e);
        return false;
    }

    return $document;
}
||
599 | |||
/**
 * Add $object to the index once for every indexed class it matches.
 *
 * A class matches when it equals the object's class, or when its
 * 'include_children' option is set and the object's class is a subclass of it.
 *
 * @param object $object Record to index
 * @return array One entry per matching class: the result of _addAs()
 */
public function add($object)
{
    $objectClass = get_class($object);
    $documents = array();

    foreach ($this->getClasses() as $searchClass => $classOptions) {
        $matches = $searchClass == $objectClass
            || ($classOptions['include_children'] && is_subclass_of($objectClass, $searchClass));
        if (!$matches) {
            continue;
        }

        $baseClass = DataObject::getSchema()->baseDataClass($searchClass);
        $documents[] = $this->_addAs($object, $baseClass, $classOptions);
    }

    return $documents;
}
||
614 | |||
/**
 * Check whether objects of the given class can be added to this index.
 *
 * True when $class equals an indexed class, or when an indexed class has
 * 'include_children' set and $class is a subclass of it.
 *
 * @param string $class
 * @return bool
 */
public function canAdd($class)
{
    foreach ($this->classes as $searchClass => $classOptions) {
        if ($searchClass == $class) {
            return true;
        }
        if ($classOptions['include_children'] && is_subclass_of($class, $searchClass)) {
            return true;
        }
    }

    return false;
}
||
625 | |||
/**
 * Delete the document identified by base class, ID and state from the service.
 *
 * @param string $base
 * @param int $id
 * @param mixed $state Passed to getDocumentIDForState() to build the document ID
 * @return bool True on success, false when the service call threw (the exception is reported as a warning)
 */
public function delete($base, $id, $state)
{
    $documentId = $this->getDocumentIDForState($base, $id, $state);

    try {
        $this->getService()->deleteById($documentId);
        return true;
    } catch (Exception $e) {
        static::warn($e);
        return false;
    }
}
||
639 | |||
/**
 * Clear all records which do not match the given classname whitelist.
 *
 * Can also be used to trim an index when reducing to a narrower set of classes.
 *
 * Ignores current state / variant.
 *
 * Class names are escaped with sanitiseClassName() before being placed in
 * the delete query, the same way search() builds its class filters. An
 * unescaped namespace separator is treated by Solr as an escape character,
 * so the whitelist would match nothing and the negated query would delete
 * every record.
 *
 * @param array $classes List of non-obsolete classes in the same format as SolrIndex::getClasses()
 * @return bool False when $classes is empty (nothing is deleted), true once the delete query was sent
 * @throws \Apache_Solr_HttpTransportException
 */
public function clearObsoleteClasses($classes)
{
    if (empty($classes)) {
        return false;
    }

    // Build the whitelist: one condition per class that should be kept
    $conditions = array();
    foreach ($classes as $class => $options) {
        $escapedClass = $this->sanitiseClassName($class);
        if ($options['include_children']) {
            $conditions[] = "ClassHierarchy:{$escapedClass}";
        } else {
            $conditions[] = "ClassName:{$escapedClass}";
        }
    }

    // Delete records which don't match any of these conditions in this index
    $deleteQuery = "-(" . implode(' ', $conditions) . ")";
    $this
        ->getService()
        ->deleteByQuery($deleteQuery);

    return true;
}
||
674 | |||
/**
 * Commit pending changes on the Solr service.
 *
 * NOTE(review): the three `false` arguments are passed to the service's
 * commit() as-is; their meaning is defined by the service class.
 *
 * @return bool True on success, false when the service call threw (the exception is reported as a warning)
 */
public function commit()
{
    try {
        $this->getService()->commit(false, false, false);
        return true;
    } catch (Exception $e) {
        static::warn($e);
        return false;
    }
}
||
686 | |||
/**
 * Run a search query against this index's Solr service.
 *
 * Search variants are applied to $query first. The request is sent with
 * POST; class filters and query filters go into 'fq', and 'qf' is taken
 * from $params or, failing that, from getQueryFields(). In dev mode the
 * query, filters and query fields are also emitted as X-* response headers
 * when headers can still be sent.
 *
 * @param SearchQuery $query
 * @param integer $offset First result to return; -1 uses $query->start
 * @param integer $limit Page size; -1 uses $query->limit, then SearchQuery::$default_page_size
 * @param array $params Extra request parameters passed through to Solr
 * @return ArrayData Map with the following keys:
 *  - 'Matches': PaginatedList of the matched object instances; matches with
 *    highlighting data get an 'Excerpt' property
 *  - 'Spellcheck': raw spellcheck data, only when the response contains it
 *  - 'Suggestion', 'SuggestionNice', 'SuggestionQueryString': only when the
 *    spellcheck data contains a collation
 * @throws \Apache_Solr_HttpTransportException
 * @throws \Apache_Solr_InvalidArgumentException
 */
public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array())
{
    $service = $this->getService();
    $this->applySearchVariants($query);

    $fq = array(); // Filter query
    $hlq = array(); // Highlight query, filled by reference below

    // Build the search itself
    $q = $this->getQueryComponent($query, $hlq);

    // If using boosting, set the clean term separately for highlighting.
    // See https://issues.apache.org/jira/browse/SOLR-2632
    if (array_key_exists('hl', $params) && !array_key_exists('hl.q', $params)) {
        $params['hl.q'] = implode(' ', $hlq);
    }

    // Filter by class if requested
    $classq = array();
    foreach ($query->classes as $class) {
        if (!empty($class['includeSubclasses'])) {
            $classq[] = 'ClassHierarchy:' . $this->sanitiseClassName($class['class']);
        } else {
            $classq[] = 'ClassName:' . $this->sanitiseClassName($class['class']);
        }
    }
    if ($classq) {
        $fq[] = '+(' . implode(' ', $classq) . ')';
    }

    // Filter by filters
    $fq = array_merge($fq, $this->getFiltersComponent($query));

    // Prepare query fields unless specified explicitly
    if (isset($params['qf'])) {
        $qf = $params['qf'];
    } else {
        $qf = $this->getQueryFields();
    }
    if (is_array($qf)) {
        $qf = implode(' ', $qf);
    }
    if ($qf) {
        $params['qf'] = $qf;
    }

    // Expose the generated query as debug headers, in dev mode only
    if (!headers_sent() && Director::isDev()) {
        if ($q) {
            header('X-Query: ' . implode(' ', $q));
        }
        if ($fq) {
            header('X-Filters: "' . implode('", "', $fq) . '"');
        }
        if ($qf) {
            header('X-QueryFields: ' . $qf);
        }
    }

    // Resolve paging: explicit arguments, then the query, then the default page size
    if ($offset == -1) {
        $offset = $query->start;
    }
    if ($limit == -1) {
        $limit = $query->limit;
    }
    if ($limit == -1) {
        $limit = SearchQuery::$default_page_size;
    }

    $params = array_merge($params, array('fq' => implode(' ', $fq)));

    $res = $service->search(
        $q ? implode(' ', $q) : '*:*',
        $offset,
        $limit,
        $params,
        \Apache_Solr_Service::METHOD_POST
    );

    $results = new ArrayList();
    if ($res->getHttpStatus() >= 200 && $res->getHttpStatus() < 300) {
        foreach ($res->response->docs as $doc) {
            $result = DataObject::get_by_id($doc->ClassName, $doc->ID);
            if (!$result) {
                // Indexed record can no longer be loaded; leave it out of the results
                continue;
            }
            $results->push($result);

            // Add highlighting (optional). Not every document has a highlighting entry.
            $docId = $doc->_documentid;
            if ($res->highlighting && isset($res->highlighting->$docId) && $res->highlighting->$docId) {
                // TODO Create decorator class for search results rather than adding arbitrary object properties
                // TODO Allow specifying highlighted field, and lazy loading
                // in case the search API needs another query (similar to SphinxSearchable->buildExcerpt()).
                $combinedHighlights = array();
                foreach ($res->highlighting->$docId as $field => $highlights) {
                    $combinedHighlights = array_merge($combinedHighlights, $highlights);
                }

                // Remove the U+FFFD replacement character, both entity-encoded and as a raw
                // UTF-8 sequence. It signifies non-displayable characters, and shows up as
                // an encoding error in browsers.
                $result->Excerpt = DBField::create_field(
                    'HTMLText',
                    str_replace(
                        array('&#65533;', "\xEF\xBF\xBD"),
                        '',
                        implode(' ... ', $combinedHighlights)
                    )
                );
            }
        }
        $numFound = $res->response->numFound;
    } else {
        $numFound = 0;
    }

    $ret = array();
    $ret['Matches'] = new PaginatedList($results);
    $ret['Matches']->setLimitItems(false);
    // Tell PaginatedList how many results there are
    $ret['Matches']->setTotalItems($numFound);
    // Results for current page start at $offset
    $ret['Matches']->setPageStart($offset);
    // Results per page
    $ret['Matches']->setPageLength($limit);

    // Include spellcheck and suggestion data. Requires spellcheck=true in $params
    if (isset($res->spellcheck)) {
        // Expose all spellcheck data, for custom handling.
        $ret['Spellcheck'] = $res->spellcheck;

        // Suggestions. Requires spellcheck.collate=true in $params
        if (isset($res->spellcheck->suggestions->collation)) {
            // Extract string suggestion
            $suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);

            // The collation, including advanced query params (e.g. +), suitable for making another query
            // programmatically.
            $ret['Suggestion'] = $suggestion;

            // A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
            $ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);

            // A string suitable for appending to an href as a query string.
            // For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
            $ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
        }
    }

    $ret = new ArrayData($ret);

    // Enable extensions to add extra data from the response into
    // the returned results set.
    $this->extend('updateSearchResults', $ret, $res);

    return $ret;
}
||
854 | |||
/**
 * Let the search variants relevant to at least one class alter the query,
 * so they can add their variant column conditions.
 *
 * The class list comes from the query when it names any classes, otherwise
 * from this index.
 *
 * @param SearchQuery $query
 */
protected function applySearchVariants(SearchQuery $query)
{
    if (count($query->classes)) {
        $classes = $query->classes;
    } else {
        $classes = $this->getClasses();
    }

    /** @var SearchVariant_Caller $variantCaller */
    $variantCaller = SearchVariant::withCommon($classes);
    $variantCaller->call('alterQuery', $query, $this);
}
||
869 | |||
/**
 * Replace the namespace separators in a class name.
 *
 * By default each backslash is doubled, because Solr requires namespaced
 * classes to have escaped backslashes.
 *
 * @param string $className E.g. My\Object\Here
 * @param string $replaceWith The replacement character(s) to use
 * @return string E.g. My\\Object\\Here
 */
public function sanitiseClassName($className, $replaceWith = '\\\\')
{
    $namespaceSeparator = '\\';

    return str_replace($namespaceSeparator, $replaceWith, $className);
}
||
881 | |||
/**
 * Get the query (q) component for this search.
 *
 * Each search text is split into terms (quoted phrases stay together).
 * Every term becomes a required clause: either the bare term, or an OR
 * group of "<field>:<term>" clauses when the search names fields or boosts.
 * Fuzzy searches get a '~' suffix and boosted fields a "^<boost>" suffix.
 *
 * Boosts are looked up by the field name as given in the search, while the
 * name written into the query has its namespace separators escaped.
 *
 * @param SearchQuery $searchQuery
 * @param array &$hlq Highlight query returned by reference; every raw term is appended
 * @return array List of query clauses
 */
protected function getQueryComponent(SearchQuery $searchQuery, &$hlq = array())
{
    $q = array();
    foreach ($searchQuery->search as $search) {
        $text = $search['text'];
        preg_match_all('/"[^"]*"|\S+/', $text, $parts);

        $fuzzy = $search['fuzzy'] ? '~' : '';

        // The field list is the same for every term of this search
        $fields = (isset($search['fields'])) ? $search['fields'] : array();
        if (isset($search['boost'])) {
            $fields = array_merge($fields, array_keys($search['boost']));
        }

        foreach ($parts[0] as $part) {
            if ($fields) {
                $searchq = array();
                foreach ($fields as $field) {
                    // Escape namespace separators in class names for the query only;
                    // the boost map is keyed by the unescaped field name
                    $escapedField = $this->sanitiseClassName($field);

                    $boost = (isset($search['boost'][$field])) ? '^' . $search['boost'][$field] : '';
                    $searchq[] = "{$escapedField}:" . $part . $fuzzy . $boost;
                }
                $q[] = '+(' . implode(' OR ', $searchq) . ')';
            } else {
                $q[] = '+' . $part . $fuzzy;
            }
            $hlq[] = $part;
        }
    }
    return $q;
}
||
921 | |||
/**
 * Parse all require constraints for inclusion in a filter query
 *
 * Every required field yields one mandatory (`+`) group holding a clause per value:
 *  - `SearchQuery::$missing` matches documents without any value for the field
 *  - `SearchQuery::$present` matches documents with any value for the field
 *  - a `SearchQuery_Range` matches the given range, with `*` standing in for an open end
 *  - anything else is matched as a quoted literal
 *
 * NOTE(review): field names are used exactly as given here, whereas the exclude filters escape
 * namespace separators first - confirm whether callers are expected to pre-escape them.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each require
 */
protected function getRequireFiltersComponent(SearchQuery $searchQuery)
{
    $filters = array();

    foreach ($searchQuery->require as $field => $values) {
        $clauses = array();

        foreach ($values as $value) {
            if ($value === SearchQuery::$missing) {
                $clauses[] = '(*:* -' . $field . ':[* TO *])';
                continue;
            }

            if ($value === SearchQuery::$present) {
                $clauses[] = $field . ':[* TO *]';
                continue;
            }

            if ($value instanceof SearchQuery_Range) {
                // A null bound means the range is open on that side
                $from = $value->start;
                $from = ($from === null) ? '*' : $from;
                $to = $value->end;
                $to = ($to === null) ? '*' : $to;
                $clauses[] = $field . ':[' . $from . ' TO ' . $to . ']';
                continue;
            }

            $clauses[] = "{$field}:\"{$value}\"";
        }

        $filters[] = '+(' . implode(' ', $clauses) . ')';
    }

    return $filters;
}
||
958 | |||
/**
 * Parse all exclude constraints for inclusion in a filter query
 *
 * Every excluded field yields one filter string made of up to two parts:
 *  - `+field:[* TO *]` when `SearchQuery::$missing` is excluded (i.e. a value must be present)
 *  - `-( ... )` holding one clause per remaining value: `SearchQuery::$present` as an open range,
 *    a `SearchQuery_Range` as the given range (`*` for an open end), anything else as a quoted literal
 *
 * A part is only emitted when it has content, so excluding nothing but `SearchQuery::$missing` no
 * longer produces an empty `-()` group (which carries no condition and is presumably rejected by
 * Solr's query parser - confirm), and a field without any values contributes no filter at all.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each exclusion
 */
protected function getExcludeFiltersComponent(SearchQuery $searchQuery)
{
    $fq = array();
    foreach ($searchQuery->exclude as $field => $values) {
        // Handle namespaced class names
        $field = $this->sanitiseClassName($field);

        $excludeq = [];
        $missing = false;

        foreach ($values as $value) {
            if ($value === SearchQuery::$missing) {
                $missing = true;
            } elseif ($value === SearchQuery::$present) {
                $excludeq[] = "{$field}:[* TO *]";
            } elseif ($value instanceof SearchQuery_Range) {
                // A null bound means the range is open on that side
                $start = $value->start;
                if ($start === null) {
                    $start = '*';
                }
                $end = $value->end;
                if ($end === null) {
                    $end = '*';
                }
                $excludeq[] = "$field:[$start TO $end]";
            } else {
                $excludeq[] = $field . ':"' . $value . '"';
            }
        }

        // Assemble only the parts that actually hold a condition
        $parts = [];
        if ($missing) {
            $parts[] = "+{$field}:[* TO *]";
        }
        if ($excludeq) {
            $parts[] = '-(' . implode(' ', $excludeq) . ')';
        }
        if ($parts) {
            $fq[] = implode(' ', $parts);
        }
    }
    return $fq;
}
||
999 | |||
/**
 * Build the criteria part of the filter query as a single prepared statement string.
 *
 * The statement starts with whatever the query's adapter prepends, has every criteria clause of the
 * query appended to it in order, and ends with whatever the adapter appends.
 *
 * @param SearchQuery $searchQuery
 * @return string|null The prepared statement, or null when the query has no criteria
 * @throws \Exception When the query has criteria but no adapter
 */
protected function getCriteriaComponent(SearchQuery $searchQuery)
{
    $hasCriteria = count($searchQuery->getCriteria()) !== 0;
    if (!$hasCriteria) {
        return null;
    }

    if ($searchQuery->getAdapter() === null) {
        throw new \Exception('SearchQuery does not have a SearchAdapter applied');
    }

    // Open the statement with the adapter's prefix (need to start with a positive conjunction)
    $statement = $searchQuery->getAdapter()->getPrependToCriteriaComponent();

    // Each clause writes itself onto the statement using the query's adapter
    foreach ($searchQuery->getCriteria() as $criterion) {
        $criterion->setAdapter($searchQuery->getAdapter());
        $criterion->appendPreparedStatementTo($statement);
    }

    // Close the statement with the adapter's suffix
    $statement .= $searchQuery->getAdapter()->getAppendToCriteriaComponent();

    return $statement;
}
||
1029 | |||
/**
 * Get all filter conditions for this search
 *
 * Combines the require filters and the exclude filters (in that order) and, when the query has
 * criteria, adds the criteria component as the last entry.
 *
 * @param SearchQuery $searchQuery
 * @return array
 * @throws \Exception
 */
public function getFiltersComponent(SearchQuery $searchQuery)
{
    // Resolved first, so a query with criteria but no adapter fails before anything else is built
    $criteria = $this->getCriteriaComponent($searchQuery);

    $required = $this->getRequireFiltersComponent($searchQuery);
    $excluded = $this->getExcludeFiltersComponent($searchQuery);
    $filters = array_merge($required, $excluded);

    if ($criteria === null) {
        return $filters;
    }

    $filters[] = $criteria;

    return $filters;
}
||
1052 | |||
/**
 * Service used by this index; resolved on first use unless one has been set explicitly
 *
 * @var SolrService|null
 */
protected $service;

/**
 * Get the service for this index, looking it up by the index class name the first time it is needed.
 *
 * @return SolrService
 */
public function getService()
{
    if ($this->service) {
        return $this->service;
    }

    $this->service = Solr::service(static::class);

    return $this->service;
}
||
1065 | |||
/**
 * Set the service this index should use instead of resolving one itself.
 *
 * @param SolrService $service
 * @return $this
 */
public function setService(SolrService $service)
{
    $this->service = $service;

    return $this;
}
||
1071 | |||
/**
 * Upload config for this index to the given store
 *
 * Uploads the generated schema as `schema.xml`, followed by every regular file found directly
 * inside the extras path (directories are skipped).
 *
 * @param SolrConfigStore $store
 */
public function uploadConfig($store)
{
    // Upload the config files for this index
    $store->uploadString(
        $this->getIndexName(),
        'schema.xml',
        (string)$this->generateSchema()
    );

    // glob() returns false on error; treat that as "no extra files" rather than iterating over false
    $extras = glob($this->getExtrasPath() . '/*');
    if ($extras === false) {
        $extras = array();
    }

    // Upload additional files
    foreach ($extras as $file) {
        if (is_file($file)) {
            $store->uploadFile($this->getIndexName(), $file);
        }
    }
}
||
1093 | } |
||
1094 |