Total Complexity | 187 |
Total Lines | 1078 |
Duplicated Lines | 0 % |
Changes | 7 | ||
Bugs | 0 | Features | 0 |
Complex classes like SolrIndex often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use SolrIndex, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
23 | abstract class SolrIndex extends SearchIndex |
||
24 | { |
||
/**
 * Solr field type used for fulltext fields, keyed by field type name.
 * The '*' entry is the fallback for types without an explicit mapping.
 *
 * @var array
 */
public static $fulltextTypeMap = [
    '*' => 'text',
    'HTMLVarchar' => 'htmltext',
    'HTMLText' => 'htmltext',
];

/**
 * Solr field type used for filter and sort fields, keyed by field type name.
 * The '*' entry is the fallback for types without an explicit mapping.
 *
 * @var array
 */
public static $filterTypeMap = [
    '*' => 'string',
    'Boolean' => 'boolean',
    'Date' => 'tdate',
    'Datetime' => 'tdate',
    'DBDate' => 'tdate',
    'DBDatetime' => 'tdate',
    'SSDatetime' => 'tdate',
    'SS_Datetime' => 'tdate',
    'ForeignKey' => 'tint',
    'Int' => 'tint',
    'Float' => 'tfloat',
    'Double' => 'tdouble',
];

/**
 * NOTE(review): empty and not referenced in the visible part of this class.
 *
 * @var array
 */
public static $sortTypeMap = [];

/**
 * Analyzer parameters keyed by full field name, then by analyzer type.
 * Filled by addAnalyzer().
 *
 * @var array
 */
protected $analyzerFields = [];

/**
 * Explicit copy field definitions keyed by source field name.
 * Filled by addCopyField().
 *
 * @var array
 */
protected $copyFields = [];

/**
 * Index-specific extras path; when empty the global Solr option is used.
 *
 * @var string|null
 */
protected $extrasPath = null;

/**
 * Index-specific templates path; when empty the global Solr option is used.
 *
 * @var string|null
 */
protected $templatesPath = null;

/**
 * NOTE(review): presumably template casting hints for the two XML-producing
 * getters - confirm against the framework's casting configuration.
 *
 * @var array
 */
private static $casting = [
    'FieldDefinitions' => 'HTMLText',
    'CopyFieldDefinitions' => 'HTMLText',
];

/**
 * Boost values keyed by full field name
 *
 * @var array
 */
protected $boostedFields = [];

/**
 * Name of the default field
 *
 * @var string
 * @config
 */
private static $default_field = '_text';

/**
 * Copy fields every fulltext field should be copied into.
 * When left empty, default_field is used instead.
 *
 * @var array
 */
private static $copy_fields = [];
||
83 | |||
/**
 * Resolve the folder containing the templates used for generating the
 * schema and field definitions.
 *
 * The index-specific $templatesPath wins when set; otherwise the
 * 'templatespath' entry of the global Solr options is used.
 *
 * @return string Path to the templates folder with trailing slashes removed
 */
public function getTemplatesPath()
{
    $options = Solr::solr_options();

    if ($this->templatesPath) {
        $folder = $this->templatesPath;
    } else {
        $folder = $options['templatespath'];
    }

    return rtrim($folder, '/');
}
||
94 | |||
/**
 * Resolve the folder containing the default configuration files,
 * e.g. solrconfig.xml.
 *
 * The index-specific $extrasPath wins when set; otherwise the
 * 'extraspath' entry of the global Solr options is used.
 *
 * @return string Path to the extras folder
 */
public function getExtrasPath()
{
    $options = Solr::solr_options();

    if ($this->extrasPath) {
        return $this->extrasPath;
    }

    return $options['extraspath'];
}
||
104 | |||
/**
 * Render the schema for this index from the schema.ss template located in
 * the templates folder.
 *
 * @return mixed Whatever renderWith() produces for the schema template
 */
public function generateSchema()
{
    $template = $this->getTemplatesPath() . '/schema.ss';

    return $this->renderWith($template);
}
||
109 | |||
/**
 * Build the index name from this class name (namespace separators replaced
 * by '-'), optionally wrapped in the SS_SOLR_INDEX_PREFIX and
 * SS_SOLR_INDEX_SUFFIX environment values. No separator is inserted between
 * the parts.
 *
 * @return string
 */
public function getIndexName()
{
    $indexName = $this->sanitiseClassName(get_class($this), '-');

    $prefix = Environment::getEnv('SS_SOLR_INDEX_PREFIX');
    if ($prefix) {
        $indexName = $prefix . $indexName;
    }

    $suffix = Environment::getEnv('SS_SOLR_INDEX_SUFFIX');
    if ($suffix) {
        $indexName = $indexName . $suffix;
    }

    return $indexName;
}
||
132 | |||
/**
 * Render the field type definitions from the types.ss template located in
 * the templates folder.
 *
 * @return mixed Whatever renderWith() produces for the types template
 */
public function getTypes()
{
    $template = $this->getTemplatesPath() . '/types.ss';

    return $this->renderWith($template);
}
||
137 | |||
/**
 * Register an index-time analyzer for a field.
 * Can be used to remove HTML tags, apply stemming, etc.
 *
 * The analyzer is recorded for every full field name that fieldData()
 * resolves for $field; a later call with the same $type replaces the
 * earlier parameters.
 *
 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
 *
 * @param string $field
 * @param string $type
 * @param array $params parameters for the analyzer, usually at least a "class"
 */
public function addAnalyzer($field, $type, $params)
{
    $resolved = $this->fieldData($field);
    if (!$resolved) {
        return;
    }

    foreach ($resolved as $fullField => $unusedSpec) {
        $analyzers = isset($this->analyzerFields[$fullField])
            ? $this->analyzerFields[$fullField]
            : array();
        $analyzers[$type] = $params;
        $this->analyzerFields[$fullField] = $analyzers;
    }
}
||
160 | |||
/**
 * Read the name of the default text field from the default_field config
 * setting (declared as '_text' on this class).
 *
 * @return string
 */
public function getDefaultField()
{
    $config = $this->config();

    return $config->default_field;
}
||
170 | |||
/**
 * List the fields each fulltext field should be copied into.
 *
 * Uses the copy_fields config setting when it is non-empty, and a
 * single-element list holding the default field otherwise.
 *
 * @return array
 */
protected function getCopyDestinations()
{
    $configured = $this->config()->copy_fields;

    return $configured ? $configured : array($this->getDefaultField());
}
||
187 | |||
/**
 * Generate the XML for all field definitions of this index: the hardcoded
 * fields, the fulltext collation (default) field, then the fulltext, filter
 * and sort fields.
 *
 * @return string Field definitions joined by a newline and two tabs
 */
public function getFieldDefinitions()
{
    $stored = $this->getStoredDefault();
    $defaultField = $this->getDefaultField();

    // The leading empty entry makes the joined output start with the separator
    $definitions = array(
        "",
        // Hardcoded field definitions
        "<field name='_documentid' type='string' indexed='true' stored='true' required='true' />",
        "<field name='ID' type='tint' indexed='true' stored='true' required='true' />",
        "<field name='ClassName' type='string' indexed='true' stored='true' required='true' />",
        "<field name='ClassHierarchy' type='string' indexed='true' stored='true' required='true' multiValued='true' />",
        // Fulltext collation field
        "<field name='{$defaultField}' type='htmltext' indexed='true' stored='{$stored}' multiValued='true' />",
    );

    // User-specified fulltext fields use the fulltext type map
    foreach ($this->fulltextFields as $name => $field) {
        $definitions[] = $this->getFieldDefinition($name, $field, self::$fulltextTypeMap);
    }

    // Filter fields first, then sort fields; ID and ClassName are already hardcoded above
    $alreadyDefined = array('ID', 'ClassName');
    foreach (array($this->filterFields, $this->sortFields) as $fieldSet) {
        foreach ($fieldSet as $name => $field) {
            if (in_array($field['fullfield'], $alreadyDefined, true)) {
                continue;
            }
            $definitions[] = $this->getFieldDefinition($name, $field);
        }
    }

    return implode("\n\t\t", $definitions);
}
||
230 | |||
/**
 * Extract the first suggestion text from a collation value.
 *
 * A string is returned unchanged. For an object, the first entry of its
 * misspellingsAndCorrections property is returned. Anything else yields
 * an empty string.
 *
 * @param mixed $collation
 * @return string
 */
protected function getCollatedSuggestion($collation = '')
{
    if (is_string($collation)) {
        return $collation;
    }

    if (!is_object($collation) || !isset($collation->misspellingsAndCorrections)) {
        return '';
    }

    // Only the first correction is of interest
    foreach ($collation->misspellingsAndCorrections as $correction) {
        return $correction;
    }

    return '';
}
||
251 | |||
/**
 * Turn a Solr spellcheck collation string into a human friendly suggestion
 * by stripping leading '+' characters from each space-separated part.
 *
 * @param string $collation
 * @return string
 */
protected function getNiceSuggestion($collation = '')
{
    $cleaned = array();

    foreach (explode(' ', $collation) as $word) {
        // Drop advanced query params from the start of the word
        $cleaned[] = ltrim($word, '+');
    }

    return implode(' ', $cleaned);
}
||
268 | |||
/**
 * Turn a Solr spellcheck collation string into a query string value: the
 * nice suggestion with every space replaced by '+'.
 * Useful for constructing 'Did you mean?' links, for example:
 * <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
 *
 * @param string $collation
 * @return string
 */
protected function getSuggestionQueryString($collation = '')
{
    $nice = $this->getNiceSuggestion($collation);

    return str_replace(' ', '+', $nice);
}
||
280 | |||
281 | /** |
||
282 | * Add a field that should be stored |
||
283 | * |
||
284 | * @param string $field The field to add |
||
285 | * @param string $forceType The type to force this field as (required in some cases, when not |
||
286 | * detectable from metadata) |
||
287 | * @param array $extraOptions Dependent on search implementation |
||
288 | */ |
||
289 | public function addStoredField($field, $forceType = null, $extraOptions = array()) |
||
293 | } |
||
294 | |||
/**
 * Add a fulltext field together with a boost value.
 *
 * The boost is passed on as the 'boost' entry of the options given to
 * addFulltextField(), replacing any 'boost' entry in $extraOptions.
 *
 * @param string $field The field to add
 * @param string $forceType The type to force this field as (required in some cases, when not
 * detectable from metadata)
 * @param array $extraOptions Dependent on search implementation
 * @param float $boost Numeric boosting value (defaults to 2)
 */
public function addBoostedField($field, $forceType = null, $extraOptions = array(), $boost = 2)
{
    $boostOption = array('boost' => $boost);

    $this->addFulltextField($field, $forceType, array_merge($extraOptions, $boostOption));
}
||
309 | |||
310 | |||
/**
 * Resolve field data through the parent implementation while intercepting
 * the 'boost' option.
 *
 * 'boost' is removed from $extraOptions before the parent is called, and a
 * non-null boost is recorded in $boostedFields for every resolved field.
 *
 * @param string $field
 * @param string $forceType
 * @param array $extraOptions
 * @return mixed Result of the parent fieldData() call
 */
public function fieldData($field, $forceType = null, $extraOptions = array())
{
    // Pull 'boost' out so it is recorded here without being captured by solr
    $boost = array_key_exists('boost', $extraOptions) ? $extraOptions['boost'] : null;
    unset($extraOptions['boost']);

    $data = parent::fieldData($field, $forceType, $extraOptions);

    if ($boost === null) {
        return $data;
    }

    // Boost all fields with this name
    foreach ($data as $fieldName => $unusedInfo) {
        $this->boostedFields[$fieldName] = $boost;
    }

    return $data;
}
||
329 | |||
/**
 * Set or clear the default boosting level of a fulltext field.
 * Controls the default value for the qf param (Query Fields), but does not
 * override a query-specific value.
 *
 * The field must already be registered as a fulltext field.
 *
 * @param string $field Full field key (Model_Field)
 * @param float|null $level Numeric boosting value. Set to null to clear boost
 * @throws \InvalidArgumentException When $field is not a known fulltext field
 */
public function setFieldBoosting($field, $level)
{
    if (!isset($this->fulltextFields[$field])) {
        throw new \InvalidArgumentException("No fulltext field $field exists on " . $this->getIndexName());
    }

    if ($level !== null) {
        $this->boostedFields[$field] = $level;
        return;
    }

    unset($this->boostedFields[$field]);
}
||
351 | |||
/**
 * Return the recorded boosts.
 *
 * @return array Boost values keyed by full field name
 */
public function getBoostedFields()
{
    $boosted = $this->boostedFields;

    return $boosted;
}
||
361 | |||
/**
 * Determine the best default value for the 'qf' parameter.
 *
 * Each boosted field becomes "<field>^<boost>". The default field is
 * appended without a boost unless it is itself boosted.
 *
 * @return array|null List of query fields, or null when nothing is boosted
 */
public function getQueryFields()
{
    // qf only needs to be specified when boosting is in use
    if (empty($this->boostedFields)) {
        return null;
    }

    $queryFields = array();
    foreach ($this->boostedFields as $fieldName => $boost) {
        $queryFields[] = "{$fieldName}^{$boost}";
    }

    // Once any field is queried explicitly the default field must be listed too,
    // otherwise it would be excluded from the search
    $defaultField = $this->getDefaultField();
    if (!isset($this->boostedFields[$defaultField])) {
        $queryFields[] = $defaultField;
    }

    return $queryFields;
}
||
386 | |||
/**
 * Get the default 'stored' value for fields in this index: 'true' when
 * Director::isDev() reports dev mode, 'false' otherwise.
 *
 * @return string Either 'true' or 'false'
 */
protected function getStoredDefault()
{
    if (Director::isDev()) {
        return 'true';
    }

    return 'false';
}
||
396 | |||
/**
 * Build the XML definition of a single field.
 *
 * The Solr type is looked up in $typeMap by $spec['type'], falling back to
 * the '*' entry. Analyzers registered for $name are nested inside an
 * <analyzer> element. Entries of $spec['extra_options'] override the
 * default attributes.
 *
 * @param string $name
 * @param array $spec
 * @param array $typeMap Type map to use; the filter type map when empty
 * @return string XML
 */
protected function getFieldDefinition($name, $spec, $typeMap = null)
{
    $typeMap = $typeMap ? $typeMap : self::$filterTypeMap;

    $specType = $spec['type'];
    $solrType = isset($typeMap[$specType]) ? $typeMap[$specType] : $typeMap['*'];

    // Note: a single-valued field still gets the attribute, with an empty value
    $multiValued = empty($spec['multi_valued']) ? '' : "true";

    $analyzers = isset($this->analyzerFields[$name]) ? $this->analyzerFields[$name] : array();
    $analyzerXml = '';
    foreach ($analyzers as $analyzerType => $analyzerParams) {
        $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
    }

    $defaults = array(
        'name' => $name,
        'type' => $solrType,
        'indexed' => 'true',
        'stored' => $this->getStoredDefault(),
        'multiValued' => $multiValued
    );
    $overrides = isset($spec['extra_options']) ? $spec['extra_options'] : array();

    $content = $analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null;

    return $this->toXmlTag("field", array_merge($defaults, $overrides), $content);
}
||
435 | |||
/**
 * Convert a definition to an XML tag.
 *
 * Produces a self-closing tag unless $content is truthy. A space always
 * follows the tag name, even when there are no attributes.
 *
 * NOTE(review): attribute values are interpolated as-is, without XML
 * escaping - callers appear to be responsible for passing safe values.
 *
 * @param string $tag
 * @param string[] $attrs Map of attributes
 * @param string $content Inner content
 * @return string XML tag
 */
protected function toXmlTag($tag, $attrs, $content = null)
{
    $pairs = array();
    if ($attrs) {
        foreach ($attrs as $attrName => $attrVal) {
            $pairs[] = "$attrName='$attrVal'";
        }
    }

    $opening = "<$tag " . implode(' ', $pairs);

    if ($content) {
        return $opening . ">$content</$tag>";
    }

    return $opening . '/>';
}
||
457 | |||
/**
 * Register an explicit copy field.
 *
 * Several definitions may be registered for the same source. Entries of
 * $extraOptions are merged over the 'source' and 'dest' attributes.
 *
 * @param string $source Composite field name (<class>_<fieldname>)
 * @param string $dest
 * @param array $extraOptions Additional attributes for the copy field definition
 */
public function addCopyField($source, $dest, $extraOptions = array())
{
    $definition = array_merge(
        array('source' => $source, 'dest' => $dest),
        $extraOptions
    );

    if (!isset($this->copyFields[$source])) {
        $this->copyFields[$source] = array();
    }

    $this->copyFields[$source][] = $definition;
}
||
472 | |||
/**
 * Generate the XML for copy field definitions.
 *
 * First every fulltext field is copied into each default destination,
 * then the explicitly registered copy fields follow.
 *
 * @return string Definitions joined by a newline and a tab
 */
public function getCopyFieldDefinitions()
{
    $definitions = array();

    // Default copy fields, grouped by destination
    $fulltextNames = array_keys($this->fulltextFields);
    foreach ($this->getCopyDestinations() as $destination) {
        foreach ($fulltextNames as $name) {
            $definitions[] = "<copyField source='{$name}' dest='{$destination}' />";
        }
    }

    // Explicit copy fields
    foreach ($this->copyFields as $registered) {
        foreach ($registered as $attributes) {
            $definitions[] = $this->toXmlTag('copyField', $attributes);
        }
    }

    return implode("\n\t", $definitions);
}
||
498 | |||
/**
 * Determine whether a class is, or is a subclass of, a base class.
 * When a list of base classes is given, matching any one of them suffices.
 *
 * @param string $class
 * @param array|string $base Class or list of base classes
 * @return bool
 */
protected function classIs($class, $base)
{
    // Single origin
    if (!is_array($base)) {
        return $class === $base || is_subclass_of($class, $base);
    }

    // List of origins: the first match wins
    foreach ($base as $candidate) {
        if ($this->classIs($class, $candidate)) {
            return true;
        }
    }

    return false;
}
||
520 | |||
/**
 * Add the value(s) of one field of an object to a Solr document.
 *
 * Nothing is added when the object's class does not match the field's
 * origin. The Solr type is taken from the filter type map (falling back to
 * '*') and drives value normalisation:
 *  - 'tdate' values are converted to the form 1995-12-31T23:59:59Z (UTC);
 *    empty values and values strtotime() cannot parse are skipped
 *  - 'tint', 'tfloat' and 'tdouble' values are skipped unless numeric
 *
 * Array values are appended one by one with addField(); scalar values
 * replace the field with setField() and are skipped when null.
 *
 * @param object $doc Document exposing addField() and setField()
 * @param object $object Object the value is read from
 * @param array $field Field spec; 'origin', 'type' and 'name' are read here
 */
protected function _addField($doc, $object, $field)
{
    $class = get_class($object);
    if (!$this->classIs($class, $field['origin'])) {
        return;
    }

    $value = $this->_getFieldValue($object, $field);

    $type = isset(self::$filterTypeMap[$field['type']])
        ? self::$filterTypeMap[$field['type']]
        : self::$filterTypeMap['*'];

    $isDate = ($type === 'tdate');
    $isNumber = ($type === 'tint' || $type === 'tfloat' || $type === 'tdouble');

    if (is_array($value)) {
        foreach ($value as $sub) {
            /* Solr requires dates in the form 1995-12-31T23:59:59Z */
            if ($isDate) {
                if (!$sub) {
                    continue;
                }
                $timestamp = strtotime($sub);
                if ($timestamp === false) {
                    // Unparseable date: skip rather than index the Unix epoch
                    continue;
                }
                $sub = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
            }

            /* Solr requires numbers to be valid if presented, not just empty */
            if ($isNumber && !is_numeric($sub)) {
                continue;
            }

            $doc->addField($field['name'], $sub);
        }
        return;
    }

    /* Solr requires dates in the form 1995-12-31T23:59:59Z */
    if ($isDate) {
        if (!$value) {
            return;
        }
        $timestamp = strtotime($value);
        if ($timestamp === false) {
            // Unparseable date: skip rather than index the Unix epoch
            return;
        }
        $value = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
    }

    /* Solr requires numbers to be valid if presented, not just empty */
    if ($isNumber && !is_numeric($value)) {
        return;
    }

    // Only index fields that are not null
    if ($value !== null) {
        $doc->setField($field['name'], $value);
    }
}
||
569 | |||
/**
 * Build a Solr document for an object as seen from one base class and send
 * it to the Solr service.
 *
 * The document always carries _documentid, ID, ClassName and one
 * ClassHierarchy value per class returned by SearchIntrospection::hierarchy().
 * Index fields are added when their 'base' equals $base or, for a list of
 * bases, contains it.
 *
 * @param object $object Object to index
 * @param string $base Base class the document is created for
 * @param array $options Class options; 'include_children' is read here
 * @return \Apache_Solr_Document|false The document sent, or false when
 *         adding it raised an exception (which is reported via warn())
 */
protected function _addAs($object, $base, $options)
{
    $includeSubs = $options['include_children'];
    $document = new \Apache_Solr_Document();

    // Always present fields
    $document->setField('_documentid', $this->getDocumentID($object, $base, $includeSubs));
    $document->setField('ID', $object->ID);
    $document->setField('ClassName', $object->ClassName);

    $hierarchy = SearchIntrospection::hierarchy(get_class($object), false);
    foreach ($hierarchy as $ancestor) {
        $document->addField('ClassHierarchy', $ancestor);
    }

    // User-specified fields that belong to this base
    foreach ($this->getFieldsIterator() as $name => $field) {
        $fieldBase = $field['base'];
        $belongsToBase = $fieldBase === $base
            || (is_array($fieldBase) && in_array($base, $fieldBase));
        if (!$belongsToBase) {
            continue;
        }
        $this->_addField($document, $object, $field);
    }

    try {
        $this->getService()->addDocument($document);
    } catch (Exception $e) {
        static::warn($e);
        return false;
    }

    return $document;
}
||
603 | |||
/**
 * Add an object to the index once for every configured class it matches.
 *
 * A configured class matches when it equals the object's class, or when
 * its 'include_children' option is set and the object's class is a
 * subclass of it.
 *
 * @param object $object Object to index
 * @return array One _addAs() result per matching class
 */
public function add($object)
{
    $objectClass = get_class($object);
    $documents = array();

    foreach ($this->getClasses() as $searchClass => $options) {
        $matches = $searchClass == $objectClass
            || ($options['include_children'] && is_subclass_of($objectClass, $searchClass));
        if (!$matches) {
            continue;
        }

        $base = DataObject::getSchema()->baseDataClass($searchClass);
        $documents[] = $this->_addAs($object, $base, $options);
    }

    return $documents;
}
||
618 | |||
/**
 * Check whether objects of a class are covered by this index.
 *
 * A configured class covers $class when both are equal, or when its
 * 'include_children' option is set and $class is a subclass of it.
 *
 * @param string $class
 * @return bool
 */
public function canAdd($class)
{
    foreach ($this->classes as $searchClass => $options) {
        if ($searchClass == $class) {
            return true;
        }
        if ($options['include_children'] && is_subclass_of($class, $searchClass)) {
            return true;
        }
    }

    return false;
}
||
629 | |||
/**
 * Delete one document from the index.
 *
 * The document id is derived via getDocumentIDForState(). An exception
 * raised while deleting is reported via warn() instead of propagating.
 *
 * @param string $base Base class of the record
 * @param int $id Record ID
 * @param mixed $state Passed through to getDocumentIDForState()
 * @return bool True on success, false when an exception was caught
 */
public function delete($base, $id, $state)
{
    $documentID = $this->getDocumentIDForState($base, $id, $state);
    $deleted = true;

    try {
        $this->getService()->deleteById($documentID);
    } catch (Exception $e) {
        static::warn($e);
        $deleted = false;
    }

    return $deleted;
}
||
643 | |||
/**
 * Clear all records which do not match the given classname whitelist.
 *
 * Can also be used to trim an index when reducing to a narrower set of classes.
 *
 * Ignores current state / variant.
 *
 * Classes with 'include_children' are matched on ClassHierarchy, all others
 * on ClassName. Class names are escaped with sanitiseClassName(), as in
 * search(), so that namespace separators are not read as escape characters
 * by the Solr query parser.
 *
 * @param array $classes List of non-obsolete classes in the same format as SolrIndex::getClasses()
 * @return bool Flag if successful; false when $classes is empty (nothing is deleted)
 * @throws \Apache_Solr_HttpTransportException
 */
public function clearObsoleteClasses($classes)
{
    if (empty($classes)) {
        return false;
    }

    // Collect the classname rules a record has to match to be kept
    $conditions = array();
    foreach ($classes as $class => $options) {
        $matchField = $options['include_children'] ? 'ClassHierarchy' : 'ClassName';
        $conditions[] = $matchField . ':' . $this->sanitiseClassName($class);
    }

    // Delete records which don't match any of these conditions in this index
    $deleteQuery = "-(" . implode(' ', $conditions) . ")";
    $this
        ->getService()
        ->deleteByQuery($deleteQuery);
    return true;
}
||
678 | |||
/**
 * Commit pending changes on the Solr service.
 *
 * An exception raised while committing is reported via warn() instead of
 * propagating.
 *
 * @return bool True on success, false when an exception was caught
 */
public function commit()
{
    $committed = true;

    try {
        $this->getService()->commit(false, false, false);
    } catch (Exception $e) {
        static::warn($e);
        $committed = false;
    }

    return $committed;
}
||
690 | |||
/**
 * Run a search against Solr and load the matching records.
 *
 * @param SearchQuery $query
 * @param integer $offset Start offset; -1 uses $query->start
 * @param integer $limit Page size; -1 uses $query->limit, and when that is
 *        also -1, SearchQuery::$default_page_size
 * @param array $params Extra request parameters passed through to Solr
 * @return ArrayData Map with the following keys:
 *  - 'Matches': PaginatedList of the matched object instances
 *  - 'Spellcheck': raw spellcheck data (only when present in the response)
 *  - 'Suggestion', 'SuggestionNice', 'SuggestionQueryString': only when the
 *    response contains a spellcheck collation
 * @throws \Apache_Solr_HttpTransportException
 * @throws \Apache_Solr_InvalidArgumentException
 */
public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array())
{
    $service = $this->getService();
    $this->applySearchVariants($query);

    $fq = array(); // Filter query
    $hlq = array(); // Highlight query

    // Build the search itself
    $q = $this->getQueryComponent($query, $hlq);

    // If using boosting, set the clean term separately for highlighting.
    // See https://issues.apache.org/jira/browse/SOLR-2632
    if (array_key_exists('hl', $params) && !array_key_exists('hl.q', $params)) {
        $params['hl.q'] = implode(' ', $hlq);
    }

    // Filter by class if requested
    $classq = array();
    foreach ($query->classes as $class) {
        if (!empty($class['includeSubclasses'])) {
            $classq[] = 'ClassHierarchy:' . $this->sanitiseClassName($class['class']);
        } else {
            $classq[] = 'ClassName:' . $this->sanitiseClassName($class['class']);
        }
    }
    if ($classq) {
        $fq[] = '+(' . implode(' ', $classq) . ')';
    }

    // Filter by filters
    $fq = array_merge($fq, $this->getFiltersComponent($query));

    // Prepare query fields unless specified explicitly
    if (isset($params['qf'])) {
        $qf = $params['qf'];
    } else {
        $qf = $this->getQueryFields();
    }
    if (is_array($qf)) {
        $qf = implode(' ', $qf);
    }
    if ($qf) {
        $params['qf'] = $qf;
    }

    // In dev mode, expose the generated query parts as response headers
    if (!headers_sent() && Director::isDev()) {
        if ($q) {
            header('X-Query: ' . implode(' ', $q));
        }
        if ($fq) {
            header('X-Filters: "' . implode('", "', $fq) . '"');
        }
        if ($qf) {
            header('X-QueryFields: ' . $qf);
        }
    }

    if ($offset == -1) {
        $offset = $query->start;
    }
    if ($limit == -1) {
        $limit = $query->limit;
    }
    if ($limit == -1) {
        $limit = SearchQuery::$default_page_size;
    }

    $params = array_merge($params, array('fq' => implode(' ', $fq)));

    $res = $service->search(
        $q ? implode(' ', $q) : '*:*',
        $offset,
        $limit,
        $params,
        \Apache_Solr_Service::METHOD_POST
    );

    $indexableService = IndexableService::singleton();

    $results = new ArrayList();
    if ($res->getHttpStatus() >= 200 && $res->getHttpStatus() < 300) {
        foreach ($res->response->docs as $doc) {
            $result = DataObject::get_by_id($doc->ClassName, $doc->ID);
            if (!$result) {
                continue;
            }

            // Filter out any results previously added to the solr index where ShowInSearch == false
            if (!$indexableService->isIndexable($result)) {
                continue;
            }

            $results->push($result);

            // Add highlighting (optional)
            $docId = $doc->_documentid;
            if ($res->highlighting && $res->highlighting->$docId) {
                // TODO Create decorator class for search results rather than adding arbitrary object properties
                // TODO Allow specifying highlighted field, and lazy loading
                // in case the search API needs another query (similar to SphinxSearchable->buildExcerpt()).
                $combinedHighlights = array();
                foreach ($res->highlighting->$docId as $field => $highlights) {
                    $combinedHighlights = array_merge($combinedHighlights, $highlights);
                }

                // Remove entity-encoded U+FFFD replacement character. It signifies non-displayable characters,
                // and shows up as an encoding error in browsers.
                $result->Excerpt = DBField::create_field(
                    'HTMLText',
                    str_replace(
                        '&#65533;',
                        '',
                        implode(' ... ', $combinedHighlights)
                    )
                );
            }
        }
        $numFound = $res->response->numFound;
    } else {
        $numFound = 0;
    }

    $ret = array();
    $ret['Matches'] = new PaginatedList($results);
    $ret['Matches']->setLimitItems(false);
    // Tell PaginatedList how many results there are
    $ret['Matches']->setTotalItems($numFound);
    // Results for current page start at $offset
    $ret['Matches']->setPageStart($offset);
    // Results per page
    $ret['Matches']->setPageLength($limit);

    // Include spellcheck and suggestion data. Requires spellcheck=true in $params
    if (isset($res->spellcheck)) {
        // Expose all spellcheck data, for custom handling.
        $ret['Spellcheck'] = $res->spellcheck;

        // Suggestions. Requires spellcheck.collate=true in $params
        if (isset($res->spellcheck->suggestions->collation)) {
            // Extract string suggestion
            $suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);

            // The collation, including advanced query params (e.g. +), suitable for making another query
            // programmatically.
            $ret['Suggestion'] = $suggestion;

            // A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
            $ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);

            // A string suitable for appending to an href as a query string.
            // For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
            $ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
        }
    }

    $ret = new ArrayData($ret);

    // Enable extensions to add extra data from the response into
    // the returned results set.
    $this->extend('updateSearchResults', $ret, $res);

    return $ret;
}
||
865 | |||
/**
 * Let the search variants common to the relevant classes alter the query
 * to add their variant column conditions.
 *
 * The relevant classes are those of the query, or those of this index when
 * the query does not name any.
 *
 * @param SearchQuery $query
 */
protected function applySearchVariants(SearchQuery $query)
{
    if (count($query->classes)) {
        $classes = $query->classes;
    } else {
        $classes = $this->getClasses();
    }

    /** @var SearchVariant_Caller $caller */
    $caller = SearchVariant::withCommon($classes);
    $caller->call('alterQuery', $query, $this);
}
||
880 | |||
/**
 * Replace the namespace separators of a class name.
 * Solr requires namespaced classes to have double escaped backslashes,
 * which is what the default replacement produces.
 *
 * @param string $className E.g. My\Object\Here
 * @param string $replaceWith The replacement character(s) to use
 * @return string E.g. My\\Object\\Here
 */
public function sanitiseClassName($className, $replaceWith = '\\\\')
{
    $separator = '\\';
    $sanitised = str_replace($separator, $replaceWith, $className);

    return $sanitised;
}
||
892 | |||
/**
 * Get the query (q) component for this search.
 *
 * Each search text is split into quoted phrases and whitespace-separated
 * words. Every part becomes a required ('+') clause: either the bare part,
 * or an OR-group over the search's fields (its 'fields' plus the keys of
 * its 'boost' map). A '~' is appended for fuzzy searches and '^<boost>'
 * for boosted fields.
 *
 * @param SearchQuery $searchQuery
 * @param array &$hlq Highlight query returned by reference; every part is appended as-is
 * @return array List of query clauses
 */
protected function getQueryComponent(SearchQuery $searchQuery, &$hlq = array())
{
    $q = array();
    foreach ($searchQuery->search as $search) {
        $text = $search['text'];
        preg_match_all('/"[^"]*"|\S+/', $text, $parts);

        $fuzzy = $search['fuzzy'] ? '~' : '';

        // The field list does not depend on the part, so build it once per search
        $fields = (isset($search['fields'])) ? $search['fields'] : array();
        if (isset($search['boost'])) {
            $fields = array_merge($fields, array_keys($search['boost']));
        }

        foreach ($parts[0] as $part) {
            if ($fields) {
                $searchq = array();
                foreach ($fields as $field) {
                    // Look the boost up by the raw field name: the boost map is keyed
                    // by unescaped names, so an escaped name would never match
                    $boost = (isset($search['boost'][$field])) ? '^' . $search['boost'][$field] : '';

                    // Escape namespace separators in class names
                    $escapedField = $this->sanitiseClassName($field);

                    $searchq[] = "{$escapedField}:" . $part . $fuzzy . $boost;
                }
                $q[] = '+(' . implode(' OR ', $searchq) . ')';
            } else {
                $q[] = '+' . $part . $fuzzy;
            }
            $hlq[] = $part;
        }
    }
    return $q;
}
||
932 | |||
/**
 * Turns every "require" constraint of the query into a mandatory filter query clause.
 *
 * Each required field produces one "+( ... )" group holding a sub-clause per accepted value: the
 * missing marker, the present marker, a range, or a quoted literal.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each require
 */
protected function getRequireFiltersComponent(SearchQuery $searchQuery)
{
    $filters = array();

    foreach ($searchQuery->require as $field => $values) {
        $alternatives = array();

        foreach ($values as $value) {
            if ($value === SearchQuery::$missing) {
                // Everything, minus the documents that hold any value in the field
                $alternatives[] = "(*:* -{$field}:[* TO *])";
                continue;
            }

            if ($value === SearchQuery::$present) {
                $alternatives[] = "{$field}:[* TO *]";
                continue;
            }

            if ($value instanceof SearchQuery_Range) {
                // A null boundary leaves that side of the range open
                $from = ($value->start === null) ? '*' : $value->start;
                $to = ($value->end === null) ? '*' : $value->end;
                $alternatives[] = "{$field}:[{$from} TO {$to}]";
                continue;
            }

            $alternatives[] = "{$field}:\"{$value}\"";
        }

        $filters[] = '+(' . implode(' ', $alternatives) . ')';
    }

    return $filters;
}
||
969 | |||
/**
 * Parse all exclude constraints for inclusion in a filter query
 *
 * Each excluded field yields at most one entry. Excluding the "missing" marker is expressed positively
 * (the field must hold a value); every other excluded value goes into a negated "-( ... )" group.
 * Fields for which neither part applies produce no entry.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each exclusion
 */
protected function getExcludeFiltersComponent(SearchQuery $searchQuery)
{
    $fq = array();
    foreach ($searchQuery->exclude as $field => $values) {
        // Handle namespaced class names
        $field = $this->sanitiseClassName($field);

        $excludeq = array();
        $missing = false;

        foreach ($values as $value) {
            if ($value === SearchQuery::$missing) {
                $missing = true;
            } elseif ($value === SearchQuery::$present) {
                $excludeq[] = "{$field}:[* TO *]";
            } elseif ($value instanceof SearchQuery_Range) {
                // A null boundary leaves that side of the range open
                $start = ($value->start === null) ? '*' : $value->start;
                $end = ($value->end === null) ? '*' : $value->end;
                $excludeq[] = "$field:[$start TO $end]";
            } else {
                $excludeq[] = $field . ':"' . $value . '"';
            }
        }

        $clauses = array();
        if ($missing) {
            // Excluding documents without a value is the same as requiring one
            $clauses[] = "+{$field}:[* TO *]";
        }
        if ($excludeq) {
            // Only emit the negated group when it has content: an empty "-()" excludes nothing and
            // may be rejected by the query parser
            $clauses[] = '-(' . implode(' ', $excludeq) . ')';
        }

        if ($clauses) {
            $fq[] = implode(' ', $clauses);
        }
    }
    return $fq;
}
||
1010 | |||
/**
 * Builds the filter statement for the criteria attached to the query.
 *
 * @param SearchQuery $searchQuery
 * @return string|null The combined criteria statement, or null when the query has no criteria
 * @throws \Exception When the query has criteria but no adapter
 */
protected function getCriteriaComponent(SearchQuery $searchQuery)
{
    $criteria = $searchQuery->getCriteria();
    if (count($criteria) === 0) {
        return null;
    }

    $adapter = $searchQuery->getAdapter();
    if ($adapter === null) {
        throw new \Exception('SearchQuery does not have a SearchAdapter applied');
    }

    // The adapter supplies the opening of the statement (it needs to start with a positive conjunction)
    $statement = $adapter->getPrependToCriteriaComponent();

    foreach ($criteria as $criterion) {
        $criterion->setAdapter($adapter);
        // Presumably extends $statement by reference; the return value is not used here
        $criterion->appendPreparedStatementTo($statement);
    }

    // ...and whatever is needed to close the statement again
    $statement .= $adapter->getAppendToCriteriaComponent();

    // A plain string: `getFiltersComponent` appends it as a single entry of its filter list
    return $statement;
}
||
1040 | |||
/**
 * Collects every filter condition for this search: the require clauses, then the exclude clauses,
 * then (when the query has criteria) the criteria statement as a final entry.
 *
 * @param SearchQuery $searchQuery
 * @return array
 * @throws \Exception Raised by the criteria component when criteria exist but no adapter is set
 */
public function getFiltersComponent(SearchQuery $searchQuery)
{
    // Resolved first so that a misconfigured query fails before any other work is done
    $criteria = $this->getCriteriaComponent($searchQuery);

    $required = $this->getRequireFiltersComponent($searchQuery);
    $excluded = $this->getExcludeFiltersComponent($searchQuery);
    $filters = array_merge($required, $excluded);

    if ($criteria === null) {
        return $filters;
    }

    $filters[] = $criteria;

    return $filters;
}
||
1063 | |||
/**
 * Solr service backing this index. Stays unset until setService() or getService() assigns it.
 *
 * @var SolrService
 */
protected $service;

/**
 * Returns the Solr service of this index, asking Solr::service() for one (keyed by this index's
 * class name) the first time it is needed.
 *
 * @return SolrService
 */
public function getService()
{
    if ($this->service) {
        return $this->service;
    }

    $indexClass = get_class($this);
    $this->service = Solr::service($indexClass);

    return $this->service;
}
||
1076 | |||
/**
 * Replaces the Solr service used by this index.
 *
 * @param SolrService $service Service that getService() will return from now on
 * @return $this
 */
public function setService(SolrService $service)
{
    $this->service = $service;

    return $this;
}
||
1082 | |||
1083 | /** |
||
1084 | * Upload config for this index to the given store |
||
1085 | * |
||
1086 | * @param SolrConfigStore $store |
||
1087 | */ |
||
1088 | public function uploadConfig($store) |
||
1101 | } |
||
1102 | } |
||
1103 | } |
||
1104 | } |
||
1105 |