| Total Complexity | 186 |
| Total Lines | 1068 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like SolrIndex often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use SolrIndex, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 21 | abstract class SolrIndex extends SearchIndex |
||
| 22 | { |
||
/**
 * Field type => Solr field type, used when defining fulltext fields.
 * The '*' entry is the fallback for any type that is not listed.
 *
 * @var array
 */
public static $fulltextTypeMap = [
    '*' => 'text',
    'HTMLVarchar' => 'htmltext',
    'HTMLText' => 'htmltext',
];

/**
 * Field type => Solr field type, used for filter and sort fields and when
 * converting values for indexing. The '*' entry is the fallback.
 *
 * @var array
 */
public static $filterTypeMap = [
    '*' => 'string',
    'Boolean' => 'boolean',
    'Date' => 'tdate',
    'SSDatetime' => 'tdate',
    'SS_Datetime' => 'tdate',
    'ForeignKey' => 'tint',
    'Int' => 'tint',
    'Float' => 'tfloat',
    'Double' => 'tdouble',
];

/**
 * @var array
 */
public static $sortTypeMap = [];

/**
 * Analyzer parameters keyed by full field name, then by analyzer type.
 *
 * @var array
 */
protected $analyzerFields = [];

/**
 * Explicit copy field attribute maps, grouped by source field name.
 *
 * @var array
 */
protected $copyFields = [];

/**
 * Per-index override for the extras path; the global Solr options are used when empty.
 *
 * @var string|null
 */
protected $extrasPath = null;

/**
 * Per-index override for the templates path; the global Solr options are used when empty.
 *
 * @var string|null
 */
protected $templatesPath = null;

/**
 * NOTE(review): presumably template casting hints for the generated XML getters - confirm.
 *
 * @var array
 */
private static $casting = [
    'FieldDefinitions' => 'HTMLText',
    'CopyFieldDefinitions' => 'HTMLText',
];

/**
 * Boost level per field name
 *
 * @var array
 */
protected $boostedFields = [];

/**
 * Name of default field
 *
 * @var string
 * @config
 */
private static $default_field = '_text';

/**
 * List of copy fields all fulltext fields should be copied into.
 * This will fallback to default_field if not specified
 *
 * @var array
 */
private static $copy_fields = [];
||
| 78 | |||
/**
 * Resolve the folder containing the templates which are used for generating
 * the schema and field definitions.
 *
 * The index's own $templatesPath wins when it is set; otherwise the
 * 'templatespath' entry of the global Solr options is used. Trailing
 * slashes are stripped from the result.
 *
 * @return String Path to the templates folder, without a trailing slash
 */
public function getTemplatesPath()
{
    $options = Solr::solr_options();

    $configured = $this->templatesPath;
    if (!$configured) {
        $configured = $options['templatespath'];
    }

    return rtrim($configured, '/');
}
||
| 89 | |||
/**
 * Resolve the path to the configuration default files, e.g. solrconfig.xml.
 *
 * The index's own $extrasPath wins when it is set; otherwise the
 * 'extraspath' entry of the global Solr options is returned.
 *
 * @return String
 */
public function getExtrasPath()
{
    $options = Solr::solr_options();

    if ($this->extrasPath) {
        return $this->extrasPath;
    }

    return $options['extraspath'];
}
||
| 99 | |||
/**
 * Render the 'schema.ss' template located in the templates folder.
 *
 * @return mixed Result of renderWith() for the schema template
 */
public function generateSchema()
{
    $template = $this->getTemplatesPath() . '/schema.ss';

    return $this->renderWith($template);
}
||
| 104 | |||
/**
 * Build the index name from this object's class name, with namespace
 * separators replaced by '-'.
 *
 * The SS_SOLR_INDEX_PREFIX and SS_SOLR_INDEX_SUFFIX environment values,
 * when non-empty, are placed directly before / after the name with no
 * extra separator.
 *
 * @return string
 */
public function getIndexName()
{
    $indexName = $this->sanitiseClassName(get_class($this), '-');

    $prefix = Environment::getEnv('SS_SOLR_INDEX_PREFIX');
    if ($prefix) {
        $indexName = $prefix . $indexName;
    }

    $suffix = Environment::getEnv('SS_SOLR_INDEX_SUFFIX');
    if ($suffix) {
        $indexName .= $suffix;
    }

    return $indexName;
}
||
| 127 | |||
/**
 * Render the 'types.ss' template located in the templates folder.
 *
 * @return mixed Result of renderWith() for the types template
 */
public function getTypes()
{
    $template = $this->getTemplatesPath() . '/types.ss';

    return $this->renderWith($template);
}
||
| 132 | |||
/**
 * Register an index-time analyzer for a specific field.
 * Can be used to remove HTML tags, apply stemming, etc.
 *
 * The field is expanded through fieldData(); the analyzer is recorded for
 * every full field name that comes back. Registering the same $type twice
 * for a field replaces the earlier parameters.
 *
 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
 *
 * @param string $field
 * @param string $type
 * @param array $params parameters for the analyzer, usually at least a "class"
 */
public function addAnalyzer($field, $type, $params)
{
    $fullFields = $this->fieldData($field);
    if (!$fullFields) {
        // Nothing resolved for this field, so there is nothing to attach to
        return;
    }

    foreach ($fullFields as $fullField => $unusedSpec) {
        // Assigning through the nested key creates the per-field map on first use
        $this->analyzerFields[$fullField][$type] = $params;
    }
}
||
| 155 | |||
/**
 * Read the name of the default text field from the 'default_field'
 * config setting (declared as '_text' on this class).
 *
 * @return string
 */
public function getDefaultField()
{
    $config = $this->config();

    return $config->default_field;
}
||
| 165 | |||
/**
 * List the fields every fulltext field should be copied into.
 *
 * Uses the 'copy_fields' config setting when it is non-empty, otherwise a
 * single-element list holding the default field.
 *
 * @return array
 */
protected function getCopyDestinations()
{
    $configured = $this->config()->copy_fields;

    // Fallback to default field
    return $configured ?: [$this->getDefaultField()];
}
||
| 182 | |||
/**
 * Generate the XML <field> definitions for this index.
 *
 * Output order: the hardcoded fields (_documentid, ID, ClassName,
 * ClassHierarchy), the default fulltext collation field, then the fulltext,
 * filter and sort fields. Filter and sort fields whose 'fullfield' is ID or
 * ClassName are skipped because they are already hardcoded.
 *
 * @return string Definitions joined by a newline and two tabs
 */
public function getFieldDefinitions()
{
    $stored = $this->getStoredDefault();
    $df = $this->getDefaultField();

    $definitions = [
        "",
        // Hardcoded field definitions
        "<field name='_documentid' type='string' indexed='true' stored='true' required='true' />",
        "<field name='ID' type='tint' indexed='true' stored='true' required='true' />",
        "<field name='ClassName' type='string' indexed='true' stored='true' required='true' />",
        "<field name='ClassHierarchy' type='string' indexed='true' stored='true' required='true' multiValued='true' />",
        // Fulltext collation field
        "<field name='{$df}' type='htmltext' indexed='true' stored='{$stored}' multiValued='true' />",
    ];

    // User-specified fulltext fields use the fulltext type map
    foreach ($this->fulltextFields as $name => $field) {
        $definitions[] = $this->getFieldDefinition($name, $field, self::$fulltextTypeMap);
    }

    // Filter fields first, then sort fields; both use the default (filter) type map
    foreach ([$this->filterFields, $this->sortFields] as $fieldSet) {
        foreach ($fieldSet as $name => $field) {
            $isHardcoded = $field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName';
            if (!$isHardcoded) {
                $definitions[] = $this->getFieldDefinition($name, $field);
            }
        }
    }

    return implode("\n\t\t", $definitions);
}
||
| 225 | |||
/**
 * Extract the first suggestion text from collated values.
 *
 * A string is returned unchanged. For an object exposing
 * misspellingsAndCorrections, the first entry of that collection is
 * returned. Anything else yields an empty string.
 *
 * @param mixed $collation
 * @return string
 */
protected function getCollatedSuggestion($collation = '')
{
    if (is_string($collation)) {
        return $collation;
    }

    if (!is_object($collation) || !isset($collation->misspellingsAndCorrections)) {
        return '';
    }

    // Only the first entry is of interest; an empty collection falls through
    foreach ($collation->misspellingsAndCorrections as $correction) {
        return $correction;
    }

    return '';
}
||
| 246 | |||
/**
 * Extract a human friendly spelling suggestion from a Solr spellcheck collation string.
 *
 * The collation is split on single spaces and every leading '+' is
 * stripped from each part before the parts are joined again.
 *
 * @param string $collation
 * @return String
 */
protected function getNiceSuggestion($collation = '')
{
    // Mapping over the parts avoids a by-reference loop variable that would
    // stay bound to the last element after the loop.
    $cleanParts = array_map(
        static function ($part) {
            return ltrim($part, '+');
        },
        explode(' ', $collation)
    );

    return implode(' ', $cleanParts);
}
||
| 263 | |||
/**
 * Extract a query string from a Solr spellcheck collation string.
 * Useful for constructing 'Did you mean?' links, for example:
 * <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
 *
 * This is the nice suggestion with every space replaced by '+'; no other
 * encoding is applied.
 *
 * @param string $collation
 * @return String
 */
protected function getSuggestionQueryString($collation = '')
{
    $nice = $this->getNiceSuggestion($collation);

    return str_replace(' ', '+', $nice);
}
||
| 275 | |||
| 276 | /** |
||
| 277 | * Add a field that should be stored |
||
| 278 | * |
||
| 279 | * @param string $field The field to add |
||
| 280 | * @param string $forceType The type to force this field as (required in some cases, when not |
||
| 281 | * detectable from metadata) |
||
| 282 | * @param array $extraOptions Dependent on search implementation |
||
| 283 | */ |
||
| 284 | public function addStoredField($field, $forceType = null, $extraOptions = array()) |
||
| 288 | } |
||
| 289 | |||
/**
 * Add a fulltext field with a boosted value
 *
 * The boost is passed on as the 'boost' entry of the options and replaces
 * any 'boost' already present in $extraOptions.
 *
 * @param string $field The field to add
 * @param string $forceType The type to force this field as (required in some cases, when not
 * detectable from metadata)
 * @param array $extraOptions Dependent on search implementation
 * @param float $boost Numeric boosting value (defaults to 2)
 */
public function addBoostedField($field, $forceType = null, $extraOptions = array(), $boost = 2)
{
    $boostOption = ['boost' => $boost];

    $this->addFulltextField($field, $forceType, array_merge($extraOptions, $boostOption));
}
||
| 304 | |||
| 305 | |||
/**
 * Resolve field data through the parent implementation, handling the
 * 'boost' option locally.
 *
 * 'boost' is removed from the options before they reach the parent. When a
 * non-null boost was given, it is recorded in $boostedFields for every
 * field name the parent returns.
 *
 * @param string $field
 * @param string $forceType
 * @param array $extraOptions
 * @return mixed Whatever the parent fieldData() returns
 */
public function fieldData($field, $forceType = null, $extraOptions = array())
{
    // Ensure that 'boost' is recorded here without being captured by solr
    $boost = array_key_exists('boost', $extraOptions) ? $extraOptions['boost'] : null;
    unset($extraOptions['boost']);

    $data = parent::fieldData($field, $forceType, $extraOptions);
    if ($boost === null) {
        return $data;
    }

    // Boost all fields with this name
    foreach ($data as $fieldName => $unusedInfo) {
        $this->boostedFields[$fieldName] = $boost;
    }

    return $data;
}
||
| 324 | |||
/**
 * Set the default boosting level for a specific field.
 * Will control the default value for qf param (Query Fields), but will not
 * override a query-specific value.
 *
 * Fields must be added before having a field boosting specified
 *
 * @param string $field Full field key (Model_Field)
 * @param float|null $level Numeric boosting value. Set to null to clear boost
 * @throws \InvalidArgumentException If $field is not a registered fulltext field
 */
public function setFieldBoosting($field, $level)
{
    if (!isset($this->fulltextFields[$field])) {
        throw new \InvalidArgumentException("No fulltext field $field exists on " . $this->getIndexName());
    }

    if ($level !== null) {
        $this->boostedFields[$field] = $level;
        return;
    }

    // A null level clears any existing boost
    unset($this->boostedFields[$field]);
}
||
| 346 | |||
/**
 * Get the currently recorded boosts.
 *
 * @return array Map of field name to boost level
 */
public function getBoostedFields()
{
    $boosted = $this->boostedFields;

    return $boosted;
}
||
| 356 | |||
/**
 * Determine the best default value for the 'qf' parameter
 *
 * Each boosted field becomes "<field>^<boost>". The default field is
 * appended without a boost unless it is itself boosted.
 *
 * @return array|null List of query fields, or null if not specified
 */
public function getQueryFields()
{
    // Not necessary to specify this unless boosting
    if (empty($this->boostedFields)) {
        return null;
    }

    $queryFields = [];
    foreach ($this->boostedFields as $fieldName => $boost) {
        $queryFields[] = "{$fieldName}^{$boost}";
    }

    // If any fields are queried, we must always include the default field, otherwise it will be excluded
    $df = $this->getDefaultField();
    $defaultIsBoosted = isset($this->boostedFields[$df]);
    if (!$defaultIsBoosted && $queryFields) {
        $queryFields[] = $df;
    }

    return $queryFields;
}
||
| 381 | |||
/**
 * Gets the default 'stored' value for fields in this index.
 *
 * Fields are stored by default only when Director::isDev() is true.
 *
 * @return string A default value for the 'stored' field option, either 'true' or 'false'
 */
protected function getStoredDefault()
{
    if (Director::isDev()) {
        return 'true';
    }

    return 'false';
}
||
| 391 | |||
/**
 * Build the XML <field> definition for a single field.
 *
 * @param string $name Field name; also the key used to look up analyzers
 * @param Array $spec Field spec; reads 'type' and the optional 'multi_valued' and
 * 'extra_options' entries. Extra options override the generated attributes.
 * @param Array $typeMap Type map to resolve the Solr type with; the filter type map when empty
 * @return String XML
 */
protected function getFieldDefinition($name, $spec, $typeMap = null)
{
    $typeMap = $typeMap ?: self::$filterTypeMap;

    // Unknown types fall back to the map's '*' entry
    $specType = $spec['type'];
    $type = isset($typeMap[$specType]) ? $typeMap[$specType] : $typeMap['*'];

    $multiValued = !empty($spec['multi_valued']) ? "true" : '';

    // Collect any analyzers registered for this field
    $analyzers = isset($this->analyzerFields[$name]) ? $this->analyzerFields[$name] : [];
    $analyzerXml = '';
    foreach ($analyzers as $analyzerType => $analyzerParams) {
        $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
    }

    $defaults = [
        'name' => $name,
        'type' => $type,
        'indexed' => 'true',
        'stored' => $this->getStoredDefault(),
        'multiValued' => $multiValued,
    ];
    $extraOptions = isset($spec['extra_options']) ? $spec['extra_options'] : [];

    $content = $analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null;

    return $this->toXmlTag("field", array_merge($defaults, $extraOptions), $content);
}
||
| 430 | |||
/**
 * Convert definition to XML tag
 *
 * Attribute values are written between single quotes exactly as given; no
 * escaping is applied. The tag is self-closing unless $content is non-empty.
 *
 * @param string $tag
 * @param array $attrs Map of attributes
 * @param string $content Inner content
 * @return String XML tag
 */
protected function toXmlTag($tag, $attrs, $content = null)
{
    $pairs = [];
    if ($attrs) {
        foreach ($attrs as $attrName => $attrVal) {
            $pairs[] = "$attrName='$attrVal'";
        }
    }

    // The space after the tag name is always emitted, even without attributes
    $opening = "<$tag " . implode(' ', $pairs);

    if ($content) {
        return $opening . ">$content</$tag>";
    }

    return $opening . '/>';
}
||
| 452 | |||
/**
 * Register an explicit copy field.
 *
 * Each call appends one attribute map for $source; entries in
 * $extraOptions override the generated 'source' / 'dest' attributes.
 *
 * @param string $source Composite field name (<class>_<fieldname>)
 * @param string $dest
 * @param array $extraOptions Additional attributes for the copy field
 */
public function addCopyField($source, $dest, $extraOptions = array())
{
    $attributes = array_merge(
        ['source' => $source, 'dest' => $dest],
        $extraOptions
    );

    // Appending through the nested key creates the per-source list on first use
    $this->copyFields[$source][] = $attributes;
}
||
| 467 | |||
/**
 * Generate XML for copy field definitions
 *
 * Emits one <copyField> per (copy destination, fulltext field) pair first,
 * followed by the explicit copy fields registered via addCopyField().
 *
 * @return string Definitions joined by a newline and a tab
 */
public function getCopyFieldDefinitions()
{
    $lines = [];

    // Default copy fields
    foreach ($this->getCopyDestinations() as $copyTo) {
        foreach ($this->fulltextFields as $name => $unusedField) {
            $lines[] = "<copyField source='{$name}' dest='{$copyTo}' />";
        }
    }

    // Explicit copy fields
    foreach ($this->copyFields as $fieldSets) {
        foreach ($fieldSets as $fieldAttrs) {
            $lines[] = $this->toXmlTag('copyField', $fieldAttrs);
        }
    }

    return implode("\n\t", $lines);
}
||
| 493 | |||
/**
 * Determine if the given class is one of the given types.
 *
 * @param string $class
 * @param array|string $base Class or list of base classes; a list matches
 * when any of its entries matches
 * @return bool True when $class equals $base or is a subclass of it
 */
protected function classIs($class, $base)
{
    // Check single origin
    if (!is_array($base)) {
        return $class === $base || is_subclass_of($class, $base);
    }

    foreach ($base as $nextBase) {
        if ($this->classIs($class, $nextBase)) {
            return true;
        }
    }

    return false;
}
||
| 515 | |||
/**
 * Add the value(s) of one field of $object to the Solr document.
 *
 * Nothing is added when the object's class does not match the field's
 * 'origin'. The Solr type is resolved through the filter type map:
 *  - 'tdate' values are converted to the form 1995-12-31T23:59:59Z; empty
 *    values and values strtotime() cannot parse are skipped
 *  - 'tint' / 'tfloat' / 'tdouble' values are skipped unless numeric
 *
 * Array values are added one by one via addField(); a single value is set
 * via setField() and only when it is not null.
 *
 * @param object $doc Document receiving the values (must offer addField() and setField())
 * @param object $object Object being indexed
 * @param array $field Field spec; reads 'origin', 'type' and 'name'
 */
protected function _addField($doc, $object, $field)
{
    $class = get_class($object);
    if (!$this->classIs($class, $field['origin'])) {
        return;
    }

    $value = $this->_getFieldValue($object, $field);

    $type = isset(self::$filterTypeMap[$field['type']])
        ? self::$filterTypeMap[$field['type']]
        : self::$filterTypeMap['*'];

    $isDate = $type == 'tdate';
    $isNumber = $type == 'tint' || $type == 'tfloat' || $type == 'tdouble';

    if (is_array($value)) {
        foreach ($value as $sub) {
            /* Solr requires dates in the form 1995-12-31T23:59:59Z */
            if ($isDate) {
                if (!$sub) {
                    continue;
                }
                $timestamp = strtotime($sub);
                if ($timestamp === false) {
                    // Unparseable date: skip it rather than index the Unix epoch
                    continue;
                }
                $sub = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
            }

            /* Solr requires numbers to be valid if presented, not just empty */
            if ($isNumber && !is_numeric($sub)) {
                continue;
            }

            $doc->addField($field['name'], $sub);
        }
        return;
    }

    /* Solr requires dates in the form 1995-12-31T23:59:59Z */
    if ($isDate) {
        if (!$value) {
            return;
        }
        $timestamp = strtotime($value);
        if ($timestamp === false) {
            // Unparseable date: skip it rather than index the Unix epoch
            return;
        }
        $value = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
    }

    /* Solr requires numbers to be valid if presented, not just empty */
    if ($isNumber && !is_numeric($value)) {
        return;
    }

    // Only index fields that are not null
    if ($value !== null) {
        $doc->setField($field['name'], $value);
    }
}
||
| 564 | |||
| 565 | protected function _addAs($object, $base, $options) |
||
| 597 | } |
||
| 598 | |||
/**
 * Add $object to the index once for every configured class it matches.
 *
 * A configured class matches when it equals the object's class, or when its
 * 'include_children' option is set and the object's class is a subclass.
 *
 * @param object $object
 * @return array One _addAs() result per matching configured class
 */
public function add($object)
{
    $class = get_class($object);
    $docs = [];

    foreach ($this->getClasses() as $searchclass => $options) {
        $matches = $searchclass == $class
            || ($options['include_children'] && is_subclass_of($class, $searchclass));
        if (!$matches) {
            continue;
        }

        $base = DataObject::getSchema()->baseDataClass($searchclass);
        $docs[] = $this->_addAs($object, $base, $options);
    }

    return $docs;
}
||
| 613 | |||
| 614 | public function canAdd($class) |
||
| 623 | } |
||
| 624 | |||
/**
 * Delete the document for the given base class, ID and state from the index.
 *
 * @param string $base
 * @param mixed $id
 * @param mixed $state
 * @return bool False when the service threw an Exception (which is passed
 * to static::warn()), true otherwise
 */
public function delete($base, $id, $state)
{
    $documentID = $this->getDocumentIDForState($base, $id, $state);

    $deleted = true;
    try {
        $this->getService()->deleteById($documentID);
    } catch (Exception $e) {
        static::warn($e);
        $deleted = false;
    }

    return $deleted;
}
||
| 638 | |||
/**
 * Clear all records which do not match the given classname whitelist.
 *
 * Can also be used to trim an index when reducing to a narrower set of classes.
 *
 * Ignores current state / variant.
 *
 * Class names are escaped with sanitiseClassName() before they are placed
 * in the delete query, the same way search() escapes them, so namespaced
 * classes are matched.
 *
 * @param array $classes List of non-obsolete classes in the same format as SolrIndex::getClasses()
 * @return bool Flag if successful; false when $classes is empty (nothing is deleted then)
 * @throws \Apache_Solr_HttpTransportException
 */
public function clearObsoleteClasses($classes)
{
    if (empty($classes)) {
        return false;
    }

    // Delete all records which do not match the necessary classname rules
    $conditions = array();
    foreach ($classes as $class => $options) {
        $solrField = $options['include_children'] ? 'ClassHierarchy' : 'ClassName';
        $conditions[] = $solrField . ':' . $this->sanitiseClassName($class);
    }

    // Delete records which don't match any of these conditions in this index
    $deleteQuery = "-(" . implode(' ', $conditions) . ")";
    $this
        ->getService()
        ->deleteByQuery($deleteQuery);

    return true;
}
||
| 673 | |||
/**
 * Commit pending changes through the Solr service.
 *
 * @return bool False when the service threw an Exception (which is passed
 * to static::warn()), true otherwise
 */
public function commit()
{
    $committed = true;
    try {
        $this->getService()->commit(false, false, false);
    } catch (Exception $e) {
        static::warn($e);
        $committed = false;
    }

    return $committed;
}
| 685 | |||
| 686 | /** |
||
| 687 | * @param SearchQuery $query |
||
| 688 | * @param integer $offset |
||
| 689 | * @param integer $limit |
||
| 690 | * @param array $params Extra request parameters passed through to Solr |
||
| 691 | * @return ArrayData Map with the following keys: |
||
| 692 | * - 'Matches': ArrayList of the matched object instances |
||
| 693 | * @throws \Apache_Solr_HttpTransportException |
||
| 694 | * @throws \Apache_Solr_InvalidArgumentException |
||
| 695 | */ |
||
| 696 | public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array()) |
||
| 697 | { |
||
| 698 | $service = $this->getService(); |
||
| 699 | $this->applySearchVariants($query); |
||
| 700 | |||
| 701 | $q = array(); // Query |
||
| 702 | $fq = array(); // Filter query |
||
| 703 | $qf = array(); // Query fields |
||
| 704 | $hlq = array(); // Highlight query |
||
| 705 | |||
| 706 | // Build the search itself |
||
| 707 | $q = $this->getQueryComponent($query, $hlq); |
||
| 708 | |||
| 709 | // If using boosting, set the clean term separately for highlighting. |
||
| 710 | // See https://issues.apache.org/jira/browse/SOLR-2632 |
||
| 711 | if (array_key_exists('hl', $params) && !array_key_exists('hl.q', $params)) { |
||
| 712 | $params['hl.q'] = implode(' ', $hlq); |
||
| 713 | } |
||
| 714 | |||
| 715 | // Filter by class if requested |
||
| 716 | $classq = array(); |
||
| 717 | foreach ($query->classes as $class) { |
||
| 718 | if (!empty($class['includeSubclasses'])) { |
||
| 719 | $classq[] = 'ClassHierarchy:' . $this->sanitiseClassName($class['class']); |
||
| 720 | } else { |
||
| 721 | $classq[] = 'ClassName:' . $this->sanitiseClassName($class['class']); |
||
| 722 | } |
||
| 723 | } |
||
| 724 | if ($classq) { |
||
| 725 | $fq[] = '+(' . implode(' ', $classq) . ')'; |
||
| 726 | } |
||
| 727 | |||
| 728 | // Filter by filters |
||
| 729 | $fq = array_merge($fq, $this->getFiltersComponent($query)); |
||
| 730 | |||
| 731 | // Prepare query fields unless specified explicitly |
||
| 732 | if (isset($params['qf'])) { |
||
| 733 | $qf = $params['qf']; |
||
| 734 | } else { |
||
| 735 | $qf = $this->getQueryFields(); |
||
| 736 | } |
||
| 737 | if (is_array($qf)) { |
||
| 738 | $qf = implode(' ', $qf); |
||
| 739 | } |
||
| 740 | if ($qf) { |
||
| 741 | $params['qf'] = $qf; |
||
| 742 | } |
||
| 743 | |||
| 744 | if (!headers_sent() && Director::isDev()) { |
||
| 745 | if ($q) { |
||
| 746 | header('X-Query: ' . implode(' ', $q)); |
||
| 747 | } |
||
| 748 | if ($fq) { |
||
| 749 | header('X-Filters: "' . implode('", "', $fq) . '"'); |
||
| 750 | } |
||
| 751 | if ($qf) { |
||
| 752 | header('X-QueryFields: ' . $qf); |
||
| 753 | } |
||
| 754 | } |
||
| 755 | |||
| 756 | if ($offset == -1) { |
||
| 757 | $offset = $query->start; |
||
| 758 | } |
||
| 759 | if ($limit == -1) { |
||
| 760 | $limit = $query->limit; |
||
| 761 | } |
||
| 762 | if ($limit == -1) { |
||
| 763 | $limit = SearchQuery::$default_page_size; |
||
| 764 | } |
||
| 765 | |||
| 766 | $params = array_merge($params, array('fq' => implode(' ', $fq))); |
||
| 767 | |||
| 768 | $res = $service->search( |
||
| 769 | $q ? implode(' ', $q) : '*:*', |
||
| 770 | $offset, |
||
| 771 | $limit, |
||
| 772 | $params, |
||
| 773 | \Apache_Solr_Service::METHOD_POST |
||
| 774 | ); |
||
| 775 | |||
| 776 | $results = new ArrayList(); |
||
| 777 | if ($res->getHttpStatus() >= 200 && $res->getHttpStatus() < 300) { |
||
| 778 | foreach ($res->response->docs as $doc) { |
||
| 779 | $result = DataObject::get_by_id($doc->ClassName, $doc->ID); |
||
| 780 | if ($result) { |
||
| 781 | $results->push($result); |
||
| 782 | |||
| 783 | // Add highlighting (optional) |
||
| 784 | $docId = $doc->_documentid; |
||
| 785 | if ($res->highlighting && $res->highlighting->$docId) { |
||
| 786 | // TODO Create decorator class for search results rather than adding arbitrary object properties |
||
| 787 | // TODO Allow specifying highlighted field, and lazy loading |
||
| 788 | // in case the search API needs another query (similar to SphinxSearchable->buildExcerpt()). |
||
| 789 | $combinedHighlights = array(); |
||
| 790 | foreach ($res->highlighting->$docId as $field => $highlights) { |
||
| 791 | $combinedHighlights = array_merge($combinedHighlights, $highlights); |
||
| 792 | } |
||
| 793 | |||
| 794 | // Remove entity-encoded U+FFFD replacement character. It signifies non-displayable characters, |
||
| 795 | // and shows up as an encoding error in browsers. |
||
| 796 | $result->Excerpt = DBField::create_field( |
||
| 797 | 'HTMLText', |
||
| 798 | str_replace( |
||
| 799 | '�', |
||
| 800 | '', |
||
| 801 | implode(' ... ', $combinedHighlights) |
||
| 802 | ) |
||
| 803 | ); |
||
| 804 | } |
||
| 805 | } |
||
| 806 | } |
||
| 807 | $numFound = $res->response->numFound; |
||
| 808 | } else { |
||
| 809 | $numFound = 0; |
||
| 810 | } |
||
| 811 | |||
| 812 | $ret = array(); |
||
| 813 | $ret['Matches'] = new PaginatedList($results); |
||
| 814 | $ret['Matches']->setLimitItems(false); |
||
| 815 | // Tell PaginatedList how many results there are |
||
| 816 | $ret['Matches']->setTotalItems($numFound); |
||
| 817 | // Results for current page start at $offset |
||
| 818 | $ret['Matches']->setPageStart($offset); |
||
| 819 | // Results per page |
||
| 820 | $ret['Matches']->setPageLength($limit); |
||
| 821 | |||
| 822 | // Include spellcheck and suggestion data. Requires spellcheck=true in $params |
||
| 823 | if (isset($res->spellcheck)) { |
||
| 824 | // Expose all spellcheck data, for custom handling. |
||
| 825 | $ret['Spellcheck'] = $res->spellcheck; |
||
| 826 | |||
| 827 | // Suggestions. Requires spellcheck.collate=true in $params |
||
| 828 | if (isset($res->spellcheck->suggestions->collation)) { |
||
| 829 | // Extract string suggestion |
||
| 830 | $suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation); |
||
| 831 | |||
| 832 | // The collation, including advanced query params (e.g. +), suitable for making another query |
||
| 833 | // programmatically. |
||
| 834 | $ret['Suggestion'] = $suggestion; |
||
| 835 | |||
| 836 | // A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display. |
||
| 837 | $ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion); |
||
| 838 | |||
| 839 | // A string suitable for appending to an href as a query string. |
||
| 840 | // For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a> |
||
| 841 | $ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion); |
||
| 842 | } |
||
| 843 | } |
||
| 844 | |||
| 845 | $ret = new ArrayData($ret); |
||
| 846 | |||
| 847 | // Enable extensions to add extra data from the response into |
||
| 848 | // the returned results set. |
||
| 849 | $this->extend('updateSearchResults', $ret, $res); |
||
| 850 | |||
| 851 | return $ret; |
||
| 852 | } |
||
| 853 | |||
/**
 * With a common set of variants that are relevant to at least one class in the list (from either the query or
 * the current index), allow them to alter the query to add their variant column conditions.
 *
 * The query's own classes are used when it has any; otherwise the classes
 * of this index are used.
 *
 * @param SearchQuery $query
 */
protected function applySearchVariants(SearchQuery $query)
{
    $classes = $query->classes;
    if (!count($classes)) {
        $classes = $this->getClasses();
    }

    /** @var SearchVariant_Caller $variantCaller */
    $variantCaller = SearchVariant::withCommon($classes);
    $variantCaller->call('alterQuery', $query, $this);
}
||
| 868 | |||
/**
 * Solr requires namespaced classes to have double escaped backslashes
 *
 * Every backslash in $className is replaced by $replaceWith, which
 * defaults to two backslashes.
 *
 * @param string $className E.g. My\Object\Here
 * @param string $replaceWith The replacement character(s) to use
 * @return string E.g. My\\Object\\Here
 */
public function sanitiseClassName($className, $replaceWith = '\\\\')
{
    $namespaceSeparator = '\\';

    return str_replace($namespaceSeparator, $replaceWith, $className);
}
||
| 880 | |||
/**
 * Get the query (q) component for this search
 *
 * Each search text is split into quoted phrases and whitespace-separated
 * words. Every part becomes a required ('+') clause: either the bare part,
 * or an OR-group over the search's fields when 'fields' and/or 'boost' are
 * given. A '~' is appended to each part for fuzzy searches.
 *
 * @param SearchQuery $searchQuery
 * @param array &$hlq Highlight query returned by reference; receives every part unmodified
 * @return array
 */
protected function getQueryComponent(SearchQuery $searchQuery, &$hlq = array())
{
    $q = array();
    foreach ($searchQuery->search as $search) {
        $text = $search['text'];
        preg_match_all('/"[^"]*"|\S+/', $text, $parts);

        $fuzzy = $search['fuzzy'] ? '~' : '';

        // The field list is the same for every part of this search, so build it once
        $fields = (isset($search['fields'])) ? $search['fields'] : array();
        if (isset($search['boost'])) {
            $fields = array_merge($fields, array_keys($search['boost']));
        }

        foreach ($parts[0] as $part) {
            if ($fields) {
                $searchq = array();
                foreach ($fields as $field) {
                    // Boosts are keyed by the raw field name, so look the boost up
                    // before the name is escaped for Solr
                    $boost = (isset($search['boost'][$field])) ? '^' . $search['boost'][$field] : '';

                    // Escape namespace separators in class names
                    $escapedField = $this->sanitiseClassName($field);

                    $searchq[] = "{$escapedField}:" . $part . $fuzzy . $boost;
                }
                $q[] = '+(' . implode(' OR ', $searchq) . ')';
            } else {
                $q[] = '+' . $part . $fuzzy;
            }
            $hlq[] = $part;
        }
    }
    return $q;
}
||
| 920 | |||
/**
 * Parse all require constraints for inclusion in a filter query
 *
 * Each required field yields one "+(...)" group whose space-separated items are built from
 * the field's values: the missing marker, the present marker, a range, or a quoted literal.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each require
 */
protected function getRequireFiltersComponent(SearchQuery $searchQuery)
{
    $fq = array();
    foreach ($searchQuery->require as $field => $values) {
        // Handle namespaced class names (same escaping the exclude filters apply)
        $field = $this->sanitiseClassName($field);

        $requireq = array();

        foreach ($values as $value) {
            if ($value === SearchQuery::$missing) {
                // Everything, minus documents that have any value for the field
                $requireq[] = "(*:* -{$field}:[* TO *])";
            } elseif ($value === SearchQuery::$present) {
                $requireq[] = "{$field}:[* TO *]";
            } elseif ($value instanceof SearchQuery_Range) {
                // A null bound is open-ended
                $start = ($value->start === null) ? '*' : $value->start;
                $end = ($value->end === null) ? '*' : $value->end;
                $requireq[] = "{$field}:[{$start} TO {$end}]";
            } else {
                $requireq[] = $field . ':"' . $value . '"';
            }
        }

        $fq[] = '+(' . implode(' ', $requireq) . ')';
    }
    return $fq;
}
||
| 957 | |||
/**
 * Parse all exclude constraints for inclusion in a filter query
 *
 * Each excluded field yields one filter string. Excluding the missing marker is expressed as
 * a positive "+field:[* TO *]" clause; all other values are collected into a negative
 * "-(...)" group. The negative group is only emitted when it has at least one item, so that
 * no empty "-()" group is produced.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each exclusion
 */
protected function getExcludeFiltersComponent(SearchQuery $searchQuery)
{
    $fq = array();
    foreach ($searchQuery->exclude as $field => $values) {
        // Handle namespaced class names
        $field = $this->sanitiseClassName($field);

        $excludeq = array();
        $missing = false;

        foreach ($values as $value) {
            if ($value === SearchQuery::$missing) {
                $missing = true;
            } elseif ($value === SearchQuery::$present) {
                $excludeq[] = "{$field}:[* TO *]";
            } elseif ($value instanceof SearchQuery_Range) {
                // A null bound is open-ended
                $start = ($value->start === null) ? '*' : $value->start;
                $end = ($value->end === null) ? '*' : $value->end;
                $excludeq[] = "{$field}:[{$start} TO {$end}]";
            } else {
                $excludeq[] = $field . ':"' . $value . '"';
            }
        }

        $clauses = array();
        if ($missing) {
            // Excluding "missing" means the field has to be present
            $clauses[] = "+{$field}:[* TO *]";
        }
        if ($excludeq) {
            $clauses[] = '-(' . implode(' ', $excludeq) . ')';
        }

        // Nothing to express for this field (empty value list): add no filter at all
        if ($clauses) {
            $fq[] = implode(' ', $clauses);
        }
    }
    return $fq;
}
||
| 998 | |||
/**
 * Build the filter string for the criteria attached to the search query.
 *
 * The string starts with the adapter's prepend text, has every criteria clause appended to
 * it in order, and ends with the adapter's append text.
 *
 * @param SearchQuery $searchQuery
 * @return string|null The assembled criteria string, or null when the query has no criteria
 * @throws \Exception When the query has criteria but no adapter
 */
protected function getCriteriaComponent(SearchQuery $searchQuery)
{
    $criteria = $searchQuery->getCriteria();
    if (count($criteria) === 0) {
        return null;
    }

    $adapter = $searchQuery->getAdapter();
    if ($adapter === null) {
        throw new \Exception('SearchQuery does not have a SearchAdapter applied');
    }

    // Opening text supplied by the adapter (the original notes the statement needs to start
    // with a positive conjunction)
    $statement = $adapter->getPrependToCriteriaComponent();

    // Each clause writes itself onto the statement it is handed
    foreach ($criteria as $clause) {
        $clause->setAdapter($adapter);
        $clause->appendPreparedStatementTo($statement);
    }

    // Closing text supplied by the adapter; the result is a single string
    return $statement . $adapter->getAppendToCriteriaComponent();
}
||
| 1028 | |||
/**
 * Get all filter conditions for this search
 *
 * Combines the require filters, then the exclude filters, then (when present) the criteria
 * string as a final element.
 *
 * @param SearchQuery $searchQuery
 * @return array
 * @throws \Exception
 */
public function getFiltersComponent(SearchQuery $searchQuery)
{
    // Evaluated before the other components, so any exception it raises surfaces first
    $criteriaFilter = $this->getCriteriaComponent($searchQuery);

    $requireFilters = $this->getRequireFiltersComponent($searchQuery);
    $excludeFilters = $this->getExcludeFiltersComponent($searchQuery);
    $filters = array_merge($requireFilters, $excludeFilters);

    if ($criteriaFilter === null) {
        return $filters;
    }

    $filters[] = $criteriaFilter;

    return $filters;
}
||
| 1051 | |||
| 1052 | protected $service; |
||
| 1053 | |||
/**
 * Get the Solr service for this index, resolving it through Solr::service() on first use
 * and keeping it on the instance afterwards.
 *
 * @return SolrService
 */
public function getService()
{
    if ($this->service) {
        return $this->service;
    }

    // Not set yet: look the service up by this index's class name and remember it
    $this->service = Solr::service(get_class($this));

    return $this->service;
}
||
| 1064 | |||
/**
 * Set the Solr service this index uses; getService() returns it from then on.
 *
 * @param SolrService $service
 * @return $this
 */
public function setService(SolrService $service)
{
    $this->service = $service;

    return $this;
}
||
| 1070 | |||
| 1071 | /** |
||
| 1072 | * Upload config for this index to the given store |
||
| 1073 | * |
||
| 1074 | * @param SolrConfigStore $store |
||
| 1075 | */ |
||
| 1076 | public function uploadConfig($store) |
||
| 1089 | } |
||
| 1090 | } |
||
| 1091 | } |
||
| 1092 | } |
||
| 1093 |