Completed
Pull Request — master (#173)
by Robbie
01:58
created

SolrIndex::sanitiseClassName()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
1
<?php
0 ignored issues
show
Coding Style Compatibility introduced by
For compatibility and reusability of your code, PSR1 recommends that a file should introduce either new symbols (like classes, functions, etc.) or have side-effects (like outputting something, or including other files), but not both at the same time. The first symbol is defined on line 20 and the first side effect is on line 5.

The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.

The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.

To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.

Loading history...
2
3
namespace SilverStripe\FullTextSearch\Solr;
4
5
Solr::include_client_api();
6
7
use SilverStripe\Control\Director;
8
use SilverStripe\FulltextSearch\Search\Indexes\SearchIndex;
9
use SilverStripe\FullTextSearch\Solr\Services\SolrService;
10
use SilverStripe\FulltextSearch\Search\Queries\SearchQuery;
11
use SilverStripe\FullTextSearch\Search\Queries\SearchQuery_Range;
12
use SilverStripe\FullTextSearch\Search\Variants\SearchVariant;
13
use SilverStripe\FulltextSearch\Search\SearchIntrospection;
14
use SilverStripe\ORM\ArrayList;
15
use SilverStripe\ORM\DataObject;
16
use SilverStripe\ORM\FieldType\DBField;
17
use SilverStripe\ORM\PaginatedList;
18
use SilverStripe\View\ArrayData;
19
20
abstract class SolrIndex extends SearchIndex
21
{
22
    public static $fulltextTypeMap = array(
23
        '*' => 'text',
24
        'HTMLVarchar' => 'htmltext',
25
        'HTMLText' => 'htmltext'
26
    );
27
28
    public static $filterTypeMap = array(
29
        '*' => 'string',
30
        'Boolean' => 'boolean',
31
        'Date' => 'tdate',
32
        'SSDatetime' => 'tdate',
33
        'SS_Datetime' => 'tdate',
34
        'ForeignKey' => 'tint',
35
        'Int' => 'tint',
36
        'Float' => 'tfloat',
37
        'Double' => 'tdouble'
38
    );
39
40
    public static $sortTypeMap = array();
41
42
    protected $analyzerFields = array();
43
44
    protected $copyFields = array();
45
46
    protected $extrasPath = null;
47
48
    protected $templatesPath = null;
49
50
    private static $casting = [
0 ignored issues
show
Comprehensibility introduced by
Consider using a different property name as you override a private property of the parent class.
Loading history...
Unused Code introduced by
The property $casting is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
51
        'FieldDefinitions' => 'HTMLText',
52
        'CopyFieldDefinitions' => 'HTMLText'
53
    ];
54
55
    /**
56
     * List of boosted fields
57
     *
58
     * @var array
59
     */
60
    protected $boostedFields = array();
61
62
    /**
63
     * Name of default field
64
     *
65
     * @var string
66
     * @config
67
     */
68
    private static $default_field = '_text';
0 ignored issues
show
Unused Code introduced by
The property $default_field is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
69
70
    /**
71
     * List of copy fields all fulltext fields should be copied into.
72
     * This will fallback to default_field if not specified
73
     *
74
     * @var array
75
     */
76
    private static $copy_fields = array();
0 ignored issues
show
Unused Code introduced by
The property $copy_fields is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
77
78
    /**
     * Locate the folder holding the templates used to generate the schema
     * and field definitions.
     *
     * The index-specific $templatesPath wins when it is truthy; otherwise the
     * 'templatespath' entry of Solr::solr_options() is used. Trailing slashes
     * are stripped from the result.
     *
     * @return string Path to the templates folder, without trailing slash
     */
    public function getTemplatesPath()
    {
        $options = Solr::solr_options();

        if ($this->templatesPath) {
            $folder = $this->templatesPath;
        } else {
            $folder = $options['templatespath'];
        }

        return rtrim($folder, '/');
    }
88
89
    /**
     * Locate the folder holding the default configuration files
     * (e.g. solrconfig.xml).
     *
     * The index-specific $extrasPath wins when it is truthy; otherwise the
     * 'extraspath' entry of Solr::solr_options() is returned as-is.
     *
     * @return string Path to the extras folder
     */
    public function getExtrasPath()
    {
        $options = Solr::solr_options();

        if ($this->extrasPath) {
            return $this->extrasPath;
        }

        return $options['extraspath'];
    }
98
99
    /**
     * Render the schema.ss template found in the templates folder.
     *
     * @return mixed Whatever renderWith() produces for the schema template
     */
    public function generateSchema()
    {
        $template = $this->getTemplatesPath() . '/schema.ss';

        return $this->renderWith($template);
    }
103
104
    /**
     * Build the index name from this object's class name.
     *
     * When the SS_SOLR_INDEX_PREFIX and/or SS_SOLR_INDEX_SUFFIX constants are
     * defined they are prepended / appended to the class name.
     *
     * @return string
     */
    public function getIndexName()
    {
        $prefix = defined('SS_SOLR_INDEX_PREFIX') ? SS_SOLR_INDEX_PREFIX : '';
        $suffix = defined('SS_SOLR_INDEX_SUFFIX') ? SS_SOLR_INDEX_SUFFIX : '';

        return $prefix . get_class($this) . $suffix;
    }
124
125
    /**
     * Render the types.ss template found in the templates folder.
     *
     * @return mixed Whatever renderWith() produces for the types template
     */
    public function getTypes()
    {
        $template = $this->getTemplatesPath() . '/types.ss';

        return $this->renderWith($template);
    }
129
130
    /**
     * Register an index-time analyzer for a specific field.
     * Can be used to remove HTML tags, apply stemming, etc.
     *
     * The field is expanded through fieldData(); the analyzer parameters are
     * stored under every resulting full field name, keyed by analyzer type.
     * Nothing is recorded when fieldData() yields no entries.
     *
     * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
     *
     * @param string $field
     * @param string $type
     * @param array $params Parameters for the analyzer, usually at least a "class"
     */
    public function addAnalyzer($field, $type, $params)
    {
        $matches = $this->fieldData($field);
        if (empty($matches)) {
            return;
        }

        foreach ($matches as $fullField => $ignoredSpec) {
            if (isset($this->analyzerFields[$fullField])) {
                $this->analyzerFields[$fullField][$type] = $params;
            } else {
                $this->analyzerFields[$fullField] = array($type => $params);
            }
        }
    }
152
153
    /**
     * Read the name of the default text field (the 'default_field' config
     * value, declared on this class as '_text').
     *
     * @return string
     */
    public function getDefaultField()
    {
        $config = $this->config();

        return $config->default_field;
    }
162
163
    /**
     * List the fields every fulltext field should be copied into.
     *
     * Uses the 'copy_fields' config value when it is truthy, otherwise a
     * single-element list containing the default field.
     *
     * @return array
     */
    protected function getCopyDestinations()
    {
        $configured = $this->config()->copy_fields;

        // Fall back to the default field when nothing is configured
        return $configured ? $configured : array($this->getDefaultField());
    }
179
180
    /**
     * Build the <field> definitions for the schema.
     *
     * Output order: an empty leading entry, the hardcoded fields
     * (_documentid, ID, ClassName, ClassHierarchy), the fulltext collation
     * (default) field, the fulltext fields, then the filter fields and the
     * sort fields. Filter/sort fields whose 'fullfield' is ID or ClassName are
     * skipped because they are already covered by the hardcoded definitions.
     *
     * @return string Definitions joined by a newline and two tabs
     */
    public function getFieldDefinitions()
    {
        $stored = $this->getStoredDefault();
        $df = $this->getDefaultField();

        $xml = array(
            "",
            // Hardcoded field definitions
            "<field name='_documentid' type='string' indexed='true' stored='true' required='true' />",
            "<field name='ID' type='tint' indexed='true' stored='true' required='true' />",
            "<field name='ClassName' type='string' indexed='true' stored='true' required='true' />",
            "<field name='ClassHierarchy' type='string' indexed='true' stored='true' required='true' multiValued='true' />",
            // Fulltext collation field
            "<field name='{$df}' type='htmltext' indexed='true' stored='{$stored}' multiValued='true' />",
        );

        // User-specified fulltext fields use the fulltext type map
        foreach ($this->fulltextFields as $name => $field) {
            $xml[] = $this->getFieldDefinition($name, $field, self::$fulltextTypeMap);
        }

        // Filter and sort fields share the same handling (default type map)
        foreach (array($this->filterFields, $this->sortFields) as $fieldSet) {
            foreach ($fieldSet as $name => $field) {
                if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') {
                    continue;
                }
                $xml[] = $this->getFieldDefinition($name, $field);
            }
        }

        return implode("\n\t\t", $xml);
    }
222
223
    /**
     * Extract the first suggestion text from a collation value.
     *
     * A string is returned unchanged. For an object exposing
     * misspellingsAndCorrections, the first value found while iterating that
     * property is returned. Anything else yields an empty string.
     *
     * @param mixed $collation
     * @return string
     */
    protected function getCollatedSuggestion($collation = '')
    {
        if (is_string($collation)) {
            return $collation;
        }

        if (!is_object($collation) || !isset($collation->misspellingsAndCorrections)) {
            return '';
        }

        // Only the first correction is of interest
        foreach ($collation->misspellingsAndCorrections as $suggestion) {
            return $suggestion;
        }

        return '';
    }
243
244
    /**
     * Extract a human friendly spelling suggestion from a Solr spellcheck
     * collation string.
     *
     * The collation is split on single spaces and any leading '+' characters
     * (advanced query params) are stripped from each part before the parts
     * are joined back together with spaces.
     *
     * @param string $collation
     * @return string
     */
    protected function getNiceSuggestion($collation = '')
    {
        // array_map avoids the by-reference foreach, which would leave a
        // dangling reference to the last element behind
        $parts = array_map(
            function ($part) {
                return ltrim($part, '+');
            },
            explode(' ', $collation)
        );

        return implode(' ', $parts);
    }
260
261
    /**
     * Extract a query string from a Solr spellcheck collation string.
     * Useful for constructing 'Did you mean?' links, for example:
     * <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
     *
     * Takes the nice suggestion and replaces every space with '+'.
     *
     * @param string $collation
     * @return string
     */
    protected function getSuggestionQueryString($collation = '')
    {
        $nice = $this->getNiceSuggestion($collation);

        return str_replace(' ', '+', $nice);
    }
272
273
    /**
     * Add a fulltext field that should be stored.
     *
     * The 'stored' option is forced to 'true', overriding any value supplied
     * in $extraOptions.
     *
     * @param string $field The field to add
     * @param string $forceType The type to force this field as (required in some cases, when not
     * detectable from metadata)
     * @param array $extraOptions Dependent on search implementation
     */
    public function addStoredField($field, $forceType = null, $extraOptions = array())
    {
        $forced = array('stored' => 'true');

        $this->addFulltextField($field, $forceType, array_merge($extraOptions, $forced));
    }
286
287
    /**
     * Add a fulltext field with a boosted value.
     *
     * The 'boost' option is set to $boost, overriding any value supplied in
     * $extraOptions.
     *
     * @param string $field The field to add
     * @param string $forceType The type to force this field as (required in some cases, when not
     * detectable from metadata)
     * @param array $extraOptions Dependent on search implementation
     * @param float $boost Numeric boosting value (defaults to 2)
     */
    public function addBoostedField($field, $forceType = null, $extraOptions = array(), $boost = 2)
    {
        $forced = array('boost' => $boost);

        $this->addFulltextField($field, $forceType, array_merge($extraOptions, $forced));
    }
301
302
303
    /**
     * Resolve field data through the parent implementation while keeping the
     * 'boost' option out of the options handed to it.
     *
     * When a non-null 'boost' option is supplied, it is recorded in
     * $boostedFields for every field name returned by the parent.
     *
     * @param string $field
     * @param string $forceType
     * @param array $extraOptions
     * @return array Field data as returned by the parent implementation
     */
    public function fieldData($field, $forceType = null, $extraOptions = array())
    {
        // Pull 'boost' out so it is recorded here without being captured by solr
        $boost = array_key_exists('boost', $extraOptions) ? $extraOptions['boost'] : null;
        unset($extraOptions['boost']);

        $data = parent::fieldData($field, $forceType, $extraOptions);

        if ($boost === null) {
            return $data;
        }

        // Boost all fields with this name
        foreach ($data as $fieldName => $ignoredInfo) {
            $this->boostedFields[$fieldName] = $boost;
        }

        return $data;
    }
321
322
    /**
     * Set the default boosting level for a specific field.
     * Will control the default value for qf param (Query Fields), but will not
     * override a query-specific value.
     *
     * Fields must be added before having a field boosting specified.
     *
     * @param string $field Full field key (Model_Field)
     * @param float|null $level Numeric boosting value. Set to null to clear boost
     * @throws \InvalidArgumentException When $field is not a registered fulltext field
     */
    public function setFieldBoosting($field, $level)
    {
        if (!isset($this->fulltextFields[$field])) {
            $indexName = $this->getIndexName();
            throw new \InvalidArgumentException("No fulltext field $field exists on " . $indexName);
        }

        if ($level !== null) {
            $this->boostedFields[$field] = $level;
            return;
        }

        // A null level clears any existing boost
        unset($this->boostedFields[$field]);
    }
343
344
    /**
     * Get the recorded boost levels, keyed by field name.
     *
     * @return array
     */
    public function getBoostedFields()
    {
        $boosted = $this->boostedFields;

        return $boosted;
    }
353
354
    /**
     * Determine the best default value for the 'qf' parameter.
     *
     * Each boosted field becomes "<field>^<boost>". The default field is
     * appended (without a boost) unless it is itself boosted, because it would
     * otherwise be excluded once any query fields are specified.
     *
     * @return array|null List of query fields, or null if no field is boosted
     */
    public function getQueryFields()
    {
        // Not necessary to specify this unless boosting
        if (empty($this->boostedFields)) {
            return null;
        }

        $queryFields = array();
        foreach ($this->boostedFields as $fieldName => $boost) {
            $queryFields[] = "{$fieldName}^{$boost}";
        }

        // At this point at least one field is queried, so the default field
        // must always be present
        $df = $this->getDefaultField();
        if (!isset($this->boostedFields[$df])) {
            $queryFields[] = $df;
        }

        return $queryFields;
    }
378
379
    /**
     * Get the default 'stored' value for fields in this index.
     *
     * @return string 'true' when Director::isDev() is truthy, otherwise 'false'
     */
    protected function getStoredDefault()
    {
        if (Director::isDev()) {
            return 'true';
        }

        return 'false';
    }
388
389
    /**
     * Build the <field> XML definition for a single field.
     *
     * The Solr type is looked up in the type map by $spec['type'], falling
     * back to the map's '*' entry. Analyzers registered for $name are wrapped
     * in an <analyzer> element. Entries in $spec['extra_options'] override the
     * default attributes.
     *
     * @param string $name
     * @param array $spec
     * @param array $typeMap Defaults to the filter type map when falsy
     * @return string XML
     */
    protected function getFieldDefinition($name, $spec, $typeMap = null)
    {
        $map = $typeMap ? $typeMap : self::$filterTypeMap;

        $multiValued = empty($spec['multi_valued']) ? '' : "true";

        if (isset($map[$spec['type']])) {
            $type = $map[$spec['type']];
        } else {
            $type = $map['*'];
        }

        $analyzerXml = '';
        if (isset($this->analyzerFields[$name])) {
            foreach ($this->analyzerFields[$name] as $analyzerType => $analyzerParams) {
                $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
            }
        }

        $defaults = array(
            'name' => $name,
            'type' => $type,
            'indexed' => 'true',
            'stored' => $this->getStoredDefault(),
            'multiValued' => $multiValued
        );
        $overrides = isset($spec['extra_options']) ? $spec['extra_options'] : array();
        $fieldParams = array_merge($defaults, $overrides);

        $content = $analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null;

        return $this->toXmlTag("field", $fieldParams, $content);
    }
427
428
    /**
     * Convert a definition to an XML tag.
     *
     * Attributes are rendered as name='value' pairs separated by spaces. The
     * tag is self-closing unless truthy $content is given.
     *
     * NOTE: attribute names, values and content are inserted verbatim; no
     * XML escaping is applied here.
     *
     * @param string $tag
     * @param array $attrs Map of attribute name to value
     * @param string|null $content Inner content
     * @return string XML tag
     */
    protected function toXmlTag($tag, $attrs, $content = null)
    {
        $attrStrs = array();
        if ($attrs) {
            foreach ($attrs as $attrName => $attrVal) {
                $attrStrs[] = "$attrName='$attrVal'";
            }
        }

        // The space after the tag name is emitted even without attributes
        $xml = "<$tag " . implode(' ', $attrStrs);

        return $xml . ($content ? ">$content</$tag>" : '/>');
    }
449
450
    /**
     * Register an explicit copy field.
     *
     * The entry is appended to the list kept for $source; $extraOptions are
     * merged over the 'source' and 'dest' attributes.
     *
     * @param string $source Composite field name (<class>_<fieldname>)
     * @param string $dest
     * @param array $extraOptions Additional attributes for the copy field
     */
    public function addCopyField($source, $dest, $extraOptions = array())
    {
        $entry = array_merge(
            array('source' => $source, 'dest' => $dest),
            $extraOptions
        );

        if (isset($this->copyFields[$source])) {
            $this->copyFields[$source][] = $entry;
        } else {
            $this->copyFields[$source] = array($entry);
        }
    }
464
465
    /**
     * Generate XML for copy field definitions.
     *
     * First every fulltext field is copied into each copy destination, then
     * the explicitly registered copy fields are rendered.
     *
     * @return string Definitions joined by a newline and a tab
     */
    public function getCopyFieldDefinitions()
    {
        $definitions = array();
        $fulltextNames = array_keys($this->fulltextFields);

        // Default copy fields
        foreach ($this->getCopyDestinations() as $copyTo) {
            foreach ($fulltextNames as $name) {
                $definitions[] = "<copyField source='{$name}' dest='{$copyTo}' />";
            }
        }

        // Explicit copy fields
        foreach ($this->copyFields as $entries) {
            foreach ($entries as $fieldAttrs) {
                $definitions[] = $this->toXmlTag('copyField', $fieldAttrs);
            }
        }

        return implode("\n\t", $definitions);
    }
490
491
    /**
     * Determine if the given class is one of the given type(s).
     *
     * A class matches a base when the names are identical or when it is a
     * subclass of that base. For a list of bases, a match against any entry
     * is sufficient.
     *
     * @param string $class
     * @param array|string $base Class or list of base classes
     * @return bool
     */
    protected function classIs($class, $base)
    {
        // Single origin
        if (!is_array($base)) {
            return $class === $base || is_subclass_of($class, $base);
        }

        foreach ($base as $candidate) {
            if ($this->classIs($class, $candidate)) {
                return true;
            }
        }

        return false;
    }
512
513
    /**
     * Add the value(s) of one field of $object to the Solr document.
     *
     * Nothing is added when the object's class does not match the field's
     * 'origin'. Array values are added one by one with addField(); a scalar
     * value is written with setField() unless it is null. Values that cannot
     * be represented for the field's Solr type are skipped.
     *
     * @param object $doc Document exposing addField() and setField()
     * @param object $object Object the value is read from
     * @param array $field Field spec; 'origin', 'type' and 'name' are read here
     */
    protected function _addField($doc, $object, $field)
    {
        $class = get_class($object);
        if (!$this->classIs($class, $field['origin'])) {
            return;
        }

        $value = $this->_getFieldValue($object, $field);

        $type = isset(self::$filterTypeMap[$field['type']])
            ? self::$filterTypeMap[$field['type']]
            : self::$filterTypeMap['*'];

        if (is_array($value)) {
            foreach ($value as $sub) {
                list($indexable, $sub) = $this->normaliseValueForType($type, $sub);
                if ($indexable) {
                    $doc->addField($field['name'], $sub);
                }
            }
            return;
        }

        list($indexable, $value) = $this->normaliseValueForType($type, $value);

        // Only index fields that are not null
        if ($indexable && $value !== null) {
            $doc->setField($field['name'], $value);
        }
    }

    /**
     * Convert a raw value into the representation Solr requires for $type.
     *
     * @param string $type Solr field type (e.g. 'tdate', 'tint')
     * @param mixed $value Raw value
     * @return array Two entries: whether the value may be indexed, and the
     * (possibly converted) value
     */
    private function normaliseValueForType($type, $value)
    {
        /* Solr requires dates in the form 1995-12-31T23:59:59Z */
        if ($type == 'tdate') {
            if (!$value) {
                return array(false, $value);
            }
            $timestamp = strtotime($value);
            // An unparseable date must be skipped; passing false on to
            // gmdate() would index it as the Unix epoch
            if ($timestamp === false) {
                return array(false, $value);
            }
            $value = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
        }

        /* Solr requires numbers to be valid if presented, not just empty */
        if (($type == 'tint' || $type == 'tfloat' || $type == 'tdouble') && !is_numeric($value)) {
            return array(false, $value);
        }

        return array(true, $value);
    }
561
562
    /**
     * Build a Solr document for $object as seen from $base and send it to
     * the Solr service.
     *
     * The document always carries _documentid, ID, ClassName and one
     * ClassHierarchy entry per class in the object's hierarchy, followed by
     * every configured field whose 'base' matches $base.
     *
     * @param object $object Object to index
     * @param string $base Base class the document is added for
     * @param array $options Class options; 'include_children' is read here
     * @return \Apache_Solr_Document|false The document, or false when the
     * service threw an exception (which is passed to static::warn())
     */
    protected function _addAs($object, $base, $options)
    {
        $includeSubs = $options['include_children'];

        $doc = new \Apache_Solr_Document();

        // Always present fields
        $doc->setField('_documentid', $this->getDocumentID($object, $base, $includeSubs));
        $doc->setField('ID', $object->ID);
        $doc->setField('ClassName', $object->ClassName);

        foreach (SearchIntrospection::hierarchy(get_class($object), false) as $class) {
            $doc->addField('ClassHierarchy', $class);
        }

        // Add the user-specified fields
        foreach ($this->getFieldsIterator() as $name => $field) {
            if ($field['base'] === $base || (is_array($field['base']) && in_array($base, $field['base']))) {
                $this->_addField($doc, $object, $field);
            }
        }

        try {
            $this->getService()->addDocument($doc);
        } catch (\Exception $e) {
            // Must be fully qualified: an unqualified "Exception" resolves
            // inside this file's namespace and would never match
            static::warn($e);
            return false;
        }

        return $doc;
    }
595
596
    /**
     * Add $object to the index once for every configured class it matches.
     *
     * A configured class matches when it equals the object's class, or when
     * its 'include_children' option is set and the object's class is a
     * subclass of it.
     *
     * @param object $object
     * @return array One _addAs() result per matching configured class
     */
    public function add($object)
    {
        $objectClass = get_class($object);
        $results = array();

        foreach ($this->getClasses() as $searchclass => $options) {
            $matches = $searchclass == $objectClass
                || ($options['include_children'] && is_subclass_of($objectClass, $searchclass));
            if (!$matches) {
                continue;
            }

            $base = DataObject::getSchema()->baseDataClass($searchclass);
            $results[] = $this->_addAs($object, $base, $options);
        }

        return $results;
    }
610
611
    /**
     * Check whether objects of $class are covered by this index.
     *
     * True when $class equals a configured class, or is a subclass of a
     * configured class whose 'include_children' option is set.
     *
     * @param string $class
     * @return bool
     */
    public function canAdd($class)
    {
        foreach ($this->classes as $searchclass => $options) {
            if ($searchclass == $class) {
                return true;
            }
            if ($options['include_children'] && is_subclass_of($class, $searchclass)) {
                return true;
            }
        }

        return false;
    }
621
622
    /**
     * Delete the document for the given base class, ID and state from Solr.
     *
     * @param string $base
     * @param mixed $id
     * @param mixed $state
     * @return false|null False when the service threw an exception (which is
     * passed to static::warn()); nothing is returned otherwise
     */
    public function delete($base, $id, $state)
    {
        $documentID = $this->getDocumentIDForState($base, $id, $state);

        try {
            $this->getService()->deleteById($documentID);
        } catch (\Exception $e) {
            // Must be fully qualified: an unqualified "Exception" resolves
            // inside this file's namespace and would never match
            static::warn($e);
            return false;
        }
    }
633
634
    /**
     * Clear all records which do not match the given classname whitelist.
     *
     * Can also be used to trim an index when reducing to a narrower set of classes.
     *
     * Ignores current state / variant.
     *
     * @param array $classes List of non-obsolete classes in the same format as SolrIndex::getClasses()
     * @return bool False when $classes is empty, true once the delete query has been issued
     */
    public function clearObsoleteClasses($classes)
    {
        if (empty($classes)) {
            return false;
        }

        // One condition per whitelisted class: hierarchy match when children
        // are included, exact class name match otherwise
        $conditions = array();
        foreach ($classes as $class => $options) {
            $solrField = $options['include_children'] ? 'ClassHierarchy' : 'ClassName';
            $conditions[] = "{$solrField}:{$class}";
        }

        // Delete records which don't match any of these conditions in this index
        $deleteQuery = "-(" . implode(' ', $conditions) . ")";
        $service = $this->getService();
        $service->deleteByQuery($deleteQuery);

        return true;
    }
667
668
    /**
     * Commit pending changes to the Solr service.
     *
     * @return false|null False when the service threw an exception (which is
     * passed to static::warn()); nothing is returned otherwise
     */
    public function commit()
    {
        try {
            $this->getService()->commit(false, false, false);
        } catch (\Exception $e) {
            // Must be fully qualified: an unqualified "Exception" resolves
            // inside this file's namespace and would never match
            static::warn($e);
            return false;
        }
    }
677
678
    /**
     * Run the given query against this index and wrap the response for consumers.
     *
     * @param SearchQuery $query
     * @param integer $offset Index of the first result; -1 falls back to $query->start
     * @param integer $limit Number of results per page; -1 falls back to $query->limit and, if that is
     *                       also -1, to SearchQuery::$default_page_size
     * @param array $params Extra request parameters passed through to Solr
     * @return ArrayData Map with the following keys:
     *  - 'Matches': PaginatedList of the matched object instances
     *  - 'Spellcheck': Raw spellcheck data (only when the response contains it)
     *  - 'Suggestion', 'SuggestionNice', 'SuggestionQueryString': Only when the response
     *    contains a spellcheck collation
     */
    public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array())
    {
        $service = $this->getService();

        $searchClass = count($query->classes) == 1
            ? $query->classes[0]['class']
            : null;
        SearchVariant::with($searchClass)
            ->call('alterQuery', $query, $this);

        $fq = array(); // Filter query
        $hlq = array(); // Highlight query (filled by reference below)

        // Build the search itself
        $q = $this->getQueryComponent($query, $hlq);

        // If using boosting, set the clean term separately for highlighting.
        // See https://issues.apache.org/jira/browse/SOLR-2632
        if (array_key_exists('hl', $params) && !array_key_exists('hl.q', $params)) {
            $params['hl.q'] = implode(' ', $hlq);
        }

        // Filter by class if requested
        $classq = array();
        foreach ($query->classes as $class) {
            if (!empty($class['includeSubclasses'])) {
                $classq[] = 'ClassHierarchy:' . $this->sanitiseClassName($class['class']);
            } else {
                $classq[] = 'ClassName:' . $this->sanitiseClassName($class['class']);
            }
        }
        if ($classq) {
            $fq[] = '+('.implode(' ', $classq).')';
        }

        // Filter by filters
        $fq = array_merge($fq, $this->getFiltersComponent($query));

        // Prepare query fields unless specified explicitly
        if (isset($params['qf'])) {
            $qf = $params['qf'];
        } else {
            $qf = $this->getQueryFields();
        }
        if (is_array($qf)) {
            $qf = implode(' ', $qf);
        }
        if ($qf) {
            $params['qf'] = $qf;
        }

        // In dev mode, expose the generated query parts as response headers
        if (!headers_sent() && Director::isDev()) {
            if ($q) {
                header('X-Query: '.implode(' ', $q));
            }
            if ($fq) {
                header('X-Filters: "'.implode('", "', $fq).'"');
            }
            if ($qf) {
                header('X-QueryFields: '.$qf);
            }
        }

        // Resolve paging: explicit arguments win, then the query, then the default page size
        if ($offset == -1) {
            $offset = $query->start;
        }
        if ($limit == -1) {
            $limit = $query->limit;
        }
        if ($limit == -1) {
            $limit = SearchQuery::$default_page_size;
        }

        $params = array_merge($params, array('fq' => implode(' ', $fq)));

        $res = $service->search(
            $q ? implode(' ', $q) : '*:*',
            $offset,
            $limit,
            $params,
            \Apache_Solr_Service::METHOD_POST
        );

        $results = new ArrayList();
        if ($res->getHttpStatus() >= 200 && $res->getHttpStatus() < 300) {
            foreach ($res->response->docs as $doc) {
                // Documents whose record can no longer be loaded are silently left out
                $result = DataObject::get_by_id($doc->ClassName, $doc->ID);
                if ($result) {
                    $results->push($result);

                    // Add highlighting (optional)
                    $docId = $doc->_documentid;
                    if ($res->highlighting && $res->highlighting->$docId) {
                        // TODO Create decorator class for search results rather than adding arbitrary object properties
                        // TODO Allow specifying highlighted field, and lazy loading
                        // in case the search API needs another query (similar to SphinxSearchable->buildExcerpt()).
                        $combinedHighlights = array();
                        foreach ($res->highlighting->$docId as $highlights) {
                            $combinedHighlights = array_merge($combinedHighlights, $highlights);
                        }

                        // Remove entity-encoded U+FFFD replacement character. It signifies non-displayable characters,
                        // and shows up as an encoding error in browsers.
                        $result->Excerpt = DBField::create_field(
                            'HTMLText',
                            str_replace(
                                '&#65533;',
                                '',
                                implode(' ... ', $combinedHighlights)
                            )
                        );
                    }
                }
            }
            $numFound = $res->response->numFound;
        } else {
            $numFound = 0;
        }

        $ret = array();
        $ret['Matches'] = new PaginatedList($results);
        $ret['Matches']->setLimitItems(false);
        // Tell PaginatedList how many results there are
        $ret['Matches']->setTotalItems($numFound);
        // Results for current page start at $offset
        $ret['Matches']->setPageStart($offset);
        // Results per page
        $ret['Matches']->setPageLength($limit);

        // Include spellcheck and suggestion data. Requires spellcheck=true in $params
        if (isset($res->spellcheck)) {
            // Expose all spellcheck data, for custom handling.
            $ret['Spellcheck'] = $res->spellcheck;

            // Suggestions. Requires spellcheck.collate=true in $params
            if (isset($res->spellcheck->suggestions->collation)) {
                // Extract string suggestion
                $suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);

                // The collation, including advanced query params (e.g. +), suitable for making another query programmatically.
                $ret['Suggestion'] = $suggestion;

                // A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
                $ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);

                // A string suitable for appending to an href as a query string.
                // For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
                $ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
            }
        }

        $ret = new ArrayData($ret);

        // Enable extensions to add extra data from the response into
        // the returned results set.
        $this->extend('updateSearchResults', $ret, $res);

        return $ret;
    }
847
848
    /**
     * Escape the namespace separators of a class name for use in a Solr query:
     * every backslash is replaced by two backslashes.
     *
     * @param  string $className E.g. My\Object\Here
     * @return string            E.g. My\\Object\\Here
     */
    public function sanitiseClassName($className)
    {
        $separator = '\\';
        $escapedSeparator = $separator . $separator;

        return str_replace($separator, $escapedSeparator, $className);
    }
858
859
    /**
     * Build the list of clauses making up the query (q) parameter for a search.
     *
     * Each search text is split into double-quoted phrases and whitespace-delimited
     * words; every such term becomes one mandatory ("+") clause.
     *
     * @param SearchQuery $searchQuery
     * @param array &$hlq Highlight query returned by reference; receives every bare term
     * @return array
     */
    protected function getQueryComponent(SearchQuery $searchQuery, &$hlq = array())
    {
        $clauses = [];

        foreach ($searchQuery->search as $criteria) {
            // Quoted phrases are kept whole, everything else is split on whitespace
            preg_match_all('/"[^"]*"|\S+/', $criteria['text'], $matches);

            $fuzzySuffix = $criteria['fuzzy'] ? '~' : '';

            foreach ($matches[0] as $term) {
                // Fields to search: the explicit ones plus any field that has a boost
                $targets = isset($criteria['fields']) ? $criteria['fields'] : [];
                if (isset($criteria['boost'])) {
                    $targets = array_merge($targets, array_keys($criteria['boost']));
                }

                if (!$targets) {
                    // No specific fields: search the term as is
                    $clauses[] = '+' . $term . $fuzzySuffix;
                } else {
                    $perField = [];
                    foreach ($targets as $target) {
                        $boostSuffix = '';
                        if (isset($criteria['boost'][$target])) {
                            $boostSuffix = '^' . $criteria['boost'][$target];
                        }
                        $perField[] = $target . ':' . $term . $fuzzySuffix . $boostSuffix;
                    }
                    $clauses[] = '+(' . implode(' OR ', $perField) . ')';
                }

                $hlq[] = $term;
            }
        }

        return $clauses;
    }
895
896
    /**
     * Translate every "require" constraint of the query into a filter query clause.
     *
     * One mandatory clause is produced per field; the accepted values of a field are
     * listed side by side inside that clause.
     *
     * @param SearchQuery $searchQuery
     * @return array List of parsed string values for each require
     */
    protected function getRequireFiltersComponent(SearchQuery $searchQuery)
    {
        $filters = [];

        foreach ($searchQuery->require as $field => $values) {
            $alternatives = [];

            foreach ($values as $value) {
                if ($value === SearchQuery::$missing) {
                    // Field must be absent
                    $alternatives[] = "(*:* -{$field}:[* TO *])";
                    continue;
                }

                if ($value === SearchQuery::$present) {
                    // Field must hold any value
                    $alternatives[] = "{$field}:[* TO *]";
                    continue;
                }

                if ($value instanceof SearchQuery_Range) {
                    // An open end of the range is written as a wildcard
                    $from = $value->start;
                    $to = $value->end;
                    $alternatives[] = sprintf(
                        '%s:[%s TO %s]',
                        $field,
                        $from === null ? '*' : $from,
                        $to === null ? '*' : $to
                    );
                    continue;
                }

                // Plain value: match it as a quoted phrase
                $alternatives[] = $field . ':"' . $value . '"';
            }

            $filters[] = '+(' . implode(' ', $alternatives) . ')';
        }

        return $filters;
    }
932
933
    /**
     * Parse all exclude constraints for inclusion in a filter query
     *
     * Excluding SearchQuery::$missing is expressed positively, by requiring the field
     * to hold a value; every other excluded value goes into a negated group.
     *
     * @param SearchQuery $searchQuery
     * @return array List of parsed string values for each exclusion
     */
    protected function getExcludeFiltersComponent(SearchQuery $searchQuery)
    {
        $fq = array();
        foreach ($searchQuery->exclude as $field => $values) {
            $excludeq = array();
            $missing = false;

            foreach ($values as $value) {
                if ($value === SearchQuery::$missing) {
                    $missing = true;
                } elseif ($value === SearchQuery::$present) {
                    $excludeq[] = "{$field}:[* TO *]";
                } elseif ($value instanceof SearchQuery_Range) {
                    // An open end of the range is written as a wildcard
                    $start = $value->start;
                    if ($start === null) {
                        $start = '*';
                    }
                    $end = $value->end;
                    if ($end === null) {
                        $end = '*';
                    }
                    $excludeq[] = "$field:[$start TO $end]";
                } else {
                    $excludeq[] = $field.':"'.$value.'"';
                }
            }

            $clauses = array();
            if ($missing) {
                $clauses[] = "+{$field}:[* TO *]";
            }
            // Only emit the negated group when there is something to negate: an empty
            // group "-()" is not a valid query clause.
            if ($excludeq) {
                $clauses[] = '-('.implode(' ', $excludeq).')';
            }
            if ($clauses) {
                $fq[] = implode(' ', $clauses);
            }
        }
        return $fq;
    }
970
971
    /**
     * Collect every filter condition for this search: the require clauses first,
     * followed by the exclude clauses.
     *
     * @param SearchQuery $searchQuery
     * @return array
     */
    public function getFiltersComponent(SearchQuery $searchQuery)
    {
        $required = $this->getRequireFiltersComponent($searchQuery);
        $excluded = $this->getExcludeFiltersComponent($searchQuery);

        return array_merge($required, $excluded);
    }
984
985
    /**
     * Service used by this index; resolved on first use by getService()
     *
     * @var SolrService
     */
    protected $service;

    /**
     * Get the service for this index, asking Solr::service() for it (keyed by the
     * class of this index) the first time it is needed.
     *
     * @return SolrService
     */
    public function getService()
    {
        if ($this->service) {
            return $this->service;
        }

        $this->service = Solr::service(get_class($this));

        return $this->service;
    }
997
998
    /**
     * Use the given service for this index instead of the one resolved by getService().
     *
     * @param SolrService $service
     * @return $this
     */
    public function setService(SolrService $service)
    {
        $this->service = $service;

        return $this;
    }
1003
1004
    /**
     * Upload config for this index to the given store
     *
     * Uploads the generated schema as schema.xml, followed by every regular file found
     * directly inside the extras path.
     *
     * @param SolrConfigStore $store
     */
    public function uploadConfig($store)
    {
        // Upload the config files for this index
        $store->uploadString(
            $this->getIndexName(),
            'schema.xml',
            (string)$this->generateSchema()
        );

        // Upload additional files. glob() returns false on error, which must not be
        // handed to foreach.
        $extras = glob($this->getExtrasPath().'/*') ?: array();
        foreach ($extras as $file) {
            if (is_file($file)) {
                $store->uploadFile($this->getIndexName(), $file);
            }
        }
    }
1025
}
1026