Completed
Push — master ( d98303...8deccf )
by
unknown
23s queued 16s
created

SolrIndex::setFieldBoosting()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 11
rs 9.4285
cc 3
eloc 7
nc 3
nop 2
1
<?php
2
3
namespace SilverStripe\FullTextSearch\Solr;
4
5
use SilverStripe\Control\Director;
6
use SilverStripe\Core\Environment;
7
use SilverStripe\FulltextSearch\Search\Indexes\SearchIndex;
8
use SilverStripe\FullTextSearch\Solr\Services\SolrService;
9
use SilverStripe\FulltextSearch\Search\Queries\SearchQuery;
10
use SilverStripe\FullTextSearch\Search\Queries\SearchQuery_Range;
11
use SilverStripe\FullTextSearch\Search\Variants\SearchVariant;
12
use SilverStripe\FulltextSearch\Search\SearchIntrospection;
13
use SilverStripe\ORM\ArrayList;
14
use SilverStripe\ORM\DataObject;
15
use SilverStripe\ORM\FieldType\DBField;
16
use SilverStripe\ORM\PaginatedList;
17
use SilverStripe\View\ArrayData;
18
19
abstract class SolrIndex extends SearchIndex
20
{
21
    public static $fulltextTypeMap = array(
22
        '*' => 'text',
23
        'HTMLVarchar' => 'htmltext',
24
        'HTMLText' => 'htmltext'
25
    );
26
27
    public static $filterTypeMap = array(
28
        '*' => 'string',
29
        'Boolean' => 'boolean',
30
        'Date' => 'tdate',
31
        'SSDatetime' => 'tdate',
32
        'SS_Datetime' => 'tdate',
33
        'ForeignKey' => 'tint',
34
        'Int' => 'tint',
35
        'Float' => 'tfloat',
36
        'Double' => 'tdouble'
37
    );
38
39
    public static $sortTypeMap = array();
40
41
    protected $analyzerFields = array();
42
43
    protected $copyFields = array();
44
45
    protected $extrasPath = null;
46
47
    protected $templatesPath = null;
48
49
    private static $casting = [
0 ignored issues
show
Comprehensibility introduced by
Consider using a different property name as you override a private property of the parent class.
Loading history...
Unused Code introduced by
The property $casting is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
50
        'FieldDefinitions' => 'HTMLText',
51
        'CopyFieldDefinitions' => 'HTMLText'
52
    ];
53
54
    /**
55
     * List of boosted fields
56
     *
57
     * @var array
58
     */
59
    protected $boostedFields = array();
60
61
    /**
62
     * Name of default field
63
     *
64
     * @var string
65
     * @config
66
     */
67
    private static $default_field = '_text';
0 ignored issues
show
Unused Code introduced by
The property $default_field is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
68
69
    /**
70
     * List of copy fields all fulltext fields should be copied into.
71
     * This will fallback to default_field if not specified
72
     *
73
     * @var array
74
     */
75
    private static $copy_fields = array();
0 ignored issues
show
Unused Code introduced by
The property $copy_fields is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
76
77
    /**
     * Resolve the folder holding the templates used to generate the schema
     * and field definitions.
     *
     * The index-specific $templatesPath wins when set; otherwise the
     * 'templatespath' entry of the global Solr options is used. Any trailing
     * slashes are removed from the result.
     *
     * @return string Path without a trailing slash
     */
    public function getTemplatesPath()
    {
        $options = Solr::solr_options();
        $templateDir = $this->templatesPath ?: $options['templatespath'];

        return rtrim($templateDir, '/');
    }
87
88
    /**
     * Resolve the folder holding the default configuration files
     * (e.g. solrconfig.xml).
     *
     * The index-specific $extrasPath wins when set; otherwise the
     * 'extraspath' entry of the global Solr options is returned.
     *
     * @return string
     */
    public function getExtrasPath()
    {
        $options = Solr::solr_options();

        if ($this->extrasPath) {
            return $this->extrasPath;
        }

        return $options['extraspath'];
    }
97
98
    /**
     * Render the schema.ss template found in the templates path.
     *
     * @return mixed The result of renderWith() for the schema template
     */
    public function generateSchema()
    {
        $template = $this->getTemplatesPath() . '/schema.ss';

        return $this->renderWith($template);
    }
102
103
    /**
     * Build the index name for this index class.
     *
     * The class name is sanitised with '-' as the separator, then wrapped
     * with the optional SS_SOLR_INDEX_PREFIX and SS_SOLR_INDEX_SUFFIX
     * environment values. Empty (falsy) values are ignored.
     *
     * @return string
     */
    public function getIndexName()
    {
        $name = $this->sanitiseClassName(get_class($this), '-');
        $prefix = Environment::getEnv('SS_SOLR_INDEX_PREFIX');
        $suffix = Environment::getEnv('SS_SOLR_INDEX_SUFFIX');

        // Parts are joined without any separator, exactly as prefix + name + suffix
        return ($prefix ? $prefix : '') . $name . ($suffix ? $suffix : '');
    }
125
126
    /**
     * Recover the indexer class name from an index name produced by
     * {@link getIndexName()}.
     *
     * Strips the configured SS_SOLR_INDEX_PREFIX / SS_SOLR_INDEX_SUFFIX when
     * the name actually starts / ends with them, then turns every '-' back
     * into a namespace separator.
     *
     * @param string $indexName
     * @return string
     */
    public static function getClassNameFromIndex($indexName)
    {
        $prefix = Environment::getEnv('SS_SOLR_INDEX_PREFIX');
        if ($prefix) {
            $prefixLength = strlen($prefix);
            if (substr($indexName, 0, $prefixLength) === $prefix) {
                $indexName = substr($indexName, $prefixLength);
            }
        }

        $suffix = Environment::getEnv('SS_SOLR_INDEX_SUFFIX');
        if ($suffix) {
            $suffixLength = strlen($suffix);
            if (substr($indexName, -$suffixLength) === $suffix) {
                $indexName = substr($indexName, 0, -$suffixLength);
            }
        }

        return str_replace('-', '\\', $indexName);
    }
148
149
    /**
     * Render the types.ss template found in the templates path.
     *
     * @return mixed The result of renderWith() for the types template
     */
    public function getTypes()
    {
        $template = $this->getTemplatesPath() . '/types.ss';

        return $this->renderWith($template);
    }
153
154
    /**
     * Register an index-time analyzer for a specific field.
     * Can be used to remove HTML tags, apply stemming, etc.
     *
     * The analyzer is recorded against every full field name that
     * fieldData() resolves for $field; nothing happens if none are resolved.
     * A later call with the same $type replaces the earlier parameters.
     *
     * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
     *
     * @param string $field
     * @param string $type
     * @param array $params Parameters for the analyzer, usually at least a "class"
     */
    public function addAnalyzer($field, $type, $params)
    {
        $resolved = $this->fieldData($field);
        if (!$resolved) {
            return;
        }

        foreach ($resolved as $fullName => $spec) {
            // The per-field list is created implicitly on first assignment
            $this->analyzerFields[$fullName][$type] = $params;
        }
    }
176
177
    /**
     * Name of the default text field, read from the 'default_field' config
     * setting (normally '_text').
     *
     * @return string
     */
    public function getDefaultField()
    {
        $config = $this->config();

        return $config->default_field;
    }
186
187
    /**
     * List the fields every fulltext field should be copied into.
     *
     * Uses the 'copy_fields' config setting when it is non-empty, otherwise
     * falls back to a single-element list containing the default field.
     *
     * @return array
     */
    protected function getCopyDestinations()
    {
        $configured = $this->config()->copy_fields;

        return $configured ? $configured : array($this->getDefaultField());
    }
203
204
    /**
     * Build the <field> definitions for the schema.
     *
     * Output order: an empty leading entry, the hardcoded fields, the
     * fulltext collation (default) field, the fulltext fields, the filter
     * fields and finally the sort fields. Filter/sort entries whose
     * 'fullfield' is ID or ClassName are skipped because those fields are
     * already hardcoded.
     *
     * @return string Definitions joined by a newline and two tabs
     */
    public function getFieldDefinitions()
    {
        $stored = $this->getStoredDefault();
        $defaultField = $this->getDefaultField();

        $definitions = array(
            "",
            // Hardcoded field definitions
            "<field name='_documentid' type='string' indexed='true' stored='true' required='true' />",
            "<field name='ID' type='tint' indexed='true' stored='true' required='true' />",
            "<field name='ClassName' type='string' indexed='true' stored='true' required='true' />",
            "<field name='ClassHierarchy' type='string' indexed='true' stored='true' required='true' multiValued='true' />",
            // Fulltext collation field
            "<field name='{$defaultField}' type='htmltext' indexed='true' stored='{$stored}' multiValued='true' />",
        );

        // User-specified fulltext fields use the fulltext type map
        foreach ($this->fulltextFields as $name => $spec) {
            $definitions[] = $this->getFieldDefinition($name, $spec, self::$fulltextTypeMap);
        }

        // Filter fields first, then sort fields; both use the default type map
        foreach (array($this->filterFields, $this->sortFields) as $fieldSet) {
            foreach ($fieldSet as $name => $spec) {
                if (in_array($spec['fullfield'], array('ID', 'ClassName'))) {
                    continue;
                }
                $definitions[] = $this->getFieldDefinition($name, $spec);
            }
        }

        return implode("\n\t\t", $definitions);
    }
246
247
    /**
     * Extract the first suggestion text from a collation value.
     *
     * A string is returned unchanged. For an object exposing
     * misspellingsAndCorrections, the first entry of that collection is
     * returned. Anything else yields an empty string.
     *
     * @param mixed $collation
     * @return string
     */
    protected function getCollatedSuggestion($collation = '')
    {
        if (is_string($collation)) {
            return $collation;
        }

        if (!is_object($collation) || !isset($collation->misspellingsAndCorrections)) {
            return '';
        }

        // Only the first correction is of interest
        foreach ($collation->misspellingsAndCorrections as $suggestion) {
            return $suggestion;
        }

        return '';
    }
267
268
    /**
     * Turn a Solr spellcheck collation string into a human friendly suggestion.
     *
     * The collation is split on single spaces and leading '+' characters
     * (advanced query markers) are stripped from each part before the parts
     * are joined back together with spaces.
     *
     * @param string $collation
     * @return string
     */
    protected function getNiceSuggestion($collation = '')
    {
        $cleanedParts = array_map(
            function ($part) {
                return ltrim($part, '+');
            },
            explode(' ', $collation)
        );

        return implode(' ', $cleanedParts);
    }
284
285
    /**
     * Turn a Solr spellcheck collation string into a query string value.
     *
     * This is the nice suggestion with every space replaced by '+'.
     * Useful for constructing 'Did you mean?' links, for example:
     * <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
     *
     * @param string $collation
     * @return string
     */
    protected function getSuggestionQueryString($collation = '')
    {
        $suggestion = $this->getNiceSuggestion($collation);

        return str_replace(' ', '+', $suggestion);
    }
296
297
    /**
     * Add a fulltext field that should be stored.
     *
     * The 'stored' option is always forced to 'true', overriding any value
     * passed in $extraOptions.
     *
     * @param string $field The field to add
     * @param string $forceType The type to force this field as (required in some cases, when not
     * detectable from metadata)
     * @param array $extraOptions Dependent on search implementation
     */
    public function addStoredField($field, $forceType = null, $extraOptions = array())
    {
        $forced = array('stored' => 'true');

        $this->addFulltextField($field, $forceType, array_merge($extraOptions, $forced));
    }
310
311
    /**
     * Add a fulltext field with a boosted value.
     *
     * The 'boost' option is always set from $boost, overriding any value
     * passed in $extraOptions.
     *
     * @param string $field The field to add
     * @param string $forceType The type to force this field as (required in some cases, when not
     * detectable from metadata)
     * @param array $extraOptions Dependent on search implementation
     * @param float $boost Numeric boosting value (defaults to 2)
     */
    public function addBoostedField($field, $forceType = null, $extraOptions = array(), $boost = 2)
    {
        $forced = array('boost' => $boost);

        $this->addFulltextField($field, $forceType, array_merge($extraOptions, $forced));
    }
325
326
327
    /**
     * Resolve field data via the parent implementation while intercepting
     * the 'boost' option.
     *
     * 'boost' is removed from $extraOptions before delegating so it is not
     * passed on as a field option; a non-null boost is instead recorded in
     * $boostedFields for every field name the parent returns.
     *
     * @param string $field
     * @param string $forceType
     * @param array $extraOptions
     * @return array Field data as returned by the parent
     */
    public function fieldData($field, $forceType = null, $extraOptions = array())
    {
        // Pull 'boost' out so that it is recorded here rather than captured by solr
        $boost = isset($extraOptions['boost']) ? $extraOptions['boost'] : null;
        unset($extraOptions['boost']);

        $data = parent::fieldData($field, $forceType, $extraOptions);

        if ($boost === null) {
            return $data;
        }

        // Boost all fields with this name
        foreach ($data as $fullName => $spec) {
            $this->boostedFields[$fullName] = $boost;
        }

        return $data;
    }
345
346
    /**
     * Set the default boosting level for a specific field.
     * Will control the default value for qf param (Query Fields), but will not
     * override a query-specific value.
     *
     * Fields must be added before having a field boosting specified.
     *
     * @param string $field Full field key (Model_Field)
     * @param float|null $level Numeric boosting value. Set to null to clear boost
     * @throws \InvalidArgumentException If $field is not a registered fulltext field
     */
    public function setFieldBoosting($field, $level)
    {
        if (!isset($this->fulltextFields[$field])) {
            throw new \InvalidArgumentException("No fulltext field $field exists on " . $this->getIndexName());
        }

        if ($level !== null) {
            $this->boostedFields[$field] = $level;
            return;
        }

        // A null level clears any existing boost
        unset($this->boostedFields[$field]);
    }
367
368
    /**
     * Get the map of boosted fields (field name => boost level).
     *
     * @return array
     */
    public function getBoostedFields()
    {
        $boosts = $this->boostedFields;

        return $boosts;
    }
377
378
    /**
     * Determine the best default value for the 'qf' parameter.
     *
     * Each boosted field is emitted as "<name>^<boost>". The default field
     * is appended without a boost unless it is itself boosted.
     *
     * @return array|null List of query fields, or null if no field is boosted
     */
    public function getQueryFields()
    {
        // Not necessary to specify this unless boosting
        if (empty($this->boostedFields)) {
            return null;
        }

        $queryFields = array();
        foreach ($this->boostedFields as $fieldName => $boost) {
            $queryFields[] = "{$fieldName}^{$boost}";
        }

        // At this point at least one field is queried, so the default field
        // must always be included, otherwise it would be excluded
        $defaultField = $this->getDefaultField();
        if (!isset($this->boostedFields[$defaultField])) {
            $queryFields[] = $defaultField;
        }

        return $queryFields;
    }
402
403
    /**
     * Gets the default 'stored' value for fields in this index.
     *
     * @return string 'true' when Director reports dev mode, 'false' otherwise
     */
    protected function getStoredDefault()
    {
        if (Director::isDev()) {
            return 'true';
        }

        return 'false';
    }
412
413
    /**
     * Build the <field> XML definition for a single field.
     *
     * The Solr type is looked up from $typeMap by the spec's 'type', falling
     * back to the map's '*' entry. Analyzers registered for $name are nested
     * inside an <analyzer> element. Entries in the spec's 'extra_options'
     * override the default attributes.
     *
     * @param string $name
     * @param array $spec
     * @param array $typeMap Defaults to the filter type map when empty
     * @return string XML
     */
    protected function getFieldDefinition($name, $spec, $typeMap = null)
    {
        $typeMap = $typeMap ?: self::$filterTypeMap;

        $multiValued = empty($spec['multi_valued']) ? '' : 'true';
        $specType = $spec['type'];
        $solrType = isset($typeMap[$specType]) ? $typeMap[$specType] : $typeMap['*'];

        $analyzers = isset($this->analyzerFields[$name]) ? $this->analyzerFields[$name] : array();
        $analyzerXml = '';
        foreach ($analyzers as $analyzerType => $analyzerParams) {
            $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
        }

        $defaults = array(
            'name' => $name,
            'type' => $solrType,
            'indexed' => 'true',
            'stored' => $this->getStoredDefault(),
            'multiValued' => $multiValued
        );
        $overrides = isset($spec['extra_options']) ? $spec['extra_options'] : array();
        $content = $analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null;

        return $this->toXmlTag("field", array_merge($defaults, $overrides), $content);
    }
451
452
    /**
     * Convert a definition to an XML tag.
     *
     * Attributes are written as name='value' pairs separated by spaces;
     * values are inserted as given, without escaping. A tag without content
     * is self-closed.
     *
     * @param string $tag Tag name
     * @param array $attrs Map of attribute name to value
     * @param string|null $content Inner content
     * @return string XML tag
     */
    protected function toXmlTag($tag, $attrs, $content = null)
    {
        $pairs = array();
        if ($attrs) {
            foreach ($attrs as $attrName => $attrVal) {
                $pairs[] = "$attrName='$attrVal'";
            }
        }

        $closing = $content ? ">$content</$tag>" : '/>';

        // The space after the tag name is emitted even when there are no attributes
        return "<$tag " . implode(' ', $pairs) . $closing;
    }
473
474
    /**
     * Register an explicit copy field.
     *
     * Several destinations may be registered for the same source; each call
     * appends another definition.
     *
     * @param string $source Composite field name (<class>_<fieldname>)
     * @param string $dest
     * @param array $extraOptions Extra attributes merged over 'source' and 'dest'
     */
    public function addCopyField($source, $dest, $extraOptions = array())
    {
        $definition = array_merge(
            array('source' => $source, 'dest' => $dest),
            $extraOptions
        );

        // The per-source list is created implicitly on first append
        $this->copyFields[$source][] = $definition;
    }
488
489
    /**
     * Generate XML for copy field definitions.
     *
     * First every fulltext field is copied into each default copy
     * destination, then the explicitly registered copy fields follow.
     *
     * @return string Definitions joined by a newline and a tab
     */
    public function getCopyFieldDefinitions()
    {
        $lines = array();

        // Default copy fields
        foreach ($this->getCopyDestinations() as $destination) {
            foreach ($this->fulltextFields as $sourceName => $spec) {
                $lines[] = "<copyField source='{$sourceName}' dest='{$destination}' />";
            }
        }

        // Explicit copy fields
        foreach ($this->copyFields as $definitions) {
            foreach ($definitions as $attrs) {
                $lines[] = $this->toXmlTag('copyField', $attrs);
            }
        }

        return implode("\n\t", $lines);
    }
514
515
    /**
     * Determine if the given class is, or is a subclass of, one of the given
     * base classes.
     *
     * @param string $class
     * @param array|string $base Class or list of base classes
     * @return bool
     */
    protected function classIs($class, $base)
    {
        // Single origin
        if (!is_array($base)) {
            return $class === $base || is_subclass_of($class, $base);
        }

        // List of origins: any match is enough
        foreach ($base as $candidate) {
            if ($this->classIs($class, $candidate)) {
                return true;
            }
        }

        return false;
    }
536
537
    /**
     * Add the value(s) of one indexed field to a Solr document.
     *
     * Nothing is added when the object's class does not match the field's
     * 'origin'. The Solr type is looked up in the filter type map, and each
     * value is normalised before being written:
     *  - tdate: empty values are skipped, as are values strtotime() cannot
     *    parse (previously these were indexed as the 1970 epoch); valid
     *    values are formatted as 1995-12-31T23:59:59Z
     *  - tint / tfloat / tdouble: non-numeric values are skipped
     *
     * Array values are appended one by one with addField(); a scalar value
     * is written with setField() unless it is null.
     *
     * @param object $doc Document exposing addField() and setField()
     * @param object $object Object being indexed
     * @param array $field Field spec; reads 'origin', 'type' and 'name'
     */
    protected function _addField($doc, $object, $field)
    {
        $class = get_class($object);
        if (!$this->classIs($class, $field['origin'])) {
            return;
        }

        $value = $this->_getFieldValue($object, $field);

        $type = isset(self::$filterTypeMap[$field['type']])
            ? self::$filterTypeMap[$field['type']]
            : self::$filterTypeMap['*'];
        $isDate = ($type == 'tdate');
        $isNumber = ($type == 'tint' || $type == 'tfloat' || $type == 'tdouble');

        // Normalises a single raw value. Returns array(bool $indexable, mixed $value).
        $prepare = function ($raw) use ($isDate, $isNumber) {
            if ($isDate) {
                if (!$raw) {
                    return array(false, null);
                }
                $timestamp = strtotime($raw);
                // An unparseable date must not be indexed as 1970-01-01
                if ($timestamp === false) {
                    return array(false, null);
                }
                /* Solr requires dates in the form 1995-12-31T23:59:59Z */
                $raw = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
            }

            /* Solr requires numbers to be valid if presented, not just empty */
            if ($isNumber && !is_numeric($raw)) {
                return array(false, null);
            }

            return array(true, $raw);
        };

        if (is_array($value)) {
            foreach ($value as $sub) {
                list($indexable, $prepared) = $prepare($sub);
                if ($indexable) {
                    $doc->addField($field['name'], $prepared);
                }
            }
            return;
        }

        list($indexable, $prepared) = $prepare($value);

        // Only index fields that are not null
        if ($indexable && $prepared !== null) {
            $doc->setField($field['name'], $prepared);
        }
    }
585
586
    /**
     * Build a Solr document for the object, treated as the given base class,
     * and send it to the Solr service.
     *
     * The document always carries _documentid, ID, ClassName and one
     * ClassHierarchy entry per class in the object's hierarchy, followed by
     * every configured field whose 'base' matches $base.
     *
     * @param object $object Object being indexed
     * @param string $base Base class the object is indexed as
     * @param array $options Class options; reads 'include_children'
     * @return \Apache_Solr_Document|false The document, or false if the service call threw
     */
    protected function _addAs($object, $base, $options)
    {
        $includeSubs = $options['include_children'];

        $doc = new \Apache_Solr_Document();

        // Always present fields
        $doc->setField('_documentid', $this->getDocumentID($object, $base, $includeSubs));
        $doc->setField('ID', $object->ID);
        $doc->setField('ClassName', $object->ClassName);

        foreach (SearchIntrospection::hierarchy(get_class($object), false) as $class) {
            $doc->addField('ClassHierarchy', $class);
        }

        // Add the user-specified fields
        foreach ($this->getFieldsIterator() as $name => $field) {
            if ($field['base'] === $base || (is_array($field['base']) && in_array($base, $field['base']))) {
                $this->_addField($doc, $object, $field);
            }
        }

        try {
            $this->getService()->addDocument($doc);
        } catch (\Exception $e) {
            // Fully qualified: an unqualified "Exception" would resolve inside this
            // namespace to a class that does not exist, so nothing would be caught
            static::warn($e);
            return false;
        }

        return $doc;
    }
619
620
    /**
     * Index the object under every configured class it matches.
     *
     * A configured class matches when it equals the object's class, or when
     * its 'include_children' option is set and the object's class is a
     * subclass of it. The object is indexed against the base data class of
     * each matching class.
     *
     * @param object $object Object to index
     * @return array One _addAs() result per matching class
     */
    public function add($object)
    {
        $objectClass = get_class($object);
        $documents = array();

        foreach ($this->getClasses() as $indexedClass => $options) {
            $matches = $indexedClass == $objectClass
                || ($options['include_children'] && is_subclass_of($objectClass, $indexedClass));
            if (!$matches) {
                continue;
            }

            $base = DataObject::getSchema()->baseDataClass($indexedClass);
            $documents[] = $this->_addAs($object, $base, $options);
        }

        return $documents;
    }
634
635
    /**
     * Check whether objects of the given class are covered by this index.
     *
     * A configured class matches when it equals $class, or when its
     * 'include_children' option is set and $class is a subclass of it.
     *
     * @param string $class
     * @return bool
     */
    public function canAdd($class)
    {
        foreach ($this->classes as $indexedClass => $options) {
            if ($indexedClass == $class) {
                return true;
            }
            if ($options['include_children'] && is_subclass_of($class, $indexedClass)) {
                return true;
            }
        }

        return false;
    }
645
646
    /**
     * Delete the document for the given base class, ID and state from the
     * Solr service.
     *
     * @param string $base
     * @param int $id
     * @param mixed $state State passed through to getDocumentIDForState()
     * @return false|null False if the service call threw; nothing otherwise
     */
    public function delete($base, $id, $state)
    {
        $documentID = $this->getDocumentIDForState($base, $id, $state);

        try {
            $this->getService()->deleteById($documentID);
        } catch (\Exception $e) {
            // Fully qualified: an unqualified "Exception" would resolve inside this
            // namespace to a class that does not exist, so nothing would be caught
            static::warn($e);
            return false;
        }
    }
657
658
    /**
     * Clear all records which do not match the given classname whitelist.
     *
     * Can also be used to trim an index when reducing to a narrower set of classes.
     *
     * Ignores current state / variant.
     *
     * @param array $classes List of non-obsolete classes in the same format as SolrIndex::getClasses()
     * @return bool Flag if successful; false when $classes is empty (nothing is deleted in that case)
     */
    public function clearObsoleteClasses($classes)
    {
        if (empty($classes)) {
            return false;
        }

        // Delete all records which do not match the necessary classname rules
        $conditions = array();
        foreach ($classes as $class => $options) {
            // Escape namespace separators, as search() does for the same fields
            $escapedClass = $this->sanitiseClassName($class);
            if ($options['include_children']) {
                $conditions[] = "ClassHierarchy:{$escapedClass}";
            } else {
                $conditions[] = "ClassName:{$escapedClass}";
            }
        }

        // Delete records which don't match any of these conditions in this index
        $deleteQuery = "-(" . implode(' ', $conditions) . ")";
        $this
            ->getService()
            ->deleteByQuery($deleteQuery);
        return true;
    }
691
692
    /**
     * Commit pending changes on the Solr service.
     *
     * @return false|null False when the Solr service threw (the exception is
     *                    passed to static::warn()); no value otherwise
     */
    public function commit()
    {
        try {
            $this->getService()->commit(false, false, false);
        } catch (\Exception $e) {
            // Fully qualified on purpose: an unqualified "Exception" resolves to a
            // non-existent class in this namespace and would never match.
            static::warn($e);
            return false;
        }
    }
701
702
    /**
     * Run a query against the Solr service and wrap the matched records.
     *
     * @param SearchQuery $query
     * @param integer $offset Start of the page; -1 falls back to $query->start
     * @param integer $limit Page length; -1 falls back to $query->limit, then to SearchQuery::$default_page_size
     * @param array $params Extra request parameters passed through to Solr
     * @return ArrayData Map with the following keys:
     *  - 'Matches': PaginatedList of the matched object instances
     *  - 'Spellcheck': raw spellcheck data, when the response contains it
     *  - 'Suggestion', 'SuggestionNice', 'SuggestionQueryString': when the response contains a collation
     */
    public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array())
    {
        $service = $this->getService();

        $searchClass = count($query->classes) == 1
            ? $query->classes[0]['class']
            : null;
        SearchVariant::with($searchClass)
            ->call('alterQuery', $query, $this);

        $fq = array(); // Filter query
        $hlq = array(); // Highlight query

        // Build the search itself
        $q = $this->getQueryComponent($query, $hlq);

        // If using boosting, set the clean term separately for highlighting.
        // See https://issues.apache.org/jira/browse/SOLR-2632
        if (array_key_exists('hl', $params) && !array_key_exists('hl.q', $params)) {
            $params['hl.q'] = implode(' ', $hlq);
        }

        // Filter by class if requested
        $classq = array();
        foreach ($query->classes as $class) {
            if (!empty($class['includeSubclasses'])) {
                $classq[] = 'ClassHierarchy:' . $this->sanitiseClassName($class['class']);
            } else {
                $classq[] = 'ClassName:' . $this->sanitiseClassName($class['class']);
            }
        }
        if ($classq) {
            $fq[] = '+('.implode(' ', $classq).')';
        }

        // Filter by filters
        $fq = array_merge($fq, $this->getFiltersComponent($query));

        // Prepare query fields unless specified explicitly
        if (isset($params['qf'])) {
            $qf = $params['qf'];
        } else {
            $qf = $this->getQueryFields();
        }
        if (is_array($qf)) {
            $qf = implode(' ', $qf);
        }
        if ($qf) {
            $params['qf'] = $qf;
        }

        // In dev mode, expose the generated query parts as response headers
        if (!headers_sent() && Director::isDev()) {
            if ($q) {
                header('X-Query: '.implode(' ', $q));
            }
            if ($fq) {
                header('X-Filters: "'.implode('", "', $fq).'"');
            }
            if ($qf) {
                header('X-QueryFields: '.$qf);
            }
        }

        if ($offset == -1) {
            $offset = $query->start;
        }
        if ($limit == -1) {
            $limit = $query->limit;
        }
        if ($limit == -1) {
            $limit = SearchQuery::$default_page_size;
        }

        $params = array_merge($params, array('fq' => implode(' ', $fq)));

        $res = $service->search(
            $q ? implode(' ', $q) : '*:*',
            $offset,
            $limit,
            $params,
            \Apache_Solr_Service::METHOD_POST
        );

        $results = new ArrayList();
        if ($res->getHttpStatus() >= 200 && $res->getHttpStatus() < 300) {
            foreach ($res->response->docs as $doc) {
                $result = DataObject::get_by_id($doc->ClassName, $doc->ID);
                if ($result) {
                    $results->push($result);

                    // Add highlighting (optional)
                    $docId = $doc->_documentid;
                    $highlighting = $res->highlighting;
                    // Not every document has a highlighting entry, so check the
                    // property exists before reading it.
                    if ($highlighting && !empty($highlighting->$docId)) {
                        // TODO Create decorator class for search results rather than adding arbitrary object properties
                        // TODO Allow specifying highlighted field, and lazy loading
                        // in case the search API needs another query (similar to SphinxSearchable->buildExcerpt()).
                        $combinedHighlights = array();
                        foreach ($highlighting->$docId as $highlights) {
                            $combinedHighlights = array_merge($combinedHighlights, $highlights);
                        }

                        // Remove entity-encoded U+FFFD replacement character. It signifies non-displayable characters,
                        // and shows up as an encoding error in browsers.
                        $result->Excerpt = DBField::create_field(
                            'HTMLText',
                            str_replace(
                                '&#65533;',
                                '',
                                implode(' ... ', $combinedHighlights)
                            )
                        );
                    }
                }
            }
            $numFound = $res->response->numFound;
        } else {
            $numFound = 0;
        }

        $ret = array();
        $ret['Matches'] = new PaginatedList($results);
        $ret['Matches']->setLimitItems(false);
        // Tell PaginatedList how many results there are
        $ret['Matches']->setTotalItems($numFound);
        // Results for current page start at $offset
        $ret['Matches']->setPageStart($offset);
        // Results per page
        $ret['Matches']->setPageLength($limit);

        // Include spellcheck and suggestion data. Requires spellcheck=true in $params
        if (isset($res->spellcheck)) {
            // Expose all spellcheck data, for custom handling.
            $ret['Spellcheck'] = $res->spellcheck;

            // Suggestions. Requires spellcheck.collate=true in $params
            if (isset($res->spellcheck->suggestions->collation)) {
                // Extract string suggestion
                $suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);

                // The collation, including advanced query params (e.g. +), suitable for making another query
                // programmatically.
                $ret['Suggestion'] = $suggestion;

                // A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
                $ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);

                // A string suitable for appending to an href as a query string.
                // For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
                $ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
            }
        }

        $ret = new ArrayData($ret);

        // Enable extensions to add extra data from the response into
        // the returned results set.
        $this->extend('updateSearchResults', $ret, $res);

        return $ret;
    }
872
873
    /**
     * Replace every namespace separator (backslash) in a class name.
     *
     * By default each backslash becomes two backslashes, which is the form
     * Solr requires for namespaced classes.
     *
     * @param  string $className   E.g. My\Object\Here
     * @param  string $replaceWith The replacement character(s) to use
     * @return string              E.g. My\\Object\\Here
     */
    public function sanitiseClassName($className, $replaceWith = '\\\\')
    {
        $namespaceSeparator = '\\';
        $sanitised = str_replace($namespaceSeparator, $replaceWith, $className);

        return $sanitised;
    }
884
885
    /**
     * Get the query (q) component for this search
     *
     * Each search text is split into quoted phrases and whitespace-delimited
     * terms. Every term becomes a mandatory ("+") clause: either the bare term,
     * or an OR-group of "field:term" pairs when fields and/or boosts are given.
     *
     * @param SearchQuery $searchQuery
     * @param array &$hlq Highlight query returned by reference (the raw terms are appended)
     * @return array
     */
    protected function getQueryComponent(SearchQuery $searchQuery, &$hlq = array())
    {
        $q = array();
        foreach ($searchQuery->search as $search) {
            $text = $search['text'];
            preg_match_all('/"[^"]*"|\S+/', $text, $parts);

            $fuzzy = $search['fuzzy'] ? '~' : '';

            // The field list is the same for every term of this search entry
            $fields = (isset($search['fields'])) ? $search['fields'] : array();
            if (isset($search['boost'])) {
                $fields = array_merge($fields, array_keys($search['boost']));
            }

            foreach ($parts[0] as $part) {
                if ($fields) {
                    $searchq = array();
                    foreach ($fields as $field) {
                        // Escape namespace separators in class names. Keep the raw
                        // name as well, because boosts are keyed by the raw name.
                        $escapedField = $this->sanitiseClassName($field);

                        $boost = (isset($search['boost'][$field])) ? '^' . $search['boost'][$field] : '';
                        $searchq[] = "{$escapedField}:".$part.$fuzzy.$boost;
                    }
                    $q[] = '+('.implode(' OR ', $searchq).')';
                } else {
                    $q[] = '+'.$part.$fuzzy;
                }
                $hlq[] = $part;
            }
        }
        return $q;
    }
924
925
    /**
     * Build one mandatory filter-query clause per required field.
     *
     * The accepted values of a field are joined with spaces inside a single
     * "+( ... )" group.
     *
     * NOTE(review): unlike getExcludeFiltersComponent(), field names are not
     * passed through sanitiseClassName() here; confirm whether callers are
     * expected to pre-escape namespaced field names.
     *
     * @param SearchQuery $searchQuery
     * @return array List of parsed string values for each require
     */
    protected function getRequireFiltersComponent(SearchQuery $searchQuery)
    {
        $filters = [];

        foreach ($searchQuery->require as $field => $values) {
            $alternatives = [];

            foreach ($values as $value) {
                if ($value === SearchQuery::$missing) {
                    // Field has no value at all
                    $alternatives[] = "(*:* -{$field}:[* TO *])";
                    continue;
                }

                if ($value === SearchQuery::$present) {
                    // Field has any value
                    $alternatives[] = "{$field}:[* TO *]";
                    continue;
                }

                if ($value instanceof SearchQuery_Range) {
                    // A null bound is open-ended
                    $start = ($value->start === null) ? '*' : $value->start;
                    $end = ($value->end === null) ? '*' : $value->end;
                    $alternatives[] = "$field:[$start TO $end]";
                    continue;
                }

                $alternatives[] = $field.':"'.$value.'"';
            }

            $filters[] = '+('.implode(' ', $alternatives).')';
        }

        return $filters;
    }
961
962
    /**
     * Build one negative filter-query clause per excluded field.
     *
     * Excluded values are joined with spaces inside a "-( ... )" group.
     * Excluding the "missing" marker is expressed by prefixing the clause with
     * a requirement that the field has some value.
     *
     * @param SearchQuery $searchQuery
     * @return array List of parsed string values for each exclusion
     */
    protected function getExcludeFiltersComponent(SearchQuery $searchQuery)
    {
        $filters = [];

        foreach ($searchQuery->exclude as $field => $values) {
            // Handle namespaced class names
            $field = $this->sanitiseClassName($field);

            $excluded = [];
            $excludeMissing = false;

            foreach ($values as $value) {
                if ($value === SearchQuery::$missing) {
                    $excludeMissing = true;
                    continue;
                }

                if ($value === SearchQuery::$present) {
                    $excluded[] = "{$field}:[* TO *]";
                    continue;
                }

                if ($value instanceof SearchQuery_Range) {
                    // A null bound is open-ended
                    $start = ($value->start === null) ? '*' : $value->start;
                    $end = ($value->end === null) ? '*' : $value->end;
                    $excluded[] = "$field:[$start TO $end]";
                    continue;
                }

                $excluded[] = $field.':"'.$value.'"';
            }

            $prefix = '';
            if ($excludeMissing) {
                $prefix = "+{$field}:[* TO *] ";
            }
            $filters[] = $prefix . '-('.implode(' ', $excluded).')';
        }

        return $filters;
    }
1002
1003
    /**
     * Collect every filter condition for this search: the require clauses
     * first, followed by the exclude clauses.
     *
     * @param SearchQuery $searchQuery
     * @return array
     */
    public function getFiltersComponent(SearchQuery $searchQuery)
    {
        $requireFilters = $this->getRequireFiltersComponent($searchQuery);
        $excludeFilters = $this->getExcludeFiltersComponent($searchQuery);

        return array_merge($requireFilters, $excludeFilters);
    }
1016
1017
    /**
     * Service used to talk to Solr; resolved lazily by getService() or
     * injected through setService().
     */
    protected $service;

    /**
     * Return the Solr service for this index, asking Solr::service() for one
     * (keyed by this index's class name) the first time it is needed.
     *
     * @return SolrService
     */
    public function getService()
    {
        if ($this->service) {
            return $this->service;
        }

        $this->service = Solr::service(static::class);

        return $this->service;
    }
1029
1030
    /**
     * Inject the Solr service this index should use instead of the one
     * getService() would otherwise resolve.
     *
     * @param SolrService $service
     * @return $this
     */
    public function setService(SolrService $service)
    {
        $this->service = $service;

        return $this;
    }
1035
1036
    /**
     * Upload config for this index to the given store
     *
     * Uploads the generated schema as 'schema.xml', then every regular file
     * found directly inside the extras path.
     *
     * @param SolrConfigStore $store
     */
    public function uploadConfig($store)
    {
        // Upload the config files for this index
        $store->uploadString(
            $this->getIndexName(),
            'schema.xml',
            (string)$this->generateSchema()
        );

        // Upload additional files. glob() returns false on error, which must
        // not reach foreach.
        $extraFiles = glob($this->getExtrasPath().'/*') ?: array();
        foreach ($extraFiles as $file) {
            if (is_file($file)) {
                $store->uploadFile($this->getIndexName(), $file);
            }
        }
    }
1057
}
1058