QueryGenerator::convertToQuotedCSV()   B
last analyzed

Complexity

Conditions 5
Paths 9

Size

Total Lines 19
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 5.7629
Metric Value
dl 0
loc 19
ccs 11
cts 16
cp 0.6875
rs 8.8571
cc 5
eloc 13
nc 9
nop 1
crap 5.7629
1
<?php
2
namespace SilverStripe\Elastica;
3
4
use \SilverStripe\Elastica\ResultList;
5
use Elastica\Query;
6
7
use Elastica\Query\QueryString;
8
use Elastica\Aggregation\Filter;
9
use Elastica\Filter\Term;
10
use Elastica\Filter\BoolAnd;
11
use Elastica\Filter\MatchAll;
12
use Elastica\Query\Filtered;
13
use Elastica\Query\MultiMatch;
14
use SilverStripe\Elastica\RangedAggregation;
15
16
class QueryGenerator {
17
18
	/* The term to search for */
19
	private $queryText = '';
20
21
	/* Fields to search for as an array of Name to weighting, otherwise null for all, ie not
22
	a multi match query */
23
	private $fields = null;
24
25
	/* Aggregations already selected in format array(key => value), e.g. array('ISO' => 400) */
26
	private $selectedFilters = null;
27
28
	/* For an empty query, show results or not */
29
	private $showResultsForEmptyQuery = false;
30
31
	/* Manipulator to be used for aggregations */
32
	private $manipulator = null;
33
34
	/* The length of a page of results */
35
	private $pageLength = 10;
36
37
	/* Where to start, normally a multiple of pageLength */
38
	private $start = 0;
39
40
	/* Cache hit counter for test purposes */
41
	private static $cacheHitCtr = 0;
42
43
	/**
44
	 * Comma separated list of SilverStripe ClassNames to search. Leave blank for all
45
	 * @var string
46
	 */
47
	private $classes = '';
48
49
50 10
	/**
	 * Store the text that generated queries will search for.
	 *
	 * @param string $newQueryText the term to search for
	 */
	public function setQueryText($newQueryText) {
		$this->queryText = $newQueryText;
	}
53
54
55 10
	/**
	 * Store the fields to search.
	 *
	 * @param array|null $newFields field name => weighting, or null to search all fields
	 *                              (in which case no multi match query is used)
	 */
	public function setFields($newFields) {
		$this->fields = $newFields;
	}
58
59
60 10
	/**
	 * Store the aggregations that have already been selected.
	 *
	 * @param array|null $newSelectedFilters selected filters as key => value,
	 *                                       e.g. array('ISO' => 400)
	 */
	public function setSelectedFilters($newSelectedFilters) {
		$this->selectedFilters = $newSelectedFilters;
	}
63
64
65 10
	/**
	 * Choose whether an empty query should show results.
	 *
	 * @param bool $newShowResultsForEmptyQuery true to show results when the query text is empty
	 */
	public function setShowResultsForEmptyQuery($newShowResultsForEmptyQuery) {
		$this->showResultsForEmptyQuery = $newShowResultsForEmptyQuery;
	}
68
69
70
	/**
	 * Report whether an empty query should show results.
	 *
	 * @return bool the value last passed to setShowResultsForEmptyQuery(), false by default
	 */
	public function getShowResultsForEmptyQuery() {
		return $this->showResultsForEmptyQuery;
	}
73
74
75 10
	/**
	 * Store the length of a page of results.
	 *
	 * @param int $newPageLength number of results per page
	 */
	public function setPageLength($newPageLength) {
		$this->pageLength = $newPageLength;
	}
78
79
80 10
	/**
	 * Store the offset of the first result to return.
	 *
	 * @param int $newStart where to start, normally a multiple of the page length
	 */
	public function setStart($newStart) {
		$this->start = $newStart;
	}
83
84
85
	/**
	 * Replace the list of classes to search.
	 *
	 * @param string $newClasses comma separated list of SilverStripe ClassNames,
	 *                           leave blank to search all
	 */
	public function setClasses($newClasses) {
		$this->classes = $newClasses;
	}
92
93
94
95
	/**
	 * Store the name of the manipulator, which is mainly used for aggregation.  The name is
	 * handed to the Injector when a query is generated.
	 *
	 * @param string $newManipulator manipulator used for aggregation, must implement
	 *                               ElasticaSearchHelper
	 */
	public function setQueryResultManipulator($newManipulator) {
		$this->manipulator = $newManipulator;
	}
102
103
104
	/**
	 * Accessor for the cache hit counter, for testing purposes.
	 *
	 * @return int number of cache hits counted since the last reset
	 */
	public static function getCacheHitCounter() {
		return self::$cacheHitCtr;
	}
110
111
112
	/**
	 * Set the cache hit counter back to zero, for testing purposes.
	 */
	public static function resetCacheHitCounter() {
		self::$cacheHitCtr = 0;
	}
115
116
117
	/**
	 * Build the Elastica query for the current state of this generator.
	 *
	 * The shape of the query differs with and without query text, with and without specific
	 * fields (multi match), and with and without selected filters.  Keeping this logic in its
	 * own class makes it fast to test and easy to add new cases.
	 *
	 * @return \Elastica\Query Query object suitable for searching using the Elastica library
	 */
	public function generateElasticaQuery() {
		// Work these out before the manipulator is involved, as it is handed a reference to
		// this generator
		$hasQueryText = ($this->queryText != '');
		$useMultiMatch = ($this->fields != null);

		// Normalise "no filters selected" to an empty array
		if ($this->selectedFilters == null) {
			$this->selectedFilters = array();
		}

		// Create the manipulator (if one is configured) and tell it about this generator
		$this->manipulatorInstance = null;
		if ($this->manipulator) {
			$this->manipulatorInstance = \Injector::inst()->create($this->manipulator);
			$helper = $this->manipulatorInstance;
			$helper->queryGenerator = $this;
			$helper->originalQueryString = $this->queryText;
		}

		// The text part of the search: all fields, or a weighted set of fields
		$textQuery = $useMultiMatch ? $this->multiMatchQuery() : $this->simpleTextQuery();

		// Apply any selected filters, this gives back a query object of some form
		$query = $this->addFilters($textQuery);

		// This ensures that the original query text is available to search helpers
		$query->OriginalQueryText = $this->queryText;

		$this->addAggregation($query);

		// pagination
		$query->setSize($this->pageLength);
		$query->setFrom($this->start);

		// Without query text, fall back to the default sort of the manipulator
		if ($this->manipulatorInstance && !$hasQueryText) {
			$query->setSort($this->manipulatorInstance->getDefaultSort());
		}

		// Add suggestions in case required for poor spellings or typos
		$suggest = new \Elastica\Suggest();
		$phrase = new \Elastica\Suggest\Phrase('query-phrase-suggestions', '_all');
		$phrase->setText($this->queryText)->setSize(4);

		// Suggestions are highlighted with the tags configured for Elastica
		$highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');
		$phrase->setHighlight($highlightsCfg['PreTags'], $highlightsCfg['PostTags']);

		$suggest->addSuggestion($phrase);
		$query->setSuggest($suggest);

		return $query;
	}
181
182
183
	/**
	 * Generate a query for autocomplete.  The query text is matched (all terms required)
	 * against the autocomplete sub field of the first configured field, and at most 10
	 * results are requested.
	 *
	 * @return \Elastica\Query Autocompletion query for use with Elastica library
	 */
	public function generateElasticaAutocompleteQuery() {
		// Only the first of the configured fields is used
		$fieldNames = array_keys($this->fields);
		$autocompleteField = $fieldNames[0].'.autocomplete';

		$matchClause = array(
			'match' => array(
				$autocompleteField => array(
					'query' => $this->queryText,
					'operator' => 'and'
				)
			)
		);

		// The query clause can only have one entry, so when filters are selected the match
		// and the term filter are wrapped in a single filtered clause, along the lines of
		//   { "filtered": { "query": { "match": {...} }, "filter": { "term": {...} } } }
		if (empty($this->selectedFilters)) {
			$queryClause = $matchClause;
		} else {
			$queryClause = array(
				'filtered' => array(
					'query' => $matchClause,
					'filter' => array('term' => $this->selectedFilters)
				)
			);
		}

		return new Query(array(
			'size' => 10,
			'query' => $queryClause
		));
	}
221
222
223
224
225
226
	/**
	 * Combine the text query with the selected filters, returning a filtered query when any
	 * filters are selected and a plain query otherwise.
	 *
	 * @param object $textQuery the text part of the search as built by simpleTextQuery() or
	 *                          multiMatchQuery(), i.e. a QueryString, MultiMatch or MatchAll
	 * @return \Elastica\Query query wrapping the text query and any selected filters
	 */
	private function addFilters($textQuery) {
		// Let the manipulator see the selected filters first
		if ($this->manipulator) {
			$this->manipulatorInstance->updateFilters($this->selectedFilters);
		}

		// One Elastica filter per selected filter: ranged aggregations provide their own
		// filter, anything else is a plain term filter
		$rangeTitles = RangedAggregation::getTitles();
		$filters = array();
		foreach ($this->selectedFilters as $name => $selected) {
			if (in_array($name, $rangeTitles)) {
				$filters[] = RangedAggregation::getByTitle($name)->getFilter($selected);
			} else {
				$term = new Term();
				$term->setTerm($name, $selected);
				$filters[] = $term;
			}
		}

		// No filters: stays null.  One filter: use it directly.  Several: AND them together
		$queryFilter = null;
		$selectedCount = count($this->selectedFilters);
		if ($selectedCount == 1) {
			$queryFilter = $filters[0];
		} elseif ($selectedCount > 1) {
			$queryFilter = new BoolAnd();
			foreach ($filters as $filter) {
				$queryFilter->addFilter($filter);
			}
		}

		if ($queryFilter == null) {
			return new Query($textQuery);
		}

		// MatchAll appears not be allowed inside a filtered query, so pass null instead
		if ($textQuery instanceof MatchAll) {
			$textQuery = null;
		}

		return new Query(new Filtered($textQuery, $queryFilter));
	}
288
289
290 9
	/**
	 * Let the manipulator, when there is one, augment the query.  Aggregation is optional,
	 * so without a manipulator instance the query is left untouched.
	 *
	 * @param \Elastica\Query $query the query to augment
	 */
	private function addAggregation(&$query) {
		if (!$this->manipulatorInstance) {
			return;
		}

		$this->manipulatorInstance->augmentQuery($query);
	}
296
297
298
	/**
	 * Simplest form of search, namely search for a text string against all fields.  In JSON
	 * terms the body of the search request is:
	 *
	 *   { "query": { "query_string": { "query": "Image" } } }
	 *
	 * When the query text is empty and results are to be shown for empty queries, a MatchAll
	 * is returned instead.
	 *
	 * @return object a lenient QueryString for the query text, or a MatchAll
	 */
	private function simpleTextQuery() {
		// With no fields specified this searches all fields
		$queryString = new QueryString($this->queryText);

		// Setting the lenient flag means that numeric fields can be searched for text values
		$queryString->setParam('lenient', true);

		$showEverything = $this->showResultsForEmptyQuery && $this->queryText == '';

		return $showEverything ? new MatchAll() : $queryString;
	}
325
326
327
	/**
	 * Build the text query for a search restricted to the configured, weighted fields.
	 *
	 * An empty query that should still show results uses MatchAll, see
	 * https://www.elastic.co/guide/en/elasticsearch/reference/1.4/query-dsl-match-all-query.html
	 * In every other case a lenient most_fields MultiMatch over the configured fields is
	 * returned.
	 *
	 * @return object a MultiMatch, or a MatchAll for an empty query that should show results
	 */
	private function multiMatchQuery() {
		$isEmptyQuery = ($this->queryText == '');

		// Nothing to search for, but results are wanted: no field configuration is needed
		if ($isEmptyQuery && $this->showResultsForEmptyQuery) {
			return new MatchAll();
		}

		$multiMatch = new MultiMatch();

		// If there is text search for it, otherwise search for the empty string
		$multiMatch->setQuery($isEmptyQuery ? '' : $this->queryText);

		$multiMatch->setFields($this->convertWeightedFieldsForElastica($this->fields));
		$multiMatch->setType('most_fields');

		// Setting the lenient flag means that numeric fields can be searched for text values
		$multiMatch->setParam('lenient', true);

		return $multiMatch;
	}
357
358
359
360
	/**
	 * Convert a hash of field name to weight into the list of field entries used by a
	 * multimatch query.  The field types are looked up for the classes set on this generator,
	 * so call this after having called setClasses.
	 *
	 * A weight other than 1 is appended as "^weight".  Fields mapped as 'string' get a second
	 * "fieldname.*" entry with the same weight.
	 *
	 * @param array $fields field name => weight
	 * @return array list of field entries, e.g. array('Title^2', 'Title.*^2', 'Content')
	 * @throws \Exception when a field has no mapping for the classes being searched
	 */
	public function convertWeightedFieldsForElastica($fields) {
		$nameToType = self::getSearchFieldsMappingForClasses($this->classes,$fields);
		$result = array();

		if (count($fields) == 0) {
			return $result;
		}

		foreach ($fields as $fieldName => $weight) {
			// A weight of 1 is the default and needs no suffix
			$boost = ($weight != 1) ? '^'.$weight : '';

			$result[] = $fieldName.$boost;

			if (!isset($nameToType[$fieldName])) {
				throw new \Exception("Field $fieldName does not exist");
			}

			if ($nameToType[$fieldName] == 'string') {
				$result[] = "{$fieldName}.*".$boost;
			}
		}

		return $result;
	}
393
394
395
396
	/**
	 * Get a hash of field name to Elasticsearch mapping, e.g. 'Title' => 'string'.
	 * The result is cached (as JSON) to save on database hits, as this data only changes at
	 * build time.  Each cache hit increments the cache hit counter.
	 *
	 * @param  string|array $classes CSV or array of ClassNames to search, or empty for
	 *         all of SiteTree
	 * @param  array $fieldsAllowed optional hash whose keys are the only field names to
	 *         include in the result
	 * @return array Array hash of fieldname to Elasticsearch mapping
	 */
	public static function getSearchFieldsMappingForClasses($classes = null, $fieldsAllowed = null) {
		$cache = self::getCache();
		$csvClasses = is_array($classes) ? implode(',', $classes) : $classes;

		// Needed for both the cache key and the SQL restriction, so work it out once
		$fieldsAllowedCSV = '';
		if ($fieldsAllowed) {
			$fieldsAllowedCSV = self::convertToQuotedCSV(array_keys($fieldsAllowed));
		}

		// The cache key is built from the classes and allowed fields, with commas, quotes,
		// dots and spaces replaced by underscores
		$key = 'SEARCHABLE_FIELDS_'.str_replace(',', '_', $csvClasses);
		if ($fieldsAllowed) {
			$key .= '_' . str_replace(array("'", ','), '_', $fieldsAllowedCSV);
			$key = str_replace(array('.', ' '), '_', $key);
		}

		$cached = $cache->load($key);
		if ($cached) {
			self::$cacheHitCtr++;

			// true is necessary here to decode the array hash back to an array and not a struct
			return json_decode($cached, true);
		}

		// Get an array of relevant classes to search
		if (empty($csvClasses)) {
			$relevantClasses = array();
			$sql = "SELECT DISTINCT Name from SearchableClass where InSiteTree = 1 order by Name";
			foreach (\DB::query($sql) as $record) {
				$relevantClasses[] = $record['Name'];
			}
		} else {
			$relevantClasses = explode(',', $csvClasses);
		}

		$mapping = array();
		if (count($relevantClasses) > 0) {
			$relevantClassesCSV = self::convertToQuotedCSV($relevantClasses);

			// Query the database for the searchable field names and their Elasticsearch mapping
			$sql = "SELECT  sf.Name,sf.Type FROM SearchableClass sc  INNER JOIN SearchableField sf ON "
				 . "sc.id = sf.SearchableClassID WHERE sc.name IN ($relevantClassesCSV)";
			if ($fieldsAllowed && strlen($fieldsAllowedCSV) > 0) {
				$sql .= " AND sf.Name IN ($fieldsAllowedCSV)";
			}

			foreach (\DB::query($sql) as $record) {
				/**
				 * FIXME:
				 * This will overwrite duplicate keys such as Content or Title from other Classes.
				 * Ideally need to check if the mapping being overwritten changes, e.g. if
				 * a field such as BirthDate is date in one class and string in another
				 * and throw an exception accordingly
				 */
				$mapping[$record['Name']] = $record['Type'];
			}
		}

		$cache->save(json_encode($mapping),$key);

		return $mapping;
	}
472
473
474 6
	/**
	 * Get the cache used for Elasticsearch related data.
	 *
	 * @return object the cache returned by SS_Cache::factory for 'elasticsearch'
	 */
	public static function getCache() {
		return \SS_Cache::factory('elasticsearch');
	}
478
479
480
	/**
481
	 * Convert either a CSV string or an array to a CSV single quoted string, suitable for use in
482
	 * an SQL IN clause
483
	 * @param  string|array $csvOrArray A string separated by commas or an array
484
	 * @return string             string or array as a CSV, but values quoted with single quotes
485
	 */
486 6
	public static function convertToQuotedCSV($csvOrArray) {
487 6
		$asArray = $csvOrArray;
488 6
		if (!is_array($csvOrArray)) {
489
			if ($csvOrArray == null) {
490
				$asArray = array();
491
			} else {
492
				$asArray = explode(',', $csvOrArray);
493
			}
494
		}
495 6
		$quoted = array();
496 6
		foreach ($asArray as $value) {
0 ignored issues
show
Bug introduced by
The expression $asArray of type string|array is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
497 6
			if (strlen($value) > 0) {
498 6
				$item = "'".$value."'";
499 6
				array_push($quoted, $item);
500 6
			}
501
502 6
		}
503 6
		return implode(',', $quoted);;
504
	}
505
}
506