1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* SolrQuery class file |
4
|
|
|
*/ |
5
|
|
|
|
6
|
|
|
namespace Graviton\DocumentBundle\Service; |
7
|
|
|
|
8
|
|
|
use Graviton\Rql\Node\SearchNode; |
9
|
|
|
use Solarium\Client; |
10
|
|
|
use Symfony\Component\HttpFoundation\Request; |
11
|
|
|
use Symfony\Component\HttpFoundation\RequestStack; |
12
|
|
|
use Xiag\Rql\Parser\Node\LimitNode; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* @author List of contributors <https://github.com/libgraviton/graviton/graphs/contributors> |
16
|
|
|
* @license https://opensource.org/licenses/MIT MIT License |
17
|
|
|
* @link http://swisscom.ch |
18
|
|
|
*/ |
19
|
|
|
class SolrQuery
{

    /**
     * class name to search; its last namespace segment is used as solr core name
     *
     * @var string
     */
    private $className;

    /**
     * result of parse_url() on the configured solr url; stays empty if no (or no parseable) url was given
     *
     * @var array
     */
    private $urlParts = [];

    /**
     * minimum term length (in bytes) from which a term is searched fuzzy ("~")
     *
     * @var int
     */
    private $solrFuzzyBridge;

    /**
     * minimum term length (in bytes) from which a term is searched with a wildcard ("*")
     *
     * @var int
     */
    private $solrWildcardBridge;

    /**
     * if true, terms not already separated by an operator are joined with AND
     *
     * @var boolean
     */
    private $andifyTerms;

    /**
     * query fields (weights) keyed by class name
     *
     * @var array
     */
    private $solrMap;

    /**
     * number of rows requested when no limit node is given
     *
     * @var int
     */
    private $paginationDefaultLimit;

    /**
     * @var Client
     */
    private $solrClient;

    /**
     * @var RequestStack
     */
    private $requestStack;

    /**
     * if the full search term matches one of these patterns, the whole thing is sent quoted to solr
     *
     * @var array
     */
    private $fullTermPatterns = [
        '/^[0-9]+ [0-9\.]{9,}$/i'
    ];

    /**
     * pattern to match a solr field query
     *
     * @var string
     */
    private $fieldQueryPattern = '/(.{2,}):(.+)/i';

    /**
     * stuff that does not get andified/quoted/whatever
     *
     * @var array
     */
    private $queryOperators = [
        'AND',
        'NOT',
        'OR',
        '&&',
        '||',
        '!'
    ];

    /**
     * Constructor
     *
     * @param string       $solrUrl                url to solr
     * @param int          $solrFuzzyBridge        fuzzy bridge
     * @param int          $solrWildcardBridge     wildcard bridge
     * @param boolean      $andifyTerms            andify terms or not?
     * @param array        $solrMap                solr class field weight map
     * @param int          $paginationDefaultLimit default pagination limit
     * @param Client       $solrClient             solr client
     * @param RequestStack $requestStack           request stack
     */
    public function __construct(
        $solrUrl,
        $solrFuzzyBridge,
        $solrWildcardBridge,
        $andifyTerms,
        array $solrMap,
        $paginationDefaultLimit,
        Client $solrClient,
        RequestStack $requestStack
    ) {
        if (!is_null($solrUrl)) {
            // parse_url() returns false on seriously malformed urls; only accept a real
            // array so $urlParts keeps its documented type (an empty array = not configured)
            $parsedUrl = parse_url($solrUrl);
            if (is_array($parsedUrl)) {
                $this->urlParts = $parsedUrl;
            }
        }

        $this->solrFuzzyBridge = (int) $solrFuzzyBridge;
        $this->solrWildcardBridge = (int) $solrWildcardBridge;
        $this->andifyTerms = (boolean) $andifyTerms;
        $this->solrMap = $solrMap;
        $this->paginationDefaultLimit = (int) $paginationDefaultLimit;
        $this->solrClient = $solrClient;
        $this->requestStack = $requestStack;
    }

    /**
     * sets the class name to search - last part equates to solr core name
     *
     * @param string $className class name
     *
     * @return void
     */
    public function setClassName($className)
    {
        $this->className = $className;
    }

    /**
     * returns true if solr is configured currently, false otherwise
     *
     * configured means: a solr url was parsed and the field map has an entry for the current class name
     *
     * @return bool if solr is configured
     */
    public function isConfigured()
    {
        return !empty($this->urlParts) && isset($this->solrMap[$this->className]);
    }

    /**
     * executes the search on solr using the rql parsing nodes.
     *
     * as a side effect, the attributes "totalCount" and "X-Search-Source" are set on the current
     * request (if there is one).
     *
     * @param SearchNode     $node      search node
     * @param LimitNode|null $limitNode limit node
     *
     * @return array an array of just record ids (the ids of the matching documents in solr)
     */
    public function query(SearchNode $node, LimitNode $limitNode = null)
    {
        $client = $this->getClient();

        $query = $client->createQuery($client::QUERY_SELECT);

        // set the weights
        $query->getEDisMax()->setQueryFields($this->solrMap[$this->className]);

        $query->setQuery($this->getSearchTerm($node));

        if ($limitNode instanceof LimitNode) {
            $query->setStart($limitNode->getOffset())->setRows($limitNode->getLimit());
        } else {
            $query->setStart(0)->setRows($this->paginationDefaultLimit);
        }

        $query->setFields(['id']);

        $result = $client->select($query);

        $currentRequest = $this->requestStack->getCurrentRequest();
        if ($currentRequest instanceof Request) {
            $currentRequest->attributes->set('totalCount', $result->getNumFound());
            $currentRequest->attributes->set('X-Search-Source', 'solr');
        }

        $idList = [];
        foreach ($result as $document) {
            // prefer "id", fall back to "_id"; documents with neither are skipped
            if (isset($document->id)) {
                $idList[] = (string) $document->id;
            } elseif (isset($document->_id)) {
                $idList[] = (string) $document->_id;
            }
        }

        return $idList;
    }

    /**
     * returns the string search term to be used in the solr query
     *
     * @param SearchNode $node the search node
     *
     * @return string the composed search query
     */
    private function getSearchTerm(SearchNode $node)
    {
        return $this->composeSearchTerm($node->getSearchQuery());
    }

    /**
     * composes the solr query string from the raw user supplied search string
     *
     * @param string $fullTerm the raw search string
     *
     * @return string the composed search query
     */
    private function composeSearchTerm($fullTerm)
    {
        // some inputs are sent as a whole, quoted
        foreach ($this->fullTermPatterns as $pattern) {
            if (preg_match($pattern, $fullTerm) === 1) {
                return '"'.$fullTerm.'"';
            }
        }

        $glue = $this->andifyTerms ? 'AND' : '';

        $previousWasOperator = false;
        $fullSearchElements = [];

        foreach (explode(' ', $fullTerm) as $term) {
            // leading/trailing/repeated spaces yield empty terms; emitting them would produce
            // dangling glue like "AND  AND" and thus an invalid query
            if ($term === '') {
                continue;
            }

            // is this an operator?
            if (in_array($term, $this->queryOperators, true)) {
                $fullSearchElements[] = $term;
                $previousWasOperator = true;
                continue;
            }

            // glue only between two terms the user did not already connect with an operator
            if ($glue !== '' && !empty($fullSearchElements) && !$previousWasOperator) {
                $fullSearchElements[] = $glue;
            }
            $previousWasOperator = false;

            $fullSearchElements[] = $this->getSingleTerm($term);
        }

        return implode(' ', $fullSearchElements);
    }

    /**
     * returns a single term how to search. here we can apply custom logic to the user input string
     *
     * @param string $term single search term
     *
     * @return string modified search term
     */
    private function getSingleTerm($term)
    {
        // we don't modify numbers
        if (ctype_digit($term)) {
            return '"'.$term.'"';
        }

        // formatted number (digits with "-" and/or "." in between)?
        if (ctype_digit(str_replace(['-', '.'], '', $term))) {
            return '"'.$term.'"';
        }

        // everything that is only numbers *and* characters and more than 3 long, we don't fuzzy/wildcard
        // thanks to https://stackoverflow.com/a/7684859/3762521
        $pattern = '/^(?=.*[0-9])(?=.*[a-zA-Z])([a-zA-Z0-9]+)$/';
        if (strlen($term) > 3 && preg_match($pattern, $term) === 1) {
            return '"'.$term.'"';
        }

        // is it a solr field query (like id:333)?
        if (preg_match($this->fieldQueryPattern, $term) === 1) {
            return $this->parseSolrFieldQuery($term);
        }

        // terms reaching the fuzzy bridge length are searched fuzzy, shorter ones that still
        // reach the wildcard bridge length get a wildcard, everything else is passed as is
        $length = strlen($term);

        if ($length >= $this->solrFuzzyBridge) {
            return $this->doAndNotPrefixSingleTerm($term, '~');
        }

        if ($length >= $this->solrWildcardBridge) {
            return $this->doAndNotPrefixSingleTerm($term, '*');
        }

        return $term;
    }

    /**
     * parses the special solr field syntax fieldName:fieldValue, converts int ranges
     *
     * a value starting with "<" becomes "[* TO value]", one starting with ">" becomes "[value TO *]",
     * any other value is treated like a normal single term.
     *
     * @param string $fieldQuery the query
     *
     * @return string solr compatible expression
     */
    private function parseSolrFieldQuery($fieldQuery)
    {
        // split at the first colon only, the value itself may contain further colons
        $parts = explode(':', $fieldQuery, 2);
        $fieldName = $parts[0];
        $fieldValue = isset($parts[1]) ? $parts[1] : '';

        // change > and <
        $firstChar = substr($fieldValue, 0, 1);
        if ($firstChar === '<') {
            $fieldValue = '[* TO '.substr($fieldValue, 1).']';
        } elseif ($firstChar === '>') {
            $fieldValue = '['.substr($fieldValue, 1).' TO *]';
        } else {
            $fieldValue = $this->getSingleTerm($fieldValue);
        }

        return $fieldName.':'.$fieldValue;
    }

    /**
     * ORify a single term: "term" becomes "(term OR term<modifier>)"
     *
     * if the term already ends with "~" or "*", that character is stripped from the term and
     * used as modifier instead of the given one.
     *
     * @param string $term     search term
     * @param string $modifier modifier to append ("~" or "*")
     *
     * @return string ORified query
     */
    private function doAndNotPrefixSingleTerm($term, $modifier)
    {
        // already modifier there?
        $last = substr($term, -1);
        if ($last === '~' || $last === '*') {
            // clean from term, override modifier from client
            $modifier = $last;
            $term = substr($term, 0, -1);
        }

        return sprintf('(%s OR %s%s)', $term, $term, $modifier);
    }

    /**
     * returns the client to use for the current query
     *
     * registers an endpoint (key "local") built from the configured url and the current class
     * name on the injected client and makes it the default endpoint.
     *
     * @return Client client
     */
    private function getClient()
    {
        $endpointConfig = $this->urlParts;

        // make sure there is a path and that it ends with a slash
        if (!isset($endpointConfig['path'])) {
            $endpointConfig['path'] = '/';
        }

        if (substr($endpointConfig['path'], -1) !== '/') {
            $endpointConfig['path'] .= '/';
        }

        // find core name
        $classnameParts = explode('\\', $this->className);
        $endpointConfig['core'] = array_pop($classnameParts);

        $endpointConfig['timeout'] = 10000;
        $endpointConfig['key'] = 'local';

        // the key is fixed, so drop an endpoint registered by a previous call first - solarium
        // refuses to add a second endpoint with an already known key
        $this->solrClient->removeEndpoint($endpointConfig['key']);
        $this->solrClient->addEndpoint($endpointConfig);
        $this->solrClient->setDefaultEndpoint($endpointConfig['key']);

        return $this->solrClient;
    }
}
386
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable
$accountId
that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id
property of an instance of the Account
class. This class holds a proper account, so the id value must no longer be false. Either this assignment is in error or a type check should be added for that assignment.