Completed
Push — master ( 39787b...8c541e )
by Maxence
01:20
created

SearchMappingService::generateSearchSimpleQuery()   B

Complexity

Conditions 6
Paths 17

Size

Total Lines 33

Duplication

Lines 20
Ratio 60.61 %

Importance

Changes 0
Metric Value
dl 20
loc 33
rs 8.7697
c 0
b 0
f 0
cc 6
nc 17
nop 1
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\FullTextSearch_ElasticSearch\Service;
32
33
34
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
35
use OCA\FullTextSearch_ElasticSearch\Exceptions\QueryContentGenerationException;
36
use OCA\FullTextSearch_ElasticSearch\Exceptions\SearchQueryGenerationException;
37
use OCA\FullTextSearch_ElasticSearch\Model\QueryContent;
38
use OCP\FullTextSearch\Model\IDocumentAccess;
39
use OCP\FullTextSearch\Model\ISearchRequest;
40
use OCP\FullTextSearch\Model\ISearchRequestSimpleQuery;
41
42
43
/**
44
 * Class SearchMappingService
45
 *
46
 * @package OCA\FullTextSearch_ElasticSearch\Service
47
 */
48
class SearchMappingService {
49
50
	/** @var ConfigService */
51
	private $configService;
52
53
	/** @var MiscService */
54
	private $miscService;
55
56
57
	/**
	 * Stores the collaborating services on the instance.
	 *
	 * @param ConfigService $configService read by this class to obtain the Elasticsearch index name
	 * @param MiscService $miscService kept on the instance; none of the methods visible in
	 *                                 this class read it
	 */
	public function __construct(ConfigService $configService, MiscService $miscService) {
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
67
68
69
	/**
	 * Builds the search query for a request: the parameters produced by
	 * generateSearchQueryParams(), wrapped under a single 'params' key.
	 *
	 * @param ISearchRequest $request
	 * @param IDocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array ['params' => array]
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQuery(
		ISearchRequest $request, IDocumentAccess $access, string $providerId
	): array {
		// Returned as an explicit literal instead of writing into a never-initialised $query.
		return ['params' => $this->generateSearchQueryParams($request, $access, $providerId)];
	}
85
86
87
	/**
	 * Assembles the parameters of the search call: target index and type, pagination,
	 * the bool query (content clauses plus filters) and the highlighting definition.
	 *
	 * Filters are added in this order: provider, access rights, metatags, subtags,
	 * simple queries. Wildcard and regex filters of the request are appended last by
	 * improveSearchQuerying().
	 *
	 * @param ISearchRequest $request
	 * @param IDocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQueryParams(
		ISearchRequest $request, IDocumentAccess $access, string $providerId
	): array {
		$size = $request->getSize();
		$params = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'size'  => $size,
			// A negative offset is rejected by Elasticsearch, so a page number below 1
			// falls back to the first page instead of producing an invalid request.
			'from'  => max(0, ($request->getPage() - 1) * $size)
		];

		$bool = [
			'must'   => ['bool' => ['should' => $this->generateSearchQueryContent($request)]],
			'filter' => [
				['bool' => ['must' => ['term' => ['provider' => $providerId]]]],
				['bool' => ['should' => $this->generateSearchQueryAccess($access)]],
				[
					'bool' => [
						'should' => $this->generateSearchQueryTags(
							'metatags', $request->getMetaTags()
						)
					]
				],
				[
					'bool' => [
						'must' => $this->generateSearchQueryTags(
							'subtags', $request->getSubTags(true)
						)
					]
				],
				['bool' => ['must' => $this->generateSearchSimpleQuery($request->getSimpleQueries())]]
			]
		];

//		$bool['filter'][]['bool']['should'] = $this->generateSearchQueryTags($request->getTags());

		$params['body']['query']['bool'] = $bool;
		$params['body']['highlight'] = $this->generateSearchHighlighting($request);

		// The query is passed by reference so the extra filters land inside $params.
		$this->improveSearchQuerying($request, $params['body']['query']);

		return $params;
	}
129
130
131
	/**
	 * Extends an already built query with the optional filters of the request:
	 * wildcard filters first, then regex filters.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the query, modified in place
	 */
	private function improveSearchQuerying(ISearchRequest $request, array &$arr) {
//		$this->improveSearchWildcardQueries($request, $arr);

		$this->improveSearchWildcardFilters($request, $arr);

		$this->improveSearchRegexFilters($request, $arr);
	}
140
141
142
//	/**
143
//	 * @param SearchRequest $request
144
//	 * @param array $arr
145
//	 */
146
//	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
147
//
148
//		$queries = $request->getWildcardQueries();
149
//		foreach ($queries as $query) {
150
//			$wildcards = [];
151
//			foreach ($query as $entry) {
152
//				$wildcards[] = ['wildcard' => $entry];
153
//			}
154
//
155
//			array_push($arr['bool']['must']['bool']['should'], $wildcards);
156
//		}
157
//
158
//	}
159
160
161
	/**
	 * Appends one filter per wildcard filter of the request; each entry of a filter
	 * becomes a 'wildcard' clause.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the query, modified in place
	 */
	private function improveSearchWildcardFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($arr, 'wildcard', $request->getWildcardFilters());
	}


	/**
	 * Appends one filter per regex filter of the request; each entry of a filter
	 * becomes a 'regexp' clause.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the query, modified in place
	 */
	private function improveSearchRegexFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($arr, 'regexp', $request->getRegexFilters());
	}


	/**
	 * Shared implementation of the wildcard and regex filters: for every filter, wraps
	 * each of its entries in a clause named $clauseType and appends the resulting list
	 * to $arr['bool']['filter'] as a bool/should group.
	 *
	 * A filter without entries still appends a group, with an empty 'should' list.
	 *
	 * @param array $arr the query, modified in place
	 * @param string $clauseType clause keyword, 'wildcard' or 'regexp'
	 * @param array $filters list of filters, each one a list of entries
	 */
	private function appendShouldFilters(array &$arr, string $clauseType, $filters) {
		foreach ($filters as $filter) {
			$clauses = [];
			foreach ($filter as $entry) {
				$clauses[] = [$clauseType => $entry];
			}

			$arr['bool']['filter'][]['bool']['should'] = $clauses;
		}
	}
197
198
199
	/**
	 * Splits the lower-cased search string into tokens and turns them into the content
	 * part of the query.
	 *
	 * A token is either a double-quoted phrase (preceded by any character other than
	 * '?') or a run of non-whitespace characters. Tokens for which no QueryContent can
	 * be generated are dropped.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 * @throws SearchQueryGenerationException when no usable token is left
	 */
	private function generateSearchQueryContent(ISearchRequest $request): array {
		$search = strtolower($request->getSearch());

		// The string is padded with one space on each side before matching; presumably so
		// that a quoted phrase at the very start still has a preceding character to match.
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', ' ' . $search . ' ', $matches);

		$contents = [];
		foreach ($matches[0] as $token) {
			try {
				$contents[] = $this->generateQueryContent(trim($token));
			} catch (QueryContentGenerationException $e) {
				// unusable token: skip it and keep the remaining ones
			}
		}

		if (count($contents) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $contents);
	}
224
225
226
	/**
	 * Wraps a single token in a QueryContent and rejects it when the resulting word
	 * is empty.
	 *
	 * @param string $word
	 *
	 * @return QueryContent
	 * @throws QueryContentGenerationException when QueryContent::getWord() is empty
	 */
	private function generateQueryContent(string $word): QueryContent {
		$content = new QueryContent($word);

		if (strlen($content->getWord()) !== 0) {
			return $content;
		}

		throw new QueryContentGenerationException();
	}
241
242
243
	/**
	 * Groups the per-word clauses by the key returned from QueryContent::getShould()
	 * and returns them as one bool query.
	 *
	 * Each key holds a single-element list whose only element is the list of clauses
	 * collected for that key.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent[] $queryContents
	 *
	 * @return array ['bool' => array]
	 */
	private function generateSearchQueryFromQueryContent(
		ISearchRequest $request, array $queryContents
	): array {
		$grouped = [];
		foreach ($queryContents as $content) {
			$grouped[$content->getShould()][] = $this->generateQueryContentFields($request, $content);
		}

		$bool = [];
		foreach ($grouped as $occurrence => $clauses) {
			$bool[$occurrence] = [$clauses];
		}

		return ['bool' => $bool];
	}
266
267
268
	/**
	 * Builds the clauses that look for one word in every searchable field and returns
	 * them as a bool/should group.
	 *
	 * Three kinds of clauses are produced, in this order:
	 * - a clause of the type given by QueryContent::getMatch() for 'content', 'title'
	 *   and the fields of the request,
	 * - a 'wildcard' clause ('*word*') for each wildcard field of the request,
	 * - a single 'query_string' clause covering all 'parts.*' fields, if there are any.
	 * Fields rejected by fieldIsOutLimit() are left out.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent $content
	 *
	 * @return array
	 */
	private function generateQueryContentFields(ISearchRequest $request, QueryContent $content
	): array {
		$word = $content->getWord();
		$match = $content->getMatch();

		$clauses = [];
		foreach (array_merge(['content', 'title'], $request->getFields()) as $field) {
			if ($this->fieldIsOutLimit($request, $field)) {
				continue;
			}

			$clauses[] = [$match => [$field => $word]];
		}

		foreach ($request->getWildcardFields() as $field) {
			if ($this->fieldIsOutLimit($request, $field)) {
				continue;
			}

			$clauses[] = ['wildcard' => [$field => '*' . $word . '*']];
		}

		// array_values() restores list keys after filtering.
		$parts = array_values(
			array_filter(
				$this->getPartsFields($request),
				function($field) use ($request) {
					return !$this->fieldIsOutLimit($request, $field);
				}
			)
		);

		if (count($parts) > 0) {
			$clauses[] = ['query_string' => ['fields' => $parts, 'query' => $word]];
		}

		return ['bool' => ['should' => $clauses]];
	}
309
310
311
	/**
	 * Lists the term clauses that describe who may see a document: the viewer as owner,
	 * the viewer as listed user, the '__all' users marker, and every group and circle
	 * of the access object.
	 *
	 * @param IDocumentAccess $access
	 *
	 * @return array
	 */
	private function generateSearchQueryAccess(IDocumentAccess $access): array {
		$terms = [
			['term' => ['owner' => $access->getViewerId()]],
			['term' => ['users' => $access->getViewerId()]],
			['term' => ['users' => '__all']]
		];

		foreach ($access->getGroups() as $groupId) {
			$terms[] = ['term' => ['groups' => $groupId]];
		}

		foreach ($access->getCircles() as $circleId) {
			$terms[] = ['term' => ['circles' => $circleId]];
		}

		return $terms;
	}
333
334
335
	/**
	 * Tells whether a field is excluded by the field limitation of the request.
	 *
	 * Without any limit fields nothing is excluded; otherwise every field that is not
	 * part of the limit list is.
	 *
	 * @param ISearchRequest $request
	 * @param string $field
	 *
	 * @return bool true when the field must be left out of the query
	 */
	private function fieldIsOutLimit(ISearchRequest $request, string $field): bool {
		$limit = $request->getLimitFields();

		// Strict comparison: field names are matched exactly, so PHP's loose comparison
		// rules (numeric strings, 0 == 'name' before PHP 8) cannot cause a false match.
		return count($limit) > 0 && !in_array($field, $limit, true);
	}
353
354
355
	/**
	 * Turns a list of tags into a list of term clauses on the field $k.
	 *
	 * @param string $k name of the field the tags are matched against
	 * @param array $tags
	 *
	 * @return array one ['term' => [$k => tag]] entry per tag, keyed 0..n-1
	 */
	private function generateSearchQueryTags(string $k, array $tags): array {
		// array_values() first: array_map() would otherwise carry over the keys of $tags.
		return array_map(
			function($tag) use ($k) {
				return ['term' => [$k => $tag]];
			},
			array_values($tags)
		);
	}
370
371
372
	/**
	 * Converts the simple queries of a request into range clauses.
	 *
	 * Only the integer comparison types GTE, LTE, GT and LT are handled; queries of any
	 * other type are ignored. Only the first value of a query is used.
	 *
	 * @param ISearchRequestSimpleQuery[] $queries
	 *
	 * @return array
	 */
	private function generateSearchSimpleQuery(array $queries): array {
		// Compare type => range operator, kept as pairs so the type is still compared
		// with === (array keys would be subject to PHP's key coercion).
		$operators = [
			[ISearchRequestSimpleQuery::COMPARE_TYPE_INT_GTE, 'gte'],
			[ISearchRequestSimpleQuery::COMPARE_TYPE_INT_LTE, 'lte'],
			[ISearchRequestSimpleQuery::COMPARE_TYPE_INT_GT, 'gt'],
			[ISearchRequestSimpleQuery::COMPARE_TYPE_INT_LT, 'lt']
		];

		$simpleQuery = [];
		foreach ($queries as $query) {
			$type = $query->getType();

			foreach ($operators as list($compareType, $operator)) {
				if ($type !== $compareType) {
					continue;
				}

				// TODO: manage multiple entries array
				$value = $query->getValues()[0];

				$simpleQuery[] = ['range' => [$query->getField() => [$operator => $value]]];
			}
		}

		return $simpleQuery;
	}
410
411
412
	/**
	 * Builds the highlight section of the search: 'content' plus every 'parts.*' field
	 * of the request, with empty pre and post tags.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function generateSearchHighlighting(ISearchRequest $request): array {
		// Each field maps to an empty object; presumably so it is serialised as {} and
		// not as an empty list - TODO confirm against the client's encoding.
		$fields = ['content' => new \stdClass()];
		foreach ($this->getPartsFields($request) as $partField) {
			$fields[$partField] = new \stdClass();
		}

		$highlight = [];
		$highlight['fields'] = $fields;
		$highlight['pre_tags'] = [''];
		$highlight['post_tags'] = [''];

		return $highlight;
	}
431
432
433
	/**
	 * Builds the parameters that address a single document; its id is the provider id
	 * and the document id joined by a colon.
	 *
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function getDocumentQuery(string $providerId, string $documentId): array {
		$index = $this->configService->getElasticIndex();

		return [
			'index' => $index,
			'type'  => 'standard',
			'id'    => sprintf('%s:%s', $providerId, $documentId)
		];
	}
447
448
449
	/**
	 * Returns the parts of the request as field names, each prefixed with 'parts.'.
	 * Keys of the original list are preserved.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function getPartsFields(ISearchRequest $request): array {
		return array_map(
			function($part) {
				return 'parts.' . $part;
			},
			$request->getParts()
		);
	}
461
462
}
463
464