Completed
Pull Request — master (#100)
by Robin
01:43
created

SearchMappingService::generateQueryContentFields()   B

Complexity

Conditions 8
Paths 54

Size

Total Lines 34

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 34
rs 8.1315
c 0
b 0
f 0
cc 8
nc 54
nop 2
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch_Elasticsearch - Use Elasticsearch to index the content of your nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\FullTextSearch_Elasticsearch\Service;
32
33
34
use OCA\FullTextSearch_Elasticsearch\Exceptions\ConfigurationException;
35
use OCA\FullTextSearch_Elasticsearch\Exceptions\QueryContentGenerationException;
36
use OCA\FullTextSearch_Elasticsearch\Exceptions\SearchQueryGenerationException;
37
use OCA\FullTextSearch_Elasticsearch\Model\QueryContent;
38
use OCP\FullTextSearch\Model\IDocumentAccess;
39
use OCP\FullTextSearch\Model\ISearchRequest;
40
use OCP\FullTextSearch\Model\ISearchRequestSimpleQuery;
41
use stdClass;
42
43
/**
44
 * Class SearchMappingService
45
 *
46
 * @package OCA\FullTextSearch_Elasticsearch\Service
47
 */
48
class SearchMappingService {
49
50
	/** @var ConfigService */
51
	private $configService;
52
53
	/** @var MiscService */
54
	private $miscService;
55
56
	/** @var IUserStoragesService */
57
	private $userStoragesService;
58
59
	/**
	 * SearchMappingService constructor.
	 *
	 * @param ConfigService $configService
	 * @param MiscService $miscService
	 * @param IUserStoragesService|null $userStoragesService optional; when it is null,
	 *        getExternalFileShares() returns an empty list
	 */
	public function __construct(
		ConfigService $configService, MiscService $miscService,
		?IUserStoragesService $userStoragesService = null
	) {
		$this->configService = $configService;
		$this->miscService = $miscService;
		// Declared explicitly nullable: relying on "= null" to imply nullability is deprecated.
		$this->userStoragesService = $userStoragesService;
	}
71
72
73
	/**
	 * Wraps the parameters built by generateSearchQueryParams() under a 'params' key.
	 *
	 * @param ISearchRequest $request
	 * @param IDocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array ['params' => array]
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQuery(
		ISearchRequest $request, IDocumentAccess $access, string $providerId
	): array {
		// Build the array explicitly instead of relying on implicit creation of
		// an uninitialized variable.
		return [
			'params' => $this->generateSearchQueryParams($request, $access, $providerId)
		];
	}
89
90
91
	/**
	 * Assembles the search parameters: index, type, pagination, the bool
	 * query (optional content clause plus filters) and the highlight section.
	 *
	 * @param ISearchRequest $request
	 * @param IDocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQueryParams(
		ISearchRequest $request, IDocumentAccess $access, string $providerId
	): array {
		$params = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'size'  => $request->getSize(),
			// pages are converted to a zero-based offset
			'from'  => (($request->getPage() - 1) * $request->getSize())
		];

		$bool = [];
		// the content clause is only added when a search string was given
		if ($request->getSearch() !== '') {
			$bool['must'] = ['bool' => $this->generateSearchQueryContent($request)];
		}

		// filter on tags ($request->getTags()) is currently not part of the query
		$bool['filter'] = [
			['bool' => ['must' => ['term' => ['provider' => $providerId]]]],
			['bool' => ['should' => $this->generateSearchQueryAccess($access)]],
			['bool' => ['should' => $this->generateSearchQueryTags('metatags', $request->getMetaTags())]],
			['bool' => ['must' => $this->generateSearchQueryTags('subtags', $request->getSubTags(true))]],
			['bool' => ['must' => $this->generateSearchSimpleQuery($request->getSimpleQueries())]]
		];

		$params['body'] = [
			'query'     => ['bool' => $bool],
			'highlight' => $this->generateSearchHighlighting($request)
		];

		// the query is passed by reference so extra filters are appended in place
		$this->improveSearchQuerying($request, $params['body']['query']);

		return $params;
	}
135
136
137
	/**
	 * Appends the wildcard and regex filters of the request to the query.
	 * Wildcard queries (as opposed to filters) are currently not applied.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr query array, modified in place
	 */
	private function improveSearchQuerying(ISearchRequest $request, array &$arr) {
		$this->improveSearchWildcardFilters($request, $arr);
		$this->improveSearchRegexFilters($request, $arr);
	}
146
147
148
//	/**
149
//	 * @param SearchRequest $request
150
//	 * @param array $arr
151
//	 */
152
//	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
153
//
154
//		$queries = $request->getWildcardQueries();
155
//		foreach ($queries as $query) {
156
//			$wildcards = [];
157
//			foreach ($query as $entry) {
158
//				$wildcards[] = ['wildcard' => $entry];
159
//			}
160
//
161
//			array_push($arr['bool']['must']['bool']['should'], $wildcards);
162
//		}
163
//
164
//	}
165
166
167
	/**
	 * Appends one 'should' filter group per wildcard filter of the request.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr query array, modified in place
	 */
	private function improveSearchWildcardFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters('wildcard', $request->getWildcardFilters(), $arr);
	}


	/**
	 * Appends one 'should' filter group per regex filter of the request.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr query array, modified in place
	 */
	private function improveSearchRegexFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters('regexp', $request->getRegexFilters(), $arr);
	}


	/**
	 * Shared implementation for the wildcard and regex filters: every filter
	 * (a list of entries) becomes one bool/should group appended to
	 * $arr['bool']['filter'], each entry wrapped under $queryType.
	 *
	 * @param string $queryType key wrapping each entry ('wildcard' or 'regexp')
	 * @param array $filters list of filters, each one a list of entries
	 * @param array $arr query array, modified in place
	 */
	private function appendShouldFilters(string $queryType, array $filters, array &$arr) {
		foreach ($filters as $filter) {
			$should = [];
			foreach ($filter as $entry) {
				$should[] = [$queryType => $entry];
			}

			$arr['bool']['filter'][]['bool']['should'] = $should;
		}
	}
203
204
205
	/**
	 * Splits the search string into words / quoted phrases, turns each of
	 * them into a QueryContent and builds the content part of the query.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 * @throws SearchQueryGenerationException when no usable word is left
	 */
	private function generateSearchQueryContent(ISearchRequest $request): array {
		// multibyte-aware lowercase: strtolower() works on bytes and leaves
		// (or, depending on locale, corrupts) non-ASCII characters
		$str = mb_strtolower($request->getSearch(), 'UTF-8');

		// Matches either a double-quoted phrase (with backslash escapes) that is
		// preceded by one character other than '?', or a run of non-whitespace.
		// The string is padded with spaces so a leading quote has a preceding
		// character; the padding is trimmed from each match below.
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', " $str ", $words);
		$queryContent = [];
		foreach ($words[0] as $word) {
			try {
				$queryContent[] = $this->generateQueryContent(trim($word));
			} catch (QueryContentGenerationException $e) {
				// words that end up empty are deliberately skipped
				continue;
			}
		}

		if (count($queryContent) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $queryContent);
	}
230
231
232
	/**
	 * Builds a QueryContent from a single word and rejects it when the
	 * resulting word is empty.
	 *
	 * @param string $word
	 *
	 * @return QueryContent
	 * @throws QueryContentGenerationException when QueryContent::getWord() is empty
	 */
	private function generateQueryContent(string $word): QueryContent {
		$content = new QueryContent($word);
		if (strlen($content->getWord()) !== 0) {
			return $content;
		}

		throw new QueryContentGenerationException();
	}
247
248
249
	/**
	 * Groups the field clauses of each QueryContent under the key returned by
	 * QueryContent::getShould().
	 *
	 * Under 'must', each content gets its own bool/should group; under any
	 * other key the field clauses are merged into one flat list.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent[] $contents
	 *
	 * @return array
	 */
	private function generateSearchQueryFromQueryContent(ISearchRequest $request, array $contents): array {
		$query = [];
		foreach ($contents as $content) {
			$occur = $content->getShould();
			$clauses = $query[$occur] ?? [];
			$fields = $this->generateQueryContentFields($request, $content);

			if ($occur === 'must') {
				$clauses[] = ['bool' => ['should' => $fields]];
			} else {
				$clauses = array_merge($clauses, $fields);
			}

			$query[$occur] = $clauses;
		}

		return $query;
	}
274
275
276
	/**
	 * Builds the list of per-field clauses for one QueryContent:
	 *  - one clause (keyed by QueryContent::getMatch()) for 'content', 'title'
	 *    and each field of the request,
	 *  - one 'wildcard' clause for each wildcard field,
	 *  - a single 'query_string' clause covering the parts fields.
	 * Fields rejected by fieldIsOutLimit() are left out.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent $content
	 *
	 * @return array
	 */
	private function generateQueryContentFields(ISearchRequest $request, QueryContent $content): array {
		$withinLimit = function($field) use ($request) {
			return !$this->fieldIsOutLimit($request, $field);
		};

		$word = $content->getWord();
		$clauses = [];

		$matchFields = array_merge(['content', 'title'], $request->getFields());
		foreach (array_filter($matchFields, $withinLimit) as $field) {
			$clauses[] = [$content->getMatch() => [$field => $word]];
		}

		foreach (array_filter($request->getWildcardFields(), $withinLimit) as $field) {
			$clauses[] = ['wildcard' => [$field => '*' . $word . '*']];
		}

		// array_filter() keeps the original keys; re-index so 'fields' stays a list
		$parts = array_values(array_filter($this->getPartsFields($request), $withinLimit));
		if ($parts !== []) {
			$clauses[] = [
				'query_string' => [
					'fields' => $parts,
					'query'  => $word
				]
			];
		}

		return $clauses;
	}
316
317
318
	/**
	 * Builds the list of access clauses: documents owned by or shared with
	 * the viewer, shared with '__all', shared with one of the viewer's groups
	 * or circles, plus the external files conditions when there are any.
	 *
	 * @param IDocumentAccess $access
	 *
	 * @return array
	 */
	private function generateSearchQueryAccess(IDocumentAccess $access): array {
		$clauses = [
			['term' => ['owner' => $access->getViewerId()]],
			['term' => ['users' => $access->getViewerId()]],
			['term' => ['users' => '__all']]
		];

		foreach ($access->getGroups() as $group) {
			$clauses[] = ['term' => ['groups' => $group]];
		}

		foreach ($access->getCircles() as $circle) {
			$clauses[] = ['term' => ['circles' => $circle]];
		}

		$external = $this->getExternalFilesConditions();
		if ($external !== []) {
			$clauses[] = ['bool' => ['must' => $external]];
		}

		return $clauses;
	}
345
346
	/**
	 * Returns the storages reported by the user storages service, or an
	 * empty list when no such service was injected.
	 *
	 * @return array
	 */
	private function getExternalFileShares() : array {
		return $this->userStoragesService
			? $this->userStoragesService->getAllStoragesForUser()
			: [];
	}
355
356
	/**
	 * Generates the condition array for external files.
	 *
	 * Returns an empty array when there is no external share. Otherwise the
	 * conditions require source 'files_external' and an empty owner, and,
	 * unless one of the shares is '/', a title starting with one of the shares.
	 *
	 * @return array
	 */
	private function getExternalFilesConditions(): array {
		// TODO :: normally we should check if the user wants to search
		// external files with "$request->getOption('files_external', '1') === '1'"
		$shares = $this->getExternalFileShares();
		if ($shares === []) {
			return [];
		}

		$conditions = [
			['term' => ['source' => 'files_external']],
			['term' => ['owner' => '']]
		];

		// If any external share is mounted as root, every path is allowed,
		// so no prefix restriction is added.
		if (in_array('/', $shares, true)) {
			return $conditions;
		}

		$prefixes = [];
		foreach ($shares as $share) {
			$prefixes[] = ['prefix' => ['title' => $share]];
		}
		$conditions[] = ['bool' => ['should' => $prefixes]];

		return $conditions;
	}
386
387
	/**
	 * Tells whether a field is excluded by the request's field limitation.
	 *
	 * An empty limit list means no limitation: no field is out of limit.
	 *
	 * @param ISearchRequest $request
	 * @param string $field
	 *
	 * @return bool true when a limit list exists and $field is not part of it
	 */
	private function fieldIsOutLimit(ISearchRequest $request, string $field): bool {
		$limit = $request->getLimitFields();

		// strict comparison: field names are strings and must match exactly,
		// loose comparison would treat numeric-looking names such as '1' and '01' as equal
		return count($limit) > 0 && !in_array($field, $limit, true);
	}
405
406
407
	/**
	 * Turns a list of tags into a list of 'term' clauses on the field $k.
	 *
	 * @param string $k field name the terms apply to
	 * @param array $tags
	 *
	 * @return array
	 */
	private function generateSearchQueryTags(string $k, array $tags): array {
		$toTerm = function($tag) use ($k) {
			return ['term' => [$k => $tag]];
		};

		// array_map() keeps the keys of $tags; re-index to always return a list
		return array_values(array_map($toTerm, $tags));
	}
422
423
424
	/**
	 * Converts simple queries into clauses:
	 *  - KEYWORD and INT_EQ become 'term',
	 *  - WILDCARD becomes 'wildcard',
	 *  - INT_GTE / INT_LTE / INT_GT / INT_LT become 'range' with gte/lte/gt/lt.
	 * Any other type produces no clause. Only the first value of each query
	 * is used; queries without a first value are skipped.
	 *
	 * @param ISearchRequestSimpleQuery[] $queries
	 *
	 * @return array
	 */
	private function generateSearchSimpleQuery(array $queries): array {
		$simpleQuery = [];
		foreach ($queries as $query) {
			// TODO: manage multiple entries array
			$values = $query->getValues();
			if (!array_key_exists(0, $values)) {
				// nothing to compare against; avoids reading an undefined offset
				continue;
			}

			$value = $values[0];
			$field = $query->getField();

			switch ($query->getType()) {
				case ISearchRequestSimpleQuery::COMPARE_TYPE_KEYWORD:
				case ISearchRequestSimpleQuery::COMPARE_TYPE_INT_EQ:
					$simpleQuery[] = ['term' => [$field => $value]];
					break;

				case ISearchRequestSimpleQuery::COMPARE_TYPE_WILDCARD:
					$simpleQuery[] = ['wildcard' => [$field => $value]];
					break;

				case ISearchRequestSimpleQuery::COMPARE_TYPE_INT_GTE:
					$simpleQuery[] = ['range' => [$field => ['gte' => $value]]];
					break;

				case ISearchRequestSimpleQuery::COMPARE_TYPE_INT_LTE:
					$simpleQuery[] = ['range' => [$field => ['lte' => $value]]];
					break;

				case ISearchRequestSimpleQuery::COMPARE_TYPE_INT_GT:
					$simpleQuery[] = ['range' => [$field => ['gt' => $value]]];
					break;

				case ISearchRequestSimpleQuery::COMPARE_TYPE_INT_LT:
					$simpleQuery[] = ['range' => [$field => ['lt' => $value]]];
					break;
			}
		}

		return $simpleQuery;
	}
473
474
475
	/**
	 * Builds the highlight section: 'content' and every parts field are
	 * highlighted with default settings (empty object), using empty pre and
	 * post tags.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function generateSearchHighlighting(ISearchRequest $request): array {
		$highlight = ['fields' => ['content' => new stdClass()]];
		foreach ($this->getPartsFields($request) as $partField) {
			$highlight['fields'][$partField] = new stdClass();
		}

		$highlight['pre_tags'] = [''];
		$highlight['post_tags'] = [''];

		return $highlight;
	}
494
495
496
	/**
	 * Builds the parameters identifying a single document; its id is
	 * "<providerId>:<documentId>".
	 *
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function getDocumentQuery(string $providerId, string $documentId): array {
		$query = [];
		$query['index'] = $this->configService->getElasticIndex();
		$query['type'] = 'standard';
		$query['id'] = $providerId . ':' . $documentId;

		return $query;
	}
510
511
512
	/**
	 * Returns the parts of the request, each prefixed with 'parts.'.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function getPartsFields(ISearchRequest $request): array {
		return array_map(
			// static: the closure does not need $this
			static function($value) {
				return 'parts.' . $value;
			}, $request->getParts()
		);
	}
524
525
}
526
527