Completed
Push — master ( 2566be...2cb116 )
by Maxence
01:40 queued 10s
created

SearchMappingService::generateQueryContentFields()   B

Complexity

Conditions 8
Paths 54

Size

Total Lines 34

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 34
rs 8.1315
c 0
b 0
f 0
cc 8
nc 54
nop 2
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch_Elasticsearch - Use Elasticsearch to index the content of your nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\FullTextSearch_Elasticsearch\Service;
32
33
34
use OCA\FullTextSearch_Elasticsearch\Exceptions\ConfigurationException;
35
use OCA\FullTextSearch_Elasticsearch\Exceptions\QueryContentGenerationException;
36
use OCA\FullTextSearch_Elasticsearch\Exceptions\SearchQueryGenerationException;
37
use OCA\FullTextSearch_Elasticsearch\Model\QueryContent;
38
use OCP\FullTextSearch\Model\IDocumentAccess;
39
use OCP\FullTextSearch\Model\ISearchRequest;
40
use OCP\FullTextSearch\Model\ISearchRequestSimpleQuery;
41
use stdClass;
42
43
44
/**
45
 * Class SearchMappingService
46
 *
47
 * @package OCA\FullTextSearch_Elasticsearch\Service
48
 */
49
class SearchMappingService {
50
51
	/** @var ConfigService */
52
	private $configService;
53
54
	/** @var MiscService */
55
	private $miscService;
56
57
58
	/**
	 * Keep references to the services this class works with.
	 *
	 * @param ConfigService $configService queried for the Elasticsearch index name
	 * @param MiscService $miscService stored on the instance; not referenced by the
	 *                                 other methods visible in this class
	 */
	public function __construct(ConfigService $configService, MiscService $miscService) {
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
68
69
70
	/**
	 * Build the search query for a request.
	 *
	 * The returned array has a single 'params' entry holding the value produced
	 * by generateSearchQueryParams().
	 *
	 * @param ISearchRequest $request
	 * @param IDocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQuery(
		ISearchRequest $request, IDocumentAccess $access, string $providerId
	): array {
		// Create the array explicitly rather than relying on PHP auto-creating it
		// when an index is written on an undefined variable.
		return [
			'params' => $this->generateSearchQueryParams($request, $access, $providerId)
		];
	}
86
87
88
	/**
	 * Assemble the parameters of the search call: target index, paging, the
	 * bool query (optional text search plus filters) and the highlight section.
	 *
	 * @param ISearchRequest $request
	 * @param IDocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQueryParams(
		ISearchRequest $request, IDocumentAccess $access, string $providerId
	): array {
		$params = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard'
		];
		$params['size'] = $request->getSize();
		// Pages are 1-based: page 1 starts at offset 0.
		$params['from'] = ($request->getPage() - 1) * $request->getSize();

		$bool = [];
		// The text part of the query is only added when a search string was given.
		if ($request->getSearch() !== '') {
			$bool['must'] = ['bool' => $this->generateSearchQueryContent($request)];
		}

		// Filters, in order: provider, access rights, meta tags, sub tags, simple queries.
		$bool['filter'] = [
			['bool' => ['must' => ['term' => ['provider' => $providerId]]]],
			['bool' => ['should' => $this->generateSearchQueryAccess($access)]],
			[
				'bool' => [
					'should' => $this->generateSearchQueryTags('metatags', $request->getMetaTags())
				]
			],
			[
				'bool' => [
					'must' => $this->generateSearchQueryTags('subtags', $request->getSubTags(true))
				]
			],
			['bool' => ['must' => $this->generateSearchSimpleQuery($request->getSimpleQueries())]]
		];

		$params['body'] = [
			'query'     => ['bool' => $bool],
			'highlight' => $this->generateSearchHighlighting($request)
		];

		// Appends the wildcard/regex filters of the request to the query in place.
		$this->improveSearchQuerying($request, $params['body']['query']);

		return $params;
	}
132
133
134
	/**
	 * Extend the query with the additional filters carried by the request.
	 *
	 * Wildcard filters are appended first, regex filters second.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the 'query' section, modified in place
	 */
	private function improveSearchQuerying(ISearchRequest $request, array &$arr) {
		// improveSearchWildcardQueries() is currently disabled.
		$this->improveSearchWildcardFilters($request, $arr);
		$this->improveSearchRegexFilters($request, $arr);
	}
143
144
145
//	/**
146
//	 * @param SearchRequest $request
147
//	 * @param array $arr
148
//	 */
149
//	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
150
//
151
//		$queries = $request->getWildcardQueries();
152
//		foreach ($queries as $query) {
153
//			$wildcards = [];
154
//			foreach ($query as $entry) {
155
//				$wildcards[] = ['wildcard' => $entry];
156
//			}
157
//
158
//			array_push($arr['bool']['must']['bool']['should'], $wildcards);
159
//		}
160
//
161
//	}
162
163
164
	/**
	 * Append the wildcard filters of the request to the query.
	 *
	 * Each filter returned by getWildcardFilters() becomes one entry of
	 * $arr['bool']['filter'], whose 'should' list holds one 'wildcard' clause
	 * per entry of that filter.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the 'query' section, modified in place
	 */
	private function improveSearchWildcardFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($request->getWildcardFilters(), 'wildcard', $arr);
	}


	/**
	 * Append the regex filters of the request to the query.
	 *
	 * Each filter returned by getRegexFilters() becomes one entry of
	 * $arr['bool']['filter'], whose 'should' list holds one 'regexp' clause
	 * per entry of that filter.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the 'query' section, modified in place
	 */
	private function improveSearchRegexFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($request->getRegexFilters(), 'regexp', $arr);
	}


	/**
	 * Shared implementation of the wildcard and regex filter helpers.
	 *
	 * @param array|\Traversable $filters list of filters, each one a list of entries
	 * @param string $clause name of the clause wrapping each entry ('wildcard' or 'regexp')
	 * @param array $arr the 'query' section, modified in place
	 */
	private function appendShouldFilters($filters, string $clause, array &$arr) {
		foreach ($filters as $filter) {
			$should = [];
			foreach ($filter as $entry) {
				$should[] = [$clause => $entry];
			}

			// A filter without entries still adds an (empty) 'should' list.
			$arr['bool']['filter'][]['bool']['should'] = $should;
		}
	}
200
201
202
	/**
	 * Turn the search string of the request into the text part of the query.
	 *
	 * The string is lowercased and split into double-quoted phrases and
	 * whitespace-separated words. Each piece is converted to a QueryContent;
	 * pieces that cannot be converted are skipped.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 * @throws SearchQueryGenerationException when no piece of the search string is usable
	 */
	private function generateSearchQueryContent(ISearchRequest $request): array {
		// Multibyte-aware lowercasing: strtolower() works byte by byte and leaves
		// non-ASCII uppercase letters (e.g. "É") unchanged.
		$str = mb_strtolower($request->getSearch(), 'UTF-8');

		// The string is padded with spaces so that the first alternative, which
		// needs one non-'?' character before the opening quote, can also match a
		// quoted phrase at the very start. trim() below removes that character
		// again when it is whitespace.
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', " $str ", $words);

		$queryContent = [];
		foreach ($words[0] as $word) {
			try {
				$queryContent[] = $this->generateQueryContent(trim($word));
			} catch (QueryContentGenerationException $e) {
				// Nothing usable in this piece; ignore it.
				continue;
			}
		}

		if (count($queryContent) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $queryContent);
	}
227
228
229
	/**
	 * Wrap a single piece of the search string in a QueryContent.
	 *
	 * @param string $word
	 *
	 * @return QueryContent
	 * @throws QueryContentGenerationException when the resulting word is empty
	 */
	private function generateQueryContent(string $word): QueryContent {
		$content = new QueryContent($word);
		if (strlen($content->getWord()) !== 0) {
			return $content;
		}

		throw new QueryContentGenerationException();
	}
244
245
246
	/**
	 * Group the clauses of every QueryContent under the key given by its
	 * getShould() value.
	 *
	 * Under 'must', each content gets its own nested bool/should entry; under
	 * any other key the clauses of all contents are merged into one flat list.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent[] $contents
	 *
	 * @return array
	 */
	private function generateSearchQueryFromQueryContent(ISearchRequest $request, array $contents): array {
		$query = [];
		foreach ($contents as $content) {
			$occur = $content->getShould();
			if (!array_key_exists($occur, $query)) {
				$query[$occur] = [];
			}

			$clauses = $this->generateQueryContentFields($request, $content);
			if ($occur !== 'must') {
				$query[$occur] = array_merge($query[$occur], $clauses);
				continue;
			}

			$query[$occur][] = ['bool' => ['should' => $clauses]];
		}

		return $query;
	}
271
272
273
	/**
	 * Build the list of clauses that look for one QueryContent in every
	 * searchable field.
	 *
	 * Three groups are produced, in this order: one clause per regular field
	 * ('content', 'title' and the fields of the request), one 'wildcard' clause
	 * per wildcard field, and a single 'query_string' clause covering all
	 * 'parts.*' fields. Fields rejected by fieldIsOutLimit() are left out.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent $content
	 *
	 * @return array
	 */
	private function generateQueryContentFields(ISearchRequest $request, QueryContent $content): array {
		$queryFields = [];

		foreach (array_merge(['content', 'title'], $request->getFields()) as $field) {
			if ($this->fieldIsOutLimit($request, $field)) {
				continue;
			}

			$queryFields[] = [$content->getMatch() => [$field => $content->getWord()]];
		}

		foreach ($request->getWildcardFields() as $field) {
			if ($this->fieldIsOutLimit($request, $field)) {
				continue;
			}

			$queryFields[] = ['wildcard' => [$field => '*' . $content->getWord() . '*']];
		}

		$parts = [];
		foreach ($this->getPartsFields($request) as $field) {
			if ($this->fieldIsOutLimit($request, $field)) {
				continue;
			}

			$parts[] = $field;
		}

		// Without any part field there is no query_string clause to add.
		if (count($parts) === 0) {
			return $queryFields;
		}

		$queryFields[] = [
			'query_string' => [
				'fields' => $parts,
				'query'  => $content->getWord()
			]
		];

		return $queryFields;
	}
313
314
315
	/**
	 * List the 'term' clauses describing what the viewer may access: documents
	 * owned by the viewer, shared with the viewer, shared with '__all', or
	 * shared with one of the viewer's groups or circles.
	 *
	 * @param IDocumentAccess $access
	 *
	 * @return array
	 */
	private function generateSearchQueryAccess(IDocumentAccess $access): array {
		$clauses = [
			['term' => ['owner' => $access->getViewerId()]],
			['term' => ['users' => $access->getViewerId()]],
			['term' => ['users' => '__all']]
		];

		foreach ($access->getGroups() as $groupId) {
			$clauses[] = ['term' => ['groups' => $groupId]];
		}

		foreach ($access->getCircles() as $circleId) {
			$clauses[] = ['term' => ['circles' => $circleId]];
		}

		return $clauses;
	}
337
338
339
	/**
	 * Tell whether a field is excluded by the field limit of the request.
	 *
	 * An empty limit list excludes nothing; otherwise every field missing from
	 * the list is excluded.
	 *
	 * @param ISearchRequest $request
	 * @param string $field
	 *
	 * @return bool true when the field must not be searched
	 */
	private function fieldIsOutLimit(ISearchRequest $request, string $field): bool {
		$limit = $request->getLimitFields();
		if (count($limit) === 0) {
			return false;
		}

		// Strict comparison: field names are strings and must not match through
		// PHP's loose type juggling (e.g. numeric strings, or 0 on PHP 7).
		return !in_array($field, $limit, true);
	}
357
358
359
	/**
	 * Build one 'term' clause per tag, all on the same field.
	 *
	 * @param string $k field the tags are matched against
	 * @param array $tags
	 *
	 * @return array list of clauses, in the order of $tags
	 */
	private function generateSearchQueryTags(string $k, array $tags): array {
		// array_values() drops the keys of $tags so the result is always a plain list.
		return array_map(
			static function($tag) use ($k) {
				return ['term' => [$k => $tag]];
			}, array_values($tags)
		);
	}
374
375
376
	/**
	 * Convert the simple queries of a request into query clauses.
	 *
	 * KEYWORD and INT_EQ comparisons become 'term' clauses, WILDCARD becomes a
	 * 'wildcard' clause and the INT_GTE/INT_LTE/INT_GT/INT_LT comparisons become
	 * 'range' clauses. Queries of any other type are ignored. Only the first
	 * value of each query is used.
	 *
	 * @param ISearchRequestSimpleQuery[] $queries
	 *
	 * @return array
	 */
	private function generateSearchSimpleQuery(array $queries): array {
		// Range comparison types, keyed by the operator used in the 'range' clause.
		$rangeOperators = [
			'gte' => ISearchRequestSimpleQuery::COMPARE_TYPE_INT_GTE,
			'lte' => ISearchRequestSimpleQuery::COMPARE_TYPE_INT_LTE,
			'gt'  => ISearchRequestSimpleQuery::COMPARE_TYPE_INT_GT,
			'lt'  => ISearchRequestSimpleQuery::COMPARE_TYPE_INT_LT
		];

		$simpleQuery = [];
		foreach ($queries as $query) {
			$type = $query->getType();
			$field = $query->getField();

			// TODO: manage multiple entries array
			// "?? null" keeps the value null when no value was provided, without
			// raising an undefined-offset warning.
			$value = $query->getValues()[0] ?? null;

			if ($type === ISearchRequestSimpleQuery::COMPARE_TYPE_KEYWORD
				|| $type === ISearchRequestSimpleQuery::COMPARE_TYPE_INT_EQ) {
				$simpleQuery[] = ['term' => [$field => $value]];
				continue;
			}

			if ($type === ISearchRequestSimpleQuery::COMPARE_TYPE_WILDCARD) {
				$simpleQuery[] = ['wildcard' => [$field => $value]];
				continue;
			}

			// Strict search, matching the === comparisons above.
			$operator = array_search($type, $rangeOperators, true);
			if ($operator !== false) {
				$simpleQuery[] = ['range' => [$field => [$operator => $value]]];
			}
		}

		return $simpleQuery;
	}
425
426
427
	/**
	 * Build the 'highlight' section of the search body.
	 *
	 * Highlighting is requested on 'content' and on every 'parts.*' field of
	 * the request, with empty pre and post tags.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function generateSearchHighlighting(ISearchRequest $request): array {
		// Each field maps to an empty object (presumably so it serializes as {}
		// rather than [] - confirm against the client's encoding).
		$highlighted = ['content' => new stdClass()];
		foreach ($this->getPartsFields($request) as $partField) {
			$highlighted[$partField] = new stdClass();
		}

		$highlight = [];
		$highlight['fields'] = $highlighted;
		$highlight['pre_tags'] = [''];
		$highlight['post_tags'] = [''];

		return $highlight;
	}
446
447
448
	/**
	 * Build the parameters identifying a single indexed document.
	 *
	 * The document id is the provider id and the document id joined by ':'.
	 *
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function getDocumentQuery(string $providerId, string $documentId): array {
		$query = [];
		$query['index'] = $this->configService->getElasticIndex();
		$query['type'] = 'standard';
		$query['id'] = $providerId . ':' . $documentId;

		return $query;
	}
462
463
464
	/**
	 * Return the parts of the request as field names, i.e. each part prefixed
	 * with 'parts.'. Keys of the original list are kept.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function getPartsFields(ISearchRequest $request) {
		$fields = [];
		foreach ($request->getParts() as $key => $part) {
			$fields[$key] = 'parts.' . $part;
		}

		return $fields;
	}
476
477
}
478
479