Completed
Pull Request — master (#49)
by Maxence
01:33
created

SearchMappingService::generateQueryContentFields()   B

Complexity

Conditions 8
Paths 54

Size

Total Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 35
rs 8.1155
c 0
b 0
f 0
cc 8
nc 54
nop 2
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\FullTextSearch_ElasticSearch\Service;
32
33
34
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
35
use OCA\FullTextSearch_ElasticSearch\Exceptions\QueryContentGenerationException;
36
use OCA\FullTextSearch_ElasticSearch\Exceptions\SearchQueryGenerationException;
37
use OCA\FullTextSearch_ElasticSearch\Model\QueryContent;
38
use OCP\FullTextSearch\Model\DocumentAccess;
39
use OCP\FullTextSearch\Model\ISearchRequest;
40
41
42
/**
43
 * Class SearchMappingService
44
 *
45
 * @package OCA\FullTextSearch_ElasticSearch\Service
46
 */
47
class SearchMappingService {


	/** @var ConfigService */
	private $configService;

	/** @var MiscService */
	private $miscService;


	/**
	 * SearchMappingService constructor.
	 *
	 * @param ConfigService $configService queried for the Elasticsearch index name
	 * @param MiscService $miscService kept on the instance; no method of this class calls it
	 */
	public function __construct(ConfigService $configService, MiscService $miscService) {
		$this->configService = $configService;
		$this->miscService = $miscService;
	}


	/**
	 * Wraps the parameters built by generateSearchQueryParams() in an array
	 * under the 'params' key.
	 *
	 * @param ISearchRequest $request
	 * @param DocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array ['params' => array]
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQuery(
		ISearchRequest $request, DocumentAccess $access, string $providerId
	): array {
		// explicit literal: the result array is never built implicitly
		return ['params' => $this->generateSearchQueryParams($request, $access, $providerId)];
	}


	/**
	 * Builds the search parameters: index, type, pagination, a bool query
	 * (text clauses under 'must', provider/access/tag restrictions under
	 * 'filter') and the highlight definition. Wildcard and regex filters of
	 * the request are appended to the 'filter' list afterwards.
	 *
	 * @param ISearchRequest $request
	 * @param DocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQueryParams(
		ISearchRequest $request, DocumentAccess $access, string $providerId
	): array {
		$params = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'size'  => $request->getSize(),
			// offset of the first hit of the requested page; clamped so that a
			// page below 1 cannot yield a negative offset
			'from'  => max(0, ($request->getPage() - 1) * $request->getSize())
		];

		$bool = [];
		$bool['must']['bool']['should'] = $this->generateSearchQueryContent($request);

		// each appended filter entry is its own bool clause
		$bool['filter'][]['bool']['must'] = ['term' => ['provider' => $providerId]];
		$bool['filter'][]['bool']['should'] = $this->generateSearchQueryAccess($access);
		$bool['filter'][]['bool']['should'] =
			$this->generateSearchQueryTags('metatags', $request->getMetaTags());
		$bool['filter'][]['bool']['should'] =
			$this->generateSearchQueryTags('subtags', $request->getSubTags(true));
//		$bool['filter'][]['bool']['should'] = $this->generateSearchQueryTags($request->getTags());

		$params['body']['query']['bool'] = $bool;
		$params['body']['highlight'] = $this->generateSearchHighlighting($request);

		$this->improveSearchQuerying($request, $params['body']['query']);

		return $params;
	}


	/**
	 * Appends the wildcard and regex filters of the request to the query.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the 'query' part of the request body, modified in place
	 */
	private function improveSearchQuerying(ISearchRequest $request, array &$arr) {
//		$this->improveSearchWildcardQueries($request, $arr);
		$this->improveSearchWildcardFilters($request, $arr);
		$this->improveSearchRegexFilters($request, $arr);
	}


//	/**
//	 * @param SearchRequest $request
//	 * @param array $arr
//	 */
//	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
//
//		$queries = $request->getWildcardQueries();
//		foreach ($queries as $query) {
//			$wildcards = [];
//			foreach ($query as $entry) {
//				$wildcards[] = ['wildcard' => $entry];
//			}
//
//			array_push($arr['bool']['must']['bool']['should'], $wildcards);
//		}
//
//	}


	/**
	 * Adds one bool/should filter per wildcard filter group of the request;
	 * every entry of a group becomes a 'wildcard' clause.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the 'query' part of the request body, modified in place
	 */
	private function improveSearchWildcardFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($request->getWildcardFilters(), 'wildcard', $arr);
	}


	/**
	 * Adds one bool/should filter per regex filter group of the request;
	 * every entry of a group becomes a 'regexp' clause.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the 'query' part of the request body, modified in place
	 */
	private function improveSearchRegexFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($request->getRegexFilters(), 'regexp', $arr);
	}


	/**
	 * Shared implementation of the wildcard and regex filter methods: for
	 * each group, wraps every entry as [$clause => $entry] and appends the
	 * resulting list as a new bool/should element of $arr['bool']['filter'].
	 * A group without entries still appends an empty 'should' list.
	 *
	 * @param array $filters list of groups, each group being a list of entries
	 * @param string $clause key used to wrap each entry ('wildcard' or 'regexp')
	 * @param array $arr the 'query' part of the request body, modified in place
	 */
	private function appendShouldFilters($filters, string $clause, array &$arr) {
		foreach ($filters as $group) {
			$clauses = [];
			foreach ($group as $entry) {
				$clauses[] = [$clause => $entry];
			}

			$arr['bool']['filter'][]['bool']['should'] = $clauses;
		}
	}


	/**
	 * Splits the lower-cased search string into tokens and converts them
	 * into the text part of the query. A token is either a double-quoted
	 * phrase (escaped characters allowed inside, and preceded by one
	 * character other than '?') or a run of non-whitespace characters.
	 * Tokens for which no QueryContent can be generated are skipped.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 * @throws SearchQueryGenerationException when no token is usable
	 */
	private function generateSearchQueryContent(ISearchRequest $request): array {
		// NOTE(review): strtolower() works on bytes; confirm whether non-ASCII
		// input needs mb_strtolower() here.
		$str = strtolower($request->getSearch());

		// the string is padded with spaces so that a phrase at its very start
		// still has the leading character required by the first alternative
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', " $str ", $words);

		$queryContent = [];
		foreach ($words[0] as $word) {
			try {
				$queryContent[] = $this->generateQueryContent(trim($word));
			} catch (QueryContentGenerationException $e) {
				// unusable token: ignore it and keep the remaining ones
				continue;
			}
		}

		if (count($queryContent) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $queryContent);
	}


	/**
	 * Creates the QueryContent for a single token.
	 *
	 * @param string $word
	 *
	 * @return QueryContent
	 * @throws QueryContentGenerationException when the resulting word is empty
	 */
	private function generateQueryContent(string $word): QueryContent {
		$searchQueryContent = new QueryContent($word);
		if (strlen($searchQueryContent->getWord()) === 0) {
			throw new QueryContentGenerationException();
		}

		return $searchQueryContent;
	}


	/**
	 * Groups the field clauses of every QueryContent by the value of its
	 * getShould() and returns them as a bool query. Each group is stored as
	 * a single list element under its key, i.e. [key => [[clause, ...]]].
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent[] $queryContents
	 *
	 * @return array ['bool' => array]
	 */
	private function generateSearchQueryFromQueryContent(
		ISearchRequest $request, array $queryContents
	): array {
		$queryWords = [];
		foreach ($queryContents as $queryContent) {
			$queryWords[$queryContent->getShould()][] =
				$this->generateQueryContentFields($request, $queryContent);
		}

		$query = [];
		foreach ($queryWords as $itemShould => $clauses) {
			// the whole group is appended as one element (nested list)
			$query[$itemShould][] = $clauses;
		}

		return ['bool' => $query];
	}


	/**
	 * Builds the bool/should clause matching one QueryContent against every
	 * searchable field that is not excluded by the request's field limit:
	 *  - 'content', 'title' and the request's extra fields, using the
	 *    content's own match type,
	 *  - the request's wildcard fields, with the word wrapped in '*',
	 *  - the 'parts.*' fields, combined into a single 'query_string' clause.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent $content
	 *
	 * @return array ['bool' => ['should' => array]]
	 */
	private function generateQueryContentFields(ISearchRequest $request, QueryContent $content
	): array {
		$queryFields = [];

		$fields = array_merge(['content', 'title'], $request->getFields());
		foreach ($fields as $field) {
			if (!$this->fieldIsOutLimit($request, $field)) {
				$queryFields[] = [$content->getMatch() => [$field => $content->getWord()]];
			}
		}

		foreach ($request->getWildcardFields() as $field) {
			if (!$this->fieldIsOutLimit($request, $field)) {
				$queryFields[] = ['wildcard' => [$field => '*' . $content->getWord() . '*']];
			}
		}

		$parts = [];
		foreach ($this->getPartsFields($request) as $field) {
			if (!$this->fieldIsOutLimit($request, $field)) {
				$parts[] = $field;
			}
		}

		// no 'query_string' clause at all when every part is filtered out
		if (count($parts) > 0) {
			$queryFields[] = [
				'query_string' => [
					'fields' => $parts,
					'query'  => $content->getWord()
				]
			];
		}

		return ['bool' => ['should' => $queryFields]];
	}


	/**
	 * Builds the list of 'term' clauses describing what the viewer may see:
	 * documents owned by the viewer, shared with the viewer, shared with
	 * '__all', or shared with one of the viewer's groups or circles.
	 *
	 * @param DocumentAccess $access
	 *
	 * @return array
	 */
	private function generateSearchQueryAccess(DocumentAccess $access): array {
		$query = [];
		$query[] = ['term' => ['owner' => $access->getViewerId()]];
		$query[] = ['term' => ['users' => $access->getViewerId()]];
		$query[] = ['term' => ['users' => '__all']];

		foreach ($access->getGroups() as $group) {
			$query[] = ['term' => ['groups' => $group]];
		}

		foreach ($access->getCircles() as $circle) {
			$query[] = ['term' => ['circles' => $circle]];
		}

		return $query;
	}


	/**
	 * Tells whether a field is excluded by the request's list of limit
	 * fields. An empty list means no restriction, so nothing is excluded.
	 *
	 * @param ISearchRequest $request
	 * @param string $field
	 *
	 * @return bool true when a limit exists and $field is not part of it
	 */
	private function fieldIsOutLimit(ISearchRequest $request, string $field): bool {
		$limit = $request->getLimitFields();

		// strict comparison: field names must match exactly, without type juggling
		return count($limit) > 0 && !in_array($field, $limit, true);
	}


	/**
	 * Builds one 'term' clause on field $k for each given tag.
	 *
	 * @param string $k name of the field the tags are matched against
	 * @param array $tags
	 *
	 * @return array list of ['term' => [$k => tag]]; empty when $tags is empty
	 */
	private function generateSearchQueryTags(string $k, array $tags): array {
		$query = [];
		foreach ($tags as $t) {
			$query[] = ['term' => [$k => $t]];
		}

		return $query;
	}


	/**
	 * Builds the highlight definition: 'content' plus every 'parts.*' field
	 * of the request, with empty pre and post tags.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function generateSearchHighlighting(ISearchRequest $request): array {
		// each field maps to an empty object
		// NOTE(review): presumably so it serialises as {} rather than [] - confirm
		$fields = ['content' => new \stdClass()];
		foreach ($this->getPartsFields($request) as $part) {
			$fields[$part] = new \stdClass();
		}

		return [
			'fields'    => $fields,
			'pre_tags'  => [''],
			'post_tags' => ['']
		];
	}


	/**
	 * Builds the parameters addressing a single document; its id is the
	 * provider id and the document id joined by ':'.
	 *
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function getDocumentQuery(string $providerId, string $documentId): array {
		return [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'id'    => $providerId . ':' . $documentId
		];
	}


	/**
	 * Returns the parts of the request, each prefixed with 'parts.'.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function getPartsFields(ISearchRequest $request): array {
		return array_map(
			static function($value) {
				return 'parts.' . $value;
			}, $request->getParts()
		);
	}

}
417
418