Completed
Pull Request — master (#47)
by Maxence
02:53 queued 01:03
created

SearchMappingService::getPartsFields()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 7
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\FullTextSearch_ElasticSearch\Service;
32
33
34
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
35
use OCA\FullTextSearch_ElasticSearch\Exceptions\QueryContentGenerationException;
36
use OCA\FullTextSearch_ElasticSearch\Exceptions\SearchQueryGenerationException;
37
use OCA\FullTextSearch_ElasticSearch\Model\QueryContent;
38
use OCP\FullTextSearch\Model\DocumentAccess;
39
use OCP\FullTextSearch\Model\ISearchRequest;
40
41
42
/**
43
 * Class SearchMappingService
44
 *
45
 * @package OCA\FullTextSearch_ElasticSearch\Service
46
 */
47
class SearchMappingService {
48
49
	/** @var ConfigService source of the Elasticsearch index name */
	private $configService;

	/** @var MiscService injected helper, not used by the methods of this class */
	private $miscService;


	/**
	 * SearchMappingService constructor.
	 *
	 * Stores the injected services for later use.
	 *
	 * @param ConfigService $configService
	 * @param MiscService $miscService
	 */
	public function __construct(ConfigService $configService, MiscService $miscService) {
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
66
67
68
	/**
	 * Builds the search query for a request: the parameters generated by
	 * generateSearchQueryParams(), wrapped under the 'params' key.
	 *
	 * @param ISearchRequest $request
	 * @param DocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array ['params' => array]
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQuery(
		ISearchRequest $request, DocumentAccess $access, string $providerId
	): array {
		// Explicit array literal instead of writing a key into an undeclared variable.
		return [
			'params' => $this->generateSearchQueryParams($request, $access, $providerId)
		];
	}
84
85
86
	/**
	 * Assembles the full set of Elasticsearch search parameters for a request.
	 *
	 * The result contains the index name (from the configuration), the fixed
	 * 'standard' type, the pagination window and a body made of:
	 *  - query.bool.must:   the content query built from the search string,
	 *  - query.bool.filter: provider, access, metatags and subtags filters,
	 *                       plus the wildcard/regex filters of the request,
	 *  - highlight:         the highlighting definition.
	 *
	 * @param ISearchRequest $request
	 * @param DocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException when the search string yields no usable word
	 */
	public function generateSearchQueryParams(
		ISearchRequest $request, DocumentAccess $access, string $providerId
	): array {
		$index = $this->configService->getElasticIndex();
		$size = $request->getSize();
		// pages are counted from 1, offsets from 0
		$from = ($request->getPage() - 1) * $size;

		$query = [
			'bool' => [
				'must'   => [
					'bool' => ['should' => $this->generateSearchQueryContent($request)]
				],
				'filter' => [
					['bool' => ['must' => ['term' => ['provider' => $providerId]]]],
					['bool' => ['should' => $this->generateSearchQueryAccess($access)]],
					[
						'bool' => [
							'should' => $this->generateSearchQueryTags(
								'metatags', $request->getMetaTags()
							)
						]
					],
					[
						'bool' => [
							'should' => $this->generateSearchQueryTags(
								'subtags', $request->getSubTags(true)
							)
						]
					]
				]
			]
		];

		$highlight = $this->generateSearchHighlighting($request);

		// appends the wildcard/regex filters of the request to $query['bool']['filter']
		$this->improveSearchQuerying($request, $query);

		return [
			'index' => $index,
			'type'  => 'standard',
			'size'  => $size,
			'from'  => $from,
			'body'  => [
				'query'     => $query,
				'highlight' => $highlight
			]
		];
	}
123
124
125
	/**
	 * Extends the query (in place) with the optional filters carried by the
	 * request: wildcard filters first, then regex filters.
	 *
	 * Wildcard queries are currently not applied; their former implementation
	 * (improveSearchWildcardQueries) is disabled.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the query array, modified by reference
	 */
	private function improveSearchQuerying(ISearchRequest $request, array &$arr) {
		// order matters: each call appends to $arr['bool']['filter']
		$this->improveSearchWildcardFilters($request, $arr);
		$this->improveSearchRegexFilters($request, $arr);
	}
134
135
136
//	/**
137
//	 * @param SearchRequest $request
138
//	 * @param array $arr
139
//	 */
140
//	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
141
//
142
//		$queries = $request->getWildcardQueries();
143
//		foreach ($queries as $query) {
144
//			$wildcards = [];
145
//			foreach ($query as $entry) {
146
//				$wildcards[] = ['wildcard' => $entry];
147
//			}
148
//
149
//			array_push($arr['bool']['must']['bool']['should'], $wildcards);
150
//		}
151
//
152
//	}
153
154
155
	/**
	 * Appends the wildcard filters of the request to the query (in place).
	 *
	 * Every filter group of the request becomes one bool/should entry in
	 * $arr['bool']['filter'], each entry of the group wrapped in a 'wildcard'
	 * clause.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the query array, modified by reference
	 */
	private function improveSearchWildcardFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($arr, $request->getWildcardFilters(), 'wildcard');
	}


	/**
	 * Appends the regex filters of the request to the query (in place).
	 *
	 * Every filter group of the request becomes one bool/should entry in
	 * $arr['bool']['filter'], each entry of the group wrapped in a 'regexp'
	 * clause.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr the query array, modified by reference
	 */
	private function improveSearchRegexFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($arr, $request->getRegexFilters(), 'regexp');
	}


	/**
	 * Shared implementation of the wildcard and regex filters: for each group,
	 * appends one bool/should filter whose alternatives are the entries of the
	 * group, each wrapped in the given clause.
	 *
	 * @param array $arr the query array, modified by reference
	 * @param array $filters list of groups, each group being a list of entries
	 * @param string $clause name of the clause wrapping each entry ('wildcard' or 'regexp')
	 */
	private function appendShouldFilters(array &$arr, $filters, string $clause) {
		foreach ($filters as $filter) {
			// rebuilt as a plain list: the keys of the group are not kept
			$should = [];
			foreach ($filter as $entry) {
				$should[] = [$clause => $entry];
			}

			$arr['bool']['filter'][]['bool']['should'] = $should;
		}
	}
191
192
193
	/**
	 * Turns the search string of the request into the content part of the query.
	 *
	 * The lower-cased search string is split into tokens: either a
	 * double-quoted phrase (together with the single character preceding it,
	 * which must not be '?') or a run of non-whitespace characters. Each
	 * trimmed token is converted into a QueryContent; tokens rejected by
	 * generateQueryContent() are ignored.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 * @throws SearchQueryGenerationException when no token could be converted
	 */
	private function generateSearchQueryContent(ISearchRequest $request): array {
		// padded with spaces so a phrase at the very start still has a preceding character
		$haystack = ' ' . strtolower($request->getSearch()) . ' ';
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', $haystack, $matches);

		$contents = [];
		foreach ($matches[0] as $token) {
			try {
				$contents[] = $this->generateQueryContent(trim($token));
			} catch (QueryContentGenerationException $e) {
				// unusable token: skip it and keep the others
			}
		}

		if (count($contents) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $contents);
	}
218
219
220
	/**
	 * Wraps a single search token into a QueryContent.
	 *
	 * The token is rejected when the word exposed by the QueryContent is
	 * empty (QueryContent may transform the token; its logic is not part of
	 * this class).
	 *
	 * @param string $word
	 *
	 * @return QueryContent
	 * @throws QueryContentGenerationException when the resulting word is empty
	 */
	private function generateQueryContent(string $word): QueryContent {
		$content = new QueryContent($word);

		if (strlen($content->getWord()) > 0) {
			return $content;
		}

		throw new QueryContentGenerationException();
	}
235
236
237
	/**
	 * Groups the per-word queries by the value returned by
	 * QueryContent::getShould() and returns them as a bool query.
	 *
	 * Each group key maps to a list holding a single element: the list of
	 * field queries generated for the words of that group.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent[] $queryContents
	 *
	 * @return array ['bool' => array]
	 */
	private function generateSearchQueryFromQueryContent(
		ISearchRequest $request, array $queryContents
	): array {
		$grouped = [];
		foreach ($queryContents as $queryContent) {
			$grouped[$queryContent->getShould()][] =
				$this->generateQueryContentFields($request, $queryContent);
		}

		$bool = [];
		foreach ($grouped as $occurrence => $clauses) {
			// the clauses of a group are nested one level deeper, as a single entry
			$bool[$occurrence] = [$clauses];
		}

		return ['bool' => $bool];
	}
260
261
262
	/**
	 * Builds the bool/should query matching one word against every searchable
	 * field.
	 *
	 * Regular fields are 'content', 'title', the fields of the request and its
	 * parts fields; they are queried with the match type given by the
	 * QueryContent. The wildcard fields of the request are queried with a
	 * 'wildcard' clause on '*word*'. Fields excluded by fieldIsOutLimit() are
	 * left out in both cases.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent $content
	 *
	 * @return array ['bool' => ['should' => array]]
	 */
	private function generateQueryContentFields(ISearchRequest $request, QueryContent $content
	): array {
		$partsFields = $this->getPartsFields($request);
		$regularFields = array_merge(['content', 'title'], $request->getFields(), $partsFields);

		$should = [];
		foreach ($regularFields as $name) {
			if ($this->fieldIsOutLimit($request, $name)) {
				continue;
			}

			$should[] = [$content->getMatch() => [$name => $content->getWord()]];
		}

		foreach ($request->getWildcardFields() as $name) {
			if ($this->fieldIsOutLimit($request, $name)) {
				continue;
			}

			$should[] = ['wildcard' => [$name => '*' . $content->getWord() . '*']];
		}

		return ['bool' => ['should' => $should]];
	}
288
289
290
	/**
	 * Builds the list of 'term' clauses describing which documents the viewer
	 * may see: documents owned by the viewer, shared with the viewer, shared
	 * with '__all', or shared with one of the viewer's groups or circles.
	 *
	 * @param DocumentAccess $access
	 *
	 * @return array list of ['term' => [field => value]] clauses
	 */
	private function generateSearchQueryAccess(DocumentAccess $access): array {
		$viewerId = $access->getViewerId();

		$clauses = [
			['term' => ['owner' => $viewerId]],
			['term' => ['users' => $viewerId]],
			['term' => ['users' => '__all']]
		];

		foreach ($access->getGroups() as $group) {
			$clauses[] = ['term' => ['groups' => $group]];
		}

		foreach ($access->getCircles() as $circle) {
			$clauses[] = ['term' => ['circles' => $circle]];
		}

		return $clauses;
	}
312
313
314
	/**
	 * Tells whether a field is excluded by the limit-fields of the request.
	 *
	 * When the request defines no limit, no field is excluded. Otherwise a
	 * field is excluded unless it is listed in the limit.
	 *
	 * @param ISearchRequest $request
	 * @param string $field
	 *
	 * @return bool true when the field must not be queried
	 */
	private function fieldIsOutLimit(ISearchRequest $request, string $field): bool {
		$limit = $request->getLimitFields();
		if (count($limit) === 0) {
			return false;
		}

		// strict comparison: with loose comparison numeric-looking names such
		// as '1e3' and '1000' would be considered the same field
		return !in_array($field, $limit, true);
	}
332
333
334
	/**
	 * Converts a list of tags into a list of 'term' clauses on the given field.
	 *
	 * @param string $k name of the field the tags are matched against
	 * @param array $tags
	 *
	 * @return array list of ['term' => [$k => tag]] clauses, in the order of $tags
	 */
	private function generateSearchQueryTags(string $k, array $tags): array {
		$clauses = array_map(
			function($tag) use ($k) {
				return ['term' => [$k => $tag]];
			}, $tags
		);

		// array_map keeps the keys of $tags; the result must be a plain list
		return array_values($clauses);
	}
349
350
351
	/**
	 * Builds the highlighting definition of the search: 'content' and every
	 * parts field of the request are highlighted with default (empty object)
	 * settings, using empty pre and post tags.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function generateSearchHighlighting(ISearchRequest $request): array {
		$names = array_merge(['content'], $this->getPartsFields($request));

		$fields = [];
		foreach ($names as $name) {
			// one distinct empty object per field
			$fields[$name] = new \stdClass();
		}

		return [
			'fields'    => $fields,
			'pre_tags'  => [''],
			'post_tags' => ['']
		];
	}
370
371
372
	/**
	 * Returns the parameters identifying a single document in the index.
	 *
	 * The document id is the provider id and the document id joined by ':'.
	 *
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @return array ['index' => ..., 'type' => 'standard', 'id' => ...]
	 * @throws ConfigurationException
	 */
	public function getDocumentQuery(string $providerId, string $documentId): array {
		$index = $this->configService->getElasticIndex();
		$id = implode(':', [$providerId, $documentId]);

		return [
			'index' => $index,
			'type'  => 'standard',
			'id'    => $id
		];
	}
386
387
388
	/**
	 * Returns the parts of the request as field names, each prefixed with
	 * 'parts.'. The keys of the parts array are preserved.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function getPartsFields(ISearchRequest $request): array {
		return array_map(
			static function($value) {
				return 'parts.' . $value;
			}, $request->getParts()
		);
	}
400
401
}
402
403