Completed
Pull Request — master (#47)
by Maxence
01:59
created

SearchMappingService::getPartsFields()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 7
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\FullTextSearch_ElasticSearch\Service;
32
33
34
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
35
use OCA\FullTextSearch_ElasticSearch\Exceptions\QueryContentGenerationException;
36
use OCA\FullTextSearch_ElasticSearch\Exceptions\SearchQueryGenerationException;
37
use OCA\FullTextSearch_ElasticSearch\Model\QueryContent;
38
use OCP\FullTextSearch\Model\DocumentAccess;
39
use OCP\FullTextSearch\Model\ISearchRequest;
40
41
42
/**
43
 * Class SearchMappingService
44
 *
45
 * @package OCA\FullTextSearch_ElasticSearch\Service
46
 */
47
class SearchMappingService {
48
49
	/** @var ConfigService */
50
	private $configService;
51
52
	/** @var MiscService */
53
	private $miscService;
54
55
56
	/**
	 * Stores the two collaborating services on the instance.
	 *
	 * @param ConfigService $configService used by this class to look up the Elasticsearch index
	 * @param MiscService $miscService used by this class for logging
	 */
	public function __construct(ConfigService $configService, MiscService $miscService) {
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
66
67
68
	/**
	 * Wraps the search parameters built by generateSearchQueryParams() under
	 * a single 'params' key.
	 *
	 * @param ISearchRequest $request
	 * @param DocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array array with one entry, 'params'
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQuery(
		ISearchRequest $request, DocumentAccess $access, string $providerId
	): array {
		// Return an explicit array literal rather than writing a key into a
		// variable that was never initialised.
		return ['params' => $this->generateSearchQueryParams($request, $access, $providerId)];
	}
84
85
86
	/**
	 * Builds the parameter array for a search: target index, paging, and a
	 * body made of the query and the highlighting settings.
	 *
	 * The query is a bool query. Its 'must' part holds the clauses generated
	 * from the search string; its 'filter' part receives one clause each for
	 * the provider, the viewer's access rights, the meta tags and the sub
	 * tags. Wildcard and regex filters of the request are appended afterwards
	 * by improveSearchQuerying().
	 *
	 * @param ISearchRequest $request
	 * @param DocumentAccess $access
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 * @throws SearchQueryGenerationException
	 */
	public function generateSearchQueryParams(
		ISearchRequest $request, DocumentAccess $access, string $providerId
	): array {
		$params = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'size'  => $request->getSize(),
			// Pages are 1-based. Clamp at zero so that a page number below 1
			// can never yield a negative offset.
			'from'  => max(0, ($request->getPage() - 1) * $request->getSize())
		];

		$bool = [];
		$bool['must']['bool']['should'] = $this->generateSearchQueryContent($request);

		// Each `[]` appends a separate entry to the filter list.
		$bool['filter'][]['bool']['must'] = ['term' => ['provider' => $providerId]];
		$bool['filter'][]['bool']['should'] = $this->generateSearchQueryAccess($access);
		$bool['filter'][]['bool']['should'] =
			$this->generateSearchQueryTags('metatags', $request->getMetaTags());
		$bool['filter'][]['bool']['should'] =
			$this->generateSearchQueryTags('subtags', $request->getSubTags(true));

		$params['body']['query']['bool'] = $bool;
		$params['body']['highlight'] = $this->generateSearchHighlighting($request);

		// NOTE(review): looks like leftover debug output; kept so the logging
		// behaviour is unchanged - confirm whether it is still wanted.
		$this->miscService->log('### ' . json_encode($params['body']['highlight']));
		$this->improveSearchQuerying($request, $params['body']['query']);

		return $params;
	}
124
125
126
	/**
	 * Post-processes a generated query by appending the request's wildcard
	 * filters first and its regex filters second.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr query array, modified in place
	 */
	private function improveSearchQuerying(ISearchRequest $request, array &$arr) {
		// improveSearchWildcardQueries() is currently disabled; only the two
		// filter passes below are applied.
		$this->improveSearchWildcardFilters($request, $arr);
		$this->improveSearchRegexFilters($request, $arr);
	}
135
136
137
//	/**
138
//	 * @param SearchRequest $request
139
//	 * @param array $arr
140
//	 */
141
//	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
142
//
143
//		$queries = $request->getWildcardQueries();
144
//		foreach ($queries as $query) {
145
//			$wildcards = [];
146
//			foreach ($query as $entry) {
147
//				$wildcards[] = ['wildcard' => $entry];
148
//			}
149
//
150
//			array_push($arr['bool']['must']['bool']['should'], $wildcards);
151
//		}
152
//
153
//	}
154
155
156
	/**
	 * Appends one filter clause per wildcard filter of the request.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr query array, modified in place
	 */
	private function improveSearchWildcardFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($arr, $request->getWildcardFilters(), 'wildcard');
	}


	/**
	 * Appends one filter clause per regex filter of the request.
	 *
	 * @param ISearchRequest $request
	 * @param array $arr query array, modified in place
	 */
	private function improveSearchRegexFilters(ISearchRequest $request, array &$arr) {
		$this->appendShouldFilters($arr, $request->getRegexFilters(), 'regexp');
	}


	/**
	 * Shared implementation of the wildcard and regex filter passes.
	 *
	 * Every element of $filters is itself a list of entries. Each entry is
	 * wrapped as [$queryType => $entry], and the wrapped entries of one
	 * filter are appended to $arr['bool']['filter'] as a single
	 * bool/should clause.
	 *
	 * @param array $arr query array, modified in place
	 * @param array $filters list of filters, each a list of entries
	 * @param string $queryType key used to wrap each entry ('wildcard' or 'regexp')
	 */
	private function appendShouldFilters(array &$arr, array $filters, string $queryType) {
		foreach ($filters as $filter) {
			$clauses = [];
			foreach ($filter as $entry) {
				$clauses[] = [$queryType => $entry];
			}

			// A filter without entries still appends a clause with an empty
			// 'should' list, exactly as the two original loops did.
			$arr['bool']['filter'][]['bool']['should'] = $clauses;
		}
	}
192
193
194
	/**
	 * Splits the search string of the request into words (or double-quoted
	 * phrases), turns each of them into a QueryContent and builds the query
	 * clauses from the result.
	 *
	 * Words for which no QueryContent can be generated are skipped.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 * @throws SearchQueryGenerationException if no usable word is left
	 */
	private function generateSearchQueryContent(ISearchRequest $request): array {
		// Multibyte-aware lower-casing: strtolower() works on bytes and leaves
		// non-ASCII upper-case letters untouched.
		$str = mb_strtolower($request->getSearch(), 'UTF-8');

		// The string is padded with spaces so that a quoted phrase at the very
		// start still has a preceding character for the pattern to consume.
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', " $str ", $words);
		$queryContent = [];
		foreach ($words[0] as $word) {
			try {
				$queryContent[] = $this->generateQueryContent(trim($word));
			} catch (QueryContentGenerationException $e) {
				// Deliberate best-effort: an unusable word is dropped and the
				// remaining words are still searched.
				continue;
			}
		}

		if (count($queryContent) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $queryContent);
	}
219
220
221
	/**
	 * Creates the QueryContent for one word of the search string and rejects
	 * it when the resulting word is empty.
	 *
	 * @param string $word
	 *
	 * @return QueryContent
	 * @throws QueryContentGenerationException if the QueryContent holds an empty word
	 */
	private function generateQueryContent(string $word): QueryContent {
		$content = new QueryContent($word);

		if (strlen($content->getWord()) > 0) {
			return $content;
		}

		throw new QueryContentGenerationException();
	}
236
237
238
	/**
	 * Groups the clauses generated for each QueryContent by the value of its
	 * getShould() and returns them as a bool query.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent[] $queryContents
	 *
	 * @return array ['bool' => [group => [clauses]]]
	 */
	private function generateSearchQueryFromQueryContent(
		ISearchRequest $request, array $queryContents
	): array {

		$grouped = [];
		foreach ($queryContents as $content) {
			$grouped[$content->getShould()][] =
				$this->generateQueryContentFields($request, $content);
		}

		$bool = [];
		foreach ($grouped as $occurrence => $clauses) {
			// The clause list of a group is stored as the single element of
			// an outer list.
			$bool[$occurrence] = [$clauses];
		}

		return ['bool' => $bool];
	}
261
262
263
	/**
	 * Builds the bool/should clause for one word of the search.
	 *
	 * The word is matched, using the match type of the QueryContent, against
	 * 'content', 'title', the fields of the request and its parts fields, and
	 * as a '*word*' wildcard against the wildcard fields of the request.
	 * Fields excluded by fieldIsOutLimit() are left out.
	 *
	 * @param ISearchRequest $request
	 * @param QueryContent $content
	 *
	 * @return array
	 */
	private function generateQueryContentFields(ISearchRequest $request, QueryContent $content
	): array {
		$partFields = $this->getPartsFields($request);
		$matchFields = array_merge(['content', 'title'], $request->getFields(), $partFields);

		$clauses = [];
		foreach ($matchFields as $name) {
			if ($this->fieldIsOutLimit($request, $name)) {
				continue;
			}

			$clauses[] = [$content->getMatch() => [$name => $content->getWord()]];
		}

		foreach ($request->getWildcardFields() as $name) {
			if ($this->fieldIsOutLimit($request, $name)) {
				continue;
			}

			$clauses[] = ['wildcard' => [$name => '*' . $content->getWord() . '*']];
		}

		return ['bool' => ['should' => $clauses]];
	}
289
290
291
	/**
	 * Lists the term clauses describing what the viewer may see: documents
	 * owned by or shared with the viewer, documents shared with '__all', and
	 * documents shared with any of the viewer's groups or circles.
	 *
	 * @param DocumentAccess $access
	 *
	 * @return array
	 */
	private function generateSearchQueryAccess(DocumentAccess $access): array {

		$terms = [
			['term' => ['owner' => $access->getViewerId()]],
			['term' => ['users' => $access->getViewerId()]],
			['term' => ['users' => '__all']]
		];

		foreach ($access->getGroups() as $groupId) {
			$terms[] = ['term' => ['groups' => $groupId]];
		}

		foreach ($access->getCircles() as $circleId) {
			$terms[] = ['term' => ['circles' => $circleId]];
		}

		return $terms;
	}
313
314
315
	/**
	 * Tells whether a field is excluded by the limit-fields list of the
	 * request.
	 *
	 * An empty list excludes nothing; otherwise every field that is not in
	 * the list is excluded.
	 *
	 * @param ISearchRequest $request
	 * @param string $field
	 *
	 * @return bool true if the field must be left out of the query
	 */
	private function fieldIsOutLimit(ISearchRequest $request, string $field): bool {
		$limit = $request->getLimitFields();

		// Strict comparison so that a field name is only accepted on an exact
		// string match, never through type juggling.
		return count($limit) > 0 && !in_array($field, $limit, true);
	}
333
334
335
	/**
	 * Turns a list of tags into a list of term clauses on the field $k.
	 *
	 * @param string $k name of the field the tags are matched against
	 * @param array $tags
	 *
	 * @return array one ['term' => [$k => tag]] entry per tag
	 */
	private function generateSearchQueryTags(string $k, array $tags): array {

		// array_values() first: array_map() would otherwise carry over the
		// keys of $tags instead of producing a plain list.
		return array_map(
			function($tag) use ($k) {
				return ['term' => [$k => $tag]];
			}, array_values($tags)
		);
	}
350
351
352
	/**
	 * Builds the highlighting settings: 'content' and every parts field of
	 * the request are highlighted, with empty pre and post tags.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function generateSearchHighlighting(ISearchRequest $request): array {

		// Each field is mapped to its own empty object.
		$highlighted = ['content' => new \stdClass()];
		foreach ($this->getPartsFields($request) as $partField) {
			$highlighted[$partField] = new \stdClass();
		}

		$settings = [];
		$settings['fields'] = $highlighted;
		$settings['pre_tags'] = [''];
		$settings['post_tags'] = [''];

		return $settings;
	}
371
372
373
	/**
	 * Builds the parameters identifying a single document in the configured
	 * index. The document id is '<providerId>:<documentId>'.
	 *
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function getDocumentQuery(string $providerId, string $documentId): array {
		$index = $this->configService->getElasticIndex();
		$id = sprintf('%s:%s', $providerId, $documentId);

		return ['index' => $index, 'type' => 'standard', 'id' => $id];
	}
387
388
389
	/**
	 * Returns the parts of the request as field names, i.e. each part
	 * prefixed with 'parts.'. The keys of the parts array are kept.
	 *
	 * @param ISearchRequest $request
	 *
	 * @return array
	 */
	private function getPartsFields(ISearchRequest $request): array {
		return array_map(
			// Static: the closure does not use $this.
			static function($value) {
				return 'parts.' . $value;
			}, $request->getParts()
		);
	}
401
402
}
403
404