Completed
Pull Request — master (#24)
by Maxence
02:28
created

SearchMappingService::generateSearchQueryAccess()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 17
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
c 3
b 0
f 0
dl 0
loc 17
rs 9.4285
cc 3
eloc 10
nc 4
nop 1
1
<?php
2
/**
3
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\FullTextSearch_ElasticSearch\Service;
28
29
use OCA\FullTextSearch\IFullTextSearchProvider;
30
use OCA\FullTextSearch\Model\DocumentAccess;
31
use OCA\FullTextSearch\Model\SearchRequest;
32
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
33
use OCA\FullTextSearch_ElasticSearch\Exceptions\QueryContentGenerationException;
34
use OCA\FullTextSearch_ElasticSearch\Exceptions\SearchQueryGenerationException;
35
use OCA\FullTextSearch_ElasticSearch\Model\QueryContent;
36
37
38
class SearchMappingService {
39
40
	/** @var ConfigService */
41
	private $configService;
42
43
	/** @var MiscService */
44
	private $miscService;
45
46
47
	/**
48
	 * SearchMappingService constructor.
49
	 *
50
	 * @param ConfigService $configService
51
	 * @param MiscService $miscService
52
	 */
53
	public function __construct(ConfigService $configService, MiscService $miscService) {
		// Keep the injected services on the instance; configService is read
		// later to obtain the Elasticsearch index name for search params.
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
57
58
59
	/**
60
	 * @param IFullTextSearchProvider $provider
61
	 * @param DocumentAccess $access
62
	 * @param SearchRequest $request
63
	 *
64
	 * @return array
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,array>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
65
	 * @throws ConfigurationException
66
	 * @throws SearchQueryGenerationException
67
	 */
68
	public function generateSearchQuery(
		IFullTextSearchProvider $provider, DocumentAccess $access, SearchRequest $request
	) {
		// Return an explicit array literal rather than writing to an index of
		// a never-initialised $query variable (implicit array creation).
		// The only entry is 'params', built by generateSearchQueryParams().
		return [
			'params' => $this->generateSearchQueryParams($provider, $access, $request)
		];
	}
75
76
77
	/**
78
	 * @param IFullTextSearchProvider $provider
79
	 * @param DocumentAccess $access
80
	 * @param SearchRequest $request
81
	 *
82
	 * @return array
83
	 * @throws ConfigurationException
84
	 * @throws SearchQueryGenerationException
85
	 */
86
	public function generateSearchQueryParams(
		IFullTextSearchProvider $provider, DocumentAccess $access, SearchRequest $request
	) {
		// Top-level request settings: target index, document type and paging.
		// 'from' is the zero-based offset of the first hit of the requested page.
		$searchParams = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'size'  => $request->getSize(),
			'from'  => (($request->getPage() - 1) * $request->getSize())
		];

		// Scoring part: the clauses derived from the user's search string.
		$must = ['bool' => ['should' => $this->generateSearchQueryContent($request)]];

		// Non-scoring restrictions, in this order: provider, access rights, tags.
		$filter = [
			['bool' => ['must' => ['term' => ['provider' => $provider->getId()]]]],
			['bool' => ['should' => $this->generateSearchQueryAccess($access)]],
			['bool' => ['should' => $this->generateSearchQueryTags($request->getTags())]]
		];

		$searchParams['body'] = [
			'query'     => ['bool' => ['must' => $must, 'filter' => $filter]],
			'highlight' => $this->generateSearchHighlighting()
		];

		// Wildcard/regex additions are applied in place on the query subtree.
		$this->improveSearchQuerying($request, $searchParams['body']['query']);

		return $searchParams;
	}
110
111
112
	/**
113
	 * @param SearchRequest $request
114
	 * @param array $arr
115
	 */
116
	private function improveSearchQuerying(SearchRequest $request, &$arr) {
		// $arr is modified in place by each step.

		// Extra wildcard clauses for the scoring ("must") part of the query.
		$this->improveSearchWildcardQueries($request, $arr);

		// Extra filter clauses; wildcard filters are appended before regex
		// filters, so the call order determines the order inside 'filter'.
		$this->improveSearchWildcardFilters($request, $arr);
		$this->improveSearchRegexFilters($request, $arr);
	}
121
122
123
	/**
124
	 * @param SearchRequest $request
125
	 * @param array $arr
126
	 */
127 View Code Duplication
	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
		foreach ($request->getWildcardQueries() as $wildcardGroup) {
			// Wrap every entry of the group in its own 'wildcard' clause.
			$clauses = [];
			foreach ($wildcardGroup as $definition) {
				$clauses[] = ['wildcard' => $definition];
			}

			// The whole group is pushed as ONE element (a nested list) onto
			// the existing 'should' value of the scoring part of the query.
			array_push($arr['bool']['must']['bool']['should'], $clauses);
		}
	}
140
141
142
	/**
143
	 * @param SearchRequest $request
144
	 * @param array $arr
145
	 */
146 View Code Duplication
	private function improveSearchWildcardFilters(SearchRequest $request, &$arr) {
		foreach ($request->getWildcardFilters() as $filterGroup) {
			// Wrap every entry of the group in its own 'wildcard' clause.
			$clauses = [];
			foreach ($filterGroup as $definition) {
				$clauses[] = ['wildcard' => $definition];
			}

			// One additional filter per group; its entries are alternatives
			// ('should') inside that filter.
			$arr['bool']['filter'][] = ['bool' => ['should' => $clauses]];
		}
	}
159
160
161
	/**
162
	 * @param SearchRequest $request
163
	 * @param array $arr
164
	 */
165
	private function improveSearchRegexFilters(SearchRequest $request, &$arr) {
		foreach ($request->getRegexFilters() as $filterGroup) {
			// Wrap every entry of the group in its own 'regexp' clause.
			$clauses = [];
			foreach ($filterGroup as $definition) {
				$clauses[] = ['regexp' => $definition];
			}

			// One additional filter per group; its entries are alternatives
			// ('should') inside that filter.
			$arr['bool']['filter'][] = ['bool' => ['should' => $clauses]];
		}
	}
178
179
180
	/**
181
	 * @param SearchRequest $request
182
	 *
183
	 * @return array<string,array<string,array>>
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,array>?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
184
	 * @throws SearchQueryGenerationException
185
	 */
186
	private function generateSearchQueryContent(SearchRequest $request) {
		$search = strtolower($request->getSearch());

		// Tokenise the lower-cased search string: a double-quoted phrase
		// (taken together with the character preceding it) or a run of
		// non-whitespace characters. The string is padded with one space
		// on each side before matching.
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', ' ' . $search . ' ', $matches);

		$contents = [];
		foreach ($matches[0] as $token) {
			try {
				$contents[] = $this->generateQueryContent($token);
			} catch (QueryContentGenerationException $e) {
				// Tokens that yield no usable word are skipped.
			}
		}

		// Nothing searchable was found in the request.
		if (count($contents) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $contents);
	}
205
206
207
	/**
208
	 * @param string $word
209
	 *
210
	 * @return QueryContent
211
	 * @throws QueryContentGenerationException
212
	 */
213
	private function generateQueryContent($word) {
		// Parse the raw token; rejects tokens whose parsed word is empty by
		// throwing QueryContentGenerationException.
		$searchQueryContent = new QueryContent($word);
		$parsedWord = $searchQueryContent->getWord();

		// The previous check used sizeof() on the parsed word. getWord() is
		// used as a scalar match value elsewhere in this class, and
		// sizeof()/count() of a string is never 0: it returns 1 on PHP < 8
		// (with a warning since 7.2) and throws a TypeError on PHP 8. Empty
		// words were therefore never rejected. Test for emptiness explicitly;
		// null and [] are covered too, matching what sizeof() === 0 caught.
		if ($parsedWord === null || $parsedWord === '' || $parsedWord === []) {
			throw new QueryContentGenerationException();
		}

		return $searchQueryContent;
	}
222
223
224
	/**
225
	 * @param SearchRequest $request
226
	 * @param QueryContent[] $queryContents
227
	 *
228
	 * @return array
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,array>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
229
	 */
230
	private function generateSearchQueryFromQueryContent(SearchRequest $request, $queryContents) {
		// Fields to search: always 'content' and 'title', plus every
		// requested part addressed as 'parts.<name>'.
		$prefixPart = function($part) {
			return 'parts.' . $part;
		};
		$fields = array_merge(['content', 'title'], array_map($prefixPart, $request->getParts()));

		// Group the per-word clauses by the occurrence key each word reports
		// through getShould().
		$grouped = [];
		foreach ($queryContents as $content) {
			$occurrence = $content->getShould();
			$grouped[$occurrence][] = $this->generateQueryContentFields($content, $fields);
		}

		// Each occurrence key holds a single-element list containing all of
		// its clauses.
		$boolQuery = [];
		foreach ($grouped as $occurrence => $clauses) {
			$boolQuery[$occurrence] = [$clauses];
		}

		return ['bool' => $boolQuery];
	}
252
253
254
	/**
255
	 * @param QueryContent $queryContent
256
	 * @param array $fields
257
	 *
258
	 * @return array
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,array<string,array>>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
259
	 */
260
	private function generateQueryContentFields(QueryContent $queryContent, $fields) {
		// One clause per field: <match type> => [<field> => <word>].
		$alternatives = [];
		foreach ($fields as $fieldName) {
			$matchType = $queryContent->getMatch();
			$alternatives[] = [$matchType => [$fieldName => $queryContent->getWord()]];
		}

		// The per-field clauses are alternatives inside one bool/should.
		return ['bool' => ['should' => $alternatives]];
	}
269
270
271
	/**
272
	 * @param DocumentAccess $access
273
	 *
274
	 * @return array<string,array>[]
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,array>[]?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
275
	 */
276
	private function generateSearchQueryAccess(DocumentAccess $access) {
		// Fixed clauses: viewer as owner, viewer listed in 'users', and the
		// '__all' marker in 'users'.
		$terms = [
			['term' => ['owner' => $access->getViewerId()]],
			['term' => ['users' => $access->getViewerId()]],
			['term' => ['users' => '__all']]
		];

		// One clause per group, then one per circle, of the access object.
		foreach ($access->getGroups() as $groupId) {
			$terms[] = ['term' => ['groups' => $groupId]];
		}

		foreach ($access->getCircles() as $circleId) {
			$terms[] = ['term' => ['circles' => $circleId]];
		}

		return $terms;
	}
293
294
295
	/**
296
	 * @param array $tags
297
	 *
298
	 * @return array<string,array>[]
299
	 */
300
	private function generateSearchQueryTags($tags) {
		// Build one 'term' clause on the 'tags' field per requested tag;
		// an empty tag list yields an empty clause list.
		$terms = [];
		foreach ($tags as $tagName) {
			array_push($terms, ['term' => ['tags' => $tagName]]);
		}

		return $terms;
	}
309
310
	/**
	 * Highlight section attached to the search body: highlights the
	 * 'content' field, with empty pre/post tag strings.
	 *
	 * @return array
	 */
	private function generateSearchHighlighting() {
		$highlight = [];

		// An empty object (not an empty array) is used as the per-field
		// setting - presumably so it serialises as {} rather than [].
		$highlight['fields'] = ['content' => new \stdClass()];
		$highlight['pre_tags'] = [''];
		$highlight['post_tags'] = [''];

		return $highlight;
	}
317
318
319
}
320