Completed
Push — master ( a7a7a5...ab8220 )
by Maxence
01:58
created

SearchMappingService::generateSearchQuery()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 7
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 4
nc 1
nop 3
1
<?php
2
/**
3
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\FullTextSearch_ElasticSearch\Service;
28
29
use OCA\FullTextSearch\IFullTextSearchProvider;
30
use OCA\FullTextSearch\Model\DocumentAccess;
31
use OCA\FullTextSearch\Model\SearchRequest;
32
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
33
34
35
class SearchMappingService {
36
37
	/** @var ConfigService Configuration access; read for the Elasticsearch index name when search params are built. */
38
	private $configService;
39
40
	/** @var MiscService Stored by the constructor; no method of this class reads it. */
41
	private $miscService;
42
43
44
	/**
45
	 * SearchMappingService constructor; keeps both injected services on the instance.
46
	 *
47
	 * @param ConfigService $configService used later to obtain the Elasticsearch index name
48
	 * @param MiscService $miscService stored on the instance (not read by any method of this class)
49
	 */
50
	public function __construct(ConfigService $configService, MiscService $miscService) {
51
		$this->configService = $configService;
52
		$this->miscService = $miscService;
53
	}
54
55
56
	/**
57
	 * @param IFullTextSearchProvider $provider
58
	 * @param DocumentAccess $access
59
	 * @param SearchRequest $request
60
	 *
61
	 * @return array<string,array>
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,array>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
62
	 * @throws ConfigurationException
63
	 */
64
	public function generateSearchQuery(
		IFullTextSearchProvider $provider, DocumentAccess $access, SearchRequest $request
	) {
		// Return an explicit array literal rather than writing into a variable that was
		// never initialised; the result holds a single 'params' entry.
		return [
			'params' => $this->generateSearchQueryParams($provider, $access, $request)
		];
	}
71
72
73
	/**
	 * Assemble the parameters of a search call: index, type, paging values and a body
	 * holding the bool query and the highlight settings.
	 *
	 * The bool query has a `must` part (the alternatives generated from the searched words)
	 * and three `filter` entries: the provider id, the access terms of the viewer and the
	 * requested tags. The wildcard queries and the wildcard/regex filters of the request
	 * are added to the query as the last step.
	 *
	 * @param IFullTextSearchProvider $provider
	 * @param DocumentAccess $access
	 * @param SearchRequest $request
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function generateSearchQueryParams(
		IFullTextSearchProvider $provider, DocumentAccess $access, SearchRequest $request
	) {
		$params = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'size'  => $request->getSize(),
			// A page below 1 would give a negative offset, which Elasticsearch rejects;
			// fall back to the first page in that case.
			'from'  => max(0, ($request->getPage() - 1) * $request->getSize())
		];

		$content = $this->generateSearchQueryContent($request);

		$filters = [
			['bool' => ['must' => ['term' => ['provider' => $provider->getId()]]]],
			['bool' => ['should' => $this->generateSearchQueryAccess($access)]],
			['bool' => ['should' => $this->generateSearchQueryTags($request->getTags())]]
		];

		$params['body'] = [
			'query'     => [
				'bool' => [
					'must'   => ['bool' => ['should' => $content]],
					'filter' => $filters
				]
			],
			'highlight' => $this->generateSearchHighlighting()
		];

		// The query part is passed by reference and extended in place.
		$this->improveSearchQuerying($request, $params['body']['query']);

		return $params;
	}
105
106
107
	/**
108
	 * Extend the query in place with the wildcard queries, the wildcard filters and the
	 * regex filters of the request, in that order.
	 *
	 * @param SearchRequest $request
109
	 * @param array $arr query array, modified by reference (the caller passes $params['body']['query'])
110
	 */
111
	private function improveSearchQuerying(SearchRequest $request, &$arr) {
112
		$this->improveSearchWildcardQueries($request, $arr);
113
		$this->improveSearchWildcardFilters($request, $arr);
114
		$this->improveSearchRegexFilters($request, $arr);
115
	}
116
117
118
	/**
	 * Append every wildcard query group of the request to the `should` list of the
	 * `must` part of the query.
	 *
	 * @param SearchRequest $request
	 * @param array $arr query array, modified by reference
	 */
	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
		foreach ($request->getWildcardQueries() as $query) {
			// NOTE(review): the whole group is pushed as ONE element, so the `should` list
			// receives a nested list of wildcard clauses - confirm this is the intended shape.
			array_push(
				$arr['bool']['must']['bool']['should'],
				$this->wrapSearchEntries('wildcard', $query)
			);
		}
	}


	/**
	 * Add one `filter` entry per wildcard filter group of the request; inside an entry
	 * the wildcard clauses are alternatives (`should`).
	 *
	 * @param SearchRequest $request
	 * @param array $arr query array, modified by reference
	 */
	private function improveSearchWildcardFilters(SearchRequest $request, &$arr) {
		foreach ($request->getWildcardFilters() as $filter) {
			$arr['bool']['filter'][]['bool']['should'] = $this->wrapSearchEntries('wildcard', $filter);
		}
	}


	/**
	 * Add one `filter` entry per regex filter group of the request; inside an entry
	 * the regexp clauses are alternatives (`should`).
	 *
	 * @param SearchRequest $request
	 * @param array $arr query array, modified by reference
	 */
	private function improveSearchRegexFilters(SearchRequest $request, &$arr) {
		foreach ($request->getRegexFilters() as $filter) {
			$arr['bool']['filter'][]['bool']['should'] = $this->wrapSearchEntries('regexp', $filter);
		}
	}


	/**
	 * Wrap each entry of a group into a single-key clause, e.g. ['wildcard' => $entry].
	 * The result is always re-indexed from 0, whatever the keys of $entries are.
	 *
	 * @param string $keyword clause keyword used as the key of every generated item
	 * @param array $entries entries of one query/filter group
	 *
	 * @return array[]
	 */
	private function wrapSearchEntries($keyword, $entries) {
		$clauses = [];
		foreach ($entries as $entry) {
			$clauses[] = [$keyword => $entry];
		}

		return $clauses;
	}
173
174
175
	/**
	 * Turn the search string of the request into a list of alternative clauses.
	 *
	 * The string is lower-cased and split on single spaces. Every word becomes a `prefix`
	 * clause, or a `match` clause when the word is wrapped in double quotes. The result
	 * holds one bool/must group on `title`, one on `content`, followed by one group per
	 * part requested in the search request.
	 *
	 * @param SearchRequest $request
	 *
	 * @return array[] list of ['bool' => ['must' => ...]] items
	 */
	private function generateSearchQueryContent(SearchRequest $request) {
		$search = $request->getSearch();

		// strtolower() is byte-based and does not lower-case multibyte UTF-8 letters,
		// so use the multibyte variant whenever the mbstring extension is loaded.
		$str = function_exists('mb_strtolower')
			? mb_strtolower($search, 'UTF-8')
			: strtolower($search);

		$titleClauses = $contentClauses = $kwParts = [];
		foreach (explode(' ', $str) as $word) {
			// 'prefix' by default; switched to 'match' (quotes removed) for a quoted word.
			$kw = 'prefix';
			$this->modifySearchQueryContentOnCompleteWord($kw, $word);

			$titleClauses[] = [$kw => ['title' => $word]];
			$contentClauses[] = [$kw => ['content' => $word]];
			$kwParts[] = ['kw' => $kw, 'word' => $word];
		}

		$query = [
			['bool' => ['must' => $titleClauses]],
			['bool' => ['must' => $contentClauses]]
		];

		return array_merge($query, $this->complementSearchWithParts($request, $kwParts));
	}
204
205
206
	/**
	 * Switch to a `match` clause when the word is wrapped in double quotes.
	 *
	 * Both arguments are modified in place: for a quoted word $kw becomes 'match' and the
	 * surrounding quotes are removed from $word. Anything else is left untouched.
	 *
	 * @param string $kw
	 * @param string $word
	 */
	private function modifySearchQueryContentOnCompleteWord(&$kw, &$word) {
		// A lone '"' is both the first and the last character: it is not a quoted word.
		if (strlen($word) < 2) {
			return;
		}

		if (substr($word, 0, 1) !== '"' || substr($word, -1) !== '"') {
			return;
		}

		$kw = 'match';
		// Before PHP 8, substr() returns false when nothing is left between the quotes;
		// the cast keeps $word a string in that case.
		$word = (string)substr($word, 1, -1);
	}
218
219
220
	/**
	 * Build one bool/must group per part requested in the search request, so the searched
	 * words can also be looked up in the matching `parts.<name>` field.
	 *
	 * @param SearchRequest $request
	 * @param array $kwParts list of ['kw' => clause keyword, 'word' => searched word]
	 *
	 * @return array
	 */
	private function complementSearchWithParts(SearchRequest $request, $kwParts) {
		$groups = [];

		foreach ($request->getParts() as $part) {
			$must = [];
			foreach ($kwParts as $item) {
				$keyword = $item['kw'];
				$must[] = [
					$keyword => ['parts.' . $part => $item['word']]
				];
			}

			$groups[] = ['bool' => ['must' => $must]];
		}

		return $groups;
	}
239
240
241
	/**
242
	 * @param DocumentAccess $access
243
	 *
244
	 * @return array<string,array>[]
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,array>[]?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
245
	 */
246
	private function generateSearchQueryAccess(DocumentAccess $access) {
		// Fixed alternatives: the viewer as owner, the viewer among the users,
		// and the literal '__all' among the users.
		$terms = [
			['term' => ['owner' => $access->getViewerId()]],
			['term' => ['users' => $access->getViewerId()]],
			['term' => ['users' => '__all']]
		];

		// One more alternative for each group, then for each circle, of the access object.
		foreach ($access->getGroups() as $groupId) {
			$terms[] = ['term' => ['groups' => $groupId]];
		}

		foreach ($access->getCircles() as $circleId) {
			$terms[] = ['term' => ['circles' => $circleId]];
		}

		return $terms;
	}
263
264
265
	/**
	 * Convert a list of tags into a list of `term` clauses on the `tags` field.
	 * An empty list of tags gives an empty list of clauses.
	 *
	 * @param array $tags
	 *
	 * @return array<string,array>[]
	 */
	private function generateSearchQueryTags($tags) {
		$terms = [];

		foreach ($tags as $value) {
			$clause = ['tags' => $value];
			$terms[] = ['term' => $clause];
		}

		return $terms;
	}
279
280
	/**
	 * Highlight settings of the search body: the `content` field, with empty strings
	 * as pre and post tags.
	 *
	 * @return array
	 */
	private function generateSearchHighlighting() {
		$highlight = [];

		// Empty object as field options - presumably so it is serialised as {} and not [].
		$highlight['fields'] = ['content' => new \stdClass()];
		$highlight['pre_tags'] = [''];
		$highlight['post_tags'] = [''];

		return $highlight;
	}
287
288
289
}
290