Completed
Push — master ( a799ec...765778 )
by Maxence
01:37
created

generateSearchQueryFromQueryContent()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 15
rs 9.7666
c 0
b 0
f 0
cc 3
nc 4
nop 2
1
<?php
2
/**
3
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\FullTextSearch_ElasticSearch\Service;
28
29
use OCA\FullTextSearch\IFullTextSearchProvider;
30
use OCA\FullTextSearch\Model\DocumentAccess;
31
use OCA\FullTextSearch\Model\SearchRequest;
32
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
33
use OCA\FullTextSearch_ElasticSearch\Exceptions\QueryContentGenerationException;
34
use OCA\FullTextSearch_ElasticSearch\Exceptions\SearchQueryGenerationException;
35
use OCA\FullTextSearch_ElasticSearch\Model\QueryContent;
36
37
38
class SearchMappingService {
39
40
	/** @var ConfigService */
41
	private $configService;
42
43
	/** @var MiscService */
44
	private $miscService;
45
46
47
	/**
48
	 * SearchMappingService constructor.
49
	 *
50
	 * @param ConfigService $configService
51
	 * @param MiscService $miscService
52
	 */
53
	public function __construct(ConfigService $configService, MiscService $miscService) {
		// Store both injected services on the instance.
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
57
58
59
	/**
60
	 * @param IFullTextSearchProvider $provider
61
	 * @param DocumentAccess $access
62
	 * @param SearchRequest $request
63
	 *
64
	 * @return array
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,array>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
65
	 * @throws ConfigurationException
66
	 * @throws SearchQueryGenerationException
67
	 */
68
	public function generateSearchQuery(
		IFullTextSearchProvider $provider, DocumentAccess $access, SearchRequest $request
	) {
		// Return an explicit array literal instead of relying on PHP to create
		// an undeclared $query variable on its first key assignment.
		return [
			'params' => $this->generateSearchQueryParams($provider, $access, $request)
		];
	}
75
76
77
	/**
78
	 * @param IFullTextSearchProvider $provider
79
	 * @param DocumentAccess $access
80
	 * @param SearchRequest $request
81
	 *
82
	 * @return array
83
	 * @throws ConfigurationException
84
	 * @throws SearchQueryGenerationException
85
	 */
86
	public function generateSearchQueryParams(
		IFullTextSearchProvider $provider, DocumentAccess $access, SearchRequest $request
	) {
		// Target index and result window; 'from' is computed as (page - 1) * size.
		$searchParams = [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'size'  => $request->getSize(),
			'from'  => (($request->getPage() - 1) * $request->getSize())
		];

		// The searched words go under the 'must' clause ...
		$must = ['bool' => ['should' => $this->generateSearchQueryContent($request)]];

		// ... while provider, access and tags are added as 'filter' entries,
		// in this order.
		$filters = [
			['bool' => ['must' => ['term' => ['provider' => $provider->getId()]]]],
			['bool' => ['should' => $this->generateSearchQueryAccess($access)]],
			['bool' => ['should' => $this->generateSearchQueryTags($request->getTags())]]
		];

		$searchParams['body'] = [
			'query'     => ['bool' => ['must' => $must, 'filter' => $filters]],
			'highlight' => $this->generateSearchHighlighting()
		];

		// The query is passed by reference so extra filters can be appended to it.
		$this->improveSearchQuerying($request, $searchParams['body']['query']);

		return $searchParams;
	}
110
111
112
	/**
113
	 * @param SearchRequest $request
114
	 * @param array $arr
115
	 */
116
	private function improveSearchQuerying(SearchRequest $request, &$arr) {
		// $arr is received by reference and modified in place: the wildcard
		// filters of the request are appended first, then the regex filters.
		$this->improveSearchWildcardFilters($request, $arr);
		$this->improveSearchRegexFilters($request, $arr);
	}
121
122
123
//	/**
0 ignored issues
show
Unused Code Comprehensibility introduced by
49% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
124
//	 * @param SearchRequest $request
125
//	 * @param array $arr
126
//	 */
127
//	private function improveSearchWildcardQueries(SearchRequest $request, &$arr) {
128
//
129
//		$queries = $request->getWildcardQueries();
130
//		foreach ($queries as $query) {
131
//			$wildcards = [];
132
//			foreach ($query as $entry) {
133
//				$wildcards[] = ['wildcard' => $entry];
134
//			}
135
//
136
//			array_push($arr['bool']['must']['bool']['should'], $wildcards);
137
//		}
138
//
139
//	}
140
141
142
	/**
143
	 * @param SearchRequest $request
144
	 * @param array $arr
145
	 */
146 View Code Duplication
	private function improveSearchWildcardFilters(SearchRequest $request, &$arr) {
		// Each wildcard filter of the request is a group of entries; every group
		// becomes one new element of $arr['bool']['filter'], with its entries
		// listed under that element's 'should' key.
		foreach ($request->getWildcardFilters() as $group) {
			$clauses = [];
			foreach ($group as $pattern) {
				$clauses[] = ['wildcard' => $pattern];
			}

			$arr['bool']['filter'][] = ['bool' => ['should' => $clauses]];
		}
	}
159
160
161
	/**
162
	 * @param SearchRequest $request
163
	 * @param array $arr
164
	 */
165 View Code Duplication
	private function improveSearchRegexFilters(SearchRequest $request, &$arr) {
		// Each regex filter of the request is a group of entries; every group
		// becomes one new element of $arr['bool']['filter'], with its entries
		// listed under that element's 'should' key.
		foreach ($request->getRegexFilters() as $group) {
			$clauses = [];
			foreach ($group as $expression) {
				$clauses[] = ['regexp' => $expression];
			}

			$arr['bool']['filter'][] = ['bool' => ['should' => $clauses]];
		}
	}
178
179
180
	/**
181
	 * @param SearchRequest $request
182
	 *
183
	 * @return array<string,array<string,array>>
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,array>?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
184
	 * @throws SearchQueryGenerationException
185
	 */
186
	private function generateSearchQueryContent(SearchRequest $request) {
		// strtolower() operates on bytes, so it cannot reliably lowercase the
		// non-ASCII characters of a UTF-8 search string; use the multibyte
		// variant whenever the mbstring extension is loaded.
		$search = (string)$request->getSearch();
		$str = function_exists('mb_strtolower')
			? mb_strtolower($search, 'UTF-8')
			: strtolower($search);

		// Split the search into double-quoted phrases and whitespace-separated
		// words. The leading padding space gives a phrase at the very start the
		// preceding character the pattern requires.
		preg_match_all('/[^?]"(?:\\\\.|[^\\\\"])*"|\S+/', " $str ", $words);

		$queryContent = [];
		foreach ($words[0] as $word) {
			try {
				$queryContent[] = $this->generateQueryContent(trim($word));
			} catch (QueryContentGenerationException $e) {
				// Words that produce no usable content are deliberately skipped.
				continue;
			}
		}

		// Nothing searchable is left: the whole query cannot be generated.
		if (count($queryContent) === 0) {
			throw new SearchQueryGenerationException();
		}

		return $this->generateSearchQueryFromQueryContent($request, $queryContent);
	}
205
206
207
	/**
208
	 * @param string $word
209
	 *
210
	 * @return QueryContent
211
	 * @throws QueryContentGenerationException
212
	 */
213
	private function generateQueryContent($word) {
		$content = new QueryContent($word);

		// Only content whose resulting word is not empty is returned.
		if (strlen($content->getWord()) > 0) {
			return $content;
		}

		throw new QueryContentGenerationException();
	}
222
223
224
	/**
225
	 * @param SearchRequest $request
226
	 * @param QueryContent[] $queryContents
227
	 *
228
	 * @return array
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,array>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
229
	 */
230
	private function generateSearchQueryFromQueryContent(SearchRequest $request, $queryContents) {
		// Group the generated field queries by the value returned by getShould().
		$grouped = [];
		foreach ($queryContents as $content) {
			$clause = $content->getShould();
			$grouped[$clause][] = $this->generateQueryContentFields($request, $content);
		}

		// Each clause receives a single element: the whole list of its group.
		$bool = [];
		foreach ($grouped as $clause => $queries) {
			$bool[$clause] = [$queries];
		}

		return ['bool' => $bool];
	}
245
246
247
	/**
248
	 * @param SearchRequest $request
249
	 * @param QueryContent $content
250
	 *
251
	 * @return array
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,array<string,array>>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
252
	 */
253
	private function generateQueryContentFields(SearchRequest $request, QueryContent $content) {
		// Requested parts are addressed through the 'parts.' prefix.
		$prefixedParts = array_map(
			function($part) {
				return 'parts.' . $part;
			}, $request->getParts()
		);
		$matchFields = array_merge(['content', 'title'], $request->getFields(), $prefixedParts);

		// One clause per field, keyed by the match type of the content; fields
		// rejected by fieldIsOutLimit() are left out.
		$clauses = [];
		foreach ($matchFields as $name) {
			if ($this->fieldIsOutLimit($request, $name)) {
				continue;
			}

			$clauses[] = [$content->getMatch() => [$name => $content->getWord()]];
		}

		// Wildcard fields look for the word anywhere inside the field value.
		foreach ($request->getWildcardFields() as $name) {
			if ($this->fieldIsOutLimit($request, $name)) {
				continue;
			}

			$clauses[] = ['wildcard' => [$name => '*' . $content->getWord() . '*']];
		}

		return ['bool' => ['should' => $clauses]];
	}
276
277
278
	/**
279
	 * @param DocumentAccess $access
280
	 *
281
	 * @return array<string,array>
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,array>[]?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
282
	 */
283
	private function generateSearchQueryAccess(DocumentAccess $access) {
		// Fixed terms: the viewer as owner, the viewer as listed user, and the
		// special '__all' users entry.
		$terms = [
			['term' => ['owner' => $access->getViewerId()]],
			['term' => ['users' => $access->getViewerId()]],
			['term' => ['users' => '__all']]
		];

		// Followed by one term per group, then one term per circle.
		foreach ($access->getGroups() as $groupId) {
			$terms[] = ['term' => ['groups' => $groupId]];
		}
		foreach ($access->getCircles() as $circleId) {
			$terms[] = ['term' => ['circles' => $circleId]];
		}

		return $terms;
	}
300
301
302
	/**
303
	 * @param SearchRequest $request
304
	 * @param string $field
305
	 *
306
	 * @return bool
307
	 */
308
	private function fieldIsOutLimit(SearchRequest $request, $field) {
		$allowed = $request->getLimitFields();

		// With no limit set no field is rejected; otherwise a field is out of
		// limit exactly when it is not part of the list.
		return sizeof($allowed) !== 0 && !in_array($field, $allowed);
	}
320
321
322
	/**
323
	 * @param array $tags
324
	 *
325
	 * @return array<string,array>
326
	 */
327
	private function generateSearchQueryTags($tags) {
		// One 'term' clause on the 'tags' field per requested tag, in order.
		$clauses = [];
		foreach ($tags as $tag) {
			$term = ['tags' => $tag];
			$clauses[] = ['term' => $term];
		}

		return $clauses;
	}
336
337
	private function generateSearchHighlighting() {
		// Highlight settings of the search body: the 'content' field is mapped
		// to an empty object, and both tag lists hold a single empty string.
		$highlight = [];
		$highlight['fields'] = ['content' => new \stdClass()];
		$highlight['pre_tags'] = [''];
		$highlight['post_tags'] = [''];

		return $highlight;
	}
344
345
346
}
347