Completed
Push — master ( 94b632...a799ec )
by Maxence
02:11
created

IndexMappingService::generateDeleteQuery()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 10
rs 9.9332
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
/**
3
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\FullTextSearch_ElasticSearch\Service;
28
29
use Elasticsearch\Client;
30
use Elasticsearch\Common\Exceptions\Missing404Exception;
31
use OCA\FullTextSearch\Model\IndexDocument;
32
use OCA\FullTextSearch_ElasticSearch\Exceptions\AccessIsEmptyException;
33
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
34
35
36
class IndexMappingService {
37
38
	/** @var ConfigService */
39
	private $configService;
40
41
	/** @var MiscService */
42
	private $miscService;
43
44
45
	/**
46
	 * IndexMappingService constructor.
47
	 *
48
	 * @param ConfigService $configService
49
	 * @param MiscService $miscService
50
	 */
51
	public function __construct(ConfigService $configService, MiscService $miscService) {
		// Store the injected services on the instance.
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
55
56
57
	/**
58
	 * @param Client $client
59
	 * @param IndexDocument $document
60
	 *
61
	 * @return array
0 ignored issues
show
Documentation introduced by
Should the return type not be callable? Also, consider making the array more specific, something like array<String>, or String[].

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

If the return type contains the type array, this check recommends the use of a more specific type like String[] or array<String>.

Loading history...
62
	 * @throws ConfigurationException
63
	 * @throws AccessIsEmptyException
64
	 */
65
	public function indexDocumentNew(Client $client, IndexDocument $document) {
		// The request parameters are nested under an 'index' key because
		// onIndexingDocument() writes its additions to $arr['index'].
		$request = [];
		$request['index'] = [
			'index' => $this->configService->getElasticIndex(),
			'id'    => $document->getProviderId() . ':' . $document->getId(),
			'type'  => 'standard',
			'body'  => $this->generateIndexBody($document)
		];

		$this->onIndexingDocument($document, $request);

		// Only the inner array is handed to the client.
		$parameters = $request['index'];

		return $client->index($parameters);
	}
80
81
82
	/**
83
	 * @param Client $client
84
	 * @param IndexDocument $document
85
	 *
86
	 * @return array
0 ignored issues
show
Documentation introduced by
Should the return type not be callable? Also, consider making the array more specific, something like array<String>, or String[].

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

If the return type contains the type array, this check recommends the use of a more specific type like String[] or array<String>.

Loading history...
87
	 * @throws ConfigurationException
88
	 * @throws AccessIsEmptyException
89
	 */
90
	public function indexDocumentUpdate(Client $client, IndexDocument $document) {
		// Same request layout as indexDocumentNew(), except that the generated
		// body is wrapped in a 'doc' key for the update call.
		$request = [];
		$request['index'] = [
			'index' => $this->configService->getElasticIndex(),
			'id'    => $document->getProviderId() . ':' . $document->getId(),
			'type'  => 'standard',
			'body'  => ['doc' => $this->generateIndexBody($document)]
		];

		$this->onIndexingDocument($document, $request);

		try {
			$result = $client->update($request['index']);
		} catch (Missing404Exception $e) {
			// The client reported a 404 for this id: index the document as new.
			$result = $this->indexDocumentNew($client, $document);
		}

		return $result;
	}
108
109
110
	/**
111
	 * @param Client $client
112
	 * @param string $providerId
113
	 * @param string|int $documentId
114
	 *
115
	 * @throws ConfigurationException
116
	 */
117
	public function indexDocumentRemove(Client $client, $providerId, $documentId) {
		// The id uses the same "<providerId>:<documentId>" format that the
		// indexing methods of this class build.
		$params = [
			'index' => $this->configService->getElasticIndex(),
			'id'    => $providerId . ':' . $documentId,
			'type'  => 'standard'
		];

		try {
			$client->delete($params);
		} catch (Missing404Exception $e) {
			// Deliberately ignored: a 404 means the document is not in the
			// index, which is exactly the state this method is meant to reach.
		}
	}
132
133
134
	/**
135
	 * @param IndexDocument $document
136
	 * @param array $arr
137
	 */
138
	public function onIndexingDocument(IndexDocument $document, &$arr) {
		$encoding = $document->isContentEncoded();
		if ($encoding !== IndexDocument::ENCODED_BASE64) {
			// Anything other than base64 content leaves the request untouched.
			return;
		}

		// Base64 content: add the 'attachment' pipeline to the request.
		$arr['index']['pipeline'] = 'attachment';
	}
143
144
145
	/**
146
	 * @param IndexDocument $document
147
	 *
148
	 * @return array
149
	 * @throws AccessIsEmptyException
150
	 */
151
	public function generateIndexBody(IndexDocument $document) {
		$documentAccess = $document->getAccess();
		if (null === $documentAccess) {
			throw new AccessIsEmptyException('DocumentAccess is Empty');
		}

		// Access-related fields first, then the document's own fields.
		$fields = [];
		$fields['owner'] = $documentAccess->getOwnerId();
		$fields['users'] = $documentAccess->getUsers();
		$fields['groups'] = $documentAccess->getGroups();
		$fields['circles'] = $documentAccess->getCircles();
		$fields['tags'] = $document->getTags();
		$fields['hash'] = $document->getHash();
		$fields['provider'] = $document->getProviderId();
		$fields['source'] = $document->getSource();
		$fields['title'] = $document->getTitle();
		$fields['parts'] = $document->getParts();

		// 'content' is only present in the body when the document has some.
		if (null !== $document->getContent()) {
			$fields['content'] = $document->getContent();
		}

		// $fields is passed last, so for string keys its values replace any
		// entry of the same name coming from getInfoAll().
		$info = $document->getInfoAll();

		return array_merge($info, $fields);
	}
177
178
179
	/**
180
	 * @param bool $complete
181
	 *
182
	 * @return array<string,string|array<string,array<string,array<string,array>>>>
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,string|arra...tring,array|boolean>>>>?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
183
	 * @throws ConfigurationException
184
	 */
185
	public function generateGlobalMap($complete = true) {
		$params = [
			'index' => $this->configService->getElasticIndex()
		];

		// With $complete === false only the index name is returned.
		if ($complete === false) {
			return $params;
		}

		// Negation words shared by both char filters. Kept in one place so the
		// two patterns cannot drift apart; the resulting pattern strings are
		// identical to the previous hard-coded literals.
		$negations = 'never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt|'
					 . 'shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint';
		$negationGroup = '((?i:' . $negations . '))';

		$params['body'] = [
			'settings' => [
				'analysis' => [
					'filter'      => [
						'shingle' => [
							'type' => 'shingle'
						]
					],
					'char_filter' => [
						// "<word> <negation>": prefixes the preceding word with '~'.
						'pre_negs'  => [
							'type'        => 'pattern_replace',
							'pattern'     => '(\\w+)\\s+' . $negationGroup . '\\b',
							'replacement' => '~$1 $2'
						],
						// "<negation> <word>": prefixes the following word with '~'.
						'post_negs' => [
							'type'        => 'pattern_replace',
							'pattern'     => '\\b' . $negationGroup . '\\s+(\\w+)',
							'replacement' => '$1 ~$2'
						]
					],
					'analyzer'    => [
						// Custom analyzer named 'analyzer'; its tokenizer comes
						// from the app configuration.
						'analyzer' => [
							'type'      => 'custom',
							'tokenizer' => $this->configService->getAppValue(
								ConfigService::ANALYZER_TOKENIZER
							),
							'filter'    => ['lowercase', 'stop', 'kstem']
						]
					]
				]
			],
			'mappings' => [
				'standard' => [
					'dynamic'    => true,
					'properties' => [
						'source'   => [
							'type' => 'keyword'
						],
						// 'title' and 'content' are both copied into 'combined'.
						'title'    => [
							'type'        => 'text',
							'analyzer'    => 'keyword',
							'term_vector' => 'yes',
							'copy_to'     => 'combined'
						],
						'provider' => [
							'type' => 'keyword'
						],
						'tags'     => [
							'type' => 'keyword'
						],
						'content'  => [
							'type'        => 'text',
							'analyzer'    => 'analyzer',
							'term_vector' => 'yes',
							'copy_to'     => 'combined'
						],
						'owner'    => [
							'type' => 'keyword'
						],
						'users'    => [
							'type' => 'keyword'
						],
						'groups'   => [
							'type' => 'keyword'
						],
						'circles'  => [
							'type' => 'keyword'
						],
						'hash'     => [
							'type' => 'keyword'
						],
						'combined' => [
							'type'        => 'text',
							'analyzer'    => 'analyzer',
							'term_vector' => 'yes'
						]
					]
				]
			]
		];

		return $params;
	}
287
288
289
	/**
290
	 * @param bool $complete
291
	 *
292
	 * @return array<string,string|array<string,string|array<string,array<string,string|integer>>>>
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,string|arra...array<string,array>[]>>?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
293
	 */
294
	public function generateGlobalIngest($complete = true) {
		$pipeline = ['id' => 'attachment'];

		// With $complete === false only the pipeline id is returned.
		if ($complete !== false) {
			$attachment = [
				'field'         => 'content',
				'indexed_chars' => -1
			];
			$convert = [
				'field'        => 'attachment.content',
				'type'         => 'string',
				'target_field' => 'content'
			];
			$remove = [
				'field'          => 'attachment.content',
				'ignore_failure' => true
			];

			// All three processor definitions live in a single entry of the
			// 'processors' list, in this key order.
			$pipeline['body'] = [
				'description' => 'attachment',
				'processors'  => [
					[
						'attachment' => $attachment,
						'convert'    => $convert,
						'remove'     => $remove
					]
				]
			];
		}

		return $pipeline;
	}
325
326
327
	/**
328
	 * @param string $providerId
329
	 *
330
	 * @return array
0 ignored issues
show
Documentation introduced by
Consider making the return type a bit more specific; maybe use array<string,string>.

This check looks for the generic type array as a return type and suggests a more specific type. This type is inferred from the actual code.

Loading history...
331
	 * @throws ConfigurationException
332
	 */
333
	public function generateDeleteQuery($providerId) {
		// Match query on the 'provider' field, scoped to the configured index
		// and the 'standard' type.
		$indexName = $this->configService->getElasticIndex();

		return [
			'index' => $indexName,
			'type'  => 'standard',
			'body'  => [
				'query' => [
					'match' => ['provider' => $providerId]
				]
			]
		];
	}
343
344
}
345