Completed
Push — master ( 2566be...2cb116 )
by Maxence
01:40 queued 10s
created

IndexMappingService::generateGlobalMap()   B

Complexity

Conditions 3
Paths 3

Size

Total Lines 118

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 118
rs 8
c 0
b 0
f 0
cc 3
nc 3
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

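Commonly applied refactorings include Extract Method; when the method has many parameters or temporary variables, Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object can help as well.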

1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch_Elasticsearch - Use Elasticsearch to index the content of your nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\FullTextSearch_Elasticsearch\Service;
32
33
34
use Elasticsearch\Client;
35
use Elasticsearch\Common\Exceptions\Missing404Exception;
36
use OCA\FullTextSearch_Elasticsearch\Exceptions\AccessIsEmptyException;
37
use OCA\FullTextSearch_Elasticsearch\Exceptions\ConfigurationException;
38
use OCP\FullTextSearch\Model\IIndexDocument;
39
40
41
/**
42
 * Class IndexMappingService
43
 *
44
 * @package OCA\FullTextSearch_Elasticsearch\Service
45
 */
46
class IndexMappingService {


	/** @var ConfigService */
	private $configService;

	/** @var MiscService */
	private $miscService;


	/**
	 * IndexMappingService constructor.
	 *
	 * @param ConfigService $configService
	 * @param MiscService $miscService
	 */
	public function __construct(ConfigService $configService, MiscService $miscService) {
		$this->configService = $configService;
		$this->miscService = $miscService;
	}


	/**
	 * Index a document by sending a full 'index' request to the client.
	 *
	 * @param Client $client
	 * @param IIndexDocument $document
	 *
	 * @return array the value returned by $client->index()
	 * @throws ConfigurationException
	 * @throws AccessIsEmptyException
	 */
	public function indexDocumentNew(Client $client, IIndexDocument $document): array {
		$params = $this->generateDocumentParams($document->getProviderId(), $document->getId());
		$params['body'] = $this->generateIndexBody($document);

		// onIndexingDocument() expects the request nested under an 'index' key.
		$index = ['index' => $params];
		$this->onIndexingDocument($document, $index);

		return $client->index($index['index']);
	}


	/**
	 * Update a document with a partial 'update' request; when the client reports the
	 * document as missing (Missing404Exception), index it from scratch instead.
	 *
	 * @param Client $client
	 * @param IIndexDocument $document
	 *
	 * @return array the value returned by $client->update(), or by indexDocumentNew()
	 *               on fallback
	 * @throws ConfigurationException
	 * @throws AccessIsEmptyException
	 */
	public function indexDocumentUpdate(Client $client, IIndexDocument $document): array {
		$params = $this->generateDocumentParams($document->getProviderId(), $document->getId());
		$params['body'] = ['doc' => $this->generateIndexBody($document)];

		// onIndexingDocument() expects the request nested under an 'index' key.
		$index = ['index' => $params];
		$this->onIndexingDocument($document, $index);

		try {
			return $client->update($index['index']);
		} catch (Missing404Exception $e) {
			// nothing to update yet: create the document instead.
			return $this->indexDocumentNew($client, $document);
		}
	}


	/**
	 * Remove a document from the index. A document that is already missing is not
	 * treated as an error.
	 *
	 * @param Client $client
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @throws ConfigurationException
	 */
	public function indexDocumentRemove(Client $client, string $providerId, string $documentId) {
		$params = $this->generateDocumentParams($providerId, $documentId);

		try {
			$client->delete($params);
		} catch (Missing404Exception $e) {
			// the document is not in the index: there is nothing left to remove.
		}
	}


	/**
	 * Route the request through the 'attachment' ingest pipeline when the document
	 * has a non-empty content flagged as base64-encoded.
	 *
	 * @param IIndexDocument $document
	 * @param array $arr request wrapped as ['index' => [...]]; modified in place
	 */
	public function onIndexingDocument(IIndexDocument $document, array &$arr) {
		if ($document->getContent() !== ''
			&& $document->isContentEncoded() === IIndexDocument::ENCODED_BASE64) {
			$arr['index']['pipeline'] = 'attachment';
		}
	}


	/**
	 * Build the body stored for a document: access rights, tags, identification
	 * fields, parts and content, merged over the document's extra info (on a key
	 * collision the fields generated here win).
	 *
	 * @param IIndexDocument $document
	 *
	 * @return array
	 * @throws AccessIsEmptyException if the document has no access set
	 */
	public function generateIndexBody(IIndexDocument $document): array {

		$access = $document->getAccess();
		if ($access === null) {
			throw new AccessIsEmptyException('DocumentAccess is Empty');
		}

		// TODO: check if we can just update META or just update CONTENT, based on
		// the status of the document's index.
		$body = [
			'owner'    => $access->getOwnerId(),
			'users'    => $access->getUsers(),
			'groups'   => $access->getGroups(),
			'circles'  => $access->getCircles(),
			'links'    => $access->getLinks(),
			'metatags' => $document->getMetaTags(),
			'subtags'  => $document->getSubTags(true),
			'tags'     => $document->getTags(),
			'hash'     => $document->getHash(),
			'provider' => $document->getProviderId(),
			'source'   => $document->getSource(),
			'title'    => $document->getTitle(),
			'parts'    => $document->getParts(),
			'content'  => $document->getContent()
		];

		return array_merge($document->getInfoAll(), $body);
	}


	/**
	 * Build the parameters describing the index itself.
	 *
	 * @param bool $complete when false, only the index name is returned; when true,
	 *                       the settings and mappings are included as 'body'
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function generateGlobalMap(bool $complete = true): array {

		$params = [
			'index' => $this->configService->getElasticIndex()
		];

		if ($complete === false) {
			return $params;
		}

		// the flag is only sent when the app is not configured for a version below 6.6.
		if ($this->configService->getAppValue(ConfigService::ELASTIC_VER_BELOW66) !== '1') {
			$params['include_type_name'] = true;
		}

		$params['body'] = [
			'settings' => $this->generateGlobalMapSettings(),
			'mappings' => $this->generateGlobalMapMappings()
		];

		return $params;
	}


	/**
	 * Build the parameters describing the 'attachment' ingest pipeline.
	 *
	 * @param bool $complete when false, only the pipeline id is returned
	 *
	 * @return array
	 */
	public function generateGlobalIngest(bool $complete = true): array {

		$params = ['id' => 'attachment'];

		if ($complete === false) {
			return $params;
		}

		// the three processors are declared, in this order, within a single entry
		// of the 'processors' list.
		$params['body'] = [
			'description' => 'attachment',
			'processors'  => [
				[
					'attachment' => [
						'field'         => 'content',
						'indexed_chars' => -1
					],
					'convert'    => [
						'field'        => 'attachment.content',
						'type'         => 'string',
						'target_field' => 'content'
					],
					'remove'     => [
						'field'          => 'attachment.content',
						'ignore_failure' => true
					]
				]
			]
		];

		return $params;
	}


	/**
	 * Build the query matching every document of a provider.
	 *
	 * @param string $providerId
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	public function generateDeleteQuery(string $providerId): array {
		return [
			'index' => $this->configService->getElasticIndex(),
			'type'  => 'standard',
			'body'  => ['query' => ['match' => ['provider' => $providerId]]]
		];
	}


	/**
	 * Build the index/id/type parameters shared by every single-document request.
	 * The document id is '<providerId>:<documentId>'.
	 *
	 * @param string $providerId
	 * @param string $documentId
	 *
	 * @return array
	 * @throws ConfigurationException
	 */
	private function generateDocumentParams(string $providerId, string $documentId): array {
		return [
			'index' => $this->configService->getElasticIndex(),
			'id'    => $providerId . ':' . $documentId,
			'type'  => 'standard'
		];
	}


	/**
	 * Build the 'settings' part of the global map: fields limit and analysis chain.
	 *
	 * @return array
	 */
	private function generateGlobalMapSettings(): array {
		return [
			'index.mapping.total_fields.limit' => $this->configService->getAppValue(
				ConfigService::FIELDS_LIMIT
			),
			'analysis'                         => [
				'filter'      => [
					'shingle' => [
						'type' => 'shingle'
					]
				],
				'char_filter' => $this->generateNegationCharFilters(),
				// NOTE(review): this analyzer lists neither the 'shingle' filter nor
				// the char filters declared above - confirm whether that is intended.
				'analyzer'    => [
					'analyzer' => [
						'type'      => 'custom',
						'tokenizer' => $this->configService->getAppValue(
							ConfigService::ANALYZER_TOKENIZER
						),
						'filter'    => ['lowercase', 'stop', 'kstem']
					]
				]
			]
		];
	}


	/**
	 * Build the two pattern_replace char filters that prefix with '~' the word
	 * preceding ('pre_negs') or following ('post_negs') a negation word.
	 *
	 * @return array
	 */
	private function generateNegationCharFilters(): array {
		// single source for the alternation, so both patterns stay in sync.
		$negations =
			'never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint';

		return [
			'pre_negs'  => [
				'type'        => 'pattern_replace',
				'pattern'     => '(\\w+)\\s+((?i:' . $negations . '))\\b',
				'replacement' => '~$1 $2'
			],
			'post_negs' => [
				'type'        => 'pattern_replace',
				'pattern'     => '\\b((?i:' . $negations . '))\\s+(\\w+)',
				'replacement' => '$1 ~$2'
			]
		];
	}


	/**
	 * Build the 'mappings' part of the global map for the 'standard' type.
	 * 'title' and 'content' are copied to the 'combined' field.
	 *
	 * @return array
	 */
	private function generateGlobalMapMappings(): array {
		$keyword = ['type' => 'keyword'];
		$analyzedText = [
			'type'        => 'text',
			'analyzer'    => 'analyzer',
			'term_vector' => 'with_positions_offsets'
		];

		return [
			'standard' => [
				'dynamic'    => true,
				'properties' => [
					'source'   => $keyword,
					'title'    => [
						'type'        => 'text',
						'analyzer'    => 'keyword',
						'term_vector' => 'with_positions_offsets',
						'copy_to'     => 'combined'
					],
					'provider' => $keyword,
					'tags'     => $keyword,
					'metatags' => $keyword,
					'subtags'  => $keyword,
					'content'  => $analyzedText + ['copy_to' => 'combined'],
					'owner'    => $keyword,
					'users'    => $keyword,
					'groups'   => $keyword,
					'circles'  => $keyword,
					'links'    => $keyword,
					'hash'     => $keyword,
					'combined' => $analyzedText
				]
			]
		];
	}

}
382
383