Completed
Push — master ( 39ae5b...0fb1aa )
by Maxence
01:38
created

IndexMappingService::indexDocumentNew()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 18
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 18
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 11
nc 1
nop 4
1
<?php
2
/**
3
 * FullTextSearch_ElasticSearch - Use Elasticsearch to index the content of your nextcloud
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\FullTextSearch_ElasticSearch\Service;
28
29
use Elasticsearch\Client;
30
use OCA\FullTextSearch\IFullTextSearchPlatform;
31
use OCA\FullTextSearch\IFullTextSearchProvider;
32
use OCA\FullTextSearch\Model\Index;
33
use OCA\FullTextSearch\Model\IndexDocument;
34
use OCA\FullTextSearch_ElasticSearch\Exceptions\ConfigurationException;
35
36
37
class IndexMappingService {
38
39
	/** @var ConfigService */
40
	private $configService;
41
42
	/** @var MiscService */
43
	private $miscService;
44
45
46
	/**
47
	 * MappingService constructor.
48
	 *
49
	 * @param ConfigService $configService
50
	 * @param MiscService $miscService
51
	 */
52
	public function __construct(ConfigService $configService, MiscService $miscService) {
		// Store the injected services on the instance.
		$this->miscService = $miscService;
		$this->configService = $configService;
	}
56
57
58
	/**
59
	 * @param Client $client
60
	 * @param IFullTextSearchProvider $provider
61
	 * @param IndexDocument $document
62
	 *
63
	 * @param IFullTextSearchPlatform $source
64
	 *
65
	 * @return array
0 ignored issues
show
Documentation introduced by
Should the return type not be callable? Also, consider making the array more specific, something like array<String>, or String[].

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

If the return type contains the type array, this check recommends the use of a more specific type like String[] or array<String>.

Loading history...
66
	 * @throws ConfigurationException
67
	 */
68
	public function indexDocumentNew(
		Client $client, IFullTextSearchProvider $provider, IndexDocument $document,
		IFullTextSearchPlatform $source
	) {
		// Parameters of the index request: target index, document id,
		// provider id used as the type, and the generated document body.
		$request = [
			'index' => $this->configService->getElasticIndex(),
			'id'    => $document->getId(),
			'type'  => $provider->getId(),
			'body'  => $this->generateIndexBody($document)
		];

		// The hook receives the request wrapped under an 'index' key and by
		// reference, so whatever it changes is what gets sent to the client.
		$wrapped = ['index' => $request];
		$this->onIndexingDocument($source, $provider, $document, $wrapped);

		return $client->index($wrapped['index']);
	}
86
87
88
	/**
89
	 * @param Client $client
90
	 * @param IFullTextSearchProvider $provider
91
	 * @param IndexDocument $document
92
	 * @param IFullTextSearchPlatform $source
93
	 *
94
	 * @return array
0 ignored issues
show
Documentation introduced by
Should the return type not be callable? Also, consider making the array more specific, something like array<String>, or String[].

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

If the return type contains the type array, this check recommends the use of a more specific type like String[] or array<String>.

Loading history...
95
	 * @throws ConfigurationException
96
	 */
97
	public function indexDocumentUpdate(
		Client $client, IFullTextSearchProvider $provider, IndexDocument $document,
		IFullTextSearchPlatform $source
	) {
		// When the document's index reports the INDEX_CONTENT status, the
		// document is indexed from scratch instead of being partially updated.
		$documentIndex = $document->getIndex();
		if ($documentIndex->isStatus(Index::INDEX_CONTENT)) {
			return $this->indexDocumentNew($client, $provider, $document, $source);
		}

		// Update request: the generated body is nested under the 'doc' key.
		$request = [
			'index' => $this->configService->getElasticIndex(),
			'id'    => $document->getId(),
			'type'  => $provider->getId(),
			'body'  => ['doc' => $this->generateIndexBody($document)]
		];

		// The hook receives the request wrapped under an 'index' key and by
		// reference, so whatever it changes is what gets sent to the client.
		$wrapped = ['index' => $request];
		$this->onIndexingDocument($source, $provider, $document, $wrapped);

		return $client->update($wrapped['index']);
	}
121
122
123
	/**
124
	 * @param Client $client
125
	 * @param IFullTextSearchProvider $provider
126
	 * @param IndexDocument $document
127
	 *
128
	 * @return array
0 ignored issues
show
Documentation introduced by
Should the return type not be callable? Also, consider making the array more specific, something like array<String>, or String[].

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

If the return type contains the type array, this check recommends the use of a more specific type like String[] or array<String>.

Loading history...
129
	 * @throws ConfigurationException
130
	 */
131
	public function indexDocumentRemove(
		Client $client, IFullTextSearchProvider $provider, IndexDocument $document
	) {
		// A delete request only needs to identify the document:
		// target index, document id, and the provider id used as the type.
		return $client->delete(
			[
				'index' => $this->configService->getElasticIndex(),
				'id'    => $document->getId(),
				'type'  => $provider->getId()
			]
		);
	}
145
146
147
	/**
148
	 * @param IFullTextSearchPlatform $source
149
	 * @param IFullTextSearchProvider $provider
150
	 * @param IndexDocument $document
151
	 * @param array $arr
152
	 */
153
	public function onIndexingDocument(
		IFullTextSearchPlatform $source, IFullTextSearchProvider $provider, IndexDocument $document,
		&$arr
	) {
		// Base64-encoded content is routed through the 'attachment' pipeline.
		$encoding = $document->isContentEncoded();
		if ($encoding === IndexDocument::ENCODED_BASE64) {
			$arr['index']['pipeline'] = 'attachment';
		}

		// $arr is taken by reference and handed on to the provider's own hook.
		$provider->onIndexingDocument($source, $arr);
	}
163
164
165
	/**
166
	 * @param IndexDocument $document
167
	 *
168
	 * @return array
169
	 */
170
	public function generateIndexBody(IndexDocument $document) {
		// Access fields are only present when the document carries access data.
		$access = $document->getAccess();
		$fields = ($access === null) ? [] : [
			'owner'   => $access->getOwnerId(),
			'users'   => $access->getUsers(),
			'groups'  => $access->getGroups(),
			'circles' => $access->getCircles()
		];

		// Tags are always written; title and content only when they are set.
		$fields['tags'] = $document->getTags();

		$title = $document->getTitle();
		if ($title !== null) {
			$fields['title'] = $title;
		}

		$content = $document->getContent();
		if ($content !== null) {
			$fields['content'] = $content;
		}

		// On a key collision the fields built above win over the info entries.
		return array_merge($document->getInfoAll(), $fields);
	}
195
196
197
	/**
198
	 * @param bool $complete
199
	 *
200
	 * @return array<string,string|array<string,array<string,array<string,array>>>>
201
	 * @throws ConfigurationException
202
	 */
203
	public function generateGlobalMap($complete = true) {
		// Purpose: build the parameters describing the Elasticsearch index.
		// With $complete === false only the index name is returned; otherwise
		// the analysis settings and the default field mappings are added
		// under 'body'.
		$params = ['index' => $this->configService->getElasticIndex()];

		if ($complete === false) {
			return $params;
		}

		// Negation words used by both char filters. Kept in one place so the
		// two patterns cannot drift apart.
		$negations = '(?i:never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt'
					 . '|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint)';

		$analysis = [
			'filter'      => [
				'shingle' => [
					'type' => 'shingle'
				]
			],
			'char_filter' => [
				// Prefixes the word that comes before a negation with '~'.
				'pre_negs'  => [
					'type'        => 'pattern_replace',
					'pattern'     => '(\\w+)\\s+(' . $negations . ')\\b',
					'replacement' => '~$1 $2'
				],
				// Prefixes the word that comes after a negation with '~'.
				'post_negs' => [
					'type'        => 'pattern_replace',
					'pattern'     => '\\b(' . $negations . ')\\s+(\\w+)',
					'replacement' => '$1 ~$2'
				]
			],
			'analyzer'    => [
				'analyzer' => [
					'type'      => 'custom',
					'tokenizer' => 'standard',
					'filter'    => ['lowercase', 'stop', 'kstem']
				]
			]
		];

		// Every mapped field is analyzed text with term vectors enabled.
		$textField = [
			'type'        => 'text',
			'analyzer'    => 'analyzer',
			'term_vector' => 'yes'
		];

		// All fields except 'combined' are additionally copied into 'combined'.
		$copiedField = $textField + ['copy_to' => 'combined'];

		$properties = array_fill_keys(
			['title', 'content', 'owner', 'users', 'groups', 'circles'], $copiedField
		);
		$properties['combined'] = $textField;
		// Note: mappings for 'topics' and 'places' were drafted here as
		// commented-out code but never enabled, so they are not generated.

		$params['body'] = [
			'settings' => [
				'analysis' => $analysis
			],
			'mappings' => [
				'_default_' => [
					'properties' => $properties
				]
			]
		];

		return $params;
	}
302
303
304
	/**
305
	 * @param bool $complete
306
	 *
307
	 * @return array<string,string|array<string,string|array<string,array<string,string|integer>>>>
0 ignored issues
show
Documentation introduced by
Should the return type not be array<string,string|arra...array<string,array>[]>>?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
308
	 */
309
	public function generateGlobalIngest($complete = true) {
		$pipeline = ['id' => 'attachment'];

		// Anything other than a strict false adds the pipeline definition.
		if ($complete !== false) {
			// Settings of the attachment processor applied to 'content'.
			$attachment = [
				'field'         => 'content',
				'indexed_chars' => -1
			];
			// Writes the '{{ attachment.content }}' template into 'content'.
			$set = [
				'field' => 'content',
				'value' => '{{ attachment.content }}'
			];
			// Removes 'attachment.content', ignoring a failure of that step.
			$remove = [
				'field'          => 'attachment.content',
				'ignore_failure' => true
			];

			$pipeline['body'] = [
				'description' => 'attachment',
				'processors'  => [
					[
						'attachment' => $attachment,
						'set'        => $set,
						'remove'     => $remove
					]
				]
			];
		}

		return $pipeline;
	}
339
340
}
341