Completed
Push — master ( d04ae6...69a1ae )
by Maxence
02:17
created

FilesService   F

Complexity

Total Complexity 84

Size/Duplication

Total Lines 773
Duplicated Lines 6.21 %

Coupling/Cohesion

Components 1
Dependencies 9

Importance

Changes 0
Metric Value
wmc 84
lcom 1
cbo 9
dl 48
loc 773
rs 1.827
c 0
b 0
f 0

30 Methods

Rating   Name   Duplication   Size   Complexity  
A getFilesFromUser() 0 11 1
A getFileSource() 0 13 2
A getFileFromPath() 0 4 1
A getFileFromIndex() 0 5 1
A getPathFromViewerId() 0 16 2
A updateFilesDocument() 0 11 2
A updateDocumentAccess() 0 15 3
A __construct() 0 22 1
A initFileSystems() 0 8 2
B getFilesFromDirectory() 0 30 6
A generateFilesDocumentFromFile() 0 21 2
A getFileFromId() 0 16 3
A generateDocuments() 0 24 4
A generateDocumentFromIndex() 0 19 2
A isDocumentUpToDate() 0 20 4
A updateDocument() 0 12 2
A updateFilesDocumentFromFile() 0 10 1
A updateContentFromFile() 0 24 5
B updateShareNames() 0 29 6
A parseMimeType() 0 12 2
A parseMimeTypeText() 6 18 4
A parseMimeTypePDF() 0 7 2
A parseMimeTypeOffice() 6 19 3
A extractContentFromFileText() 0 14 3
A extractContentFromFilePDF() 18 20 4
A extractContentFromFileOffice() 18 20 4
A extractContentFromFileOCR() 0 12 4
A extractContentUsingTesseractOCR() 0 21 4
A isSourceIndexable() 0 10 2
A impersonateOwner() 0 8 2

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

Complex Class

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like FilesService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use FilesService, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * Files_FullTextSearch - Index the content of your files
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\Files_FullTextSearch\Service;
28
29
30
use Exception;
31
use OC\App\AppManager;
32
use OCA\Files_FullTextSearch\Exceptions\EmptyUserException;
33
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException;
34
use OCA\Files_FullTextSearch\Exceptions\FilesNotFoundException;
35
use OCA\Files_FullTextSearch\Exceptions\KnownFileMimeTypeException;
36
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException;
37
use OCA\Files_FullTextSearch\Model\FilesDocument;
38
use OCA\Files_FullTextSearch\Provider\FilesProvider;
39
use OCA\Files_FullTextSearch_Tesseract\Service\TesseractService;
40
use OCA\FullTextSearch\Exceptions\InterruptException;
41
use OCA\FullTextSearch\Exceptions\TickDoesNotExistException;
42
use OCA\FullTextSearch\Model\Index;
43
use OCA\FullTextSearch\Model\IndexDocument;
44
use OCA\FullTextSearch\Model\Runner;
45
use OCP\AppFramework\IAppContainer;
46
use OCP\Files\File;
47
use OCP\Files\FileInfo;
48
use OCP\Files\Folder;
49
use OCP\Files\InvalidPathException;
50
use OCP\Files\IRootFolder;
51
use OCP\Files\Node;
52
use OCP\Files\NotFoundException;
53
use OCP\Files\NotPermittedException;
54
use OCP\Files\StorageNotAvailableException;
55
use OCP\IUserManager;
56
use OCP\Share\IManager;
57
58
class FilesService {
59
60
	const MIMETYPE_TEXT = 'files_text';
61
	const MIMETYPE_PDF = 'files_pdf';
62
	const MIMETYPE_OFFICE = 'files_office';
63
	const MIMETYPE_OCR = 'files_ocr';
64
	const MIMETYPE_IMAGE = 'files_image';
65
	const MIMETYPE_AUDIO = 'files_audio';
66
67
68
	/** @var IAppContainer */
69
	private $container;
70
71
	/** @var IRootFolder */
72
	private $rootFolder;
73
74
	/** @var IUserManager */
75
	private $userManager;
76
77
	/** @var AppManager */
78
	private $appManager;
79
80
	/** @var IManager */
81
	private $shareManager;
82
83
	/** @var ConfigService */
84
	private $configService;
85
86
	/** @var LocalFilesService */
87
	private $localFilesService;
88
89
	/** @var ExternalFilesService */
90
	private $externalFilesService;
91
92
	/** @var GroupFoldersService */
93
	private $groupFoldersService;
94
95
	/** @var MiscService */
96
	private $miscService;
97
98
99
	/**
	 * FilesService constructor.
	 *
	 * Only stores the injected collaborators on the instance; no other work
	 * is done at construction time.
	 *
	 * @param IAppContainer $container
	 * @param IRootFolder $rootFolder
	 * @param AppManager $appManager
	 * @param IUserManager $userManager
	 * @param IManager $shareManager
	 * @param ConfigService $configService
	 * @param LocalFilesService $localFilesService
	 * @param ExternalFilesService $externalFilesService
	 * @param GroupFoldersService $groupFoldersService
	 * @param MiscService $miscService
	 */
	public function __construct(
		IAppContainer $container,
		IRootFolder $rootFolder,
		AppManager $appManager,
		IUserManager $userManager,
		IManager $shareManager,
		ConfigService $configService,
		LocalFilesService $localFilesService,
		ExternalFilesService $externalFilesService,
		GroupFoldersService $groupFoldersService,
		MiscService $miscService
	) {
		// services provided by the server
		$this->container = $container;
		$this->rootFolder = $rootFolder;
		$this->appManager = $appManager;
		$this->userManager = $userManager;
		$this->shareManager = $shareManager;

		// services provided by this app
		$this->configService = $configService;
		$this->localFilesService = $localFilesService;
		$this->externalFilesService = $externalFilesService;
		$this->groupFoldersService = $groupFoldersService;
		$this->miscService = $miscService;
	}
137
138
139
	/**
	 * Lists every document found below the root of a user's folder.
	 *
	 * The external and group-folder file systems are initialised for the user
	 * first, then the user's '/' folder is walked recursively.
	 *
	 * @param Runner $runner
	 * @param string $userId
	 *
	 * @return FilesDocument[]
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromUser(Runner $runner, $userId) {
		$this->initFileSystems($userId);

		/** @var Folder $userRoot */
		$userRoot = $this->rootFolder->getUserFolder($userId)
									 ->get('/');

		return $this->getFilesFromDirectory($runner, $userId, $userRoot);
	}
160
161
162
	/**
	 * Initialises the external-files and group-folders helpers for a user.
	 *
	 * Does nothing when the user id is the empty string.
	 *
	 * @param string $userId
	 */
	private function initFileSystems($userId) {
		if ($userId !== '') {
			$this->externalFilesService->initExternalFilesForUser($userId);
			$this->groupFoldersService->initGroupSharesForUser($userId);
		}
	}
173
174
175
	/**
	 * Recursively collects a document for every indexable node below a folder.
	 *
	 * A folder that contains a '.noindex' entry, or whose storage is not
	 * available when checking for it, yields an empty list. A node reported as
	 * not indexable is skipped and, if it is a folder, is not descended into.
	 *
	 * @param Runner $runner
	 * @param string $userId
	 * @param Folder $node
	 *
	 * @return FilesDocument[]
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromDirectory(Runner $runner, $userId, Folder $node) {
		$documents = [];

		try {
			if ($node->nodeExists('.noindex')) {
				return $documents;
			}
		} catch (StorageNotAvailableException $e) {
			return $documents;
		}

		foreach ($node->getDirectoryListing() as $file) {
			$runner->update('getFilesFromDirectory');

			try {
				$documents[] = $this->generateFilesDocumentFromFile($file, $userId);
			} catch (FileIsNotIndexableException $e) {
				continue;
			}

			if ($file->getType() === FileInfo::TYPE_FOLDER) {
				/** @var $file Folder */
				// Append in place instead of array_merge(): merging would copy the
				// whole accumulated list again for every sub-folder.
				foreach ($this->getFilesFromDirectory($runner, $userId, $file) as $child) {
					$documents[] = $child;
				}
			}
		}

		return $documents;
	}
216
217
218
	/**
	 * Builds a FilesDocument describing a node as seen by a given viewer.
	 *
	 * The document carries the node's type, source, owner id (empty string
	 * when the node has no owner), viewer-relative path, viewer id,
	 * modification time and mime type.
	 *
	 * @param Node $file
	 * @param string $viewerId
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	private function generateFilesDocumentFromFile(Node $file, $viewerId) {
		// resolved first: this may reject the node as not indexable
		$origin = $this->getFileSource($file);
		$doc = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, $file->getId());

		$owner = ($file->getOwner() !== null) ? $file->getOwner()
													 ->getUID() : '';

		$doc->setType($file->getType())
			->setSource($origin)
			->setOwnerId($owner)
			->setPath($this->getPathFromViewerId($file->getId(), $viewerId))
			->setViewerId($viewerId)
			->setModifiedTime($file->getMTime())
			->setMimetype($file->getMimetype());

		return $doc;
	}
250
251
252
	/**
	 * Asks the local, external and group-folder services, in that order, to
	 * identify the source of a node.
	 *
	 * Each service receives the result variable as its second argument; a
	 * KnownFileSourceException ends the lookup early.
	 *
	 * @param Node $file
	 *
	 * @return string the source, or '' if no service filled it in
	 * @throws FileIsNotIndexableException
	 * @throws NotFoundException
	 */
	private function getFileSource(Node $file) {
		$origin = '';

		try {
			$this->localFilesService->getFileSource($file, $origin);
			$this->externalFilesService->getFileSource($file, $origin);
			$this->groupFoldersService->getFileSource($file, $origin);
		} catch (KnownFileSourceException $e) {
			// Source identified: no need to ask the remaining services.
		}

		return $origin;
	}
272
273
274
	/**
	 * Resolves a path inside a user's folder to its node.
	 *
	 * @param string $userId
	 * @param string $path
	 *
	 * @return Node
	 * @throws NotFoundException
	 */
	public function getFileFromPath($userId, $path) {
		$userFolder = $this->rootFolder->getUserFolder($userId);

		return $userFolder->get($path);
	}
285
286
287
	/**
	 * Looks up a node by file id inside a user's folder.
	 *
	 * When the lookup returns several nodes, the first one is returned.
	 *
	 * @param string $userId
	 * @param int $fileId
	 *
	 * @return Node
	 * @throws FilesNotFoundException when no node matches the id
	 * @throws EmptyUserException when $userId is the empty string
	 */
	public function getFileFromId($userId, $fileId) {
		if ($userId === '') {
			throw new EmptyUserException();
		}

		$matches = $this->rootFolder->getUserFolder($userId)
									->getById($fileId);
		if (count($matches) === 0) {
			throw new FilesNotFoundException();
		}

		return array_shift($matches);
	}
311
312
313
	/**
	 * Resolves the node an index entry refers to.
	 *
	 * impersonateOwner() runs first, so the owner id is read from the index
	 * only after that call.
	 *
	 * @param Index $index
	 *
	 * @return Node
	 * @throws EmptyUserException
	 * @throws FilesNotFoundException
	 */
	public function getFileFromIndex(Index $index) {
		$this->impersonateOwner($index);

		$ownerId = $index->getOwnerId();
		$fileId = $index->getDocumentId();

		return $this->getFileFromId($ownerId, $fileId);
	}
325
326
327
	/**
	 * Returns the path of a file as seen from a viewer's folder.
	 *
	 * @param int $fileId
	 * @param string $viewerId
	 *
	 * @throws Exception
	 * @return string '' when the file id is not found in the viewer's folder
	 */
	private function getPathFromViewerId($fileId, $viewerId) {
		$matches = $this->rootFolder->getUserFolder($viewerId)
									->getById($fileId);
		if (count($matches) === 0) {
			return '';
		}

		$node = array_shift($matches);

		// TODO: better way to do this : we remove the '/userid/files/'
		// ('/' + viewer id + '/files/' is 8 + strlen($viewerId) bytes long)
		$prefixLength = 8 + strlen($viewerId);

		return MiscService::noEndSlash(substr($node->getPath(), $prefixLength));
	}
350
351
352
	/**
	 * Completes each FilesDocument of the list (access, content, tags).
	 *
	 * Entries that are not FilesDocument instances are dropped. When updating
	 * a document throws, its index is flagged INDEX_IGNORE, the trace and
	 * message are echoed, and the document is still returned.
	 *
	 * @param FilesDocument[] $documents
	 *
	 * @return FilesDocument[]
	 */
	public function generateDocuments($documents) {
		$updated = [];

		foreach ($documents as $document) {
			if ($document instanceof FilesDocument) {
				try {
					$this->updateFilesDocument($document);
				} catch (Exception $e) {
					// TODO - update $document with a error status instead of just ignore !
					$document->getIndex()
							 ->setStatus(Index::INDEX_IGNORE);
					echo 'Exception: ' . json_encode($e->getTrace()) . ' - ' . $e->getMessage()
						 . "\n";
				}

				$updated[] = $document;
			}
		}

		return $updated;
	}
381
382
383
	/**
	 * Builds the document that corresponds to an index entry.
	 *
	 * If resolving the file throws any Exception, the index is flagged
	 * INDEX_REMOVE and a bare document carrying that index is returned.
	 * Otherwise the document is generated from the file, with the index
	 * owner as viewer, and then completed.
	 *
	 * @param Index $index
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function generateDocumentFromIndex(Index $index) {
		$node = null;
		try {
			$node = $this->getFileFromIndex($index);
		} catch (Exception $e) {
			$index->setStatus(Index::INDEX_REMOVE);

			$removed = new FilesDocument($index->getProviderId(), $index->getDocumentId());
			$removed->setIndex($index);

			return $removed;
		}

		$generated = $this->generateFilesDocumentFromFile($node, $index->getOwnerId());
		$generated->setIndex($index);
		$this->updateFilesDocumentFromFile($generated, $node);

		return $generated;
	}
411
412
413
	/**
	 * Tells whether the indexed version of a document is still current.
	 *
	 * If the index options no longer match the configuration, the index is
	 * flagged INDEX_CONTENT, put back on the document, and false is returned.
	 * Otherwise the document is up to date only when the index status is
	 * INDEX_OK and the last indexing time is not older than the document's
	 * modification time.
	 *
	 * @param IndexDocument $document
	 *
	 * @return bool
	 */
	public function isDocumentUpToDate($document) {
		$index = $document->getIndex();

		if (!$this->configService->compareIndexOptions($index)) {
			$index->setStatus(Index::INDEX_CONTENT);
			$document->setIndex($index);

			return false;
		}

		return $index->getStatus() === Index::INDEX_OK
			   && $index->getLastIndex() >= $document->getModifiedTime();
	}
438
439
440
	/**
	 * Regenerates the document for an index entry.
	 *
	 * @param Index $index
	 *
	 * @return FilesDocument|null null when the file is reported as not indexable
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	public function updateDocument(Index $index) {
		$this->impersonateOwner($index);
		$this->initFileSystems($index->getOwnerId());

		try {
			return $this->generateDocumentFromIndex($index);
		} catch (FileIsNotIndexableException $e) {
			return null;
		}
	}
460
461
462
	/**
	 * Completes a document from the node found at its path in the viewer's
	 * folder.
	 *
	 * If the file is reported as not indexable, the document's index is
	 * flagged INDEX_IGNORE instead of the exception being propagated.
	 *
	 * @param FilesDocument $document
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocument(FilesDocument $document) {
		$node = $this->rootFolder->getUserFolder($document->getViewerId())
								 ->get($document->getPath());

		try {
			$this->updateFilesDocumentFromFile($document, $node);
		} catch (FileIsNotIndexableException $e) {
			$index = $document->getIndex();
			$index->setStatus(Index::INDEX_IGNORE);
		}
	}
480
481
482
	/**
	 * Copies the document's source onto its index, refreshes access rights
	 * and content from the node, then tags the document with its source.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {
		$index = $document->getIndex();
		$index->setSource($document->getSource());

		$this->updateDocumentAccess($document, $file);
		$this->updateContentFromFile($document, $file);

		$document->addTag($document->getSource());
	}
500
501
502
	/**
	 * Refreshes the access information and share names of a document.
	 *
	 * Runs only when the index is flagged INDEX_FULL or
	 * FilesDocument::STATUS_FILE_ACCESS.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 */
	private function updateDocumentAccess(FilesDocument $document, Node $file) {
		$index = $document->getIndex();

		if ($index->isStatus(Index::INDEX_FULL)
			|| $index->isStatus(FilesDocument::STATUS_FILE_ACCESS)) {
			$this->localFilesService->updateDocumentAccess($document, $file);
			$this->externalFilesService->updateDocumentAccess($document, $file);
			$this->groupFoldersService->updateDocumentAccess($document, $file);

			$this->updateShareNames($document, $file);
		}
	}
521
522
523
	/**
	 * Sets the document title to its path and, when content indexing is
	 * requested for a regular file, runs the content extractors.
	 *
	 * Extraction is attempted only for files smaller than the configured
	 * FILES_SIZE value multiplied by 1024 * 1024. If the document has no
	 * content afterwards (null), INDEX_CONTENT is removed from the index
	 * status.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateContentFromFile(FilesDocument $document, Node $file) {
		$document->setTitle($document->getPath());

		$contentRequested = $document->getIndex()
									 ->isStatus(Index::INDEX_CONTENT)
							&& $file->getType() === FileInfo::TYPE_FILE;
		if (!$contentRequested) {
			return;
		}

		/** @var File $file */
		$size = $file->getSize();
		$limit = $this->configService->getAppValue(ConfigService::FILES_SIZE) * 1024 * 1024;
		if ($size < $limit) {
			$this->extractContentFromFileText($document, $file);
			$this->extractContentFromFileOffice($document, $file);
			$this->extractContentFromFilePDF($document, $file);
			$this->extractContentFromFileOCR($document, $file);
		}

		if ($document->getContent() === null) {
			$index = $document->getIndex();
			$index->unsetStatus(Index::INDEX_CONTENT);
		}
	}
555
556
557
	/**
	 * Records, for each user the file is shared with, the path of the file in
	 * that user's folder, and stores the map as the 'share_names' info of the
	 * document.
	 *
	 * Unknown users and users whose last login is 0 are skipped. A failure
	 * for one user is logged and does not stop the others.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @return array path per user, keyed by MiscService::secureUsername()
	 */
	private function updateShareNames(FilesDocument $document, Node $file) {
		$users = [];
		$this->localFilesService->getShareUsersFromFile($file, $users);
		$this->externalFilesService->getShareUsers($document, $users);
		$this->groupFoldersService->getShareUsers($document, $users);

		$shareNames = [];
		foreach ($users as $username) {
			try {
				$user = $this->userManager->get($username);
				if ($user !== null && $user->getLastLogin() !== 0) {
					$path = $this->getPathFromViewerId($file->getId(), $username);
					$shareNames[MiscService::secureUsername($username)] =
						is_string($path) ? $path : '';
				}
			} catch (Exception $e) {
				$this->miscService->log(
					'Issue while getting information on documentId:' . $document->getId(), 0
				);
			}
		}

		$document->setInfo('share_names', $shareNames);

		return $shareNames;
	}
592
593
594
	/**
	 * Maps a mime type to one of the MIMETYPE_* categories.
	 *
	 * The text, PDF and office parsers are tried in that order; the first one
	 * that recognises the type sets the result and throws to stop the chain.
	 *
	 * @param string $mimeType
	 *
	 * @return string the category, or '' when no parser recognised the type
	 */
	private function parseMimeType($mimeType) {
		$category = '';

		try {
			$this->parseMimeTypeText($mimeType, $category);
			$this->parseMimeTypePDF($mimeType, $category);
			$this->parseMimeTypeOffice($mimeType, $category);
		} catch (KnownFileMimeTypeException $e) {
			// A parser recognised the type and already filled $category.
		}

		return $category;
	}
611
612
613
	/**
	 * Recognises text mime types: anything starting with 'text/' or with
	 * 'application/epub+zip'.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to MIMETYPE_TEXT on a match
	 *
	 * @throws KnownFileMimeTypeException on a match
	 */
	private function parseMimeTypeText($mimeType, &$parsed) {
		$prefixes = [
			'text/',
			'application/epub+zip'
		];

		foreach ($prefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_TEXT;
			throw new KnownFileMimeTypeException();
		}
	}
637
638
639
	/**
	 * Recognises the exact mime type 'application/pdf'.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to MIMETYPE_PDF on a match
	 *
	 * @throws KnownFileMimeTypeException on a match
	 */
	private function parseMimeTypePDF($mimeType, &$parsed) {
		if ($mimeType !== 'application/pdf') {
			return;
		}

		$parsed = self::MIMETYPE_PDF;
		throw new KnownFileMimeTypeException();
	}
652
653
654
	/**
	 * Recognises office mime types by prefix.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to MIMETYPE_OFFICE on a match
	 *
	 * @throws KnownFileMimeTypeException on a match
	 */
	private function parseMimeTypeOffice($mimeType, &$parsed) {
		$prefixes = [
			'application/msword',
			'application/vnd.oasis.opendocument',
			'application/vnd.sun.xml',
			'application/vnd.openxmlformats-officedocument',
			'application/vnd.ms-word',
			'application/vnd.ms-powerpoint',
			'application/vnd.ms-excel'
		];

		foreach ($prefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_OFFICE;
			throw new KnownFileMimeTypeException();
		}
	}
679
680
681
	/**
	 * Stores the base64-encoded content of a text file on the document.
	 *
	 * Does nothing unless the mime type is categorised as text and the
	 * document's source is indexable.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileText(FilesDocument $document, File $file) {
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_TEXT
			|| !$this->isSourceIndexable($document)) {
			return;
		}

		$encoded = base64_encode($file->getContent());
		$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
	}
701
702
703
	/**
	 * Stores the base64-encoded content of a PDF file on the document.
	 *
	 * Does nothing for other mime types. The FILES_PDF index option is
	 * recorded on the document; the content is set to '' when the FILES_PDF
	 * app value is not '1'.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFilePDF(FilesDocument $document, File $file) {
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_PDF) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_PDF);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_PDF) === '1') {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
		} else {
			$document->setContent('');
		}
	}
729
730
731
	/**
	 * Stores the base64-encoded content of an office file on the document.
	 *
	 * Does nothing for other mime types. The FILES_OFFICE index option is
	 * recorded on the document; the content is set to '' when the
	 * FILES_OFFICE app value is not '1'.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileOffice(FilesDocument $document, File $file) {
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_OFFICE) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OFFICE);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_OFFICE) === '1') {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
		} else {
			$document->setContent('');
		}
	}
757
758
759
	/**
	 * Falls back to OCR when no content has been extracted yet.
	 *
	 * Runs only when the FILES_OCR app value is '1' and the document content
	 * is '' or null; the content is reset to '' before OCR is attempted.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 */
	private function extractContentFromFileOCR(FilesDocument $document, File $file) {
		$ocrEnabled = ($this->configService->getAppValue(ConfigService::FILES_OCR) === '1');
		if (!$ocrEnabled) {
			return;
		}

		$hasContent = ($document->getContent() !== '' && $document->getContent() !== null);
		if ($hasContent) {
			return;
		}

		$document->setContent('');
		$this->extractContentUsingTesseractOCR($document, $file);
	}
775
776
777
	/**
	 * Runs the file through the TesseractService and stores the result,
	 * base64-encoded, on the document.
	 *
	 * Gives up silently when the service rejects the mime type/extension,
	 * when the source is not indexable, or when anything in the process
	 * throws (including resolving the service from the container).
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 */
	private function extractContentUsingTesseractOCR(FilesDocument $document, File $file) {
		try {
			$ocr = $this->container->query(TesseractService::class);
			$extension = pathinfo($document->getPath(), PATHINFO_EXTENSION);

			$supported = $ocr->parsedMimeType($document->getMimetype(), $extension);
			if (!$supported) {
				return;
			}

			$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OCR);
			if (!$this->isSourceIndexable($document)) {
				return;
			}

			$text = $ocr->ocrFile($file);
		} catch (Exception $e) {
			return;
		}

		$document->setContent(base64_encode($text), IndexDocument::ENCODED_BASE64);
	}
802
803
804
	/**
	 * Checks whether content from the document's source may be indexed.
	 *
	 * The source is recorded as an index option on the document. When the
	 * app value stored under the source name is not '1', the document content
	 * is set to '' and false is returned.
	 *
	 * @param FilesDocument $document
	 *
	 * @return bool
	 */
	private function isSourceIndexable(FilesDocument $document) {
		$this->configService->setDocumentIndexOption($document, $document->getSource());

		if ($this->configService->getAppValue($document->getSource()) === '1') {
			return true;
		}

		$document->setContent('');

		return false;
	}
819
820
821
	/**
	 * Lets the group-folders and external-files services impersonate an owner
	 * for an index whose owner id is the empty string.
	 *
	 * Does nothing when the index already has an owner id.
	 *
	 * @param Index $index
	 */
	private function impersonateOwner(Index $index) {
		if ($index->getOwnerId() === '') {
			$this->groupFoldersService->impersonateOwner($index);
			$this->externalFilesService->impersonateOwner($index);
		}
	}
829
830
}
831
832