Completed
Push — master ( 3f43c1...51db49 )
by Maxence
03:26
created

FilesService::getFileFromIndex()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 5
c 0
b 0
f 0
rs 9.4285
cc 1
eloc 3
nc 1
nop 1
1
<?php
2
/**
3
 * Files_FullTextSearch - Index the content of your files
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\Files_FullTextSearch\Service;
28
29
30
use Exception;
31
use OC\App\AppManager;
32
use OCA\Files_FullTextSearch\Exceptions\EmptyUserException;
33
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException;
34
use OCA\Files_FullTextSearch\Exceptions\FilesNotFoundException;
35
use OCA\Files_FullTextSearch\Exceptions\KnownFileMimeTypeException;
36
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException;
37
use OCA\Files_FullTextSearch\Model\FilesDocument;
38
use OCA\Files_FullTextSearch\Provider\FilesProvider;
39
use OCA\Files_FullTextSearch_Tesseract\Service\TesseractService;
40
use OCA\FullTextSearch\Exceptions\InterruptException;
41
use OCA\FullTextSearch\Exceptions\TickDoesNotExistException;
42
use OCA\FullTextSearch\Model\Index;
43
use OCA\FullTextSearch\Model\IndexDocument;
44
use OCA\FullTextSearch\Model\Runner;
45
use OCP\AppFramework\IAppContainer;
46
use OCP\Files\File;
47
use OCP\Files\FileInfo;
48
use OCP\Files\Folder;
49
use OCP\Files\InvalidPathException;
50
use OCP\Files\IRootFolder;
51
use OCP\Files\Node;
52
use OCP\Files\NotFoundException;
53
use OCP\Files\NotPermittedException;
54
use OCP\Files\StorageNotAvailableException;
55
use OCP\IUserManager;
56
use OCP\Share\IManager;
57
58
class FilesService {
59
60
	const MIMETYPE_TEXT = 'files_text';
61
	const MIMETYPE_PDF = 'files_pdf';
62
	const MIMETYPE_OFFICE = 'files_office';
63
	const MIMETYPE_OCR = 'files_ocr';
64
	const MIMETYPE_IMAGE = 'files_image';
65
	const MIMETYPE_AUDIO = 'files_audio';
66
67
68
	/** @var IAppContainer */
69
	private $container;
70
71
	/** @var IRootFolder */
72
	private $rootFolder;
73
74
	/** @var IUserManager */
75
	private $userManager;
76
77
	/** @var AppManager */
78
	private $appManager;
79
80
	/** @var IManager */
81
	private $shareManager;
82
83
	/** @var ConfigService */
84
	private $configService;
85
86
	/** @var LocalFilesService */
87
	private $localFilesService;
88
89
	/** @var ExternalFilesService */
90
	private $externalFilesService;
91
92
	/** @var GroupFoldersService */
93
	private $groupFoldersService;
94
95
	/** @var MiscService */
96
	private $miscService;
97
98
99
	/**
	 * FilesService constructor.
	 *
	 * Only stores the injected collaborators on the instance; no other work
	 * is performed here.
	 *
	 * @param IAppContainer $container
	 * @param IRootFolder $rootFolder
	 * @param AppManager $appManager
	 * @param IUserManager $userManager
	 * @param IManager $shareManager
	 * @param ConfigService $configService
	 * @param LocalFilesService $localFilesService
	 * @param ExternalFilesService $externalFilesService
	 * @param GroupFoldersService $groupFoldersService
	 * @param MiscService $miscService
	 */
	public function __construct(
		IAppContainer $container,
		IRootFolder $rootFolder,
		AppManager $appManager,
		IUserManager $userManager,
		IManager $shareManager,
		ConfigService $configService,
		LocalFilesService $localFilesService,
		ExternalFilesService $externalFilesService,
		GroupFoldersService $groupFoldersService,
		MiscService $miscService
	) {
		// services imported from the OC / OCP namespaces
		$this->container = $container;
		$this->rootFolder = $rootFolder;
		$this->appManager = $appManager;
		$this->userManager = $userManager;
		$this->shareManager = $shareManager;

		// services living in this app's own Service namespace
		$this->configService = $configService;
		$this->localFilesService = $localFilesService;
		$this->externalFilesService = $externalFilesService;
		$this->groupFoldersService = $groupFoldersService;
		$this->miscService = $miscService;
	}
137
138
139
	/**
	 * Build the list of documents for everything below a user's folder.
	 *
	 * The extra file systems are initialised for the user first (see
	 * initFileSystems()), then the root of the user folder is handed to
	 * getFilesFromDirectory(), which walks it recursively.
	 *
	 * @param Runner $runner
	 * @param string $userId
	 *
	 * @return FilesDocument[]
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromUser(Runner $runner, $userId) {
		$this->initFileSystems($userId);

		$userFolder = $this->rootFolder->getUserFolder($userId);

		/** @var Folder $root */
		$root = $userFolder->get('/');

		return $this->getFilesFromDirectory($runner, $userId, $root);
	}
160
161
162
	/**
	 * Let the external-files and group-folders services initialise themselves
	 * for a user. Nothing is done when the user id is an empty string.
	 *
	 * @param string $userId
	 */
	private function initFileSystems($userId) {
		if ($userId !== '') {
			$this->externalFilesService->initExternalFilesForUser($userId);
			$this->groupFoldersService->initGroupSharesForUser($userId);
		}
	}
173
174
175
	/**
	 * Recursively collect a FilesDocument for every entry of a folder.
	 *
	 * A folder that contains a '.noindex' entry, or whose storage is not
	 * available, yields an empty list. An entry that raises
	 * FileIsNotIndexableException is skipped; when that entry is a folder its
	 * content is therefore not visited either.
	 *
	 * @param Runner $runner notified once per listed entry
	 * @param string $userId viewer the documents are generated for
	 * @param Folder $node folder to walk
	 *
	 * @return FilesDocument[] documents of the folder, each sub-folder being
	 *                         directly followed by its own content
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromDirectory(Runner $runner, $userId, Folder $node) {
		$documents = [];

		try {
			if ($node->nodeExists('.noindex')) {
				return $documents;
			}
		} catch (StorageNotAvailableException $e) {
			// the storage cannot be queried, so there is nothing to list
			return $documents;
		}

		foreach ($node->getDirectoryListing() as $file) {
			$runner->update('getFilesFromDirectory');

			try {
				$documents[] = $this->generateFilesDocumentFromFile($file, $userId);
			} catch (FileIsNotIndexableException $e) {
				continue;
			}

			if ($file->getType() !== FileInfo::TYPE_FOLDER) {
				continue;
			}

			// Append the children one by one: array_merge() here would copy the
			// whole accumulated list again for every sub-folder encountered.
			/** @var Folder $file */
			foreach ($this->getFilesFromDirectory($runner, $userId, $file) as $child) {
				$documents[] = $child;
			}
		}

		return $documents;
	}
216
217
218
	/**
	 * Create a FilesDocument describing a node as seen by a given viewer.
	 *
	 * The source is resolved first (which may reject the file), then type,
	 * source, owner, viewer-relative path, viewer, modification time and
	 * mimetype are copied onto a new document. The owner id is an empty
	 * string when the node reports no owner.
	 *
	 * @param Node $file
	 * @param string $viewerId
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	private function generateFilesDocumentFromFile(Node $file, $viewerId) {

		$source = $this->getFileSource($file);
		$document = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, $file->getId());

		$owner = $file->getOwner();
		$ownerId = ($owner === null) ? '' : $owner->getUID();

		$document->setType($file->getType())
				 ->setSource($source)
				 ->setOwnerId($ownerId)
				 ->setPath($this->getPathFromViewerId($file->getId(), $viewerId))
				 ->setViewerId($viewerId)
				 ->setModifiedTime($file->getMTime())
				 ->setMimetype($file->getMimetype());

		return $document;
	}
250
251
252
	/**
	 * Ask the local, external and group-folder services, in that order, for
	 * the source of a node.
	 *
	 * A service signals that it recognised the node by throwing
	 * KnownFileSourceException, which ends the lookup. $source is handed to
	 * each service and returned afterwards (presumably filled by reference -
	 * confirm in the services); it stays '' if no service changes it.
	 *
	 * @param Node $file
	 *
	 * @return string
	 * @throws FileIsNotIndexableException
	 * @throws NotFoundException
	 */
	private function getFileSource(Node $file) {
		$source = '';

		try {
			$this->localFilesService->getFileSource($file, $source);
			$this->externalFilesService->getFileSource($file, $source);
			$this->groupFoldersService->getFileSource($file, $source);
		} catch (KnownFileSourceException $known) {
			// a service identified the source; the remaining ones are skipped.
		}

		return $source;
	}
272
273
274
	/**
	 * Fetch a node from a user's folder by its path.
	 *
	 * @param string $userId
	 * @param string $path path handed as-is to the user folder
	 *
	 * @return Node
	 * @throws NotFoundException
	 */
	public function getFileFromPath($userId, $path) {
		$userFolder = $this->rootFolder->getUserFolder($userId);

		return $userFolder->get($path);
	}
285
286
287
	/**
	 * Fetch a node from a user's folder by its file id.
	 *
	 * When the id resolves to several nodes, the first one is returned.
	 *
	 * @param string $userId
	 * @param int $fileId
	 *
	 * @return Node
	 * @throws FilesNotFoundException when the id matches nothing in the user folder
	 * @throws EmptyUserException when $userId is an empty string
	 */
	public function getFileFromId($userId, $fileId) {

		if ($userId === '') {
			throw new EmptyUserException();
		}

		$userFolder = $this->rootFolder->getUserFolder($userId);
		$matches = $userFolder->getById($fileId);
		if (count($matches) === 0) {
			throw new FilesNotFoundException();
		}

		return array_shift($matches);
	}
311
312
313
	/**
	 * Fetch the node an index entry points to.
	 *
	 * impersonateOwner() runs first, so the owner id is read from the index
	 * only after that call.
	 *
	 * @param Index $index
	 *
	 * @return Node
	 * @throws EmptyUserException
	 * @throws FilesNotFoundException
	 */
	public function getFileFromIndex(Index $index) {
		$this->impersonateOwner($index);

		$ownerId = $index->getOwnerId();
		$fileId = $index->getDocumentId();

		return $this->getFileFromId($ownerId, $fileId);
	}
325
326
327
	/**
	 * Compute the path of a file as seen from a viewer's folder.
	 *
	 * Returns '' when the file id cannot be found in the viewer's folder.
	 *
	 * @param int $fileId
	 * @param string $viewerId
	 *
	 * @throws Exception
	 * @return string
	 */
	private function getPathFromViewerId($fileId, $viewerId) {

		$viewerFolder = $this->rootFolder->getUserFolder($viewerId);
		$matches = $viewerFolder->getById($fileId);

		if (count($matches) === 0) {
			return '';
		}

		$file = array_shift($matches);

		// TODO: better way to do this : we remove the '/userid/files/'
		// i.e. 8 fixed characters ('/' + '/files/') plus the length of the user id
		$prefixLength = 8 + strlen($viewerId);

		return MiscService::noEndSlash(substr($file->getPath(), $prefixLength));
	}
350
351
352
	/**
	 * Fill each FilesDocument of a list and return the processed documents.
	 *
	 * Entries that are not FilesDocument instances are dropped. When updating
	 * a document throws, its index is flagged Index::INDEX_IGNORE, the trace
	 * and message are echoed, and the document is still part of the result.
	 *
	 * @param FilesDocument[] $documents
	 *
	 * @return FilesDocument[]
	 */
	public function generateDocuments($documents) {

		$index = [];

		foreach ($documents as $document) {
			if ($document instanceof FilesDocument) {
				try {
					$this->updateFilesDocument($document);
				} catch (Exception $e) {
					// TODO - update $document with a error status instead of just ignore !
					$document->getIndex()
							 ->setStatus(Index::INDEX_IGNORE);
					echo 'Exception: ' . json_encode($e->getTrace()) . ' - ' . $e->getMessage()
						 . "\n";
				}

				$index[] = $document;
			}
		}

		return $index;
	}
381
382
383
	/**
	 * Rebuild a FilesDocument from an index entry.
	 *
	 * If the file behind the index cannot be fetched (any Exception), the
	 * index is flagged Index::INDEX_REMOVE and a bare document carrying that
	 * index is returned. Otherwise the document is generated from the file
	 * for the index owner and then completed with access and content data.
	 *
	 * @param Index $index
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function generateDocumentFromIndex(Index $index) {

		try {
			$file = $this->getFileFromIndex($index);
		} catch (Exception $e) {
			$index->setStatus(Index::INDEX_REMOVE);

			$removed = new FilesDocument($index->getProviderId(), $index->getDocumentId());
			$removed->setIndex($index);

			return $removed;
		}

		$document = $this->generateFilesDocumentFromFile($file, $index->getOwnerId());
		$document->setIndex($index);
		$this->updateFilesDocumentFromFile($document, $file);

		return $document;
	}
411
412
413
	/**
	 * Tell whether the indexed version of a document is still current.
	 *
	 * - When the index options no longer match the configuration, the index
	 *   is flagged Index::INDEX_CONTENT, stored back on the document, and
	 *   false is returned.
	 * - An index whose status is not Index::INDEX_OK is never up to date.
	 * - Otherwise the document is up to date when its last indexing is not
	 *   older than its modification time.
	 *
	 * @param IndexDocument $document
	 *
	 * @return bool
	 */
	public function isDocumentUpToDate($document) {
		$index = $document->getIndex();

		if (!$this->configService->compareIndexOptions($index)) {
			$index->setStatus(Index::INDEX_CONTENT);
			$document->setIndex($index);

			return false;
		}

		if ($index->getStatus() !== Index::INDEX_OK) {
			return false;
		}

		return $index->getLastIndex() >= $document->getModifiedTime();
	}
438
439
440
	/**
	 * Rebuild the FilesDocument that belongs to an index entry.
	 *
	 * impersonateOwner() and initFileSystems() are run for the index before
	 * the document is regenerated from it.
	 *
	 * @param Index $index
	 *
	 * @return FilesDocument|null null when the file is reported as not indexable
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	public function updateDocument(Index $index) {
		$this->impersonateOwner($index);
		$this->initFileSystems($index->getOwnerId());

		try {
			return $this->generateDocumentFromIndex($index);
		} catch (FileIsNotIndexableException $e) {
			// a file that must not be indexed yields no document at all
			return null;
		}
	}
460
461
462
	/**
	 * Complete a document from the file found at its path in the viewer's
	 * folder.
	 *
	 * A file reported as not indexable gets its index flagged
	 * Index::INDEX_IGNORE instead of propagating the exception.
	 *
	 * @param FilesDocument $document
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocument(FilesDocument $document) {
		$file = $this->rootFolder->getUserFolder($document->getViewerId())
								 ->get($document->getPath());

		try {
			$this->updateFilesDocumentFromFile($document, $file);
		} catch (FileIsNotIndexableException $e) {
			$index = $document->getIndex();
			$index->setStatus(Index::INDEX_IGNORE);
		}
	}
480
481
482
	/**
	 * Complete a document with the data of its file.
	 *
	 * Copies the document source onto its index, refreshes access data and
	 * content, and finally tags the document with its source.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {

		$index = $document->getIndex();
		$index->setSource($document->getSource());

		$this->updateDocumentAccess($document, $file);
		$this->updateContentFromFile($document, $file);

		// the source is read again here, after both updates have run
		$document->addTag($document->getSource());
	}
500
501
502
	/**
	 * Refresh the access data and share names of a document.
	 *
	 * Runs only when the index carries Index::INDEX_FULL or
	 * FilesDocument::STATUS_FILE_ACCESS; otherwise the document is untouched.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 */
	private function updateDocumentAccess(FilesDocument $document, Node $file) {

		$index = $document->getIndex();

		$needsAccessUpdate = $index->isStatus(Index::INDEX_FULL)
							 || $index->isStatus(FilesDocument::STATUS_FILE_ACCESS);
		if (!$needsAccessUpdate) {
			return;
		}

		$this->localFilesService->updateDocumentAccess($document, $file);
		$this->externalFilesService->updateDocumentAccess($document, $file);
		$this->groupFoldersService->updateDocumentAccess($document, $file);

		$this->updateShareNames($document, $file);
	}
521
522
523
	/**
	 * Set the title of a document and, when requested, extract its content.
	 *
	 * The title is always set to the document path. Content is extracted only
	 * when the index carries Index::INDEX_CONTENT and the node is a regular
	 * file, and only if the file size is below the configured limit: the
	 * ConfigService::FILES_SIZE value multiplied by 1024 * 1024. The text,
	 * office, PDF and OCR extractors are tried in that order. When no content
	 * ends up on the document (still null), the INDEX_CONTENT status is
	 * removed from the index.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateContentFromFile(FilesDocument $document, Node $file) {

		$document->setTitle($document->getPath());

		if (!$document->getIndex()
					  ->isStatus(Index::INDEX_CONTENT)
			|| $file->getType() !== FileInfo::TYPE_FILE) {
			return;
		}

		// The configured value is cast explicitly: multiplying a non-numeric
		// (e.g. empty) string raises a warning on PHP 7 and a TypeError on
		// PHP 8. Such a value now counts as 0, so no file is below the limit.
		$sizeLimit = (float)$this->configService->getAppValue(ConfigService::FILES_SIZE)
					 * 1024 * 1024;

		/** @var File $file */
		if ($file->getSize() < $sizeLimit) {
			$this->extractContentFromFileText($document, $file);
			$this->extractContentFromFileOffice($document, $file);
			$this->extractContentFromFilePDF($document, $file);
			$this->extractContentFromFileOCR($document, $file);
		}

		if ($document->getContent() === null) {
			$document->getIndex()
					 ->unsetStatus(Index::INDEX_CONTENT);
		}
	}
555
556
557
	/**
	 * Compute, for every user the file is shared with, the path of the file
	 * as that user sees it, and store the map as the 'share_names' info of
	 * the document.
	 *
	 * Users are collected from the local, external and group-folder services.
	 * Unknown users and users whose last login is 0 are left out.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @return array map of secured username => path
	 */
	private function updateShareNames(FilesDocument $document, Node $file) {

		$users = [];

		$this->localFilesService->getShareUsersFromFile($file, $users);
		$this->externalFilesService->getShareUsers($document, $users);
		$this->groupFoldersService->getShareUsers($document, $users);

		$shareNames = [];
		foreach ($users as $username) {
			try {
				$user = $this->userManager->get($username);
				if ($user !== null && $user->getLastLogin() !== 0) {
					$shareNames[MiscService::secureUsername($username)] =
						$this->getPathFromViewerId($file->getId(), $username);
				}
			} catch (Exception $e) {
				// any failure while resolving this user simply leaves the
				// user out of the share names; the other users are still handled.
			}
		}

		$document->setInfo('share_names', $shareNames);

		return $shareNames;
	}
589
590
591
	/**
	 * Map a mime type onto one of the MIMETYPE_* families handled here.
	 *
	 * The text, PDF and office parsers are tried in that order; each one
	 * writes the family into $parsed and throws KnownFileMimeTypeException
	 * when it recognises the mime type.
	 *
	 * @param string $mimeType
	 *
	 * @return string the family, or '' when no parser recognised the mime type
	 */
	private function parseMimeType($mimeType) {

		$parsed = '';
		try {
			$this->parseMimeTypeText($mimeType, $parsed);
			$this->parseMimeTypePDF($mimeType, $parsed);
			$this->parseMimeTypeOffice($mimeType, $parsed);
		} catch (KnownFileMimeTypeException $known) {
			// a parser recognised the mime type and already filled $parsed;
			// the exception only serves to skip the remaining parsers.
		}

		return $parsed;
	}
608
609
610
	/**
	 * Recognise text mime types: anything starting with 'text/' or with
	 * 'application/epub+zip'.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to self::MIMETYPE_TEXT on a match
	 *
	 * @throws KnownFileMimeTypeException on a match
	 */
	private function parseMimeTypeText($mimeType, &$parsed) {

		$textPrefixes = [
			'text/',
			'application/epub+zip'
		];

		foreach ($textPrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_TEXT;
			throw new KnownFileMimeTypeException();
		}
	}
634
635
636
	/**
	 * Recognise the PDF mime type (exactly 'application/pdf').
	 *
	 * @param string $mimeType
	 * @param string $parsed set to self::MIMETYPE_PDF on a match
	 *
	 * @throws KnownFileMimeTypeException on a match
	 */
	private function parseMimeTypePDF($mimeType, &$parsed) {

		if ($mimeType !== 'application/pdf') {
			return;
		}

		$parsed = self::MIMETYPE_PDF;
		throw new KnownFileMimeTypeException();
	}
649
650
651
	/**
	 * Recognise office mime types: anything starting with one of the
	 * prefixes listed below.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to self::MIMETYPE_OFFICE on a match
	 *
	 * @throws KnownFileMimeTypeException on a match
	 */
	private function parseMimeTypeOffice($mimeType, &$parsed) {

		$officePrefixes = [
			'application/msword',
			'application/vnd.oasis.opendocument',
			'application/vnd.sun.xml',
			'application/vnd.openxmlformats-officedocument',
			'application/vnd.ms-word',
			'application/vnd.ms-powerpoint',
			'application/vnd.ms-excel'
		];

		foreach ($officePrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_OFFICE;
			throw new KnownFileMimeTypeException();
		}
	}
676
677
678
	/**
	 * Store the base64-encoded content of a text file on the document.
	 *
	 * Nothing happens unless the document mime type is in the text family;
	 * the source is then checked with isSourceIndexable(), which may blank
	 * the content instead.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileText(FilesDocument $document, File $file) {

		$isText = ($this->parseMimeType($document->getMimeType()) === self::MIMETYPE_TEXT);
		if (!$isText || !$this->isSourceIndexable($document)) {
			return;
		}

		$encoded = base64_encode($file->getContent());
		$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
	}
698
699
700
	/**
	 * Store the base64-encoded content of a PDF file on the document.
	 *
	 * Nothing happens unless the document mime type is in the PDF family.
	 * The ConfigService::FILES_PDF option is recorded on the document, then
	 * the source is checked with isSourceIndexable(). When the FILES_PDF
	 * setting is not '1', the content is set to an empty string instead.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFilePDF(FilesDocument $document, File $file) {
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_PDF) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_PDF);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_PDF) === '1') {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
		} else {
			$document->setContent('');
		}
	}
726
727
728
	/**
	 * Store the base64-encoded content of an office file on the document.
	 *
	 * Nothing happens unless the document mime type is in the office family.
	 * The ConfigService::FILES_OFFICE option is recorded on the document,
	 * then the source is checked with isSourceIndexable(). When the
	 * FILES_OFFICE setting is not '1', the content is set to an empty string
	 * instead.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileOffice(FilesDocument $document, File $file) {
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_OFFICE) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OFFICE);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_OFFICE) === '1') {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
		} else {
			$document->setContent('');
		}
	}
754
755
756
	/**
	 * Fall back to OCR when no content was extracted so far.
	 *
	 * Runs only when the ConfigService::FILES_OCR setting is '1' and the
	 * document content is still null or an empty string. The content is
	 * reset to '' before the Tesseract extraction is attempted.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 */
	private function extractContentFromFileOCR(FilesDocument $document, File $file) {
		if ($this->configService->getAppValue(ConfigService::FILES_OCR) !== '1') {
			return;
		}

		$current = $document->getContent();
		if ($current === '' || $current === null) {
			$document->setContent('');
			$this->extractContentUsingTesseractOCR($document, $file);
		}
	}
772
773
774
	/**
	 * Run the file through the Tesseract service and store the base64-encoded
	 * result on the document.
	 *
	 * The service is resolved from the container on each call. The attempt is
	 * abandoned silently when the service does not accept the mime type and
	 * extension, when the source is not indexable, or when anything in the
	 * process throws an Exception.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 */
	private function extractContentUsingTesseractOCR(FilesDocument $document, File $file) {
		try {
			$tesseract = $this->container->query(TesseractService::class);
			$extension = pathinfo($document->getPath(), PATHINFO_EXTENSION);

			if (!$tesseract->parsedMimeType($document->getMimetype(), $extension)) {
				return;
			}

			$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OCR);
			if (!$this->isSourceIndexable($document)) {
				return;
			}

			$content = $tesseract->ocrFile($file);
		} catch (Exception $e) {
			// OCR is best effort: on any failure the content is left as it is
			return;
		}

		$encoded = base64_encode($content);
		$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
	}
799
800
801
	/**
	 * Tell whether content from the document's source may be indexed.
	 *
	 * The source is recorded as an index option on the document, then the app
	 * setting named after the source is read: only the value '1' allows
	 * indexing. Otherwise the document content is set to an empty string.
	 *
	 * @param FilesDocument $document
	 *
	 * @return bool
	 */
	private function isSourceIndexable(FilesDocument $document) {
		$source = $document->getSource();
		$this->configService->setDocumentIndexOption($document, $source);

		if ($this->configService->getAppValue($source) === '1') {
			return true;
		}

		$document->setContent('');

		return false;
	}
816
817
818
	/**
	 * Hand an index without owner to the group-folder and external-files
	 * services so they can impersonate an owner for it.
	 *
	 * Nothing is done when the index already carries a non-empty owner id.
	 * NOTE(review): presumably the services fill in the owner id of the
	 * index - confirm in GroupFoldersService / ExternalFilesService.
	 *
	 * @param Index $index
	 */
	private function impersonateOwner(Index $index) {
		if ($index->getOwnerId() === '') {
			$this->groupFoldersService->impersonateOwner($index);
			$this->externalFilesService->impersonateOwner($index);
		}
	}
826
827
}
828
829