Completed
Push — master ( 324e78...e457da )
by Maxence
01:39
created

FilesService::getPathFromViewerId()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 16
rs 9.7333
cc 2
nc 2
nop 2
1
<?php
2
/**
3
 * Files_FullTextSearch - Index the content of your files
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\Files_FullTextSearch\Service;
28
29
30
use Exception;
31
use OC\App\AppManager;
32
use OCA\Files_FullTextSearch\Exceptions\EmptyUserException;
33
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException;
34
use OCA\Files_FullTextSearch\Exceptions\FilesNotFoundException;
35
use OCA\Files_FullTextSearch\Exceptions\KnownFileMimeTypeException;
36
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException;
37
use OCA\Files_FullTextSearch\Model\FilesDocument;
38
use OCA\Files_FullTextSearch\Provider\FilesProvider;
39
use OCA\Files_FullTextSearch_Tesseract\Service\TesseractService;
40
use OCA\FullTextSearch\Exceptions\InterruptException;
41
use OCA\FullTextSearch\Exceptions\TickDoesNotExistException;
42
use OCA\FullTextSearch\Model\Index;
43
use OCA\FullTextSearch\Model\IndexDocument;
44
use OCA\FullTextSearch\Model\IndexOptions;
45
use OCA\FullTextSearch\Model\Runner;
46
use OCP\AppFramework\IAppContainer;
47
use OCP\Files\File;
48
use OCP\Files\FileInfo;
49
use OCP\Files\Folder;
50
use OCP\Files\InvalidPathException;
51
use OCP\Files\IRootFolder;
52
use OCP\Files\Node;
53
use OCP\Files\NotFoundException;
54
use OCP\Files\NotPermittedException;
55
use OCP\Files\StorageNotAvailableException;
56
use OCP\IUserManager;
57
use OCP\Share\IManager;
58
59
class FilesService {
60
61
	const MIMETYPE_TEXT = 'files_text';
62
	const MIMETYPE_PDF = 'files_pdf';
63
	const MIMETYPE_OFFICE = 'files_office';
64
	const MIMETYPE_OCR = 'files_ocr';
65
	const MIMETYPE_IMAGE = 'files_image';
66
	const MIMETYPE_AUDIO = 'files_audio';
67
68
69
	/** @var IAppContainer */
70
	private $container;
71
72
	/** @var IRootFolder */
73
	private $rootFolder;
74
75
	/** @var IUserManager */
76
	private $userManager;
77
78
	/** @var AppManager */
79
	private $appManager;
80
81
	/** @var IManager */
82
	private $shareManager;
83
84
	/** @var ConfigService */
85
	private $configService;
86
87
	/** @var LocalFilesService */
88
	private $localFilesService;
89
90
	/** @var ExternalFilesService */
91
	private $externalFilesService;
92
93
	/** @var GroupFoldersService */
94
	private $groupFoldersService;
95
96
	/** @var MiscService */
97
	private $miscService;
98
99
100
	/**
101
	 * FilesService constructor.
102
	 *
103
	 * @param IAppContainer $container
104
	 * @param IRootFolder $rootFolder
105
	 * @param AppManager $appManager
106
	 * @param IUserManager $userManager
107
	 * @param IManager $shareManager
108
	 * @param ConfigService $configService
109
	 * @param LocalFilesService $localFilesService
110
	 * @param ExternalFilesService $externalFilesService
111
	 * @param GroupFoldersService $groupFoldersService
112
	 * @param MiscService $miscService
113
	 *
114
	 * @internal param IProviderFactory $factory
115
	 */
116
	public function __construct(
117
		IAppContainer $container, IRootFolder $rootFolder, AppManager $appManager,
118
		IUserManager $userManager,
119
		IManager $shareManager,
120
		ConfigService $configService, LocalFilesService $localFilesService,
121
		ExternalFilesService $externalFilesService,
122
		GroupFoldersService $groupFoldersService,
123
		MiscService $miscService
124
	) {
125
		$this->container = $container;
126
		$this->rootFolder = $rootFolder;
127
		$this->appManager = $appManager;
128
		$this->userManager = $userManager;
129
		$this->shareManager = $shareManager;
130
131
		$this->configService = $configService;
132
		$this->localFilesService = $localFilesService;
133
		$this->externalFilesService = $externalFilesService;
134
		$this->groupFoldersService = $groupFoldersService;
135
136
		$this->miscService = $miscService;
137
	}
138
139
140
	/**
141
	 * @param Runner $runner
142
	 * @param string $userId
143
	 * @param IndexOptions $indexOptions
144
	 *
145
	 * @return FilesDocument[]
146
	 * @throws InterruptException
147
	 * @throws InvalidPathException
148
	 * @throws NotFoundException
149
	 * @throws TickDoesNotExistException
150
	 */
151
	public function getFilesFromUser(Runner $runner, $userId, $indexOptions) {
152
153
		$this->initFileSystems($userId);
154
155
		/** @var Folder $files */
156
		$files = $this->rootFolder->getUserFolder($userId)
157
								  ->get($indexOptions->getOption('path', '/'));
158
159
		if ($files instanceof Folder) {
0 ignored issues
show
Bug introduced by
The class OCP\Files\Folder does not exist. Did you forget a USE statement, or did you not list all dependencies?

This error could be the result of:

1. Missing dependencies

PHP Analyzer uses your composer.json file (if available) to determine the dependencies of your project and to determine all the available classes and functions. It expects the composer.json to be in the root folder of your repository.

Are you sure this class is defined by one of your dependencies, or did you maybe not list a dependency in either the require or require-dev section?

2. Missing use statement

PHP does not complain about undefined classes in ìnstanceof checks. For example, the following PHP code will work perfectly fine:

if ($x instanceof DoesNotExist) {
    // Do something.
}

If you have not tested against this specific condition, such errors might go unnoticed.

Loading history...
160
			$result = $this->getFilesFromDirectory($runner, $userId, $files);
161
		} else {
162
			$result = [];
163
			try {
164
				$result[] = $this->generateFilesDocumentFromFile($userId, $files);
165
			} catch (FileIsNotIndexableException $e) {
166
				/** we do nothin' */
167
			}
168
		}
169
170
		return $result;
171
	}
172
173
174
	/**
175
	 * @param string $userId
176
	 */
177
	private function initFileSystems($userId) {
178
		if ($userId === '') {
179
			return;
180
		}
181
182
		$this->externalFilesService->initExternalFilesForUser($userId);
183
		$this->groupFoldersService->initGroupSharesForUser($userId);
184
	}
185
186
187
	/**
188
	 * @param Runner $runner
189
	 * @param string $userId
190
	 * @param Folder $node
191
	 *
192
	 * @return FilesDocument[]
193
	 * @throws InterruptException
194
	 * @throws InvalidPathException
195
	 * @throws NotFoundException
196
	 * @throws TickDoesNotExistException
197
	 */
198
	public function getFilesFromDirectory(Runner $runner, $userId, Folder $node) {
199
		$documents = [];
200
201
		try {
202
			if ($node->nodeExists('.noindex')) {
203
				return $documents;
204
			}
205
		} catch (StorageNotAvailableException $e) {
0 ignored issues
show
Bug introduced by
The class OCP\Files\StorageNotAvailableException does not exist. Did you forget a USE statement, or did you not list all dependencies?

Scrutinizer analyzes your composer.json/composer.lock file if available to determine the classes, and functions that are defined by your dependencies.

It seems like the listed class was neither found in your dependencies, nor was it found in the analyzed files in your repository. If you are using some other form of dependency management, you might want to disable this analysis.

Loading history...
206
			return $documents;
207
		}
208
209
		$files = $node->getDirectoryListing();
210
		foreach ($files as $file) {
211
			$runner->updateAction('getFilesFromDirectory');
212
213
			try {
214
				$documents[] = $this->generateFilesDocumentFromFile($userId, $file);
215
			} catch (FileIsNotIndexableException $e) {
216
				continue;
217
			}
218
219
			if ($file->getType() === FileInfo::TYPE_FOLDER) {
220
				/** @var $file Folder */
221
				$documents =
222
					array_merge($documents, $this->getFilesFromDirectory($runner, $userId, $file));
223
			}
224
		}
225
226
		return $documents;
227
	}
228
229
230
	/**
231
	 * @param Node $file
232
	 *
233
	 * @param string $viewerId
234
	 *
235
	 * @return FilesDocument
236
	 * @throws FileIsNotIndexableException
237
	 * @throws InvalidPathException
238
	 * @throws NotFoundException
239
	 * @throws Exception
240
	 */
241
	private function generateFilesDocumentFromFile($viewerId, Node $file) {
242
243
		$source = $this->getFileSource($file);
244
		$document = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, $file->getId());
245
246
		$ownerId = '';
247
		if ($file->getOwner() !== null) {
248
			$ownerId = $file->getOwner()
249
							->getUID();
250
		}
251
		$document->setType($file->getType())
252
				 ->setSource($source)
253
				 ->setOwnerId($ownerId)
254
				 ->setPath($this->getPathFromViewerId($file->getId(), $viewerId))
255
				 ->setViewerId($viewerId)
256
				 ->setModifiedTime($file->getMTime())
257
				 ->setMimetype($file->getMimetype());
258
259
		return $document;
260
	}
261
262
263
	/**
264
	 * @param Node $file
265
	 *
266
	 * @return string
267
	 * @throws FileIsNotIndexableException
268
	 * @throws NotFoundException
269
	 */
270
	private function getFileSource(Node $file) {
271
		$source = '';
272
273
		try {
274
			$this->localFilesService->getFileSource($file, $source);
275
			$this->externalFilesService->getFileSource($file, $source);
276
			$this->groupFoldersService->getFileSource($file, $source);
277
		} catch (KnownFileSourceException $e) {
278
			/** we know the source, just leave. */
279
		}
280
281
		return $source;
282
	}
283
284
285
	/**
286
	 * @param string $userId
287
	 * @param string $path
288
	 *
289
	 * @return Node
290
	 * @throws NotFoundException
291
	 */
292
	public function getFileFromPath($userId, $path) {
293
		return $this->rootFolder->getUserFolder($userId)
294
								->get($path);
295
	}
296
297
298
	/**
299
	 * @param string $userId
300
	 * @param int $fileId
301
	 *
302
	 * @return Node
303
	 * @throws FilesNotFoundException
304
	 * @throws EmptyUserException
305
	 */
306
	public function getFileFromId($userId, $fileId) {
307
308
		if ($userId === '') {
309
			throw new EmptyUserException();
310
		}
311
312
		$files = $this->rootFolder->getUserFolder($userId)
313
								  ->getById($fileId);
314
		if (sizeof($files) === 0) {
315
			throw new FilesNotFoundException();
316
		}
317
318
		$file = array_shift($files);
319
320
		return $file;
321
	}
322
323
324
	/**
325
	 * @param Index $index
326
	 *
327
	 * @return Node
328
	 * @throws EmptyUserException
329
	 * @throws FilesNotFoundException
330
	 */
331
	public function getFileFromIndex(Index $index) {
332
		$this->impersonateOwner($index);
333
334
		return $this->getFileFromId($index->getOwnerId(), $index->getDocumentId());
335
	}
336
337
338
	/**
339
	 * @param int $fileId
340
	 * @param string $viewerId
341
	 *
342
	 * @throws Exception
343
	 * @return string
344
	 */
345
	private function getPathFromViewerId($fileId, $viewerId) {
346
347
		$viewerFiles = $this->rootFolder->getUserFolder($viewerId)
348
										->getById($fileId);
349
350
		if (sizeof($viewerFiles) === 0) {
351
			return '';
352
		}
353
354
		$file = array_shift($viewerFiles);
355
356
		// TODO: better way to do this : we remove the '/userid/files/'
357
		$path = MiscService::noEndSlash(substr($file->getPath(), 8 + strlen($viewerId)));
358
359
		return $path;
360
	}
361
362
363
	/**
364
	 * @param FilesDocument $document
365
	 *
366
	 * @return FilesDocument
367
	 */
368
	public function generateDocument($document) {
369
370
		try {
371
			$this->updateFilesDocument($document);
372
		} catch (Exception $e) {
373
			// TODO - update $document with a error status instead of just ignore !
374
			$document->getIndex()
375
					 ->setStatus(Index::INDEX_IGNORE);
376
			echo 'Exception: ' . json_encode($e->getTrace()) . ' - ' . $e->getMessage()
377
				 . "\n";
378
		}
379
380
		return $document;
381
	}
382
383
384
	/**
385
	 * @param Index $index
386
	 *
387
	 * @return FilesDocument
388
	 * @throws FileIsNotIndexableException
389
	 * @throws InvalidPathException
390
	 * @throws NotFoundException
391
	 * @throws NotPermittedException
392
	 */
393
	private function generateDocumentFromIndex(Index $index) {
394
395
		try {
396
			$file = $this->getFileFromIndex($index);
397
		} catch (Exception $e) {
398
			$index->setStatus(Index::INDEX_REMOVE);
399
			$document = new FilesDocument($index->getProviderId(), $index->getDocumentId());
400
			$document->setIndex($index);
401
402
			return $document;
403
		}
404
405
		$document = $this->generateFilesDocumentFromFile($index->getOwnerId(), $file);
406
		$document->setIndex($index);
407
408
		$this->updateFilesDocumentFromFile($document, $file);
409
410
		return $document;
411
	}
412
413
414
	/**
415
	 * @param IndexDocument $document
416
	 *
417
	 * @return bool
418
	 */
419
	public function isDocumentUpToDate($document) {
420
		$index = $document->getIndex();
421
422
		if (!$this->configService->compareIndexOptions($index)) {
423
			$index->setStatus(Index::INDEX_CONTENT);
424
			$document->setIndex($index);
425
426
			return false;
427
		}
428
429
		if ($index->getStatus() !== Index::INDEX_OK) {
430
			return false;
431
		}
432
433
		if ($index->getLastIndex() >= $document->getModifiedTime()) {
0 ignored issues
show
Unused Code introduced by
This if statement, and the following return statement can be replaced with return $index->getLastIn...ent->getModifiedTime();.
Loading history...
434
			return true;
435
		}
436
437
		return false;
438
	}
439
440
441
	/**
442
	 * @param Index $index
443
	 *
444
	 * @return FilesDocument
0 ignored issues
show
Documentation introduced by
Should the return type not be FilesDocument|null?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
445
	 * @throws InvalidPathException
446
	 * @throws NotFoundException
447
	 * @throws NotPermittedException
448
	 */
449
	public function updateDocument(Index $index) {
450
		$this->impersonateOwner($index);
451
		$this->initFileSystems($index->getOwnerId());
452
453
		try {
454
			$document = $this->generateDocumentFromIndex($index);
455
456
			return $document;
457
		} catch (FileIsNotIndexableException $e) {
458
			return null;
459
		}
460
	}
461
462
463
	/**
464
	 * @param FilesDocument $document
465
	 *
466
	 * @throws InvalidPathException
467
	 * @throws NotFoundException
468
	 * @throws NotPermittedException
469
	 */
470
	private function updateFilesDocument(FilesDocument $document) {
471
		$userFolder = $this->rootFolder->getUserFolder($document->getViewerId());
472
		$file = $userFolder->get($document->getPath());
473
474
		try {
475
			$this->updateFilesDocumentFromFile($document, $file);
476
		} catch (FileIsNotIndexableException $e) {
477
			$document->getIndex()
478
					 ->setStatus(Index::INDEX_IGNORE);
479
		}
480
	}
481
482
483
	/**
484
	 * @param FilesDocument $document
485
	 * @param Node $file
486
	 *
487
	 * @throws InvalidPathException
488
	 * @throws NotFoundException
489
	 * @throws NotPermittedException
490
	 */
491
	private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {
492
493
		$document->getIndex()
494
				 ->setSource($document->getSource());
495
496
		$this->updateDocumentAccess($document, $file);
497
		$this->updateContentFromFile($document, $file);
498
499
		$document->addTag($document->getSource());
500
	}
501
502
503
	/**
504
	 * @param FilesDocument $document
505
	 * @param Node $file
506
	 */
507
	private function updateDocumentAccess(FilesDocument $document, Node $file) {
508
509
		$index = $document->getIndex();
510
511
		if (!$index->isStatus(Index::INDEX_FULL)
512
			&& !$index->isStatus(FilesDocument::STATUS_FILE_ACCESS)) {
513
			return;
514
		}
515
516
		$this->localFilesService->updateDocumentAccess($document, $file);
517
		$this->externalFilesService->updateDocumentAccess($document, $file);
518
		$this->groupFoldersService->updateDocumentAccess($document, $file);
519
520
		$this->updateShareNames($document, $file);
521
	}
522
523
524
	/**
525
	 * @param FilesDocument $document
526
	 * @param Node $file
527
	 *
528
	 * @throws InvalidPathException
529
	 * @throws NotFoundException
530
	 * @throws NotPermittedException
531
	 */
532
	private function updateContentFromFile(FilesDocument $document, Node $file) {
533
534
		$document->setTitle($document->getPath());
535
536
		if (!$document->getIndex()
537
					  ->isStatus(Index::INDEX_CONTENT)
538
			|| $file->getType() !== FileInfo::TYPE_FILE) {
539
			return;
540
		}
541
542
		/** @var File $file */
543
		if ($file->getSize() <
544
			($this->configService->getAppValue(ConfigService::FILES_SIZE) * 1024 * 1024)) {
545
			$this->extractContentFromFileText($document, $file);
546
			$this->extractContentFromFileOffice($document, $file);
547
			$this->extractContentFromFilePDF($document, $file);
548
			$this->extractContentFromFileOCR($document, $file);
549
		}
550
551
		if ($document->getContent() === null) {
552
			$document->getIndex()
553
					 ->unsetStatus(Index::INDEX_CONTENT);
554
		}
555
	}
556
557
558
	/**
559
	 * @param FilesDocument $document
560
	 * @param Node $file
561
	 *
562
	 * @return array
563
	 */
564
	private function updateShareNames(FilesDocument $document, Node $file) {
565
566
		$users = [];
567
568
		$this->localFilesService->getShareUsersFromFile($file, $users);
569
		$this->externalFilesService->getShareUsers($document, $users);
570
		$this->groupFoldersService->getShareUsers($document, $users);
571
572
		$shareNames = [];
573
		foreach ($users as $username) {
574
			try {
575
				$user = $this->userManager->get($username);
576
				if ($user === null || $user->getLastLogin() === 0) {
577
					continue;
578
				}
579
580
				$path = $this->getPathFromViewerId($file->getId(), $username);
581
				$shareNames[MiscService::secureUsername($username)] =
582
					(!is_string($path)) ? $path = '' : $path;
583
584
			} catch (Exception $e) {
585
				$this->miscService->log(
586
					'Issue while getting information on documentId:' . $document->getId(), 0
587
				);
588
			}
589
		}
590
591
		$document->setInfo('share_names', $shareNames);
592
593
		return $shareNames;
594
	}
595
596
597
	/**
598
	 * @param string $mimeType
599
	 *
600
	 * @return string
601
	 */
602
	private function parseMimeType($mimeType) {
603
604
		$parsed = '';
605
		try {
606
			$this->parseMimeTypeText($mimeType, $parsed);
607
			$this->parseMimeTypePDF($mimeType, $parsed);
608
			$this->parseMimeTypeOffice($mimeType, $parsed);
609
		} catch (KnownFileMimeTypeException $e) {
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment why this CATCH block is empty.
Loading history...
610
		}
611
612
		return $parsed;
613
	}
614
615
616
	/**
617
	 * @param string $mimeType
618
	 * @param string $parsed
619
	 *
620
	 * @throws KnownFileMimeTypeException
621
	 */
622
	private function parseMimeTypeText($mimeType, &$parsed) {
623
624
		if (substr($mimeType, 0, 5) === 'text/') {
625
			$parsed = self::MIMETYPE_TEXT;
626
			throw new KnownFileMimeTypeException();
627
		}
628
629
		$textMimes = [
630
			'application/epub+zip'
631
		];
632
633 View Code Duplication
		foreach ($textMimes as $mime) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
634
			if (strpos($mimeType, $mime) === 0) {
635
				$parsed = self::MIMETYPE_TEXT;
636
				throw new KnownFileMimeTypeException();
637
			}
638
		}
639
	}
640
641
642
	/**
643
	 * @param string $mimeType
644
	 * @param string $parsed
645
	 *
646
	 * @throws KnownFileMimeTypeException
647
	 */
648
	private function parseMimeTypePDF($mimeType, &$parsed) {
649
650
		if ($mimeType === 'application/pdf') {
651
			$parsed = self::MIMETYPE_PDF;
652
			throw new KnownFileMimeTypeException();
653
		}
654
	}
655
656
657
	/**
658
	 * @param string $mimeType
659
	 * @param string $parsed
660
	 *
661
	 * @throws KnownFileMimeTypeException
662
	 */
663
	private function parseMimeTypeOffice($mimeType, &$parsed) {
664
665
		$officeMimes = [
666
			'application/msword',
667
			'application/vnd.oasis.opendocument',
668
			'application/vnd.sun.xml',
669
			'application/vnd.openxmlformats-officedocument',
670
			'application/vnd.ms-word',
671
			'application/vnd.ms-powerpoint',
672
			'application/vnd.ms-excel'
673
		];
674
675 View Code Duplication
		foreach ($officeMimes as $mime) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
676
			if (strpos($mimeType, $mime) === 0) {
677
				$parsed = self::MIMETYPE_OFFICE;
678
				throw new KnownFileMimeTypeException();
679
			}
680
		}
681
	}
682
683
684
	/**
685
	 * @param FilesDocument $document
686
	 * @param File $file
687
	 *
688
	 * @throws NotPermittedException
689
	 */
690
	private function extractContentFromFileText(FilesDocument $document, File $file) {
691
692
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_TEXT) {
693
			return;
694
		}
695
696
		if (!$this->isSourceIndexable($document)) {
697
			return;
698
		}
699
700
		$document->setContent(
701
			base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
702
		);
703
	}
704
705
706
	/**
707
	 * @param FilesDocument $document
708
	 * @param File $file
709
	 *
710
	 * @throws NotPermittedException
711
	 */
712 View Code Duplication
	private function extractContentFromFilePDF(FilesDocument $document, File $file) {
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
713
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_PDF) {
714
			return;
715
		}
716
717
		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_PDF);
718
		if (!$this->isSourceIndexable($document)) {
719
			return;
720
		}
721
722
		if ($this->configService->getAppValue(ConfigService::FILES_PDF) !== '1') {
723
			$document->setContent('');
724
725
			return;
726
		}
727
728
		$document->setContent(
729
			base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
730
		);
731
	}
732
733
734
	/**
735
	 * @param FilesDocument $document
736
	 * @param File $file
737
	 *
738
	 * @throws NotPermittedException
739
	 */
740 View Code Duplication
	private function extractContentFromFileOffice(FilesDocument $document, File $file) {
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
741
		if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_OFFICE) {
742
			return;
743
		}
744
745
		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OFFICE);
746
		if (!$this->isSourceIndexable($document)) {
747
			return;
748
		}
749
750
		if ($this->configService->getAppValue(ConfigService::FILES_OFFICE) !== '1') {
751
			$document->setContent('');
752
753
			return;
754
		}
755
756
		$document->setContent(
757
			base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
758
		);
759
	}
760
761
762
	/**
763
	 * @param FilesDocument $document
764
	 * @param File $file
765
	 */
766
	private function extractContentFromFileOCR(FilesDocument $document, File $file) {
767
		if ($this->configService->getAppValue(ConfigService::FILES_OCR) !== '1') {
768
			return;
769
		}
770
771
		if ($document->getContent() !== '' && $document->getContent() !== null) {
772
			return;
773
		}
774
775
		$document->setContent('');
776
		$this->extractContentUsingTesseractOCR($document, $file);
777
	}
778
779
780
	/**
781
	 * @param FilesDocument $document
782
	 * @param File $file
783
	 */
784
	private function extractContentUsingTesseractOCR(FilesDocument $document, File $file) {
785
		try {
786
			$tesseractService = $this->container->query(TesseractService::class);
787
			$extension = pathinfo($document->getPath(), PATHINFO_EXTENSION);
788
789
			if (!$tesseractService->parsedMimeType($document->getMimetype(), $extension)) {
790
				return;
791
			}
792
793
			$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OCR);
794
			if (!$this->isSourceIndexable($document)) {
795
				return;
796
			}
797
798
			$content = $tesseractService->ocrFile($file);
799
		} catch (Exception $e) {
800
			return;
801
		}
802
803
		$document->setContent(base64_encode($content), IndexDocument::ENCODED_BASE64);
804
	}
805
806
807
	/**
808
	 * @param FilesDocument $document
809
	 *
810
	 * @return bool
811
	 */
812
	private function isSourceIndexable(FilesDocument $document) {
813
		$this->configService->setDocumentIndexOption($document, $document->getSource());
814
		if ($this->configService->getAppValue($document->getSource()) !== '1') {
815
			$document->setContent('');
816
817
			return false;
818
		}
819
820
		return true;
821
	}
822
823
824
	private function impersonateOwner(Index $index) {
825
		if ($index->getOwnerId() !== '') {
826
			return;
827
		}
828
829
		$this->groupFoldersService->impersonateOwner($index);
830
		$this->externalFilesService->impersonateOwner($index);
831
	}
832
833
}
834
835