Completed
Push — master ( c01dbf...bf8d93 )
by Maxence
02:22
created

FilesService::extractContentFromFileOCR()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 18
Code Lines 10

Duplication

Lines 18
Ratio 100 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 18
loc 18
rs 9.2
cc 4
eloc 10
nc 4
nop 2
1
<?php
2
/**
3
 * Files_FullTextSearch - Index the content of your files
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\Files_FullTextSearch\Service;
28
29
30
use Exception;
31
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException;
32
use OCA\Files_FullTextSearch\Exceptions\KnownFileMimeTypeException;
33
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException;
34
use OCA\Files_FullTextSearch\Model\FilesDocument;
35
use OCA\Files_FullTextSearch\Provider\FilesProvider;
36
use OCA\FullTextSearch\Exceptions\InterruptException;
37
use OCA\FullTextSearch\Exceptions\TickDoesNotExistException;
38
use OCA\FullTextSearch\Model\Index;
39
use OCA\FullTextSearch\Model\IndexDocument;
40
use OCA\FullTextSearch\Model\Runner;
41
use OCP\Files\File;
42
use OCP\Files\FileInfo;
43
use OCP\Files\Folder;
44
use OCP\Files\InvalidPathException;
45
use OCP\Files\IRootFolder;
46
use OCP\Files\Node;
47
use OCP\Files\NotFoundException;
48
use OCP\Files\NotPermittedException;
49
use OCP\Files\StorageNotAvailableException;
50
use OCP\IUserManager;
51
use OCP\Share\IManager;
52
53
class FilesService {
54
55
	const MIMETYPE_TEXT = 'files_text';
56
	const MIMETYPE_PDF = 'files_pdf';
57
	const MIMETYPE_OFFICE = 'files_office';
58
	const MIMETYPE_OCR = 'files_ocr';
59
	const MIMETYPE_IMAGE = 'files_image';
60
	const MIMETYPE_AUDIO = 'files_audio';
61
62
63
	/** @var IRootFolder */
64
	private $rootFolder;
65
66
	/** @var IUserManager */
67
	private $userManager;
68
69
	/** @var IManager */
70
	private $shareManager;
71
72
	/** @var ConfigService */
73
	private $configService;
74
75
	/** @var LocalFilesService */
76
	private $localFilesService;
77
78
	/** @var ExternalFilesService */
79
	private $externalFilesService;
80
81
	/** @var GroupFoldersService */
82
	private $groupFoldersService;
83
84
	/** @var MiscService */
85
	private $miscService;
86
87
88
	/**
	 * FilesService constructor.
	 *
	 * Only stores the injected collaborators on the instance.
	 *
	 * @param IRootFolder $rootFolder
	 * @param IUserManager $userManager
	 * @param IManager $shareManager
	 * @param ConfigService $configService
	 * @param LocalFilesService $localFilesService
	 * @param ExternalFilesService $externalFilesService
	 * @param GroupFoldersService $groupFoldersService
	 * @param MiscService $miscService
	 */
	public function __construct(
		IRootFolder $rootFolder, IUserManager $userManager, IManager $shareManager,
		ConfigService $configService,
		LocalFilesService $localFilesService,
		ExternalFilesService $externalFilesService,
		GroupFoldersService $groupFoldersService,
		MiscService $miscService
	) {
		// services of this app
		$this->miscService = $miscService;
		$this->configService = $configService;
		$this->groupFoldersService = $groupFoldersService;
		$this->externalFilesService = $externalFilesService;
		$this->localFilesService = $localFilesService;

		// objects provided by the server
		$this->shareManager = $shareManager;
		$this->userManager = $userManager;
		$this->rootFolder = $rootFolder;
	}
121
122
123
	/**
	 * Returns the documents for every entry found under the root of a user's folder.
	 *
	 * The file systems are initialised for the user first, then the listing is
	 * delegated to getFilesFromDirectory() starting at '/'.
	 *
	 * @param Runner $runner
	 * @param string $userId
	 *
	 * @return FilesDocument[]
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromUser(Runner $runner, $userId) {
		$this->initFileSystems($userId);

		$userFolder = $this->rootFolder->getUserFolder($userId);

		/** @var Folder $root */
		$root = $userFolder->get('/');

		return $this->getFilesFromDirectory($runner, $userId, $root);
	}
144
145
146
	/**
	 * Initialises the external files and the group shares for a user.
	 *
	 * Nothing is done when $userId is an empty string.
	 *
	 * @param string $userId
	 */
	private function initFileSystems($userId) {
		if ($userId !== '') {
			$this->externalFilesService->initExternalFilesForUser($userId);
			$this->groupFoldersService->initGroupSharesForUser($userId);
		}
	}
157
158
159
	/**
	 * Recursively builds the list of documents for a folder and its sub-folders.
	 *
	 * A folder is skipped entirely (empty result) when it contains a '.noindex'
	 * entry, or when checking for that entry raises StorageNotAvailableException.
	 * Entries that raise FileIsNotIndexableException are left out, and a folder
	 * left out this way is not descended into.
	 *
	 * @param Runner $runner
	 * @param string $userId
	 * @param Folder $node
	 *
	 * @return FilesDocument[]
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromDirectory(Runner $runner, $userId, Folder $node) {
		$documents = [];

		try {
			if ($node->nodeExists('.noindex')) {
				return $documents;
			}
		} catch (StorageNotAvailableException $e) {
			return $documents;
		}

		$files = $node->getDirectoryListing();
		foreach ($files as $file) {
			$runner->update('getFilesFromDirectory');

			try {
				$documents[] = $this->generateFilesDocumentFromFile($file, $userId);
			} catch (FileIsNotIndexableException $e) {
				continue;
			}

			if ($file->getType() !== FileInfo::TYPE_FOLDER) {
				continue;
			}

			// Append in place instead of array_merge(): array_merge() would copy the
			// whole accumulated list again for every sub-folder. Both lists use
			// sequential integer keys, so the resulting order and keys are the same.
			/** @var Folder $file */
			foreach ($this->getFilesFromDirectory($runner, $userId, $file) as $subDocument) {
				$documents[] = $subDocument;
			}
		}

		return $documents;
	}
200
201
202
	/**
	 * Creates a FilesDocument describing a node as seen by a given viewer.
	 *
	 * The source is resolved first, so a non-indexable file raises before any
	 * document is created. The owner id is left empty when the node has no owner.
	 *
	 * @param Node $file
	 * @param string $viewerId
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	private function generateFilesDocumentFromFile(Node $file, $viewerId) {
		$source = $this->getFileSource($file);
		$document = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, $file->getId());

		$owner = $file->getOwner();
		$ownerId = ($owner !== null) ? $owner->getUID() : '';

		$document->setType($file->getType())
				 ->setSource($source)
				 ->setOwnerId($ownerId)
				 ->setPath($this->getPathFromViewerId($file->getId(), $viewerId))
				 ->setViewerId($viewerId)
				 ->setModifiedTime($file->getMTime())
				 ->setMimetype($file->getMimetype());

		return $document;
	}
234
235
236
	/**
	 * Asks the local, external and group-folders services, in that order, for
	 * the source of a node.
	 *
	 * The lookup stops at the first service that throws
	 * KnownFileSourceException. The returned value starts as an empty string;
	 * presumably each service fills it through a by-reference parameter
	 * (their signatures are not visible here - to be confirmed).
	 *
	 * @param Node $file
	 *
	 * @return string
	 * @throws FileIsNotIndexableException
	 * @throws NotFoundException
	 */
	private function getFileSource(Node $file) {
		$origin = '';

		try {
			$this->localFilesService->getFileSource($file, $origin);
			$this->externalFilesService->getFileSource($file, $origin);
			$this->groupFoldersService->getFileSource($file, $origin);
		} catch (KnownFileSourceException $e) {
			// the source is known, no need to ask the remaining services.
		}

		return $origin;
	}
256
257
258
	/**
	 * Returns the node located at $path inside the folder of a user.
	 *
	 * @param string $userId
	 * @param string $path
	 *
	 * @return Node
	 * @throws NotFoundException
	 */
	public function getFileFromPath($userId, $path) {
		$userFolder = $this->rootFolder->getUserFolder($userId);

		return $userFolder->get($path);
	}
269
270
271
	/**
	 * Returns the first node matching a file id inside the folder of a user.
	 *
	 * Returns null when $userId is empty, when the lookup throws, or when no
	 * node matches the id.
	 *
	 * @param string $userId
	 * @param int $fileId
	 *
	 * @return Node|null
	 */
	public function getFileFromId($userId, $fileId) {
		if ($userId === '') {
			return null;
		}

		try {
			$userFolder = $this->rootFolder->getUserFolder($userId);
			$matches = $userFolder->getById($fileId);
		} catch (Exception $e) {
			return null;
		}

		// only the first match is used
		return (count($matches) === 0) ? null : array_shift($matches);
	}
298
299
300
	/**
	 * Returns the path of a file as seen from the folder of a viewer, or an
	 * empty string when the viewer has no node with that id.
	 *
	 * @param int $fileId
	 * @param string $viewerId
	 *
	 * @throws Exception
	 * @return string
	 */
	private function getPathFromViewerId($fileId, $viewerId) {
		$viewerFolder = $this->rootFolder->getUserFolder($viewerId);
		$matches = $viewerFolder->getById($fileId);

		if (count($matches) === 0) {
			return '';
		}

		// only the first match is used
		$node = array_shift($matches);

		// TODO: better way to do this : we remove the '/userid/files/'
		// NOTE(review): setDocumentInfo() and setDocumentMore() cut at
		// 7 + strlen($viewerId) instead of 8 - confirm which offset is intended.
		$prefixLength = 8 + strlen($viewerId);

		return MiscService::noEndSlash(substr($node->getPath(), $prefixLength));
	}
323
324
325
	/**
	 * Fills the path and the file name of a document from the node its viewer
	 * can access under the document id.
	 *
	 * The document is left untouched when the viewer has no such node.
	 *
	 * @param FilesDocument $document
	 */
	public function setDocumentInfo(FilesDocument $document) {
		$access = $document->getAccess();
		$viewerId = $access->getViewerId();

		$viewerFolder = $this->rootFolder->getUserFolder($viewerId);
		$matches = $viewerFolder->getById($document->getId());

		if (count($matches) === 0) {
			return;
		}

		// we only take the first file
		$node = array_shift($matches);

		// TODO: better way to do this : we remove the '/userId/files/'
		$prefixLength = 7 + strlen($viewerId);
		$relativePath = MiscService::noEndSlash(substr($node->getPath(), $prefixLength));

		$document->setPath($relativePath);
		$document->setFileName($node->getName());
	}
348
349
350
	/**
	 * Uses the path of the document as its title.
	 *
	 * @param FilesDocument $document
	 */
	public function setDocumentTitle(FilesDocument $document) {
		$title = $document->getPath();

		$document->setTitle($title);
	}
356
357
358
	/**
	 * Sets the link of a document to the 'files.view.index' route, pointing at
	 * the directory part of its path and scrolling to its file name.
	 *
	 * @param FilesDocument $document
	 */
	public function setDocumentLink(FilesDocument $document) {
		$filename = $document->getFileName();
		$path = $document->getPath();

		// the directory is the path minus the trailing file name.
		// NOTE(review): with an empty file name the length is 0 and substr()
		// returns '' - confirm this case cannot happen or is acceptable.
		$dir = substr($path, 0, -strlen($filename));

		$urlGenerator = \OC::$server->getURLGenerator();
		$link = $urlGenerator->linkToRoute(
			'files.view.index',
			[
				'dir'      => $dir,
				'scrollto' => $filename,
			]
		);

		$document->setLink($link);
	}
378
379
380
	/**
	 * Attaches extra metadata (webdav id, path, timestamps, mimetype, etag,
	 * permissions, size, favorite flag) to a document.
	 *
	 * Nothing is set when the viewer of the document cannot reach the file.
	 *
	 * @param FilesDocument $document
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	public function setDocumentMore(FilesDocument $document) {
		$viewerId = $document->getAccess()
							 ->getViewerId();
		$file = $this->getFileFromId($viewerId, $document->getId());

		if ($file === null) {
			return;
		}

		// TODO: better way to do this : we remove the '/userid/files/'
		$relativePath = MiscService::noEndSlash(
			substr($file->getPath(), 7 + strlen($viewerId))
		);

		$document->setMore(
			[
				'webdav'             => $this->getWebdavId($document->getId()),
				'path'               => $relativePath,
				'timestamp'          => $file->getMTime(), // FIXME: get the creation date of the file
				'mimetype'           => $file->getMimetype(),
				'modified_timestamp' => $file->getMTime(),
				'etag'               => $file->getEtag(),
				'permissions'        => $file->getPermissions(),
				'size'               => $file->getSize(),
				'favorite'           => false // FIXME: get the favorite status
			]
		);
	}
413
414
415
	/**
	 * Updates every FilesDocument of a list and returns the updated documents.
	 *
	 * Entries that are not FilesDocument instances are dropped. When updating a
	 * document throws, its index status is set to INDEX_IGNORE, the exception
	 * is printed, and the document is still part of the result.
	 *
	 * @param FilesDocument[] $documents
	 *
	 * @return FilesDocument[]
	 */
	public function generateDocuments($documents) {
		$processed = [];

		foreach ($documents as $candidate) {
			if ($candidate instanceof FilesDocument) {
				try {
					$this->updateFilesDocument($candidate);
				} catch (Exception $e) {
					// TODO - update $document with a error status instead of just ignore !
					$candidate->getIndex()
							  ->setStatus(Index::INDEX_IGNORE);
					echo 'Exception: ' . json_encode($e->getTrace()) . ' - ' . $e->getMessage() . "\n";
				}

				$processed[] = $candidate;
			}
		}

		return $processed;
	}
443
444
445
	/**
	 * Builds the document matching an index entry.
	 *
	 * When the owner of the index cannot reach the file anymore, the index is
	 * flagged INDEX_REMOVE and a bare document carrying that index is returned.
	 * Otherwise the document is generated from the file and updated from it.
	 *
	 * @param Index $index
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function generateDocumentFromIndex(Index $index) {
		$ownerId = $index->getOwnerId();
		$file = $this->getFileFromId($ownerId, $index->getDocumentId());

		if ($file !== null) {
			$document = $this->generateFilesDocumentFromFile($file, $ownerId);
			$document->setIndex($index);
			$this->updateFilesDocumentFromFile($document, $file);

			return $document;
		}

		// the file is gone: ask for the removal of the document
		$index->setStatus(Index::INDEX_REMOVE);
		$removed = new FilesDocument($index->getProviderId(), $index->getDocumentId());
		$removed->setIndex($index);

		return $removed;
	}
472
473
474
	/**
	 * Tells whether the index of a document is still current.
	 *
	 * When the index options do not match the configuration, the index is
	 * flagged INDEX_CONTENT, stored back on the document, and false is
	 * returned. Otherwise the document is up to date only if the index status
	 * is INDEX_OK and the last indexing is not older than the modification
	 * time of the document.
	 *
	 * @param IndexDocument $document
	 *
	 * @return bool
	 */
	public function isDocumentUpToDate($document) {
		$index = $document->getIndex();

		if (!$this->configService->compareIndexOptions($index)) {
			$index->setStatus(Index::INDEX_CONTENT);
			$document->setIndex($index);

			return false;
		}

		return $index->getStatus() === Index::INDEX_OK
			   && $index->getLastIndex() >= $document->getModifiedTime();
	}
499
500
501
	/**
	 * Regenerates the document described by an index entry.
	 *
	 * The owner is impersonated and the file systems are initialised before the
	 * document is generated. Returns null when the file is not indexable.
	 *
	 * @param Index $index
	 *
	 * @return FilesDocument|null
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	public function updateDocument(Index $index) {
		$this->impersonateOwner($index);
		$this->initFileSystems($index->getOwnerId());

		try {
			return $this->generateDocumentFromIndex($index);
		} catch (FileIsNotIndexableException $e) {
			return null;
		}
	}
521
522
523
	/**
	 * Updates a document from the node found at its path in the folder of its
	 * viewer.
	 *
	 * The index of the document is flagged INDEX_IGNORE when the file turns out
	 * not to be indexable.
	 *
	 * @param FilesDocument $document
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocument(FilesDocument $document) {
		$viewerFolder = $this->rootFolder->getUserFolder($document->getViewerId());
		$node = $viewerFolder->get($document->getPath());

		try {
			$this->updateFilesDocumentFromFile($document, $node);
		} catch (FileIsNotIndexableException $e) {
			$index = $document->getIndex();
			$index->setStatus(Index::INDEX_IGNORE);
		}
	}
541
542
543
	/**
	 * Copies the source of the document onto its index, refreshes the access
	 * rights and the content from the node, then tags the document with its
	 * source.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {
		$index = $document->getIndex();
		$index->setSource($document->getSource());

		$this->updateDocumentAccess($document, $file);
		$this->updateContentFromFile($document, $file);

		// the source is read again after the updates, as the original code does
		$document->addTag($document->getSource());
	}
561
562
563
	/**
	 * Refreshes the access rights and the share names of a document.
	 *
	 * This only runs when the index is flagged INDEX_FULL or
	 * STATUS_FILE_ACCESS; otherwise the document is left untouched.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 */
	private function updateDocumentAccess(FilesDocument $document, Node $file) {
		$index = $document->getIndex();

		$accessRequested = $index->isStatus(Index::INDEX_FULL)
						   || $index->isStatus(FilesDocument::STATUS_FILE_ACCESS);
		if (!$accessRequested) {
			return;
		}

		$this->localFilesService->updateDocumentAccess($document, $file);
		$this->externalFilesService->updateDocumentAccess($document, $file);
		$this->groupFoldersService->updateDocumentAccess($document, $file);

		$this->updateShareNames($document, $file);
	}
582
583
584
	/**
	 * Sets the title of a document and, when requested, its content.
	 *
	 * The content is only extracted when the index is flagged INDEX_CONTENT,
	 * the node is a file, and its size is below the FILES_SIZE setting
	 * (multiplied by 1024 * 1024). When no content was set, the INDEX_CONTENT
	 * flag is removed from the index.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateContentFromFile(FilesDocument $document, Node $file) {
		$document->setTitle($document->getPath());

		$contentRequested = $document->getIndex()
									 ->isStatus(Index::INDEX_CONTENT);
		if (!$contentRequested) {
			return;
		}

		if ($file->getType() !== FileInfo::TYPE_FILE) {
			return;
		}

		/** @var File $file */
		$size = $file->getSize();
		$limit = $this->configService->getAppValue(ConfigService::FILES_SIZE) * 1024 * 1024;
		if ($size < $limit) {
			// each extractor checks the mimetype itself and ignores other files
			$this->extractContentFromFileText($document, $file);
			$this->extractContentFromFileOffice($document, $file);
			$this->extractContentFromFileOCR($document, $file);
			$this->extractContentFromFilePDF($document, $file);
		}

		if ($document->getContent() === null) {
			$document->getIndex()
					 ->unsetStatus(Index::INDEX_CONTENT);
		}
	}
616
617
618
	/**
	 * Stores, under the 'share_names' info of a document, the path of the file
	 * as seen by each user it is shared with.
	 *
	 * The users are gathered from the local, external and group-folders
	 * services. The result is keyed by MiscService::secureUsername() of each
	 * user.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @return array
	 */
	private function updateShareNames(FilesDocument $document, Node $file) {
		$recipients = [];

		$this->localFilesService->getShareUsersFromFile($file, $recipients);
		$this->externalFilesService->getShareUsers($document, $recipients);
		$this->groupFoldersService->getShareUsers($document, $recipients);

		$namesByUser = [];
		foreach ($recipients as $recipient) {
			try {
				$namesByUser[MiscService::secureUsername($recipient)] =
					$this->getPathFromViewerId($file->getId(), $recipient);
			} catch (Exception $e) {
				// best effort: a user whose path cannot be resolved is left out.
			}
		}

		$document->setInfo('share_names', $namesByUser);

		return $namesByUser;
	}
660
661
	/**
	 * Builds the webdav id of a file: the file id left-padded with zeros to at
	 * least 8 characters, followed by the 'instanceid' system value.
	 *
	 * @param int $fileId
	 *
	 * @return string
	 */
	private function getWebdavId($fileId) {
		$paddedId = sprintf('%08s', $fileId);

		return $paddedId . $this->configService->getSystemValue('instanceid');
	}
671
672
673
	/**
	 * Maps a mimetype to one of the MIMETYPE_* constants of this class.
	 *
	 * The text, PDF, office and OCR parsers are tried in that order. Returns an
	 * empty string when none of them recognises the mimetype.
	 *
	 * @param string $mimeType
	 *
	 * @return string
	 */
	private function parseMimeType($mimeType) {
		$kind = '';

		try {
			$this->parseMimeTypeText($mimeType, $kind);
			$this->parseMimeTypePDF($mimeType, $kind);
			$this->parseMimeTypeOffice($mimeType, $kind);
			$this->parseMimeTypeOCR($mimeType, $kind);
		} catch (KnownFileMimeTypeException $e) {
			// a parser recognised the mimetype and filled $kind; skip the others.
		}

		return $kind;
	}
691
692
693
	/**
	 * Flags a mimetype as MIMETYPE_TEXT when it starts with 'text/',
	 * 'application/octet-stream' or 'application/epub+zip'.
	 *
	 * On a match, $parsed is set and KnownFileMimeTypeException is thrown to
	 * stop the caller from trying other parsers.
	 *
	 * @param string $mimeType
	 * @param string $parsed
	 *
	 * @throws KnownFileMimeTypeException
	 */
	private function parseMimeTypeText($mimeType, &$parsed) {
		$textPrefixes = [
			'text/',
			'application/octet-stream',
			'application/epub+zip'
		];

		foreach ($textPrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_TEXT;
			throw new KnownFileMimeTypeException();
		}
	}
718
719
720
	/**
	 * Flags a mimetype as MIMETYPE_PDF when it is exactly 'application/pdf'.
	 *
	 * On a match, $parsed is set and KnownFileMimeTypeException is thrown.
	 *
	 * @param string $mimeType
	 * @param string $parsed
	 *
	 * @throws KnownFileMimeTypeException
	 */
	private function parseMimeTypePDF($mimeType, &$parsed) {
		if ($mimeType !== 'application/pdf') {
			return;
		}

		$parsed = self::MIMETYPE_PDF;
		throw new KnownFileMimeTypeException();
	}
733
734
735
	/**
	 * Flags a mimetype as MIMETYPE_OFFICE when it starts with one of the known
	 * office prefixes.
	 *
	 * On a match, $parsed is set and KnownFileMimeTypeException is thrown.
	 *
	 * @param string $mimeType
	 * @param string $parsed
	 *
	 * @throws KnownFileMimeTypeException
	 */
	private function parseMimeTypeOffice($mimeType, &$parsed) {
		$officePrefixes = [
			'application/msword',
			'application/vnd.oasis.opendocument',
			'application/vnd.sun.xml',
			'application/vnd.openxmlformats-officedocument',
			'application/vnd.ms-word',
			'application/vnd.ms-powerpoint',
			'application/vnd.ms-excel'
		];

		foreach ($officePrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_OFFICE;
			throw new KnownFileMimeTypeException();
		}
	}
760
761
762
	/**
	 * Flags a mimetype as MIMETYPE_OCR when it starts with 'application/x-cbr'.
	 *
	 * On a match, $parsed is set and KnownFileMimeTypeException is thrown.
	 *
	 * @param string $mimeType
	 * @param string $parsed
	 *
	 * @throws KnownFileMimeTypeException
	 */
	private function parseMimeTypeOCR($mimeType, &$parsed) {
		$ocrPrefixes = [
			'application/x-cbr'
		];

		foreach ($ocrPrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_OCR;
			throw new KnownFileMimeTypeException();
		}
	}
781
782
783
	/**
	 * Stores the base64-encoded content of a text file on its document.
	 *
	 * Does nothing for other mimetypes. When the source of the document is not
	 * indexable, isSourceIndexable() has already set an empty content.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileText(FilesDocument $document, File $file) {
		$isText = ($this->parseMimeType($document->getMimeType()) === self::MIMETYPE_TEXT);

		// the source check only runs for text files (short-circuit)
		if (!$isText || !$this->isSourceIndexable($document)) {
			return;
		}

		$encoded = base64_encode($file->getContent());
		$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
	}
801
802
803
	/**
	 * Stores the base64-encoded content of a PDF file on its document.
	 *
	 * Does nothing for other mimetypes. The FILES_PDF index option is recorded
	 * on the document; the content is set to an empty string when the source
	 * is not indexable or when the FILES_PDF setting is not '1'.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFilePDF(FilesDocument $document, File $file) {
		$kind = $this->parseMimeType($document->getMimeType());
		if ($kind !== self::MIMETYPE_PDF) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_PDF);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_PDF) === '1') {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);

			return;
		}

		// PDF indexing is disabled
		$document->setContent('');
	}
827
828
829
	/**
	 * Stores the base64-encoded content of an office file on its document.
	 *
	 * Does nothing for other mimetypes. The FILES_OFFICE index option is
	 * recorded on the document; the content is set to an empty string when the
	 * source is not indexable or when the FILES_OFFICE setting is not '1'.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileOffice(FilesDocument $document, File $file) {
		$kind = $this->parseMimeType($document->getMimeType());
		if ($kind !== self::MIMETYPE_OFFICE) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OFFICE);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_OFFICE) === '1') {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);

			return;
		}

		// office indexing is disabled
		$document->setContent('');
	}
853
854
855
	/**
	 * Stores the base64-encoded content of an OCR-type file on its document.
	 *
	 * Does nothing for other mimetypes. The FILES_OCR index option is recorded
	 * on the document; the content is set to an empty string when the source
	 * is not indexable or when the FILES_OCR setting is not '1'.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileOCR(FilesDocument $document, File $file) {
		$kind = $this->parseMimeType($document->getMimeType());
		if ($kind !== self::MIMETYPE_OCR) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OCR);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_OCR) === '1') {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);

			return;
		}

		// OCR indexing is disabled
		$document->setContent('');
	}
879
880
881
	/**
	 * Tells whether the app setting named after the source of a document is '1'.
	 *
	 * The source is recorded as an index option on the document either way.
	 * When the setting is not '1', the content of the document is set to an
	 * empty string.
	 *
	 * @param FilesDocument $document
	 *
	 * @return bool
	 */
	private function isSourceIndexable(FilesDocument $document) {
		$this->configService->setDocumentIndexOption($document, $document->getSource());

		$enabled = ($this->configService->getAppValue($document->getSource()) === '1');
		if (!$enabled) {
			$document->setContent('');
		}

		return $enabled;
	}
896
897
898
	/**
	 * Delegates to GroupFoldersService::impersonateOwner() when the index has
	 * no owner id; does nothing when an owner id is already set.
	 *
	 * @param Index $index
	 */
	private function impersonateOwner(Index $index) {
		if ($index->getOwnerId() === '') {
			$this->groupFoldersService->impersonateOwner($index);
		}
	}
905
906
}
907
908