Completed
Pull Request — master (#52)
by Maxence
02:01
created

FilesService::isDocumentUpToDate()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 20
rs 9.6
c 0
b 0
f 0
cc 4
nc 4
nop 1
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * Files_FullTextSearch - Index the content of your files
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\Files_FullTextSearch\Service;
32
33
34
use daita\MySmallPhpTools\Traits\TPathTools;
35
use Exception;
36
use OCA\Files_FullTextSearch\Exceptions\EmptyUserException;
37
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException;
38
use OCA\Files_FullTextSearch\Exceptions\FilesNotFoundException;
39
use OCA\Files_FullTextSearch\Exceptions\KnownFileMimeTypeException;
40
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException;
41
use OCA\Files_FullTextSearch\Model\FilesDocument;
42
use OCA\Files_FullTextSearch\Provider\FilesProvider;
43
use OCP\App\IAppManager;
44
use OCP\AppFramework\IAppContainer;
45
use OCP\Files\File;
46
use OCP\Files\FileInfo;
47
use OCP\Files\Folder;
48
use OCP\Files\InvalidPathException;
49
use OCP\Files\IRootFolder;
50
use OCP\Files\Node;
51
use OCP\Files\NotFoundException;
52
use OCP\Files\NotPermittedException;
53
use OCP\Files\StorageNotAvailableException;
54
use OCP\FullTextSearch\IFullTextSearchManager;
55
use OCP\FullTextSearch\Model\DocumentAccess;
56
use OCP\FullTextSearch\Model\IIndex;
57
use OCP\FullTextSearch\Model\IIndexOptions;
58
use OCP\FullTextSearch\Model\IndexDocument;
59
use OCP\FullTextSearch\Model\IRunner;
60
use OCP\IUserManager;
61
use OCP\Share\IManager;
62
use Throwable;
63
64
65
/**
66
 * Class FilesService
67
 *
68
 * @package OCA\Files_FullTextSearch\Service
69
 */
70
class FilesService {
71
72
73
	use TPathTools;
74
75
76
	const MIMETYPE_TEXT = 'files_text';
77
	const MIMETYPE_PDF = 'files_pdf';
78
	const MIMETYPE_OFFICE = 'files_office';
79
	const MIMETYPE_ZIP = 'files_zip';
80
	const MIMETYPE_IMAGE = 'files_image';
81
	const MIMETYPE_AUDIO = 'files_audio';
82
83
	const CHUNK_TREE_SIZE = 2;
84
85
86
	/** @var IAppContainer */
87
	private $container;
88
89
	/** @var IRootFolder */
90
	private $rootFolder;
91
92
	/** @var IUserManager */
93
	private $userManager;
94
95
	/** @var IAppManager */
96
	private $appManager;
97
98
	/** @var IManager */
99
	private $shareManager;
100
101
	/** @var ConfigService */
102
	private $configService;
103
104
	/** @var LocalFilesService */
105
	private $localFilesService;
106
107
	/** @var ExternalFilesService */
108
	private $externalFilesService;
109
110
	/** @var GroupFoldersService */
111
	private $groupFoldersService;
112
113
	/** @var ExtensionService */
114
	private $extensionService;
115
116
	/** @var IFullTextSearchManager */
117
	private $fullTextSearchManager;
118
119
	/** @var MiscService */
120
	private $miscService;
121
122
123
	/** @var IRunner */
124
	private $runner;
125
126
	/** @var int */
127
	private $sumDocuments;
128
129
130
	/**
	 * Stores every injected collaborator in its matching private property.
	 *
	 * @param IAppContainer $container
	 * @param IRootFolder $rootFolder
	 * @param IAppManager $appManager
	 * @param IUserManager $userManager
	 * @param IManager $shareManager
	 * @param ConfigService $configService
	 * @param LocalFilesService $localFilesService
	 * @param ExternalFilesService $externalFilesService
	 * @param GroupFoldersService $groupFoldersService
	 * @param ExtensionService $extensionService
	 * @param IFullTextSearchManager $fullTextSearchManager
	 * @param MiscService $miscService
	 */
	public function __construct(
		IAppContainer $container, IRootFolder $rootFolder, IAppManager $appManager,
		IUserManager $userManager, IManager $shareManager, ConfigService $configService,
		LocalFilesService $localFilesService, ExternalFilesService $externalFilesService,
		GroupFoldersService $groupFoldersService, ExtensionService $extensionService,
		IFullTextSearchManager $fullTextSearchManager, MiscService $miscService
	) {
		// Services provided by the server (OCP namespace).
		$this->container = $container;
		$this->rootFolder = $rootFolder;
		$this->appManager = $appManager;
		$this->userManager = $userManager;
		$this->shareManager = $shareManager;
		$this->fullTextSearchManager = $fullTextSearchManager;

		// Services that live in this app.
		$this->configService = $configService;
		$this->localFilesService = $localFilesService;
		$this->externalFilesService = $externalFilesService;
		$this->groupFoldersService = $groupFoldersService;
		$this->extensionService = $extensionService;
		$this->miscService = $miscService;
	}
170
171
172
	/**
	 * Registers the runner that updateRunnerAction() reports to; while none
	 * is registered, updateRunnerAction() returns without doing anything.
	 *
	 * @param IRunner $runner
	 */
	public function setRunner(IRunner $runner) {
		$this->runner = $runner;
	}
178
179
180
	/**
	 * Lists the chunks to index for a user, starting at the 'path' index
	 * option (defaults to '/').
	 *
	 * A folder is split by getChunksFromDirectory(). Anything else becomes a
	 * single chunk expressed in the same getPathFromRoot() form, so every
	 * entry of the result can be handed to getFilesFromUser(), which expects
	 * its chunk as a string. (Previously the node object itself was returned
	 * for a non-folder path.)
	 *
	 * @param string $userId
	 * @param IIndexOptions $indexOptions
	 *
	 * @return string[] chunk paths, as produced by getPathFromRoot()
	 * @throws NotFoundException
	 * @throws InvalidPathException
	 */
	public function getChunksFromUser(string $userId, IIndexOptions $indexOptions): array {
		$this->initFileSystems($userId);

		$node = $this->rootFolder->getUserFolder($userId)
								 ->get($indexOptions->getOption('path', '/'));

		if ($node instanceof Folder) {
			return $this->getChunksFromDirectory($userId, $node);
		}

		// Single node: expose it as a path, exactly like the entries built
		// by getChunksFromDirectory().
		return [$this->getPathFromRoot($node->getPath(), $userId, true)];
	}
200
201
202
	/**
	 * Splits a folder into chunks, descending into sub-folders only while the
	 * current depth stays below CHUNK_TREE_SIZE.
	 *
	 * An empty folder yields itself as its only chunk. Files, and folders
	 * that sit at or beyond the depth limit, each yield one chunk.
	 *
	 * @param string $userId
	 * @param Folder $node
	 * @param int $level depth of the caller; incremented on entry
	 *
	 * @return array chunk paths, as produced by getPathFromRoot()
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	public function getChunksFromDirectory(string $userId, Folder $node, $level = 0): array {
		$level++;

		$listing = $node->getDirectoryListing();
		if (empty($listing)) {
			return [$this->getPathFromRoot($node->getPath(), $userId, true)];
		}

		$chunks = [];
		foreach ($listing as $child) {
			$descend = $child->getType() === FileInfo::TYPE_FOLDER
					   && $level < self::CHUNK_TREE_SIZE;

			if (!$descend) {
				$chunks[] = $this->getPathFromRoot($child->getPath(), $userId, true);
				continue;
			}

			/** @var Folder $child */
			$chunks = array_merge(
				$chunks, $this->getChunksFromDirectory($userId, $child, $level)
			);
		}

		return $chunks;
	}
232
233
234
	/**
	 * Builds the documents for one chunk of a user's files and resets the
	 * running document counter.
	 *
	 * A folder chunk is walked by getFilesFromDirectory(). Any other node
	 * gives a one-element list, or an empty list when it is not indexable.
	 *
	 * @param string $userId
	 * @param string $chunk path resolved inside the user's folder
	 *
	 * @return FilesDocument[]
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	public function getFilesFromUser(string $userId, string $chunk): array {
		$this->initFileSystems($userId);
		$this->sumDocuments = 0;

		$node = $this->rootFolder->getUserFolder($userId)
								 ->get($chunk);

		if ($node instanceof Folder) {
			return $this->getFilesFromDirectory($userId, $node);
		}

		try {
			return [$this->generateFilesDocumentFromFile($userId, $node)];
		} catch (FileIsNotIndexableException $e) {
			// A non-indexable node simply produces no document.
			return [];
		}
	}
263
264
265
	/**
	 * Recursively builds the documents for everything inside a folder.
	 *
	 * Runner progress is reported first. The folder is then skipped (empty
	 * result) when it contains a '.noindex' entry or when its storage is not
	 * available. An entry that is not indexable is skipped, and when that
	 * entry is a folder its content is not visited either.
	 *
	 * @param string $userId
	 * @param Folder $node
	 *
	 * @return FilesDocument[]
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	public function getFilesFromDirectory(string $userId, Folder $node): array {
		$this->updateRunnerAction('generateIndexFiles', true);
		$this->updateRunnerInfo(
			[
				'info'          => $node->getPath(),
				'title'         => '',
				'content'       => '',
				'documentTotal' => $this->sumDocuments
			]
		);

		try {
			$excluded = $node->nodeExists('.noindex');
		} catch (StorageNotAvailableException $e) {
			return [];
		}

		if ($excluded) {
			return [];
		}

		$collected = [];
		foreach ($node->getDirectoryListing() as $entry) {
			try {
				$collected[] = $this->generateFilesDocumentFromFile($userId, $entry);
				$this->sumDocuments++;
			} catch (FileIsNotIndexableException $e) {
				continue;
			}

			if ($entry->getType() !== FileInfo::TYPE_FOLDER) {
				continue;
			}

			/** @var Folder $entry */
			$collected = array_merge(
				$collected, $this->getFilesFromDirectory($userId, $entry)
			);
		}

		return $collected;
	}
314
315
316
	/**
	 * Initialises external files and group shares for a user.
	 *
	 * Nothing is done for an empty user id or for an id the user manager
	 * does not know.
	 *
	 * @param string $userId
	 */
	private function initFileSystems(string $userId) {
		if ($userId === '' || $this->userManager->get($userId) === null) {
			return;
		}

		$this->externalFilesService->initExternalFilesForUser($userId);
		$this->groupFoldersService->initGroupSharesForUser($userId);
	}
331
332
333
	/**
	 * Builds the FilesDocument describing a node as seen by a given viewer.
	 *
	 * The node id and owner are each read once, so the null check on the
	 * owner and the call to getUID() act on the same object. A node whose id
	 * is -1 is rejected before any document is created for it.
	 *
	 * @param string $viewerId
	 * @param Node $file
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException when the node is not indexable or its id is -1
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	private function generateFilesDocumentFromFile(string $viewerId, Node $file): FilesDocument {
		$this->isNodeIndexable($file);

		$source = $this->getFileSource($file);

		$fileId = $file->getId();
		if ($fileId === -1) {
			throw new FileIsNotIndexableException();
		}

		// Owner is optional; fall back to an empty id when it is missing or
		// when getUID() does not return a string.
		$owner = $file->getOwner();
		$ownerId = ($owner !== null) ? $owner->getUID() : '';
		if (!is_string($ownerId)) {
			$ownerId = '';
		}

		$document = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, (string)$fileId);
		$document->setAccess(new DocumentAccess());

		$document->setType($file->getType())
				 ->setOwnerId($ownerId)
				 ->setPath($this->getPathFromViewerId($fileId, $viewerId))
				 ->setViewerId($viewerId)
				 ->setMimetype($file->getMimetype());
		$document->setModifiedTime($file->getMTime())
				 ->setSource($source);

		return $document;
	}
375
376
377
	/**
	 * Asks the local, external and group-folder services, in that order, for
	 * the source of a node.
	 *
	 * Each service receives the same $source variable (presumably filled via
	 * a by-reference parameter - confirm in the services). A
	 * KnownFileSourceException ends the lookup early.
	 *
	 * @param Node $file
	 *
	 * @return string the source, or '' when no service set one
	 * @throws FileIsNotIndexableException
	 */
	private function getFileSource(Node $file): string {
		$source = '';

		$resolvers = [
			$this->localFilesService,
			$this->externalFilesService,
			$this->groupFoldersService
		];

		try {
			foreach ($resolvers as $resolver) {
				$resolver->getFileSource($file, $source);
			}
		} catch (KnownFileSourceException $e) {
			/** the source is known, no need to ask the remaining services. */
		}

		return $source;
	}
396
397
398
	/**
	 * Resolves a path inside a user's folder to its node.
	 *
	 * @param string $userId
	 * @param string $path
	 *
	 * @return Node
	 * @throws NotFoundException
	 */
	public function getFileFromPath(string $userId, string $path): Node {
		$userFolder = $this->rootFolder->getUserFolder($userId);

		return $userFolder->get($path);
	}
409
410
411
	/**
	 * Returns the first node of a user's folder that matches a file id.
	 *
	 * @param string $userId
	 * @param int $fileId
	 *
	 * @return Node
	 * @throws FilesNotFoundException when no node matches the id
	 * @throws EmptyUserException when $userId is the empty string
	 */
	public function getFileFromId(string $userId, int $fileId): Node {
		if ($userId === '') {
			throw new EmptyUserException();
		}

		$matches = $this->rootFolder->getUserFolder($userId)
									->getById($fileId);
		if (count($matches) === 0) {
			throw new FilesNotFoundException();
		}

		return array_shift($matches);
	}
435
436
437
	/**
	 * Looks up the node an index refers to, using the index owner and its
	 * document id cast to an integer.
	 *
	 * The owner is not impersonated here: per the original note, that call
	 * already seems to happen slightly earlier in the process.
	 *
	 * @param IIndex $index
	 *
	 * @return Node
	 * @throws EmptyUserException
	 * @throws FilesNotFoundException
	 */
	public function getFileFromIndex(IIndex $index): Node {
		$ownerId = $index->getOwnerId();
		$fileId = (int)$index->getDocumentId();

		return $this->getFileFromId($ownerId, $fileId);
	}
450
451
452
	/**
	 * Returns the path of a file as it appears inside a viewer's folder,
	 * without a trailing slash.
	 *
	 * @param int $fileId
	 * @param string $viewerId
	 *
	 * @throws Exception a FileIsNotIndexableException when getPathFromRoot()
	 *                   does not return a string
	 * @return string '' when the viewer's folder holds no node with that id
	 */
	private function getPathFromViewerId(int $fileId, string $viewerId): string {
		$matches = $this->rootFolder->getUserFolder($viewerId)
									->getById($fileId);
		if (count($matches) === 0) {
			return '';
		}

		$node = array_shift($matches);

		// TODO: better way to do this : we remove the '/userid/files/'
		$relative = $this->getPathFromRoot($node->getPath(), $viewerId);
		if (!is_string($relative)) {
			throw new FileIsNotIndexableException();
		}

		return $this->withoutEndSlash($relative);
	}
480
481
482
	/**
	 * Fills a document through updateFilesDocument(); on any Exception the
	 * document's index is flagged INDEX_IGNORE and the failure is logged.
	 *
	 * @param FilesDocument $document
	 */
	public function generateDocument(FilesDocument $document) {
		try {
			$this->updateFilesDocument($document);
		} catch (Exception $e) {
			// TODO - update $document with a error status instead of just ignore !
			$index = $document->getIndex();
			$index->setStatus(IIndex::INDEX_IGNORE);

			$message = 'Exception while generateDocument: ' . $e->getMessage() . ' - trace: '
					   . json_encode($e->getTrace());
			$this->miscService->log($message);
		}
	}
499
500
501
	/**
	 * Rebuilds the document an index points to.
	 *
	 * When the node cannot be fetched, the index is flagged INDEX_REMOVE and
	 * a bare document carrying that index is returned instead.
	 *
	 * @param IIndex $index
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	private function generateDocumentFromIndex(IIndex $index): FilesDocument {
		try {
			$node = $this->getFileFromIndex($index);
		} catch (Exception $e) {
			$index->setStatus(IIndex::INDEX_REMOVE);

			$removed = new FilesDocument($index->getProviderId(), $index->getDocumentId());
			$removed->setIndex($index);

			return $removed;
		}

		$this->isNodeIndexable($node);

		$filesDocument = $this->generateFilesDocumentFromFile($index->getOwnerId(), $node);
		$filesDocument->setIndex($index);
		$this->updateFilesDocumentFromFile($filesDocument, $node);

		return $filesDocument;
	}
530
531
532
	/**
	 * Tells whether the indexed version of a document is still current.
	 *
	 * If the index options no longer match the configuration, the index is
	 * flagged INDEX_CONTENT, written back to the document, and the document
	 * is reported as outdated. Otherwise the document is current only when
	 * its index status is INDEX_OK and its last index time is not older than
	 * its modification time.
	 *
	 * @param IndexDocument $document
	 *
	 * @return bool
	 */
	public function isDocumentUpToDate(IndexDocument $document): bool {
		$index = $document->getIndex();

		if (!$this->configService->compareIndexOptions($index)) {
			$index->setStatus(IIndex::INDEX_CONTENT);
			$document->setIndex($index);

			return false;
		}

		return $index->getStatus() === IIndex::INDEX_OK
			   && $index->getLastIndex() >= $document->getModifiedTime();
	}
557
558
559
	/**
	 * Regenerates the document of an index: impersonates the owner when the
	 * index has none, initialises the owner's file systems, rebuilds the
	 * document, then updates the directory content index.
	 *
	 * @param IIndex $index
	 *
	 * @return FilesDocument
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws FileIsNotIndexableException
	 */
	public function updateDocument(IIndex $index): FilesDocument {
		$this->impersonateOwner($index);

		// Read the owner only after impersonation, which may have set it.
		$ownerId = $index->getOwnerId();
		$this->initFileSystems($ownerId);

		$refreshed = $this->generateDocumentFromIndex($index);
		$this->updateDirectoryContentIndex($index);

		return $refreshed;
	}
576
577
578
	/**
	 * Refreshes a document from the node found at its path inside its
	 * viewer's folder; a non-indexable node flags the index INDEX_IGNORE.
	 *
	 * @param FilesDocument $document
	 *
	 * @throws NotFoundException
	 */
	private function updateFilesDocument(FilesDocument $document) {
		$node = $this->rootFolder->getUserFolder($document->getViewerId())
								 ->get($document->getPath());

		try {
			$this->updateFilesDocumentFromFile($document, $node);
		} catch (FileIsNotIndexableException $e) {
			$index = $document->getIndex();
			$index->setStatus(IIndex::INDEX_IGNORE);
		}
	}
594
595
596
	/**
	 * Copies the document source to its index, refreshes access rights and
	 * content from the node, then adds the source as a meta tag.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws FileIsNotIndexableException
	 */
	private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {
		$index = $document->getIndex();
		$index->setSource($document->getSource());

		$this->updateDocumentAccess($document, $file);
		$this->updateContentFromFile($document, $file);

		// Source is read again here, after access and content were updated.
		$document->addMetaTag($document->getSource());
	}
612
613
614
	/**
	 * Lets the local, external and group-folder services update the access
	 * rights of a document, then refreshes its share names.
	 *
	 * Access is always updated: an earlier guard on the INDEX_FULL and
	 * INDEX_META statuses was disabled on the assumption that the update is
	 * always needed.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws FileIsNotIndexableException
	 */
	private function updateDocumentAccess(FilesDocument $document, Node $file) {
		$accessProviders = [
			$this->localFilesService,
			$this->externalFilesService,
			$this->groupFoldersService
		];

		foreach ($accessProviders as $provider) {
			$provider->updateDocumentAccess($document, $file);
		}

		$this->updateShareNames($document, $file);
	}
635
636
637
	/**
	 * Sets the document title to its path and, when content indexing is
	 * requested for a regular file, runs the content extractors.
	 *
	 * Extraction only happens for files smaller than the configured
	 * FILES_SIZE value multiplied by 1024 * 1024. Anything thrown during
	 * extraction is passed to manageContentErrorException(). If the document
	 * ends up with null content, INDEX_CONTENT is removed from its index.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 */
	private function updateContentFromFile(FilesDocument $document, Node $file) {
		$document->setTitle($document->getPath());

		$contentRequested = $document->getIndex()
									 ->isStatus(IIndex::INDEX_CONTENT);
		if (!$contentRequested || $file->getType() !== FileInfo::TYPE_FILE) {
			return;
		}

		try {
			/** @var File $file */
			$size = $file->getSize();
			$limit = $this->configService->getAppValue(ConfigService::FILES_SIZE) * 1024 * 1024;

			if ($size < $limit) {
				$this->extractContentFromFileText($document, $file);
				$this->extractContentFromFileOffice($document, $file);
				$this->extractContentFromFilePDF($document, $file);
				$this->extractContentFromFileZip($document, $file);

				$this->extensionService->fileIndexing($document, $file);
			}
		} catch (Throwable $t) {
			$this->manageContentErrorException($document, $t);
		}

		if ($document->getContent() === null) {
			$index = $document->getIndex();
			$index->unsetStatus(IIndex::INDEX_CONTENT);
		}
	}
671
672
673
	/**
	 * Collects, for each user the file is shared with, the path under which
	 * that user sees the file, and stores the map in the document's
	 * 'share_names' info.
	 *
	 * Unknown users and users whose last login is 0 are left out. A failure
	 * for one user is logged and does not stop the others.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @return array paths keyed by the secured user name
	 */
	private function updateShareNames(FilesDocument $document, Node $file): array {
		$users = [];
		$this->localFilesService->getShareUsersFromFile($file, $users);
		$this->externalFilesService->getShareUsers($document, $users);
		$this->groupFoldersService->getShareUsers($document, $users);

		$shareNames = [];
		foreach ($users as $username) {
			try {
				$user = $this->userManager->get($username);
				if ($user === null || $user->getLastLogin() === 0) {
					continue;
				}

				$path = $this->getPathFromViewerId($file->getId(), $username);
				$key = $this->miscService->secureUsername($username);
				$shareNames[$key] = is_string($path) ? $path : '';
			} catch (Exception $e) {
				$this->miscService->log(
					'Issue while getting information on documentId:' . $document->getId(), 0
				);
			}
		}

		$document->setInfoArray('share_names', $shareNames);

		return $shareNames;
	}
710
711
712
	/**
	 * Maps a mime type (and file extension) to one of the MIMETYPE_*
	 * constants by trying the text, PDF, office and zip parsers in order.
	 *
	 * @param string $mimeType
	 * @param string $extension
	 *
	 * @return string the matching MIMETYPE_* value, or '' when none matches
	 */
	private function parseMimeType(string $mimeType, string $extension): string {
		$kind = '';

		try {
			$this->parseMimeTypeText($mimeType, $extension, $kind);
			$this->parseMimeTypePDF($mimeType, $kind);
			$this->parseMimeTypeOffice($mimeType, $kind);
			$this->parseMimeTypeZip($mimeType, $kind);
		} catch (KnownFileMimeTypeException $e) {
			/** a parser recognised the type and already filled $kind. */
		}

		return $kind;
	}
731
732
733
	/**
	 * Flags a mime type as text when it starts with 'text/' or with
	 * 'application/epub+zip'; otherwise defers to the extension-based check.
	 *
	 * @param string $mimeType
	 * @param string $extension
	 * @param string $parsed set to MIMETYPE_TEXT on a match
	 *
	 * @throws KnownFileMimeTypeException when the type is recognised as text
	 */
	private function parseMimeTypeText(string $mimeType, string $extension, string &$parsed) {
		$textPrefixes = [
			'text/',
			'application/epub+zip'
		];

		foreach ($textPrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_TEXT;
			throw new KnownFileMimeTypeException();
		}

		$this->parseMimeTypeTextByExtension($mimeType, $extension, $parsed);
	}
760
761
762
	/**
	 * Flags a generic 'application/octet-stream' file as text when its
	 * lower-cased extension is in the list of text extensions.
	 *
	 * That list is currently empty, so as written this never matches.
	 *
	 * @param string $mimeType
	 * @param string $extension
	 * @param string $parsed set to MIMETYPE_TEXT on a match
	 *
	 * @throws KnownFileMimeTypeException when the type is recognised as text
	 */
	private function parseMimeTypeTextByExtension(
		string $mimeType, string $extension, string &$parsed
	) {
		$genericMimes = [
			'application/octet-stream'
		];
		$textExtensions = [
		];

		$hasTextExtension = in_array(strtolower($extension), $textExtensions);

		foreach ($genericMimes as $mime) {
			if (!$hasTextExtension || strpos($mimeType, $mime) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_TEXT;
			throw new KnownFileMimeTypeException();
		}
	}
788
789
790
	/**
	 * Flags the exact mime type 'application/pdf' as MIMETYPE_PDF.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to MIMETYPE_PDF on a match
	 *
	 * @throws KnownFileMimeTypeException when the type is recognised as PDF
	 */
	private function parseMimeTypePDF(string $mimeType, string &$parsed) {
		if ($mimeType !== 'application/pdf') {
			return;
		}

		$parsed = self::MIMETYPE_PDF;
		throw new KnownFileMimeTypeException();
	}
803
804
805
	/**
	 * Flags the exact mime type 'application/zip' as MIMETYPE_ZIP.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to MIMETYPE_ZIP on a match
	 *
	 * @throws KnownFileMimeTypeException when the type is recognised as zip
	 */
	private function parseMimeTypeZip(string $mimeType, string &$parsed) {
		if ($mimeType !== 'application/zip') {
			return;
		}

		$parsed = self::MIMETYPE_ZIP;
		throw new KnownFileMimeTypeException();
	}
817
818
819
	/**
	 * Flags a mime type as MIMETYPE_OFFICE when it starts with one of the
	 * known office document prefixes.
	 *
	 * @param string $mimeType
	 * @param string $parsed set to MIMETYPE_OFFICE on a match
	 *
	 * @throws KnownFileMimeTypeException when the type is recognised as office
	 */
	private function parseMimeTypeOffice(string $mimeType, string &$parsed) {
		$officePrefixes = [
			'application/msword',
			'application/vnd.oasis.opendocument',
			'application/vnd.sun.xml',
			'application/vnd.openxmlformats-officedocument',
			'application/vnd.ms-word',
			'application/vnd.ms-powerpoint',
			'application/vnd.ms-excel'
		];

		foreach ($officePrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_OFFICE;
			throw new KnownFileMimeTypeException();
		}
	}
844
845
846
	/**
	 * Stores the base64-encoded content of a text file in the document,
	 * provided the file parses as MIMETYPE_TEXT and its source is indexable.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileText(FilesDocument $document, File $file) {
		$kind = $this->parseMimeType($document->getMimeType(), $file->getExtension());

		// The source check only runs for text files (short-circuit).
		if ($kind !== self::MIMETYPE_TEXT || !$this->isSourceIndexable($document)) {
			return;
		}

		$encoded = base64_encode($file->getContent());
		$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
	}
866
867
868
	/**
	 * Handles content extraction for PDF files.
	 *
	 * Does nothing unless the file parses as MIMETYPE_PDF. Otherwise the
	 * FILES_PDF index option is recorded on the document; if the source is
	 * indexable, the content becomes the base64-encoded file when the
	 * FILES_PDF app value is '1', and an empty string when it is not.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFilePDF(FilesDocument $document, File $file) {
		$kind = $this->parseMimeType($document->getMimeType(), $file->getExtension());
		if ($kind !== self::MIMETYPE_PDF) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_PDF);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_PDF) === '1') {
			$document->setContent(
				base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
			);
		} else {
			$document->setContent('');
		}
	}
895
896
897
	/**
	 * Handles content extraction for zip files.
	 *
	 * Does nothing unless the file parses as MIMETYPE_ZIP. Otherwise the
	 * FILES_ZIP index option is recorded on the document; if the source is
	 * indexable, the content becomes the base64-encoded file when the
	 * FILES_ZIP app value is '1', and an empty string when it is not.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileZip(FilesDocument $document, File $file) {
		$kind = $this->parseMimeType($document->getMimeType(), $file->getExtension());
		if ($kind !== self::MIMETYPE_ZIP) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_ZIP);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_ZIP) === '1') {
			$document->setContent(
				base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
			);
		} else {
			$document->setContent('');
		}
	}
924
925
926
	/**
	 * Handles content extraction for office documents.
	 *
	 * Does nothing unless the file parses as MIMETYPE_OFFICE. Otherwise the
	 * FILES_OFFICE index option is recorded on the document; if the source
	 * is indexable, the content becomes the base64-encoded file when the
	 * FILES_OFFICE app value is '1', and an empty string when it is not.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileOffice(FilesDocument $document, File $file) {
		$kind = $this->parseMimeType($document->getMimeType(), $file->getExtension());
		if ($kind !== self::MIMETYPE_OFFICE) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OFFICE);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue(ConfigService::FILES_OFFICE) === '1') {
			$document->setContent(
				base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
			);
		} else {
			$document->setContent('');
		}
	}
953
954
955
	/**
956
	 * @param FilesDocument $document
957
	 *
958
	 * @return bool
959
	 */
960
	private function isSourceIndexable(FilesDocument $document): bool {
		// Record the source-specific option on the document before checking
		// whether that source is switched on in the app configuration.
		$this->configService->setDocumentIndexOption($document, $document->getSource());

		$enabled = ($this->configService->getAppValue($document->getSource()) === '1');
		if (!$enabled) {
			// Source not enabled: the document is left with empty content.
			$document->setContent('');
		}

		return $enabled;
	}
970
971
972
	/**
973
	 * @param IIndex $index
974
	 */
975
	private function impersonateOwner(IIndex $index) {
		// Only indexes without an owner id are handed to the services below.
		if ($index->getOwnerId() === '') {
			$this->groupFoldersService->impersonateOwner($index);
			$this->externalFilesService->impersonateOwner($index);
		}
	}
983
984
985
	/**
986
	 * @param $action
987
	 * @param bool $force
988
	 *
989
	 * @throws Exception
990
	 */
991
	private function updateRunnerAction(string $action, bool $force = false) {
		// Without a runner attached there is nothing to notify.
		if ($this->runner !== null) {
			$this->runner->updateAction($action, $force);
		}
	}
998
999
1000
	/**
1001
	 * @param array $data
1002
	 */
1003
	private function updateRunnerInfo($data) {
		// Forward the info array to the runner, when one is attached.
		if ($this->runner !== null) {
			$this->runner->setInfoArray($data);
		}
	}
1010
1011
	/**
1012
	 * @param IndexDocument $document
1013
	 * @param Throwable $t
1014
	 */
1015
	private function manageContentErrorException(IndexDocument $document, Throwable $t) {
		$title = 'Error while getting file content';
		$reason = $t->getMessage();

		// Attach the error to the document's index, then report it to the runner.
		$document->getIndex()
				 ->addError($title, $reason, IIndex::ERROR_SEV_3);
		$this->updateNewIndexError(
			$document->getIndex(), $title, $reason, IIndex::ERROR_SEV_3
		);

		// The stack trace is logged as JSON; json_encode() yields false on
		// failure, in which case nothing is written to the log.
		$encodedTrace = json_encode($t->getTrace());
		if (is_string($encodedTrace)) {
			$this->miscService->log($encodedTrace, 0);
		}
	}
1033
1034
1035
	/**
1036
	 * @param IIndex $index
1037
	 */
1038
	private function updateDirectoryContentIndex(IIndex $index) {
		// Only indexes carrying the INDEX_META status are processed.
		if ($index->isStatus(IIndex::INDEX_META)) {
			try {
				$node = $this->getFileFromIndex($index);
				if ($node->getType() !== File::TYPE_FOLDER) {
					return;
				}

				/** @var Folder $node */
				$this->updateDirectoryMeta($node);
			} catch (Exception $e) {
				// Any failure while resolving or updating the folder is ignored.
			}
		}
	}
1052
1053
1054
	/**
1055
	 * @param Folder $node
1056
	 */
1057
	private function updateDirectoryMeta(Folder $node) {
		try {
			$children = $node->getDirectoryListing();
		} catch (NotFoundException $e) {
			// The folder cannot be listed: nothing to flag.
			return;
		}

		// Flag every direct child with INDEX_META; a child whose id cannot be
		// resolved is skipped and the loop carries on with the next one.
		foreach ($children as $child) {
			try {
				$childId = (string)$child->getId();
				$this->fullTextSearchManager->updateIndexStatus(
					'files', $childId, IIndex::INDEX_META
				);
			} catch (InvalidPathException $e) {
				// skipped on purpose
			} catch (NotFoundException $e) {
				// skipped on purpose
			}
		}
	}
1074
1075
1076
	/**
1077
	 * @param IIndex $index
1078
	 * @param string $message
1079
	 * @param string $exception
1080
	 * @param int $sev
1081
	 */
1082
	private function updateNewIndexError(IIndex $index, string $message, string $exception, int $sev
	) {
		// Errors are only reported when a runner is attached.
		if ($this->runner !== null) {
			$this->runner->newIndexError($index, $message, $exception, $sev);
		}
	}
1090
1091
1092
	/**
1093
	 * @param Node $file
1094
	 *
1095
	 * @throws FileIsNotIndexableException
1096
	 */
1097
	private function isNodeIndexable(Node $file) {
		// A folder containing a '.noindex' entry makes the node non-indexable.
		if ($file->getType() === File::TYPE_FOLDER) {
			/** @var Folder $file */
			if ($file->nodeExists('.noindex')) {
				throw new FileIsNotIndexableException();
			}
		}

		$parent = $file->getParent();
		$parentPath = $parent->getPath();

		// Same offset as before: 8 plus the position of the first '/' in the
		// parent path once withoutBeginSlash() has been applied (presumably
		// this skips the '/userid/files/' prefix - see getPathFromRoot()).
		// strpos() may return false; it then counts as 0, as it did in the
		// original arithmetic.
		$offset = 8 + (int)strpos($this->withoutBeginSlash($parentPath), '/');

		// The recursion used to stop when substr($parentPath, $offset) returned
		// false, i.e. when $offset lies beyond the end of the path. Since
		// PHP 8.0 substr() returns '' in that case, so an is_string() check is
		// always true and the walk would never stop. Comparing the offset with
		// the path length keeps the PHP 7 stop condition on every version.
		if ($offset <= strlen($parentPath)) {
			$this->isNodeIndexable($parent);
		}
	}
1113
1114
1115
	/**
1116
	 * @param string $path
1117
	 * @param string $userId
1118
	 *
1119
	 * @param bool $entrySlash
1120
	 *
1121
	 * @return string
1122
	 */
1123
	private function getPathFromRoot(string $path, string $userId, bool $entrySlash = false) {
		// TODO: find a better way to strip the leading '/userid/files/' prefix.
		// TODO: is userId really needed, or can the path be cropped as in isNodeIndexable()?
		$relative = substr($path, strlen($userId) + 8);

		// substr() may return false on older PHP versions when the path is
		// shorter than the prefix; treat that as an empty relative path.
		$prefix = $entrySlash ? '/' : '';

		return $prefix . (is_string($relative) ? $relative : '');
	}
1133
1134
}
1135
1136