Completed
Push — master ( a77847...eae57b )
by Maxence
13s queued 11s
created

FilesService::extractContentFromFileOffice()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 21

Duplication

Lines 21
Ratio 100 %

Importance

Changes 0
Metric Value
dl 21
loc 21
rs 9.584
c 0
b 0
f 0
cc 4
nc 4
nop 2
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * Files_FullTextSearch - Index the content of your files
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCA\Files_FullTextSearch\Service;
32
33
34
use daita\MySmallPhpTools\Traits\TPathTools;
35
use Exception;
36
use OC\FullTextSearch\Model\DocumentAccess;
37
use OCA\Files_FullTextSearch\Exceptions\EmptyUserException;
38
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException;
39
use OCA\Files_FullTextSearch\Exceptions\FilesNotFoundException;
40
use OCA\Files_FullTextSearch\Exceptions\KnownFileMimeTypeException;
41
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException;
42
use OCA\Files_FullTextSearch\Model\FilesDocument;
43
use OCA\Files_FullTextSearch\Provider\FilesProvider;
44
use OCP\App\IAppManager;
45
use OCP\AppFramework\IAppContainer;
46
use OCP\Comments\ICommentsManager;
47
use OCP\Files\File;
48
use OCP\Files\FileInfo;
49
use OCP\Files\Folder;
50
use OCP\Files\InvalidPathException;
51
use OCP\Files\IRootFolder;
52
use OCP\Files\Node;
53
use OCP\Files\NotFoundException;
54
use OCP\Files\NotPermittedException;
55
use OCP\Files\StorageNotAvailableException;
56
use OCP\FullTextSearch\IFullTextSearchManager;
57
use OCP\FullTextSearch\Model\IIndex;
58
use OCP\FullTextSearch\Model\IIndexDocument;
59
use OCP\FullTextSearch\Model\IIndexOptions;
60
use OCP\FullTextSearch\Model\IRunner;
61
use OCP\IUserManager;
62
use OCP\Share\IManager as IShareManager;
63
use Throwable;
64
65
66
/**
67
 * Class FilesService
68
 *
69
 * @package OCA\Files_FullTextSearch\Service
70
 */
71
class FilesService {
72
73
74
	use TPathTools;
75
76
77
	const MIMETYPE_TEXT = 'files_text';
78
	const MIMETYPE_PDF = 'files_pdf';
79
	const MIMETYPE_OFFICE = 'files_office';
80
	const MIMETYPE_ZIP = 'files_zip';
81
	const MIMETYPE_IMAGE = 'files_image';
82
	const MIMETYPE_AUDIO = 'files_audio';
83
84
	const CHUNK_TREE_SIZE = 2;
85
86
87
	/** @var IAppContainer */
88
	private $container;
89
90
	/** @var IRootFolder */
91
	private $rootFolder;
92
93
	/** @var IUserManager */
94
	private $userManager;
95
96
	/** @var IAppManager */
97
	private $appManager;
98
99
	/** @var IShareManager */
100
	private $shareManager;
101
102
	/** @var ICommentsManager */
103
	private $commentsManager;
104
105
	/** @var ConfigService */
106
	private $configService;
107
108
	/** @var LocalFilesService */
109
	private $localFilesService;
110
111
	/** @var ExternalFilesService */
112
	private $externalFilesService;
113
114
	/** @var GroupFoldersService */
115
	private $groupFoldersService;
116
117
	/** @var ExtensionService */
118
	private $extensionService;
119
120
	/** @var IFullTextSearchManager */
121
	private $fullTextSearchManager;
122
123
	/** @var MiscService */
124
	private $miscService;
125
126
127
	/** @var IRunner */
128
	private $runner;
129
130
	/** @var int */
131
	private $sumDocuments;
132
133
134
	/**
	 * FilesService constructor.
	 *
	 * Keeps a reference to each injected collaborator; no other work is done here.
	 *
	 * @param IAppContainer $container
	 * @param IRootFolder $rootFolder
	 * @param IAppManager $appManager
	 * @param IUserManager $userManager
	 * @param IShareManager $shareManager
	 * @param ICommentsManager $commentsManager
	 * @param ConfigService $configService
	 * @param LocalFilesService $localFilesService
	 * @param ExternalFilesService $externalFilesService
	 * @param GroupFoldersService $groupFoldersService
	 * @param ExtensionService $extensionService
	 * @param IFullTextSearchManager $fullTextSearchManager
	 * @param MiscService $miscService
	 */
	public function __construct(
		IAppContainer $container,
		IRootFolder $rootFolder,
		IAppManager $appManager,
		IUserManager $userManager,
		IShareManager $shareManager,
		ICommentsManager $commentsManager,
		ConfigService $configService,
		LocalFilesService $localFilesService,
		ExternalFilesService $externalFilesService,
		GroupFoldersService $groupFoldersService,
		ExtensionService $extensionService,
		IFullTextSearchManager $fullTextSearchManager,
		MiscService $miscService
	) {
		// OCP services.
		$this->container = $container;
		$this->rootFolder = $rootFolder;
		$this->appManager = $appManager;
		$this->userManager = $userManager;
		$this->shareManager = $shareManager;
		$this->commentsManager = $commentsManager;
		$this->fullTextSearchManager = $fullTextSearchManager;

		// Services of this app.
		$this->configService = $configService;
		$this->localFilesService = $localFilesService;
		$this->externalFilesService = $externalFilesService;
		$this->groupFoldersService = $groupFoldersService;
		$this->extensionService = $extensionService;
		$this->miscService = $miscService;
	}
177
178
179
	/**
	 * Stores the given runner on this service instance.
	 *
	 * @param IRunner $runner
	 */
	public function setRunner(IRunner $runner) {
		$this->runner = $runner;
	}
185
186
187
	/**
	 * Lists the chunks to index for a user, starting at the 'path' index
	 * option (defaults to '/').
	 *
	 * When the starting node is a folder, chunks come from
	 * getChunksFromDirectory(). When it is not a folder, the node is reported
	 * as a single chunk, expressed the same way as the folder case (via
	 * getPathFromRoot()) so that it can be handed to getFilesFromUser(), which
	 * requires a string chunk.
	 *
	 * @param string $userId
	 * @param IIndexOptions $indexOptions
	 *
	 * @return array chunk paths as produced by getPathFromRoot()
	 * @throws NotFoundException
	 * @throws InvalidPathException
	 */
	public function getChunksFromUser(string $userId, IIndexOptions $indexOptions): array {
		$this->initFileSystems($userId);

		$node = $this->rootFolder->getUserFolder($userId)
								 ->get($indexOptions->getOption('path', '/'));
		if ($node instanceof Folder) {
			return $this->getChunksFromDirectory($userId, $node);
		}

		// Previously the Node object itself was returned here, which does not
		// match the path entries returned for folders.
		return [$this->getPathFromRoot($node->getPath(), $userId, true)];
	}
207
208
209
	/**
	 * Recursively collects chunk paths below $node.
	 *
	 * Sub-folders are descended into while the current depth is below
	 * CHUNK_TREE_SIZE; every other entry (files, and folders at the depth
	 * limit) is recorded as a chunk. A folder without any entry is recorded
	 * as a chunk itself.
	 *
	 * @param string $userId
	 * @param Folder $node
	 * @param int $level depth of the caller; 0 for the starting folder
	 *
	 * @return array chunk paths as produced by getPathFromRoot()
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	private function getChunksFromDirectory(string $userId, Folder $node, int $level = 0): array {
		$depth = $level + 1;

		$listing = $node->getDirectoryListing();
		if (empty($listing)) {
			return [$this->getPathFromRoot($node->getPath(), $userId, true)];
		}

		$chunks = [];
		foreach ($listing as $entry) {
			if ($entry->getType() === FileInfo::TYPE_FOLDER && $depth < self::CHUNK_TREE_SIZE) {
				/** @var Folder $entry */
				$chunks = array_merge(
					$chunks, $this->getChunksFromDirectory($userId, $entry, $depth)
				);
			} else {
				$chunks[] = $this->getPathFromRoot($entry->getPath(), $userId, true);
			}
		}

		return $chunks;
	}
239
240
241
	/**
	 * Builds the documents for one chunk of a user's files.
	 *
	 * For a folder chunk the result contains the documents of its parents
	 * followed by the documents found below the folder. For any other node
	 * the result contains that node's document, or nothing when the node is
	 * not indexable. The document counter is reset at the start of each call.
	 *
	 * @param string $userId
	 * @param string $chunk
	 *
	 * @return FilesDocument[]
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	public function getFilesFromUser(string $userId, string $chunk): array {
		$this->initFileSystems($userId);
		$this->sumDocuments = 0;

		$node = $this->rootFolder->getUserFolder($userId)
								 ->get($chunk);

		if ($node instanceof Folder) {
			$fromParents = $this->generateFilesDocumentFromParent($userId, $node);

			return array_merge($fromParents, $this->getFilesFromDirectory($userId, $node));
		}

		try {
			return [$this->generateFilesDocumentFromFile($userId, $node)];
		} catch (FileIsNotIndexableException $e) {
			/** not indexable: the chunk yields no document. */
			return [];
		}
	}
272
273
274
	/**
	 * Recursively builds the documents for every indexable entry below $node.
	 *
	 * Nothing is returned for the folder when it contains a '.noindex' entry,
	 * when its storage is not available, or when it sits on an 'external'
	 * mount while the FILES_EXTERNAL app value is '2'. Entries that are not
	 * indexable are skipped, and skipped folders are not descended into.
	 *
	 * @param string $userId
	 * @param Folder $node
	 *
	 * @return FilesDocument[]
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	public function getFilesFromDirectory(string $userId, Folder $node): array {
		$this->updateRunnerAction('generateIndexFiles', true);
		$this->updateRunnerInfo(
			[
				'info'          => $node->getPath(),
				'title'         => '',
				'content'       => '',
				'documentTotal' => $this->sumDocuments
			]
		);

		try {
			$optedOut = $node->nodeExists('.noindex');
		} catch (StorageNotAvailableException $e) {
			return [];
		}

		if ($optedOut) {
			return [];
		}

		if ($this->configService->getAppValue(ConfigService::FILES_EXTERNAL) === '2'
			&& $node->getMountPoint()
					->getMountType() === 'external') {
			return [];
		}

		$collected = [];
		foreach ($node->getDirectoryListing() as $entry) {
			try {
				$collected[] = $this->generateFilesDocumentFromFile($userId, $entry);
				$this->sumDocuments++;
			} catch (FileIsNotIndexableException $e) {
				continue;
			}

			if ($entry->getType() === FileInfo::TYPE_FOLDER) {
				/** @var Folder $entry */
				$collected = array_merge(
					$collected, $this->getFilesFromDirectory($userId, $entry)
				);
			}
		}

		return $collected;
	}
329
330
331
	/**
	 * Initialises the external-files and group-folders services for a user.
	 *
	 * Does nothing when $userId is empty or unknown to the user manager.
	 *
	 * @param string $userId
	 */
	private function initFileSystems(string $userId) {
		if ($userId === '' || $this->userManager->get($userId) === null) {
			return;
		}

		$this->externalFilesService->initExternalFilesForUser($userId);
		$this->groupFoldersService->initGroupSharesForUser($userId);
	}
346
347
348
	/**
	 * Builds the documents of up to CHUNK_TREE_SIZE ancestors of $parent,
	 * nearest ancestor first.
	 *
	 * Collection is best effort: the first exception raised while walking up
	 * stops the walk, and the documents gathered so far are returned.
	 *
	 * @param string $userId
	 * @param Folder $parent
	 *
	 * @return array
	 */
	private function generateFilesDocumentFromParent(string $userId, Folder $parent): array {
		$collected = [];
		$ancestor = $parent;
		$remaining = self::CHUNK_TREE_SIZE;

		try {
			while ($remaining-- > 0) {
				$ancestor = $ancestor->getParent();
				$collected[] = $this->generateFilesDocumentFromFile($userId, $ancestor);
			}
		} catch (Exception $e) {
			/** deliberately ignored: keep whatever was collected before the failure. */
		}

		return $collected;
	}
366
367
368
	/**
	 * Builds a FilesDocument describing $file as seen by $viewerId.
	 *
	 * The document carries the node type, owner id (empty when the node has
	 * no owner), path for the viewer, viewer id, mimetype, modification time
	 * and source.
	 *
	 * @param string $viewerId
	 * @param Node $file
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException when the node id is -1 (isNodeIndexable()
	 *                                     is also called first)
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	private function generateFilesDocumentFromFile(string $viewerId, Node $file): FilesDocument {
		$this->isNodeIndexable($file);

		$source = $this->getFileSource($file);
		$document = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, (string)$file->getId());
		$document->setAccess(new DocumentAccess());

		if ($file->getId() === -1) {
			throw new FileIsNotIndexableException();
		}

		$ownerId = ($file->getOwner() !== null) ? $file->getOwner()
													   ->getUID() : '';
		if (!is_string($ownerId)) {
			$ownerId = '';
		}

		$document->setType($file->getType())
				 ->setOwnerId($ownerId)
				 ->setPath($this->getPathFromViewerId($file->getId(), $viewerId))
				 ->setViewerId($viewerId)
				 ->setMimetype($file->getMimetype());
		$document->setModifiedTime($file->getMTime())
				 ->setSource($source);

		return $document;
	}
410
411
412
	/**
	 * Asks the local, external and group-folder services, in that order, for
	 * the source of $file.
	 *
	 * A service signals that it has identified the source by throwing
	 * KnownFileSourceException, which ends the lookup. The result is an empty
	 * string when no service set a value.
	 *
	 * @param Node $file
	 *
	 * @return string
	 * @throws FileIsNotIndexableException
	 */
	private function getFileSource(Node $file): string {
		$fileSource = '';

		try {
			$this->localFilesService->getFileSource($file, $fileSource);
			$this->externalFilesService->getFileSource($file, $fileSource);
			$this->groupFoldersService->getFileSource($file, $fileSource);
		} catch (KnownFileSourceException $e) {
			/** the source is known, nothing more to look up. */
		}

		return $fileSource;
	}
431
432
433
	/**
	 * Fetches the node at $path inside the user folder of $userId.
	 *
	 * @param string $userId
	 * @param string $path
	 *
	 * @return Node
	 * @throws NotFoundException
	 */
	public function getFileFromPath(string $userId, string $path): Node {
		$userFolder = $this->rootFolder->getUserFolder($userId);

		return $userFolder->get($path);
	}
444
445
446
	/**
	 * Fetches the first node matching $fileId inside the user folder of $userId.
	 *
	 * @param string $userId
	 * @param int $fileId
	 *
	 * @return Node
	 * @throws FilesNotFoundException when no node matches $fileId
	 * @throws EmptyUserException when $userId is an empty string
	 */
	public function getFileFromId(string $userId, int $fileId): Node {
		if ($userId === '') {
			throw new EmptyUserException();
		}

		$matches = $this->rootFolder->getUserFolder($userId)
									->getById($fileId);
		if (count($matches) === 0) {
			throw new FilesNotFoundException();
		}

		return array_shift($matches);
	}
469
470
471
	/**
	 * Fetches the node an index refers to, using the index owner id and its
	 * document id cast to int.
	 *
	 * @param IIndex $index
	 *
	 * @return Node
	 * @throws EmptyUserException
	 * @throws FilesNotFoundException
	 */
	public function getFileFromIndex(IIndex $index): Node {
		// impersonateOwner($index) is intentionally not called here: it seems
		// to already be called slightly earlier in the process.
		$ownerId = $index->getOwnerId();
		$fileId = (int)$index->getDocumentId();

		return $this->getFileFromId($ownerId, $fileId);
	}
484
485
486
	/**
	 * Returns the path of a file as seen from the user folder of $viewerId,
	 * without a trailing slash.
	 *
	 * An empty string is returned when the file id is not found in the
	 * viewer's folder.
	 *
	 * @param int $fileId
	 * @param string $viewerId
	 *
	 * @return string
	 * @throws Exception FileIsNotIndexableException when getPathFromRoot()
	 *                   does not return a string
	 */
	private function getPathFromViewerId(int $fileId, string $viewerId): string {
		$matches = $this->rootFolder->getUserFolder($viewerId)
									->getById($fileId);
		if (count($matches) === 0) {
			return '';
		}

		$node = array_shift($matches);

		// TODO: better way to do this : we remove the '/userid/files/'
		$relativePath = $this->getPathFromRoot($node->getPath(), $viewerId);
		if (!is_string($relativePath)) {
			throw new FileIsNotIndexableException();
		}

		return $this->withoutEndSlash($relativePath);
	}
514
515
516
	/**
	 * Refreshes $document from its file.
	 *
	 * Any exception raised during the refresh marks the document's index as
	 * INDEX_IGNORE and is logged with its message and trace; it is not
	 * rethrown.
	 *
	 * @param FilesDocument $document
	 */
	public function generateDocument(FilesDocument $document) {
		try {
			$this->updateFilesDocument($document);
		} catch (Exception $e) {
			// TODO - update $document with a error status instead of just ignore !
			$document->getIndex()
					 ->setStatus(IIndex::INDEX_IGNORE);

			$message = 'Exception while generateDocument: ' . $e->getMessage() . ' - trace: '
					   . json_encode($e->getTrace());
			$this->miscService->log($message);
		}
	}
533
534
535
	/**
	 * Builds the document an index refers to.
	 *
	 * When the file cannot be fetched, or when it sits on an 'external' mount
	 * while the FILES_EXTERNAL app value is '2', the index is flagged
	 * INDEX_REMOVE and a bare document carrying only that index is returned.
	 * Otherwise a full document is generated for the index owner and updated
	 * from the file.
	 *
	 * @param IIndex $index
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	private function generateDocumentFromIndex(IIndex $index): FilesDocument {
		try {
			$file = $this->getFileFromIndex($index);

			if ($file->getMountPoint()
					 ->getMountType() === 'external'
				&& $this->configService->getAppValue(ConfigService::FILES_EXTERNAL) === '2') {
				throw new Exception();
			}
		} catch (Exception $e) {
			$index->setStatus(IIndex::INDEX_REMOVE);

			$removed = new FilesDocument($index->getProviderId(), $index->getDocumentId());
			$removed->setIndex($index);

			return $removed;
		}

		$this->isNodeIndexable($file);

		$generated = $this->generateFilesDocumentFromFile($index->getOwnerId(), $file);
		$generated->setIndex($index);
		$this->updateFilesDocumentFromFile($generated, $file);

		return $generated;
	}
570
571
572
	/**
	 * Tells whether the indexed version of $document is still current.
	 *
	 * The document is outdated when the index options differ from the
	 * configuration (the index is then flagged INDEX_CONTENT), when the index
	 * status is not INDEX_OK, or when the last index time is older than the
	 * document's modification time.
	 *
	 * @param IIndexDocument $document
	 *
	 * @return bool
	 */
	public function isDocumentUpToDate(IIndexDocument $document): bool {
		$this->extensionService->indexComparing($document);

		$index = $document->getIndex();

		if (!$this->configService->compareIndexOptions($index)) {
			$index->setStatus(IIndex::INDEX_CONTENT);
			$document->setIndex($index);

			return false;
		}

		if ($index->getStatus() !== IIndex::INDEX_OK) {
			return false;
		}

		return $index->getLastIndex() >= $document->getModifiedTime();
	}
600
601
602
	/**
	 * Regenerates the document of an index: impersonates the index owner,
	 * initialises the owner's file systems, builds the document, then updates
	 * the directory content index.
	 *
	 * @param IIndex $index
	 *
	 * @return FilesDocument
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws FileIsNotIndexableException
	 */
	public function updateDocument(IIndex $index): FilesDocument {
		$this->impersonateOwner($index);
		$this->initFileSystems($index->getOwnerId());

		$updated = $this->generateDocumentFromIndex($index);
		$this->updateDirectoryContentIndex($index);

		return $updated;
	}
619
620
621
	/**
	 * Refreshes $document from the node found at the document path inside
	 * the viewer's folder.
	 *
	 * When the file turns out not to be indexable, the document's index is
	 * flagged INDEX_IGNORE.
	 *
	 * @param FilesDocument $document
	 *
	 * @throws NotFoundException
	 */
	private function updateFilesDocument(FilesDocument $document) {
		$viewerFolder = $this->rootFolder->getUserFolder($document->getViewerId());
		$node = $viewerFolder->get($document->getPath());

		try {
			$this->updateFilesDocumentFromFile($document, $node);
		} catch (FileIsNotIndexableException $e) {
			$index = $document->getIndex();
			$index->setStatus(IIndex::INDEX_IGNORE);
		}
	}
637
638
639
	/**
	 * Copies the document source onto its index, refreshes access rights and
	 * content from $file, then adds the source as a meta tag.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws FileIsNotIndexableException
	 */
	private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {
		$index = $document->getIndex();
		$index->setSource($document->getSource());

		$this->updateDocumentAccess($document, $file);
		$this->updateContentFromFile($document, $file);

		$document->addMetaTag($document->getSource());
	}
655
656
657
	/**
	 * Lets the local, external and group-folder services update the access
	 * rights of $document, then refreshes its share names.
	 *
	 * Access is always updated, whatever the index status: an earlier guard
	 * on INDEX_FULL / INDEX_META was disabled on the assumption that the
	 * update is always needed.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @throws FileIsNotIndexableException
	 */
	private function updateDocumentAccess(FilesDocument $document, Node $file) {
		$this->localFilesService->updateDocumentAccess($document, $file);
		$this->externalFilesService->updateDocumentAccess($document, $file);
		$this->groupFoldersService->updateDocumentAccess($document, $file);

		$this->updateShareNames($document, $file);
	}
678
679
680
	/**
	 * Sets the title of $document and, when applicable, its content and
	 * comments.
	 *
	 * Content is only extracted for regular files whose index has the
	 * INDEX_CONTENT or INDEX_META status, and only when the file is smaller
	 * than the FILES_SIZE app value (in megabytes). Any Throwable raised
	 * during extraction is handed to manageContentErrorException(). When no
	 * content was set, INDEX_CONTENT is removed from the index status.
	 * Comments are only refreshed when the early return is not taken.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 */
	private function updateContentFromFile(FilesDocument $document, Node $file) {
		$document->setTitle($document->getPath());

		$wantsContentOrMeta = $document->getIndex()
									   ->isStatus(IIndex::INDEX_CONTENT)
							  || $document->getIndex()
										  ->isStatus(IIndex::INDEX_META);
		if (!$wantsContentOrMeta || $file->getType() !== FileInfo::TYPE_FILE) {
			return;
		}

		try {
			/** @var File $file */
			$size = $file->getSize();
			$limit = $this->configService->getAppValue(ConfigService::FILES_SIZE) * 1024 * 1024;
			if ($size < $limit) {
				$this->extractContentFromFileText($document, $file);
				$this->extractContentFromFileOffice($document, $file);
				$this->extractContentFromFilePDF($document, $file);
				$this->extractContentFromFileZip($document, $file);

				$this->extensionService->fileIndexing($document, $file);
			}
		} catch (Throwable $t) {
			$this->manageContentErrorException($document, $t);
		}

		if ($document->getContent() === null) {
			$document->getIndex()
					 ->unsetStatus(IIndex::INDEX_CONTENT);
		}

		$this->updateCommentsFromFile($document);
	}
719
720
721
	/**
	 * Adds a 'comments' part to $document, made of one "<actorId> message"
	 * entry per comment attached to the file.
	 *
	 * @param FilesDocument $document
	 */
	private function updateCommentsFromFile(FilesDocument $document) {
		$lines = [];
		foreach ($this->commentsManager->getForObject('files', $document->getId()) as $comment) {
			$author = $comment->getActorId();
			$lines[] = '<' . $author . '> ' . $comment->getMessage();
		}

		$document->addPart('comments', implode(" \n ", $lines));
	}
734
735
736
	/**
	 * Computes, for every user the file is shared with, the path of the file
	 * as seen by that user, and stores the map in the 'share_names' info of
	 * $document.
	 *
	 * Users that are unknown or whose last login is 0 are skipped. A failure
	 * for one user is logged and does not stop the others.
	 *
	 * @param FilesDocument $document
	 * @param Node $file
	 *
	 * @return array map of secured username => path
	 */
	private function updateShareNames(FilesDocument $document, Node $file): array {
		$sharedWith = [];
		$this->localFilesService->getShareUsersFromFile($file, $sharedWith);
		$this->externalFilesService->getShareUsers($document, $sharedWith);
		$this->groupFoldersService->getShareUsers($document, $sharedWith);

		$names = [];
		foreach ($sharedWith as $entry) {
			$username = (string)$entry;

			try {
				$user = $this->userManager->get($username);
				if ($user === null || $user->getLastLogin() === 0) {
					continue;
				}

				$path = $this->getPathFromViewerId($file->getId(), $username);
				if (!is_string($path)) {
					$path = '';
				}

				$names[$this->miscService->secureUsername($username)] = $path;
			} catch (Exception $e) {
				$this->miscService->log(
					'Issue while getting information on documentId:' . $document->getId(), 0
				);
			}
		}

		$document->setInfoArray('share_names', $names);

		return $names;
	}
775
776
777
	/**
	 * Maps a mime type (and file extension) to one of the MIMETYPE_*
	 * categories, or an empty string when no parser recognises it.
	 *
	 * Parsers are tried in order: text, PDF, office, zip. A parser reports a
	 * match by filling the result and throwing KnownFileMimeTypeException.
	 *
	 * @param string $mimeType
	 * @param string $extension
	 *
	 * @return string
	 */
	private function parseMimeType(string $mimeType, string $extension): string {
		$category = '';

		try {
			$this->parseMimeTypeText($mimeType, $extension, $category);
			$this->parseMimeTypePDF($mimeType, $category);
			$this->parseMimeTypeOffice($mimeType, $category);
			$this->parseMimeTypeZip($mimeType, $category);
		} catch (KnownFileMimeTypeException $e) {
			/** a parser recognised the mime type; $category is already set. */
		}

		return $category;
	}
796
797
798
	/**
	 * Flags $parsed as MIMETYPE_TEXT when the mime type starts with 'text/'
	 * or 'application/epub+zip'; otherwise defers to the extension-based check.
	 *
	 * @param string $mimeType
	 * @param string $extension
	 * @param string $parsed filled by reference on a match
	 *
	 * @throws KnownFileMimeTypeException when the mime type is recognised
	 */
	private function parseMimeTypeText(string $mimeType, string $extension, string &$parsed) {
		$textPrefixes = [
			'text/',
			'application/epub+zip'
		];

		foreach ($textPrefixes as $prefix) {
			if (strpos($mimeType, $prefix) === 0) {
				$parsed = self::MIMETYPE_TEXT;
				throw new KnownFileMimeTypeException();
			}
		}

		$this->parseMimeTypeTextByExtension($mimeType, $extension, $parsed);
	}
825
826
827
	/**
	 * Flags $parsed as MIMETYPE_TEXT when the mime type starts with
	 * 'application/octet-stream' and the lower-cased extension is listed as a
	 * text extension.
	 *
	 * The list of text extensions is currently empty, so nothing matches yet.
	 *
	 * @param string $mimeType
	 * @param string $extension
	 * @param string $parsed filled by reference on a match
	 *
	 * @throws KnownFileMimeTypeException when the mime type is recognised
	 */
	private function parseMimeTypeTextByExtension(
		string $mimeType, string $extension, string &$parsed
	) {
		$textMimes = [
			'application/octet-stream'
		];
		$textExtension = [
		];

		// The extension check does not depend on the mime being tested, so it
		// is done once. Strict comparison avoids loose matches between
		// numeric-looking strings.
		if (!in_array(strtolower($extension), $textExtension, true)) {
			return;
		}

		foreach ($textMimes as $mime) {
			if (strpos($mimeType, $mime) === 0) {
				$parsed = self::MIMETYPE_TEXT;
				throw new KnownFileMimeTypeException();
			}
		}
	}
853
854
855
	/**
	 * Flags $parsed as MIMETYPE_PDF when the mime type is exactly
	 * 'application/pdf'.
	 *
	 * @param string $mimeType
	 * @param string $parsed filled by reference on a match
	 *
	 * @throws KnownFileMimeTypeException when the mime type is recognised
	 */
	private function parseMimeTypePDF(string $mimeType, string &$parsed) {
		if ($mimeType !== 'application/pdf') {
			return;
		}

		$parsed = self::MIMETYPE_PDF;
		throw new KnownFileMimeTypeException();
	}
868
869
870
	/**
	 * Flags $parsed as MIMETYPE_ZIP when the mime type is exactly
	 * 'application/zip'.
	 *
	 * @param string $mimeType
	 * @param string $parsed filled by reference on a match
	 *
	 * @throws KnownFileMimeTypeException when the mime type is recognised
	 */
	private function parseMimeTypeZip(string $mimeType, string &$parsed) {
		if ($mimeType !== 'application/zip') {
			return;
		}

		$parsed = self::MIMETYPE_ZIP;
		throw new KnownFileMimeTypeException();
	}
882
883
884
	/**
	 * Flags $parsed as MIMETYPE_OFFICE when the mime type starts with one of
	 * the known office document prefixes.
	 *
	 * @param string $mimeType
	 * @param string $parsed filled by reference on a match
	 *
	 * @throws KnownFileMimeTypeException when the mime type is recognised
	 */
	private function parseMimeTypeOffice(string $mimeType, string &$parsed) {
		$officePrefixes = [
			'application/msword',
			'application/vnd.oasis.opendocument',
			'application/vnd.sun.xml',
			'application/vnd.openxmlformats-officedocument',
			'application/vnd.ms-word',
			'application/vnd.ms-powerpoint',
			'application/vnd.ms-excel'
		];

		foreach ($officePrefixes as $prefix) {
			if (strpos($mimeType, $prefix) !== 0) {
				continue;
			}

			$parsed = self::MIMETYPE_OFFICE;
			throw new KnownFileMimeTypeException();
		}
	}
909
910
911
	/**
	 * Stores the base64-encoded content of $file in $document when the
	 * document's mime type is categorised as text and its source is indexable.
	 *
	 * @param FilesDocument $document
	 * @param File $file
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileText(FilesDocument $document, File $file) {
		$category = $this->parseMimeType($document->getMimeType(), $file->getExtension());
		if ($category !== self::MIMETYPE_TEXT) {
			return;
		}

		if (!$this->isSourceIndexable($document)) {
			return;
		}

		$encoded = base64_encode($file->getContent());
		$document->setContent($encoded, IIndexDocument::ENCODED_BASE64);
	}
931
932
933
	/**
	 * Stores the base64-encoded content of a PDF file in the document, if
	 * the file is a PDF and both the document source and the PDF option are
	 * enabled. See extractContentUsingIndexOption() for the exact flow.
	 *
	 * @param FilesDocument $document document receiving the content
	 * @param File $file file the content is read from
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFilePDF(FilesDocument $document, File $file) {
		$this->extractContentUsingIndexOption(
			$document, $file, self::MIMETYPE_PDF, ConfigService::FILES_PDF
		);
	}


	/**
	 * Stores the base64-encoded content of a zip file in the document, if
	 * the file is a zip and both the document source and the zip option are
	 * enabled. See extractContentUsingIndexOption() for the exact flow.
	 *
	 * @param FilesDocument $document document receiving the content
	 * @param File $file file the content is read from
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileZip(FilesDocument $document, File $file) {
		$this->extractContentUsingIndexOption(
			$document, $file, self::MIMETYPE_ZIP, ConfigService::FILES_ZIP
		);
	}


	/**
	 * Stores the base64-encoded content of an office file in the document,
	 * if the file is an office document and both the document source and the
	 * office option are enabled. See extractContentUsingIndexOption() for the
	 * exact flow.
	 *
	 * @param FilesDocument $document document receiving the content
	 * @param File $file file the content is read from
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileOffice(FilesDocument $document, File $file) {
		$this->extractContentUsingIndexOption(
			$document, $file, self::MIMETYPE_OFFICE, ConfigService::FILES_OFFICE
		);
	}


	/**
	 * Shared implementation of the PDF/zip/office content extraction, which
	 * only differ by the expected mime type and the config option involved.
	 *
	 * Steps, in order:
	 *  1. return untouched unless parseMimeType() yields $expectedType;
	 *  2. record $option on the document via setDocumentIndexOption();
	 *  3. return if isSourceIndexable() refuses the document (it empties the
	 *     content itself in that case);
	 *  4. empty the content and return if the app value of $option is not '1';
	 *  5. otherwise store the base64-encoded file content.
	 *
	 * The two last parameters are deliberately left without scalar type hints:
	 * the constants passed in are declared outside of this block.
	 *
	 * @param FilesDocument $document document receiving the content
	 * @param File $file file the content is read from
	 * @param mixed $expectedType one of the self::MIMETYPE_* constants
	 * @param mixed $option one of the ConfigService::FILES_* option keys
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentUsingIndexOption(
		FilesDocument $document, File $file, $expectedType, $option
	) {
		if ($this->parseMimeType($document->getMimeType(), $file->getExtension())
			!== $expectedType) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, $option);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		if ($this->configService->getAppValue($option) !== '1') {
			$document->setContent('');

			return;
		}

		$document->setContent(
			base64_encode($file->getContent()), IIndexDocument::ENCODED_BASE64
		);
	}
1018
1019
1020
	/**
	 * Tells whether the source of the document is enabled for indexing.
	 *
	 * The source is first recorded on the document as an index option, then
	 * looked up as an app value: anything other than '1' means "disabled",
	 * in which case the document content is emptied.
	 *
	 * @param FilesDocument $document document whose source is checked
	 *
	 * @return bool true when the app value for the source is '1'
	 */
	private function isSourceIndexable(FilesDocument $document): bool {
		$this->configService->setDocumentIndexOption($document, $document->getSource());

		$enabled = ($this->configService->getAppValue($document->getSource()) === '1');
		if (!$enabled) {
			$document->setContent('');
		}

		return $enabled;
	}
1035
1036
1037
	/**
	 * Lets the group folders and external files services impersonate the
	 * owner of an index, but only when the index has no owner id yet
	 * (empty string). An index that already has an owner is left alone.
	 *
	 * @param IIndex $index index to complete
	 */
	private function impersonateOwner(IIndex $index) {
		if ($index->getOwnerId() === '') {
			// Order is kept as-is: group folders first, then external files.
			$this->groupFoldersService->impersonateOwner($index);
			$this->externalFilesService->impersonateOwner($index);
		}
	}
1048
1049
1050
	/**
	 * Forwards an action update to the runner; a no-op when no runner is set.
	 *
	 * @param string $action action passed to the runner
	 * @param bool $force passed through to the runner unchanged
	 *
	 * @throws Exception
	 */
	private function updateRunnerAction(string $action, bool $force = false) {
		if ($this->runner !== null) {
			$this->runner->updateAction($action, $force);
		}
	}
1063
1064
1065
	/**
	 * Forwards an info array to the runner; a no-op when no runner is set.
	 *
	 * @param array $data info entries handed to the runner's setInfoArray()
	 */
	private function updateRunnerInfo($data) {
		if ($this->runner !== null) {
			$this->runner->setInfoArray($data);
		}
	}
1075
1076
	/**
	 * Records a failure that happened while reading a file's content.
	 *
	 * The error is added to the index of the document, reported through
	 * updateNewIndexError(), and the JSON-encoded stack trace is written to
	 * the log when the encoding succeeded.
	 *
	 * @param IIndexDocument $document document whose content could not be read
	 * @param Throwable $t the failure to record
	 */
	private function manageContentErrorException(IIndexDocument $document, Throwable $t) {
		$title = 'Error while getting file content';
		$reason = $t->getMessage();

		$document->getIndex()
				 ->addError($title, $reason, IIndex::ERROR_SEV_3);
		$this->updateNewIndexError($document->getIndex(), $title, $reason, IIndex::ERROR_SEV_3);

		$trace = $t->getTrace();
		$dump = is_array($trace) ? json_encode($trace) : $trace;

		// json_encode() yields false on failure: only log an actual string.
		if (is_string($dump)) {
			$this->miscService->log($dump, 0);
		}
	}
1098
1099
1100
	/**
	 * When the index carries the IIndex::INDEX_META status and points to a
	 * folder, propagates that status to the folder's direct children through
	 * updateDirectoryMeta(). Any other index is ignored.
	 *
	 * @param IIndex $index index that was just updated
	 */
	private function updateDirectoryContentIndex(IIndex $index) {
		if ($index->isStatus(IIndex::INDEX_META)) {
			try {
				$node = $this->getFileFromIndex($index);
				if ($node->getType() === File::TYPE_FOLDER) {
					/** @var Folder $node */
					$this->updateDirectoryMeta($node);
				}
			} catch (Exception $e) {
				// Deliberately empty: on any failure nothing is propagated.
				// NOTE(review): presumably best-effort - confirm that
				// swallowing every Exception here is intended.
			}
		}
	}
1117
1118
1119
	/**
	 * Flags every direct child of a folder with the IIndex::INDEX_META status
	 * in the 'files' provider of the full text search manager.
	 *
	 * If the listing cannot be obtained (NotFoundException) nothing is done;
	 * a child that raises InvalidPathException or NotFoundException is
	 * skipped and the loop moves on to the next one.
	 *
	 * @param Folder $node folder whose direct children are flagged
	 */
	private function updateDirectoryMeta(Folder $node) {
		try {
			$children = $node->getDirectoryListing();
		} catch (NotFoundException $e) {
			return;
		}

		foreach ($children as $child) {
			try {
				$childId = (string)$child->getId();
				$this->fullTextSearchManager->updateIndexStatus(
					'files', $childId, IIndex::INDEX_META
				);
			} catch (InvalidPathException $e) {
				// Deliberately empty: this child is skipped.
			} catch (NotFoundException $e) {
				// Deliberately empty: this child is skipped.
			}
		}
	}
1139
1140
1141
	/**
	 * Reports a new index error to the runner; a no-op when no runner is set.
	 *
	 * @param IIndex $index index the error relates to
	 * @param string $message short description of the error
	 * @param string $exception details of the error (callers in this class
	 *                          pass the exception message)
	 * @param int $sev severity, e.g. IIndex::ERROR_SEV_3
	 */
	private function updateNewIndexError(IIndex $index, string $message, string $exception, int $sev
	) {
		if ($this->runner !== null) {
			$this->runner->newIndexError($index, $message, $exception, $sev);
		}
	}
1155
1156
1157
	/**
	 * Checks that neither the node nor any of its ancestors (up to the
	 * files root of the owner) is a folder containing a '.noindex' entry.
	 *
	 * The method walks up the tree recursively: each folder met on the way
	 * is tested with nodeExists('.noindex'). The walk stops once the parent
	 * path has nothing left after its '/<user>/files/' prefix.
	 *
	 * @param Node $file node to check
	 *
	 * @throws FileIsNotIndexableException when a '.noindex' entry is found
	 */
	private function isNodeIndexable(Node $file) {

		if ($file->getType() === File::TYPE_FOLDER) {
			/** @var Folder $file */
			if ($file->nodeExists('.noindex')) {
				throw new FileIsNotIndexableException();
			}
		}

		$parent = $file->getParent();
		$parentPath = $this->withoutBeginSlash($parent->getPath());

		// Crop '/<first segment>/files/' (8 chars + segment length) from the
		// parent path; what remains is the path below the files root.
		$path = substr($parent->getPath(), 8 + (int)strpos($parentPath, '/'));

		// substr() returns false (PHP 7) or '' (PHP 8) once the offset lies
		// beyond the end of the string, i.e. when the parent is the files root
		// or above. Testing only is_string() would never stop the recursion
		// on PHP 8, so an empty remainder ends the walk as well.
		if (is_string($path) && $path !== '') {
			$this->isNodeIndexable($parent);
		}
	}
1178
1179
1180
	/**
	 * Strips the leading '/<userId>/files/' part from a path.
	 *
	 * The prefix is removed purely by length (8 characters plus the length of
	 * $userId); its content is not verified. When nothing is left, an empty
	 * string is used.
	 *
	 * @param string $path full path, expected to start with '/<userId>/files/'
	 * @param string $userId user id appearing in the prefix
	 *
	 * @param bool $entrySlash prepend a '/' to the result when true
	 *
	 * @return string
	 */
	private function getPathFromRoot(string $path, string $userId, bool $entrySlash = false) {
		// TODO: better way to do this : we remove the '/userid/files/'
		// TODO: do we need userId, or can we crop the path like in isNodeIndexable()
		$relative = substr($path, strlen($userId) + 8);
		$prefix = $entrySlash ? '/' : '';

		// substr() may yield false on PHP 7 when the offset is out of range.
		return $prefix . (is_string($relative) ? $relative : '');
	}
1198
1199
}
1200
1201