Completed
Push — master ( facc0d...c11081 )
by Maxence
02:34
created

FilesService::updateFilesDocumentFromFile()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 7
rs 9.4285
cc 1
eloc 5
nc 1
nop 2
1
<?php
2
/**
3
 * Files_FullTextSearch - Index the content of your files
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Maxence Lange <[email protected]>
9
 * @copyright 2018
10
 * @license GNU AGPL version 3 or any later version
11
 *
12
 * This program is free software: you can redistribute it and/or modify
13
 * it under the terms of the GNU Affero General Public License as
14
 * published by the Free Software Foundation, either version 3 of the
15
 * License, or (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU Affero General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Affero General Public License
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
 *
25
 */
26
27
namespace OCA\Files_FullTextSearch\Service;
28
29
30
use Exception;
31
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException;
32
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException;
33
use OCA\Files_FullTextSearch\Model\FilesDocument;
34
use OCA\Files_FullTextSearch\Provider\FilesProvider;
35
use OCA\FullTextSearch\Exceptions\InterruptException;
36
use OCA\FullTextSearch\Exceptions\TickDoesNotExistException;
37
use OCA\FullTextSearch\Model\Index;
38
use OCA\FullTextSearch\Model\IndexDocument;
39
use OCA\FullTextSearch\Model\Runner;
40
use OCP\Files\File;
41
use OCP\Files\FileInfo;
42
use OCP\Files\Folder;
43
use OCP\Files\InvalidPathException;
44
use OCP\Files\IRootFolder;
45
use OCP\Files\Node;
46
use OCP\Files\NotFoundException;
47
use OCP\Files\NotPermittedException;
48
use OCP\Files\StorageNotAvailableException;
49
use OCP\IUserManager;
50
use OCP\Share\IManager;
51
52
class FilesService {
53
54
	// Content categories; parseMimeType() returns the TEXT, PDF and OFFICE
	// values (or '' when a mime type matches none of them).
	const MIMETYPE_TEXT = 'files_text';
55
	const MIMETYPE_PDF = 'files_pdf';
56
	const MIMETYPE_OFFICE = 'files_office';
57
	// NOTE(review): IMAGE and AUDIO are not referenced by any method visible
	// in this class - confirm whether other classes rely on them.
	const MIMETYPE_IMAGE = 'files_image';
58
	const MIMETYPE_AUDIO = 'files_audio';
59
60
61
	/** @var IRootFolder entry point used to reach a user's folder (getUserFolder()) */
62
	private $rootFolder;
63
64
	/** @var IUserManager stored by the constructor; not used by the methods of this class */
65
	private $userManager;
66
67
	/** @var IManager share manager, stored by the constructor; not used by the methods of this class */
68
	private $shareManager;
69
70
	/** @var ConfigService source of app values (size limit, pdf/office switches) and of the instance id */
71
	private $configService;
72
73
	/** @var LocalFilesService first service asked for a file's source, access and share users */
74
	private $localFilesService;
75
76
	/** @var ExternalFilesService second service asked for a file's source, access and share users */
77
	private $externalFilesService;
78
79
	/** @var GroupFoldersService third service asked for a file's source, access and share users */
80
	private $groupFoldersService;
81
82
	/** @var MiscService stored by the constructor; only its static helpers are called in this class */
83
	private $miscService;
84
85
86
	/**
	 * Stores the collaborators this service delegates to.
	 *
	 * @param IRootFolder $rootFolder gives access to each user's folder
	 * @param IUserManager $userManager kept on the instance
	 * @param IManager $shareManager kept on the instance
	 * @param ConfigService $configService app and system configuration values
	 * @param LocalFilesService $localFilesService handler for local files
	 * @param ExternalFilesService $externalFilesService handler for external files
	 * @param GroupFoldersService $groupFoldersService handler for group folders
	 * @param MiscService $miscService miscellaneous helpers
	 */
	public function __construct(
		IRootFolder $rootFolder,
		IUserManager $userManager,
		IManager $shareManager,
		ConfigService $configService,
		LocalFilesService $localFilesService,
		ExternalFilesService $externalFilesService,
		GroupFoldersService $groupFoldersService,
		MiscService $miscService
	) {
		// Platform services.
		$this->shareManager = $shareManager;
		$this->userManager = $userManager;
		$this->rootFolder = $rootFolder;

		// Services of this app.
		$this->groupFoldersService = $groupFoldersService;
		$this->externalFilesService = $externalFilesService;
		$this->localFilesService = $localFilesService;
		$this->configService = $configService;
		$this->miscService = $miscService;
	}
119
120
121
	/**
	 * Builds the list of documents for everything found in a user's folder.
	 *
	 * External files and group shares are initialised for the user first,
	 * then the root ('/') of the user folder is scanned recursively.
	 *
	 * @param Runner $runner passed through to the directory scan
	 * @param string $userId user whose folder is scanned
	 *
	 * @return FilesDocument[]
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromUser(Runner $runner, $userId) {
		$this->externalFilesService->initExternalFilesForUser($userId);
		$this->groupFoldersService->initGroupSharesForUser($userId);

		$userFolder = $this->rootFolder->getUserFolder($userId);

		/** @var Folder $root */
		$root = $userFolder->get('/');

		return $this->getFilesFromDirectory($runner, $userId, $root);
	}
143
144
145
	/**
	 * Recursively collects a FilesDocument for each entry found below $node.
	 *
	 * Nothing is returned for a folder that contains a '.noindex' entry, or
	 * whose storage is not available when that check is made. An entry
	 * rejected with FileIsNotIndexableException is skipped; when that entry
	 * is a folder, its content is not visited either.
	 *
	 * @param Runner $runner notified through update() once per directory entry
	 * @param string $userId viewer for whom the documents are generated
	 * @param Folder $node folder to scan
	 *
	 * @return FilesDocument[]
	 * @throws InterruptException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws TickDoesNotExistException
	 */
	public function getFilesFromDirectory(Runner $runner, $userId, Folder $node) {
		try {
			$excluded = $node->nodeExists('.noindex');
		} catch (StorageNotAvailableException $e) {
			return [];
		}

		if ($excluded) {
			return [];
		}

		$collected = [];
		foreach ($node->getDirectoryListing() as $entry) {
			$runner->update('getFilesFromDirectory');

			try {
				$collected[] = $this->generateFilesDocumentFromFile($entry, $userId);
			} catch (FileIsNotIndexableException $e) {
				continue;
			}

			if ($entry->getType() !== FileInfo::TYPE_FOLDER) {
				continue;
			}

			// Sub-folder: append the documents of its own content, in order.
			/** @var Folder $entry */
			foreach ($this->getFilesFromDirectory($runner, $userId, $entry) as $nested) {
				$collected[] = $nested;
			}
		}

		return $collected;
	}
187
188
189
	/**
	 * Creates the FilesDocument describing $file as seen by $viewerId.
	 *
	 * The document receives the node type, its source, the owner id, the path
	 * relative to the viewer, the viewer id, the modification time and the
	 * mime type.
	 *
	 * @param Node $file node to describe
	 * @param string $viewerId user from whose point of view the path is computed
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws Exception
	 */
	private function generateFilesDocumentFromFile(Node $file, $viewerId) {

		$source = $this->getFileSource($file);
		$document = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, $file->getId());

		// A node is not guaranteed to have an owner; calling getUID() on a
		// missing owner would be a fatal error, so fall back to an empty id.
		$owner = $file->getOwner();
		$ownerId = ($owner === null) ? '' : $owner->getUID();

		$document->setType($file->getType())
				 ->setSource($source)
				 ->setOwnerId($ownerId)
				 ->setPath($this->getPathFromViewerId($file->getId(), $viewerId))
				 ->setViewerId($viewerId)
				 ->setModifiedTime($file->getMTime())
				 ->setMimetype($file->getMimetype());

		return $document;
	}
218
219
220
	/**
	 * Asks the local, external and group-folder services, in that order, for
	 * the source of $file.
	 *
	 * All three receive the same variable; a KnownFileSourceException raised
	 * by one of them ends the probing early.
	 * NOTE(review): the variable is presumably filled by reference inside the
	 * services - confirm against their signatures.
	 *
	 * @param Node $file node whose source is wanted
	 *
	 * @return string the source, or '' when none of the services set it
	 * @throws FileIsNotIndexableException
	 * @throws NotFoundException
	 */
	private function getFileSource(Node $file) {
		$origin = '';
		$services = [
			$this->localFilesService,
			$this->externalFilesService,
			$this->groupFoldersService
		];

		try {
			foreach ($services as $service) {
				$service->getFileSource($file, $origin);
			}
		} catch (KnownFileSourceException $e) {
			// The source is known: no need to ask the remaining services.
		}

		return $origin;
	}
240
241
242
	/**
	 * Resolves $path inside the folder of $userId.
	 *
	 * @param string $userId owner of the user folder to look into
	 * @param string $path path handed to the user folder's get()
	 *
	 * @return Node
	 * @throws NotFoundException
	 */
	public function getFileFromPath($userId, $path) {
		$userFolder = $this->rootFolder->getUserFolder($userId);

		return $userFolder->get($path);
	}
253
254
255
	/**
	 * Looks up a node by id inside the folder of $userId.
	 *
	 * Any exception raised during the lookup is swallowed and reported as
	 * "not found". When several nodes match, the first one is returned.
	 *
	 * @param string $userId user whose folder is searched
	 * @param int $fileId id of the wanted node
	 *
	 * @return Node|null the first matching node, or null when none is found
	 */
	public function getFileFromId($userId, $fileId) {
		try {
			$userFolder = $this->rootFolder->getUserFolder($userId);
			$matches = $userFolder->getById($fileId);
		} catch (Exception $e) {
			return null;
		}

		if (count($matches) === 0) {
			return null;
		}

		return reset($matches);
	}
278
279
280
	/**
	 * Returns the path of a file as seen from the folder of $viewerId.
	 *
	 * The first node matching $fileId in the viewer's folder is used; the
	 * leading '/<viewerId>/files/' part of its path is cut off and the result
	 * goes through MiscService::noEndSlash().
	 *
	 * NOTE(review): the cut offset here is 8 + strlen($viewerId), whereas
	 * setDocumentInfo() and setDocumentMore() use 7 + strlen(...) for the
	 * same kind of prefix - confirm which one is intended.
	 *
	 * @param int $fileId id of the file
	 * @param string $viewerId user whose folder is searched
	 *
	 * @throws Exception
	 * @return string the relative path, or '' when the viewer has no such file
	 */
	private function getPathFromViewerId($fileId, $viewerId) {
		$userFolder = $this->rootFolder->getUserFolder($viewerId);
		$matches = $userFolder->getById($fileId);

		if (count($matches) === 0) {
			return '';
		}

		$first = reset($matches);

		// TODO: better way to do this : we remove the '/userid/files/'
		$prefixLength = 8 + strlen($viewerId);
		$relative = substr($first->getPath(), $prefixLength);

		return MiscService::noEndSlash($relative);
	}
303
304
305
	/**
	 * Fills the path and file name of $document from the viewer's own copy.
	 *
	 * The viewer is read from the document access; the first node of the
	 * viewer's folder matching the document id provides the values. The
	 * document is left untouched when no node matches.
	 *
	 * @param FilesDocument $document document to complete
	 */
	public function setDocumentInfo(FilesDocument $document) {
		$access = $document->getAccess();
		$viewerId = $access->getViewerId();

		$userFolder = $this->rootFolder->getUserFolder($viewerId);
		$matches = $userFolder->getById($document->getId());

		if (count($matches) === 0) {
			return;
		}

		// Only the first match is considered.
		$first = reset($matches);

		// TODO: better way to do this : we remove the '/userId/files/'
		$prefixLength = 7 + strlen($viewerId);
		$relative = MiscService::noEndSlash(substr($first->getPath(), $prefixLength));

		$document->setPath($relative);
		$document->setFileName($first->getName());
	}
328
329
330
	/**
	 * Uses the path of the document as its title.
	 *
	 * @param FilesDocument $document document whose title is set
	 */
	public function setDocumentTitle(FilesDocument $document) {
		$title = $document->getPath();
		$document->setTitle($title);
	}
336
337
338
	/**
	 * Sets the link of $document to the 'files.view.index' route.
	 *
	 * The route receives the directory part of the document path as 'dir'
	 * and the file name as 'scrollto'. The directory is obtained by removing
	 * the file name from the end of the path.
	 *
	 * @param FilesDocument $document document whose link is set
	 */
	public function setDocumentLink(FilesDocument $document) {

		$path = $document->getPath();
		$filename = $document->getFileName();

		// With an empty file name, substr($path, 0, -0) would be
		// substr($path, 0, 0) and yield '' instead of the untouched path.
		$length = strlen((string)$filename);
		$dir = ($length === 0) ? $path : substr($path, 0, -$length);

		$document->setLink(
			\OC::$server->getURLGenerator()
						->linkToRoute(
							'files.view.index',
							[
								'dir'      => $dir,
								'scrollto' => $filename,
							]
						)
		);
	}
358
359
360
	/**
	 * Attaches extra file details ('more') to $document.
	 *
	 * The file is looked up for the viewer found in the document access;
	 * nothing is set when it cannot be found. The details are the webdav id,
	 * the viewer-relative path, timestamps, mime type, etag, permissions,
	 * size and a favorite flag that is always false for now.
	 *
	 * @param FilesDocument $document document to complete
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 */
	public function setDocumentMore(FilesDocument $document) {
		$viewerId = $document->getAccess()->getViewerId();
		$documentId = $document->getId();

		$file = $this->getFileFromId($viewerId, $documentId);
		if ($file === null) {
			return;
		}

		// TODO: better way to do this : we remove the '/userid/files/'
		$relative = substr($file->getPath(), 7 + strlen($viewerId));

		$document->setMore(
			[
				'webdav'             => $this->getWebdavId($documentId),
				'path'               => MiscService::noEndSlash($relative),
				'timestamp'          => $file->getMTime(), // FIXME: get the creation date of the file
				'mimetype'           => $file->getMimetype(),
				'modified_timestamp' => $file->getMTime(),
				'etag'               => $file->getEtag(),
				'permissions'        => $file->getPermissions(),
				'size'               => $file->getSize(),
				'favorite'           => false // FIXME: get the favorite status
			]
		);
	}
393
394
395
	/**
	 * Completes each FilesDocument of $documents and returns them as a list.
	 *
	 * Entries that are not FilesDocument instances are dropped. When the
	 * update of a document fails, its index is flagged Index::INDEX_IGNORE,
	 * the failure is printed and the document is still part of the result.
	 *
	 * NOTE(review): the failure is written with echo, i.e. to the standard
	 * output of whatever runs this service.
	 *
	 * @param FilesDocument[] $documents documents to complete
	 *
	 * @return FilesDocument[]
	 */
	public function generateDocuments($documents) {
		$completed = [];

		foreach ($documents as $candidate) {
			if ($candidate instanceof FilesDocument) {
				try {
					$this->updateFilesDocument($candidate);
				} catch (Exception $e) {
					// TODO - update $document with a error status instead of just ignore !
					$candidate->getIndex()->setStatus(Index::INDEX_IGNORE);
					echo 'Exception: ' . json_encode($e->getTrace()) . ' - ' . $e->getMessage() . "\n";
				}

				$completed[] = $candidate;
			}
		}

		return $completed;
	}
423
424
425
	/**
	 * Builds the document that corresponds to an index entry.
	 *
	 * When the owner no longer has the file, the index is flagged
	 * Index::INDEX_REMOVE and an otherwise empty document carrying that index
	 * is returned. Otherwise the document is generated from the file, linked
	 * to the index and then completed (access, share names, content).
	 *
	 * @param Index $index index entry to turn into a document
	 *
	 * @return FilesDocument
	 * @throws FileIsNotIndexableException
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function generateDocumentFromIndex(Index $index) {
		$file = $this->getFileFromId($index->getOwnerId(), $index->getDocumentId());

		if ($file !== null) {
			$generated = $this->generateFilesDocumentFromFile($file, $index->getOwnerId());
			$generated->setIndex($index);
			$this->updateFilesDocumentFromFile($generated, $file);

			return $generated;
		}

		// File is gone: hand back a bare document flagged for removal.
		$index->setStatus(Index::INDEX_REMOVE);
		$removed = new FilesDocument($index->getProviderId(), $index->getDocumentId());
		$removed->setIndex($index);

		return $removed;
	}
452
453
454
	/**
	 * Tells whether the index of $document is still current.
	 *
	 * That is the case when the index status is exactly Index::INDEX_OK and
	 * the last indexing is not older than the document's modification time.
	 *
	 * @param IndexDocument $document document to check
	 *
	 * @return bool
	 */
	public function isDocumentUpToDate($document) {
		$index = $document->getIndex();

		return $index->getStatus() === Index::INDEX_OK
			   && $index->getLastIndex() >= $document->getModifiedTime();
	}
472
473
474
	/**
	 * Generates the up-to-date document for an index entry.
	 *
	 * @param Index $index index entry to refresh
	 *
	 * @return FilesDocument|null null when the file turns out not to be indexable
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	public function updateDocument(Index $index) {
		try {
			return $this->generateDocumentFromIndex($index);
		} catch (FileIsNotIndexableException $e) {
			return null;
		}
	}
491
492
493
	/**
	 * Completes $document from the file found at its path in the viewer's folder.
	 *
	 * A FileIsNotIndexableException raised while completing it flags the
	 * index of the document as Index::INDEX_IGNORE.
	 *
	 * @param FilesDocument $document document to complete
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocument(FilesDocument $document) {
		$viewerFolder = $this->rootFolder->getUserFolder($document->getViewerId());
		$node = $viewerFolder->get($document->getPath());

		try {
			$this->updateFilesDocumentFromFile($document, $node);
		} catch (FileIsNotIndexableException $e) {
			$index = $document->getIndex();
			$index->setStatus(Index::INDEX_IGNORE);
		}
	}
511
512
513
	/**
	 * Runs the three completion steps on $document - access, share names,
	 * content - and finally tags the document with its source.
	 *
	 * @param FilesDocument $document document to complete
	 * @param Node $file node the document describes
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {
		$this->updateDocumentAccess($document, $file);
		$this->updateShareNames($document, $file);
		$this->updateContentFromFile($document, $file);

		// The source is read only after the steps above have run.
		$sourceTag = $document->getSource();
		$document->addTag($sourceTag);
	}
528
529
530
	/**
	 * Lets the local, external and group-folder services update the access
	 * of $document, in that order.
	 *
	 * Nothing happens unless the index of the document has the status
	 * Index::INDEX_FULL or FilesDocument::STATUS_FILE_ACCESS.
	 *
	 * @param FilesDocument $document document whose access is updated
	 * @param Node $file node the document describes
	 */
	private function updateDocumentAccess(FilesDocument $document, Node $file) {
		$index = $document->getIndex();

		$accessRequested = $index->isStatus(Index::INDEX_FULL)
						   || $index->isStatus(FilesDocument::STATUS_FILE_ACCESS);
		if (!$accessRequested) {
			return;
		}

		$this->localFilesService->updateDocumentAccess($document, $file);
		$this->externalFilesService->updateDocumentAccess($document, $file);
		$this->groupFoldersService->updateDocumentAccess($document, $file);
	}
546
547
548
	/**
	 * Sets the title of $document and, when requested, its content.
	 *
	 * The title is always the document path. Content is only considered when
	 * the index has the status Index::INDEX_CONTENT and the node is a regular
	 * file. The text, office and PDF extractors run, in that order, only for
	 * files smaller than the configured size (app value
	 * ConfigService::FILES_SIZE, multiplied by 1024 * 1024). When the
	 * document ends up without content, Index::INDEX_CONTENT is removed from
	 * its index.
	 *
	 * @param FilesDocument $document document to complete
	 * @param Node $file node the document describes
	 *
	 * @throws InvalidPathException
	 * @throws NotFoundException
	 * @throws NotPermittedException
	 */
	private function updateContentFromFile(FilesDocument $document, Node $file) {
		$document->setTitle($document->getPath());

		$index = $document->getIndex();
		if (!$index->isStatus(Index::INDEX_CONTENT)) {
			return;
		}

		if ($file->getType() !== FileInfo::TYPE_FILE) {
			return;
		}

		/** @var File $file */
		$limit = $this->configService->getAppValue(ConfigService::FILES_SIZE) * 1024 * 1024;
		if ($file->getSize() < $limit) {
			$this->extractContentFromFileText($document, $file);
			$this->extractContentFromFileOffice($document, $file);
			$this->extractContentFromFilePDF($document, $file);
		}

		if ($document->getContent() === null) {
			$document->getIndex()->unsetStatus(Index::INDEX_CONTENT);
		}
	}
579
580
581
	/**
	 * Stores, as the 'share_names' info of $document, the path of the file
	 * as seen by every user it is shared with.
	 *
	 * The users are gathered from the local, external and group-folder
	 * services, in that order. The resulting map is keyed by
	 * MiscService::secureUsername($user); a user whose path cannot be
	 * resolved is left out.
	 * NOTE(review): the user list is presumably filled by reference inside
	 * the services - confirm against their signatures.
	 *
	 * @param FilesDocument $document document receiving the info
	 * @param Node $file node the document describes
	 *
	 * @return array the same map that was stored on the document
	 */
	private function updateShareNames(FilesDocument $document, Node $file) {
		$viewers = [];
		$this->localFilesService->getShareUsers($document, $file, $viewers);
		$this->externalFilesService->getShareUsers($document, $viewers);
		$this->groupFoldersService->getShareUsers($document, $viewers);

		$pathByViewer = [];
		foreach ($viewers as $viewer) {
			try {
				$key = MiscService::secureUsername($viewer);
				$pathByViewer[$key] = $this->getPathFromViewerId($file->getId(), $viewer);
			} catch (Exception $e) {
				// No entry is recorded for a user whose path cannot be resolved.
			}
		}

		$document->setInfo('share_names', $pathByViewer);

		return $pathByViewer;
	}
622
623
	/**
	 * Builds the webdav id of a file: the file id left-padded with zeros to
	 * at least 8 characters, followed by the 'instanceid' system value.
	 *
	 * @param int $fileId id of the file
	 *
	 * @return string
	 */
	private function getWebdavId($fileId) {
		$paddedId = str_pad((string)$fileId, 8, '0', STR_PAD_LEFT);

		return $paddedId . $this->configService->getSystemValue('instanceid');
	}
633
634
635
	/**
	 * Maps a mime type to one of the MIMETYPE_* categories of this class.
	 *
	 * 'application/octet-stream' and anything starting with 'text/' count as
	 * text, 'application/pdf' as PDF, and a mime type starting with one of
	 * the known office prefixes as office.
	 *
	 * @param string $mimeType mime type to classify
	 *
	 * @return string the category, or '' when the mime type matches none
	 */
	private function parseMimeType($mimeType) {
		$isText = ($mimeType === 'application/octet-stream')
				  || (strpos($mimeType, 'text/') === 0);
		if ($isText) {
			return self::MIMETYPE_TEXT;
		}

		if ($mimeType === 'application/pdf') {
			return self::MIMETYPE_PDF;
		}

		// Office documents are recognised by the start of their mime type.
		$officePrefixes = [
			'application/msword',
			'application/vnd.oasis.opendocument',
			'application/vnd.sun.xml',
			'application/vnd.openxmlformats-officedocument',
			'application/vnd.ms-word',
			'application/vnd.ms-powerpoint',
			'application/vnd.ms-excel'
		];

		$category = '';
		foreach ($officePrefixes as $prefix) {
			if (strpos($mimeType, $prefix) === 0) {
				$category = self::MIMETYPE_OFFICE;
				break;
			}
		}

		return $category;
	}
672
673
674
	/**
	 * Sets the content of $document from a text file.
	 *
	 * Does nothing unless the mime type of the document falls in the text
	 * category. The file content is stored base64-encoded rather than raw, to
	 * avoid errors related to the encoding of the file.
	 *
	 * @param FilesDocument $document document receiving the content
	 * @param File $file file to read
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileText(FilesDocument $document, File $file) {
		$category = $this->parseMimeType($document->getMimeType());

		if ($category === self::MIMETYPE_TEXT) {
			$encoded = base64_encode($file->getContent());
			$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
		}
	}
691
692
693
	/**
	 * Sets the content of $document from a PDF file.
	 *
	 * Does nothing unless the mime type of the document falls in the PDF
	 * category and the app value 'files_pdf' is '1'. The file content is
	 * stored base64-encoded.
	 *
	 * @param FilesDocument $document document receiving the content
	 * @param File $file file to read
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFilePDF(FilesDocument $document, File $file) {
		$skip = $this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_PDF
				|| $this->configService->getAppValue('files_pdf') !== '1';
		if ($skip) {
			return;
		}

		$encoded = base64_encode($file->getContent());
		$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
	}
711
712
713
	/**
	 * Sets the content of $document from an office file.
	 *
	 * Does nothing unless the app value 'files_office' is '1' and the mime
	 * type of the document falls in the office category. The file content is
	 * stored base64-encoded.
	 *
	 * @param FilesDocument $document document receiving the content
	 * @param File $file file to read
	 *
	 * @throws NotPermittedException
	 */
	private function extractContentFromFileOffice(FilesDocument $document, File $file) {
		$skip = $this->configService->getAppValue('files_office') !== '1'
				|| $this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_OFFICE;
		if ($skip) {
			return;
		}

		$encoded = base64_encode($file->getContent());
		$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
	}
731
732
733
}
734