|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* Files_FullTextSearch - Index the content of your files |
|
4
|
|
|
* |
|
5
|
|
|
* This file is licensed under the Affero General Public License version 3 or |
|
6
|
|
|
* later. See the COPYING file. |
|
7
|
|
|
* |
|
8
|
|
|
* @author Maxence Lange <[email protected]> |
|
9
|
|
|
* @copyright 2018 |
|
10
|
|
|
* @license GNU AGPL version 3 or any later version |
|
11
|
|
|
* |
|
12
|
|
|
* This program is free software: you can redistribute it and/or modify |
|
13
|
|
|
* it under the terms of the GNU Affero General Public License as |
|
14
|
|
|
* published by the Free Software Foundation, either version 3 of the |
|
15
|
|
|
* License, or (at your option) any later version. |
|
16
|
|
|
* |
|
17
|
|
|
* This program is distributed in the hope that it will be useful, |
|
18
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
19
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
20
|
|
|
* GNU Affero General Public License for more details. |
|
21
|
|
|
* |
|
22
|
|
|
* You should have received a copy of the GNU Affero General Public License |
|
23
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
24
|
|
|
* |
|
25
|
|
|
*/ |
|
26
|
|
|
|
|
27
|
|
|
namespace OCA\Files_FullTextSearch\Service; |
|
28
|
|
|
|
|
29
|
|
|
|
|
30
|
|
|
use Exception; |
|
31
|
|
|
use OC\App\AppManager; |
|
32
|
|
|
use OCA\Files_FullTextSearch\Exceptions\EmptyUserException; |
|
33
|
|
|
use OCA\Files_FullTextSearch\Exceptions\FileIsNotIndexableException; |
|
34
|
|
|
use OCA\Files_FullTextSearch\Exceptions\FilesNotFoundException; |
|
35
|
|
|
use OCA\Files_FullTextSearch\Exceptions\KnownFileMimeTypeException; |
|
36
|
|
|
use OCA\Files_FullTextSearch\Exceptions\KnownFileSourceException; |
|
37
|
|
|
use OCA\Files_FullTextSearch\Model\FilesDocument; |
|
38
|
|
|
use OCA\Files_FullTextSearch\Provider\FilesProvider; |
|
39
|
|
|
use OCA\Files_FullTextSearch_Tesseract\Service\TesseractService; |
|
40
|
|
|
use OCA\FullTextSearch\Exceptions\InterruptException; |
|
41
|
|
|
use OCA\FullTextSearch\Exceptions\TickDoesNotExistException; |
|
42
|
|
|
use OCA\FullTextSearch\Model\Index; |
|
43
|
|
|
use OCA\FullTextSearch\Model\IndexDocument; |
|
44
|
|
|
use OCA\FullTextSearch\Model\Runner; |
|
45
|
|
|
use OCP\AppFramework\IAppContainer; |
|
46
|
|
|
use OCP\Files\File; |
|
47
|
|
|
use OCP\Files\FileInfo; |
|
48
|
|
|
use OCP\Files\Folder; |
|
49
|
|
|
use OCP\Files\InvalidPathException; |
|
50
|
|
|
use OCP\Files\IRootFolder; |
|
51
|
|
|
use OCP\Files\Node; |
|
52
|
|
|
use OCP\Files\NotFoundException; |
|
53
|
|
|
use OCP\Files\NotPermittedException; |
|
54
|
|
|
use OCP\Files\StorageNotAvailableException; |
|
55
|
|
|
use OCP\IUserManager; |
|
56
|
|
|
use OCP\Share\IManager; |
|
57
|
|
|
|
|
58
|
|
|
class FilesService { |
|
59
|
|
|
|
|
60
|
|
|
const MIMETYPE_TEXT = 'files_text'; |
|
61
|
|
|
const MIMETYPE_PDF = 'files_pdf'; |
|
62
|
|
|
const MIMETYPE_OFFICE = 'files_office'; |
|
63
|
|
|
const MIMETYPE_OCR = 'files_ocr'; |
|
64
|
|
|
const MIMETYPE_IMAGE = 'files_image'; |
|
65
|
|
|
const MIMETYPE_AUDIO = 'files_audio'; |
|
66
|
|
|
|
|
67
|
|
|
|
|
68
|
|
|
/** @var IAppContainer */ |
|
69
|
|
|
private $container; |
|
70
|
|
|
|
|
71
|
|
|
/** @var IRootFolder */ |
|
72
|
|
|
private $rootFolder; |
|
73
|
|
|
|
|
74
|
|
|
/** @var IUserManager */ |
|
75
|
|
|
private $userManager; |
|
76
|
|
|
|
|
77
|
|
|
/** @var AppManager */ |
|
78
|
|
|
private $appManager; |
|
79
|
|
|
|
|
80
|
|
|
/** @var IManager */ |
|
81
|
|
|
private $shareManager; |
|
82
|
|
|
|
|
83
|
|
|
/** @var ConfigService */ |
|
84
|
|
|
private $configService; |
|
85
|
|
|
|
|
86
|
|
|
/** @var LocalFilesService */ |
|
87
|
|
|
private $localFilesService; |
|
88
|
|
|
|
|
89
|
|
|
/** @var ExternalFilesService */ |
|
90
|
|
|
private $externalFilesService; |
|
91
|
|
|
|
|
92
|
|
|
/** @var GroupFoldersService */ |
|
93
|
|
|
private $groupFoldersService; |
|
94
|
|
|
|
|
95
|
|
|
/** @var MiscService */ |
|
96
|
|
|
private $miscService; |
|
97
|
|
|
|
|
98
|
|
|
|
|
99
|
|
|
/**
 * FilesService constructor.
 *
 * Keeps a reference to every injected collaborator; no other work is done.
 *
 * @param IAppContainer $container
 * @param IRootFolder $rootFolder
 * @param AppManager $appManager
 * @param IUserManager $userManager
 * @param IManager $shareManager
 * @param ConfigService $configService
 * @param LocalFilesService $localFilesService
 * @param ExternalFilesService $externalFilesService
 * @param GroupFoldersService $groupFoldersService
 * @param MiscService $miscService
 */
public function __construct(
	IAppContainer $container,
	IRootFolder $rootFolder,
	AppManager $appManager,
	IUserManager $userManager,
	IManager $shareManager,
	ConfigService $configService,
	LocalFilesService $localFilesService,
	ExternalFilesService $externalFilesService,
	GroupFoldersService $groupFoldersService,
	MiscService $miscService
) {
	// Nextcloud core services.
	$this->container = $container;
	$this->rootFolder = $rootFolder;
	$this->appManager = $appManager;
	$this->userManager = $userManager;
	$this->shareManager = $shareManager;

	// Services of this app.
	$this->configService = $configService;
	$this->localFilesService = $localFilesService;
	$this->externalFilesService = $externalFilesService;
	$this->groupFoldersService = $groupFoldersService;
	$this->miscService = $miscService;
}
|
137
|
|
|
|
|
138
|
|
|
|
|
139
|
|
|
/**
 * Builds the list of documents for everything below the files folder of a user.
 *
 * The file systems of the user are initialised first, then the folder is
 * walked with getFilesFromDirectory().
 *
 * @param Runner $runner
 * @param string $userId
 *
 * @return FilesDocument[]
 * @throws InterruptException
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws TickDoesNotExistException
 */
public function getFilesFromUser(Runner $runner, $userId) {
	$this->initFileSystems($userId);

	/** @var Folder $userRoot */
	$userRoot = $this->rootFolder->getUserFolder($userId)
								 ->get('/');

	return $this->getFilesFromDirectory($runner, $userId, $userRoot);
}
|
160
|
|
|
|
|
161
|
|
|
|
|
162
|
|
|
/**
 * Initialises external files and group shares for a user.
 *
 * Nothing is done when the user id is an empty string.
 *
 * @param string $userId
 */
private function initFileSystems($userId) {
	if ($userId !== '') {
		$this->externalFilesService->initExternalFilesForUser($userId);
		$this->groupFoldersService->initGroupSharesForUser($userId);
	}
}
|
173
|
|
|
|
|
174
|
|
|
|
|
175
|
|
|
/**
 * Recursively collects a FilesDocument for each entry of a folder.
 *
 * A folder containing a '.noindex' node, or whose storage is not available
 * while checking for it, yields an empty list. Entries that are not indexable
 * are skipped, and their content is not descended into.
 *
 * @param Runner $runner
 * @param string $userId
 * @param Folder $node
 *
 * @return FilesDocument[]
 * @throws InterruptException
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws TickDoesNotExistException
 */
public function getFilesFromDirectory(Runner $runner, $userId, Folder $node) {
	try {
		$skipFolder = $node->nodeExists('.noindex');
	} catch (StorageNotAvailableException $e) {
		// an unreachable storage is treated like an excluded folder.
		$skipFolder = true;
	}

	if ($skipFolder) {
		return [];
	}

	$collected = [];
	foreach ($node->getDirectoryListing() as $entry) {
		$runner->update('getFilesFromDirectory');

		try {
			$collected[] = $this->generateFilesDocumentFromFile($entry, $userId);
		} catch (FileIsNotIndexableException $e) {
			continue;
		}

		if ($entry->getType() !== FileInfo::TYPE_FOLDER) {
			continue;
		}

		/** @var $entry Folder */
		foreach ($this->getFilesFromDirectory($runner, $userId, $entry) as $child) {
			$collected[] = $child;
		}
	}

	return $collected;
}
|
216
|
|
|
|
|
217
|
|
|
|
|
218
|
|
|
/**
 * Creates a FilesDocument describing a node, as seen by a given viewer.
 *
 * The document receives the type, source, owner id (empty string when the
 * node has no owner), viewer-relative path, viewer id, modification time and
 * mimetype of the node.
 *
 * @param Node $file
 * @param string $viewerId
 *
 * @return FilesDocument
 * @throws FileIsNotIndexableException
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws Exception
 */
private function generateFilesDocumentFromFile(Node $file, $viewerId) {
	$source = $this->getFileSource($file);
	$document = new FilesDocument(FilesProvider::FILES_PROVIDER_ID, $file->getId());

	$owner = $file->getOwner();
	$ownerId = ($owner === null) ? '' : $owner->getUID();

	$viewerPath = $this->getPathFromViewerId($file->getId(), $viewerId);

	$document->setType($file->getType())
			 ->setSource($source)
			 ->setOwnerId($ownerId)
			 ->setPath($viewerPath)
			 ->setViewerId($viewerId)
			 ->setModifiedTime($file->getMTime())
			 ->setMimetype($file->getMimetype());

	return $document;
}
|
250
|
|
|
|
|
251
|
|
|
|
|
252
|
|
|
/**
 * Asks the local, external and group-folder services, in that order, for the
 * source of a node.
 *
 * Each service is handed the $source variable; a KnownFileSourceException
 * ends the lookup early.
 *
 * @param Node $file
 *
 * @return string
 * @throws FileIsNotIndexableException
 * @throws NotFoundException
 */
private function getFileSource(Node $file) {
	$source = '';

	$resolvers = [
		$this->localFilesService,
		$this->externalFilesService,
		$this->groupFoldersService
	];

	try {
		foreach ($resolvers as $resolver) {
			$resolver->getFileSource($file, $source);
		}
	} catch (KnownFileSourceException $e) {
		/** we know the source, just leave. */
	}

	return $source;
}
|
272
|
|
|
|
|
273
|
|
|
|
|
274
|
|
|
/**
 * Returns the node found at a path inside the files folder of a user.
 *
 * @param string $userId
 * @param string $path
 *
 * @return Node
 * @throws NotFoundException
 */
public function getFileFromPath($userId, $path) {
	$userFolder = $this->rootFolder->getUserFolder($userId);

	return $userFolder->get($path);
}
|
285
|
|
|
|
|
286
|
|
|
|
|
287
|
|
|
/**
 * Returns the first node matching a file id inside the files folder of a user.
 *
 * @param string $userId
 * @param int $fileId
 *
 * @return Node
 * @throws FilesNotFoundException when no node matches the file id
 * @throws EmptyUserException when the user id is an empty string
 */
public function getFileFromId($userId, $fileId) {
	if ($userId === '') {
		throw new EmptyUserException();
	}

	$userFolder = $this->rootFolder->getUserFolder($userId);
	$matches = $userFolder->getById($fileId);
	if (count($matches) === 0) {
		throw new FilesNotFoundException();
	}

	return array_shift($matches);
}
|
311
|
|
|
|
|
312
|
|
|
|
|
313
|
|
|
/**
 * Returns the node referenced by an index, looked up with the owner id and
 * document id of that index.
 *
 * impersonateOwner() is called on the index before the owner id is read.
 *
 * @param Index $index
 *
 * @return Node
 * @throws EmptyUserException
 * @throws FilesNotFoundException
 */
public function getFileFromIndex(Index $index) {
	$this->impersonateOwner($index);

	$ownerId = $index->getOwnerId();
	$documentId = $index->getDocumentId();

	return $this->getFileFromId($ownerId, $documentId);
}
|
325
|
|
|
|
|
326
|
|
|
|
|
327
|
|
|
/**
 * Returns the path of a file as seen from the files folder of a viewer.
 *
 * An empty string is returned when the viewer has no node with this file id.
 *
 * @param int $fileId
 * @param string $viewerId
 *
 * @throws Exception
 * @return string
 */
private function getPathFromViewerId($fileId, $viewerId) {
	$matches = $this->rootFolder->getUserFolder($viewerId)
								->getById($fileId);

	if (count($matches) === 0) {
		return '';
	}

	$node = array_shift($matches);

	// TODO: better way to do this : we remove the '/userid/files/'
	$fullPath = $node->getPath();
	$prefixLength = 8 + strlen($viewerId);

	return MiscService::noEndSlash(substr($fullPath, $prefixLength));
}
|
350
|
|
|
|
|
351
|
|
|
|
|
352
|
|
|
/**
 * Fills each FilesDocument of a list with its access rights and content.
 *
 * Entries that are not a FilesDocument are dropped from the result. When
 * updating a document throws, its index is flagged Index::INDEX_IGNORE, the
 * failure is logged, and the document is still part of the returned list.
 *
 * @param FilesDocument[] $documents
 *
 * @return FilesDocument[]
 */
public function generateDocuments($documents) {

	$index = [];

	foreach ($documents as $document) {
		if (!($document instanceof FilesDocument)) {
			continue;
		}

		try {
			$this->updateFilesDocument($document);
		} catch (Exception $e) {
			// TODO - update $document with a error status instead of just ignore !
			$document->getIndex()
					 ->setStatus(Index::INDEX_IGNORE);

			// Report through the app logger rather than echo: writing to stdout
			// from a service corrupts any non-CLI response and exposes the trace.
			// getTraceAsString() cannot fail, unlike json_encode() on the raw trace.
			// NOTE(review): level 2 is assumed to map to a warning - confirm
			// against MiscService::log().
			$this->miscService->log(
				'Exception while updating documentId:' . $document->getId() . ' - '
				. $e->getMessage() . ' - ' . $e->getTraceAsString(), 2
			);
		}

		$index[] = $document;
	}

	return $index;
}
|
381
|
|
|
|
|
382
|
|
|
|
|
383
|
|
|
/**
 * Builds the document that corresponds to an index entry.
 *
 * When the file cannot be retrieved, the index is flagged Index::INDEX_REMOVE
 * and a bare document carrying only that index is returned. Otherwise the
 * document is generated from the file and filled with access and content.
 *
 * @param Index $index
 *
 * @return FilesDocument
 * @throws FileIsNotIndexableException
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws NotPermittedException
 */
private function generateDocumentFromIndex(Index $index) {
	try {
		$file = $this->getFileFromIndex($index);
	} catch (Exception $e) {
		// the file is gone (or unreachable): ask for the removal of its document.
		$index->setStatus(Index::INDEX_REMOVE);

		$removed = new FilesDocument($index->getProviderId(), $index->getDocumentId());
		$removed->setIndex($index);

		return $removed;
	}

	$document = $this->generateFilesDocumentFromFile($file, $index->getOwnerId());
	$document->setIndex($index);
	$this->updateFilesDocumentFromFile($document, $file);

	return $document;
}
|
411
|
|
|
|
|
412
|
|
|
|
|
413
|
|
|
/**
 * Tells whether the index of a document is still current.
 *
 * When the index options no longer match the configuration, the index is
 * flagged Index::INDEX_CONTENT, written back to the document, and false is
 * returned. Otherwise the document is up to date only if its index status is
 * Index::INDEX_OK and the last indexing is not older than the modification
 * time of the document.
 *
 * @param IndexDocument $document
 *
 * @return bool
 */
public function isDocumentUpToDate($document) {
	$index = $document->getIndex();

	if (!$this->configService->compareIndexOptions($index)) {
		$index->setStatus(Index::INDEX_CONTENT);
		$document->setIndex($index);

		return false;
	}

	if ($index->getStatus() !== Index::INDEX_OK) {
		return false;
	}

	return ($index->getLastIndex() >= $document->getModifiedTime());
}
|
438
|
|
|
|
|
439
|
|
|
|
|
440
|
|
|
/**
 * Generates the up-to-date document for an index entry.
 *
 * The owner is impersonated and its file systems initialised before the
 * document is generated.
 *
 * @param Index $index
 *
 * @return FilesDocument|null null when the file is not indexable
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws NotPermittedException
 */
public function updateDocument(Index $index) {
	$this->impersonateOwner($index);
	$this->initFileSystems($index->getOwnerId());

	try {
		return $this->generateDocumentFromIndex($index);
	} catch (FileIsNotIndexableException $e) {
		return null;
	}
}
|
460
|
|
|
|
|
461
|
|
|
|
|
462
|
|
|
/**
 * Fetches the file of a document from the folder of its viewer and updates
 * the document from it.
 *
 * A file that is not indexable gets its index flagged Index::INDEX_IGNORE.
 *
 * @param FilesDocument $document
 *
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws NotPermittedException
 */
private function updateFilesDocument(FilesDocument $document) {
	$file = $this->rootFolder->getUserFolder($document->getViewerId())
							 ->get($document->getPath());

	try {
		$this->updateFilesDocumentFromFile($document, $file);
	} catch (FileIsNotIndexableException $e) {
		$index = $document->getIndex();
		$index->setStatus(Index::INDEX_IGNORE);
	}
}
|
480
|
|
|
|
|
481
|
|
|
|
|
482
|
|
|
/**
 * Copies the source of the document to its index, refreshes access rights
 * and content from the file, then tags the document with its source.
 *
 * @param FilesDocument $document
 * @param Node $file
 *
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws NotPermittedException
 */
private function updateFilesDocumentFromFile(FilesDocument $document, Node $file) {
	$index = $document->getIndex();
	$index->setSource($document->getSource());

	$this->updateDocumentAccess($document, $file);
	$this->updateContentFromFile($document, $file);

	$document->addTag($document->getSource());
}
|
500
|
|
|
|
|
501
|
|
|
|
|
502
|
|
|
/**
 * Refreshes the access rights and share names of a document.
 *
 * Only done when the index is flagged Index::INDEX_FULL or
 * FilesDocument::STATUS_FILE_ACCESS.
 *
 * @param FilesDocument $document
 * @param Node $file
 */
private function updateDocumentAccess(FilesDocument $document, Node $file) {
	$index = $document->getIndex();

	$accessRequested = $index->isStatus(Index::INDEX_FULL)
					   || $index->isStatus(FilesDocument::STATUS_FILE_ACCESS);
	if (!$accessRequested) {
		return;
	}

	$this->localFilesService->updateDocumentAccess($document, $file);
	$this->externalFilesService->updateDocumentAccess($document, $file);
	$this->groupFoldersService->updateDocumentAccess($document, $file);

	$this->updateShareNames($document, $file);
}
|
521
|
|
|
|
|
522
|
|
|
|
|
523
|
|
|
/**
 * Sets the title of a document to its path and, when requested, extracts
 * the content of the file.
 *
 * Content is only extracted when the index is flagged Index::INDEX_CONTENT,
 * the node is a file, and its size is below the configured limit
 * (ConfigService::FILES_SIZE, multiplied by 1024 * 1024). When no content was
 * set at all, the Index::INDEX_CONTENT flag is removed from the index.
 *
 * @param FilesDocument $document
 * @param Node $file
 *
 * @throws InvalidPathException
 * @throws NotFoundException
 * @throws NotPermittedException
 */
private function updateContentFromFile(FilesDocument $document, Node $file) {
	$document->setTitle($document->getPath());

	$contentRequested = $document->getIndex()
								 ->isStatus(Index::INDEX_CONTENT);
	if (!$contentRequested || $file->getType() !== FileInfo::TYPE_FILE) {
		return;
	}

	/** @var File $file */
	$size = $file->getSize();
	$sizeLimit = $this->configService->getAppValue(ConfigService::FILES_SIZE) * 1024 * 1024;
	if ($size < $sizeLimit) {
		$this->extractContentFromFileText($document, $file);
		$this->extractContentFromFileOffice($document, $file);
		$this->extractContentFromFilePDF($document, $file);
		$this->extractContentFromFileOCR($document, $file);
	}

	// none of the extractors touched the content: nothing to index.
	if ($document->getContent() === null) {
		$document->getIndex()
				 ->unsetStatus(Index::INDEX_CONTENT);
	}
}
|
555
|
|
|
|
|
556
|
|
|
|
|
557
|
|
|
/**
 * Stores, as the 'share_names' info of the document, the path of the file
 * as seen by each user it is shared with.
 *
 * Users are gathered from the local, external and group-folder services.
 * Unknown users and users whose last login is 0 are skipped. A failure on
 * one user is logged and does not stop the others.
 *
 * @param FilesDocument $document
 * @param Node $file
 *
 * @return array paths indexed by the secured username
 */
private function updateShareNames(FilesDocument $document, Node $file) {
	$users = [];

	$this->localFilesService->getShareUsersFromFile($file, $users);
	$this->externalFilesService->getShareUsers($document, $users);
	$this->groupFoldersService->getShareUsers($document, $users);

	$shareNames = [];
	foreach ($users as $username) {
		try {
			$user = $this->userManager->get($username);
			if ($user === null || $user->getLastLogin() === 0) {
				continue;
			}

			$path = $this->getPathFromViewerId($file->getId(), $username);
			if (!is_string($path)) {
				$path = '';
			}

			$shareNames[MiscService::secureUsername($username)] = $path;
		} catch (Exception $e) {
			$this->miscService->log('Issue while getting information on documentId:' . $document->getId(), 0);
		}
	}

	$document->setInfo('share_names', $shareNames);

	return $shareNames;
}
|
592
|
|
|
|
|
593
|
|
|
|
|
594
|
|
|
/**
 * Maps a mime type to one of the MIMETYPE_* constants of this class.
 *
 * The text, PDF and office parsers are tried in that order; an empty string
 * is returned when none of them recognises the mime type.
 *
 * @param string $mimeType
 *
 * @return string
 */
private function parseMimeType($mimeType) {
	$parsed = '';

	try {
		$this->parseMimeTypeText($mimeType, $parsed);
		$this->parseMimeTypePDF($mimeType, $parsed);
		$this->parseMimeTypeOffice($mimeType, $parsed);
	} catch (KnownFileMimeTypeException $e) {
		// a parser recognised the mime type and already filled $parsed.
	}

	return $parsed;
}
|
611
|
|
|
|
|
612
|
|
|
|
|
613
|
|
|
/**
 * Recognises text mime types: anything starting with 'text/', plus the
 * prefixes listed below.
 *
 * On a match, $parsed is set to self::MIMETYPE_TEXT and
 * KnownFileMimeTypeException is thrown to stop further parsing.
 *
 * @param string $mimeType
 * @param string $parsed
 *
 * @throws KnownFileMimeTypeException
 */
private function parseMimeTypeText($mimeType, &$parsed) {
	$isText = (substr($mimeType, 0, 5) === 'text/');

	if (!$isText) {
		$textMimes = [
			'application/epub+zip'
		];

		foreach ($textMimes as $prefix) {
			if (strpos($mimeType, $prefix) === 0) {
				$isText = true;
				break;
			}
		}
	}

	if ($isText) {
		$parsed = self::MIMETYPE_TEXT;
		throw new KnownFileMimeTypeException();
	}
}
|
637
|
|
|
|
|
638
|
|
|
|
|
639
|
|
|
/**
 * Recognises the 'application/pdf' mime type.
 *
 * On a match, $parsed is set to self::MIMETYPE_PDF and
 * KnownFileMimeTypeException is thrown to stop further parsing.
 *
 * @param string $mimeType
 * @param string $parsed
 *
 * @throws KnownFileMimeTypeException
 */
private function parseMimeTypePDF($mimeType, &$parsed) {
	if ($mimeType !== 'application/pdf') {
		return;
	}

	$parsed = self::MIMETYPE_PDF;
	throw new KnownFileMimeTypeException();
}
|
652
|
|
|
|
|
653
|
|
|
|
|
654
|
|
|
/**
 * Recognises office mime types by prefix.
 *
 * On a match, $parsed is set to self::MIMETYPE_OFFICE and
 * KnownFileMimeTypeException is thrown to stop further parsing.
 *
 * @param string $mimeType
 * @param string $parsed
 *
 * @throws KnownFileMimeTypeException
 */
private function parseMimeTypeOffice($mimeType, &$parsed) {
	$officeMimes = [
		'application/msword',
		'application/vnd.oasis.opendocument',
		'application/vnd.sun.xml',
		'application/vnd.openxmlformats-officedocument',
		'application/vnd.ms-word',
		'application/vnd.ms-powerpoint',
		'application/vnd.ms-excel'
	];

	$isOffice = false;
	foreach ($officeMimes as $prefix) {
		if (strpos($mimeType, $prefix) === 0) {
			$isOffice = true;
			break;
		}
	}

	if ($isOffice) {
		$parsed = self::MIMETYPE_OFFICE;
		throw new KnownFileMimeTypeException();
	}
}
|
679
|
|
|
|
|
680
|
|
|
|
|
681
|
|
|
/**
 * Stores the base64-encoded content of a text file in the document.
 *
 * Nothing is done when the mime type is not a text one; the source of the
 * document is only checked for text files.
 *
 * @param FilesDocument $document
 * @param File $file
 *
 * @throws NotPermittedException
 */
private function extractContentFromFileText(FilesDocument $document, File $file) {
	$isText = ($this->parseMimeType($document->getMimeType()) === self::MIMETYPE_TEXT);

	// short-circuit keeps isSourceIndexable() from running on non-text files.
	if (!$isText || !$this->isSourceIndexable($document)) {
		return;
	}

	$encoded = base64_encode($file->getContent());
	$document->setContent($encoded, IndexDocument::ENCODED_BASE64);
}
|
701
|
|
|
|
|
702
|
|
|
|
|
703
|
|
|
/**
 * Stores the base64-encoded content of a PDF file in the document.
 *
 * Nothing is done for other mime types. For a PDF, the
 * ConfigService::FILES_PDF index option is recorded on the document; the
 * content is set to an empty string when that option is not '1'.
 *
 * @param FilesDocument $document
 * @param File $file
 *
 * @throws NotPermittedException
 */
private function extractContentFromFilePDF(FilesDocument $document, File $file) {
	if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_PDF) {
		return;
	}

	$this->configService->setDocumentIndexOption($document, ConfigService::FILES_PDF);
	if (!$this->isSourceIndexable($document)) {
		return;
	}

	if ($this->configService->getAppValue(ConfigService::FILES_PDF) === '1') {
		$document->setContent(
			base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
		);
	} else {
		$document->setContent('');
	}
}
|
729
|
|
|
|
|
730
|
|
|
|
|
731
|
|
|
/**
 * Stores the base64-encoded content of an office file in the document.
 *
 * Nothing is done for other mime types. For an office file, the
 * ConfigService::FILES_OFFICE index option is recorded on the document; the
 * content is set to an empty string when that option is not '1'.
 *
 * @param FilesDocument $document
 * @param File $file
 *
 * @throws NotPermittedException
 */
private function extractContentFromFileOffice(FilesDocument $document, File $file) {
	if ($this->parseMimeType($document->getMimeType()) !== self::MIMETYPE_OFFICE) {
		return;
	}

	$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OFFICE);
	if (!$this->isSourceIndexable($document)) {
		return;
	}

	if ($this->configService->getAppValue(ConfigService::FILES_OFFICE) === '1') {
		$document->setContent(
			base64_encode($file->getContent()), IndexDocument::ENCODED_BASE64
		);
	} else {
		$document->setContent('');
	}
}
|
757
|
|
|
|
|
758
|
|
|
|
|
759
|
|
|
/**
 * Falls back to OCR when no other extractor produced content.
 *
 * Skipped when ConfigService::FILES_OCR is not '1' or when the document
 * already has a non-empty content. Otherwise the content is reset to an empty
 * string before the Tesseract extraction is attempted.
 *
 * @param FilesDocument $document
 * @param File $file
 */
private function extractContentFromFileOCR(FilesDocument $document, File $file) {
	$ocrEnabled = ($this->configService->getAppValue(ConfigService::FILES_OCR) === '1');
	if (!$ocrEnabled) {
		return;
	}

	$hasContent = ($document->getContent() !== '' && $document->getContent() !== null);
	if ($hasContent) {
		return;
	}

	$document->setContent('');
	$this->extractContentUsingTesseractOCR($document, $file);
}
|
775
|
|
|
|
|
776
|
|
|
|
|
777
|
|
|
/**
 * Runs the file through the TesseractService and stores the base64-encoded
 * result as content of the document.
 *
 * The service is resolved from the container. Any exception raised while
 * resolving or using it leaves the document untouched, as does a mime type
 * or extension the service does not handle, or a source that is not indexable.
 *
 * @param FilesDocument $document
 * @param File $file
 */
private function extractContentUsingTesseractOCR(FilesDocument $document, File $file) {
	try {
		$ocr = $this->container->query(TesseractService::class);
		$fileExtension = pathinfo($document->getPath(), PATHINFO_EXTENSION);

		$supported = $ocr->parsedMimeType($document->getMimetype(), $fileExtension);
		if (!$supported) {
			return;
		}

		$this->configService->setDocumentIndexOption($document, ConfigService::FILES_OCR);
		if (!$this->isSourceIndexable($document)) {
			return;
		}

		$text = $ocr->ocrFile($file);
	} catch (Exception $e) {
		return;
	}

	$document->setContent(base64_encode($text), IndexDocument::ENCODED_BASE64);
}
|
802
|
|
|
|
|
803
|
|
|
|
|
804
|
|
|
/**
 * Tells whether content from the source of a document should be indexed.
 *
 * The source is recorded as an index option of the document. When the app
 * value stored under the source name is not '1', the content of the document
 * is set to an empty string and false is returned.
 *
 * @param FilesDocument $document
 *
 * @return bool
 */
private function isSourceIndexable(FilesDocument $document) {
	$this->configService->setDocumentIndexOption($document, $document->getSource());

	$enabled = ($this->configService->getAppValue($document->getSource()) === '1');
	if (!$enabled) {
		$document->setContent('');
	}

	return $enabled;
}
|
819
|
|
|
|
|
820
|
|
|
|
|
821
|
|
|
/**
 * Lets the group-folder and external-files services impersonate an owner
 * for an index whose owner id is an empty string.
 *
 * Nothing is done when the index already has an owner id.
 *
 * @param Index $index
 */
private function impersonateOwner(Index $index) {
	if ($index->getOwnerId() === '') {
		$this->groupFoldersService->impersonateOwner($index);
		$this->externalFilesService->impersonateOwner($index);
	}
}
|
829
|
|
|
|
|
830
|
|
|
} |
|
831
|
|
|
|
|
832
|
|
|
|
Scrutinizer analyzes your
composer.json/composer.lock file, if available, to determine the classes and functions that are defined by your dependencies. It seems like the listed class was neither found in your dependencies, nor was it found in the analyzed files in your repository. If you are using some other form of dependency management, you might want to disable this analysis.