1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* Nextcloud - OCR |
5
|
|
|
* |
6
|
|
|
* This file is licensed under the Affero General Public License version 3 or |
7
|
|
|
* later. See the COPYING file. |
8
|
|
|
* |
9
|
|
|
* @author Janis Koehr <[email protected]> |
10
|
|
|
* @copyright Janis Koehr 2017 |
11
|
|
|
*/ |
12
|
|
|
namespace OCA\Ocr\Service; |
13
|
|
|
|
14
|
|
|
use OCA\Ocr\Db\OcrJob; |
15
|
|
|
use OCA\Ocr\Db\FileMapper; |
16
|
|
|
use OCA\Ocr\Db\File; |
17
|
|
|
use OCP\ILogger; |
18
|
|
|
use OCA\Ocr\Db\ShareMapper; |
19
|
|
|
use OCP\IL10N; |
20
|
|
|
use OCA\Ocr\Constants\OcrConstants; |
21
|
|
|
|
22
|
|
|
/** |
23
|
|
|
* Class FileService |
24
|
|
|
* |
25
|
|
|
* @package OCA\Ocr\Service |
26
|
|
|
*/ |
27
|
|
|
class FileService { |
28
|
|
|
|
29
|
|
|
/** @var ILogger Logger used for debug messages and exception reports. */
private $logger;

/** @var FileMapper Mapper used to load file records by their id. */
private $fileMapper;

/** @var ShareMapper Mapper used to load the share record of a shared file. */
private $shareMapper;

/** @var string Id of the user on whose behalf this service works. */
private $userId;

/** @var IL10N Translator for the messages put into thrown exceptions. */
private $l10n;

/**
 * Stores the collaborators the service needs.
 *
 * @param IL10N $l10n
 * @param ILogger $logger
 * @param string $userId
 * @param FileMapper $fileMapper
 * @param ShareMapper $shareMapper
 */
public function __construct(IL10N $l10n, ILogger $logger, $userId, FileMapper $fileMapper, ShareMapper $shareMapper) {
    $this->shareMapper = $shareMapper;
    $this->fileMapper = $fileMapper;
    $this->userId = $userId;
    $this->logger = $logger;
    $this->l10n = $l10n;
}
66
|
|
|
|
67
|
|
|
/**
 * Checks whether the file is shared with the process initiator, i.e. whether
 * the current user differs from the owner derived from the file's storage name.
 *
 * @param File $fileInfo
 * @return boolean true if the current user is not the owner (shared file),
 *                 false if the current user is the owner
 * @throws \Exception any exception raised while reading the file info, after
 *                    it has been logged by handleException()
 */
public function checkSharedWithInitiator($fileInfo) {
    try {
        // Removing the 'home::' marker from the storage name leaves the value
        // that is compared against the current user id.
        $owner = str_replace('home::', '', $fileInfo->getStoragename());

        return $this->userId !== $owner;
    } catch (\Exception $e) {
        // Fully qualified on purpose: an unqualified "Exception" would resolve
        // to OCA\Ocr\Service\Exception inside this namespace and never match.
        $this->handleException($e);
    }
}
87
|
|
|
|
88
|
|
|
/**
 * Builds the target name for the processed file by delegating to the
 * shared or the not-shared variant.
 *
 * @param File $fileInfo
 * @param boolean $shared whether the file is shared with the current user
 * @return string
 */
public function buildTarget($fileInfo, $shared) {
    return $shared
        ? $this->buildTargetForShared($fileInfo)
        : $this->buildTargetNotForShared($fileInfo);
}
103
|
|
|
|
104
|
|
|
/**
 * Builds the source name: the file's path prefixed with a user segment.
 *
 * For a shared file the prefix is the storage name without its 'home::'
 * marker, otherwise it is the id of the current user.
 *
 * @param File $fileInfo
 * @param boolean $shared whether the file is shared with the current user
 * @return string
 */
public function buildSource($fileInfo, $shared) {
    $path = $fileInfo->getPath();

    if ($shared) {
        $prefix = str_replace('home::', '', $fileInfo->getStoragename());
    } else {
        $prefix = $this->userId;
    }

    return $prefix . '/' . $path;
}
120
|
|
|
|
121
|
|
|
/**
 * Returns the file info for each entry in $files and checks that every
 * entry carries an id and has an allowed MIME type.
 *
 * @param array $files entries that are each expected to contain an 'id' key
 * @return File[]
 * @throws NotFoundException if an entry has no (or an empty) id or the
 *                           MIME type check fails
 * @throws \Exception any other exception, after it has been logged
 */
public function buildFileInfo($files) {
    try {
        $fileArray = [];
        foreach ($files as $file) {
            // Reject entries without a usable id before touching the mapper.
            if (empty($file['id'])) {
                throw new NotFoundException($this->l10n->t('Wrong parameter.'));
            }

            $fileInfo = $this->fileMapper->find($file['id']);
            $this->checkMimeType($fileInfo);

            $fileArray[] = $fileInfo;
        }

        return $fileArray;
    } catch (\Exception $e) {
        // Fully qualified on purpose: an unqualified "Exception" would resolve
        // to OCA\Ocr\Service\Exception inside this namespace and never match,
        // so nothing would be logged.
        $this->handleException($e);
    }
}
149
|
|
|
|
150
|
|
|
/**
 * Cuts the target name of an OCR job at the last occurrence of '_OCR',
 * dropping that marker and everything after it (e.g. the extension).
 *
 * @param $job OcrJob
 * @return string the part of the target in front of the last '_OCR'
 */
public function removeFileExtension($job) {
    $target = $job->getTarget();
    $markerPosition = strrpos($target, '_OCR');

    return substr($target, 0, $markerPosition);
}
159
|
|
|
|
160
|
|
|
/**
 * Determines the worker type for the OCR process from the MIME type.
 *
 * @param File $fileInfo
 * @return integer OcrConstants::OCRmyPDF for PDF files,
 *                 OcrConstants::TESSERACT for everything else
 */
public function getCorrectType($fileInfo) {
    $isPdf = $fileInfo->getMimetype() === OcrConstants::MIME_TYPE_PDF;

    return $isPdf ? OcrConstants::OCRmyPDF : OcrConstants::TESSERACT;
}
173
|
|
|
|
174
|
|
|
/**
 * Removes the file at the given path by running "rm" through exec().
 *
 * The path is passed as one quoted shell argument, so spaces and shell
 * metacharacters in it are taken literally and cannot inject further
 * commands. As a consequence wildcards are not expanded and the string
 * is always treated as a single path.
 *
 * @codeCoverageIgnore
 *
 * @param string $pathToFile
 */
public function execRemove($pathToFile) {
    exec('rm ' . escapeshellarg($pathToFile));
}
184
|
|
|
|
185
|
|
|
/**
 * Thin wrapper around PHP's file_get_contents() so that the call can be
 * replaced in tests.
 *
 * @codeCoverageIgnore
 *
 * @param string $pathToFile
 * @return string whatever file_get_contents() returns for the path
 */
public function getFileContents($pathToFile) {
    $contents = file_get_contents($pathToFile);

    return $contents;
}
196
|
|
|
|
197
|
|
|
/**
 * Thin wrapper around PHP's file_exists() so that the call can be
 * replaced in tests.
 *
 * @codeCoverageIgnore
 *
 * @param string $pathToFile
 * @return boolean result of file_exists() for the path
 */
public function fileExists($pathToFile) {
    $exists = file_exists($pathToFile);

    return $exists;
}
208
|
|
|
|
209
|
|
|
/**
 * Returns a not yet existing file name for PDF or image processing of a
 * file that is shared with the current user.
 *
 * The name is derived from the file target of the share record: extension
 * and leading directories are removed from the file name, the directory
 * loses its leading slash, and '_OCR.pdf' (PDF) or '_OCR.txt' (anything
 * else) is appended.
 *
 * Protected because of testing issues with static methods (it is mocked
 * partially). FIXME: Change this behaviour as soon as the
 * buildNotExistingFileName function is not static anymore.
 *
 * @codeCoverageIgnore
 *
 * @param File $fileInfo
 * @return string
 * @throws \Exception any exception raised on the way, after it has been logged
 */
protected function buildTargetForShared(File $fileInfo) {
    try {
        $owner = str_replace('home::', '', $fileInfo->getStoragename());
        $share = $this->shareMapper->find($fileInfo->getFileid(), $this->userId, $owner);
        $shareTarget = $share->getFileTarget();

        // get rid of the .png or .pdf and so on
        // '/thedom.png' => '/thedom' || '/Test/thedom.png' => '/Test/thedom'
        $fileName = substr($shareTarget, 0, strrpos($shareTarget, '.'));

        // keep only what follows the last slash
        // '/thedom' => 'thedom' || '/Test/thedom' => 'thedom'
        $fileName = substr(strrchr($fileName, '/'), 1);

        // eliminate the file name from the path
        // '/thedom.png' => '/' || '/Test/thedom.png' => '/Test'
        $filePath = dirname($shareTarget);

        // remove the first slash: '/' => '' || '/Test' => 'Test'
        $pos = strpos($filePath, '/');
        if ($pos !== false) {
            $filePath = substr_replace($filePath, '', $pos, strlen('/'));
        }

        // PDFs stay PDFs, images produce a text file
        $suffix = ($fileInfo->getMimetype() === OcrConstants::MIME_TYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';

        return \OCP\Files::buildNotExistingFileName($filePath, $fileName . $suffix);
    } catch (\Exception $e) {
        // Fully qualified on purpose: an unqualified "Exception" would resolve
        // to OCA\Ocr\Service\Exception inside this namespace and never match.
        $this->handleException($e);
    }
}
249
|
|
|
|
250
|
|
|
/**
 * Returns a not yet existing file name for PDF or image processing of a
 * file that is owned by the current user.
 *
 * The file name loses its extension, the directory is taken relative to
 * the leading 'files' segment of the path, and '_OCR.pdf' (PDF) or
 * '_OCR.txt' (anything else) is appended.
 *
 * Protected because of testing issues with static methods (it is mocked
 * partially). FIXME: Change this behaviour as soon as the
 * buildNotExistingFileName function is not static anymore.
 *
 * @codeCoverageIgnore
 *
 * @param File $fileInfo
 * @return string
 * @throws \Exception any exception raised on the way, after it has been logged
 */
protected function buildTargetNotForShared(File $fileInfo) {
    try {
        $name = $fileInfo->getName();
        $path = $fileInfo->getPath();

        // get rid of the .png or .pdf and so on: 'thedom.png' => 'thedom'
        $fileName = substr($name, 0, strrpos($name, '.'));

        // eliminate the file name from the path by cutting at the last slash
        // 'files/Test/thedom.png' => 'files/Test' || 'files/thedom.png' => 'files'
        $lastSlash = strrpos($path, '/');
        $filePath = ($lastSlash === false) ? '' : substr($path, 0, $lastSlash);

        // Strip only the LEADING 'files' segment. Replacing every occurrence
        // of 'files' would corrupt folder names that merely contain the word
        // (e.g. 'files/myfiles' must become 'myfiles', not 'my').
        if ($filePath === 'files') {
            $filePath = ''; // 'files' => ''
        } elseif (strpos($filePath, 'files/') === 0) {
            $filePath = substr($filePath, strlen('files/')); // 'files/Test' => 'Test'
        } elseif (strpos($filePath, '/') === 0) {
            $filePath = substr($filePath, 1); // '/Test' => 'Test'
        }

        // PDFs stay PDFs, images produce a text file
        $suffix = ($fileInfo->getMimetype() === OcrConstants::MIME_TYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';

        return \OCP\Files::buildNotExistingFileName($filePath, $fileName . $suffix);
    } catch (\Exception $e) {
        // Fully qualified on purpose: an unqualified "Exception" would resolve
        // to OCA\Ocr\Service\Exception inside this namespace and never match.
        $this->handleException($e);
    }
}
291
|
|
|
|
292
|
|
|
/**
 * Checks that the MIME type of the given file info is one of
 * OcrConstants::ALLOWED_MIME_TYPES.
 *
 * @param File $fileInfo
 * @throws NotFoundException if the file info is missing or its MIME type
 *                           is not allowed
 * @throws \Exception any other exception, after it has been logged
 */
private function checkMimeType(File $fileInfo) {
    try {
        // Strict comparison, in line with the === MIME type checks used
        // elsewhere in this class.
        $allowed = $fileInfo && in_array($fileInfo->getMimetype(), OcrConstants::ALLOWED_MIME_TYPES, true);

        if (!$allowed) {
            $this->logger->debug('Getting FileInfo did not work or not included in the ALLOWED_MIMETYPES array.');
            throw new NotFoundException($this->l10n->t('Wrong MIME type.'));
        }
    } catch (\Exception $e) {
        // Fully qualified on purpose: an unqualified "Exception" would resolve
        // to OCA\Ocr\Service\Exception inside this namespace and never match.
        $this->handleException($e);
    }
}
307
|
|
|
|
308
|
|
|
/**
 * Central exception handling for the methods of this class: logs the
 * exception and throws it on. This method never returns normally.
 *
 * @param \Exception $e
 * @throws \Exception the given exception itself, unless it is a NotFoundException
 * @throws NotFoundException a new instance carrying the original message
 */
private function handleException($e) {
    $context = [
        'app' => 'ocr',
        'message' => 'Exception during file service function processing'
    ];
    $this->logger->logException($e, $context);

    if (!($e instanceof NotFoundException)) {
        throw $e;
    }

    throw new NotFoundException($e->getMessage());
}
326
|
|
|
} |
Scrutinizer analyzes your composer.json/composer.lock file, if available, to determine the classes and functions that are defined by your dependencies. It seems like the listed class was neither found in your dependencies, nor was it found in the analyzed files in your repository. If you are using some other form of dependency management, you might want to disable this analysis.