1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* Nextcloud - OCR |
5
|
|
|
* This file is licensed under the Affero General Public License version 3 or |
6
|
|
|
* later. See the COPYING file. |
7
|
|
|
* |
8
|
|
|
* @author Janis Koehr <[email protected]> |
9
|
|
|
* @copyright Janis Koehr 2017 |
10
|
|
|
*/ |
11
|
|
|
namespace OCA\Ocr\Service; |
12
|
|
|
|
13
|
|
|
use OCA\Ocr\Db\FileMapper; |
14
|
|
|
use OCA\Ocr\Db\File; |
15
|
|
|
use OCP\ILogger; |
16
|
|
|
use OCA\Ocr\Db\ShareMapper; |
17
|
|
|
use OCP\IL10N; |
18
|
|
|
use OCA\Ocr\Constants\OcrConstants; |
19
|
|
|
|
20
|
|
|
|
21
|
|
|
/** |
22
|
|
|
* Class FileService |
23
|
|
|
* |
24
|
|
|
* @package OCA\Ocr\Service |
25
|
|
|
*/ |
26
|
|
|
class FileService { |
27
|
|
|
|
28
|
|
|
/** |
29
|
|
|
* |
30
|
|
|
* @var ILogger |
31
|
|
|
*/ |
32
|
|
|
private $logger; |
33
|
|
|
|
34
|
|
|
/** |
35
|
|
|
* |
36
|
|
|
* @var FileMapper |
37
|
|
|
*/ |
38
|
|
|
private $fileMapper; |
39
|
|
|
|
40
|
|
|
/** |
41
|
|
|
* |
42
|
|
|
* @var ShareMapper |
43
|
|
|
*/ |
44
|
|
|
private $shareMapper; |
45
|
|
|
|
46
|
|
|
/** |
47
|
|
|
* |
48
|
|
|
* @var string |
49
|
|
|
*/ |
50
|
|
|
private $userId; |
51
|
|
|
|
52
|
|
|
/** |
53
|
|
|
* |
54
|
|
|
* @var IL10N |
55
|
|
|
*/ |
56
|
|
|
private $l10n; |
57
|
|
|
|
58
|
6 |
|
/**
 * Stores the collaborators this service works with.
 *
 * @param IL10N $l10n used to translate the exception messages raised by this class
 * @param ILogger $logger used for debug output
 * @param string $userId id that file owners are compared against in this class
 * @param FileMapper $fileMapper used to look up files by id
 * @param ShareMapper $shareMapper used to look up the share belonging to a shared file
 */
public function __construct(IL10N $l10n, ILogger $logger, $userId, FileMapper $fileMapper, ShareMapper $shareMapper) {
    // Data access.
    $this->shareMapper = $shareMapper;
    $this->fileMapper = $fileMapper;

    // Acting user.
    $this->userId = $userId;

    // Logging and translation.
    $this->logger = $logger;
    $this->l10n = $l10n;
}
65
|
|
|
|
66
|
|
|
/**
 * Tells whether the given file is shared with the current user instead of
 * being owned by them.
 *
 * The owner is derived from the storage name with every 'home::' occurrence
 * removed and is compared strictly against the user id of this service.
 *
 * @param File $fileInfo
 * @return boolean true when the current user is not the owner (shared file),
 *                 false when the current user is the owner
 */
public function checkSharedWithInitiator($fileInfo) {
    $storageOwner = str_replace('home::', '', $fileInfo->getStoragename());

    // Anything but an identical owner id counts as "shared".
    return $this->userId !== $storageOwner;
}
82
|
|
|
|
83
|
|
|
/**
 * Builds the target name by delegating to the builder that matches the
 * sharing state of the file.
 *
 * @param File $fileInfo
 * @param boolean $shared truthy selects the shared-file builder, falsy the regular one
 * @return string
 */
public function buildTarget($fileInfo, $shared) {
    return $shared
        ? $this->buildTargetForShared($fileInfo)
        : $this->buildTargetNotForShared($fileInfo);
}
98
|
|
|
|
99
|
|
|
/**
 * Builds the source name: '<user>/<path of the file>'.
 *
 * For shared files the user part is the storage name without 'home::',
 * otherwise it is the user id of this service.
 *
 * @param File $fileInfo
 * @param boolean $shared
 * @return string
 */
public function buildSource($fileInfo, $shared) {
    $relativePath = $fileInfo->getPath();

    $userPart = $shared
        ? str_replace('home::', '', $fileInfo->getStoragename())
        : $this->userId;

    return $userPart . '/' . $relativePath;
}
115
|
|
|
|
116
|
|
|
/**
 * Resolves every entry of $files to its file info and verifies that the
 * MIME type of each one is allowed.
 *
 * Processing stops at the first invalid entry; nothing is returned in that case.
 *
 * @param array $files entries are expected to carry a non-empty 'id' key
 * @return File[] the file infos in the order of $files
 * @throws NotFoundException if an entry has no (or an empty) 'id', or if the
 *                           MIME type check rejects a file
 */
public function buildFileInfo($files) {
    $collected = [];

    foreach ($files as $entry) {
        // Guard: an entry without an id cannot be looked up.
        if (empty($entry['id'])) {
            throw new NotFoundException($this->l10n->t('Wrong parameter.'));
        }

        $info = $this->fileMapper->find($entry['id']);
        $this->checkMimeType($info);
        $collected[] = $info;
    }

    return $collected;
}
138
|
|
|
|
139
|
|
|
/**
 * Picks the worker type for the ocr process: OCRmyPDF for files whose MIME
 * type is the PDF MIME type, TESSERACT for everything else.
 *
 * @param File $fileInfo
 * @return integer
 */
public function getCorrectType($fileInfo) {
    $isPdf = $fileInfo->getMimetype() === OcrConstants::MIME_TYPE_PDF;

    return $isPdf ? OcrConstants::OCRmyPDF : OcrConstants::TESSERACT;
}
152
|
|
|
|
153
|
|
|
/**
 * Removes the file at the given path by running `rm` through exec().
 *
 * The path is passed as one single, shell-escaped argument, so paths
 * containing spaces or shell metacharacters are removed literally instead of
 * being split or interpreted by the shell. The output and exit status of
 * `rm` are not evaluated.
 * @codeCoverageIgnore
 *
 * @param string $pathToFile path of the single file to remove
 */
public function execRemove($pathToFile) {
    // '--' ends option parsing so a path starting with '-' is not taken as a flag.
    exec('rm -- ' . escapeshellarg($pathToFile));
}
162
|
|
|
|
163
|
|
|
/**
 * Thin wrapper around PHP's file_get_contents() so it can be replaced in tests.
 * @codeCoverageIgnore
 *
 * @param string $pathToFile
 * @return string whatever file_get_contents() returns for the path
 *                (note: file_get_contents() yields false on failure)
 */
public function getFileContents($pathToFile) {
    $contents = file_get_contents($pathToFile);

    return $contents;
}
173
|
|
|
|
174
|
|
|
/**
 * Thin wrapper around PHP's file_exists() so it can be replaced in tests.
 * @codeCoverageIgnore
 *
 * @param string $pathToFile
 * @return boolean result of file_exists() for the path
 */
public function fileExists($pathToFile) {
    $exists = file_exists($pathToFile);

    return $exists;
}
184
|
|
|
|
185
|
|
|
/**
 * Wraps the static function \OCP\Files::buildNotExistingFileName() in order to be able to test everything else.
 *
 * The default suffix '_OCR.pdf' is appended only when $fileName does not
 * already end in '_OCR.pdf' or '_OCR.txt'. The target builders of this class
 * pass names that already carry one of these suffixes; appending the default
 * unconditionally produced names such as 'scan_OCR.txt_OCR.pdf'.
 * @codeCoverageIgnore
 *
 * @param string $filePath
 * @param string $fileName name with or without an '_OCR.pdf' / '_OCR.txt' suffix
 * @return string
 */
public function buildNotExistingFilename($filePath, $fileName) {
    // Keep the old behaviour for bare names, but never stack a second suffix.
    if (preg_match('/_OCR\.(pdf|txt)$/', $fileName) !== 1) {
        $fileName .= '_OCR.pdf';
    }

    return \OCP\Files::buildNotExistingFileName($filePath, $fileName);
}
196
|
|
|
|
197
|
|
|
/**
 * Returns a not existing file name for PDF or image processing of a file that
 * is shared with the current user.
 *
 * The name is derived from the file target of the share:
 *   '/thedom.png'      => path '',     name 'thedom_OCR.txt'
 *   '/Test/thedom.pdf' => path 'Test', name 'thedom_OCR.pdf'
 * PDFs get the suffix '_OCR.pdf', every other MIME type '_OCR.txt'.
 *
 * Targets without an extension keep their full base name, and a dot inside a
 * directory name is never mistaken for the extension separator.
 *
 * FIXME: Change this behaviour as soon as the buildNotExistingFileName function is not
 * static anymore (it is wrapped by this class so it can be mocked partially).
 * @codeCoverageIgnore
 *
 * @param File $fileInfo
 * @return string
 */
private function buildTargetForShared(File $fileInfo) {
    $share = $this->shareMapper->find($fileInfo->getFileid(), $this->userId,
        str_replace('home::', '', $fileInfo->getStoragename()));
    $target = $share->getFileTarget(); // '/thedom.png' || '/Test/thedom.png'

    // Last path segment: '/Test/thedom.png' => 'thedom.png'
    $slashPos = strrpos($target, '/');
    $baseName = ($slashPos === false) ? $target : (string) substr($target, $slashPos + 1);

    // Strip the extension: 'thedom.png' => 'thedom'. A name without a dot (or
    // with only a leading dot) has no extension and is kept as it is.
    $dotPos = strrpos($baseName, '.');
    $fileName = ($dotPos === false || $dotPos === 0)
        ? $baseName
        : (string) substr($baseName, 0, $dotPos);

    // Directory part: '/thedom.png' => '/' || '/Test/thedom.png' => '/Test'
    $filePath = dirname($target);
    // Drop the leading slash only: '/' => '' || '/Test/Sub' => 'Test/Sub'
    if (strpos($filePath, '/') === 0) {
        $filePath = (string) substr($filePath, 1);
    }

    // PDFs keep being PDFs, images result in a text file.
    $suffix = ($fileInfo->getMimetype() === OcrConstants::MIME_TYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';

    return $this->buildNotExistingFilename($filePath, $fileName . $suffix);
}
243
|
|
|
|
244
|
|
|
/**
 * Returns a not existing file name for PDF or image processing of a file that
 * is owned by the current user.
 *
 * The name is derived from the name and path of the file:
 *   'files/thedom.png'      => path '',     name 'thedom_OCR.txt'
 *   'files/Test/thedom.pdf' => path 'Test', name 'thedom_OCR.pdf'
 * PDFs get the suffix '_OCR.pdf', every other MIME type '_OCR.txt'.
 *
 * Only the leading 'files' segment of the path is removed, so folders such as
 * 'myfiles' or a nested 'files' folder stay intact. Names without an
 * extension keep their full name.
 *
 * FIXME: Change this behaviour as soon as the buildNotExistingFileName function is not
 * static anymore (it is wrapped by this class so it can be mocked partially).
 * @codeCoverageIgnore
 *
 * @param File $fileInfo
 * @return string
 */
private function buildTargetNotForShared(File $fileInfo) {
    // Strip the extension: 'thedom.png' => 'thedom'. A name without a dot (or
    // with only a leading dot) has no extension and is kept as it is.
    $name = $fileInfo->getName();
    $dotPos = strrpos($name, '.');
    $fileName = ($dotPos === false || $dotPos === 0)
        ? $name
        : (string) substr($name, 0, $dotPos);

    // Directory part: 'files/Test/thedom.png' => 'files/Test' || 'files/thedom.png' => 'files'
    $filePath = dirname($fileInfo->getPath());
    if ($filePath === '.') {
        // dirname() reports '.' for a path without any directory part.
        $filePath = '';
    }
    $filePath = ltrim($filePath, '/');

    // Remove the leading 'files' segment only: 'files/Test' => 'Test' || 'files' => ''
    $root = 'files';
    if ($filePath === $root) {
        $filePath = '';
    } elseif (strpos($filePath, $root . '/') === 0) {
        $filePath = (string) substr($filePath, strlen($root . '/'));
    }

    // PDFs keep being PDFs, images result in a text file.
    $suffix = ($fileInfo->getMimetype() === OcrConstants::MIME_TYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';

    return $this->buildNotExistingFilename($filePath, $fileName . $suffix);
}
284
|
|
|
|
285
|
|
|
/**
 * Checks that the MIME type of the given file info is one of
 * OcrConstants::ALLOWED_MIME_TYPES.
 *
 * The comparison is strict, so a non-string MIME type value can never match
 * an allowed type through PHP's loose type juggling. The rejection is logged
 * on debug level before the exception is thrown.
 *
 * @param File $fileInfo
 * @throws NotFoundException if the MIME type is not allowed
 */
private function checkMimeType(File $fileInfo) {
    // The File type hint already guarantees an object, so only the MIME type needs checking.
    if (!in_array($fileInfo->getMimetype(), OcrConstants::ALLOWED_MIME_TYPES, true)) {
        $this->logger->debug('Getting FileInfo did not work or not included in the ALLOWED_MIMETYPES array.');
        throw new NotFoundException($this->l10n->t('Wrong MIME type.'));
    }
}
296
|
|
|
} |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.