Completed
Pull Request — master (#17)
by Janis
02:51
created

OcrService::sendGearmanJob()   A

Complexity

Conditions 3
Paths 8

Size

Total Lines 23
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
dl 0
loc 23
ccs 0
cts 18
cp 0
rs 9.0856
c 0
b 0
f 0
cc 3
eloc 18
nc 8
nop 7
crap 12

1 Method

Rating   Name   Duplication   Size   Complexity  
A OcrService::handleException() 8 8 2
1
<?php
2
/**
3
 * nextCloud - ocr
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Janis Koehr <[email protected]>
9
 * @copyright Janis Koehr 2016
10
 */
11
12
namespace OCA\Ocr\Service;
13
14
use Exception;
15
use OC\Files\View;
16
use OCA\Ocr\Db\OcrStatus;
17
use OCA\Ocr\Db\OcrStatusMapper;
18
use OCP\Files;
19
use OCP\Files\FileInfo;
20
use OCP\IConfig;
21
use OCP\IL10N;
22
use OCP\ILogger;
23
use OCP\ITempManager;
24
25
26
/**
27
 * Class OcrService
28
 * @package OCA\Ocr\Service
29
 */
30
class OcrService {
31
32
	/**
33
	 * @var ILogger
34
	 */
35
	private $logger;
36
37
	/**
38
	 * @var ITempManager
39
	 */
40
	private $tempM;
41
42
	/**
43
	 * @var IConfig
44
	 */
45
	private $config;
46
47
	/**
48
	 * @var QueueService
49
	 */
50
	private $queueService;
51
52
	/**
53
	 * @var OcrStatusMapper
54
	 */
55
	private $statusMapper;
56
57
	/**
58
	 * @var View
59
	 */
60
	private $view;
61
62
	/**
63
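	 * @var string user id (accepted untyped by the constructor; concatenated into the '/<userId>/files' path prefix)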
64
	 */
65
	private $userId;
66
67
	/**
68
	 * @var IL10N
69
	 */
70
	private $l10n;
71
72
	/**
73
	 * Array of allowed mimetypes for ocr processing
74
	 */
75
	const ALLOWED_MIMETYPES = ['application/pdf', 'image/png', 'image/jpeg', 'image/tiff'];
76
77
	/**
78
	 * the correct mimetype for a pdf file
79
	 */
80
	const MIMETYPE_PDF = 'application/pdf';
81
82
	/**
83
	 * the only allowed image mimetypes by tesseract
84
	 */
85
	const MIMETYPES_IMAGE = ['image/png', 'image/jpeg', 'image/tiff'];
86
87
	/**
	 * Creates the service and stores its collaborators.
	 *
	 * @param ITempManager $tempManager source of the temporary files created per ocr job
	 * @param IConfig $config system configuration, read for the 'datadirectory' value
	 * @param QueueService $queueService client the ocr jobs are sent through
	 * @param OcrStatusMapper $mapper database mapper for the ocr status entities
	 * @param View $view file view used to look up file info and to write results
	 * @param $userId user id; used to select status rows and to strip the '/<userId>/files' path prefix
	 * @param IL10N $l10n translator for the exception messages
	 * @param ILogger $logger logger, always called with the 'ocr' app context
	 */
	public function __construct(ITempManager $tempManager, IConfig $config, QueueService $queueService, OcrStatusMapper $mapper, View $view, $userId, IL10N $l10n, ILogger $logger) {
		// Context of the current request.
		$this->userId = $userId;
		$this->l10n = $l10n;
		$this->logger = $logger;
		$this->config = $config;

		// Storage and queue collaborators.
		$this->view = $view;
		$this->tempM = $tempManager;
		$this->statusMapper = $mapper;
		$this->queueService = $queueService;
	}
109
110
	/**
	 * Gets the list of all available tesseract-ocr languages.
	 *
	 * Runs `tesseract --list-langs` (stderr merged into stdout) and keeps every
	 * output line that is exactly three characters long after trimming; all
	 * other lines are skipped.
	 * NOTE(review): the length filter also drops longer names such as
	 * "chi_sim" - confirm this is intended.
	 *
	 * @return array Languages
	 * @throws NotFoundException if the command exits non-zero or prints nothing
	 * @throws Exception any other exception is logged and rethrown
	 */
	public function listLanguages() {
		try {
			$this->logger->debug('Fetching languages. ', ['app' => 'ocr']);

			// exec() appends to an already existing array, so start from a known-empty one.
			$result = [];
			$success = -1;
			exec('tesseract --list-langs 2>&1', $result, $success);

			if ($success !== 0 || count($result) === 0) {
				throw new NotFoundException($this->l10n->t('No languages found.'));
			}

			// $result holds one entry per output line.
			$languages = [];
			foreach ($result as $line) {
				$tdname = trim($line);
				if (strlen($tdname) === 3) {
					$languages[] = $tdname;
				}
			}

			$this->logger->debug('Fetched languages: ' . json_encode($languages), ['app' => 'ocr']);
			return $languages;
		} catch (Exception $e) {
			// handleException() always throws, so nothing is returned from here.
			$this->handleException($e);
		}
	}
142
143
	/**
	 * Processes and prepares the files for ocr.
	 * Sends the jobs to the queue so that the ocr itself runs asynchronously.
	 *
	 * For every file a target name and a temporary file are created, a status
	 * object in state PENDING is built and handed to the queue service together
	 * with the data directory, the file path, the language and the server root.
	 *
	 * @param string $language has to be one of the values returned by listLanguages()
	 * @param array $files file descriptions as expected by buildFileInfo()
	 * @return string 'PROCESSING' once every job has been sent
	 * @throws NotFoundException if $files or $language is empty or the language is not available
	 * @throws Exception any other exception is logged and rethrown
	 */
	public function process($language, $files) {
		try {
			$this->logger->debug('Will now process files: ' . json_encode($files) . ' with language: ' . json_encode($language), ['app' => 'ocr']);

			// Reject empty input and languages that are not installed.
			// The language list is only fetched when both parameters are non-empty;
			// the comparison is strict so no type juggling can produce a match.
			if (empty($files) || empty($language) || !in_array($language, $this->listLanguages(), true)) {
				throw new NotFoundException($this->l10n->t('Empty passed parameters.'));
			}

			// get the array with full fileinfo
			$fileInfo = $this->buildFileInfo($files);
			foreach ($fileInfo as $fInfo) {
				// TODO: FileLock maybe \OC\Files\View::lockFile()

				// name the result will be stored under
				$newName = $this->buildNewName($fInfo);

				// temp file for ocr processing purposes
				$tempFile = $this->tempM->getTemporaryFile();

				// running type: pdfs and images are handled differently
				$ftype = ($fInfo->getMimetype() === $this::MIMETYPE_PDF) ? 'mypdf' : 'tess';

				$status = new OcrStatus('PENDING', $fInfo->getId(), $newName, $tempFile, $ftype, $this->userId);

				// Feed the worker
				$this->queueService->clientSend($status, $this->config->getSystemValue('datadirectory'), $fInfo->getPath(), $language, \OC::$SERVERROOT);
			}
			return 'PROCESSING';
		} catch (Exception $e) {
			// handleException() always throws, so nothing is returned from here.
			$this->handleException($e);
		}
	}
189
190
	/**
	 * Collects the numbers needed for a status check.
	 *
	 * Processed results are moved to their target first, then failed jobs are
	 * cleaned up, then the jobs still pending for the user are counted.
	 *
	 * @codeCoverageIgnore
	 * @return array with the integer counts 'processed', 'failed' and 'pending'
	 * @throws Exception every exception is logged and rethrown
	 */
	public function status() {
		try {
			// TODO: release lock
			// Array elements are evaluated in order: processed, failed, pending.
			return [
				'processed' => $this->handleProcessed(),
				'failed' => count($this->handleFailed()),
				'pending' => count($this->statusMapper->findAllPending($this->userId)),
			];
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
211
212
	/**
	 * Marks a status entry as finished.
	 *
	 * The command ocr:complete for occ calls this function; the worker should
	 * call it automatically after each processing step.
	 *
	 * @param $statusId id of the status entry to load from the mapper
	 * @param boolean $failed truthy stores 'FAILED', falsy stores 'PROCESSED'
	 * @throws Exception every exception is logged and rethrown
	 */
	public function complete($statusId, $failed) {
		try {
			$entry = $this->statusMapper->find($statusId);
			$entry->setStatus($failed ? 'FAILED' : 'PROCESSED');
			$this->statusMapper->update($entry);
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
233
234
	/**
	 * Finishes all processed files by copying them to the right path and deleting the temp files.
	 * Returns the number of processed files.
	 *
	 * For type 'tess' the result is read from the temp file name plus '.txt',
	 * for type 'mypdf' from the temp file itself. The status row is deleted
	 * after the content has been written through the view.
	 *
	 * @codeCoverageIgnore
	 * @return int
	 * @throws NotFoundException if the type is unknown or the result file does not exist
	 * @throws Exception any other exception is logged and rethrown
	 */
	private function handleProcessed() {
		try {
			$this->logger->debug('Find processed ocr files and put them to the right dirs.', ['app' => 'ocr']);
			$processed = $this->statusMapper->findAllProcessed($this->userId);
			foreach ($processed as $status) {
				$type = $status->getType();
				if ($type === 'tess') {
					// need .txt because tesseract saves it like this
					$resultFile = $status->getTempFile() . '.txt';
				} elseif ($type === 'mypdf') {
					// no extension needed, the tmp file itself holds the pdf
					$resultFile = $status->getTempFile();
				} else {
					$resultFile = null;
				}

				if ($resultFile === null || !file_exists($resultFile)) {
					throw new NotFoundException($this->l10n->t('Temp file does not exist.'));
				}

				// Save the tmp file with newname
				$this->view->file_put_contents($status->getNewName(), file_get_contents($resultFile));
				$this->statusMapper->delete($status);

				// Remove the file directly instead of through a shell command,
				// so the stored path is never interpreted by a shell.
				unlink($resultFile);
			}
			return count($processed);
		} catch (Exception $e) {
			// handleException() always throws, so nothing is returned from here.
			$this->handleException($e);
		}
	}
266
267
	/**
	 * Handles all failed orders of ocr processing queue and returns the status objects.
	 *
	 * For every failed status the temp file is removed (if present) and the
	 * status row is deleted.
	 *
	 * @codeCoverageIgnore
	 * @return array the failed status objects that were cleaned up
	 * @throws Exception every exception is logged and rethrown
	 */
	private function handleFailed() {
		try {
			$failed = $this->statusMapper->findAllFailed($this->userId);
			foreach ($failed as $status) {
				// Clean the tempfile without going through a shell, so the stored
				// path is never interpreted as a command. A missing file is skipped.
				$tempFile = (string)$status->getTempFile();
				if (is_file($tempFile)) {
					unlink($tempFile);
				}
				// clean from db
				$this->statusMapper->delete($status);
			}
			$this->logger->debug('Following status objects failed: ' . json_encode($failed), ['app' => 'ocr']);
			return $failed;
		} catch (Exception $e) {
			// handleException() always throws, so nothing is returned from here.
			$this->handleException($e);
		}
	}
288
289
290
	/**
	 * Returns a not existing file name for pdf or image processing.
	 *
	 * The name is the original file name without its extension plus '_OCR.pdf'
	 * for pdfs or '_OCR.txt' for everything else, placed in the directory of
	 * the original file with the leading '/<userId>/files' removed.
	 *
	 * Protected as of testing issues with static methods (it is mocked
	 * partially). FIXME: Change this behaviour as soon as the
	 * buildNotExistingFileName function is not static anymore
	 *
	 * @param FileInfo $fileInfo
	 * @return string
	 */
	protected function buildNewName(FileInfo $fileInfo) {
		$name = $fileInfo->getName();
		$path = $fileInfo->getPath();

		// Cut at the last dot instead of dropping a fixed four characters, so
		// extensions of any length (.jpeg, .tiff) are removed completely and
		// names without an extension stay intact. A leading dot is not an extension.
		$dotPos = strrpos($name, '.');
		$fileName = ($dotPos === false || $dotPos === 0) ? $name : substr($name, 0, $dotPos);

		// Remove the file name from the end of the path only; a directory that
		// happens to contain the same text must not be touched.
		$nameLength = strlen($name);
		if ($nameLength > 0 && substr($path, -$nameLength) === $name) {
			$filePath = (string)substr($path, 0, -$nameLength);
		} else {
			$filePath = $path;
		}

		// Get the path on top of the user/files/ dir: strip the prefix only
		// where it starts the path.
		$userRoot = '/' . $this->userId . '/files';
		if (strpos($filePath, $userRoot) === 0) {
			$filePath = (string)substr($filePath, strlen($userRoot));
		}

		// PDFs keep the pdf extension, images result in a text file
		$suffix = ($fileInfo->getMimetype() === $this::MIMETYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';
		return Files::buildNotExistingFileName($filePath, $fileName . $suffix);
	}
313
314
	/**
	 * Returns the fileInfo for each file in files and checks
	 * if it has an allowed mimetype and some other conditions.
	 *
	 * Every entry needs a non-empty 'path' or 'directory' and a 'type' equal
	 * to 'file'; its file info is then looked up through the view.
	 *
	 * @param array $files
	 * @return array of Files\FileInfo
	 * @throws NotFoundException if an entry is incomplete, is not of type 'file',
	 *                           cannot be resolved or has a mimetype that is not allowed
	 * @throws Exception any other exception is logged and rethrown
	 */
	private function buildFileInfo(array $files) {
		try {
			$fileArray = [];
			foreach ($files as $file) {
				// Check if anything is missing and file type is correct.
				// isset() keeps a missing 'type' key from raising a notice.
				$hasLocation = !empty($file['path']) || !empty($file['directory']);
				$isFile = isset($file['type']) && $file['type'] === 'file';
				if (!$hasLocation || !$isFile) {
					throw new NotFoundException($this->l10n->t('Wrong path parameter.'));
				}

				$fileInfo = $this->view->getFileInfo($this->getCorrectPath($file));

				// A falsy lookup result must not reach checkMimeType(): its
				// FileInfo type hint would reject the value instead of
				// reporting the problem as a NotFoundException.
				if (!$fileInfo) {
					throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
				}

				$this->checkMimeType($fileInfo);
				$fileArray[] = $fileInfo;
			}
			return $fileArray;
		} catch (Exception $e) {
			// handleException() always throws, so nothing is returned from here.
			$this->handleException($e);
		}
	}
342
343
	/**
	 * Checks the mimetype of a specific given FileInfo.
	 *
	 * Returns silently when the mimetype is one of ALLOWED_MIMETYPES.
	 *
	 * @param Files\FileInfo $fileInfo
	 * @throws NotFoundException if the mimetype is not allowed
	 * @throws Exception any other exception is logged and rethrown
	 */
	private function checkMimeType(FileInfo $fileInfo) {
		try {
			// Strict comparison: only an exact string match counts as allowed.
			if ($fileInfo && in_array($fileInfo->getMimetype(), $this::ALLOWED_MIMETYPES, true)) {
				return;
			}
			$this->logger->debug('Getting FileInfo did not work or not included in the ALLOWED_MIMETYPES array.', ['app' => 'ocr']);
			throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
357
358
	/**
	 * Builds the full path of a file from its description.
	 *
	 * The directory is taken from 'path', or from 'directory' when 'path' is
	 * empty (newly updated files carry 'directory' instead of 'path'). The
	 * 'name' is appended, separated by a slash unless the directory is '/'.
	 *
	 * @param $file array with 'name' and 'path' and/or 'directory'
	 * @return string
	 */
	private function getCorrectPath($file) {
		$directory = empty($file['path']) ? $file['directory'] : $file['path'];
		// The root directory already ends with the separator.
		$separator = ($directory === '/') ? '' : '/';
		return $directory . $separator . $file['name'];
	}
375
376
	/**
	 * Central exception handling for all methods of this class.
	 *
	 * Logs the exception and always throws: a NotFoundException is thrown again
	 * as a new NotFoundException carrying only the message, everything else is
	 * rethrown unchanged.
	 *
	 * @param Exception $e
	 * @throws Exception
	 * @throws NotFoundException
	 */
	private function handleException($e) {
		$context = ['app' => 'ocr', 'message' => 'Exception during ocr service function processing'];
		$this->logger->logException($e, $context);

		if (!($e instanceof NotFoundException)) {
			throw $e;
		}
		throw new NotFoundException($e->getMessage());
	}
391
}