Completed
Pull Request — master (#40)
by Janis
123:40 queued 121:18
created

OcrService::getAllForPersonal()   A

Complexity

Conditions 3
Paths 11

Size

Total Lines 18
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 3.0593

Importance

Changes 0
Metric Value
dl 0
loc 18
ccs 13
cts 16
cp 0.8125
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 15
nc 11
nop 1
crap 3.0593
1
<?php
2
/**
3
 * nextCloud - ocr
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Janis Koehr <[email protected]>
9
 * @copyright Janis Koehr 2016
10
 */
11
12
namespace OCA\Ocr\Service;
13
14
use Exception;
15
use OC\Files\View;
16
use OCA\Ocr\Db\OcrStatus;
17
use OCA\Ocr\Db\OcrStatusMapper;
18
use OCP\AppFramework\Db\DoesNotExistException;
19
use OCP\AppFramework\Db\Entity;
20
use OCP\Files;
21
use OCP\Files\FileInfo;
22
use OCP\IConfig;
23
use OCP\IL10N;
24
use OCP\ILogger;
25
use OCP\ITempManager;
26
27
28
/**
29
 * Class OcrService
30
 * @package OCA\Ocr\Service
31
 */
32
class OcrService {
33
34
	/**
35
	 * @var ILogger
36
	 */
37
	private $logger;
38
39
	/**
40
	 * @var ITempManager
41
	 */
42
	private $tempM;
43
44
	/**
45
	 * @var IConfig
46
	 */
47
	private $config;
48
49
	/**
50
	 * @var QueueService
51
	 */
52
	private $queueService;
53
54
	/**
55
	 * @var OcrStatusMapper
56
	 */
57
	private $statusMapper;
58
59
	/**
60
	 * @var View
61
	 */
62
	private $view;
63
64
	/**
65
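	 * @var string ID of the user this service acts for (type is not declared by the constructor; presumably a string, confirm against the caller)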
66
	 */
67
	private $userId;
68
69
	/**
70
	 * @var IL10N
71
	 */
72
	private $l10n;
73
74
	/**
75
	 * Array of allowed mimetypes for ocr processing
76
	 */
77
	const ALLOWED_MIMETYPES = ['application/pdf', 'image/png', 'image/jpeg', 'image/tiff'];
78
79
	/**
80
	 * the correct mimetype for a pdf file
81
	 */
82
	const MIMETYPE_PDF = 'application/pdf';
83
84
	/**
85
	 * the only allowed image mimetypes by tesseract
86
	 */
87
	const MIMETYPES_IMAGE = ['image/png', 'image/jpeg', 'image/tiff'];
88
89
	/**
	 * Stores the collaborators this service works with.
	 *
	 * @param ITempManager $tempManager source of the temporary files used as OCR output targets
	 * @param IConfig $config system configuration (read for the 'datadirectory' value)
	 * @param QueueService $queueService client used to hand OCR jobs to the worker
	 * @param OcrStatusMapper $mapper persistence for OcrStatus entities
	 * @param View $view file system view used to read file info and write results
	 * @param $userId id of the user the service acts for
	 * @param IL10N $l10n translator for user-facing error messages
	 * @param ILogger $logger application logger
	 */
	public function __construct(ITempManager $tempManager, IConfig $config, QueueService $queueService, OcrStatusMapper $mapper, View $view, $userId, IL10N $l10n, ILogger $logger) {
		// Plain field assignments in parameter order; none depends on another.
		$this->tempM = $tempManager;
		$this->config = $config;
		$this->queueService = $queueService;
		$this->statusMapper = $mapper;
		$this->view = $view;
		$this->userId = $userId;
		$this->l10n = $l10n;
		$this->logger = $logger;
	}
111
112
	/**
	 * Gets the list of all available tesseract-ocr languages.
	 *
	 * Runs `tesseract --list-langs`, discards the descriptive first line of its
	 * output and returns the remaining entries trimmed and sorted.
	 *
	 * @return string[] Languages
	 * @throws NotFoundException if the command fails or prints nothing
	 * @throws Exception any other failure, re-thrown by handleException()
	 */
	public function listLanguages() {
		try {
			$exitCode = -1;
			$output = [];
			$this->logger->debug('Fetching languages. ', ['app' => 'ocr']);
			exec('tesseract --list-langs 2>&1', $output, $exitCode);
			// exec() always fills $output with an array, so only the exit code
			// and the number of lines need checking.
			if ($exitCode !== 0 || count($output) === 0) {
				throw new NotFoundException($this->l10n->t('No languages found.'));
			}
			// The first line is tesseract's own description, not a language.
			array_shift($output);
			// Trim first so stray whitespace cannot influence the sort order,
			// and drop blank lines so they are not reported as languages.
			$languages = array_values(array_filter(array_map('trim', $output), 'strlen'));
			sort($languages);
			$this->logger->debug('Fetched languages: ' . json_encode($languages), ['app' => 'ocr']);
			return $languages;
		} catch (Exception $e) {
			// Logs and always re-throws.
			$this->handleException($e);
		}
	}
144
145
	/**
	 * Processes and prepares the files for ocr.
	 * Sends one job per file to the queue client so that the ocr runs asynchronously.
	 *
	 * @param string $language one of the languages returned by listLanguages()
	 * @param array $files file descriptors as accepted by buildFileInfo()
	 * @return string 'PROCESSING' once every file has been queued
	 * @throws NotFoundException if a parameter is empty, the language is unknown
	 *                           or a file is rejected by buildFileInfo()
	 * @throws Exception any other failure, re-thrown by handleException()
	 */
	public function process($language, $files) {
		try {
			$this->logger->debug('Will now process files: ' . json_encode($files) . ' with language: ' . json_encode($language), ['app' => 'ocr']);
			// Strict comparison: a loose in_array() would accept e.g. boolean true
			// as "matching" any language name. The && chain keeps the tesseract
			// call from running when a parameter is empty.
			$valid = !empty($files) && !empty($language) && in_array($language, $this->listLanguages(), true);
			if (!$valid) {
				throw new NotFoundException($this->l10n->t('Empty parameters passed.'));
			}

			// Resolve and validate all files before queueing anything.
			foreach ($this->buildFileInfo($files) as $fInfo) {
				// TODO: FileLock maybe \OC\Files\View::lockFile()

				// Target name the result will be saved under.
				$newName = $this->buildNewName($fInfo);

				// Temp file the worker writes its output to.
				$tempFile = $this->tempM->getTemporaryFile();

				// Running type: PDFs and images are handled differently.
				$ftype = ($fInfo->getMimetype() === self::MIMETYPE_PDF) ? 'mypdf' : 'tess';

				$status = new OcrStatus('PENDING', $fInfo->getId(), $newName, $tempFile, $ftype, $this->userId, false);

				// Feed the worker.
				$this->queueService->clientSend($status, $this->config->getSystemValue('datadirectory'), $fInfo->getPath(), $language, \OC::$SERVERROOT);
			}
			return 'PROCESSING';
		} catch (Exception $e) {
			// Logs and always re-throws.
			$this->handleException($e);
		}
	}
191
192
	/**
	 * Collects the numbers needed for a status check.
	 *
	 * Finalises the current user's processed tasks, looks up the pending ones and
	 * handles the failed ones, then reports how many there are of each.
	 *
	 * @codeCoverageIgnore
	 * @return array counts keyed by 'processed', 'failed' and 'pending'
	 * @throws Exception re-thrown by handleException()
	 */
	public function status() {
		try {
			// TODO: release lock

			// user specific processed files
			$processedStatuses = $this->handleProcessed();

			$pendingStatuses = $this->statusMapper->findAllPending($this->userId);

			// user specific failed tasks; also marks their error as displayed
			$failedStatuses = $this->handleFailed();

			$counts = [];
			$counts['processed'] = count($processedStatuses);
			$counts['failed'] = count($failedStatuses);
			$counts['pending'] = count($pendingStatuses);
			return $counts;
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
216
217
	/**
	 * Records the outcome of a processing step on its status object.
	 * Called by the occ command ocr:complete, which the worker is expected to
	 * invoke after each step.
	 *
	 * @param $statusId id of the status entity to update
	 * @param boolean $failed true marks the status 'FAILED', false 'PROCESSED'
	 * @throws Exception re-thrown by handleException(), e.g. when the lookup fails
	 */
	public function complete($statusId, $failed) {
		try {
			$status = $this->statusMapper->find($statusId);
			$status->setStatus($failed ? 'FAILED' : 'PROCESSED');
			$this->statusMapper->update($status);
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
238
239
	/**
	 * Deletes an ocr status object on behalf of its owner (used by the
	 * PersonalSettingsController).
	 *
	 * The returned entity has its name stripped of the OCR suffix and its
	 * file id, temp file, type and error flag cleared.
	 *
	 * @param $statusId id of the status to delete
	 * @param $userId id the status' owner is compared against
	 * @return OcrStatus the deleted, reduced status
	 * @throws NotFoundException if the id is unknown or the status belongs to another user
	 * @throws Exception any other failure, re-thrown by handleException()
	 */
	public function deleteStatus($statusId, $userId) {
		try {
			$found = $this->statusMapper->find($statusId);
			if ($found->getUserId() !== $userId) {
				throw new NotFoundException($this->l10n->t('Cannot delete. Wrong owner.'));
			}
			$deleted = $this->statusMapper->delete($found);
			$deleted->setNewName($this->removeFileExtension($deleted));
			$deleted->setFileId(null);
			$deleted->setTempFile(null);
			$deleted->setType(null);
			$deleted->setErrorDisplayed(null);
			return $deleted;
		} catch (Exception $e) {
			// A failed lookup is reported to the caller as a "wrong id" error.
			$toReport = $e;
			if ($e instanceof DoesNotExistException) {
				$toReport = new NotFoundException($this->l10n->t('Cannot delete. Wrong id.'));
			}
			$this->handleException($toReport);
		}
	}
268
269
	/**
	 * Gets all status objects for a specific user in order to list them on the personal settings page.
	 *
	 * Each status has its name stripped of the OCR suffix and its file id,
	 * temp file, type and error flag cleared before it is returned.
	 *
	 * @param $userId id of the user whose status objects are listed
	 * @return array list of reduced OcrStatus objects
	 * @throws Exception re-thrown by handleException()
	 */
	public function getAllForPersonal($userId) {
		try {
			$reduced = [];
			// foreach instead of an index loop: no count() call per iteration
			// and no assumption that the keys are 0..n-1.
			foreach ($this->statusMapper->findAll($userId) as $status) {
				$status->setNewName($this->removeFileExtension($status));
				$status->setFileId(null);
				$status->setTempFile(null);
				$status->setType(null);
				$status->setErrorDisplayed(null);
				$reduced[] = $status;
			}
			return $reduced;
		} catch (Exception $e) {
			// Logs and always re-throws.
			$this->handleException($e);
		}
	}
293
294
	/**
	 * Finishes all processed files by copying them to the right path and deleting the temp files.
	 * Returns the processed status objects.
	 *
	 * Processing stops at the first status whose result file is missing;
	 * statuses handled before it stay finished.
	 *
	 * @codeCoverageIgnore
	 * @return array the status objects found as processed for the current user
	 * @throws NotFoundException if a result file does not exist
	 * @throws Exception any other failure, re-thrown by handleException()
	 */
	private function handleProcessed() {
		try {
			$this->logger->debug('Check if files were processed by ocr and if so, put them to the right dirs.', ['app' => 'ocr']);
			$processed = $this->statusMapper->findAllProcessed($this->userId);
			foreach ($processed as $status) {
				$tempFile = $status->getTempFile();
				if ($status->getType() === 'tess' && file_exists($tempFile . '.txt')) {
					// need .txt because tesseract saves it like this
					$resultFile = $tempFile . '.txt';
				} elseif ($status->getType() === 'mypdf' && file_exists($tempFile)) {
					// no extension needed: the pdf result is written to the temp file itself
					$resultFile = $tempFile;
				} else {
					throw new NotFoundException($this->l10n->t('Temp file does not exist.'));
				}

				// Save the result under its new name, then clean up.
				$this->view->file_put_contents($status->getNewName(), file_get_contents($resultFile));
				$this->statusMapper->delete($status);
				// unlink() instead of exec('rm ...'): no shell involved, so the
				// path needs no escaping, and a failure is visible.
				if (!unlink($resultFile)) {
					$this->logger->warning('Could not remove temp file: ' . $resultFile, ['app' => 'ocr']);
				}
			}
			return $processed;
		} catch (Exception $e) {
			// Logs and always re-throws.
			$this->handleException($e);
		}
	}
326
327
	/**
	 * Strips the OCR suffix ("_OCR.txt" for type 'tess', "_OCR.pdf" for type
	 * 'mypdf') from the new name of an ocr status.
	 *
	 * @codeCoverageIgnore
	 * @param $status OcrStatus
	 * @return string|null the name without suffix; null for any other type
	 * @throws Exception re-thrown by handleException()
	 */
	private function removeFileExtension($status) {
		try {
			$suffix = null;
			if ($status->getType() === 'tess') {
				$suffix = '_OCR.txt';
			} elseif ($status->getType() === 'mypdf') {
				$suffix = '_OCR.pdf';
			}
			if ($suffix === null) {
				// unknown type: nothing to strip
				return null;
			}
			return str_replace($suffix, '', $status->getNewName());
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
345
346
	/**
	 * Handles all failed orders of the ocr processing queue and returns the status objects.
	 *
	 * For every failed status of the current user the temp file is removed and
	 * the status is saved with its error flagged as displayed.
	 *
	 * @codeCoverageIgnore
	 * @return array the failed status objects
	 * @throws Exception re-thrown by handleException()
	 */
	private function handleFailed() {
		try {
			$failed = $this->statusMapper->findAllFailed($this->userId);
			foreach ($failed as $status) {
				// Clean the temp file. unlink() instead of exec('rm ...'): no
				// shell involved, so the path needs no escaping. A file that is
				// already gone is not an error.
				$tempFile = $status->getTempFile();
				if (is_string($tempFile) && is_file($tempFile) && !unlink($tempFile)) {
					$this->logger->warning('Could not remove temp file: ' . $tempFile, ['app' => 'ocr']);
				}
				// set error displayed
				$status->setErrorDisplayed(true);
				$this->statusMapper->update($status);
			}
			$this->logger->debug('Following status objects failed: ' . json_encode($failed), ['app' => 'ocr']);
			return $failed;
		} catch (Exception $e) {
			// Logs and always re-throws.
			$this->handleException($e);
		}
	}
368
369
370
	/**
	 * Returns a not yet existing file name for the pdf or image result.
	 *
	 * The name is built from the source file's directory (relative to the
	 * user's files root) and its base name plus "_OCR.pdf" for PDFs or
	 * "_OCR.txt" for everything else.
	 *
	 * Protected because of testing issues with static methods (it is mocked
	 * partially). FIXME: Change this as soon as the buildNotExistingFileName
	 * function is not static anymore.
	 *
	 * @param FileInfo $fileInfo
	 * @return string
	 */
	protected function buildNewName(FileInfo $fileInfo) {
		$name = $fileInfo->getName();
		$path = $fileInfo->getPath();

		// Cut the extension at the last dot instead of assuming it is three
		// characters long (".jpeg" and ".tiff" are allowed types too). A name
		// without a dot, or with only a leading dot, is kept whole.
		$dotPos = strrpos($name, '.');
		$baseName = ($dotPos === false || $dotPos === 0) ? $name : substr($name, 0, $dotPos);

		// Drop the file name from the end of the path only, so a directory
		// that happens to contain the same text is left alone.
		$nameLength = strlen($name);
		if ($nameLength > 0 && substr($path, -$nameLength) === $name) {
			$filePath = (string) substr($path, 0, -$nameLength);
		} else {
			$filePath = $path;
		}

		// Remove the user's files root once (first occurrence), not every
		// occurrence further down the path.
		$userRoot = '/' . $this->userId . '/files';
		$rootPos = strpos($filePath, $userRoot);
		if ($rootPos !== false) {
			$filePath = substr_replace($filePath, '', $rootPos, strlen($userRoot));
		}

		// PDFs stay PDFs, images become text files.
		$suffix = ($fileInfo->getMimetype() === self::MIMETYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';
		return Files::buildNotExistingFileName($filePath, $baseName . $suffix);
	}
393
394
	/**
	 * Returns the fileInfo for each file in files and checks
	 * that it is a file with a location and an allowed mimetype.
	 *
	 * @param array $files descriptors with 'type', 'name' and 'path' or 'directory'
	 * @return array of Files\FileInfo
	 * @throws NotFoundException if a descriptor is incomplete, is not of type
	 *                           'file', cannot be resolved or has a wrong mimetype
	 * @throws Exception any other failure, re-thrown by handleException()
	 */
	private function buildFileInfo(array $files) {
		try {
			$fileArray = [];
			foreach ($files as $file) {
				// Check if anything is missing and file type is correct.
				// isset() keeps a descriptor without 'type' from raising a notice.
				$hasLocation = !empty($file['path']) || !empty($file['directory']);
				$isFile = isset($file['type']) && $file['type'] === 'file';
				if (!$hasLocation || !$isFile) {
					throw new NotFoundException($this->l10n->t('Wrong path parameter.'));
				}

				$fileInfo = $this->view->getFileInfo($this->getCorrectPath($file));
				if (!$fileInfo) {
					// The view reports an unresolvable path with a falsy value
					// (per its documented return type). checkMimeType() type-hints
					// FileInfo, so passing that on would raise a TypeError instead
					// of the NotFoundException callers expect.
					throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
				}
				$this->checkMimeType($fileInfo);
				$fileArray[] = $fileInfo;
			}
			return $fileArray;
		} catch (Exception $e) {
			// Logs and always re-throws.
			$this->handleException($e);
		}
	}
422
423
	/**
	 * Verifies that the given FileInfo has one of the ALLOWED_MIMETYPES.
	 *
	 * @param Files\FileInfo $fileInfo
	 * @throws NotFoundException if the file info is falsy or its mimetype is not allowed
	 */
	private function checkMimeType(FileInfo $fileInfo) {
		try {
			$accepted = $fileInfo && in_array($fileInfo->getMimetype(), $this::ALLOWED_MIMETYPES);
			if ($accepted) {
				return;
			}
			$this->logger->debug('Getting FileInfo did not work or not included in the ALLOWED_MIMETYPES array.', ['app' => 'ocr']);
			throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
437
438
	/**
	 * Returns the full path (directory plus file name) for a delivered file descriptor.
	 *
	 * The directory is taken from 'path' or, if that is empty, from
	 * 'directory'. Trailing slashes on the directory are dropped so that the
	 * result never contains a doubled separator before the file name.
	 *
	 * @param $file descriptor with 'name' and 'path' or 'directory'
	 * @return string
	 */
	private function getCorrectPath($file) {
		if (!empty($file['path'])) {
			$directory = $file['path'];
		} else {
			// Because new updated files have the property directory instead of path
			$directory = isset($file['directory']) ? $file['directory'] : '';
		}
		$name = isset($file['name']) ? $file['name'] : '';
		// '/' and '' both collapse to '' here, giving '/<name>' for the root.
		return rtrim($directory, '/') . '/' . $name;
	}
455
456
	/**
	 * Central exception handling for the methods of this class: logs the
	 * exception and always throws.
	 *
	 * A NotFoundException is re-thrown as a new NotFoundException carrying
	 * the same message; anything else is re-thrown unchanged.
	 *
	 * @param Exception $e
	 * @throws Exception
	 * @throws NotFoundException
	 */
	private function handleException($e) {
		$context = ['app' => 'ocr', 'message' => 'Exception during ocr service function processing'];
		$this->logger->logException($e, $context);
		if (!($e instanceof NotFoundException)) {
			throw $e;
		}
		throw new NotFoundException($e->getMessage());
	}
471
}
472