Completed
Pull Request — master (#17)
by Janis
02:51
created

OcrService::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 10
cts 10
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 9
nc 1
nop 8
crap 1

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
/**
3
 * nextCloud - ocr
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Janis Koehr <[email protected]>
9
 * @copyright Janis Koehr 2016
10
 */
11
12
namespace OCA\Ocr\Service;
13
14
use Exception;
15
use OC\Files\View;
16
use OCA\Ocr\Db\OcrStatus;
17
use OCA\Ocr\Db\OcrStatusMapper;
18
use OCP\Files;
19
use OCP\Files\FileInfo;
20
use OCP\IConfig;
21
use OCP\IL10N;
22
use OCP\ILogger;
23
use OCP\ITempManager;
24
25
26
/**
27
 * Class OcrService
28
 * @package OCA\Ocr\Service
29
 */
30
class OcrService {
31
32
	/**
33
	 * @var ILogger
34
	 */
35
	private $logger;
36
37
	/**
38
	 * @var ITempManager
39
	 */
40
	private $tempM;
41
42
	/**
43
	 * @var IConfig
44
	 */
45
	private $config;
46
47
	/**
48
	 * @var QueueService
49
	 */
50
	private $queueService;
51
52
	/**
53
	 * @var OcrStatusMapper
54
	 */
55
	private $statusMapper;
56
57
	/**
58
	 * @var View
59
	 */
60
	private $view;
61
62
	/**
63
	 * @var string user id (concatenated into the '/<userId>/files' path prefix)
64
	 */
65
	private $userId;
66
67
	/**
68
	 * @var IL10N
69
	 */
70
	private $l10n;
71
72
	/**
73
	 * Array of allowed mimetypes for ocr processing
74
	 */
75
	const ALLOWED_MIMETYPES = ['application/pdf', 'image/png', 'image/jpeg', 'image/tiff'];
76
77
	/**
78
	 * the correct mimetype for a pdf file
79
	 */
80
	const MIMETYPE_PDF = 'application/pdf';
81
82
	/**
83
	 * the only allowed image mimetypes by tesseract
84
	 */
85
	const MIMETYPES_IMAGE = ['image/png', 'image/jpeg', 'image/tiff'];
86
87
	/**
	 * Stores the collaborators this service works with.
	 *
	 * @param ITempManager $tempManager source of the temporary files used during ocr processing
	 * @param IConfig $config system configuration (the 'datadirectory' value is read from it)
	 * @param QueueService $queueService sends the ocr jobs to the worker
	 * @param OcrStatusMapper $mapper persistence layer for the ocr status objects
	 * @param View $view file view used to read file infos and to write results
	 * @param $userId id of the user the service acts for
	 * @param IL10N $l10n translates the messages of thrown exceptions
	 * @param ILogger $logger
	 */
	public function __construct(ITempManager $tempManager, IConfig $config, QueueService $queueService, OcrStatusMapper $mapper, View $view, $userId, IL10N $l10n, ILogger $logger) {
		// plain assignments, listed in the order of the parameter list
		$this->tempM = $tempManager;
		$this->config = $config;
		$this->queueService = $queueService;
		$this->statusMapper = $mapper;
		$this->view = $view;
		$this->userId = $userId;
		$this->l10n = $l10n;
		$this->logger = $logger;
	}
109
110
	/**
	 * Gets the list of all available tesseract-ocr languages.
	 *
	 * Runs `tesseract --list-langs` and keeps every output line that is exactly
	 * three characters long after trimming; all other lines are dropped by
	 * that length filter.
	 *
	 * @return array Languages
	 * @throws NotFoundException if the command exits non-zero or prints nothing
	 * @throws Exception any other failure, rethrown after it has been logged
	 */
	public function listLanguages() {
		try {
			$output = [];
			$exitCode = -1;
			$this->logger->debug('Fetching languages. ', ['app' => 'ocr']);
			// exec() always fills $output with an array of lines, so no string fallback is needed
			exec('tesseract --list-langs 2>&1', $output, $exitCode);
			if ($exitCode !== 0 || count($output) === 0) {
				throw new NotFoundException($this->l10n->t('No languages found.'));
			}
			$languages = [];
			foreach ($output as $line) {
				$code = trim($line);
				if (strlen($code) === 3) {
					$languages[] = $code;
				}
			}
			$this->logger->debug('Fetched languages: ' . json_encode($languages), ['app' => 'ocr']);
			return $languages;
		} catch (Exception $e) {
			// handleException() logs and always rethrows
			$this->handleException($e);
		}
	}
142
143
	/**
	 * Processes and prepares the files for ocr.
	 * Sends the stuff to the client in order to ocr async.
	 *
	 * For every given file a status object in state 'PENDING' is created and
	 * handed to the queue service together with the information the worker needs.
	 *
	 * @param string $language one of the languages returned by listLanguages()
	 * @param array $files file descriptions as expected by buildFileInfo()
	 * @return string 'PROCESSING' once all jobs have been sent
	 * @throws NotFoundException if files or language are empty or the language is not available
	 * @throws Exception any other failure, rethrown after it has been logged
	 */
	public function process($language, $files) {
		try {
			$this->logger->debug('Will now process files: ' . json_encode($files) . ' with language: ' . json_encode($language), ['app' => 'ocr']);
			// The language must be a string that is literally contained in the language list.
			// A loose in_array() would also accept values like true, which compare equal to any
			// non-empty string.
			$valid = !empty($files)
				&& is_string($language)
				&& !empty($language)
				&& in_array($language, $this->listLanguages(), true);
			if (!$valid) {
				throw new NotFoundException($this->l10n->t('Empty passed parameters.'));
			}
			// get the array with full fileinfo
			foreach ($this->buildFileInfo($files) as $fInfo) {
				// Check if filelock existing
				// TODO: FileLock maybe \OC\Files\View::lockFile()
				// get new name for saving purpose
				$newName = $this->buildNewName($fInfo);

				// create a temp file for ocr processing purposes
				$tempFile = $this->tempM->getTemporaryFile();

				// set the running type
				$ftype = ($fInfo->getMimetype() === $this::MIMETYPE_PDF) ? 'mypdf' : 'tess';

				// Create status object
				$status = new OcrStatus('PENDING', $fInfo->getId(), $newName, $tempFile, $ftype, $this->userId);

				// Init client and send task / job
				// Feed the worker
				$this->queueService->clientSend($status, $this->config->getSystemValue('datadirectory'), $fInfo->getPath(), $language, \OC::$SERVERROOT);
			}
			return 'PROCESSING';
		} catch (Exception $e) {
			// handleException() logs and always rethrows
			$this->handleException($e);
		}
	}
189
190
	/**
	 * Collects the current state of the ocr tasks of the user.
	 * Processed and failed tasks are finished / cleaned up on the way, pending ones are only counted.
	 *
	 * @codeCoverageIgnore
	 * @return array with the keys 'processed', 'failed' and 'pending', each holding a count
	 */
	public function status() {
		try {
			// TODO: release lock
			// The elements are evaluated top to bottom, so processed tasks are
			// handled first, then the failed ones, then the pending ones are counted.
			return [
				'processed' => $this->handleProcessed(),
				'failed' => count($this->handleFailed()),
				'pending' => count($this->statusMapper->findAllPending($this->userId)),
			];
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
211
212
	/**
	 * The command ocr:complete for occ will call this function in order to set the status.
	 * The worker should call it automatically after each processing step.
	 *
	 * The status object is loaded, marked as 'FAILED' or 'PROCESSED' and written back.
	 *
	 * @param $statusId id of the status object to update
	 * @param boolean $failed true marks the task as failed, false as processed
	 */
	public function complete($statusId, $failed) {
		try {
			$entity = $this->statusMapper->find($statusId);
			$entity->setStatus($failed ? 'FAILED' : 'PROCESSED');
			$this->statusMapper->update($entity);
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
233
234
	/**
	 * Finishes all processed files by copying them to the right path and deleting the temp files.
	 * Returns the number of processed files.
	 *
	 * @codeCoverageIgnore
	 * @return int
	 * @throws NotFoundException if the result file of a processed task does not exist
	 * @throws Exception any other failure, rethrown after it has been logged
	 */
	private function handleProcessed() {
		try {
			$this->logger->debug('Find processed ocr files and put them to the right dirs.', ['app' => 'ocr']);
			$processed = $this->statusMapper->findAllProcessed($this->userId);
			foreach ($processed as $status) {
				$type = $status->getType();
				if ($type === 'tess') {
					// need .txt because tesseract saves it like this
					$resultFile = $status->getTempFile() . '.txt';
				} elseif ($type === 'mypdf') {
					// don't need to extend with .pdf / it uses the tmp file to save
					$resultFile = $status->getTempFile();
				} else {
					$resultFile = null;
				}
				if ($resultFile === null || !file_exists($resultFile)) {
					throw new NotFoundException($this->l10n->t('Temp file does not exist.'));
				}
				// Save the tmp file with newname
				$this->view->file_put_contents($status->getNewName(), file_get_contents($resultFile));
				// Cleaning temp files
				$this->statusMapper->delete($status);
				// unlink() instead of a shell 'rm': no unescaped path in a command line
				// and no dependency on a unix shell. Cleanup stays best effort.
				if (is_file($resultFile) && !unlink($resultFile)) {
					$this->logger->warning('Could not remove temp file: ' . $resultFile, ['app' => 'ocr']);
				}
			}
			return count($processed);
		} catch (Exception $e) {
			// handleException() logs and always rethrows
			$this->handleException($e);
		}
	}
266
267
	/**
	 * Handles all failed orders of ocr processing queue and returns the status objects.
	 * For each of them the temp file is removed and the status is deleted from the db.
	 *
	 * @codeCoverageIgnore
	 * @return array the status objects that were found as failed
	 * @throws Exception any failure, rethrown after it has been logged
	 */
	private function handleFailed() {
		try {
			$failed = $this->statusMapper->findAllFailed($this->userId);
			foreach ($failed as $status) {
				// clean the tempfile; unlink() instead of a shell 'rm' so the path never
				// ends up unescaped in a command line. A missing file is simply skipped.
				$tempFile = $status->getTempFile();
				if (is_file($tempFile) && !unlink($tempFile)) {
					$this->logger->warning('Could not remove temp file: ' . $tempFile, ['app' => 'ocr']);
				}
				// clean from db
				$this->statusMapper->delete($status);
			}
			$this->logger->debug('Following status objects failed: ' . json_encode($failed), ['app' => 'ocr']);
			return $failed;
		} catch (Exception $e) {
			// handleException() logs and always rethrows
			$this->handleException($e);
		}
	}
288
289
290
	/**
	 * Returns a not existing file name for pdf or image processing.
	 * Protected as of testing issues with static methods. (Actually
	 * it will be mocked partially) FIXME: Change this behaviour as soon as the buildNotExistingFileName function is not static anymore
	 *
	 * The extension is cut at the last dot of the file name, so extensions of any
	 * length (.png, .jpeg, .tiff, ...) are handled. PDFs get the suffix '_OCR.pdf',
	 * everything else '_OCR.txt'.
	 *
	 * @param FileInfo $fileInfo
	 * @return string
	 */
	protected function buildNewName(FileInfo $fileInfo) {
		$name = $fileInfo->getName();
		$path = $fileInfo->getPath();

		// get rid of the .png or .pdf and so on; names without a dot (or with only
		// a leading dot) are kept as they are
		$dot = strrpos($name, '.');
		$fileName = ($dot === false || $dot === 0) ? $name : substr($name, 0, $dot);

		// eliminate the file name from the end of the path only, so a directory
		// that happens to contain the same text is left untouched
		$nameLength = strlen($name);
		if ($nameLength > 0 && substr($path, -$nameLength) === $name) {
			$filePath = (string) substr($path, 0, -$nameLength);
		} else {
			$filePath = str_replace($name, '', $path);
		}

		// and get the path on top of the user/files/ dir
		$filePath = str_replace('/' . $this->userId . '/files', '', $filePath);

		$suffix = ($fileInfo->getMimetype() === $this::MIMETYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';
		return Files::buildNotExistingFileName($filePath, $fileName . $suffix);
	}
313
314
	/**
	 * Returns the fileInfo for each file in files and checks
	 * if it has an allowed mimetype and some other conditions.
	 *
	 * Every entry needs a non-empty 'path' or 'directory' and a 'type' equal to 'file'.
	 *
	 * @param array $files
	 * @return array of Files\FileInfo
	 * @throws NotFoundException if an entry is incomplete, no file info could be fetched or the mimetype is not allowed
	 */
	private function buildFileInfo(array $files) {
		try {
			$fileArray = [];
			foreach ($files as $file) {
				// Check if anything is missing and file type is correct
				$hasLocation = !empty($file['path']) || !empty($file['directory']);
				$isFile = isset($file['type']) && $file['type'] === 'file';
				if (!$hasLocation || !$isFile) {
					throw new NotFoundException($this->l10n->t('Wrong path parameter.'));
				}
				// get correct path
				$path = $this->getCorrectPath($file);
				$fileInfo = $this->view->getFileInfo($path);
				// checkMimeType() only accepts a FileInfo, so anything else has to be
				// rejected here. NOTE(review): presumably getFileInfo() yields false for
				// an unknown path - confirm against the View implementation.
				if (!($fileInfo instanceof FileInfo)) {
					throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
				}
				$this->checkMimeType($fileInfo);
				$fileArray[] = $fileInfo;
			}
			return $fileArray;
		} catch (Exception $e) {
			// handleException() logs and always rethrows
			$this->handleException($e);
		}
	}
342
343
	/**
	 * Makes sure the given FileInfo has one of the ALLOWED_MIMETYPES.
	 * Returns silently if it does.
	 *
	 * @param Files\FileInfo $fileInfo
	 * @throws NotFoundException if the file info is missing or its mimetype is not allowed
	 */
	private function checkMimeType(FileInfo $fileInfo) {
		try {
			$accepted = $fileInfo && in_array($fileInfo->getMimetype(), $this::ALLOWED_MIMETYPES);
			if ($accepted) {
				return;
			}
			$this->logger->debug('Getting FileInfo did not work or not included in the ALLOWED_MIMETYPES array.', ['app' => 'ocr']);
			throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
		} catch (Exception $e) {
			$this->handleException($e);
		}
	}
357
358
	/**
	 * Returns the correct path based on delivered file variable.
	 *
	 * The directory is taken from 'path' or, if that is empty, from 'directory'
	 * (new updated files have the property directory instead of path). It is joined
	 * with 'name' by exactly one slash, whether or not it ends with a slash.
	 *
	 * @param $file array with the keys 'name' and 'path' or 'directory'
	 * @return string
	 */
	private function getCorrectPath($file) {
		if (!empty($file['path'])) {
			$directory = $file['path'];
		} else {
			// Because new updated files have the property directory instead of path
			$directory = isset($file['directory']) ? $file['directory'] : '';
		}
		// '/' becomes '' here, so the root directory results in '/<name>' as well
		return rtrim((string) $directory, '/') . '/' . $file['name'];
	}
375
376
	/**
	 * Central exception handling for the methods of this class.
	 * The exception is logged and then always thrown again: a NotFoundException
	 * as a new NotFoundException carrying the same message, anything else unchanged.
	 *
	 * @param Exception $e
	 * @throws Exception
	 * @throws NotFoundException
	 */
	private function handleException($e) {
		$this->logger->logException($e, ['app' => 'ocr', 'message' => 'Exception during ocr service function processing']);
		if (!($e instanceof NotFoundException)) {
			throw $e;
		}
		throw new NotFoundException($e->getMessage());
	}
391
}