Completed
Pull Request — master (#17)
by Janis
03:06
created

OcrService::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 10
cts 10
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 9
nc 1
nop 8
crap 1

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
/**
3
 * nextCloud - ocr
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the COPYING file.
7
 *
8
 * @author Janis Koehr <[email protected]>
9
 * @copyright Janis Koehr 2016
10
 */
11
12
namespace OCA\Ocr\Service;
13
14
use Exception;
15
use OC\Files\View;
16
use OCA\Ocr\Db\OcrStatus;
17
use OCA\Ocr\Db\OcrStatusMapper;
18
use OCP\Files;
19
use OCP\Files\FileInfo;
20
use OCP\IConfig;
21
use OCP\IL10N;
22
use OCP\ILogger;
23
use OCP\ITempManager;
24
25
26
/**
27
 * Class OcrService
28
 * @package OCA\Ocr\Service
29
 */
30
class OcrService {
31
32
	/**
	 * Logger used for debug output and for reporting caught exceptions.
	 *
	 * @var ILogger
	 */
	private $logger;

	/**
	 * Temp manager that hands out the temporary files the OCR results are written to.
	 *
	 * @var ITempManager
	 */
	private $tempM;

	/**
	 * Server configuration; read for the 'datadirectory' system value.
	 *
	 * @var IConfig
	 */
	private $config;

	/**
	 * Queue service the OCR jobs are sent to.
	 *
	 * @var QueueService
	 */
	private $queueService;

	/**
	 * Mapper used to find, update and delete the OCR status entities.
	 *
	 * @var OcrStatusMapper
	 */
	private $statusMapper;

	/**
	 * File system view used to read file info and to store the OCR results.
	 *
	 * @var View
	 */
	private $view;

	/**
	 * Id of the user the service works for. It is passed untyped to the
	 * constructor; it is expected to be a string because it is concatenated
	 * into the '/<userId>/files' path prefix.
	 *
	 * @var string
	 */
	private $userId;

	/**
	 * Translation service for the messages of thrown exceptions.
	 *
	 * @var IL10N
	 */
	private $l10n;

	/**
	 * All mimetypes that are accepted for ocr processing.
	 */
	const ALLOWED_MIMETYPES = ['application/pdf', 'image/png', 'image/jpeg', 'image/tiff'];

	/**
	 * The mimetype that identifies a pdf file.
	 */
	const MIMETYPE_PDF = 'application/pdf';

	/**
	 * The image mimetypes (the only ones allowed by tesseract).
	 */
	const MIMETYPES_IMAGE = ['image/png', 'image/jpeg', 'image/tiff'];
86
87
	/**
88
	 * OcrService constructor.
89
	 *
90
	 * @param ITempManager $tempManager
91
	 * @param IConfig $config
92
	 * @param QueueService $queueService
93
	 * @param OcrStatusMapper $mapper
94
	 * @param View $view
95
	 * @param $userId
96
	 * @param IL10N $l10n
97
	 * @param ILogger $logger
98
	 */
99 10
	public function __construct(
		ITempManager $tempManager,
		IConfig $config,
		QueueService $queueService,
		OcrStatusMapper $mapper,
		View $view,
		$userId,
		IL10N $l10n,
		ILogger $logger
	) {
		// Plain dependency wiring, kept in the order of the parameter list.
		$this->tempM = $tempManager;
		$this->config = $config;
		$this->queueService = $queueService;
		$this->statusMapper = $mapper;
		$this->view = $view;
		$this->userId = $userId;
		$this->l10n = $l10n;
		$this->logger = $logger;
	}
109
110
	/**
111
	 * Gets the list of all available tesseract-ocr languages.
112
	 *
113
	 * @return array Languages
114
	 */
115 3
	public function listLanguages() {
		try {
			// exec() appends to the output array, so start from a known-empty
			// one. The output is always an array, which is why no string
			// fallback is needed.
			$result = [];
			$success = -1;
			$this->logger->debug('Fetching languages. ', ['app' => 'ocr']);
			exec('tesseract --list-langs 2>&1', $result, $success);

			// A non-zero exit code or no output at all means nothing usable came back.
			if ($success !== 0 || count($result) === 0) {
				throw new NotFoundException($this->l10n->t('No languages found.'));
			}

			$languages = [];
			foreach ($result as $line) {
				$name = trim($line);
				// Only three-character entries are treated as language codes.
				// NOTE(review): entries of any other length (tesseract also
				// ships names such as `chi_sim`) are dropped by this filter;
				// confirm that this is intended.
				if (strlen($name) === 3) {
					$languages[] = $name;
				}
			}
			$this->logger->debug('Fetched languages: ' . json_encode($languages), ['app' => 'ocr']);
			return $languages;
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
142
143
	/**
144
	 * Processes and prepares the files for ocr.
145
	 * Sends the stuff to the client in order to ocr async.
146
	 *
147
	 * @param string $language
148
	 * @param array $files
149
	 * @return string
150
	 */
151 4
	public function process($language, $files) {
		try {
			$this->logger->debug('Will now process files: ' . json_encode($files) . ' with language: ' . json_encode($language), ['app' => 'ocr']);

			// Reject empty input and languages tesseract does not know.
			// The comparison is strict: with a loose in_array() a non-string
			// value such as boolean true would match any installed language
			// and slip through the whitelist.
			if (empty($files) || empty($language) || !in_array($language, $this->listLanguages(), true)) {
				throw new NotFoundException($this->l10n->t('Empty passed parameters.'));
			}

			// Resolve the submitted entries to full file info objects.
			$fileInfo = $this->buildFileInfo($files);
			foreach ($fileInfo as $fInfo) {
				// Check if filelock existing
				// TODO: FileLock maybe \OC\Files\View::lockFile()

				// Name under which the result will be saved.
				$newName = $this->buildNewName($fInfo);

				// Temp file that receives the ocr result.
				$tempFile = $this->tempM->getTemporaryFile();

				// Running type: 'mypdf' for pdf input, 'tess' for everything else.
				$ftype = ($fInfo->getMimetype() === static::MIMETYPE_PDF) ? 'mypdf' : 'tess';

				// Status object describing the pending job.
				$status = new OcrStatus('PENDING', $fInfo->getId(), $newName, $tempFile, $ftype, $this->userId);

				// Hand the job over to the queue.
				$this->queueService->clientSend($status, $this->config->getSystemValue('datadirectory'), $fInfo->getPath(), $language, \OC::$SERVERROOT);
			}
			return 'PROCESSING';
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
189
190
	/**
191
	 * Returns the data for all required status checks and tasks.
192
	 * It will check for already processed, pending and failed ocr tasks and return them as needed.
193
	 *
194
	 * @return array counts keyed by 'processed', 'failed' and 'pending'
195
	 */
196
	public function status() {
		try {
			// TODO: release lock
			// The three steps run in this fixed order: finish processed jobs,
			// clean up failed ones, then count what is still pending.
			$processedCount = $this->handleProcessed();
			$failedCount = count($this->handleFailed());
			$pendingCount = count($this->statusMapper->findAllPending($this->userId));

			return [
				'processed' => $processedCount,
				'failed' => $failedCount,
				'pending' => $pendingCount
			];
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
210
211
	/**
212
	 * The command ocr:complete for occ will call this function in order to set the status.
213
	 * the worker should call it automatically after each processing step.
214
	 *
215
	 * @param $statusId
216
	 * @param boolean $failed
217
	 */
218 3
	public function complete($statusId, $failed) {
		try {
			$entity = $this->statusMapper->find($statusId);
			// A truthy $failed marks the job as FAILED, anything else as PROCESSED.
			$entity->setStatus($failed ? 'FAILED' : 'PROCESSED');
			$this->statusMapper->update($entity);
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
232
233
	/**
234
	 * Finishes all processed files by copying them to the right path and deleting the temp files.
235
	 * Returns the number of processed files.
236
	 *
237
	 * @return int
238
	 */
239
	private function handleProcessed() {
		try {
			$this->logger->debug('Find processed ocr files and put them to the right dirs.', ['app' => 'ocr']);
			$processed = $this->statusMapper->findAllProcessed($this->userId);
			foreach ($processed as $status) {
				// Locate the result file for the job type.
				if ($status->getType() === 'tess') {
					// need .txt because tesseract saves it like this
					$resultFile = $status->getTempFile() . '.txt';
				} elseif ($status->getType() === 'mypdf') {
					// don't need to extend with .pdf / it uses the tmp file to save
					$resultFile = $status->getTempFile();
				} else {
					// Unknown type: handled like a missing result file.
					$resultFile = null;
				}

				if ($resultFile === null || !file_exists($resultFile)) {
					throw new NotFoundException($this->l10n->t('Temp file does not exist.'));
				}

				// Save the result under its new name, then clean up db entry and temp file.
				$this->view->file_put_contents($status->getNewName(), file_get_contents($resultFile));
				$this->statusMapper->delete($status);
				// unlink() instead of exec('rm ...'): no shell is involved, so
				// spaces or shell metacharacters in the path cannot alter the command.
				if (!unlink($resultFile)) {
					$this->logger->debug('Could not remove temp file: ' . $resultFile, ['app' => 'ocr']);
				}
			}
			return count($processed);
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
264
265
	/**
266
	 * Handles all failed orders of ocr processing queue and returns the status objects.
267
	 *
268
	 * @return array
269
	 */
270
	private function handleFailed() {
		try {
			$failed = $this->statusMapper->findAllFailed($this->userId);
			foreach ($failed as $status) {
				// Clean the temp file. unlink() replaces exec('rm ...') so that
				// no shell command is built from the stored path. A missing
				// file is tolerated, as it was before.
				$tempFile = $status->getTempFile();
				if (is_file($tempFile) && !unlink($tempFile)) {
					$this->logger->debug('Could not remove temp file: ' . $tempFile, ['app' => 'ocr']);
				}
				// Clean from db.
				$this->statusMapper->delete($status);
			}
			$this->logger->debug('Following status objects failed: ' . json_encode($failed), ['app' => 'ocr']);
			return $failed;
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
285
286
287
	/**
288
	 * Returns a not existing file name for pdf or image processing
289
	 * protected as of testing issues with static methods. (Actually
290
	 * it will be mocked partially) FIXME: Change this behaviour as soon as the buildNotExistingFileName function is not static anymore
291
	 *
292
	 * @param FileInfo $fileInfo
293
	 * @return string
294
	 */
295
	protected function buildNewName(FileInfo $fileInfo) {
		$name = $fileInfo->getName();

		// Strip the extension at the last dot instead of cutting a fixed four
		// characters, so that extensions such as .jpeg or .tiff are handled too.
		// Names without a dot, or with only a leading dot, are kept as they are.
		$dotPosition = strrpos($name, '.');
		if ($dotPosition === false || $dotPosition === 0) {
			$fileName = $name;
		} else {
			$fileName = substr($name, 0, $dotPosition);
		}

		// Remove the file name from the END of the path only, so that a
		// directory containing the same text is left untouched.
		$filePath = $fileInfo->getPath();
		$nameLength = strlen($name);
		if ($nameLength > 0 && substr($filePath, -$nameLength) === $name) {
			$filePath = substr($filePath, 0, -$nameLength);
		}

		// Drop the '/<userId>/files' part to get the path relative to the user's files dir.
		$filePath = str_replace('/' . $this->userId . '/files', '', $filePath);

		// PDFs stay PDFs, images produce a text file.
		$suffix = ($fileInfo->getMimetype() === static::MIMETYPE_PDF) ? '_OCR.pdf' : '_OCR.txt';
		return Files::buildNotExistingFileName($filePath, $fileName . $suffix);
	}
310
311
	/**
312
	 * Returns the fileInfo for each file in files and checks
313
	 * if it has a allowed mimetype and some other conditions.
314
	 *
315
	 * @param array $files
316
	 * @return array of Files\FileInfo
317
	 * @throws NotFoundException
318
	 */
319
	private function buildFileInfo(array $files) {
		try {
			$fileArray = [];
			foreach ($files as $file) {
				// An entry needs a location (path or directory) and must be of type 'file'.
				// isset() avoids an undefined-index notice when 'type' is missing.
				$hasLocation = !empty($file['path']) || !empty($file['directory']);
				$isFile = isset($file['type']) && $file['type'] === 'file';
				if (!$hasLocation || !$isFile) {
					throw new NotFoundException($this->l10n->t('Wrong path parameter.'));
				}

				$fileInfo = $this->view->getFileInfo($this->getCorrectPath($file));
				// NOTE(review): getFileInfo() is expected to return false when the
				// path cannot be resolved. Passing that on to checkMimeType() would
				// violate its FileInfo type hint, so it is rejected here with the
				// same message checkMimeType() uses.
				if (!($fileInfo instanceof FileInfo)) {
					throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
				}

				$this->checkMimeType($fileInfo);
				$fileArray[] = $fileInfo;
			}
			return $fileArray;
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
339
340
	/**
341
	 * Checks a Mimetype for a specific given FileInfo.
342
	 * @param Files\FileInfo $fileInfo
343
	 */
344
	private function checkMimeType($fileInfo) {
		try {
			// The parameter is intentionally not type hinted: with a FileInfo
			// hint a falsy value would fail before this guard is ever reached,
			// instead of producing the NotFoundException below.
			$allowed = ($fileInfo instanceof FileInfo)
				&& in_array($fileInfo->getMimetype(), static::ALLOWED_MIMETYPES, true);
			if (!$allowed) {
				$this->logger->debug('Getting FileInfo did not work or not included in the ALLOWED_MIMETYPES array.', ['app' => 'ocr']);
				throw new NotFoundException($this->l10n->t('Wrong parameters or wrong mimetype.'));
			}
		} catch (Exception $e) {
			// Logs the exception and rethrows it.
			$this->handleException($e);
		}
	}
354
355
	/**
356
	 * Returns the correct path based on delivered file variable
357
	 * @param $file
358
	 * @return string
359
	 */
360
	private function getCorrectPath($file) {
		// New updated files carry the property 'directory' instead of 'path'.
		$directory = empty($file['path']) ? $file['directory'] : $file['path'];

		// The root directory already ends with a slash, everything else needs one.
		$separator = ($directory === '/') ? '' : '/';

		return $directory . $separator . $file['name'];
	}
372
373
	/**
374
	 * Handle the possible thrown Exceptions from all methods of this class.
375
	 *
376
	 * @param Exception $e
377
	 * @throws Exception
378
	 * @throws NotFoundException
379
	 */
380 6 View Code Duplication
	private function handleException($e) {
		$this->logger->logException($e, ['app' => 'ocr', 'message' => 'Exception during ocr service function processing']);

		// Anything that is not a NotFoundException is passed on untouched.
		if (!($e instanceof NotFoundException)) {
			throw $e;
		}

		// NotFoundExceptions are re-created with just their message.
		throw new NotFoundException($e->getMessage());
	}
388
}