Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like OcrService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use OcrService, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
31 | class OcrService { |
||
32 | |||
33 | /** |
||
34 | * @var ILogger |
||
35 | */ |
||
36 | private $logger; |
||
37 | |||
38 | /** |
||
39 | * @var ITempManager |
||
40 | */ |
||
41 | private $tempM; |
||
42 | |||
43 | /** |
||
44 | * @var QueueService |
||
45 | */ |
||
46 | private $queueService; |
||
47 | |||
48 | /** |
||
49 | * @var OcrStatusMapper |
||
50 | */ |
||
51 | private $statusMapper; |
||
52 | |||
53 | /** |
||
54 | * @var FileMapper |
||
55 | */ |
||
56 | private $fileMapper; |
||
57 | |||
58 | /** |
||
59 | * @var ShareMapper |
||
60 | */ |
||
61 | private $shareMapper; |
||
62 | |||
63 | /** |
||
64 | * @var View |
||
65 | */ |
||
66 | private $view; |
||
67 | |||
68 | /** |
||
69 | * @var userId |
||
70 | */ |
||
71 | private $userId; |
||
72 | |||
73 | /** |
||
74 | * @var IL10N |
||
75 | */ |
||
76 | private $l10n; |
||
77 | |||
78 | /** |
||
79 | * Array of allowed MIME types for OCR processing |
||
80 | */ |
||
81 | const ALLOWED_MIMETYPES = ['application/pdf', 'image/png', 'image/jpeg', 'image/tiff', 'image/jp2', 'image/jpm', 'image/jpx', 'image/webp', 'image/gif']; |
||
82 | |||
83 | /** |
||
84 | * The correct MIME type for a PDF file |
||
85 | */ |
||
86 | const MIMETYPE_PDF = 'application/pdf'; |
||
87 | |||
88 | /** |
||
89 | * The only allowed image MIME types by tesseract |
||
90 | */ |
||
91 | const MIMETYPES_IMAGE = ['image/png', 'image/jpeg', 'image/tiff', 'image/jp2', 'image/jpm', 'image/jpx', 'image/webp', 'image/gif']; |
||
92 | |||
93 | /** |
||
94 | * OcrService constructor. |
||
95 | * |
||
96 | * @param ITempManager $tempManager |
||
97 | * @param QueueService $queueService |
||
98 | * @param OcrStatusMapper $mapper |
||
99 | * @param View $view |
||
100 | * @param $userId |
||
101 | * @param IL10N $l10n |
||
102 | * @param ILogger $logger |
||
103 | */ |
||
104 | 15 | public function __construct(ITempManager $tempManager, QueueService $queueService, OcrStatusMapper $mapper, FileMapper $fileMapper, ShareMapper $shareMapper, View $view, $userId, IL10N $l10n, ILogger $logger) { |
|
115 | |||
116 | /** |
||
117 | * Gets the list of all available tesseract-ocr languages. |
||
118 | * TODO: remove and replace by docker behavior |
||
119 | * @return string[] Languages |
||
120 | */ |
||
121 | 4 | public function listLanguages() { |
|
122 | try { |
||
123 | 4 | $success = -1; |
|
124 | 4 | $this->logger->debug('Fetching languages. ', ['app' => 'ocr']); |
|
125 | 4 | exec('tesseract --list-langs 2>&1', $result, $success); |
|
126 | 4 | $this->logger->debug('Result for list-language command: ' . json_encode($result), ['app' => 'ocr']); |
|
127 | 4 | if ($success === 0 && count($result) > 0) { |
|
128 | 4 | if (is_array($result)) { |
|
129 | 4 | $traineddata = $result; |
|
130 | } else { |
||
131 | throw new NotFoundException($this->l10n->t('No languages found.')); |
||
132 | } |
||
133 | 4 | $languages = array(); |
|
134 | 4 | array_shift($traineddata); // delete the first element of the array as it is a description of tesseract |
|
135 | 4 | asort($traineddata); // sort the languages alphabetically |
|
136 | 4 | foreach ($traineddata as $td) { |
|
137 | 4 | $tdname = trim($td); // strip whitespaces |
|
138 | 4 | array_push($languages, $tdname); //add to language list |
|
139 | } |
||
140 | 4 | $this->logger->debug('Fetched languages: ' . json_encode($languages), ['app' => 'ocr']); |
|
141 | 4 | return $languages; |
|
142 | } else { |
||
143 | throw new NotFoundException($this->l10n->t('No languages found.')); |
||
144 | } |
||
145 | } catch (Exception $e) { |
||
146 | $this->handleException($e); |
||
147 | } |
||
148 | } |
||
149 | |||
150 | /** |
||
151 | * Processes and prepares the files for OCR. |
||
152 | * Sends the stuff to the client in order to OCR async. |
||
153 | * |
||
154 | * @param string[] $languages |
||
155 | * @param array $files |
||
156 | * @return string |
||
157 | */ |
||
158 | 5 | public function process($languages, $files) { |
|
159 | try { |
||
160 | 5 | $this->logger->debug('Will now process files: ' . json_encode($files) . ' with languages: ' . json_encode($languages), ['app' => 'ocr']); |
|
161 | // Check if files and language not empty |
||
162 | // TODO: do not check if languages empty: tesseract does not need a language necassarily. |
||
163 | 5 | if (!empty($files) && !empty($languages) && count(array_diff($languages, $this->listLanguages())) === 0) { |
|
164 | |||
165 | 3 | $fileInfo = $this->buildFileInfo($files); |
|
166 | |||
167 | 2 | foreach ($fileInfo as $fInfo) { |
|
168 | // Check if filelock existing |
||
169 | // TODO: FileLock maybe \OC\Files\View::lockFile() |
||
170 | // Check Shared |
||
171 | 2 | $source = $fInfo->getPath(); |
|
172 | 2 | if ($this->checkSharedWithInitiator($fInfo)) { |
|
173 | // Shared Item |
||
174 | 1 | $source = str_replace('home::', '', $fInfo->getStoragename()) . '/' . $source; |
|
175 | 1 | $target = $this->buildTargetForShared($fInfo); |
|
176 | } else { |
||
177 | // Not Shared |
||
178 | 1 | $source = $this->userId . '/' . $source; |
|
179 | 1 | $target = $this->buildTarget($fInfo); |
|
180 | } |
||
181 | |||
182 | // create a temp file for ocr processing purposes |
||
183 | 2 | $tempFile = $this->tempM->getTemporaryFile(); |
|
184 | |||
185 | // set the running type |
||
186 | 2 | if ($fInfo->getMimetype() === $this::MIMETYPE_PDF) { |
|
187 | $ftype = 'mypdf'; |
||
188 | } else { |
||
189 | 2 | $ftype = 'tess'; |
|
190 | } |
||
191 | // Create status object |
||
192 | 2 | $status = new OcrStatus('PENDING', $source, $target, $tempFile, $ftype, $this->userId, false); |
|
193 | // Init client and send task / job |
||
194 | // Feed the worker |
||
195 | 2 | $this->queueService->clientSend($status, $languages, \OC::$SERVERROOT); |
|
196 | } |
||
197 | 2 | return 'PROCESSING'; |
|
198 | } else { |
||
199 | 2 | throw new NotFoundException($this->l10n->t('Empty parameters passed.')); |
|
200 | } |
||
201 | 3 | } catch (Exception $e) { |
|
202 | 3 | $this->handleException($e); |
|
203 | } |
||
204 | } |
||
205 | |||
206 | /** |
||
207 | * A function which returns the JSONResponse for all required status checks and tasks. |
||
208 | * It will check for already processed, pending and failed ocr tasks and return them as needed. |
||
209 | * |
||
210 | * @codeCoverageIgnore |
||
211 | * @return string |
||
212 | */ |
||
213 | public function status() { |
||
230 | |||
231 | /** |
||
232 | * The command ocr:complete for occ will call this function in order to set the status. |
||
233 | * the worker should call it automatically after each processing step. |
||
234 | * |
||
235 | * @param integer $statusId |
||
236 | * @param boolean $failed |
||
237 | * @param string $errorMessage |
||
238 | */ |
||
239 | 3 | public function complete($statusId, $failed, $errorMessage) { |
|
254 | |||
255 | /** |
||
256 | * The PersonalSettingsController will have the opportunity to delete ocr status objects. |
||
257 | * |
||
258 | * @param $statusId |
||
259 | * @param string $userId |
||
260 | * @return OcrStatus |
||
261 | */ |
||
262 | 2 | public function deleteStatus($statusId, $userId) { |
|
263 | try { |
||
264 | 2 | $status = $this->statusMapper->find($statusId); |
|
265 | 1 | if ($status->getUserId() !== $userId) { |
|
266 | throw new NotFoundException($this->l10n->t('Cannot delete. Wrong owner.')); |
||
267 | } else { |
||
268 | 1 | $status = $this->statusMapper->delete($status); |
|
269 | } |
||
270 | 1 | $status->setTarget($this->removeFileExtension($status)); |
|
271 | 1 | $status->setSource(null); |
|
272 | 1 | $status->setTempFile(null); |
|
273 | 1 | $status->setType(null); |
|
274 | 1 | $status->setErrorDisplayed(null); |
|
275 | 1 | return $status; |
|
276 | 1 | } catch (Exception $e) { |
|
277 | 1 | if ($e instanceof DoesNotExistException) { |
|
|
|||
278 | 1 | $ex = new NotFoundException($this->l10n->t('Cannot delete. Wrong ID.')); |
|
279 | 1 | $this->handleException($ex); |
|
280 | } else { |
||
281 | $this->handleException($e); |
||
282 | } |
||
283 | } |
||
284 | } |
||
285 | |||
286 | /** |
||
287 | * Gets all status objects for a specific user in order to list them on the personal settings page. |
||
288 | * |
||
289 | * @param string $userId |
||
290 | * @return array |
||
291 | */ |
||
292 | 1 | public function getAllForPersonal($userId) { |
|
310 | |||
311 | /** |
||
312 | * Returns a not existing file name for pdf or image processing |
||
313 | * protected as of testing issues with static methods. (Actually |
||
314 | * it will be mocked partially) FIXME: Change this behaviour as soon as the buidlNotExistingFileName function is not static anymore |
||
315 | * @codeCoverageIgnore |
||
316 | * |
||
317 | * @param File $fileInfo |
||
318 | * @return string |
||
319 | */ |
||
320 | protected function buildTargetForShared(File $fileInfo) { |
||
350 | |||
351 | /** |
||
352 | * Returns a not existing file name for PDF or image processing |
||
353 | * protected as of testing issues with static methods. (Actually |
||
354 | * it will be mocked partially) FIXME: Change this behaviour as soon as the buidlNotExistingFileName function is not static anymore |
||
355 | * @codeCoverageIgnore |
||
356 | * |
||
357 | * @param File $fileInfo |
||
358 | * @return string |
||
359 | */ |
||
360 | protected function buildTarget(File $fileInfo) { |
||
391 | |||
392 | |||
393 | /** |
||
394 | * Returns the fileInfo for each file in files and checks |
||
395 | * if it has a allowed MIME type and some other conditions. |
||
396 | * |
||
397 | * @param array $files |
||
398 | * @return File[] |
||
399 | * @throws NotFoundException |
||
400 | */ |
||
401 | 3 | private function buildFileInfo($files) { |
|
402 | try { |
||
403 | 3 | $fileArray = array(); |
|
404 | 3 | foreach ($files as $file) { |
|
405 | // Check if anything is missing and file type is correct |
||
406 | 3 | if (!empty($file['id'])) { |
|
407 | |||
408 | 3 | $fileInfo = $this->fileMapper->find($file['id']); |
|
409 | 3 | $this->checkMimeType($fileInfo); |
|
410 | |||
411 | 2 | array_push($fileArray, $fileInfo); |
|
412 | } else { |
||
413 | 2 | throw new NotFoundException($this->l10n->t('Wrong parameter.')); |
|
414 | } |
||
415 | } |
||
416 | 2 | return $fileArray; |
|
417 | 1 | } catch (Exception $e) { |
|
418 | 1 | $this->handleException($e); |
|
419 | } |
||
420 | } |
||
421 | |||
422 | /** |
||
423 | * Checks a MIME type for a specifically given FileInfo. |
||
424 | * @param File $fileInfo |
||
425 | */ |
||
426 | 3 | private function checkMimeType(File $fileInfo) { |
|
427 | try { |
||
428 | 3 | if (!$fileInfo || !in_array($fileInfo->getMimetype(), $this::ALLOWED_MIMETYPES)) { |
|
429 | 1 | $this->logger->debug('Getting FileInfo did not work or not included in the ALLOWED_MIMETYPES array.', ['app' => 'ocr']); |
|
430 | 3 | throw new NotFoundException($this->l10n->t('Wrong MIME type.')); |
|
431 | } |
||
432 | 1 | } catch (Exception $e) { |
|
433 | 1 | $this->handleException($e); |
|
434 | } |
||
435 | 2 | } |
|
436 | |||
437 | /** |
||
438 | * @param File $fileInfo |
||
439 | * @return boolean|null |
||
440 | */ |
||
441 | 2 | private function checkSharedWithInitiator($fileInfo) { |
|
442 | try { |
||
443 | 2 | $owner = str_replace('home::', '', $fileInfo->getStoragename()); |
|
444 | 2 | if ($this->userId === $owner) { |
|
445 | // user is owner (no shared file) |
||
446 | 1 | return false; |
|
447 | } else { |
||
448 | // user is not owner (shared file) |
||
449 | 1 | return true; |
|
450 | } |
||
451 | } catch (Exception $e) { |
||
452 | $this->handleException($e); |
||
453 | } |
||
454 | |||
455 | } |
||
456 | |||
457 | /** |
||
458 | * Finishes all Processed files by copying them to the right path and deleteing the temp files. |
||
459 | * Returns the number of processed files. |
||
460 | * |
||
461 | * @codeCoverageIgnore |
||
462 | * @return array |
||
463 | */ |
||
464 | private function handleProcessed() { |
||
491 | |||
492 | /** |
||
493 | * Removes ".txt" from the newName of a OCR status |
||
494 | * |
||
495 | * @param $status OcrStatus |
||
496 | * @return string |
||
497 | */ |
||
498 | 2 | private function removeFileExtension($status) { |
|
499 | try { |
||
500 | 2 | return substr($status->getTarget(), 0, strrpos($status->getTarget(), '_OCR')); |
|
501 | } catch (Exception $e) { |
||
502 | $this->handleException($e); |
||
503 | } |
||
504 | } |
||
505 | |||
506 | /** |
||
507 | * Handles all failed orders of ocr processing queue and returns the status objects. |
||
508 | * |
||
509 | * @codeCoverageIgnore |
||
510 | * @return array |
||
511 | */ |
||
512 | private function handleFailed() { |
||
528 | |||
529 | /** |
||
530 | * Handle the possible thrown Exceptions from all methods of this class. |
||
531 | * |
||
532 | * @param Exception $e |
||
533 | * @throws Exception |
||
534 | * @throws NotFoundException |
||
535 | */ |
||
536 | 5 | View Code Duplication | private function handleException($e) { |
544 | } |
||
545 |
This error could be the result of:
1. Missing dependencies
PHP Analyzer uses your
composer.json
file (if available) to determine the dependencies of your project and to determine all the available classes and functions. It expects thecomposer.json
to be in the root folder of your repository.Are you sure this class is defined by one of your dependencies, or did you maybe not list a dependency in either the
require
orrequire-dev
section?2. Missing use statement
PHP does not complain about undefined classes in
ìnstanceof
checks. For example, the following PHP code will work perfectly fine:If you have not tested against this specific condition, such errors might go unnoticed.