Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like OcrService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use OcrService, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
29 | class OcrService { |
||
30 | |||
31 | /** |
||
32 | * @var ILogger |
||
33 | */ |
||
34 | private $logger; |
||
35 | |||
36 | /** |
||
37 | * @var ITempManager |
||
38 | */ |
||
39 | private $tempM; |
||
40 | |||
41 | /** |
||
42 | * @var IConfig |
||
43 | */ |
||
44 | private $config; |
||
45 | |||
46 | /** |
||
47 | * @var GearmanWorkerService |
||
48 | */ |
||
49 | private $workerService; |
||
50 | |||
51 | /** |
||
52 | * @var OcrStatusMapper |
||
53 | */ |
||
54 | private $statusMapper; |
||
55 | |||
56 | /** |
||
57 | * @var View |
||
58 | */ |
||
59 | private $view; |
||
60 | |||
61 | /** |
||
62 | * @var |
||
63 | */ |
||
64 | private $userId; |
||
65 | |||
66 | /** |
||
67 | * Array of allowed mimetypes for ocr processing |
||
68 | */ |
||
69 | const ALLOWED_MIMETYPES = ['application/pdf', 'image/png', 'image/jpeg', 'image/tiff']; |
||
70 | |||
71 | /** |
||
72 | * the correct mimetype for a pdf file |
||
73 | */ |
||
74 | const MIMETYPE_PDF = 'application/pdf'; |
||
75 | |||
76 | /** |
||
77 | * the only allowed image mimetypes by tesseract |
||
78 | */ |
||
79 | const MIMETYPES_IMAGE = ['image/png', 'image/jpeg', 'image/tiff']; |
||
80 | |||
81 | /** |
||
82 | * OcrService constructor. |
||
83 | * |
||
84 | * @param ILogger $logger |
||
85 | */ |
||
86 | 2 | public function __construct(ITempManager $tempManager, IConfig $config, GearmanWorkerService $workerService, OcrStatusMapper $mapper, View $view, $userId, ILogger $logger) { |
|
95 | |||
96 | /** |
||
97 | * Gets the list of all available tesseract-ocr languages. |
||
98 | * |
||
99 | * @return array Languages |
||
100 | */ |
||
101 | public function listLanguages(){ |
||
128 | |||
129 | /** |
||
130 | * Processes and prepares the files for ocr. |
||
131 | * Sends the stuff to the gearman client in order to ocr async. |
||
132 | * |
||
133 | * @param $language |
||
134 | * @param array $files |
||
135 | * @return string |
||
136 | */ |
||
137 | public function process($language, $files) { |
||
175 | |||
176 | /** |
||
177 | * A function which returns the JSONResponse for all required status checks and tasks. |
||
178 | * It will check for already processed, pending and failed ocr tasks and return them as needed. |
||
179 | * |
||
180 | * @return string |
||
181 | */ |
||
182 | public function status(){ |
||
196 | |||
197 | /** |
||
198 | * The command ocr:complete for occ will call this function in order to set the status. |
||
199 | * the gearman worker should call it automatically after each processing step. |
||
200 | * |
||
201 | * @param $statusId |
||
202 | * @param $failed |
||
203 | */ |
||
204 | public function complete($statusId, $failed){ |
||
223 | |||
224 | /** |
||
225 | * Finishes all Processed files by copying them to the right path and deleteing the temp files. |
||
226 | * Returns the number of processed files. |
||
227 | * |
||
228 | * @return int |
||
229 | */ |
||
230 | private function handleProcessed(){ |
||
255 | |||
256 | /** |
||
257 | * Handles all failed orders of ocr processing queue and returns the status objects. |
||
258 | * |
||
259 | * @return array |
||
260 | */ |
||
261 | private function handleFailed(){ |
||
276 | |||
277 | |||
278 | /** |
||
279 | * Returns a not existing file name for pdf or image processing |
||
280 | * |
||
281 | * @param Files\FileInfo $fileInfo |
||
282 | * @return string |
||
283 | */ |
||
284 | private function buildNewName(Files\FileInfo $fileInfo){ |
||
299 | |||
300 | /** |
||
301 | * Returns the fileInfo for each file in files and checks |
||
302 | * if it has a allowed mimetype and some other conditions. |
||
303 | * |
||
304 | * @param array $files |
||
305 | * @return array of Files\FileInfo |
||
306 | * @throws NotFoundException |
||
307 | */ |
||
308 | private function buildFileInfo(array $files){ |
||
334 | |||
335 | /** |
||
336 | * Inits the Gearman client and sends the task to the background worker (async) |
||
337 | * @param $type |
||
338 | * @param $datadirectory |
||
339 | * @param $path |
||
340 | * @param $tempFile |
||
341 | * @param $language |
||
342 | * @param $statusId |
||
343 | */ |
||
344 | private function sendGearmanJob($type, $datadirectory, $path, $tempFile, $language, $status, $occDir){ |
||
366 | |||
367 | /** |
||
368 | * Handle the possible thrown Exceptions from all methods of this class. |
||
369 | * |
||
370 | * @param Exception $e |
||
371 | * @throws Exception |
||
372 | * @throws NotFoundException |
||
373 | */ |
||
374 | View Code Duplication | private function handleException($e) { |
|
382 | } |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.