Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like DocxMustache often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DocxMustache, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
/**
 * Merges data into a DOCX template.
 *
 * The template is copied into a temporary working directory, its main
 * document part is rendered with MustacheRender and HtmlConversion, images
 * whose description carries an [IMG-REPLACE] / [LOCAL_IMG_REPLACE] marker are
 * swapped for the referenced image, and the result is written back into the
 * archive. The merged document can optionally be converted to PDF.
 */
class DocxMustache
{
    /** @var mixed data handed to MustacheRender::render() */
    public $items;

    /** @var string|false XML of word/document.xml, false until the template has been read */
    public $word_doc;

    /** @var string base name of the template file */
    public $template_file_name;

    /** @var string template file path as passed to the constructor */
    public $template_file;

    /** @var string working directory (relative to the storage path), set by CopyTmplate() */
    public $local_path;

    /** @var string name of the storage disk */
    public $storageDisk;

    /** @var string prefix within the storage path */
    public $storagePathPrefix;

    /** @var \Chumper\Zipper\Zipper archive handler used to read and update the docx */
    public $zipper;

    /** @var string parameter string passed along when fetching images by URL */
    public $imageManipulation;

    /** @var bool whether Log() forwards messages to the logger */
    public $verbose;

    /**
     * @param mixed  $items               data to merge into the template
     * @param string $local_template_file path of the template file
     */
    public function __construct($items, $local_template_file)
    {
        $this->items = $items;
        $this->template_file_name = basename($local_template_file);
        $this->template_file = $local_template_file;
        $this->word_doc = false;
        $this->zipper = new \Chumper\Zipper\Zipper();

        //name of disk for storage
        $this->storageDisk = 'local';

        //prefix within your storage path
        $this->storagePathPrefix = 'app/';

        //if you use img urls that support manipulation via parameter
        $this->imageManipulation = ''; //'&w=1800';

        $this->verbose = false;
    }

    /**
     * Copies the template into a working directory and merges the data into it.
     */
    public function Execute()
    {
        $this->CopyTmplate();
        $this->ReadTeamplate();
    }

    /**
     * Resolves a path through the storage_path() helper.
     *
     * @param string $file
     */
    public function StoragePath($file)
    {
        return storage_path($file);
    }

    /**
     * Reports progress when verbose mode is on.
     *
     * Can be overwritten in an extended class to log with a custom process logger.
     *
     * @param string $msg
     */
    protected function Log($msg)
    {
        if ($this->verbose) {
            Log::error($msg);
        }
    }

    /**
     * Deletes working directories that have not been modified for 240 minutes.
     */
    public function CleanUpTmpDirs()
    {
        // 60 * 240 seconds = 240 minutes (4 hours)
        $expiryThreshold = time() - (60 * 240);
        $disk = \Storage::disk($this->storageDisk);

        foreach ($disk->directories($this->storagePathPrefix.'DocxMustache') as $dir) {
            if ($disk->lastModified($dir) < $expiryThreshold) {
                $disk->deleteDirectory($dir);
            }
        }
    }

    /**
     * Creates a fresh working directory after purging expired ones.
     *
     * @return string path of the new directory, relative to the storage path
     */
    public function GetTmpDir()
    {
        $this->CleanUpTmpDirs();
        $path = $this->storagePathPrefix.'DocxMustache/'.uniqid($this->template_file).'/';
        \File::makeDirectory($this->StoragePath($path), 0775, true);

        return $path;
    }

    /**
     * Copies the template into a new working directory and remembers that
     * directory in $local_path.
     */
    public function CopyTmplate()
    {
        $this->Log('Get Copy of Template');
        $this->local_path = $this->GetTmpDir();
        \Storage::disk($this->storageDisk)->copy(
            $this->storagePathPrefix.$this->template_file,
            $this->local_path.$this->template_file_name
        );
    }

    /**
     * Extracts a single part of the docx archive into the working directory.
     *
     * @param string $file path of the part inside the archive
     */
    protected function exctractOpenXmlFile($file)
    {
        $this->zipper
            ->make($this->StoragePath($this->local_path.$this->template_file_name))
            ->extractTo($this->StoragePath($this->local_path), [$file], \Chumper\Zipper\Zipper::WHITELIST);
    }

    /**
     * Extracts a part of the archive and returns it.
     *
     * @param string $file path of the part inside the archive
     * @param string $type 'file' returns the raw contents, anything else a SimpleXML object
     *
     * @return string|\SimpleXMLElement
     *
     * @throws Exception when the part cannot be read or parsed
     */
    protected function ReadOpenXmlFile($file, $type = 'file')
    {
        $this->exctractOpenXmlFile($file);

        if ($type == 'file') {
            $file_contents = \Storage::disk($this->storageDisk)->get($this->local_path.$file);
            if (! $file_contents) {
                throw new Exception('Cannot read file '.$file);
            }

            return $file_contents;
        }

        $xml_object = simplexml_load_file($this->StoragePath($this->local_path.$file));
        if (! $xml_object) {
            throw new Exception('Cannot load XML Object from file '.$file);
        }

        return $xml_object;
    }

    /**
     * Writes a part to the working directory and adds it to the archive.
     *
     * @param string       $file    path of the part, relative to the working directory
     * @param string|false $folder  folder inside the archive, falsy to add without selecting one
     * @param string       $content contents to write
     */
    protected function SaveOpenXmlFile($file, $folder, $content)
    {
        \Storage::disk($this->storageDisk)->put($this->local_path.$file, $content);

        //add new content to word doc
        $archive = $folder ? $this->zipper->folder($folder) : $this->zipper;
        $archive->add($this->StoragePath($this->local_path.$file));
    }

    /**
     * Serialises a SimpleXML object and stores it via SaveOpenXmlFile().
     *
     * @param \SimpleXMLElement $xmlObject
     * @param string            $file
     * @param string|false      $folder
     *
     * @throws Exception when the object cannot be serialised
     */
    protected function SaveOpenXmlObjectToFile($xmlObject, $file, $folder)
    {
        $xmlString = $xmlObject->asXML();
        if (! $xmlString) {
            throw new Exception('Cannot generate xml for '.$file);
        }

        $this->SaveOpenXmlFile($file, $folder, $xmlString);
    }

    /**
     * Renders the main document part with the data, replaces images and
     * writes the result back into the archive.
     */
    public function ReadTeamplate()
    {
        $this->Log('Analyze Template');
        //get the main document out of the docx archive
        $this->word_doc = $this->ReadOpenXmlFile('word/document.xml', 'file');

        $this->Log('Merge Data into Template');
        $this->word_doc = MustacheRender::render($this->items, $this->word_doc);
        $this->word_doc = HtmlConversion::convert($this->word_doc);

        $this->ImageReplacer();

        $this->Log('Compact Template with Data');
        $this->SaveOpenXmlFile('word/document.xml', 'word', $this->word_doc);
        $this->zipper->close();
    }

    /**
     * Makes sure [Content_Types].xml declares a Default entry for the given
     * image extension, adding and saving it when it is missing.
     *
     * @param string $imageCt image extension, also used as the image/* subtype
     *
     * @throws Exception when [Content_Types].xml cannot be traversed
     */
    protected function AddContentType($imageCt = 'jpeg')
    {
        $ct_file = $this->ReadOpenXmlFile('[Content_Types].xml', 'object');

        if (! ($ct_file instanceof \Traversable)) {
            throw new Exception('Cannot traverse through [Content_Types].xml.');
        }

        //check if the content type has already been declared
        foreach ($ct_file->Default as $default) {
            if ((string) $default['Extension'] == $imageCt) {
                return;
            }
        }

        //not declared yet: add it, save the xml and add it to the archive
        $sxe = $ct_file->addChild('Default');
        $sxe->addAttribute('Extension', $imageCt);
        $sxe->addAttribute('ContentType', 'image/'.$imageCt);
        $this->SaveOpenXmlObjectToFile($ct_file, '[Content_Types].xml', false);
    }

    /**
     * Returns the pic element nested inside a w:drawing container.
     *
     * @param \SimpleXMLElement $drawing
     * @param array             $ns namespaces of the document, keyed by prefix
     */
    private function pictureNode($drawing, $ns)
    {
        return $drawing->children($ns['wp'])->children($ns['a'])
            ->graphic->graphicData->children($ns['pic'])->pic;
    }

    /**
     * Collects the images whose description holds a valid replacement marker.
     *
     * Every drawing's description is rewritten without the marker. Images to
     * be replaced get a new embed id ('wrklstId' plus a counter).
     *
     * @param \SimpleXMLElement $main_file main document, modified in place
     * @param array             $ns        namespaces of the document, keyed by prefix
     *
     * @return array 'imgs' => list of images to insert, 'imgs_replaced' => previous embed ids
     */
    protected function FetchReplaceableImages(&$main_file, $ns)
    {
        $imgs = [];
        $imgs_replaced = []; // so they can later be removed from media and relation file.
        $newIdCounter = 1;

        //iterate through all drawing containers of the xml document
        foreach ($main_file->xpath('//w:drawing') as $drawing) {
            //figure out if there is a URL saved in the description field of the img
            $docPr = $drawing->children($ns['wp'])->xpath('wp:docPr')[0];
            $img_url = $this->AnalyseImgUrlString($docPr->attributes()['descr']);
            $docPr->attributes()['descr'] = $img_url['rest'];

            if (! $img_url['valid']) {
                continue;
            }

            $pic = $this->pictureNode($drawing, $ns);
            $blip = $pic->blipFill->children($ns['a'])->blip;
            $ext = $pic->spPr->children($ns['a'])->xfrm->ext;

            $ueid = 'wrklstId'.$newIdCounter;
            $wasId = (string) $blip->attributes($ns['r'])['embed'];

            //remember img as being replaced
            $imgs_replaced[$wasId] = $wasId;

            $imgs[] = [
                'cx'    => (int) $ext->attributes()['cx'],
                'cy'    => (int) $ext->attributes()['cy'],
                'wasId' => $wasId,
                'id'    => $ueid,
                'url'   => $img_url['url'],
                'path'  => $img_url['path'],
                'mode'  => $img_url['mode'],
            ];

            //set new img id
            $blip->attributes($ns['r'])['embed'] = $ueid;

            $newIdCounter++;
        }

        return [
            'imgs'          => $imgs,
            'imgs_replaced' => $imgs_replaced,
        ];
    }

    /**
     * Removes the relationships (and their archive targets) of replaced images.
     *
     * @param array             $imgs_replaced relationship ids to remove
     * @param \SimpleXMLElement $rels_file     relationships document, modified in place
     */
    protected function RemoveReplaceImages($imgs_replaced, &$rels_file)
    {
        //TODO: check if the same img is used at a different position in the file as well, as otherwise broken images are produced.
        foreach ($imgs_replaced as $img_replaced) {
            // Locate the entry first and remove it after the loop, so the
            // node list is never modified while it is being iterated.
            $position = null;
            $i = 0;
            foreach ($rels_file->Relationship as $rel) {
                if ((string) $rel->attributes()['Id'] == $img_replaced) {
                    $this->zipper->remove('word/'.(string) $rel->attributes()['Target']);
                    $position = $i;
                    break;
                }
                $i++;
            }

            if ($position !== null) {
                unset($rels_file->Relationship[$position]);
            }
        }
    }

    /**
     * Fetches and resamples every replaceable image, registers a relationship
     * for it and updates the size of the drawing that embeds it.
     *
     * @param array             $ns        namespaces of the document, keyed by prefix
     * @param array             $imgs      images from FetchReplaceableImages(), extended in place
     * @param \SimpleXMLElement $rels_file relationships document, modified in place
     * @param \SimpleXMLElement $main_file main document, modified in place
     */
    protected function InsertImages($ns, &$imgs, &$rels_file, &$main_file)
    {
        $docimage = new DocImage();
        $allowed_imgs = $docimage->AllowedContentTypeImages();
        $total = count($imgs);
        // Only attributes change below, so the drawing list can be queried once.
        $drawings = $main_file->xpath('//w:drawing');
        $image_i = 1;

        foreach ($imgs as $k => $img) {
            $this->Log('Merge Images into Template - '.round($image_i / $total * 100).'%');
            $image_i++;

            $fromUrl = $img['mode'] == 'url';
            $imgageData = $docimage->GetImageFromUrl(
                $fromUrl ? $img['url'] : $img['path'],
                $fromUrl ? $this->imageManipulation : ''
            );
            if (! $imgageData) {
                continue;
            }

            $baseName = str_replace('wrklstId', 'wrklst_image', $img['id']);
            $imgs[$k]['img_file_src'] = $baseName.$allowed_imgs[$imgageData['mime']];
            $imgs[$k]['img_file_dest'] = $baseName.'.jpeg';

            $resampled_img = $docimage->ResampleImage($this, $imgs, $k, $imgageData['data']);

            $sxe = $rels_file->addChild('Relationship');
            $sxe->addAttribute('Id', $img['id']);
            $sxe->addAttribute('Type', 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image');
            $sxe->addAttribute('Target', 'media/'.$imgs[$k]['img_file_dest']);

            foreach ($drawings as $drawing) {
                $pic = $this->pictureNode($drawing, $ns);
                if (null === $pic->blipFill) {
                    continue;
                }
                $embedId = (string) $pic->blipFill->children($ns['a'])->blip->attributes($ns['r'])['embed'];
                if ($embedId !== (string) $img['id']) {
                    continue;
                }

                $ext = $pic->spPr->children($ns['a'])->xfrm->ext;
                $ext->attributes()['cx'] = $resampled_img['width_emus'];
                $ext->attributes()['cy'] = $resampled_img['height_emus'];

                $wp = $drawing->children($ns['wp']);
                if (isset($wp->anchor)) {
                    //anchor images
                    $wp->anchor->extent->attributes()['cx'] = $resampled_img['width_emus'];
                    $wp->anchor->extent->attributes()['cy'] = $resampled_img['height_emus'];
                } elseif (isset($wp->inline)) {
                    //inline images
                    $wp->inline->extent->attributes()['cx'] = $resampled_img['width_emus'];
                    $wp->inline->extent->attributes()['cy'] = $resampled_img['height_emus'];
                }

                break;
            }
        }
    }

    /**
     * Replaces marked images in the rendered document and stores the updated
     * XML in $word_doc. When the document cannot be parsed, the libxml errors
     * are logged and $word_doc is left untouched.
     *
     * @throws Exception when the updated document cannot be serialised
     */
    protected function ImageReplacer()
    {
        $this->Log('Load XML Document to Merge Images');

        //load main doc xml, collecting parse errors instead of emitting warnings
        $previousSetting = libxml_use_internal_errors(true);

        try {
            $main_file = simplexml_load_string($this->word_doc);

            if ($main_file === false) {
                $xmlerror = '';
                foreach (libxml_get_errors() as $error) {
                    // LibXMLError cannot be cast to string; use its message
                    $xmlerror .= trim($error->message).' ';
                }
                $this->Log('Error: Could not load XML file. '.trim($xmlerror));
                libxml_clear_errors();

                return;
            }

            $this->Log('Merge Images into Template');

            //get all namespaces of the document
            $ns = $main_file->getNamespaces(true);

            $replaceableImage = $this->FetchReplaceableImages($main_file, $ns);
            $imgs = $replaceableImage['imgs'];

            $rels_file = $this->ReadOpenXmlFile('word/_rels/document.xml.rels', 'object');

            // RemoveReplaceImages() is intentionally not called here: until it
            // checks whether a replaced image is also used elsewhere in the
            // document, removing it would produce broken images.

            //add jpg content type if not set
            $this->AddContentType('jpeg');

            $this->InsertImages($ns, $imgs, $rels_file, $main_file);

            $this->SaveOpenXmlObjectToFile($rels_file, 'word/_rels/document.xml.rels', 'word/_rels');

            $main_file_xml = $main_file->asXML();
            if (! $main_file_xml) {
                throw new Exception('Cannot generate xml for word/document.xml.');
            }
            $this->word_doc = $main_file_xml;
        } finally {
            libxml_use_internal_errors($previousSetting);
        }
    }

    /**
     * Cuts the first "$start ... $end" section out of a string.
     *
     * @param string $string
     * @param string $start opening marker
     * @param string $end   closing marker
     *
     * @return array|null 'value' => text between the markers, 'rest' => string
     *                    without the section; null when either marker is missing
     */
    private function extractMarkedValue($string, $start, $end)
    {
        $startPos = strpos($string, $start);
        if ($startPos === false) {
            return null;
        }

        $valuePos = $startPos + strlen($start);
        $endPos = strpos($string, $end, $valuePos);
        if ($endPos === false) {
            return null;
        }

        return [
            'value' => substr($string, $valuePos, $endPos - $valuePos),
            'rest'  => substr($string, 0, $startPos).substr($string, $endPos + strlen($end)),
        ];
    }

    /**
     * Looks for an image replacement marker in an image description.
     *
     * [IMG-REPLACE]url[/IMG-REPLACE] yields mode 'url',
     * [LOCAL_IMG_REPLACE]path[/LOCAL_IMG_REPLACE] yields mode 'path'. A string
     * without a usable marker (none, an empty one, or one that is never
     * closed) yields mode 'nothing' with all marker tags stripped from 'rest'.
     *
     * @param string $string
     *
     * @return array 'mode', 'url', 'path', 'rest' (description without the marker), 'valid'
     */
    protected function AnalyseImgUrlString($string)
    {
        $string = (string) $string;
        $start = '[IMG-REPLACE]';
        $end = '[/IMG-REPLACE]';
        $start_local = '[LOCAL_IMG_REPLACE]';
        $end_local = '[/LOCAL_IMG_REPLACE]';

        $mode = 'nothing';
        $url = '';
        $path = '';
        $valid = false;
        $rest = str_replace([$start, $end, $start_local, $end_local], '', $string);

        if (strpos($string, $start) !== false && strpos($string, $start.$end) === false) {
            $section = $this->extractMarkedValue($string, $start, $end);
            if ($section !== null) {
                $mode = 'url';
                $url = $section['value'];
                $rest = $section['rest'];

                //TODO: create a better url validity check
                $valid = ! (! trim(str_replace(['http', 'https', ':', ' '], '', $url))
                    || $url == str_replace('http', '', $url));
            }
        } elseif (strpos($string, $start_local) !== false && strpos($string, $start_local.$end_local) === false) {
            $section = $this->extractMarkedValue($string, $start_local, $end_local);
            if ($section !== null) {
                $mode = 'path';
                // strip parent directory references from the path
                $path = str_replace('..', '', $section['value']);
                $rest = $section['rest'];

                //only paths inside the storage path are accepted
                $valid = (bool) starts_with($path, storage_path());
            }
        }

        return [
            'mode'  => $mode,
            'url'   => trim($url),
            'path'  => trim($path),
            'rest'  => trim($rest),
            'valid' => $valid,
        ];
    }

    /**
     * Converts the merged docx to PDF with LibreOffice (soffice).
     *
     * @return string path of the generated PDF inside the working directory
     *
     * @throws \Symfony\Component\Process\Exception\ProcessFailedException when the conversion fails
     */
    public function SaveAsPdf()
    {
        $this->Log('Converting DOCX to PDF');

        $docxPath = $this->StoragePath($this->local_path.$this->template_file_name);

        //convert to pdf with libre office
        $process = new \Symfony\Component\Process\Process([
            'soffice',
            '--headless',
            '--convert-to',
            'pdf',
            $docxPath,
            '--outdir',
            $this->StoragePath($this->local_path),
        ]);
        // run() blocks until the command has finished
        $process->run();

        if (! $process->isSuccessful()) {
            throw new \Symfony\Component\Process\Exception\ProcessFailedException($process);
        }

        return $this->StoragePath($this->local_path.pathinfo($docxPath, PATHINFO_FILENAME).'.pdf');
    }
}
||
459 |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.