Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like UnTgz often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UnTgz and, based on these observations, to apply Extract Interface as well.
1 | <?php |
||
29 | class UnTgz |
||
30 | { |
||
31 | /** |
||
32 | * Holds the return array of files processed |
||
33 | * @var mixed[] |
||
34 | */ |
||
35 | protected $return = array(); |
||
36 | |||
37 | /** |
||
38 | * Holds the data found in each tar file header block |
||
39 | * @var mixed[] |
||
40 | */ |
||
41 | protected $_current = array(); |
||
42 | |||
43 | /** |
||
44 | * Holds the file pointer, generally to the 512 block we are working on |
||
45 | * @var int |
||
46 | */ |
||
47 | protected $_offset = 0; |
||
48 | |||
49 | /** |
||
50 | * If the file passes or fails crc check |
||
51 | * @var boolean |
||
52 | */ |
||
53 | protected $_crc_check = false; |
||
54 | |||
55 | /** |
||
56 | * The current crc value of the data |
||
57 | * @var string|int |
||
58 | */ |
||
59 | protected $_crc; |
||
60 | |||
61 | /** |
||
62 | * The claimed size of the data in the tarball |
||
63 | * @var int |
||
64 | */ |
||
65 | protected $_size; |
||
66 | |||
67 | /** |
||
68 | * If we are going to write out the files processed |
||
69 | * @var boolean |
||
70 | */ |
||
71 | protected $_write_this = false; |
||
72 | |||
73 | /** |
||
74 | * If we will skip a file we found |
||
75 | * @var boolean |
||
76 | */ |
||
77 | protected $_skip = false; |
||
78 | |||
79 | /** |
||
80 | * If we found a file that was requested ($files_to_extract) |
||
81 | * @var boolean |
||
82 | */ |
||
83 | protected $_found = false; |
||
84 | |||
85 | /** |
||
86 | * Current file header we are working on |
||
87 | * @var mixed[]|string |
||
88 | */ |
||
89 | protected $_header = array(); |
||
90 | |||
91 | /** |
||
92 | * Array of file names we want to extract from the archive |
||
93 | * @var null|string[] |
||
94 | */ |
||
95 | protected $files_to_extract; |
||
96 | |||
97 | /** |
||
98 | * Holds the data string passed to the function |
||
99 | * @var string |
||
100 | */ |
||
101 | protected $data; |
||
102 | |||
103 | /** |
||
104 | * Location to write the files. |
||
105 | * @var string |
||
106 | */ |
||
107 | protected $destination; |
||
108 | |||
109 | /** |
||
110 | * If we are looking for a single specific file |
||
111 | * @var boolean|string |
||
112 | */ |
||
113 | protected $single_file; |
||
114 | |||
115 | /** |
||
116 | * If we can overwrite a file with the same name in the destination |
||
117 | * @var boolean |
||
118 | */ |
||
119 | protected $overwrite; |
||
120 | |||
121 | /** |
||
122 | * Class initialization, passes variables, loads dependencies |
||
123 | * |
||
124 | * @param string $data |
||
125 | * @param string $destination |
||
126 | * @param bool|string $single_file |
||
127 | * @param bool $overwrite |
||
128 | * @param null|string[] $files_to_extract |
||
129 | * |
||
130 | * @throws Elk_Exception package_no_zlib |
||
131 | */ |
||
132 | View Code Duplication | public function __construct($data, $destination, $single_file = false, $overwrite = false, $files_to_extract = null) |
|
156 | |||
157 | /** |
||
158 | * Class controller, calls the ungzip / untar functions in required order |
||
159 | * |
||
160 | * @return boolean|array |
||
161 | */ |
||
162 | public function read_tgz_data() |
||
192 | |||
193 | /** |
||
194 | * Loads the 10-byte header and validates that it is a tgz file |
||
195 | * |
||
196 | * @return boolean |
||
197 | */ |
||
198 | public function check_valid_tgz() |
||
210 | |||
211 | /** |
||
212 | * Reads the archive file header |
||
213 | * |
||
214 | * What it does: |
||
215 | * |
||
216 | * - validates that the file is a tar.gz |
||
217 | * - validates that it is compressed with deflate |
||
218 | * - processes header information so we can set the start of archive data |
||
219 | * - archive comment |
||
220 | * - archive filename |
||
221 | * - header CRC |
||
222 | * |
||
223 | * Signature Definition: |
||
224 | * - identification byte 1 and 2: 2 bytes, 0x1f 0x8b |
||
225 | * - Compression Method: 1 byte |
||
226 | * - Flags: 1 byte |
||
227 | * - Last modification time Contains a POSIX timestamp, 4 bytes |
||
228 | * - Compression flags (or extra flags): 1 byte |
||
229 | * - Operating system, Value that indicates on which operating system file was created, 1 byte |
||
230 | */ |
||
231 | private function _read_header_tgz() |
||
273 | |||
274 | /** |
||
275 | * We now know where the start of the compressed data is in the archive |
||
276 | * The data is terminated with 4 bytes of CRC and 4 bytes of the original input size |
||
277 | */ |
||
278 | public function _ungzip_data() |
||
292 | |||
293 | /** |
||
294 | * Does the work of un tarballing the now ungzip'ed tar file |
||
295 | * |
||
296 | * What it does |
||
297 | * - Assumes it is in Ustar format |
||
298 | */ |
||
299 | private function _process_files() |
||
354 | |||
355 | /** |
||
356 | * Reads the tar file header block; it is a 512-byte block and contains the following: |
||
357 | * |
||
358 | * Signature Definition: |
||
359 | * - char filename[100]; File name |
||
360 | * - char mode[8]; File mode |
||
361 | * - char uid[8]; Owner's numeric user ID |
||
362 | * - char gid[8]; Group's numeric user ID |
||
363 | * - char size[12]; File size in bytes (octal base) |
||
364 | * - char mtime[12]; Last modification time in numeric Unix time format (octal) |
||
365 | * - char checksum[8]; Checksum for header record |
||
366 | * - char type[1]; Link indicator (file type 0=normal, 1=hard, 2=symlink ... 5=directory ... |
||
367 | * - char linkname[100]; Name of linked file |
||
368 | * - char magic[6]; UStar indicator "ustar" |
||
369 | * - char version[2]; UStar version "00" |
||
370 | * - char uname[32]; Owner user name |
||
371 | * - char gname[32]; Owner group name |
||
372 | * - char devmajor[8]; Device major number |
||
373 | * - char devminor[8]; Device minor number |
||
374 | * - char path[155]; Filename prefix |
||
375 | */ |
||
376 | private function _read_current_header() |
||
395 | |||
396 | /** |
||
397 | * Does what it says, determines if we are writing this file or not |
||
398 | */ |
||
399 | private function _determine_write_this() |
||
420 | |||
421 | /** |
||
422 | * Does the actual writing of the file |
||
423 | * |
||
424 | * - Writes the extracted file to disk or if we are extracting a single file |
||
425 | * - it returns the extracted data |
||
426 | */ |
||
427 | private function _write_this_file() |
||
450 | |||
451 | /** |
||
452 | * Checks the saved vs calculated crc values |
||
453 | */ |
||
454 | private function _check_crc() |
||
462 | |||
463 | /** |
||
464 | * Checks the saved vs calculated crc values |
||
465 | */ |
||
466 | private function _check_header_crc() |
||
480 | } |
||
481 |