Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | <?php |
||
31 | class OLERead |
||
32 | { |
||
33 | private $data = ''; |
||
34 | |||
35 | // Size of a sector = 512 bytes |
||
36 | const BIG_BLOCK_SIZE = 0x200; |
||
37 | |||
38 | // Size of a short sector = 64 bytes |
||
39 | const SMALL_BLOCK_SIZE = 0x40; |
||
40 | |||
41 | // Size of a directory entry always = 128 bytes |
||
42 | const PROPERTY_STORAGE_BLOCK_SIZE = 0x80; |
||
43 | |||
44 | // Minimum size of a standard stream = 4096 bytes, streams smaller than this are stored as short streams |
||
45 | const SMALL_BLOCK_THRESHOLD = 0x1000; |
||
46 | |||
47 | // header offsets |
||
48 | const NUM_BIG_BLOCK_DEPOT_BLOCKS_POS = 0x2c; |
||
49 | const ROOT_START_BLOCK_POS = 0x30; |
||
50 | const SMALL_BLOCK_DEPOT_BLOCK_POS = 0x3c; |
||
51 | const EXTENSION_BLOCK_POS = 0x44; |
||
52 | const NUM_EXTENSION_BLOCK_POS = 0x48; |
||
53 | const BIG_BLOCK_DEPOT_BLOCKS_POS = 0x4c; |
||
54 | |||
55 | // property storage offsets (directory offsets) |
||
56 | const SIZE_OF_NAME_POS = 0x40; |
||
57 | const TYPE_POS = 0x42; |
||
58 | const START_BLOCK_POS = 0x74; |
||
59 | const SIZE_POS = 0x78; |
||
60 | |||
/** @var null|int Index in the directory entry list of the Workbook/Book entry, null if not found */
public $wrkbook = null;

/** @var null|int Index in the directory entry list of the SummaryInformation entry, null if not found */
public $summaryInformation = null;

/** @var null|int Index in the directory entry list of the DocumentSummaryInformation entry, null if not found */
public $documentSummaryInformation = null;

// The properties below were previously created on the fly as dynamic properties.
// They are declared here so that typos are caught and tooling can see them.
// NOTE(review): kept public because dynamic properties are implicitly public;
// narrow to private once it is confirmed that no caller reads them.

/** @var int Total number of sectors used for the SAT */
public $numBigBlockDepotBlocks;

/** @var int SecID of the first sector of the directory stream */
public $rootStartBlock;

/** @var int SecID of the first sector of the SSAT (or -2 if not extant) */
public $sbdStartBlock;

/** @var int SecID of the first sector of the MSAT (or -2 if no additional sectors are used) */
public $extensionBlock;

/** @var int Total number of sectors used by the MSAT */
public $numExtensionBlocks;

/** @var string Concatenated raw bytes of the big-block allocation table sectors */
public $bigBlockChain;

/** @var string Concatenated raw bytes of the small-block allocation table sectors */
public $smallBlockChain;

/** @var string Raw bytes of the directory stream */
public $entry;

/** @var array[] Directory entries, each with the keys 'name', 'type', 'startBlock' and 'size' */
public $props = [];

/** @var int Index in the directory entry list of the root entry */
public $rootentry;
||
64 | |||
/**
 * Read the file.
 *
 * Checks the 8-byte OLE identifier, loads the whole file, reads the header
 * fields, assembles the big-block and small-block allocation chains, loads
 * the directory stream and finally indexes its entries.
 *
 * @param string $pFilename Filename
 *
 * @throws ReaderException If the file does not start with the OLE identifier or cannot be read
 */
public function read($pFilename)
{
    File::assertFile($pFilename);

    // Get the file identifier
    // Don't bother reading the whole file until we know it's a valid OLE file
    $this->data = file_get_contents($pFilename, false, null, 0, 8);

    // Check OLE identifier (strict: a failed read yields false, which must not match)
    $identifierOle = pack('CCCCCCCC', 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1);
    if ($this->data !== $identifierOle) {
        throw new ReaderException('The filename ' . $pFilename . ' is not recognised as an OLE file');
    }

    // Get the file data
    $contents = file_get_contents($pFilename);
    if ($contents === false) {
        // Fail here rather than let every later offset read operate on false
        throw new ReaderException('The filename ' . $pFilename . ' could not be read');
    }
    $this->data = $contents;

    // Total number of sectors used for the SAT
    $this->numBigBlockDepotBlocks = self::getInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);

    // SecID of the first sector of the directory stream
    $this->rootStartBlock = self::getInt4d($this->data, self::ROOT_START_BLOCK_POS);

    // SecID of the first sector of the SSAT (or -2 if not extant)
    $this->sbdStartBlock = self::getInt4d($this->data, self::SMALL_BLOCK_DEPOT_BLOCK_POS);

    // SecID of the first sector of the MSAT (or -2 if no additional sectors are used)
    $this->extensionBlock = self::getInt4d($this->data, self::EXTENSION_BLOCK_POS);

    // Total number of sectors used by MSAT
    $this->numExtensionBlocks = self::getInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS);

    $bigBlockDepotBlocks = [];
    $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS;

    $bbdBlocks = $this->numBigBlockDepotBlocks;

    // When extension blocks exist, the header only holds as many SecIDs as fit
    // between BIG_BLOCK_DEPOT_BLOCKS_POS and the end of the first sector
    if ($this->numExtensionBlocks != 0) {
        $bbdBlocks = (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
    }

    // SecIDs stored directly in the header
    for ($i = 0; $i < $bbdBlocks; ++$i) {
        $bigBlockDepotBlocks[$i] = self::getInt4d($this->data, $pos);
        $pos += 4;
    }

    // Remaining SecIDs are stored in the chain of extension blocks
    for ($j = 0; $j < $this->numExtensionBlocks; ++$j) {
        $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE;
        $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1);

        for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) {
            $bigBlockDepotBlocks[$i] = self::getInt4d($this->data, $pos);
            $pos += 4;
        }

        $bbdBlocks += $blocksToRead;
        if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
            // The 4 bytes following the SecIDs just read hold the next extension block
            $this->extensionBlock = self::getInt4d($this->data, $pos);
        }
    }

    // Concatenate every allocation-table sector into one lookup string
    $this->bigBlockChain = '';
    for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) {
        $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE;
        $this->bigBlockChain .= substr($this->data, $pos, self::BIG_BLOCK_SIZE);
    }

    // Follow the small-block allocation table sectors through the big-block chain
    $sbdBlock = $this->sbdStartBlock;
    $this->smallBlockChain = '';
    while ($sbdBlock != -2) {
        $pos = ($sbdBlock + 1) * self::BIG_BLOCK_SIZE;
        $this->smallBlockChain .= substr($this->data, $pos, self::BIG_BLOCK_SIZE);

        $sbdBlock = self::getInt4d($this->bigBlockChain, $sbdBlock * 4);
    }

    // read the directory stream
    $this->entry = $this->_readData($this->rootStartBlock);

    $this->readPropertySets();
}
|
161 | |||
/**
 * Extract binary stream data.
 *
 * Streams smaller than SMALL_BLOCK_THRESHOLD are assembled from short sectors
 * inside the root entry's stream, following the small-block chain. All other
 * streams are read sector by sector through _readData().
 *
 * @param null|int $stream Index of the directory entry to read, or null
 *
 * @return null|string The stream contents, or null when $stream is null
 */
public function getStream($stream)
{
    if ($stream === null) {
        return null;
    }

    $entry = $this->props[$stream];

    // Standard stream: identical to walking the big-block chain, so reuse the helper
    if ($entry['size'] >= self::SMALL_BLOCK_THRESHOLD) {
        return $this->_readData($entry['startBlock']);
    }

    // Short stream: its sectors live inside the root entry's stream
    $rootdata = $this->_readData($this->props[$this->rootentry]['startBlock']);

    $streamData = '';
    $block = $entry['startBlock'];

    while ($block != -2) {
        $pos = $block * self::SMALL_BLOCK_SIZE;
        $streamData .= substr($rootdata, $pos, self::SMALL_BLOCK_SIZE);

        $block = self::getInt4d($this->smallBlockChain, $block * 4);
    }

    return $streamData;
}
||
210 | |||
/**
 * Read a standard stream by concatenating its sectors.
 *
 * Starting at the given sector, appends BIG_BLOCK_SIZE bytes per sector and
 * looks up the next sector ID in the big-block chain until the ID is -2.
 *
 * @param int $bl Sector ID where the stream starts
 *
 * @return string Data for standard stream
 */
private function _readData($bl)
{
    $collected = '';

    for ($sector = $bl; $sector != -2; $sector = self::getInt4d($this->bigBlockChain, $sector * 4)) {
        // Sector n starts one sector past n * BIG_BLOCK_SIZE, hence the + 1
        $start = ($sector + 1) * self::BIG_BLOCK_SIZE;
        $collected .= substr($this->data, $start, self::BIG_BLOCK_SIZE);
    }

    return $collected;
}
||
231 | |||
/**
 * Read entries in the directory stream.
 *
 * Appends one record per entry to $this->props and remembers the index of the
 * workbook, root, summary and document-summary entries when they are found.
 */
private function readPropertySets()
{
    $entryLen = strlen($this->entry);

    // loop through entries, each entry is 128 bytes
    for ($offset = 0; $offset < $entryLen; $offset += self::PROPERTY_STORAGE_BLOCK_SIZE) {
        // entry data (128 bytes)
        $d = substr($this->entry, $offset, self::PROPERTY_STORAGE_BLOCK_SIZE);

        // A truncated trailing entry would be read past its end below; stop instead
        if (strlen($d) < self::PROPERTY_STORAGE_BLOCK_SIZE) {
            break;
        }

        // size in bytes of name (2 bytes, low byte first)
        $nameSize = ord($d[self::SIZE_OF_NAME_POS]) | (ord($d[self::SIZE_OF_NAME_POS + 1]) << 8);

        // type of entry
        $type = ord($d[self::TYPE_POS]);

        // sectorID of first sector or short sector, if this entry refers to a stream (the case with workbook)
        // sectorID of first sector of the short-stream container stream, if this entry is root entry
        $startBlock = self::getInt4d($d, self::START_BLOCK_POS);

        $size = self::getInt4d($d, self::SIZE_POS);

        // Drop NUL bytes from the stored name
        // NOTE(review): presumably the name is UTF-16LE; confirm against the format spec
        $name = str_replace("\x00", '', substr($d, 0, $nameSize));

        $this->props[] = [
            'name' => $name,
            'type' => $type,
            'startBlock' => $startBlock,
            'size' => $size,
        ];

        // index of the entry that was just appended
        $index = count($this->props) - 1;

        // tmp helper to simplify checks
        $upName = strtoupper($name);

        // Workbook directory entry (BIFF5 uses Book, BIFF8 uses Workbook)
        if (($upName === 'WORKBOOK') || ($upName === 'BOOK')) {
            $this->wrkbook = $index;
        } elseif ($upName === 'ROOT ENTRY' || $upName === 'R') {
            // Root entry
            $this->rootentry = $index;
        }

        // Summary information
        if ($name === chr(5) . 'SummaryInformation') {
            $this->summaryInformation = $index;
        }

        // Additional Document Summary information
        if ($name === chr(5) . 'DocumentSummaryInformation') {
            $this->documentSummaryInformation = $index;
        }
    }
}
|
290 | |||
291 | /** |
||
292 | * Read 4 bytes of data at specified position. |
||
293 | * |
||
294 | * @param string $data |
||
295 | * @param int $pos |
||
296 | * |
||
297 | * @return int |
||
298 | */ |
||
299 | 5 | private static function getInt4d($data, $pos) |
|
327 | } |
||
328 |
In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:
Generally, it is good practice to explicitly declare properties to avoid accidental typos and to provide IDE auto-completion: