<?php
/**
 * ZIP file directories reader, for the purposes of upload verification.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * A class for reading ZIP file directories, for the purposes of upload
 * verification.
 *
 * Only a functional interface is provided: ZipDirectoryReader::read(). No
 * access is given to object instances.
 */
class ZipDirectoryReader {
	/**
	 * Read a ZIP file and call a function for each file discovered in it.
	 *
	 * Because this class is aimed at verification, an error is raised on
	 * suspicious or ambiguous input, instead of emulating some standard
	 * behavior.
	 *
	 * @param string $fileName The archive file name
	 * @param callable $callback The callback function. It will be called for each file
	 *   with a single associative array each time, with members:
	 *
	 *      - name: The file name. Directories conventionally have a trailing
	 *        slash.
	 *
	 *      - mtime: The file modification time, in MediaWiki 14-char format
	 *
	 *      - size: The uncompressed file size
	 *
	 * @param array $options An associative array of read options, with the option
	 *    name in the key. This may currently contain:
	 *
	 *      - zip64: If this is set to true, then we will emulate a
	 *        library with ZIP64 support, like OpenJDK 7. If it is set to
	 *        false, then we will emulate a library with no knowledge of
	 *        ZIP64.
	 *
	 *        NOTE: The ZIP64 code is untested and probably doesn't work. It
	 *        turned out to be easier to just reject ZIP64 archive uploads,
	 *        since they are likely to be very rare. Confirming safety of a
	 *        ZIP64 file is fairly complex. What do you do with a file that is
	 *        ambiguous and broken when read with a non-ZIP64 reader, but valid
	 *        when read with a ZIP64 reader? This situation is normal for a
	 *        valid ZIP64 file, and working out what non-ZIP64 readers will make
	 *        of such a file is not trivial.
	 *
	 * @return Status A Status object. The following fatal errors are defined:
	 *
	 *      - zip-file-open-error: The file could not be opened.
	 *
	 *      - zip-wrong-format: The file does not appear to be a ZIP file.
	 *
	 *      - zip-bad: There was something wrong or ambiguous about the file
	 *        data.
	 *
	 *      - zip-unsupported: The ZIP file uses features which
	 *        ZipDirectoryReader does not support.
	 *
	 * The default messages for those fatal errors are written in a way that
	 * makes sense for upload verification.
	 *
	 * If a fatal error is returned, more information about the error will be
	 * available in the debug log.
	 *
	 * Note that the callback function may be called any number of times before
	 * a fatal error is returned. If this occurs, the data sent to the callback
	 * function should be discarded.
	 */
	public static function read( $fileName, $callback, $options = [] ) {
		$zdr = new self( $fileName, $callback, $options );

		return $zdr->execute();
	}

	/** The file name */
	protected $fileName;

	/** The opened file resource */
	protected $file;

	/** The cached length of the file, or null if it has not been loaded yet. */
	protected $fileLength;

	/** A segmented cache of the file contents, keyed by segment index */
	protected $buffer;

	/** The file data callback */
	protected $callback;

	/** The ZIP64 mode */
	protected $zip64 = false;

	/** The stored "end of central directory record" header */
	protected $eocdr;

	/** The stored "ZIP64 end of central directory record" header */
	protected $eocdr64;

	/** The stored "ZIP64 end of central directory locator" header */
	protected $eocdr64Locator;

	/** Reset to an empty array by execute(); not otherwise used in this class */
	protected $data;

	/** The "extra field" ID for ZIP64 central directory entries */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** The segment size for the file contents cache */
	const SEGSIZE = 16384;

	/** The index of the "general field" bit for UTF-8 file names */
	const GENERAL_UTF8 = 11;

	/** The index of the "general field" bit for central directory encryption */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Private constructor
	 * @param string $fileName
	 * @param callable $callback
	 * @param array $options
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->fileName = $fileName;
		$this->callback = $callback;

		if ( isset( $options['zip64'] ) ) {
			$this->zip64 = $options['zip64'];
		}
	}

	/**
	 * Read the directory according to settings in $this.
	 *
	 * The file handle is always closed before returning, including when an
	 * exception other than ZipDirectoryReaderError (for example one thrown by
	 * the callback) propagates to the caller.
	 *
	 * @return Status
	 */
	public function execute() {
		// Binary mode: the archive must be read byte-for-byte on every platform
		$this->file = fopen( $this->fileName, 'rb' );
		$this->data = [];
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			} else {
				// These sentinel values mean "look in the ZIP64 headers"
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff
				) {
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}

				list( $offset, $size ) = $this->findOldCentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			}
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} finally {
			// Release the handle on every exit path, not only the expected ones
			fclose( $this->file );
		}

		return $status;
	}

	/**
	 * Throw an error, and log a debug message
	 * @param mixed $code The error code carried by the exception
	 * @param string $debugMessage Detail written to the debug log only
	 * @throws ZipDirectoryReaderError Always
	 */
	public function error( $code, $debugMessage ) {
		wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Read the header which is at the end of the central directory,
	 * unimaginatively called the "end of central directory record" by the ZIP
	 * spec.
	 *
	 * The result is stored in $this->eocdr, with the extra members
	 * "EOCDR size", "file comment" and "position" added.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readEndOfCentralDirectoryRecord() {
		$info = [
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		];
		$structSize = $this->getStructSize( $info );
		// The comment length is a 2-byte field, so the record starts no more
		// than 65536 + $structSize bytes before the end of the file.
		$startPos = $this->getFileLength() - 65536 - $structSize;
		if ( $startPos < 0 ) {
			$startPos = 0;
		}

		if ( $this->getFileLength() === 0 ) {
			$this->error( 'zip-wrong-format', "The file is empty." );
		}

		$block = $this->getBlock( $startPos );
		$sigPos = strrpos( $block, "PK\x05\x06" );
		if ( $sigPos === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
		$this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

		// The record plus its comment must extend exactly to the end of the file
		if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}
		if ( $this->eocdr['disk'] !== 0
			|| $this->eocdr['CD start disk'] !== 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}
		$this->eocdr += $this->unpack(
			$block,
			[ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ],
			$sigPos + $structSize );
		$this->eocdr['position'] = $startPos + $sigPos;
	}

	/**
	 * Read the header called the "ZIP64 end of central directory locator". An
	 * error will be raised if it does not exist.
	 *
	 * The result is stored in $this->eocdr64Locator.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readZip64EndOfCentralDirectoryLocator() {
		$info = [
			'signature' => [ 'string', 4 ],
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		];
		$structSize = $this->getStructSize( $info );

		// The locator is expected directly before the EOCDR
		$start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
		$block = $this->getBlock( $start, $structSize );
		$this->eocdr64Locator = $data = $this->unpack( $block, $info );

		if ( $data['signature'] !== "PK\x06\x07" ) {
			// Note: Java will allow this and continue to read the
			// EOCDR64, so we have to reject the upload, we can't
			// just use the EOCDR header instead.
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
		}
	}

	/**
	 * Read the header called the "ZIP64 end of central directory record". It
	 * may replace the regular "end of central directory record" in ZIP64 files.
	 *
	 * The result is stored in $this->eocdr64.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readZip64EndOfCentralDirectoryRecord() {
		if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
			|| $this->eocdr64Locator['number of disks'] != 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$info = [
			'signature' => [ 'string', 4 ],
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		];
		$structSize = $this->getStructSize( $info );
		$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
		$this->eocdr64 = $data = $this->unpack( $block, $info );
		if ( $data['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}
		if ( $data['disk'] !== 0
			|| $data['CD start disk'] !== 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * non-ZIP64 reader.
	 *
	 * @return array List containing the offset and the size, in that order.
	 * @throws ZipDirectoryReaderError
	 */
	public function findOldCentralDirectory() {
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$endPos = $this->eocdr['position'];

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return [ $offset, $size ];
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * ZIP64-compliant reader.
	 *
	 * @return array List containing the offset and the size, in that order.
	 * @throws ZipDirectoryReaderError
	 */
	public function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];
		if ( $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff
		) {
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}
		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return [ $offset, $size ];
	}

	/**
	 * Read the central directory at the given location, and invoke the
	 * callback once for each entry found in it.
	 *
	 * @param int $offset Byte offset of the central directory in the file
	 * @param int $size Size of the central directory in bytes
	 * @throws ZipDirectoryReaderError
	 */
	public function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		$fixedInfo = [
			'signature' => [ 'string', 4 ],
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		];
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// The variable-length part directly follows the fixed-size part
			$variableInfo = [
				'name' => [ 'string', $data['name length'] ],
				'extra field' => [ 'string', $data['extra field length'] ],
				'comment' => [ 'string', $data['comment length'] ],
			];
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if ( $this->zip64 && (
					$data['compressed size'] == 0xffffffff
					|| $data['uncompressed size'] == 0xffffffff
					|| $data['local header offset'] == 0xffffffff )
			) {
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Array union: the ZIP64 values win over the 32-bit ones
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			}

			// Compile a data array for the user, with a sensible format
			$userData = [
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			];
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Interpret ZIP64 "extra field" data and return an associative array.
	 * @param string $extraField The raw "extra field" of a directory entry
	 * @return array|bool The unpacked ZIP64 values, or false if the extra
	 *   field contains no record with the ZIP64 header ID.
	 * @throws ZipDirectoryReaderError
	 */
	public function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = [
			'id' => 2,
			'size' => 2,
		];
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = [
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		];

		// Walk the (id, size, data) records until the ZIP64 one is found
		$extraPos = 0;
		while ( $extraPos < strlen( $extraField ) ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				[ 'data' => [ 'string', $extra['size'] ] ],
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file. The value is cached after the first call.
	 * @return int
	 */
	public function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			$this->fileLength = $stat['size'];
		}

		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset. If there are not enough bytes
	 * in the file to satisfy the request, an exception will be thrown.
	 *
	 * @param int $start The byte offset of the start of the block.
	 * @param int $length The number of bytes to return. If omitted, the remainder
	 *    of the file will be returned.
	 *
	 * @return string
	 * @throws ZipDirectoryReaderError If the requested range is not entirely
	 *    inside the file.
	 */
	public function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		// A negative start can result from subtracting header sizes from the
		// length of a short file; reject it here instead of relying on a
		// failing seek later on.
		if ( $start < 0 || $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		if ( $length == 0 ) {
			// An empty range needs no data from the file
			return '';
		}

		// floor() and ceil() return floats; cast so that the segment indexes
		// are proper integer array keys.
		$startSeg = (int)floor( $start / self::SEGSIZE );
		$endSeg = (int)ceil( $end / self::SEGSIZE );

		// Segments $startSeg .. $endSeg - 1 cover the byte range [$start, $end)
		$block = '';
		for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
	 * of length self::SEGSIZE. The result is cached. This is a helper function
	 * for getBlock().
	 *
	 * If there are not enough bytes in the file to satisfy the request, the
	 * return value will be truncated. If a request is made for a segment beyond
	 * the end of the file, an empty string will be returned.
	 *
	 * @param int $segIndex
	 *
	 * @return string
	 * @throws ZipDirectoryReaderError If seeking or reading fails
	 */
	public function getSegment( $segIndex ) {
		if ( !isset( $this->buffer[$segIndex] ) ) {
			$bytePos = $segIndex * self::SEGSIZE;
			if ( $bytePos >= $this->getFileLength() ) {
				$this->buffer[$segIndex] = '';

				return '';
			}
			// fseek() returns 0 on success and -1 on failure
			if ( fseek( $this->file, $bytePos ) ) {
				$this->error( 'zip-bad', "seek to $bytePos failed" );
			}
			$seg = fread( $this->file, self::SEGSIZE );
			if ( $seg === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			$this->buffer[$segIndex] = $seg;
		}

		return $this->buffer[$segIndex];
	}

	/**
	 * Get the size of a structure in bytes. See unpack() for the format of $struct.
	 * @param array $struct
	 * @return int
	 */
	public function getStructSize( $struct ) {
		$size = 0;
		foreach ( $struct as $type ) {
			if ( is_array( $type ) ) {
				// Typed field: the second element is the size in bytes
				list( , $fieldSize ) = $type;
				$size += $fieldSize;
			} else {
				// Integer field: the value itself is the size in bytes
				$size += $type;
			}
		}

		return $size;
	}

	/**
	 * Unpack a binary structure. This is like the built-in unpack() function
	 * except nicer.
	 *
	 * @param string $string The binary data input
	 *
	 * @param array $struct An associative array giving structure members and their
	 *    types. In the key is the field name. The value may be either an
	 *    integer, in which case the field is a little-endian unsigned integer
	 *    encoded in the given number of bytes, or an array, in which case the
	 *    first element of the array is the type name, and the subsequent
	 *    elements are type-dependent parameters. Only one such type is defined:
	 *       - "string": The second array element gives the length of string.
	 *          Not null terminated.
	 *
	 * @param int $offset The offset into the string at which to start unpacking.
	 *
	 * @throws MWException If $struct names an unknown type
	 * @throws ZipDirectoryReaderError If the structure does not fit into
	 *    $string, or an integer is too large to be represented exactly
	 * @return array Unpacked associative array. Note that large integers in the input
	 *    may be represented as floating point numbers in the return value, so
	 *    the use of weak comparison is advised.
	 */
	public function unpack( $string, $struct, $offset = 0 ) {
		$size = $this->getStructSize( $struct );
		if ( $offset + $size > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$data = [];
		$pos = $offset;
		foreach ( $struct as $key => $type ) {
			if ( is_array( $type ) ) {
				list( $typeName, $fieldSize ) = $type;
				switch ( $typeName ) {
					case 'string':
						$data[$key] = substr( $string, $pos, $fieldSize );
						$pos += $fieldSize;
						break;
					default:
						throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
				}
			} else {
				// Unsigned little-endian integer
				$length = intval( $type );

				// Calculate the value. Use an algorithm which automatically
				// upgrades the value to floating point if necessary.
				$value = 0;
				for ( $i = $length - 1; $i >= 0; $i-- ) {
					$value *= 256;
					$value += ord( $string[$pos + $i] );
				}

				// Throw an exception if there was loss of precision
				if ( $value > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}
				$data[$key] = $value;
				$pos += $length;
			}
		}

		return $data;
	}

	/**
	 * Returns a bit from a given position in an integer value, converted to
	 * boolean.
	 *
	 * @param int $value
	 * @param int $bitIndex The index of the bit, where 0 is the LSB.
	 * @return bool
	 */
	public function testBit( $value, $bitIndex ) {
		return (bool)( ( $value >> $bitIndex ) & 1 );
	}

	/**
	 * Debugging helper function which dumps a string in hexdump -C format.
	 * The dump is printed to the output, 16 bytes per row.
	 * @param string $s
	 */
	public function hexDump( $s ) {
		$n = strlen( $s );
		for ( $i = 0; $i < $n; $i += 16 ) {
			printf( "%08X ", $i );
			for ( $j = 0; $j < 16; $j++ ) {
				print " ";
				if ( $j == 8 ) {
					// Extra gap between the two groups of eight bytes
					print " ";
				}
				if ( $i + $j >= $n ) {
					// Pad a missing byte with the width of its two hex digits,
					// so that the text column stays aligned on a short last row
					print "  ";
				} else {
					printf( "%02X", ord( $s[$i + $j] ) );
				}
			}

			print " |";
			for ( $j = 0; $j < 16; $j++ ) {
				if ( $i + $j >= $n ) {
					print " ";
				} elseif ( ctype_print( $s[$i + $j] ) ) {
					print $s[$i + $j];
				} else {
					print '.';
				}
			}
			print "|\n";
		}
	}
}
|
719
|
|
|
|
|
720
|
|
|
/**
 * Internal exception class. Will be caught by private code.
 *
 * Carries the error code passed to the constructor in addition to the
 * regular exception message.
 */
class ZipDirectoryReaderError extends Exception {
	/** The error code passed to the constructor */
	protected $errorCode;

	/**
	 * @param mixed $code The error code; it is also embedded in the
	 *   exception message.
	 */
	public function __construct( $code ) {
		$this->errorCode = $code;
		parent::__construct( "ZipDirectoryReader error: $code" );
	}

	/**
	 * Get the error code this exception was created with.
	 *
	 * @return mixed
	 */
	public function getErrorCode() {
		return $this->errorCode;
	}
}
|
738
|
|
|
|
Declaring only a single property per statement makes it easier to add doc comments later on.
It is also recommended by PSR-2, so it is a common style that many people expect.