1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException; |
6
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\CodePage; |
7
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\File; |
8
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\OLERead; |
9
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\StringHelper; |
10
|
|
|
use PhpOffice\PhpSpreadsheet\Style\Border; |
11
|
|
|
|
12
|
|
|
class XlsBase extends BaseReader |
13
|
|
|
{ |
14
|
|
|
// Bit masks built by shifting a byte into bits 24-31 of a 32-bit word.
final protected const HIGH_ORDER_BIT = 0x80 << 24;
final protected const FC000000 = 0xFC << 24;
final protected const FE000000 = 0xFE << 24;

// ParseXL definitions
final public const XLS_BIFF8 = 0x0600;
final public const XLS_BIFF7 = 0x0500;
final public const XLS_WORKBOOKGLOBALS = 0x0005;
final public const XLS_WORKSHEET = 0x0010;

// Record identifiers
final public const XLS_TYPE_FORMULA = 0x0006;
final public const XLS_TYPE_EOF = 0x000A;
final public const XLS_TYPE_PROTECT = 0x0012;
final public const XLS_TYPE_OBJECTPROTECT = 0x0063;
final public const XLS_TYPE_SCENPROTECT = 0x00DD;
final public const XLS_TYPE_PASSWORD = 0x0013;
final public const XLS_TYPE_HEADER = 0x0014;
final public const XLS_TYPE_FOOTER = 0x0015;
final public const XLS_TYPE_EXTERNSHEET = 0x0017;
final public const XLS_TYPE_DEFINEDNAME = 0x0018;
final public const XLS_TYPE_VERTICALPAGEBREAKS = 0x001A;
final public const XLS_TYPE_HORIZONTALPAGEBREAKS = 0x001B;
final public const XLS_TYPE_NOTE = 0x001C;
final public const XLS_TYPE_SELECTION = 0x001D;
final public const XLS_TYPE_DATEMODE = 0x0022;
final public const XLS_TYPE_EXTERNNAME = 0x0023;
final public const XLS_TYPE_LEFTMARGIN = 0x0026;
final public const XLS_TYPE_RIGHTMARGIN = 0x0027;
final public const XLS_TYPE_TOPMARGIN = 0x0028;
final public const XLS_TYPE_BOTTOMMARGIN = 0x0029;
final public const XLS_TYPE_PRINTGRIDLINES = 0x002B;
final public const XLS_TYPE_FILEPASS = 0x002F;
final public const XLS_TYPE_FONT = 0x0031;
final public const XLS_TYPE_CONTINUE = 0x003C;
final public const XLS_TYPE_PANE = 0x0041;
final public const XLS_TYPE_CODEPAGE = 0x0042;
final public const XLS_TYPE_DEFCOLWIDTH = 0x0055;
final public const XLS_TYPE_OBJ = 0x005D;
final public const XLS_TYPE_COLINFO = 0x007D;
final public const XLS_TYPE_IMDATA = 0x007F;
final public const XLS_TYPE_SHEETPR = 0x0081;
final public const XLS_TYPE_HCENTER = 0x0083;
final public const XLS_TYPE_VCENTER = 0x0084;
final public const XLS_TYPE_SHEET = 0x0085;
final public const XLS_TYPE_PALETTE = 0x0092;
final public const XLS_TYPE_SCL = 0x00A0;
final public const XLS_TYPE_PAGESETUP = 0x00A1;
final public const XLS_TYPE_MULRK = 0x00BD;
final public const XLS_TYPE_MULBLANK = 0x00BE;
final public const XLS_TYPE_DBCELL = 0x00D7;
final public const XLS_TYPE_XF = 0x00E0;
final public const XLS_TYPE_MERGEDCELLS = 0x00E5;
final public const XLS_TYPE_MSODRAWINGGROUP = 0x00EB;
final public const XLS_TYPE_MSODRAWING = 0x00EC;
final public const XLS_TYPE_SST = 0x00FC;
final public const XLS_TYPE_LABELSST = 0x00FD;
final public const XLS_TYPE_EXTSST = 0x00FF;
final public const XLS_TYPE_EXTERNALBOOK = 0x01AE;
final public const XLS_TYPE_DATAVALIDATIONS = 0x01B2;
final public const XLS_TYPE_TXO = 0x01B6;
final public const XLS_TYPE_HYPERLINK = 0x01B8;
final public const XLS_TYPE_DATAVALIDATION = 0x01BE;
final public const XLS_TYPE_DIMENSION = 0x0200;
final public const XLS_TYPE_BLANK = 0x0201;
final public const XLS_TYPE_NUMBER = 0x0203;
final public const XLS_TYPE_LABEL = 0x0204;
final public const XLS_TYPE_BOOLERR = 0x0205;
final public const XLS_TYPE_STRING = 0x0207;
final public const XLS_TYPE_ROW = 0x0208;
final public const XLS_TYPE_INDEX = 0x020B;
final public const XLS_TYPE_ARRAY = 0x0221;
final public const XLS_TYPE_DEFAULTROWHEIGHT = 0x0225;
final public const XLS_TYPE_WINDOW2 = 0x023E;
final public const XLS_TYPE_RK = 0x027E;
final public const XLS_TYPE_STYLE = 0x0293;
final public const XLS_TYPE_FORMAT = 0x041E;
final public const XLS_TYPE_SHAREDFMLA = 0x04BC;
final public const XLS_TYPE_BOF = 0x0809;
final public const XLS_TYPE_SHEETPROTECTION = 0x0867;
final public const XLS_TYPE_RANGEPROTECTION = 0x0868;
final public const XLS_TYPE_SHEETLAYOUT = 0x0862;
final public const XLS_TYPE_XFEXT = 0x087D;
final public const XLS_TYPE_PAGELAYOUTVIEW = 0x088B;
final public const XLS_TYPE_CFHEADER = 0x01B0;
final public const XLS_TYPE_CFRULE = 0x01B1;
final public const XLS_TYPE_UNKNOWN = 0xFFFF;

// Encryption type
final public const MS_BIFF_CRYPTO_NONE = 0;
final public const MS_BIFF_CRYPTO_XOR = 1;
final public const MS_BIFF_CRYPTO_RC4 = 2;

// Size of stream blocks when using RC4 encryption
final public const REKEY_BLOCK = 0x400;
109
|
|
|
|
110
|
|
|
// Border styles keyed by their numeric border-style code (0x00-0x0F).
// Should be kept consistent with Writer\Xls\Style\CellBorder.
final public const BORDER_STYLE_MAP = [
    0x00 => Border::BORDER_NONE,
    0x01 => Border::BORDER_THIN,
    0x02 => Border::BORDER_MEDIUM,
    0x03 => Border::BORDER_DASHED,
    0x04 => Border::BORDER_DOTTED,
    0x05 => Border::BORDER_THICK,
    0x06 => Border::BORDER_DOUBLE,
    0x07 => Border::BORDER_HAIR,
    0x08 => Border::BORDER_MEDIUMDASHED,
    0x09 => Border::BORDER_DASHDOT,
    0x0A => Border::BORDER_MEDIUMDASHDOT,
    0x0B => Border::BORDER_DASHDOTDOT,
    0x0C => Border::BORDER_MEDIUMDASHDOTDOT,
    0x0D => Border::BORDER_SLANTDASHDOT,
    0x0E => Border::BORDER_OMIT,
    0x0F => Border::BORDER_OMIT,
];
129
|
|
|
|
130
|
|
|
/**
 * Codepage set in the Excel file being read.
 *
 * Only important for BIFF5 (Excel 5.0 - Excel 95); for BIFF8
 * (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'.
 */
protected string $codepage = '';

/**
 * Store the codepage to use, after checking it with CodePage::validate().
 *
 * @param string $codepage Codepage name to store
 *
 * @throws PhpSpreadsheetException when CodePage::validate() returns false
 */
public function setCodepage(string $codepage): void
{
    $isKnown = CodePage::validate($codepage);
    if ($isKnown === false) {
        throw new PhpSpreadsheetException('Unknown codepage: ' . $codepage);
    }

    $this->codepage = $codepage;
}
144
|
|
|
|
145
|
|
|
/**
 * Return the codepage currently stored on this reader
 * (an empty string until one has been set).
 */
public function getCodepage(): string
{
    return $this->codepage;
}
149
|
|
|
|
150
|
|
|
/**
 * Can the current IReader read the file?
 *
 * Returns false when File::testFileNoThrow() rejects the file, when the
 * OLE reader leaves its `wrkbook` property null, or when reading raises
 * a PhpSpreadsheetException; returns true otherwise.
 */
public function canRead(string $filename): bool
{
    if (File::testFileNoThrow($filename) === false) {
        return false;
    }

    try {
        // Use ParseXL for the hard work: load the OLE container and look for the workbook data.
        $ole = new OLERead();
        $ole->read($filename);

        if ($ole->wrkbook === null) {
            throw new Exception('The filename ' . $filename . ' is not recognised as a Spreadsheet file');
        }
    } catch (PhpSpreadsheetException) {
        return false;
    }

    return true;
}
174
|
|
|
|
175
|
|
|
/**
 * Extract an RGB color from its encoded form.
 * OpenOffice.org's Documentation of the Microsoft Excel File Format, section 2.5.4.
 *
 * @param string $rgb Encoded RGB value (4 bytes); only the first three bytes are read
 *
 * @return array{rgb: string} Upper-case hex notation, e.g. ['rgb' => 'FF00FC']
 */
protected static function readRGB(string $rgb): array
{
    // offsets 0, 1 and 2 (one byte each): red, green and blue components
    $red = ord($rgb[0]);
    $green = ord($rgb[1]);
    $blue = ord($rgb[2]);

    return ['rgb' => sprintf('%02X%02X%02X', $red, $green, $blue)];
}
197
|
|
|
|
198
|
|
|
/**
 * Extracts an Excel Unicode short string (8-bit string length).
 * OpenOffice documentation: 2.5.3.
 *
 * The first byte is the character count; the rest is handed to
 * readUnicodeString(), which works out where the string ends.
 *
 * @return array The readUnicodeString() result with 'size' increased by the 1-byte length field
 */
protected static function readUnicodeStringShort(string $subData): array
{
    // offset: 0; size: 1; length of the string (character count)
    $length = ord($subData[0]);

    $result = self::readUnicodeString(substr($subData, 1), $length);

    // account for the length byte itself
    $result['size'] += 1;

    return $result;
}
215
|
|
|
|
216
|
|
|
/**
 * Extracts an Excel Unicode long string (16-bit string length).
 * OpenOffice documentation: 2.5.3.
 *
 * This function is under construction: it still needs to support rich text
 * and Asian phonetic settings.
 *
 * @return array The readUnicodeString() result with 'size' increased by the 2-byte length field
 */
protected static function readUnicodeStringLong(string $subData): array
{
    // offset: 0; size: 2; length of the string (character count)
    $length = self::getUInt2d($subData, 0);

    $result = self::readUnicodeString(substr($subData, 2), $length);

    // account for the two length bytes themselves
    $result['size'] = $result['size'] + 2;

    return $result;
}
233
|
|
|
|
234
|
|
|
/**
 * Read a Unicode string that has no length field but a known character count.
 * OpenOffice.org's Documentation of the Microsoft Excel File Format, section 2.5.3.
 *
 * This function is under construction: rich text and Asian phonetic settings
 * (option flag bits 3 and 2) are not interpreted, and the character array is
 * assumed to start directly after the flags byte, which is generally wrong
 * when those settings are present.
 *
 * @param string $subData Data starting at the option flags byte
 * @param int $characterCount Number of characters in the string
 *
 * @return array 'value' is the converted string, 'size' the byte size including the option flags
 */
protected static function readUnicodeString(string $subData, int $characterCount): array
{
    // offset: 0; size: 1; option flags
    $optionFlags = ord($subData[0]);

    // bit: 0; mask: 0x01; character compression (0 = compressed 8-bit, 1 = uncompressed 16-bit)
    $isCompressed = ($optionFlags & 0x01) === 0;

    // offset: 1; size: var; character array (one byte per character when compressed, two otherwise)
    $byteCount = $isCompressed ? $characterCount : 2 * $characterCount;
    $value = self::encodeUTF16(substr($subData, 1, $byteCount), $isCompressed);

    return [
        'value' => $value,
        'size' => 1 + $byteCount, // the size in bytes including the option flags
    ];
}
261
|
|
|
|
262
|
|
|
/**
 * Convert a UTF-8 string to a string surrounded by double quotes, doubling any
 * embedded double quote. Used for explicit string tokens in formulas.
 * Example: hello"world --> "hello""world".
 *
 * @param string $value UTF-8 encoded string
 */
protected static function UTF8toExcelDoubleQuoted(string $value): string
{
    $escaped = str_replace('"', '""', $value);

    return sprintf('"%s"', $escaped);
}
272
|
|
|
|
273
|
|
|
/**
 * Reads the first 8 bytes of a string and returns them as an IEEE 754 number.
 *
 * The value is rebuilt arithmetically from the two 32-bit words: the word at
 * offset 4 supplies sign, exponent and the top 20 mantissa bits, the word at
 * offset 0 supplies the remaining 32 mantissa bits.
 *
 * @param string $data Binary string that is at least 8 bytes long
 */
protected static function extractNumber(string $data): int|float
{
    $highWord = self::getInt4d($data, 4);
    $lowWord = self::getInt4d($data, 0);

    $isNegative = ($highWord & self::HIGH_ORDER_BIT) >> 31;
    // stored exponent with the bias of 1023 removed
    $exponent = (($highWord & 0x7FF00000) >> 20) - 1023;
    // top 20 mantissa bits with the implicit leading 1 placed at bit 20
    $highMantissa = 0x100000 | ($highWord & 0x000FFFFF);

    // the low word is split into its top bit and the remaining 31 bits
    $lowTopBit = ($lowWord & self::HIGH_ORDER_BIT) >> 31;
    $lowRest = $lowWord & 0x7FFFFFFF;

    $value = $highMantissa / 2 ** (20 - $exponent);

    if ($lowTopBit !== 0) {
        $value += 1 / 2 ** (21 - $exponent);
    }

    if ($lowRest !== 0) {
        $value += $lowRest / 2 ** (52 - $exponent);
    }

    return $isNegative ? $value * -1 : $value;
}
302
|
|
|
|
303
|
|
|
/**
 * Decode an RK-encoded number.
 *
 * Bit 1 selects the encoding: when set, the remaining upper bits are a
 * signed integer; otherwise they are the most significant 30 bits of a
 * 64-bit IEEE 754 value whose other 34 bits are taken to be 0.
 * Bit 0, when set, means the decoded value is divided by 100.
 */
protected static function getIEEE754(int $rknum): float|int
{
    $isInteger = ($rknum & 0x02) !== 0;
    $isHundredth = ($rknum & 0x01) !== 0;

    if ($isInteger) {
        $value = $rknum >> 2;
    } else {
        // The RK format keeps only the upper 30 bits of the double, so sign,
        // exponent and the top of the mantissa are all taken from $rknum.
        $signBit = ($rknum & self::HIGH_ORDER_BIT) >> 31;
        $biasedExponent = ($rknum & 0x7FF00000) >> 20;
        // the mask 0x000FFFFC leaves out the two flag bits
        $mantissa = 0x100000 | ($rknum & 0x000FFFFC);

        $value = $mantissa / 2 ** (20 - ($biasedExponent - 1023));
        if ($signBit) {
            $value = -1 * $value;
        }
    }

    return $isHundredth ? $value / 100 : $value;
}
328
|
|
|
|
329
|
|
|
/**
 * Get a UTF-8 string from a (compressed or uncompressed) UTF-16 string.
 *
 * @param string $string UTF-16LE data, or its compressed one-byte-per-character form
 * @param bool $compressed True when $string must first be expanded with uncompressByteString()
 */
protected static function encodeUTF16(string $string, bool $compressed = false): string
{
    $utf16 = $compressed ? self::uncompressByteString($string) : $string;

    return StringHelper::convertEncoding($utf16, 'UTF-8', 'UTF-16LE');
}
340
|
|
|
|
341
|
|
|
/**
 * Convert a UTF-16 string in compressed notation to uncompressed form by
 * appending a NUL byte after every input byte. Only used for BIFF8.
 */
protected static function uncompressByteString(string $string): string
{
    // chunk_split() would still append the terminator to an empty input,
    // so the empty string is returned as-is.
    if ($string === '') {
        return '';
    }

    return chunk_split($string, 1, "\0");
}
354
|
|
|
|
355
|
|
|
/**
 * Convert a string from this reader's stored codepage to UTF-8.
 * Only used for BIFF5.
 */
protected function decodeCodepage(string $string): string
{
    $sourceEncoding = $this->codepage;

    return StringHelper::convertEncoding($string, 'UTF-8', $sourceEncoding);
}
362
|
|
|
|
363
|
|
|
/**
 * Read a 16-bit unsigned integer stored low byte first.
 *
 * @param string $data Binary data
 * @param int $pos Offset of the low byte within $data
 */
public static function getUInt2d(string $data, int $pos): int
{
    $lowByte = ord($data[$pos]);
    $highByte = ord($data[$pos + 1]);

    return ($highByte << 8) | $lowByte;
}
370
|
|
|
|
371
|
|
|
/**
 * Read a 16-bit signed integer stored low byte first.
 *
 * The bytes are combined explicitly as little-endian instead of going through
 * unpack('s'), whose byte order is that of the host machine and would give a
 * byte-swapped result on big-endian platforms.
 *
 * @param string $data Binary data
 * @param int $pos Offset of the low byte within $data
 */
public static function getInt2d(string $data, int $pos): int
{
    $unsigned = ord($data[$pos]) | (ord($data[$pos + 1]) << 8);

    // values with bit 15 set represent negative numbers (two's complement)
    return $unsigned >= 0x8000 ? $unsigned - 0x10000 : $unsigned;
}
378
|
|
|
|
379
|
|
|
/**
 * Read a 32-bit signed integer stored low byte first.
 *
 * The most significant byte is handled separately from the other three so
 * that the `<< 24` step gives the correct signed result on both 32-bit and
 * 64-bit systems.
 *
 * @param string $data Binary data
 * @param int $pos Offset of the lowest byte within $data
 */
public static function getInt4d(string $data, int $pos): int
{
    $topByte = ord($data[$pos + 3]);

    // a top byte of 128 or more means the number is negative
    $topBits = $topByte >= 128
        ? -abs((256 - $topByte) << 24)
        : ($topByte & 127) << 24;

    $byte0 = ord($data[$pos]);
    $byte1 = ord($data[$pos + 1]);
    $byte2 = ord($data[$pos + 2]);

    return $byte0 | ($byte1 << 8) | ($byte2 << 16) | $topBits;
}
397
|
|
|
} |
398
|
|
|
|