Failed Conditions
Pull Request — master (#4118)
by Owen
13:53
created

XlsBase   A

Complexity

Total Complexity 31

Size/Duplication

Total Lines 384
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 31
eloc 183
c 1
b 0
f 0
dl 0
loc 384
rs 9.92

16 Methods

Rating   Name   Duplication   Size   Complexity  
A extractNumber() 0 23 4
A uncompressByteString() 0 9 2
A canRead() 0 19 4
A getIEEE754() 0 24 4
A encodeUTF16() 0 7 2
A UTF8toExcelDoubleQuoted() 0 3 1
A decodeCodepage() 0 3 1
A getUInt2d() 0 3 1
A readUnicodeStringShort() 0 11 1
A readRGB() 0 15 1
A setCodepage() 0 7 2
A getInt4d() 0 14 2
A getCodepage() 0 3 1
A readUnicodeString() 0 20 3
A getInt2d() 0 3 1
A readUnicodeStringLong() 0 11 1
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
6
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
7
use PhpOffice\PhpSpreadsheet\Shared\File;
8
use PhpOffice\PhpSpreadsheet\Shared\OLERead;
9
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
10
use PhpOffice\PhpSpreadsheet\Style\Border;
11
12
class XlsBase extends BaseReader
13
{
14
    // Bit masks for the top bits of a 32-bit word.
    final protected const HIGH_ORDER_BIT = 0x80 << 24;
    final protected const FC000000 = 0xFC << 24;
    final protected const FE000000 = 0xFE << 24;

    // ParseXL definitions
    final public const XLS_BIFF8 = 0x0600;
    final public const XLS_BIFF7 = 0x0500;
    final public const XLS_WORKBOOKGLOBALS = 0x0005;
    final public const XLS_WORKSHEET = 0x0010;

    // Record identifiers
    final public const XLS_TYPE_FORMULA = 0x0006;
    final public const XLS_TYPE_EOF = 0x000A;
    final public const XLS_TYPE_PROTECT = 0x0012;
    final public const XLS_TYPE_OBJECTPROTECT = 0x0063;
    final public const XLS_TYPE_SCENPROTECT = 0x00DD;
    final public const XLS_TYPE_PASSWORD = 0x0013;
    final public const XLS_TYPE_HEADER = 0x0014;
    final public const XLS_TYPE_FOOTER = 0x0015;
    final public const XLS_TYPE_EXTERNSHEET = 0x0017;
    final public const XLS_TYPE_DEFINEDNAME = 0x0018;
    final public const XLS_TYPE_VERTICALPAGEBREAKS = 0x001A;
    final public const XLS_TYPE_HORIZONTALPAGEBREAKS = 0x001B;
    final public const XLS_TYPE_NOTE = 0x001C;
    final public const XLS_TYPE_SELECTION = 0x001D;
    final public const XLS_TYPE_DATEMODE = 0x0022;
    final public const XLS_TYPE_EXTERNNAME = 0x0023;
    final public const XLS_TYPE_LEFTMARGIN = 0x0026;
    final public const XLS_TYPE_RIGHTMARGIN = 0x0027;
    final public const XLS_TYPE_TOPMARGIN = 0x0028;
    final public const XLS_TYPE_BOTTOMMARGIN = 0x0029;
    final public const XLS_TYPE_PRINTGRIDLINES = 0x002B;
    final public const XLS_TYPE_FILEPASS = 0x002F;
    final public const XLS_TYPE_FONT = 0x0031;
    final public const XLS_TYPE_CONTINUE = 0x003C;
    final public const XLS_TYPE_PANE = 0x0041;
    final public const XLS_TYPE_CODEPAGE = 0x0042;
    final public const XLS_TYPE_DEFCOLWIDTH = 0x0055;
    final public const XLS_TYPE_OBJ = 0x005D;
    final public const XLS_TYPE_COLINFO = 0x007D;
    final public const XLS_TYPE_IMDATA = 0x007F;
    final public const XLS_TYPE_SHEETPR = 0x0081;
    final public const XLS_TYPE_HCENTER = 0x0083;
    final public const XLS_TYPE_VCENTER = 0x0084;
    final public const XLS_TYPE_SHEET = 0x0085;
    final public const XLS_TYPE_PALETTE = 0x0092;
    final public const XLS_TYPE_SCL = 0x00A0;
    final public const XLS_TYPE_PAGESETUP = 0x00A1;
    final public const XLS_TYPE_MULRK = 0x00BD;
    final public const XLS_TYPE_MULBLANK = 0x00BE;
    final public const XLS_TYPE_DBCELL = 0x00D7;
    final public const XLS_TYPE_XF = 0x00E0;
    final public const XLS_TYPE_MERGEDCELLS = 0x00E5;
    final public const XLS_TYPE_MSODRAWINGGROUP = 0x00EB;
    final public const XLS_TYPE_MSODRAWING = 0x00EC;
    final public const XLS_TYPE_SST = 0x00FC;
    final public const XLS_TYPE_LABELSST = 0x00FD;
    final public const XLS_TYPE_EXTSST = 0x00FF;
    final public const XLS_TYPE_EXTERNALBOOK = 0x01AE;
    final public const XLS_TYPE_DATAVALIDATIONS = 0x01B2;
    final public const XLS_TYPE_TXO = 0x01B6;
    final public const XLS_TYPE_HYPERLINK = 0x01B8;
    final public const XLS_TYPE_DATAVALIDATION = 0x01BE;
    final public const XLS_TYPE_DIMENSION = 0x0200;
    final public const XLS_TYPE_BLANK = 0x0201;
    final public const XLS_TYPE_NUMBER = 0x0203;
    final public const XLS_TYPE_LABEL = 0x0204;
    final public const XLS_TYPE_BOOLERR = 0x0205;
    final public const XLS_TYPE_STRING = 0x0207;
    final public const XLS_TYPE_ROW = 0x0208;
    final public const XLS_TYPE_INDEX = 0x020B;
    final public const XLS_TYPE_ARRAY = 0x0221;
    final public const XLS_TYPE_DEFAULTROWHEIGHT = 0x0225;
    final public const XLS_TYPE_WINDOW2 = 0x023E;
    final public const XLS_TYPE_RK = 0x027E;
    final public const XLS_TYPE_STYLE = 0x0293;
    final public const XLS_TYPE_FORMAT = 0x041E;
    final public const XLS_TYPE_SHAREDFMLA = 0x04BC;
    final public const XLS_TYPE_BOF = 0x0809;
    final public const XLS_TYPE_SHEETPROTECTION = 0x0867;
    final public const XLS_TYPE_RANGEPROTECTION = 0x0868;
    final public const XLS_TYPE_SHEETLAYOUT = 0x0862;
    final public const XLS_TYPE_XFEXT = 0x087D;
    final public const XLS_TYPE_PAGELAYOUTVIEW = 0x088B;
    final public const XLS_TYPE_CFHEADER = 0x01B0;
    final public const XLS_TYPE_CFRULE = 0x01B1;
    final public const XLS_TYPE_UNKNOWN = 0xFFFF;

    // Encryption type
    final public const MS_BIFF_CRYPTO_NONE = 0;
    final public const MS_BIFF_CRYPTO_XOR = 1;
    final public const MS_BIFF_CRYPTO_RC4 = 2;

    // Size of stream blocks when using RC4 encryption
    final public const REKEY_BLOCK = 0x400;

    // List index = border style code found in the file (0x00 .. 0x0F).
    // Should be consistent with Writer\Xls\Style\CellBorder.
    final public const BORDER_STYLE_MAP = [
        Border::BORDER_NONE, // => 0x00,
        Border::BORDER_THIN,  // => 0x01,
        Border::BORDER_MEDIUM, // => 0x02,
        Border::BORDER_DASHED, // => 0x03,
        Border::BORDER_DOTTED,  // => 0x04,
        Border::BORDER_THICK, // => 0x05,
        Border::BORDER_DOUBLE, // => 0x06,
        Border::BORDER_HAIR, // => 0x07,
        Border::BORDER_MEDIUMDASHED, // => 0x08,
        Border::BORDER_DASHDOT, // => 0x09,
        Border::BORDER_MEDIUMDASHDOT, // => 0x0A,
        Border::BORDER_DASHDOTDOT, // => 0x0B,
        Border::BORDER_MEDIUMDASHDOTDOT, // => 0x0C,
        Border::BORDER_SLANTDASHDOT, // => 0x0D,
        Border::BORDER_OMIT, // => 0x0E,
        Border::BORDER_OMIT, // => 0x0F,
    ];

    /**
     * Codepage set in the Excel file being read. Only important for BIFF5 (Excel 5.0 - Excel 95).
     * For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'.
     */
    protected string $codepage = '';
135
136
    /**
     * Set the codepage used when decoding strings.
     *
     * @param string $codepage Codepage name; it is checked with CodePage::validate() before being stored
     *
     * @throws PhpSpreadsheetException when CodePage::validate() returns false for the given name
     */
    public function setCodepage(string $codepage): void
    {
        $isRejected = CodePage::validate($codepage) === false;
        if ($isRejected) {
            throw new PhpSpreadsheetException('Unknown codepage: ' . $codepage);
        }

        $this->codepage = $codepage;
    }

    /**
     * Get the codepage currently stored on this reader (empty string until one is set).
     */
    public function getCodepage(): string
    {
        return $this->codepage;
    }
149
150
    /**
     * Can the current IReader read the file?
     *
     * Returns false when File::testFileNoThrow() rejects the file, when the OLE
     * reader raises a PhpSpreadsheetException while reading it, or when the reader
     * leaves its `wrkbook` property null after reading. Returns true otherwise.
     *
     * @param string $filename Path of the file to probe
     */
    public function canRead(string $filename): bool
    {
        if (File::testFileNoThrow($filename) === false) {
            return false;
        }

        try {
            // Use ParseXL for the hard work.
            $ole = new OLERead();
            $ole->read($filename);
        } catch (PhpSpreadsheetException) {
            return false;
        }

        // Report the outcome directly instead of throwing an exception that is
        // caught again immediately just to be turned into `false`.
        return $ole->wrkbook !== null;
    }
174
175
    /**
     * Extract RGB color
     * OpenOffice.org's Documentation of the Microsoft Excel File Format, section 2.5.4.
     *
     * Only the first three bytes are read: offset 0 = red, offset 1 = green,
     * offset 2 = blue.
     *
     * @param string $rgb Encoded RGB value (4 bytes)
     *
     * @return array{rgb: string} Upper-case HEX notation, e.g. ['rgb' => 'FF00FC']
     */
    protected static function readRGB(string $rgb): array
    {
        [$red, $green, $blue] = [ord($rgb[0]), ord($rgb[1]), ord($rgb[2])];

        return ['rgb' => sprintf('%02X%02X%02X', $red, $green, $blue)];
    }
197
198
    /**
     * Extracts an Excel Unicode short string (8-bit string length)
     * OpenOffice documentation: 2.5.3.
     *
     * The first byte is the character count; the rest is handed to readUnicodeString().
     *
     * @param string $subData Binary data starting at the length byte
     *
     * @return array Result of readUnicodeString() with 'size' increased by the 1-byte length field
     */
    protected static function readUnicodeStringShort(string $subData): array
    {
        $length = ord($subData[0]);
        $result = self::readUnicodeString(substr($subData, 1), $length);

        // account for the length byte itself
        $result['size'] += 1;

        return $result;
    }
215
216
    /**
     * Extracts an Excel Unicode long string (16-bit string length)
     * OpenOffice documentation: 2.5.3.
     *
     * Still under construction: rich text and Asian phonetic settings are not supported.
     *
     * @param string $subData Binary data starting at the 2-byte length field
     *
     * @return array Result of readUnicodeString() with 'size' increased by the 2-byte length field
     */
    protected static function readUnicodeStringLong(string $subData): array
    {
        $length = self::getUInt2d($subData, 0);
        $result = self::readUnicodeString(substr($subData, 2), $length);

        // account for the two length bytes themselves
        $result['size'] = $result['size'] + 2;

        return $result;
    }
233
234
    /**
     * Read Unicode string with no string length field, but with known character count.
     * OpenOffice.org's Documentation of the Microsoft Excel File Format, section 2.5.3.
     *
     * Still under construction: rich text and Asian phonetic settings are not supported.
     *
     * @param string $subData Binary data starting at the option-flags byte
     * @param int $characterCount Number of characters to read
     *
     * @return array{value: string, size: int} Decoded text and the number of bytes consumed,
     *                                         including the option-flags byte
     */
    protected static function readUnicodeString(string $subData, int $characterCount): array
    {
        // Offset 0, size 1: option flags.
        //   bit 0 (mask 0x01): character compression (0 = compressed 8-bit, 1 = uncompressed 16-bit)
        //   bit 2 (mask 0x04): Asian phonetic settings -- not evaluated here
        //   bit 3 (mask 0x08): rich-text settings -- not evaluated here
        $optionFlags = ord($subData[0]);
        $isCompressed = ($optionFlags & 0x01) === 0;

        // Compressed text takes one byte per character, uncompressed text two.
        $byteCount = $isCompressed ? $characterCount : 2 * $characterCount;

        // Offset 1, variable size: character array.
        // Known limitation: this offset assumes rich-text and Asian phonetic
        // settings are off, which is generally wrong and still needs to be fixed.
        $text = self::encodeUTF16(substr($subData, 1, $byteCount), $isCompressed);

        return [
            'value' => $text,
            'size' => 1 + $byteCount,
        ];
    }
261
262
    /**
     * Convert UTF-8 string to string surrounded by double quotes, doubling every
     * embedded double quote. Used for explicit string tokens in formulas.
     * Example:  hello"world  -->  "hello""world".
     *
     * @param string $value UTF-8 encoded string
     */
    protected static function UTF8toExcelDoubleQuoted(string $value): string
    {
        $escaped = strtr($value, ['"' => '""']);

        return sprintf('"%s"', $escaped);
    }
272
273
    /**
     * Reads first 8 bytes of a string and return IEEE 754 float.
     *
     * The eight bytes are read as two 32-bit words with getInt4d(): bytes 0-3 form
     * the low word and bytes 4-7 the high word. Sign, exponent and the upper 20
     * mantissa bits come from the high word; the low word supplies the remaining
     * mantissa bits.
     *
     * @param string $data Binary string that is at least 8 bytes long
     */
    protected static function extractNumber(string $data): int|float
    {
        $highWord = self::getInt4d($data, 4);
        $lowWord = self::getInt4d($data, 0);

        $signBit = ($highWord & self::HIGH_ORDER_BIT) >> 31;
        $exponent = (($highWord & 0x7FF00000) >> 20) - 1023;

        // Upper 20 mantissa bits with the implicit leading 1 placed at bit 20.
        $highMantissa = 0x100000 | ($highWord & 0x000FFFFF);

        // The low word is split so that its top bit is handled apart from the
        // remaining 31 bits.
        $lowTopBit = ($lowWord & self::HIGH_ORDER_BIT) >> 31;
        $lowRemainder = $lowWord & 0x7FFFFFFF;

        $number = $highMantissa / 2 ** (20 - $exponent);
        if ($lowTopBit != 0) {
            $number += 1 / 2 ** (21 - $exponent);
        }
        if ($lowRemainder != 0) {
            $number += $lowRemainder / 2 ** (52 - $exponent);
        }

        return $signBit ? $number * -1 : $number;
    }
302
303
    /**
     * Decode a 32-bit RK value into a number.
     *
     * The two lowest bits act as flags:
     *  - bit 1 (mask 0x02) set: the remaining bits are an integer, obtained with `$rknum >> 2`;
     *    otherwise the value is treated as the most significant bits of a 64-bit
     *    IEEE 754 float whose other bits are assumed to be 0.
     *  - bit 0 (mask 0x01) set: the decoded value is divided by 100.
     *
     * @param int $rknum Raw RK value
     */
    protected static function getIEEE754(int $rknum): float|int
    {
        $isInteger = ($rknum & 0x02) != 0;
        $isHundredth = ($rknum & 0x01) != 0;

        if ($isInteger) {
            $number = $rknum >> 2;
        } else {
            // The RK format calls for using only the most significant 30 bits
            // of the 64 bit floating point value. The other 34 bits are assumed
            // to be 0, so the upper 30 bits of $rknum are used as follows.
            $signBit = ($rknum & self::HIGH_ORDER_BIT) >> 31;
            $biasedExponent = ($rknum & 0x7FF00000) >> 20;
            // The mask 0x000FFFFC drops the two flag bits from the mantissa.
            $mantissa = 0x100000 | ($rknum & 0x000FFFFC);

            $number = $mantissa / 2 ** (20 - ($biasedExponent - 1023));
            if ($signBit) {
                $number = -1 * $number;
            }
        }

        if ($isHundredth) {
            $number /= 100;
        }

        return $number;
    }
328
329
    /**
     * Get UTF-8 string from (compressed or uncompressed) UTF-16 string.
     *
     * @param string $string Source bytes
     * @param bool $compressed When true, the bytes are first expanded with uncompressByteString()
     */
    protected static function encodeUTF16(string $string, bool $compressed = false): string
    {
        $utf16 = $compressed ? self::uncompressByteString($string) : $string;

        return StringHelper::convertEncoding($utf16, 'UTF-8', 'UTF-16LE');
    }
340
341
    /**
     * Convert UTF-16 string in compressed notation to uncompressed form. Only used for BIFF8.
     *
     * Every input byte is followed by a NUL byte in the result, so the output is
     * exactly twice as long as the input.
     *
     * @param string $string Compressed (one byte per character) string
     */
    protected static function uncompressByteString(string $string): string
    {
        // Handled up front because str_split('') does not yield an empty list on every PHP version.
        if ($string === '') {
            return '';
        }

        return implode("\0", str_split($string)) . "\0";
    }
354
355
    /**
     * Convert string to UTF-8. Only used for BIFF5.
     *
     * @param string $string Text encoded in the codepage currently stored on this reader
     */
    protected function decodeCodepage(string $string): string
    {
        $sourceEncoding = $this->codepage;

        return StringHelper::convertEncoding($string, 'UTF-8', $sourceEncoding);
    }
362
363
    /**
     * Read 16-bit unsigned integer.
     *
     * The byte at $pos is the low byte and the byte at $pos + 1 the high byte.
     *
     * @param string $data Binary string
     * @param int $pos Offset of the first of the two bytes
     */
    public static function getUInt2d(string $data, int $pos): int
    {
        $lowByte = ord($data[$pos]);
        $highByte = ord($data[$pos + 1]);

        return $lowByte | ($highByte << 8);
    }
370
371
    /**
     * Read 16-bit signed integer.
     *
     * The two bytes are decoded low byte first, the same order getUInt2d() uses,
     * and then sign-extended. This is done by hand because unpack('s') reads the
     * bytes in the byte order of the host machine, which gives a wrong result on
     * big-endian platforms.
     *
     * @param string $data Binary string
     * @param int $pos Offset of the first of the two bytes
     */
    public static function getInt2d(string $data, int $pos): int
    {
        $unsigned = ord($data[$pos]) | (ord($data[$pos + 1]) << 8);

        // Bit 15 set means a negative value in two's complement.
        return $unsigned >= 0x8000 ? $unsigned - 0x10000 : $unsigned;
    }
378
379
    /**
     * Read 32-bit signed integer.
     *
     * Bytes are combined low byte first. The most significant byte is handled
     * separately so that values with the top bit set come out negative on both
     * 32-bit and 64-bit systems.
     *
     * @param string $data Binary string
     * @param int $pos Offset of the first of the four bytes
     */
    public static function getInt4d(string $data, int $pos): int
    {
        $topByte = ord($data[$pos + 3]);

        // A top byte of 128 or more marks a negative number: build its
        // contribution as a negative multiple of 2^24 instead of shifting the
        // raw byte into place.
        $topBits = $topByte >= 128
            ? -abs((256 - $topByte) << 24)
            : $topByte << 24;

        $lowBits = ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16);

        return $lowBits | $topBits;
    }
397
}
398