1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use DOMDocument; |
6
|
|
|
use DOMElement; |
7
|
|
|
use DOMNode; |
8
|
|
|
use DOMText; |
9
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
10
|
|
|
|
11
|
|
|
/** |
12
|
|
|
* Copyright (c) 2006 - 2016 PhpSpreadsheet |
13
|
|
|
* |
14
|
|
|
* This library is free software; you can redistribute it and/or |
15
|
|
|
* modify it under the terms of the GNU Lesser General Public |
16
|
|
|
* License as published by the Free Software Foundation; either |
17
|
|
|
* version 2.1 of the License, or (at your option) any later version. |
18
|
|
|
* |
19
|
|
|
* This library is distributed in the hope that it will be useful, |
20
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
21
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
22
|
|
|
* Lesser General Public License for more details. |
23
|
|
|
* |
24
|
|
|
* You should have received a copy of the GNU Lesser General Public |
25
|
|
|
* License along with this library; if not, write to the Free Software |
26
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
27
|
|
|
* |
28
|
|
|
* @category PhpSpreadsheet |
29
|
|
|
* @copyright Copyright (c) 2006 - 2016 PhpSpreadsheet (https://github.com/PHPOffice/PhpSpreadsheet) |
30
|
|
|
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL |
31
|
|
|
* @version ##VERSION##, ##DATE## |
32
|
|
|
*/ |
33
|
|
|
/** PhpSpreadsheet root directory */ |
34
|
|
|
class HTML extends BaseReader implements IReader |
35
|
|
|
{ |
36
|
|
|
/**
 * Number of bytes sampled from the start (and end) of the file when
 * deciding whether it looks like HTML.
 */
const TEST_SAMPLE_SIZE = 2048;

/**
 * Input encoding
 *
 * @var string
 */
protected $inputEncoding = 'ANSI';

/**
 * Sheet index to read
 *
 * @var int
 */
protected $sheetIndex = 0;

/**
 * Style arrays applied to cells, keyed by the HTML tag name that
 * triggers them (looked up via $this->formats[$child->nodeName]).
 *
 * @var array
 */
protected $formats = [
    'h1' => [
        'font' => [
            'bold' => true,
            'size' => 24,
        ],
    ], // Bold, 24pt
    'h2' => [
        'font' => [
            'bold' => true,
            'size' => 18,
        ],
    ], // Bold, 18pt
    'h3' => [
        'font' => [
            'bold' => true,
            'size' => 13.5,
        ],
    ], // Bold, 13.5pt
    'h4' => [
        'font' => [
            'bold' => true,
            'size' => 12,
        ],
    ], // Bold, 12pt
    'h5' => [
        'font' => [
            'bold' => true,
            'size' => 10,
        ],
    ], // Bold, 10pt
    'h6' => [
        'font' => [
            'bold' => true,
            'size' => 7.5,
        ],
    ], // Bold, 7.5pt
    'a' => [
        'font' => [
            'underline' => true,
            'color' => [
                'argb' => \PhpOffice\PhpSpreadsheet\Style\Color::COLOR_BLUE,
            ],
        ],
    ], // Blue underlined
    'hr' => [
        'borders' => [
            'bottom' => [
                'style' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THIN,
                'color' => [
                    // Keyed by 'argb' like the 'a' font colour above; the
                    // original un-keyed entry did not name the colour model.
                    'argb' => \PhpOffice\PhpSpreadsheet\Style\Color::COLOR_BLACK,
                ],
            ],
        ],
    ], // Bottom border
];

/**
 * Cell coordinates covered by a rowspan merge, stored as
 * coordinate => true. Consulted when placing th/td cells so that
 * already-occupied coordinates are skipped.
 *
 * @var array
 */
protected $rowspan = [];
118
|
|
|
|
119
|
|
|
/**
 * Create a new HTML Reader instance.
 *
 * Installs a DefaultReadFilter as the reader's read filter.
 */
public function __construct()
{
    $defaultFilter = new DefaultReadFilter();
    $this->readFilter = $defaultFilter;
}
126
|
|
|
|
127
|
|
|
/**
 * Validate that the current file is an HTML file.
 *
 * The file passes when its first sample starts with a tag and contains
 * at least one tag, and its last sample ends with a tag. The checks
 * short-circuit in that order, so the tail of the file is only read
 * when the head already looks like HTML.
 *
 * @return bool
 */
protected function isValidFormat()
{
    $head = $this->readBeginning();

    return self::startsWithTag($head)
        && self::containsTags($head)
        && self::endsWithTag($this->readEnding());
}
150
|
|
|
|
151
|
1 |
|
/**
 * Read the first TEST_SAMPLE_SIZE bytes of the open file handle.
 *
 * @return string|false whatever fread() returns for the sample
 */
private function readBeginning()
{
    // Always sample from the very start, regardless of earlier reads.
    fseek($this->fileHandle, 0, SEEK_SET);
    $sample = fread($this->fileHandle, self::TEST_SAMPLE_SIZE);

    return $sample;
}
157
|
|
|
|
158
|
|
|
/**
 * Read the last TEST_SAMPLE_SIZE bytes of the open file handle.
 *
 * For files shorter than the sample size the whole file is returned.
 * Previously the seek offset went negative for such files, the seek
 * failed, and the read started from wherever the handle happened to be.
 *
 * @return string|false the trailing sample; '' when the file is empty
 *                      or its size cannot be determined
 */
private function readEnding()
{
    $meta = stream_get_meta_data($this->fileHandle);
    $filename = $meta['uri'];

    $size = filesize($filename);
    if ($size === false || $size === 0) {
        // Nothing to sample; also avoids asking fread() for zero bytes.
        return '';
    }

    // Never try to read (or seek back) further than the file is long.
    $blockSize = min($size, self::TEST_SAMPLE_SIZE);

    fseek($this->fileHandle, $size - $blockSize);

    return fread($this->fileHandle, $blockSize);
}
170
|
|
|
|
171
|
1 |
|
/**
 * Tell whether the data, ignoring surrounding whitespace, begins with '<'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function startsWithTag($data)
{
    $trimmed = trim($data);
    $firstChar = substr($trimmed, 0, 1);

    return $firstChar === '<';
}
175
|
|
|
|
176
|
|
|
/**
 * Tell whether the data, ignoring surrounding whitespace, ends with '>'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function endsWithTag($data)
{
    $trimmed = trim($data);
    $lastChar = substr($trimmed, -1, 1);

    return $lastChar === '>';
}
180
|
|
|
|
181
|
|
|
/**
 * Tell whether strip_tags() removes anything from the data, i.e. whether
 * the data's length changes once tags are stripped.
 *
 * @param string $data
 *
 * @return bool
 */
private static function containsTags($data)
{
    $rawLength = strlen($data);
    $strippedLength = strlen(strip_tags($data));

    return $rawLength !== $strippedLength;
}
185
|
|
|
|
186
|
|
|
/**
 * Loads a Spreadsheet from file.
 *
 * Creates a fresh Spreadsheet and delegates to loadIntoExisting().
 *
 * @param string $pFilename
 *
 * @throws Exception
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
201
|
|
|
|
202
|
|
|
/**
 * Set input encoding.
 *
 * @param string $pValue Input encoding
 *
 * @return HTML this reader, to allow call chaining
 */
public function setInputEncoding($pValue = 'ANSI')
{
    $this->inputEncoding = $pValue;

    return $this;
}
213
|
|
|
|
214
|
|
|
/**
 * Get input encoding.
 *
 * @return string the value last stored by setInputEncoding(), 'ANSI' by default
 */
public function getInputEncoding()
{
    $encoding = $this->inputEncoding;

    return $encoding;
}
223
|
|
|
|
224
|
|
|
// Data Array used for testing only, should write to Spreadsheet object on completion of tests
// Filled by flushCell() as $dataArray[$row][$column] = cell content.
protected $dataArray = [];
// Current table nesting depth: incremented by setTableStartColumn(),
// decremented by releaseTableStartColumn(), reset to 0 when <body> is entered.
protected $tableLevel = 0;
// Start column per nesting depth, indexed by $tableLevel.
protected $nestedColumn = ['A'];
228
|
|
|
|
229
|
|
|
/**
 * Enter a (possibly nested) table and record the column it starts in.
 *
 * An outermost table always starts in column 'A'; a nested table starts
 * in the column passed in.
 *
 * @param string $column column the cursor is in when the table opens
 *
 * @return string the start column recorded for the new nesting level
 */
protected function setTableStartColumn($column)
{
    $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

    ++$this->tableLevel;
    $this->nestedColumn[$this->tableLevel] = $startColumn;

    return $startColumn;
}
239
|
|
|
|
240
|
|
|
/**
 * Get the start column recorded for the current table nesting level.
 *
 * @return string
 */
protected function getTableStartColumn()
{
    $level = $this->tableLevel;

    return $this->nestedColumn[$level];
}
244
|
|
|
|
245
|
|
|
/**
 * Leave the current table: drop one nesting level and remove the last
 * recorded start column.
 *
 * @return string the start column that was removed
 */
protected function releaseTableStartColumn()
{
    $released = array_pop($this->nestedColumn);
    --$this->tableLevel;

    return $released;
}
251
|
|
|
|
252
|
|
|
/**
 * Write the accumulated cell content to the sheet and reset the buffer.
 *
 * String content is written only when it is non-blank after trimming.
 * Non-string content (a rich text run) is not yet written to the sheet;
 * it is only recorded in $dataArray. In every case $cellContent is reset
 * to an empty string afterwards.
 *
 * @param \PhpOffice\PhpSpreadsheet\Worksheet $sheet
 * @param string $column
 * @param int $row
 * @param string $cellContent buffer to flush; emptied by this call
 */
protected function flushCell($sheet, $column, $row, &$cellContent)
{
    if (!is_string($cellContent)) {
        // We have a Rich Text run
        // TODO
        $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
    } elseif (trim($cellContent) > '') {
        // Only actually write it if there's content in the string
        $sheet->setCellValue($column . $row, $cellContent, true);
        $this->dataArray[$row][$column] = $cellContent;
    }

    $cellContent = '';
}
270
|
|
|
|
271
|
|
|
/**
 * Recursively walk the children of a DOM node and write them to the sheet.
 *
 * Text nodes are whitespace-normalised and appended to $cellContent.
 * Element nodes are dispatched on their tag name; depending on the tag
 * they flush the buffer into the current cell, move the row/column
 * cursor, apply a style from $this->formats, or merge cells.
 *
 * $row, $column and $cellContent are passed by reference and act as the
 * shared write cursor and text buffer for the whole traversal.
 *
 * @param DOMNode $element node whose children are processed
 * @param \PhpOffice\PhpSpreadsheet\Worksheet $sheet
 * @param int $row current row (updated in place)
 * @param string $column current column letter (updated in place)
 * @param string $cellContent pending cell text (updated in place)
 */
protected function processDomElement(DOMNode $element, \PhpOffice\PhpSpreadsheet\Worksheet $sheet, &$row, &$column, &$cellContent)
{
    foreach ($element->childNodes as $child) {
        if ($child instanceof DOMText) {
            // Collapse every run of whitespace to a single space.
            $domText = preg_replace('/\s+/u', ' ', trim($child->nodeValue));
            if (is_string($cellContent)) {
                // simply append the text if the cell content is a plain text string
                $cellContent .= $domText;
            } else {
                // but if we have a rich text run instead, we need to append it correctly
                // TODO
            }
        } elseif ($child instanceof DOMElement) {
            $attributeArray = [];
            foreach ($child->attributes as $attribute) {
                $attributeArray[$attribute->name] = $attribute->value;
            }

            switch ($child->nodeName) {
                case 'meta':
                    // TODO
                    // Extract character set from the 'content' attribute, so we can convert to UTF-8 if required
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                case 'title':
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    $sheet->setTitle($cellContent);
                    $cellContent = '';
                    break;
                case 'span':
                case 'div':
                case 'font':
                case 'i':
                case 'em':
                case 'strong':
                case 'b':
                    // Inline wrappers: keep their text separated from neighbours by a space.
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }
                    break;
                case 'hr':
                    $this->flushCell($sheet, $column, $row, $cellContent);
                    ++$row;
                    if (isset($this->formats[$child->nodeName])) {
                        $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
                    } else {
                        $cellContent = '----------';
                        $this->flushCell($sheet, $column, $row, $cellContent);
                    }
                    ++$row;
                    // Add a break after a horizontal rule, simply by allowing the code to dropthru
                    // no break
                case 'br':
                    if ($this->tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                    } else {
                        // Otherwise flush our existing content and move the row cursor on
                        $this->flushCell($sheet, $column, $row, $cellContent);
                        ++$row;
                    }
                    break;
                case 'a':
                    if (isset($attributeArray['href'])) {
                        $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);
                        if (isset($this->formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
                        }
                    }
                    $cellContent .= ' ';
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                case 'ol':
                case 'ul':
                case 'p':
                    if ($this->tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        // Block element outside a table: give it a row of its own.
                        if ($cellContent > '') {
                            $this->flushCell($sheet, $column, $row, $cellContent);
                            ++$row;
                        }
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->flushCell($sheet, $column, $row, $cellContent);

                        if (isset($this->formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
                        }

                        ++$row;
                        $column = 'A';
                    }
                    break;
                case 'li':
                    if ($this->tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        if ($cellContent > '') {
                            $this->flushCell($sheet, $column, $row, $cellContent);
                        }
                        ++$row;
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->flushCell($sheet, $column, $row, $cellContent);
                        $column = 'A';
                    }
                    break;
                case 'table':
                    $this->flushCell($sheet, $column, $row, $cellContent);
                    $column = $this->setTableStartColumn($column);
                    if ($this->tableLevel > 1) {
                        --$row;
                    }
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    $column = $this->releaseTableStartColumn();
                    if ($this->tableLevel > 1) {
                        ++$column;
                    } else {
                        ++$row;
                    }
                    break;
                case 'thead':
                case 'tbody':
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                case 'tr':
                    $column = $this->getTableStartColumn();
                    $cellContent = '';
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    ++$row;
                    break;
                case 'th':
                case 'td':
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

                    // Skip coordinates already claimed by a rowspan from an earlier row.
                    while (isset($this->rowspan[$column . $row])) {
                        ++$column;
                    }

                    $this->flushCell($sheet, $column, $row, $cellContent);

                    if (isset($attributeArray['rowspan']) && isset($attributeArray['colspan'])) {
                        //create merging rowspan and colspan
                        $columnTo = $column;
                        for ($i = 0; $i < $attributeArray['colspan'] - 1; ++$i) {
                            ++$columnTo;
                        }
                        $range = $column . $row . ':' . $columnTo . ($row + $attributeArray['rowspan'] - 1);
                        foreach (\PhpOffice\PhpSpreadsheet\Cell::extractAllCellReferencesInRange($range) as $value) {
                            $this->rowspan[$value] = true;
                        }
                        $sheet->mergeCells($range);
                        $column = $columnTo;
                    } elseif (isset($attributeArray['rowspan'])) {
                        //create merging rowspan
                        $range = $column . $row . ':' . $column . ($row + $attributeArray['rowspan'] - 1);
                        foreach (\PhpOffice\PhpSpreadsheet\Cell::extractAllCellReferencesInRange($range) as $value) {
                            $this->rowspan[$value] = true;
                        }
                        $sheet->mergeCells($range);
                    } elseif (isset($attributeArray['colspan'])) {
                        //create merging colspan
                        $columnTo = $column;
                        for ($i = 0; $i < $attributeArray['colspan'] - 1; ++$i) {
                            ++$columnTo;
                        }
                        $sheet->mergeCells($column . $row . ':' . $columnTo . $row);
                        $column = $columnTo;
                    }
                    ++$column;
                    break;
                case 'body':
                    // Start the sheet from a clean cursor and an empty buffer.
                    // The original assigned an unused local ($content) here, so
                    // text gathered before <body> could leak into the first cell.
                    $row = 1;
                    $column = 'A';
                    $cellContent = '';
                    $this->tableLevel = 0;
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    break;
                default:
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
            }
        }
    }
}
484
|
|
|
|
485
|
|
|
/**
 * Loads an HTML file into an existing Spreadsheet instance.
 *
 * The file is first opened and checked with isValidFormat(), then
 * re-read through securityScanFile(), parsed into a DOMDocument and
 * walked with processDomElement() to populate the sheet at
 * $this->sheetIndex (sheets are created until that index exists).
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception when the file is not valid HTML or cannot be parsed
 *
 * @return Spreadsheet the same instance that was passed in
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    // Open the file only long enough to validate it
    $this->openFile($pFilename);
    $looksLikeHtml = $this->isValidFormat();
    fclose($this->fileHandle);
    if (!$looksLikeHtml) {
        throw new Exception($pFilename . ' is an Invalid HTML file.');
    }

    // Make sure the target sheet exists, then select it
    while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
        $spreadsheet->createSheet();
    }
    $spreadsheet->setActiveSheetIndex($this->sheetIndex);

    // Reload the HTML file into a new DOM object
    $dom = new DOMDocument();
    $scanned = $this->securityScanFile($pFilename);
    $markup = mb_convert_encoding($scanned, 'HTML-ENTITIES', 'UTF-8');
    if ($dom->loadHTML($markup) === false) {
        throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
    }

    // Discard white space
    // NOTE(review): this is set after loadHTML(); confirm it still has an effect here.
    $dom->preserveWhiteSpace = false;

    // Initial cursor and text buffer for the traversal
    $row = 0;
    $column = 'A';
    $content = '';
    $this->processDomElement($dom, $spreadsheet->getActiveSheet(), $row, $column, $content);

    return $spreadsheet;
}
529
|
|
|
|
530
|
|
|
/**
 * Get sheet index.
 *
 * @return int index of the sheet that loadIntoExisting() writes to
 */
public function getSheetIndex()
{
    $index = $this->sheetIndex;

    return $index;
}
539
|
|
|
|
540
|
|
|
/**
 * Set sheet index.
 *
 * @param int $pValue Sheet index
 *
 * @return HTML this reader, to allow call chaining
 */
public function setSheetIndex($pValue = 0)
{
    $this->sheetIndex = $pValue;

    return $this;
}
552
|
|
|
|
553
|
|
|
/**
 * Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
 *
 * The pattern allows an optional NUL byte before, between and after the
 * characters of "<!ENTITY".
 *
 * @param string $xml
 *
 * @throws Exception when an ENTITY declaration is detected
 *
 * @return string the unmodified input when nothing was detected
 */
public function securityScan($xml)
{
    $letters = str_split('<!ENTITY');
    $pattern = '/\\0?' . implode('\\0?', $letters) . '\\0?/';

    $found = preg_match($pattern, $xml);
    if ($found) {
        throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
    }

    return $xml;
}
568
|
|
|
} |
569
|
|
|
|
If you return a value from a function or method, it should be a sub-type of the type that is given by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle also belongs to the SOLID principles for object-oriented design.
Let’s take a look at an example:
Our function `my_function` expects a `Post` object, and outputs the author of the post. The base class `Post` returns a simple string, and outputting a simple string will work just fine. However, the child class `BlogPost`, which is a sub-type of `Post`, instead decided to return an `object`, and is therefore violating the SOLID principles. If a `BlogPost` were passed to `my_function`, PHP would not complain, but would ultimately fail when executing the `strtoupper` call in its body.