1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use DOMDocument; |
6
|
|
|
use DOMElement; |
7
|
|
|
use DOMNode; |
8
|
|
|
use DOMText; |
9
|
|
|
use PhpOffice\PhpSpreadsheet\Cell; |
10
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
11
|
|
|
use PhpOffice\PhpSpreadsheet\Style\Border; |
12
|
|
|
use PhpOffice\PhpSpreadsheet\Style\Color; |
13
|
|
|
use PhpOffice\PhpSpreadsheet\Style\Fill; |
14
|
|
|
use PhpOffice\PhpSpreadsheet\Worksheet; |
15
|
|
|
|
16
|
|
|
/** Reader that loads an HTML file into a Spreadsheet object. */ |
17
|
|
|
class Html extends BaseReader implements IReader |
18
|
|
|
{ |
19
|
|
|
/**
 * Number of bytes sampled from the start and from the end of a file when
 * deciding whether it looks like HTML (see readBeginning() and readEnding()).
 */
const TEST_SAMPLE_SIZE = 2048;

/**
 * Input encoding.
 *
 * Written by setInputEncoding() and returned by getInputEncoding().
 *
 * @var string
 */
protected $inputEncoding = 'ANSI';

/**
 * Index of the worksheet that loadIntoExisting() populates.
 *
 * @var int
 */
protected $sheetIndex = 0;

/**
 * Style arrays keyed by HTML tag name.
 *
 * processDomElement() looks a tag up here and, when an entry exists, hands
 * the array to getStyle(...)->applyFromArray() for the current cell.
 *
 * @var array
 */
protected $formats = [
    'h1' => [
        'font' => [
            'bold' => true,
            'size' => 24,
        ],
    ], // Bold, 24pt
    'h2' => [
        'font' => [
            'bold' => true,
            'size' => 18,
        ],
    ], // Bold, 18pt
    'h3' => [
        'font' => [
            'bold' => true,
            'size' => 13.5,
        ],
    ], // Bold, 13.5pt
    'h4' => [
        'font' => [
            'bold' => true,
            'size' => 12,
        ],
    ], // Bold, 12pt
    'h5' => [
        'font' => [
            'bold' => true,
            'size' => 10,
        ],
    ], // Bold, 10pt
    'h6' => [
        'font' => [
            'bold' => true,
            'size' => 7.5,
        ],
    ], // Bold, 7.5pt
    'a' => [
        'font' => [
            'underline' => true,
            'color' => [
                'argb' => Color::COLOR_BLUE,
            ],
        ],
    ], // Blue underlined
    'hr' => [
        'borders' => [
            'bottom' => [
                'borderStyle' => Border::BORDER_THIN,
                // Keyed with 'argb', the same way as the link colour above;
                // the value used to be a bare list entry without any key.
                'color' => [
                    'argb' => Color::COLOR_BLACK,
                ],
            ],
        ],
    ], // Bottom border
];

/**
 * Cell coordinates (e.g. 'B3') covered by a rowspan merge, stored as
 * coordinate => true. processDomElement() fills it while handling td/th
 * elements and consults it to skip columns that are already occupied.
 *
 * @var array
 */
protected $rowspan = [];
101
|
|
|
|
102
|
|
|
/**
 * Create a new HTML Reader instance.
 *
 * Installs a DefaultReadFilter as the reader's read filter.
 */
public function __construct()
{
    $this->readFilter = new DefaultReadFilter();
}
109
|
|
|
|
110
|
|
|
/**
 * Validate that the current file is an HTML file.
 *
 * The file counts as HTML when its first sample starts with a tag and
 * contains at least one tag, and its last sample ends with a tag.
 *
 * @param string $pFilename
 *
 * @return bool false when the file cannot be opened or does not look like HTML
 */
public function canRead($pFilename)
{
    try {
        $this->openFile($pFilename);
    } catch (Exception $e) {
        // The file could not be opened, so it cannot be read as HTML
        return false;
    }

    // Take both samples first, then release the handle before judging them
    $head = $this->readBeginning();
    $tail = $this->readEnding();
    fclose($this->fileHandle);

    return self::startsWithTag($head)
        && self::containsTags($head)
        && self::endsWithTag($tail);
}
137
|
|
|
|
138
|
1 |
|
/**
 * Read the first TEST_SAMPLE_SIZE bytes of the currently open file.
 *
 * @return false|string whatever fread() returns for the sample
 */
private function readBeginning()
{
    // Move the file pointer back to the first byte before sampling
    rewind($this->fileHandle);

    return fread($this->fileHandle, self::TEST_SAMPLE_SIZE);
}
144
|
|
|
|
145
|
1 |
|
/**
 * Read the last TEST_SAMPLE_SIZE bytes of the currently open file, or the
 * whole file when it is shorter than that.
 *
 * @return false|string the trailing sample; '' when the file is empty or
 *                      its size cannot be determined
 */
private function readEnding()
{
    $meta = stream_get_meta_data($this->fileHandle);
    $size = filesize($meta['uri']);
    if ($size === false || $size === 0) {
        // Nothing to sample; also avoids asking fread() for zero bytes
        return '';
    }

    // Clamp the sample to the file size: a negative seek offset would make
    // fseek() fail and the read would start wherever the pointer happened
    // to be (end of file after readBeginning() on a short file)
    $blockSize = min($size, self::TEST_SAMPLE_SIZE);

    fseek($this->fileHandle, $size - $blockSize);

    return fread($this->fileHandle, $blockSize);
}
157
|
|
|
|
158
|
1 |
|
/**
 * Tell whether the data, ignoring surrounding whitespace, begins with '<'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function startsWithTag($data)
{
    $trimmed = trim($data);

    return isset($trimmed[0]) && $trimmed[0] === '<';
}
162
|
|
|
|
163
|
1 |
|
/**
 * Tell whether the data, ignoring surrounding whitespace, ends with '>'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function endsWithTag($data)
{
    $trimmed = trim($data);
    $lastCharacter = substr($trimmed, -1, 1);

    return $lastCharacter === '>';
}
167
|
|
|
|
168
|
1 |
|
/**
 * Tell whether strip_tags() removes anything from the data, judged by
 * comparing the length before and after stripping.
 *
 * @param string $data
 *
 * @return bool
 */
private static function containsTags($data)
{
    $lengthBefore = strlen($data);
    $lengthAfter = strlen(strip_tags($data));

    return $lengthBefore !== $lengthAfter;
}
172
|
|
|
|
173
|
|
|
/**
 * Loads Spreadsheet from file.
 *
 * Creates a fresh Spreadsheet and delegates to loadIntoExisting().
 *
 * @param string $pFilename
 *
 * @throws Exception
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
190
|
|
|
|
191
|
|
|
/**
 * Set input encoding.
 *
 * @param string $pValue Input encoding, eg: 'ANSI'
 *
 * @return $this this reader, so calls can be chained
 */
public function setInputEncoding($pValue)
{
    $this->inputEncoding = $pValue;

    return $this;
}
202
|
|
|
|
203
|
|
|
/**
 * Get input encoding.
 *
 * @return string the value last given to setInputEncoding(), 'ANSI' by default
 */
public function getInputEncoding()
{
    return $this->inputEncoding;
}
212
|
|
|
|
213
|
|
|
/**
 * Data Array used for testing only, should write to Spreadsheet object on completion of tests.
 *
 * flushCell() records every written value here as [row][column] => content.
 *
 * @var array
 */
protected $dataArray = [];

/**
 * Current table nesting depth: 0 outside any table, incremented by
 * setTableStartColumn() and decremented by releaseTableStartColumn().
 *
 * @var int
 */
protected $tableLevel = 0;

/**
 * Start column recorded for each table nesting level, indexed by level.
 *
 * @var array
 */
protected $nestedColumn = ['A'];
217
|
|
|
|
218
|
|
|
/**
 * Enter a table: increase the nesting level and remember the column in
 * which this table starts.
 *
 * @param string $column column the cursor is on when the table opens
 *
 * @return string the start column recorded for the new level; always 'A'
 *                for an outermost table
 */
protected function setTableStartColumn($column)
{
    // An outermost table always starts in the first column
    $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

    $level = ++$this->tableLevel;
    $this->nestedColumn[$level] = $startColumn;

    return $startColumn;
}
228
|
|
|
|
229
|
|
|
/**
 * Get the start column recorded for the current table nesting level.
 *
 * @return string
 */
protected function getTableStartColumn()
{
    $level = $this->tableLevel;

    return $this->nestedColumn[$level];
}
233
|
|
|
|
234
|
|
|
/**
 * Leave a table: decrease the nesting level and drop the most recently
 * recorded start column.
 *
 * @return string the start column that was removed
 */
protected function releaseTableStartColumn()
{
    $released = array_pop($this->nestedColumn);
    --$this->tableLevel;

    return $released;
}
240
|
|
|
|
241
|
|
|
/**
 * Write the pending cell content to the worksheet and clear the buffer.
 *
 * String content is written only when it is not blank; anything else is
 * treated as a (not yet supported) rich text run and only recorded in the
 * test data array. The buffer is emptied in every case.
 *
 * @param Worksheet $sheet
 * @param string $column
 * @param int $row
 * @param string $cellContent pending content, reset to '' on return
 */
protected function flushCell($sheet, $column, $row, &$cellContent)
{
    if (!is_string($cellContent)) {
        // We have a Rich Text run
        // TODO
        $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
    } elseif (trim($cellContent) !== '') {
        // Only actually write it if there's content in the string
        $sheet->setCellValue($column . $row, $cellContent);
        $this->dataArray[$row][$column] = $cellContent;
    }

    $cellContent = '';
}
259
|
|
|
|
260
|
|
|
/**
 * Recursively walk the children of a DOM node and write them to the worksheet.
 *
 * Text nodes are appended to the pending cell content; element nodes are
 * dispatched on their tag name. $row, $column and $cellContent are passed
 * by reference and act as a cursor shared by every level of the recursion.
 *
 * @param DOMNode $element node whose children are processed
 * @param Worksheet $sheet worksheet receiving values, styles and merges
 * @param int $row current row, advanced as block elements are emitted
 * @param string $column current column letter
 * @param string $cellContent text collected for the current cell but not yet written
 */
protected function processDomElement(DOMNode $element, Worksheet $sheet, &$row, &$column, &$cellContent)
{
    foreach ($element->childNodes as $child) {
        if ($child instanceof DOMText) {
            // Collapse every run of whitespace into a single space
            $domText = preg_replace('/\s+/u', ' ', trim($child->nodeValue));
            if (is_string($cellContent)) {
                // simply append the text if the cell content is a plain text string
                $cellContent .= $domText;
            }
            // but if we have a rich text run instead, we need to append it correctly
            // TODO
        } elseif ($child instanceof DOMElement) {
            $attributeArray = [];
            foreach ($child->attributes as $attribute) {
                $attributeArray[$attribute->name] = $attribute->value;
            }

            switch ($child->nodeName) {
                case 'meta':
                    // TODO
                    // Extract character set from the 'content' attribute, so we can convert to UTF-8 if required
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

                    break;
                case 'title':
                    // The document title becomes the worksheet title
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    $sheet->setTitle($cellContent, true, false);
                    $cellContent = '';

                    break;
                case 'span':
                case 'div':
                case 'font':
                case 'i':
                case 'em':
                case 'strong':
                case 'b':
                    // Inline containers: keep their text in the same cell, separated by spaces
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    if ($cellContent > '') {
                        $cellContent .= ' ';
                    }

                    break;
                case 'hr':
                    $this->flushCell($sheet, $column, $row, $cellContent);
                    ++$row;
                    if (isset($this->formats[$child->nodeName])) {
                        $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
                    } else {
                        $cellContent = '----------';
                        $this->flushCell($sheet, $column, $row, $cellContent);
                    }
                    ++$row;
                    // Add a break after a horizontal rule, simply by allowing the code to dropthru
                    // no break
                case 'br':
                    if ($this->tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                    } else {
                        // Otherwise flush our existing content and move the row cursor on
                        $this->flushCell($sheet, $column, $row, $cellContent);
                        ++$row;
                    }

                    break;
                case 'a':
                    if (isset($attributeArray['href'])) {
                        $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);
                        if (isset($this->formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
                        }
                    }
                    $cellContent .= ' ';
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

                    break;
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                case 'ol':
                case 'ul':
                case 'p':
                    if ($this->tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        // Outside a table every block element gets a row of its own
                        if ($cellContent > '') {
                            $this->flushCell($sheet, $column, $row, $cellContent);
                            ++$row;
                        }
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->flushCell($sheet, $column, $row, $cellContent);

                        if (isset($this->formats[$child->nodeName])) {
                            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
                        }

                        ++$row;
                        $column = 'A';
                    }

                    break;
                case 'li':
                    if ($this->tableLevel > 0) {
                        // If we're inside a table, replace with a \n
                        $cellContent .= "\n";
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    } else {
                        if ($cellContent > '') {
                            $this->flushCell($sheet, $column, $row, $cellContent);
                        }
                        ++$row;
                        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                        $this->flushCell($sheet, $column, $row, $cellContent);
                        $column = 'A';
                    }

                    break;
                case 'table':
                    $this->flushCell($sheet, $column, $row, $cellContent);
                    $column = $this->setTableStartColumn($column);
                    if ($this->tableLevel > 1) {
                        --$row;
                    }
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    $column = $this->releaseTableStartColumn();
                    if ($this->tableLevel > 1) {
                        ++$column;
                    } else {
                        ++$row;
                    }

                    break;
                case 'thead':
                case 'tbody':
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

                    break;
                case 'tr':
                    // Each table row restarts at the table's start column
                    $column = $this->getTableStartColumn();
                    $cellContent = '';
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
                    ++$row;

                    break;
                case 'th':
                case 'td':
                    // Step over cells already claimed by a rowspan from an earlier row
                    // BEFORE touching the cell, so that nested content, the inline style
                    // and the flushed value all land on the same coordinate
                    while (isset($this->rowspan[$column . $row])) {
                        ++$column;
                    }

                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

                    // apply inline style
                    $this->applyInlineStyle($sheet, $row, $column, $attributeArray);

                    $this->flushCell($sheet, $column, $row, $cellContent);

                    // The background colour is independent of any spanning, so it is
                    // applied whether or not rowspan/colspan are present
                    if (isset($attributeArray['bgcolor'])) {
                        $sheet->getStyle($column . $row)->applyFromArray(
                            [
                                'fill' => [
                                    'fillType' => Fill::FILL_SOLID,
                                    'color' => ['rgb' => $attributeArray['bgcolor']],
                                ],
                            ]
                        );
                    }

                    $hasRowspan = isset($attributeArray['rowspan']);
                    $hasColspan = isset($attributeArray['colspan']);
                    if ($hasRowspan || $hasColspan) {
                        // Last column of the merge: advance once per extra spanned column
                        $columnTo = $column;
                        if ($hasColspan) {
                            for ($i = 0; $i < $attributeArray['colspan'] - 1; ++$i) {
                                ++$columnTo;
                            }
                        }
                        // Last row of the merge
                        $rowTo = $hasRowspan ? ($row + $attributeArray['rowspan'] - 1) : $row;

                        $range = $column . $row . ':' . $columnTo . $rowTo;
                        if ($hasRowspan) {
                            // Remember every covered cell so later rows skip over them
                            foreach (Cell::extractAllCellReferencesInRange($range) as $value) {
                                $this->rowspan[$value] = true;
                            }
                        }
                        $sheet->mergeCells($range);
                        $column = $columnTo;
                    }
                    ++$column;

                    break;
                case 'body':
                    // Start the sheet cursor afresh; anything collected while walking
                    // the document head must not leak into the first cell
                    $row = 1;
                    $column = 'A';
                    $cellContent = '';
                    $this->tableLevel = 0;
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

                    break;
                default:
                    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
            }
        }
    }
}
498
|
|
|
|
499
|
|
|
/**
 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 *
 * The HTML is written to the worksheet at the configured sheet index;
 * missing sheets up to that index are created first.
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception when the file does not look like HTML or cannot be parsed
 *
 * @return Spreadsheet the spreadsheet that was passed in
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    // Validate
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid HTML file.');
    }

    // Make sure the target sheet exists, then make it the active one
    while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
        $spreadsheet->createSheet();
    }
    $spreadsheet->setActiveSheetIndex($this->sheetIndex);

    // Forget rowspan bookkeeping left over from a previous load with this
    // reader; stale entries would shift cells of this document sideways
    $this->rowspan = [];

    // Create a new DOM object
    $dom = new DOMDocument();
    // Reload the HTML file into the DOM object
    $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
    if ($loaded === false) {
        throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
    }

    // Discard white space
    $dom->preserveWhiteSpace = false;

    // Cursor handed by reference through the recursive DOM walk
    $row = 0;
    $column = 'A';
    $content = '';
    $this->processDomElement($dom, $spreadsheet->getActiveSheet(), $row, $column, $content);

    // Return
    return $spreadsheet;
}
541
|
|
|
|
542
|
|
|
/**
 * Get sheet index.
 *
 * @return int index of the worksheet that loadIntoExisting() writes to
 */
public function getSheetIndex()
{
    return $this->sheetIndex;
}
551
|
|
|
|
552
|
|
|
/**
 * Set sheet index.
 *
 * @param int $pValue Sheet index
 *
 * @return $this this reader, so calls can be chained
 */
public function setSheetIndex($pValue)
{
    $this->sheetIndex = $pValue;

    return $this;
}
565
|
|
|
|
566
|
|
|
/**
 * Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
 *
 * The search pattern allows an optional NUL byte before, between and after
 * the characters of '<!ENTITY'.
 *
 * @param string $xml
 *
 * @throws Exception when an ENTITY declaration is found
 *
 * @return string the unchanged input when nothing was found
 */
public function securityScan($xml)
{
    $optionalNul = '\\0?';
    $letters = str_split('<!ENTITY');
    $pattern = '/' . $optionalNul . implode($optionalNul, $letters) . $optionalNul . '/';

    if (!preg_match($pattern, $xml)) {
        return $xml;
    }

    throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
582
|
|
|
|
583
|
|
|
/**
 * Apply inline css inline style.
 *
 * NOTES :
 * Currently only intended for td & th element,
 * and only takes 'background-color' and 'color'; property with HEX color
 * (a value starting with '#'). Every other declaration is skipped.
 *
 * TODO :
 * - Implement to other properties, such as border
 *
 * @param Worksheet $sheet
 * @param int $row
 * @param string $column
 * @param array $attributeArray attributes of the element; only 'style' is read
 */
private function applyInlineStyle(&$sheet, $row, $column, $attributeArray)
{
    if (!isset($attributeArray['style'])) {
        return;
    }

    $supported_styles = ['background-color', 'color'];

    // add color styles (background & text) from dom element,currently support : td & th, using ONLY inline css style with RGB color
    $styles = explode(';', $attributeArray['style']);
    foreach ($styles as $st) {
        $value = explode(':', $st);

        // A declaration without ':' has no value part to read
        if (!isset($value[1])) {
            continue;
        }

        $property = trim($value[0]);
        if ($property === '' || !in_array($property, $supported_styles, true)) {
            continue;
        }

        // Only '#'-prefixed values are understood. The colour is worked out
        // afresh for every declaration so an unsupported value can never
        // reuse the colour of a preceding one
        $declared = trim($value[1]);
        if (substr($declared, 0, 1) !== '#') {
            continue;
        }

        // check if has #, so we can get clean hex
        $style_color = (string) substr($declared, 1);
        if (empty($style_color)) {
            continue;
        }

        switch ($property) {
            case 'background-color':
                $sheet->getStyle($column . $row)->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $style_color]]]);

                break;
            case 'color':
                // The colour is passed as-is; a stray '}' used to be appended here
                $sheet->getStyle($column . $row)->applyFromArray(['font' => ['color' => ['rgb' => $style_color]]]);

                break;
        }
    }
}
636
|
|
|
} |
637
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.