Failed Conditions
Push — master ( 735103...6a4138 )
by Adrien
12:48
created

Html::processDomElementThTd()   B

Complexity

Conditions 9
Paths 8

Size

Total Lines 49
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 9.1317

Importance

Changes 0
Metric Value
cc 9
eloc 32
nc 8
nop 6
dl 0
loc 49
ccs 15
cts 17
cp 0.8824
crap 9.1317
rs 8.0555
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use DOMDocument;
6
use DOMElement;
7
use DOMNode;
8
use DOMText;
9
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
10
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
11
use PhpOffice\PhpSpreadsheet\Spreadsheet;
12
use PhpOffice\PhpSpreadsheet\Style\Border;
13
use PhpOffice\PhpSpreadsheet\Style\Color;
14
use PhpOffice\PhpSpreadsheet\Style\Fill;
15
use PhpOffice\PhpSpreadsheet\Style\Font;
16
use PhpOffice\PhpSpreadsheet\Style\Style;
17
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
18
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
19
use Throwable;
20
21
/** Reader that loads an HTML document into a Spreadsheet. */
22
class Html extends BaseReader
23
{
24
    /**
25
     * Sample size to read to determine if it's HTML or not.
26
     */
27
    const TEST_SAMPLE_SIZE = 2048;
28
29
    /**
30
     * Input encoding.
31
     *
32
     * @var string
33
     */
34
    protected $inputEncoding = 'ANSI';
35
36
    /**
37
     * Sheet index to read.
38
     *
39
     * @var int
40
     */
41
    protected $sheetIndex = 0;
42
43
    /**
44
     * Formats.
45
     *
46
     * @var array
47
     */
48
    protected $formats = [
        // Each entry maps an HTML tag name to the style array applied to the
        // cell via applyFromArray() when that tag is encountered.
        'h1' => [
            'font' => [
                'bold' => true,
                'size' => 24,
            ],
        ], //    Bold, 24pt
        'h2' => [
            'font' => [
                'bold' => true,
                'size' => 18,
            ],
        ], //    Bold, 18pt
        'h3' => [
            'font' => [
                'bold' => true,
                'size' => 13.5,
            ],
        ], //    Bold, 13.5pt
        'h4' => [
            'font' => [
                'bold' => true,
                'size' => 12,
            ],
        ], //    Bold, 12pt
        'h5' => [
            'font' => [
                'bold' => true,
                'size' => 10,
            ],
        ], //    Bold, 10pt
        'h6' => [
            'font' => [
                'bold' => true,
                'size' => 7.5,
            ],
        ], //    Bold, 7.5pt
        'a' => [
            'font' => [
                'underline' => true,
                'color' => [
                    'argb' => Color::COLOR_BLUE,
                ],
            ],
        ], //    Blue underlined
        'hr' => [
            'borders' => [
                'bottom' => [
                    'borderStyle' => Border::BORDER_THIN,
                    // Keyed by 'argb' like the 'a' entry; the value used to be
                    // an unkeyed list element, i.e. it named no color component.
                    'color' => [
                        'argb' => Color::COLOR_BLACK,
                    ],
                ],
            ],
        ], //    Bottom border
        'strong' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'b' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'i' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
        'em' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
    ];
124
125
    /**
     * Cell coordinates already claimed by a row-spanning merge.
     *
     * Keys are coordinates such as "B3" and values are true; table cells
     * skip past any column whose coordinate is recorded here.
     *
     * @var array
     */
    protected $rowspan = [];
126
127
    /**
128
     * Create a new HTML Reader instance.
129 32
     */
130
    public function __construct()
    {
        // Base reader set-up comes first
        parent::__construct();

        // Scanner used to vet file/string content before it is parsed
        $scanner = XmlScanner::getInstance($this);
        $this->securityScanner = $scanner;
    }
135
136
    /**
137
     * Validate that the current file is an HTML file.
138
     *
139
     * @param string $pFilename
140
     *
141
     * @return bool
142 28
     */
143
    public function canRead($pFilename)
    {
        // A file that cannot be opened cannot be read
        try {
            $this->openFile($pFilename);
        } catch (Exception $e) {
            return false;
        }

        // Sample both ends of the file, then release the handle before deciding
        $head = $this->readBeginning();
        $tail = $this->readEnding();
        fclose($this->fileHandle);

        // All three heuristics must hold: leading '<', some markup, trailing '>'
        if (!self::startsWithTag($head)) {
            return false;
        }
        if (!self::containsTags($head)) {
            return false;
        }

        return self::endsWithTag($tail);
    }
161 28
162
    /**
     * Read the first TEST_SAMPLE_SIZE bytes of the currently open file.
     *
     * @return false|string whatever fread() returns
     */
    private function readBeginning()
    {
        $handle = $this->fileHandle;

        // Rewind so the sample always comes from the start of the file
        fseek($handle, 0);

        return fread($handle, self::TEST_SAMPLE_SIZE);
    }
168 28
169
    /**
     * Read the last TEST_SAMPLE_SIZE bytes (or the whole file when it is
     * smaller) of the currently open file.
     *
     * @return false|string '' for an empty file, otherwise whatever fread() returns
     */
    private function readEnding()
    {
        $handle = $this->fileHandle;

        // The stream metadata carries the path the handle was opened from
        $path = stream_get_meta_data($handle)['uri'];
        $size = filesize($path);
        if ($size === 0) {
            return '';
        }

        // Never ask for more bytes than the file contains
        $length = $size < self::TEST_SAMPLE_SIZE ? $size : self::TEST_SAMPLE_SIZE;

        fseek($handle, $size - $length);

        return fread($handle, $length);
    }
188 28
189
    /**
     * Tell whether the data, ignoring surrounding whitespace, begins with '<'.
     *
     * @param string $data
     *
     * @return bool
     */
    private static function startsWithTag($data)
    {
        $trimmed = trim($data);
        $firstChar = substr($trimmed, 0, 1);

        return $firstChar === '<';
    }
193 28
194
    /**
     * Tell whether the data, ignoring surrounding whitespace, ends with '>'.
     *
     * @param string $data
     *
     * @return bool
     */
    private static function endsWithTag($data)
    {
        $trimmed = trim($data);
        $lastChar = substr($trimmed, -1, 1);

        return $lastChar === '>';
    }
198 28
199
    /**
     * Tell whether strip_tags() removes anything from the data, i.e. whether
     * the data appears to contain markup.
     *
     * @param string $data
     *
     * @return bool
     */
    private static function containsTags($data)
    {
        $rawLength = strlen($data);
        $strippedLength = strlen(strip_tags($data));

        return $rawLength !== $strippedLength;
    }
203
204
    /**
205
     * Loads Spreadsheet from file.
206
     *
207
     * @param string $pFilename
208
     *
209
     * @return Spreadsheet
210 21
     */
211
    public function load($pFilename)
    {
        // Populate a brand-new workbook from the file and hand it back
        return $this->loadIntoExisting($pFilename, new Spreadsheet());
    }
219
220
    /**
221
     * Set input encoding.
222
     *
223
     * @deprecated no use is made of this property
224
     *
225
     * @param string $pValue Input encoding, eg: 'ANSI'
226 2
     *
227
     * @return $this
228 2
     *
229
     * @codeCoverageIgnore
230 2
     */
231
    public function setInputEncoding($pValue)
    {
        // Stored only; returns $this so calls can be chained
        $this->inputEncoding = $pValue;

        return $this;
    }
237
238
    /**
239
     * Get input encoding.
240
     *
241
     * @deprecated no use is made of this property
242
     *
243
     * @return string
244
     *
245
     * @codeCoverageIgnore
246
     */
247
    public function getInputEncoding()
    {
        $encoding = $this->inputEncoding;

        return $encoding;
    }
251
252 23
    //    Data Array used for testing only, should write to Spreadsheet object on completion of tests
253 23
    protected $dataArray = [];
254
255 23
    /**
     * Current table nesting depth; 0 means the cursor is not inside a table.
     *
     * @var int
     */
    protected $tableLevel = 0;
256 23
257
    /**
     * Start column of each table, indexed by nesting level.
     *
     * @var array
     */
    protected $nestedColumn = ['A'];
258 23
259
    /**
     * Enter a table: increase the nesting level and record its start column.
     *
     * @param string $column column the cursor is on when the table opens
     *
     * @return string the start column recorded for the new level
     */
    protected function setTableStartColumn($column)
    {
        // An outermost table always starts in column A, whatever was passed in
        $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

        $level = ++$this->tableLevel;
        $this->nestedColumn[$level] = $startColumn;

        return $startColumn;
    }
269
270 23
    /**
     * Get the start column recorded for the current table nesting level.
     *
     * @return string
     */
    protected function getTableStartColumn()
    {
        $level = $this->tableLevel;

        return $this->nestedColumn[$level];
    }
274
275 23
    /**
     * Leave a table: decrease the nesting level and drop the most recently
     * recorded start column.
     *
     * @return mixed the value popped off the start-column stack
     */
    protected function releaseTableStartColumn()
    {
        $this->tableLevel--;

        $released = array_pop($this->nestedColumn);

        return $released;
    }
281 22
282 23
    /**
     * Write the accumulated cell content to the sheet and reset the buffer.
     *
     * @param string $column
     * @param int $row
     * @param mixed $cellContent buffer, reset to '' on return
     */
    protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent): void
    {
        if (!is_string($cellContent)) {
            //    Rich Text run - TODO: only recorded in the test data array for now
            $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
        } elseif (trim($cellContent) > '') {
            //    Plain string: only written when something other than whitespace is present
            $sheet->setCellValue($column . $row, $cellContent);
            $this->dataArray[$row][$column] = $cellContent;
        }

        $cellContent = '';
    }
300
301 23
    /**
     * First link of the per-tag handler chain: handles <body>, passes every
     * other element on to processDomElementTitle().
     */
    private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
    {
        // Snapshot the element's attributes as a name => value map
        $attributeArray = [];
        foreach ($child->attributes as $attr) {
            $attributeArray[$attr->name] = $attr->value;
        }

        if ($child->nodeName !== 'body') {
            $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Entering <body>: reset the cursor, the buffer and the table depth
        $this->tableLevel = 0;
        $cellContent = '';
        $column = 'A';
        $row = 1;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
318 10
319
    /**
     * Handles <title> by using its text as the sheet title; passes every
     * other element on to processDomElementSpanEtc().
     */
    private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'title') {
            $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Collect the title text into the buffer, use it, then clear the buffer
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $sheet->setTitle($cellContent, true, false);
        $cellContent = '';
    }
329 10
330 10
    /** Tag names handled by processDomElementSpanEtc(). */
    private static $spanEtc = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];
331
332 10
    /**
     * Handles the inline tags listed in self::$spanEtc; passes every other
     * element on to processDomElementHr().
     */
    private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;
        if (!in_array($tag, self::$spanEtc)) {
            $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // An element with class="comment" also feeds the cell's comment text
        $class = $attributeArray['class'] ?? null;
        if ($class === 'comment') {
            $comment = $sheet->getComment($column . $row);
            $comment->getText()->createTextRun($child->textContent);
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // Apply the tag's predefined style, if it has one
        if (isset($this->formats[$tag])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$tag]);
        }
    }
349
350
    /**
     * Handles the <hr>-specific part (flush, styled rule row), then always
     * continues with processDomElementBr(), which also treats <hr> as a break.
     */
    private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;
        if ($tag === 'hr') {
            $this->flushCell($sheet, $column, $row, $cellContent);
            $row++;
            // The rule is rendered as the tag's predefined style on its own row
            if (isset($this->formats[$tag])) {
                $sheet->getStyle($column . $row)->applyFromArray($this->formats[$tag]);
            }
            $row++;
        }

        // fall through to br
        $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
    }
363
364
    /**
     * Handles <br> and <hr> as line breaks; passes every other element on to
     * processDomElementA().
     */
    private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isLineBreak = in_array($child->nodeName, ['br', 'hr'], true);
        if (!$isLineBreak) {
            $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Inside a table the break becomes a newline in a wrapping cell
            $cellContent .= "\n";
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);

            return;
        }

        //    Outside a table: write what we have and move to the next row
        $this->flushCell($sheet, $column, $row, $cellContent);
        ++$row;
    }
380
381
    /**
     * Handles <a> by attaching its href as the cell hyperlink; passes every
     * other element on to processDomElementH1Etc().
     */
    private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;
        if ($tag !== 'a') {
            $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Only href has an effect; other attributes (including the
        // class="comment-indicator" red square) are ignored.
        if (isset($attributeArray['href'])) {
            $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);
            if (isset($this->formats[$tag])) {
                $sheet->getStyle($column . $row)->applyFromArray($this->formats[$tag]);
            }
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
406 23
407 23
    /** Tag names handled by processDomElementH1Etc(). */
    private static $h1Etc = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];
408 23
409 23
    /**
     * Handles the block-level tags listed in self::$h1Etc; passes every other
     * element on to processDomElementLi().
     *
     * Inside a table the block is appended to the current cell on a new line.
     * Outside a table it gets a row of its own, styled with the tag's
     * predefined format when one exists, and the cursor returns to column A.
     */
    private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;
        if (!in_array($tag, self::$h1Etc, true)) {
            $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Separate from earlier content with a newline. Compare against ''
            //    explicitly: a truthiness test treats the content "0" as empty
            //    and would drop the separator.
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        //    Pending text belongs to the previous row
        if ($cellContent > '') {
            $this->flushCell($sheet, $column, $row, $cellContent);
            ++$row;
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent);

        if (isset($this->formats[$tag])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$tag]);
        }

        ++$row;
        $column = 'A';
    }
436
437
    /**
     * Handles <li>; passes every other element on to processDomElementImg().
     *
     * Inside a table the item is appended to the current cell on a new line.
     * Outside a table it is written on the next row and the cursor returns to
     * column A.
     */
    private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'li') {
            $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Separate from earlier content with a newline. Compare against ''
            //    explicitly: a truthiness test treats the content "0" as empty
            //    and would drop the separator.
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        //    Pending text stays on the current row; the item goes on the next one
        if ($cellContent > '') {
            $this->flushCell($sheet, $column, $row, $cellContent);
        }
        ++$row;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent);
        $column = 'A';
    }
457 23
458 23
    /**
     * Handles <img> by inserting the image at the current cell; passes every
     * other element on to processDomElementTable().
     */
    private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'img') {
            $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->insertImage($sheet, $column, $row, $attributeArray);
    }
466 23
467
    /**
     * Handles <table>, tracking nesting depth and start column around its
     * children; passes every other element on to processDomElementTr().
     */
    private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'table') {
            $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Anything buffered so far is written before the table starts
        $this->flushCell($sheet, $column, $row, $cellContent);

        // Enter the table; a nested table steps back one row
        $column = $this->setTableStartColumn($column);
        if ($this->tableLevel > 1) {
            $row--;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // Leave the table; the level tested below is the one after release
        $column = $this->releaseTableStartColumn();
        if ($this->tableLevel > 1) {
            ++$column;

            return;
        }

        ++$row;
    }
486 23
487 23
    /**
     * Handles <tr>: processes its cells from the table's start column, applies
     * an optional height attribute and advances the row; passes every other
     * element on to processDomElementThTdOther().
     */
    private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'tr') {
            $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Each row starts with an empty buffer at the table's start column
        $cellContent = '';
        $column = $this->getTableStartColumn();
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        $height = $attributeArray['height'] ?? null;
        if ($height !== null) {
            $sheet->getRowDimension($row)->setRowHeight($height);
        }

        ++$row;
    }
503
504
    /**
     * Last link of the handler chain: <td>/<th> go to processDomElementThTd(),
     * any other element simply has its children processed.
     */
    private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isTableCell = in_array($child->nodeName, ['td', 'th'], true);
        if ($isTableCell) {
            $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
512 23
513
    /**
     * Apply a bgcolor attribute, when present, as a solid fill on the cell.
     */
    private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        $bgcolor = $attributeArray['bgcolor'] ?? null;
        if ($bgcolor === null) {
            return;
        }

        $style = $sheet->getStyle($column . $row);
        $fill = [
            'fillType' => Fill::FILL_SOLID,
            'color' => ['rgb' => $this->getStyleColor($bgcolor)],
        ];
        $style->applyFromArray(['fill' => $fill]);
    }
526 2
527 23
    /**
     * Apply a width attribute, when present, to the cell's column.
     */
    private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
    {
        $width = $attributeArray['width'] ?? null;
        if ($width === null) {
            return;
        }

        $sheet->getColumnDimension($column)->setWidth($width);
    }
533
534
    /**
     * Apply a height attribute, when present, to the cell's row.
     */
    private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
    {
        $height = $attributeArray['height'] ?? null;
        if ($height === null) {
            return;
        }

        $sheet->getRowDimension($row)->setRowHeight($height);
    }
540
541
    /**
     * Apply an align attribute, when present, as the cell's horizontal alignment.
     */
    private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        $align = $attributeArray['align'] ?? null;
        if ($align === null) {
            return;
        }

        $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($align);
    }
547 1
548
    /**
     * Apply a valign attribute, when present, as the cell's vertical alignment.
     */
    private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        $valign = $attributeArray['valign'] ?? null;
        if ($valign === null) {
            return;
        }

        $sheet->getStyle($column . $row)->getAlignment()->setVertical($valign);
    }
554 23
555 1
    /**
     * Apply a data-format attribute, when present, as the cell's number format code.
     */
    private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        $formatCode = $attributeArray['data-format'] ?? null;
        if ($formatCode === null) {
            return;
        }

        $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($formatCode);
    }
561 23
562 23
    /**
     * Handles a <td>/<th>: collects its content, applies styling attributes,
     * writes the cell, merges for rowspan/colspan and advances the column.
     */
    private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        // Skip columns occupied by a rowspan started on an earlier row
        while (isset($this->rowspan[$column . $row])) {
            ++$column;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // Inline style first, then the cell value, then the attribute-based formatting
        $this->applyInlineStyle($sheet, $row, $column, $attributeArray);
        $this->flushCell($sheet, $column, $row, $cellContent);

        $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
        $this->processDomElementWidth($sheet, $column, $attributeArray);
        $this->processDomElementHeight($sheet, $row, $attributeArray);
        $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);

        $hasRowspan = isset($attributeArray['rowspan']);
        $hasColspan = isset($attributeArray['colspan']);

        if ($hasRowspan || $hasColspan) {
            // Right-hand column of the merge: colspan - 1 columns past the current one
            $lastColumn = $column;
            if ($hasColspan) {
                $extraColumns = (int) $attributeArray['colspan'] - 1;
                while ($extraColumns-- > 0) {
                    ++$lastColumn;
                }
            }

            // Bottom row of the merge: rowspan - 1 rows below the current one
            $lastRow = $hasRowspan ? $row + (int) $attributeArray['rowspan'] - 1 : $row;

            $range = $column . $row . ':' . $lastColumn . $lastRow;

            // Only a rowspan reserves coordinates for later rows to skip
            if ($hasRowspan) {
                foreach (Coordinate::extractAllCellReferencesInRange($range) as $coordinate) {
                    $this->rowspan[$coordinate] = true;
                }
            }

            $sheet->mergeCells($range);
            $column = $lastColumn;
        }

        ++$column;
    }
612
613 2
    /**
     * Walk the direct children of a node: text nodes are appended to the cell
     * buffer, element nodes go through the per-tag handler chain.
     */
    protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
    {
        foreach ($element->childNodes as $node) {
            if ($node instanceof DOMElement) {
                $this->processDomElementBody($sheet, $row, $column, $cellContent, $node);

                continue;
            }

            if (!($node instanceof DOMText)) {
                continue;
            }

            //    Trim the text and collapse each whitespace run to a single space
            $text = preg_replace('/\s+/u', ' ', trim($node->nodeValue));
            if (is_string($cellContent)) {
                //    Plain text buffer: simply append
                $cellContent .= $text;
            }
            //    A rich text run would need to be appended differently - TODO
        }
    }
629
630
    /**
631 23
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
632
     *
633 23
     * @param string $pFilename
634 1
     *
635
     * @return Spreadsheet
636 23
     */
637
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
    {
        // Refuse anything canRead() does not accept
        $readable = $this->canRead($pFilename);
        if (!$readable) {
            throw new Exception($pFilename . ' is an Invalid HTML file.');
        }

        // Parse the scanned file content into a DOM; any failure counts as "not loaded"
        $dom = new DOMDocument();

        try {
            $html = $this->securityScanner->scanFile($pFilename);
            $loaded = $dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
        } catch (Throwable $e) {
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
        }

        return $this->loadDocument($dom, $spreadsheet);
    }
658
659
    /**
660
     * Spreadsheet from content.
661
     *
662
     * @param string $content
663
     */
664
    public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spreadsheet
    {
        //    Parse the scanned content into a DOM; any failure counts as "not loaded"
        $dom = new DOMDocument();

        try {
            $html = $this->securityScanner->scan($content);
            $loaded = $dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
        } catch (Throwable $e) {
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load content as a DOM Document');
        }

        //    Fall back to a fresh workbook when the caller supplied none
        $target = $spreadsheet ?? new Spreadsheet();

        return $this->loadDocument($dom, $target);
    }
680
681
    /**
682
     * Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
683
     */
684
    private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
    {
        // Add sheets until the requested index exists, then make it active
        $targetIndex = $this->sheetIndex;
        while ($spreadsheet->getSheetCount() <= $targetIndex) {
            $spreadsheet->createSheet();
        }
        $spreadsheet->setActiveSheetIndex($targetIndex);

        // Discard white space
        $document->preserveWhiteSpace = false;

        // Fresh cursor, buffer and rowspan bookkeeping for this document
        $this->rowspan = [];
        $content = '';
        $column = 'A';
        $row = 0;

        $sheet = $spreadsheet->getActiveSheet();
        $this->processDomElement($document, $sheet, $row, $column, $content);

        return $spreadsheet;
    }
703 9
704
    /**
705 9
     * Get sheet index.
706 6
     *
707
     * @return int
708
     */
709
    public function getSheetIndex()
    {
        $index = $this->sheetIndex;

        return $index;
    }
713
714 3
    /**
715
     * Set sheet index.
716
     *
717
     * @param int $pValue Sheet index
718 3
     *
719
     * @return $this
720 3
     */
721 9
    public function setSheetIndex($pValue)
    {
        // Stored only; returns $this so calls can be chained
        $this->sheetIndex = $pValue;

        return $this;
    }
727
728 3
    /**
729
     * Apply inline CSS style.
730 3
     *
731
     * NOTES :
732 6
     * Currently only intended for td & th element,
733 1
     * and only takes 'background-color' and 'color'; property with HEX color
734
     *
735 1
     * TODO :
736
     * - Implement other properties, such as border
737 6
     *
738 1
     * @param Worksheet $sheet
739
     * @param int $row
740 1
     * @param string $column
741
     * @param array $attributeArray
742 6
     */
743 1
    private function applyInlineStyle(&$sheet, $row, $column, $attributeArray): void
744
    {
745 1
        if (!isset($attributeArray['style'])) {
746
            return;
747 6
        }
748 1
749
        if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
750 1
            $columnTo = $column;
751
            for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
752 6
                ++$columnTo;
753 1
            }
754
            $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
755 1
            $cellStyle = $sheet->getStyle($range);
756
        } elseif (isset($attributeArray['rowspan'])) {
757 5
            $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
758 1
            $cellStyle = $sheet->getStyle($range);
759 1
        } elseif (isset($attributeArray['colspan'])) {
760
            $columnTo = $column;
761
            for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
762 1
                ++$columnTo;
763
            }
764 5
            $range = $column . $row . ':' . $columnTo . $row;
765 1
            $cellStyle = $sheet->getStyle($range);
766 1
        } else {
767
            $cellStyle = $sheet->getStyle($column . $row);
768
        }
769 1
770
        // add color styles (background & text) from dom element,currently support : td & th, using ONLY inline css style with RGB color
771 5
        $styles = explode(';', $attributeArray['style']);
772 1
        foreach ($styles as $st) {
773 1
            $value = explode(':', $st);
774
            $styleName = isset($value[0]) ? trim($value[0]) : null;
775
            $styleValue = isset($value[1]) ? trim($value[1]) : null;
776 1
777
            if (!$styleName) {
778 5
                continue;
779 1
            }
780
781 1
            switch ($styleName) {
782
                case 'background':
783 5
                case 'background-color':
784
                    $styleColor = $this->getStyleColor($styleValue);
785 1
786 1
                    if (!$styleColor) {
787
                        continue 2;
788 1
                    }
789 1
790 1
                    $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);
791
792 1
                    break;
793
                case 'color':
794
                    $styleColor = $this->getStyleColor($styleValue);
795 1
796
                    if (!$styleColor) {
797 4
                        continue 2;
798 1
                    }
799
800 1
                    $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
801
802 4
                    break;
803 2
804
                case 'border':
805 2
                    $this->setBorderStyle($cellStyle, $styleValue, 'allBorders');
806
807 4
                    break;
808 1
809 1
                case 'border-top':
810
                    $this->setBorderStyle($cellStyle, $styleValue, 'top');
811
812 1
                    break;
813
814 3
                case 'border-bottom':
815 1
                    $this->setBorderStyle($cellStyle, $styleValue, 'bottom');
816 1
817
                    break;
818
819 1
                case 'border-left':
820
                    $this->setBorderStyle($cellStyle, $styleValue, 'left');
821 2
822 1
                    break;
823 1
824
                case 'border-right':
825
                    $this->setBorderStyle($cellStyle, $styleValue, 'right');
826 1
827
                    break;
828 2
829 2
                case 'font-size':
830 2
                    $cellStyle->getFont()->setSize(
831
                        (float) $styleValue
832
                    );
833 2
834
                    break;
835
836 9
                case 'font-weight':
837
                    if ($styleValue === 'bold' || $styleValue >= 500) {
838
                        $cellStyle->getFont()->setBold(true);
839
                    }
840
841
                    break;
842
843
                case 'font-style':
844
                    if ($styleValue === 'italic') {
845 4
                        $cellStyle->getFont()->setItalic(true);
846
                    }
847 4
848 4
                    break;
849
850
                case 'font-family':
851
                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValue));
852
853
                    break;
854
855
                case 'text-decoration':
856
                    switch ($styleValue) {
857
                        case 'underline':
858
                            $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);
859
860 1
                            break;
861
                        case 'line-through':
862 1
                            $cellStyle->getFont()->setStrikethrough(true);
863
864
                            break;
865
                    }
866 1
867 1
                    break;
868 1
869 1
                case 'text-align':
870
                    $cellStyle->getAlignment()->setHorizontal($styleValue);
871 1
872 1
                    break;
873 1
874 1
                case 'vertical-align':
875 1
                    $cellStyle->getAlignment()->setVertical($styleValue);
876 1
877 1
                    break;
878
879 1
                case 'width':
880
                    $sheet->getColumnDimension($column)->setWidth(
881
                        str_replace('px', '', $styleValue)
882
                    );
883 1
884
                    break;
885
886
                case 'height':
887 1
                    $sheet->getRowDimension($row)->setRowHeight(
888
                        str_replace('px', '', $styleValue)
889
                    );
890
891 1
                    break;
892 1
893
                case 'word-wrap':
894
                    $cellStyle->getAlignment()->setWrapText(
895 1
                        $styleValue === 'break-word'
896 1
                    );
897
898 1
                    break;
899
900
                case 'text-indent':
901
                    $cellStyle->getAlignment()->setIndent(
902
                        (int) str_replace(['px'], '', $styleValue)
903
                    );
904
905
                    break;
906
            }
907 1
        }
908
    }
909
910 1
    /**
     * Resolve a CSS colour value to a bare hex string.
     *
     * A value starting with '#' is returned with that leading '#' removed;
     * any other value is handed to the Helper\Html colour-name lookup.
     *
     * @param null|string $value CSS colour value; null is treated as an empty string
     *
     * @return null|string
     */
    public function getStyleColor($value)
    {
        // Callers in this class can pass null (a border of "none", or a style
        // declaration without a value). Normalise once so strpos() never receives
        // null, which is deprecated as of PHP 8.1.
        $value = (string) $value;

        if (strpos($value, '#') === 0) {
            return substr($value, 1);
        }

        return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup($value);
    }
925
926
    /**
     * Create a Drawing from the given attributes and anchor it at the given cell.
     *
     * Does nothing when no 'src' attribute is set. The optional 'alt', 'width' and
     * 'height' attributes are applied to the drawing; afterwards the column width
     * and row height are derived from the drawing's resulting dimensions.
     *
     * @param string $column
     * @param int $row
     */
    private function insertImage(Worksheet $sheet, $column, $row, array $attributes): void
    {
        if (!isset($attributes['src'])) {
            return;
        }

        // Read every attribute up front; missing width/height become 0.0, which is
        // skipped by the truthiness checks below exactly like an absent value.
        $imagePath = urldecode($attributes['src']);
        $requestedWidth = (float) ($attributes['width'] ?? 0);
        $requestedHeight = (float) ($attributes['height'] ?? 0);
        $altText = $attributes['alt'] ?? null;

        $cellCoordinate = $column . $row;

        $image = new Drawing();
        $image->setPath($imagePath);
        $image->setWorksheet($sheet);
        $image->setCoordinates($cellCoordinate);
        $image->setOffsetX(0);
        $image->setOffsetY(10);
        $image->setResizeProportional(true);

        if ($altText) {
            $image->setName($altText);
        }

        // Width is applied before height; keep this order, since proportional
        // resizing is enabled on the drawing.
        if ($requestedWidth) {
            $image->setWidth((int) $requestedWidth);
        }

        if ($requestedHeight) {
            $image->setHeight((int) $requestedHeight);
        }

        // Size the hosting column and row from the drawing's final dimensions.
        $columnWidth = $image->getWidth() / 6;
        $sheet->getColumnDimension($column)->setWidth($columnWidth);

        $rowHeight = $image->getHeight() * 0.9;
        $sheet->getRowDimension($row)->setRowHeight($rowHeight);
    }
969
970
    /**
     * Lookup table from HTML border style keyword to the matching
     * PhpSpreadsheet Border::BORDER_* constant.
     *
     * @var array
     */
    private static $borderMappings = [
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium' => Border::BORDER_MEDIUM,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'none' => Border::BORDER_NONE,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
        'solid' => Border::BORDER_THIN,
        'thick' => Border::BORDER_THICK,
    ];
986
987
    /**
     * Return the table that maps HTML border style keywords to
     * PhpSpreadsheet border style constants.
     *
     * @return array
     */
    public static function getBorderMappings(): array
    {
        return self::$borderMappings;
    }
991
992
    /**
     * Translate an HTML border style keyword into the corresponding
     * PhpSpreadsheet border style.
     *
     * @param string $style
     *
     * @return null|string the mapped border style, or null when the keyword is not in the mapping table
     */
    public function getBorderStyle($style)
    {
        // Unknown keywords have no equivalent; report that with null.
        if (!array_key_exists($style, self::$borderMappings)) {
            return null;
        }

        return self::$borderMappings[$style];
    }
1003
1004
    /**
     * Apply a CSS border declaration to one border position of a cell style.
     *
     * The value is either the single keyword matching Border::BORDER_NONE or a
     * whitespace-separated shorthand whose second token is the border style
     * keyword and whose third token is the colour (the first token is ignored).
     *
     * @param string $styleValue CSS border value
     * @param string $type border key for the style array, e.g. 'allBorders', 'top', 'bottom', 'left', 'right'
     */
    private function setBorderStyle(Style $cellStyle, $styleValue, $type): void
    {
        // A declaration without a value arrives as null; normalise so trim() and
        // the tokeniser below always operate on a string.
        $styleValue = trim((string) $styleValue);

        if ($styleValue === Border::BORDER_NONE) {
            $borderStyle = Border::BORDER_NONE;
            $color = '';
        } else {
            // Split on runs of whitespace so repeated spaces do not shift the
            // tokens, and default missing tokens instead of raising
            // undefined-offset warnings on short values such as "1px solid".
            $parts = preg_split('/\s+/', $styleValue, -1, PREG_SPLIT_NO_EMPTY) ?: [];
            $borderStyle = $parts[1] ?? null;
            $color = $parts[2] ?? '';
        }

        $cellStyle->applyFromArray([
            'borders' => [
                $type => [
                    'borderStyle' => $this->getBorderStyle($borderStyle),
                    'color' => ['rgb' => $this->getStyleColor($color)],
                ],
            ],
        ]);
    }
1026
}
1027