Passed
Pull Request — master (#4142)
by Owen
16:30 queued 03:39
created

Html::getStyleArray()   B

Complexity

Conditions 8
Paths 2

Size

Total Lines 29
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 8.0747

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 20
c 1
b 0
f 0
dl 0
loc 29
ccs 17
cts 19
cp 0.8947
rs 8.4444
cc 8
nc 2
nop 1
crap 8.0747
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use DOMAttr;
6
use DOMDocument;
7
use DOMElement;
8
use DOMNode;
9
use DOMText;
10
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
11
use PhpOffice\PhpSpreadsheet\Cell\DataType;
12
use PhpOffice\PhpSpreadsheet\Comment;
13
use PhpOffice\PhpSpreadsheet\Document\Properties;
14
use PhpOffice\PhpSpreadsheet\Exception as SpreadsheetException;
15
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
16
use PhpOffice\PhpSpreadsheet\Helper\Html as HelperHtml;
17
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
18
use PhpOffice\PhpSpreadsheet\Spreadsheet;
19
use PhpOffice\PhpSpreadsheet\Style\Border;
20
use PhpOffice\PhpSpreadsheet\Style\Color;
21
use PhpOffice\PhpSpreadsheet\Style\Fill;
22
use PhpOffice\PhpSpreadsheet\Style\Font;
23
use PhpOffice\PhpSpreadsheet\Style\Style;
24
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
25
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
26
use Throwable;
27
28
class Html extends BaseReader
29
{
30
    /**
31
     * Sample size to read to determine if it's HTML or not.
32
     */
33
    const TEST_SAMPLE_SIZE = 2048;
34
35
    private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/';
36
37
    private const DECLARES_CHARSET = '/ charset=/i';
38
39
    /**
40
     * Input encoding.
41
     */
42
    protected string $inputEncoding = 'ANSI';
43
44
    /**
45
     * Sheet index to read.
46
     */
47
    protected int $sheetIndex = 0;
48
49
    /**
50
     * Formats.
51
     */
52
    protected array $formats = [
53
        'h1' => [
54
            'font' => [
55
                'bold' => true,
56
                'size' => 24,
57
            ],
58
        ], //    Bold, 24pt
59
        'h2' => [
60
            'font' => [
61
                'bold' => true,
62
                'size' => 18,
63
            ],
64
        ], //    Bold, 18pt
65
        'h3' => [
66
            'font' => [
67
                'bold' => true,
68
                'size' => 13.5,
69
            ],
70
        ], //    Bold, 13.5pt
71
        'h4' => [
72
            'font' => [
73
                'bold' => true,
74
                'size' => 12,
75
            ],
76
        ], //    Bold, 12pt
77
        'h5' => [
78
            'font' => [
79
                'bold' => true,
80
                'size' => 10,
81
            ],
82
        ], //    Bold, 10pt
83
        'h6' => [
84
            'font' => [
85
                'bold' => true,
86
                'size' => 7.5,
87
            ],
88
        ], //    Bold, 7.5pt
89
        'a' => [
90
            'font' => [
91
                'underline' => true,
92
                'color' => [
93
                    'argb' => Color::COLOR_BLUE,
94
                ],
95
            ],
96
        ], //    Blue underlined
97
        'hr' => [
98
            'borders' => [
99
                'bottom' => [
100
                    'borderStyle' => Border::BORDER_THIN,
101
                    'color' => [
102
                        Color::COLOR_BLACK,
103
                    ],
104
                ],
105
            ],
106
        ], //    Bottom border
107
        'strong' => [
108
            'font' => [
109
                'bold' => true,
110
            ],
111
        ], //    Bold
112
        'b' => [
113
            'font' => [
114
                'bold' => true,
115
            ],
116
        ], //    Bold
117
        'i' => [
118
            'font' => [
119
                'italic' => true,
120
            ],
121
        ], //    Italic
122
        'em' => [
123
            'font' => [
124
                'italic' => true,
125
            ],
126
        ], //    Italic
127
    ];
128
129
    protected array $rowspan = [];
130
131
    /**
132
     * Create a new HTML Reader instance.
133
     */
134 501
    public function __construct()
135
    {
136 501
        parent::__construct();
137 501
        $this->securityScanner = XmlScanner::getInstance($this);
138
    }
139
140
    /**
141
     * Validate that the current file is an HTML file.
142
     */
143 482
    public function canRead(string $filename): bool
144
    {
145
        // Check if file exists
146
        try {
147 482
            $this->openFile($filename);
148 1
        } catch (Exception) {
149 1
            return false;
150
        }
151
152 481
        $beginning = preg_replace(self::STARTS_WITH_BOM, '', $this->readBeginning()) ?? '';
153
154 481
        $startWithTag = self::startsWithTag($beginning);
155 481
        $containsTags = self::containsTags($beginning);
156 481
        $endsWithTag = self::endsWithTag($this->readEnding());
157
158 481
        fclose($this->fileHandle);
159
160 481
        return $startWithTag && $containsTags && $endsWithTag;
161
    }
162
163 481
    private function readBeginning(): string
164
    {
165 481
        fseek($this->fileHandle, 0);
166
167 481
        return (string) fread($this->fileHandle, self::TEST_SAMPLE_SIZE);
168
    }
169
170 481
    private function readEnding(): string
171
    {
172 481
        $meta = stream_get_meta_data($this->fileHandle);
173
        // Phpstan incorrectly flags following line for Php8.2-, corrected in 8.3
174 481
        $filename = $meta['uri']; //@phpstan-ignore-line
175
176 481
        $size = (int) filesize($filename);
177 481
        if ($size === 0) {
178 1
            return '';
179
        }
180
181 480
        $blockSize = self::TEST_SAMPLE_SIZE;
182 480
        if ($size < $blockSize) {
183 43
            $blockSize = $size;
184
        }
185
186 480
        fseek($this->fileHandle, $size - $blockSize);
187
188 480
        return (string) fread($this->fileHandle, $blockSize);
189
    }
190
191 481
    private static function startsWithTag(string $data): bool
192
    {
193 481
        return str_starts_with(trim($data), '<');
194
    }
195
196 481
    private static function endsWithTag(string $data): bool
197
    {
198 481
        return str_ends_with(trim($data), '>');
199
    }
200
201 481
    private static function containsTags(string $data): bool
202
    {
203 481
        return strlen($data) !== strlen(strip_tags($data));
204
    }
205
206
    /**
207
     * Loads Spreadsheet from file.
208
     */
209 463
    public function loadSpreadsheetFromFile(string $filename): Spreadsheet
210
    {
211
        // Create new Spreadsheet
212 463
        $spreadsheet = new Spreadsheet();
213
214
        // Load into this instance
215 463
        return $this->loadIntoExisting($filename, $spreadsheet);
216
    }
217
218
    //    Data Array used for testing only, should write to Spreadsheet object on completion of tests
219
220
    protected array $dataArray = [];
221
222
    protected int $tableLevel = 0;
223
224
    protected array $nestedColumn = ['A'];
225
226 477
    protected function setTableStartColumn(string $column): string
227
    {
228 477
        if ($this->tableLevel == 0) {
229 477
            $column = 'A';
230
        }
231 477
        ++$this->tableLevel;
232 477
        $this->nestedColumn[$this->tableLevel] = $column;
233
234 477
        return $this->nestedColumn[$this->tableLevel];
235
    }
236
237 473
    protected function getTableStartColumn(): string
238
    {
239 473
        return $this->nestedColumn[$this->tableLevel];
240
    }
241
242 476
    protected function releaseTableStartColumn(): string
243
    {
244 476
        --$this->tableLevel;
245
246 476
        return array_pop($this->nestedColumn);
247
    }
248
249
    /**
250
     * Flush cell.
251
     */
252 478
    protected function flushCell(Worksheet $sheet, string $column, int|string $row, mixed &$cellContent, array $attributeArray): void
253
    {
254 478
        if (is_string($cellContent)) {
255
            //    Simple String content
256 478
            if (trim($cellContent) > '') {
257
                //    Only actually write it if there's content in the string
258
                //    Write to worksheet to be done here...
259
                //    ... we return the cell, so we can mess about with styles more easily
260
261
                // Set cell value explicitly if there is data-type attribute
262 464
                if (isset($attributeArray['data-type'])) {
263 1
                    $datatype = $attributeArray['data-type'];
264 1
                    if (in_array($datatype, [DataType::TYPE_STRING, DataType::TYPE_STRING2, DataType::TYPE_INLINE])) {
265
                        //Prevent to Excel treat string with beginning equal sign or convert big numbers to scientific number
266 1
                        if (str_starts_with($cellContent, '=')) {
267 1
                            $sheet->getCell($column . $row)
268 1
                                ->getStyle()
269 1
                                ->setQuotePrefix(true);
270
                        }
271
                    }
272
273
                    //catching the Exception and ignoring the invalid data types
274
                    try {
275 1
                        $sheet->setCellValueExplicit($column . $row, $cellContent, $attributeArray['data-type']);
276 1
                    } catch (SpreadsheetException) {
277 1
                        $sheet->setCellValue($column . $row, $cellContent);
278
                    }
279
                } else {
280 463
                    $sheet->setCellValue($column . $row, $cellContent);
281
                }
282 464
                $this->dataArray[$row][$column] = $cellContent;
283
            }
284
        } else {
285
            //    We have a Rich Text run
286
            //    TODO
287
            $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
288
        }
289 478
        $cellContent = (string) '';
290
    }
291
292 478
    private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
293
    {
294 478
        $attributeArray = [];
295
        /** @var DOMAttr $attribute */
296 478
        foreach ($child->attributes as $attribute) {
297 465
            $attributeArray[$attribute->name] = $attribute->value;
298
        }
299
300 478
        if ($child->nodeName === 'body') {
301 478
            $row = 1;
302 478
            $column = 'A';
303 478
            $cellContent = '';
304 478
            $this->tableLevel = 0;
305 478
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
306
        } else {
307 478
            $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);
308
        }
309
    }
310
311 478
    private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
312
    {
313 478
        if ($child->nodeName === 'title') {
314 445
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
315
316
            try {
317 445
                $sheet->setTitle($cellContent, true, true);
318 442
                $sheet->getParent()?->getProperties()?->setTitle($cellContent);
319 3
            } catch (SpreadsheetException) {
320
                // leave default title if too long or illegal chars
321
            }
322 445
            $cellContent = '';
323
        } else {
324 478
            $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);
325
        }
326
    }
327
328
    private const SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];
329
330 478
    private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
331
    {
332 478
        if (in_array((string) $child->nodeName, self::SPAN_ETC, true)) {
333 430
            if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') {
334 9
                $sheet->getComment($column . $row)
335 9
                    ->getText()
336 9
                    ->createTextRun($child->textContent);
337 9
                if (isset($attributeArray['dir']) && $attributeArray['dir'] === 'rtl') {
338 1
                    $sheet->getComment($column . $row)->setTextboxDirection(Comment::TEXTBOX_DIRECTION_RTL);
339
                }
340 9
                if (isset($attributeArray['style'])) {
341 2
                    $alignStyle = $attributeArray['style'];
342 2
                    if (preg_match('/\\btext-align:\\s*(left|right|center|justify)\\b/', $alignStyle, $matches) === 1) {
343 2
                        $sheet->getComment($column . $row)->setAlignment($matches[1]);
344
                    }
345
                }
346
            } else {
347 430
                $this->processDomElement($child, $sheet, $row, $column, $cellContent);
348
            }
349
350 430
            if (isset($this->formats[$child->nodeName])) {
351 2
                $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
352
            }
353
        } else {
354 478
            $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);
355
        }
356
    }
357
358 478
    private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
359
    {
360 478
        if ($child->nodeName === 'hr') {
361 1
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
362 1
            ++$row;
363 1
            if (isset($this->formats[$child->nodeName])) {
364 1
                $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
365
            }
366 1
            ++$row;
367
        }
368
        // fall through to br
369 478
        $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
370
    }
371
372 478
    private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
373
    {
374 478
        if ($child->nodeName === 'br' || $child->nodeName === 'hr') {
375 4
            if ($this->tableLevel > 0) {
376
                //    If we're inside a table, replace with a newline and set the cell to wrap
377 4
                $cellContent .= "\n";
378 4
                $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
379
            } else {
380
                //    Otherwise flush our existing content and move the row cursor on
381 1
                $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
382 1
                ++$row;
383
            }
384
        } else {
385 478
            $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);
386
        }
387
    }
388
389 478
    private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
390
    {
391 478
        if ($child->nodeName === 'a') {
392 12
            foreach ($attributeArray as $attributeName => $attributeValue) {
393
                switch ($attributeName) {
394 12
                    case 'href':
395 3
                        $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue);
396 3
                        if (isset($this->formats[$child->nodeName])) {
397 3
                            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
398
                        }
399
400 3
                        break;
401 10
                    case 'class':
402 9
                        if ($attributeValue === 'comment-indicator') {
403 9
                            break; // Ignore - it's just a red square.
404
                        }
405
                }
406
            }
407
            // no idea why this should be needed
408
            //$cellContent .= ' ';
409 12
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
410
        } else {
411 478
            $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);
412
        }
413
    }
414
415
    private const H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];
416
417 478
    private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
418
    {
419 478
        if (in_array((string) $child->nodeName, self::H1_ETC, true)) {
420 2
            if ($this->tableLevel > 0) {
421
                //    If we're inside a table, replace with a newline
422 1
                $cellContent .= $cellContent ? "\n" : '';
423 1
                $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
424 1
                $this->processDomElement($child, $sheet, $row, $column, $cellContent);
425
            } else {
426 2
                if ($cellContent > '') {
427 1
                    $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
428 1
                    ++$row;
429
                }
430 2
                $this->processDomElement($child, $sheet, $row, $column, $cellContent);
431 2
                $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
432
433 2
                if (isset($this->formats[$child->nodeName])) {
434 1
                    $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
435
                }
436
437 2
                ++$row;
438 2
                $column = 'A';
439
            }
440
        } else {
441 478
            $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);
442
        }
443
    }
444
445 478
    private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
446
    {
447 478
        if ($child->nodeName === 'li') {
448 2
            if ($this->tableLevel > 0) {
449
                //    If we're inside a table, replace with a newline
450 1
                $cellContent .= $cellContent ? "\n" : '';
451 1
                $this->processDomElement($child, $sheet, $row, $column, $cellContent);
452
            } else {
453 2
                if ($cellContent > '') {
454 1
                    $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
455
                }
456 2
                ++$row;
457 2
                $this->processDomElement($child, $sheet, $row, $column, $cellContent);
458 2
                $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
459 2
                $column = 'A';
460
            }
461
        } else {
462 478
            $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);
463
        }
464
    }
465
466 478
    private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
467
    {
468 478
        if ($child->nodeName === 'img') {
469 11
            $this->insertImage($sheet, $column, $row, $attributeArray);
470
        } else {
471 478
            $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);
472
        }
473
    }
474
475
    private string $currentColumn = 'A';
476
477 478
    private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
478
    {
479 478
        if ($child->nodeName === 'table') {
480 477
            if (isset($attributeArray['class'])) {
481 430
                $classes = explode(' ', $attributeArray['class']);
482 430
                $sheet->setShowGridlines(in_array('gridlines', $classes, true));
483 430
                $sheet->setPrintGridlines(in_array('gridlinesp', $classes, true));
484
            }
485 477
            $this->currentColumn = 'A';
486 477
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
487 477
            $column = $this->setTableStartColumn($column);
488 477
            if ($this->tableLevel > 1 && $row > 1) {
489 2
                --$row;
490
            }
491 477
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
492 476
            $column = $this->releaseTableStartColumn();
493 476
            if ($this->tableLevel > 1) {
494 2
                ++$column;
495
            } else {
496 476
                ++$row;
497
            }
498
        } else {
499 478
            $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);
500
        }
501
    }
502
503 478
    private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
504
    {
505 478
        if ($child->nodeName === 'col') {
506 428
            $this->applyInlineStyle($sheet, -1, $this->currentColumn, $attributeArray);
507 428
            ++$this->currentColumn;
508 478
        } elseif ($child->nodeName === 'tr') {
509 473
            $column = $this->getTableStartColumn();
510 473
            $cellContent = '';
511 473
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
512
513 472
            if (isset($attributeArray['height'])) {
514 1
                $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
515
            }
516
517 472
            ++$row;
518
        } else {
519 478
            $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);
520
        }
521
    }
522
523 478
    private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
524
    {
525 478
        if ($child->nodeName !== 'td' && $child->nodeName !== 'th') {
526 478
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
527
        } else {
528 473
            $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);
529
        }
530
    }
531
532 472
    private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
533
    {
534 472
        if (isset($attributeArray['bgcolor'])) {
535 1
            $sheet->getStyle("$column$row")->applyFromArray(
536 1
                [
537 1
                    'fill' => [
538 1
                        'fillType' => Fill::FILL_SOLID,
539 1
                        'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
540 1
                    ],
541 1
                ]
542 1
            );
543
        }
544
    }
545
546 472
    private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
547
    {
548 472
        if (isset($attributeArray['width'])) {
549 1
            $sheet->getColumnDimension($column)->setWidth((new CssDimension($attributeArray['width']))->width());
550
        }
551
    }
552
553 472
    private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
554
    {
555 472
        if (isset($attributeArray['height'])) {
556 1
            $sheet->getRowDimension($row)->setRowHeight((new CssDimension($attributeArray['height']))->height());
557
        }
558
    }
559
560 472
    private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
561
    {
562 472
        if (isset($attributeArray['align'])) {
563 1
            $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray['align']);
564
        }
565
    }
566
567 472
    private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
568
    {
569 472
        if (isset($attributeArray['valign'])) {
570 1
            $sheet->getStyle($column . $row)->getAlignment()->setVertical($attributeArray['valign']);
571
        }
572
    }
573
574 472
    private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
575
    {
576 472
        if (isset($attributeArray['data-format'])) {
577 1
            $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray['data-format']);
578
        }
579
    }
580
581 473
    private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
582
    {
583 473
        while (isset($this->rowspan[$column . $row])) {
584 3
            ++$column;
585
        }
586 473
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
587
588
        // apply inline style
589 472
        $this->applyInlineStyle($sheet, $row, $column, $attributeArray);
590
591 472
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
592
593 472
        $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
594 472
        $this->processDomElementWidth($sheet, $column, $attributeArray);
595 472
        $this->processDomElementHeight($sheet, $row, $attributeArray);
596 472
        $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
597 472
        $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
598 472
        $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);
599
600 472
        if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
601
            //create merging rowspan and colspan
602 2
            $columnTo = $column;
603 2
            for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
604 2
                ++$columnTo;
605
            }
606 2
            $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
607 2
            foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
608 2
                $this->rowspan[$value] = true;
609
            }
610 2
            $sheet->mergeCells($range);
611 2
            $column = $columnTo;
612 472
        } elseif (isset($attributeArray['rowspan'])) {
613
            //create merging rowspan
614 3
            $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
615 3
            foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
616 3
                $this->rowspan[$value] = true;
617
            }
618 3
            $sheet->mergeCells($range);
619 472
        } elseif (isset($attributeArray['colspan'])) {
620
            //create merging colspan
621 3
            $columnTo = $column;
622 3
            for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
623 3
                ++$columnTo;
624
            }
625 3
            $sheet->mergeCells($column . $row . ':' . $columnTo . $row);
626 3
            $column = $columnTo;
627
        }
628
629 472
        ++$column;
630
    }
631
632 478
    protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
633
    {
634 478
        foreach ($element->childNodes as $child) {
635 478
            if ($child instanceof DOMText) {
636 475
                $domText = (string) preg_replace('/\s+/', ' ', trim($child->nodeValue ?? ''));
637 475
                if ($domText === "\u{a0}") {
638 12
                    $domText = '';
639
                }
640 475
                if (is_string($cellContent)) {
641
                    //    simply append the text if the cell content is a plain text string
642 475
                    $cellContent .= $domText;
643
                }
644
                //    but if we have a rich text run instead, we need to append it correctly
645
                //    TODO
646 478
            } elseif ($child instanceof DOMElement) {
647 478
                $this->processDomElementBody($sheet, $row, $column, $cellContent, $child);
648
            }
649
        }
650
    }
651
652
    /**
653
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
654
     */
655 463
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
656
    {
657
        // Validate
658 463
        if (!$this->canRead($filename)) {
659 1
            throw new Exception($filename . ' is an Invalid HTML file.');
660
        }
661
662
        // Create a new DOM object
663 462
        $dom = new DOMDocument();
664
665
        // Reload the HTML file into the DOM object
666
        try {
667 462
            $convert = $this->getSecurityScannerOrThrow()->scanFile($filename);
668 462
            $convert = self::replaceNonAsciiIfNeeded($convert);
669 462
            $loaded = ($convert === null) ? false : $dom->loadHTML($convert);
670 1
        } catch (Throwable $e) {
671 1
            $loaded = false;
672
        }
673 462
        if ($loaded === false) {
674 1
            throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $e ?? null);
675
        }
676 461
        self::loadProperties($dom, $spreadsheet);
677
678 461
        return $this->loadDocument($dom, $spreadsheet);
679
    }
680
681 478
    private static function loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet): void
682
    {
683 478
        $properties = $spreadsheet->getProperties();
684 478
        foreach ($dom->getElementsByTagName('meta') as $meta) {
685 438
            $metaContent = (string) $meta->getAttribute('content');
686 438
            if ($metaContent !== '') {
687 430
                $metaName = (string) $meta->getAttribute('name');
688
                switch ($metaName) {
689 430
                    case 'author':
690 428
                        $properties->setCreator($metaContent);
691
692 428
                        break;
693 430
                    case 'category':
694 1
                        $properties->setCategory($metaContent);
695
696 1
                        break;
697 430
                    case 'company':
698 1
                        $properties->setCompany($metaContent);
699
700 1
                        break;
701 430
                    case 'created':
702 428
                        $properties->setCreated($metaContent);
703
704 428
                        break;
705 430
                    case 'description':
706 1
                        $properties->setDescription($metaContent);
707
708 1
                        break;
709 430
                    case 'keywords':
710 1
                        $properties->setKeywords($metaContent);
711
712 1
                        break;
713 430
                    case 'lastModifiedBy':
714 428
                        $properties->setLastModifiedBy($metaContent);
715
716 428
                        break;
717 430
                    case 'manager':
718 1
                        $properties->setManager($metaContent);
719
720 1
                        break;
721 430
                    case 'modified':
722 428
                        $properties->setModified($metaContent);
723
724 428
                        break;
725 430
                    case 'subject':
726 1
                        $properties->setSubject($metaContent);
727
728 1
                        break;
729 430
                    case 'title':
730 426
                        $properties->setTitle($metaContent);
731
732 426
                        break;
733 430
                    case 'viewport':
734 1
                        $properties->setViewport($metaContent);
735
736 1
                        break;
737
                    default:
738 430
                        if (preg_match('/^custom[.](bool|date|float|int|string)[.](.+)$/', $metaName, $matches) === 1) {
739 1
                            match ($matches[1]) {
740 1
                                'bool' => $properties->setCustomProperty($matches[2], (bool) $metaContent, Properties::PROPERTY_TYPE_BOOLEAN),
741 1
                                'float' => $properties->setCustomProperty($matches[2], (float) $metaContent, Properties::PROPERTY_TYPE_FLOAT),
742 1
                                'int' => $properties->setCustomProperty($matches[2], (int) $metaContent, Properties::PROPERTY_TYPE_INTEGER),
743 1
                                'date' => $properties->setCustomProperty($matches[2], $metaContent, Properties::PROPERTY_TYPE_DATE),
744
                                // string
745 1
                                default => $properties->setCustomProperty($matches[2], $metaContent, Properties::PROPERTY_TYPE_STRING),
746 1
                            };
747
                        }
748
                }
749
            }
750
        }
751 478
        if (!empty($dom->baseURI)) {
752 1
            $properties->setHyperlinkBase($dom->baseURI);
753
        }
754
    }
755
756 7
    private static function replaceNonAscii(array $matches): string
757
    {
758 7
        return '&#' . mb_ord($matches[0], 'UTF-8') . ';';
759
    }
760
761 479
    private static function replaceNonAsciiIfNeeded(string $convert): ?string
762
    {
763 479
        if (preg_match(self::STARTS_WITH_BOM, $convert) !== 1 && preg_match(self::DECLARES_CHARSET, $convert) !== 1) {
764 35
            $lowend = "\u{80}";
765 35
            $highend = "\u{10ffff}";
766 35
            $regexp = "/[$lowend-$highend]/u";
767
            /** @var callable $callback */
768 35
            $callback = [self::class, 'replaceNonAscii'];
769 35
            $convert = preg_replace_callback($regexp, $callback, $convert);
770
        }
771
772 479
        return $convert;
773
    }
774
775
    /**
776
     * Spreadsheet from content.
777
     */
778 17
    public function loadFromString(string $content, ?Spreadsheet $spreadsheet = null): Spreadsheet
779
    {
780
        //    Create a new DOM object
781 17
        $dom = new DOMDocument();
782
783
        //    Reload the HTML file into the DOM object
784
        try {
785 17
            $convert = $this->getSecurityScannerOrThrow()->scan($content);
786 17
            $convert = self::replaceNonAsciiIfNeeded($convert);
787 17
            $loaded = ($convert === null) ? false : $dom->loadHTML($convert);
788
        } catch (Throwable $e) {
789
            $loaded = false;
790
        }
791 17
        if ($loaded === false) {
792
            throw new Exception('Failed to load content as a DOM Document', 0, $e ?? null);
793
        }
794 17
        $spreadsheet = $spreadsheet ?? new Spreadsheet();
795 17
        self::loadProperties($dom, $spreadsheet);
796
797 17
        return $this->loadDocument($dom, $spreadsheet);
798
    }
799
800
    /**
801
     * Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
802
     */
803 478
    private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
804
    {
805 478
        while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
806 2
            $spreadsheet->createSheet();
807
        }
808 478
        $spreadsheet->setActiveSheetIndex($this->sheetIndex);
809
810
        // Discard white space
811 478
        $document->preserveWhiteSpace = false;
812
813 478
        $row = 0;
814 478
        $column = 'A';
815 478
        $content = '';
816 478
        $this->rowspan = [];
817 478
        $this->processDomElement($document, $spreadsheet->getActiveSheet(), $row, $column, $content);
818
819
        // Return
820 477
        return $spreadsheet;
821
    }
822
823
    /**
824
     * Get sheet index.
825
     */
826 1
    public function getSheetIndex(): int
827
    {
828 1
        return $this->sheetIndex;
829
    }
830
831
    /**
832
     * Set sheet index.
833
     *
834
     * @param int $sheetIndex Sheet index
835
     *
836
     * @return $this
837
     */
838 2
    public function setSheetIndex(int $sheetIndex): static
839
    {
840 2
        $this->sheetIndex = $sheetIndex;
841
842 2
        return $this;
843
    }
844
845
    /**
846
     * Apply inline css inline style.
847
     *
848
     * NOTES :
849
     * Currently only intended for td & th element,
850
     * and only takes 'background-color' and 'color'; property with HEX color
851
     *
852
     * TODO :
853
     * - Implement to other propertie, such as border
854
     */
855 472
    private function applyInlineStyle(Worksheet &$sheet, int $row, string $column, array $attributeArray): void
856
    {
857 472
        if (!isset($attributeArray['style'])) {
858 466
            return;
859
        }
860
861 16
        if ($row <= 0 || $column === '') {
862 1
            $cellStyle = new Style();
863 16
        } elseif (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
864 1
            $columnTo = $column;
865 1
            for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
866 1
                ++$columnTo;
867
            }
868 1
            $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
869 1
            $cellStyle = $sheet->getStyle($range);
870 16
        } elseif (isset($attributeArray['rowspan'])) {
871 1
            $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
872 1
            $cellStyle = $sheet->getStyle($range);
873 16
        } elseif (isset($attributeArray['colspan'])) {
874 1
            $columnTo = $column;
875 1
            for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
876 1
                ++$columnTo;
877
            }
878 1
            $range = $column . $row . ':' . $columnTo . $row;
879 1
            $cellStyle = $sheet->getStyle($range);
880
        } else {
881 16
            $cellStyle = $sheet->getStyle($column . $row);
882
        }
883
884
        // add color styles (background & text) from dom element,currently support : td & th, using ONLY inline css style with RGB color
885 16
        $styles = explode(';', $attributeArray['style']);
886 16
        foreach ($styles as $st) {
887 16
            $value = explode(':', $st);
888 16
            $styleName = isset($value[0]) ? trim($value[0]) : null;
889 16
            $styleValue = isset($value[1]) ? trim($value[1]) : null;
890 16
            $styleValueString = (string) $styleValue;
891
892 16
            if (!$styleName) {
893 12
                continue;
894
            }
895
896
            switch ($styleName) {
897 16
                case 'background':
898 16
                case 'background-color':
899 3
                    $styleColor = $this->getStyleColor($styleValueString);
900
901 3
                    if (!$styleColor) {
902 1
                        continue 2;
903
                    }
904
905 3
                    $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);
906
907 3
                    break;
908 16
                case 'color':
909 3
                    $styleColor = $this->getStyleColor($styleValueString);
910
911 3
                    if (!$styleColor) {
912 1
                        continue 2;
913
                    }
914
915 3
                    $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
916
917 3
                    break;
918
919 13
                case 'border':
920 3
                    $this->setBorderStyle($cellStyle, $styleValueString, 'allBorders');
921
922 3
                    break;
923
924 11
                case 'border-top':
925 1
                    $this->setBorderStyle($cellStyle, $styleValueString, 'top');
926
927 1
                    break;
928
929 11
                case 'border-bottom':
930 1
                    $this->setBorderStyle($cellStyle, $styleValueString, 'bottom');
931
932 1
                    break;
933
934 11
                case 'border-left':
935 1
                    $this->setBorderStyle($cellStyle, $styleValueString, 'left');
936
937 1
                    break;
938
939 11
                case 'border-right':
940 1
                    $this->setBorderStyle($cellStyle, $styleValueString, 'right');
941
942 1
                    break;
943
944 10
                case 'font-size':
945 1
                    $cellStyle->getFont()->setSize(
946 1
                        (float) $styleValue
947 1
                    );
948
949 1
                    break;
950
951 10
                case 'font-weight':
952 1
                    if ($styleValue === 'bold' || $styleValue >= 500) {
953 1
                        $cellStyle->getFont()->setBold(true);
954
                    }
955
956 1
                    break;
957
958 10
                case 'font-style':
959 1
                    if ($styleValue === 'italic') {
960 1
                        $cellStyle->getFont()->setItalic(true);
961
                    }
962
963 1
                    break;
964
965 10
                case 'font-family':
966 1
                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValueString));
967
968 1
                    break;
969
970 10
                case 'text-decoration':
971
                    switch ($styleValue) {
972 1
                        case 'underline':
973 1
                            $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);
974
975 1
                            break;
976 1
                        case 'line-through':
977 1
                            $cellStyle->getFont()->setStrikethrough(true);
978
979 1
                            break;
980
                    }
981
982 1
                    break;
983
984 9
                case 'text-align':
985 1
                    $cellStyle->getAlignment()->setHorizontal($styleValueString);
986
987 1
                    break;
988
989 9
                case 'vertical-align':
990 2
                    $cellStyle->getAlignment()->setVertical($styleValueString);
991
992 2
                    break;
993
994 9
                case 'width':
995 2
                    if ($column !== '') {
996 2
                        $sheet->getColumnDimension($column)->setWidth(
997 2
                            (new CssDimension($styleValue ?? ''))->width()
998 2
                        );
999
                    }
1000
1001 2
                    break;
1002
1003 7
                case 'height':
1004 1
                    if ($row > 0) {
1005 1
                        $sheet->getRowDimension($row)->setRowHeight(
1006 1
                            (new CssDimension($styleValue ?? ''))->height()
1007 1
                        );
1008
                    }
1009
1010 1
                    break;
1011
1012 6
                case 'word-wrap':
1013 1
                    $cellStyle->getAlignment()->setWrapText(
1014 1
                        $styleValue === 'break-word'
1015 1
                    );
1016
1017 1
                    break;
1018
1019 6
                case 'text-indent':
1020 2
                    $cellStyle->getAlignment()->setIndent(
1021 2
                        (int) str_replace(['px'], '', $styleValueString)
1022 2
                    );
1023
1024 2
                    break;
1025
            }
1026
        }
1027
    }
1028
1029
    /**
1030
     * Check if has #, so we can get clean hex.
1031
     */
1032 7
    public function getStyleColor(?string $value): string
1033
    {
1034 7
        $value = (string) $value;
1035 7
        if (str_starts_with($value, '#')) {
1036 5
            return substr($value, 1);
1037
        }
1038
1039 4
        return HelperHtml::colourNameLookup($value);
1040
    }
1041
1042 11
    private function insertImage(Worksheet $sheet, string $column, int $row, array $attributes): void
1043
    {
1044 11
        if (!isset($attributes['src'])) {
1045 1
            return;
1046
        }
1047 10
        $styleArray = self::getStyleArray($attributes);
1048
1049 10
        $src = $attributes['src'];
1050 10
        if (substr($src, 0, 5) !== 'data:') {
1051 6
            $src = urldecode($src);
1052
        }
1053 10
        $width = isset($attributes['width']) ? (float) $attributes['width'] : ($styleArray['width'] ?? null);
1054 10
        $height = isset($attributes['height']) ? (float) $attributes['height'] : ($styleArray['height'] ?? null);
1055 10
        $name = $attributes['alt'] ?? null;
1056
1057 10
        $drawing = new Drawing();
1058 10
        $drawing->setPath($src, false);
1059 9
        if ($drawing->getPath() === '') {
1060 1
            return;
1061
        }
1062 8
        $drawing->setWorksheet($sheet);
1063 8
        $drawing->setCoordinates($column . $row);
1064 8
        $drawing->setOffsetX(0);
1065 8
        $drawing->setOffsetY(10);
1066 8
        $drawing->setResizeProportional(true);
1067
1068 8
        if ($name) {
1069 7
            $drawing->setName($name);
1070
        }
1071
1072 8
        if ($width) {
1073 5
            if ($height) {
1074 2
                $drawing->setWidthAndHeight((int) $width, (int) $height);
1075
            } else {
1076 3
                $drawing->setWidth((int) $width);
1077
            }
1078 3
        } elseif ($height) {
1079 1
            $drawing->setHeight((int) $height);
1080
        }
1081
1082 8
        $sheet->getColumnDimension($column)->setWidth(
1083 8
            $drawing->getWidth() / 6
1084 8
        );
1085
1086 8
        $sheet->getRowDimension($row)->setRowHeight(
1087 8
            $drawing->getHeight() * 0.9
1088 8
        );
1089
1090 8
        if (isset($styleArray['opacity'])) {
1091
            $opacity = $styleArray['opacity'];
1092
            if (is_numeric($opacity)) {
1093
                $drawing->setOpacity((int) ($opacity * 100000));
1094
            }
1095
        }
1096
    }
1097
1098 10
    private static function getStyleArray(array $attributes): array
1099
    {
1100 10
        $styleArray = [];
1101 10
        if (isset($attributes['style'])) {
1102 4
            $styles = explode(';', $attributes['style']);
1103 4
            foreach ($styles as $style) {
1104 4
                $value = explode(':', $style);
1105 4
                if (count($value) === 2) {
1106 4
                    $arrayKey = trim($value[0]);
1107 4
                    $arrayValue = trim($value[1]);
1108 4
                    if ($arrayKey === 'width') {
1109 4
                        if (substr($arrayValue, -2) === 'px') {
1110 4
                            $arrayValue = (string) (((float) substr($arrayValue, 0, -2)));
1111
                        } else {
1112
                            $arrayValue = (new CssDimension($arrayValue))->width();
1113
                        }
1114 4
                    } elseif ($arrayKey === 'height') {
1115 2
                        if (substr($arrayValue, -2) === 'px') {
1116 2
                            $arrayValue = substr($arrayValue, 0, -2);
1117
                        } else {
1118
                            $arrayValue = (new CssDimension($arrayValue))->height();
1119
                        }
1120
                    }
1121 4
                    $styleArray[$arrayKey] = $arrayValue;
1122
                }
1123
            }
1124
        }
1125
1126 10
        return $styleArray;
1127
    }
1128
1129
    private const BORDER_MAPPINGS = [
1130
        'dash-dot' => Border::BORDER_DASHDOT,
1131
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
1132
        'dashed' => Border::BORDER_DASHED,
1133
        'dotted' => Border::BORDER_DOTTED,
1134
        'double' => Border::BORDER_DOUBLE,
1135
        'hair' => Border::BORDER_HAIR,
1136
        'medium' => Border::BORDER_MEDIUM,
1137
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
1138
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
1139
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
1140
        'none' => Border::BORDER_NONE,
1141
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
1142
        'solid' => Border::BORDER_THIN,
1143
        'thick' => Border::BORDER_THICK,
1144
    ];
1145
1146 15
    public static function getBorderMappings(): array
1147
    {
1148 15
        return self::BORDER_MAPPINGS;
1149
    }
1150
1151
    /**
1152
     * Map html border style to PhpSpreadsheet border style.
1153
     */
1154 3
    public function getBorderStyle(string $style): ?string
1155
    {
1156 3
        return self::BORDER_MAPPINGS[$style] ?? null;
1157
    }
1158
1159 3
    private function setBorderStyle(Style $cellStyle, string $styleValue, string $type): void
1160
    {
1161 3
        if (trim($styleValue) === Border::BORDER_NONE) {
1162 1
            $borderStyle = Border::BORDER_NONE;
1163 1
            $color = null;
1164
        } else {
1165 3
            $borderArray = explode(' ', $styleValue);
1166 3
            $borderCount = count($borderArray);
1167 3
            if ($borderCount >= 3) {
1168 3
                $borderStyle = $borderArray[1];
1169 3
                $color = $borderArray[2];
1170
            } else {
1171 1
                $borderStyle = $borderArray[0];
1172 1
                $color = $borderArray[1] ?? null;
1173
            }
1174
        }
1175
1176 3
        $cellStyle->applyFromArray([
1177 3
            'borders' => [
1178 3
                $type => [
1179 3
                    'borderStyle' => $this->getBorderStyle($borderStyle),
1180 3
                    'color' => ['rgb' => $this->getStyleColor($color)],
1181 3
                ],
1182 3
            ],
1183 3
        ]);
1184
    }
1185
1186
    /**
1187
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
1188
     */
1189 1
    public function listWorksheetInfo(string $filename): array
1190
    {
1191 1
        $info = [];
1192 1
        $spreadsheet = new Spreadsheet();
1193 1
        $this->loadIntoExisting($filename, $spreadsheet);
1194 1
        foreach ($spreadsheet->getAllSheets() as $sheet) {
1195 1
            $newEntry = ['worksheetName' => $sheet->getTitle()];
1196 1
            $newEntry['lastColumnLetter'] = $sheet->getHighestDataColumn();
1197 1
            $newEntry['lastColumnIndex'] = Coordinate::columnIndexFromString($sheet->getHighestDataColumn()) - 1;
1198 1
            $newEntry['totalRows'] = $sheet->getHighestDataRow();
1199 1
            $newEntry['totalColumns'] = $newEntry['lastColumnIndex'] + 1;
1200 1
            $info[] = $newEntry;
1201
        }
1202 1
        $spreadsheet->disconnectWorksheets();
1203
1204 1
        return $info;
1205
    }
1206
}
1207