Failed Conditions
Push — master ( bf4629...7712d5 )
by Adrien
27:59 queued 18:08
created

Html::processDomElementBgcolor()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 8
ccs 7
cts 7
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 4
crap 2
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use DOMDocument;
6
use DOMElement;
7
use DOMNode;
8
use DOMText;
9
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
10
use PhpOffice\PhpSpreadsheet\Cell\DataType;
11
use PhpOffice\PhpSpreadsheet\Document\Properties;
12
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
13
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
14
use PhpOffice\PhpSpreadsheet\Spreadsheet;
15
use PhpOffice\PhpSpreadsheet\Style\Border;
16
use PhpOffice\PhpSpreadsheet\Style\Color;
17
use PhpOffice\PhpSpreadsheet\Style\Fill;
18
use PhpOffice\PhpSpreadsheet\Style\Font;
19
use PhpOffice\PhpSpreadsheet\Style\Style;
20
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
21
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
22
use Throwable;
23
24
class Html extends BaseReader
25
{
26
    /**
     * Number of bytes sampled from the start and from the end of a file when
     * deciding whether it looks like HTML.
     */
    public const TEST_SAMPLE_SIZE = 2048;

    /**
     * Input encoding.
     *
     * The accessors for this property are marked deprecated with the note
     * "no use is made of this property".
     *
     * @var string
     */
    protected $inputEncoding = 'ANSI';

    /**
     * Sheet index to read.
     *
     * @var int
     */
    protected $sheetIndex = 0;

    /**
     * Style arrays, keyed by lower-case HTML tag name, that are applied to the
     * current cell via applyFromArray() when the matching tag is processed.
     *
     * @var array
     */
    protected $formats = [
        'h1' => [
            'font' => [
                'bold' => true,
                'size' => 24,
            ],
        ], //    Bold, 24pt
        'h2' => [
            'font' => [
                'bold' => true,
                'size' => 18,
            ],
        ], //    Bold, 18pt
        'h3' => [
            'font' => [
                'bold' => true,
                'size' => 13.5,
            ],
        ], //    Bold, 13.5pt
        'h4' => [
            'font' => [
                'bold' => true,
                'size' => 12,
            ],
        ], //    Bold, 12pt
        'h5' => [
            'font' => [
                'bold' => true,
                'size' => 10,
            ],
        ], //    Bold, 10pt
        'h6' => [
            'font' => [
                'bold' => true,
                'size' => 7.5,
            ],
        ], //    Bold, 7.5pt
        'a' => [
            'font' => [
                'underline' => true,
                'color' => [
                    'argb' => Color::COLOR_BLUE,
                ],
            ],
        ], //    Blue underlined
        'hr' => [
            'borders' => [
                'bottom' => [
                    'borderStyle' => Border::BORDER_THIN,
                    'color' => [
                        // The colour must be keyed (as in the 'a' entry above);
                        // a bare list element is not picked up as a colour.
                        'argb' => Color::COLOR_BLACK,
                    ],
                ],
            ],
        ], //    Bottom border
        'strong' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'b' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'i' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
        'em' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
    ];

    /**
     * Cell references covered by a rowspan merge, stored as keys with the value
     * true. processDomElementThTd() skips over these when placing the next cell.
     *
     * @var array
     */
    protected $rowspan = [];
129
130
    /**
     * Set up the HTML reader.
     *
     * Runs the parent constructor, then stores an XmlScanner instance bound to
     * this reader as its security scanner.
     */
    public function __construct()
    {
        parent::__construct();

        $scanner = XmlScanner::getInstance($this);
        $this->securityScanner = $scanner;
    }
138
139
    /**
     * Decide whether the given file looks like HTML.
     *
     * A file qualifies when its leading sample starts with "<" (ignoring
     * surrounding whitespace), that sample contains at least one tag, and its
     * trailing sample ends with ">". A file that cannot be opened is reported
     * as not readable rather than raising.
     */
    public function canRead(string $filename): bool
    {
        try {
            $this->openFile($filename);
        } catch (Exception) {
            // File missing or unreadable
            return false;
        }

        $head = $this->readBeginning();
        $tail = $this->readEnding();

        fclose($this->fileHandle);

        return self::startsWithTag($head)
            && self::containsTags($head)
            && self::endsWithTag($tail);
    }
160
161 448
    /**
     * Read up to TEST_SAMPLE_SIZE bytes from the very start of the open file.
     *
     * A failed read is returned as an empty string.
     */
    private function readBeginning(): string
    {
        $handle = $this->fileHandle;
        fseek($handle, 0);
        $sample = fread($handle, self::TEST_SAMPLE_SIZE);

        return (string) $sample;
    }
167
168 448
    /**
     * Read the final bytes of the open file: TEST_SAMPLE_SIZE bytes, or the
     * whole file when it is shorter than that.
     *
     * The file size is looked up through the URI reported by the stream's
     * metadata. A size of zero yields an empty string.
     */
    private function readEnding(): string
    {
        $handle = $this->fileHandle;
        $uri = stream_get_meta_data($handle)['uri'];

        $size = (int) filesize($uri);
        if ($size === 0) {
            return '';
        }

        // Never ask for more bytes than the file holds
        $length = ($size < self::TEST_SAMPLE_SIZE) ? $size : self::TEST_SAMPLE_SIZE;

        fseek($handle, $size - $length);
        $sample = fread($handle, $length);

        return (string) $sample;
    }
187
188 448
    /**
     * Report whether the first non-whitespace character of $data is "<".
     */
    private static function startsWithTag(string $data): bool
    {
        $trimmed = trim($data);

        return str_starts_with($trimmed, '<');
    }
192
193 448
    /**
     * Report whether the last non-whitespace character of $data is ">".
     */
    private static function endsWithTag(string $data): bool
    {
        $trimmed = trim($data);

        return str_ends_with($trimmed, '>');
    }
197
198 448
    /**
     * Report whether strip_tags() removes anything from $data, i.e. whether the
     * string gets shorter once tags are stripped.
     */
    private static function containsTags(string $data): bool
    {
        $originalLength = strlen($data);
        $strippedLength = strlen(strip_tags($data));

        return $originalLength !== $strippedLength;
    }
202
203
    /**
     * Load an HTML file into a freshly created Spreadsheet.
     *
     * Delegates to loadIntoExisting() with a new, empty Spreadsheet.
     */
    public function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        return $this->loadIntoExisting($filename, new Spreadsheet());
    }
214
215
    /**
     * Store the input encoding and return the reader for chaining.
     *
     * @param string $inputEncoding Input encoding, eg: 'ANSI'
     *
     * @return $this
     *
     * @codeCoverageIgnore
     *
     * @deprecated no use is made of this property
     */
    public function setInputEncoding($inputEncoding): static
    {
        $this->inputEncoding = $inputEncoding;

        return $this;
    }
232
233
    /**
     * Return the stored input encoding.
     *
     * @return string
     *
     * @codeCoverageIgnore
     *
     * @deprecated no use is made of this property
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }
246
247
    //    Data Array used for testing only; it should be replaced by writes to the Spreadsheet object once the tests are complete.
248
249
    /**
     * Copy of the flushed cell contents, indexed as [row][column].
     * Written by flushCell().
     *
     * @var array
     */
250
    protected $dataArray = [];
251
252
    /**
     * Current table nesting depth: 0 outside any table. Incremented by
     * setTableStartColumn(), decremented by releaseTableStartColumn(), and
     * reset to 0 when a body element is processed.
     *
     * @var int
     */
253
    protected $tableLevel = 0;
254
255
    /**
     * Start column of each open table, indexed by table nesting level.
     *
     * @var array
     */
256
    protected $nestedColumn = ['A'];
257
258 448
    /**
     * Register the start column for a table that is being entered.
     *
     * An outermost table (no table currently open) always starts in column 'A';
     * a nested table starts in the column passed in. The nesting level is
     * incremented and the chosen column is stored for that level.
     *
     * @return string the start column recorded for the new nesting level
     */
    protected function setTableStartColumn(string $column): string
    {
        $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

        $level = ++$this->tableLevel;
        $this->nestedColumn[$level] = $startColumn;

        return $this->nestedColumn[$level];
    }
268
269 444
    /**
     * Return the start column stored for the current table nesting level.
     */
    protected function getTableStartColumn(): string
    {
        $level = $this->tableLevel;

        return $this->nestedColumn[$level];
    }
273
274 448
    /**
     * Leave the current table: drop the most recently stored start column and
     * decrement the nesting level.
     *
     * @return string the start column that was removed
     */
    protected function releaseTableStartColumn(): string
    {
        $released = array_pop($this->nestedColumn);
        --$this->tableLevel;

        return $released;
    }
280
281
    /**
     * Write the buffered cell content to the worksheet and empty the buffer.
     *
     * String content that is non-empty after trimming is written to the cell at
     * $column . $row and mirrored into $this->dataArray. When the attributes
     * carry a `data-type`, the value is written explicitly with that type; if
     * that raises a PhpSpreadsheet exception, the value is written with default
     * typing instead. Non-string content is only recorded in $this->dataArray,
     * prefixed with "RICH TEXT: " (rich text is not implemented yet).
     *
     * @param int|string $row
     * @param mixed $cellContent content buffer; always an empty string on return
     * @param array $attributeArray attributes of the element being flushed
     */
    protected function flushCell(Worksheet $sheet, string $column, $row, mixed &$cellContent, array $attributeArray): void
    {
        if (is_string($cellContent)) {
            //    Simple string content: only write it if something is left after trimming
            if (trim($cellContent) > '') {
                $coordinate = $column . $row;

                if (isset($attributeArray['data-type'])) {
                    // An explicit data type was requested for this cell
                    $datatype = $attributeArray['data-type'];
                    $isStringType = in_array(
                        $datatype,
                        [DataType::TYPE_STRING, DataType::TYPE_STRING2, DataType::TYPE_INLINE],
                        true
                    );
                    if ($isStringType && str_starts_with($cellContent, '=')) {
                        // Quote-prefix the cell so a string that starts with an
                        // equals sign is not treated as a formula by Excel.
                        $sheet->getCell($coordinate)
                            ->getStyle()
                            ->setQuotePrefix(true);
                    }

                    // An invalid data type is ignored: fall back to default typing
                    try {
                        $sheet->setCellValueExplicit($coordinate, $cellContent, $datatype);
                    } catch (\PhpOffice\PhpSpreadsheet\Exception) {
                        $sheet->setCellValue($coordinate, $cellContent);
                    }
                } else {
                    $sheet->setCellValue($coordinate, $cellContent);
                }
                $this->dataArray[$row][$column] = $cellContent;
            }
        } else {
            //    We have a Rich Text run
            //    TODO
            $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
        }
        $cellContent = '';
    }
325
326 449
    /**
     * First link of the per-element dispatch chain.
     *
     * Collects the element's attributes into a name => value array. A body
     * element resets the cursor (row 1, column 'A', empty content buffer, table
     * level 0) and has its children processed; every other element is passed on
     * to processDomElementTitle().
     */
    private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
    {
        $attributeArray = [];
        foreach ($child->attributes as $attr) {
            $attributeArray[$attr->name] = $attr->value;
        }

        if ($child->nodeName !== 'body') {
            $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Start of the document body: reset the write cursor
        $row = 1;
        $column = 'A';
        $cellContent = '';
        $this->tableLevel = 0;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
343
344 449
    /**
     * Handle a title element; pass any other element to processDomElementSpanEtc().
     *
     * The title's text is gathered through processDomElement(), used as the
     * worksheet title, and the content buffer is then emptied.
     */
    private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'title') {
            $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $sheet->setTitle($cellContent, true, true);
        $cellContent = '';
    }
354
355
    /** Inline/container tags handled by processDomElementSpanEtc(). */
    private const SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];

    /**
     * Handle the tags listed in SPAN_ETC; pass any other element to
     * processDomElementHr().
     *
     * An element whose class attribute is exactly "comment" has its text added
     * as a text run to the comment of the current cell; otherwise its children
     * are processed. Afterwards, any style registered in $this->formats for the
     * tag is applied to the current cell.
     */
    private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = (string) $child->nodeName;
        if (!in_array($tag, self::SPAN_ETC, true)) {
            $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $isComment = ($attributeArray['class'] ?? null) === 'comment';
        if ($isComment) {
            $commentText = $sheet->getComment($column . $row)->getText();
            $commentText->createTextRun($child->textContent);
        } else {
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        }

        if (isset($this->formats[$tag])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$tag]);
        }
    }
375
376 449
    /**
     * Handle an hr element, then always continue with processDomElementBr().
     *
     * For hr: the pending content is flushed, the row cursor advances, the
     * registered 'hr' style (if any) is applied to the cell on that new row,
     * and the row cursor advances again. The element is then also handed to
     * processDomElementBr(), which treats hr like br.
     */
    private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;
        if ($tag === 'hr') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;

            $ruleStyle = $this->formats[$tag] ?? null;
            if ($ruleStyle !== null) {
                $sheet->getStyle($column . $row)->applyFromArray($ruleStyle);
            }
            ++$row;
        }

        // hr deliberately falls through to the br handling
        $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
    }
389
390 449
    /**
     * Handle br (and hr) elements; pass any other element to processDomElementA().
     *
     * Inside a table the break becomes a newline in the content buffer and the
     * current cell is set to wrap text. Outside a table the pending content is
     * flushed and the row cursor moves down one row.
     */
    private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isBreak = in_array($child->nodeName, ['br', 'hr'], true);
        if (!$isBreak) {
            $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Within a table cell: keep the content together, separated by a newline
            $cellContent .= "\n";
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);

            return;
        }

        //    Outside any table: write what we have and start a new row
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        ++$row;
    }
406
407 449
    /**
     * Handle an anchor element; pass any other element to processDomElementH1Etc().
     *
     * When the anchor has an href attribute, its value becomes the hyperlink URL
     * of the current cell and the style registered for 'a' (if any) is applied.
     * No other attribute triggers any action (a class of "comment-indicator" is
     * just a red square and is ignored). The anchor's children are then processed.
     */
    private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'a') {
            $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if (array_key_exists('href', $attributeArray)) {
            $coordinate = $column . $row;
            $sheet->getCell($coordinate)->getHyperlink()->setUrl($attributeArray['href']);

            $linkStyle = $this->formats[$child->nodeName] ?? null;
            if ($linkStyle !== null) {
                $sheet->getStyle($coordinate)->applyFromArray($linkStyle);
            }
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
432
433
    /** Block-level tags handled by processDomElementH1Etc(). */
    private const H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];

    /**
     * Handle headings, lists and paragraphs (the tags in H1_ETC); pass any other
     * element to processDomElementLi().
     *
     * Inside a table the element's text is appended to the current cell,
     * separated from existing content by a newline, and the cell is set to wrap.
     * Outside a table any pending content is flushed to its own row, the
     * element's content is written to the next row with the tag's registered
     * style (if any), and the cursor moves to column 'A' of the following row.
     */
    private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array((string) $child->nodeName, self::H1_ETC, true)) {
            $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, separate from earlier content with a \n.
            //    Compare against '' explicitly: the content "0" is falsy but not empty.
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent !== '') {
            // Pending content gets a row of its own before this block starts
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        if (isset($this->formats[$child->nodeName])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
        }

        ++$row;
        $column = 'A';
    }
462
463 449
    /**
     * Handle a list item; pass any other element to processDomElementImg().
     *
     * Inside a table the item's text is appended to the current cell, separated
     * from existing content by a newline. Outside a table any pending content is
     * flushed, the row cursor advances, the item's content is written to that
     * row, and the column cursor returns to 'A'.
     */
    private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'li') {
            $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, separate from earlier content with a \n.
            //    Compare against '' explicitly: the content "0" is falsy but not empty.
            if ($cellContent !== '') {
                $cellContent .= "\n";
            }
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent !== '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        }
        ++$row;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = 'A';
    }
483
484 449
    /**
     * Handle an img element by passing its attributes to insertImage() for the
     * current cell; pass any other element to processDomElementTable().
     */
    private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'img') {
            $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->insertImage($sheet, $column, $row, $attributeArray);
    }
492
493
    /**
     * Column that the next col element applies to. Reset to 'A' whenever a
     * table is entered and advanced after each col element.
     */
    private string $currentColumn = 'A';

    /**
     * Handle a table element; pass any other element to processDomElementTr().
     *
     * Pending content is flushed, the table's start column is registered and the
     * table's children are processed. A nested table (nesting level above 1)
     * starts one row higher when the row cursor is beyond row 1. After the table
     * the start column is released; if the remaining nesting level is still
     * above 1 the column cursor advances, otherwise the row cursor advances.
     */
    private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'table') {
            $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->currentColumn = 'A';
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = $this->setTableStartColumn($column);

        $enteredNestedTable = $this->tableLevel > 1;
        if ($enteredNestedTable && $row > 1) {
            --$row;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        $column = $this->releaseTableStartColumn();
        // The nesting level has changed by now, so it is read again here
        if ($this->tableLevel > 1) {
            ++$column;
        } else {
            ++$row;
        }
    }
515
516 449
    /**
     * Handle col and tr elements; pass any other element to
     * processDomElementThTdOther().
     *
     * col: applyInlineStyle() is called with row -1 for the column tracked in
     * $this->currentColumn, which is then advanced.
     * tr: the column cursor returns to the table's start column, the content
     * buffer is emptied, the row's children are processed, a height attribute
     * (if present) is set as the row height, and the row cursor advances.
     */
    private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;

        if ($tag === 'col') {
            $this->applyInlineStyle($sheet, -1, $this->currentColumn, $attributeArray);
            ++$this->currentColumn;

            return;
        }

        if ($tag !== 'tr') {
            $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $column = $this->getTableStartColumn();
        $cellContent = '';
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        if (isset($attributeArray['height'])) {
            $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
        }

        ++$row;
    }
535
536 449
    /**
     * Last link of the dispatch chain: td and th elements go to
     * processDomElementThTd(); any other element simply has its children processed.
     */
    private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isTableCell = $child->nodeName === 'td' || $child->nodeName === 'th';

        if ($isTableCell) {
            $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
544
545 444
    /**
     * Apply a bgcolor attribute, when present, as a solid fill on the cell.
     *
     * The attribute value is passed through getStyleColor() and used as the
     * fill's 'rgb' colour.
     */
    private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['bgcolor'])) {
            return;
        }

        $cellStyle = $sheet->getStyle("$column$row");
        $solidFill = [
            'fillType' => Fill::FILL_SOLID,
            'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
        ];
        $cellStyle->applyFromArray(['fill' => $solidFill]);
    }
558
559 444
    /**
     * Apply a width attribute, when present, to the column's dimension.
     *
     * The attribute value is interpreted through CssDimension::width().
     */
    private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['width'])) {
            return;
        }

        $columnDimension = $sheet->getColumnDimension($column);
        $columnDimension->setWidth((new CssDimension($attributeArray['width']))->width());
    }
565
566 444
    /**
     * Apply a height attribute, when present, to the row's dimension.
     *
     * The attribute value is interpreted through CssDimension::height().
     */
    private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
    {
        if (!isset($attributeArray['height'])) {
            return;
        }

        $rowDimension = $sheet->getRowDimension($row);
        $rowDimension->setRowHeight((new CssDimension($attributeArray['height']))->height());
    }
572
573 444
    /**
     * Apply an align attribute, when present, as the cell's horizontal alignment.
     * The attribute value is passed through unchanged.
     */
    private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['align'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setHorizontal($attributeArray['align']);
    }
579
580 444
    /**
     * Apply a valign attribute, when present, as the cell's vertical alignment.
     * The attribute value is passed through unchanged.
     */
    private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['valign'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setVertical($attributeArray['valign']);
    }
586
587 444
    /**
     * Apply a data-format attribute, when present, as the cell's number format code.
     */
    private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['data-format'])) {
            return;
        }

        $numberFormat = $sheet->getStyle($column . $row)->getNumberFormat();
        $numberFormat->setFormatCode($attributeArray['data-format']);
    }
593
594 444
    /**
     * Process one table cell (td or th).
     *
     * Steps, in order:
     *  1. skip columns in this row already claimed by an earlier rowspan merge;
     *  2. gather the cell's content by processing its children;
     *  3. apply inline style, flush the content, then apply the bgcolor, width,
     *     height, align, valign and data-format attributes;
     *  4. merge cells for rowspan and/or colspan, remembering the merged cells of
     *     a rowspan in $this->rowspan;
     *  5. move the column cursor past the cell (and past a colspan merge).
     */
    private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        while (isset($this->rowspan[$column . $row])) {
            ++$column;
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // apply inline style
        $this->applyInlineStyle($sheet, $row, $column, $attributeArray);

        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
        $this->processDomElementWidth($sheet, $column, $attributeArray);
        $this->processDomElementHeight($sheet, $row, $attributeArray);
        $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);

        $spansRows = isset($attributeArray['rowspan']);
        $spansColumns = isset($attributeArray['colspan']);

        if ($spansRows || $spansColumns) {
            // Right-hand column of the merge: the cell's own column unless colspan widens it
            $lastColumn = $column;
            if ($spansColumns) {
                for ($remaining = (int) $attributeArray['colspan'] - 1; $remaining > 0; --$remaining) {
                    ++$lastColumn;
                }
            }

            // Bottom row of the merge: the cell's own row unless rowspan extends it
            $lastRow = $spansRows ? $row + (int) $attributeArray['rowspan'] - 1 : $row;

            $range = $column . $row . ':' . $lastColumn . $lastRow;
            if ($spansRows) {
                // Remember every merged cell so later rows skip over them
                foreach (Coordinate::extractAllCellReferencesInRange($range) as $reference) {
                    $this->rowspan[$reference] = true;
                }
            }
            $sheet->mergeCells($range);
            $column = $lastColumn;
        }

        ++$column;
    }
644
645 449
    /**
     * Walk the direct children of a DOM node.
     *
     * Text nodes are trimmed, have each run of whitespace collapsed to a single
     * space, and are appended to the content buffer. Element nodes enter the
     * dispatch chain at processDomElementBody(). Other node types are ignored.
     * Rich text runs are not supported yet (TODO).
     */
    protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
    {
        foreach ($element->childNodes as $node) {
            if ($node instanceof DOMElement) {
                $this->processDomElementBody($sheet, $row, $column, $cellContent, $node);

                continue;
            }

            if (!$node instanceof DOMText) {
                continue;
            }

            $rawText = trim($node->nodeValue ?? '');
            $normalised = (string) preg_replace('/\s+/u', ' ', $rawText);
            // The buffer is declared as string, so the text is appended directly
            $cellContent .= $normalised;
        }
    }
661
662
    /**
     * Load an HTML file into an existing Spreadsheet instance.
     *
     * The file must pass canRead(). Its contents are run through the security
     * scanner, every character from U+0080 upwards is replaced by a numeric
     * character reference, and the result is parsed as an HTML DOM. Document
     * properties are read from the DOM before the document itself is loaded.
     *
     * An Exception is thrown when the file does not look like HTML or cannot be
     * parsed; in the latter case the underlying error, if any, is attached as
     * the previous exception.
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        if (!$this->canRead($filename)) {
            throw new Exception($filename . ' is an Invalid HTML file.');
        }

        $dom = new DOMDocument();
        $failure = null;

        try {
            $scanned = $this->getSecurityScannerOrThrow()->scanFile($filename);
            /** @var callable */
            $callback = [self::class, 'replaceNonAscii'];
            // Matches every code point outside the ASCII range
            $escaped = preg_replace_callback("/[\u{80}-\u{10ffff}]/u", $callback, $scanned);
            $loaded = $escaped !== null && $dom->loadHTML($escaped);
        } catch (Throwable $failure) {
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $failure);
        }

        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
695
696 449
    /**
     * Copy document properties from the DOM's meta elements into the spreadsheet.
     *
     * Meta elements with an empty content attribute are skipped. Recognised
     * names (author, category, company, created, description, keywords,
     * lastModifiedBy, manager, modified, subject, title, viewport) map to the
     * matching Properties setter. Names of the form "custom.<type>.<name>",
     * with type one of bool, date, float, int or string, become custom
     * properties of that type. Any other name is ignored. A non-empty base URI
     * on the document is stored as the hyperlink base.
     */
    private static function loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet): void
    {
        $properties = $spreadsheet->getProperties();

        foreach ($dom->getElementsByTagName('meta') as $meta) {
            $content = (string) $meta->getAttribute('content');
            if ($content === '') {
                continue;
            }

            $name = (string) $meta->getAttribute('name');

            // Custom properties carry their type and name inside the meta name
            if (preg_match('/^custom[.](bool|date|float|int|string)[.](.+)$/', $name, $matches) === 1) {
                [, $kind, $customName] = $matches;
                [$value, $propertyType] = match ($kind) {
                    'bool' => [(bool) $content, Properties::PROPERTY_TYPE_BOOLEAN],
                    'float' => [(float) $content, Properties::PROPERTY_TYPE_FLOAT],
                    'int' => [(int) $content, Properties::PROPERTY_TYPE_INTEGER],
                    'date' => [$content, Properties::PROPERTY_TYPE_DATE],
                    // string
                    default => [$content, Properties::PROPERTY_TYPE_STRING],
                };
                $properties->setCustomProperty($customName, $value, $propertyType);

                continue;
            }

            match ($name) {
                'author' => $properties->setCreator($content),
                'category' => $properties->setCategory($content),
                'company' => $properties->setCompany($content),
                'created' => $properties->setCreated($content),
                'description' => $properties->setDescription($content),
                'keywords' => $properties->setKeywords($content),
                'lastModifiedBy' => $properties->setLastModifiedBy($content),
                'manager' => $properties->setManager($content),
                'modified' => $properties->setModified($content),
                'subject' => $properties->setSubject($content),
                'title' => $properties->setTitle($content),
                'viewport' => $properties->setViewport($content),
                // Unrecognised meta names are ignored
                default => null,
            };
        }

        $baseUri = $dom->baseURI;
        if (!empty($baseUri)) {
            $properties->setHyperlinkBase($baseUri);
        }
    }
770
771 55
    /**
     * Regex callback that rewrites the matched character as a decimal HTML numeric entity.
     *
     * @param array $matches Match data; element 0 holds the matched character
     *
     * @return string Entity in the form "&#NNN;"
     */
    private static function replaceNonAscii(array $matches): string
    {
        $codePoint = mb_ord($matches[0], 'UTF-8');

        return '&#' . $codePoint . ';';
    }
775
776
    /**
     * Build a spreadsheet from a string of HTML.
     *
     * The content is run through the security scanner, every character in the
     * range U+0080..U+10FFFF is rewritten as a numeric entity, and the result
     * is parsed with DOMDocument::loadHTML().
     *
     * @param string $content HTML markup to load
     * @param null|Spreadsheet $spreadsheet Workbook to load into; a new one is created when null
     */
    public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spreadsheet
    {
        $dom = new DOMDocument();
        $failure = null;

        try {
            $scanned = $this->getSecurityScannerOrThrow()->scan($content);
            // Character class covering everything above 7-bit ASCII
            $nonAsciiPattern = "/[\u{80}-\u{10ffff}]/u";
            /** @var callable */
            $toEntity = [self::class, 'replaceNonAscii'];
            $escaped = preg_replace_callback($nonAsciiPattern, $toEntity, $scanned);
            // A null result means the regex engine failed; treat it like a parse failure
            $loaded = $escaped !== null && $dom->loadHTML($escaped);
        } catch (Throwable $failure) {
            $loaded = false;
        }

        if ($loaded === false) {
            // Any caught throwable is attached as the previous exception
            throw new Exception('Failed to load content as a DOM Document', 0, $failure);
        }

        $spreadsheet ??= new Spreadsheet();
        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
807
808
    /**
     * Load a parsed DOM document into the sheet selected by the reader's sheet index.
     *
     * Sheets are appended until the configured index exists, that sheet is made
     * active, and the document is handed to processDomElement() starting at
     * row 0, column 'A' with empty cell content.
     */
    private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
    {
        // Make sure the target sheet exists before activating it
        while ($this->sheetIndex >= $spreadsheet->getSheetCount()) {
            $spreadsheet->createSheet();
        }
        $spreadsheet->setActiveSheetIndex($this->sheetIndex);

        // Discard white space
        $document->preserveWhiteSpace = false;

        // Start from a clean rowspan table and the initial cursor position
        $this->rowspan = [];
        $currentRow = 0;
        $currentColumn = 'A';
        $cellContent = '';
        $this->processDomElement(
            $document,
            $spreadsheet->getActiveSheet(),
            $currentRow,
            $currentColumn,
            $cellContent
        );

        return $spreadsheet;
    }
830
831
    /**
     * Index of the sheet this reader loads content into.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        $index = $this->sheetIndex;

        return $index;
    }
840
841
    /**
     * Choose the sheet that subsequent loads will write into.
     *
     * @param int $sheetIndex Sheet index
     *
     * @return $this
     */
    public function setSheetIndex($sheetIndex): static
    {
        // Stored as given; loadDocument() creates missing sheets up to this index
        $this->sheetIndex = $sheetIndex;

        return $this;
    }
854
855
    /**
     * Apply the declarations of an element's inline `style` attribute.
     *
     * Recognised properties: background, background-color, color, border,
     * border-top, border-bottom, border-left, border-right, font-size,
     * font-weight, font-style, font-family, text-decoration, text-align,
     * vertical-align, width, height, word-wrap and text-indent. Any other
     * property is ignored.
     *
     * When the element carries rowspan and/or colspan attributes, the cell
     * styles are applied to the whole spanned range. When there is no usable
     * cell position ($row <= 0 or an empty $column), cell styles are written to
     * a fresh Style object that is not obtained from the sheet; width and
     * height are still applied when their own column/row guard passes.
     *
     * @param int $row Row number of the cell being styled
     * @param string $column Column letter of the cell being styled
     * @param array $attributeArray Attributes of the element (style, rowspan, colspan, ...)
     */
    private function applyInlineStyle(Worksheet &$sheet, $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['style'])) {
            return;
        }

        $hasRowspan = isset($attributeArray['rowspan']);
        $hasColspan = isset($attributeArray['colspan']);

        if ($row <= 0 || $column === '') {
            $cellStyle = new Style();
        } elseif ($hasRowspan || $hasColspan) {
            // Advance the end column once per extra spanned column
            $columnTo = $column;
            $extraColumns = $hasColspan ? (int) $attributeArray['colspan'] - 1 : 0;
            for ($i = 0; $i < $extraColumns; ++$i) {
                ++$columnTo;
            }
            $rowTo = $hasRowspan ? $row + (int) $attributeArray['rowspan'] - 1 : $row;
            $cellStyle = $sheet->getStyle($column . $row . ':' . $columnTo . $rowTo);
        } else {
            $cellStyle = $sheet->getStyle($column . $row);
        }

        // Walk the "name: value" declarations of the style attribute
        $styles = explode(';', $attributeArray['style']);
        foreach ($styles as $st) {
            $value = explode(':', $st);
            $styleName = trim($value[0]);
            $styleValue = isset($value[1]) ? trim($value[1]) : null;
            $styleValueString = (string) $styleValue;

            if (!$styleName) {
                continue;
            }

            switch ($styleName) {
                case 'background':
                case 'background-color':
                    $styleColor = $this->getStyleColor($styleValueString);

                    if (!$styleColor) {
                        continue 2;
                    }

                    $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);

                    break;
                case 'color':
                    $styleColor = $this->getStyleColor($styleValueString);

                    if (!$styleColor) {
                        continue 2;
                    }

                    $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);

                    break;

                case 'border':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'allBorders');

                    break;

                case 'border-top':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'top');

                    break;

                case 'border-bottom':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'bottom');

                    break;

                case 'border-left':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'left');

                    break;

                case 'border-right':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'right');

                    break;

                case 'font-size':
                    $cellStyle->getFont()->setSize(
                        (float) $styleValue
                    );

                    break;

                case 'font-weight':
                    // Compare numeric weights as numbers only. In PHP 8 a
                    // non-numeric keyword such as "normal" compared with 500
                    // falls back to string comparison and would test true.
                    $isBoldKeyword = $styleValue === 'bold' || $styleValue === 'bolder';
                    $isHeavyWeight = is_numeric($styleValue) && (float) $styleValue >= 500;
                    if ($isBoldKeyword || $isHeavyWeight) {
                        $cellStyle->getFont()->setBold(true);
                    }

                    break;

                case 'font-style':
                    if ($styleValue === 'italic') {
                        $cellStyle->getFont()->setItalic(true);
                    }

                    break;

                case 'font-family':
                    // Strip the single quotes CSS allows around family names
                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValueString));

                    break;

                case 'text-decoration':
                    if ($styleValueString === 'underline') {
                        $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);
                    } elseif ($styleValueString === 'line-through') {
                        $cellStyle->getFont()->setStrikethrough(true);
                    }

                    break;

                case 'text-align':
                    $cellStyle->getAlignment()->setHorizontal($styleValueString);

                    break;

                case 'vertical-align':
                    $cellStyle->getAlignment()->setVertical($styleValueString);

                    break;

                case 'width':
                    if ($column !== '') {
                        $sheet->getColumnDimension($column)->setWidth(
                            (new CssDimension($styleValueString))->width()
                        );
                    }

                    break;

                case 'height':
                    if ($row > 0) {
                        $sheet->getRowDimension($row)->setRowHeight(
                            (new CssDimension($styleValueString))->height()
                        );
                    }

                    break;

                case 'word-wrap':
                    $cellStyle->getAlignment()->setWrapText(
                        $styleValue === 'break-word'
                    );

                    break;

                case 'text-indent':
                    $cellStyle->getAlignment()->setIndent(
                        (int) str_replace(['px'], '', $styleValueString)
                    );

                    break;
            }
        }
    }
1040
1041
    /**
     * Turn a CSS colour value into the string used for a style's 'rgb' entry.
     *
     * A value starting with '#' is returned with that character removed; any
     * other value is passed to Helper\Html::colourNameLookup().
     */
    public function getStyleColor(mixed $value): string
    {
        $colour = (string) $value;

        return str_starts_with($colour, '#')
            ? substr($colour, 1)
            : \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup($colour);
    }
1053
1054 8
    /**
     * Place the image described by an element's attributes on the sheet.
     *
     * Nothing happens without a `src` attribute. The drawing is anchored at
     * $column . $row, optionally named from `alt` and sized from `width` /
     * `height`; the column width and row height are then set from the
     * drawing's resulting dimensions.
     *
     * @param array $attributes Element attributes (src, width, height, alt)
     */
    private function insertImage(Worksheet $sheet, string $column, int $row, array $attributes): void
    {
        if (!isset($attributes['src'])) {
            return;
        }

        $imagePath = urldecode($attributes['src']);
        $requestedWidth = isset($attributes['width']) ? (float) $attributes['width'] : null;
        $requestedHeight = isset($attributes['height']) ? (float) $attributes['height'] : null;
        $altText = $attributes['alt'] ?? null;

        $image = new Drawing();
        $image->setPath($imagePath);
        $image->setWorksheet($sheet);
        $image->setCoordinates($column . $row);
        $image->setOffsetX(0);
        $image->setOffsetY(10);
        $image->setResizeProportional(true);

        if ($altText) {
            $image->setName($altText);
        }

        if ($requestedWidth) {
            $image->setWidth((int) $requestedWidth);
        }

        if ($requestedHeight) {
            $image->setHeight((int) $requestedHeight);
        }

        // Size the host column and row from the drawing's final dimensions
        $columnWidth = $image->getWidth() / 6;
        $sheet->getColumnDimension($column)->setWidth($columnWidth);

        $rowHeight = $image->getHeight() * 0.9;
        $sheet->getRowDimension($row)->setRowHeight($rowHeight);
    }
1093
1094
    /**
     * HTML/CSS border style keyword => PhpSpreadsheet Border style constant.
     *
     * Note that 'solid' maps to the thin border style.
     */
    private const BORDER_MAPPINGS = [
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium' => Border::BORDER_MEDIUM,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'none' => Border::BORDER_NONE,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
        'solid' => Border::BORDER_THIN,
        'thick' => Border::BORDER_THICK,
    ];
1110
1111 15
    /**
     * Expose the HTML border keyword => Border style table.
     *
     * @return array The BORDER_MAPPINGS constant
     */
    public static function getBorderMappings(): array
    {
        $mappings = self::BORDER_MAPPINGS;

        return $mappings;
    }
1115
1116
    /**
     * Look up the PhpSpreadsheet border style for an HTML border style keyword.
     *
     * @param string $style HTML/CSS border style keyword, e.g. 'solid' or 'dashed'
     *
     * @return null|string The mapped border style, or null when the keyword is not in BORDER_MAPPINGS
     */
    public function getBorderStyle($style): ?string
    {
        $mappings = self::BORDER_MAPPINGS;

        return $mappings[$style] ?? null;
    }
1125
1126 3
    /**
     * Parse a CSS border declaration value and apply it to one border of a style.
     *
     * The value 'none' clears the border. Otherwise the value is split on
     * whitespace: with three or more tokens the second is taken as the style
     * keyword and the third as the colour ("1px solid #000000"); with fewer
     * tokens the first is the style keyword and the second, if present, the
     * colour.
     *
     * @param Style $cellStyle Style object to update
     * @param string $styleValue Value part of the CSS border declaration
     * @param string $type Key under 'borders' to set, e.g. 'allBorders', 'top', 'left'
     */
    private function setBorderStyle(Style $cellStyle, string $styleValue, string $type): void
    {
        $styleValue = trim($styleValue);

        if ($styleValue === Border::BORDER_NONE) {
            $borderStyle = Border::BORDER_NONE;
            $color = null;
        } else {
            // Split on runs of any whitespace so repeated spaces or tabs do not
            // produce empty tokens that shift the style and colour positions.
            $borderArray = preg_split('/\s+/', $styleValue, -1, PREG_SPLIT_NO_EMPTY) ?: [];
            if (count($borderArray) >= 3) {
                $borderStyle = $borderArray[1];
                $color = $borderArray[2];
            } else {
                // An empty value yields no tokens; fall back to an empty keyword
                $borderStyle = $borderArray[0] ?? '';
                $color = $borderArray[1] ?? null;
            }
        }

        $cellStyle->applyFromArray([
            'borders' => [
                $type => [
                    'borderStyle' => $this->getBorderStyle($borderStyle),
                    'color' => ['rgb' => $this->getStyleColor($color)],
                ],
            ],
        ]);
    }
1152
1153
    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The file is loaded into a temporary workbook, which is disconnected again
     * before returning, including when loading fails.
     *
     * @param string $filename File to inspect
     *
     * @return array One entry per sheet with keys worksheetName, lastColumnLetter,
     *               lastColumnIndex (Coordinate::columnIndexFromString() of the last column, minus one),
     *               totalRows and totalColumns (lastColumnIndex + 1)
     */
    public function listWorksheetInfo(string $filename): array
    {
        $info = [];
        $spreadsheet = new Spreadsheet();

        try {
            $this->loadIntoExisting($filename, $spreadsheet);
            foreach ($spreadsheet->getAllSheets() as $sheet) {
                $highestColumn = $sheet->getHighestDataColumn();
                $columnCount = Coordinate::columnIndexFromString($highestColumn);
                $info[] = [
                    'worksheetName' => $sheet->getTitle(),
                    'lastColumnLetter' => $highestColumn,
                    'lastColumnIndex' => $columnCount - 1,
                    'totalRows' => $sheet->getHighestDataRow(),
                    'totalColumns' => $columnCount,
                ];
            }
        } finally {
            // Always release the temporary workbook, even if loading threw
            $spreadsheet->disconnectWorksheets();
        }

        return $info;
    }
1173
}
1174