Passed
Pull Request — master (#4142)
by Owen
11:33
created

Html::insertImage()   C

Complexity

Conditions 12
Paths 201

Size

Total Lines 52
Code Lines 35

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 24
CRAP Score 12

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 35
dl 0
loc 52
rs 6.1208
c 1
b 0
f 0
ccs 24
cts 24
cp 1
cc 12
nc 201
nop 4
crap 12

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use DOMAttr;
6
use DOMDocument;
7
use DOMElement;
8
use DOMNode;
9
use DOMText;
10
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
11
use PhpOffice\PhpSpreadsheet\Cell\DataType;
12
use PhpOffice\PhpSpreadsheet\Comment;
13
use PhpOffice\PhpSpreadsheet\Document\Properties;
14
use PhpOffice\PhpSpreadsheet\Exception as SpreadsheetException;
15
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
16
use PhpOffice\PhpSpreadsheet\Helper\Html as HelperHtml;
17
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
18
use PhpOffice\PhpSpreadsheet\Spreadsheet;
19
use PhpOffice\PhpSpreadsheet\Style\Border;
20
use PhpOffice\PhpSpreadsheet\Style\Color;
21
use PhpOffice\PhpSpreadsheet\Style\Fill;
22
use PhpOffice\PhpSpreadsheet\Style\Font;
23
use PhpOffice\PhpSpreadsheet\Style\Style;
24
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
25
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
26
use Throwable;
27
28
class Html extends BaseReader
29
{
30
    /**
     * Number of bytes sampled from the start and from the end of a file
     * when deciding whether it looks like HTML.
     */
    const TEST_SAMPLE_SIZE = 2048;

    /** Matches a leading byte-order mark (UTF-16 in either byte order, or UTF-8). */
    private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/';

    /** Matches a " charset=" declaration, case-insensitively. */
    private const DECLARES_CHARSET = '/ charset=/i';

    /**
     * Input encoding.
     */
    protected string $inputEncoding = 'ANSI';

    /**
     * Sheet index to read.
     */
    protected int $sheetIndex = 0;

    /**
     * Style arrays applied to cells, keyed by the HTML tag name that triggers them.
     * Each value is passed as-is to Style::applyFromArray().
     */
    protected array $formats = [
        // Headings: bold, with a size per level
        'h1' => [
            'font' => [
                'bold' => true,
                'size' => 24,
            ],
        ], //    Bold, 24pt
        'h2' => [
            'font' => [
                'bold' => true,
                'size' => 18,
            ],
        ], //    Bold, 18pt
        'h3' => [
            'font' => [
                'bold' => true,
                'size' => 13.5,
            ],
        ], //    Bold, 13.5pt
        'h4' => [
            'font' => [
                'bold' => true,
                'size' => 12,
            ],
        ], //    Bold, 12pt
        'h5' => [
            'font' => [
                'bold' => true,
                'size' => 10,
            ],
        ], //    Bold, 10pt
        'h6' => [
            'font' => [
                'bold' => true,
                'size' => 7.5,
            ],
        ], //    Bold, 7.5pt
        'a' => [
            'font' => [
                'underline' => true,
                'color' => [
                    'argb' => Color::COLOR_BLUE,
                ],
            ],
        ], //    Blue underlined
        'hr' => [
            'borders' => [
                'bottom' => [
                    'borderStyle' => Border::BORDER_THIN,
                    // Keyed with 'argb' like the 'a' entry above; a positional
                    // entry carries no colour key for the style array to read.
                    'color' => [
                        'argb' => Color::COLOR_BLACK,
                    ],
                ],
            ],
        ], //    Bottom border
        'strong' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'b' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'i' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
        'em' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
    ];

    /**
     * Cell coordinates already claimed by a rowspan merge (coordinate => true).
     */
    protected array $rowspan = [];
130
131
    /**
     * Build an HTML reader and attach the XML security scanner to it.
     */
    public function __construct()
    {
        parent::__construct();

        // The scanner is obtained for this reader instance and kept for later file scans.
        $this->securityScanner = XmlScanner::getInstance($this);
    }
139
140
    /**
     * Decide whether the given file looks like an HTML document.
     *
     * The file passes when its first sample (with any leading byte-order mark
     * removed) starts with a tag and contains at least one tag, and its last
     * sample ends with a tag.
     *
     * @return bool false when openFile() throws or any of the checks fails
     */
    public function canRead(string $filename): bool
    {
        try {
            $this->openFile($filename);
        } catch (Exception) {
            // The file could not be opened, so it cannot be read as HTML.
            return false;
        }

        // Both samples must be taken while the handle is still open.
        $head = preg_replace(self::STARTS_WITH_BOM, '', $this->readBeginning()) ?? '';
        $tail = $this->readEnding();

        fclose($this->fileHandle);

        return self::startsWithTag($head)
            && self::containsTags($head)
            && self::endsWithTag($tail);
    }
162
163 478
    /**
     * Read up to TEST_SAMPLE_SIZE bytes from the start of the open file.
     *
     * @return string the sample; an empty string when fread() fails
     */
    private function readBeginning(): string
    {
        // Rewind first: the handle may not be positioned at the start.
        fseek($this->fileHandle, 0);
        $sample = fread($this->fileHandle, self::TEST_SAMPLE_SIZE);

        return (string) $sample;
    }
169
170 478
    /**
     * Read up to TEST_SAMPLE_SIZE bytes from the end of the open file.
     *
     * @return string the trailing sample, or an empty string for a zero-length file
     */
    private function readEnding(): string
    {
        $meta = stream_get_meta_data($this->fileHandle);
        // Phpstan incorrectly flags following line for Php8.2-, corrected in 8.3
        $filename = $meta['uri']; //@phpstan-ignore-line

        $size = (int) filesize($filename);
        if ($size === 0) {
            return '';
        }

        // Never sample more bytes than the file holds.
        $blockSize = min($size, self::TEST_SAMPLE_SIZE);
        fseek($this->fileHandle, $size - $blockSize);
        $sample = fread($this->fileHandle, $blockSize);

        return (string) $sample;
    }
190
191 478
    /**
     * Tell whether the data, ignoring surrounding whitespace, begins with '<'.
     */
    private static function startsWithTag(string $data): bool
    {
        $trimmed = trim($data);

        return str_starts_with($trimmed, '<');
    }
195
196 478
    /**
     * Tell whether the data, ignoring surrounding whitespace, finishes with '>'.
     */
    private static function endsWithTag(string $data): bool
    {
        $trimmed = trim($data);

        return str_ends_with($trimmed, '>');
    }
200
201 478
    /**
     * Tell whether strip_tags() removes anything from the data, i.e. whether
     * the data holds at least one thing PHP treats as a tag.
     */
    private static function containsTags(string $data): bool
    {
        $withoutTags = strip_tags($data);

        return strlen($withoutTags) !== strlen($data);
    }
205
206
    /**
     * Load the given HTML file into a newly created Spreadsheet.
     *
     * @return Spreadsheet the result of loadIntoExisting() on the new instance
     */
    public function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        return $this->loadIntoExisting($filename, new Spreadsheet());
    }
217
218
    //    Data Array used for testing only, should write to Spreadsheet object on completion of tests

    protected array $dataArray = [];

    /** Current table nesting depth; 0 means no table is open. */
    protected int $tableLevel = 0;

    /** Start column recorded for each table nesting level. */
    protected array $nestedColumn = ['A'];

    /**
     * Enter a new table nesting level and record the column it starts in.
     *
     * An outermost table always starts in column 'A', whatever column is passed.
     *
     * @return string the start column recorded for the new level
     */
    protected function setTableStartColumn(string $column): string
    {
        $startColumn = ($this->tableLevel === 0) ? 'A' : $column;

        ++$this->tableLevel;
        $this->nestedColumn[$this->tableLevel] = $startColumn;

        return $startColumn;
    }
236
237 470
    /**
     * Return the start column recorded for the current table nesting level.
     */
    protected function getTableStartColumn(): string
    {
        $level = $this->tableLevel;

        return $this->nestedColumn[$level];
    }
241
242 474
    /**
     * Leave the current table nesting level.
     *
     * @return string the last entry removed from the recorded start columns
     */
    protected function releaseTableStartColumn(): string
    {
        --$this->tableLevel;
        $released = array_pop($this->nestedColumn);

        return $released;
    }
248
249
    /**
     * Write the accumulated cell content to the worksheet and reset it.
     *
     * String content is written only when it is non-blank after trimming. A
     * 'data-type' attribute makes the write explicit; when that type is
     * rejected the value is written normally instead. Non-string content is
     * only recorded in the test data array. The content is always reset to an
     * empty string afterwards.
     *
     * @param mixed $cellContent pending content, cleared by this call
     * @param array $attributeArray attributes of the element being flushed
     */
    protected function flushCell(Worksheet $sheet, string $column, int|string $row, mixed &$cellContent, array $attributeArray): void
    {
        if (!is_string($cellContent)) {
            //    We have a Rich Text run
            //    TODO
            $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
        } elseif (trim($cellContent) > '') {
            //    Only actually write it if there's content in the string
            $coordinate = $column . $row;
            $dataType = $attributeArray['data-type'] ?? null;

            if ($dataType === null) {
                $sheet->setCellValue($coordinate, $cellContent);
            } else {
                $isStringType = in_array($dataType, [DataType::TYPE_STRING, DataType::TYPE_STRING2, DataType::TYPE_INLINE]);
                if ($isStringType && str_starts_with($cellContent, '=')) {
                    // Quote-prefix the cell so a leading '=' is kept as text.
                    $sheet->getCell($coordinate)
                        ->getStyle()
                        ->setQuotePrefix(true);
                }

                try {
                    $sheet->setCellValueExplicit($coordinate, $cellContent, $dataType);
                } catch (SpreadsheetException) {
                    // Invalid data type: fall back to a normal write.
                    $sheet->setCellValue($coordinate, $cellContent);
                }
            }

            $this->dataArray[$row][$column] = $cellContent;
        }

        $cellContent = '';
    }
291
292 475
    /**
     * Entry point of the per-element dispatch chain: collects the element's
     * attributes, handles <body> itself, and hands every other element on to
     * the <title> handler.
     */
    private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
    {
        // Snapshot the element's attributes as a name => value map.
        $attributeArray = [];
        /** @var DOMAttr $domAttribute */
        foreach ($child->attributes as $domAttribute) {
            $attributeArray[$domAttribute->name] = $domAttribute->value;
        }

        if ($child->nodeName !== 'body') {
            $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // <body> restarts the cursor at A1 with no pending content and no open table.
        $row = 1;
        $column = 'A';
        $cellContent = '';
        $this->tableLevel = 0;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
310
311 475
    /**
     * Handle <title>: use its text as the sheet title and the document title
     * property. Any other element goes on to the span-like handler.
     */
    private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'title') {
            $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        // Collect the title text into $cellContent.
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        try {
            $sheet->setTitle($cellContent, true, true);
            $sheet->getParent()?->getProperties()?->setTitle($cellContent);
        } catch (SpreadsheetException) {
            // leave default title if too long or illegal chars
        }

        // The title text must not end up in a cell.
        $cellContent = '';
    }
327
328
    /** Inline/container tags handled by processDomElementSpanEtc(). */
    private const SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];

    /**
     * Handle span-like elements. An element with class "comment" becomes a
     * cell comment (honouring dir="rtl" and a text-align style); otherwise its
     * children are processed. Any format registered for the tag is then
     * applied. Other elements go on to the <hr> handler.
     */
    private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array((string) $child->nodeName, self::SPAN_ETC, true)) {
            $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if (($attributeArray['class'] ?? null) === 'comment') {
            $comment = $sheet->getComment($column . $row);
            $comment->getText()->createTextRun($child->textContent);

            if (($attributeArray['dir'] ?? null) === 'rtl') {
                $comment->setTextboxDirection(Comment::TEXTBOX_DIRECTION_RTL);
            }

            if (isset($attributeArray['style'])) {
                $found = preg_match('/\\btext-align:\\s*(left|right|center|justify)\\b/', $attributeArray['style'], $matches);
                if ($found === 1) {
                    $comment->setAlignment($matches[1]);
                }
            }
        } else {
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        }

        $tagFormat = $this->formats[$child->nodeName] ?? null;
        if ($tagFormat !== null) {
            $sheet->getStyle($column . $row)->applyFromArray($tagFormat);
        }
    }
357 475
358
    /**
     * Handle <hr>: flush pending content, move down a row, apply the hr format
     * there and move down again. Every element, <hr> included, then continues
     * to the <br> handler.
     */
    private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;

        if ($tag === 'hr') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;

            $ruleFormat = $this->formats[$tag] ?? null;
            if ($ruleFormat !== null) {
                $sheet->getStyle($column . $row)->applyFromArray($ruleFormat);
            }
            ++$row;
        }

        // fall through to br
        $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
    }
371 475
372
    /**
     * Handle line breaks (<br>, and <hr> arriving from the previous handler).
     * Inside a table a newline is appended and the cell set to wrap; outside,
     * the content is flushed and the row advanced. Other elements go on to the
     * <a> handler.
     */
    private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;
        if ($tag !== 'br' && $tag !== 'hr') {
            $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Inside a table: keep the break within the cell and wrap its text
            $cellContent .= "\n";
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);

            return;
        }

        //    Outside a table: write what we have and continue on the next row
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        ++$row;
    }
388 475
389
    /**
     * Handle <a>: an href becomes the cell's hyperlink and the cell receives
     * the link format; the anchor's children are then processed. Other
     * attributes (such as class="comment-indicator") have no effect. Other
     * elements go on to the heading/paragraph handler.
     */
    private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'a') {
            $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if (isset($attributeArray['href'])) {
            $coordinate = $column . $row;
            $sheet->getCell($coordinate)->getHyperlink()->setUrl($attributeArray['href']);

            $linkFormat = $this->formats[$child->nodeName] ?? null;
            if ($linkFormat !== null) {
                $sheet->getStyle($coordinate)->applyFromArray($linkFormat);
            }
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
414
415
    /** Block-level tags handled by processDomElementH1Etc(). */
    private const H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];

    /**
     * Handle headings, lists and paragraphs. Inside a table the element's text
     * is appended to the current cell on a new line; outside, it gets a row of
     * its own, formatted per tag. Other elements go on to the <li> handler.
     */
    private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array((string) $child->nodeName, self::H1_ETC, true)) {
            $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Inside a table: separate from existing content with a newline
            if ($cellContent) {
                $cellContent .= "\n";
            }
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        // Pending content gets its own row before the block starts.
        if ($cellContent !== '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $blockFormat = $this->formats[$child->nodeName] ?? null;
        if ($blockFormat !== null) {
            $sheet->getStyle($column . $row)->applyFromArray($blockFormat);
        }

        ++$row;
        $column = 'A';
    }
444 475
445
    /**
     * Handle <li>. Inside a table the item's text is appended to the current
     * cell on a new line; outside, the item is written on the next row. Other
     * elements go on to the <img> handler.
     */
    private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'li') {
            $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    Inside a table: separate from existing content with a newline
            if ($cellContent) {
                $cellContent .= "\n";
            }
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent !== '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        }
        // The row advances whether or not anything was pending.
        ++$row;

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = 'A';
    }
465 475
466
    /**
     * Handle <img> by inserting the image at the current cell. Other elements
     * go on to the <table> handler.
     */
    private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'img') {
            $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->insertImage($sheet, $column, $row, $attributeArray);
    }
474
475
    /** Column that the next <col> element applies to; reset to 'A' at each <table>. */
    private string $currentColumn = 'A';

    /**
     * Handle <table>: apply gridline classes, flush pending content, enter a
     * table nesting level, process the table's children and leave the level
     * again. Other elements go on to the <col>/<tr> handler.
     */
    private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'table') {
            $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if (isset($attributeArray['class'])) {
            $classNames = explode(' ', $attributeArray['class']);
            $sheet->setShowGridlines(in_array('gridlines', $classNames, true));
            $sheet->setPrintGridlines(in_array('gridlinesp', $classNames, true));
        }

        $this->currentColumn = 'A';
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = $this->setTableStartColumn($column);

        // A nested table starts one row up, unless already on the first row.
        if ($this->tableLevel > 1 && $row > 1) {
            --$row;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $column = $this->releaseTableStartColumn();

        // After leaving the level: step right while still nested more than one deep, else step down.
        if ($this->tableLevel > 1) {
            ++$column;
        } else {
            ++$row;
        }
    }
502 475
503 470
    /**
     * Handle <col> and <tr>.
     *
     * <col> applies its inline style to the current column and advances the
     * column cursor. <tr> processes its cells from the table's start column,
     * applies an optional height attribute to the row, and moves to the next
     * row. Other elements go on to the <th>/<td> handler.
     *
     * @param int $row current row, advanced after a <tr>
     * @param string $column current column, reset to the table start for a <tr>
     * @param string $cellContent pending cell content, cleared at the start of a <tr>
     * @param array $attributeArray attributes of $child (name => value)
     */
    private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName === 'col') {
            $this->applyInlineStyle($sheet, -1, $this->currentColumn, $attributeArray);
            ++$this->currentColumn;
        } elseif ($child->nodeName === 'tr') {
            $column = $this->getTableStartColumn();
            $cellContent = '';
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            if (isset($attributeArray['height'])) {
                // Parse the attribute the same way processDomElementHeight() does for
                // cells, instead of handing the raw attribute string to setRowHeight().
                $rowHeight = (new CssDimension($attributeArray['height']))->height();
                $sheet->getRowDimension($row)->setRowHeight($rowHeight);
            }

            ++$row;
        } else {
            $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);
        }
    }
522 470
523
    /**
     * Last step of the dispatch chain: <td> and <th> go to the cell handler;
     * any other element just has its children processed.
     */
    private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isTableCell = in_array($child->nodeName, ['td', 'th'], true);

        if ($isTableCell) {
            $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);
        } else {
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        }
    }
531 1
532 1
    /**
     * Apply a bgcolor attribute, when present, as a solid fill on the cell.
     */
    private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['bgcolor'])) {
            return;
        }

        $fill = [
            'fillType' => Fill::FILL_SOLID,
            'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
        ];
        $sheet->getStyle("$column$row")->applyFromArray(['fill' => $fill]);
    }
545
546
    /**
     * Apply a width attribute, when present, to the column's dimension.
     */
    private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['width'])) {
            return;
        }

        $dimension = new CssDimension($attributeArray['width']);
        $sheet->getColumnDimension($column)->setWidth($dimension->width());
    }
552
553
    /**
     * Apply a height attribute, when present, to the row's dimension.
     */
    private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
    {
        if (!isset($attributeArray['height'])) {
            return;
        }

        $dimension = new CssDimension($attributeArray['height']);
        $sheet->getRowDimension($row)->setRowHeight($dimension->height());
    }
559
560
    /**
     * Apply an align attribute, when present, as the cell's horizontal alignment.
     */
    private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['align'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setHorizontal($attributeArray['align']);
    }
566
567
    /**
     * Apply a valign attribute, when present, as the cell's vertical alignment.
     */
    private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['valign'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setVertical($attributeArray['valign']);
    }
573
574
    /**
     * Apply a data-format attribute, when present, as the cell's number format code.
     */
    private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['data-format'])) {
            return;
        }

        $numberFormat = $sheet->getStyle($column . $row)->getNumberFormat();
        $numberFormat->setFormatCode($attributeArray['data-format']);
    }
580 470
581
    /**
     * Handle a <td>/<th> cell.
     *
     * Skips columns already claimed by a rowspan, processes the cell's
     * children, applies inline style and the presentational attributes, writes
     * the content, merges cells for rowspan/colspan, and finally advances the
     * column cursor past the cell.
     */
    private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        // Step over cells occupied by a rowspan started in an earlier row.
        while (isset($this->rowspan[$column . $row])) {
            ++$column;
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // apply inline style
        $this->applyInlineStyle($sheet, $row, $column, $attributeArray);

        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
        $this->processDomElementWidth($sheet, $column, $attributeArray);
        $this->processDomElementHeight($sheet, $row, $attributeArray);
        $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);

        $hasRowspan = isset($attributeArray['rowspan']);
        $hasColspan = isset($attributeArray['colspan']);

        if ($hasRowspan || $hasColspan) {
            // Right-hand edge of the merge: colspan - 1 columns to the right.
            $lastColumn = $column;
            if ($hasColspan) {
                $extraColumns = (int) $attributeArray['colspan'] - 1;
                while ($extraColumns-- > 0) {
                    ++$lastColumn;
                }
            }

            // Bottom edge of the merge: rowspan - 1 rows down.
            $lastRow = $hasRowspan ? $row + (int) $attributeArray['rowspan'] - 1 : $row;
            $range = $column . $row . ':' . $lastColumn . $lastRow;

            // Only merges that span rows are remembered for the skip loop above.
            if ($hasRowspan) {
                foreach (Coordinate::extractAllCellReferencesInRange($range) as $coordinate) {
                    $this->rowspan[$coordinate] = true;
                }
            }

            $sheet->mergeCells($range);
            $column = $lastColumn;
        }

        ++$column;
    }
631 472
632 12
    /**
     * Walk the child nodes of an element. Text nodes are whitespace-normalised
     * and appended to the pending cell content; element nodes enter the
     * per-element dispatch chain. Other node types are ignored.
     */
    protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
    {
        foreach ($element->childNodes as $node) {
            if ($node instanceof DOMElement) {
                $this->processDomElementBody($sheet, $row, $column, $cellContent, $node);

                continue;
            }
            if (!($node instanceof DOMText)) {
                continue;
            }

            // Trim, then collapse each run of whitespace into one space.
            $text = (string) preg_replace('/\s+/', ' ', trim($node->nodeValue ?? ''));
            // A lone non-breaking space counts as empty.
            if ($text === "\u{a0}") {
                $text = '';
            }

            //    simply append the text if the cell content is a plain text string
            //    (rich text runs would need to be appended differently - TODO)
            if (is_string($cellContent)) {
                $cellContent .= $text;
            }
        }
    }
651
652 460
    /**
     * Load an HTML file into an existing Spreadsheet instance.
     *
     * The file is validated with canRead(), passed through the security
     * scanner and the non-ASCII replacement step, parsed into a DOM document,
     * and then used to fill the document properties and the sheet.
     *
     * @throws Exception when the file is not valid HTML or cannot be parsed
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        if (!$this->canRead($filename)) {
            throw new Exception($filename . ' is an Invalid HTML file.');
        }

        $dom = new DOMDocument();
        $failure = null;

        try {
            $html = $this->getSecurityScannerOrThrow()->scanFile($filename);
            $html = self::replaceNonAsciiIfNeeded($html);
            $loaded = ($html === null) ? false : $dom->loadHTML($html);
        } catch (Throwable $exception) {
            // Keep the cause so it can be chained to the exception thrown below.
            $failure = $exception;
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $failure);
        }

        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
680 438
681 430
    /**
     * Copy document metadata from the DOM into the spreadsheet properties.
     *
     * Every <meta> element with non-empty content is mapped by its name: the
     * standard names go to their dedicated setters, and names of the form
     * "custom.<type>.<name>" become custom properties of that type. Unknown
     * names are ignored. A non-empty base URI is stored as the hyperlink base.
     */
    private static function loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet): void
    {
        $properties = $spreadsheet->getProperties();

        foreach ($dom->getElementsByTagName('meta') as $meta) {
            $metaContent = (string) $meta->getAttribute('content');
            if ($metaContent === '') {
                continue;
            }
            $metaName = (string) $meta->getAttribute('name');

            // Custom properties; none of the standard names below matches this pattern.
            if (preg_match('/^custom[.](bool|date|float|int|string)[.](.+)$/', $metaName, $matches) === 1) {
                [, $customType, $customName] = $matches;
                match ($customType) {
                    'bool' => $properties->setCustomProperty($customName, (bool) $metaContent, Properties::PROPERTY_TYPE_BOOLEAN),
                    'float' => $properties->setCustomProperty($customName, (float) $metaContent, Properties::PROPERTY_TYPE_FLOAT),
                    'int' => $properties->setCustomProperty($customName, (int) $metaContent, Properties::PROPERTY_TYPE_INTEGER),
                    'date' => $properties->setCustomProperty($customName, $metaContent, Properties::PROPERTY_TYPE_DATE),
                    // string
                    default => $properties->setCustomProperty($customName, $metaContent, Properties::PROPERTY_TYPE_STRING),
                };

                continue;
            }

            match ($metaName) {
                'author' => $properties->setCreator($metaContent),
                'category' => $properties->setCategory($metaContent),
                'company' => $properties->setCompany($metaContent),
                'created' => $properties->setCreated($metaContent),
                'description' => $properties->setDescription($metaContent),
                'keywords' => $properties->setKeywords($metaContent),
                'lastModifiedBy' => $properties->setLastModifiedBy($metaContent),
                'manager' => $properties->setManager($metaContent),
                'modified' => $properties->setModified($metaContent),
                'subject' => $properties->setSubject($metaContent),
                'title' => $properties->setTitle($metaContent),
                'viewport' => $properties->setViewport($metaContent),
                default => null,
            };
        }

        $baseUri = $dom->baseURI;
        if (!empty($baseUri)) {
            $properties->setHyperlinkBase($baseUri);
        }
    }
755 476
756
    /**
     * Regex callback: turn the matched character into a decimal HTML numeric entity.
     *
     * @param array $matches Match array from preg_replace_callback; only element 0 is used
     *
     * @return string Entity of the form `&#NNN;`
     */
    private static function replaceNonAscii(array $matches): string
    {
        $codePoint = mb_ord($matches[0], 'UTF-8');

        return "&#{$codePoint};";
    }
760 32
761
    /**
     * Replace every character from U+0080 upwards with a numeric entity, unless
     * the content matches STARTS_WITH_BOM or DECLARES_CHARSET, in which case it
     * is returned unchanged.
     *
     * @return null|string Converted content, or null when the regex replacement fails
     */
    private static function replaceNonAsciiIfNeeded(string $convert): ?string
    {
        $startsWithBom = preg_match(self::STARTS_WITH_BOM, $convert) === 1;
        if ($startsWithBom || preg_match(self::DECLARES_CHARSET, $convert) === 1) {
            return $convert;
        }

        // Character class spanning U+0080 .. U+10FFFF.
        $pattern = "/[\u{80}-\u{10ffff}]/u";

        return preg_replace_callback(
            $pattern,
            static fn (array $hit): string => self::replaceNonAscii($hit),
            $convert
        );
    }
774
775 17
    /**
     * Build a spreadsheet from HTML supplied as a string.
     *
     * The content is passed through the security scanner, non-ASCII characters
     * are converted to entities when needed, and the result is parsed into a
     * DOM document whose properties and body are then loaded.
     *
     * @param string $content HTML markup
     * @param null|Spreadsheet $spreadsheet Spreadsheet to load into; a new one is created when null
     *
     * @return Spreadsheet The populated spreadsheet
     */
    public function loadFromString(string $content, ?Spreadsheet $spreadsheet = null): Spreadsheet
    {
        $dom = new DOMDocument();
        $failure = null;

        // Any error while scanning, converting or parsing counts as a load failure.
        try {
            $scanned = $this->getSecurityScannerOrThrow()->scan($content);
            $prepared = self::replaceNonAsciiIfNeeded($scanned);
            $loaded = ($prepared !== null) && $dom->loadHTML($prepared);
        } catch (Throwable $failure) {
            $loaded = false;
        }

        if (!$loaded) {
            throw new Exception('Failed to load content as a DOM Document', 0, $failure);
        }

        $spreadsheet ??= new Spreadsheet();
        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
799 475
800 2
    /**
     * Load a parsed DOM document into the target sheet of the given spreadsheet.
     *
     * Sheets are created until the configured sheet index exists; that sheet is
     * made active and receives the content produced by walking the DOM.
     *
     * @return Spreadsheet The same spreadsheet instance that was passed in
     */
    private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
    {
        // Make sure the requested sheet exists before selecting it.
        while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
            $spreadsheet->createSheet();
        }
        $spreadsheet->setActiveSheetIndex($this->sheetIndex);

        // Discard white space
        $document->preserveWhiteSpace = false;

        // Fresh parsing state, starting before the first row at column A.
        $this->rowspan = [];
        $row = 0;
        $column = 'A';
        $content = '';

        $activeSheet = $spreadsheet->getActiveSheet();
        $this->processDomElement($document, $activeSheet, $row, $column, $content);

        return $spreadsheet;
    }
822 1
823
    /**
     * Index of the worksheet that loaded content is written to.
     *
     * @return int Sheet index currently configured on this reader
     */
    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }
830
831
    /**
     * Choose the worksheet that loaded content is written to.
     *
     * @param int $sheetIndex Sheet index
     *
     * @return $this Same reader, to allow call chaining
     */
    public function setSheetIndex(int $sheetIndex): static
    {
        $this->sheetIndex = $sheetIndex;

        return $this;
    }
844
845
    /**
     * Apply the inline CSS of an element's `style` attribute to the matching cell(s).
     *
     * The target is the cell at $column/$row, widened to a range when `rowspan`
     * and/or `colspan` attributes are present. When there is no usable
     * coordinate ($row <= 0 or empty $column) the declarations are applied to a
     * detached Style object; in that case only `width` (non-empty $column) and
     * `height` ($row > 0) can still affect the sheet.
     *
     * Recognised properties: background, background-color, color, border,
     * border-top, border-bottom, border-left, border-right, font-size,
     * font-weight, font-style, font-family, text-decoration, text-align,
     * vertical-align, width, height, word-wrap and text-indent. Any other
     * property is ignored.
     *
     * @param Worksheet $sheet Sheet that receives the styles
     * @param int $row Row number of the anchor cell; values <= 0 mean "no cell"
     * @param string $column Column letter(s) of the anchor cell; '' means "no cell"
     * @param array $attributeArray Attributes of the HTML element (style, rowspan, colspan, ...)
     */
    private function applyInlineStyle(Worksheet &$sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['style'])) {
            return;
        }

        $hasRowspan = isset($attributeArray['rowspan']);
        $hasColspan = isset($attributeArray['colspan']);

        if ($row <= 0 || $column === '') {
            // No addressable cell: style a throw-away object.
            $cellStyle = new Style();
        } elseif (!$hasRowspan && !$hasColspan) {
            $cellStyle = $sheet->getStyle($column . $row);
        } else {
            // Spanned cell: style the whole covered range.
            $columnTo = $column;
            if ($hasColspan) {
                for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
                    ++$columnTo;
                }
            }
            $rowTo = $hasRowspan ? $row + (int) $attributeArray['rowspan'] - 1 : $row;
            $cellStyle = $sheet->getStyle($column . $row . ':' . $columnTo . $rowTo);
        }

        // Walk the "name: value" declarations of the inline style.
        $styles = explode(';', $attributeArray['style']);
        foreach ($styles as $st) {
            $value = explode(':', $st);
            $styleName = trim($value[0]);
            $styleValue = isset($value[1]) ? trim($value[1]) : null;
            $styleValueString = (string) $styleValue;

            if (!$styleName) {
                continue;
            }

            switch ($styleName) {
                case 'background':
                case 'background-color':
                    $styleColor = $this->getStyleColor($styleValueString);
                    if ($styleColor) {
                        $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);
                    }

                    break;
                case 'color':
                    $styleColor = $this->getStyleColor($styleValueString);
                    if ($styleColor) {
                        $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
                    }

                    break;
                case 'border':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'allBorders');

                    break;
                case 'border-top':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'top');

                    break;
                case 'border-bottom':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'bottom');

                    break;
                case 'border-left':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'left');

                    break;
                case 'border-right':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'right');

                    break;
                case 'font-size':
                    $cellStyle->getFont()->setSize((float) $styleValue);

                    break;
                case 'font-weight':
                    // Compare numerically only when the value is numeric. In
                    // PHP 8 a keyword such as 'normal' or 'lighter' compared
                    // with an int is compared as a string, and would wrongly
                    // satisfy ">= 500". 'bolder' keeps being treated as bold.
                    $isBold = $styleValue === 'bold'
                        || $styleValue === 'bolder'
                        || (is_numeric($styleValue) && (float) $styleValue >= 500);
                    if ($isBold) {
                        $cellStyle->getFont()->setBold(true);
                    }

                    break;
                case 'font-style':
                    if ($styleValue === 'italic') {
                        $cellStyle->getFont()->setItalic(true);
                    }

                    break;
                case 'font-family':
                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValueString));

                    break;
                case 'text-decoration':
                    switch ($styleValue) {
                        case 'underline':
                            $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);

                            break;
                        case 'line-through':
                            $cellStyle->getFont()->setStrikethrough(true);

                            break;
                    }

                    break;
                case 'text-align':
                    $cellStyle->getAlignment()->setHorizontal($styleValueString);

                    break;
                case 'vertical-align':
                    $cellStyle->getAlignment()->setVertical($styleValueString);

                    break;
                case 'width':
                    // Width applies to the whole column, not to the cell style.
                    if ($column !== '') {
                        $sheet->getColumnDimension($column)->setWidth(
                            (new CssDimension($styleValue ?? ''))->width()
                        );
                    }

                    break;
                case 'height':
                    // Height applies to the whole row, not to the cell style.
                    if ($row > 0) {
                        $sheet->getRowDimension($row)->setRowHeight(
                            (new CssDimension($styleValue ?? ''))->height()
                        );
                    }

                    break;
                case 'word-wrap':
                    $cellStyle->getAlignment()->setWrapText($styleValue === 'break-word');

                    break;
                case 'text-indent':
                    $cellStyle->getAlignment()->setIndent(
                        (int) str_replace(['px'], '', $styleValueString)
                    );

                    break;
            }
        }
    }
1028 7
1029 7
    /**
     * Normalise a CSS colour value.
     *
     * A value starting with `#` is returned without that character; any other
     * value (null is treated as an empty string) is passed to the colour-name
     * lookup helper.
     *
     * @param null|string $value CSS colour, e.g. `#FF0000` or `red`
     *
     * @return string The value without its leading `#`, or the lookup result
     */
    public function getStyleColor(?string $value): string
    {
        $colour = (string) $value;

        return str_starts_with($colour, '#')
            ? substr($colour, 1)
            : HelperHtml::colourNameLookup($colour);
    }
1041
1042 7
    /**
     * Insert the image described by an <img> element's attributes into a cell.
     *
     * Nothing happens when there is no `src` attribute or when the drawing ends
     * up with an empty path after setPath(). Otherwise the drawing is anchored
     * at $column/$row, named after the `alt` attribute when present, sized from
     * the width/height attributes or inline style, and the column width and
     * row height are adjusted from the final drawing size. A numeric `opacity`
     * style is applied last.
     *
     * @param Worksheet $sheet Sheet receiving the drawing
     * @param string $column Column letter(s) of the anchor cell
     * @param int $row Row number of the anchor cell
     * @param array $attributes Attributes of the <img> element
     */
    private function insertImage(Worksheet $sheet, string $column, int $row, array $attributes): void
    {
        if (!isset($attributes['src'])) {
            return;
        }
        $styleArray = self::getStyleArray($attributes);

        // data: URIs are used verbatim; everything else is URL-decoded.
        $src = $attributes['src'];
        if (!str_starts_with($src, 'data:')) {
            $src = urldecode($src);
        }

        $drawing = new Drawing();
        $drawing->setPath($src);
        if ($drawing->getPath() === '') {
            return;
        }
        $drawing->setWorksheet($sheet);
        $drawing->setCoordinates($column . $row);
        $drawing->setOffsetX(0);
        $drawing->setOffsetY(10);
        $drawing->setResizeProportional(true);

        $name = $attributes['alt'] ?? null;
        if ($name) {
            $drawing->setName($name);
        }

        self::applyImageSize(
            $drawing,
            self::imageDimension($attributes, $styleArray, 'width'),
            self::imageDimension($attributes, $styleArray, 'height')
        );

        // Size the hosting column and row from the final drawing size.
        $sheet->getColumnDimension($column)->setWidth($drawing->getWidth() / 6);
        $sheet->getRowDimension($row)->setRowHeight($drawing->getHeight() * 0.9);

        self::applyImageOpacity($drawing, $styleArray);
    }

    /**
     * Pick one image dimension: the element attribute (cast to float) takes
     * precedence over the inline style value; null when neither is present.
     */
    private static function imageDimension(array $attributes, array $styleArray, string $key): mixed
    {
        if (isset($attributes[$key])) {
            return (float) $attributes[$key];
        }

        return $styleArray[$key] ?? null;
    }

    /**
     * Resize the drawing from whichever of width/height are truthy (non-zero).
     */
    private static function applyImageSize(Drawing $drawing, mixed $width, mixed $height): void
    {
        if ($width && $height) {
            $drawing->setWidthAndHeight((int) $width, (int) $height);
        } elseif ($width) {
            $drawing->setWidth((int) $width);
        } elseif ($height) {
            $drawing->setHeight((int) $height);
        }
    }

    /**
     * Apply a numeric CSS `opacity` style to the drawing, multiplied by 100000.
     */
    private static function applyImageOpacity(Drawing $drawing, array $styleArray): void
    {
        $opacity = $styleArray['opacity'] ?? null;
        if (is_numeric($opacity)) {
            $drawing->setOpacity((int) ($opacity * 100000));
        }
    }
1097
1098
    /**
     * Parse the `style` attribute into a property => value map.
     *
     * Only declarations that split into exactly two parts on `:` are kept.
     * `width` and `height` values ending in `px` have the unit stripped (width
     * is additionally normalised through a float cast); other width/height
     * values are converted through CssDimension. All other values are kept as
     * trimmed strings.
     *
     * @param array $attributes Element attributes; only `style` is read
     *
     * @return array Parsed declarations keyed by property name, empty when there is no style
     */
    private static function getStyleArray(array $attributes): array
    {
        if (!isset($attributes['style'])) {
            return [];
        }

        $parsed = [];
        foreach (explode(';', $attributes['style']) as $declaration) {
            $pieces = explode(':', $declaration);
            if (count($pieces) !== 2) {
                continue;
            }

            $property = trim($pieces[0]);
            $setting = trim($pieces[1]);
            $inPixels = str_ends_with($setting, 'px');

            if ($property === 'width') {
                $setting = $inPixels
                    ? (string) ((float) substr($setting, 0, -2))
                    : (new CssDimension($setting))->width();
            } elseif ($property === 'height') {
                $setting = $inPixels
                    ? substr($setting, 0, -2)
                    : (new CssDimension($setting))->height();
            }

            $parsed[$property] = $setting;
        }

        return $parsed;
    }
1128 3
1129 3
    /**
     * HTML border style keyword => PhpSpreadsheet border style constant.
     *
     * Note that the keyword `solid` maps to a thin border.
     */
    private const BORDER_MAPPINGS = [
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium' => Border::BORDER_MEDIUM,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'none' => Border::BORDER_NONE,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
        'solid' => Border::BORDER_THIN,
        'thick' => Border::BORDER_THICK,
    ];
1145 1
1146 1
    /**
     * Expose the table used to translate HTML border keywords into border styles.
     *
     * @return array<string, string> Border style keyed by HTML keyword
     */
    public static function getBorderMappings(): array
    {
        $mappings = self::BORDER_MAPPINGS;

        return $mappings;
    }
1150
1151 1
    /**
     * Translate an HTML border style keyword into a PhpSpreadsheet border style.
     *
     * @param string $style HTML keyword, e.g. `dashed` or `solid`
     *
     * @return null|string Mapped border style, or null for an unknown keyword
     */
    public function getBorderStyle(string $style): ?string
    {
        if (!isset(self::BORDER_MAPPINGS[$style])) {
            return null;
        }

        return self::BORDER_MAPPINGS[$style];
    }
1158
1159
    /**
     * Apply a CSS border shorthand to one border (or all borders) of a style.
     *
     * The value `none` clears the border. Otherwise the value is split on
     * whitespace: with three or more tokens the second is the style and the
     * third the colour (the first, presumably the width, is ignored); with
     * fewer tokens the first is the style and the second, if any, the colour.
     *
     * Tokens are separated on any run of whitespace, so shorthands written
     * with tabs or repeated spaces are parsed the same as single-spaced ones.
     *
     * @param Style $cellStyle Style object to modify
     * @param string $styleValue CSS border value, e.g. `1px solid #000000`
     * @param string $type Border key understood by applyFromArray (`allBorders`, `top`, ...)
     */
    private function setBorderStyle(Style $cellStyle, string $styleValue, string $type): void
    {
        $styleValue = trim($styleValue);

        if ($styleValue === Border::BORDER_NONE) {
            $borderStyle = Border::BORDER_NONE;
            $color = null;
        } else {
            // preg_split returns false on failure; treat that as "no tokens".
            $tokens = preg_split('/\s+/', $styleValue, -1, PREG_SPLIT_NO_EMPTY) ?: [];
            if (count($tokens) >= 3) {
                [, $borderStyle, $color] = $tokens;
            } else {
                $borderStyle = $tokens[0] ?? '';
                $color = $tokens[1] ?? null;
            }
        }

        $cellStyle->applyFromArray([
            'borders' => [
                $type => [
                    'borderStyle' => $this->getBorderStyle($borderStyle),
                    'color' => ['rgb' => $this->getStyleColor($color)],
                ],
            ],
        ]);
    }
1185
1186
    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The file is loaded into a temporary spreadsheet, one entry is produced
     * per sheet, and the spreadsheet's worksheets are disconnected before
     * returning.
     *
     * @param string $filename File to inspect
     *
     * @return array List of entries with the keys worksheetName, lastColumnLetter,
     *               lastColumnIndex (zero-based), totalRows and totalColumns
     */
    public function listWorksheetInfo(string $filename): array
    {
        $spreadsheet = new Spreadsheet();
        $this->loadIntoExisting($filename, $spreadsheet);

        $info = [];
        foreach ($spreadsheet->getAllSheets() as $sheet) {
            $title = $sheet->getTitle();
            $lastColumn = $sheet->getHighestDataColumn();
            $lastColumnIndex = Coordinate::columnIndexFromString($lastColumn) - 1;

            $info[] = [
                'worksheetName' => $title,
                'lastColumnLetter' => $lastColumn,
                'lastColumnIndex' => $lastColumnIndex,
                'totalRows' => $sheet->getHighestDataRow(),
                'totalColumns' => $lastColumnIndex + 1,
            ];
        }
        $spreadsheet->disconnectWorksheets();

        return $info;
    }
1206
}
1207