Passed
Push — master ( fde2cc...f1d90a )
by Mark
17:04 queued 08:09
created

Html::applyInlineStyle()   F

Complexity

Conditions 41
Paths 606

Size

Total Lines 170
Code Lines 106

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 109
CRAP Score 41

Importance

Changes 0
Metric Value
eloc 106
dl 0
loc 170
ccs 109
cts 109
cp 1
rs 0.4377
c 0
b 0
f 0
cc 41
nc 606
nop 4
crap 41

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use DOMDocument;
6
use DOMElement;
7
use DOMNode;
8
use DOMText;
9
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
10
use PhpOffice\PhpSpreadsheet\Cell\DataType;
11
use PhpOffice\PhpSpreadsheet\Document\Properties;
12
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
13
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
14
use PhpOffice\PhpSpreadsheet\Spreadsheet;
15
use PhpOffice\PhpSpreadsheet\Style\Border;
16
use PhpOffice\PhpSpreadsheet\Style\Color;
17
use PhpOffice\PhpSpreadsheet\Style\Fill;
18
use PhpOffice\PhpSpreadsheet\Style\Font;
19
use PhpOffice\PhpSpreadsheet\Style\Style;
20
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
21
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
22
use Throwable;
23
24
class Html extends BaseReader
25
{
26
    /**
27
     * Sample size to read to determine if it's HTML or not.
28
     */
29
    const TEST_SAMPLE_SIZE = 2048;
30
31
    /**
32
     * Input encoding.
33
     *
34
     * @var string
35
     */
36
    protected $inputEncoding = 'ANSI';
37
38
    /**
39
     * Sheet index to read.
40
     *
41
     * @var int
42
     */
43
    protected $sheetIndex = 0;
44
45
    /**
46
     * Formats.
47
     *
48
     * @var array
49
     */
50
    protected $formats = [
        // Headings: bold, with the point size given per level.
        'h1' => [
            'font' => [
                'bold' => true,
                'size' => 24,
            ],
        ], //    Bold, 24pt
        'h2' => [
            'font' => [
                'bold' => true,
                'size' => 18,
            ],
        ], //    Bold, 18pt
        'h3' => [
            'font' => [
                'bold' => true,
                'size' => 13.5,
            ],
        ], //    Bold, 13.5pt
        'h4' => [
            'font' => [
                'bold' => true,
                'size' => 12,
            ],
        ], //    Bold, 12pt
        'h5' => [
            'font' => [
                'bold' => true,
                'size' => 10,
            ],
        ], //    Bold, 10pt
        'h6' => [
            'font' => [
                'bold' => true,
                'size' => 7.5,
            ],
        ], //    Bold, 7.5pt
        'a' => [
            'font' => [
                'underline' => true,
                'color' => [
                    'argb' => Color::COLOR_BLUE,
                ],
            ],
        ], //    Blue underlined
        'hr' => [
            'borders' => [
                'bottom' => [
                    'borderStyle' => Border::BORDER_THIN,
                    // Keyed by 'argb' like the 'a' entry above; the colour was
                    // previously stored under numeric key 0.
                    'color' => [
                        'argb' => Color::COLOR_BLACK,
                    ],
                ],
            ],
        ], //    Bottom border
        'strong' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'b' => [
            'font' => [
                'bold' => true,
            ],
        ], //    Bold
        'i' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
        'em' => [
            'font' => [
                'italic' => true,
            ],
        ], //    Italic
    ];
126
127
    /** @var array */
128
    protected $rowspan = [];
129
130
    /**
131
     * Create a new HTML Reader instance.
132
     */
133 462
    public function __construct()
    {
        parent::__construct();

        // Obtain the XML scanner bound to this reader and keep it for later loads.
        $scanner = XmlScanner::getInstance($this);
        $this->securityScanner = $scanner;
    }
138
139
    /**
140
     * Validate that the current file is an HTML file.
141
     */
142 442
    public function canRead(string $filename): bool
    {
        // A file that cannot be opened is reported as unreadable rather than raising.
        try {
            $this->openFile($filename);
        } catch (Exception $e) {
            return false;
        }

        // All three probes are evaluated before the handle is closed.
        $head = $this->readBeginning();
        $probes = [
            self::startsWithTag($head),
            self::containsTags($head),
            self::endsWithTag($this->readEnding()),
        ];

        fclose($this->fileHandle);

        // The file qualifies only when every probe succeeded.
        return !in_array(false, $probes, true);
    }
160
161 441
    /**
     * Read up to TEST_SAMPLE_SIZE bytes from the start of the open file handle.
     */
    private function readBeginning(): string
    {
        fseek($this->fileHandle, 0, SEEK_SET);
        $sample = fread($this->fileHandle, self::TEST_SAMPLE_SIZE);

        // fread() may return false; the cast turns that into an empty string.
        return (string) $sample;
    }
167
168 441
    /**
     * Read up to TEST_SAMPLE_SIZE bytes from the end of the open file handle.
     *
     * Returns an empty string when the file size is reported as zero.
     */
    private function readEnding(): string
    {
        // The size is looked up by path, taken from the stream's own metadata.
        $uri = stream_get_meta_data($this->fileHandle)['uri'];
        $size = (int) filesize($uri);
        if ($size === 0) {
            return '';
        }

        // Never try to read more than the file actually holds.
        $blockSize = min($size, self::TEST_SAMPLE_SIZE);
        fseek($this->fileHandle, $size - $blockSize);
        $tail = fread($this->fileHandle, $blockSize);

        return (string) $tail;
    }
187
188 441
    /**
     * Tell whether the first non-whitespace character of $data is '<'.
     */
    private static function startsWithTag(string $data): bool
    {
        $trimmed = ltrim($data);
        $firstChar = substr($trimmed, 0, 1);

        return $firstChar === '<';
    }
192
193 441
    /**
     * Tell whether the last non-whitespace character of $data is '>'.
     */
    private static function endsWithTag(string $data): bool
    {
        $trimmed = rtrim($data);
        $lastChar = substr($trimmed, -1, 1);

        return $lastChar === '>';
    }
197
198 441
    /**
     * Tell whether strip_tags() shortens $data, i.e. whether it found markup to remove.
     */
    private static function containsTags(string $data): bool
    {
        $originalLength = strlen($data);
        $strippedLength = strlen(strip_tags($data));

        return $originalLength !== $strippedLength;
    }
202
203
    /**
204
     * Loads Spreadsheet from file.
205
     */
206 427
    public function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        // Load the file into a freshly created workbook.
        return $this->loadIntoExisting($filename, new Spreadsheet());
    }
214
215
    /**
216
     * Set input encoding.
217
     *
218
     * @param string $inputEncoding Input encoding, eg: 'ANSI'
219
     *
220
     * @return $this
221
     *
222
     * @codeCoverageIgnore
223
     *
224
     * @deprecated no use is made of this property
225
     */
226
    public function setInputEncoding($inputEncoding)
    {
        // Stored only; returns the reader itself so calls can be chained.
        $reader = $this;
        $reader->inputEncoding = $inputEncoding;

        return $reader;
    }
232
233
    /**
234
     * Get input encoding.
235
     *
236
     * @return string
237
     *
238
     * @codeCoverageIgnore
239
     *
240
     * @deprecated no use is made of this property
241
     */
242
    public function getInputEncoding()
    {
        // Plain accessor for the stored encoding name.
        $encoding = $this->inputEncoding;

        return $encoding;
    }
246
247
    //    Data Array used for testing only, should write to Spreadsheet object on completion of tests
248
249
    /** @var array */
250
    protected $dataArray = [];
251
252
    /** @var int */
253
    protected $tableLevel = 0;
254
255
    /** @var array */
256
    protected $nestedColumn = ['A'];
257
258 441
    /**
     * Enter a new table nesting level and record its start column.
     *
     * An outermost table (entered from level 0) always starts at column 'A';
     * a nested table starts at the column passed in.
     *
     * @return string the start column recorded for the new level
     */
    protected function setTableStartColumn(string $column): string
    {
        $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

        ++$this->tableLevel;
        $this->nestedColumn[$this->tableLevel] = $startColumn;

        return $startColumn;
    }
268
269 437
    /**
     * Return the start column recorded for the current table nesting level.
     */
    protected function getTableStartColumn(): string
    {
        $level = $this->tableLevel;

        return $this->nestedColumn[$level];
    }
273
274 441
    /**
     * Leave the current table nesting level.
     *
     * Decrements the level counter and removes the last recorded start column.
     *
     * @return string the start column that was removed
     */
    protected function releaseTableStartColumn(): string
    {
        $this->tableLevel -= 1;
        $released = array_pop($this->nestedColumn);

        return $released;
    }
280
281
    /**
282
     * Flush cell.
283
     *
284
     * @param string $column
285
     * @param int|string $row
286
     * @param mixed $cellContent
287
     */
288 442
    protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent, array $attributeArray): void
    {
        if (!is_string($cellContent)) {
            //    We have a Rich Text run
            //    TODO
            $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
            $cellContent = '';

            return;
        }

        //    Only write to the worksheet when the string has non-whitespace content
        if (trim($cellContent) > '') {
            $coordinate = $column . $row;

            if (isset($attributeArray['data-type'])) {
                // A data-type attribute asks for the value to be stored with that explicit type.
                $datatype = $attributeArray['data-type'];
                $stringTypes = [DataType::TYPE_STRING, DataType::TYPE_STRING2, DataType::TYPE_INLINE];

                // Strict comparison: only an exact match with a string type counts.
                // For those types a leading '=' is flagged with a quote prefix on the cell style.
                if (in_array($datatype, $stringTypes, true) && substr($cellContent, 0, 1) === '=') {
                    $sheet->getCell($coordinate)
                        ->getStyle()
                        ->setQuotePrefix(true);
                }

                // An invalid data type makes the explicit setter throw; fall back to
                // the ordinary setter in that case.
                try {
                    $sheet->setCellValueExplicit($coordinate, $cellContent, $datatype);
                } catch (\PhpOffice\PhpSpreadsheet\Exception $exception) {
                    $sheet->setCellValue($coordinate, $cellContent);
                }
            } else {
                $sheet->setCellValue($coordinate, $cellContent);
            }

            $this->dataArray[$row][$column] = $cellContent;
        }

        // The buffer is passed by reference and is always reset for the next cell.
        $cellContent = '';
    }
326
327 442
    /**
     * Entry point of the per-element handler chain.
     *
     * Collects the element's attributes into a name => value array. For <body>
     * it resets the cursor (row 1, column 'A', empty content, table level 0)
     * and walks the children; any other element is passed to the next handler.
     */
    private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
    {
        $attributeArray = [];
        foreach ($child->attributes as $attribute) {
            $attributeArray[$attribute->name] = $attribute->value;
        }

        if ($child->nodeName !== 'body') {
            $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $row = 1;
        $column = 'A';
        $cellContent = '';
        $this->tableLevel = 0;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
344
345 442
    /**
     * Handle <title>: its collected text becomes the worksheet title and the
     * content buffer is cleared. Other elements go to the next handler.
     */
    private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'title') {
            $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $sheet->setTitle($cellContent, true, true);
        $cellContent = '';
    }
355
356
    private const SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];
357
358 442
    /**
     * Handle the inline/container elements listed in SPAN_ETC.
     *
     * An element with class "comment" contributes its text to the comment of
     * the current cell; otherwise its children are processed. Afterwards the
     * tag's entry in $formats, if any, is applied to the current cell.
     * Elements not in SPAN_ETC go to the next handler.
     */
    private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = (string) $child->nodeName;
        if (!in_array($tag, self::SPAN_ETC, true)) {
            $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $isComment = isset($attributeArray['class']) && $attributeArray['class'] === 'comment';
        if ($isComment) {
            $sheet->getComment($column . $row)
                ->getText()
                ->createTextRun($child->textContent);
        } else {
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        }

        if (isset($this->formats[$child->nodeName])) {
            $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
        }
    }
376
377 442
    /**
     * Handle <hr>: flush pending content, move down a row, apply the 'hr'
     * format (if defined) to that row's cell, then move down again.
     *
     * Every element, <hr> included, is then handed to the <br> handler.
     */
    private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $tag = $child->nodeName;

        if ($tag === 'hr') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;

            $format = $this->formats[$tag] ?? null;
            if ($format !== null) {
                $sheet->getStyle($column . $row)->applyFromArray($format);
            }
            ++$row;
        }

        // fall through to br
        $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
    }
390
391 442
    /**
     * Handle line breaks (<br>, and <hr> arriving from the previous handler).
     *
     * Inside a table a newline is appended to the content and the cell is set
     * to wrap text; outside a table the content is flushed and the row cursor
     * advances. Other elements go to the next handler.
     */
    private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isBreak = in_array($child->nodeName, ['br', 'hr'], true);
        if (!$isBreak) {
            $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel <= 0) {
            //    Not in a table: flush our existing content and move the row cursor on
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;

            return;
        }

        //    In a table: replace with a \n and set the cell to wrap
        $cellContent .= "\n";
        $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
    }
407
408 442
    /**
     * Handle <a>: an href attribute becomes the hyperlink URL of the current
     * cell and the 'a' format (if defined) is applied; the anchor's children
     * are then processed. Other elements go to the next handler.
     *
     * No other attribute triggers any action here (a class of
     * "comment-indicator" is just a red square and is ignored).
     */
    private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'a') {
            $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if (isset($attributeArray['href'])) {
            $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);
            if (isset($this->formats[$child->nodeName])) {
                $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
            }
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    }
433
434
    private const H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];
435
436 442
    /**
     * Handle the block elements listed in H1_ETC (headings, lists, paragraphs).
     *
     * Inside a table the block is folded into the current cell, separated from
     * existing content by a newline, with wrap-text enabled. Outside a table
     * the block gets its own row: pending content is flushed first, the block
     * is processed and flushed, the tag's format (if defined) is applied, and
     * the cursor moves to column 'A' of the next row.
     * Other elements go to the next handler.
     */
    private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if (!in_array((string) $child->nodeName, self::H1_ETC, true)) {
            $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, replace with a \n
            if ($cellContent) {
                $cellContent .= "\n";
            }
            $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent > '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
            ++$row;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $format = $this->formats[$child->nodeName] ?? null;
        if ($format !== null) {
            $sheet->getStyle($column . $row)->applyFromArray($format);
        }

        ++$row;
        $column = 'A';
    }
463
464 442
    /**
     * Handle <li>.
     *
     * Inside a table the item is appended to the current cell after a newline
     * separator. Outside a table pending content is flushed, the row cursor
     * advances, the item is processed and flushed, and the column resets to 'A'.
     * Other elements go to the next handler.
     */
    private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'li') {
            $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        if ($this->tableLevel > 0) {
            //    If we're inside a table, replace with a \n
            if ($cellContent) {
                $cellContent .= "\n";
            }
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);

            return;
        }

        if ($cellContent > '') {
            $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        }
        ++$row;
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
        $column = 'A';
    }
484
485 442
    /**
     * Handle <img> by delegating to insertImage() for the current cell.
     * Other elements go to the next handler.
     */
    private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'img') {
            $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->insertImage($sheet, $column, $row, $attributeArray);
    }
493
494
    private string $currentColumn = 'A';
495
496 442
    /**
     * Handle <table>.
     *
     * Resets the <col> cursor, flushes pending content, enters a new nesting
     * level, processes the table's children and leaves the level again.
     * A nested table entered below row 1 starts one row higher. After the
     * table, the column advances if the remaining level is still above 1,
     * otherwise the row advances.
     * Other elements go to the next handler.
     */
    private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName !== 'table') {
            $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $this->currentColumn = 'A';
        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $column = $this->setTableStartColumn($column);
        $isNested = $this->tableLevel > 1;
        if ($isNested && $row > 1) {
            --$row;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        $column = $this->releaseTableStartColumn();
        if ($this->tableLevel <= 1) {
            ++$row;
        } else {
            ++$column;
        }
    }
516
517 442
    /**
     * Handle <col> and <tr>.
     *
     * <col>: applies the element's inline style to the column under the <col>
     * cursor (row passed as -1) and advances that cursor.
     * <tr>: restarts at the current table's start column with an empty content
     * buffer, processes the cells, applies a height attribute to the row if
     * present, and advances the row.
     * Other elements go to the next handler.
     */
    private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        if ($child->nodeName === 'col') {
            $this->applyInlineStyle($sheet, -1, $this->currentColumn, $attributeArray);
            ++$this->currentColumn;

            return;
        }

        if ($child->nodeName !== 'tr') {
            $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);

            return;
        }

        $column = $this->getTableStartColumn();
        $cellContent = '';
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        if (isset($attributeArray['height'])) {
            // Attribute values are strings; convert explicitly instead of handing
            // the raw string to the setter. Numeric values keep their result.
            // NOTE(review): a non-numeric value such as "auto" now yields 0.0 —
            // confirm that is acceptable for setRowHeight().
            $sheet->getRowDimension($row)->setRowHeight((float) $attributeArray['height']);
        }

        ++$row;
    }
536
537 442
    /**
     * Last dispatcher in the chain: <td>/<th> go to the cell handler, any
     * other element simply has its children processed.
     */
    private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        $isCell = $child->nodeName === 'td' || $child->nodeName === 'th';

        if ($isCell) {
            $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);
        } else {
            $this->processDomElement($child, $sheet, $row, $column, $cellContent);
        }
    }
545
546 437
    /**
     * Apply a bgcolor attribute, if present, as a solid fill on the cell.
     */
    private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['bgcolor'])) {
            return;
        }

        $style = $sheet->getStyle($column . $row);
        $fill = [
            'fillType' => Fill::FILL_SOLID,
            'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
        ];
        $style->applyFromArray(['fill' => $fill]);
    }
559
560 437
    /**
     * Apply a width attribute, if present, to the column, converting the
     * attribute value through CssDimension::width().
     */
    private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['width'])) {
            return;
        }

        $columnDimension = $sheet->getColumnDimension($column);
        $cssWidth = new CssDimension($attributeArray['width']);
        $columnDimension->setWidth($cssWidth->width());
    }
566
567 437
    /**
     * Apply a height attribute, if present, to the row, converting the
     * attribute value through CssDimension::height().
     */
    private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
    {
        if (!isset($attributeArray['height'])) {
            return;
        }

        $rowDimension = $sheet->getRowDimension($row);
        $cssHeight = new CssDimension($attributeArray['height']);
        $rowDimension->setRowHeight($cssHeight->height());
    }
573
574 437
    /**
     * Apply an align attribute, if present, as the cell's horizontal alignment.
     */
    private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['align'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setHorizontal($attributeArray['align']);
    }
580
581 437
    /**
     * Apply a valign attribute, if present, as the cell's vertical alignment.
     */
    private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['valign'])) {
            return;
        }

        $alignment = $sheet->getStyle($column . $row)->getAlignment();
        $alignment->setVertical($attributeArray['valign']);
    }
587
588 437
    /**
     * Apply a data-format attribute, if present, as the cell's number format code.
     */
    private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
    {
        if (!isset($attributeArray['data-format'])) {
            return;
        }

        $numberFormat = $sheet->getStyle($column . $row)->getNumberFormat();
        $numberFormat->setFormatCode($attributeArray['data-format']);
    }
594
595 437
    /**
     * Handle a table cell (<td>/<th>).
     *
     * Skips columns already claimed by an earlier rowspan, processes the
     * cell's children, applies inline style and attribute-based formatting,
     * writes the content, merges cells for rowspan/colspan, and finally moves
     * the column cursor past the cell.
     */
    private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
    {
        // Step over coordinates reserved by a rowspan from a previous row.
        while (isset($this->rowspan[$column . $row])) {
            ++$column;
        }

        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        // apply inline style
        $this->applyInlineStyle($sheet, $row, $column, $attributeArray);

        $this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);

        $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
        $this->processDomElementWidth($sheet, $column, $attributeArray);
        $this->processDomElementHeight($sheet, $row, $attributeArray);
        $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
        $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);

        $hasRowspan = isset($attributeArray['rowspan']);
        $hasColspan = isset($attributeArray['colspan']);

        if ($hasRowspan || $hasColspan) {
            // Right-hand column of the merge: colspan - 1 columns further on.
            $columnTo = $column;
            if ($hasColspan) {
                for ($remaining = (int) $attributeArray['colspan'] - 1; $remaining > 0; --$remaining) {
                    ++$columnTo;
                }
            }

            // Bottom row of the merge: rowspan - 1 rows further down.
            $rowTo = $hasRowspan ? $row + (int) $attributeArray['rowspan'] - 1 : $row;
            $range = $column . $row . ':' . $columnTo . $rowTo;

            // Only merges involving a rowspan reserve their coordinates, so that
            // later rows skip them.
            if ($hasRowspan) {
                foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
                    $this->rowspan[$value] = true;
                }
            }

            $sheet->mergeCells($range);
            $column = $columnTo;
        }

        ++$column;
    }
645
646 442
    /**
     * Walk the child nodes of $element.
     *
     * Text nodes are trimmed, have whitespace runs collapsed to a single
     * space, and are appended to the content buffer; element nodes are fed
     * into the handler chain. Other node types are ignored.
     */
    protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
    {
        foreach ($element->childNodes as $child) {
            if ($child instanceof DOMElement) {
                $this->processDomElementBody($sheet, $row, $column, $cellContent, $child);

                continue;
            }

            if (!($child instanceof DOMText)) {
                continue;
            }

            $rawText = trim($child->nodeValue ?? '');
            $domText = (string) preg_replace('/\s+/u', ' ', $rawText);

            //    simply append the text if the cell content is a plain text string
            //    (appending to a rich text run is still TODO)
            if (is_string($cellContent)) {
                $cellContent .= $domText;
            }
        }
    }
662
663
    /**
664
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
665
     *
666
     * @param string $filename
667
     *
668
     * @return Spreadsheet
669
     */
670 427
    public function loadIntoExisting($filename, Spreadsheet $spreadsheet)
    {
        // Validate
        if (!$this->canRead($filename)) {
            throw new Exception($filename . ' is an Invalid HTML file.');
        }

        $dom = new DOMDocument();
        $failure = null;

        try {
            $html = $this->getSecurityScannerOrThrow()->scanFile($filename);

            // Every character from U+0080 upwards is rewritten as a numeric
            // character reference before the markup is parsed.
            $nonAscii = "/[\u{80}-\u{10ffff}]/u";
            /** @var callable */
            $toEntity = [self::class, 'replaceNonAscii'];
            $html = preg_replace_callback($nonAscii, $toEntity, $html);

            // A null result means the replacement itself failed.
            $loaded = ($html === null) ? false : $dom->loadHTML($html);
        } catch (Throwable $e) {
            $failure = $e;
            $loaded = false;
        }

        if ($loaded === false) {
            throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $failure);
        }

        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
699
700 442
    /**
     * Copy document metadata from the DOM into the spreadsheet properties.
     *
     * Each <meta> element with non-empty content is matched by its name
     * attribute against the known property names; names of the form
     * custom.<type>.<name> become custom properties. A non-empty base URI of
     * the document is stored as the hyperlink base.
     */
    private static function loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet): void
    {
        $properties = $spreadsheet->getProperties();

        foreach ($dom->getElementsByTagName('meta') as $meta) {
            $metaContent = (string) $meta->getAttribute('content');
            if ($metaContent === '') {
                continue;
            }

            $metaName = (string) $meta->getAttribute('name');
            switch ($metaName) {
                case 'author':
                    $properties->setCreator($metaContent);

                    break;
                case 'category':
                    $properties->setCategory($metaContent);

                    break;
                case 'company':
                    $properties->setCompany($metaContent);

                    break;
                case 'created':
                    $properties->setCreated($metaContent);

                    break;
                case 'description':
                    $properties->setDescription($metaContent);

                    break;
                case 'keywords':
                    $properties->setKeywords($metaContent);

                    break;
                case 'lastModifiedBy':
                    $properties->setLastModifiedBy($metaContent);

                    break;
                case 'manager':
                    $properties->setManager($metaContent);

                    break;
                case 'modified':
                    $properties->setModified($metaContent);

                    break;
                case 'subject':
                    $properties->setSubject($metaContent);

                    break;
                case 'title':
                    $properties->setTitle($metaContent);

                    break;
                default:
                    self::loadCustomProperty($properties, $metaName, $metaContent);
            }
        }

        if (!empty($dom->baseURI)) {
            $properties->setHyperlinkBase($dom->baseURI);
        }
    }

    /**
     * Store a meta entry named custom.<type>.<name> as a custom property.
     *
     * Names that do not match that pattern are ignored. bool, float and int
     * values are cast from the content string; date and string values are
     * stored as given.
     */
    private static function loadCustomProperty(Properties $properties, string $metaName, string $metaContent): void
    {
        if (preg_match('/^custom[.](bool|date|float|int|string)[.](.+)$/', $metaName, $matches) !== 1) {
            return;
        }

        $propertyType = $matches[1];
        $propertyName = $matches[2];

        if ($propertyType === 'bool') {
            $properties->setCustomProperty($propertyName, (bool) $metaContent, Properties::PROPERTY_TYPE_BOOLEAN);
        } elseif ($propertyType === 'float') {
            $properties->setCustomProperty($propertyName, (float) $metaContent, Properties::PROPERTY_TYPE_FLOAT);
        } elseif ($propertyType === 'int') {
            $properties->setCustomProperty($propertyName, (int) $metaContent, Properties::PROPERTY_TYPE_INTEGER);
        } elseif ($propertyType === 'date') {
            $properties->setCustomProperty($propertyName, $metaContent, Properties::PROPERTY_TYPE_DATE);
        } else {
            // string
            $properties->setCustomProperty($propertyName, $metaContent, Properties::PROPERTY_TYPE_STRING);
        }
    }
782
783 50
    /**
     * Callback for preg_replace_callback(): converts the matched character
     * into a decimal numeric character reference (for example "&#233;").
     *
     * @param array $matches Match array; element 0 holds the matched text
     */
    private static function replaceNonAscii(array $matches): string
    {
        $codePoint = mb_ord($matches[0], 'UTF-8');

        return "&#{$codePoint};";
    }
787
788
    /**
     * Build a spreadsheet from an HTML string.
     *
     * The content is passed through the security scanner, every character from
     * U+0080 upwards is rewritten as a numeric character reference, and the
     * result is parsed with DOMDocument::loadHTML().
     *
     * @param string $content HTML markup to load
     * @param null|Spreadsheet $spreadsheet Workbook to load into; a new one is created when null
     *
     * @throws Exception when the content cannot be loaded as a DOM document
     */
    public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spreadsheet
    {
        $dom = new DOMDocument();
        $failure = null;

        try {
            $scanned = $this->getSecurityScannerOrThrow()->scan($content);
            // Character class spanning U+0080..U+10FFFF, i.e. everything outside ASCII.
            $nonAscii = "/[\u{80}-\u{10ffff}]/u";
            /** @var callable */
            $toEntity = [self::class, 'replaceNonAscii'];
            $encoded = preg_replace_callback($nonAscii, $toEntity, $scanned);
            // preg_replace_callback() yields null on a PCRE error; treat that as a failed load.
            $loaded = $encoded !== null && $dom->loadHTML($encoded);
        } catch (Throwable $e) {
            $failure = $e;
            $loaded = false;
        }

        if (!$loaded) {
            throw new Exception('Failed to load content as a DOM Document', 0, $failure);
        }

        if ($spreadsheet === null) {
            $spreadsheet = new Spreadsheet();
        }
        self::loadProperties($dom, $spreadsheet);

        return $this->loadDocument($dom, $spreadsheet);
    }
818
819
    /**
     * Fill the sheet selected by the configured sheet index from a parsed DOM document.
     *
     * Sheets are created until the configured index exists; that sheet is then
     * made active and populated by walking the DOM tree.
     */
    private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
    {
        // Keep adding sheets until the requested index is valid.
        while (!($this->sheetIndex < $spreadsheet->getSheetCount())) {
            $spreadsheet->createSheet();
        }
        $spreadsheet->setActiveSheetIndex($this->sheetIndex);

        // Discard white space
        $document->preserveWhiteSpace = false;

        // Clear the rowspan state before traversing the document.
        $this->rowspan = [];

        // Traversal starts before the first row, at column A, with empty cell content.
        $row = 0;
        $column = 'A';
        $content = '';
        $worksheet = $spreadsheet->getActiveSheet();
        $this->processDomElement($document, $worksheet, $row, $column, $content);

        return $spreadsheet;
    }
841
842
    /**
     * Index of the worksheet that loaded content is written to.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }
851
852
    /**
     * Choose the worksheet (by index) that loaded content is written to.
     *
     * @param int $sheetIndex Sheet index
     *
     * @return $this Same instance, so calls can be chained
     */
    public function setSheetIndex($sheetIndex)
    {
        $this->sheetIndex = $sheetIndex;

        return $this;
    }
865
866
    /**
     * Apply the declarations of an element's inline "style" attribute to a cell or cell range.
     *
     * Recognised properties: background, background-color, color, border,
     * border-top, border-bottom, border-left, border-right, font-size,
     * font-weight, font-style, font-family, text-decoration, text-align,
     * vertical-align, width, height, word-wrap and text-indent. Any other
     * property is ignored.
     *
     * When rowspan and/or colspan attributes are present the style is applied
     * to the whole spanned range. When no cell is addressable ($row <= 0 or an
     * empty $column) cell-level styles are written to a Style object that is
     * not attached to the sheet.
     *
     * @param int $row
     * @param string $column
     * @param array $attributeArray Attributes of the element being processed
     */
    private function applyInlineStyle(Worksheet &$sheet, $row, $column, $attributeArray): void
    {
        if (!isset($attributeArray['style'])) {
            return;
        }

        if ($row <= 0 || $column === '') {
            $cellStyle = new Style();
        } elseif (isset($attributeArray['rowspan']) || isset($attributeArray['colspan'])) {
            // Extend the target from the anchor cell across the spanned columns and rows.
            $columnTo = $column;
            $rowTo = $row;
            if (isset($attributeArray['colspan'])) {
                for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
                    ++$columnTo;
                }
            }
            if (isset($attributeArray['rowspan'])) {
                $rowTo = $row + (int) $attributeArray['rowspan'] - 1;
            }
            $cellStyle = $sheet->getStyle($column . $row . ':' . $columnTo . $rowTo);
        } else {
            $cellStyle = $sheet->getStyle($column . $row);
        }

        // Each declaration has the form "name: value"; declarations are separated by ';'.
        $styles = explode(';', $attributeArray['style']);
        foreach ($styles as $st) {
            $value = explode(':', $st);
            $styleName = isset($value[0]) ? trim($value[0]) : null;
            $styleValue = isset($value[1]) ? trim($value[1]) : null;
            $styleValueString = (string) $styleValue;

            if (!$styleName) {
                continue;
            }

            switch ($styleName) {
                case 'background':
                case 'background-color':
                    $styleColor = $this->getStyleColor($styleValueString);
                    if ($styleColor) {
                        $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);
                    }

                    break;
                case 'color':
                    $styleColor = $this->getStyleColor($styleValueString);
                    if ($styleColor) {
                        $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
                    }

                    break;

                case 'border':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'allBorders');

                    break;

                case 'border-top':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'top');

                    break;

                case 'border-bottom':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'bottom');

                    break;

                case 'border-left':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'left');

                    break;

                case 'border-right':
                    $this->setBorderStyle($cellStyle, $styleValueString, 'right');

                    break;

                case 'font-size':
                    $cellStyle->getFont()->setSize(
                        (float) $styleValue
                    );

                    break;

                case 'font-weight':
                    // Compare against 500 only when the value is numeric. In PHP 8 a
                    // keyword such as 'normal' or 'lighter' compared with an int is
                    // compared as a string and would wrongly be treated as bold.
                    $isBoldKeyword = $styleValue === 'bold' || $styleValue === 'bolder';
                    $isHeavyNumber = is_numeric($styleValueString) && (float) $styleValueString >= 500;
                    if ($isBoldKeyword || $isHeavyNumber) {
                        $cellStyle->getFont()->setBold(true);
                    }

                    break;

                case 'font-style':
                    if ($styleValue === 'italic') {
                        $cellStyle->getFont()->setItalic(true);
                    }

                    break;

                case 'font-family':
                    // Strip single quotes around the family name.
                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValueString));

                    break;

                case 'text-decoration':
                    switch ($styleValue) {
                        case 'underline':
                            $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);

                            break;
                        case 'line-through':
                            $cellStyle->getFont()->setStrikethrough(true);

                            break;
                    }

                    break;

                case 'text-align':
                    $cellStyle->getAlignment()->setHorizontal($styleValueString);

                    break;

                case 'vertical-align':
                    $cellStyle->getAlignment()->setVertical($styleValueString);

                    break;

                case 'width':
                    // Width is a column property, so it needs a column to apply to.
                    if ($column !== '') {
                        $sheet->getColumnDimension($column)->setWidth(
                            (new CssDimension($styleValue ?? ''))->width()
                        );
                    }

                    break;

                case 'height':
                    // Height is a row property, so it needs a valid row to apply to.
                    if ($row > 0) {
                        $sheet->getRowDimension($row)->setRowHeight(
                            (new CssDimension($styleValue ?? ''))->height()
                        );
                    }

                    break;

                case 'word-wrap':
                    $cellStyle->getAlignment()->setWrapText(
                        $styleValue === 'break-word'
                    );

                    break;

                case 'text-indent':
                    $cellStyle->getAlignment()->setIndent(
                        (int) str_replace(['px'], '', $styleValueString)
                    );

                    break;
            }
        }
    }
1053
1054
    /**
     * Resolve a CSS colour value to a hex string without the leading '#'.
     *
     * Values starting with '#' are treated as hex notation; a three-digit
     * shorthand such as '#f0a' is expanded to its six-digit form ('ff00aa').
     * Any other value is handed to the HTML helper's colour-name lookup.
     *
     * @param mixed $value
     *
     * @return null|string
     */
    public function getStyleColor($value)
    {
        $value = (string) $value;
        if (strpos($value, '#') === 0) {
            $hex = substr($value, 1);
            // CSS allows #rgb as shorthand for #rrggbb; double each digit.
            if (preg_match('/^[0-9a-f]{3}$/iD', (string) $hex) === 1) {
                return $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
            }

            return $hex;
        }

        return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup($value);
    }
1070
1071
    /**
     * Place the image referenced by an element's "src" attribute on the sheet.
     *
     * The drawing is anchored at the given cell; afterwards the cell's column
     * width and row height are set from the drawing's dimensions. Nothing
     * happens when there is no "src" attribute.
     *
     * @param string    $column
     * @param int       $row
     * @param array     $attributes Element attributes; src, width, height and alt are read
     */
    private function insertImage(Worksheet $sheet, $column, $row, array $attributes): void
    {
        if (!isset($attributes['src'])) {
            return;
        }

        $picture = new Drawing();
        $picture->setPath(urldecode($attributes['src']));
        $picture->setWorksheet($sheet);
        $picture->setCoordinates($column . $row);
        $picture->setOffsetX(0);
        $picture->setOffsetY(10);
        $picture->setResizeProportional(true);

        $altText = $attributes['alt'] ?? null;
        if ($altText) {
            $picture->setName($altText);
        }

        // Explicit dimensions are applied only when present and non-zero.
        $requestedWidth = isset($attributes['width']) ? (float) $attributes['width'] : null;
        if ($requestedWidth) {
            $picture->setWidth((int) $requestedWidth);
        }

        $requestedHeight = isset($attributes['height']) ? (float) $attributes['height'] : null;
        if ($requestedHeight) {
            $picture->setHeight((int) $requestedHeight);
        }

        // Size the anchor cell from the drawing's final dimensions.
        $columnWidth = $picture->getWidth() / 6;
        $sheet->getColumnDimension($column)->setWidth($columnWidth);

        $rowHeight = $picture->getHeight() * 0.9;
        $sheet->getRowDimension($row)->setRowHeight($rowHeight);
    }
1114
1115
    /**
     * Lookup table from border style keywords found in HTML/CSS to the
     * corresponding Border style constants. Note that 'solid' maps to a
     * thin border.
     */
    private const BORDER_MAPPINGS = [
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium' => Border::BORDER_MEDIUM,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'none' => Border::BORDER_NONE,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
        'solid' => Border::BORDER_THIN,
        'thick' => Border::BORDER_THICK,
    ];
1131
1132 15
    /**
     * Expose the full keyword-to-border-style lookup table.
     *
     * @return array Border style keyword => Border style constant
     */
    public static function getBorderMappings(): array
    {
        return self::BORDER_MAPPINGS;
    }
1136
1137
    /**
     * Translate an HTML border style keyword into the matching PhpSpreadsheet border style.
     *
     * @param  string $style Border style keyword, e.g. 'solid' or 'dashed'
     *
     * @return null|string The mapped border style, or null when the keyword is not in the table
     */
    public function getBorderStyle($style)
    {
        $mappings = self::BORDER_MAPPINGS;
        if (isset($mappings[$style])) {
            return $mappings[$style];
        }

        return null;
    }
1148
1149
    /**
     * Apply a CSS border declaration value to one border (or all borders) of a style.
     *
     * A value equal to Border::BORDER_NONE clears the border. Otherwise the
     * value is split into whitespace-separated tokens: with three or more
     * tokens the second is taken as the border style and the third as the
     * colour (the first is not used); with fewer tokens the first is the
     * border style and the second, if present, the colour.
     *
     * @param string $styleValue Value of the border declaration, e.g. '1px solid #000000'
     * @param string $type Key in the 'borders' style array, e.g. 'allBorders', 'top', 'left'
     */
    private function setBorderStyle(Style $cellStyle, $styleValue, $type): void
    {
        $styleValue = trim((string) $styleValue);
        if ($styleValue === Border::BORDER_NONE) {
            $borderStyle = Border::BORDER_NONE;
            $color = null;
        } else {
            // Split on runs of whitespace and drop empty tokens, so repeated
            // spaces, tabs or newlines between the parts do not shift positions.
            $borderArray = preg_split('/\s+/', $styleValue, -1, PREG_SPLIT_NO_EMPTY) ?: [];
            if (count($borderArray) >= 3) {
                $borderStyle = $borderArray[1];
                $color = $borderArray[2];
            } else {
                // An empty value yields no tokens; fall back to an empty style keyword.
                $borderStyle = $borderArray[0] ?? '';
                $color = $borderArray[1] ?? null;
            }
        }

        $cellStyle->applyFromArray([
            'borders' => [
                $type => [
                    'borderStyle' => $this->getBorderStyle($borderStyle),
                    'color' => ['rgb' => $this->getStyleColor($color)],
                ],
            ],
        ]);
    }
1179
}
1180