Total Complexity | 161 |
Total Lines | 945 |
Duplicated Lines | 0 % |
Coverage | 73.71% |
Changes | 2 | ||
Bugs | 0 | Features | 0 |
Complex classes like Html often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Html, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
21 | class Html extends BaseReader |
||
22 | { |
||
/**
 * Number of bytes sampled from the start and from the end of a file
 * when deciding whether it is HTML.
 */
const TEST_SAMPLE_SIZE = 2048;

/**
 * Input encoding.
 *
 * @var string
 */
protected $inputEncoding = 'ANSI';

/**
 * Index of the worksheet that parsed HTML is loaded into.
 *
 * @var int
 */
protected $sheetIndex = 0;
||
41 | |||
/**
 * Style definitions keyed by HTML tag name.
 *
 * Each entry is a style array in the shape accepted by Style::applyFromArray().
 * Colours are given under an explicit 'argb' key so they are picked up when
 * the array is applied.
 *
 * @var array
 */
protected $formats = [
    // Headings: bold, with decreasing point size
    'h1' => [
        'font' => [
            'bold' => true,
            'size' => 24,
        ],
    ], // Bold, 24pt
    'h2' => [
        'font' => [
            'bold' => true,
            'size' => 18,
        ],
    ], // Bold, 18pt
    'h3' => [
        'font' => [
            'bold' => true,
            'size' => 13.5,
        ],
    ], // Bold, 13.5pt
    'h4' => [
        'font' => [
            'bold' => true,
            'size' => 12,
        ],
    ], // Bold, 12pt
    'h5' => [
        'font' => [
            'bold' => true,
            'size' => 10,
        ],
    ], // Bold, 10pt
    'h6' => [
        'font' => [
            'bold' => true,
            'size' => 7.5,
        ],
    ], // Bold, 7.5pt
    'a' => [
        'font' => [
            'underline' => true,
            'color' => [
                'argb' => Color::COLOR_BLUE,
            ],
        ],
    ], // Blue underlined
    'hr' => [
        'borders' => [
            'bottom' => [
                'borderStyle' => Border::BORDER_THIN,
                'color' => [
                    // Keyed like the 'a' entry; a bare list element would sit under index 0
                    'argb' => Color::COLOR_BLACK,
                ],
            ],
        ],
    ], // Bottom border
    'strong' => [
        'font' => [
            'bold' => true,
        ],
    ], // Bold
    'b' => [
        'font' => [
            'bold' => true,
        ],
    ], // Bold
    'i' => [
        'font' => [
            'italic' => true,
        ],
    ], // Italic
    'em' => [
        'font' => [
            'italic' => true,
        ],
    ], // Italic
];
||
123 | |||
/**
 * Row-span bookkeeping; reset to an empty array at the start of every document load.
 *
 * NOTE(review): presumably tracks cells occupied by "rowspan" attributes while
 * walking the DOM - confirm against processDomElement().
 *
 * @var array
 */
protected $rowspan = [];
||
125 | |||
126 | /** |
||
127 | * Create a new HTML Reader instance. |
||
128 | */ |
||
129 | 29 | public function __construct() |
|
133 | 29 | } |
|
134 | |||
/**
 * Check whether the given file looks like an HTML document.
 *
 * The file qualifies when its leading sample starts with "<" and contains at
 * least one tag, and its trailing sample ends with ">" (surrounding whitespace
 * is ignored in both samples).
 *
 * @param string $pFilename
 *
 * @return bool false as well when the file cannot be opened
 */
public function canRead($pFilename)
{
    try {
        $this->openFile($pFilename);
    } catch (Exception $e) {
        // openFile() failed, so there is nothing to inspect
        return false;
    }

    $head = $this->readBeginning();
    $tail = $this->readEnding();

    $headOpensTag = self::startsWithTag($head);
    $headHasTags = self::containsTags($head);
    $tailClosesTag = self::endsWithTag($tail);

    fclose($this->fileHandle);

    return $headOpensTag && $headHasTags && $tailClosesTag;
}
||
160 | |||
/**
 * Read up to TEST_SAMPLE_SIZE bytes from the start of the currently open file.
 *
 * @return false|string whatever fread() returns
 */
private function readBeginning()
{
    $handle = $this->fileHandle;

    // Rewind first: the handle may have been moved by an earlier read
    fseek($handle, 0);

    return fread($handle, self::TEST_SAMPLE_SIZE);
}
||
167 | |||
/**
 * Read up to TEST_SAMPLE_SIZE bytes from the end of the currently open file.
 *
 * The file size is looked up through the path recorded in the stream metadata.
 *
 * @return false|string the trailing sample; an empty string when the file is
 *                      empty or its size cannot be determined
 */
private function readEnding()
{
    $meta = stream_get_meta_data($this->fileHandle);
    $size = filesize($meta['uri']);

    // filesize() returns false on failure; without this guard that value would
    // be used as the read length below
    if ($size === false || $size === 0) {
        return '';
    }

    // Never ask for more bytes than the file holds
    $blockSize = min($size, self::TEST_SAMPLE_SIZE);

    fseek($this->fileHandle, $size - $blockSize);

    return fread($this->fileHandle, $blockSize);
}
||
187 | |||
/**
 * Tell whether the data, ignoring surrounding whitespace, begins with "<".
 *
 * @param string $data
 *
 * @return bool
 */
private static function startsWithTag($data)
{
    $trimmed = trim($data);
    $firstChar = substr($trimmed, 0, 1);

    return $firstChar === '<';
}
||
192 | |||
/**
 * Tell whether the data, ignoring surrounding whitespace, ends with ">".
 *
 * @param string $data
 *
 * @return bool
 */
private static function endsWithTag($data)
{
    $trimmed = trim($data);
    $lastChar = substr($trimmed, -1, 1);

    return $lastChar === '>';
}
||
197 | |||
/**
 * Tell whether strip_tags() removes anything from the data.
 *
 * @param string $data
 *
 * @return bool true when the stripped text is not the same length as the input
 */
private static function containsTags($data)
{
    $withoutTags = strip_tags($data);

    return strlen($withoutTags) !== strlen($data);
}
||
202 | |||
/**
 * Load an HTML file into a newly created Spreadsheet.
 *
 * @param string $pFilename
 *
 * @throws Exception
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    // Delegate to loadIntoExisting() with a fresh, empty workbook
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
||
220 | |||
221 | /** |
||
222 | * Set input encoding. |
||
223 | * |
||
224 | * @param string $pValue Input encoding, eg: 'ANSI' |
||
225 | * |
||
226 | * @return Html |
||
227 | */ |
||
228 | 1 | public function setInputEncoding($pValue) |
|
233 | } |
||
234 | |||
/**
 * Return the currently configured input encoding.
 *
 * @return string
 */
public function getInputEncoding()
{
    return $this->inputEncoding;
}
||
244 | |||
/**
 * Data array used for testing only; should be written to the Spreadsheet
 * object once the tests are complete.
 *
 * @var array
 */
protected $dataArray = [];

/**
 * Current table nesting depth: incremented by setTableStartColumn() and
 * decremented by releaseTableStartColumn().
 *
 * @var int
 */
protected $tableLevel = 0;

/**
 * Start column recorded for each table nesting level, indexed by level.
 *
 * @var array
 */
protected $nestedColumn = ['A'];
||
251 | |||
/**
 * Enter a (possibly nested) table and record the column it starts in.
 *
 * An outermost table (nesting level 0 on entry) always starts in column 'A',
 * whatever column is passed in.
 *
 * @param string $column column at which the table was encountered
 *
 * @return string the start column recorded for the new nesting level
 */
protected function setTableStartColumn($column)
{
    $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

    $level = ++$this->tableLevel;
    $this->nestedColumn[$level] = $startColumn;

    return $startColumn;
}
||
262 | |||
263 | 20 | protected function getTableStartColumn() |
|
266 | } |
||
267 | |||
/**
 * Leave the current table: drop one nesting level and discard its start column.
 *
 * @return mixed the last entry removed from the start-column list
 */
protected function releaseTableStartColumn()
{
    $releasedColumn = array_pop($this->nestedColumn);
    --$this->tableLevel;

    return $releasedColumn;
}
||
274 | |||
275 | 20 | protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent) |
|
292 | 20 | } |
|
293 | |||
294 | /** |
||
295 | * @param DOMNode $element |
||
296 | * @param Worksheet $sheet |
||
297 | * @param int $row |
||
298 | * @param string $column |
||
299 | * @param string $cellContent |
||
300 | */ |
||
301 | 20 | protected function processDomElement(DOMNode $element, Worksheet $sheet, &$row, &$column, &$cellContent) |
|
573 | } |
||
574 | } |
||
575 | } |
||
576 | 20 | } |
|
577 | |||
/**
 * Load an HTML file into an existing Spreadsheet instance.
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception when the file fails the canRead() check or cannot be parsed as a DOM document
 *
 * @return Spreadsheet
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    $isHtml = $this->canRead($pFilename);
    if (!$isHtml) {
        throw new Exception($pFilename . ' is an Invalid HTML file.');
    }

    // Run the file through the security scanner, then entity-encode it for DOMDocument
    $scannedMarkup = $this->securityScanner->scanFile($pFilename);
    $encodedMarkup = mb_convert_encoding($scannedMarkup, 'HTML-ENTITIES', 'UTF-8');

    $dom = new DOMDocument();
    if ($dom->loadHTML($encodedMarkup) === false) {
        throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
    }

    return $this->loadDocument($dom, $spreadsheet);
}
||
605 | |||
/**
 * Build a new Spreadsheet from an HTML string.
 *
 * @param string $content
 *
 * @throws Exception when the content cannot be parsed as a DOM document
 *
 * @return Spreadsheet
 */
public function loadFromString($content): Spreadsheet
{
    // Run the markup through the security scanner, then entity-encode it for DOMDocument
    $scannedMarkup = $this->securityScanner->scan($content);
    $encodedMarkup = mb_convert_encoding($scannedMarkup, 'HTML-ENTITIES', 'UTF-8');

    $dom = new DOMDocument();
    if ($dom->loadHTML($encodedMarkup) === false) {
        throw new Exception('Failed to load content as a DOM Document');
    }

    return $this->loadDocument($dom, new Spreadsheet());
}
||
627 | |||
/**
 * Walk a parsed DOM document and write its content into the configured worksheet.
 *
 * @param DOMDocument $document
 * @param Spreadsheet $spreadsheet
 *
 * @throws \PhpOffice\PhpSpreadsheet\Exception
 *
 * @return Spreadsheet the same instance that was passed in
 */
private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
{
    // Add worksheets until the configured index exists, then make it active
    while ($this->sheetIndex >= $spreadsheet->getSheetCount()) {
        $spreadsheet->createSheet();
    }
    $spreadsheet->setActiveSheetIndex($this->sheetIndex);

    // Discard white space
    $document->preserveWhiteSpace = false;

    // Reset the per-load state; the three locals are handed to the DOM walk by reference
    $this->rowspan = [];
    $currentRow = 0;
    $currentColumn = 'A';
    $pendingContent = '';
    $this->processDomElement(
        $document,
        $spreadsheet->getActiveSheet(),
        $currentRow,
        $currentColumn,
        $pendingContent
    );

    return $spreadsheet;
}
||
657 | |||
/**
 * Return the index of the worksheet that content is loaded into.
 *
 * @return int
 */
public function getSheetIndex()
{
    return $this->sheetIndex;
}
||
667 | |||
/**
 * Choose the worksheet that content is loaded into.
 *
 * @param int $pValue Sheet index
 *
 * @return Html this reader, so calls can be chained
 */
public function setSheetIndex($pValue)
{
    $this->sheetIndex = $pValue;

    return $this;
}
||
681 | |||
/**
 * Apply the declarations of an element's inline "style" attribute to a cell.
 *
 * Handled properties: background, background-color, color, border, border-top,
 * border-bottom, border-left, border-right, font-size, font-weight, font-style,
 * font-family, text-decoration, text-align, vertical-align, width, height,
 * word-wrap and text-indent. Any other property is ignored, as is any
 * declaration that has no value.
 *
 * Colours are only honoured when written as "#" followed by the hex value.
 *
 * @param Worksheet $sheet
 * @param int $row
 * @param string $column
 * @param array $attributeArray attributes of the element; only the 'style' entry is read
 */
private function applyInlineStyle(&$sheet, $row, $column, $attributeArray)
{
    if (!isset($attributeArray['style'])) {
        return;
    }

    $cellStyle = $sheet->getStyle($column . $row);

    foreach (explode(';', $attributeArray['style']) as $declaration) {
        // Split on the first colon only, so a value that itself contains ':' stays intact
        $parts = explode(':', $declaration, 2);
        $styleName = trim($parts[0]);
        $styleValue = isset($parts[1]) ? trim($parts[1]) : '';

        // Skip empty fragments (e.g. after a trailing ';') and declarations without a value
        if ($styleName === '' || $styleValue === '') {
            continue;
        }

        switch ($styleName) {
            case 'background':
            case 'background-color':
                $styleColor = $this->getStyleColor($styleValue);
                if ($styleColor) {
                    $cellStyle->applyFromArray([
                        'fill' => [
                            'fillType' => Fill::FILL_SOLID,
                            'color' => ['rgb' => $styleColor],
                        ],
                    ]);
                }

                break;
            case 'color':
                $styleColor = $this->getStyleColor($styleValue);
                if ($styleColor) {
                    $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
                }

                break;
            case 'border':
                $this->setBorderStyle($cellStyle, $styleValue, 'allBorders');

                break;
            case 'border-top':
                $this->setBorderStyle($cellStyle, $styleValue, 'top');

                break;
            case 'border-bottom':
                $this->setBorderStyle($cellStyle, $styleValue, 'bottom');

                break;
            case 'border-left':
                $this->setBorderStyle($cellStyle, $styleValue, 'left');

                break;
            case 'border-right':
                $this->setBorderStyle($cellStyle, $styleValue, 'right');

                break;
            case 'font-size':
                $cellStyle->getFont()->setSize((float) $styleValue);

                break;
            case 'font-weight':
                // Compare numerically only when the value is numeric: a keyword such as
                // "normal" must never be ordered against 500 as a string
                if ($styleValue === 'bold' || (is_numeric($styleValue) && $styleValue >= 500)) {
                    $cellStyle->getFont()->setBold(true);
                }

                break;
            case 'font-style':
                if ($styleValue === 'italic') {
                    $cellStyle->getFont()->setItalic(true);
                }

                break;
            case 'font-family':
                // Drop the single quotes that may wrap the font name
                $cellStyle->getFont()->setName(str_replace('\'', '', $styleValue));

                break;
            case 'text-decoration':
                switch ($styleValue) {
                    case 'underline':
                        $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);

                        break;
                    case 'line-through':
                        $cellStyle->getFont()->setStrikethrough(true);

                        break;
                }

                break;
            case 'text-align':
                $cellStyle->getAlignment()->setHorizontal($styleValue);

                break;
            case 'vertical-align':
                $cellStyle->getAlignment()->setVertical($styleValue);

                break;
            case 'width':
                $sheet->getColumnDimension($column)->setWidth(
                    str_replace('px', '', $styleValue)
                );

                break;
            case 'height':
                $sheet->getRowDimension($row)->setRowHeight(
                    str_replace('px', '', $styleValue)
                );

                break;
            case 'word-wrap':
                $cellStyle->getAlignment()->setWrapText(
                    $styleValue === 'break-word'
                );

                break;
            case 'text-indent':
                $cellStyle->getAlignment()->setIndent(
                    (int) str_replace(['px'], '', $styleValue)
                );

                break;
        }
    }
}
|
844 | |||
/**
 * Extract the hex digits from a CSS colour written as "#" followed by the value.
 *
 * @param mixed $value raw CSS value; anything that is not a string yields null
 *
 * @return null|string the text after the leading "#", or null when the value
 *                     does not start with "#" or nothing follows it
 */
public function getStyleColor($value)
{
    // A missing value (null) or any other non-string cannot carry a hex colour
    if (!is_string($value) || strpos($value, '#') !== 0) {
        return null;
    }

    $hex = substr($value, 1);

    // A lone "#" leaves an empty remainder (false before PHP 8); report it as "no colour"
    if ($hex === '' || $hex === false) {
        return null;
    }

    return $hex;
}
||
860 | |||
861 | /** |
||
862 | * @param Worksheet $sheet |
||
863 | * @param string $column |
||
864 | * @param int $row |
||
865 | * @param array $attributes |
||
866 | * |
||
867 | * @throws \PhpOffice\PhpSpreadsheet\Exception |
||
868 | */ |
||
869 | 1 | private function insertImage(Worksheet $sheet, $column, $row, array $attributes) |
|
906 | ); |
||
907 | 1 | } |
|
908 | |||
/**
 * Map an HTML/CSS border style keyword to the matching PhpSpreadsheet border style.
 *
 * The lookup is an exact string match, so non-string input never matches.
 *
 * @param string $style
 *
 * @return null|string the Border::BORDER_* constant, or null for an unknown keyword
 */
public function getBorderStyle($style)
{
    $borderStyles = [
        'solid' => Border::BORDER_THIN,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'medium' => Border::BORDER_MEDIUM,
        'thick' => Border::BORDER_THICK,
        'none' => Border::BORDER_NONE,
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
    ];

    // Guard against non-string input, which a loose comparison could match by accident
    if (!is_string($style)) {
        return null;
    }

    return $borderStyles[$style] ?? null;
}
||
951 | |||
952 | /** |
||
953 | * @param Style $cellStyle |
||
954 | * @param string $styleValue |
||
955 | * @param string $type |
||
956 | */ |
||
957 | 1 | private function setBorderStyle(Style $cellStyle, $styleValue, $type) |
|
966 | ], |
||
967 | ], |
||
968 | ]); |
||
969 | 1 | } |
|
970 | } |
||
971 |