Total Complexity | 103 |
Total Lines | 638 |
Duplicated Lines | 0 % |
Coverage | 69.37% |
Changes | 0 |
Complex classes like Html often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Html, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | class Html extends BaseReader |
||
18 | { |
||
19 | /** |
||
20 | * Sample size to read to determine if it's HTML or not. |
||
21 | */ |
||
22 | const TEST_SAMPLE_SIZE = 2048; |
||
23 | |||
24 | /** |
||
25 | * Input encoding. |
||
26 | * |
||
27 | * @var string |
||
28 | */ |
||
29 | protected $inputEncoding = 'ANSI'; |
||
30 | |||
31 | /** |
||
32 | * Sheet index to read. |
||
33 | * |
||
34 | * @var int |
||
35 | */ |
||
36 | protected $sheetIndex = 0; |
||
37 | |||
/**
 * Style definitions keyed by HTML tag name (h1-h6, a, hr).
 *
 * Each value is a nested style array (font / borders settings).
 * NOTE(review): presumably consumed via the style's applyFromArray() when
 * the matching tag is processed - confirm against processDomElement().
 *
 * @var array
 */
protected $formats = [
    'h1' => [
        'font' => [
            'bold' => true,
            'size' => 24,
        ],
    ], // Bold, 24pt
    'h2' => [
        'font' => [
            'bold' => true,
            'size' => 18,
        ],
    ], // Bold, 18pt
    'h3' => [
        'font' => [
            'bold' => true,
            'size' => 13.5,
        ],
    ], // Bold, 13.5pt
    'h4' => [
        'font' => [
            'bold' => true,
            'size' => 12,
        ],
    ], // Bold, 12pt
    'h5' => [
        'font' => [
            'bold' => true,
            'size' => 10,
        ],
    ], // Bold, 10pt
    'h6' => [
        'font' => [
            'bold' => true,
            'size' => 7.5,
        ],
    ], // Bold, 7.5pt
    'a' => [
        'font' => [
            'underline' => true,
            'color' => [
                'argb' => Color::COLOR_BLUE,
            ],
        ],
    ], // Blue underlined
    'hr' => [
        'borders' => [
            'bottom' => [
                'borderStyle' => Border::BORDER_THIN,
                // Keyed by 'argb' like the link colour above; a bare list
                // element carries no colour key for the style to pick up.
                'color' => [
                    'argb' => Color::COLOR_BLACK,
                ],
            ],
        ],
    ], // Bottom border
];
||
99 | |||
100 | protected $rowspan = []; |
||
101 | |||
102 | /** |
||
103 | * Create a new HTML Reader instance. |
||
104 | */ |
||
105 | 18 | public function __construct() |
|
108 | 18 | } |
|
109 | |||
/**
 * Check whether the given file looks like an HTML document.
 *
 * The file is opened, a sample from its beginning and its ending is
 * inspected, and the handle is closed again before the verdict is returned.
 *
 * @param string $pFilename
 *
 * @return bool
 */
public function canRead($pFilename)
{
    // A file that cannot be opened can never be read
    try {
        $this->openFile($pFilename);
    } catch (Exception $e) {
        return false;
    }

    // Run every probe while the handle is still open
    $head = $this->readBeginning();
    $opensWithTag = self::startsWithTag($head);
    $hasTags = self::containsTags($head);
    $closesWithTag = self::endsWithTag($this->readEnding());

    fclose($this->fileHandle);

    return $opensWithTag && $hasTags && $closesWithTag;
}
||
135 | |||
/**
 * Read up to TEST_SAMPLE_SIZE bytes from the start of the open file handle.
 *
 * @return false|string Whatever fread() returns for the sample
 */
private function readBeginning()
{
    $handle = $this->fileHandle;

    // Position at the first byte before sampling
    fseek($handle, 0);
    $sample = fread($handle, self::TEST_SAMPLE_SIZE);

    return $sample;
}
||
142 | |||
/**
 * Read up to TEST_SAMPLE_SIZE bytes from the end of the open file handle.
 *
 * The file size is looked up through the URI reported by the stream
 * metadata; an empty file yields an empty string.
 *
 * @return false|string Whatever fread() returns for the sample, or '' for an empty file
 */
private function readEnding()
{
    $handle = $this->fileHandle;
    $streamInfo = stream_get_meta_data($handle);
    $fileSize = filesize($streamInfo['uri']);

    if ($fileSize === 0) {
        return '';
    }

    // Never ask for more bytes than the file holds
    $length = ($fileSize < self::TEST_SAMPLE_SIZE) ? $fileSize : self::TEST_SAMPLE_SIZE;

    fseek($handle, $fileSize - $length);

    return fread($handle, $length);
}
||
162 | |||
/**
 * Tell whether the data, ignoring surrounding whitespace, begins with '<'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function startsWithTag($data)
{
    $trimmed = trim($data);

    return $trimmed !== '' && $trimmed[0] === '<';
}
||
167 | |||
/**
 * Tell whether the data, ignoring surrounding whitespace, finishes with '>'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function endsWithTag($data)
{
    $trimmed = trim($data);
    $lastChar = substr($trimmed, -1);

    return $lastChar === '>';
}
||
172 | |||
173 | 16 | private static function containsTags($data) |
|
176 | } |
||
177 | |||
/**
 * Read the file into a freshly created Spreadsheet.
 *
 * @param string $pFilename
 *
 * @throws Exception
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    // Delegate to loadIntoExisting() with a brand-new target object
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
||
195 | |||
/**
 * Store the encoding of the input (fluent setter).
 *
 * @param string $pValue Input encoding, eg: 'ANSI'
 *
 * @return Html This reader, to allow call chaining
 */
public function setInputEncoding($pValue)
{
    $this->inputEncoding = $pValue;

    return $this;
}
||
209 | |||
/**
 * Return the currently configured input encoding.
 *
 * @return string
 */
public function getInputEncoding()
{
    return $this->inputEncoding;
}
||
219 | |||
220 | // Data Array used for testing only, should write to Spreadsheet object on completion of tests |
||
221 | protected $dataArray = []; |
||
222 | |||
223 | protected $tableLevel = 0; |
||
224 | |||
225 | protected $nestedColumn = ['A']; |
||
226 | |||
/**
 * Enter a (possibly nested) table and record the column it starts in.
 *
 * At nesting level 0 the requested column is ignored and 'A' is used.
 *
 * @param string $column Requested start column
 *
 * @return string The start column recorded for the new nesting level
 */
protected function setTableStartColumn($column)
{
    // Outermost tables always start in column A
    $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

    ++$this->tableLevel;
    $this->nestedColumn[$this->tableLevel] = $startColumn;

    return $startColumn;
}
||
237 | |||
238 | 9 | protected function getTableStartColumn() |
|
241 | } |
||
242 | |||
/**
 * Leave the current table: drop one nesting level and discard its start column.
 *
 * @return mixed The last entry removed from the nested column stack
 */
protected function releaseTableStartColumn()
{
    $released = array_pop($this->nestedColumn);
    --$this->tableLevel;

    return $released;
}
||
249 | |||
/**
 * Write the pending cell content to the worksheet and reset the buffer.
 *
 * String content is written only when it is non-blank after trimming.
 * Non-string content is treated as a Rich Text run, which is not written
 * to the sheet yet (TODO) and is only recorded in the test data array.
 *
 * @param Worksheet $sheet
 * @param string $column
 * @param int $row
 * @param mixed $cellContent Passed by reference; always reset to '' on return
 */
protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent)
{
    if (!is_string($cellContent)) {
        // We have a Rich Text run
        // TODO
        $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
    } elseif (trim($cellContent) > '') {
        // Simple string with actual content: write it to the worksheet
        $sheet->setCellValue($column . $row, $cellContent);
        $this->dataArray[$row][$column] = $cellContent;
    }

    $cellContent = '';
}
|
268 | |||
269 | /** |
||
270 | * @param DOMNode $element |
||
271 | * @param Worksheet $sheet |
||
272 | * @param int $row |
||
273 | * @param string $column |
||
274 | * @param string $cellContent |
||
275 | */ |
||
276 | 9 | protected function processDomElement(DOMNode $element, Worksheet $sheet, &$row, &$column, &$cellContent) |
|
515 | } |
||
516 | } |
||
517 | } |
||
518 | 9 | } |
|
519 | |||
/**
 * Read an HTML file into the given Spreadsheet instance.
 *
 * The file is validated with canRead(), sheets are created until the
 * configured sheet index exists, and the parsed DOM is then walked by
 * processDomElement() to fill the active sheet.
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception When the file is not valid HTML or cannot be loaded as a DOM document
 *
 * @return Spreadsheet
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    // Refuse anything that does not look like HTML
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid HTML file.');
    }

    // Add sheets until the requested index exists, then select it
    while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
        $spreadsheet->createSheet();
    }
    $spreadsheet->setActiveSheetIndex($this->sheetIndex);

    // Parse the security-scanned file contents into a DOM tree
    $dom = new DOMDocument();
    $markup = mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8');
    if ($dom->loadHTML($markup) === false) {
        throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
    }

    // Discard white space
    // NOTE(review): this is set after loadHTML() - confirm it has the intended effect
    $dom->preserveWhiteSpace = false;

    // Cursor state handed by reference to the DOM walker
    $rowIndex = 0;
    $columnLetter = 'A';
    $pendingContent = '';
    $this->rowspan = [];
    $this->processDomElement($dom, $spreadsheet->getActiveSheet(), $rowIndex, $columnLetter, $pendingContent);

    return $spreadsheet;
}
||
563 | |||
564 | /** |
||
565 | * Get sheet index. |
||
566 | * |
||
567 | * @return int |
||
568 | */ |
||
569 | public function getSheetIndex() |
||
572 | } |
||
573 | |||
574 | /** |
||
575 | * Set sheet index. |
||
576 | * |
||
577 | * @param int $pValue Sheet index |
||
578 | * |
||
579 | * @return Html |
||
580 | */ |
||
581 | public function setSheetIndex($pValue) |
||
586 | } |
||
587 | |||
588 | /** |
||
589 | * Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks. |
||
590 | * |
||
591 | * @param string $xml |
||
592 | * |
||
593 | * @return string |
||
594 | */ |
||
595 | 9 | public function securityScan($xml) |
|
603 | } |
||
604 | |||
605 | /** |
||
606 | * Apply inline CSS style. |
||
607 | * |
||
608 | * NOTES : |
||
609 | * Currently only intended for td & th elements, |
||
610 | * and only handles the 'background-color' and 'color' properties with HEX color values. |
||
611 | * |
||
612 | * TODO : |
||
613 | * - Implement other properties, such as border |
||
614 | * |
||
615 | * @param Worksheet $sheet |
||
616 | * @param int $row |
||
617 | * @param string $column |
||
618 | * @param array $attributeArray |
||
619 | */ |
||
620 | 9 | private function applyInlineStyle(&$sheet, $row, $column, $attributeArray) |
|
659 |