Total Complexity | 101 |
Total Lines | 627 |
Duplicated Lines | 0 % |
Coverage | 69.39% |
Changes | 0 |
Complex classes like Html often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Html, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
18 | class Html extends BaseReader |
||
19 | { |
||
20 | /** |
||
21 | * @var XmlScanner |
||
22 | */ |
||
23 | private $securityScanner; |
||
24 | |||
25 | /** |
||
26 | * Sample size to read to determine if it's HTML or not. |
||
27 | */ |
||
28 | const TEST_SAMPLE_SIZE = 2048; |
||
29 | |||
30 | /** |
||
31 | * Input encoding. |
||
32 | * |
||
33 | * @var string |
||
34 | */ |
||
35 | protected $inputEncoding = 'ANSI'; |
||
36 | |||
37 | /** |
||
38 | * Sheet index to read. |
||
39 | * |
||
40 | * @var int |
||
41 | */ |
||
42 | protected $sheetIndex = 0; |
||
43 | |||
/**
 * Style arrays keyed by lower-case HTML tag name (h1-h6, a, hr).
 *
 * Each value is a nested style array ('font' / 'borders' sections).
 * NOTE(review): the consumer of this table is not visible in this view;
 * presumably the DOM walker applies the entry matching the current tag.
 *
 * @var array
 */
protected $formats = [
    'h1' => [
        'font' => [
            'bold' => true,
            'size' => 24,
        ],
    ], // Bold, 24pt
    'h2' => [
        'font' => [
            'bold' => true,
            'size' => 18,
        ],
    ], // Bold, 18pt
    'h3' => [
        'font' => [
            'bold' => true,
            'size' => 13.5,
        ],
    ], // Bold, 13.5pt
    'h4' => [
        'font' => [
            'bold' => true,
            'size' => 12,
        ],
    ], // Bold, 12pt
    'h5' => [
        'font' => [
            'bold' => true,
            'size' => 10,
        ],
    ], // Bold, 10pt
    'h6' => [
        'font' => [
            'bold' => true,
            'size' => 7.5,
        ],
    ], // Bold, 7.5pt
    'a' => [
        'font' => [
            'underline' => true,
            'color' => [
                'argb' => Color::COLOR_BLUE,
            ],
        ],
    ], // Blue underlined
    'hr' => [
        'borders' => [
            'bottom' => [
                'borderStyle' => Border::BORDER_THIN,
                'color' => [
                    // Keyed like the 'a' entry above; the value was previously
                    // stored under numeric key 0, which names no colour format.
                    'argb' => Color::COLOR_BLACK,
                ],
            ],
        ],
    ], // Bottom border
];
||
105 | |||
106 | protected $rowspan = []; |
||
107 | |||
/**
 * Build an HTML reader with its default collaborators.
 *
 * Installs a DefaultReadFilter as the read filter and an XmlScanner
 * constructed with the '<!ENTITY' pattern as the security scanner.
 */
public function __construct()
{
    // The two assignments are independent of each other
    $this->securityScanner = new XmlScanner('<!ENTITY');
    $this->readFilter = new DefaultReadFilter();
}
|
116 | |||
/**
 * Decide whether the given file looks like an HTML document.
 *
 * The file passes when the sample returned by readBeginning() starts with
 * '<' (ignoring surrounding whitespace) and contains at least one strippable
 * tag, and the sample returned by readEnding() ends with '>'.
 *
 * @param string $pFilename
 *
 * @return bool false when the file cannot be opened or any check fails
 */
public function canRead($pFilename)
{
    // A file that cannot be opened is reported as "not readable", not as an error
    try {
        $this->openFile($pFilename);
    } catch (Exception $e) {
        return false;
    }

    // Take both samples while the handle is still open, then release it
    $head = $this->readBeginning();
    $tail = $this->readEnding();
    fclose($this->fileHandle);

    // The checks are pure string tests, so they can run after the handle is closed
    if (!self::startsWithTag($head)) {
        return false;
    }
    if (!self::containsTags($head)) {
        return false;
    }

    return self::endsWithTag($tail);
}
||
142 | |||
143 | 16 | private function readBeginning() |
|
148 | } |
||
149 | |||
/**
 * Read the trailing sample of the currently opened file.
 *
 * At most TEST_SAMPLE_SIZE bytes are returned; a shorter file is returned
 * whole. The file pointer is left at the end of the data that was read.
 *
 * @return string the last bytes of the file, or '' when the file is empty
 *                or its size or content cannot be obtained
 */
private function readEnding()
{
    // Query the open handle itself: unlike filesize() on the stream URI this
    // bypasses the path-based stat cache and cannot point at a different file.
    $stat = fstat($this->fileHandle);
    $size = ($stat === false) ? 0 : (int) $stat['size'];

    // Covers both an empty file and a stream whose size is unavailable;
    // fread() must never be handed a zero or non-integer length.
    if ($size <= 0) {
        return '';
    }

    $blockSize = min($size, self::TEST_SAMPLE_SIZE);

    fseek($this->fileHandle, $size - $blockSize);
    $data = fread($this->fileHandle, $blockSize);

    // Normalise a failed read so callers always receive a string
    return ($data === false) ? '' : $data;
}
||
169 | |||
/**
 * Tell whether the data, once surrounding whitespace is trimmed, begins with '<'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function startsWithTag($data)
{
    $trimmed = trim($data);

    // An empty (or whitespace-only) sample has no first character to test
    return isset($trimmed[0]) && $trimmed[0] === '<';
}
||
174 | |||
/**
 * Tell whether the data, once surrounding whitespace is trimmed, finishes with '>'.
 *
 * @param string $data
 *
 * @return bool
 */
private static function endsWithTag($data)
{
    $trimmed = trim($data);
    $length = strlen($trimmed);

    // Nothing left after trimming means there is no last character to test
    return $length > 0 && $trimmed[$length - 1] === '>';
}
||
179 | |||
/**
 * Tell whether strip_tags() removes anything from the data.
 *
 * @param string $data
 *
 * @return bool true when the stripped text is shorter or longer than the input
 */
private static function containsTags($data)
{
    $withoutTags = strip_tags($data);

    return strlen($withoutTags) !== strlen($data);
}
||
184 | |||
/**
 * Read the file into a freshly created Spreadsheet.
 *
 * @param string $pFilename
 *
 * @throws Exception
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    // Delegate to loadIntoExisting() with a brand-new, empty workbook
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
||
202 | |||
/**
 * Store the input encoding and return this reader for chaining.
 *
 * @param string $pValue Input encoding, eg: 'ANSI'
 *
 * @return Html
 */
public function setInputEncoding($pValue)
{
    $this->inputEncoding = $pValue;

    // Fluent interface
    return $this;
}
||
216 | |||
/**
 * Return the currently configured input encoding.
 *
 * @return string
 */
public function getInputEncoding()
{
    return $this->inputEncoding;
}
||
226 | |||
227 | // Data Array used for testing only, should write to Spreadsheet object on completion of tests |
||
228 | protected $dataArray = []; |
||
229 | |||
230 | protected $tableLevel = 0; |
||
231 | |||
232 | protected $nestedColumn = ['A']; |
||
233 | |||
/**
 * Enter one more table nesting level and record its start column.
 *
 * When no table is currently open (level 0) the start column is forced
 * to 'A'; otherwise the supplied column is used.
 *
 * @param string $column
 *
 * @return string the start column stored for the new nesting level
 */
protected function setTableStartColumn($column)
{
    $startColumn = ($this->tableLevel == 0) ? 'A' : $column;

    // Bump the level first so the column is stored under the new depth
    $level = ++$this->tableLevel;
    $this->nestedColumn[$level] = $startColumn;

    return $startColumn;
}
||
244 | |||
245 | 9 | protected function getTableStartColumn() |
|
248 | } |
||
249 | |||
250 | 9 | protected function releaseTableStartColumn() |
|
255 | } |
||
256 | |||
/**
 * Write the pending cell content out and reset the buffer.
 *
 * String content that is non-empty after trimming is written to the
 * worksheet cell and mirrored into $dataArray. Non-string content is only
 * recorded in $dataArray with a 'RICH TEXT: ' prefix (rich text is a TODO).
 * In every case $cellContent is reset to an empty string afterwards.
 *
 * @param Worksheet $sheet
 * @param string $column
 * @param int $row
 * @param mixed $cellContent buffer, passed by reference and cleared on return
 */
protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent)
{
    if (!is_string($cellContent)) {
        // We have a Rich Text run
        // TODO
        $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
    } elseif (trim($cellContent) > '') {
        // Simple string: only written when something remains after trimming
        $sheet->setCellValue($column . $row, $cellContent);
        $this->dataArray[$row][$column] = $cellContent;
    }

    // Start the next cell from an empty buffer
    $cellContent = '';
}
|
275 | |||
276 | /** |
||
277 | * @param DOMNode $element |
||
278 | * @param Worksheet $sheet |
||
279 | * @param int $row |
||
280 | * @param string $column |
||
281 | * @param string $cellContent |
||
282 | */ |
||
283 | 9 | protected function processDomElement(DOMNode $element, Worksheet $sheet, &$row, &$column, &$cellContent) |
|
522 | } |
||
523 | } |
||
524 | } |
||
525 | 9 | } |
|
526 | |||
/**
 * Read an HTML file into the sheet at $sheetIndex of an existing Spreadsheet.
 *
 * Missing sheets up to the configured index are created, that sheet is made
 * active, and the parsed DOM is handed to processDomElement() to fill it.
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception when canRead() rejects the file or the DOM cannot be built
 *
 * @return Spreadsheet the same instance that was passed in
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    // Refuse anything that does not pass the HTML sniff test
    $readable = $this->canRead($pFilename);
    if (!$readable) {
        throw new Exception($pFilename . ' is an Invalid HTML file.');
    }

    // Add sheets until the target index exists, then select it
    while ($this->sheetIndex >= $spreadsheet->getSheetCount()) {
        $spreadsheet->createSheet();
    }
    $spreadsheet->setActiveSheetIndex($this->sheetIndex);

    // Run the file through the security scanner, convert it from UTF-8 to
    // HTML entities, and parse the result into a DOM tree
    $dom = new DOMDocument();
    $scanned = $this->securityScanner->scanFile($pFilename);
    $markup = mb_convert_encoding($scanned, 'HTML-ENTITIES', 'UTF-8');
    if ($dom->loadHTML($markup) === false) {
        throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
    }

    // Discard white space
    // NOTE(review): this flag is set after the document has been parsed - confirm it has the intended effect
    $dom->preserveWhiteSpace = false;

    // Cursor state handed to the DOM walker by reference
    $content = '';
    $column = 'A';
    $row = 0;
    $this->rowspan = [];
    $this->processDomElement($dom, $spreadsheet->getActiveSheet(), $row, $column, $content);

    return $spreadsheet;
}
||
570 | |||
571 | /** |
||
572 | * Get sheet index. |
||
573 | * |
||
574 | * @return int |
||
575 | */ |
||
576 | public function getSheetIndex() |
||
579 | } |
||
580 | |||
581 | /** |
||
582 | * Set sheet index. |
||
583 | * |
||
584 | * @param int $pValue Sheet index |
||
585 | * |
||
586 | * @return Html |
||
587 | */ |
||
588 | public function setSheetIndex($pValue) |
||
593 | } |
||
594 | |||
595 | /** |
||
596 | * Apply inline CSS style. |
||
597 | * |
||
598 | * NOTES : |
||
599 | * Currently only intended for td & th element, |
||
600 | * and only handles the 'background-color' and 'color' properties with HEX color values |
||
601 | * |
||
602 | * TODO : |
||
603 | * - Implement other properties, such as border |
||
604 | * |
||
605 | * @param Worksheet $sheet |
||
606 | * @param int $row |
||
607 | * @param string $column |
||
608 | * @param array $attributeArray |
||
609 | */ |
||
610 | 9 | private function applyInlineStyle(&$sheet, $row, $column, $attributeArray) |
|
645 | } |
||
646 | } |
||
647 | 1 | } |
|
648 | } |
||
649 |