Total Complexity | 44 |
Total Lines | 273 |
Duplicated Lines | 0 % |
Changes | 19 | ||
Bugs | 4 | Features | 2 |
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields or methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
47 | class Parser |
||
48 | { |
||
49 | /** |
||
50 | * @var PDFObject[] |
||
51 | */ |
||
52 | protected $objects = []; |
||
53 | |||
54 | protected $rawDataParser; |
||
55 | |||
/**
 * Creates the parser together with the raw-data parser it delegates to.
 *
 * @param array $cfg configuration forwarded as-is to the RawDataParser
 *                   constructor; defaults to an empty array
 */
public function __construct($cfg = [])
{
    $rawDataParser = new RawDataParser($cfg);

    $this->rawDataParser = $rawDataParser;
}
||
60 | |||
61 | /** |
||
62 | * @param string $filename |
||
63 | * |
||
64 | * @return Document |
||
65 | * |
||
66 | * @throws \Exception |
||
67 | */ |
||
68 | public function parseFile($filename) |
||
82 | } |
||
83 | |||
84 | /** |
||
85 | * @param string $content PDF content to parse |
||
86 | * |
||
87 | * @return Document |
||
88 | * |
||
89 | * @throws \Exception if secured PDF file was detected |
||
90 | * @throws \Exception if no object list was found |
||
91 | */ |
||
92 | public function parseContent($content) |
||
118 | } |
||
119 | |||
120 | protected function parseTrailer($structure, $document) |
||
140 | } |
||
141 | |||
/**
 * Builds the object identified by $id from its parsed structure and stores
 * it in $this->objects.
 *
 * Each part of $structure is dispatched on its type token ($part[0]):
 *  - '['      : every sub-element is parsed and the list becomes the header;
 *  - '<<'     : the dictionary is parsed into the header;
 *  - 'stream' : the stream content is kept; when the current header has
 *               Type "ObjStm", the embedded objects are extracted and stored
 *               under "<number>_0" and the method returns without storing
 *               the object stream itself;
 *  - other    : the part is parsed as a single header element.
 *
 * An object already stored under $id is never overwritten.
 *
 * @param string   $id        key under which the object is stored
 * @param array    $structure list of parts; index 0 of a part is its type
 *                            token and index 1 its value
 * @param Document $document  document passed to every header/object created
 */
protected function parseObject($id, $structure, $document)
{
    $header = new Header([], $document);
    $content = '';

    foreach ($structure as $part) {
        switch ($part[0]) {
            case '[':
                $elements = [];

                foreach ($part[1] as $sub_element) {
                    $sub_type = $sub_element[0];
                    $sub_value = $sub_element[1];
                    $elements[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
                }

                $header = new Header($elements, $document);
                break;

            case '<<':
                $header = $this->parseHeader($part[1], $document);
                break;

            case 'stream':
                // $part[3][0] is preferred when present (presumably the
                // decoded stream - confirm against RawDataParser).
                $content = isset($part[3][0]) ? $part[3][0] : $part[1];

                if ($header->get('Type')->equals('ObjStm')) {
                    $match = [];

                    // Split xrefs and contents.
                    preg_match('/^((\d+\s+\d+\s*)*)(.*)$/s', $content, $match);
                    $content = $match[3];

                    // Extract xrefs.
                    $xrefs = preg_split(
                        '/(\d+\s+\d+\s*)/s',
                        $match[1],
                        -1,
                        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
                    );
                    $table = [];

                    foreach ($xrefs as $xref) {
                        // The patterns above accept any whitespace between
                        // the two integers, so split on whitespace runs
                        // rather than on a single space character.
                        list($sub_number, $sub_offset) = preg_split('/\s+/', trim($xref));
                        $table[$sub_offset] = $sub_number;
                    }

                    // Order by offset so that each object ends where the
                    // next one starts.
                    ksort($table);

                    $ids = array_values($table);
                    $positions = array_keys($table);

                    foreach ($positions as $index => $sub_offset) {
                        $sub_id = $ids[$index].'_0';
                        $next_position = isset($positions[$index + 1]) ? $positions[$index + 1] : \strlen($content);
                        $sub_content = substr($content, $sub_offset, (int) $next_position - (int) $sub_offset);

                        $sub_header = Header::parse($sub_content, $document);
                        $object = PDFObject::factory($document, $sub_header, '');
                        $this->objects[$sub_id] = $object;
                    }

                    // It is not necessary to store the object stream itself.
                    return;
                }
                break;

            default:
                if ('null' != $part) {
                    $element = $this->parseHeaderElement($part[0], $part[1], $document);

                    if ($element) {
                        $header = new Header([$element], $document);
                    }
                }
                break;
        }
    }

    if (!isset($this->objects[$id])) {
        $this->objects[$id] = PDFObject::factory($document, $header, $content);
    }
}
||
232 | |||
233 | /** |
||
234 | * @param array $structure |
||
235 | * @param Document $document |
||
236 | * |
||
237 | * @return Header |
||
238 | * |
||
239 | * @throws \Exception |
||
240 | */ |
||
241 | protected function parseHeader($structure, $document) |
||
255 | } |
||
256 | |||
257 | /** |
||
258 | * @param string $type |
||
259 | * @param string|array $value |
||
260 | * @param Document $document |
||
261 | * |
||
262 | * @return Element|Header|null |
||
263 | * |
||
264 | * @throws \Exception |
||
265 | */ |
||
266 | protected function parseHeaderElement($type, $value, $document) |
||
323 |