Complex classes like ARC2_RDFXMLParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ARC2_RDFXMLParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
14 | class ARC2_RDFXMLParser extends ARC2_RDFParser { |
||
15 | |||
16 | function __construct($a = '', &$caller) { |
||
19 | |||
20 | function ARC2_RDFXMLParser($a = '', &$caller) { |
||
23 | |||
/**
 * Resets all parser-level state so that a fresh document can be processed.
 *
 * Runs the parent initialisation first, then sets the configured encoding,
 * the state-machine state, the document-level xml:lang / xml:base defaults,
 * the built-in namespace prefix table and an empty subject stack.
 */
function __init() {
  parent::__init();
  $this->encoding = $this->v('encoding', false, $this->a);
  /* state machine starts at 0 (nothing opened yet) */
  $this->state = 0;
  /* document-level defaults, may be overridden by xml:lang / xml:base */
  $this->x_lang = '';
  $this->x_base = $this->base;
  /* namespaces known up-front and their fixed prefixes */
  $this->xml = 'http://www.w3.org/XML/1998/namespace';
  $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  $this->nsp = array(
    $this->xml => 'xml',
    $this->rdf => 'rdf',
  );
  /* empty subject stack */
  $this->s_stack = array();
  $this->s_count = 0;
  /* filled in by parse() once the document has been consumed */
  $this->target_encoding = '';
}
||
37 | |||
38 | /* */ |
||
39 | |||
/**
 * Streams a document through the reader and feeds it chunk-wise to the XML parser.
 *
 * If the XML parser reports an "Invalid character" error on the first attempt,
 * the parser and reader are discarded, the object is re-initialised with an
 * ISO-8859-1 encoding, and the document is parsed once more with
 * $iso_fallback set. Any other XML error (or a second failure) is reported
 * through addError().
 *
 * @param string $path         Location handed to the reader's activate().
 * @param string $data         Optional inline data handed to the reader's activate().
 * @param bool   $iso_fallback True only on the internal ISO-8859-1 retry; the XML
 *                             declaration of the first chunk is then replaced.
 * @return mixed Result of done() on success, result of addError() on failure.
 */
function parse($path, $data = '', $iso_fallback = false) {
  /* reader */
  if (!$this->v('reader')) {
    ARC2::inc('Reader');
    /* plain assignment: assigning the result of "new" by reference is a parse error on PHP 7+ */
    $this->reader = new ARC2_Reader($this->a, $this);
  }
  $this->reader->setAcceptHeader('Accept: application/rdf+xml; q=0.9, */*; q=0.1');
  $this->reader->activate($path, $data);
  /* an explicitly configured base wins over the one detected by the reader */
  $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
  /* xml parser */
  $this->initXMLParser();
  /* parse */
  $first = true;
  while ($d = $this->reader->readStream()) {
    if (!$this->keep_time_limit) @set_time_limit($this->v('time_limit', 60, $this->a));
    if ($iso_fallback && $first) {
      /* retry run: force an ISO-8859-1 declaration onto the first chunk */
      $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
      $first = false;
    }
    if (!xml_parse($this->xml_parser, $d, false)) {
      $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
      $line = xml_get_current_line_number($this->xml_parser);
      $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
      if (!$iso_fallback && preg_match("/Invalid character/i", $error_str)) {
        /* throw everything away and start over as ISO-8859-1 */
        xml_parser_free($this->xml_parser);
        unset($this->xml_parser);
        $this->reader->closeStream();
        $this->__init();
        $this->encoding = 'ISO-8859-1';
        /* repeated after __init() so a fresh parser is guaranteed to be created */
        unset($this->xml_parser);
        unset($this->reader);
        return $this->parse($path, $data, true);
      }
      else {
        return $this->addError($this->tmp_error);
      }
    }
  }
  $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
  xml_parser_free($this->xml_parser);
  $this->reader->closeStream();
  unset($this->reader);
  return $this->done();
}
||
84 | |||
85 | /* */ |
||
86 | |||
/**
 * Lazily creates the namespace-aware XML parser and wires it to this object.
 *
 * Does nothing when $this->xml_parser is already set. The source encoding is
 * taken from getEncoding() when it is one of UTF-8, ISO-8859-1 or US-ASCII
 * (case-insensitive), and falls back to UTF-8 otherwise.
 */
function initXMLParser() {
  if (isset($this->xml_parser)) {
    return;
  }
  $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
  /* empty separator: element names arrive as namespace URI directly followed by the local name */
  $parser = xml_parser_create_ns($enc, '');
  xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
  xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
  /*
   * Handlers are bound as array callables instead of bare method names plus
   * xml_set_object(): that combination is deprecated as of PHP 8.4, while
   * array callables behave the same on every PHP 5+ release.
   */
  xml_set_element_handler($parser, array($this, 'open'), array($this, 'close'));
  xml_set_character_data_handler($parser, array($this, 'cdata'));
  xml_set_start_namespace_decl_handler($parser, array($this, 'nsDecl'));
  $this->xml_parser = $parser;
}
||
100 | |||
101 | /* */ |
||
102 | |||
103 | function getEncoding($src = 'config') { |
||
112 | |||
113 | /* */ |
||
114 | |||
115 | function getTriples() { |
||
118 | |||
119 | function countTriples() { |
||
122 | |||
123 | /* */ |
||
124 | |||
/**
 * Pushes a subject frame onto the subject stack.
 *
 * The frame (passed by reference) is tagged with its stack position under
 * the 'pos' key before it is stored.
 */
function pushS(&$s) {
  $pos = $this->s_count;
  $s['pos'] = $pos;
  $this->s_stack[$pos] = $s;
  $this->s_count = $pos + 1;
}
||
130 | |||
/**
 * Removes the top-most frame from the subject stack.
 *
 * Popping an empty stack is a no-op; previously the counter dropped to -1,
 * which made the next pushS() store its frame under a negative index.
 */
function popS() {
  if ($this->s_count < 1) {
    /* nothing to pop: keep counter and stack in a consistent empty state */
    $this->s_count = 0;
    $this->s_stack = array();
    return;
  }
  $this->s_count--;
  /* keep the first s_count frames, re-indexed from 0 */
  $this->s_stack = array_slice($this->s_stack, 0, $this->s_count);
}
||
139 | |||
140 | function updateS($s) { |
||
143 | |||
144 | function getParentS() { |
||
147 | |||
/**
 * Returns the xml:base in effect for a node about to be opened.
 *
 * With a frame on the stack, a non-empty property-level base ('p_x_base')
 * wins over the frame's own 'x_base' ('' when neither is set). Without a
 * frame the document-level base is returned.
 */
function getParentXBase() {
  $parent = $this->getParentS();
  if (!$parent) {
    return $this->x_base;
  }
  if (isset($parent['p_x_base']) && $parent['p_x_base']) {
    return $parent['p_x_base'];
  }
  if (isset($parent['x_base'])) {
    return $parent['x_base'];
  }
  return '';
}
||
154 | |||
/**
 * Returns the xml:lang in effect for a node about to be opened.
 *
 * With a frame on the stack, a non-empty property-level language
 * ('p_x_lang') wins over the frame's own 'x_lang' ('' when neither is set).
 * Without a frame the document-level language is returned.
 */
function getParentXLang() {
  $parent = $this->getParentS();
  if (!$parent) {
    return $this->x_lang;
  }
  if (isset($parent['p_x_lang']) && $parent['p_x_lang']) {
    return $parent['p_x_lang'];
  }
  if (isset($parent['x_lang'])) {
    return $parent['x_lang'];
  }
  return '';
}
||
161 | |||
162 | /* */ |
||
163 | |||
/**
 * Appends one triple to the result list.
 *
 * When $this->skip_dupes is set, a triple whose md5(serialize()) fingerprint
 * has already been recorded in $this->added_triples is silently dropped.
 *
 * @param string $s       Subject value.
 * @param string $p       Predicate URI.
 * @param string $o       Object value.
 * @param string $s_type  Subject type as stored in the triple ('uri', 'bnode', ...).
 * @param string $o_type  Object type as stored in the triple.
 * @param string $o_dt    Optional object datatype.
 * @param string $o_lang  Optional object language.
 */
function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
  $triple = array(
    's' => $s,
    'p' => $p,
    'o' => $o,
    's_type' => $s_type,
    'o_type' => $o_type,
    'o_datatype' => $o_dt,
    'o_lang' => $o_lang,
  );
  if ($this->skip_dupes) {
    $fingerprint = md5(serialize($triple));
    if (isset($this->added_triples[$fingerprint])) {
      return;
    }
    $this->added_triples[$fingerprint] = true;
  }
  $this->triples[$this->t_count] = $triple;
  $this->t_count++;
}
||
180 | |||
/**
 * Emits the four rdf:Statement triples that reify the statement (s, p, o).
 *
 * @param string $t  URI of the statement resource (always added with type 'uri').
 * The remaining parameters describe the reified triple and mirror addT().
 */
function reify($t, $s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
  $rdf = $this->rdf;
  $this->addT($t, $rdf . 'type', $rdf . 'Statement', 'uri', 'uri');
  $this->addT($t, $rdf . 'subject', $s, 'uri', $s_type);
  $this->addT($t, $rdf . 'predicate', $p, 'uri', 'uri');
  $this->addT($t, $rdf . 'object', $o, 'uri', $o_type, $o_dt, $o_lang);
}
||
187 | |||
188 | /* */ |
||
189 | |||
/**
 * Start-element callback: forwards to the open handler of the current state.
 *
 * States 0, 1, 2, 4, 5 and 6 have an open handler; any other state records
 * an error and yields no return value.
 *
 * @param mixed  $p  XML parser handle (unused).
 * @param string $t  Expanded element name.
 * @param array  $a  Attributes keyed by expanded name.
 */
function open($p, $t, $a) {
  /* the state is an integer maintained by this class */
  $handlers = array(
    0 => 'h0Open',
    1 => 'h1Open',
    2 => 'h2Open',
    4 => 'h4Open',
    5 => 'h5Open',
    6 => 'h6Open',
  );
  if (isset($handlers[$this->state])) {
    $handler = $handlers[$this->state];
    return $this->$handler($t, $a);
  }
  $this->addError('open() called at state ' . $this->state . ' in '.$t);
}
||
203 | |||
/**
 * End-element callback: forwards to the close handler of the current state.
 *
 * States 1 through 6 have a close handler; any other state records an error
 * and yields no return value.
 *
 * @param mixed  $p  XML parser handle (unused).
 * @param string $t  Expanded element name.
 */
function close($p, $t) {
  /* the state is an integer maintained by this class */
  $handlers = array(
    1 => 'h1Close',
    2 => 'h2Close',
    3 => 'h3Close',
    4 => 'h4Close',
    5 => 'h5Close',
    6 => 'h6Close',
  );
  if (isset($handlers[$this->state])) {
    $handler = $handlers[$this->state];
    return $this->$handler($t);
  }
  $this->addError('close() called at state ' . $this->state . ' in '.$t);
}
||
217 | |||
/**
 * Character-data callback: only states 4 and 6 consume text, everything
 * else is ignored (returns false).
 *
 * @param mixed  $p  XML parser handle (unused).
 * @param string $d  Character data chunk.
 */
function cdata($p, $d) {
  /* the state is an integer maintained by this class */
  $handlers = array(
    4 => 'h4Cdata',
    6 => 'h6Cdata',
  );
  if (!isset($handlers[$this->state])) {
    return false;
  }
  $handler = $handlers[$this->state];
  return $this->$handler($d);
}
||
227 | |||
228 | function nsDecl($p, $prf, $uri) { |
||
231 | |||
232 | /* */ |
||
233 | |||
/**
 * State 0 (first element of the document).
 *
 * Picks up document-level xml:lang / xml:base, moves to state 1 and, unless
 * the element is rdf:RDF itself, treats the element as a node element right
 * away.
 */
function h0Open($t, $a) {
  $this->x_lang = $this->v($this->xml . 'lang', $this->x_lang, $a);
  $base = $this->v($this->xml . 'base', $this->x_base, $a);
  $this->x_base = $this->calcURI($base);
  $this->state = 1;
  if ($t === $this->rdf . 'RDF') {
    return;
  }
  /* no rdf:RDF wrapper: the root element is already a node element */
  $this->h1Open($t, $a);
}
||
242 | |||
243 | /* */ |
||
244 | |||
/**
 * Opens a node element: builds its subject frame, links it to an enclosing
 * property (plain or collection) when called in state 4, emits typing and
 * property-attribute triples, pushes the frame and moves to state 2.
 *
 * @param string $t  Expanded element name.
 * @param array  $a  Attributes keyed by expanded name.
 */
function h1Open($t, $a) {
  $xml = $this->xml;
  $rdf = $this->rdf;
  /* inherit base/lang from the enclosing frame unless overridden here */
  $s = array(
    'x_base' => isset($a[$xml . 'base']) ? $this->calcURI($a[$xml . 'base']) : $this->getParentXBase(),
    'x_lang' => isset($a[$xml . 'lang']) ? $a[$xml . 'lang'] : $this->getParentXLang(),
    'li_count' => 0,
  );
  /* subject identity: rdf:ID, then rdf:about, otherwise a (named or generated) bnode */
  if (isset($a[$rdf . 'ID'])) {
    $s['type'] = 'uri';
    $s['value'] = $this->calcURI('#' . $a[$rdf . 'ID'], $s['x_base']);
  }
  elseif (isset($a[$rdf . 'about'])) {
    $s['type'] = 'uri';
    $s['value'] = $this->calcURI($a[$rdf . 'about'], $s['x_base']);
  }
  else {
    $s['type'] = 'bnode';
    $s['value'] = isset($a[$rdf . 'nodeID']) ? '_:' . $a[$rdf . 'nodeID'] : $this->createBnodeID();
  }
  /* sub-node of a property element */
  if ($this->state === 4) {
    $sup_s = $this->getParentS();
    $starts_coll = isset($sup_s['o_is_coll']) && $sup_s['o_is_coll'];
    $extends_coll = !$starts_coll && isset($sup_s['is_coll']) && $sup_s['is_coll'];
    if ($starts_coll || $extends_coll) {
      /* every collection member gets its own list cell */
      $coll = array(
        'value' => $this->createBnodeID(),
        'type' => 'bnode',
        'is_coll' => true,
        'x_base' => $s['x_base'],
        'x_lang' => $s['x_lang'],
      );
      /* first cell hangs off the property, later cells off the previous cell via rdf:rest */
      $link_p = $starts_coll ? $sup_s['p'] : $rdf . 'rest';
      $this->addT($sup_s['value'], $link_p, $coll['value'], $sup_s['type'], $coll['type']);
      $this->addT($coll['value'], $rdf . 'first', $s['value'], $coll['type'], $s['type']);
      $this->pushS($coll);
    }
    elseif (isset($sup_s['p']) && $sup_s['p']) {
      /* normal sub-node */
      $this->addT($sup_s['value'], $sup_s['p'], $s['value'], $sup_s['type'], $s['type']);
    }
  }
  /* typed node */
  if ($t !== $rdf . 'Description') {
    $this->addT($s['value'], $rdf . 'type', $t, $s['type'], 'uri');
  }
  /* (additional) typing attr */
  if (isset($a[$rdf . 'type'])) {
    $this->addT($s['value'], $rdf . 'type', $a[$rdf . 'type'], $s['type'], 'uri');
  }
  /* Seq|Bag|Alt */
  $containers = array($rdf . 'Seq', $rdf . 'Bag', $rdf . 'Alt');
  if (in_array($t, $containers)) {
    $s['is_con'] = true;
  }
  /* any other attrs (skip rdf and xml, except rdf:_, rdf:value, rdf:Seq) */
  foreach ($a as $k => $v) {
    $is_foreign = (strpos($k, $xml) === false) && (strpos($k, $rdf) === false);
    if (!$is_foreign && !preg_match('/(\_[0-9]+|value|Seq|Bag|Alt|Statement|Property|List)$/', $k)) {
      continue;
    }
    /* only attribute names with a colon past the first character become literal triples */
    if (!strpos($k, ':')) {
      continue;
    }
    $this->addT($s['value'], $k, $v, $s['type'], 'literal', '', $s['x_lang']);
  }
  $this->pushS($s);
  $this->state = 2;
}
||
316 | |||
317 | /* */ |
||
318 | |||
319 | function h2Open($t, $a) { |
||
433 | |||
434 | /* */ |
||
435 | |||
436 | function h4Open($t, $a) { |
||
439 | |||
440 | /* */ |
||
441 | |||
442 | function h5Open($t, $a) { |
||
446 | |||
447 | /* */ |
||
448 | |||
/**
 * State 6 (inside an XML literal): re-serialises the start tag of a nested
 * element and appends it to the literal collected on the current frame.
 *
 * Namespace prefixes come from $this->nsp; a namespace declaration is
 * written the first time a prefix/URI pair is used within the literal.
 * Lookups for namespaces that were never declared are guarded with isset()
 * so they fall back to "no prefix" without raising undefined-index
 * notices/warnings.
 *
 * @param string $t  Expanded element name.
 * @param array  $a  Attributes keyed by expanded name.
 */
function h6Open($t, $a) {
  $s = $this->getParentS();
  $data = isset($s['o_xml_data']) ? $s['o_xml_data'] : '';
  $ns = isset($s['ns']) ? $s['ns'] : array();
  $parts = $this->splitURI($t);
  if (count($parts) === 1) {
    $data .= '<'.$t;
  }
  else {
    $ns_uri = $parts[0];
    $name = $parts[1];
    if (!isset($this->nsp[$ns_uri])) {
      /* split did not hit a declared namespace: take the first declared URI that prefixes the name */
      foreach ($this->nsp as $tmp1 => $tmp2) {
        if (strpos($t, $tmp1) === 0) {
          $ns_uri = $tmp1;
          $name = substr($t, strlen($tmp1));
          break;
        }
      }
    }
    /* still unknown namespace => no prefix */
    $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
    $data .= $nsp ? '<' . $nsp . ':' . $name : '<' . $name;
    /* ns */
    if (!isset($ns[$nsp.'='.$ns_uri]) || !$ns[$nsp.'='.$ns_uri]) {
      $data .= $nsp ? ' xmlns:'.$nsp.'="'.$ns_uri.'"' : ' xmlns="'.$ns_uri.'"';
      $ns[$nsp.'='.$ns_uri] = true;
      $s['ns'] = $ns;
    }
  }
  foreach ($a as $k => $v) {
    $parts = $this->splitURI($k);
    if (count($parts) === 1) {
      $data .= ' '.$k.'="'.$v.'"';
    }
    else {
      $ns_uri = $parts[0];
      $name = $parts[1];
      /* unknown attribute namespace => no prefix */
      $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
      $data .= $nsp ? ' '.$nsp.':'.$name.'="'.$v.'"' : ' '.$name.'="'.$v.'"' ;
    }
  }
  $data .= '>';
  $s['o_xml_data'] = $data;
  $s['o_xml_level'] = isset($s['o_xml_level']) ? $s['o_xml_level'] + 1 : 1;
  if (isset($s['p']) && $t == $s['p']) {/* xml container prop */
    /* nested element carrying the same name as the literal's property */
    $s['p_xml_literal_level'] = isset($s['p_xml_literal_level']) ? $s['p_xml_literal_level'] + 1 : 1;
  }
  $this->updateS($s);
}
||
498 | |||
499 | /* */ |
||
500 | |||
501 | function h1Close($t) {/* end of doc */ |
||
504 | |||
505 | /* */ |
||
506 | |||
/**
 * State 2 (expecting a property, getting a close): the current node element
 * ends. Pops its frame and selects the follow-up state from the frame that
 * is now on top of the stack.
 *
 * Does nothing when there is no frame to close. When no frame remains after
 * the pop, the state stays at 5.
 */
function h2Close($t) {
  $closed = $this->getParentS();
  if (!$closed) {
    return;
  }
  /* true only when the frame explicitly says it has no closing tag */
  $no_closing_tag = isset($closed['has_closing_tag']) && !$closed['has_closing_tag'];
  $this->popS();
  $this->state = 5;
  $parent = $this->getParentS();/* new s */
  if (!$parent) {
    return;
  }
  if (!isset($parent['p']) || !$parent['p']) {
    /* p close after collection|parseType=Resource|node close after p close */
    $this->state = $this->s_count ? 4 : 1;
  }
  if ($no_closing_tag) {
    $this->state = 2;
  }
}
||
525 | |||
526 | /* */ |
||
527 | |||
528 | function h3Close($t) {/* p close */ |
||
531 | |||
532 | /* */ |
||
533 | |||
534 | function h4Close($t) {/* empty p | pClose after cdata | pClose after collection */ |
||
535 | if ($s = $this->getParentS()) { |
||
536 | $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : (isset($s['x_base']) ? $s['x_base'] : ''); |
||
537 | if (isset($s['is_coll']) && $s['is_coll']) { |
||
538 | $this->addT($s['value'], $this->rdf . 'rest', $this->rdf . 'nil', $s['type'], 'uri'); |
||
539 | /* back to collection start */ |
||
540 | while ((!isset($s['p']) || ($s['p'] != $t))) { |
||
541 | $sub_s = $s; |
||
542 | $this->popS(); |
||
543 | $s = $this->getParentS(); |
||
544 | } |
||
545 | /* reification */ |
||
546 | if (isset($s['p_id']) && $s['p_id']) { |
||
547 | $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $sub_s['value'], $s['type'], $sub_s['type']); |
||
569 | |||
570 | /* */ |
||
571 | |||
572 | function h5Close($t) {/* p close */ |
||
579 | |||
580 | /* */ |
||
581 | |||
582 | function h6Close($t) { |
||
621 | |||
622 | /* */ |
||
623 | |||
624 | function h4Cdata($d) { |
||
630 | |||
631 | /* */ |
||
632 | |||
633 | function h6Cdata($d) { |
||
642 | |||
643 | /* */ |
||
644 | |||
645 | } |
||
646 |
If you suppress an error, we recommend checking for the error condition explicitly: