Complex classes like ARC2_RDFXMLParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ARC2_RDFXMLParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 14 | class ARC2_RDFXMLParser extends ARC2_RDFParser { |
||
| 15 | |||
| 16 | function __construct($a = '', &$caller) { |
||
| 19 | |||
| 20 | function ARC2_RDFXMLParser($a = '', &$caller) { |
||
| 23 | |||
/**
 * Resets the parser to its initial state.
 *
 * Runs the parent initialisation first, then resets the configured encoding,
 * the handler state, the xml:lang / xml:base context, the known namespace
 * prefixes and the subject stack.
 */
function __init() {
  parent::__init();
  $this->encoding = $this->v('encoding', false, $this->a);

  /* namespace URIs and their default prefixes */
  $xml_ns = 'http://www.w3.org/XML/1998/namespace';
  $rdf_ns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  $this->xml = $xml_ns;
  $this->rdf = $rdf_ns;
  $this->nsp = array($xml_ns => 'xml', $rdf_ns => 'rdf');

  /* handler state and inherited xml:lang / xml:base context */
  $this->state = 0;
  $this->x_lang = '';
  $this->x_base = $this->base;

  /* subject stack */
  $this->s_stack = array();
  $this->s_count = 0;

  $this->target_encoding = '';
}
||
| 37 | |||
| 38 | /* */ |
||
| 39 | |||
/**
 * Reads the document via ARC2_Reader and feeds it chunk-wise to the XML parser.
 *
 * If the XML parser reports an "Invalid character" error on the first attempt,
 * the parser state is reset and the whole document is parsed again with a
 * forced ISO-8859-1 XML declaration.
 *
 * @param string $path         location handed to the reader
 * @param string $data         optional inline data handed to the reader
 * @param bool   $iso_fallback true on the internal ISO-8859-1 retry
 * @return mixed result of done() on success, result of addError() on an XML error
 */
function parse($path, $data = '', $iso_fallback = false) {
  /* reader */
  if (!$this->v('reader')) {
    ARC2::inc('Reader');
    /* plain assignment: "=& new" is a parse error as of PHP 7 */
    $this->reader = new ARC2_Reader($this->a, $this);
  }
  $this->reader->setAcceptHeader('Accept: application/rdf+xml; q=0.9, */*; q=0.1');
  $this->reader->activate($path, $data);
  $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
  /* xml parser */
  $this->initXMLParser();
  /* parse */
  $first = true;
  while ($d = $this->reader->readStream()) {
    /* set_time_limit may be disabled by the host; check before calling,
       keep the suppression for policy warnings the check cannot detect */
    if (!$this->keep_time_limit && function_exists('set_time_limit')) {
      @set_time_limit($this->v('time_limit', 60, $this->a));
    }
    if ($iso_fallback && $first) {
      /* replace any existing XML declaration of the first chunk with an ISO-8859-1 one */
      $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
      $first = false;
    }
    if (!xml_parse($this->xml_parser, $d, false)) {
      $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
      $line = xml_get_current_line_number($this->xml_parser);
      $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
      /* a failed parser must not be reused: release it and the stream on every error path */
      xml_parser_free($this->xml_parser);
      unset($this->xml_parser);
      $this->reader->closeStream();
      if (!$iso_fallback && preg_match("/Invalid character/i", $error_str)) {
        /* start over with a fresh state and a forced ISO-8859-1 declaration */
        $this->__init();
        $this->encoding = 'ISO-8859-1';
        unset($this->reader);
        return $this->parse($path, $data, true);
      }
      return $this->addError($this->tmp_error);
    }
  }
  $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
  xml_parser_free($this->xml_parser);
  /* drop the freed handle so that initXMLParser() creates a new parser on the next call */
  unset($this->xml_parser);
  $this->reader->closeStream();
  unset($this->reader);
  return $this->done();
}
||
| 84 | |||
| 85 | /* */ |
||
| 86 | |||
/**
 * Creates the namespace-aware XML parser, unless one is already set.
 *
 * The parser is created with an empty namespace separator, so element and
 * attribute names arrive at the handlers as namespace URI directly followed
 * by the local name. Whitespace skipping and case folding are switched off.
 */
function initXMLParser() {
  if (isset($this->xml_parser)) {
    return;
  }
  /* only the encodings listed here are passed on; anything else falls back to UTF-8 */
  $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
  $parser = xml_parser_create_ns($enc, '');
  xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
  xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
  /* array callables bind the handlers to this object directly; method-name
     strings combined with xml_set_object() are deprecated as of PHP 8.4 */
  xml_set_element_handler($parser, array($this, 'open'), array($this, 'close'));
  xml_set_character_data_handler($parser, array($this, 'cdata'));
  xml_set_start_namespace_decl_handler($parser, array($this, 'nsDecl'));
  $this->xml_parser = $parser;
}
||
| 100 | |||
| 101 | /* */ |
||
| 102 | |||
| 103 | function getEncoding($src = 'config') { |
||
| 112 | |||
| 113 | /* */ |
||
| 114 | |||
| 115 | function getTriples() { |
||
| 118 | |||
| 119 | function countTriples() { |
||
| 122 | |||
| 123 | /* */ |
||
| 124 | |||
/**
 * Pushes a subject entry onto the subject stack.
 *
 * The stack position is written into the entry as 'pos'; as $s is passed by
 * reference, the caller's array receives that key as well.
 */
function pushS(&$s) {
  $pos = $this->s_count;
  $s['pos'] = $pos;
  $this->s_stack[$pos] = $s;
  $this->s_count = $pos + 1;
}
||
| 130 | |||
/**
 * Removes the top entry from the subject stack.
 *
 * Decrements the counter and rebuilds the stack from the remaining entries.
 */
function popS() {
  $remaining = --$this->s_count;
  $kept = array();
  $i = 0;
  while ($i < $remaining) {
    $kept[$i] = $this->s_stack[$i];
    $i++;
  }
  $this->s_stack = $kept;
}
||
| 139 | |||
| 140 | function updateS($s) { |
||
| 143 | |||
| 144 | function getParentS() { |
||
| 147 | |||
/**
 * Returns the base URI inherited from the current parent entry.
 *
 * A non-empty 'p_x_base' of the parent wins over its 'x_base'; without a
 * parent entry the parser-level x_base is returned.
 */
function getParentXBase() {
  $parent = $this->getParentS();
  if (!$parent) {
    return $this->x_base;
  }
  if (!empty($parent['p_x_base'])) {
    return $parent['p_x_base'];
  }
  return isset($parent['x_base']) ? $parent['x_base'] : '';
}
||
| 154 | |||
/**
 * Returns the language inherited from the current parent entry.
 *
 * A non-empty 'p_x_lang' of the parent wins over its 'x_lang'; without a
 * parent entry the parser-level x_lang is returned.
 */
function getParentXLang() {
  $parent = $this->getParentS();
  if (!$parent) {
    return $this->x_lang;
  }
  if (!empty($parent['p_x_lang'])) {
    return $parent['p_x_lang'];
  }
  return isset($parent['x_lang']) ? $parent['x_lang'] : '';
}
||
| 161 | |||
| 162 | /* */ |
||
| 163 | |||
/**
 * Appends a triple to the triples list.
 *
 * With skip_dupes enabled, a triple whose md5 hash of the serialized array
 * has been recorded before is silently dropped.
 */
function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
  $triple = array(
    's' => $s,
    'p' => $p,
    'o' => $o,
    's_type' => $s_type,
    'o_type' => $o_type,
    'o_datatype' => $o_dt,
    'o_lang' => $o_lang,
  );
  if ($this->skip_dupes) {
    $hash = md5(serialize($triple));
    if (isset($this->added_triples[$hash])) {
      return;
    }
    $this->added_triples[$hash] = true;
  }
  $this->triples[$this->t_count] = $triple;
  $this->t_count++;
}
||
| 180 | |||
/**
 * Adds the four reification triples (rdf:type rdf:Statement, rdf:subject,
 * rdf:predicate, rdf:object) for the statement identified by $t.
 */
function reify($t, $s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
  $rdf = $this->rdf;
  $this->addT($t, $rdf . 'type', $rdf . 'Statement', 'uri', 'uri');
  $this->addT($t, $rdf . 'subject', $s, 'uri', $s_type);
  $this->addT($t, $rdf . 'predicate', $p, 'uri', 'uri');
  $this->addT($t, $rdf . 'object', $o, 'uri', $o_type, $o_dt, $o_lang);
}
||
| 187 | |||
| 188 | /* */ |
||
| 189 | |||
/**
 * Start-element callback: forwards to the open handler of the current state.
 *
 * States without an open handler are reported through addError().
 */
function open($p, $t, $a) {
  $current = $this->state;
  $handlers = array(0 => 'h0Open', 1 => 'h1Open', 2 => 'h2Open', 4 => 'h4Open', 5 => 'h5Open', 6 => 'h6Open');
  foreach ($handlers as $state => $handler) {
    /* loose comparison, in the listed order */
    if ($current == $state) {
      return $this->$handler($t, $a);
    }
  }
  $this->addError('open() called at state ' . $this->state . ' in '.$t);
}
||
| 203 | |||
/**
 * End-element callback: forwards to the close handler of the current state.
 *
 * States without a close handler are reported through addError().
 */
function close($p, $t) {
  $current = $this->state;
  $handlers = array(1 => 'h1Close', 2 => 'h2Close', 3 => 'h3Close', 4 => 'h4Close', 5 => 'h5Close', 6 => 'h6Close');
  foreach ($handlers as $state => $handler) {
    /* loose comparison, in the listed order */
    if ($current == $state) {
      return $this->$handler($t);
    }
  }
  $this->addError('close() called at state ' . $this->state . ' in '.$t);
}
||
| 217 | |||
/**
 * Character-data callback: only states 4 and 6 have a handler, any other
 * state yields false.
 */
function cdata($p, $d) {
  $current = $this->state;
  if ($current == 4) {
    return $this->h4Cdata($d);
  }
  if ($current == 6) {
    return $this->h6Cdata($d);
  }
  return false;
}
||
| 227 | |||
| 228 | function nsDecl($p, $prf, $uri) { |
||
| 231 | |||
| 232 | /* */ |
||
| 233 | |||
/**
 * State 0 open handler (first element).
 *
 * Picks up xml:lang and xml:base from the element, moves to state 1 and,
 * unless the element is rdf:RDF, processes it right away via h1Open().
 */
function h0Open($t, $a) {
  $lang_attr = $this->xml . 'lang';
  $base_attr = $this->xml . 'base';
  $this->x_lang = $this->v($lang_attr, $this->x_lang, $a);
  $base = $this->v($base_attr, $this->x_base, $a);
  $this->x_base = $this->calcURI($base);
  $this->state = 1;
  if ($t === $this->rdf . 'RDF') {
    return;
  }
  $this->h1Open($t, $a);
}
||
| 242 | |||
| 243 | /* */ |
||
| 244 | |||
/**
 * State 1 open handler: processes a node element.
 *
 * Determines the subject (rdf:ID, rdf:about, rdf:nodeID or a fresh bnode),
 * links it to the parent entry when called in state 4 (directly or through
 * rdf:first / rdf:rest collection entries), adds rdf:type triples and literal
 * triples for the remaining attributes, then pushes the subject and moves to
 * state 2.
 */
function h1Open($t, $a) {
  $xml = $this->xml;
  $rdf = $this->rdf;
  $s = array(
    'x_base' => isset($a[$xml . 'base']) ? $this->calcURI($a[$xml . 'base']) : $this->getParentXBase(),
    'x_lang' => isset($a[$xml . 'lang']) ? $a[$xml . 'lang'] : $this->getParentXLang(),
    'li_count' => 0,
  );
  /* subject: ID, about, or bnode */
  if (isset($a[$rdf . 'ID'])) {
    $s['type'] = 'uri';
    $s['value'] = $this->calcURI('#' . $a[$rdf . 'ID'], $s['x_base']);
  }
  elseif (isset($a[$rdf . 'about'])) {
    $s['type'] = 'uri';
    $s['value'] = $this->calcURI($a[$rdf . 'about'], $s['x_base']);
  }
  else {
    $s['type'] = 'bnode';
    $s['value'] = isset($a[$rdf . 'nodeID']) ? '_:' . $a[$rdf . 'nodeID'] : $this->createBnodeID();
  }
  /* sub-node */
  if ($this->state === 4) {
    $sup_s = $this->getParentS();
    $starts_coll = isset($sup_s['o_is_coll']) && $sup_s['o_is_coll'];
    $extends_coll = !$starts_coll && isset($sup_s['is_coll']) && $sup_s['is_coll'];
    if ($starts_coll || $extends_coll) {
      $coll = array('value' => $this->createBnodeID(), 'type' => 'bnode', 'is_coll' => true, 'x_base' => $s['x_base'], 'x_lang' => $s['x_lang']);
      /* a new collection hangs off the parent's property, a further entry off rdf:rest */
      $link_p = $starts_coll ? $sup_s['p'] : $rdf . 'rest';
      $this->addT($sup_s['value'], $link_p, $coll['value'], $sup_s['type'], $coll['type']);
      $this->addT($coll['value'], $rdf . 'first', $s['value'], $coll['type'], $s['type']);
      $this->pushS($coll);
    }
    /* normal sub-node */
    elseif (isset($sup_s['p']) && $sup_s['p']) {
      $this->addT($sup_s['value'], $sup_s['p'], $s['value'], $sup_s['type'], $s['type']);
    }
  }
  /* typed node */
  if ($t !== $rdf . 'Description') {
    $this->addT($s['value'], $rdf . 'type', $t, $s['type'], 'uri');
  }
  /* (additional) typing attr */
  if (isset($a[$rdf . 'type'])) {
    $this->addT($s['value'], $rdf . 'type', $a[$rdf . 'type'], $s['type'], 'uri');
  }
  /* Seq|Bag|Alt */
  $containers = array($rdf . 'Seq', $rdf . 'Bag', $rdf . 'Alt');
  if (in_array($t, $containers)) {
    $s['is_con'] = true;
  }
  /* any other attrs (skip rdf and xml, except rdf:_, rdf:value, rdf:Seq) */
  foreach ($a as $attr => $val) {
    $is_foreign = (strpos($attr, $xml) === false) && (strpos($attr, $rdf) === false);
    if (!$is_foreign && !preg_match('/(\_[0-9]+|value|Seq|Bag|Alt|Statement|Property|List)$/', $attr)) {
      continue;
    }
    if (!strpos($attr, ':')) {
      continue;
    }
    $this->addT($s['value'], $attr, $val, $s['type'], 'literal', '', $s['x_lang']);
  }
  $this->pushS($s);
  $this->state = 2;
}
||
| 316 | |||
| 317 | /* */ |
||
| 318 | |||
| 319 | function h2Open($t, $a) { |
||
| 433 | |||
| 434 | /* */ |
||
| 435 | |||
| 436 | function h4Open($t, $a) { |
||
| 439 | |||
| 440 | /* */ |
||
| 441 | |||
| 442 | function h5Open($t, $a) { |
||
| 446 | |||
| 447 | /* */ |
||
| 448 | |||
/**
 * State 6 open handler: re-serialises a start tag into the XML literal kept
 * in the parent entry ('o_xml_data').
 *
 * Namespaced names are written with the prefix registered in $this->nsp (no
 * prefix when none is registered), and each prefix/URI pair is declared once
 * per literal. Attribute values and namespace URIs are escaped so that the
 * collected markup stays well-formed. Also maintains the 'o_xml_level' and
 * 'p_xml_literal_level' counters of the parent entry.
 */
function h6Open($t, $a) {
  /* markup characters, '&' first so that inserted entities are not escaped again */
  $esc_from = array('&', '<', '>', '"');
  $esc_to = array('&amp;', '&lt;', '&gt;', '&quot;');
  $s = $this->getParentS();
  $data = isset($s['o_xml_data']) ? $s['o_xml_data'] : '';
  $ns = isset($s['ns']) ? $s['ns'] : array();
  $parts = $this->splitURI($t);
  if (count($parts) === 1) {
    $data .= '<'.$t;
  }
  else {
    $ns_uri = $parts[0];
    $name = $parts[1];
    if (!isset($this->nsp[$ns_uri])) {
      /* the split did not yield a known namespace: try the known ones as a prefix of the tag */
      foreach ($this->nsp as $tmp1 => $tmp2) {
        if (strpos($t, $tmp1) === 0) {
          $ns_uri = $tmp1;
          $name = substr($t, strlen($tmp1));
          break;
        }
      }
    }
    /* an unregistered namespace is written without a prefix instead of raising an undefined-index notice */
    $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
    $data .= $nsp ? '<' . $nsp . ':' . $name : '<' . $name;
    /* ns */
    if (!isset($ns[$nsp.'='.$ns_uri]) || !$ns[$nsp.'='.$ns_uri]) {
      $ns_uri_esc = str_replace($esc_from, $esc_to, $ns_uri);
      $data .= $nsp ? ' xmlns:'.$nsp.'="'.$ns_uri_esc.'"' : ' xmlns="'.$ns_uri_esc.'"';
      $ns[$nsp.'='.$ns_uri] = true;
      $s['ns'] = $ns;
    }
  }
  foreach ($a as $k => $v) {
    /* the parser hands over decoded values, so markup characters have to be re-escaped */
    $v_esc = str_replace($esc_from, $esc_to, $v);
    $parts = $this->splitURI($k);
    if (count($parts) === 1) {
      $data .= ' '.$k.'="'.$v_esc.'"';
    }
    else {
      $ns_uri = $parts[0];
      $name = $parts[1];
      $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
      $data .= $nsp ? ' '.$nsp.':'.$name.'="'.$v_esc.'"' : ' '.$name.'="'.$v_esc.'"' ;
    }
  }
  $data .= '>';
  $s['o_xml_data'] = $data;
  $s['o_xml_level'] = isset($s['o_xml_level']) ? $s['o_xml_level'] + 1 : 1;
  if (isset($s['p']) && $t == $s['p']) {/* xml container prop */
    $s['p_xml_literal_level'] = isset($s['p_xml_literal_level']) ? $s['p_xml_literal_level'] + 1 : 1;
  }
  $this->updateS($s);
}
||
| 498 | |||
| 499 | /* */ |
||
| 500 | |||
| 501 | function h1Close($t) {/* end of doc */ |
||
| 504 | |||
| 505 | /* */ |
||
| 506 | |||
/**
 * State 2 close handler (expecting a prop, getting a close).
 *
 * Pops the current entry and picks the next state from the new top entry:
 * 2 if the popped entry had 'has_closing_tag' set to a false value, otherwise
 * 4 or 1 (depending on whether the stack is non-empty) if the new top entry
 * has no property, otherwise 5. Does nothing when the stack is empty.
 */
function h2Close($t) {
  $closed = $this->getParentS();
  if (!$closed) {
    return;
  }
  $no_closing_tag = isset($closed['has_closing_tag']) && !$closed['has_closing_tag'];
  $this->popS();
  $this->state = 5;
  $parent = $this->getParentS();/* new s */
  if (!$parent) {
    return;
  }
  if ($no_closing_tag) {
    $this->state = 2;
  }
  elseif (!isset($parent['p']) || !$parent['p']) {/* p close after collection|parseType=Resource|node close after p close */
    $this->state = $this->s_count ? 4 : 1;
  }
}
||
| 525 | |||
| 526 | /* */ |
||
| 527 | |||
| 528 | function h3Close($t) {/* p close */ |
||
| 531 | |||
| 532 | /* */ |
||
| 533 | |||
| 534 | function h4Close($t) {/* empty p | pClose after cdata | pClose after collection */ |
||
| 535 | if ($s = $this->getParentS()) { |
||
| 536 | $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : (isset($s['x_base']) ? $s['x_base'] : ''); |
||
| 537 | if (isset($s['is_coll']) && $s['is_coll']) { |
||
| 538 | $this->addT($s['value'], $this->rdf . 'rest', $this->rdf . 'nil', $s['type'], 'uri'); |
||
| 539 | /* back to collection start */ |
||
| 540 | while ((!isset($s['p']) || ($s['p'] != $t))) { |
||
| 541 | $sub_s = $s; |
||
| 542 | $this->popS(); |
||
| 543 | $s = $this->getParentS(); |
||
| 544 | } |
||
| 545 | /* reification */ |
||
| 546 | if (isset($s['p_id']) && $s['p_id']) { |
||
| 547 | $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $sub_s['value'], $s['type'], $sub_s['type']); |
||
| 569 | |||
| 570 | /* */ |
||
| 571 | |||
| 572 | function h5Close($t) {/* p close */ |
||
| 579 | |||
| 580 | /* */ |
||
| 581 | |||
| 582 | function h6Close($t) { |
||
| 621 | |||
| 622 | /* */ |
||
| 623 | |||
| 624 | function h4Cdata($d) { |
||
| 630 | |||
| 631 | /* */ |
||
| 632 | |||
| 633 | function h6Cdata($d) { |
||
| 642 | |||
| 643 | /* */ |
||
| 644 | |||
| 645 | } |
||
| 646 |
If you suppress an error, we recommend checking for the error condition explicitly: