Complex classes like ARC2_RdfaExtractor often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ARC2_RdfaExtractor, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 13 | class ARC2_RdfaExtractor extends ARC2_RDFExtractor { |
||
| 14 | |||
| 15 | function __construct($a = '', &$caller) { |
||
| 18 | |||
| 19 | function ARC2_RdfaExtractor($a = '', &$caller) { |
||
| 22 | |||
| 23 | function __init() { |
||
| 26 | |||
| 27 | /* */ |
||
| 28 | |||
/**
 * Entry point of the RDFa extraction run.
 *
 * Bails out with 0 when the caller has no 'rdfa' entry in its
 * detected_formats. Otherwise it looks up the root node and the document
 * base, builds the initial evaluation context and passes the root node to
 * processNode() at level 0. Nothing is returned in that case.
 *
 * NOTE: an earlier, now disabled variant read xml:base from the root node's
 * attributes; the base currently comes from getDocBase() alone.
 */
function extractRDF() {
  $rdfa_detected = isset($this->caller->detected_formats['rdfa']);
  if (!$rdfa_detected) {
    return 0;
  }
  $root_node = $this->getRootNode();
  $doc_base = $this->getDocBase();
  /* initial context: 'base' and 'p_s' both start out as the document base */
  $initial_context = array();
  $initial_context['base'] = $doc_base;
  $initial_context['p_s'] = $doc_base;
  $initial_context['p_o'] = '';
  $initial_context['ns'] = array();
  $initial_context['inco_ts'] = array();
  $initial_context['lang'] = '';
  $this->processNode($root_node, $initial_context, 0);
}
||
| 45 | |||
| 46 | /* */ |
||
| 47 | |||
/**
 * Picks the node at which extraction starts.
 *
 * Returns the first entry of $this->nodes whose 'tag' equals 'html'
 * (loose comparison); when there is none, falls back to $this->nodes[0].
 */
function getRootNode() {
  foreach ($this->nodes as $candidate) {
    if ($candidate['tag'] != 'html') {
      continue;
    }
    return $candidate;
  }
  /* no html element found: use the first node */
  return $this->nodes[0];
}
||
| 56 | |||
| 57 | /* */ |
||
| 58 | |||
| 59 | function processNode($n, $ct, $level) { |
||
| 266 | |||
| 267 | /* */ |
||
| 268 | |||
/**
 * Resolves the space-separated tokens of one attribute to URIs.
 *
 * Reads attribute $attr from $n['a'] via v(), splits it on single spaces
 * and runs every non-empty (trimmed) token through xURI() using the base
 * and namespaces of the local context $lct. Tokens for which xURI() yields
 * a falsy URI are dropped.
 *
 * @param array  $n    node; its attributes are expected under 'a'
 * @param array  $ct   not used by this method
 * @param array  $lct  local context; 'base' and 'ns' are read here
 * @param string $attr attribute name (also passed to xURI() as attribute type)
 * @return array list of resolved URIs, possibly empty
 */
function getAttributeURIs($n, $ct, $lct, $attr) {
  $uris = array();
  $raw = $this->v($attr, '', $n['a']);
  if (!$raw) {
    return $uris;
  }
  foreach (explode(' ', $raw) as $token) {
    $token = trim($token);
    if (!$token) {
      /* skip blanks produced by repeated spaces (also skips a bare "0") */
      continue;
    }
    list($uri, $rest) = $this->xURI($token, $lct['base'], $lct['ns'], $attr, $lct);
    if ($uri) {
      $uris[] = $uri;
    }
  }
  return $uris;
}
||
| 280 | |||
| 281 | /* */ |
||
| 282 | |||
/**
 * Builds the literal object (value, lang, datatype) for a node.
 *
 * The value is chosen in this order:
 *  1. the entity-decoded @content attribute, when present;
 *  2. the decoded plain content, when it loosely equals the markup content;
 *  3. the markup content, when it contains neither '<' nor '>';
 *  4. the decoded plain content, when a @datatype other than rdf:XMLLiteral
 *     is given;
 *  5. otherwise the markup content with declarations injected via
 *     injectXMLDeclarations(), typed as rdf:XMLLiteral.
 *
 * @param array $n   node; attributes are expected under 'a'
 * @param array $lct local context; 'base', 'ns' and 'lang' are read here
 * @param array $ct  not used by this method
 * @return array with the keys 'value', 'lang' and 'datatype'
 */
function getCurrentObjectLiteral($n, $lct, $ct) {
  $xml_literal = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral';
  $markup = $this->getContent($n);
  $text = $this->getPlainContent($n, 0, 0);
  $can_decode = function_exists('html_entity_decode');
  if ($can_decode) {
    $text = html_entity_decode($text, ENT_QUOTES);
  }
  /* resolve the datatype attribute; an empty datatype is left untouched */
  $dt = $this->v('datatype', '', $n['a']);
  list($dt_uri, $dt_rest) = $this->xURI($dt, $lct['base'], $lct['ns'], '', $lct);
  if ($dt) {
    $dt = $dt_uri;
  }
  $literal = array('value' => '', 'lang' => $lct['lang'], 'datatype' => $dt);
  /* 1. explicit content attribute */
  if (isset($n['a']['content'])) {
    $literal['value'] = $n['a']['content'];
    if ($can_decode) {
      $literal['value'] = html_entity_decode($literal['value'], ENT_QUOTES);
    }
    return $literal;
  }
  /* 2. markup and plain text agree */
  if ($markup == $text) {
    $literal['value'] = $text;
    return $literal;
  }
  /* 3. no angle brackets in the markup content */
  if (!preg_match('/[\<\>]/', $markup)) {
    $literal['value'] = $markup;
    return $literal;
  }
  /* 4. explicit non-XMLLiteral datatype */
  if (isset($n['a']['datatype']) && ($dt != $xml_literal)) {
    $literal['value'] = $text;
    return $literal;
  }
  /* 5. no datatype attribute, or an explicit XMLLiteral */
  $literal['value'] = $this->injectXMLDeclarations($markup, $lct['ns'], $lct['lang']);
  $literal['datatype'] = $xml_literal;
  return $literal;
}
||
| 314 | |||
/**
 * Adds namespace (and language) declarations to a markup fragment.
 *
 * - Every opening tag without a prefix gets the XHTML default namespace
 *   and, when $lang is set, an xml:lang attribute.
 * - For each non-empty prefix in $ns, the first tag using that prefix gets
 *   a matching xmlns:prefix declaration (plus xml:lang when $lang is set).
 * - Finally, one duplicated xml:lang / xmlns= per tag is removed again,
 *   keeping the later one.
 *
 * @@todo proper node rebuilding
 *
 * @param string $val  markup fragment
 * @param array  $ns   prefix => namespace URI
 * @param string $lang language code, may be empty
 * @return string the fragment with declarations injected
 */
function injectXMLDeclarations($val, $ns, $lang) {
  $lang_code = '';
  if ($lang) {
    $lang_code = ' xml:lang="' . $lang . '"';
  }
  /* default namespace on unprefixed opening tags */
  $xhtml_replacement = '<\\1 xmlns="http://www.w3.org/1999/xhtml"' . $lang_code . '\\2';
  $val = preg_replace('/<([a-z0-9]+)([\>\s])/is', $xhtml_replacement, $val);
  /* prefixed tags: declare each prefix on its first occurrence */
  foreach ($ns as $prefix => $uri) {
    if (!$prefix) {
      continue;
    }
    /* the leading blank shifts offsets by one so that a hit at 0 is truthy */
    $shifted_pos = strpos(' ' . $val, '<' . $prefix . ':');
    if (!$shifted_pos) {
      continue;
    }
    $before = substr($val, 0, $shifted_pos - 1);
    $from_tag = substr($val, $shifted_pos - 1);
    $declaration = '\\1 xmlns:' . $prefix. '="' . $uri . '"' . $lang_code;
    $val = $before . preg_replace('/^(<' . $prefix . '\:[^\>\s]+)/', $declaration, $from_tag);
  }
  /* remove accidentally added xml:lang and xmlns= */
  $cleanup_patterns = array(
    '/(\<[^\>]*)( xml\:lang[^\s\>]+)([^\>]*)(xml\:lang[^\s\>]+)/s',
    '/(\<[^\>]*)( xmlns=[^\s\>]+)([^\>]*)(xmlns=[^\s\>]+)/s',
  );
  foreach ($cleanup_patterns as $pattern) {
    $val = preg_replace($pattern, '\\1\\3\\4', $val);
  }
  return $val;
}
||
| 329 | |||
| 330 | /* */ |
||
| 331 | |||
/**
 * Turns an attribute token into a URI.
 *
 * Tries, in this order, xBlankCURIE(), xSafeCURIE() and xCURIE(); the first
 * one that yields a truthy result wins. For rel/rev attributes a token
 * starting with one of the reserved link-type words is mapped into the
 * XHTML vocabulary namespace, and any other plain word (letters, digits,
 * dots) is rejected with a 0 result. Everything else is resolved against
 * $base via calcURI().
 *
 * @param string $v         token to resolve
 * @param string $base      base URI
 * @param array  $ns        prefix => namespace URI
 * @param string $attr_type attribute name; only 'rel' and 'rev' are special
 * @param mixed  $lct       local context, handed on to xSafeCURIE()
 * @return array two elements: the result (0 when rejected) and a remainder string
 */
function xURI($v, $base, $ns, $attr_type = '', $lct = '') {
  /* CURIE variants, most specific first */
  list($uri, $rest) = $this->xBlankCURIE($v, $base, $ns);
  if ($uri) {
    return array($uri, $rest);
  }
  list($uri, $rest) = $this->xSafeCURIE($v, $base, $ns, $lct);
  if ($uri) {
    return array($uri, $rest);
  }
  list($uri, $rest) = $this->xCURIE($v, $base, $ns);
  if ($uri) {
    return array($uri, $rest);
  }
  /* rel/rev: reserved words map to the XHTML vocabulary, other plain words are ignored */
  if (preg_match('/^(rel|rev)$/', $attr_type)) {
    if (preg_match('/^\s*(alternate|appendix|bookmark|cite|chapter|contents|copyright|glossary|help|icon|index|last|license|meta|next|p3pv1|prev|role|section|stylesheet|subsection|start|up)(\s|$)/is', $v, $m)) {
      $vocab_uri = 'http://www.w3.org/1999/xhtml/vocab#' . strtolower($m[1]);
      $remainder = preg_replace('/^\s*' . $m[1]. '/is', '', $v);
      return array($vocab_uri, $remainder);
    }
    if (preg_match('/^[a-z0-9\.]+$/i', $v)) {
      return array(0, $v);
    }
  }
  /* fallback: plain (possibly relative) URI */
  return array($this->calcURI($v, $base), '');
}
||
| 350 | |||
| 351 | function xBlankCURIE($v, $base, $ns) { |
||
| 361 | |||
/**
 * Handles the bracketed ("safe") CURIE forms via the x() matching helper.
 *
 * - '[]' yields $lct['prev_s'] when a local context is given, else $base,
 *   together with whatever x() reports at index 1 (or '').
 * - '[:ref]' (empty prefix) maps into the XHTML vocabulary namespace.
 * - '[prefix:ref]' is expanded when the prefix is a key of $ns.
 * - Anything else is reported as array(0, $v).
 *
 * NOTE(review): x() is defined outside this view; index 1 of its result
 * presumably carries the unmatched remainder for the '[]' case - confirm.
 *
 * @param string $v    token to inspect
 * @param string $base base URI
 * @param array  $ns   prefix => namespace URI
 * @param mixed  $lct  local context; 'prev_s' is read when it is non-empty
 * @return array two elements: the result (0 when nothing matched) and a remainder string
 */
function xSafeCURIE($v, $base, $ns, $lct = '') {
  /* empty */
  $empty_match = $this->x('\[\]', $v);
  if ($empty_match) {
    /* should be current subject value */
    $subject = $lct ? $lct['prev_s'] : $base;
    if ($empty_match[1]) {
      return array($subject, $empty_match[1]);
    }
    return array($subject, '');
  }
  /* [prefix:reference] */
  $curie_match = $this->x('\[([^\:]*)\:([^\]]*)\]', $v);
  if ($curie_match) {
    if (!$curie_match[1]) {
      return array('http://www.w3.org/1999/xhtml/vocab#' . $curie_match[2], '');
    }
    if (isset($ns[$curie_match[1]])) {
      return array($ns[$curie_match[1]] . $curie_match[2], '');
    }
  }
  return array(0, $v);
}
||
| 376 | |||
| 377 | function xCURIE($v, $base, $ns) { |
||
| 386 | |||
| 387 | /* */ |
||
| 388 | |||
| 389 | } |
||
| 390 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.