Complex classes like ARC2_RdfaExtractor often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ARC2_RdfaExtractor, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | class ARC2_RdfaExtractor extends ARC2_RDFExtractor { |
||
14 | |||
15 | function __construct($a = '', &$caller) { |
||
18 | |||
19 | function ARC2_RdfaExtractor($a = '', &$caller) { |
||
22 | |||
23 | function __init() { |
||
26 | |||
27 | /* */ |
||
28 | |||
/**
 * Entry point of the RDFa extraction.
 *
 * Does nothing (and returns 0) unless the caller has flagged 'rdfa' among
 * its detected formats. Otherwise it builds the initial evaluation context
 * from the document base and hands the root node to processNode() at
 * level 0. The document base is used as-is; xml:base on the root node is
 * not consulted here.
 *
 * @return int|null 0 when RDFa was not detected, nothing otherwise.
 */
function extractRDF() {
  $rdfa_detected = isset($this->caller->detected_formats['rdfa']);
  if (!$rdfa_detected) {
    return 0;
  }
  $start_node = $this->getRootNode();
  $doc_base = $this->getDocBase();
  /* the document base doubles as the initial parent subject ('p_s') */
  $this->processNode(
    $start_node,
    array(
      'base' => $doc_base,
      'p_s' => $doc_base,
      'p_o' => '',
      'ns' => array(),
      'inco_ts' => array(),
      'lang' => '',
    ),
    0
  );
}
||
45 | |||
46 | /* */ |
||
47 | |||
/**
 * Picks the node at which extraction starts.
 *
 * @return array The first entry of $this->nodes whose 'tag' is 'html', or
 *               the entry at index 0 when no such node exists.
 */
function getRootNode() {
  foreach ($this->nodes as $candidate) {
    if ($candidate['tag'] != 'html') {
      continue;
    }
    return $candidate;
  }
  /* no html element found: fall back to the first parsed node */
  return $this->nodes[0];
}
||
56 | |||
57 | /* */ |
||
58 | |||
59 | function processNode($n, $ct, $level) { |
||
266 | |||
267 | /* */ |
||
268 | |||
/**
 * Resolves every token of a list-valued attribute to a URI.
 *
 * The attribute value is read via v() from $n['a'] and split on any run of
 * whitespace (space, tab, CR, LF, ...), not only on the space character, so
 * values laid out over several lines are tokenised correctly. Each token is
 * resolved through xURI(); tokens that yield no URI are dropped.
 *
 * @param array  $n    Node; its attribute map is expected under 'a'.
 * @param array  $ct   Parent context (unused here, kept for the signature).
 * @param array  $lct  Local context; 'base' and 'ns' are read from it.
 * @param string $attr Attribute name (also passed to xURI() as its type).
 * @return array List of resolved URIs, in document order.
 */
function getAttributeURIs($n, $ct, $lct, $attr) {
  $raw = $this->v($attr, '', $n['a']);
  $tokens = $raw ? preg_split('/\s+/', $raw, -1, PREG_SPLIT_NO_EMPTY) : array();
  $r = array();
  foreach ($tokens as $token) {
    $token = trim($token);
    /* falsy tokens (empty, "0") are skipped, exactly as before */
    if (!$token) continue;
    list($uri, $sub_v) = $this->xURI($token, $lct['base'], $lct['ns'], $attr, $lct);
    if ($uri) {
      $r[] = $uri;
    }
  }
  return $r;
}
||
280 | |||
281 | /* */ |
||
282 | |||
/**
 * Builds the literal object (value, lang, datatype) for a node.
 *
 * Value selection, first match wins:
 *  1. a 'content' attribute (entity-decoded when possible);
 *  2. the plain text, when it equals the serialized content;
 *  3. the serialized content, when it contains no '<' or '>';
 *  4. the plain text, when a datatype attribute is present and is not
 *     rdf:XMLLiteral;
 *  5. otherwise the serialized content with namespace/lang declarations
 *     injected, typed as rdf:XMLLiteral.
 *
 * @param array $n   Node; attributes are expected under 'a'.
 * @param array $lct Local context; 'base', 'ns' and 'lang' are read.
 * @param array $ct  Parent context (unused here, kept for the signature).
 * @return array Map with the keys 'value', 'lang' and 'datatype'.
 */
function getCurrentObjectLiteral($n, $lct, $ct) {
  $markup = $this->getContent($n);
  $text = $this->getPlainContent($n, 0, 0);
  $can_decode = function_exists('html_entity_decode');
  if ($can_decode) {
    $text = html_entity_decode($text, ENT_QUOTES);
  }
  /* resolve the datatype attribute (if any) to a full URI */
  $datatype = $this->v('datatype', '', $n['a']);
  $resolved = $this->xURI($datatype, $lct['base'], $lct['ns'], '', $lct);
  list($datatype_uri, $unused_rest) = $resolved;
  if ($datatype) {
    $datatype = $datatype_uri;
  }
  $literal = array('value' => '', 'lang' => $lct['lang'], 'datatype' => $datatype);

  if (isset($n['a']['content'])) {
    $literal['value'] = $n['a']['content'];
    if ($can_decode) {
      $literal['value'] = html_entity_decode($literal['value'], ENT_QUOTES);
    }
    return $literal;
  }
  if ($markup == $text) {
    $literal['value'] = $text;
    return $literal;
  }
  if (!preg_match('/[\<\>]/', $markup)) {
    $literal['value'] = $markup;
    return $literal;
  }
  if (isset($n['a']['datatype']) && ($datatype != 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral')) {
    $literal['value'] = $text;
    return $literal;
  }
  /* remaining case: no datatype attribute, or an explicit rdf:XMLLiteral */
  $literal['value'] = $this->injectXMLDeclarations($markup, $lct['ns'], $lct['lang']);
  $literal['datatype'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral';
  return $literal;
}
||
314 | |||
/**
 * Adds namespace and language declarations to an XML literal fragment.
 *
 * Every un-prefixed start tag receives the XHTML default namespace (plus
 * xml:lang when $lang is set). For each non-empty prefix in $ns, the first
 * element using that prefix receives the matching xmlns:prefix declaration.
 * Declarations that end up duplicated inside one tag are then reduced.
 *
 * Prefix, namespace URI and language are inserted literally: characters
 * that are special in a preg_replace() replacement ('\' and '$') or in a
 * pattern are escaped first, so a URI containing e.g. "$1" is no longer
 * expanded as a backreference.
 *
 * @todo proper node rebuilding
 *
 * @param string $val  Serialized XML fragment.
 * @param array  $ns   Map of prefix => namespace URI.
 * @param string $lang Language code, or '' for none.
 * @return string The fragment with declarations injected.
 */
function injectXMLDeclarations($val, $ns, $lang) {
  /* escapes for use inside a preg_replace() replacement string */
  $repl_escapes = array('\\' => '\\\\', '$' => '\\$');
  $lang_code = $lang ? ' xml:lang="' . $lang . '"' : '';
  $lang_repl = strtr($lang_code, $repl_escapes);
  /* ns */
  $val = preg_replace('/<([a-z0-9]+)([\>\s])/is', '<\\1 xmlns="http://www.w3.org/1999/xhtml"' . $lang_repl . '\\2', $val);
  foreach ($ns as $prefix => $uri) {
    if (!$prefix) continue;
    $prefix = (string) $prefix;
    /* only the first element using the prefix gets the declaration */
    $pos = strpos($val, '<' . $prefix . ':');
    if ($pos === false) continue;
    $pattern = '/^(<' . preg_quote($prefix, '/') . '\:[^\>\s]+)/';
    $decl = ' xmlns:' . strtr($prefix, $repl_escapes) . '="' . strtr($uri, $repl_escapes) . '"' . $lang_repl;
    $val = substr($val, 0, $pos) . preg_replace($pattern, '\\1' . $decl, substr($val, $pos));
  }
  /* remove accidentally added xml:lang and xmlns= */
  $val = preg_replace('/(\<[^\>]*)( xml\:lang[^\s\>]+)([^\>]*)(xml\:lang[^\s\>]+)/s', '\\1\\3\\4', $val);
  $val = preg_replace('/(\<[^\>]*)( xmlns=[^\s\>]+)([^\>]*)(xmlns=[^\s\>]+)/s', '\\1\\3\\4', $val);
  return $val;
}
||
329 | |||
330 | /* */ |
||
331 | |||
/**
 * Resolves an attribute token to a URI.
 *
 * Tries, in this order: xBlankCURIE(), xSafeCURIE(), xCURIE(). The first
 * one that yields a truthy result wins. For rel/rev attributes, a reserved
 * XHTML link type is mapped into the XHTML vocabulary namespace, and any
 * other bare token made of letters, digits and dots is rejected (result 0).
 * Everything else is resolved against $base via calcURI().
 *
 * @param string       $v         Token to resolve.
 * @param string       $base      Base URI.
 * @param array        $ns        Map of prefix => namespace URI.
 * @param string       $attr_type Attribute name; 'rel' and 'rev' get the
 *                                reserved-word handling.
 * @param array|string $lct       Local context, forwarded to xSafeCURIE().
 * @return array Pair of (URI or 0, remaining string).
 */
function xURI($v, $base, $ns, $attr_type = '', $lct = '') {
  list($uri, $rest) = $this->xBlankCURIE($v, $base, $ns);
  if ($uri) {
    return array($uri, $rest);
  }
  list($uri, $rest) = $this->xSafeCURIE($v, $base, $ns, $lct);
  if ($uri) {
    return array($uri, $rest);
  }
  list($uri, $rest) = $this->xCURIE($v, $base, $ns);
  if ($uri) {
    return array($uri, $rest);
  }
  if (preg_match('/^(rel|rev)$/', $attr_type)) {
    /* reserved link types live in the XHTML vocabulary namespace */
    if (preg_match('/^\s*(alternate|appendix|bookmark|cite|chapter|contents|copyright|glossary|help|icon|index|last|license|meta|next|p3pv1|prev|role|section|stylesheet|subsection|start|up)(\s|$)/is', $v, $m)) {
      $vocab_uri = 'http://www.w3.org/1999/xhtml/vocab#' . strtolower($m[1]);
      $remainder = preg_replace('/^\s*' . $m[1]. '/is', '', $v);
      return array($vocab_uri, $remainder);
    }
    /* any other bare word in rel/rev is not a URI */
    if (preg_match('/^[a-z0-9\.]+$/i', $v)) {
      return array(0, $v);
    }
  }
  return array($this->calcURI($v, $base), '');
}
||
350 | |||
351 | function xBlankCURIE($v, $base, $ns) { |
||
361 | |||
/**
 * Resolves a safe CURIE (a CURIE wrapped in square brackets).
 *
 * - "[]" resolves to $lct['prev_s'] when a local context is given, else to
 *   $base (the original author notes this should be the current subject).
 * - "[:local]" resolves into the XHTML vocabulary namespace.
 * - "[prefix:local]" resolves via $ns when the prefix is declared.
 *
 * NOTE(review): x() is defined elsewhere; it presumably returns a match
 * array whose index 1 is the first group (or the remainder when the
 * pattern has no groups) - confirm against its definition.
 *
 * @param string       $v    Token to resolve.
 * @param string       $base Base URI.
 * @param array        $ns   Map of prefix => namespace URI.
 * @param array|string $lct  Local context, or '' when unavailable.
 * @return array Pair of (URI or 0, remaining string).
 */
function xSafeCURIE($v, $base, $ns, $lct = '') {
  /* empty */
  $empty_match = $this->x('\[\]', $v);
  if (!empty($empty_match)) {
    $subject = !empty($lct) ? $lct['prev_s'] : $base;/* should be current subject value */
    $rest = $empty_match[1] ? $empty_match[1] : '';
    return array($subject, $rest);
  }
  $curie_match = $this->x('\[([^\:]*)\:([^\]]*)\]', $v);
  if (!empty($curie_match)) {
    $prefix = $curie_match[1];
    if (!$prefix) {
      return array('http://www.w3.org/1999/xhtml/vocab#' . $curie_match[2], '');
    }
    if (isset($ns[$prefix])) {
      return array($ns[$prefix] . $curie_match[2], '');
    }
  }
  return array(0, $v);
}
||
376 | |||
377 | function xCURIE($v, $base, $ns) { |
||
386 | |||
387 | /* */ |
||
388 | |||
389 | } |
||
390 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.