| Total Complexity | 152 |
| Total Lines | 779 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like RssParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use RssParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 36 | class RssParser |
||
| 37 | { |
||
/**
 * @var DoliDB Database handler.
 */
public $db;

/**
 * @var string Error code (or message)
 */
public $error = '';

// Feed version. Set by feed_start_element() on the root element: '1.0' for an
// rdf root, otherwise the value of the root element's "version" attribute.
public $feed_version;

// Detected feed format: '' until detected, then 'rss' or 'atom'.
private $_format = '';
// URL given to the last parser() call that passed the URL check.
private $_urlRSS;
// Feed-level metadata filled in by parser() and exposed through the getters.
private $_language;
private $_generator;
private $_copyright;
private $_lastbuilddate;
private $_imageurl;
private $_link;
private $_title;
private $_description;
private $_lastfetchdate; // Last successful fetch
// Normalised records built by parser(); each entry has the keys
// link, title, description, pubDate, category, id and author.
private $_rssarray = array();

// Namespace prefix of the element being processed; set by feed_start_element()
// and reset to false at the end of feed_end_element().
private $current_namespace;
// Raw items collected by the xml_parser handlers (one array per item/entry).
public $items = array();
// Item currently being assembled; pushed onto $items when its end tag is seen.
public $current_item = array();
// Data collected by append()/append_content() while the matching state flag below is set.
public $channel = array();
public $textinput = array();
public $image = array();

// Parser state flags maintained by feed_start_element()/feed_end_element().
private $initem;
private $intextinput;
// Name of the Atom content construct being read ('content' is stored as
// 'atom_content'), or false when outside of one.
private $incontent;
private $inimage;
private $inchannel;

// For parsing with xmlparser
public $stack = array(); // parser stack
// Atom element names handled as content constructs by the feed_* handlers.
private $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
||
| 79 | |||
| 80 | |||
/**
 * Constructor.
 *
 * Only stores the database handler on the instance; nothing is fetched or
 * parsed until parser() is called.
 *
 * @param   DoliDB      $db     Database handler
 */
public function __construct($db)
{
    $this->db = $db;
}
||
| 90 | |||
/**
 * Return the detected feed format.
 *
 * @return string   '' while no format has been detected, otherwise 'rss' or 'atom'
 */
public function getFormat()
{
    return $this->_format;
}
||
| 100 | |||
/**
 * Return the feed URL.
 *
 * @return string   URL given to the last parser() call that passed the URL check
 */
public function getUrlRss()
{
    return $this->_urlRSS;
}
||
/**
 * Return the feed language.
 *
 * @return string   Value stored by parser() from the RSS channel "language" field (unset until then)
 */
public function getLanguage()
{
    return $this->_language;
}
||
/**
 * Return the feed generator.
 *
 * @return string   Value stored by parser() from the feed "generator" field (unset until then)
 */
public function getGenerator()
{
    return $this->_generator;
}
||
/**
 * Return the feed copyright notice.
 *
 * @return string   Value stored by parser() from the RSS channel "copyright" field (unset until then)
 */
public function getCopyright()
{
    return $this->_copyright;
}
||
/**
 * Return the last build date announced by the feed.
 *
 * @return string   Value stored by parser(), as found in the feed (not converted to a timestamp)
 */
public function getLastBuildDate()
{
    return $this->_lastbuilddate;
}
||
/**
 * Return the URL of the feed image.
 *
 * @return string   Value stored by parser() (unset until then)
 */
public function getImageUrl()
{
    return $this->_imageurl;
}
||
/**
 * Return the link announced by the feed.
 *
 * @return string   Value stored by parser() from the feed "link" field (unset until then)
 */
public function getLink()
{
    return $this->_link;
}
||
/**
 * Return the feed title.
 *
 * @return string   Value stored by parser() from the feed "title" field (unset until then)
 */
public function getTitle()
{
    return $this->_title;
}
||
| 173 | /** |
||
| 174 | * getDescription |
||
| 175 | * |
||
| 176 | * @return string |
||
| 177 | */ |
||
| 178 | public function getDescription() |
||
| 181 | } |
||
/**
 * Return the date of the last successful fetch.
 *
 * parser() sets it to the modification time of the cache file when the feed
 * was served from cache, or to the current time when a fresh copy was written
 * to the cache.
 *
 * @return int
 */
public function getLastFetchDate()
{
    return $this->_lastfetchdate;
}
||
| 191 | /** |
||
| 192 | * getItems |
||
| 193 | * |
||
| 194 | * @return array |
||
| 195 | */ |
||
| 196 | public function getItems() |
||
| 199 | } |
||
| 200 | |||
/**
 * Fetch (or read from cache) and parse an RSS or Atom feed.
 *
 * The raw feed is read from the cache file when one exists and is recent
 * enough, otherwise it is downloaded with getURLContent(). It is then parsed
 * either with SimpleXML (when the global option EXTERNALRSS_USE_SIMPLEXML is
 * set) or with the xml_parser_* functions, using the feed_* methods of this
 * class as handlers. Feed-level metadata is stored in the private properties
 * exposed by the getters and one normalised record per item is stored in
 * $this->_rssarray.
 *
 * Return codes: -1 for a bad URL, a download error, a missing or failing XML
 * parser or an unreadable feed; -2 when the download throws; 1 on success.
 *
 * @param   string  $urlRSS     Url to parse
 * @param   int     $maxNb      Max nb of records to get (0 for no limit)
 * @param   int     $cachedelay 0=No cache, nb of seconds we accept cache files (cachedir must also be defined)
 * @param   string  $cachedir   Directory where to save cache file (For example $conf->externalrss->dir_temp)
 * @return  int                 Return integer <0 if KO, >0 if OK
 */
public function parser($urlRSS, $maxNb = 0, $cachedelay = 60, $cachedir = '')
{
    include_once DOL_DOCUMENT_ROOT . '/core/lib/files.lib.php';
    include_once DOL_DOCUMENT_ROOT . '/core/lib/geturl.lib.php';

    $rss = '';
    $str = ''; // This will contain content of feed

    // Check parameters
    if (!dol_is_url($urlRSS)) {
        $this->error = "ErrorBadUrl";
        return -1;
    }

    $this->_urlRSS = $urlRSS;
    $newpathofdestfile = $cachedir . '/' . dol_hash($this->_urlRSS, 3); // Force md5 hash (does not contain special chars)
    $nowgmt = dol_now();
    $useSimpleXml = getDolGlobalString('EXTERNALRSS_USE_SIMPLEXML');

    // Search into cache
    $foundintocache = 0;
    if ($cachedelay > 0 && $cachedir) {
        $filedate = dol_filemtime($newpathofdestfile);
        if ($filedate >= ($nowgmt - $cachedelay)) {
            $foundintocache = 1;

            $this->_lastfetchdate = $filedate;
        } else {
            dol_syslog(get_class($this) . "::parser cache file " . $newpathofdestfile . " is not found or older than now - cachedelay (" . $nowgmt . " - " . $cachedelay . ") so we can't use it.");
        }
    }

    // Load file into $str
    if ($foundintocache) { // Cache file found and is not too old
        $str = file_get_contents($newpathofdestfile);
    } else {
        try {
            $result = getURLContent($this->_urlRSS, 'GET', '', 1, array(), array('http', 'https'), 0);

            if (!empty($result['content'])) {
                $str = $result['content'];
            } elseif (!empty($result['curl_error_msg'])) {
                $this->error = 'Error retrieving URL ' . $this->_urlRSS . ' - ' . $result['curl_error_msg'];
                return -1;
            }
        } catch (Exception $e) {
            $this->error = 'Error retrieving URL ' . $this->_urlRSS . ' - ' . $e->getMessage();
            return -2;
        }
    }

    if ($str !== false) {
        // Convert $str into xml
        if ($useSimpleXml) {
            libxml_use_internal_errors(false);
            if (LIBXML_VERSION < 20900) {
                // Avoid load of external entities (security problem).
                // Required only if LIBXML_VERSION < 20900
                // @phan-suppress-next-line PhanDeprecatedFunctionInternal
                libxml_disable_entity_loader(true);
            }

            $rss = simplexml_load_string($str, "SimpleXMLElement", LIBXML_NOCDATA);
        } else {
            if (!function_exists('xml_parser_create')) {
                $this->error = 'Function xml_parser_create are not supported by your PHP';
                return -1;
            }

            try {
                // @phan-suppress-next-line PhanTypeMismatchArgumentInternalProbablyReal
                $xmlparser = xml_parser_create(null);

                // Validate the handle before configuring it, so a failed creation
                // is reported instead of being passed to xml_parser_set_option().
                if (!is_resource($xmlparser) && !is_object($xmlparser)) {
                    $this->error = "ErrorFailedToCreateParser";
                    return -1;
                }

                xml_parser_set_option($xmlparser, XML_OPTION_CASE_FOLDING, 0);
                xml_parser_set_option($xmlparser, XML_OPTION_SKIP_WHITE, 1);
                xml_parser_set_option($xmlparser, XML_OPTION_TARGET_ENCODING, "UTF-8");

                xml_set_object($xmlparser, $this);
                // @phan-suppress-next-line PhanUndeclaredFunctionInCallable
                xml_set_element_handler($xmlparser, 'feed_start_element', 'feed_end_element');
                // @phan-suppress-next-line PhanUndeclaredFunctionInCallable
                xml_set_character_data_handler($xmlparser, 'feed_cdata');

                // The handlers fill $this->channel, $this->items, ... as a side effect.
                xml_parse($xmlparser, $str, false);

                xml_parser_free($xmlparser);

                $rss = $this;
            } catch (Exception $e) {
                $rss = null;
            }
        }
    }

    if (!$rss) {
        $this->error = 'ErrorFailedToLoadRSSFile';
        return -1;
    }

    // Save file into cache
    if (empty($foundintocache) && $cachedir) {
        dol_syslog(get_class($this) . "::parser cache file " . $newpathofdestfile . " is saved onto disk.");
        if (!dol_is_dir($cachedir)) {
            dol_mkdir($cachedir);
        }
        $fp = fopen($newpathofdestfile, 'w');
        if ($fp) {
            fwrite($fp, $str);
            fclose($fp);
            dolChmod($newpathofdestfile);

            $this->_lastfetchdate = $nowgmt;
        } else {
            // A cache failure is not fatal: log it instead of printing into the page output.
            dol_syslog(get_class($this) . '::parser Error, failed to open file ' . $newpathofdestfile . ' for write', LOG_ERR);
        }
    }

    unset($str); // Free memory

    if (empty($rss->_format)) { // If format not detected automatically
        $rss->_format = 'rss';
        if (empty($rss->channel)) {
            $rss->_format = 'atom';
        }
    }

    $items = array();

    // Save description entries
    if ($rss->_format == 'rss') {
        if ($useSimpleXml) {
            // SimpleXML element names are case-sensitive, hence 'lastBuildDate'.
            $channelTags = array(
                'language' => '_language',
                'generator' => '_generator',
                'copyright' => '_copyright',
                'lastBuildDate' => '_lastbuilddate',
                'link' => '_link',
                'title' => '_title',
                'description' => '_description',
            );
            foreach ($channelTags as $tag => $property) {
                if (!empty($rss->channel->{$tag})) {
                    $this->{$property} = sanitizeVal((string) $rss->channel->{$tag});
                }
            }
            if (!empty($rss->channel->image->url[0])) {
                $this->_imageurl = sanitizeVal((string) $rss->channel->image->url[0]);
            }

            $items = $rss->channel->item; // With simplexml
        } else {
            // The xml_parser handlers lowercase every element name.
            $channelKeys = array(
                'language' => '_language',
                'generator' => '_generator',
                'copyright' => '_copyright',
                'lastbuilddate' => '_lastbuilddate',
                'link' => '_link',
                'title' => '_title',
                'description' => '_description',
            );
            foreach ($channelKeys as $key => $property) {
                if (!empty($rss->channel[$key])) {
                    $this->{$property} = sanitizeVal((string) $rss->channel[$key]);
                }
            }
            if (!empty($rss->image['url'])) {
                $this->_imageurl = sanitizeVal((string) $rss->image['url']);
            }

            $items = $rss->items; // With xmlparse
        }
    } elseif ($rss->_format == 'atom') {
        if ($useSimpleXml) {
            if (!empty($rss->generator)) {
                $this->_generator = sanitizeVal((string) $rss->generator);
            }
            // Guard on the same field that is read.
            if (!empty($rss->modified)) {
                $this->_lastbuilddate = sanitizeVal((string) $rss->modified);
            }
            // NOTE(review): in SimpleXML "href" is normally an attribute ($rss->link['href']) - confirm this lookup.
            if (!empty($rss->link->href)) {
                $this->_link = sanitizeVal((string) $rss->link->href);
            }
            if (!empty($rss->title)) {
                $this->_title = sanitizeVal((string) $rss->title);
            }
            if (!empty($rss->description)) {
                $this->_description = sanitizeVal((string) $rss->description);
            }

            $tmprss = xml2php($rss);
            $items = $tmprss['entry']; // With simplexml
        } else {
            if (!empty($rss->channel['generator'])) {
                $this->_generator = sanitizeVal((string) $rss->channel['generator']);
            }
            if (!empty($rss->channel['modified'])) {
                $this->_lastbuilddate = sanitizeVal((string) $rss->channel['modified']);
            }
            if (!empty($rss->channel['link'])) {
                $this->_link = sanitizeVal((string) $rss->channel['link']);
            }
            if (!empty($rss->channel['title'])) {
                $this->_title = sanitizeVal((string) $rss->channel['title']);
            }

            if (!empty($rss->channel)) {
                $this->_imageurl = sanitizeVal($this->getAtomImageUrl($rss->channel));
            }

            $items = $rss->items; // With xmlparse
        }
    }

    $i = 0;

    // Loop on each record. SimpleXML returns a Traversable object, not an array.
    if (is_array($items) || $items instanceof Traversable) {
        foreach ($items as $item) {
            if ($rss->_format == 'rss') {
                if ($useSimpleXml) {
                    $itemLink = sanitizeVal((string) $item->link);
                    $itemTitle = sanitizeVal((string) $item->title);
                    $itemDescription = sanitizeVal((string) $item->description);
                    $itemPubDate = sanitizeVal((string) $item->pubDate);
                    $itemId = '';
                    $itemAuthor = '';
                } else {
                    // Every field is optional in a feed item: default to '' instead of raising a warning.
                    $itemLink = sanitizeVal((string) ($item['link'] ?? ''));
                    $itemTitle = sanitizeVal((string) ($item['title'] ?? ''));
                    $itemDescription = sanitizeVal((string) ($item['description'] ?? ''));
                    $itemPubDate = sanitizeVal((string) ($item['pubdate'] ?? ''));
                    $itemId = sanitizeVal((string) ($item['guid'] ?? ''));
                    $itemAuthor = sanitizeVal((string) ($item['author'] ?? ''));
                }

                // Loop on each category
                // NOTE(review): $item is an array or a SimpleXMLElement here, so this test looks like it never passes - confirm.
                $itemCategory = array();
                if (!empty($item->category) && is_array($item->category)) {
                    foreach ($item->category as $cat) {
                        $itemCategory[] = (string) $cat;
                    }
                }
            } elseif ($rss->_format == 'atom') {
                $itemLink = (isset($item['link']) ? sanitizeVal((string) $item['link']) : '');
                $itemTitle = sanitizeVal((string) ($item['title'] ?? ''));
                $itemDescription = sanitizeVal($this->getAtomItemDescription($item));
                $itemPubDate = sanitizeVal((string) ($item['created'] ?? ''));
                $itemId = sanitizeVal((string) ($item['id'] ?? ''));
                $itemAuthor = sanitizeVal((string) (!empty($item['author']) ? $item['author'] : ($item['author_name'] ?? '')));
                $itemCategory = array();
            } else {
                $itemLink = '';
                $itemTitle = '';
                $itemDescription = '';
                $itemPubDate = '';
                $itemId = '';
                $itemAuthor = '';
                $itemCategory = array();
                print 'ErrorBadFeedFormat';
            }

            // Add record to result array
            $this->_rssarray[$i] = array(
                'link' => $itemLink,
                'title' => $itemTitle,
                'description' => $itemDescription,
                'pubDate' => $itemPubDate,
                'category' => $itemCategory,
                'id' => $itemId,
                'author' => $itemAuthor
            );

            $i++;

            // 0 means "no limit"; otherwise stop once exactly $maxNb records are stored.
            if ($maxNb > 0 && $i >= $maxNb) {
                break; // We get all records we want
            }
        }
    }

    return 1;
}
||
| 533 | |||
| 534 | |||
| 535 | |||
// phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
/**
 * Triggered when opened tag is found (handler registered with xml_set_element_handler()).
 *
 * The first element seen identifies the feed format (rdf/rss => 'rss',
 * feed => 'atom'). Later elements update the parsing state flags, record
 * Atom links, or are pushed on the element stack.
 *
 * @param   mixed   $p          XML parser calling the handler (not used)
 * @param   string  $element    Tag
 * @param   array   $attrs      Attributes of tags
 * @return  void
 */
public function feed_start_element($p, $element, $attrs)
{
    // phpcs:enable
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    // check for a namespace, and split if found
    $ns = false;
    if (strpos($element, ':')) {
        list($ns, $el) = explode(':', $element, 2);
    }
    if ($ns and $ns != 'rdf') {
        $this->current_namespace = $ns;
    }

    // if feed type isn't set, then this is first element of feed identify feed from root element
    if (empty($this->_format)) {
        if ($el == 'rdf') {
            $this->_format = 'rss';
            $this->feed_version = '1.0';
        } elseif ($el == 'rss') {
            $this->_format = 'rss';
            // The version attribute may be absent: do not raise an undefined key warning.
            $this->feed_version = $attrs['version'] ?? '';
        } elseif ($el == 'feed') {
            $this->_format = 'atom';
            // Atom 1.0 root elements carry no version attribute.
            $this->feed_version = $attrs['version'] ?? '';
            $this->inchannel = true;
        }
        return;
    }

    if ($el == 'channel') {
        $this->inchannel = true;
    } elseif ($el == 'item' || $el == 'entry') {
        $this->initem = true;
        if (isset($attrs['rdf:about'])) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    } elseif ($this->_format == 'rss' && $this->current_namespace == '' && $el == 'textinput') {
        // if we're in the default namespace of an RSS feed,
        // record textinput or image fields
        $this->intextinput = true;
    } elseif ($this->_format == 'rss' && $this->current_namespace == '' && $el == 'image') {
        $this->inimage = true;
    } elseif ($this->_format == 'atom' && in_array($el, $this->_CONTENT_CONSTRUCTS)) {
        // handle atom content constructs
        // avoid clashing w/ RSS mod_content
        if ($el == 'content') {
            $el = 'atom_content';
        }

        $this->incontent = $el;
    } elseif ($this->_format == 'atom' && $this->incontent) {
        // if inside an Atom content construct (e.g. content or summary) field treat tags as text
        // if tags are inlined, then flatten
        $attrs_str = implode(' ', array_map('rss_map_attrs', array_keys($attrs), array_values($attrs)));

        $this->append_content("<$element $attrs_str>");

        array_unshift($this->stack, $el);
    } elseif ($this->_format == 'atom' && $el == 'link') {
        // Atom support many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.
        if (isset($attrs['rel']) && $attrs['rel'] == 'alternate') {
            $link_el = 'link';
        } elseif (!isset($attrs['rel'])) {
            $link_el = 'link';
        } else {
            $link_el = 'link_' . $attrs['rel'];
        }

        // A link element without href contributes an empty string instead of a warning.
        $this->append($link_el, $attrs['href'] ?? '');
    } else {
        // set stack[0] to current element
        array_unshift($this->stack, $el);
    }
}
||
| 623 | |||
| 624 | |||
// phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
/**
 * Triggered when character data is found (handler registered with xml_set_character_data_handler()).
 *
 * Inside an Atom content construct the text is appended to that construct;
 * anywhere else it is appended to the field named after the element stack.
 *
 * @param   mixed   $p      XML parser calling the handler (not used)
 * @param   string  $text   Character data
 * @return  void
 */
public function feed_cdata($p, $text)
{
    // phpcs:enable
    if ($this->_format != 'atom' || !$this->incontent) {
        // Field name is the stack read from outermost to innermost element, joined with '_'.
        $fieldName = implode('_', array_reverse($this->stack));
        $this->append($fieldName, $text);
        return;
    }

    $this->append_content($text);
}
||
| 643 | |||
// phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
/**
 * Triggered when closed tag is found (handler registered with xml_set_element_handler()).
 *
 * Closes the state opened by feed_start_element(): stores a finished item,
 * clears the matching state flag or pops the element stack, then resets the
 * current namespace.
 *
 * @param   mixed   $p      XML parser calling the handler (not used)
 * @param   string  $el     Tag
 * @return  void
 */
public function feed_end_element($p, $el)
{
    // phpcs:enable
    $tag = strtolower($el);

    $isRss = ($this->_format == 'rss');
    $isAtom = ($this->_format == 'atom');
    $inDefaultNamespace = ($this->current_namespace == '');

    if ($tag == 'item' || $tag == 'entry') {
        // The item is complete: store it and start from a clean one.
        $this->items[] = $this->current_item;
        $this->current_item = array();
        $this->initem = false;
    } elseif ($isRss && $inDefaultNamespace && $tag == 'textinput') {
        $this->intextinput = false;
    } elseif ($isRss && $inDefaultNamespace && $tag == 'image') {
        $this->inimage = false;
    } elseif ($isAtom && in_array($tag, $this->_CONTENT_CONSTRUCTS)) {
        $this->incontent = false;
    } elseif ($tag == 'channel' || $tag == 'feed') {
        $this->inchannel = false;
    } elseif ($isAtom && $this->incontent) {
        // Inline markup inside an Atom content construct is flattened to text:
        // emit a closing tag when it matches the open element, else a self-closed one.
        $markup = ($this->stack[0] == $tag) ? "</$tag>" : "<$tag />";
        $this->append_content($markup);

        array_shift($this->stack);
    } else {
        array_shift($this->stack);
    }

    $this->current_namespace = false;
}
||
| 685 | |||
| 686 | |||
/**
 * Append a string to another one, without warning when the target is not defined yet.
 *
 * The target is passed by reference and is modified in place.
 *
 * @param   string  $str1   Target string (created as "" when not set)
 * @param   string  $str2   String to append
 * @return  string          The concatenated string
 */
public function concat(&$str1, $str2 = "")
{
    $str1 = (isset($str1) ? $str1 : "") . $str2;

    return $str1;
}
||
| 702 | |||
// phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
/**
 * Append text to the content construct currently being read.
 *
 * The text goes to the current item when inside an item, otherwise to the
 * channel when inside a channel; it is dropped in any other state.
 *
 * @param   string  $text       Text
 * @return  void
 */
public function append_content($text)
{
    // phpcs:enable
    $field = $this->incontent;

    if (!empty($this->initem)) {
        $this->concat($this->current_item[$field], $text);
        return;
    }

    if (!empty($this->inchannel)) {
        $this->concat($this->channel[$field], $text);
    }
}
||
| 719 | |||
/**
 * Smart append - field and namespace aware.
 *
 * Appends $text to field $el of the structure matching the current parsing
 * state. When a namespace is active the field is stored one level deeper,
 * under the namespace prefix.
 *
 * @param   string  $el     Field name
 * @param   string  $text   Text to append
 * @return  void
 */
public function append($el, $text)
{
    if (!$el) {
        return;
    }

    $namespace = $this->current_namespace;
    $namespaced = !empty($namespace);

    // State flag => target property, in priority order. The order differs
    // between namespaced and default-namespace elements.
    if ($namespaced) {
        $targets = array(
            'initem' => 'current_item',
            'inchannel' => 'channel',
            'intextinput' => 'textinput',
            'inimage' => 'image',
        );
    } else {
        $targets = array(
            'initem' => 'current_item',
            'intextinput' => 'textinput',
            'inimage' => 'image',
            'inchannel' => 'channel',
        );
    }

    foreach ($targets as $flag => $bucket) {
        if (empty($this->{$flag})) {
            continue;
        }

        // Only the first active state receives the text.
        if ($namespaced) {
            $this->concat($this->{$bucket}[$namespace][$el], $text);
        } else {
            $this->concat($this->{$bucket}[$el], $text);
        }
        return;
    }
}
||
| 754 | |||
/**
 * Return a description/summary for one item from an ATOM feed.
 *
 * Uses the item's "summary" when set, otherwise its "atom_content". All HTML
 * except a few simple block elements is stripped, newlines are removed, and
 * the result is cut to $maxlength with "..." appended when it was longer.
 *
 * The length is counted in characters when the mbstring functions are
 * available, so a multi-byte UTF-8 character is never cut in half; without
 * mbstring it falls back to a byte count.
 *
 * @param   array   $item       A parsed item of an ATOM feed
 * @param   int     $maxlength  (optional) The maximum length for the description
 * @return  string              A summary description
 */
private function getAtomItemDescription(array $item, $maxlength = 500)
{
    $result = "";

    if (isset($item['summary'])) {
        $result = $item['summary'];
    } elseif (isset($item['atom_content'])) {
        $result = $item['atom_content'];
    }

    // remove all HTML elements that can possible break the maximum size of a tooltip,
    // like headings, image, video etc. and allow only simple style elements
    $result = strip_tags($result, "<br><p><ul><ol><li>");

    $result = str_replace("\n", "", $result);

    if (function_exists('mb_strlen') && function_exists('mb_substr')) {
        // Character-aware truncation keeps the output valid UTF-8.
        if (mb_strlen($result, 'UTF-8') > $maxlength) {
            $result = mb_substr($result, 0, $maxlength, 'UTF-8') . "...";
        }
    } elseif (strlen($result) > $maxlength) {
        $result = substr($result, 0, $maxlength) . "...";
    }

    return $result;
}
||
| 785 | |||
| 786 | /** |
||
| 787 | * Return a URL to a image of the given ATOM feed |
||
| 788 | * |
||
| 789 | * @param array $feed The ATOM feed that possible contain a link to a logo or icon |
||
| 790 | * @return string A URL to a image from a ATOM feed when found, otherwise a empty string |
||
| 791 | */ |
||
| 792 | private function getAtomImageUrl(array $feed) |
||
| 815 | } |
||
| 816 | } |
||
| 817 | |||
| 818 | /* |
||
| 819 | * A method for the xml_set_external_entity_ref_handler() |
||
| 820 | * |
||
| 821 | * @param XMLParser $parser |
||
| 822 | * @param string $ent |
||
| 823 | * @param string|false $base |
||
| 824 | * @param string $sysID |
||
| 825 | * @param string|false $pubID |
||
| 826 | * @return bool |
||
| 827 | function extEntHandler($parser, $ent, $base, $sysID, $pubID) { |
||
| 828 | print 'extEntHandler ran'; |
||
| 829 | return true; |
||
| 830 | } |
||
| 831 | */ |
||
| 832 | |||
| 890 |