codysnider /
tt-rss
| 1 | <?php |
||
/**
 * Feed-entry logic that, per the comments in this class, is common to both
 * RSS and Atom items: author lookup, comment metadata, media: enclosures and
 * category normalisation.
 *
 * All text extracted from the feed is passed through the project-level
 * clean() helper before being returned (clean() is defined elsewhere;
 * presumably it sanitises markup -- confirm against its definition).
 */
abstract class FeedItem_Common extends FeedItem {
    /** @var DOMElement entry node this object wraps (used via getElementsByTagName/removeChild) */
    protected $elem;

    /** @var DOMXPath evaluator used for the namespaced queries below (used via query()) */
    protected $xpath;

    /** @var mixed owning document; only stored here -- presumably a DOMDocument, confirm against callers */
    protected $doc;

    /**
     * Stores the entry node, its document and the XPath evaluator, then
     * tries to drop the entry's <source> element.
     *
     * @param DOMElement $elem  entry node
     * @param mixed      $doc   owning document
     * @param DOMXPath   $xpath XPath evaluator bound to the document
     */
    public function __construct($elem, $doc, $xpath) {
        $this->elem = $elem;
        $this->xpath = $xpath;
        $this->doc = $doc;

        try {
            // getElementsByTagName() matches descendants at any depth, while
            // removeChild() only accepts direct children and throws
            // DOMException otherwise; that failure is deliberately ignored.
            $source = $elem->getElementsByTagName("source")->item(0);

            // we don't need <source> element
            if ($source) {
                $elem->removeChild($source);
            }
        } catch (DOMException $e) {
            // best effort: leave the entry untouched when <source> cannot be removed
        }
    }

    /**
     * @return DOMElement the wrapped entry node
     */
    public function get_element() {
        return $this->elem;
    }

    /**
     * Resolves the entry author.
     *
     * Lookup order: <author><name>, then <author><email>, then the text of
     * <author> itself, and finally every dc:creator child joined by ", ".
     *
     * @return string cleaned author string; "" when nothing matched
     */
    public function get_author() {
        $author = $this->elem->getElementsByTagName("author")->item(0);

        if ($author) {
            $name = $author->getElementsByTagName("name")->item(0);

            if ($name) {
                return clean($name->nodeValue);
            }

            $email = $author->getElementsByTagName("email")->item(0);

            if ($email) {
                return clean($email->nodeValue);
            }

            if ($author->nodeValue) {
                return clean($author->nodeValue);
            }
        }

        $creators = [];

        foreach ($this->xpath->query("dc:creator", $this->elem) as $creator) {
            $creators[] = clean($creator->nodeValue);
        }

        return implode(", ", $creators);
    }

    /**
     * Finds the URL of the entry's comments page.
     *
     * @return string|null cleaned URL, or null when the entry declares none
     */
    public function get_comments_url() {
        //RSS only. Use a query here to avoid namespace clashes (e.g. with slash).
        //might give a wrong result if a default namespace was declared (possible with XPath 2.0)
        $com_url = $this->xpath->query("comments", $this->elem)->item(0);

        if ($com_url) {
            return clean($com_url->nodeValue);
        }

        //Atom Threading Extension (RFC 4685) stuff. Could be used in RSS feeds, so it's in common.
        //'text/html' for type is too restrictive?
        $com_url = $this->xpath->query("atom:link[@rel='replies' and contains(@type,'text/html')]/@href", $this->elem)->item(0);

        if ($com_url) {
            return clean($com_url->nodeValue);
        }

        return null;
    }

    /**
     * Reads the comment count from the first node matching slash:comments,
     * thread:total or the thread:count attribute of an atom "replies" link.
     *
     * @return string|null cleaned node value, or null when no node matched
     */
    public function get_comments_count() {
        //also query for ATE stuff here
        $query = "slash:comments|thread:total|atom:link[@rel='replies']/@thread:count";
        $comments = $this->xpath->query($query, $this->elem)->item(0);

        if ($comments) {
            return clean($comments->nodeValue);
        }

        return null;
    }

    /**
     * Collects enclosures described by media: elements.
     *
     * this is common for both Atom and RSS types and deals with various media: elements
     *
     * Produces, in this order: one enclosure per media:content child of the
     * entry, one per media:group child (built from the group's first
     * media:content; groups without one are skipped), and one per
     * media:thumbnail child (typed "image/generic").
     *
     * @return FeedEnclosure[] possibly empty list
     */
    public function get_enclosures() {
        $encs = [];

        foreach ($this->xpath->query("media:content", $this->elem) as $content) {
            $encs[] = $this->enclosure_from_media_content($content);
        }

        foreach ($this->xpath->query("media:group", $this->elem) as $group) {
            $content = $this->xpath->query("media:content", $group)->item(0);

            if ($content) {
                // the group's own media:description is the fallback title
                $encs[] = $this->enclosure_from_media_content($content, $group);
            }
        }

        foreach ($this->xpath->query("media:thumbnail", $this->elem) as $thumbnail) {
            $enc = new FeedEnclosure();

            $enc->type = "image/generic";
            $enc->link = clean($thumbnail->getAttribute("url"));
            $enc->height = clean($thumbnail->getAttribute("height"));
            $enc->width = clean($thumbnail->getAttribute("width"));

            $encs[] = $enc;
        }

        return $encs;
    }

    /**
     * Builds one enclosure from a media:content node.
     *
     * When the node has no "type" attribute but has "medium", the type is
     * synthesised as lower-cased "<medium>/generic". The title comes from a
     * media:description child of the node, or failing that from a
     * media:description child of $fallback when one is given.
     *
     * @param DOMElement      $content  media:content node
     * @param DOMElement|null $fallback optional node to search for a description
     * @return FeedEnclosure
     */
    private function enclosure_from_media_content($content, $fallback = null) {
        $enc = new FeedEnclosure();

        $enc->type = clean($content->getAttribute("type"));
        $enc->link = clean($content->getAttribute("url"));
        $enc->length = clean($content->getAttribute("length"));
        $enc->height = clean($content->getAttribute("height"));
        $enc->width = clean($content->getAttribute("width"));

        $medium = clean($content->getAttribute("medium"));
        if (!$enc->type && $medium) {
            $enc->type = strtolower("$medium/generic");
        }

        $desc = $this->xpath->query("media:description", $content)->item(0);
        if (!$desc && $fallback) {
            $desc = $this->xpath->query("media:description", $fallback)->item(0);
        }

        if ($desc) {
            $enc->title = clean($desc->nodeValue);
        }

        return $enc;
    }

    /**
     * Counts the element nodes below $node. Despite the name this includes
     * every descendant element, not only direct children.
     *
     * @param DOMElement $node
     * @return int
     */
    public function count_children($node) {
        return $node->getElementsByTagName("*")->length;
    }

    /**
     * Returns the node's text when it contains no elements, otherwise the
     * canonicalised (C14N) XML of the node.
     *
     * @param DOMElement $node
     * @return string
     */
    public function subtree_or_text($node) {
        if ($this->count_children($node) == 0) {
            return $node->nodeValue;
        }

        return $node->c14n();
    }

    /**
     * Turns raw category strings into a sorted, de-duplicated tag list.
     *
     * Each input is split on commas; every piece is lower-cased, trimmed,
     * cleaned, stripped of comma/quote characters, prefixed with "t:" when
     * numeric, and cut to 250 characters. Empty results are dropped.
     *
     * Keys of the returned array are not sequential: asort(), array_filter()
     * and array_unique() all preserve keys.
     *
     * @param array $cats raw category strings
     * @return array normalised category strings
     */
    public static function normalize_categories($cats) {
        $pieces = [];

        foreach ($cats as $rawcat) {
            $pieces = array_merge($pieces, explode(",", $rawcat));
        }

        $normalized = array_map(function ($srccat) {
            $cat = clean(trim(mb_strtolower($srccat)));

            // Strip quote characters (and re-trim) before the numeric check,
            // otherwise a quoted number such as "2019" would pass the check
            // and then turn into a bare numeric tag.
            $cat = trim(preg_replace('/[,\'\"]/', "", $cat));

            // we don't support numeric tags
            if (is_numeric($cat)) {
                $cat = 't:' . $cat;
            }

            if (DB_TYPE == "mysql") {
                // Replace code points above U+FFFF with U+FFFD; presumably the
                // MySQL column cannot store 4-byte UTF-8 -- confirm against schema.
                $cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat);
            }

            if (mb_strlen($cat) > 250) {
                $cat = mb_substr($cat, 0, 250);
            }

            return $cat;
        }, $pieces);

        // Drop empty tags (from "a,,b", "" or quote-only input) and the null
        // that preg_replace() yields with /u on malformed UTF-8.
        $normalized = array_filter($normalized, function ($cat) {
            return strlen((string) $cat) > 0;
        });

        asort($normalized);

        return array_unique($normalized);
    }
}
||
| 211 |