FeedParser::get_link()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 2
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
c 0
b 0
f 0
nc 1
nop 0
dl 0
loc 2
rs 10
1
<?php
2
/**
 * Parses an XML feed document (Atom 1.0 / Atom 0.3, RSS 2.0-style "channel",
 * or RDF) and exposes its title, link, extra links and items.
 *
 * Usage: construct with the raw XML, call init(), then check error() before
 * reading get_title() / get_link() / get_items().
 *
 * NOTE(review): FeedItem_Atom, FeedItem_RSS and clean() are not defined in
 * this file; they are assumed to be provided by the surrounding project.
 */
class FeedParser {
    private $doc;
    private $error;
    private $libxml_errors = array();
    private $items;
    private $link;
    private $title;
    private $type;
    private $xpath;

    const FEED_RDF = 0;
    const FEED_RSS = 1;
    const FEED_ATOM = 2;

    /**
     * Loads the XML and records any fatal libxml errors.
     *
     * Only errors of level LIBXML_ERR_FATAL are kept; the first one becomes
     * the message returned by error(), all of them are available via errors().
     *
     * @param string $data raw feed XML
     */
    public function __construct($data) {
        libxml_use_internal_errors(true);
        libxml_clear_errors();
        $this->doc = new DOMDocument();

        // DOMDocument::loadXML() rejects an empty source with a PHP-level
        // warning (a ValueError on PHP 8) instead of a libxml error, so it
        // could not be captured below. Skip parsing in that case: the document
        // stays empty and init() reports "Unknown/unsupported feed type".
        if ($data !== null && $data !== false && $data !== '') {
            $this->doc->loadXML($data);
        }

        mb_substitute_character("none");

        if (libxml_get_last_error()) {
            foreach (libxml_get_errors() as $libxml_error) {
                if ($libxml_error->level == LIBXML_ERR_FATAL) {
                    $message = $this->format_error($libxml_error);

                    //currently only the first error is reported
                    if (!isset($this->error)) {
                        $this->error = $message;
                    }

                    $this->libxml_errors[] = $message;
                }
            }
        }
        libxml_clear_errors();

        $this->items = array();
    }

    /**
     * Detects the feed type and extracts title, link and items.
     *
     * When no supported root element is found, the error message is set to
     * "Unknown/unsupported feed type" (unless an earlier error is already
     * recorded) and nothing else is populated.
     */
    public function init() {
        $namespaces = array(
            'atom' => 'http://www.w3.org/2005/Atom',
            'atom03' => 'http://purl.org/atom/ns#',
            'media' => 'http://search.yahoo.com/mrss/',
            'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
            'slash' => 'http://purl.org/rss/1.0/modules/slash/',
            'dc' => 'http://purl.org/dc/elements/1.1/',
            'content' => 'http://purl.org/rss/1.0/modules/content/',
            'thread' => 'http://purl.org/syndication/thread/1.0',
        );

        $xpath = new DOMXPath($this->doc);

        foreach ($namespaces as $prefix => $uri) {
            $xpath->registerNamespace($prefix, $uri);
        }

        $this->xpath = $xpath;

        $roots = $xpath->query("(//atom03:feed|//atom:feed|//channel|//rdf:rdf|//rdf:RDF)");
        $root = ($roots && $roots->length > 0) ? $roots->item(0) : null;
        $type = $root ? $this->detect_type($root) : null;

        if ($type === null) {
            if (!isset($this->error)) {
                $this->error = "Unknown/unsupported feed type";
            }
            return;
        }

        $this->type = $type;

        switch ($type) {
        case $this::FEED_ATOM:
            $this->init_atom($xpath);
            break;
        case $this::FEED_RSS:
            $this->init_rss($xpath);
            break;
        case $this::FEED_RDF:
            $this->init_rdf($xpath);
            break;
        }

        if ($this->title) {
            $this->title = trim($this->title);
        }
        if ($this->link) {
            $this->link = trim($this->link);
        }
    }

    /**
     * Maps the matched root element to one of the FEED_* constants.
     *
     * The XPath query that found the element already matched it by namespace,
     * so the decision is made on the local name. Comparing the prefixed tag
     * name would reject valid documents that bind the Atom or RDF namespace
     * to a different prefix (e.g. <a:feed xmlns:a="http://www.w3.org/2005/Atom">).
     *
     * @param DOMElement $root
     * @return int|null FEED_* constant, or null when the element is not recognised
     */
    private function detect_type($root) {
        switch (mb_strtolower($root->localName)) {
        case "rdf":
            return $this::FEED_RDF;
        case "channel":
            return $this::FEED_RSS;
        case "feed":
            return $this::FEED_ATOM;
        }

        return null;
    }

    /**
     * Returns the first node matched by any of the given XPath expressions,
     * trying them in order.
     *
     * @param DOMXPath $xpath
     * @param array $queries XPath expressions, highest priority first
     * @return DOMNode|null
     */
    private function first_match($xpath, $queries) {
        foreach ($queries as $query) {
            $node = $xpath->query($query)->item(0);

            if ($node) {
                return $node;
            }
        }

        return null;
    }

    /**
     * Reads title, link and entries of an Atom feed; the Atom 0.3 namespace
     * is used as a fallback for each of them.
     *
     * @param DOMXPath $xpath
     */
    private function init_atom($xpath) {
        $title = $this->first_match($xpath, array(
            "//atom:feed/atom:title",
            "//atom03:feed/atom03:title",
        ));

        if ($title) {
            $this->title = $title->nodeValue;
        }

        // a link without rel is preferred over rel="alternate"
        $link = $this->first_match($xpath, array(
            "//atom:feed/atom:link[not(@rel)]",
            "//atom:feed/atom:link[@rel='alternate']",
            "//atom03:feed/atom03:link[not(@rel)]",
            "//atom03:feed/atom03:link[@rel='alternate']",
        ));

        if ($link && $link->hasAttributes()) {
            $this->link = $link->getAttribute("href");
        }

        $articles = $xpath->query("//atom:entry");

        if (!$articles || $articles->length == 0) {
            $articles = $xpath->query("//atom03:entry");
        }

        foreach ($articles as $article) {
            array_push($this->items, new FeedItem_Atom($article, $this->doc, $this->xpath));
        }
    }

    /**
     * Reads title, link and items of an RSS "channel" feed. The link is taken
     * from the href attribute when present, otherwise from the element text.
     *
     * @param DOMXPath $xpath
     */
    private function init_rss($xpath) {
        $title = $xpath->query("//channel/title")->item(0);

        if ($title) {
            $this->title = $title->nodeValue;
        }

        $link = $xpath->query("//channel/link")->item(0);

        if ($link) {
            if ($link->getAttribute("href")) {
                $this->link = $link->getAttribute("href");
            } else if ($link->nodeValue) {
                $this->link = $link->nodeValue;
            }
        }

        foreach ($xpath->query("//channel/item") as $article) {
            array_push($this->items, new FeedItem_RSS($article, $this->doc, $this->xpath));
        }
    }

    /**
     * Reads title, link and items of an RDF feed. The RSS 1.0 namespace is
     * registered under the "rssfake" prefix so its elements can be queried.
     *
     * @param DOMXPath $xpath
     */
    private function init_rdf($xpath) {
        $xpath->registerNamespace('rssfake', 'http://purl.org/rss/1.0/');

        $title = $xpath->query("//rssfake:channel/rssfake:title")->item(0);

        if ($title) {
            $this->title = $title->nodeValue;
        }

        $link = $xpath->query("//rssfake:channel/rssfake:link")->item(0);

        if ($link) {
            $this->link = $link->nodeValue;
        }

        foreach ($xpath->query("//rssfake:item") as $article) {
            array_push($this->items, new FeedItem_RSS($article, $this->doc, $this->xpath));
        }
    }

    /**
     * Formats a libxml error object as a single human-readable line.
     *
     * @param LibXMLError|false|null $error
     * @return string formatted message, or "" when $error is empty
     */
    public function format_error($error) {
        if (!$error) {
            return "";
        }

        return sprintf("LibXML error %s at line %d (column %d): %s",
            $error->code, $error->line, $error->column,
            $error->message);
    }

    /**
     * Returns the first recorded error message, transcoded UTF-8 to UTF-8.
     *
     * libxml may have invalid unicode data in error messages.
     * An empty string is passed when no error was recorded, so that null is
     * never handed to UConverter::transcode().
     */
    public function error() {
        $message = isset($this->error) ? $this->error : '';

        return UConverter::transcode($message, 'UTF-8', 'UTF-8');
    }

    /**
     * Returns all fatal libxml errors collected by the constructor.
     *
     * WARNING: may return invalid unicode data
     *
     * @return array list of formatted error strings
     */
    public function errors() {
        return $this->libxml_errors;
    }

    /** Returns the feed link passed through clean(). */
    public function get_link() {
        return clean($this->link);
    }

    /** Returns the feed title passed through clean(). */
    public function get_title() {
        return clean($this->title);
    }

    /** Returns the item objects created by init(). */
    public function get_items() {
        return $this->items;
    }

    /**
     * Collects href values of atom:link elements for Atom and RSS feeds.
     *
     * @param string $rel required rel attribute value; a falsy value selects every link
     * @return array list of cleaned, trimmed href values (empty for other feed types)
     */
    public function get_links($rel) {
        $rv = array();

        switch ($this->type) {
        case $this::FEED_ATOM:
            $query = "//atom:feed/atom:link";
            break;
        case $this::FEED_RSS:
            $query = "//atom:link";
            break;
        default:
            return $rv;
        }

        foreach ($this->xpath->query($query) as $link) {
            if (!$rel || ($link->hasAttribute('rel') && $link->getAttribute('rel') == $rel)) {
                array_push($rv, clean(trim($link->getAttribute('href'))));
            }
        }

        return $rv;
    }
}
246