1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* ownCloud - News |
4
|
|
|
* |
5
|
|
|
* This file is licensed under the Affero General Public License version 3 or |
6
|
|
|
* later. See the COPYING file. |
7
|
|
|
* |
8
|
|
|
* @author Alessandro Cosentino <[email protected]> |
9
|
|
|
* @author Bernhard Posselt <[email protected]> |
10
|
|
|
* @copyright Alessandro Cosentino 2012 |
11
|
|
|
* @copyright Bernhard Posselt 2012, 2014 |
12
|
|
|
*/ |
13
|
|
|
|
14
|
|
|
namespace OCA\News\Fetcher; |
15
|
|
|
|
16
|
|
|
use PicoFeed\Parser\MalFormedXmlException; |
17
|
|
|
use PicoFeed\Reader\Reader; |
18
|
|
|
use PicoFeed\Parser\Parser; |
19
|
|
|
use PicoFeed\Reader\SubscriptionNotFoundException; |
20
|
|
|
use PicoFeed\Reader\UnsupportedFeedFormatException; |
21
|
|
|
use PicoFeed\Client\InvalidCertificateException; |
22
|
|
|
use PicoFeed\Client\InvalidUrlException; |
23
|
|
|
use PicoFeed\Client\MaxRedirectException; |
24
|
|
|
use PicoFeed\Client\MaxSizeException; |
25
|
|
|
use PicoFeed\Client\TimeoutException; |
26
|
|
|
use PicoFeed\Client\ForbiddenException; |
27
|
|
|
use PicoFeed\Client\UnauthorizedException; |
28
|
|
|
|
29
|
|
|
use OCP\IL10N; |
30
|
|
|
use OCP\AppFramework\Utility\ITimeFactory; |
31
|
|
|
|
32
|
|
|
use OCA\News\Db\Item; |
33
|
|
|
use OCA\News\Db\Feed; |
34
|
|
|
use OCA\News\Utility\PicoFeedFaviconFactory; |
35
|
|
|
use OCA\News\Utility\PicoFeedReaderFactory; |
36
|
|
|
|
37
|
|
|
/**
 * Fallback feed fetcher: downloads a feed through the PicoFeed reader and
 * converts the parsed result into News Feed and Item entities.
 */
class FeedFetcher implements IFeedFetcher {

    private $faviconFactory;
    private $reader;
    private $l10n;
    private $time;

    /**
     * @param Reader $reader PicoFeed reader used to discover and parse feeds
     * @param PicoFeedFaviconFactory $faviconFactory factory that builds the
     * favicon fetcher
     * @param IL10N $l10n translator used for the error messages
     * @param ITimeFactory $time provides the current time for new entities
     */
    public function __construct(Reader $reader,
                                PicoFeedFaviconFactory $faviconFactory,
                                IL10N $l10n,
                                ITimeFactory $time){
        $this->faviconFactory = $faviconFactory;
        $this->reader = $reader;
        $this->time = $time;
        $this->l10n = $l10n;
    }


    /**
     * This fetcher handles all the remaining urls therefore always returns true
     *
     * @param string $url the url that should be handled (not inspected)
     * @return bool always true
     */
    public function canHandle($url){
        return true;
    }


    /**
     * Fetch a feed from remote
     * @param string $url remote url of the feed
     * @param boolean $getFavicon if the favicon should also be fetched,
     * defaults to true
     * @param string $lastModified a last modified value from an http header,
     * defaults to null. If lastModified matches the http header from the feed
     * no results are fetched
     * @param string $etag an etag from an http header, defaults to null.
     * If the etag matches the http header from the feed
     * no results are fetched
     * @param bool $fullTextEnabled if true tells the fetcher to enhance the
     * articles by fetching custom enhanced content
     * @param string $basicAuthUser if given, basic auth is set for this feed
     * @param string $basicAuthPassword if given, basic auth is set for this
     * feed. Ignored if user is null or an empty string
     * @throws FetcherException if it fails
     * @return array an array containing the new feed and its items, first
     * element being the Feed and second element being an array of Items.
     * If the remote resource reports that it was not modified, [null, null]
     * is returned instead
     */
    public function fetch($url, $getFavicon=true, $lastModified=null,
                          $etag=null, $fullTextEnabled=false,
                          $basicAuthUser=null, $basicAuthPassword=null) {
        try {
            // credentials are only handed to the reader if a non blank user
            // name was supplied
            $useBasicAuth = $basicAuthUser !== null &&
                trim($basicAuthUser) !== '';

            if ($useBasicAuth) {
                $resource = $this->reader->discover(
                    $url, $lastModified, $etag,
                    $basicAuthUser, $basicAuthPassword
                );
            } else {
                $resource = $this->reader->discover(
                    $url, $lastModified, $etag
                );
            }

            if (!$resource->isModified()) {
                return [null, null];
            }

            // from here on work with the values reported by the response
            $location = $resource->getUrl();
            $etag = $resource->getEtag();
            $content = $resource->getContent();
            $encoding = $resource->getEncoding();
            $lastModified = $resource->getLastModified();

            $parser = $this->reader->getParser($location, $content, $encoding);

            if ($fullTextEnabled) {
                $parser->enableContentGrabber();
            }

            $parsedFeed = $parser->execute();

            $feed = $this->buildFeed(
                $parsedFeed, $url, $getFavicon, $lastModified, $etag, $location
            );

            $items = [];
            foreach ($parsedFeed->getItems() as $parsedItem) {
                $items[] = $this->buildItem($parsedItem, $parsedFeed);
            }

            return [$feed, $items];

        } catch(\Exception $ex){
            throw new FetcherException($this->describeFailure($ex));
        }
    }


    /**
     * Maps an exception raised while fetching to the message that is put
     * into the FetcherException
     *
     * @param \Exception $ex the exception that was caught
     * @return string a translated message for the known PicoFeed exceptions,
     * otherwise the unchanged message of the exception
     */
    private function describeFailure(\Exception $ex) {
        // keep the order of the checks, the first matching type wins
        if ($ex instanceof MalFormedXmlException) {
            return $this->l10n->t('Feed contains invalid XML');
        }

        if ($ex instanceof SubscriptionNotFoundException) {
            return $this->l10n->t('Feed not found: either the website ' .
                'does not provide a feed or blocks access. To rule out ' .
                'blocking, try to download the feed on your server\'s ' .
                'command line using curl: curl http://the-feed.tld');
        }

        if ($ex instanceof UnsupportedFeedFormatException) {
            return $this->l10n->t('Detected feed format is not supported');
        }

        if ($ex instanceof InvalidCertificateException) {
            return $this->l10n->t('SSL Certificate is invalid');
        }

        if ($ex instanceof InvalidUrlException) {
            return $this->l10n->t('Website not found');
        }

        if ($ex instanceof MaxRedirectException) {
            return $this->l10n->t('More redirects than allowed, aborting');
        }

        if ($ex instanceof MaxSizeException) {
            return $this->l10n->t('Bigger than maximum allowed size');
        }

        if ($ex instanceof TimeoutException) {
            return $this->l10n->t('Request timed out');
        }

        if ($ex instanceof UnauthorizedException) {
            return $this->l10n->t('Required credentials for feed were ' .
                'either missing or incorrect');
        }

        if ($ex instanceof ForbiddenException) {
            return $this->l10n->t('Forbidden to access feed');
        }

        return $ex->getMessage();
    }


    /**
     * Decodes HTML entities two times in a row so that double escaped input
     * (e.g. &amp;amp;) ends up as plain text
     *
     * @param string $string the text to decode
     * @return string the decoded text
     */
    private function decodeTwice($string) {
        $flags = ENT_QUOTES | ENT_HTML5;
        $decodedOnce = html_entity_decode($string, $flags, 'UTF-8');

        return html_entity_decode($decodedOnce, $flags, 'UTF-8');
    }


    /**
     * Decides whether an item is written in a right to left language
     *
     * @param object $parsedItem the parsed item, must offer getLanguage()
     * @param object $parsedFeed the parsed feed, must offer getLanguage()
     * @return mixed the result of Parser::isLanguageRTL() for the language of
     * the item or, if the item has none, for the language of the feed
     */
    protected function determineRtl($parsedItem, $parsedFeed) {
        $itemLang = $parsedItem->getLanguage();
        $feedLang = $parsedFeed->getLanguage();

        return Parser::isLanguageRTL($itemLang ? $itemLang : $feedLang);
    }


    /**
     * Creates an unread Item entity from a parsed feed item
     *
     * @param object $parsedItem the item as returned by the parser
     * @param object $parsedFeed the feed the item belongs to, only used to
     * look up the fallback language
     * @return Item the populated item
     */
    protected function buildItem($parsedItem, $parsedFeed) {
        $item = new Item();
        $item->setUnread();
        $item->setUrl($parsedItem->getUrl());
        $item->setGuid($parsedItem->getId());
        $item->setGuidHash($item->getGuid());
        $item->setPubDate($parsedItem->getDate()->getTimestamp());
        $item->setLastModified($this->time->getTime());
        $item->setRtl($this->determineRtl($parsedItem, $parsedFeed));

        // unescape content because angularjs helps against XSS
        $item->setTitle($this->decodeTwice($parsedItem->getTitle()));
        $item->setAuthor($this->decodeTwice($parsedItem->getAuthor()));

        // purification is done in the service layer
        $body = $parsedItem->getContent();

        // mb_detect_encoding() returns false if none of the candidate
        // encodings matches. Passing that on would make the conversion fail,
        // so fall back to UTF-8 in that case.
        // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of
        // PHP 8.2, consider replacing it when raising the minimum version
        $bodyEncoding = mb_detect_encoding($body);
        if ($bodyEncoding === false) {
            $bodyEncoding = 'UTF-8';
        }
        $item->setBody(
            mb_convert_encoding($body, 'HTML-ENTITIES', $bodyEncoding)
        );

        $enclosureUrl = $parsedItem->getEnclosureUrl();
        if ($enclosureUrl) {
            // cast because an url can come without a type and passing null
            // to stripos() is deprecated since PHP 8.1
            $enclosureType = (string) $parsedItem->getEnclosureType();
            $isAudio = stripos($enclosureType, 'audio/') !== false;
            $isVideo = stripos($enclosureType, 'video/') !== false;

            // only audio and video enclosures are stored
            if ($isAudio || $isVideo) {
                $item->setEnclosureMime($enclosureType);
                $item->setEnclosureLink($enclosureUrl);
            }
        }

        $item->generateSearchIndex();

        return $item;
    }


    /**
     * Creates a Feed entity from a parsed feed
     *
     * @param object $parsedFeed the feed as returned by the parser
     * @param string $url the url that was used to add the feed
     * @param bool $getFavicon if true the favicon is looked up using the
     * link of the feed
     * @param string $modified last modified value of the response
     * @param string $etag etag of the response
     * @param string $location the url where the feed was found
     * @return Feed the populated feed
     */
    protected function buildFeed($parsedFeed, $url, $getFavicon, $modified,
                                 $etag, $location) {
        $feed = new Feed();

        // feeds without a site url link to the place where they were found
        $link = $parsedFeed->getSiteUrl();
        if (!$link) {
            $link = $location;
        }

        // unescape content because angularjs helps against XSS
        $title = strip_tags($this->decodeTwice($parsedFeed->getTitle()));
        $feed->setTitle($title);
        $feed->setUrl($url);  // the url used to add the feed
        $feed->setLocation($location);  // the url where the feed was found
        $feed->setLink($link);  // <link> attribute in the feed
        $feed->setLastModified($modified);
        $feed->setEtag($etag);
        $feed->setAdded($this->time->getTime());

        if ($getFavicon) {
            $faviconFetcher = $this->faviconFactory->build();
            $feed->setFaviconLink($faviconFetcher->find($feed->getLink()));
        }

        return $feed;
    }

}
247
|
|
|
|
This error could be the result of:

1. Missing dependencies
PHP Analyzer uses your composer.json file (if available) to determine the dependencies of your project and to determine all the available classes and functions. It expects the composer.json to be in the root folder of your repository.

Are you sure this class is defined by one of your dependencies, or did you maybe not list a dependency in either the require or require-dev section?

2. Missing use statement
PHP does not complain about undefined classes in instanceof checks. For example, the following PHP code will work perfectly fine:

    if ($x instanceof DoesNotExist) {
        // Do something
    }

If you have not tested against this specific condition, such errors might go unnoticed.