|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* ownCloud - News |
|
4
|
|
|
* |
|
5
|
|
|
* This file is licensed under the Affero General Public License version 3 or |
|
6
|
|
|
* later. See the COPYING file. |
|
7
|
|
|
* |
|
8
|
|
|
* @author Alessandro Cosentino <[email protected]> |
|
9
|
|
|
* @author Bernhard Posselt <[email protected]> |
|
10
|
|
|
* @copyright Alessandro Cosentino 2012 |
|
11
|
|
|
* @copyright Bernhard Posselt 2012, 2014 |
|
12
|
|
|
*/ |
|
13
|
|
|
|
|
14
|
|
|
namespace OCA\News\Fetcher; |
|
15
|
|
|
|
|
16
|
|
|
use Exception; |
|
17
|
|
|
|
|
18
|
|
|
use PicoFeed\Parser\MalFormedXmlException; |
|
19
|
|
|
use PicoFeed\Reader\Reader; |
|
20
|
|
|
use PicoFeed\Parser\Parser; |
|
21
|
|
|
use PicoFeed\Reader\SubscriptionNotFoundException; |
|
22
|
|
|
use PicoFeed\Reader\UnsupportedFeedFormatException; |
|
23
|
|
|
use PicoFeed\Client\InvalidCertificateException; |
|
24
|
|
|
use PicoFeed\Client\InvalidUrlException; |
|
25
|
|
|
use PicoFeed\Client\MaxRedirectException; |
|
26
|
|
|
use PicoFeed\Client\MaxSizeException; |
|
27
|
|
|
use PicoFeed\Client\TimeoutException; |
|
28
|
|
|
use PicoFeed\Client\ForbiddenException; |
|
29
|
|
|
use PicoFeed\Client\UnauthorizedException; |
|
30
|
|
|
|
|
31
|
|
|
use OCP\IL10N; |
|
32
|
|
|
use OCP\AppFramework\Utility\ITimeFactory; |
|
33
|
|
|
|
|
34
|
|
|
use OCA\News\Db\Item; |
|
35
|
|
|
use OCA\News\Db\Feed; |
|
36
|
|
|
use OCA\News\Utility\PicoFeedFaviconFactory; |
|
37
|
|
|
use OCA\News\Utility\PicoFeedReaderFactory; |
|
38
|
|
|
|
|
39
|
|
|
class FeedFetcher implements IFeedFetcher { |
|
40
|
|
|
|
|
41
|
|
|
/** @var PicoFeedFaviconFactory factory whose build() result is used to find favicons */
private $faviconFactory;

/** @var Reader PicoFeed reader used to discover feeds and obtain parsers */
private $reader;

/** @var IL10N translator used for the error messages of this fetcher */
private $l10n;

/** @var ITimeFactory time source used to timestamp feeds and items */
private $time;

/**
 * Stores the collaborators this fetcher works with.
 *
 * @param Reader $reader reader used to discover and parse feeds
 * @param PicoFeedFaviconFactory $faviconFactory factory for favicon finders
 * @param IL10N $l10n translator for error messages
 * @param ITimeFactory $time time source for timestamps
 */
public function __construct(Reader $reader,
                            PicoFeedFaviconFactory $faviconFactory,
                            IL10N $l10n,
                            ITimeFactory $time) {
    $this->reader = $reader;
    $this->faviconFactory = $faviconFactory;
    $this->l10n = $l10n;
    $this->time = $time;
}
|
55
|
|
|
|
|
56
|
|
|
|
|
57
|
|
|
/**
 * Tells whether this fetcher is able to handle the given url.
 *
 * This fetcher is the fallback for every url that no other fetcher
 * handles, so the answer is unconditionally true.
 *
 * @param string $url the url that should be checked (not inspected)
 * @return bool always true
 */
public function canHandle($url) {
    return true;
}
|
63
|
|
|
|
|
64
|
|
|
|
|
65
|
|
|
/**
 * Download a remote feed and convert it into a Feed with its Items.
 *
 * @param string $url remote url of the feed
 * @param boolean $getFavicon if the favicon should be looked up as well,
 * defaults to true
 * @param string $lastModified a last modified value from an http header,
 * defaults to null. It is handed to the reader; if the reader reports the
 * resource as not modified no results are fetched
 * @param string $etag an etag from an http header, defaults to null. It is
 * handed to the reader; if the reader reports the resource as not modified
 * no results are fetched
 * @param bool $fullTextEnabled if true the parser's content grabber is
 * enabled before the feed is parsed
 * @param string $basicAuthUser if given, user and password are passed on
 * to the reader for this feed
 * @param string $basicAuthPassword password that belongs to the user.
 * Ignored if user is null or contains only whitespace
 * @throws FetcherException if it fails
 * @return array an array with two elements: the Feed first and an array of
 * its Items second. Both elements are null if the resource was not modified
 */
public function fetch($url, $getFavicon = true, $lastModified = null,
                      $etag = null, $fullTextEnabled = false,
                      $basicAuthUser = null, $basicAuthPassword = null) {
    try {
        // credentials are only forwarded when a non blank user was given
        $withCredentials = $basicAuthUser !== null &&
            trim($basicAuthUser) !== '';

        $resource = $withCredentials
            ? $this->reader->discover($url, $lastModified, $etag,
                                      $basicAuthUser, $basicAuthPassword)
            : $this->reader->discover($url, $lastModified, $etag);

        // nothing changed since the last fetch, so there is nothing to parse
        if (!$resource->isModified()) {
            return [null, null];
        }

        $feedLocation = $resource->getUrl();
        $responseEtag = $resource->getEtag();
        $rawContent = $resource->getContent();
        $charset = $resource->getEncoding();
        $responseLastModified = $resource->getLastModified();

        $parser = $this->reader->getParser(
            $feedLocation, $rawContent, $charset
        );

        if ($fullTextEnabled) {
            $parser->enableContentGrabber();
        }

        $parsedFeed = $parser->execute();

        $feed = $this->buildFeed(
            $parsedFeed, $url, $getFavicon, $responseLastModified,
            $responseEtag, $feedLocation
        );

        $items = [];
        foreach ($parsedFeed->getItems() as $parsedItem) {
            $items[] = $this->buildItem($parsedItem, $parsedFeed);
        }

        return [$feed, $items];

    } catch (Exception $ex) {
        // handleError() ends by throwing a FetcherException
        $this->handleError($ex, $url);
    }
}
|
131
|
|
|
|
|
132
|
|
|
|
|
133
|
|
|
/**
 * Turns an exception raised while fetching into a FetcherException.
 *
 * Known PicoFeed exception types are mapped to a translated message. Any
 * other exception keeps its own message.
 *
 * @param Exception $ex the exception that was caught while fetching
 * @param string $url the url that was being fetched, shown in the
 * "feed not found" hint
 * @throws FetcherException always, carrying the resulting message
 */
private function handleError(Exception $ex, $url) {
    // fallback for exception types that are not listed below
    $msg = $ex->getMessage();

    if ($ex instanceof MalFormedXmlException) {
        $msg = $this->l10n->t('Feed contains invalid XML');
    } else if ($ex instanceof SubscriptionNotFoundException) {
        // The url is passed as a parameter rather than being appended to
        // the text: that keeps the translation key constant and stops
        // characters such as % inside the url from being treated as part
        // of the message format.
        // NOTE(review): relies on IL10N::t() substituting printf style
        // placeholders from its second argument - confirm against the
        // OCP version in use
        $msg = $this->l10n->t('Feed not found: either the website ' .
            'does not provide a feed or blocks access. To rule out ' .
            'blocking, try to download the feed on your server\'s ' .
            'command line using curl: curl %s', [$url]);
    } else if ($ex instanceof UnsupportedFeedFormatException) {
        $msg = $this->l10n->t('Detected feed format is not supported');
    } else if ($ex instanceof InvalidCertificateException) {
        // the exception code is interpreted as a curl error number
        $msg = $this->buildCurlSslErrorMessage($ex->getCode());
    } else if ($ex instanceof InvalidUrlException) {
        $msg = $this->l10n->t('Website not found');
    } else if ($ex instanceof MaxRedirectException) {
        $msg = $this->l10n->t('More redirects than allowed, aborting');
    } else if ($ex instanceof MaxSizeException) {
        $msg = $this->l10n->t('Bigger than maximum allowed size');
    } else if ($ex instanceof TimeoutException) {
        $msg = $this->l10n->t('Request timed out');
    } else if ($ex instanceof UnauthorizedException) {
        $msg = $this->l10n->t('Required credentials for feed were ' .
            'either missing or incorrect');
    } else if ($ex instanceof ForbiddenException) {
        $msg = $this->l10n->t('Forbidden to access feed');
    }

    throw new FetcherException($msg);
}
|
164
|
|
|
|
|
165
|
|
|
/**
 * Maps a curl SSL related error number to a translated message.
 *
 * @param int $errorCode the curl error number
 * @return string the translated message, a generic one for numbers that
 * are not listed
 */
private function buildCurlSslErrorMessage($errorCode) {
    switch ($errorCode) {
        case 35: // CURLE_SSL_CONNECT_ERROR
            $message = $this->l10n->t(
                'Certificate error: A problem occurred ' .
                'somewhere in the SSL/TLS handshake. Could be ' .
                'certificates (file formats, paths, permissions), ' .
                'passwords, and others.'
            );
            break;
        case 51: // CURLE_PEER_FAILED_VERIFICATION
            $message = $this->l10n->t(
                'Certificate error: The remote server\'s SSL ' .
                'certificate or SSH md5 fingerprint was deemed not OK.'
            );
            break;
        case 58: // CURLE_SSL_CERTPROBLEM
            $message = $this->l10n->t(
                'Certificate error: Problem with the local client ' .
                'certificate.'
            );
            break;
        case 59: // CURLE_SSL_CIPHER
            $message = $this->l10n->t(
                'Certificate error: Couldn\'t use specified cipher.'
            );
            break;
        case 60: // CURLE_SSL_CACERT
            $message = $this->l10n->t(
                'Certificate error: Peer certificate cannot be ' .
                'authenticated with known CA certificates.'
            );
            break;
        case 64: // CURLE_USE_SSL_FAILED
            $message = $this->l10n->t(
                'Certificate error: Requested FTP SSL level failed.'
            );
            break;
        case 66: // CURLE_SSL_ENGINE_INITFAILED
            $message = $this->l10n->t(
                'Certificate error: Initiating the SSL Engine failed.'
            );
            break;
        case 77: // CURLE_SSL_CACERT_BADFILE
            $message = $this->l10n->t(
                'Certificate error: Problem with reading the SSL CA ' .
                'cert (path? access rights?)'
            );
            break;
        case 83: // CURLE_SSL_ISSUER_ERROR
            $message = $this->l10n->t(
                'Certificate error: Issuer check failed'
            );
            break;
        default:
            $message = $this->l10n->t('Unknown SSL certificate error!');
            break;
    }

    return $message;
}
|
214
|
|
|
|
|
215
|
|
|
/**
 * Decodes HTML entities in a string two times in a row, so that entities
 * which were themselves escaped (e.g. &amp;lt;) end up as plain characters.
 *
 * @param string $string the text to decode
 * @return string the text after two decoding passes
 */
private function decodeTwice($string) {
    $flags = ENT_QUOTES | ENT_HTML5;

    $decodedOnce = html_entity_decode($string, $flags, 'UTF-8');

    return html_entity_decode($decodedOnce, $flags, 'UTF-8');
}
|
223
|
|
|
|
|
224
|
|
|
|
|
225
|
|
|
/**
 * Decides whether an item is written in a right-to-left language.
 *
 * The language of the item is used if it has one, otherwise the language
 * of the feed is used.
 *
 * @param object $parsedItem parsed item providing getLanguage()
 * @param object $parsedFeed parsed feed providing getLanguage()
 * @return mixed the result of Parser::isLanguageRTL() for the chosen
 * language
 */
protected function determineRtl($parsedItem, $parsedFeed) {
    $itemLanguage = $parsedItem->getLanguage();
    $feedLanguage = $parsedFeed->getLanguage();

    $language = $itemLanguage ? $itemLanguage : $feedLanguage;

    return Parser::isLanguageRTL($language);
}
|
235
|
|
|
|
|
236
|
|
|
|
|
237
|
|
|
/**
 * Creates an Item entity from a parsed feed item.
 *
 * @param object $parsedItem the parsed item to read the values from
 * @param object $parsedFeed the parsed feed the item belongs to, only
 * passed on to determineRtl()
 * @return Item the new item, marked as unread and with its search index
 * generated
 */
protected function buildItem($parsedItem, $parsedFeed) {
    $item = new Item();
    $item->setUnread();
    $item->setUrl($parsedItem->getUrl());
    $item->setGuid($parsedItem->getId());
    // the hash setter is fed with the guid that was stored just above
    $item->setGuidHash($item->getGuid());
    $item->setPubDate($parsedItem->getDate()->getTimestamp());
    $item->setLastModified($this->time->getTime());
    $item->setRtl($this->determineRtl($parsedItem, $parsedFeed));

    // unescape content because angularjs helps against XSS
    $title = $this->decodeTwice($parsedItem->getTitle());
    $item->setTitle($title);
    $author = $this->decodeTwice($parsedItem->getAuthor());
    $item->setAuthor($author);

    // purification is done in the service layer, here the body is only
    // converted from its detected encoding to HTML entities
    $rawBody = $parsedItem->getContent();
    $detectedEncoding = mb_detect_encoding($rawBody);
    $item->setBody(
        mb_convert_encoding($rawBody, 'HTML-ENTITIES', $detectedEncoding)
    );

    // enclosures are only kept if their type mentions audio/ or video/
    $enclosureUrl = $parsedItem->getEnclosureUrl();
    if ($enclosureUrl) {
        $mimeType = $parsedItem->getEnclosureType();
        $isMedia = stripos($mimeType, 'audio/') !== false ||
            stripos($mimeType, 'video/') !== false;

        if ($isMedia) {
            $item->setEnclosureMime($mimeType);
            $item->setEnclosureLink($enclosureUrl);
        }
    }

    $item->generateSearchIndex();

    return $item;
}
|
272
|
|
|
|
|
273
|
|
|
|
|
274
|
|
|
/**
 * Creates a Feed entity from a parsed feed.
 *
 * @param object $parsedFeed the parsed feed to read title and site url from
 * @param string $url the url used to add the feed
 * @param bool $getFavicon if true the favicon is looked up for the link of
 * the feed
 * @param string $modified last modified value stored on the feed
 * @param string $etag etag value stored on the feed
 * @param string $location the url where the feed was found, also used as
 * link if the parsed feed has no site url
 * @return Feed the new feed
 */
protected function buildFeed($parsedFeed, $url, $getFavicon, $modified,
                             $etag, $location) {
    $feed = new Feed();

    // fall back to the feed location if the feed names no site url
    $link = $parsedFeed->getSiteUrl() ?: $location;

    // unescape content because angularjs helps against XSS
    $decodedTitle = $this->decodeTwice($parsedFeed->getTitle());
    $feed->setTitle(strip_tags($decodedTitle));

    $feed->setUrl($url);             // the url used to add the feed
    $feed->setLocation($location);   // the url where the feed was found
    $feed->setLink($link);           // <link> attribute in the feed
    $feed->setHttpLastModified($modified);
    $feed->setHttpEtag($etag);
    $feed->setAdded($this->time->getTime());

    if ($getFavicon) {
        $feed->setFaviconLink(
            $this->faviconFactory->build()->find($feed->getLink())
        );
    }

    return $feed;
}
|
302
|
|
|
|
|
303
|
|
|
} |
|
304
|
|
|
|