1
|
|
|
<?php |
2
|
|
|
/**
 * ownCloud - News
 *
 * This file is licensed under the Affero General Public License version 3 or
 * later. See the COPYING file.
 *
 * @author Alessandro Cosentino <[email protected]>
 * @author Bernhard Posselt <[email protected]>
 * @copyright Alessandro Cosentino 2012
 * @copyright Bernhard Posselt 2012, 2014
 */
13
|
|
|
|
14
|
|
|
namespace OCA\News\Fetcher; |
15
|
|
|
|
16
|
|
|
use Exception; |
17
|
|
|
|
18
|
|
|
use PicoFeed\Parser\MalFormedXmlException; |
19
|
|
|
use PicoFeed\Reader\Reader; |
20
|
|
|
use PicoFeed\Parser\Parser; |
21
|
|
|
use PicoFeed\Reader\SubscriptionNotFoundException; |
22
|
|
|
use PicoFeed\Reader\UnsupportedFeedFormatException; |
23
|
|
|
use PicoFeed\Client\InvalidCertificateException; |
24
|
|
|
use PicoFeed\Client\InvalidUrlException; |
25
|
|
|
use PicoFeed\Client\MaxRedirectException; |
26
|
|
|
use PicoFeed\Client\MaxSizeException; |
27
|
|
|
use PicoFeed\Client\TimeoutException; |
28
|
|
|
use PicoFeed\Client\ForbiddenException; |
29
|
|
|
use PicoFeed\Client\UnauthorizedException; |
30
|
|
|
|
31
|
|
|
use OCP\IL10N; |
32
|
|
|
use OCP\AppFramework\Utility\ITimeFactory; |
33
|
|
|
|
34
|
|
|
use OCA\News\Db\Item; |
35
|
|
|
use OCA\News\Db\Feed; |
36
|
|
|
use OCA\News\Utility\PicoFeedFaviconFactory; |
37
|
|
|
use OCA\News\Utility\PicoFeedReaderFactory; |
38
|
|
|
|
39
|
|
|
/**
 * Default feed fetcher: downloads a feed through the PicoFeed reader, parses
 * it and converts the result into News Feed and Item entities.
 */
class FeedFetcher implements IFeedFetcher {

    /** @var PicoFeedFaviconFactory builds the favicon fetcher used in buildFeed() */
    private $faviconFactory;

    /** @var Reader PicoFeed reader used to discover and parse feeds */
    private $reader;

    /** @var IL10N translator for the user facing error messages */
    private $l10n;

    /** @var ITimeFactory source of the timestamps stored on feeds and items */
    private $time;

    /**
     * @param Reader $reader PicoFeed reader used to discover and parse feeds
     * @param PicoFeedFaviconFactory $faviconFactory factory for favicon fetchers
     * @param IL10N $l10n translator for error messages
     * @param ITimeFactory $time time source
     */
    public function __construct(Reader $reader,
                                PicoFeedFaviconFactory $faviconFactory,
                                IL10N $l10n,
                                ITimeFactory $time) {
        $this->faviconFactory = $faviconFactory;
        $this->reader = $reader;
        $this->time = $time;
        $this->l10n = $l10n;
    }


    /**
     * This fetcher handles all the remaining urls therefore always returns true
     *
     * @param string $url the url that should be fetched (not inspected)
     * @return bool always true
     */
    public function canHandle($url) {
        return true;
    }


    /**
     * Fetch a feed from remote
     *
     * @param string $url remote url of the feed
     * @param boolean $getFavicon if the favicon should also be fetched,
     * defaults to true
     * @param string $lastModified a last modified value from an http header,
     * defaults to null. It is handed to the reader together with the etag;
     * if the reader reports the resource as not modified no results are
     * fetched
     * @param string $etag an etag from an http header, defaults to null.
     * It is handed to the reader together with lastModified; if the reader
     * reports the resource as not modified no results are fetched
     * @param bool $fullTextEnabled if true tells the fetcher to enhance the
     * articles by fetching custom enhanced content
     * @param string $basicAuthUser if given, basic auth is set for this feed
     * @param string $basicAuthPassword if given, basic auth is set for this
     * feed. Ignored if user is null or an empty string
     * @throws FetcherException if it fails
     * @return array an array containing the new feed and its items, first
     * element being the Feed and second element being an array of Items.
     * Both elements are null if the feed was not modified
     */
    public function fetch($url, $getFavicon = true, $lastModified = null,
                          $etag = null, $fullTextEnabled = false,
                          $basicAuthUser = null, $basicAuthPassword = null) {
        try {
            // credentials are only forwarded if a non blank user was given
            if ($basicAuthUser !== null && trim($basicAuthUser) !== '') {
                $resource = $this->reader->discover($url, $lastModified, $etag,
                                                    $basicAuthUser,
                                                    $basicAuthPassword);
            } else {
                $resource = $this->reader->discover($url, $lastModified, $etag);
            }

            if (!$resource->isModified()) {
                return [null, null];
            }

            $location = $resource->getUrl();
            $etag = $resource->getEtag();
            $content = $resource->getContent();
            $encoding = $resource->getEncoding();
            $lastModified = $resource->getLastModified();

            $parser = $this->reader->getParser($location, $content, $encoding);

            if ($fullTextEnabled) {
                $parser->enableContentGrabber();
            }

            $parsedFeed = $parser->execute();

            $feed = $this->buildFeed(
                $parsedFeed, $url, $getFavicon, $lastModified, $etag, $location
            );

            $items = [];
            foreach ($parsedFeed->getItems() as $item) {
                $items[] = $this->buildItem($item, $parsedFeed);
            }

            return [$feed, $items];

        } catch (Exception $ex) {
            // handleError() always throws, so nothing is returned from here
            $this->handleError($ex);
        }

    }


    /**
     * Converts any exception raised while fetching into a FetcherException.
     * Known PicoFeed exceptions get a translated message, everything else
     * keeps the message of the original exception.
     *
     * @param Exception $ex the exception that was caught
     * @throws FetcherException always
     */
    private function handleError(Exception $ex) {
        $msg = $ex->getMessage();

        if ($ex instanceof MalFormedXmlException) {
            $msg = $this->l10n->t('Feed contains invalid XML');
        } elseif ($ex instanceof SubscriptionNotFoundException) {
            $msg = $this->l10n->t('Feed not found: either the website ' .
                'does not provide a feed or blocks access. To rule out ' .
                'blocking, try to download the feed on your server\'s ' .
                'command line using curl: curl http://the-feed.tld');
        } elseif ($ex instanceof UnsupportedFeedFormatException) {
            $msg = $this->l10n->t('Detected feed format is not supported');
        } elseif ($ex instanceof InvalidCertificateException) {
            $msg = $this->buildCurlSslErrorMessage($ex->getCode());
        } elseif ($ex instanceof InvalidUrlException) {
            $msg = $this->l10n->t('Website not found');
        } elseif ($ex instanceof MaxRedirectException) {
            $msg = $this->l10n->t('More redirects than allowed, aborting');
        } elseif ($ex instanceof MaxSizeException) {
            $msg = $this->l10n->t('Bigger than maximum allowed size');
        } elseif ($ex instanceof TimeoutException) {
            $msg = $this->l10n->t('Request timed out');
        } elseif ($ex instanceof UnauthorizedException) {
            $msg = $this->l10n->t('Required credentials for feed were ' .
                'either missing or incorrect');
        } elseif ($ex instanceof ForbiddenException) {
            $msg = $this->l10n->t('Forbidden to access feed');
        }

        throw new FetcherException($msg);
    }


    /**
     * Maps the code of an InvalidCertificateException to a translated
     * message. The codes are treated as curl error codes (see the constant
     * names next to each case).
     *
     * @param int $errorCode the code taken from the exception
     * @return string the translated error message
     */
    private function buildCurlSslErrorMessage($errorCode) {
        switch ($errorCode) {
            case 35: // CURLE_SSL_CONNECT_ERROR
                return $this->l10n->t(
                    'Certificate error: A problem occurred ' .
                    'somewhere in the SSL/TLS handshake. Could be ' .
                    'certificates (file formats, paths, permissions), ' .
                    'passwords, and others.'
                );
            case 51: // CURLE_PEER_FAILED_VERIFICATION
                return $this->l10n->t(
                    'Certificate error: The remote server\'s SSL ' .
                    'certificate or SSH md5 fingerprint was deemed not OK.'
                );
            case 58: // CURLE_SSL_CERTPROBLEM
                return $this->l10n->t(
                    'Certificate error: Problem with the local client ' .
                    'certificate.'
                );
            case 59: // CURLE_SSL_CIPHER
                return $this->l10n->t(
                    'Certificate error: Couldn\'t use specified cipher.'
                );
            case 60: // CURLE_SSL_CACERT
                return $this->l10n->t(
                    'Certificate error: Peer certificate cannot be ' .
                    'authenticated with known CA certificates.'
                );
            case 64: // CURLE_USE_SSL_FAILED
                return $this->l10n->t(
                    'Certificate error: Requested FTP SSL level failed.'
                );
            case 66: // CURLE_SSL_ENGINE_INITFAILED
                return $this->l10n->t(
                    'Certificate error: Initiating the SSL Engine failed.'
                );
            case 77: // CURLE_SSL_CACERT_BADFILE
                return $this->l10n->t(
                    'Certificate error: Problem with reading the SSL CA ' .
                    'cert (path? access rights?)'
                );
            case 83: // CURLE_SSL_ISSUER_ERROR
                return $this->l10n->t(
                    'Certificate error: Issuer check failed'
                );
            default:
                return $this->l10n->t('Unknown SSL certificate error!');
        }
    }


    /**
     * Decodes HTML entities two times in a row so that double escaped
     * input such as &amp;amp; ends up as a plain character.
     *
     * @param string|null $string the text to decode, null is treated as ''
     * @return string the decoded text
     */
    private function decodeTwice($string) {
        // cast first: parsed values may be null and passing null to
        // html_entity_decode() is deprecated as of PHP 8.1
        return html_entity_decode(
            html_entity_decode(
                (string) $string, ENT_QUOTES | ENT_HTML5, 'UTF-8'
            ),
            ENT_QUOTES | ENT_HTML5, 'UTF-8'
        );
    }


    /**
     * Decides whether an item is written in a right-to-left language. The
     * language of the item wins, the language of the feed is the fallback.
     *
     * @param object $parsedItem the parsed item, must offer getLanguage()
     * @param object $parsedFeed the parsed feed, must offer getLanguage()
     * @return bool the result of Parser::isLanguageRTL()
     */
    protected function determineRtl($parsedItem, $parsedFeed) {
        $itemLang = $parsedItem->getLanguage();
        $feedLang = $parsedFeed->getLanguage();

        if ($itemLang) {
            return Parser::isLanguageRTL($itemLang);
        } else {
            return Parser::isLanguageRTL($feedLang);
        }
    }


    /**
     * Creates an unread Item entity from a parsed feed item.
     *
     * @param object $parsedItem the item as returned by the parser
     * @param object $parsedFeed the feed the item belongs to, used for the
     * language fallback
     * @return Item the new item
     */
    protected function buildItem($parsedItem, $parsedFeed) {
        $item = new Item();
        $item->setUnread();
        $item->setUrl($parsedItem->getUrl());
        $item->setGuid($parsedItem->getId());
        $item->setGuidHash($item->getGuid());
        $item->setPubDate($parsedItem->getDate()->getTimestamp());
        $item->setLastModified($this->time->getTime());
        $item->setRtl($this->determineRtl($parsedItem, $parsedFeed));

        // unescape content because angularjs helps against XSS
        $item->setTitle($this->decodeTwice($parsedItem->getTitle()));
        $item->setAuthor($this->decodeTwice($parsedItem->getAuthor()));

        // purification is done in the service layer
        $body = (string) $parsedItem->getContent();

        // mb_detect_encoding() returns false if no encoding matches. Handing
        // that to mb_convert_encoding() yields false on older PHP versions
        // and a ValueError on PHP 8, which is not an Exception and would
        // therefore bypass the error handling in fetch().
        // NOTE(review): UTF-8 is an assumed fallback - confirm that the
        // parser hands out UTF-8 content
        $bodyEncoding = mb_detect_encoding($body);
        if ($bodyEncoding === false) {
            $bodyEncoding = 'UTF-8';
        }

        // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of
        // PHP 8.2, kept here to not change the stored body format
        $body = mb_convert_encoding($body, 'HTML-ENTITIES', $bodyEncoding);
        $item->setBody($body);

        // only audio and video enclosures are stored
        $enclosureUrl = $parsedItem->getEnclosureUrl();
        if ($enclosureUrl) {
            $enclosureType = $parsedItem->getEnclosureType();
            if (stripos($enclosureType, 'audio/') !== false ||
                stripos($enclosureType, 'video/') !== false
            ) {
                $item->setEnclosureMime($enclosureType);
                $item->setEnclosureLink($enclosureUrl);
            }
        }

        $item->generateSearchIndex();

        return $item;
    }


    /**
     * Creates a Feed entity from a parsed feed.
     *
     * @param object $parsedFeed the feed as returned by the parser
     * @param string $url the url used to add the feed
     * @param bool $getFavicon if true the favicon is looked up as well
     * @param string $modified the last modified value of the fetched resource
     * @param string $etag the etag of the fetched resource
     * @param string $location the url where the feed was found
     * @return Feed the new feed
     */
    protected function buildFeed($parsedFeed, $url, $getFavicon, $modified,
                                 $etag, $location) {
        $feed = new Feed();

        $link = $parsedFeed->getSiteUrl();

        // fall back to the feed location if the feed names no site url
        if (!$link) {
            $link = $location;
        }

        // unescape content because angularjs helps against XSS
        $title = strip_tags($this->decodeTwice($parsedFeed->getTitle()));
        $feed->setTitle($title);
        $feed->setUrl($url);  // the url used to add the feed
        $feed->setLocation($location);  // the url where the feed was found
        $feed->setLink($link);  // <link> attribute in the feed
        $feed->setHttpLastModified($modified);
        $feed->setHttpEtag($etag);
        $feed->setAdded($this->time->getTime());

        if ($getFavicon) {
            $faviconFetcher = $this->faviconFactory->build();
            $favicon = $faviconFetcher->find($feed->getLink());
            $feed->setFaviconLink($favicon);
        }

        return $feed;
    }

}
304
|
|
|
|