Passed
Push — master ( 25fe32...0fb74a )
by Morris
13:02 queued 10s
created

RefreshWebcalJob::cleanURL()   B

Complexity

Conditions 9
Paths 33

Size

Total Lines 27
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 17
nc 33
nop 1
dl 0
loc 27
rs 8.0555
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
/**
4
 * @copyright 2018 Georg Ehrke <[email protected]>
5
 *
6
 * @author Georg Ehrke <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\DAV\BackgroundJob;
25
26
use GuzzleHttp\HandlerStack;
27
use GuzzleHttp\Middleware;
28
use OC\BackgroundJob\Job;
29
use OCA\DAV\CalDAV\CalDavBackend;
30
use OCP\AppFramework\Utility\ITimeFactory;
31
use OCP\Http\Client\IClientService;
32
use OCP\IConfig;
33
use OCP\ILogger;
34
use Psr\Http\Message\RequestInterface;
35
use Psr\Http\Message\ResponseInterface;
36
use Sabre\DAV\Exception\BadRequest;
37
use Sabre\DAV\PropPatch;
38
use Sabre\DAV\Xml\Property\Href;
39
use Sabre\VObject\Component;
40
use Sabre\VObject\DateTimeParser;
41
use Sabre\VObject\InvalidDataException;
42
use Sabre\VObject\ParseException;
43
use Sabre\VObject\Reader;
44
use Sabre\VObject\Splitter\ICalendar;
45
46
class RefreshWebcalJob extends Job {
47
48
	/** @var CalDavBackend */
49
	private $calDavBackend;
50
51
	/** @var IClientService */
52
	private $clientService;
53
54
	/** @var IConfig */
55
	private $config;
56
57
	/** @var ILogger */
58
	private $logger;
59
60
	/** @var ITimeFactory */
61
	private $timeFactory;
62
63
	/** @var array */
64
	private $subscription;
65
66
	/**
	 * Stores the collaborators this job works with.
	 *
	 * @param CalDavBackend $calDavBackend backend used to read subscriptions and write cached calendar objects
	 * @param IClientService $clientService factory for the HTTP client used to fetch feeds
	 * @param IConfig $config app configuration (read for the local access switch)
	 * @param ILogger $logger logger for warnings and exceptions
	 * @param ITimeFactory $timeFactory time source used to decide whether a refresh is due
	 */
	public function __construct(CalDavBackend $calDavBackend, IClientService $clientService, IConfig $config, ILogger $logger, ITimeFactory $timeFactory) {
		// infrastructure helpers
		$this->timeFactory = $timeFactory;
		$this->logger = $logger;
		$this->config = $config;

		// services doing the actual work
		$this->clientService = $clientService;
		$this->calDavBackend = $calDavBackend;
	}
82
83
	/**
	 * Runs the job only when the subscription's refresh interval has elapsed.
	 *
	 * The interval is taken from the subscription's 'refreshrate' (an
	 * ISO 8601 duration); when none is stored, one week ('P1W') is used.
	 * Nothing happens if the subscription no longer exists or the stored
	 * refresh rate cannot be parsed.
	 *
	 * @inheritdoc
	 */
	public function execute($jobList, ILogger $logger = null) {
		$subscription = $this->getSubscription($this->argument['principaluri'], $this->argument['uri']);
		if (!$subscription) {
			return;
		}

		$subscriptionId = $subscription['id'];
		// fall back to a weekly refresh when nothing was configured
		$duration = $subscription['refreshrate'] ?? 'P1W';

		try {
			/** @var \DateInterval $parsedDuration */
			$parsedDuration = DateTimeParser::parseDuration($duration);
		} catch(InvalidDataException $ex) {
			$this->logger->logException($ex);
			$this->logger->warning("Subscription $subscriptionId could not be refreshed, refreshrate in database is invalid");
			return;
		}

		$minimumAge = $this->getIntervalFromDateInterval($parsedDuration);
		$secondsSinceLastRun = $this->timeFactory->getTime() - $this->lastRun;

		// only hand over to the regular job execution once the interval has passed
		if ($secondsSinceLastRun > $minimumAge) {
			parent::execute($jobList, $logger);
		}
	}
114
115
	/**
	 * Refreshes the cached calendar objects of a single webcal subscription.
	 *
	 * The feed is downloaded and split into calendar objects; the previously
	 * cached objects are purged and the new ones stored. Depending on the
	 * subscription settings, alarms and attachments are removed and objects
	 * whose last non-timezone component is a VTODO are skipped. Finally the
	 * subscription itself is updated with any collected property changes.
	 *
	 * @param array $argument job argument, read for its 'principaluri' and 'uri' keys
	 */
	protected function run($argument) {
		$subscription = $this->getSubscription($argument['principaluri'], $argument['uri']);
		if (!$subscription) {
			return;
		}

		// property changes collected along the way, applied at the very end
		$mutations = [];

		$webcalData = $this->queryWebcalFeed($subscription, $mutations);
		if (!$webcalData) {
			return;
		}

		// all strip options default to "enabled" when not stored
		$dropTodos = $subscription['striptodos'] ?? 1;
		$dropAlarms = $subscription['stripalarms'] ?? 1;
		$dropAttachments = $subscription['stripattachments'] ?? 1;

		try {
			$splitter = new ICalendar($webcalData, Reader::OPTION_FORGIVING);

			// the old events are only removed after the splitter was created:
			// if the new calendar is broken and `new ICalendar` throws a
			// ParseException, the user keeps seeing the old data
			$this->calDavBackend->purgeAllCachedEventsForSubscription($subscription['id']);

			while ($vObject = $splitter->getNext()) {
				/** @var Component $vObject */
				$uid = null;
				$lastComponentName = null;

				foreach ($vObject->getComponents() as $component) {
					if ($component->name !== 'VTIMEZONE') {
						$uid = $component->{'UID'}->getValue();
						$lastComponentName = $component->name;

						if ($dropAlarms) {
							unset($component->{'VALARM'});
						}
						if ($dropAttachments) {
							unset($component->{'ATTACH'});
						}
					}
				}

				if ($dropTodos && $lastComponentName === 'VTODO') {
					continue;
				}

				$objectUri = $uid . '.ics';
				$serialized = $vObject->serialize();
				try {
					$this->calDavBackend->createCalendarObject($subscription['id'], $objectUri, $serialized, CalDavBackend::CALENDAR_TYPE_SUBSCRIPTION);
				} catch(BadRequest $ex) {
					// a single rejected object must not abort the whole refresh
					$this->logger->logException($ex);
				}
			}

			$suggestedRefreshRate = $this->checkWebcalDataForRefreshRate($subscription, $webcalData);
			if ($suggestedRefreshRate) {
				$mutations['{http://apple.com/ns/ical/}refreshrate'] = $suggestedRefreshRate;
			}

			$this->updateSubscription($subscription, $mutations);
		} catch(ParseException $ex) {
			$subscriptionId = $subscription['id'];

			$this->logger->logException($ex);
			$this->logger->warning("Subscription $subscriptionId could not be refreshed due to a parsing error");
		}
	}
189
190
	/**
	 * Downloads the webcal feed of a subscription and returns it serialized
	 * as iCalendar text.
	 *
	 * The request is sent to the cleaned source URL (see cleanURL()); user and
	 * password contained in the original source URL are passed on as request
	 * authentication. Unless the app value 'webcalAllowLocalAccess' is 'yes',
	 * feeds whose host looks like a local address are refused. If the request
	 * was answered with an unbroken chain of 301 redirects, the last announced
	 * location is recorded in $mutations as the subscription's new source.
	 *
	 * @param array $subscription subscription row, read for its 'id' and 'source' keys
	 * @param array &$mutations receives '{http://calendarserver.org/ns/}source' on permanent redirects
	 * @return null|string serialized calendar, or null if the URL is unusable,
	 *                     local access is denied, the request failed or the
	 *                     response could not be parsed
	 */
	private function queryWebcalFeed(array $subscription, array &$mutations) {
		$client = $this->clientService->newClient();

		$didBreak301Chain = false;
		$latestLocation = null;

		$handlerStack = HandlerStack::create();
		$handlerStack->push(Middleware::mapRequest(function (RequestInterface $request) {
			return $request
				->withHeader('Accept', 'text/calendar, application/calendar+json, application/calendar+xml')
				->withHeader('User-Agent', 'Nextcloud Webcal Crawler');
		}));
		$handlerStack->push(Middleware::mapResponse(function(ResponseInterface $response) use (&$didBreak301Chain, &$latestLocation) {
			// only remember locations as long as every response so far was a 301;
			// the first other status ends the chain for good
			if (!$didBreak301Chain) {
				if ($response->getStatusCode() !== 301) {
					$didBreak301Chain = true;
				} else {
					$latestLocation = $response->getHeader('Location');
				}
			}
			return $response;
		}));

		$allowLocalAccess = $this->config->getAppValue('dav', 'webcalAllowLocalAccess', 'no');
		$subscriptionId = $subscription['id'];
		$url = $this->cleanURL($subscription['source']);
		if ($url === null) {
			return null;
		}

		if ($allowLocalAccess !== 'yes') {
			// host names are case-insensitive and may carry a trailing root dot,
			// so normalise before comparing ("LOCALHOST" and "localhost." are local too)
			$host = rtrim(strtolower((string)parse_url($url, PHP_URL_HOST)), '.');
			// remove brackets from IPv6 addresses
			if (strpos($host, '[') === 0 && substr($host, -1) === ']') {
				$host = substr($host, 1, -1);
			}

			// the last alternative matches IPv6 unique local addresses (fc00::/7) only;
			// it requires the full first group plus colon so that ordinary host
			// names which merely start with "fc" or "fd" are not rejected
			// NOTE(review): only the initial URL is checked here, redirect targets
			// do not appear to be re-checked against these rules - confirm
			if ($host === 'localhost' || substr($host, -6) === '.local' || substr($host, -10) === '.localhost' ||
				preg_match('/(^127\.)|(^192\.168\.)|(^10\.)|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(^::1$)|(^[fF][cCdD][0-9a-fA-F]{2}:)/', $host)) {
				$this->logger->warning("Subscription $subscriptionId was not refreshed because it violates local access rules");
				return null;
			}
		}

		try {
			$params = [
				'allow_redirects' => [
					'redirects' => 10
				],
				'handler' => $handlerStack,
			];

			// credentials are taken from the original source, cleanURL() stripped them
			$user = parse_url($subscription['source'], PHP_URL_USER);
			$pass = parse_url($subscription['source'], PHP_URL_PASS);
			if ($user !== null && $pass !== null) {
				$params['auth'] = [$user, $pass];
			}

			$response = $client->get($url, $params);
			$body = $response->getBody();

			if ($latestLocation) {
				$mutations['{http://calendarserver.org/ns/}source'] = new Href($latestLocation);
			}

			// media types are case-insensitive; drop parameters such as "; charset=utf-8"
			$contentType = $response->getHeader('Content-Type');
			$contentType = strtolower(trim(explode(';', $contentType, 2)[0]));

			try {
				switch($contentType) {
					case 'application/calendar+json':
						$calendar = Reader::readJson($body, Reader::OPTION_FORGIVING);
						break;

					case 'application/calendar+xml':
						$calendar = Reader::readXML($body);
						break;

					case 'text/calendar':
					default:
						$calendar = Reader::read($body);
						break;
				}
			} catch(\Exception $ex) {
				// In case of a parsing error return null
				$this->logger->debug("Subscription $subscriptionId could not be parsed");
				return null;
			}

			return $calendar->serialize();
		} catch(\Exception $ex) {
			$this->logger->logException($ex);
			$this->logger->warning("Subscription $subscriptionId could not be refreshed due to a network error");

			return null;
		}
	}
303
304
	/**
	 * Looks up the subscription with the given uri among the subscriptions of
	 * a principal and remembers it on this job.
	 *
	 * @param string $principalUri principal whose subscriptions are searched
	 * @param string $uri uri of the wanted subscription
	 * @return array|null the first matching subscription, null if there is none
	 */
	private function getSubscription(string $principalUri, string $uri) {
		foreach ($this->calDavBackend->getSubscriptionsForUser($principalUri) as $candidate) {
			if ($candidate['uri'] === $uri) {
				// first match wins
				$this->subscription = $candidate;
				return $this->subscription;
			}
		}

		return null;
	}
326
327
	/**
	 * Converts a DateInterval into a number of seconds.
	 *
	 * Months are counted as 30 days and years as 365 days.
	 *
	 * @param \DateInterval $interval
	 * @return int
	 */
	private function getIntervalFromDateInterval(\DateInterval $interval):int {
		// DateInterval property => length of one unit in seconds
		$secondsPerUnit = [
			's' => 1,
			'i' => 60,
			'h' => 3600,
			'd' => 86400,
			'm' => 2592000,
			'y' => 31536000,
		];

		$total = 0;
		foreach ($secondsPerUnit as $unit => $seconds) {
			$total += $interval->{$unit} * $seconds;
		}

		return $total;
	}
341
342
	/**
	 * Determines the refresh rate suggested by the webcal feed.
	 *
	 * A suggestion is only looked for when the subscription has no refresh
	 * rate stored. REFRESH-INTERVAL takes precedence over X-PUBLISHED-TTL.
	 * The suggestion is discarded if it is empty or not a parsable duration.
	 *
	 * @param array $subscription
	 * @param string $webcalData
	 * @return string|null the suggested refresh rate, null if there is none to apply
	 */
	private function checkWebcalDataForRefreshRate($subscription, $webcalData) {
		// a refresh rate already stored in the database is never overwritten
		if (isset($subscription['refreshrate'])) {
			return null;
		}

		/** @var Component\VCalendar $vCalendar */
		$vCalendar = Reader::read($webcalData);

		// property names in order of precedence, the first one present is used
		$suggestion = null;
		foreach (['REFRESH-INTERVAL', 'X-PUBLISHED-TTL'] as $propertyName) {
			if (isset($vCalendar->{$propertyName})) {
				$suggestion = $vCalendar->{$propertyName}->getValue();
				break;
			}
		}

		if (!$suggestion) {
			return null;
		}

		// make sure the suggested refresh rate is a valid duration at all
		try {
			DateTimeParser::parseDuration($suggestion);
			return $suggestion;
		} catch(InvalidDataException $ex) {
			return null;
		}
	}
383
384
	/**
	 * Writes property changes of a subscription to the database.
	 *
	 * Used for the properties
	 *  - refreshrate
	 *  - source
	 *
	 * Does nothing when there are no changes.
	 *
	 * @param array $subscription subscription row, read for its 'id' key
	 * @param array $mutations property name => new value
	 */
	private function updateSubscription(array $subscription, array $mutations) {
		if (\count($mutations) === 0) {
			return;
		}

		$patch = new PropPatch($mutations);
		$this->calDavBackend->updateSubscription($subscription['id'], $patch);
		$patch->commit();
	}
402
403
	/**
	 * Rebuilds a URL without authentication information and with a plain
	 * HTTP(S) scheme.
	 *
	 * 'http' is kept as is; every other scheme (e.g. 'webcal' or 'webcals')
	 * as well as a missing scheme becomes 'https'.
	 *
	 * @param string $url
	 * @return string|null the cleaned URL, null if the URL cannot be parsed
	 *                     or the rebuilt URL has no host
	 */
	private function cleanURL(string $url) {
		$parts = parse_url($url);
		if ($parts === false) {
			return null;
		}

		$scheme = (($parts['scheme'] ?? null) === 'http') ? 'http' : 'https';

		// user and password are deliberately left out while reassembling
		$rebuilt = $scheme . '://' . ($parts['host'] ?? '');
		if (isset($parts['port'])) {
			$rebuilt .= ':' . $parts['port'];
		}
		$rebuilt .= $parts['path'] ?? '';
		if (isset($parts['query'])) {
			$rebuilt .= '?' . $parts['query'];
		}
		if (isset($parts['fragment'])) {
			$rebuilt .= '#' . $parts['fragment'];
		}

		// parse_url gives odd results for input without scheme and ://,
		// so the rebuilt URL is parsed once more to make sure it has a host
		$recheck = parse_url($rebuilt);
		if ($recheck !== false && isset($recheck['host'])) {
			return $rebuilt;
		}

		return null;
	}
438
}
439