Completed
Push — master ( 8931ba...9baa96 )
by Roeland
47:53
created

Scanner::updateCache()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 8

Duplication

Lines 7
Ratio 63.64 %

Code Coverage

Tests 10
CRAP Score 3
Metric Value
dl 7
loc 11
ccs 10
cts 10
cp 1
rs 9.4286
cc 3
eloc 8
nc 3
nop 3
crap 3
1
<?php
2
/**
3
 * @author Arthur Schiwon <[email protected]>
4
 * @author Björn Schießle <[email protected]>
5
 * @author Jörn Friedrich Dreyer <[email protected]>
6
 * @author Lukas Reschke <[email protected]>
7
 * @author Martin Mattel <[email protected]>
8
 * @author Michael Gapczynski <[email protected]>
9
 * @author Morris Jobke <[email protected]>
10
 * @author Olivier Paroz <[email protected]>
11
 * @author Owen Winkler <[email protected]>
12
 * @author Robin Appelman <[email protected]>
13
 * @author Robin McCorkell <[email protected]>
14
 * @author Thomas Müller <[email protected]>
15
 * @author Vincent Petry <[email protected]>
16
 *
17
 * @copyright Copyright (c) 2015, ownCloud, Inc.
18
 * @license AGPL-3.0
19
 *
20
 * This code is free software: you can redistribute it and/or modify
21
 * it under the terms of the GNU Affero General Public License, version 3,
22
 * as published by the Free Software Foundation.
23
 *
24
 * This program is distributed in the hope that it will be useful,
25
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
 * GNU Affero General Public License for more details.
28
 *
29
 * You should have received a copy of the GNU Affero General Public License, version 3,
30
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
31
 *
32
 */
33
34
namespace OC\Files\Cache;
35
36
use OC\Files\Filesystem;
37
use OC\Hooks\BasicEmitter;
38
use OCP\Config;
39
use OCP\Lock\ILockingProvider;
40
41
/**
42
 * Class Scanner
43
 *
44
 * Hooks available in scope \OC\Files\Cache\Scanner:
45
 *  - scanFile(string $path, string $storageId)
46
 *  - scanFolder(string $path, string $storageId)
47
 *  - postScanFile(string $path, string $storageId)
48
 *  - postScanFolder(string $path, string $storageId)
49
 *
50
 * @package OC\Files\Cache
51
 */
52
class Scanner extends BasicEmitter {
53
	/**
54
	 * @var \OC\Files\Storage\Storage $storage
55
	 */
56
	protected $storage;
57
58
	/**
59
	 * @var string $storageId
60
	 */
61
	protected $storageId;
62
63
	/**
64
	 * @var \OC\Files\Cache\Cache $cache
65
	 */
66
	protected $cache;
67
68
	/**
69
	 * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache
70
	 */
71
	protected $cacheActive;
72
73
	/**
74
	 * @var bool $useTransactions whether to use transactions
75
	 */
76
	protected $useTransactions = true;
77
78
	/**
79
	 * @var \OCP\Lock\ILockingProvider
80
	 */
81
	protected $lockingProvider;
82
83
	const SCAN_RECURSIVE = true;
84
	const SCAN_SHALLOW = false;
85
86
	const REUSE_ETAG = 1;
87
	const REUSE_SIZE = 2;
88
89 886
	/**
	 * Bind the scanner to a storage.
	 *
	 * Stores the storage, its id and its cache object, enables cache writes
	 * unless the 'filesystem_cache_readonly' system value is set, and takes
	 * the locking provider from the server container.
	 *
	 * @param \OC\Files\Storage\Storage $storage the storage this scanner operates on
	 */
	public function __construct(\OC\Files\Storage\Storage $storage) {
		$this->storage = $storage;
		$this->storageId = $storage->getId();
		$this->cache = $storage->getCache();

		// a read-only cache means scan results must not be written back
		$cacheReadOnly = Config::getSystemValue('filesystem_cache_readonly', false);
		$this->cacheActive = !$cacheReadOnly;

		$this->lockingProvider = \OC::$server->getLockingProvider();
	}
96
97
	/**
	 * Choose whether scanning the children of a folder is wrapped in a
	 * database transaction.
	 *
	 * Transactions are used by default.
	 *
	 * @param bool $useTransactions true to use transactions, false to scan without them
	 */
	public function setUseTransactions($useTransactions) {
		$this->useTransactions = $useTransactions;
	}
106
107
	/**
	 * Fetch the metadata of a file or folder from the storage.
	 *
	 * When the storage returns null for the path, a debug message is logged
	 * and the null is passed on to the caller.
	 *
	 * @param string $path
	 * @return array|null the metadata reported by the storage, null if it reported none
	 */
	public function getData($path) {
		$metaData = $this->storage->getMetaData($path);
		if ($metaData === null) {
			\OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not accessible or present !!!", \OCP\Util::DEBUG);
		}
		return $metaData;
	}
121
122
	/**
	 * Scan a single file and store it in the cache.
	 *
	 * Partial files and blacklisted files are skipped. For any other file the
	 * storage metadata is fetched, merged with reusable cache data according
	 * to $reuseExisting and written to the cache; if the storage has no
	 * metadata for the file, the file is removed from the cache instead.
	 *
	 * The shared lock taken for the scan is released on every exit path,
	 * including when an exception is thrown while scanning.
	 *
	 * @param string $file
	 * @param int $reuseExisting bitmask of self::REUSE_ETAG and self::REUSE_SIZE, 0 to reuse nothing
	 * @param int $parentId cache id of the parent folder, -1 to look it up in the cache
	 * @param array | null $cacheData existing data in the cache for the file to be scanned
	 * @param bool $lock set to false to disable getting an additional read lock during scanning
	 * @return array|null the metadata of the scanned file; null if the file was skipped
	 *                    or the storage returned no metadata for it
	 * @throws \OC\ServerNotAvailableException
	 * @throws \OCP\Lock\LockedException
	 */
	public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true) {
		if (self::isPartialFile($file) or Filesystem::isFileBlacklisted($file)) {
			return null;
		}

		if ($lock) {
			$this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
		try {
			$this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));
			$data = $this->getData($file);
			if ($data) {
				$parent = dirname($file);
				if ($parent === '.' or $parent === '/') {
					$parent = '';
				}
				if ($parentId === -1) {
					$parentId = $this->cache->getId($parent);
				}

				// scan the parent if it's not in the cache (id -1) and the current file is not the root folder
				if ($file and $parentId === -1) {
					$parentData = $this->scanFile($parent);
					$parentId = $parentData['fileid'];
				}
				if ($parent) {
					$data['parent'] = $parentId;
				}
				if (is_null($cacheData)) {
					$cacheData = $this->cache->get($file);
				}
				if ($cacheData and $reuseExisting and isset($cacheData['fileid'])) {
					// prevent empty etag
					if (empty($cacheData['etag'])) {
						$etag = $data['etag'];
					} else {
						$etag = $cacheData['etag'];
					}
					$fileId = $cacheData['fileid'];
					$data['fileid'] = $fileId;
					// only reuse data if the file hasn't explicitly changed
					if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
						$data['mtime'] = $cacheData['mtime'];
						if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
							$data['size'] = $cacheData['size'];
						}
						if ($reuseExisting & self::REUSE_ETAG) {
							$data['etag'] = $etag;
						}
					}
					// Only update metadata that has changed
					$newData = array_diff_assoc($data, $cacheData);
				} else {
					$newData = $data;
					$fileId = -1;
				}
				if (!empty($newData)) {
					$data['fileid'] = $this->addToCache($file, $newData, $fileId);
				}
				$this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
				\OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
			} else {
				$this->removeFromCache($file);
			}
		} catch (\Exception $e) {
			// Do not leave the shared lock behind when scanning fails.
			// catch + rethrow is used instead of "finally" so the code
			// keeps working on PHP versions older than 5.5.
			if ($lock) {
				$this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
			}
			throw $e;
		}

		if ($lock) {
			$this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
		return $data;
	}
204
205 24
	/**
	 * Announce the removal of a path and, if the cache is active, remove it
	 * from the cache.
	 *
	 * Both the legacy hook and the emitter event are fired even when the
	 * cache is not active.
	 *
	 * @param string $path
	 */
	protected function removeFromCache($path) {
		\OC_Hook::emit('Scanner', 'removeFromCache', array('file' => $path));
		$this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', array($path));

		if (!$this->cacheActive) {
			return;
		}
		$this->cache->remove($path);
	}
212
213
	/**
	 * Announce new metadata for a path and, if the cache is active, store it.
	 *
	 * An existing entry (file id other than -1) is updated, otherwise a new
	 * entry is put into the cache.
	 *
	 * @param string $path
	 * @param array $data
	 * @param int $fileId id of the existing cache entry, -1 if there is none
	 * @return int the id of the added file, -1 if the cache is not active
	 */
	protected function addToCache($path, $data, $fileId = -1) {
		\OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data));
		$this->emit('\OC\Files\Cache\Scanner', 'addToCache', array($path, $this->storageId, $data));

		if (!$this->cacheActive) {
			return -1;
		}
		if ($fileId === -1) {
			return $this->cache->put($path, $data);
		}
		$this->cache->update($fileId, $data);
		return $fileId;
	}
233
234
	/**
	 * Announce changed metadata for a path and, if the cache is active,
	 * write it to the cache.
	 *
	 * An existing entry (file id other than -1) is updated, otherwise a new
	 * entry is put into the cache. Nothing is returned.
	 *
	 * @param string $path
	 * @param array $data
	 * @param int $fileId id of the existing cache entry, -1 if there is none
	 */
	protected function updateCache($path, $data, $fileId = -1) {
		// NOTE: the legacy hook is fired under the name 'addToCache', while
		// the emitter event below is named 'updateCache'
		\OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data));
		$this->emit('\OC\Files\Cache\Scanner', 'updateCache', array($path, $this->storageId, $data));

		if (!$this->cacheActive) {
			return;
		}
		if ($fileId === -1) {
			$this->cache->put($path, $data);
		} else {
			$this->cache->update($fileId, $data);
		}
	}
250
251
	/**
	 * Scan a file or a folder and all its children.
	 *
	 * When $reuse is -1 it defaults to reusing etag and size for a shallow
	 * scan and to reusing only the etag for a recursive scan.
	 *
	 * The shared lock taken on $path is released on every exit path,
	 * including when scanning throws an exception.
	 *
	 * @param string $path
	 * @param bool $recursive self::SCAN_RECURSIVE or self::SCAN_SHALLOW
	 * @param int $reuse bitmask of self::REUSE_ETAG and self::REUSE_SIZE, -1 for the default
	 * @param bool $lock set to false to disable getting an additional read lock during scanning
	 * @return array|null the meta data of the scanned file or folder as returned by scanFile()
	 */
	public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
		if ($reuse === -1) {
			$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
		}
		if ($lock) {
			$this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
		try {
			$data = $this->scanFile($path, $reuse, -1, null, $lock);
			if ($data and $data['mimetype'] === 'httpd/unix-directory') {
				$size = $this->scanChildren($path, $recursive, $reuse, $data, $lock);
				$data['size'] = $size;
			}
		} catch (\Exception $e) {
			// Do not leave the shared lock behind when scanning fails.
			// catch + rethrow is used instead of "finally" so the code
			// keeps working on PHP versions older than 5.5.
			if ($lock) {
				$this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
			}
			throw $e;
		}
		if ($lock) {
			$this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
		return $data;
	}
277
278
	/**
	 * Collect the children of a folder that are currently in the cache,
	 * indexed by their name.
	 *
	 * @param int $folderId cache id of the folder
	 * @return array[] cache entries of the children, keyed by the entry's 'name'
	 */
	protected function getExistingChildren($folderId) {
		$childrenByName = array();
		foreach ($this->cache->getFolderContentsById($folderId) as $entry) {
			$childrenByName[$entry['name']] = $entry;
		}
		return $childrenByName;
	}
292
293
	/**
	 * Get the names of the children of a folder from the storage.
	 *
	 * Entries for which Filesystem::isIgnoredDir() returns true are left out.
	 * The directory handle is closed once all entries have been read.
	 *
	 * @param string $folder
	 * @return string[] names of the children; empty if the folder could not be opened
	 */
	protected function getNewChildren($folder) {
		$children = array();
		$dh = $this->storage->opendir($folder);
		// a resource is always truthy, so this single check covers both
		// "opendir failed" and "opendir returned something that is no handle"
		if (!is_resource($dh)) {
			return $children;
		}
		while (($file = readdir($dh)) !== false) {
			if (!Filesystem::isIgnoredDir($file)) {
				$children[] = $file;
			}
		}
		// release the handle instead of waiting for garbage collection
		closedir($dh);
		return $children;
	}
312
313
	/**
	 * Scan all the files and folders in a folder.
	 *
	 * The direct children are scanned inside a database transaction (unless
	 * transactions are disabled); children that are in the cache but no
	 * longer on the storage are removed from the cache. For a recursive scan
	 * the sub folders are scanned after the transaction has been committed.
	 *
	 * A DBAL exception while scanning a child is logged and ignored. Any
	 * other exception rolls the transaction back before it is rethrown, so
	 * no transaction is left open when the scan is aborted.
	 *
	 * @param string $path
	 * @param bool $recursive self::SCAN_RECURSIVE or self::SCAN_SHALLOW
	 * @param int $reuse bitmask of self::REUSE_ETAG and self::REUSE_SIZE, -1 for the default
	 * @param array $folderData existing cache data for the folder to be scanned
	 * @param bool $lock set to false to disable getting an additional read lock during scanning
	 * @return int the size of the scanned folder or -1 if the size is unknown at this stage
	 * @throws \OCP\Lock\LockedException
	 */
	protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderData = null, $lock = true) {
		if ($reuse === -1) {
			$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
		}
		$this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
		$size = 0;
		$childQueue = array();
		if (is_array($folderData) and isset($folderData['fileid'])) {
			$folderId = $folderData['fileid'];
		} else {
			$folderId = $this->cache->getId($path);
		}
		$existingChildren = $this->getExistingChildren($folderId);
		$newChildren = $this->getNewChildren($path);

		if ($this->useTransactions) {
			\OC_DB::beginTransaction();
		}
		$exceptionOccurred = false;
		foreach ($newChildren as $file) {
			$child = ($path) ? $path . '/' . $file : $file;
			try {
				$existingData = isset($existingChildren[$file]) ? $existingChildren[$file] : null;
				$data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock);
				if ($data) {
					if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) {
						// sub folders are scanned after the transaction is committed
						$childQueue[$child] = $data;
					} else if ($data['size'] === -1) {
						// one unknown size makes the folder size unknown
						$size = -1;
					} else if ($size !== -1) {
						$size += $data['size'];
					}
				}
			} catch (\Doctrine\DBAL\DBALException $ex) {
				// might happen if inserting duplicate while a scanning
				// process is running in parallel
				// log and ignore
				\OCP\Util::writeLog('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OCP\Util::DEBUG);
				$exceptionOccurred = true;
			} catch (\Exception $e) {
				// Covers \OCP\Lock\LockedException as well as any other
				// failure (e.g. an unavailable storage): never let the
				// exception escape while the transaction is still open.
				if ($this->useTransactions) {
					\OC_DB::rollback();
				}
				throw $e;
			}
		}
		$removedChildren = \array_diff(array_keys($existingChildren), $newChildren);
		foreach ($removedChildren as $childName) {
			$child = ($path) ? $path . '/' . $childName : $childName;
			$this->removeFromCache($child);
		}
		if ($this->useTransactions) {
			\OC_DB::commit();
		}
		if ($exceptionOccurred) {
			// It might happen that the parallel scan process has already
			// inserted mimetypes but those weren't available yet inside the transaction
			// To make sure to have the updated mime types in such cases,
			// we reload them here
			\OC::$server->getMimeTypeLoader()->reset();
		}

		foreach ($childQueue as $child => $childData) {
			$childSize = $this->scanChildren($child, self::SCAN_RECURSIVE, $reuse, $childData, $lock);
			if ($childSize === -1) {
				$size = -1;
			} else if ($size !== -1) {
				$size += $childSize;
			}
		}
		// only touch the cache when the folder size actually changed
		if (!is_array($folderData) or !isset($folderData['size']) or $folderData['size'] !== $size) {
			$this->updateCache($path, array('size' => $size), $folderId);
		}
		$this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
		return $size;
	}
399
400
	/**
	 * Check if the file should be ignored when scanning.
	 *
	 * A file is treated as partial when its extension is 'part' or when its
	 * path contains '.part/'; this keeps unfinished put requests from being
	 * scanned.
	 *
	 * @param string $file
	 * @return boolean true if the file is a partial file
	 */
	public static function isPartialFile($file) {
		$hasPartExtension = pathinfo($file, PATHINFO_EXTENSION) === 'part';

		return $hasPartExtension || strpos($file, '.part/') !== false;
	}
418
419
	/**
	 * Walk over any folders that are not fully scanned yet and scan them.
	 *
	 * The loop stops when the cache reports no incomplete path, or when it
	 * reports the same path twice in a row.
	 */
	public function backgroundScan() {
		$previousPath = null;
		while (true) {
			$path = $this->cache->getIncomplete();
			if ($path === false || $path === $previousPath) {
				break;
			}
			// $path can not be false from here on, see the check above
			try {
				$this->scan($path, self::SCAN_RECURSIVE, self::REUSE_ETAG);
				\OC_Hook::emit('Scanner', 'correctFolderSize', array('path' => $path));
				if ($this->cacheActive) {
					$this->cache->correctFolderSize($path);
				}
			} catch (\OCP\Files\StorageInvalidException $e) {
				// skip unavailable storages
			} catch (\OCP\Files\StorageNotAvailableException $e) {
				// skip unavailable storages
			} catch (\OCP\Files\ForbiddenException $e) {
				// skip forbidden storages
			} catch (\OCP\Lock\LockedException $e) {
				// skip unavailable storages
			}
			// FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
			// to make this possible
			$previousPath = $path;
		}
	}
445
446
	/**
	 * Choose whether scan operations write to the cache.
	 *
	 * @param boolean $active true to perform cache operations, false to leave the cache untouched
	 */
	public function setCacheActive($active) {
		$this->cacheActive = $active;
	}
454
}
455