1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @author Matthijs van den Bos <[email protected]> |
4
|
|
|
* @copyright 2013 Matthijs van den Bos |
5
|
|
|
*/ |
6
|
|
|
|
7
|
|
|
namespace VDB\Spider\PersistenceHandler; |
8
|
|
|
|
9
|
|
|
use Symfony\Component\Finder\Finder; |
10
|
|
|
use VDB\Spider\Resource; |
11
|
|
|
|
12
|
|
|
/**
 * Persistence handler that stores every spidered resource as a serialized
 * file on disk and exposes the stored resources through iterator-style methods.
 *
 * Files are written to "<path>/<spiderId>/<urlencoded resource URI>". Reading
 * back is delegated to a Symfony Finder that scans that same directory.
 */
class FileSerializedResourcePersistenceHandler implements PersistenceHandlerInterface
{
    /**
     * @var string the path where all spider results should be persisted.
     *             The results will be grouped in a directory by spider ID.
     */
    private $path = '';

    /**
     * @var string the ID used as the name of the sub directory of $path
     *             in which the results are stored.
     */
    private $spiderId = '';

    /**
     * @var int running total of the byte counts reported by fwrite() in persist().
     */
    private $totalSizePersisted = 0;

    /** @var \Iterator|null lazily created by getIterator() */
    private $iterator;

    /** @var Finder|null lazily created by getFinder() */
    private $finder;

    /**
     * @param string $path the path where all spider results should be persisted.
     *                     The results will be grouped in a directory by spider ID.
     */
    public function __construct($path)
    {
        $this->path = $path;
    }

    /**
     * Sets the spider ID and makes sure the matching result directory exists.
     *
     * @param string $spiderId
     *
     * @return void
     *
     * @throws \RuntimeException when the result directory does not exist and cannot be created
     */
    public function setSpiderId($spiderId)
    {
        $this->spiderId = $spiderId;

        // The cached Finder and iterator were built for the previous result
        // directory; drop them so they are rebuilt for the new one on next use.
        $this->finder = null;
        $this->iterator = null;

        $resultPath = $this->getResultPath();

        // is_dir() is checked again after a failed mkdir() so that another
        // process creating the same directory concurrently is not an error.
        if (!is_dir($resultPath) && !mkdir($resultPath, 0700, true) && !is_dir($resultPath)) {
            throw new \RuntimeException(
                sprintf('Unable to create result directory "%s".', $resultPath)
            );
        }
    }

    /**
     * @return int the number of files the Finder reports for the result directory
     */
    public function count()
    {
        return $this->getFinder()->count();
    }

    /**
     * Builds the directory for the current spider ID, including a trailing separator.
     *
     * @return string
     */
    private function getResultPath()
    {
        return $this->path . DIRECTORY_SEPARATOR . $this->spiderId . DIRECTORY_SEPARATOR;
    }

    /**
     * Serializes the resource and writes it to a file named after its URL-encoded URI.
     *
     * The file is opened in 'w' mode, so a resource persisted earlier under
     * the same URI is overwritten.
     *
     * @param Resource $resource
     *
     * @return void
     */
    public function persist(Resource $resource)
    {
        $fileName = urlencode($resource->getUri()->toString());
        $file = new \SplFileObject($this->getResultPath() . $fileName, 'w');

        // Cast so the counter stays an integer even when fwrite() does not
        // return a byte count on failure.
        $this->totalSizePersisted += (int) $file->fwrite(serialize($resource));
    }

    /**
     * Returns the Finder for the result directory, creating it on first use.
     *
     * @return Finder
     */
    private function getFinder()
    {
        if (!$this->finder instanceof Finder) {
            $this->finder = Finder::create()->files()->in($this->getResultPath());
        }

        return $this->finder;
    }

    /**
     * Returns the Finder's iterator, creating it on first use. The same
     * instance backs current(), next(), key(), valid() and rewind().
     *
     * @return \Iterator
     */
    private function getIterator()
    {
        if (!$this->iterator instanceof \Iterator) {
            $this->iterator = $this->getFinder()->getIterator();
        }

        return $this->iterator;
    }

    /**
     * @return Resource the resource unserialized from the file at the current position
     */
    public function current()
    {
        // NOTE(review): unserialize() instantiates whatever classes the file
        // names. That is only safe while the result directory contains nothing
        // but files written by persist(); do not point $path at untrusted data.
        return unserialize($this->getIterator()->current()->getContents());
    }

    /**
     * @return void
     */
    public function next()
    {
        $this->getIterator()->next();
    }

    /**
     * @return mixed the key reported by the underlying Finder iterator.
     *               NOTE(review): this is presumably the file path rather than
     *               an int - confirm against the Finder version in use.
     */
    public function key()
    {
        return $this->getIterator()->key();
    }

    /**
     * @return bool
     */
    public function valid()
    {
        return $this->getIterator()->valid();
    }

    /**
     * @return void
     */
    public function rewind()
    {
        $this->getIterator()->rewind();
    }
}
122
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.