1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @author Matthijs van den Bos <[email protected]> |
4
|
|
|
* @copyright 2013 Matthijs van den Bos |
5
|
|
|
*/ |
6
|
|
|
|
7
|
|
|
namespace VDB\Spider\PersistenceHandler; |
8
|
|
|
|
9
|
|
|
use Symfony\Component\Finder\Finder; |
10
|
|
|
use VDB\Spider\Resource; |
11
|
|
|
|
12
|
|
|
/**
 * Base class for persistence handlers that keep spider results on disk.
 *
 * Results live in "<path>/<spiderId>/". This class creates that directory,
 * and exposes the files found in it through count() and the iteration
 * methods (current/next/key/valid/rewind), which delegate to a lazily
 * created Symfony Finder. Concrete subclasses decide how a Resource is
 * written (persist()) and how a found file is turned back into a
 * Resource (current()).
 */
abstract class FilePersistenceHandler implements PersistenceHandlerInterface
{
    /**
     * @var string the path where all spider results should be persisted.
     *             The results will be grouped in a directory by spider ID.
     */
    protected $path = '';

    /** @var string ID of the spider; used as the name of the result sub-directory */
    protected $spiderId = '';

    /** @var int not modified by this base class; available to subclasses */
    protected $totalSizePersisted = 0;

    /** @var \Iterator|null lazily created by getIterator() */
    protected $iterator;

    /** @var Finder|null lazily created by getFinder() */
    protected $finder;

    /**
     * @param string $path the path where all spider results should be persisted.
     *                     The results will be grouped in a directory by spider ID.
     */
    public function __construct($path)
    {
        $this->path = $path;
    }

    /**
     * Sets the spider ID and makes sure the matching result directory exists.
     *
     * @param string $spiderId
     *
     * @return void
     *
     * @throws \RuntimeException when the result directory does not exist and cannot be created
     */
    public function setSpiderId($spiderId)
    {
        $this->spiderId = $spiderId;

        // The cached Finder and iterator were built for the previous result
        // path; drop them so count() and iteration follow the new spider ID.
        $this->finder = null;
        $this->iterator = null;

        $resultPath = $this->getResultPath();

        // is_dir() is checked again after a failed mkdir() so that a directory
        // created concurrently by another process is not reported as an error.
        if (!is_dir($resultPath) && !mkdir($resultPath, 0700, true) && !is_dir($resultPath)) {
            throw new \RuntimeException(
                sprintf('Unable to create result directory "%s"', $resultPath)
            );
        }
    }

    /**
     * @return int the number of files the Finder finds in the result directory
     */
    public function count()
    {
        return $this->getFinder()->count();
    }

    /**
     * Builds the directory in which the results of the current spider are kept.
     *
     * @return string "<path>/<spiderId>/", always ending in a directory separator
     */
    protected function getResultPath()
    {
        return $this->path . DIRECTORY_SEPARATOR . $this->spiderId . DIRECTORY_SEPARATOR;
    }

    /**
     * Persists a single resource; the storage format is up to the subclass.
     *
     * @param Resource $resource
     */
    abstract public function persist(Resource $resource);

    /**
     * Returns the Finder for the files in the result directory, creating it on first use.
     *
     * @return Finder
     */
    protected function getFinder()
    {
        if (!$this->finder instanceof Finder) {
            $this->finder = Finder::create()->files()->in($this->getResultPath());
        }

        return $this->finder;
    }

    /**
     * Returns the iterator obtained from the Finder, creating it on first use.
     *
     * @return \Iterator
     */
    protected function getIterator()
    {
        if (!$this->iterator instanceof \Iterator) {
            $this->iterator = $this->getFinder()->getIterator();
        }

        return $this->iterator;
    }

    /**
     * Returns the resource at the current iterator position.
     *
     * @return Resource
     */
    abstract public function current();

    /**
     * Advances the underlying iterator to the next file.
     *
     * @return void
     */
    public function next()
    {
        $this->getIterator()->next();
    }

    /**
     * Returns the key of the current position of the underlying iterator.
     *
     * @return integer|double|string|boolean|null
     */
    public function key()
    {
        return $this->getIterator()->key();
    }

    /**
     * Tells whether the underlying iterator currently points at an element.
     *
     * @return boolean
     */
    public function valid()
    {
        return $this->getIterator()->valid();
    }

    /**
     * Moves the underlying iterator back to its first element.
     *
     * @return void
     */
    public function rewind()
    {
        $this->getIterator()->rewind();
    }
}
120
|
|
|
|