1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @author Matthijs van den Bos <[email protected]> |
4
|
|
|
* @copyright 2013 Matthijs van den Bos |
5
|
|
|
*/ |
6
|
|
|
|
7
|
|
|
namespace VDB\Spider\PersistenceHandler; |
8
|
|
|
|
9
|
|
|
use Symfony\Component\Finder\Finder; |
10
|
|
|
use VDB\Spider\Resource; |
11
|
|
|
|
12
|
|
|
/**
 * Base class for persistence handlers that store spider results on the
 * local file system.
 *
 * Results are grouped below the configured base path in a directory named
 * after the spider ID. The persisted files are traversed through a lazily
 * created Symfony Finder; the iteration methods (next/key/valid/rewind)
 * delegate to the Finder's iterator, while persist() and current() are left
 * to concrete subclasses.
 */
abstract class FilePersistenceHandler implements PersistenceHandlerInterface
{
    /**
     * @var string the path where all spider results should be persisted.
     *             The results will be grouped in a directory by spider ID.
     */
    protected $path = '';

    /** @var string ID of the spider; used as the directory name below $path */
    protected $spiderId = '';

    /**
     * @var int not modified by this base class.
     *          NOTE(review): presumably a running total maintained by
     *          concrete handlers - confirm against the subclasses.
     */
    protected $totalSizePersisted = 0;

    /** @var \Iterator|null created on first use by getIterator() */
    protected $iterator;

    /** @var Finder|null created on first use by getFinder() */
    protected $finder;

    /** @var string The filename that will be appended for resources that end with a slash */
    protected $defaultFilename = 'index.html';

    /**
     * @param string $path the path where all spider results should be persisted.
     *                     The results will be grouped in a directory by spider ID.
     */
    public function __construct($path)
    {
        $this->path = $path;
    }

    /**
     * Sets the spider ID and makes sure the matching result directory exists.
     *
     * @param string $spiderId
     *
     * @return void
     *
     * @throws \RuntimeException when the result directory does not exist and cannot be created
     */
    public function setSpiderId($spiderId)
    {
        $this->spiderId = $spiderId;

        $resultPath = $this->getResultPath();

        // The trailing is_dir() covers the case where something else created
        // the directory between the first check and the mkdir() call.
        if (!is_dir($resultPath) && !mkdir($resultPath, 0700, true) && !is_dir($resultPath)) {
            throw new \RuntimeException(
                sprintf('Unable to create result directory "%s".', $resultPath)
            );
        }
    }

    /**
     * Builds the file name under which a resource is stored.
     *
     * @param Resource $resource
     *
     * @return string the URL-encoded base name of the resource's completed URI path
     */
    protected function getFileSystemFilename($resource)
    {
        $fullPath = $this->completePath($resource->getUri()->getPath());

        return urlencode(basename($fullPath));
    }

    /**
     * Builds the relative directory in which a resource is stored.
     *
     * @param Resource $resource
     *
     * @return string the URI host followed by the directory part of the completed URI path
     */
    protected function getFileSystemPath($resource)
    {
        $hostname = $resource->getUri()->getHost();
        $fullPath = $this->completePath($resource->getUri()->getPath());

        return $hostname . dirname($fullPath);
    }

    /**
     * Appends the default filename to paths that point at a directory.
     *
     * This is because we don't want to persist the directories as files.
     * This is similar to wget behaviour.
     *
     * @param string $path a URI path
     *
     * @return string The path that was provided, with the default filename
     *                appended if it is empty or ends in a /.
     */
    protected function completePath($path)
    {
        $path = (string) $path;

        // A URI without a path refers to the root of the host, same as "/".
        // Without this an empty path would yield an empty file name.
        if ($path === '') {
            $path = '/';
        }

        if (substr($path, -1) === '/') {
            $path .= $this->defaultFilename;
        }

        return $path;
    }

    /**
     * @return int the number of files found below the result path
     */
    public function count()
    {
        return $this->getFinder()->count();
    }

    /**
     * @return string the base path and the spider ID, each followed by a directory separator
     */
    protected function getResultPath()
    {
        return $this->path . DIRECTORY_SEPARATOR . $this->spiderId . DIRECTORY_SEPARATOR;
    }

    /**
     * Persists a resource; the storage format is defined by the concrete handler.
     *
     * @param Resource $resource
     */
    abstract public function persist(Resource $resource);

    /**
     * Returns the Finder for the files below the result path, creating it on first use.
     *
     * @return Finder
     */
    protected function getFinder()
    {
        if (!$this->finder instanceof Finder) {
            $this->finder = Finder::create()->files()->in($this->getResultPath());
        }

        return $this->finder;
    }

    /**
     * Returns the Finder's iterator, creating it on first use.
     *
     * @return \Iterator
     */
    protected function getIterator()
    {
        if (!$this->iterator instanceof \Iterator) {
            $this->iterator = $this->getFinder()->getIterator();
        }

        return $this->iterator;
    }

    /**
     * @return Resource
     */
    abstract public function current();

    /**
     * Advances the underlying file iterator.
     *
     * @return void
     */
    public function next()
    {
        $this->getIterator()->next();
    }

    /**
     * @return int|float|string|bool|null the key reported by the underlying file iterator
     */
    public function key()
    {
        return $this->getIterator()->key();
    }

    /**
     * @return bool whether the underlying file iterator points at a valid element
     */
    public function valid()
    {
        return $this->getIterator()->valid();
    }

    /**
     * Rewinds the underlying file iterator.
     *
     * @return void
     */
    public function rewind()
    {
        $this->getIterator()->rewind();
    }
}
152
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. `excluded_paths: ["lib/*"]`, you can move it to the dependency path list instead. For further information, see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths