1 | <?php |
||||||
2 | |||||||
3 | namespace VDB\Spider\Downloader; |
||||||
4 | |||||||
5 | use VDB\Spider\Downloader\DownloaderInterface; |
||||||
6 | use Symfony\Component\EventDispatcher\Event; |
||||||
7 | use Symfony\Component\EventDispatcher\EventDispatcher; |
||||||
8 | use Symfony\Component\EventDispatcher\EventDispatcherInterface; |
||||||
9 | use Symfony\Component\EventDispatcher\GenericEvent; |
||||||
10 | use VDB\Spider\Event\SpiderEvents; |
||||||
11 | use VDB\Spider\PersistenceHandler\MemoryPersistenceHandler; |
||||||
12 | use VDB\Spider\PersistenceHandler\PersistenceHandlerInterface; |
||||||
13 | use VDB\Spider\RequestHandler\GuzzleRequestHandler; |
||||||
14 | use VDB\Spider\RequestHandler\RequestHandlerInterface; |
||||||
15 | use VDB\Spider\Filter\PostFetchFilterInterface; |
||||||
16 | use VDB\Spider\Resource; |
||||||
17 | use VDB\Spider\Uri\DiscoveredUri; |
||||||
18 | |||||||
/**
 * Downloads the resource behind a discovered URI, runs it through the
 * registered post-fetch filters and hands it to the persistence handler.
 *
 * The event dispatcher, persistence handler and request handler are created
 * lazily with default implementations when none has been injected.
 */
class Downloader implements DownloaderInterface
{
    /** @var EventDispatcherInterface */
    private $dispatcher;

    /** @var PersistenceHandlerInterface */
    private $persistenceHandler;

    /** @var RequestHandlerInterface */
    private $requestHandler;

    /** @var int the maximum number of downloaded resources. 0 means no limit */
    private $downloadLimit = 0;

    /** @var PostFetchFilterInterface[] */
    private $postFetchFilters = array();

    /**
     * @param int $downloadLimit Maximum number of resources to download. 0 means no limit
     * @return $this
     */
    public function setDownloadLimit($downloadLimit)
    {
        // isDownLoadLimitExceeded() compares against the integer 0 with a strict
        // operator, so normalise here; otherwise "0" or null would be treated as
        // a limit that is exceeded immediately.
        $this->downloadLimit = (int) $downloadLimit;

        return $this;
    }

    /**
     * @return int Maximum number of resources to download
     */
    public function getDownloadLimit()
    {
        return $this->downloadLimit;
    }

    /**
     * Registers a filter that is evaluated against every fetched resource.
     *
     * @param PostFetchFilterInterface $filter
     */
    public function addPostFetchFilter(PostFetchFilterInterface $filter)
    {
        $this->postFetchFilters[] = $filter;
    }

    /**
     * Fetches the resource for the given URI, drops it when a post-fetch
     * filter matches, and persists it otherwise.
     *
     * @param DiscoveredUri $uri
     * @return false|Resource false when the fetch yielded nothing or a filter matched
     */
    public function download(DiscoveredUri $uri)
    {
        // Fetch the document
        $resource = $this->fetchResource($uri);
        if (!$resource) {
            return false;
        }

        if ($this->matchesPostfetchFilter($resource)) {
            return false;
        }

        $this->getPersistenceHandler()->persist($resource);

        return $resource;
    }

    /**
     * Tells whether a download limit is set and the persistence handler's
     * count() has reached it.
     *
     * @return bool
     */
    public function isDownLoadLimitExceeded()
    {
        $limit = $this->getDownloadLimit();

        // A limit of 0 means "no limit".
        return $limit !== 0 && $this->getPersistenceHandler()->count() >= $limit;
    }

    /**
     * A shortcut for EventDispatcher::dispatch()
     *
     * @param string $eventName
     * @param null|GenericEvent $event
     */
    private function dispatch($eventName, GenericEvent $event = null)
    {
        if ($event === null) {
            $event = new GenericEvent();
        }

        // The Symfony Contracts dispatcher signature takes the event object
        // first and the event name second. Static analysis of this file flagged
        // the previous (name, event) order as incompatible with it.
        $this->getDispatcher()->dispatch($event, $eventName);
    }

    /**
     * @param EventDispatcherInterface $eventDispatcher
     * @return $this
     */
    public function setDispatcher(EventDispatcherInterface $eventDispatcher)
    {
        $this->dispatcher = $eventDispatcher;

        return $this;
    }

    /**
     * Returns the dispatcher, creating a default EventDispatcher on first use.
     *
     * @return EventDispatcherInterface
     */
    public function getDispatcher()
    {
        if (!$this->dispatcher) {
            $this->dispatcher = new EventDispatcher();
        }

        return $this->dispatcher;
    }

    /**
     * Requests the URI through the request handler, dispatching the
     * pre-request event before the call, the error event when the request
     * throws, and the post-request event in every case.
     *
     * @param DiscoveredUri $uri
     * @return Resource|false false when the request threw an exception
     */
    protected function fetchResource(DiscoveredUri $uri)
    {
        // Stays false when the request below throws.
        $resource = false;

        $this->dispatch(SpiderEvents::SPIDER_CRAWL_PRE_REQUEST, new GenericEvent($this, array('uri' => $uri)));

        try {
            $resource = $this->getRequestHandler()->request($uri);
        } catch (\Exception $e) {
            $this->dispatch(
                SpiderEvents::SPIDER_CRAWL_ERROR_REQUEST,
                new GenericEvent($this, array('uri' => $uri, 'message' => $e->getMessage()))
            );
        } finally {
            $this->dispatch(SpiderEvents::SPIDER_CRAWL_POST_REQUEST, new GenericEvent($this, array('uri' => $uri)));
        }

        return $resource;
    }

    /**
     * Checks the resource against the registered post-fetch filters and
     * dispatches the post-fetch filter event for the first one that matches.
     *
     * @param Resource $resource
     * @return bool true as soon as one filter matches, false when none does
     */
    private function matchesPostfetchFilter(Resource $resource)
    {
        foreach ($this->postFetchFilters as $filter) {
            if ($filter->match($resource)) {
                $this->dispatch(
                    SpiderEvents::SPIDER_CRAWL_FILTER_POSTFETCH,
                    new GenericEvent($this, array('uri' => $resource->getUri()))
                );

                return true;
            }
        }

        return false;
    }

    /**
     * @param PersistenceHandlerInterface $persistenceHandler
     */
    public function setPersistenceHandler(PersistenceHandlerInterface $persistenceHandler)
    {
        $this->persistenceHandler = $persistenceHandler;
    }

    /**
     * Returns the persistence handler, creating a MemoryPersistenceHandler on first use.
     *
     * @return PersistenceHandlerInterface
     */
    public function getPersistenceHandler()
    {
        if (!$this->persistenceHandler) {
            $this->persistenceHandler = new MemoryPersistenceHandler();
        }

        return $this->persistenceHandler;
    }

    /**
     * @param RequestHandlerInterface $requestHandler
     */
    public function setRequestHandler(RequestHandlerInterface $requestHandler)
    {
        $this->requestHandler = $requestHandler;
    }

    /**
     * Returns the request handler, creating a GuzzleRequestHandler on first use.
     *
     * @return RequestHandlerInterface
     */
    public function getRequestHandler()
    {
        if (!$this->requestHandler) {
            $this->requestHandler = new GuzzleRequestHandler();
        }

        return $this->requestHandler;
    }
}
||||||
203 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. `excluded_paths: ["lib/*"]`, you can move it to the dependency path list as follows: `dependency_paths: ["lib/*"]`. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths