Completed
Pull Request — master (#20)
by Matthijs
05:12
created

Downloader::isDownLoadLimitExceeded()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
rs 10
cc 2
eloc 2
nc 2
nop 0
1
<?php
2
3
namespace VDB\Spider\Downloader;
4
5
use VDB\Spider\Downloader\DownloaderInterface;
6
use Symfony\Component\EventDispatcher\Event;
7
use Symfony\Component\EventDispatcher\EventDispatcher;
8
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
9
use Symfony\Component\EventDispatcher\GenericEvent;
10
use VDB\Spider\Event\SpiderEvents;
11
use VDB\Spider\PersistenceHandler\MemoryPersistenceHandler;
12
use VDB\Spider\PersistenceHandler\PersistenceHandlerInterface;
13
use VDB\Spider\RequestHandler\GuzzleRequestHandler;
14
use VDB\Spider\RequestHandler\RequestHandlerInterface;
15
use VDB\Spider\Filter\PostFetchFilterInterface;
16
use VDB\Spider\Resource;
17
use VDB\Spider\Uri\DiscoveredUri;
18
19
/**
 * Fetches the resource behind a discovered URI, runs it through the registered
 * post-fetch filters and hands it to the persistence handler.
 *
 * The event dispatcher, persistence handler and request handler are created
 * lazily with default implementations when none has been injected.
 */
class Downloader implements DownloaderInterface
{
    /** @var EventDispatcherInterface|null created on demand by getDispatcher() */
    private $dispatcher;

    /** @var PersistenceHandlerInterface|null created on demand by getPersistenceHandler() */
    private $persistenceHandler;

    /** @var RequestHandlerInterface|null created on demand by getRequestHandler() */
    private $requestHandler;

    /** @var int the maximum number of downloaded resources. 0 means no limit */
    public $downloadLimit = 0;

    /** @var PostFetchFilterInterface[] */
    private $postFetchFilters = array();

    /**
     * Registers a filter that is consulted after a resource has been fetched.
     *
     * @param PostFetchFilterInterface $filter
     */
    public function addPostFetchFilter(PostFetchFilterInterface $filter)
    {
        $this->postFetchFilters[] = $filter;
    }

    /**
     * Fetches the resource for the given URI and persists it.
     *
     * @param DiscoveredUri $uri
     * @return false|Resource the persisted resource, or false when fetching
     *                        failed or a post-fetch filter matched
     */
    public function download(DiscoveredUri $uri)
    {
        // Fetch the document
        $resource = $this->fetchResource($uri);
        if (!$resource) {
            return false;
        }

        $this->getPersistenceHandler()->persist($resource);

        return $resource;
    }

    /**
     * Tells whether the number of persisted resources has reached the download limit.
     *
     * @return bool false when no limit is configured, otherwise whether the
     *              persistence handler's count is at least the limit
     */
    public function isDownLoadLimitExceeded()
    {
        // $downloadLimit is a public, untyped property: treat every zero-like
        // value (0, '0', null, 0.0, false) as "no limit", not only integer 0.
        if (!$this->downloadLimit) {
            return false;
        }

        return $this->getPersistenceHandler()->count() >= $this->downloadLimit;
    }

    /**
     * A shortcut for EventDispatcher::dispatch()
     *
     * @param string $eventName
     * @param null|Event $event
     */
    private function dispatch($eventName, Event $event = null)
    {
        $this->getDispatcher()->dispatch($eventName, $event);
    }

    /**
     * @param EventDispatcherInterface $eventDispatcher
     * @return $this
     */
    public function setDispatcher(EventDispatcherInterface $eventDispatcher)
    {
        $this->dispatcher = $eventDispatcher;

        return $this;
    }

    /**
     * Returns the event dispatcher, creating a default EventDispatcher on first use.
     *
     * @return EventDispatcherInterface
     */
    public function getDispatcher()
    {
        if (!$this->dispatcher) {
            $this->dispatcher = new EventDispatcher();
        }
        return $this->dispatcher;
    }

    /**
     * Requests the resource for the given URI and applies the post-fetch filters.
     *
     * Dispatches SPIDER_CRAWL_PRE_REQUEST before the request and
     * SPIDER_CRAWL_POST_REQUEST at most once afterwards. Any \Exception raised
     * while requesting or filtering is reported through
     * SPIDER_CRAWL_ERROR_REQUEST and turned into a false return value.
     *
     * @param DiscoveredUri $uri
     * @return Resource|false
     */
    protected function fetchResource(DiscoveredUri $uri)
    {
        $this->dispatch(SpiderEvents::SPIDER_CRAWL_PRE_REQUEST, new GenericEvent($this, array('uri' => $uri)));

        // Remembers whether POST_REQUEST was already dispatched, so that an
        // exception thrown after a successful request (by a listener or a
        // post-fetch filter) does not cause the event to be dispatched twice.
        $postRequestDispatched = false;

        try {
            $resource = $this->getRequestHandler()->request($uri);

            // Set the flag before dispatching: a throwing listener must not be invoked again.
            $postRequestDispatched = true;
            $this->dispatch(SpiderEvents::SPIDER_CRAWL_POST_REQUEST, new GenericEvent($this, array('uri' => $uri)));

            if ($this->matchesPostfetchFilter($resource)) {
                return false;
            }

            return $resource;
        } catch (\Exception $e) {
            $this->dispatch(
                SpiderEvents::SPIDER_CRAWL_ERROR_REQUEST,
                new GenericEvent($this, array('uri' => $uri, 'message' => $e->getMessage()))
            );

            // necessary until we have 'finally'
            if (!$postRequestDispatched) {
                $this->dispatch(
                    SpiderEvents::SPIDER_CRAWL_POST_REQUEST,
                    new GenericEvent($this, array('uri' => $uri))
                );
            }

            return false;
        }
    }

    /**
     * Checks the resource against the registered post-fetch filters.
     *
     * Dispatches SPIDER_CRAWL_FILTER_POSTFETCH for the first filter that matches.
     *
     * @param Resource $resource
     * @return bool true as soon as one filter matches, false when none does
     */
    private function matchesPostfetchFilter(Resource $resource)
    {
        foreach ($this->postFetchFilters as $filter) {
            if ($filter->match($resource)) {
                $this->dispatch(
                    SpiderEvents::SPIDER_CRAWL_FILTER_POSTFETCH,
                    new GenericEvent($this, array('uri' => $resource->getUri()))
                );
                return true;
            }
        }
        return false;
    }

    /**
     * @param PersistenceHandlerInterface $persistenceHandler
     */
    public function setPersistenceHandler(PersistenceHandlerInterface $persistenceHandler)
    {
        $this->persistenceHandler = $persistenceHandler;
    }

    /**
     * Returns the persistence handler, creating a MemoryPersistenceHandler on first use.
     *
     * @return PersistenceHandlerInterface
     */
    public function getPersistenceHandler()
    {
        if (!$this->persistenceHandler) {
            $this->persistenceHandler = new MemoryPersistenceHandler();
        }

        return $this->persistenceHandler;
    }

    /**
     * @param RequestHandlerInterface $requestHandler
     */
    public function setRequestHandler(RequestHandlerInterface $requestHandler)
    {
        $this->requestHandler = $requestHandler;
    }

    /**
     * Returns the request handler, creating a GuzzleRequestHandler on first use.
     *
     * @return RequestHandlerInterface
     */
    public function getRequestHandler()
    {
        if (!$this->requestHandler) {
            $this->requestHandler = new GuzzleRequestHandler();
        }

        return $this->requestHandler;
    }
}
187