Completed
Pull Request — master (#16)
by Matthijs
05:13
created

DiscovererSet::isAtMaxDepth()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 7
rs 9.4286
cc 2
eloc 4
nc 2
nop 1
1
<?php
2
3
namespace VDB\Spider\Discoverer;
4
5
use VDB\Spider\Resource;
6
use VDB\Spider\Filter\PreFetchFilterInterface;
7
use VDB\Spider\Uri\DiscoveredUri;
8
9
class DiscovererSet implements \IteratorAggregate
10
{
11
    /**
12
     * @var DiscovererInterface[] registered discoverers, keyed by name and by alias
13
     */
14
    private $discoverers = array();
15
16
    /** @var PreFetchFilterInterface[] */
17
    private $filters = array();
18
19
    /**
20
     * @var int maximum crawl depth
21
     */
22
    public $maxDepth = 3;
23
24
    /**
25
     * @var array the list of already visited URIs with the depth they were discovered on as value
26
     */
27
    private $alreadySeenUris = array();
28
29
    /**
     * Registers the given discoverers.
     *
     * A string key is passed on to set() as the alias; an integer key (plain
     * list entry) registers the discoverer without an alias.
     *
     * @param DiscovererInterface[] $discoverers discoverers, optionally keyed by alias
     */
    public function __construct(array $discoverers = array())
    {
        foreach ($discoverers as $key => $instance) {
            if (is_int($key)) {
                $this->set($instance);
            } else {
                $this->set($instance, $key);
            }
        }
    }
35
36
    /**
     * Records a URI as seen, together with the depth it was found at.
     *
     * The normalized string form of the URI is used as the key. An existing
     * entry is never overwritten, so the first recorded depth is kept.
     *
     * @param DiscoveredUri $uri
     */
    private function markSeen(DiscoveredUri $uri)
    {
        $key = $uri->normalize()->toString();

        if (array_key_exists($key, $this->alreadySeenUris)) {
            return;
        }

        $this->alreadySeenUris[$key] = $uri->getDepthFound();
    }
51
52
    /**
     * @param DiscoveredUri $uri
     *
     * @return bool true when the depth the URI was found at is identical (===) to $maxDepth
     */
    private function isAtMaxDepth(DiscoveredUri $uri)
    {
        return $uri->getDepthFound() === $this->maxDepth;
    }
62
63
    /**
     * Runs the registered discoverers on a resource and returns the new URIs found.
     *
     * The resource's own URI is marked as seen first. A resource that was found
     * at the maximum depth yields no URIs. Otherwise the combined results of all
     * discoverers are normalized, de-duplicated, stripped of URIs that were
     * already seen and passed through the registered filters. Every remaining
     * URI gets a depth one greater than the resource's and is marked as seen.
     *
     * @param Resource $resource
     * @return DiscoveredUri[] the surviving URIs; array keys are not re-indexed
     */
    public function discover(Resource $resource)
    {
        $resourceUri = $resource->getUri();
        $this->markSeen($resourceUri);

        if ($this->isAtMaxDepth($resourceUri)) {
            return [];
        }

        $discoveredUris = [];
        $alreadyRun = [];

        foreach ($this->discoverers as $discoverer) {
            // set() stores an aliased discoverer under both its name and its
            // alias, so the same instance can appear twice; run it only once.
            $id = spl_object_hash($discoverer);
            if (isset($alreadyRun[$id])) {
                continue;
            }
            $alreadyRun[$id] = true;

            $discoveredUris = array_merge($discoveredUris, $discoverer->discover($resource));
        }

        $this->normalize($discoveredUris);
        $this->removeDuplicates($discoveredUris);
        $this->filterAlreadySeen($discoveredUris);
        $this->filter($discoveredUris);

        // Every URI discovered on this resource sits one level below it.
        $depth = $resourceUri->getDepthFound() + 1;

        foreach ($discoveredUris as $uri) {
            $uri->setDepthFound($depth);
            $this->markSeen($uri);
        }

        return $discoveredUris;
    }
93
94
    /**
     * Sets a discoverer.
     *
     * The discoverer is always stored under its own name and, when an alias is
     * given, under that alias as well. It is then handed this set through
     * setDiscovererSet().
     *
     * @param DiscovererInterface $discoverer The discoverer instance
     * @param string|null         $alias      An alias, or null for none
     */
    public function set(DiscovererInterface $discoverer, $alias = null)
    {
        $name = $discoverer->getName();
        $this->discoverers[$name] = $discoverer;

        if ($alias !== null) {
            $this->discoverers[$alias] = $discoverer;
        }

        $discoverer->setDiscovererSet($this);
    }
109
110
    /**
     * Appends a filter to the list of filters held by this set.
     *
     * @param PreFetchFilterInterface $filter
     */
    public function addFilter(PreFetchFilterInterface $filter)
    {
        array_push($this->filters, $filter);
    }
117
118
    /**
     * Tells whether a discoverer is registered under the given key.
     *
     * @param string $name The discoverer name
     *
     * @return bool true if the discoverer is defined, false otherwise
     */
    public function has($name)
    {
        $defined = isset($this->discoverers[$name]);

        return $defined;
    }
129
130
    /**
     * Gets a discoverer.
     *
     * @param string $name The discoverer name
     *
     * @return DiscovererInterface The discoverer instance
     *
     * @throws \InvalidArgumentException if the discoverer is not defined
     */
    public function get($name)
    {
        if ($this->has($name)) {
            return $this->discoverers[$name];
        }

        $message = sprintf('The discoverer "%s" is not defined.', $name);

        throw new \InvalidArgumentException($message);
    }
147
148
    /**
     * @return \ArrayIterator iterator over the stored discoverers
     */
    public function getIterator()
    {
        $iterator = new \ArrayIterator($this->discoverers);

        return $iterator;
    }
152
153
    /**
     * Calls normalize() on every URI in the list.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function normalize(array &$discoveredUris)
    {
        foreach (array_keys($discoveredUris) as $key) {
            $discoveredUris[$key]->normalize();
        }
    }
162
163
    /**
     * Removes every URI whose string form is already a key of $alreadySeenUris.
     *
     * Keys of the remaining entries are left as they are.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function filterAlreadySeen(array &$discoveredUris)
    {
        foreach (array_keys($discoveredUris) as $key) {
            $uriString = $discoveredUris[$key]->toString();

            if (array_key_exists($uriString, $this->alreadySeenUris)) {
                unset($discoveredUris[$key]);
            }
        }
    }
174
175
    /**
     * Removes every URI that is matched by at least one registered filter.
     *
     * Keys of the remaining entries are left as they are.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function filter(array &$discoveredUris)
    {
        // Iterate by value: the elements are objects, so no reference is needed.
        foreach ($discoveredUris as $k => $uri) {
            foreach ($this->filters as $filter) {
                if ($filter->match($uri)) {
                    unset($discoveredUris[$k]);
                    // The URI is gone; further filters cannot change the outcome.
                    break;
                }
            }
        }
    }
188
189
    /**
     * Drops URIs whose string form duplicates that of an earlier entry.
     *
     * The first occurrence of each URI string is kept, and keys of the
     * remaining entries are left as they are.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function removeDuplicates(array &$discoveredUris)
    {
        // Map each key to the string form of its URI (array_map keeps the keys
        // when given a single array).
        $uriStrings = array_map(
            function ($uri) {
                return $uri->toString();
            },
            $discoveredUris
        );

        // array_unique keeps the key of the first occurrence of every string;
        // retain only the entries whose key survived.
        $discoveredUris = array_intersect_key($discoveredUris, array_unique($uriStrings));
    }
211
}
212