DiscovererSet::filterAlreadySeen()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 3
nc 3
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace VDB\Spider\Discoverer;
4
5
use VDB\Spider\Resource;
6
use VDB\Spider\Filter\PreFetchFilterInterface;
7
use VDB\Spider\Uri\DiscoveredUri;
8
9
/**
 * Runs a set of discoverers against a fetched resource and returns the URIs
 * that are new, unique and not rejected by any registered pre-fetch filter.
 *
 * The set remembers every URI it has handed out (and every resource URI it
 * was asked to process) so the same URI is never returned twice.
 */
class DiscovererSet
{
    /**
     * @var DiscovererInterface[] registered discoverers, keyed by their name
     *                            and, when one was given, also by their alias
     */
    private $discoverers = array();

    /** @var PreFetchFilterInterface[] filters applied to every discovered URI */
    private $filters = array();

    /**
     * @var int maximum crawl depth
     */
    public $maxDepth = 3;

    /**
     * @var array the list of already visited URIs with the depth they were discovered on as value
     */
    private $alreadySeenUris = array();

    /**
     * @param DiscovererInterface[] $discoverers Discoverers to register; a
     *                                           string key is used as alias,
     *                                           integer keys are ignored
     */
    public function __construct(array $discoverers = array())
    {
        foreach ($discoverers as $alias => $discoverer) {
            $this->set($discoverer, is_int($alias) ? null : $alias);
        }
    }

    /**
     * Mark an Uri as already seen.
     *
     * If it already exists, it is not overwritten, since we want to keep the
     * first depth it was found at.
     *
     * @param DiscoveredUri $uri
     */
    private function markSeen(DiscoveredUri $uri)
    {
        $uriString = $uri->normalize()->toString();
        if (!array_key_exists($uriString, $this->alreadySeenUris)) {
            $this->alreadySeenUris[$uriString] = $uri->getDepthFound();
        }
    }

    /**
     * @param DiscoveredUri $uri
     *
     * @return bool Returns true if this URI was found at (or beyond) max depth
     */
    private function isAtMaxDepth(DiscoveredUri $uri)
    {
        // ">=" rather than "===": a URI found deeper than the limit (e.g.
        // because maxDepth was lowered during a crawl) must not be expanded.
        return $uri->getDepthFound() >= $this->maxDepth;
    }

    /**
     * Collects the URIs all registered discoverers find in the resource,
     * then normalizes, de-duplicates and filters them.
     *
     * Every returned URI gets a depth of "resource depth + 1" and is marked
     * as seen. Keys of the returned array are not guaranteed to be sequential.
     *
     * @param Resource $resource
     *
     * @return DiscoveredUri[] empty when the resource sits at max depth
     */
    public function discover(Resource $resource)
    {
        $resourceUri = $resource->getUri();
        $this->markSeen($resourceUri);

        if ($this->isAtMaxDepth($resourceUri)) {
            return [];
        }

        $discoveredUris = [];
        $invoked = [];

        foreach ($this->discoverers as $discoverer) {
            // set() stores an aliased discoverer under two keys; run each
            // instance only once.
            if (in_array($discoverer, $invoked, true)) {
                continue;
            }
            $invoked[] = $discoverer;

            $discoveredUris = array_merge($discoveredUris, $discoverer->discover($resource));
        }

        $this->normalize($discoveredUris);
        $this->removeDuplicates($discoveredUris);
        $this->filterAlreadySeen($discoveredUris);
        $this->filter($discoveredUris);

        $depth = $resourceUri->getDepthFound() + 1;
        foreach ($discoveredUris as $uri) {
            $uri->setDepthFound($depth);
            $this->markSeen($uri);
        }

        return $discoveredUris;
    }

    /**
     * Sets a discoverer.
     *
     * The discoverer is always registered under its own name; when an alias
     * is given it is additionally registered under that alias.
     *
     * @param DiscovererInterface $discoverer The discoverer instance
     * @param string|null         $alias      An alias
     */
    public function set(DiscovererInterface $discoverer, $alias = null)
    {
        $this->discoverers[$discoverer->getName()] = $discoverer;
        if (null !== $alias) {
            $this->discoverers[$alias] = $discoverer;
        }
    }

    /**
     * @param PreFetchFilterInterface $filter
     */
    public function addFilter(PreFetchFilterInterface $filter)
    {
        $this->filters[] = $filter;
    }

    /**
     * Calls normalize() on every URI.
     *
     * The return value is discarded, so this presumably relies on
     * normalize() changing the URI object in place - TODO confirm.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function normalize(array &$discoveredUris)
    {
        foreach ($discoveredUris as $uri) {
            $uri->normalize();
        }
    }

    /**
     * Removes every URI whose string form was already marked as seen.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function filterAlreadySeen(array &$discoveredUris)
    {
        foreach ($discoveredUris as $k => $uri) {
            if (array_key_exists($uri->toString(), $this->alreadySeenUris)) {
                unset($discoveredUris[$k]);
            }
        }
    }

    /**
     * Removes every URI that is matched by at least one registered filter.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function filter(array &$discoveredUris)
    {
        foreach ($discoveredUris as $k => $uri) {
            foreach ($this->filters as $filter) {
                if ($filter->match($uri)) {
                    unset($discoveredUris[$k]);
                    // Already rejected; the remaining filters cannot change that.
                    break;
                }
            }
        }
    }

    /**
     * Removes URIs whose string form occurred earlier in the list, keeping
     * the first occurrence of each.
     *
     * @param DiscoveredUri[] $discoveredUris
     */
    private function removeDuplicates(array &$discoveredUris)
    {
        $seen = array();

        foreach ($discoveredUris as $k => $uri) {
            $uriString = $uri->toString();

            if (isset($seen[$uriString])) {
                unset($discoveredUris[$k]);
            } else {
                $seen[$uriString] = true;
            }
        }
    }
}
172