1 | <?php |
||
2 | /** |
||
3 | * @author Matthijs van den Bos <[email protected]> |
||
4 | * @copyright 2013 Matthijs van den Bos |
||
5 | */ |
||
6 | |||
7 | namespace VDB\Spider\QueueManager; |
||
8 | |||
9 | use VDB\Spider\Uri\DiscoveredUri; |
||
10 | use VDB\Spider\Exception\QueueException; |
||
11 | use Symfony\Component\EventDispatcher\Event; |
||
12 | use Symfony\Component\EventDispatcher\EventDispatcher; |
||
13 | use Symfony\Component\EventDispatcher\EventDispatcherInterface; |
||
14 | use Symfony\Component\EventDispatcher\GenericEvent; |
||
15 | use VDB\Spider\Event\SpiderEvents; |
||
16 | |||
/**
 * Queue manager that keeps the URIs still to be crawled in a plain PHP array.
 *
 * URIs are appended with addUri() and handed out again by next(). Which end of
 * the array next() takes from depends on the configured traversal algorithm.
 * The ALGORITHM_* constants are not declared in this class; presumably they
 * come from QueueManagerInterface (confirm against that interface).
 */
class InMemoryQueueManager implements QueueManagerInterface
{
    /**
     * @var int Upper bound on the number of URIs accepted by addUri().
     *          0 disables the limit.
     */
    public $maxQueueSize = 0;

    /**
     * @var int Running total of URIs accepted by addUri(). It is only ever
     *          incremented: next() does not lower it, so it is a count of
     *          enqueue operations rather than of the URIs currently waiting.
     */
    private $currentQueueSize = 0;

    /** @var DiscoveredUri[] URIs waiting to be processed, in insertion order */
    private $traversalQueue = array();

    /** @var int One of the ALGORITHM_* class constants; depth-first by default */
    private $traversalAlgorithm = self::ALGORITHM_DEPTH_FIRST;

    /** @var EventDispatcherInterface|null Created lazily by getDispatcher() when not injected */
    private $dispatcher;

    /**
     * Selects the order in which next() hands out queued URIs.
     *
     * The value is stored as given; it is only checked when next() is called.
     *
     * TODO: This should be extracted to a Strategy pattern
     *
     * @param int $traversalAlgorithm One of the ALGORITHM_* class constants
     */
    public function setTraversalAlgorithm($traversalAlgorithm)
    {
        $this->traversalAlgorithm = $traversalAlgorithm;
    }

    /**
     * @return int The currently configured traversal algorithm
     */
    public function getTraversalAlgorithm()
    {
        return $this->traversalAlgorithm;
    }

    /**
     * Injects the dispatcher used to announce enqueued URIs.
     *
     * @param EventDispatcherInterface $eventDispatcher
     * @return $this
     */
    public function setDispatcher(EventDispatcherInterface $eventDispatcher)
    {
        $this->dispatcher = $eventDispatcher;

        return $this;
    }

    /**
     * Returns the event dispatcher, creating a default EventDispatcher the
     * first time it is requested if none was injected.
     *
     * @return EventDispatcherInterface
     */
    public function getDispatcher()
    {
        if ($this->dispatcher) {
            return $this->dispatcher;
        }

        $this->dispatcher = new EventDispatcher();

        return $this->dispatcher;
    }

    /**
     * Appends a URI to the queue and dispatches a post-enqueue event.
     *
     * @param DiscoveredUri $uri The URI to enqueue
     * @throws QueueException When a limit is set and the number of URIs
     *                        enqueued so far has reached it
     */
    public function addUri(DiscoveredUri $uri)
    {
        // Loose comparison on purpose: $maxQueueSize is public and untyped.
        $hasLimit = $this->maxQueueSize != 0;

        if ($hasLimit && $this->currentQueueSize >= $this->maxQueueSize) {
            $message = 'Maximum Queue Size of ' . $this->maxQueueSize . ' reached';

            throw new QueueException($message);
        }

        $this->currentQueueSize += 1;
        $this->traversalQueue[] = $uri;

        // NOTE(review): (event name, event object) is the argument order of the
        // older Symfony EventDispatcher API - confirm against the installed version.
        $dispatcher = $this->getDispatcher();
        $event = new GenericEvent($this, array('uri' => $uri));

        $dispatcher->dispatch(SpiderEvents::SPIDER_CRAWL_POST_ENQUEUE, $event);
    }

    /**
     * Removes and returns the next URI to process.
     *
     * Depth-first takes the most recently added URI (end of the array);
     * breadth-first takes the oldest one (front of the array).
     *
     * @return DiscoveredUri|null The next URI, or null when the queue is empty
     * @throws \LogicException When the configured algorithm matches neither constant
     */
    public function next()
    {
        if (static::ALGORITHM_DEPTH_FIRST === $this->traversalAlgorithm) {
            return array_pop($this->traversalQueue);
        }

        if (static::ALGORITHM_BREADTH_FIRST === $this->traversalAlgorithm) {
            return array_shift($this->traversalQueue);
        }

        throw new \LogicException('No search algorithm set');
    }
}
||
103 |
This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.
If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is WordPress. Please note the @ignore annotation hint above.