Completed
Push — master ( c5a351...6e1b6f )
by Freek
01:19
created

ArrayCrawlQueue::has()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 17
rs 9.7
c 0
b 0
f 0
cc 4
nc 3
nop 1
1
<?php
2
3
namespace Spatie\Crawler\CrawlQueue;
4
5
use TypeError;
6
use Spatie\Crawler\CrawlUrl;
7
use Psr\Http\Message\UriInterface;
8
use Spatie\Crawler\Exception\UrlNotFoundByIndex;
9
10
/**
 * Crawl queue implemented with plain in-memory arrays.
 *
 * Every URL that was ever added is kept in $urls; URLs that still have to be
 * processed are additionally kept in $pendingUrls. Both arrays are keyed by
 * the string form of the URL, so adding the same URL twice has no effect.
 */
class ArrayCrawlQueue implements CrawlQueue
{
    /**
     * All known URLs, indexed by URL string.
     *
     * @var CrawlUrl[]
     */
    protected $urls = [];

    /**
     * Pending URLs, indexed by URL string.
     *
     * @var CrawlUrl[]
     */
    protected $pendingUrls = [];

    /**
     * Add a URL to the queue unless it is already known.
     *
     * A newly added URL receives its string form as id and is registered as
     * both known and pending. A URL that is already known is ignored.
     *
     * @param CrawlUrl $url
     *
     * @return CrawlQueue This queue, to allow chaining.
     */
    public function add(CrawlUrl $url) : CrawlQueue
    {
        $urlString = (string) $url->url;

        if (! isset($this->urls[$urlString])) {
            $url->setId($urlString);

            $this->urls[$urlString] = $url;
            $this->pendingUrls[$urlString] = $url;
        }

        return $this;
    }

    /**
     * Tell whether at least one URL is still waiting to be processed.
     *
     * @return bool
     */
    public function hasPendingUrls() : bool
    {
        // A non-empty array casts to true, an empty one to false.
        return (bool) $this->pendingUrls;
    }

    /**
     * Look up a known URL by the id it was given in add().
     *
     * @param mixed $id Key of the URL in the queue; add() uses the URL string.
     *
     * @return CrawlUrl
     *
     * @throws UrlNotFoundByIndex When no URL is stored under the given id.
     */
    public function getUrlById($id) : CrawlUrl
    {
        if (! isset($this->urls[$id])) {
            throw new UrlNotFoundByIndex("Crawl url $id not found in collection.");
        }

        return $this->urls[$id];
    }

    /**
     * Tell whether a URL was added to the queue and has since been processed.
     *
     * A URL counts as processed when it is known to the queue but no longer
     * pending. URLs that were never added are reported as not processed.
     *
     * @param CrawlUrl $url
     *
     * @return bool
     */
    public function hasAlreadyBeenProcessed(CrawlUrl $url) : bool
    {
        $url = (string) $url->url;

        // The second check must look at the known URLs: testing the pending
        // list twice with opposite polarity could never yield true.
        return ! isset($this->pendingUrls[$url]) && isset($this->urls[$url]);
    }

    /**
     * Remove a URL from the pending list; it stays in the list of known URLs.
     *
     * @param CrawlUrl $crawlUrl
     */
    public function markAsProcessed(CrawlUrl $crawlUrl)
    {
        $url = (string) $crawlUrl->url;

        unset($this->pendingUrls[$url]);
    }

    /**
     * Tell whether a URL is known to the queue, pending or not.
     *
     * @param CrawlUrl|UriInterface $crawlUrl
     *
     * @return bool
     *
     * @throws TypeError When the argument is neither a CrawlUrl nor a UriInterface.
     */
    public function has($crawlUrl) : bool
    {
        if ($crawlUrl instanceof CrawlUrl) {
            $url = (string) $crawlUrl->url;
        } elseif ($crawlUrl instanceof UriInterface) {
            $url = (string) $crawlUrl;
        } else {
            // sprintf() produces the single message string handed to the
            // TypeError constructor; static analysers that report "too many
            // arguments" for this call are raising a false positive.
            throw new TypeError(sprintf(
                'Expected %s or %s, got %s.',
                CrawlUrl::class,
                UriInterface::class,
                is_object($crawlUrl) ? get_class($crawlUrl) : gettype($crawlUrl)
            ));
        }

        return isset($this->urls[$url]);
    }

    /**
     * Return the first pending URL in insertion order, or null when none is left.
     *
     * @return CrawlUrl|null
     */
    public function getFirstPendingUrl() : ?CrawlUrl
    {
        // Returning from the first iteration fetches the first element
        // without touching the array's internal pointer.
        foreach ($this->pendingUrls as $pendingUrl) {
            return $pendingUrl;
        }

        return null;
    }
}
103