ArrayCrawlQueue   A
last analyzed

Complexity

Total Complexity 14

Size/Duplication

Total Lines 93
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 3

Importance

Changes 0
Metric Value
wmc 14
lcom 1
cbo 3
dl 0
loc 93
rs 10
c 0
b 0
f 0

7 Methods

Rating   Name   Duplication   Size   Complexity  
A getFirstPendingUrl() 0 8 2
A add() 0 13 2
A hasPendingUrls() 0 4 1
A getUrlById() 0 8 2
A hasAlreadyBeenProcessed() 0 14 3
A markAsProcessed() 0 6 1
A has() 0 12 3
1
<?php
2
3
namespace Spatie\Crawler\CrawlQueue;
4
5
use Psr\Http\Message\UriInterface;
6
use Spatie\Crawler\CrawlUrl;
7
use Spatie\Crawler\Exception\InvalidUrl;
8
use Spatie\Crawler\Exception\UrlNotFoundByIndex;
9
10
/**
 * Crawl queue that keeps its state in plain PHP arrays.
 *
 * Two maps are maintained, both keyed by the string form of the URL:
 * one holding every URL that was ever added and one holding only the
 * URLs that have not been marked as processed yet.
 */
class ArrayCrawlQueue implements CrawlQueue
{
    /**
     * Every URL added to the queue so far, keyed by its string form.
     *
     * @var CrawlUrl[]
     */
    protected $urls = [];

    /**
     * URLs that have not been marked as processed, keyed by their string form.
     *
     * @var CrawlUrl[]
     */
    protected $pendingUrls = [];

    /**
     * Registers a URL with the queue; a URL that is already known is ignored.
     *
     * A newly registered URL receives its own string form as id and is
     * placed in the pending set.
     *
     * @param CrawlUrl $url
     *
     * @return CrawlQueue This queue, to allow chaining.
     */
    public function add(CrawlUrl $url) : CrawlQueue
    {
        $key = (string) $url->url;

        // Already known (pending or processed): nothing to do.
        if (isset($this->urls[$key])) {
            return $this;
        }

        $url->setId($key);

        $this->urls[$key] = $url;
        $this->pendingUrls[$key] = $url;

        return $this;
    }

    /**
     * Tells whether at least one URL is still waiting to be processed.
     *
     * @return bool
     */
    public function hasPendingUrls() : bool
    {
        return ! empty($this->pendingUrls);
    }

    /**
     * Looks up a previously added URL by its id.
     *
     * @param mixed $id Key under which the URL was stored by add().
     *
     * @return CrawlUrl
     *
     * @throws UrlNotFoundByIndex When no URL is stored under the given id.
     */
    public function getUrlById($id) : CrawlUrl
    {
        if (isset($this->urls[$id])) {
            return $this->urls[$id];
        }

        throw new UrlNotFoundByIndex("Crawl url {$id} not found in collection.");
    }

    /**
     * Tells whether the URL is known to the queue and no longer pending.
     *
     * @param CrawlUrl $url
     *
     * @return bool False for URLs that are still pending or were never added.
     */
    public function hasAlreadyBeenProcessed(CrawlUrl $url) : bool
    {
        $key = (string) $url->url;

        return isset($this->urls[$key]) && ! isset($this->pendingUrls[$key]);
    }

    /**
     * Removes the URL from the pending set; it stays in the set of known URLs.
     *
     * @param CrawlUrl $crawlUrl
     */
    public function markAsProcessed(CrawlUrl $crawlUrl)
    {
        unset($this->pendingUrls[(string) $crawlUrl->url]);
    }

    /**
     * Tells whether the given URL has ever been added to the queue.
     *
     * @param CrawlUrl|UriInterface $crawlUrl
     *
     * @return bool
     */
    public function has($crawlUrl) : bool
    {
        if ($crawlUrl instanceof CrawlUrl) {
            return isset($this->urls[(string) $crawlUrl->url]);
        }

        if ($crawlUrl instanceof UriInterface) {
            return isset($this->urls[(string) $crawlUrl]);
        }

        // Neither supported type: delegate to the exception's named constructor.
        throw InvalidUrl::unexpectedType($crawlUrl);
    }

    /**
     * Returns the first URL in the pending set, or null when none is pending.
     *
     * @return CrawlUrl|null
     */
    public function getFirstPendingUrl() : ?CrawlUrl
    {
        $first = null;

        // Iterating instead of reset() leaves the array's internal pointer alone.
        foreach ($this->pendingUrls as $candidate) {
            $first = $candidate;

            break;
        }

        return $first;
    }
}
103