Completed
Pull Request — master (#244)
by Benjamin
01:37
created

CollectionCrawlQueue::getFirstPendingUrl()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
rs 10
cc 1
nc 1
nop 0
<?php

namespace Spatie\Crawler\CrawlQueue;

use Spatie\Crawler\CrawlUrl;
use Spatie\Crawler\Exception\UrlNotFound;

/**
 * Crawl queue that keeps its state in two in-memory collections.
 *
 * `$urls` holds every URL that was ever added and is keyed by the URL cast to
 * string (see add()); `$pendingUrls` holds the URLs that have been added but
 * not yet marked as processed.
 */
class CollectionCrawlQueue implements CrawlQueue
{
    /**
     * Every URL added to the queue, keyed by `(string) $crawlUrl->url`.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $urls;

    /**
     * URLs that were added and have not been marked as processed yet.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $pendingUrls;

    /**
     * Start with an empty queue.
     */
    public function __construct()
    {
        $this->urls = collect();
        $this->pendingUrls = collect();
    }

    /**
     * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection all added URLs, keyed by URL string
     */
    public function getUrls()
    {
        return $this->urls;
    }

    /**
     * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection URLs not yet marked as processed
     */
    public function getPendingUrls()
    {
        return $this->pendingUrls;
    }

    /**
     * Add a URL to the queue unless a URL with the same string form is already known.
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return \Spatie\Crawler\CrawlQueue\CrawlQueue this queue, for chaining
     */
    public function add(CrawlUrl $url): CrawlQueue
    {
        if (! $this->has($url)) {
            // Keying by the URL string is what lets get() and has() use a direct lookup.
            $this->urls[(string) $url->url] = $url;
            $this->pendingUrls->push($url);
        }

        return $this;
    }

    /**
     * @return bool true when at least one URL is still pending
     */
    public function hasPendingUrls(): bool
    {
        return ! $this->pendingUrls->isEmpty();
    }

    /**
     * Fetch a previously added URL by its string form.
     *
     * @param string $url
     *
     * @return \Spatie\Crawler\CrawlUrl
     *
     * @throws \Spatie\Crawler\Exception\UrlNotFound when no URL was added under that key
     */
    public function get(string $url): CrawlUrl
    {
        if (! isset($this->urls[$url])) {
            throw new UrlNotFound("Crawl url $url not found in collection");
        }

        return $this->urls[$url];
    }

    /**
     * A URL counts as processed when it was added at some point and is no longer pending.
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return bool
     */
    public function hasAlreadyBeenProcessed(CrawlUrl $url): bool
    {
        // Cheap keyed lookup first: a URL that was never added cannot have been processed.
        if (! isset($this->urls[(string) $url->url])) {
            return false;
        }

        // The pending collection is not keyed by URL, so it still has to be scanned.
        return ! $this->contains($this->pendingUrls, $url);
    }

    /**
     * Remove every pending entry whose URL string equals the given one.
     *
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     */
    public function markAsProcessed(CrawlUrl $crawlUrl)
    {
        $processedUrl = (string) $crawlUrl->url;

        $this->pendingUrls = $this->pendingUrls->reject(
            static function (CrawlUrl $pending) use ($processedUrl) {
                return (string) $pending->url === $processedUrl;
            }
        );
    }

    /**
     * Tell whether a URL has ever been added to the queue.
     *
     * Uses the same keyed lookup as get() instead of scanning the whole
     * collection; add() calls this on every insertion, so a linear scan here
     * would make filling the queue quadratic.
     *
     * @param CrawlUrl|\Psr\Http\Message\UriInterface $crawlUrl
     *
     * @return bool
     */
    public function has($crawlUrl): bool
    {
        if (! $crawlUrl instanceof CrawlUrl) {
            $crawlUrl = CrawlUrl::create($crawlUrl);
        }

        return isset($this->urls[(string) $crawlUrl->url]);
    }

    /**
     * @return \Spatie\Crawler\CrawlUrl|null the first pending URL, or null when nothing is pending
     */
    public function getFirstPendingUrl()
    {
        return $this->pendingUrls->first();
    }

    /**
     * Linear search for a crawl URL with the same string form.
     *
     * @param \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection $collection
     * @param \Spatie\Crawler\CrawlUrl                                             $searchCrawlUrl
     *
     * @return bool
     */
    protected function contains($collection, CrawlUrl $searchCrawlUrl): bool
    {
        $needle = (string) $searchCrawlUrl->url;

        foreach ($collection as $candidate) {
            if ((string) $candidate->url === $needle) {
                return true;
            }
        }

        return false;
    }
}
118