Completed
Pull Request — master (#237)
by Benjamin
01:16
created

CollectionCrawlQueue::retry()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 9.8666
c 0
b 0
f 0
cc 3
nc 3
nop 1
1
<?php
2
3
namespace Spatie\Crawler\CrawlQueue;
4
5
use Spatie\Crawler\CrawlUrl;
6
use Spatie\Crawler\Exception\UrlNotFoundByIndex;
7
8
/**
 * In-memory crawl queue backed by two collections: every URL that was ever
 * added, and the subset of those that is still waiting to be processed.
 *
 * Two crawl URLs are treated as the same entry when the string form of their
 * `url` property is identical, regardless of object identity.
 */
class CollectionCrawlQueue implements RetryableCrawlQueue
{
    /**
     * Every URL that has been added to the queue.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $urls;

    /**
     * URLs that were added (or retried) and not yet marked as processed.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $pendingUrls;

    /**
     * Start with an empty queue.
     */
    public function __construct()
    {
        $this->urls = collect();

        $this->pendingUrls = collect();
    }

    /**
     * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection all added URLs
     */
    public function getUrls()
    {
        return $this->urls;
    }

    /**
     * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection URLs still pending
     */
    public function getPendingUrls()
    {
        return $this->pendingUrls;
    }

    /**
     * Add a URL to the queue unless an entry with the same URL already exists.
     *
     * A newly added URL is given, as its id, the key it received in the
     * collection of all URLs, and is also queued as pending.
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return \Spatie\Crawler\CrawlQueue\CrawlQueue this queue, for chaining
     */
    public function add(CrawlUrl $url): CrawlQueue
    {
        if ($this->has($url)) {
            return $this;
        }

        $this->urls->push($url);

        // The id is the key the URL was just stored under.
        $url->setId($this->urls->keys()->last());
        $this->pendingUrls->push($url);

        return $this;
    }

    /**
     * @return bool true when at least one URL is still pending
     */
    public function hasPendingUrls(): bool
    {
        return $this->pendingUrls->count() > 0;
    }

    /**
     * Look up a URL by its position in the re-indexed list of all URLs.
     *
     * @param mixed $id
     *
     * @return \Spatie\Crawler\CrawlUrl
     *
     * @throws \Spatie\Crawler\Exception\UrlNotFoundByIndex when nothing is stored under $id
     */
    public function getUrlById($id): CrawlUrl
    {
        // Re-index once and reuse the result for both the check and the read.
        $indexedUrls = $this->urls->values();

        if (! isset($indexedUrls[$id])) {
            throw new UrlNotFoundByIndex("#{$id} crawl url not found in collection");
        }

        return $indexedUrls[$id];
    }

    /**
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return bool true when the URL is known to the queue but no longer pending
     */
    public function hasAlreadyBeenProcessed(CrawlUrl $url): bool
    {
        return ! $this->contains($this->pendingUrls, $url) && $this->contains($this->urls, $url);
    }

    /**
     * Remove every pending entry whose URL matches the given one.
     *
     * The URL stays in the collection of all URLs.
     *
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     */
    public function markAsProcessed(CrawlUrl $crawlUrl)
    {
        $processedUrl = (string) $crawlUrl->url;

        $this->pendingUrls = $this->pendingUrls
            ->reject(function (CrawlUrl $crawlUrlItem) use ($processedUrl) {
                return (string) $crawlUrlItem->url === $processedUrl;
            });
    }

    /**
     * Tell whether a URL has ever been added to the queue.
     *
     * @param CrawlUrl|\Psr\Http\Message\UriInterface $crawlUrl
     *
     * @return bool
     */
    public function has($crawlUrl): bool
    {
        // Anything that is not already a CrawlUrl is wrapped in one first.
        if (! $crawlUrl instanceof CrawlUrl) {
            $crawlUrl = CrawlUrl::create($crawlUrl);
        }

        return $this->contains($this->urls, $crawlUrl);
    }

    /** @return \Spatie\Crawler\CrawlUrl|null the first pending URL, if any */
    public function getFirstPendingUrl()
    {
        return $this->pendingUrls->first();
    }

    /**
     * Tell whether the collection holds an entry with the same URL string.
     *
     * @param \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection $collection
     * @param \Spatie\Crawler\CrawlUrl                                             $searchCrawlUrl
     *
     * @return bool
     */
    protected function contains($collection, CrawlUrl $searchCrawlUrl): bool
    {
        // The needle does not change while iterating, so cast it only once.
        $needle = (string) $searchCrawlUrl->url;

        foreach ($collection as $crawlUrl) {
            if ((string) $crawlUrl->url === $needle) {
                return true;
            }
        }

        return false;
    }

    /**
     * Put an already known URL back on the pending list.
     *
     * Nothing happens when the URL was never added, or when it is still
     * pending.
     *
     * NOTE(review): the instance passed in is queued as-is rather than the
     * instance stored in the collection of all URLs; confirm that callers
     * pass an instance that already carries its id.
     *
     * @param CrawlUrl $crawlUrl
     *
     * @return void
     */
    public function retry(CrawlUrl $crawlUrl)
    {
        if (! $this->contains($this->urls, $crawlUrl)) {
            return;
        }

        if ($this->contains($this->pendingUrls, $crawlUrl)) {
            return;
        }

        $this->pendingUrls->push($crawlUrl);
    }
}
138