Completed
Push — master ( 847f6a...e930d8 )
by Freek
01:38
created

CollectionCrawlQueue::getUrls()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
1
<?php
2
3
namespace Spatie\Crawler\CrawlQueue;
4
5
use Spatie\Crawler\CrawlUrl;
6
use Spatie\Crawler\Exception\UrlNotFoundByIndex;
7
8
/**
 * Crawl queue that keeps its state in two in-memory collections: one holding
 * every URL that was ever added, and one holding the URLs still waiting to be
 * processed.
 */
class CollectionCrawlQueue implements CrawlQueue
{
    /**
     * Every URL that has been added to the queue.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $urls;

    /**
     * URLs that were added but have not been marked as processed yet.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $pendingUrls;

    /**
     * Start with two empty collections.
     */
    public function __construct()
    {
        $this->urls = collect();

        $this->pendingUrls = collect();
    }

    /**
     * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    public function getUrls()
    {
        return $this->urls;
    }

    /**
     * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    public function getPendingUrls()
    {
        return $this->pendingUrls;
    }

    /**
     * Add a URL to the queue unless an equal URL is already known.
     *
     * A newly added URL receives the key it got in the full collection as its
     * id and is also registered as pending.
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return \Spatie\Crawler\CrawlQueue\CrawlQueue This queue, for chaining.
     */
    public function add(CrawlUrl $url): CrawlQueue
    {
        if ($this->has($url)) {
            return $this;
        }

        $this->urls->push($url);

        // The id is the key the URL was just stored under in $this->urls.
        $url->setId($this->urls->keys()->last());
        $this->pendingUrls->push($url);

        return $this;
    }

    /**
     * Whether at least one URL is still waiting to be processed.
     */
    public function hasPendingUrls(): bool
    {
        return (bool) $this->pendingUrls->count();
    }

    /**
     * Look up a previously added URL by its id.
     *
     * @param mixed $id
     *
     * @return \Spatie\Crawler\CrawlUrl
     *
     * @throws \Spatie\Crawler\Exception\UrlNotFoundByIndex When no URL is stored under the given id.
     */
    public function getUrlById($id): CrawlUrl
    {
        // Re-index once and reuse the result for both the check and the read.
        $indexedUrls = $this->urls->values();

        if (! isset($indexedUrls[$id])) {
            throw new UrlNotFoundByIndex("#{$id} crawl url not found in collection");
        }

        return $indexedUrls[$id];
    }

    /**
     * A URL counts as processed when it is known to the queue but no longer pending.
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return bool
     */
    public function hasAlreadyBeenProcessed(CrawlUrl $url): bool
    {
        return ! $this->contains($this->pendingUrls, $url) && $this->contains($this->urls, $url);
    }

    /**
     * Remove every pending entry whose URL string equals the given one.
     *
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     */
    public function markAsProcessed(CrawlUrl $crawlUrl)
    {
        $this->pendingUrls = $this->pendingUrls
            ->reject(function (CrawlUrl $crawlUrlItem) use ($crawlUrl) {
                return (string) $crawlUrlItem->url === (string) $crawlUrl->url;
            });
    }

    /**
     * Whether the given URL has ever been added to the queue.
     *
     * @param CrawlUrl|\Psr\Http\Message\UriInterface|string $crawlUrl
     *
     * @return bool
     */
    public function has($crawlUrl): bool
    {
        if ($crawlUrl instanceof CrawlUrl) {
            return $this->contains($this->urls, $crawlUrl);
        }

        // Static analysis reports that CrawlUrl::create() only accepts a
        // UriInterface, so a plain string must not be wrapped in a CrawlUrl.
        // URLs are matched on their string form anyway, so compare that
        // directly; this covers both UriInterface instances and strings.
        $searchUrl = (string) $crawlUrl;

        foreach ($this->urls as $queuedUrl) {
            if ((string) $queuedUrl->url === $searchUrl) {
                return true;
            }
        }

        return false;
    }

    /** @return \Spatie\Crawler\CrawlUrl|null */
    public function getFirstPendingUrl()
    {
        return $this->pendingUrls->first();
    }

    /**
     * Whether the collection holds an entry whose URL string equals that of
     * the given crawl URL.
     *
     * @param \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection $collection
     * @param \Spatie\Crawler\CrawlUrl                                             $searchCrawlUrl
     *
     * @return bool
     */
    protected function contains($collection, CrawlUrl $searchCrawlUrl): bool
    {
        foreach ($collection as $crawlUrl) {
            if ((string) $crawlUrl->url === (string) $searchCrawlUrl->url) {
                return true;
            }
        }

        return false;
    }
}
120