Completed
Pull Request — master (#177)
by Peter
01:29
created

CollectionCrawlQueue::getFirstPendingUrl()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
rs 10
cc 1
nc 1
nop 0
1
<?php
2
3
namespace Spatie\Crawler\CrawlQueue;
4
5
use Spatie\Crawler\CrawlUrl;
6
use Spatie\Crawler\Exception\UrlNotFoundByIndex;
7
8
/**
 * Crawl queue that keeps its state in two in-memory collections: every URL
 * that was ever added, and the subset that has not been marked as processed.
 *
 * URLs are considered equal when their string representations are identical.
 */
class CollectionCrawlQueue implements CrawlQueue
{
    /**
     * Every URL that has been added to the queue.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $urls;

    /**
     * URLs that were added and have not yet been marked as processed.
     *
     * @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
     */
    protected $pendingUrls;

    public function __construct()
    {
        $this->urls = collect();

        $this->pendingUrls = collect();
    }

    /**
     * Add a URL to the queue unless an equal URL is already known.
     *
     * A newly added URL gets the key it received in the full collection as
     * its id and is also registered as pending.
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return \Spatie\Crawler\CrawlQueue\CrawlQueue
     */
    public function add(CrawlUrl $url): CrawlQueue
    {
        if ($this->has($url)) {
            return $this;
        }

        $this->urls->push($url);

        // The id is the key the URL was just stored under.
        $url->setId($this->urls->keys()->last());
        $this->pendingUrls->push($url);

        return $this;
    }

    /**
     * Tell whether at least one URL is still waiting to be processed.
     *
     * @return bool
     */
    public function hasPendingUrls(): bool
    {
        return $this->pendingUrls->count() > 0;
    }

    /**
     * Look up a URL by its position in the re-indexed list of all URLs.
     *
     * @param mixed $id
     *
     * @return \Spatie\Crawler\CrawlUrl
     *
     * @throws \Spatie\Crawler\Exception\UrlNotFoundByIndex when nothing is stored at $id
     */
    public function getUrlById($id): CrawlUrl
    {
        // values() builds a new collection, so fetch it once and reuse it.
        $indexedUrls = $this->urls->values();

        if (! isset($indexedUrls[$id])) {
            throw new UrlNotFoundByIndex("#{$id} crawl url not found in collection");
        }

        return $indexedUrls[$id];
    }

    /**
     * Tell whether the URL is known to the queue but no longer pending.
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return bool
     */
    public function hasAlreadyBeenProcessed(CrawlUrl $url): bool
    {
        if ($this->contains($this->pendingUrls, $url)) {
            return false;
        }

        return $this->contains($this->urls, $url);
    }

    /**
     * Remove every pending entry whose URL equals the given one.
     *
     * The URL stays in the full collection, so has() keeps reporting it.
     *
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     */
    public function markAsProcessed(CrawlUrl $crawlUrl)
    {
        $processedUrl = (string) $crawlUrl->url;

        $this->pendingUrls = $this->pendingUrls
            ->reject(function (CrawlUrl $pendingUrl) use ($processedUrl) {
                return (string) $pendingUrl->url === $processedUrl;
            });
    }

    /**
     * Tell whether an equal URL has ever been added to the queue.
     *
     * @param CrawlUrl|\Psr\Http\Message\UriInterface|string $crawlUrl
     *
     * @return bool
     */
    public function has($crawlUrl): bool
    {
        // A plain string cannot be handed to CrawlUrl::create(), which was
        // reported to accept only UriInterface instances. Compare it
        // verbatim against the string form of the queued URLs instead;
        // no normalisation is applied to the given string.
        if (is_string($crawlUrl)) {
            return $this->containsUrlString($this->urls, $crawlUrl);
        }

        if (! $crawlUrl instanceof CrawlUrl) {
            $crawlUrl = CrawlUrl::create($crawlUrl);
        }

        return $this->contains($this->urls, $crawlUrl);
    }

    /**
     * Get the first URL that is still pending.
     *
     * @return \Spatie\Crawler\CrawlUrl|null
     */
    public function getFirstPendingUrl()
    {
        return $this->pendingUrls->first();
    }

    /**
     * Count every URL that was added, processed or not.
     *
     * @return int
     */
    public function count(): int
    {
        return $this->urls->count();
    }

    /**
     * Tell whether the collection holds a URL equal to the searched one.
     *
     * @param \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection $collection
     * @param \Spatie\Crawler\CrawlUrl                                             $searchCrawlUrl
     *
     * @return bool
     */
    protected function contains($collection, CrawlUrl $searchCrawlUrl): bool
    {
        return $this->containsUrlString($collection, (string) $searchCrawlUrl->url);
    }

    /**
     * Tell whether any crawl URL in the collection has exactly this string form.
     *
     * @param \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection $collection
     * @param string                                                               $searchUrl
     *
     * @return bool
     */
    private function containsUrlString($collection, string $searchUrl): bool
    {
        foreach ($collection as $crawlUrl) {
            if ((string) $crawlUrl->url === $searchUrl) {
                return true;
            }
        }

        return false;
    }
}
115