RobotsContext::iShouldBeAbleToCrawl()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 6
nc 1
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
<?php declare(strict_types=1);

namespace MOrtola\BehatSEOContexts\Context;

use Behat\Mink\Exception\UnsupportedDriverActionException;
use Behat\Symfony2Extension\Driver\KernelDriver;
use InvalidArgumentException;
use vipnytt\RobotsTxtParser\UriClient;
use Webmozart\Assert\Assert;

class RobotsContext extends BaseContext
{
    /**
     * @var string
     */
    private $crawlerUserAgent = 'Googlebot';

    /**
     * @Given I am a :crawlerUserAgent crawler
     */
    public function iAmACrawler(string $crawlerUserAgent): void
    {
        $this->crawlerUserAgent = $crawlerUserAgent;
    }

    /**
     * @Then I should not be able to crawl :resource
     */
    public function iShouldNotBeAbleToCrawl(string $resource): void
    {
        Assert::false(
            $this->getRobotsClient()->userAgent($this->crawlerUserAgent)->isAllowed($resource),
            sprintf(
                'Crawler with User-Agent %s is allowed to crawl %s',
                $this->crawlerUserAgent,
                $resource
            )
        );
    }

    // Fetches and parses the robots.txt of the site under test.
    private function getRobotsClient(): UriClient
    {
        return new UriClient($this->webUrl);
    }

    /**
     * @throws UnsupportedDriverActionException
     *
     * @Then I should be able to get the sitemap URL
     */
    public function iShouldBeAbleToGetTheSitemapUrl(): void
    {
        // Throws UnsupportedDriverActionException when the scenario runs with the Symfony2Extension KernelDriver.
        $this->doesNotSupportDriver(KernelDriver::class);

        $sitemaps = $this->getRobotsClient()->sitemap()->export();

        Assert::false(
            empty($sitemaps),
            sprintf('Crawler with User-Agent %s cannot find a sitemap URL in the robots.txt file.', $this->crawlerUserAgent)
        );

        Assert::count(
            $sitemaps,
            1,
            sprintf(
                'Crawler with User-Agent %s has found more than one sitemap URL in the robots.txt file.',
                $this->crawlerUserAgent
            )
        );

        try {
            $this->getSession()->visit($sitemaps[0]);
        } catch (\Throwable $e) {
            throw new InvalidArgumentException(
                sprintf(
                    'Sitemap URL %s is not valid. Exception: %s',
                    $sitemaps[0],
                    $e->getMessage()
                ),
                0,
                $e
            );
        }

        Assert::eq(
            200,
            $this->getStatusCode(),
            sprintf('Sitemap URL %s is not valid.', $sitemaps[0])
        );
    }

    /**
     * @Then I should be able to crawl :resource
     */
    public function iShouldBeAbleToCrawl(string $resource): void
    {
        Assert::true(
            $this->getRobotsClient()->userAgent($this->crawlerUserAgent)->isAllowed($resource),
            sprintf(
                'Crawler with User-Agent %s is not allowed to crawl %s',
                $this->crawlerUserAgent,
                $resource
            )
        );
    }
}
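
The step patterns declared in the annotations above can be exercised from a Behat feature file. The following is a minimal illustrative scenario, not taken from the project itself; the feature name and the /, /admin paths are hypothetical examples.

Feature: Robots crawling rules

  Scenario: Googlebot may crawl the homepage but not the admin area
    # Paths below are illustrative; use resources from the site under test.
    Given I am a "Googlebot" crawler
    Then I should be able to crawl "/"
    And I should not be able to crawl "/admin"
    And I should be able to get the sitemap URL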