Completed
Branch 2.0-dev (2c252e)
by Jan-Petter
02:43
created

Checks::isDisallowed()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
rs 10
cc 1
eloc 2
nc 1
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser\Client\Directives;
3
4
5
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
6
use vipnytt\RobotsTxtParser\Parser\Directives\DirectiveParserCommons;
7
use vipnytt\RobotsTxtParser\Parser\Directives\SubDirectiveHandler;
8
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
9
use vipnytt\RobotsTxtParser\Parser\UrlParser;
10
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
11
12
class Checks implements RobotsTxtInterface
{
    use UrlParser;
    use DirectiveParserCommons;

    /**
     * Base URI that the checked URLs are resolved against and compared with
     * @var string
     */
    private $base;

    /**
     * Status code handed to StatusCodeParser
     * (presumably the HTTP status of the robots.txt response - confirm with caller)
     * @var int|null
     */
    private $statusCode;

    /**
     * Handler giving access to the allow, disallow and visit-time rule clients
     * @var SubDirectiveHandler
     */
    private $handler;

    /**
     * Checks constructor.
     *
     * @param string $base
     * @param int|null $statusCode
     * @param SubDirectiveHandler $handler
     */
    public function __construct($base, $statusCode, SubDirectiveHandler $handler)
    {
        $this->base = $base;
        $this->statusCode = $statusCode;
        $this->handler = $handler;
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isAllowed($url)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $url);
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isDisallowed($url)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $url);
    }

    /**
     * Resolve the effective directive for an URL and compare it to the requested one
     *
     * Order of evaluation:
     *  1. a non-null result from the status code parser decides the outcome,
     *  2. outside the visit-time window the URL is treated as disallowed,
     *  3. otherwise the disallow rules and then the allow rules are consulted;
     *     allow is evaluated last and therefore wins when both match.
     *
     * @param string $directive - directive the caller asks about (allow or disallow)
     * @param string $url - URL to check
     * @return bool - true if the effective directive equals $directive
     * @throws ClientException - if the URL does not share scheme, host and port with the base URI
     */
    private function check($directive, $url)
    {
        $directive = $this->validateDirective($directive, [self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW]);
        $url = $this->urlConvertToFull($url, $this->base);
        if (!$this->isUrlApplicable([$url, $this->base])) {
            throw new ClientException('URL belongs to a different robots.txt');
        }

        $statusCodeParser = new StatusCodeParser($this->statusCode, parse_url($this->base, PHP_URL_SCHEME));
        $statusCodeParser->replaceUnofficial();
        $override = $statusCodeParser->check();
        if ($override !== null) {
            return $directive === $override;
        }

        if ($this->handler->visitTime()->client()->isVisitTime() === false) {
            // Outside the visit-time window the effective directive is "disallow".
            // The comparison has to be made against the requested directive; the
            // status code result is always null here and must not be used.
            return $directive === self::DIRECTIVE_DISALLOW;
        }

        // Default to allow; later entries in this list override earlier ones.
        $effective = self::DIRECTIVE_ALLOW;
        $ruleClients = [
            self::DIRECTIVE_DISALLOW => $this->handler->disallow()->client(),
            self::DIRECTIVE_ALLOW => $this->handler->allow()->client(),
        ];
        foreach ($ruleClients as $currentDirective => $ruleClient) {
            if ($ruleClient->affected($url)) {
                $effective = $currentDirective;
            }
        }
        return $directive === $effective;
    }

    /**
     * Check if the URL belongs to current robots.txt
     *
     * All given URLs must share the same scheme, host and port. A missing
     * port is substituted with the scheme's default TCP port as reported
     * by getservbyname().
     *
     * @param string[] $urls
     * @return bool
     */
    private function isUrlApplicable($urls)
    {
        $origin = null;
        foreach ($urls as $url) {
            $parsed = parse_url($url);
            $port = isset($parsed['port']) ? $parsed['port'] : getservbyname($parsed['scheme'], 'tcp');
            $assembled = $parsed['scheme'] . '://' . $parsed['host'] . ':' . $port;
            if ($origin === null) {
                // First URL defines the origin every following URL is compared with
                $origin = $assembled;
            } elseif ($origin !== $assembled) {
                return false;
            }
        }
        return true;
    }
}
129