Completed
Push — 2.0-dev ( 97b412...d10788 )
by Jan-Petter
02:43
created

UserAgentTools::checkPath()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 15
rs 9.4285
cc 3
eloc 9
nc 3
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Client\Directives;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
use vipnytt\RobotsTxtParser\Parser\Directives\SubDirectiveHandler;
6
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
7
use vipnytt\RobotsTxtParser\Parser\UrlParser;
8
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
9
10
/**
 * Class UserAgentTools
 *
 * Answers allow/disallow queries for URLs and exports the rules held by a
 * SubDirectiveHandler.
 *
 * @package vipnytt\RobotsTxtParser\Client\Directives
 */
class UserAgentTools implements RobotsTxtInterface
{
    use UrlParser;

    /**
     * Rules
     *
     * Source of every directive client used by this class.
     *
     * @var SubDirectiveHandler
     */
    protected $handler;

    /**
     * Base Uri
     *
     * URLs passed to the check methods are resolved against, and must share
     * scheme, host and port with, this URI.
     *
     * @var string
     */
    private $base;

    /**
     * Status code
     *
     * Forwarded to StatusCodeParser on every check.
     *
     * @var int|null
     */
    private $statusCode;

    /**
     * UserAgentTools constructor.
     *
     * @param SubDirectiveHandler $handler
     * @param string $base
     * @param int|null $statusCode
     */
    public function __construct(SubDirectiveHandler $handler, $base, $statusCode)
    {
        $this->handler = $handler;
        $this->base = $base;
        $this->statusCode = $statusCode;
    }

    /**
     * UserAgentTools destructor.
     */
    public function __destruct()
    {
        // The returned client is discarded on purpose.
        // NOTE(review): presumably called for a side effect of building the
        // comment client - confirm against the comment handler.
        $this->handler->comment()->client();
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException if the URL does not share scheme, host and port with the base URI
     */
    public function isAllowed($url)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $url);
    }

    /**
     * Check
     *
     * Decides whether `$directive` is the directive that applies to `$url`.
     * Evaluation order:
     *  1. an access override derived from the status code, if there is one;
     *  2. the visit-time window - outside of it only `disallow` applies;
     *  3. the allow/disallow path rules.
     *
     * @param string $directive self::DIRECTIVE_ALLOW or self::DIRECTIVE_DISALLOW
     * @param string $url
     * @return bool
     * @throws ClientException if the URL does not share scheme, host and port with the base URI
     */
    private function check($directive, $url)
    {
        $url = $this->urlConvertToFull($url, $this->base);
        if (!$this->isUrlApplicable([$url, $this->base])) {
            throw new ClientException('URL belongs to a different robots.txt');
        }
        $statusCodeParser = new StatusCodeParser($this->statusCode, parse_url($this->base, PHP_URL_SCHEME));
        $statusCodeParser->codeOverride();
        $override = $statusCodeParser->accessOverrideCheck();
        if ($override !== null) {
            return $directive === $override;
        }
        if ($this->handler->visitTime()->client()->isVisitTime() === false) {
            // Compare against the requested directive. The override is always
            // null at this point, so comparing it made both isAllowed() and
            // isDisallowed() report false outside the visit-time window.
            return $directive === self::DIRECTIVE_DISALLOW;
        }
        return $this->checkPath($directive, $url);
    }

    /**
     * Check if the URL belongs to current robots.txt
     *
     * All URLs must resolve to the same `scheme://host:port`. Scheme and host
     * are compared case-insensitively. A URL without a parsable scheme and
     * host is never applicable.
     *
     * @param string[] $urls
     * @return bool
     */
    private function isUrlApplicable($urls)
    {
        $origin = null;
        foreach ($urls as $url) {
            $parsed = parse_url($url);
            if ($parsed === false || !isset($parsed['scheme'], $parsed['host'])) {
                // The origin cannot be determined, so it cannot be matched.
                return false;
            }
            $scheme = strtolower($parsed['scheme']);
            // Fall back to the scheme's registered TCP port when the URL has
            // no explicit port; an unknown scheme yields an empty port part.
            $port = isset($parsed['port']) ? $parsed['port'] : getservbyname($scheme, 'tcp');
            $assembled = $scheme . '://' . strtolower($parsed['host']) . ':' . $port;
            if ($origin === null) {
                $origin = $assembled;
            } elseif ($origin !== $assembled) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check path
     *
     * Starts from `allow`, then consults the disallow rules followed by the
     * allow rules; the last rule set listing the URL decides, so a listed
     * allow rule takes precedence over a listed disallow rule.
     *
     * @param string $directive self::DIRECTIVE_ALLOW or self::DIRECTIVE_DISALLOW
     * @param string $url
     * @return bool
     */
    private function checkPath($directive, $url)
    {
        $result = self::DIRECTIVE_ALLOW;
        foreach (
            [
                self::DIRECTIVE_DISALLOW => $this->handler->disallow(),
                self::DIRECTIVE_ALLOW => $this->handler->allow(),
            ] as $currentDirective => $ruleClient
        ) {
            if ($ruleClient->client()->isListed($url)) {
                $result = $currentDirective;
            }
        }
        return $directive === $result;
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException if the URL does not share scheme, host and port with the base URI
     */
    public function isDisallowed($url)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $url);
    }

    /**
     * Rule export
     *
     * @return array Exported rules of each directive client, keyed by directive name
     */
    public function export()
    {
        return [
            self::DIRECTIVE_ROBOT_VERSION => $this->handler->robotVersion()->client()->export(),
            self::DIRECTIVE_VISIT_TIME => $this->handler->visitTime()->client()->export(),
            self::DIRECTIVE_DISALLOW => $this->handler->disallow()->client()->export(),
            self::DIRECTIVE_ALLOW => $this->handler->allow()->client()->export(),
            self::DIRECTIVE_CRAWL_DELAY => $this->handler->crawlDelay()->client()->export(),
            self::DIRECTIVE_CACHE_DELAY => $this->handler->cacheDelay()->client()->export(),
            self::DIRECTIVE_REQUEST_RATE => $this->handler->requestRate()->client()->export(),
            self::DIRECTIVE_COMMENT => $this->handler->comment()->client()->export(),
        ];
    }
}
169