Completed
Push — master ( 0fbb6b...23ae93 )
by Jan-Petter
02:52
created

UserAgentTools::check()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 9
Bugs 1 Features 1
Metric Value
c 9
b 1
f 1
dl 0
loc 12
rs 9.4285
cc 3
eloc 7
nc 3
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Client\Directives;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
use vipnytt\RobotsTxtParser\Handler\Directives\SubDirectiveHandler;
6
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
7
use vipnytt\RobotsTxtParser\Parser\UriParser;
8
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
9
10
/**
 * Class UserAgentTools
 *
 * Answers allow/disallow queries for URIs and exports the rules held by a
 * SubDirectiveHandler.
 *
 * @package vipnytt\RobotsTxtParser\Client\Directives
 */
class UserAgentTools implements RobotsTxtInterface
{
    use UriParser;

    /**
     * Rule handler the checks and the export read from
     * @var SubDirectiveHandler
     */
    protected $handler;

    /**
     * Base uri the rules belong to
     * @var string
     */
    private $base;

    /**
     * Status code handed to the StatusCodeParser
     * @var int|null
     */
    private $statusCode;

    /**
     * UserAgentTools constructor.
     *
     * @param SubDirectiveHandler $handler
     * @param string $base
     * @param int|null $statusCode
     */
    public function __construct(SubDirectiveHandler $handler, $base, $statusCode)
    {
        $this->statusCode = $statusCode;
        $this->base = $base;
        $this->handler = $handler;
    }

    /**
     * UserAgentTools destructor.
     *
     * Requests the comment directive's client when this object is destroyed.
     * NOTE(review): the reason is not visible from this class - presumably
     * obtaining that client has a side effect; confirm before changing.
     */
    public function __destruct()
    {
        $commentHandler = $this->handler->comment();
        $commentHandler->client();
    }

    /**
     * Check if URI is allowed to crawl
     *
     * @param string $uri
     * @return bool
     * @throws ClientException
     */
    public function isAllowed($uri)
    {
        $allowed = $this->check(self::DIRECTIVE_ALLOW, $uri);
        return $allowed;
    }

    /**
     * Check if URI is disallowed to crawl
     *
     * @param string $uri
     * @return bool
     * @throws ClientException
     */
    public function isDisallowed($uri)
    {
        $disallowed = $this->check(self::DIRECTIVE_DISALLOW, $uri);
        return $disallowed;
    }

    /**
     * Rule export
     *
     * Collects the export of every directive client, keyed by directive name.
     *
     * @return array
     */
    public function export()
    {
        $rules = $this->handler;
        $exported = [];
        // Clients are queried in the same order as the resulting keys.
        $exported[self::DIRECTIVE_ROBOT_VERSION] = $rules->robotVersion()->client()->export();
        $exported[self::DIRECTIVE_VISIT_TIME] = $rules->visitTime()->client()->export();
        $exported[self::DIRECTIVE_NO_INDEX] = $rules->noIndex()->client()->export();
        $exported[self::DIRECTIVE_DISALLOW] = $rules->disallow()->client()->export();
        $exported[self::DIRECTIVE_ALLOW] = $rules->allow()->client()->export();
        $exported[self::DIRECTIVE_CRAWL_DELAY] = $rules->crawlDelay()->client()->export();
        $exported[self::DIRECTIVE_CACHE_DELAY] = $rules->cacheDelay()->client()->export();
        $exported[self::DIRECTIVE_REQUEST_RATE] = $rules->requestRate()->client()->export();
        $exported[self::DIRECTIVE_COMMENT] = $rules->comment()->client()->export();
        return $exported;
    }

    /**
     * Check
     *
     * Expands the URI against the base, rejects URIs whose base differs from
     * this instance's base, then answers whether the effective directive for
     * the URI equals the requested one.
     *
     * @param string $directive Directive being asked about
     * @param string $uri
     * @return bool
     * @throws ClientException When the URI's base differs from this instance's base
     */
    private function check($directive, $uri)
    {
        $fullUri = $this->uriConvertToFull($uri, $this->base);
        if ($this->uriBase($fullUri) !== $this->base) {
            throw new ClientException('URI belongs to a different robots.txt');
        }
        $override = $this->checkOverride($fullUri);
        if ($override === false) {
            // No override applies, fall back to the path rules
            return $this->checkPath($directive, $fullUri);
        }
        return $override === $directive;
    }

    /**
     * Check for overrides
     *
     * Evaluates, in priority order, the conditions that decide the outcome
     * before any path rule is consulted.
     *
     * @param string $uri
     * @return string|false Overriding directive, or false when none applies
     */
    private function checkOverride($uri)
    {
        // 1st priority: /robots.txt is permanent allowed
        $path = parse_url($uri, PHP_URL_PATH);
        if ($path === self::PATH) {
            return self::DIRECTIVE_ALLOW;
        }
        // 2nd priority: Status code rules
        $scheme = parse_url($this->base, PHP_URL_SCHEME);
        $statusCodeParser = new StatusCodeParser($this->statusCode, $scheme);
        $statusOverride = $statusCodeParser->accessOverride();
        if ($statusOverride !== false) {
            return $statusOverride;
        }
        // 3rd priority: Visit times
        $isVisitTime = $this->handler->visitTime()->client()->isVisitTime();
        return $isVisitTime === false ? self::DIRECTIVE_DISALLOW : false;
    }

    /**
     * Check path
     *
     * A noindex match answers immediately: only a disallow query is true.
     * Otherwise the last listed directive in iteration order (disallow, then
     * allow) wins, and allow is the default when nothing is listed.
     *
     * @param string $directive Directive being asked about
     * @param string $uri
     * @return bool
     */
    private function checkPath($directive, $uri)
    {
        $candidates = [
            self::DIRECTIVE_NO_INDEX => $this->handler->noIndex(),
            self::DIRECTIVE_DISALLOW => $this->handler->disallow(),
            self::DIRECTIVE_ALLOW => $this->handler->allow(),
        ];
        $verdict = self::DIRECTIVE_ALLOW;
        foreach ($candidates as $name => $subHandler) {
            if (!$subHandler->client()->isListed($uri)) {
                continue;
            }
            if ($name === self::DIRECTIVE_NO_INDEX) {
                return $directive === self::DIRECTIVE_DISALLOW;
            }
            $verdict = $name;
        }
        return $verdict === $directive;
    }
}
173