UserAgentTools   A
last analyzed

Complexity

Total Complexity 15

Size/Duplication

Total Lines 155
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 15

Importance

Changes 0
Metric Value
wmc 15
lcom 1
cbo 15
dl 0
loc 155
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 1
A isAllowed() 0 4 1
A check() 0 13 3
A checkOverride() 0 17 4
A checkPath() 0 15 3
A isDisallowed() 0 4 1
A export() 0 14 1
A getUserAgentGroup() 0 4 1
1
<?php
2
/**
3
 * vipnytt/RobotsTxtParser
4
 *
5
 * @link https://github.com/VIPnytt/RobotsTxtParser
6
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
7
 */
8
9
namespace vipnytt\RobotsTxtParser\Client\Directives;
10
11
use vipnytt\RobotsTxtParser\Handler\Directives\SubDirectiveHandler;
12
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
13
use vipnytt\RobotsTxtParser\Parser\UriParser;
14
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
15
16
/**
 * Class UserAgentTools
 *
 * Crawl-permission helpers shared by clients that operate on a single
 * user-agent rule group. Answers allow/disallow questions for a URI and
 * exports the group's directives.
 *
 * @package vipnytt\RobotsTxtParser\Client\Directives
 */
class UserAgentTools implements RobotsTxtInterface
{
    /**
     * Handler giving access to the directives of this rule group
     * @var SubDirectiveHandler
     */
    protected $handler;

    /**
     * Base uri every checked URI is compared against
     * @var string
     */
    private $base;

    /**
     * Status code handed to the StatusCodeParser when checking overrides
     * @var int|null
     */
    private $statusCode;

    /**
     * UserAgentTools constructor.
     *
     * @param SubDirectiveHandler $handler Directive handler of the rule group
     * @param string $base Base uri
     * @param int|null $statusCode Status code, null if unknown
     */
    public function __construct(SubDirectiveHandler $handler, $base, $statusCode = null)
    {
        $this->statusCode = $statusCode;
        $this->base = $base;
        $this->handler = $handler;
    }

    /**
     * Tell whether the URI may be crawled
     *
     * @param string $uri
     * @return bool True when the effective directive for the URI is "allow"
     * @throws \InvalidArgumentException When the URI has a different base than this instance
     */
    public function isAllowed($uri)
    {
        $allowed = $this->check(self::DIRECTIVE_ALLOW, $uri);

        return $allowed;
    }

    /**
     * Resolve the effective directive for a URI and compare it to the expected one
     *
     * @param string $directive Directive the caller is asking about
     * @param string $uri URI to evaluate
     * @return bool True when the effective directive equals $directive
     * @throws \InvalidArgumentException When the URI has a different base than this instance
     */
    private function check($directive, $uri)
    {
        $parser = new UriParser($uri);
        $fullUri = $parser->convertToFull($this->base);

        // Refuse URIs whose base differs from the one this instance was built for
        if ($parser->base() !== $this->base) {
            throw new \InvalidArgumentException('URI belongs to a different robots.txt');
        }

        $override = $this->checkOverride($fullUri);
        if ($override === false) {
            // No override applies, fall back to the path rules
            return $this->checkPath($directive, $fullUri);
        }

        return $directive === $override;
    }

    /**
     * Look for a rule that takes precedence over the path rules
     *
     * @param string $uri Full URI
     * @return string|false Overriding directive, or false when no override applies
     */
    private function checkOverride($uri)
    {
        // 1st priority: /robots.txt is permanently allowed
        $path = parse_url($uri, PHP_URL_PATH);
        if ($path === self::PATH) {
            return self::DIRECTIVE_ALLOW;
        }

        // 2nd priority: Status code rules
        $scheme = parse_url($this->base, PHP_URL_SCHEME);
        $statusCodeParser = new StatusCodeParser($this->statusCode, $scheme);
        $statusOverride = $statusCodeParser->accessOverride();
        if ($statusOverride !== false) {
            return $statusOverride;
        }

        // 3rd priority: Visit times
        $isVisitTime = $this->handler->visitTime->client()->isVisitTime();

        return $isVisitTime === false ? self::DIRECTIVE_DISALLOW : false;
    }

    /**
     * Decide between the allow and disallow path rules
     *
     * Compares the string lengths of what the disallow and allow clients
     * return from isCovered(); the longer one wins and a tie goes to allow.
     *
     * @param string $directive Directive the caller is asking about
     * @param string $uri Full URI
     * @return bool True when the winning directive equals $directive
     */
    private function checkPath($directive, $uri)
    {
        // Disallow is queried first, then allow
        $disallowLength = mb_strlen($this->handler->disallow->client()->isCovered($uri));
        $allowLength = mb_strlen($this->handler->allow->client()->isCovered($uri));

        if ($allowLength >= $disallowLength) {
            $winner = self::DIRECTIVE_ALLOW;
        } else {
            $winner = self::DIRECTIVE_DISALLOW;
        }

        return $directive === $winner;
    }

    /**
     * Tell whether the URI must not be crawled
     *
     * @param string $uri
     * @return bool True when the effective directive for the URI is "disallow"
     * @throws \InvalidArgumentException When the URI has a different base than this instance
     */
    public function isDisallowed($uri)
    {
        $disallowed = $this->check(self::DIRECTIVE_DISALLOW, $uri);

        return $disallowed;
    }

    /**
     * Export the rules of this group
     *
     * @return array Exported data of each directive client, keyed by directive
     */
    public function export()
    {
        $directives = $this->handler;

        $rules = [];
        $rules[self::DIRECTIVE_ROBOT_VERSION] = $directives->robotVersion->client()->export();
        $rules[self::DIRECTIVE_VISIT_TIME] = $directives->visitTime->client()->export();
        $rules[self::DIRECTIVE_NO_INDEX] = $directives->noIndex->client()->export();
        $rules[self::DIRECTIVE_DISALLOW] = $directives->disallow->client()->export();
        $rules[self::DIRECTIVE_ALLOW] = $directives->allow->client()->export();
        $rules[self::DIRECTIVE_CRAWL_DELAY] = $directives->crawlDelay->client()->export();
        $rules[self::DIRECTIVE_CACHE_DELAY] = $directives->cacheDelay->client()->export();
        $rules[self::DIRECTIVE_REQUEST_RATE] = $directives->requestRate->client()->export();
        $rules[self::DIRECTIVE_COMMENT] = $directives->comment->client()->export();

        return $rules;
    }

    /**
     * Name of the user-agent group these rules belong to
     *
     * @return string Value of the handler's group property
     */
    public function getUserAgentGroup()
    {
        $group = $this->handler->group;

        return $group;
    }
}
176