Completed
Push — master ( 228ec0...89e623 )
by Jan-Petter
04:43
created

UserAgentTools   B

Complexity

Total Complexity 15

Size/Duplication

Total Lines 147
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 16

Importance

Changes 15
Bugs 5 Features 2
Metric Value
wmc 15
c 15
b 5
f 2
lcom 1
cbo 16
dl 0
loc 147
rs 8.4614

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 1
A isAllowed() 0 4 1
A check() 0 13 3
A checkOverride() 0 17 4
A checkPath() 0 17 4
A isDisallowed() 0 4 1
A export() 0 14 1
1
<?php
2
/**
3
 * vipnytt/RobotsTxtParser
4
 *
5
 * @link https://github.com/VIPnytt/RobotsTxtParser
6
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
7
 */
8
9
namespace vipnytt\RobotsTxtParser\Client\Directives;
10
11
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
12
use vipnytt\RobotsTxtParser\Handler\Directives\SubDirectiveHandler;
13
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
14
use vipnytt\RobotsTxtParser\Parser\UriParser;
15
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
16
17
/**
18
 * Class UserAgentTools
19
 *
20
 * @package vipnytt\RobotsTxtParser\Client\Directives
21
 */
22
class UserAgentTools implements RobotsTxtInterface
{
    /**
     * Directive handler that supplies the rule sets (allow, disallow,
     * noindex, visit-time, ...) consulted by this class.
     *
     * @var SubDirectiveHandler
     */
    protected $handler;

    /**
     * Base URI that every checked URI has to resolve to.
     *
     * @var string
     */
    private $base;

    /**
     * Status code handed over to the StatusCodeParser when looking for
     * access overrides.
     *
     * @var int|null
     */
    private $statusCode;

    /**
     * UserAgentTools constructor.
     *
     * @param SubDirectiveHandler $handler
     * @param string $base
     * @param int|null $statusCode
     */
    public function __construct(SubDirectiveHandler $handler, $base, $statusCode)
    {
        $this->handler = $handler;
        $this->base = $base;
        $this->statusCode = $statusCode;
    }

    /**
     * Tell whether crawling the given URI is allowed
     *
     * @param string $uri
     * @return bool
     * @throws ClientException if the URI does not share this instance's base
     */
    public function isAllowed($uri)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $uri);
    }

    /**
     * Evaluate the URI and compare the outcome with the expected directive
     *
     * @param string $directive Directive the caller is asking about
     * @param string $uri
     * @return bool True when the effective directive equals $directive
     * @throws ClientException if the URI does not share this instance's base
     */
    private function check($directive, $uri)
    {
        $parser = new UriParser($uri);
        $fullUri = $parser->convertToFull($this->base);
        if ($this->base !== $parser->base()) {
            throw new ClientException('URI belongs to a different robots.txt');
        }

        $override = $this->checkOverride($fullUri);
        if ($override === false) {
            // No override applies, the path rules decide
            return $this->checkPath($directive, $fullUri);
        }

        return $directive === $override;
    }

    /**
     * Look for a rule that takes precedence over the path rules
     *
     * @param string $uri
     * @return string|false Overriding directive, or false if there is none
     */
    private function checkOverride($uri)
    {
        // 1st priority: /robots.txt is always allowed
        if (parse_url($uri, PHP_URL_PATH) === self::PATH) {
            return self::DIRECTIVE_ALLOW;
        }

        // 2nd priority: Status code rules
        $scheme = parse_url($this->base, PHP_URL_SCHEME);
        $statusCodeParser = new StatusCodeParser($this->statusCode, $scheme);
        $override = $statusCodeParser->accessOverride();
        if ($override !== false) {
            return $override;
        }

        // 3rd priority: Visit times
        $isVisitTime = $this->handler->visitTime()->client()->isVisitTime();

        return $isVisitTime === false ? self::DIRECTIVE_DISALLOW : false;
    }

    /**
     * Decide the effective directive from the noindex, disallow and allow rules
     *
     * @param string $directive Directive the caller is asking about
     * @param string $uri
     * @return bool True when the effective directive equals $directive
     */
    private function checkPath($directive, $uri)
    {
        // Evaluation order matters: a later listed rule set replaces an earlier one
        $ruleSets = [
            self::DIRECTIVE_NO_INDEX => $this->handler->noIndex(),
            self::DIRECTIVE_DISALLOW => $this->handler->disallow(),
            self::DIRECTIVE_ALLOW => $this->handler->allow(),
        ];

        // Allowed unless one of the rule sets says otherwise
        $effective = self::DIRECTIVE_ALLOW;
        foreach ($ruleSets as $name => $ruleSet) {
            if (!$ruleSet->client()->isListed($uri)) {
                continue;
            }
            if ($name === self::DIRECTIVE_NO_INDEX) {
                // A noindex match is final and is treated as disallow
                return $directive === self::DIRECTIVE_DISALLOW;
            }
            $effective = $name;
        }

        return $directive === $effective;
    }

    /**
     * Tell whether crawling the given URI is disallowed
     *
     * @param string $uri
     * @return bool
     * @throws ClientException if the URI does not share this instance's base
     */
    public function isDisallowed($uri)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $uri);
    }

    /**
     * Export the rules of every supported directive, keyed by directive name
     *
     * @return array
     */
    public function export()
    {
        $rules = $this->handler;

        $exported = [];
        $exported[self::DIRECTIVE_ROBOT_VERSION] = $rules->robotVersion()->client()->export();
        $exported[self::DIRECTIVE_VISIT_TIME] = $rules->visitTime()->client()->export();
        $exported[self::DIRECTIVE_NO_INDEX] = $rules->noIndex()->client()->export();
        $exported[self::DIRECTIVE_DISALLOW] = $rules->disallow()->client()->export();
        $exported[self::DIRECTIVE_ALLOW] = $rules->allow()->client()->export();
        $exported[self::DIRECTIVE_CRAWL_DELAY] = $rules->crawlDelay()->client()->export();
        $exported[self::DIRECTIVE_CACHE_DELAY] = $rules->cacheDelay()->client()->export();
        $exported[self::DIRECTIVE_REQUEST_RATE] = $rules->requestRate()->client()->export();
        $exported[self::DIRECTIVE_COMMENT] = $rules->comment()->client()->export();

        return $exported;
    }
}
169