Completed
Push — master ( 0489e6...065a8d )
by Jan-Petter
04:09
created

UserAgentTools::check()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 22
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 7
Bugs 1 Features 1
Metric Value
c 7
b 1
f 1
dl 0
loc 22
rs 8.6737
cc 5
eloc 12
nc 5
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Client\Directives;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
use vipnytt\RobotsTxtParser\Parser\Directives\SubDirectiveHandler;
6
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
7
use vipnytt\RobotsTxtParser\Parser\UriParser;
8
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
9
10
/**
 * Class UserAgentTools
 *
 * Answers "may this URL be crawled?" questions against the directives held
 * by a SubDirectiveHandler, and exports those directives as an array.
 *
 * @package vipnytt\RobotsTxtParser\Client\Directives
 */
class UserAgentTools implements RobotsTxtInterface
{
    use UriParser;

    /**
     * Directive handler holding the rules that are queried
     * @var SubDirectiveHandler
     */
    protected $handler;

    /**
     * Base uri the rules belong to
     * @var string
     */
    private $base;

    /**
     * Status code, forwarded to the StatusCodeParser on every check
     * @var int|null
     */
    private $statusCode;

    /**
     * UserAgentTools constructor.
     *
     * @param SubDirectiveHandler $handler
     * @param string $base
     * @param int|null $statusCode
     */
    public function __construct(SubDirectiveHandler $handler, $base, $statusCode)
    {
        $this->statusCode = $statusCode;
        $this->base = $base;
        $this->handler = $handler;
    }

    /**
     * UserAgentTools destructor.
     *
     * Requests the comment directive's client one last time.
     * NOTE(review): the return value is discarded, so this presumably relies
     * on a side effect of creating that client - confirm against the comment handler.
     */
    public function __destruct()
    {
        $this->handler->comment()->client();
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isAllowed($url)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $url);
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isDisallowed($url)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $url);
    }

    /**
     * Decide whether the given directive applies to the URL
     *
     * Three overrides are evaluated, in order, before the path rules are consulted:
     * the robots.txt path itself, the status code, and the visit time.
     *
     * @param string $directive Directive being asked about (allow or disallow)
     * @param string $url
     * @return bool True if $directive is the effective directive for $url
     * @throws ClientException If the URL does not share this instance's base uri
     */
    private function check($directive, $url)
    {
        $fullUrl = $this->urlConvertToFull($url, $this->base);
        if ($this->urlBase($fullUrl) !== $this->base) {
            throw new ClientException('URL belongs to a different robots.txt');
        }

        // 1st priority override: the robots.txt path itself is always allowed
        $path = parse_url($fullUrl, PHP_URL_PATH);
        if ($path === self::PATH) {
            return $directive === self::DIRECTIVE_ALLOW;
        }

        // 2nd priority override: status code rules (false means no override applies)
        $scheme = parse_url($this->base, PHP_URL_SCHEME);
        $statusCodeParser = new StatusCodeParser($this->statusCode, $scheme);
        $override = $statusCodeParser->accessOverride();
        if ($override !== false) {
            return $directive === $override;
        }

        // 3rd priority override: outside of the visit time, everything is disallowed
        $isVisitTime = $this->handler->visitTime()->client()->isVisitTime();
        if ($isVisitTime === false) {
            return $directive === self::DIRECTIVE_DISALLOW;
        }

        // No override matched, fall back to the path rules
        return $this->checkPath($directive, $fullUrl);
    }

    /**
     * Check the URL against the noindex, disallow and allow rules
     *
     * A noindex match settles the question immediately as disallowed. Otherwise
     * the last matching directive in the order disallow, allow is effective,
     * and allow is the default when nothing matches.
     *
     * @param string $directive Directive being asked about (allow or disallow)
     * @param string $url
     * @return bool True if $directive is the effective directive for $url
     */
    private function checkPath($directive, $url)
    {
        // All three directive handlers are fetched up front, before any lookup is made
        $ruleSets = [
            self::DIRECTIVE_NO_INDEX => $this->handler->noIndex(),
            self::DIRECTIVE_DISALLOW => $this->handler->disallow(),
            self::DIRECTIVE_ALLOW => $this->handler->allow(),
        ];

        $effective = self::DIRECTIVE_ALLOW;
        foreach ($ruleSets as $name => $ruleSet) {
            if (!$ruleSet->client()->isListed($url)) {
                continue;
            }
            if ($name === self::DIRECTIVE_NO_INDEX) {
                // A noindex match short-circuits the remaining rule sets
                return $directive === self::DIRECTIVE_DISALLOW;
            }
            $effective = $name;
        }

        return $directive === $effective;
    }

    /**
     * Rule export
     *
     * @return array Exported rules of each directive client, keyed by directive name
     */
    public function export()
    {
        $handler = $this->handler;

        return [
            self::DIRECTIVE_ROBOT_VERSION => $handler->robotVersion()->client()->export(),
            self::DIRECTIVE_VISIT_TIME => $handler->visitTime()->client()->export(),
            self::DIRECTIVE_NO_INDEX => $handler->noIndex()->client()->export(),
            self::DIRECTIVE_DISALLOW => $handler->disallow()->client()->export(),
            self::DIRECTIVE_ALLOW => $handler->allow()->client()->export(),
            self::DIRECTIVE_CRAWL_DELAY => $handler->crawlDelay()->client()->export(),
            self::DIRECTIVE_CACHE_DELAY => $handler->cacheDelay()->client()->export(),
            self::DIRECTIVE_REQUEST_RATE => $handler->requestRate()->client()->export(),
            self::DIRECTIVE_COMMENT => $handler->comment()->client()->export(),
        ];
    }
}
159