Completed
Push — master ( b7c1d2...efe9f4 )
by Jan-Petter
01:56
created

UserAgentClient::export()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 10
rs 9.4285
cc 3
eloc 6
nc 3
nop 0
1
<?php
2
namespace vipnytt\RobotsTxtParser\Modules;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
6
7
/**
 * Class UserAgentClient
 *
 * Client for the rule set that applies to one user-agent of a robots.txt file.
 * It answers whether a URL may be crawled, exposes the Crawl-delay/Cache-delay
 * values and can export the underlying rules.
 *
 * @package vipnytt\RobotsTxtParser\Modules
 */
class UserAgentClient implements RobotsTxtInterface
{
    use UrlTools;

    /**
     * Rules, indexed by directive name.
     *
     * Every entry is used as an object exposing export(); the allow and
     * disallow entries are additionally expected to expose check($url).
     *
     * @var array
     */
    protected $rules;

    /**
     * User-agent
     * @var string
     */
    protected $userAgent;

    /**
     * Robots.txt base URL
     * @var string
     */
    protected $base;

    /**
     * Status code parser
     * @var StatusCodeParser
     */
    protected $statusCodeParser;

    /**
     * UserAgentClient constructor.
     *
     * @param array $rules - rule objects indexed by directive name
     * @param string $userAgent
     * @param string $baseUrl - base URL of the robots.txt; its scheme is handed to the status code parser
     * @param int $statusCode - status code handed to the status code parser
     */
    public function __construct($rules, $userAgent, $baseUrl, $statusCode)
    {
        $this->statusCodeParser = new StatusCodeParser($statusCode, parse_url($baseUrl, PHP_URL_SCHEME));
        $this->userAgent = $userAgent;
        $this->rules = $rules;
        $this->base = $baseUrl;
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isAllowed($url)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $url);
    }

    /**
     * Check whether the effective directive for a URL equals the given one
     *
     * The URL is first expanded against the base URL and must share the base
     * URL's origin. A non-null verdict from the status code parser takes
     * precedence over the rules. Otherwise the default is "allow", and the
     * disallow rules are evaluated before the allow rules, so a matching
     * allow rule overrides a matching disallow rule.
     *
     * @param string $directive - directive to test for (allow or disallow)
     * @param string $url - URL to check
     * @return bool
     * @throws ClientException if the URL does not belong to this robots.txt
     */
    protected function check($directive, $url)
    {
        $url = $this->urlConvertToFull($url, $this->base);
        if (!$this->isUrlApplicable([$url, $this->base])) {
            throw new ClientException('URL belongs to a different robots.txt, please check it against that one instead');
        }
        $this->statusCodeParser->replaceUnofficial();
        if (($result = $this->statusCodeParser->check()) !== null) {
            return $directive === $result;
        }
        $result = self::DIRECTIVE_ALLOW;
        // Order matters: the last matching directive wins.
        foreach ([self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW] as $currentDirective) {
            if ($this->rules[$currentDirective]->check($url)) {
                $result = $currentDirective;
            }
        }
        return $directive === $result;
    }

    /**
     * Check if the URL belongs to current robots.txt
     *
     * All given URLs must share the same origin (scheme, host and port).
     * Scheme and host are compared case-insensitively. When a URL carries no
     * explicit port, the scheme's default TCP port is looked up instead.
     * A URL whose scheme or host cannot be determined is never applicable.
     *
     * @param string[] $urls
     * @return bool
     */
    protected function isUrlApplicable($urls)
    {
        $reference = null;
        foreach ($urls as $url) {
            $parsed = parse_url($url);
            // parse_url() returns false on seriously malformed input and omits
            // components that are absent; without both parts there is no origin.
            if (!is_array($parsed) || !isset($parsed['scheme'], $parsed['host'])) {
                return false;
            }
            $scheme = strtolower($parsed['scheme']);
            $host = strtolower($parsed['host']);
            if (isset($parsed['port'])) {
                $port = $parsed['port'];
            } else {
                $port = getservbyname($scheme, 'tcp');
                if ($port === false) {
                    // Unknown scheme: no default port available, compare without one.
                    $port = '';
                }
            }
            $assembled = $scheme . '://' . $host . ':' . $port;
            if ($reference === null) {
                $reference = $assembled;
            } elseif ($reference !== $assembled) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isDisallowed($url)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $url);
    }

    /**
     * Get Cache-delay
     *
     * Falls back to the Crawl-delay when no Cache-delay is exported.
     *
     * @return float|int
     */
    public function getCacheDelay()
    {
        $exported = $this->rules[self::DIRECTIVE_CACHE_DELAY]->export();
        return isset($exported[self::DIRECTIVE_CACHE_DELAY]) ? $exported[self::DIRECTIVE_CACHE_DELAY] : $this->getCrawlDelay();
    }

    /**
     * Get Crawl-delay
     *
     * Returns 0 when no Crawl-delay is exported.
     *
     * @return float|int
     */
    public function getCrawlDelay()
    {
        $exported = $this->rules[self::DIRECTIVE_CRAWL_DELAY]->export();
        return isset($exported[self::DIRECTIVE_CRAWL_DELAY]) ? $exported[self::DIRECTIVE_CRAWL_DELAY] : 0;
    }

    /**
     * Rule export
     *
     * Collects, for every rule object, the value it exports under its own
     * directive name. Rule objects that export nothing under that name are
     * left out of the result.
     *
     * @return array
     */
    public function export()
    {
        $result = [];
        foreach ($this->rules as $directive => $object) {
            $export = $object->export();
            // array_key_exists() instead of a bare index: a non-empty export
            // lacking this directive must not raise an undefined-index notice.
            if (is_array($export) && array_key_exists($directive, $export)) {
                $result[$directive] = $export[$directive];
            }
        }
        return $result;
    }
}
164