Completed
Push — master ( b5f967...f3d3e2 )
by Jan-Petter
01:55
created

UserAgentClient::check()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 17
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 17
rs 8.8571
cc 5
eloc 11
nc 5
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\RobotsTxtParser\Directives\DisAllow;
5
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
6
use vipnytt\RobotsTxtParser\Exceptions\ParserException;
7
8
/**
 * Class UserAgentClient
 *
 * Answers allow/disallow and delay queries for a single user-agent, using the
 * rule objects handed to the constructor and the HTTP status code that was
 * returned when the robots.txt was fetched.
 */
class UserAgentClient implements RobotsTxtInterface
{
    /**
     * Rule handler stored by validateRules() under the allow directive name.
     * NOTE(review): assumes self::DIRECTIVE_ALLOW === 'allow' - confirm in RobotsTxtInterface.
     *
     * @var DisAllow
     */
    protected $allow;

    /**
     * Rule handler stored by validateRules() under the disallow directive name.
     * NOTE(review): assumes self::DIRECTIVE_DISALLOW === 'disallow' - confirm in RobotsTxtInterface.
     *
     * @var DisAllow
     */
    protected $disallow;

    /**
     * User-agent these rules were selected for
     *
     * @var string
     */
    protected $userAgent;

    /**
     * Origin URL of the robots.txt; its scheme is handed to the status code parser
     *
     * @var string
     */
    protected $origin;

    /**
     * @var StatusCodeParser
     */
    protected $statusCodeParser;

    /**
     * UserAgentClient constructor.
     *
     * @param array $rules - rule objects, indexed by directive name
     * @param string $userAgent
     * @param string $origin - URL the robots.txt belongs to
     * @param int $statusCode - HTTP status code of the robots.txt response
     * @throws ParserException if the allow/disallow rule objects are missing or invalid
     */
    public function __construct($rules, $userAgent, $origin, $statusCode)
    {
        $this->statusCodeParser = new StatusCodeParser($statusCode, parse_url($origin, PHP_URL_SCHEME));
        $this->userAgent = $userAgent;
        $this->origin = $origin;
        $this->validateRules($rules);
    }

    /**
     * Validate and store the allow/disallow rule objects
     *
     * @param array $rules
     * @return void
     * @throws ParserException
     */
    protected function validateRules($rules)
    {
        foreach ([self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW] as $directive) {
            // isset() first, so a missing key raises the intended exception and not an undefined index notice
            if (!isset($rules[$directive]) || !$rules[$directive] instanceof DisAllow) {
                throw new ParserException('Invalid rule object');
            }
            $this->$directive = $rules[$directive];
        }
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isAllowed($url)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $url);
    }

    /**
     * Check if the outcome for an URL equals the requested directive
     *
     * @param string $directive - directive the outcome is compared against
     * @param string $url - URL to check
     * @return bool
     * @throws ClientException if the URL belongs to a different origin than this robots.txt
     */
    protected function check($directive, $url)
    {
        // isUrlApplicable() compares a list of URLs, so the URL has to be paired with our own origin.
        // Passing the bare string (as done previously) made the loop a no-op and the check always passed.
        if (!$this->isUrlApplicable([$url, $this->origin])) {
            throw new ClientException('URL belongs to a different robots.txt, please check it against that one instead');
        }
        $this->statusCodeParser->replaceUnofficial();
        // A non-null verdict from the status code parser overrides the rule set
        if (($result = $this->statusCodeParser->check()) !== null) {
            return $directive === $result;
        }
        // Allowed unless a rule matches. Allow is evaluated last, so it wins when both directives match.
        $result = self::DIRECTIVE_ALLOW;
        foreach ([self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW] as $currentDirective) {
            if ($this->$currentDirective->check($url)) {
                $result = $currentDirective;
            }
        }
        return $directive === $result;
    }

    /**
     * Check if all the given URLs share the same origin (scheme, host and port)
     *
     * Relative references (neither scheme nor host) carry no origin of their own and are skipped.
     * URLs which cannot be parsed, or which carry only one of scheme/host, are treated as not applicable.
     *
     * @param string[]|string $urls - URLs to compare
     * @return bool
     */
    protected function isUrlApplicable($urls)
    {
        $reference = null;
        foreach ((array)$urls as $url) {
            $parsed = is_string($url) ? parse_url($url) : false;
            if ($parsed === false) {
                return false;
            }
            $hasScheme = isset($parsed['scheme']);
            $hasHost = isset($parsed['host']);
            if (!$hasScheme && !$hasHost) {
                // Relative reference, nothing to compare
                continue;
            }
            if (!$hasScheme || !$hasHost) {
                return false;
            }
            // Scheme and host are case-insensitive, normalize before comparing
            $scheme = strtolower($parsed['scheme']);
            // Fall back to the scheme's well-known port when the URL does not state one
            $port = isset($parsed['port']) ? $parsed['port'] : getservbyname($scheme, 'tcp');
            $assembled = $scheme . '://' . strtolower($parsed['host']) . ':' . $port;
            if ($reference === null) {
                $reference = $assembled;
            } elseif ($reference !== $assembled) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException
     */
    public function isDisallowed($url)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $url);
    }

    /**
     * Get Cache-delay
     *
     * Falls back to the Crawl-delay when no Cache-delay value is exported.
     *
     * @return float|int
     */
    public function getCacheDelay()
    {
        $exported = $this->exportDirective(self::DIRECTIVE_CACHE_DELAY);
        return isset($exported[self::DIRECTIVE_CACHE_DELAY]) ? $exported[self::DIRECTIVE_CACHE_DELAY] : $this->getCrawlDelay();
    }

    /**
     * Get Crawl-delay
     *
     * @return float|int - 0 when no Crawl-delay value is exported
     */
    public function getCrawlDelay()
    {
        $exported = $this->exportDirective(self::DIRECTIVE_CRAWL_DELAY);
        return isset($exported[self::DIRECTIVE_CRAWL_DELAY]) ? $exported[self::DIRECTIVE_CRAWL_DELAY] : 0;
    }

    /**
     * Export the handler stored on this object under the given directive name
     *
     * Nothing in this class assigns the delay handlers, so a missing handler is
     * reported as an empty export rather than failing on a null method call.
     * NOTE(review): the delay handlers are presumably expected to be set by a subclass - confirm.
     *
     * @param string $directive
     * @return array|mixed - whatever the handler's export() returns, or an empty array
     */
    private function exportDirective($directive)
    {
        if (!isset($this->$directive) || !is_callable([$this->$directive, 'export'])) {
            return [];
        }
        return $this->$directive->export();
    }
}
128