Completed
Push — master ( 49e9a0...fa7091 )
by Jan-Petter
02:11
created

UserAgentClient   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 141
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 17
c 1
b 0
f 0
lcom 1
cbo 2
dl 0
loc 141
rs 10

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 8 1
A isAllowed() 0 4 1
B check() 0 17 5
B isUrlApplicable() 0 14 5
A isDisallowed() 0 4 1
A getCacheDelay() 0 5 2
A getCrawlDelay() 0 5 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Modules;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
use vipnytt\RobotsTxtParser\Modules\Directives\DisAllow;
6
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
7
8
/**
 * Class UserAgentClient
 *
 * Answers allow/disallow questions for a single user-agent against the rules
 * of one robots.txt file, identified by its origin URL.
 *
 * @package vipnytt\RobotsTxtParser\Modules
 */
class UserAgentClient implements RobotsTxtInterface
{
    /**
     * Allow rules
     * @var DisAllow
     */
    protected $allow;

    /**
     * Disallow rules
     * @var DisAllow
     */
    protected $disallow;

    /**
     * User-agent
     * @var string
     */
    protected $userAgent;

    /**
     * Robots.txt origin
     * @var string
     */
    protected $origin;

    /**
     * Status code parser
     * @var StatusCodeParser
     */
    protected $statusCodeParser;

    /**
     * UserAgentClient constructor.
     *
     * @param DisAllow $allow - Allow rules for this user-agent
     * @param DisAllow $disallow - Disallow rules for this user-agent
     * @param string $userAgent
     * @param string $origin - URL the robots.txt was fetched from
     * @param int $statusCode - status code passed on to the status code parser
     */
    public function __construct($allow, $disallow, $userAgent, $origin, $statusCode)
    {
        // The status code parser is given the scheme of the origin URL alongside the code.
        $this->statusCodeParser = new StatusCodeParser($statusCode, parse_url($origin, PHP_URL_SCHEME));
        $this->userAgent = $userAgent;
        $this->origin = $origin;
        $this->allow = $allow;
        $this->disallow = $disallow;
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException if the URL does not belong to this robots.txt
     */
    public function isAllowed($url)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $url);
    }

    /**
     * Check whether the given directive is the one that applies to the URL
     *
     * @param string $directive - directive the caller is asking about
     * @param string $url - URL to check
     * @return bool - true if the effective directive for the URL equals $directive
     * @throws ClientException if the URL does not belong to this robots.txt
     */
    protected function check($directive, $url)
    {
        if (!$this->isUrlApplicable([$url, $this->origin])) {
            throw new ClientException('URL belongs to a different robots.txt, please check it against that one instead');
        }
        $this->statusCodeParser->replaceUnofficial();
        // A non-null answer from the status code parser takes precedence over the rules.
        if (($result = $this->statusCodeParser->check()) !== null) {
            return $directive === $result;
        }
        // Default to allow. Disallow is evaluated first and allow second, so a
        // matching allow rule overrides a matching disallow rule.
        // NOTE(review): relies on the directive constants naming the $disallow / $allow
        // properties; the constants are declared outside this file - confirm.
        $result = self::DIRECTIVE_ALLOW;
        foreach ([self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW] as $currentDirective) {
            if ($this->$currentDirective->check($url)) {
                $result = $currentDirective;
            }
        }
        return $directive === $result;
    }

    /**
     * Check if the URL belongs to current robots.txt
     *
     * All given URLs must share the same scheme, host and port. Scheme and host
     * are compared case-insensitively. A URL that cannot be parsed, or that lacks
     * a scheme or a host (e.g. a relative path), is never applicable.
     *
     * @param string[] $urls
     * @return bool
     */
    protected function isUrlApplicable($urls)
    {
        $expected = null;
        foreach ($urls as $url) {
            $base = $this->buildUrlBase($url);
            if ($base === false) {
                return false;
            }
            if ($expected === null) {
                $expected = $base;
            } elseif ($expected !== $base) {
                return false;
            }
        }
        return true;
    }

    /**
     * Build the normalized "scheme://host:port" base of an URL
     *
     * @param string $url
     * @return string|false - false if the URL is malformed or has no scheme/host
     */
    private function buildUrlBase($url)
    {
        $parsed = parse_url($url);
        if ($parsed === false || !isset($parsed['scheme'], $parsed['host'])) {
            return false;
        }
        // Scheme and host are case-insensitive, normalize before comparing.
        $scheme = strtolower($parsed['scheme']);
        $host = strtolower($parsed['host']);
        // Without an explicit port, fall back to the default port of the scheme.
        // If the scheme is unknown to the system, the port is left empty so that
        // two URLs with the same scheme and no explicit port still match.
        $port = isset($parsed['port']) ? $parsed['port'] : getservbyname($scheme, 'tcp');
        return $scheme . '://' . $host . ':' . ($port === false ? '' : $port);
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param string $url
     * @return bool
     * @throws ClientException if the URL does not belong to this robots.txt
     */
    public function isDisallowed($url)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $url);
    }

    /**
     * Get Cache-delay
     *
     * Falls back to the Crawl-delay when no Cache-delay value is exported.
     *
     * @return float|int
     */
    public function getCacheDelay()
    {
        // NOTE(review): this class declares no property named by DIRECTIVE_CACHE_DELAY and
        // the constructor does not receive one; unless it is provided elsewhere, this
        // dereferences an undefined property - verify where the object should come from.
        $exported = $this->{self::DIRECTIVE_CACHE_DELAY}->export();
        return isset($exported[self::DIRECTIVE_CACHE_DELAY]) ? $exported[self::DIRECTIVE_CACHE_DELAY] : $this->getCrawlDelay();
    }

    /**
     * Get Crawl-delay
     *
     * Returns 0 when no Crawl-delay value is exported.
     *
     * @return float|int
     */
    public function getCrawlDelay()
    {
        // NOTE(review): this class declares no property named by DIRECTIVE_CRAWL_DELAY and
        // the constructor does not receive one; unless it is provided elsewhere, this
        // dereferences an undefined property - verify where the object should come from.
        $exported = $this->{self::DIRECTIVE_CRAWL_DELAY}->export();
        return isset($exported[self::DIRECTIVE_CRAWL_DELAY]) ? $exported[self::DIRECTIVE_CRAWL_DELAY] : 0;
    }
}
154