Completed
Push to master (dc3dad...b5f967) by Jan-Petter, created 01:56

UserAgentClient (rating: A)

Complexity

Total Complexity 12

Size/Duplication

Total Lines 93
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 1
Bugs 0 Features 0
Metric                       Value
wmc  (total complexity)         12
c    (changes)                   1
b    (bugs)                      0
f    (features)                  0
lcom (components)                1
cbo  (dependencies)              2
dl   (duplicated lines)          0
loc  (total lines)              93
rs                              10

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 1
A validateRules() 0 9 3
A isAllowed() 0 4 1
A check() 0 15 4
A isDisallowed() 0 4 1
A getCrawlDelay() 0 4 1
A getCacheDelay() 0 4 1
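
The public surface is small: construct the client with the parsed rule set, then query isAllowed()/isDisallowed(). A minimal usage sketch, assuming $rules is an array produced by the library's parser and keyed by the allow/disallow directive constants (the crawler name, origin and status code below are placeholder values):

<?php
use vipnytt\RobotsTxtParser\UserAgentClient;

// $rules is assumed to already contain DisAllow rule objects under the
// allow/disallow directive keys, as validateRules() in the class requires.
$client = new UserAgentClient($rules, 'MyCrawler/1.0', 'http://example.com', 200);

if ($client->isAllowed('http://example.com/public/page.html')) {
    // Crawling this URL is permitted for MyCrawler/1.0.
}

if ($client->isDisallowed('http://example.com/private/')) {
    // Crawling this URL is blocked; skip it.
}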
<?php
namespace vipnytt\RobotsTxtParser;

use vipnytt\RobotsTxtParser\Directives\DisAllow;
use vipnytt\RobotsTxtParser\Exceptions\ParserException;

class UserAgentClient implements RobotsTxtInterface
{
    protected $allow;
    protected $disallow;

    protected $userAgent;
    protected $origin;
    protected $statusCodeParser;

    public function __construct($rules, $userAgent, $origin, $statusCode)
    {
        // Inspection finding (Security Bug): parse_url($origin, PHP_URL_SCHEME) can also
        // return false; however, StatusCodeParser::__construct() only seems to accept
        // string|null, so the error condition is not handled here.
        $this->statusCodeParser = new StatusCodeParser($statusCode, parse_url($origin, PHP_URL_SCHEME));
        $this->userAgent = $userAgent;
        $this->origin = $origin;
        $this->validateRules($rules);
    }

    protected function validateRules($rules)
    {
        foreach ([self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW] as $directive) {
            if (!$rules[$directive] instanceof DisAllow) {
                throw new ParserException('Invalid rule object');
            }
            $this->$directive = $rules[$directive];
        }
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param string $url
     * @return bool
     */
    public function isAllowed($url)
    {
        return $this->check(self::DIRECTIVE_ALLOW, $url);
    }

    /**
     * Check
     *
     * @param string $directive
     * @param string $url - URL to check
     * @return bool
     * @throws ParserException
     */
    protected function check($directive, $url)
    {
        //TODO: Throw new exception Cannot check URL, belongs to a different robots.txt
        $this->statusCodeParser->replaceUnofficial();
        if (($result = $this->statusCodeParser->check()) !== null) {
            return $directive === $result;
        }
        $result = self::DIRECTIVE_ALLOW;
        foreach ([self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW] as $currentDirective) {
            if ($this->$currentDirective->check($url)) {
                $result = $currentDirective;
            }
        }
        return $directive === $result;
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param string $url
     * @return bool
     */
    public function isDisallowed($url)
    {
        return $this->check(self::DIRECTIVE_DISALLOW, $url);
    }

    /**
     * Get Crawl-delay
     *
     * @return array
     */
    public function getCrawlDelay()
    {
        //TODO: Crawl-delay
    }

    /**
     * Get Cache-delay
     *
     * @return array
     */
    public function getCacheDelay()
    {
        //TODO: Cache-delay
    }
}
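
One way to address the parse_url() finding flagged in the constructor, sketched here as an assumption rather than taken from the library: parse_url() returns null when the scheme is simply absent but false when $origin is seriously malformed, so the false case can be rejected before the scheme reaches StatusCodeParser.

    public function __construct($rules, $userAgent, $origin, $statusCode)
    {
        $scheme = parse_url($origin, PHP_URL_SCHEME);
        if ($scheme === false) {
            // Seriously malformed origin URL; fail early rather than passing false on.
            throw new ParserException('Unable to parse origin URL: ' . $origin);
        }
        $this->statusCodeParser = new StatusCodeParser($statusCode, $scheme);
        $this->userAgent = $userAgent;
        $this->origin = $origin;
        $this->validateRules($rules);
    }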