Completed
Push — master ( a7ec5f...7812c9 )
by Jan-Petter
02:03
created

Parser::generateRulePair()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 17
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 17
rs 9.2
cc 4
eloc 10
nc 2
nop 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A Parser::getSitemaps() 0 4 1
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\RobotsTxtParser\Directives\CleanParam;
5
use vipnytt\RobotsTxtParser\Directives\Host;
6
use vipnytt\RobotsTxtParser\Directives\Sitemap;
7
use vipnytt\RobotsTxtParser\Directives\UserAgent;
8
9
/**
 * Parses robots.txt content and hands every line to the matching directive handler.
 *
 * The DIRECTIVE_*, ENCODING, BYTE_LIMIT and MAX_LENGTH_RULE constants, as well as
 * generateRulePair(), are not declared in this class; presumably they come from
 * RobotsTxtInterface and the ObjectTools trait - TODO confirm.
 */
class Parser implements RobotsTxtInterface
{
    use ObjectTools;

    /**
     * Directives that add() dispatches to a dedicated handler object
     */
    const SUB_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM,
        self::DIRECTIVE_HOST,
        self::DIRECTIVE_SITEMAP,
        self::DIRECTIVE_USER_AGENT,
    ];

    /**
     * File content, cut to the byte limit when an integer limit was given
     *
     * @var string
     */
    protected $raw;

    /**
     * Handler for Clean-param rules
     *
     * @var CleanParam
     */
    protected $cleanParam;

    /**
     * Handler for Host rules
     *
     * @var Host
     */
    protected $host;

    /**
     * Handler for Sitemap rules
     *
     * @var Sitemap
     */
    protected $sitemap;

    /**
     * Handler for User-agent rules; also receives every line add() cannot dispatch elsewhere
     *
     * @var UserAgent
     */
    protected $userAgent;

    /**
     * Constructor
     *
     * Note: switches the global mbstring internal encoding to $encoding as a side effect.
     *
     * @param string $content - file content
     * @param string $encoding - character encoding
     * @param integer|null $byteLimit - maximum of bytes to parse
     * @throws Exceptions\ParserException
     */
    public function __construct($content, $encoding = self::ENCODING, $byteLimit = self::BYTE_LIMIT)
    {
        try {
            $encodingSet = mb_internal_encoding($encoding);
        } catch (\ValueError $exception) {
            // PHP 8+ throws instead of returning false for an unknown encoding;
            // normalise it so callers always get the documented ParserException.
            $encodingSet = false;
        }
        if (!$encodingSet) {
            throw new Exceptions\ParserException('Unable to set internal character encoding to `' . $encoding . '`');
        }

        // NOTE(review): static analysis reported that an array is passed where the
        // Sitemap and UserAgent constructors are documented to expect a string -
        // confirm the handler constructor signatures.
        $this->cleanParam = new CleanParam([]);
        $this->host = new Host([]);
        $this->sitemap = new Sitemap([]);
        $this->userAgent = new UserAgent([]);

        // Only an integer limit truncates; any other value (e.g. null) keeps the full content.
        $this->raw = is_int($byteLimit) ? mb_strcut($content, 0, $byteLimit, $encoding) : $content;
        $this->parseTxt();
    }

    /**
     * Parse robots.txt
     *
     * Splits the raw content into lines and feeds each one to add().
     *
     * @return void
     */
    private function parseTxt()
    {
        $rawLines = mb_split('\r\n|\n|\r', $this->raw);
        if ($rawLines === false) {
            // mb_split() signals failure with false; there is nothing to parse then.
            return;
        }
        // array_filter() without a callback drops every falsy entry, i.e. blank lines.
        $lines = array_filter(array_map('trim', $rawLines));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments (everything from the first `#` onwards)
            $line = mb_split('#', $line, 2)[0];
            // Parse line
            $this->add($line);
        }
    }

    /**
     * Add a single line to the handler responsible for its directive
     *
     * Lines whose directive is not one of SUB_DIRECTIVES are passed unchanged
     * to the User-agent handler.
     *
     * @param string $line - one robots.txt line
     * @return mixed - whatever the selected handler's add() returns (presumably bool - TODO confirm)
     */
    public function add($line)
    {
        $pair = $this->generateRulePair($line, self::SUB_DIRECTIVES);
        switch ($pair['directive']) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->cleanParam->add($pair['value']);
            case self::DIRECTIVE_HOST:
                return $this->host->add($pair['value']);
            case self::DIRECTIVE_SITEMAP:
                return $this->sitemap->add($pair['value']);
            case self::DIRECTIVE_USER_AGENT:
                return $this->userAgent->add($pair['value']);
        }
        return $this->userAgent->add($line);
    }

    /**
     * Export the rules of all handlers
     *
     * The handler exports are combined with the array union operator, so when
     * two exports share a key the one listed first wins.
     *
     * @return array
     */
    public function export()
    {
        return $this->cleanParam->export()
        + $this->host->export()
        + $this->sitemap->export()
        + $this->userAgent->export();
    }

    /**
     * Check if URL is allowed to crawl
     *
     * @param  string $url - url to check
     * @return bool
     */
    public function isAllowed($url)
    {
        return $this->userAgent->check($url, self::DIRECTIVE_ALLOW);
    }

    /**
     * Check if URL is disallowed to crawl
     *
     * @param  string $url - url to check
     * @return bool
     */
    public function isDisallowed($url)
    {
        return $this->userAgent->check($url, self::DIRECTIVE_DISALLOW);
    }

    /**
     * Get sitemaps
     *
     * @return array
     */
    public function getSitemaps()
    {
        return $this->sitemap->export();
    }

    /**
     * Get host
     *
     * NOTE(review): export() applies the array union operator to this same
     * Host::export() value, which suggests an array rather than string|null -
     * confirm against Host::export().
     *
     * @return string|null
     */
    public function getHost()
    {
        return $this->host->export();
    }

    /**
     * Get Clean-param
     *
     * @return array
     */
    public function getCleanParam()
    {
        return $this->cleanParam->export();
    }
}
146