GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#13)
by Dev
01:19
created

RobotsTxt::allows()   B

Complexity

Conditions 10
Paths 14

Size

Total Lines 43

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 43
rs 7.6666
c 0
b 0
f 0
cc 10
nc 14
nop 2

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Spatie\Robots;
4
5
/**
 * Parses the contents of a robots.txt file and answers whether a URL may be
 * crawled by a given user agent.
 *
 * Rules are stored per lower-cased user agent as a map of path => verdict,
 * where the verdict is `true` for a `Disallow` rule and `false` for an
 * `Allow` rule. Within one group the first matching rule (in file order) wins.
 */
class RobotsTxt
{
    /** Value written to pathIsDenied()'s $type when the path equals the rule. */
    protected const MATCH_EXACT = 0;

    /** Value written to pathIsDenied()'s $type when the path lies inside a directory rule. */
    protected const MATCH_DIRECTORY = 1;

    protected static $robotsCache = [];

    /** @var array<string, array<string, bool>> rules per lower-cased user agent */
    protected $disallowsPerUserAgent = [];

    /**
     * Builds an instance from the file or URL at $source.
     *
     * Read failures are deliberately silent: an unreadable source is treated
     * as an empty robots.txt, which allows everything.
     */
    public static function readFrom(string $source): self
    {
        $content = @file_get_contents($source);

        // file_get_contents() signals failure with false, which `??` does not catch.
        return new self($content === false ? '' : $content);
    }

    /**
     * @param string $content raw robots.txt contents
     */
    public function __construct(string $content)
    {
        $this->disallowsPerUserAgent = $this->getDisallowsPerUserAgent($content);
    }

    /**
     * Builds an instance from either a robots.txt location or raw contents.
     *
     * $source is treated as a location when it is a single line mentioning
     * both "http" and "robots.txt"; anything else is parsed as contents.
     */
    public static function create(string $source): self
    {
        $location = trim($source);

        // A location never spans several lines, whereas real robots.txt contents
        // may well mention both markers (e.g. in a Sitemap line and a comment).
        $looksLikeLocation = strpbrk($location, "\r\n") === false
            && strpos($location, 'http') !== false
            && strpos($location, 'robots.txt') !== false;

        if ($looksLikeLocation) {
            return self::readFrom($location);
        }

        return new self($source);
    }

    /**
     * Tells whether $userAgent may crawl $url.
     *
     * Lookup order:
     *  1. the group for the exact user agent, falling back to the `*` group;
     *  2. if that group matched through a directory rule, a wildcard
     *     user-agent group (e.g. `google*`) overrides the verdict when present;
     *  3. if that group did not match at all, a wildcard user-agent group
     *     decides when present;
     *  4. otherwise the URL is allowed.
     *
     * @param string      $url       full URL or bare path
     * @param string|null $userAgent user agent name; null is treated as `*`
     */
    public function allows(string $url, ?string $userAgent = '*'): bool
    {
        // Default before lower-casing: strtolower(null) would yield '' and hide the null.
        $userAgent = strtolower($userAgent ?? '*');

        // parse_url() yields null when there is no path and false on malformed URLs.
        $path = parse_url($url, PHP_URL_PATH);
        if (! is_string($path)) {
            $path = '';
        }

        $rules = $this->disallowsPerUserAgent[$userAgent] ?? $this->disallowsPerUserAgent['*'] ?? [];

        $matchType = null;
        $denied = $this->pathIsDenied($path, $rules, $matchType);

        if ($denied !== null) {
            if ($matchType === self::MATCH_DIRECTORY) {
                $allowedByWildcard = $this->checkForWildcard($path, $userAgent);

                if ($allowedByWildcard !== null) {
                    return $allowedByWildcard;
                }
            }

            return ! $denied;
        }

        $allowedByWildcard = $this->checkForWildcard($path, $userAgent);

        if ($allowedByWildcard !== null) {
            return $allowedByWildcard;
        }

        return true;
    }

    /**
     * Looks for a wildcard user-agent group (a prefix of $userAgent followed
     * by `*`, shortest prefix first) and evaluates $path against it.
     *
     * @return bool|null true when the group allows $path, false when it denies
     *                   it, null when no wildcard group applies
     */
    protected function checkForWildcard(string $path, string $userAgent)
    {
        if ($userAgent === '*') {
            return null;
        }

        $length = strlen($userAgent);

        for ($i = 1; $i <= $length; $i++) {
            $group = substr($userAgent, 0, $i).'*';

            if (isset($this->disallowsPerUserAgent[$group])) {
                // Both "no matching rule" (null) and an Allow rule (false) mean allowed.
                return $this->pathIsDenied($path, $this->disallowsPerUserAgent[$group]) !== true;
            }
        }

        return null;
    }

    /**
     * Finds the first rule in $rules that applies to $path.
     *
     * @param string $path  path component of the URL being checked
     * @param array  $rules map of rule path => verdict (true = disallow, false = allow)
     * @param mixed  $type  set to MATCH_EXACT or MATCH_DIRECTORY when a rule
     *                      matches; left untouched otherwise
     *
     * @return bool|null the verdict of the matching rule, or null when no rule matches
     */
    protected function pathIsDenied(string $path, array $rules, &$type = null)
    {
        foreach ($rules as $uri => $rule) {
            // Numeric-looking paths become integer array keys; normalise them back.
            $uri = (string) $uri;

            // An empty rule value ("Disallow:") restricts nothing.
            if ($uri === '') {
                continue;
            }

            if (in_array($path, [$uri, rtrim($uri, '/')], true)) {
                $type = self::MATCH_EXACT;

                return $rule;
            }

            // A rule for a single file can only match exactly; try the next rule.
            if (! $this->concernsDirectory($uri)) {
                continue;
            }

            if ($this->isUrlInDirectory($path, $uri)) {
                $type = self::MATCH_DIRECTORY;

                return $rule;
            }
        }

        return null;
    }

    /**
     * Turns raw robots.txt contents into a map of
     * lower-cased user agent => (rule path => verdict).
     *
     * Rules that appear before the first User-agent line are ignored. A
     * repeated User-agent line starts that agent's rule list afresh.
     */
    protected function getDisallowsPerUserAgent(string $content): array
    {
        // Split on any line-ending style rather than the host platform's PHP_EOL.
        $lines = array_filter(preg_split('/\r\n|\r|\n/', $content) ?: []);

        $disallowsPerUserAgent = [];

        $currentUserAgent = null;

        foreach ($lines as $line) {
            if ($this->isUserAgentLine($line)) {
                $currentUserAgent = $this->parseUserAgent($line);

                $disallowsPerUserAgent[$currentUserAgent] = [];

                continue;
            }

            if ($currentUserAgent === null) {
                continue;
            }

            $rule = null;
            $rulePath = $this->parse($line, $rule);

            // null means the line is neither an Allow nor a Disallow rule.
            if ($rulePath !== null) {
                $disallowsPerUserAgent[$currentUserAgent][$rulePath] = $rule;
            }
        }

        return $disallowsPerUserAgent;
    }

    /**
     * Tells whether $line is a `User-agent:` line (case-insensitive).
     */
    protected function isUserAgentLine(string $line): bool
    {
        return preg_match('/^User-agent\s*:/i', trim($line)) === 1;
    }

    /**
     * Extracts the lower-cased user agent name from a `User-agent:` line.
     */
    protected function parseUserAgent(string $line): string
    {
        return strtolower(trim(preg_replace('/^User-agent\s*:/i', '', trim($line))));
    }

    /**
     * Parses an `Allow:` / `Disallow:` line.
     *
     * @param string $line one line of the robots.txt contents
     * @param mixed  $type set to true for a Disallow rule and false for an
     *                     Allow rule; left untouched for any other line
     *
     * @return string|null the rule's path, or null when the line is something
     *                     else (crawl-delay, sitemap, ...)
     */
    protected function parse(string $line, &$type): ?string
    {
        // Tolerate whitespace before the colon, like isUserAgentLine() does.
        if (preg_match('/^(dis)?allow\s*:(.*)$/i', trim($line), $matches) !== 1) {
            // An explicit null is required: falling off the end of a ?string
            // function raises a TypeError.
            return null;
        }

        $type = $matches[1] !== '';

        return trim($matches[2]);
    }

    /**
     * Tells whether a rule path designates a directory, i.e. ends with a slash.
     */
    protected function concernsDirectory(string $path): bool
    {
        return substr($path, -1) === '/';
    }

    /**
     * Tells whether $url starts with the directory $path.
     */
    protected function isUrlInDirectory(string $url, string $path): bool
    {
        return strpos($url, $path) === 0;
    }
}
184