GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#13)
by Dev
01:57 queued 41s
created

RobotsTxt::allows()   B

Complexity

Conditions 10
Paths 14

Size

Total Lines 43

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 43
rs 7.6666
c 0
b 0
f 0
cc 10
nc 14
nop 2

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Spatie\Robots;
4
5
/**
 * Parses the content of a robots.txt file and answers whether a URL may be
 * crawled by a given user agent.
 *
 * Rules are stored per lower-cased user agent as a map of
 * rule path => verdict, where the verdict is true for "Disallow" and false
 * for "Allow".
 */
class RobotsTxt
{
    /**
     * Not read or written by any method of this class; kept for subclasses.
     *
     * @var array
     */
    protected static $robotsCache = [];

    /**
     * Parsed rules: lower-cased user agent => [rule path => true (disallow) | false (allow)].
     *
     * @var array
     */
    protected $disallowsPerUserAgent = [];

    /**
     * Build an instance from the content found at a file path or URL.
     *
     * A source that cannot be read is treated as an empty robots.txt.
     *
     * @param string $source Location handed to file_get_contents().
     *
     * @return self
     */
    public static function readFrom(string $source): self
    {
        // Best effort on purpose: the warning is suppressed and a failed read
        // falls back to empty content. file_get_contents() signals failure
        // with false (never null), so the "??" operator cannot catch it.
        $content = @file_get_contents($source);

        return new self($content === false ? '' : $content);
    }

    /**
     * @param string $content Raw robots.txt content.
     */
    public function __construct(string $content)
    {
        $this->disallowsPerUserAgent = $this->getDisallowsPerUserAgent($content);
    }

    /**
     * Build an instance from either a robots.txt location or raw content.
     *
     * The source is read as a location when it is a single line containing
     * both "http" and "robots.txt"; anything else is parsed as content.
     *
     * @param string $source Location of a robots.txt file, or its content.
     *
     * @return self
     */
    public static function create(string $source): self
    {
        // A location never spans several lines. Without this guard, content
        // that merely mentions an http URL and "robots.txt" (for example in a
        // Sitemap line) would be mistaken for a location.
        $isSingleLine = preg_match('/[\r\n]/', trim($source)) === 0;

        if (
            $isSingleLine
            && strpos($source, 'http') !== false
            && strpos($source, 'robots.txt') !== false
        ) {
            return self::readFrom($source);
        }

        return new self($source);
    }

    /**
     * Tell whether the given URL may be crawled by the given user agent.
     *
     * The rules of the exact user agent group are used, falling back to the
     * "*" group. When the matching rule is a directory rule, or when no rule
     * matches at all, a wildcard user agent group (such as "google*") that
     * has a matching rule decides instead.
     *
     * @param string      $url       Full URL or bare path.
     * @param string|null $userAgent User agent name; null means "*".
     *
     * @return bool True when crawling is allowed.
     */
    public function allows(string $url, ?string $userAgent = '*'): bool
    {
        // Resolve null before lower-casing; strtolower(null) yields '' and
        // would make the null check unreachable.
        $userAgent = $userAgent === null ? '*' : strtolower($userAgent);

        // parse_url() returns null when the URL has no path and false when
        // the URL is seriously malformed; both are treated as an empty path.
        $path = parse_url($url, PHP_URL_PATH);
        if ($path === null || $path === false) {
            $path = '';
        }

        $rules = $this->disallowsPerUserAgent[$userAgent] ?? $this->disallowsPerUserAgent['*'] ?? [];

        $type = null;
        $denied = $this->pathIsDenied($path, $rules, $type);

        if ($denied !== null) {
            // A directory match may be overruled by a wildcard user agent
            // group; an exact match is final.
            if ($type === 1) {
                $wildcardAllows = $this->checkForWildcard($path, $userAgent);

                if ($wildcardAllows !== null) {
                    return $wildcardAllows;
                }
            }

            return ! $denied;
        }

        // No rule in the selected group matched: let a wildcard user agent
        // group decide, if one has a matching rule.
        $wildcardAllows = $this->checkForWildcard($path, $userAgent);

        if ($wildcardAllows !== null) {
            return $wildcardAllows;
        }

        return true;
    }

    /**
     * Look for a wildcard user agent group ("<prefix>*") with a rule that
     * matches the path.
     *
     * Prefixes of the user agent are tried from shortest to longest; the
     * first group containing a matching rule wins.
     *
     * @param string $path      Path component of the URL being checked.
     * @param string $userAgent Lower-cased user agent name.
     *
     * @return bool|null True when allowed, false when denied, null when no
     *                   wildcard group has a matching rule.
     */
    protected function checkForWildcard(string $path, string $userAgent)
    {
        if ($userAgent === '*') {
            return null;
        }

        $length = strlen($userAgent);

        for ($i = 1; $i <= $length; $i++) {
            $wildCardUserAgent = substr($userAgent, 0, $i).'*';

            if (! isset($this->disallowsPerUserAgent[$wildCardUserAgent])) {
                continue;
            }

            $denied = $this->pathIsDenied($path, $this->disallowsPerUserAgent[$wildCardUserAgent]);

            // "No matching rule" must stay null; negating it would turn the
            // absence of a rule into an explicit allow.
            if ($denied !== null) {
                return ! $denied;
            }
        }

        return null;
    }

    /**
     * Find the first rule that applies to the path.
     *
     * @param string   $path  Path component of the URL being checked.
     * @param array    $rules Map of rule path => true (disallow) | false (allow).
     * @param int|null $type  Set to 0 when the rule path equals the path
     *                        (with or without trailing slashes), to 1 when the
     *                        path lies inside a directory rule; left untouched
     *                        when no rule matches.
     *
     * @return bool|null The verdict of the matching rule (true = denied),
     *                   or null when no rule matches.
     */
    protected function pathIsDenied(string $path, array $rules, &$type = null)
    {
        foreach ($rules as $uri => $rule) {
            // Numeric-looking rule paths come back from array keys as integers.
            $uri = (string) $uri;

            // A directive without a value carries no path to match against.
            if ($uri === '') {
                continue;
            }

            if ($path === $uri || $path === rtrim($uri, '/')) {
                $type = 0;

                return $rule;
            }

            // Only directory rules (ending in "/") match by prefix; any other
            // rule applies to its exact path only, so a non-matching one is
            // skipped.
            if ($this->concernsDirectory($uri) && $this->isUrlInDirectory($path, $uri)) {
                $type = 1;

                return $rule;
            }
        }

        return null;
    }

    /**
     * Parse robots.txt content into rules grouped by user agent.
     *
     * Allow/Disallow lines that appear before the first User-agent line are
     * ignored. A repeated User-agent line starts that group afresh.
     *
     * @param string $content Raw robots.txt content.
     *
     * @return array Lower-cased user agent => [rule path => true | false].
     */
    protected function getDisallowsPerUserAgent(string $content): array
    {
        // Split on any common line ending rather than on the line ending of
        // the platform PHP happens to run on.
        $lines = array_filter(preg_split('/\r\n|\r|\n/', $content) ?: []);

        $disallowsPerUserAgent = [];
        $currentUserAgent = null;

        foreach ($lines as $line) {
            if ($this->isUserAgentLine($line)) {
                $currentUserAgent = $this->parseUserAgent($line);
                $disallowsPerUserAgent[$currentUserAgent] = [];

                continue;
            }

            if ($currentUserAgent === null) {
                continue;
            }

            $rule = null;
            $rulePath = $this->parse($line, $rule);

            // null means the line is something other than Allow/Disallow.
            if ($rulePath !== null) {
                $disallowsPerUserAgent[$currentUserAgent][$rulePath] = $rule;
            }
        }

        return $disallowsPerUserAgent;
    }

    /**
     * Tell whether the line is a "User-agent:" line (case-insensitive).
     */
    protected function isUserAgentLine(string $line): bool
    {
        return preg_match('/^User-agent\s*:/i', trim($line)) === 1;
    }

    /**
     * Extract the lower-cased user agent name from a "User-agent:" line.
     */
    protected function parseUserAgent(string $line): string
    {
        return strtolower(trim(preg_replace('/^User-agent\s*:/i', '', trim($line))));
    }

    /**
     * Parse an Allow/Disallow line.
     *
     * Whitespace between the directive name and the colon is tolerated, in
     * line with how User-agent lines are recognised.
     *
     * @param string    $line Raw line from the robots.txt content.
     * @param bool|null $type Set to true for Disallow and false for Allow;
     *                        left untouched for any other line.
     *
     * @return string|null The rule path, or null when the line is neither an
     *                     Allow nor a Disallow directive (crawl-delay,
     *                     sitemap, ...).
     */
    protected function parse(string $line, &$type): ?string
    {
        if (preg_match('/^(allow|disallow)\s*:(.*)/i', trim($line), $matches) !== 1) {
            return null;
        }

        $type = strtolower($matches[1]) === 'disallow';

        return trim($matches[2]);
    }

    /**
     * Tell whether a rule path denotes a directory, i.e. ends with "/".
     */
    protected function concernsDirectory(string $path): bool
    {
        return substr($path, -1) === '/';
    }

    /**
     * Tell whether the URL path starts with the given directory path.
     */
    protected function isUrlInDirectory(string $url, string $path): bool
    {
        return strpos($url, $path) === 0;
    }
}
189