GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#13)
by Dev
01:29
created

RobotsTxt::parse()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 18
rs 9.6666
c 0
b 0
f 0
cc 3
nc 3
nop 2
1
<?php
2
3
namespace Spatie\Robots;
4
5
/**
 * Parses the contents of a robots.txt file and answers whether a given URL
 * may be crawled by a given user agent.
 *
 * Rules are stored per lower-cased user agent as a map of
 * `uri => bool`, where `true` means "Disallow" and `false` means "Allow".
 */
class RobotsTxt
{
    /**
     * Not read or written by any method of this class; kept for
     * backward compatibility with code that may reference it.
     *
     * @var array
     */
    protected static $robotsCache = [];

    /**
     * Parsed rules: [userAgent => [uri => bool]] (true = disallow, false = allow).
     *
     * @var array
     */
    protected $disallowsPerUserAgent = [];

    /**
     * Build an instance from a file path or URL readable by file_get_contents().
     *
     * Reading is best effort: when the source cannot be read the instance is
     * created with empty content (and therefore allows everything).
     *
     * @param string $source Path or URL of the robots.txt file.
     */
    public static function readFrom(string $source): self
    {
        // file_get_contents() signals failure with false (not null), so the
        // result has to be checked explicitly before it is used as a string.
        $content = @file_get_contents($source);

        return new self($content === false ? '' : $content);
    }

    /**
     * @param string $content Raw robots.txt content.
     */
    public function __construct(string $content)
    {
        $this->disallowsPerUserAgent = $this->getDisallowsPerUserAgent($content);
    }

    /**
     * Build an instance from either a robots.txt URL or raw robots.txt content.
     *
     * The source is treated as a URL when it mentions both "http" and
     * "robots.txt" and contains no whitespace; anything else is parsed as content.
     *
     * @param string $source URL of a robots.txt file, or its raw content.
     */
    public static function create(string $source): self
    {
        // Raw content may legitimately mention "http" and "robots.txt" (for
        // example in a Sitemap line). A URL never contains whitespace, whereas
        // such content does, so whitespace rules out the URL interpretation.
        $looksLikeUrl = strpos($source, 'http') !== false
            && strpos($source, 'robots.txt') !== false
            && preg_match('/\s/', $source) === 0;

        if ($looksLikeUrl) {
            return self::readFrom($source);
        }

        return new self($source);
    }

    /**
     * Tell whether the given URL may be crawled by the given user agent.
     *
     * Resolution order:
     *  1. An exact path match in the user agent's own group (or the "*" group
     *     when the agent has none) decides immediately.
     *  2. Otherwise a wildcard user-agent group (e.g. "google*") matching the
     *     agent decides, if one exists.
     *  3. Otherwise a directory-prefix match from step 1 decides.
     *  4. With no matching rule at all the URL is allowed.
     *
     * @param string      $url       Absolute URL or path to check.
     * @param string|null $userAgent User agent name; null is treated as "*".
     */
    public function allows(string $url, ?string $userAgent = '*'): bool
    {
        // Resolve null before lower-casing; strtolower(null) would yield ''.
        $userAgent = $userAgent === null ? '*' : strtolower($userAgent);

        // parse_url() returns null when there is no path and false for
        // seriously malformed URLs; both are treated as an empty path.
        $path = parse_url($url, PHP_URL_PATH);
        if (! is_string($path)) {
            $path = '';
        }

        $rules = $this->disallowsPerUserAgent[$userAgent] ?? $this->disallowsPerUserAgent['*'] ?? [];

        $type = null;
        $denied = $this->pathIsDenied($path, $rules, $type);

        // An exact match (type 0) is final.
        if ($denied !== null && $type !== 1) {
            return ! $denied;
        }

        // No rule matched, or only a directory-prefix rule did: a wildcard
        // user-agent group may authorize or forbid the path.
        $allowedByWildcard = $this->checkForWildcard($path, $userAgent);
        if ($allowedByWildcard !== null) {
            return $allowedByWildcard;
        }

        // Falls back to the directory rule; with no rule ($denied === null)
        // this evaluates to true, i.e. allowed.
        return ! $denied;
    }

    /**
     * Look for a wildcard user-agent group (a prefix of the agent followed by
     * "*") and evaluate the path against the first one found, trying the
     * shortest prefix first.
     *
     * @param string $path      URL path to check.
     * @param string $userAgent Lower-cased user agent name.
     *
     * @return bool|null True when allowed, false when denied, null when no
     *                   wildcard group applies to this user agent.
     */
    protected function checkForWildcard(string $path, string $userAgent)
    {
        if ($userAgent === '*') {
            return null;
        }

        $length = strlen($userAgent);

        for ($i = 1; $i <= $length; $i++) {
            $wildCardUserAgent = substr($userAgent, 0, $i).'*';

            if (isset($this->disallowsPerUserAgent[$wildCardUserAgent])) {
                return ! $this->pathIsDenied($path, $this->disallowsPerUserAgent[$wildCardUserAgent]);
            }
        }

        return null;
    }

    /**
     * Find the first rule that applies to the path.
     *
     * @param string   $path  URL path to check.
     * @param array    $rules Map of uri => bool (true = disallow, false = allow).
     * @param int|null $type  Set to 0 for an exact match and to 1 for a
     *                        directory-prefix match; left untouched otherwise.
     *
     * @return bool|null The matching rule's value, or null when no rule applies.
     */
    protected function pathIsDenied(string $path, array $rules, &$type = null)
    {
        foreach ($rules as $uri => $rule) {
            // Numeric-looking keys are converted to int by PHP arrays.
            $uri = (string) $uri;

            // An empty Allow/Disallow value matches nothing.
            if ($uri === '') {
                continue;
            }

            if ($path === $uri || $path === rtrim($uri, '/')) {
                $type = 0;

                return $rule;
            }

            // A rule that does not target a directory only applies on an
            // exact match, so it must not decide for unrelated paths.
            if (! $this->concernsDirectory($uri)) {
                continue;
            }

            if ($this->isUrlInDirectory($path, $uri)) {
                $type = 1;

                return $rule;
            }
        }

        return null;
    }

    /**
     * Parse robots.txt content into rules grouped by user agent.
     *
     * Lines appearing before the first User-agent line are ignored, as are
     * lines that are neither Allow nor Disallow directives. A repeated
     * User-agent line starts a fresh rule set for that agent.
     *
     * @param string $content Raw robots.txt content.
     *
     * @return array [userAgent => [uri => bool]]
     */
    protected function getDisallowsPerUserAgent(string $content): array
    {
        // Split on any newline convention instead of the platform-specific
        // PHP_EOL: the file's line endings do not depend on the local OS.
        $lines = array_filter(preg_split('/\r\n|\r|\n/', $content) ?: []);

        $disallowsPerUserAgent = [];

        $currentUserAgent = null;

        foreach ($lines as $line) {
            if ($this->isUserAgentLine($line)) {
                $currentUserAgent = $this->parseUserAgent($line);

                $disallowsPerUserAgent[$currentUserAgent] = [];

                continue;
            }

            if ($currentUserAgent === null) {
                continue;
            }

            $rule = null;
            $disallowUrl = $this->parse($line, $rule);

            // null means the line is something other than allow/disallow.
            if ($disallowUrl !== null) {
                $disallowsPerUserAgent[$currentUserAgent][$disallowUrl] = $rule;
            }
        }

        return $disallowsPerUserAgent;
    }

    /**
     * Tell whether the line is a "User-agent:" directive (case-insensitive).
     */
    protected function isUserAgentLine(string $line): bool
    {
        return preg_match('/^User-agent\s*:/i', trim($line)) === 1;
    }

    /**
     * Extract the lower-cased user agent name from a "User-agent:" line.
     */
    protected function parseUserAgent(string $line): string
    {
        return strtolower(trim(preg_replace('/^User-agent\s*:/i', '', trim($line))));
    }

    /**
     * Parse an Allow/Disallow directive.
     *
     * @param string    $line Raw line from the robots.txt content.
     * @param bool|null $type Set to true for Disallow and false for Allow;
     *                        left untouched for any other line.
     *
     * @return string|null The directive's trimmed value, or null when the line
     *                     is not an Allow/Disallow directive.
     */
    protected function parse(string $line, &$type): ?string
    {
        $line = trim($line);

        // Whitespace before the colon is tolerated, as for User-agent lines.
        if (preg_match('/^disallow\s*:(.*)$/is', $line, $matches) === 1) {
            $type = true;

            return trim($matches[1]);
        }

        if (preg_match('/^allow\s*:(.*)$/is', $line, $matches) === 1) {
            $type = false;

            return trim($matches[1]);
        }

        // Could be crawl-delay, sitemap, a comment... A function with a
        // declared return type must return explicitly, even for null.
        return null;
    }

    /**
     * Tell whether the rule path targets a directory, i.e. ends with "/".
     */
    protected function concernsDirectory(string $path): bool
    {
        return substr($path, -1) === '/';
    }

    /**
     * Tell whether $url starts with the directory path $path.
     */
    protected function isUrlInDirectory(string $url, string $path): bool
    {
        return strpos($url, $path) === 0;
    }
}
193