GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#13)
by Dev
01:13
created

RobotsTxt::parse()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 18
rs 9.6666
c 0
b 0
f 0
cc 3
nc 3
nop 2
1
<?php
2
3
namespace Spatie\Robots;
4
5
/**
 * Parses the contents of a robots.txt file and answers whether a URL may be
 * fetched by a given user agent.
 *
 * Rules are kept per lower-cased user agent as a map of path => bool, where
 * `true` means the path is disallowed and `false` means it is explicitly allowed.
 * Within a group the first rule that matches (in file order) wins.
 */
class RobotsTxt
{
    protected static $robotsCache = [];

    /** @var array<string, array<string, bool>> user agent => (path => isDisallowed) */
    protected $disallowsPerUserAgent = [];

    /**
     * Build an instance from a file path or URL readable by file_get_contents().
     *
     * A source that cannot be read is treated as an empty robots.txt.
     */
    public static function readFrom(string $source): self
    {
        // Best-effort read: file_get_contents() signals failure with `false` (not null).
        $content = @file_get_contents($source);

        return new self($content === false ? '' : $content);
    }

    /**
     * @param string $content Raw robots.txt contents.
     */
    public function __construct(string $content)
    {
        $this->disallowsPerUserAgent = $this->getDisallowsPerUserAgent($content);
    }

    /**
     * Build an instance from either a robots.txt location or raw robots.txt contents.
     *
     * The source is read as a location only when it contains both "http" and
     * "robots.txt"; anything else is parsed directly as contents.
     */
    public static function create(string $source): self
    {
        $looksLikeRobotsUrl = strpos($source, 'http') !== false
            && strpos($source, 'robots.txt') !== false;

        return $looksLikeRobotsUrl
            ? self::readFrom($source)
            : new self($source);
    }

    /**
     * Tell whether the given URL may be fetched by the given user agent.
     *
     * Resolution order:
     *  1. an exact-path rule in the user agent's own group decides immediately;
     *  2. otherwise a prefix-wildcard user agent group (e.g. "google*"), if any, decides;
     *  3. otherwise a directory (prefix) rule in the user agent's own group decides;
     *  4. otherwise the URL is allowed.
     *
     * NOTE(review): a user agent without a group of its own does not fall back to
     * the "*" group here - confirm whether that is intended.
     *
     * @param string      $url       Absolute URL or path; only its path component is matched.
     * @param string|null $userAgent Case-insensitive user agent; null means "*".
     */
    public function allows(string $url, ?string $userAgent = '*'): bool
    {
        // Resolve null before lower-casing, otherwise it silently becomes ''.
        $userAgent = $userAgent === null ? '*' : strtolower($userAgent);

        // parse_url() yields null when there is no path and false on a malformed URL.
        $path = parse_url($url, PHP_URL_PATH);
        if (! is_string($path)) {
            $path = '';
        }

        $disallows = $this->disallowsPerUserAgent[$userAgent] ?? [];

        $type = null;
        $denied = $this->pathIsDenied($path, $disallows, $type);

        // An exact-path match is final.
        if ($denied !== null && $type !== 1) {
            return ! $denied;
        }

        // No match, or only a directory match: a wildcard user agent group may decide.
        $wildcardVerdict = $this->checkForWildcard($path, $userAgent);
        if ($wildcardVerdict !== null) {
            return $wildcardVerdict;
        }

        // Fall back to the directory rule if there was one, otherwise allow.
        return $denied === null ? true : ! $denied;
    }

    /**
     * Evaluate the path against the first prefix-wildcard group matching the user agent.
     *
     * Prefixes of the user agent are tried from shortest to longest, each with a
     * trailing "*" (for "googlebot": "g*", "go*", ...).
     *
     * @return bool|null true if allowed by that group (including when none of its rules
     *                   match), false if disallowed, null if no such group exists.
     */
    protected function checkForWildcard(string $path, string $userAgent)
    {
        if ($userAgent === '*') {
            return null;
        }

        $length = strlen($userAgent);

        for ($i = 1; $i <= $length; $i++) {
            $wildCardUserAgent = substr($userAgent, 0, $i).'*';

            if (isset($this->disallowsPerUserAgent[$wildCardUserAgent])) {
                return ! $this->pathIsDenied($path, $this->disallowsPerUserAgent[$wildCardUserAgent]);
            }
        }

        return null;
    }

    /**
     * Find the first rule matching the path.
     *
     * @param string            $path  Path to test.
     * @param array<mixed,bool> $rules Map of rule path => isDisallowed.
     * @param int|null          $type  Set to 0 on an exact match (ignoring a trailing
     *                                 slash on the rule) and to 1 on a prefix match;
     *                                 left untouched when nothing matches.
     *
     * @return bool|null The matched rule's value (true = disallowed), or null if none matched.
     */
    protected function pathIsDenied(string $path, array $rules, &$type = null)
    {
        foreach ($rules as $uri => $rule) {
            // PHP turns numeric-string array keys into ints; normalise before strict comparison.
            $uri = (string) $uri;

            if ($path === $uri || $path === rtrim($uri, '/')) {
                $type = 0;

                return $rule;
            }

            if ($this->isUrlInDirectory($path, $uri)) {
                $type = 1;

                return $rule;
            }
        }

        return null;
    }

    /**
     * Parse robots.txt contents into per-user-agent rule maps.
     *
     * Lines before the first User-agent line are ignored, as are lines that are
     * neither Allow nor Disallow. Rules with an empty path are skipped, since an
     * empty prefix would otherwise match every path. A repeated User-agent line
     * restarts that agent's rule list.
     *
     * @return array<string, array<string, bool>>
     */
    protected function getDisallowsPerUserAgent(string $content): array
    {
        // Split on any line-ending style, independent of the host platform.
        $lines = array_filter(preg_split('/\r\n|\r|\n/', $content) ?: []);

        $disallowsPerUserAgent = [];

        $currentUserAgent = null;

        foreach ($lines as $line) {
            if ($this->isUserAgentLine($line)) {
                $currentUserAgent = $this->parseUserAgent($line);
                $disallowsPerUserAgent[$currentUserAgent] = [];

                continue;
            }

            if ($currentUserAgent === null) {
                continue;
            }

            $rule = null;
            $disallowUrl = $this->parse($line, $rule);

            // null: not an allow/disallow line; '': rule without a path.
            if ($disallowUrl === null || $disallowUrl === '') {
                continue;
            }

            $disallowsPerUserAgent[$currentUserAgent][$disallowUrl] = $rule;
        }

        return $disallowsPerUserAgent;
    }

    /**
     * Tell whether the line is a "User-agent:" directive (case-insensitive).
     */
    protected function isUserAgentLine(string $line): bool
    {
        return preg_match('/^User-agent\s*:/i', trim($line)) === 1;
    }

    /**
     * Extract the lower-cased user agent name from a "User-agent:" line.
     */
    protected function parseUserAgent(string $line): string
    {
        return strtolower(trim(preg_replace('/^User-agent\s*:/i', '', trim($line))));
    }

    /**
     * Parse an Allow/Disallow line.
     *
     * Whitespace between the directive name and the colon is tolerated, as it is
     * for User-agent lines.
     *
     * @param string    $line Raw line.
     * @param bool|null $type Set to true for Disallow and false for Allow; left
     *                        untouched for any other line.
     *
     * @return string|null The trimmed rule path, or null for any other directive
     *                     (crawl-delay, sitemap, ...).
     */
    protected function parse(string $line, &$type): ?string
    {
        if (preg_match('/^(dis)?allow\s*:(.*)$/is', trim($line), $matches) !== 1) {
            return null;
        }

        $type = $matches[1] !== '';

        return trim($matches[2]);
    }

    /**
     * Tell whether $url starts with $path. An empty $path never matches.
     */
    protected function isUrlInDirectory(string $url, string $path): bool
    {
        return $path !== '' && strpos($url, $path) === 0;
    }
}
178