GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#13)
by Dev
01:31
created

RobotsTxt::parse()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 16
rs 9.7333
c 0
b 0
f 0
cc 3
nc 3
nop 2
1
<?php
2
3
namespace Spatie\Robots;
4
5
/**
 * Parses the contents of a robots.txt file and answers whether a given URL
 * may be crawled by a given user agent.
 *
 * Rules are stored per lower-cased user agent as a map of
 * path => bool, where true means "disallow" and false means "allow".
 */
class RobotsTxt
{
    /** Match kind reported by pathIsDenied(): the path equals the rule (a trailing slash on the rule is ignored). */
    protected const MATCH_EXACT = 0;

    /** Match kind reported by pathIsDenied(): the path starts with a rule that ends in "/". */
    protected const MATCH_DIRECTORY = 1;

    /**
     * Not read or written by any method of this class.
     *
     * @var array
     */
    protected static $robotsCache = [];

    /**
     * Parsed rules, keyed by lower-cased user agent, then by rule path.
     * Value true = disallow, false = allow.
     *
     * @var array
     */
    protected $disallowsPerUserAgent = [];

    /**
     * Builds an instance from a file path or URL readable by file_get_contents().
     *
     * Reading is best-effort: when the source cannot be read, the instance is
     * built from an empty string and therefore contains no rules.
     *
     * @param string $source Location of the robots.txt file.
     */
    public static function readFrom(string $source): self
    {
        // file_get_contents() signals failure with false (never null), so the
        // fallback has to test for false explicitly.
        $content = @file_get_contents($source);

        return new self($content === false ? '' : $content);
    }

    /**
     * @param string $content Raw robots.txt content.
     */
    public function __construct(string $content)
    {
        $this->disallowsPerUserAgent = $this->getDisallowsPerUserAgent($content);
    }

    /**
     * Builds an instance either from a location or from raw content.
     *
     * When $source contains both "http" and "robots.txt" it is treated as a
     * location and fetched through readFrom(); otherwise it is parsed as the
     * robots.txt content itself.
     *
     * @param string $source Location of a robots.txt file, or its raw content.
     */
    public static function create(string $source): self
    {
        $looksLikeLocation = strpos($source, 'http') !== false
            && strpos($source, 'robots.txt') !== false;

        if ($looksLikeLocation) {
            return self::readFrom($source);
        }

        return new self($source);
    }

    /**
     * Tells whether $userAgent may fetch $url.
     *
     * Resolution order:
     *  1. The rules of the exact user agent (or of "*" when the agent has no
     *     group of its own) are searched. An exact path match decides at once.
     *  2. When step 1 found only a directory match, or nothing at all, a
     *     wildcard user-agent group (e.g. "google*" for "googlebot") decides
     *     if one exists.
     *  3. Otherwise the directory match from step 1 decides; with no match at
     *     all the URL is allowed.
     *
     * @param string      $url       Absolute or relative URL; only its path component is checked.
     * @param string|null $userAgent User agent name, compared case-insensitively; null means "*".
     */
    public function allows(string $url, ?string $userAgent = '*'): bool
    {
        // The null check must come before strtolower(), which would turn null into ''.
        $userAgent = $userAgent === null ? '*' : strtolower($userAgent);

        // parse_url() yields null when there is no path and false for malformed URLs.
        $path = parse_url($url, PHP_URL_PATH);
        if (! is_string($path)) {
            $path = '';
        }

        $rules = $this->disallowsPerUserAgent[$userAgent] ?? $this->disallowsPerUserAgent['*'] ?? [];

        $type = null;
        $denied = $this->pathIsDenied($path, $rules, $type);

        // An exact match is final.
        if ($denied !== null && $type !== self::MATCH_DIRECTORY) {
            return ! $denied;
        }

        // A directory match (or no match) can be overridden by a wildcard user-agent group.
        $wildcardAllows = $this->checkForWildcard($path, $userAgent);
        if ($wildcardAllows !== null) {
            return $wildcardAllows;
        }

        return $denied === null ? true : ! $denied;
    }

    /**
     * Looks for a wildcard user-agent group matching $userAgent and evaluates
     * $path against it.
     *
     * Candidate group names are every prefix of $userAgent followed by "*",
     * tried from the shortest prefix to the longest; the first existing group
     * is used.
     *
     * @param string $path      URL path to check.
     * @param string $userAgent Lower-cased user agent name.
     *
     * @return bool|null True when the group allows the path (including when it
     *                   has no matching rule), false when it denies it, null
     *                   when no wildcard group applies.
     */
    protected function checkForWildcard(string $path, string $userAgent)
    {
        if ($userAgent === '*') {
            return null;
        }

        $length = strlen($userAgent);

        for ($i = 1; $i <= $length; $i++) {
            $wildCardUserAgent = substr($userAgent, 0, $i).'*';

            if (isset($this->disallowsPerUserAgent[$wildCardUserAgent])) {
                return ! $this->pathIsDenied($path, $this->disallowsPerUserAgent[$wildCardUserAgent]);
            }
        }

        return null;
    }

    /**
     * Finds the first rule in $rules that applies to $path.
     *
     * @param string   $path  URL path to check.
     * @param array    $rules Map of rule path => bool (true = disallow, false = allow).
     * @param int|null $type  Set to MATCH_EXACT or MATCH_DIRECTORY when a rule
     *                        applies; left untouched otherwise.
     *
     * @return bool|null The value of the matching rule, or null when no rule applies.
     */
    protected function pathIsDenied(string $path, array $rules, &$type = null)
    {
        foreach ($rules as $uri => $rule) {
            // Numeric-looking array keys come back as integers.
            $uri = (string) $uri;

            if (in_array($path, [$uri, rtrim($uri, '/')], true)) {
                $type = self::MATCH_EXACT;

                return $rule;
            }

            // A rule that does not name a directory only ever matches exactly,
            // so it says nothing about this path: move on to the next rule.
            if (! $this->concernsDirectory($uri)) {
                continue;
            }

            if ($this->isUrlInDirectory($path, $uri)) {
                $type = self::MATCH_DIRECTORY;

                return $rule;
            }
        }

        return null;
    }

    /**
     * Parses robots.txt content into rules grouped by user agent.
     *
     * Rule lines that appear before the first "User-agent" line are ignored,
     * as are lines that are neither "Allow" nor "Disallow". A later
     * "User-agent" line for an already seen agent starts that agent's rules
     * afresh.
     *
     * @param string $content Raw robots.txt content.
     *
     * @return array Map of lower-cased user agent => (rule path => bool).
     */
    protected function getDisallowsPerUserAgent(string $content): array
    {
        // Split on any line-ending style so the result does not depend on the
        // platform the code runs on.
        $lines = preg_split('/\R/', $content);
        if ($lines === false) {
            $lines = [];
        }

        // Drops empty lines (and, as before, lines consisting only of "0").
        $lines = array_filter($lines);

        $disallowsPerUserAgent = [];
        $currentUserAgent = null;

        foreach ($lines as $line) {
            if ($this->isUserAgentLine($line)) {
                $currentUserAgent = $this->parseUserAgent($line);
                $disallowsPerUserAgent[$currentUserAgent] = [];

                continue;
            }

            if ($currentUserAgent === null) {
                continue;
            }

            $rule = null;
            $rulePath = $this->parse($line, $rule);

            // null means the line is something other than allow/disallow.
            if ($rulePath !== null) {
                $disallowsPerUserAgent[$currentUserAgent][$rulePath] = $rule;
            }
        }

        return $disallowsPerUserAgent;
    }

    /**
     * Tells whether $line is a "User-agent:" line (case-insensitive,
     * whitespace allowed before the colon).
     */
    protected function isUserAgentLine(string $line): bool
    {
        return preg_match('/^User-agent\s*:/i', trim($line)) === 1;
    }

    /**
     * Extracts the lower-cased user agent name from a "User-agent:" line.
     */
    protected function parseUserAgent(string $line): string
    {
        return strtolower(trim(preg_replace('/^User-agent\s*:/i', '', trim($line))));
    }

    /**
     * Parses an "Allow" or "Disallow" line (case-insensitive, whitespace
     * allowed before the colon, as for "User-agent" lines).
     *
     * Remember: disallow = true, allow = false.
     *
     * @param string    $line One line of robots.txt content.
     * @param bool|null $type Set to true for "Disallow", false for "Allow";
     *                        left untouched for any other line.
     *
     * @return string|null The trimmed rule path, or null when the line is not
     *                     an allow/disallow rule (crawl-delay, sitemap, ...).
     */
    protected function parse(string $line, &$type): ?string
    {
        if (preg_match('/^(dis)?allow\s*:(.*)$/is', trim($line), $matches) !== 1) {
            // An explicit return is required: a function with a declared
            // return type may not simply fall off its end.
            return null;
        }

        $type = $matches[1] !== '';

        return trim($matches[2]);
    }

    /**
     * Tells whether a rule path names a directory, i.e. ends with "/".
     */
    protected function concernsDirectory(string $path): bool
    {
        return substr($path, -1) === '/';
    }

    /**
     * Tells whether $url starts with the directory rule $path.
     */
    protected function isUrlInDirectory(string $url, string $path): bool
    {
        return strpos($url, $path) === 0;
    }
}
203