1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Spatie\Robots; |
4
|
|
|
|
5
|
|
|
use InvalidArgumentException; |
6
|
|
|
|
7
|
|
|
/**
 * Reads the `X-Robots-Tag` response headers and answers whether a given
 * user agent is allowed to index the page and/or follow its links.
 */
class RobotsHeaders
{
    /**
     * Parsed directives, keyed by lower-cased user agent ('*' when the header
     * names no agent), each holding optional 'noindex' / 'nofollow' booleans.
     *
     * @var array<string, array<string, bool>>
     */
    protected $robotHeadersProperties = [];

    /**
     * Fetch `$source` and build an instance from the response headers.
     *
     * @param string $source Anything `file_get_contents()` can open.
     *
     * @throws InvalidArgumentException When the source cannot be read.
     */
    public static function readFrom(string $source): self
    {
        // Errors are suppressed on purpose: failure is reported via the exception below.
        $content = @file_get_contents($source);

        if ($content === false) {
            throw new InvalidArgumentException("Could not read from source `{$source}`");
        }

        // $http_response_header may be unset (hence the fallback to an empty list).
        return new self($http_response_header ?? []);
    }

    /**
     * Named constructor; equivalent to `new RobotsHeaders($headers)`.
     *
     * @param array $headers See the constructor for the accepted shapes.
     */
    public static function create(array $headers): self
    {
        return new self($headers);
    }

    /**
     * @param array $headers Either a list of raw header lines
     *                       (`'X-Robots-Tag: noindex'`) or a map of header
     *                       name to value(s) (`['X-Robots-Tag' => 'noindex']`).
     */
    public function __construct(array $headers)
    {
        $this->robotHeadersProperties = $this->parseHeaders($headers);
    }

    /**
     * Whether `$userAgent` is allowed to index the page (inverse of noindex()).
     */
    public function mayIndex(string $userAgent = '*'): bool
    {
        return ! $this->noindex($userAgent);
    }

    /**
     * Whether `$userAgent` is allowed to follow links (inverse of nofollow()).
     */
    public function mayFollow(string $userAgent = '*'): bool
    {
        return ! $this->nofollow($userAgent);
    }

    /**
     * Whether the headers forbid `$userAgent` from indexing the page.
     */
    public function noindex(string $userAgent = '*'): bool
    {
        return $this->resolveDirective($userAgent, 'noindex');
    }

    /**
     * Whether the headers forbid `$userAgent` from following links.
     */
    public function nofollow(string $userAgent = '*'): bool
    {
        return $this->resolveDirective($userAgent, 'nofollow');
    }

    /**
     * Look up `$property` for `$userAgent`, trying in order: the exact agent,
     * a wildcard agent entry (e.g. `google*`), then the global `*` entry.
     *
     * @param string $property 'noindex' or 'nofollow'.
     *
     * @return bool The stored value, or false when nothing matches.
     */
    private function resolveDirective(string $userAgent, string $property): bool
    {
        $userAgent = strtolower($userAgent);

        if (isset($this->robotHeadersProperties[$userAgent][$property])) {
            return $this->robotHeadersProperties[$userAgent][$property];
        }

        // Assign first, compare afterwards: `$x = f() !== null` would store the
        // comparison result (always true on a match) instead of the actual value.
        $wildCard = $this->checkForWildcard($userAgent, $property);

        if ($wildCard !== null) {
            return $wildCard;
        }

        return $this->robotHeadersProperties['*'][$property] ?? false;
    }

    /**
     * Search for a stored agent of the form `<prefix>*` where `<prefix>` is a
     * leading part of `$userAgent`; prefixes are tried from shortest to longest
     * and the first hit wins.
     *
     * @return bool|null The stored value, or null when no wildcard entry
     *                   defines `$property` (or `$userAgent` is `*` itself).
     */
    protected function checkForWildcard(string $userAgent, string $property)
    {
        if ($userAgent === '*') {
            return null;
        }

        $length = strlen($userAgent);

        for ($i = 1; $i <= $length; $i++) {
            $wildCardUserAgent = substr($userAgent, 0, $i).'*';

            if (isset($this->robotHeadersProperties[$wildCardUserAgent][$property])) {
                return $this->robotHeadersProperties[$wildCardUserAgent][$property];
            }
        }

        return null;
    }

    /**
     * Turn the robots headers into a map of user agent => directives.
     *
     * A header that splits on ':' into exactly three parts
     * (`name: agent: directives`) is attributed to that agent; every other
     * shape is attributed to `*`. The last part is treated as the directive list.
     *
     * @return array<string, array<string, bool>>
     */
    protected function parseHeaders(array $headers): array
    {
        $parsedHeaders = [];

        foreach ($this->filterRobotHeaders($headers) as $header) {
            $headerParts = explode(':', $this->normalizeHeaders($header));

            $userAgent = count($headerParts) === 3
                ? strtolower(trim($headerParts[1]))
                : '*';

            $options = strtolower(end($headerParts));

            // Keep directives from earlier headers for the same agent, so that
            // several X-Robots-Tag headers combine instead of the last one
            // wiping the others.
            if (! isset($parsedHeaders[$userAgent])) {
                $parsedHeaders[$userAgent] = [];
            }

            // Compare against false explicitly: a match at offset 0 is a valid hit.
            if (strpos($options, 'index') !== false) {
                $parsedHeaders[$userAgent]['noindex'] = strpos($options, 'noindex') !== false;
            }

            if (strpos($options, 'follow') !== false) {
                $parsedHeaders[$userAgent]['nofollow'] = strpos($options, 'nofollow') !== false;
            }
        }

        return $parsedHeaders;
    }

    /**
     * Keep only the entries whose key or whose value starts with
     * `x-robots-tag` (case-insensitive). Keys are preserved.
     */
    protected function filterRobotHeaders(array $headers): array
    {
        return array_filter($headers, function ($header) use ($headers) {
            $headerContent = $this->normalizeHeaders($headers[$header] ?? []);

            // List-style input has integer keys; cast so the string check is well-defined.
            return strpos(strtolower((string) $header), 'x-robots-tag') === 0
                || strpos(strtolower($headerContent), 'x-robots-tag') === 0;
        }, ARRAY_FILTER_USE_KEY);
    }

    /**
     * Collapse a header value (a string or a list of strings) into one
     * comma-separated string.
     *
     * @param string|array $headers
     */
    protected function normalizeHeaders($headers): string
    {
        return implode(',', (array) $headers);
    }
}
136
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.