1
|
|
|
<?php |
2
|
|
|
namespace vipnytt; |
3
|
|
|
|
4
|
|
|
use vipnytt\XRobotsTagParser\Directives; |
5
|
|
|
use vipnytt\XRobotsTagParser\Exceptions\XRobotsTagParserException; |
6
|
|
|
use vipnytt\XRobotsTagParser\RobotsTagInterface; |
7
|
|
|
|
8
|
|
|
/** |
9
|
|
|
* Class XRobotsTagParser |
10
|
|
|
* X-Robots-Tag HTTP header parser |
11
|
|
|
* |
12
|
|
|
* @package vipnytt |
13
|
|
|
* |
14
|
|
|
* @author VIP nytt ([email protected]) |
15
|
|
|
* @author Jan-Petter Gundersen ([email protected]) |
16
|
|
|
* |
17
|
|
|
* Project: |
18
|
|
|
* @link https://github.com/VIPnytt/RobotsTagParser |
19
|
|
|
* @license https://opensource.org/licenses/MIT MIT license |
20
|
|
|
* |
21
|
|
|
* Specification: |
22
|
|
|
* @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header |
23
|
|
|
*/ |
24
|
|
|
class XRobotsTagParser implements RobotsTagInterface
{
    /**
     * User-Agent string supplied by the caller
     *
     * @var string
     */
    protected $userAgent = self::USER_AGENT;

    /**
     * User-Agent key used to select rules in getRules()
     *
     * @var string
     */
    protected $userAgentMatch = self::USER_AGENT;

    /**
     * Value of the header line currently being processed
     *
     * @var string
     */
    protected $currentRule = '';

    /**
     * User-Agent the current rule applies to
     *
     * Starts at the default User-Agent so that the very first rule is filed
     * under the same key that cleanup() restores for every later rule.
     *
     * @var string
     */
    protected $currentUserAgent = self::USER_AGENT;

    /**
     * Parsed rules, indexed by User-Agent and then by directive
     *
     * @var array
     */
    protected $rules = [];

    /**
     * Constructor
     *
     * @param string $userAgent User-Agent to select rules for
     * @param array|null $headers Optional header lines, parsed immediately when not empty
     */
    public function __construct($userAgent = self::USER_AGENT, $headers = null)
    {
        $this->userAgent = $userAgent;
        if (!empty($headers)) {
            $this->parse($headers);
        }
    }

    /**
     * Parse HTTP headers
     *
     * Each element is lower-cased and split on the first colon. Elements
     * whose name part is not the rule identifier are ignored. Once all
     * headers are processed the best matching User-Agent is re-evaluated.
     *
     * @param array $headers Header lines in "Name: value" form
     * @return void
     */
    public function parse(array $headers)
    {
        // Loop-invariant, so it is computed once.
        $identifier = mb_strtolower(self::HEADER_RULE_IDENTIFIER);
        foreach ($headers as $header) {
            $parts = array_map('trim', mb_split(':', mb_strtolower($header), 2));
            if (count($parts) < 2 || $parts[0] !== $identifier) {
                // Header is not a rule
                continue;
            }
            $this->currentRule = $parts[1];
            $this->detectDirectives();
        }
        $this->matchUserAgent();
    }

    /**
     * Detect directives in the current rule
     *
     * The rule is split on commas. If the first segment has the form
     * "name: value" and the name is not a known directive, the name is taken
     * as the User-Agent for this rule. Every segment whose leading token is
     * a known directive is then registered through addRule().
     *
     * @return void
     */
    protected function detectDirectives()
    {
        try {
            $directives = array_map('trim', mb_split(',', $this->currentRule));
            $pair = array_map('trim', mb_split(':', $directives[0], 2));
            if (count($pair) === 2 && !in_array($pair[0], self::DIRECTIVES, true)) {
                $this->currentUserAgent = $pair[0];
                $directives[0] = $pair[1];
            }
            foreach ($directives as $rule) {
                $directive = trim(mb_split(':', $rule, 2)[0]);
                if (in_array($directive, self::DIRECTIVES, true)) {
                    $this->addRule($directive);
                }
            }
        } finally {
            // Reset per-rule state even when addRule() throws, so the next
            // rule does not inherit this rule's User-Agent.
            $this->cleanup();
        }
    }

    /**
     * Add rule
     *
     * Builds the directive object for the given directive name from the
     * current rule and stores its value under the current User-Agent.
     *
     * @param string $directive
     * @return void
     * @throws XRobotsTagParserException
     */
    protected function addRule($directive)
    {
        if (!isset($this->rules[$this->currentUserAgent])) {
            $this->rules[$this->currentUserAgent] = [];
        }
        switch ($directive) {
            case self::DIRECTIVE_UNAVAILABLE_AFTER:
                $object = new Directives\UnavailableAfter($directive, $this->currentRule);
                break;
            default:
                $object = new Directives\Generic($directive, $this->currentRule);
        }
        // A later occurrence of the same directive overwrites the earlier value.
        $this->rules[$this->currentUserAgent][$object->getDirective()] = $object->getValue();
    }

    /**
     * Cleanup before next rule is read
     *
     * @return void
     */
    protected function cleanup()
    {
        $this->currentRule = '';
        $this->currentUserAgent = self::USER_AGENT;
    }

    /**
     * Find the most rule-matching User-Agent
     *
     * Falls back to the default User-Agent when the User-Agent parser
     * reports no match (false).
     *
     * @return string
     */
    protected function matchUserAgent()
    {
        $userAgentParser = new UserAgentParser($this->userAgent);
        $match = $userAgentParser->getMostSpecific(array_keys($this->rules));
        // array_keys() yields integers for numeric keys; cast so the property
        // always holds a string as documented.
        $this->userAgentMatch = ($match !== false) ? (string)$match : self::USER_AGENT;
        return $this->userAgentMatch;
    }

    /**
     * Return all applicable rules
     *
     * Rules of the default User-Agent come first; rules of the matching
     * User-Agent are merged on top and override equal directives.
     *
     * @return array
     */
    public function getRules()
    {
        $rules = [];
        // Default UserAgent
        if (isset($this->rules[self::USER_AGENT])) {
            $rules = array_merge($rules, $this->rules[self::USER_AGENT]);
        }
        // Matching UserAgent
        if (isset($this->rules[$this->userAgentMatch])) {
            $rules = array_merge($rules, $this->rules[$this->userAgentMatch]);
        }
        // Result
        return $rules;
    }

    /**
     * Export all rules for all UserAgents
     *
     * @return array
     */
    public function export()
    {
        return $this->rules;
    }
}
192
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable `$accountId` that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the `id` property of an instance of the `Account` class. This class holds a proper account, so the id value must no longer be false.
Either this assignment is in error or a type check should be added for that assignment.