1 | <?php |
||
23 | class XRobotsTagParser |
||
24 | { |
||
25 | const HEADER_RULE_IDENTIFIER = 'X-Robots-Tag'; |
||
26 | const USERAGENT_DEFAULT = ''; |
||
27 | |||
28 | const DIRECTIVE_ALL = 'all'; |
||
29 | const DIRECTIVE_NONE = 'none'; |
||
30 | const DIRECTIVE_NO_ARCHIVE = 'noarchive'; |
||
31 | const DIRECTIVE_NO_FOLLOW = 'nofollow'; |
||
32 | const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex'; |
||
33 | const DIRECTIVE_NO_INDEX = 'noindex'; |
||
34 | const DIRECTIVE_NO_ODP = 'noodp'; |
||
35 | const DIRECTIVE_NO_SNIPPET = 'nosnippet'; |
||
36 | const DIRECTIVE_NO_TRANSLATE = 'notranslate'; |
||
37 | const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after'; |
||
38 | |||
39 | protected $url = ''; |
||
40 | protected $userAgent = self::USERAGENT_DEFAULT; |
||
41 | protected $userAgentMatch = self::USERAGENT_DEFAULT; |
||
42 | protected $config = []; |
||
43 | |||
44 | protected $headers = []; |
||
45 | protected $currentRule = ''; |
||
46 | protected $currentUserAgent = self::USERAGENT_DEFAULT; |
||
47 | |||
48 | protected $rules = []; |
||
49 | |||
50 | /** |
||
51 | * Constructor |
||
52 | * |
||
53 | * @param string $url |
||
54 | * @param string $userAgent |
||
55 | * @param array $config |
||
56 | * @throws XRobotsTagParserException |
||
57 | */ |
||
58 | public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $config = []) |
||
74 | |||
75 | /** |
||
76 | * Parse HTTP headers |
||
77 | * |
||
78 | * @return void |
||
79 | */ |
||
80 | protected function parse() |
||
93 | |||
94 | /** |
||
95 | * Select HTTP header source |
||
96 | * |
||
97 | * @return array |
||
98 | */ |
||
99 | protected function selectHeaderSource() |
||
107 | |||
108 | /** |
||
109 | * Request the HTTP headers from a URL |
||
110 | * |
||
111 | * @return array Raw HTTP headers |
||
112 | * @throws XRobotsTagParserException |
||
113 | */ |
||
114 | protected function getHeaders() |
||
131 | |||
132 | /** |
||
133 | * Detect directives in rule |
||
134 | * |
||
135 | * @return void |
||
136 | */ |
||
137 | protected function detectDirectives() |
||
153 | |||
154 | /** |
||
155 | * Array of directives and their class names |
||
156 | * |
||
157 | * @return array |
||
158 | */ |
||
159 | protected function directiveClasses() |
||
174 | |||
175 | /** |
||
176 | * Add rule |
||
177 | * |
||
178 | * @param string $directive |
||
179 | * @return void |
||
180 | * @throws XRobotsTagParserException |
||
181 | */ |
||
182 | protected function addRule($directive) |
||
194 | |||
195 | /** |
||
196 | * Cleanup before next rule is read |
||
197 | * |
||
198 | * @return void |
||
199 | */ |
||
200 | protected function cleanup() |
||
205 | |||
206 | /** |
||
207 | * Return all applicable rules |
||
208 | * |
||
209 | * @param bool $raw |
||
210 | * @return array |
||
211 | */ |
||
212 | public function getRules($raw = false) |
||
230 | |||
231 | /** |
||
232 | * Export all rules for all UserAgents |
||
233 | * |
||
234 | * @return array |
||
235 | */ |
||
236 | public function export() |
||
240 | |||
241 | /** |
||
242 | * Get the meaning of a directive |
||
243 | * |
||
244 | * @param string $directive |
||
245 | * @return string |
||
246 | * @throws XRobotsTagParserException |
||
247 | */ |
||
248 | public function getDirectiveMeaning($directive) |
||
260 | } |
||
261 |