1 | <?php |
||
24 | class XRobotsTagParser |
||
25 | { |
||
26 | const HEADER_RULE_IDENTIFIER = 'x-robots-tag'; |
||
27 | const USERAGENT_DEFAULT = ''; |
||
28 | |||
29 | const DIRECTIVE_ALL = 'all'; |
||
30 | const DIRECTIVE_NONE = 'none'; |
||
31 | const DIRECTIVE_NO_ARCHIVE = 'noarchive'; |
||
32 | const DIRECTIVE_NO_FOLLOW = 'nofollow'; |
||
33 | const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex'; |
||
34 | const DIRECTIVE_NO_INDEX = 'noindex'; |
||
35 | const DIRECTIVE_NO_ODP = 'noodp'; |
||
36 | const DIRECTIVE_NO_SNIPPET = 'nosnippet'; |
||
37 | const DIRECTIVE_NO_TRANSLATE = 'notranslate'; |
||
38 | const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after'; |
||
39 | |||
40 | protected $url = ''; |
||
41 | protected $userAgent = self::USERAGENT_DEFAULT; |
||
42 | protected $config = []; |
||
43 | |||
44 | protected $headers = []; |
||
45 | protected $currentRule = ''; |
||
46 | protected $currentUserAgent = self::USERAGENT_DEFAULT; |
||
47 | |||
48 | protected $rules = []; |
||
49 | |||
50 | /** |
||
51 | * Constructor |
||
52 | * |
||
53 | * @param string $url |
||
54 | * @param string $userAgent |
||
55 | * @param array $config |
||
56 | * @throws XRobotsTagParserException |
||
57 | */ |
||
58 | public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $config = []) |
||
78 | |||
79 | /** |
||
80 | * Parse HTTP headers |
||
81 | * |
||
82 | * @return void |
||
83 | */ |
||
84 | protected function parse() |
||
100 | |||
101 | /** |
||
102 | * Request the HTTP headers from a URL |
||
103 | * |
||
104 | * @return array Raw HTTP headers |
||
105 | * @throws XRobotsTagParserException |
||
106 | */ |
||
107 | protected function getHeaders() |
||
123 | |||
124 | /** |
||
125 | * Detect directives in rule |
||
126 | * |
||
127 | * @return void |
||
128 | */ |
||
129 | protected function detectDirectives() |
||
145 | |||
146 | /** |
||
147 | * Array of directives and their class names |
||
148 | * |
||
149 | * @return array |
||
150 | */ |
||
151 | protected function directiveClasses() |
||
166 | |||
167 | /** |
||
168 | * Add rule |
||
169 | * |
||
170 | * @param string $directive |
||
171 | * @return void |
||
172 | * @throws XRobotsTagParserException |
||
173 | */ |
||
174 | protected function addRule($directive) |
||
186 | |||
187 | /** |
||
188 | * Cleanup before next rule is read |
||
189 | * |
||
190 | * @return void |
||
191 | */ |
||
192 | protected function cleanup() |
||
197 | |||
198 | /** |
||
199 | * Return all applicable rules |
||
200 | * |
||
201 | * @param bool $raw |
||
202 | * @return array |
||
203 | */ |
||
204 | public function getRules($raw = false) |
||
222 | |||
223 | /** |
||
224 | * Export all rules for all UserAgents |
||
225 | * |
||
226 | * @return array |
||
227 | */ |
||
228 | public function export() |
||
232 | |||
233 | /** |
||
234 | * Get the meaning of a directive |
||
235 | * |
||
236 | * @param string $directive |
||
237 | * @return string |
||
238 | * @throws XRobotsTagParserException |
||
239 | */ |
||
240 | public function getDirectiveMeaning($directive) |
||
252 | } |
||
253 |