1 | <?php |
||
24 | class XRobotsTagParser |
||
25 | { |
||
26 | const HEADER_RULE_IDENTIFIER = 'x-robots-tag'; |
||
27 | const USERAGENT_DEFAULT = ''; |
||
28 | |||
29 | const DIRECTIVE_ALL = 'all'; |
||
30 | const DIRECTIVE_NONE = 'none'; |
||
31 | const DIRECTIVE_NO_ARCHIVE = 'noarchive'; |
||
32 | const DIRECTIVE_NO_FOLLOW = 'nofollow'; |
||
33 | const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex'; |
||
34 | const DIRECTIVE_NO_INDEX = 'noindex'; |
||
35 | const DIRECTIVE_NO_ODP = 'noodp'; |
||
36 | const DIRECTIVE_NO_SNIPPET = 'nosnippet'; |
||
37 | const DIRECTIVE_NO_TRANSLATE = 'notranslate'; |
||
38 | const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after'; |
||
39 | |||
40 | protected $url = ''; |
||
41 | protected $userAgent = self::USERAGENT_DEFAULT; |
||
42 | protected $config = []; |
||
43 | |||
44 | protected $headers = []; |
||
45 | protected $currentRule = ''; |
||
46 | protected $currentUserAgent = self::USERAGENT_DEFAULT; |
||
47 | |||
48 | protected $options = []; |
||
49 | protected $rules = []; |
||
50 | |||
51 | /** |
||
52 | * Constructor |
||
53 | * |
||
54 | * @param string $url |
||
55 | * @param string $userAgent |
||
56 | * @param array $options |
||
57 | * @throws XRobotsTagParserException |
||
58 | */ |
||
59 | public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $options = []) |
||
79 | |||
80 | /** |
||
81 | * Parse HTTP headers |
||
82 | * |
||
83 | * @return void |
||
84 | */ |
||
85 | protected function parse() |
||
101 | |||
102 | /** |
||
103 | * Request the HTTP headers from a URL |
||
104 | * |
||
105 | * @return array Raw HTTP headers |
||
106 | * @throws XRobotsTagParserException |
||
107 | */ |
||
108 | protected function getHeaders() |
||
124 | |||
125 | /** |
||
126 | * Detect directives in rule |
||
127 | * |
||
128 | * @return void |
||
129 | */ |
||
130 | protected function detectDirectives() |
||
146 | |||
147 | /** |
||
148 | * Array of directives and their class names |
||
149 | * |
||
150 | * @return array |
||
151 | */ |
||
152 | protected function directiveClasses() |
||
167 | |||
168 | /** |
||
169 | * Add rule |
||
170 | * |
||
171 | * @param string $directive |
||
172 | * @return void |
||
173 | * @throws XRobotsTagParserException |
||
174 | */ |
||
175 | protected function addRule($directive) |
||
187 | |||
188 | /** |
||
189 | * Cleanup before next rule is read |
||
190 | * |
||
191 | * @return void |
||
192 | */ |
||
193 | protected function cleanup() |
||
198 | |||
199 | /** |
||
200 | * Return all applicable rules |
||
201 | * |
||
202 | * @param bool $raw |
||
203 | * @return array |
||
204 | */ |
||
205 | public function getRules($raw = false) |
||
223 | |||
224 | /** |
||
225 | * Export all rules for all UserAgents |
||
226 | * |
||
227 | * @return array |
||
228 | */ |
||
229 | public function export() |
||
233 | } |
||
234 |