1 | <?php |
||
24 | class XRobotsTagParser |
||
25 | { |
||
26 | /** |
||
27 | * HTTP header prefix |
||
28 | */ |
||
29 | const HEADER_RULE_IDENTIFIER = 'X-Robots-Tag'; |
||
30 | |||
31 | /** |
||
32 | * Directives |
||
33 | */ |
||
34 | const DIRECTIVE_ALL = 'all'; |
||
35 | const DIRECTIVE_NONE = 'none'; |
||
36 | const DIRECTIVE_NO_ARCHIVE = 'noarchive'; |
||
37 | const DIRECTIVE_NO_FOLLOW = 'nofollow'; |
||
38 | const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex'; |
||
39 | const DIRECTIVE_NO_INDEX = 'noindex'; |
||
40 | const DIRECTIVE_NO_ODP = 'noodp'; |
||
41 | const DIRECTIVE_NO_SNIPPET = 'nosnippet'; |
||
42 | const DIRECTIVE_NO_TRANSLATE = 'notranslate'; |
||
43 | const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after'; |
||
44 | |||
45 | const DIRECTIVES = [ |
||
46 | self::DIRECTIVE_ALL => 'There are no restrictions for indexing or serving. Note: this directive is the default value and has no effect if explicitly listed.', |
||
47 | self::DIRECTIVE_NO_ARCHIVE => 'Do not show a `Cached` link in search results.', |
||
48 | self::DIRECTIVE_NO_FOLLOW => 'Do not follow the links on this page.', |
||
49 | self::DIRECTIVE_NO_IMAGE_INDEX => 'Do not index images on this page.', |
||
50 | self::DIRECTIVE_NO_INDEX => 'Do not show this page in search results and do not show a `Cached` link in search results.', |
||
51 | self::DIRECTIVE_NONE => 'Equivalent to `noindex` and `nofollow`.', |
||
52 | self::DIRECTIVE_NO_ODP => 'Do not use metadata from the `Open Directory project` (http://dmoz.org/) for titles or snippets shown for this page.', |
||
53 | self::DIRECTIVE_NO_SNIPPET => 'Do not show a snippet in the search results for this page.', |
||
54 | self::DIRECTIVE_NO_TRANSLATE => 'Do not offer translation of this page in search results.', |
||
55 | self::DIRECTIVE_UNAVAILABLE_AFTER => 'Do not show this page in search results after the specified date/time.', |
||
56 | ]; |
||
57 | |||
58 | /** |
||
59 | * User-Agent string |
||
60 | * |
||
61 | * @var string |
||
62 | */ |
||
63 | protected $userAgent = ''; |
||
64 | |||
65 | /** |
||
66 | * User-Agent for rule selection |
||
67 | * |
||
68 | * @var string |
||
69 | */ |
||
70 | protected $userAgentMatch = ''; |
||
71 | |||
72 | /** |
||
73 | * Current rule |
||
74 | * |
||
75 | * @var string |
||
76 | */ |
||
77 | protected $currentRule = ''; |
||
78 | |||
79 | /** |
||
80 | * User-Agent for the current rule |
||
81 | * |
||
82 | * @var string |
||
83 | */ |
||
84 | protected $currentUserAgent; |
||
85 | |||
86 | /** |
||
87 | * Rule array |
||
88 | * |
||
89 | * @var array |
||
90 | */ |
||
91 | protected $rules = []; |
||
92 | |||
93 | /** |
||
94 | * Constructor |
||
95 | * |
||
96 | * @param string $userAgent |
||
97 | * @param ?array $headers |
||
98 | */ |
||
99 | public function __construct($userAgent = '', $headers = null) |
||
106 | |||
107 | /** |
||
108 | * Parse HTTP headers |
||
109 | * |
||
110 | * @param array $headers |
||
111 | * @return void |
||
112 | */ |
||
113 | public function parse(array $headers) |
||
126 | |||
127 | /** |
||
128 | * Detect directives in rule |
||
129 | * |
||
130 | * @return void |
||
131 | */ |
||
132 | protected function detectDirectives() |
||
148 | |||
149 | /** |
||
150 | * Add rule |
||
151 | * |
||
152 | * @param string $directive |
||
153 | * @return void |
||
154 | * @throws XRobotsTagParserException |
||
155 | */ |
||
156 | protected function addRule($directive) |
||
170 | |||
171 | /** |
||
172 | * Cleanup before next rule is read |
||
173 | * |
||
174 | * @return void |
||
175 | */ |
||
176 | protected function cleanup() |
||
181 | |||
182 | /** |
||
183 | * Find the User-Agent that best matches the rules |
||
184 | * |
||
185 | * @return string |
||
186 | */ |
||
187 | protected function matchUserAgent() |
||
194 | |||
195 | /** |
||
196 | * Return all applicable rules |
||
197 | * |
||
198 | * @param bool $raw |
||
199 | * @return array |
||
200 | */ |
||
201 | public function getRules($raw = false) |
||
219 | |||
220 | /** |
||
221 | * Export all rules for all User-Agents |
||
222 | * |
||
223 | * @return array |
||
224 | */ |
||
225 | public function export() |
||
229 | |||
230 | /** |
||
231 | * Get the meaning of a directive |
||
232 | * |
||
233 | * @param string $directive |
||
234 | * @return string |
||
235 | * @throws XRobotsTagParserException |
||
236 | */ |
||
237 | public function getDirectiveMeaning($directive) |
||
245 | } |
||
246 |