These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
<?php

declare(strict_types=1);

/**
 * vipnytt/RobotsTxtParser
 *
 * @link https://github.com/VIPnytt/RobotsTxtParser
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
 */

namespace vipnytt\RobotsTxtParser;

/**
 * Interface RobotsTxtInterface
 *
 * Shared protocol constants for the robots.txt parser: the well-known file
 * location, fetch/cache limits, and the canonical (lowercase) spelling of
 * every supported directive, plus an alias map for common misspellings.
 *
 * @package vipnytt\RobotsTxtParser
 */
interface RobotsTxtInterface
{
    /**
     * Robots.txt path
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-location--range-of-validity
     * @link https://tools.ietf.org/html/rfc3986
     * @link https://tools.ietf.org/html/rfc1808
     */
    const PATH = '/robots.txt';

    /**
     * Cache time, in seconds (24 hours)
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#handling-http-result-codes
     */
    const CACHE_TIME = 86400;

    /**
     * Max redirects to follow when fetching robots.txt
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#handling-http-result-codes
     * @link https://tools.ietf.org/html/rfc1945
     */
    const MAX_REDIRECTS = 5;

    /**
     * Expected encoding
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-format
     * @link https://tools.ietf.org/html/rfc3986
     */
    const ENCODING = 'UTF-8';

    /**
     * Robots.txt max length in bytes
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-format
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#additional-info
     */
    const BYTE_LIMIT = 524288; // 4,194,304 bits | 512 kilobytes | 0.5 megabytes

    /**
     * Max rule length, in bytes
     *
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#clean-param
     */
    const MAX_LENGTH_RULE = 500;

    /**
     * Default User-Agent (matches any robot)
     *
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#user-agent
     */
    const USER_AGENT = '*';

    /**
     * Directive: Allow
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#allow
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#allow-disallow
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.allow
     * @link http://www.robotstxt.org/norobots-rfc.txt
     */
    const DIRECTIVE_ALLOW = 'allow';

    /**
     * Directive: Cache-delay
     *
     * Unofficial
     * Used as a crawl-delay alternative specifically for caching purposes.
     */
    const DIRECTIVE_CACHE_DELAY = 'cache-delay';

    /**
     * Directive: Clean-param
     *
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#clean-param
     */
    const DIRECTIVE_CLEAN_PARAM = 'clean-param';

    /**
     * Directive: Comment
     *
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.comment
     */
    const DIRECTIVE_COMMENT = 'comment';

    /**
     * Directive: Crawl-delay
     *
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#crawl-delay
     */
    const DIRECTIVE_CRAWL_DELAY = 'crawl-delay';

    /**
     * Directive: Disallow
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#disallow
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#allow-disallow
     * @link https://www.w3.org/TR/html4/appendix/notes.html#h-B.4.1.1
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.disallow
     * @link http://www.robotstxt.org/norobots-rfc.txt
     * @link http://www.robotstxt.org/orig.html
     */
    const DIRECTIVE_DISALLOW = 'disallow';

    /**
     * Directive: Host
     *
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#host
     * @link https://tools.ietf.org/html/rfc952
     */
    const DIRECTIVE_HOST = 'host';

    /**
     * Directive: NoIndex
     */
    const DIRECTIVE_NO_INDEX = 'noindex';

    /**
     * Directive: Request-rate
     *
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.request-rate
     */
    const DIRECTIVE_REQUEST_RATE = 'request-rate';

    /**
     * Directive: Robot-version
     *
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.robot-version
     */
    const DIRECTIVE_ROBOT_VERSION = 'robot-version';

    /**
     * Directive: Sitemap
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#sitemap
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#sitemap
     * @link http://www.sitemaps.org/protocol.html#submit_robots
     */
    const DIRECTIVE_SITEMAP = 'sitemap';

    /**
     * Directive: User-Agent
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#order-of-precedence-for-user-agents
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#user-agent
     * @link https://www.w3.org/TR/html4/appendix/notes.html#h-B.4.1.1
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.user-agent
     * @link http://www.robotstxt.org/norobots-rfc.txt
     * @link http://www.robotstxt.org/orig.html
     */
    const DIRECTIVE_USER_AGENT = 'user-agent';

    /**
     * Directive: Visit-time
     *
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.visit-time
     */
    const DIRECTIVE_VISIT_TIME = 'visit-time';

    /**
     * Directive aliases (for simple errors / typos)
     *
     * Maps common misspellings (missing hyphen, or a stray one) to the
     * canonical directive name.
     *
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-format
     */
    const ALIAS_DIRECTIVES = [
        'cachedelay' => self::DIRECTIVE_CACHE_DELAY,
        'cleanparam' => self::DIRECTIVE_CLEAN_PARAM,
        'crawldelay' => self::DIRECTIVE_CRAWL_DELAY,
        'no-index' => self::DIRECTIVE_NO_INDEX,
        'requestrate' => self::DIRECTIVE_REQUEST_RATE,
        'robotversion' => self::DIRECTIVE_ROBOT_VERSION,
        'useragent' => self::DIRECTIVE_USER_AGENT,
        'visittime' => self::DIRECTIVE_VISIT_TIME,
    ];
}
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.