Completed
Push — master ( 24f01f...13aa9d )
by Jan-Petter
03:06
created

src/RobotsTxtInterface.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * vipnytt/RobotsTxtParser
4
 *
5
 * @link https://github.com/VIPnytt/RobotsTxtParser
6
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
7
 */
8
9
namespace vipnytt\RobotsTxtParser;
10
11
/**
12
 * Interface RobotsTxtInterface
13
 *
14
 * @package vipnytt\RobotsTxtParser
15
 */
16
interface RobotsTxtInterface
17
{
18
    /**
19
     * Robots.txt path
20
     *
21
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-location--range-of-validity
22
     * @link https://tools.ietf.org/html/rfc3986
23
     * @link https://tools.ietf.org/html/rfc1808
24
     */
25
    const PATH = '/robots.txt';
26
27
    /**
28
     * Cache time
29
     *
30
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#handling-http-result-codes
31
     */
32
    const CACHE_TIME = 86400;
33
34
    /**
35
     * Max redirects
36
     *
37
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#handling-http-result-codes
38
     * @link https://tools.ietf.org/html/rfc1945
39
     */
40
    const MAX_REDIRECTS = 5;
41
42
    /**
43
     * Expected encoding
44
     *
45
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-format
46
     * @link https://tools.ietf.org/html/rfc3986
47
     */
48
    const ENCODING = 'UTF-8';
49
50
    /**
51
     * Robots.txt max length in bytes
52
     *
53
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-format
54
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#additional-info
55
     */
56
    const BYTE_LIMIT = 524288; // 4,194,304 bits | 512 kilobytes | 0.5 megabytes
0 ignored issues
show
Unused Code Comprehensibility introduced by
45% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
57
58
    /**
59
     * Max rule length
60
     *
61
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#clean-param
62
     */
63
    const MAX_LENGTH_RULE = 500;
64
65
    /**
66
     * Default User-Agent
67
     *
68
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#user-agent
69
     */
70
    const USER_AGENT = '*';
71
72
    /**
73
     * Directive: Allow
74
     *
75
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#allow
76
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#allow-disallow
77
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.allow
78
     * @link http://www.robotstxt.org/norobots-rfc.txt
79
     */
80
    const DIRECTIVE_ALLOW = 'allow';
81
82
    /**
83
     * Directive: Cache-delay
84
     *
85
     * Unofficial
86
     * Used as an crawl-delay alternative specifically for caching purposes.
87
     */
88
    const DIRECTIVE_CACHE_DELAY = 'cache-delay';
89
90
    /**
91
     * Directive: Clean-param
92
     *
93
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#clean-param
94
     */
95
    const DIRECTIVE_CLEAN_PARAM = 'clean-param';
96
97
    /**
98
     * Directive: Comment
99
     *
100
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.comment
101
     */
102
    const DIRECTIVE_COMMENT = 'comment';
103
104
    /**
105
     * Directive: Crawl-delay
106
     *
107
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#crawl-delay
108
     */
109
    const DIRECTIVE_CRAWL_DELAY = 'crawl-delay';
110
111
    /**
112
     * Directive: Disallow
113
     *
114
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#disallow
115
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#allow-disallow
116
     * @link https://www.w3.org/TR/html4/appendix/notes.html#h-B.4.1.1
117
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.disallow
118
     * @link http://www.robotstxt.org/norobots-rfc.txt
119
     * @link http://www.robotstxt.org/orig.html
120
     */
121
    const DIRECTIVE_DISALLOW = 'disallow';
122
123
    /**
124
     * Directive: Host
125
     *
126
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#host
127
     * @link https://tools.ietf.org/html/rfc952
128
     */
129
    const DIRECTIVE_HOST = 'host';
130
131
    /**
132
     * Directive: NoIndex
133
     */
134
    const DIRECTIVE_NO_INDEX = 'noindex';
135
136
    /**
137
     * Directive: RequestClient-rate
138
     *
139
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.request-rate
140
     */
141
    const DIRECTIVE_REQUEST_RATE = 'request-rate';
142
143
    /**
144
     * Directive: Robot-version
145
     *
146
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.robot-version
147
     */
148
    const DIRECTIVE_ROBOT_VERSION = 'robot-version';
149
150
    /**
151
     * Directive: Sitemap
152
     *
153
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#sitemap
154
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#sitemap
155
     * @link http://www.sitemaps.org/protocol.html#submit_robots
156
     */
157
    const DIRECTIVE_SITEMAP = 'sitemap';
158
159
    /**
160
     * Directive: User-Agent
161
     *
162
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#order-of-precedence-for-user-agents
163
     * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml#user-agent
164
     * @link https://www.w3.org/TR/html4/appendix/notes.html#h-B.4.1.1
165
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.user-agent
166
     * @link http://www.robotstxt.org/norobots-rfc.txt
167
     * @link http://www.robotstxt.org/orig.html
168
     */
169
    const DIRECTIVE_USER_AGENT = 'user-agent';
170
171
    /**
172
     * Directive: Visit-time
173
     *
174
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.visit-time
175
     */
176
    const DIRECTIVE_VISIT_TIME = 'visit-time';
177
178
    /**
179
     * Directive aliases (for simple errors / typos)
180
     *
181
     * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt#file-format
182
     */
183
    const ALIAS_DIRECTIVES = [
184
        'cachedelay' => self::DIRECTIVE_CACHE_DELAY,
185
        'cleanparam' => self::DIRECTIVE_CLEAN_PARAM,
186
        'crawldelay' => self::DIRECTIVE_CRAWL_DELAY,
187
        'no-index' => self::DIRECTIVE_NO_INDEX,
188
        'requestrate' => self::DIRECTIVE_REQUEST_RATE,
189
        'robotversion' => self::DIRECTIVE_ROBOT_VERSION,
190
        'useragent' => self::DIRECTIVE_USER_AGENT,
191
        'visittime' => self::DIRECTIVE_VISIT_TIME,
192
    ];
193
}
194