1 | <?php |
||
9 | class Parser implements RobotsTxtInterface |
||
10 | { |
||
11 | use ObjectTools; |
||
12 | |||
13 | const TOP_LEVEL_DIRECTIVES = [ |
||
14 | self::DIRECTIVE_CLEAN_PARAM, |
||
15 | self::DIRECTIVE_HOST, |
||
16 | self::DIRECTIVE_SITEMAP, |
||
17 | self::DIRECTIVE_USER_AGENT, |
||
18 | ]; |
||
19 | |||
20 | protected $raw; |
||
21 | |||
22 | protected $previousDirective; |
||
23 | protected $userAgentValues; |
||
24 | |||
25 | protected $cleanParam; |
||
26 | protected $host; |
||
27 | protected $sitemap; |
||
28 | protected $userAgent; |
||
29 | |||
30 | /** |
||
31 | * Constructor |
||
32 | * |
||
33 | * @param string $content - file content |
||
34 | * @param string $encoding - character encoding |
||
35 | * @param integer|null $byteLimit - maximum number of bytes to parse |
||
36 | * @throws Exceptions\ParserException |
||
37 | */ |
||
38 | public function __construct($content, $encoding = self::ENCODING, $byteLimit = self::BYTE_LIMIT) |
||
52 | |||
53 | /** |
||
54 | * Parse robots.txt |
||
55 | * |
||
56 | * @return void |
||
57 | */ |
||
58 | private function parseTxt() |
||
71 | |||
72 | public function add($line) |
||
95 | |||
96 | public function export() |
||
103 | |||
104 | /** |
||
105 | * Check if the URL is allowed to be crawled |
||
106 | * |
||
107 | * @param string $url - URL to check |
||
108 | * @return bool |
||
109 | */ |
||
110 | public function isAllowed($url) |
||
114 | |||
115 | /** |
||
116 | * Check if the URL is disallowed from being crawled |
||
117 | * |
||
118 | * @param string $url - URL to check |
||
119 | * @return bool |
||
120 | */ |
||
121 | public function isDisallowed($url) |
||
125 | |||
126 | /** |
||
127 | * Get sitemaps |
||
128 | * |
||
129 | * @return array |
||
130 | */ |
||
131 | public function getSitemaps() |
||
135 | |||
136 | /** |
||
137 | * Get host |
||
138 | * |
||
139 | * @return string|null |
||
140 | */ |
||
141 | public function getHost() |
||
145 | |||
146 | /** |
||
147 | * Get Clean-param |
||
148 | * |
||
149 | * @return array |
||
150 | */ |
||
151 | public function getCleanParam() |
||
155 | } |
||
156 |