1 | <?php |
||
13 | class UserAgentClient implements RobotsTxtInterface |
||
14 | { |
||
15 | /** |
||
16 | * Allow rules (a DisAllow instance, the same type that holds the Disallow rules) |
||
17 | * @var DisAllow |
||
18 | */ |
||
19 | protected $allow; |
||
20 | |||
21 | /** |
||
22 | * Disallow rules (a DisAllow instance) |
||
23 | * @var DisAllow |
||
24 | */ |
||
25 | protected $disallow; |
||
26 | |||
27 | /** |
||
28 | * User-agent string (presumably the agent these rules are evaluated for - TODO confirm) |
||
29 | * @var string |
||
30 | */ |
||
31 | protected $userAgent; |
||
32 | |||
33 | /** |
||
34 | * Robots.txt origin (presumably the base URL the robots.txt was fetched from - TODO confirm) |
||
35 | * @var string |
||
36 | */ |
||
37 | protected $origin; |
||
38 | |||
39 | /** |
||
40 | * Status code parser (presumably built from the constructor's $statusCode - TODO confirm) |
||
41 | * @var StatusCodeParser |
||
42 | */ |
||
43 | protected $statusCodeParser; |
||
44 | |||
45 | /** |
||
46 | * UserAgentClient constructor. |
||
47 | * |
||
48 | * @param DisAllow $allow - Allow rules |
||
49 | * @param DisAllow $disallow - Disallow rules |
||
50 | * @param string $userAgent - User-agent |
||
51 | * @param string $origin - Robots.txt origin |
||
52 | * @param int $statusCode - presumably the HTTP status code of the robots.txt response (TODO confirm) |
||
53 | */ |
||
54 | public function __construct($allow, $disallow, $userAgent, $origin, $statusCode) |
||
62 | |||
63 | /** |
||
64 | * Check if the URL is allowed to be crawled |
||
65 | * |
||
66 | * @param string $url - URL to check |
||
67 | * @return bool |
||
68 | */ |
||
69 | public function isAllowed($url) |
||
73 | |||
74 | /** |
||
75 | * Check a URL against a directive (NOTE(review): presumably the shared helper behind isAllowed() and isDisallowed() - confirm) |
||
76 | * |
||
77 | * @param string $directive - directive to test (presumably allow or disallow - TODO confirm) |
||
78 | * @param string $url - URL to check |
||
79 | * @return bool |
||
80 | * @throws ClientException |
||
81 | */ |
||
82 | protected function check($directive, $url) |
||
99 | |||
100 | /** |
||
101 | * Check if the URL belongs to the current robots.txt |
||
102 | * |
||
103 | * @param $urls - NOTE(review): plural name, no declared type; presumably a list of URLs - confirm |
||
104 | * @return bool |
||
105 | */ |
||
106 | protected function isUrlApplicable($urls) |
||
120 | |||
121 | /** |
||
122 | * Check if the URL is disallowed from being crawled |
||
123 | * |
||
124 | * @param string $url - URL to check |
||
125 | * @return bool |
||
126 | */ |
||
127 | public function isDisallowed($url) |
||
131 | |||
132 | /** |
||
133 | * Get the Cache-delay value |
||
134 | * |
||
135 | * @return float|int |
||
136 | */ |
||
137 | public function getCacheDelay() |
||
142 | |||
143 | /** |
||
144 | * Get the Crawl-delay value |
||
145 | * |
||
146 | * @return float|int |
||
147 | */ |
||
148 | public function getCrawlDelay() |
||
153 | } |
||
154 |