1
|
|
|
<?php |
2
|
|
|
namespace vipnytt\RobotsTxtParser\Parser; |
3
|
|
|
|
4
|
|
|
use vipnytt\RobotsTxtParser\RobotsTxtInterface; |
5
|
|
|
|
6
|
|
|
/**
 * Class HeaderParser
 *
 * Collects the response header lines that cURL hands to curlCallback() and
 * exposes the values this package needs from them: the Content-Type charset,
 * the Cache-Control max-age and the Retry-After delay.
 *
 * @package vipnytt\RobotsTxtParser\Parser
 */
class HeaderParser implements RobotsTxtInterface
{
    /**
     * ANSI C's asctime() format
     *
     * The hour uses `H` (24-hour clock); the 12-hour `h` specifier would
     * reject every timestamp from 13:00 onwards.
     */
    const DATE_ASCTIME = 'D M j H:i:s Y';

    /**
     * HTTP date formats, tried in this order by parseHttpDate()
     */
    const DATE_HTTP = [
        DATE_RFC1123,
        DATE_RFC850,
        self::DATE_ASCTIME,
    ];

    /**
     * cURL resource most recently passed to curlCallback()
     * @var resource
     */
    private $ch;

    /**
     * Headers, keyed by lower-cased field name
     * @var string[]
     */
    private $headers = [];

    /**
     * HeaderParser constructor.
     */
    public function __construct()
    {
    }

    /**
     * cURL CURLOPT_HEADERFUNCTION callback
     *
     * Stores one `name: value` header line, keyed by the lower-cased name.
     * A later line with the same name overwrites the earlier one. Lines
     * without a colon (the status line, the blank line ending the header
     * section) carry no header field and are not stored.
     *
     * @param resource $ch - cURL resource
     * @param string $headerLine - cURL header line string
     * @return int - the number of bytes written
     */
    public function curlCallback($ch, $headerLine)
    {
        $this->ch = $ch;
        if (strpos($headerLine, ':') !== false) {
            // Split on the first colon only; values such as dates contain colons themselves.
            $split = array_map('trim', explode(':', $headerLine, 2));
            $this->headers[strtolower($split[0])] = $split[1];
        }
        // The full line length is always reported, even for lines that were not stored.
        return strlen($headerLine);
    }

    /**
     * Content-Type encoding HTTP header
     * @link https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
     *
     * @return string - the `charset` parameter of the Content-Type header, or
     *                  self::ENCODING (presumably declared by RobotsTxtInterface)
     *                  when the header or the parameter is missing
     */
    public function getCharset()
    {
        if (
            isset($this->headers['content-type']) &&
            ($value = $this->getInlineValue($this->headers['content-type'], 'charset', ';')) !== false
        ) {
            return $value;
        }
        return self::ENCODING;
    }

    /**
     * Get inline header variable value
     *
     * Splits the header value on the delimiter and returns the value of the
     * first `part=value` pair whose name matches $part (case-insensitive).
     * Surrounding whitespace and double quotes are removed, so a quoted
     * value like `charset="utf-8"` yields `utf-8`.
     *
     * @param string $header - complete header value
     * @param string $part - name of the wanted directive/parameter
     * @param string $delimiter - separator between the directives
     * @return string|false - false if no matching pair exists
     */
    private function getInlineValue($header, $part, $delimiter = ";")
    {
        foreach (explode($delimiter, $header) as $string) {
            $string = trim($string);
            if (stripos($string, $part . '=') === 0) {
                // The match above guarantees an "=", so index 1 always exists.
                return trim(explode('=', $string, 2)[1], " \t\n\r\0\x0B\"");
            }
        }
        return false;
    }

    /**
     * Cache-Control max-age HTTP header
     * @link https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9.3
     *
     * @return int - the max-age directive in seconds, 0 if the header or the
     *               directive is missing
     */
    public function getMaxAge()
    {
        if (
            isset($this->headers['cache-control']) &&
            // Read the directive from Cache-Control itself, the same header that was just checked.
            ($value = $this->getInlineValue($this->headers['cache-control'], 'max-age', ',')) !== false
        ) {
            return intval($value);
        }
        return 0;
    }

    /**
     * Cache-Control Retry-After HTTP header
     * @link https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.37
     *
     * The header may hold either a number of seconds or an HTTP-date. A date
     * is converted to the number of seconds between $requestTime and that date.
     *
     * @param int $requestTime - Unix timestamp the delay is measured from
     * @return int - seconds to wait, never negative; 0 if the header is
     *               missing or cannot be interpreted
     */
    public function getRetryAfter($requestTime)
    {
        if (isset($this->headers['retry-after'])) {
            if (is_numeric($this->headers['retry-after'])) {
                // Clamp so that a negative value is not reported as a negative delay.
                return max(0, intval($this->headers['retry-after']));
            } elseif (($time = $this->parseHttpDate($this->headers['retry-after'])) !== false) {
                // A date in the past means there is nothing left to wait for.
                return max(0, $time - $requestTime);
            }
        }
        return 0;
    }

    /**
     * Parse HTTP-date
     * @link https://tools.ietf.org/html/rfc2616#section-3.3
     *
     * Tries each format in self::DATE_HTTP in turn and returns the timestamp
     * of the first one that matches.
     *
     * @param string $string - date string to parse
     * @return int|false - Unix timestamp, or false if no format matches
     */
    private function parseHttpDate($string)
    {
        // GMT is the zone passed to the parser for every attempted format.
        $timeZone = new \DateTimeZone('GMT');
        foreach (self::DATE_HTTP as $format) {
            if (($dateTime = date_create_from_format($format, $string, $timeZone)) !== false) {
                return (int)date_format($dateTime, 'U');
            }
        }
        return false;
    }
}
149
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.