1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* vipnytt/RobotsTxtParser |
4
|
|
|
* |
5
|
|
|
* @link https://github.com/VIPnytt/RobotsTxtParser |
6
|
|
|
* @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT) |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
namespace vipnytt\RobotsTxtParser\Parser\Directives; |
10
|
|
|
|
11
|
|
|
use vipnytt\RobotsTxtParser\Client\Directives\RequestRateClient; |
12
|
|
|
use vipnytt\RobotsTxtParser\Handler\RenderHandler; |
13
|
|
|
use vipnytt\RobotsTxtParser\RobotsTxtInterface; |
14
|
|
|
|
15
|
|
|
/**
 * Class RequestRateParser
 *
 * Collects `Request-rate` directive values. Each accepted value is stored
 * as a seconds-per-request number together with a reduced ratio string,
 * and the collected rules can be handed to a client or written to a
 * render handler.
 *
 * @package vipnytt\RobotsTxtParser\Parser\Directives
 */
class RequestRateParser implements ParserInterface, RobotsTxtInterface
{
    use DirectiveParserTrait;

    /**
     * Base uri
     * @var string
     */
    private $base;

    /**
     * RequestRate array
     *
     * Every entry holds the keys `rate` (seconds per request), `ratio`
     * (reduced "requests/time" string), `from` and `to`. The last two
     * default to null and may be overwritten by whatever
     * draftParseTime() returns for the optional time-span suffix.
     *
     * @var array
     */
    private $requestRates = [];

    /**
     * Sorted
     *
     * True while $requestRates is known to be in sorted order.
     *
     * @var bool
     */
    private $sorted = false;

    /**
     * Time units
     *
     * Unit suffix => length in seconds, listed from largest to smallest so
     * that getRatio() picks the largest unit that divides the time evenly.
     *
     * @var int[]
     */
    private $units = [
        'w' => 604800,
        'd' => 86400,
        'h' => 3600,
        'm' => 60,
    ];

    /**
     * RequestRate constructor.
     *
     * @param string $base Base uri, passed on unchanged to the client
     */
    public function __construct($base)
    {
        $this->base = $base;
    }

    /**
     * Add
     *
     * Parses one directive value of the form `<requests>/<time>[unit]`,
     * optionally followed by whitespace and a time span, and stores it.
     *
     * @param string $line
     * @return bool False when the value is not a usable `requests/time` pair
     */
    public function add($line)
    {
        $array = preg_split('/\s+/', $line, 2);
        $parts = array_map('trim', explode('/', $array[0]));
        if (count($parts) !== 2) {
            return false;
        }
        // The first ASCII letter of the time part selects the unit; an
        // unknown letter, or no letter at all, means plain seconds.
        // (The regex already drops every non-letter, so no extra string
        // sanitizing filter is required.)
        $unit = strtolower(substr(preg_replace('/[^A-Za-z]/', '', $parts[1]), 0, 1));
        $multiplier = isset($this->units[$unit]) ? $this->units[$unit] : 1;

        // Cast before abs(): the sanitize filter returns a string that may be
        // empty or non-numeric, which abs() rejects on PHP 8.
        $rate = abs((int)filter_var($parts[0], FILTER_SANITIZE_NUMBER_INT));
        $time = $multiplier * abs((int)filter_var($parts[1], FILTER_SANITIZE_NUMBER_INT));

        // Zero requests or a zero-length period cannot be expressed as a
        // rate (and would divide by zero below); a product that overflowed
        // to float cannot be reduced reliably either.
        if ($rate < 1 || $time < 1 || !is_int($time)) {
            return false;
        }

        $result = [
            'rate' => $time / $rate,
            'ratio' => $this->getRatio($rate, $time),
            'from' => null,
            'to' => null,
        ];
        if (!empty($array[1]) &&
            ($times = $this->draftParseTime($array[1])) !== false
        ) {
            $result = array_merge($result, $times);
        }
        $this->requestRates[] = $result;
        // The new entry was appended at the end, so the order must be
        // re-established before the next client()/render() call.
        $this->sorted = false;
        return true;
    }

    /**
     * Get ratio string
     *
     * Reduces `$rate / $time` by their greatest common divisor and expresses
     * the time in the largest unit from $units that divides it evenly,
     * falling back to seconds (`s`).
     *
     * @param int $rate Number of requests, expected to be positive
     * @param int $time Period in seconds, expected to be positive
     * @return string
     */
    private function getRatio($rate, $time)
    {
        $gcd = $this->getGCD($rate, $time);
        $requests = $rate / $gcd;
        $time = $time / $gcd;
        $suffix = 's';
        foreach ($this->units as $unit => $sec) {
            if ($time % $sec === 0) {
                $suffix = $unit;
                $time /= $sec;
                break;
            }
        }
        return $requests . '/' . $time . $suffix;
    }

    /**
     * Returns the greatest common divisor of two integers using the Euclidean algorithm.
     *
     * Uses the GMP extension when it is loaded; otherwise an iterative
     * Euclidean loop. A zero operand yields the other operand, so the
     * fallback never performs a modulo by zero.
     *
     * @param int $a
     * @param int $b
     * @return int
     */
    private function getGCD($a, $b)
    {
        if (extension_loaded('gmp')) {
            return gmp_intval(gmp_gcd((string)$a, (string)$b));
        }
        $a = abs((int)$a);
        $b = abs((int)$b);
        while ($b !== 0) {
            $remainder = $a % $b;
            $a = $b;
            $b = $remainder;
        }
        return $a;
    }

    /**
     * Client
     *
     * Sorts the collected rules and passes them to a new client.
     *
     * @param string $userAgent
     * @param float|int $fallbackValue
     * @return RequestRateClient
     */
    public function client($userAgent = self::USER_AGENT, $fallbackValue = 0)
    {
        $this->sort();
        return new RequestRateClient($this->base, $userAgent, $this->requestRates, $fallbackValue);
    }

    /**
     * Sort
     *
     * Orders $requestRates by `rate` in descending order (most seconds per
     * request first). Does nothing when the list is already sorted.
     *
     * @return bool
     */
    private function sort()
    {
        if (!$this->sorted) {
            $this->sorted = true;
            return usort($this->requestRates, function (array $requestRateA, array $requestRateB) {
                // usort() needs an integer, not a bool. Loose equality is
                // intentional: `rate` may be int or float.
                if ($requestRateA['rate'] == $requestRateB['rate']) {
                    return 0;
                }
                return $requestRateB['rate'] > $requestRateA['rate'] ? 1 : -1;
            });
        }
        return $this->sorted;
    }

    /**
     * Render
     *
     * Writes every collected rule to the handler as a Request-rate
     * directive, appending ` from-to` when both bounds are set.
     *
     * @param RenderHandler $handler
     * @return bool
     */
    public function render(RenderHandler $handler)
    {
        $this->sort();
        foreach ($this->requestRates as $array) {
            $time = '';
            if (isset($array['from']) &&
                isset($array['to'])
            ) {
                $time .= ' ' . $array['from'] . '-' . $array['to'];
            }
            $handler->add(self::DIRECTIVE_REQUEST_RATE, $array['ratio'] . $time);
        }
        return true;
    }
}
189
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.