1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* vipnytt/RobotsTxtParser |
4
|
|
|
* |
5
|
|
|
* @link https://github.com/VIPnytt/RobotsTxtParser |
6
|
|
|
* @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT) |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
namespace vipnytt\RobotsTxtParser\Parser\Directives; |
10
|
|
|
|
11
|
|
|
use vipnytt\RobotsTxtParser\Client\Directives\RequestRateClient; |
12
|
|
|
use vipnytt\RobotsTxtParser\Handler\RenderHandler; |
13
|
|
|
use vipnytt\RobotsTxtParser\RobotsTxtInterface; |
14
|
|
|
|
15
|
|
|
/** |
16
|
|
|
* Class RequestRateParser |
17
|
|
|
* |
18
|
|
|
* @package vipnytt\RobotsTxtParser\Parser\Directives |
19
|
|
|
*/ |
20
|
|
|
class RequestRateParser implements ParserInterface, RobotsTxtInterface
{
    use DirectiveParserTrait;

    /**
     * Base uri
     * @var string
     */
    private $base;

    /**
     * Parsed Request-rate entries.
     *
     * Each entry is an array with the keys `rate` (seconds per document),
     * `from` and `to` (both null unless a time span was parsed).
     * @var array
     */
    private $requestRates = [];

    /**
     * Whether $requestRates is currently sorted
     * @var bool
     */
    private $sorted = false;

    /**
     * RequestRate constructor.
     *
     * @param string $base
     */
    public function __construct($base)
    {
        $this->base = $base;
    }

    /**
     * Add
     *
     * Parses a directive value of the form `<rate>[ <time span>]` and stores it.
     * The first whitespace-delimited token is the rate; the optional remainder
     * is handed to draftParseTime() (not defined in this class; presumably
     * supplied by DirectiveParserTrait) and, when that succeeds, its result is
     * merged over the default null `from`/`to` values.
     *
     * @param string $line
     * @return bool False if the rate could not be parsed, true otherwise
     */
    public function add($line)
    {
        $array = preg_split('/\s+/', $line, 2);
        $result = [
            'rate' => $this->draftParseRate($array[0]),
            'from' => null,
            'to' => null,
        ];
        if ($result['rate'] === false) {
            return false;
        } elseif (!empty($array[1]) &&
            ($times = $this->draftParseTime($array[1])) !== false
        ) {
            $result = array_merge($result, $times);
        }
        $this->requestRates[] = $result;
        // A new entry invalidates any earlier ordering; without this reset,
        // rates added after the first client()/render() call stayed unsorted.
        $this->sorted = false;
        return true;
    }

    /**
     * Client rate as specified in the `Robot exclusion standard` version 2.0 draft
     * rate = numDocuments / timeUnit
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.request-rate
     *
     * The returned value is the number of seconds per document, i.e.
     * (time * unit multiplier) / numDocuments. The unit is taken from the first
     * ASCII letter of the time part: `m` = 60, `h` = 3600, `d` = 86400, anything
     * else (including no letter at all) = 1.
     *
     * @param string $string
     * @return float|int|false False when the value is malformed or not positive
     */
    private function draftParseRate($string)
    {
        $parts = array_map('trim', explode('/', $string));
        if (count($parts) != 2) {
            return false;
        }
        $multiplier = 1;
        // Only the first letter matters. The previous FILTER_SANITIZE_STRING
        // pass was dropped: it is deprecated as of PHP 8.1.
        switch (strtolower(substr(preg_replace('/[^A-Za-z]/', '', $parts[1]), 0, 1))) {
            case 'm':
                $multiplier = 60;
                break;
            case 'h':
                $multiplier = 3600;
                break;
            case 'd':
                $multiplier = 86400;
                break;
        }
        $time = filter_var($parts[1], FILTER_SANITIZE_NUMBER_FLOAT, FILTER_FLAG_ALLOW_FRACTION);
        $documents = filter_var($parts[0], FILTER_SANITIZE_NUMBER_INT);
        // The sanitizers only strip characters; what is left may still be empty
        // or malformed (e.g. "", "-", "1.2.3"). Reject that up front, and reject
        // a zero document count, which would otherwise be a division by zero.
        if (!is_numeric($time) ||
            !is_numeric($documents) ||
            $documents == 0
        ) {
            return false;
        }
        // `+ 0` converts the numeric strings to int or float before the math.
        $rate = abs($time + 0) * $multiplier / abs($documents + 0);
        return $rate > 0 ? $rate : false;
    }

    /**
     * Client
     *
     * Sorts the collected rates and hands them to a new RequestRateClient.
     *
     * @param string $userAgent
     * @param float|int $fallbackValue
     * @return RequestRateClient
     */
    public function client($userAgent = self::USER_AGENT, $fallbackValue = 0)
    {
        $this->sort();
        return new RequestRateClient($this->base, $userAgent, $this->requestRates, $fallbackValue);
    }

    /**
     * Sort
     *
     * Orders $requestRates by `rate`, highest value first. Does nothing when
     * the list is already flagged as sorted.
     *
     * @return bool
     */
    private function sort()
    {
        if ($this->sorted) {
            return true;
        }
        $this->sorted = true;
        return usort($this->requestRates, function (array $requestRateA, array $requestRateB) {
            // usort() needs an integer <0, 0 or >0. The former boolean result
            // could never signal "less than". Written without the `<=>`
            // operator so it also runs on interpreters that predate it.
            if ($requestRateA['rate'] == $requestRateB['rate']) {
                return 0;
            }
            return $requestRateB['rate'] > $requestRateA['rate'] ? 1 : -1;
        });
    }

    /**
     * Render
     *
     * Adds one Request-rate line per stored entry to the handler, formatted as
     * `1/<rate>s`, followed by ` <from>-<to>` when both bounds are set.
     *
     * @param RenderHandler $handler
     * @return bool
     */
    public function render(RenderHandler $handler)
    {
        $this->sort();
        foreach ($this->requestRates as $array) {
            $suffix = 's';
            if (isset($array['from']) &&
                isset($array['to'])
            ) {
                $suffix .= ' ' . $array['from'] . '-' . $array['to'];
            }
            $handler->add(self::DIRECTIVE_REQUEST_RATE, '1/' . $array['rate'] . $suffix);
        }
        return true;
    }
}
158
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.