1
|
|
|
<?php |
2
|
|
|
namespace vipnytt\RobotsTxtParser\Client\Cache\MySQL; |
3
|
|
|
|
4
|
|
|
use PDO; |
5
|
|
|
use vipnytt\RobotsTxtParser\Client\Cache\ManagerInterface; |
6
|
|
|
use vipnytt\RobotsTxtParser\Exceptions\ClientException; |
7
|
|
|
use vipnytt\RobotsTxtParser\Exceptions\DatabaseException; |
8
|
|
|
use vipnytt\RobotsTxtParser\RobotsTxtInterface; |
9
|
|
|
use vipnytt\RobotsTxtParser\TxtClient; |
10
|
|
|
use vipnytt\RobotsTxtParser\UriClient; |
11
|
|
|
|
12
|
|
|
/**
 * Class Manager
 *
 * MySQL backed cache for robots.txt files, stored in the `robotstxt__cache1` table.
 *
 * The `worker` column is used by this class as a small state machine:
 *  - NULL  : the record has been requested through client() since it was last refreshed by cron(),
 *            and is therefore eligible to be refreshed by cron() once `nextUpdate` has passed
 *  - 0     : the record was refreshed by cron() (or is about to be re-fetched by client()) and has
 *            not been requested since
 *  - 1-255 : the record is currently claimed by the cron() worker with that ID
 *
 * @see https://github.com/VIPnytt/RobotsTxtParser/blob/master/docs/methods/Cache.md for documentation
 * @package vipnytt\RobotsTxtParser\Client\Cache\MySQL
 */
class Manager implements ManagerInterface, RobotsTxtInterface
{
    /**
     * Database handler
     * @var PDO
     */
    private $pdo;

    /**
     * cURL options
     * @var array
     */
    private $curlOptions = [];

    /**
     * Byte limit
     * @var int|null
     */
    private $byteLimit = self::BYTE_LIMIT;

    /**
     * Manager constructor.
     *
     * @param PDO $pdo
     * @param array $curlOptions
     * @param int|null $byteLimit
     */
    public function __construct(PDO $pdo, array $curlOptions, $byteLimit)
    {
        $this->pdo = $pdo;
        $this->curlOptions = $curlOptions;
        $this->byteLimit = $byteLimit;
    }

    /**
     * Parser client
     *
     * Returns a client built from the cached record when that record is still fresh
     * (`nextUpdate` has not passed). Otherwise the robots.txt is fetched, written to the cache,
     * and the fetching client is returned.
     *
     * @param string $base
     * @return TxtClient|UriClient
     * @throws DatabaseException
     */
    public function client($base)
    {
        $query = $this->pdo->prepare(<<<SQL
SELECT
  content,
  statusCode,
  nextUpdate,
  effective,
  worker,
  UNIX_TIMESTAMP()
FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // Decide on the fetched row itself; PDOStatement::rowCount() is not reliable for SELECT statements.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        // Release the result set before any further statement is issued on this connection.
        $query->closeCursor();
        if ($row !== false) {
            $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
            if ($row['nextUpdate'] >= $row['UNIX_TIMESTAMP()']) {
                $this->markAsActive($base, $row['worker']);
                return new TxtClient($base, $row['statusCode'], $row['content'], self::ENCODING, $row['effective'], $this->byteLimit);
            }
        }
        // Missing or outdated record: reset the worker flag, then fetch and store a fresh copy.
        $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET worker = 0
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        $request = new UriClient($base, $this->curlOptions, $this->byteLimit);
        // A NULL worker marks the record as requested, which makes it eligible for cron().
        $this->push($request, null);
        return $request;
    }

    /**
     * Clock sync check
     *
     * Compares the local PHP time with a timestamp reported by the SQL server.
     *
     * @param int $time - SQL server timestamp
     * @throws DatabaseException if the two clocks differ by 10 seconds or more
     */
    private function clockSyncCheck($time)
    {
        if (abs(time() - $time) >= 10) {
            throw new DatabaseException('`PHP server` and `SQL server` timestamps are out of sync. Please fix!');
        }
    }

    /**
     * Mark robots.txt as active
     *
     * Sets the worker flag of the record to NULL, but only if it currently is 0.
     * Records claimed by a cron worker (non-zero worker) are left untouched.
     *
     * @param string $base
     * @param int|null $workerID - worker value of the record, as read from the database
     * @return bool
     */
    private function markAsActive($base, $workerID = 0)
    {
        // Loose comparison on purpose: the value may arrive as the string "0" from the database.
        if ($workerID == 0) {
            $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET worker = NULL
WHERE base = :base AND worker = 0;
SQL
            );
            $query->bindParam(':base', $base, PDO::PARAM_STR);
            return $query->execute();
        }
        return true;
    }

    /**
     * Update a robots.txt in the database
     *
     * If the fetch failed (no status code, or a 5xx status code) and a still valid record exists,
     * only the update timestamp of that record is displaced. Otherwise the record is inserted or
     * overwritten with the freshly fetched data.
     *
     * @param UriClient $client
     * @param int|null $worker - worker value to store when an existing record is updated
     * @return bool
     * @throws DatabaseException
     */
    private function push(UriClient $client, $worker = 0)
    {
        $base = $client->getBaseUri();
        $statusCode = $client->getStatusCode();
        $nextUpdate = $client->nextUpdate();
        // The effective URI is only stored when it differs from the base URI.
        $effective = ($effective = $client->getEffectiveUri()) === $base ? null : $effective;
        if (
            strpos($base, 'http') === 0 &&
            (
                $statusCode === null ||
                (
                    $statusCode >= 500 &&
                    $statusCode < 600
                )
            ) &&
            $this->displacePush($base, $nextUpdate, $worker)
        ) {
            return true;
        }
        $validUntil = $client->validUntil();
        $content = $client->render();
        // Each named placeholder appears exactly once. Re-using a placeholder name within one
        // statement only works while PDO emulates prepared statements, so the UPDATE clause
        // gets its own `upd_` prefixed set of placeholders bound to the same values.
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache1 (base, content, statusCode, validUntil, nextUpdate, effective)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate, :effective)
ON DUPLICATE KEY UPDATE content = :upd_content, statusCode = :upd_statusCode, validUntil = :upd_validUntil,
  nextUpdate = :upd_nextUpdate, effective = :upd_effective, worker = :worker;
SQL
        );
        // PHP null values are sent as SQL NULL regardless of the declared parameter type.
        $columns = [
            'content' => [$content, PDO::PARAM_STR],
            'statusCode' => [$statusCode, PDO::PARAM_INT],
            'validUntil' => [$validUntil, PDO::PARAM_INT],
            'nextUpdate' => [$nextUpdate, PDO::PARAM_INT],
            'effective' => [$effective, PDO::PARAM_STR],
        ];
        $query->bindValue(':base', $base, PDO::PARAM_STR);
        foreach ($columns as $name => $pair) {
            list($value, $type) = $pair;
            $query->bindValue(':' . $name, $value, $type);
            $query->bindValue(':upd_' . $name, $value, $type);
        }
        $query->bindValue(':worker', $worker, PDO::PARAM_INT);
        return $query->execute();
    }

    /**
     * Displace push timestamp
     *
     * Keeps an existing, still valid record and only moves its `nextUpdate` timestamp (capped at
     * `validUntil`). An existing record which is no longer valid is removed from the cache.
     *
     * @param string $base
     * @param int $nextUpdate
     * @param int|null $worker
     * @return bool - true if an existing record was kept and displaced, false if there is nothing to keep
     * @throws DatabaseException
     */
    private function displacePush($base, $nextUpdate, $worker = null)
    {
        $query = $this->pdo->prepare(<<<SQL
SELECT
  validUntil,
  UNIX_TIMESTAMP()
FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // Decide on the fetched row itself; PDOStatement::rowCount() is not reliable for SELECT statements.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        // Release the result set before any further statement is issued on this connection.
        $query->closeCursor();
        if ($row !== false) {
            $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
            if ($row['validUntil'] > $row['UNIX_TIMESTAMP()']) {
                // Never schedule the next update later than the point where the record expires.
                $nextUpdate = min($row['validUntil'], $nextUpdate);
                $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET nextUpdate = :nextUpdate, worker = :worker
WHERE base = :base;
SQL
                );
                $query->bindParam(':base', $base, PDO::PARAM_STR);
                $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
                $query->bindParam(':worker', $worker, PDO::PARAM_INT);
                return $query->execute();
            }
            // The stored record has expired, drop it so the caller can store the new result.
            $this->invalidate($base);
        }
        return false;
    }

    /**
     * Invalidate cache
     *
     * Deletes the record of the given base URI from the cache table.
     *
     * @param string $base
     * @return bool
     */
    public function invalidate($base)
    {
        $query = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        return $query->execute();
    }

    /**
     * Process the update queue
     *
     * Repeatedly claims the most overdue record which has been requested since its last refresh
     * (worker IS NULL) and re-fetches every record claimed by this worker. Stops as soon as a
     * round did not update anything, or when the time limit has been exceeded.
     *
     * @param float|int|null $timeLimit - in seconds, empty for no limit
     * @param int|null $workerID - 1-255, or null for a random ID
     * @return string[] - updated base URIs, indexed by the time of the update
     * @throws ClientException
     * @throws DatabaseException
     */
    public function cron($timeLimit, $workerID)
    {
        $start = microtime(true);
        $worker = $this->setWorkerID($workerID);
        $log = [];
        $lastCount = -1;
        // Both statements are loop-invariant, prepare them once and re-execute them every round.
        $claim = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET worker = :workerID
WHERE worker IS NULL AND nextUpdate <= UNIX_TIMESTAMP()
ORDER BY nextUpdate ASC
LIMIT 1;
SQL
        );
        $claim->bindParam(':workerID', $worker, PDO::PARAM_INT);
        $select = $this->pdo->prepare(<<<SQL
SELECT base
FROM robotstxt__cache1
WHERE worker = :workerID
LIMIT 10;
SQL
        );
        $select->bindParam(':workerID', $worker, PDO::PARAM_INT);
        while (
            count($log) > $lastCount &&
            (
                empty($timeLimit) ||
                $timeLimit > (microtime(true) - $start)
            )
        ) {
            $lastCount = count($log);
            $claim->execute();
            $select->execute();
            // Read the whole result up front: push() issues its own statements on this connection,
            // which must not happen while a result set is still being iterated.
            $bases = $select->fetchAll(PDO::FETCH_COLUMN);
            foreach ($bases as $base) {
                // push() is called with its default worker (0), which releases the record again.
                if (!$this->push(new UriClient($base, $this->curlOptions, $this->byteLimit))) {
                    throw new ClientException('Unable to update `' . $base . '`');
                }
                $log[(string)microtime(true)] = $base;
            }
        }
        return $log;
    }

    /**
     * Set WorkerID
     *
     * @param int|null $workerID - 1-255, or null to get a random ID assigned
     * @return int
     * @throws DatabaseException if the ID is not an integer within the range 1-255
     */
    private function setWorkerID($workerID = null)
    {
        if (
            is_int($workerID) &&
            $workerID <= 255 &&
            $workerID >= 1
        ) {
            return $workerID;
        } elseif ($workerID !== null) {
            throw new DatabaseException('WorkerID out of range (1-255)');
        }
        return rand(1, 255);
    }

    /**
     * Clean the cache table
     *
     * Deletes records not claimed by any cron worker, whose `nextUpdate` timestamp is more than
     * `CACHE_TIME + $delay` seconds in the past.
     *
     * @param int $delay - in seconds
     * @return bool
     */
    public function clean($delay)
    {
        $delay = self::CACHE_TIME + $delay;
        $query = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__cache1
WHERE (worker = 0 OR worker IS NULL) AND nextUpdate < (UNIX_TIMESTAMP() - :delay);
SQL
        );
        $query->bindParam(':delay', $delay, PDO::PARAM_INT);
        return $query->execute();
    }
}
325
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.