1
|
|
|
<?php |
2
|
|
|
namespace vipnytt\RobotsTxtParser; |
3
|
|
|
|
4
|
|
|
use PDO; |
5
|
|
|
use vipnytt\RobotsTxtParser\Exceptions\SQLException; |
6
|
|
|
use vipnytt\RobotsTxtParser\Parser\UrlParser; |
7
|
|
|
use vipnytt\RobotsTxtParser\SQL\SQLInterface; |
8
|
|
|
use vipnytt\RobotsTxtParser\SQL\SQLTrait; |
9
|
|
|
|
10
|
|
|
/**
 * Class CacheHandler
 *
 * SQL-backed cache of robots.txt files, stored in the `robotstxt__cache0` table.
 *
 * The `worker` column is used by the queries in this class as follows:
 *  - `0`     written by push() when an existing row is refreshed; only rows in
 *            this state are removed by clean().
 *  - `NULL`  written by markAsActive() and displacePush(); only rows in this
 *            state are claimed by cron().
 *  - `1-255` written by cron() while a worker is refreshing the row.
 *
 * @package vipnytt\RobotsTxtParser
 */
class CacheHandler implements RobotsTxtInterface, SQLInterface
{
    use UrlParser;
    use SQLTrait;

    /**
     * Database connection
     * @var PDO
     */
    protected $pdo;

    /**
     * GuzzleHTTP config
     * @var array
     */
    protected $guzzleConfig = [];

    /**
     * Byte limit
     * @var int|null
     */
    protected $byteLimit = self::BYTE_LIMIT;

    /**
     * Client nextUpdate margin in seconds.
     * A cached row is still served by client() until its nextUpdate timestamp
     * is more than this many seconds in the past.
     * @var int
     */
    protected $clientUpdateMargin = 300;

    /**
     * PDO driver
     * @var string
     */
    private $driver;

    /**
     * CacheHandler constructor.
     *
     * Emits an E_USER_WARNING (but continues) when the PDO driver is not MySQL.
     *
     * @param PDO $pdo
     * @param array $guzzleConfig
     * @param int|null $byteLimit
     */
    public function __construct(PDO $pdo, array $guzzleConfig = [], $byteLimit = self::BYTE_LIMIT)
    {
        $this->pdo = $this->pdoInitialize($pdo);
        $this->driver = $this->pdo->getAttribute(PDO::ATTR_DRIVER_NAME);
        if ($this->driver !== 'mysql') {
            trigger_error('Unsupported database. Currently supports MySQL only. ' . self::README_SQL_CACHE, E_USER_WARNING);
        }
        $this->guzzleConfig = $guzzleConfig;
        $this->byteLimit = $byteLimit;
    }

    /**
     * Parser client
     *
     * Returns a client built from the cached row when one exists and is fresh
     * enough (see $clientUpdateMargin); otherwise downloads the robots.txt,
     * stores it via push() and returns a client built from the download.
     *
     * @param string $baseUri
     * @return TxtClient
     */
    public function client($baseUri)
    {
        $base = $this->urlBase($this->urlEncode($baseUri));
        $query = $this->pdo->prepare(<<<SQL
SELECT
  content,
  statusCode,
  nextUpdate,
  worker,
  UNIX_TIMESTAMP() AS currentTime
FROM robotstxt__cache0
WHERE base = :base;
SQL
        );
        $query->bindValue(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // PDOStatement::rowCount() is not guaranteed for SELECT statements,
        // so the presence of a row is decided by the fetch result instead.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        if (
            $row !== false &&
            (int)$row['nextUpdate'] > ((int)$row['currentTime'] - $this->clientUpdateMargin)
        ) {
            $this->markAsActive($base, $row['worker']);
            return new TxtClient($base, $row['statusCode'], $row['content'], self::ENCODING, $this->byteLimit);
        }
        // No usable cache entry: download, store and flag the entry as active.
        $request = new UriClient($base, $this->guzzleConfig, $this->byteLimit);
        $this->push($request);
        $this->markAsActive($base);
        return new TxtClient($base, $request->getStatusCode(), $request->getContents(), self::ENCODING, $this->byteLimit);
    }

    /**
     * Mark robots.txt as active
     *
     * Switches the row's worker from `0` to `NULL`. Nothing is done when the
     * supplied worker value is a non-zero ID.
     *
     * @param string $base
     * @param int|null $workerID - current value of the row's worker column
     * @return bool
     */
    private function markAsActive($base, $workerID = 0)
    {
        // NULL and the string "0" (as returned by PDO) both normalise to 0,
        // matching the original loose `== 0` check.
        if ((int)$workerID !== 0) {
            return true;
        }
        $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET worker = NULL
WHERE base = :base AND worker = 0;
SQL
        );
        $query->bindValue(':base', $base, PDO::PARAM_STR);
        return $query->execute();
    }

    /**
     * Update an robots.txt in the database
     *
     * For a 5xx response from an http(s) base, the stored copy is kept and only
     * its nextUpdate is moved (see displacePush()), provided that copy is still
     * valid. In every other case the row is inserted or overwritten.
     *
     * @param UriClient $request
     * @return bool
     */
    public function push(UriClient $request)
    {
        $base = $request->getBaseUri();
        $statusCode = $request->getStatusCode();
        $nextUpdate = $request->nextUpdate();
        if (
            $statusCode >= 500 &&
            $statusCode < 600 &&
            mb_stripos($base, 'http') === 0 &&
            $this->displacePush($base, $nextUpdate)
        ) {
            return true;
        }
        $validUntil = $request->validUntil();
        $content = $request->render();
        // VALUES(col) re-uses the value from the INSERT part, so every named
        // placeholder appears exactly once. Repeating a named placeholder only
        // works while PDO emulates prepared statements.
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache0 (base, content, statusCode, validUntil, nextUpdate)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate)
ON DUPLICATE KEY UPDATE content = VALUES(content), statusCode = VALUES(statusCode),
  validUntil = VALUES(validUntil), nextUpdate = VALUES(nextUpdate), worker = 0;
SQL
        );
        $query->bindValue(':base', $base, PDO::PARAM_STR);
        $query->bindValue(':content', $content, PDO::PARAM_STR);
        $query->bindValue(':statusCode', $statusCode, PDO::PARAM_INT);
        $query->bindValue(':validUntil', $validUntil, PDO::PARAM_INT);
        $query->bindValue(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        return $query->execute();
    }

    /**
     * Displace push timestamp
     *
     * When a stored row for $base exists and its validUntil is still in the
     * future, sets nextUpdate to the earlier of validUntil and $nextUpdate and
     * resets worker to NULL, leaving the stored content untouched.
     *
     * @param string $base
     * @param int $nextUpdate
     * @return bool - true when the existing row was updated, false otherwise
     */
    private function displacePush($base, $nextUpdate)
    {
        $query = $this->pdo->prepare(<<<SQL
SELECT
  validUntil,
  UNIX_TIMESTAMP() AS currentTime
FROM robotstxt__cache0
WHERE base = :base;
SQL
        );
        $query->bindValue(':base', $base, PDO::PARAM_STR);
        $query->execute();
        $row = $query->fetch(PDO::FETCH_ASSOC);
        if ($row === false || (int)$row['validUntil'] <= (int)$row['currentTime']) {
            return false;
        }
        $nextUpdate = min((int)$row['validUntil'], $nextUpdate);
        $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET nextUpdate = :nextUpdate, worker = NULL
WHERE base = :base;
SQL
        );
        $query->bindValue(':base', $base, PDO::PARAM_STR);
        $query->bindValue(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        return $query->execute();
    }

    /**
     * Process the update queue
     *
     * Repeatedly claims the most overdue row whose worker is NULL, then
     * re-downloads and pushes every row currently carrying this worker's ID.
     *
     * @param int|null $workerID
     * @return bool - true once no more rows are due, false if a query or push failed
     */
    public function cron($workerID = null)
    {
        $worker = $this->setWorkerID($workerID);
        // Claiming and reading are two separate statements: a single prepared
        // statement cannot reliably carry two queries, and fetch() on it would
        // read the UPDATE's (empty) result set instead of the SELECT's.
        $claim = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET worker = :workerID
WHERE worker IS NULL AND nextUpdate <= UNIX_TIMESTAMP()
ORDER BY nextUpdate ASC
LIMIT 1;
SQL
        );
        $claim->bindValue(':workerID', $worker, PDO::PARAM_INT);
        $select = $this->pdo->prepare(<<<SQL
SELECT base
FROM robotstxt__cache0
WHERE worker = :workerID;
SQL
        );
        $select->bindValue(':workerID', $worker, PDO::PARAM_INT);
        while (true) {
            if (!$claim->execute()) {
                return false;
            }
            if ($claim->rowCount() === 0) {
                // Nothing left to claim: the queue is empty.
                return true;
            }
            if (!$select->execute()) {
                return false;
            }
            // Read all bases up front; push() issues its own queries on the
            // same connection.
            $bases = $select->fetchAll(PDO::FETCH_COLUMN);
            foreach ($bases as $base) {
                if (!$this->push(new UriClient($base, $this->guzzleConfig, $this->byteLimit))) {
                    return false;
                }
            }
        }
    }

    /**
     * Set WorkerID
     *
     * Returns the given ID when it is an integer in the range 1-255. Any other
     * non-null value triggers an E_USER_WARNING. In that case, and when no ID
     * is given, a random ID in the range 1-255 is returned.
     *
     * @param int|null $workerID
     * @return int
     */
    protected function setWorkerID($workerID = null)
    {
        if (is_int($workerID) && $workerID >= 1 && $workerID <= 255) {
            return $workerID;
        }
        if ($workerID !== null) {
            trigger_error('WorkerID out of range (1-255)', E_USER_WARNING);
        }
        return rand(1, 255);
    }

    /**
     * Clean the cache table
     *
     * Deletes rows whose worker is 0 and whose nextUpdate is older than
     * CACHE_TIME + $delay seconds.
     *
     * @param int $delay - in seconds
     * @return bool
     */
    public function clean($delay = 600)
    {
        $delay = self::CACHE_TIME + $delay;
        $query = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__cache0
WHERE worker = 0 AND nextUpdate < (UNIX_TIMESTAMP() - :delay);
SQL
        );
        $query->bindValue(':delay', $delay, PDO::PARAM_INT);
        return $query->execute();
    }

    /**
     * Create SQL table
     *
     * @return bool
     * @throws SQLException when the schema file cannot be read or the table cannot be created
     */
    public function setup()
    {
        $schema = file_get_contents(__DIR__ . '/SQL/cache.sql');
        // An unreadable schema file is treated the same as a failed table creation.
        if ($schema === false || !$this->createTable($this->pdo, self::TABLE_CACHE, $schema)) {
            throw new SQLException('Unable to create table! Please read instructions at ' . self::README_SQL_CACHE);
        }
        return true;
    }
}
285
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.