1 | <?php |
||
15 | class Cache implements RobotsTxtInterface |
||
16 | { |
||
17 | use UrlParser; |
||
18 | |||
19 | /** |
||
20 | * PDO connection used for all queries against the robotstxt__cache0 table |
||
21 | * @var PDO |
||
22 | */ |
||
23 | protected $pdo; |
||
24 | |||
25 | /** |
||
26 | * GuzzleHTTP config, handed to every Request this cache creates |
||
27 | * @var array |
||
28 | */ |
||
29 | protected $guzzleConfig = []; |
||
30 | |||
31 | /** |
||
32 | * Byte limit, handed to every Request and Client this cache creates |
||
33 | * @var int |
||
34 | */ |
||
35 | protected $byteLimit = self::BYTE_LIMIT; |
||
36 | |||
37 | /** |
||
38 | * Margin in seconds: client() still serves a cached row whose nextUpdate is up to this far in the past |
||
39 | * @var int |
||
40 | */ |
||
41 | protected $clientNextUpdateMargin = 300; |
||
42 | |||
43 | /** |
||
44 | * Cache constructor. |
||
45 | * |
||
46 | * @param PDO $pdo |
||
47 | * @param array $guzzleConfig |
||
48 | * @param int $byteLimit |
||
49 | */ |
||
50 | public function __construct(PDO $pdo, array $guzzleConfig = [], $byteLimit = self::BYTE_LIMIT) |
||
56 | |||
/**
 * Process the update queue
 *
 * Claims, one at a time, the most overdue row that no worker owns
 * (worker IS NULL and nextUpdate <= now), then re-requests every row
 * currently owned by this worker and stores the outcome through push().
 * Runs until there is nothing left to claim.
 *
 * @param int|null $workerID Passed to setWorkerID(), whose return value is used as the worker ID
 * @return bool True once no claimable row remains; false if a query or a push() failed
 */
public function cron($workerID = null)
{
    $worker = $this->setWorkerID($workerID);

    // Claim and read-back are two separate statements: a combined
    // "UPDATE ...; SELECT ..." needs multi-statement support, leaves the
    // SELECT rows in a second rowset that fetch() never reaches, and used a
    // second placeholder that was never bound.
    $claim = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET worker = :workerID
WHERE worker IS NULL AND nextUpdate <= UNIX_TIMESTAMP()
ORDER BY nextUpdate ASC
LIMIT 1;
SQL
    );
    $claim->bindParam(':workerID', $worker, PDO::PARAM_INT);

    $claimed = $this->pdo->prepare(<<<SQL
SELECT
    base,
    validUntil
FROM robotstxt__cache0
WHERE worker = :workerID;
SQL
    );
    $claimed->bindParam(':workerID', $worker, PDO::PARAM_INT);

    while (true) {
        if (!$claim->execute()) {
            return false;
        }
        if ($claim->rowCount() < 1) {
            // Nothing left that is due and unclaimed: the queue is drained.
            return true;
        }
        if (!$claimed->execute()) {
            return false;
        }
        $succeeded = true;
        foreach ($claimed->fetchAll(PDO::FETCH_ASSOC) as $row) {
            $request = new Request($row['base'], $this->guzzleConfig, $this->byteLimit);
            // Keep processing the remaining rows, but remember the failure so
            // a later success cannot mask it.
            if (!$this->push($request, $row['validUntil'])) {
                $succeeded = false;
            }
        }
        if (!$succeeded) {
            return false;
        }
    }
}
||
93 | |||
94 | /** |
||
95 | * Set WorkerID |
||
96 | * |
||
97 | * @param int|null $workerID |
||
98 | * @return int |
||
99 | */ |
||
100 | protected function setWorkerID($workerID = null) |
||
113 | |||
/**
 * Update a robots.txt in the database
 *
 * When the request returned a 5xx status over an http(s) base URI and the
 * previously stored copy is still valid ($existingValidUntil lies in the
 * future), the stored content is kept: only nextUpdate is rescheduled and
 * the row is released (worker = NULL). In every other case the row is
 * inserted, or overwritten if the base already exists, with worker = 0.
 *
 * @param Request $request
 * @param int $existingValidUntil validUntil of the stored copy; 0 when there is none
 * @return bool Result of executing the statement
 */
public function push(Request $request, $existingValidUntil = 0)
{
    $time = time();
    $base = $request->getBaseUri();
    $statusCode = $request->getStatusCode();
    $nextUpdate = $request->nextUpdate();
    // parse_url() yields null when the URI has no scheme; cast so that
    // mb_strpos() always receives a string.
    $scheme = (string)parse_url($base, PHP_URL_SCHEME);
    if (
        $existingValidUntil > $time &&
        $statusCode >= 500 &&
        $statusCode < 600 &&
        mb_strpos($scheme, 'http') === 0
    ) {
        // Never schedule the retry later than the stored copy's expiry.
        $nextUpdate = min($existingValidUntil, $nextUpdate);
        $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET nextUpdate = :nextUpdate, worker = NULL
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        return $query->execute();
    }
    $validUntil = $request->validUntil();
    $content = $request->getContents();
    // VALUES(col) refers to the value given in the INSERT part, so each named
    // placeholder appears only once. Reusing a named placeholder only works
    // while PDO emulates prepared statements.
    $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache0 (base, content, statusCode, validUntil, nextUpdate)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate)
ON DUPLICATE KEY UPDATE content = VALUES(content), statusCode = VALUES(statusCode), validUntil = VALUES(validUntil), nextUpdate = VALUES(nextUpdate), worker = 0;
SQL
    );
    $query->bindParam(':base', $base, PDO::PARAM_STR);
    $query->bindParam(':content', $content, PDO::PARAM_STR);
    $query->bindParam(':statusCode', $statusCode, PDO::PARAM_INT);
    $query->bindParam(':validUntil', $validUntil, PDO::PARAM_INT);
    $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
    return $query->execute();
}
||
159 | |||
/**
 * Parser client
 *
 * Looks up the cached row for the base of $baseUri. If a row exists and its
 * nextUpdate is not older than clientNextUpdateMargin seconds, the row is
 * marked as active and a Client built from the stored status code and
 * content is returned. Otherwise a new Request is created, stored through
 * push(), marked as active and returned.
 *
 * @param string $baseUri
 * @return Client|Request
 */
public function client($baseUri)
{
    $base = $this->urlBase($this->urlEncode($baseUri));
    $query = $this->pdo->prepare(<<<SQL
SELECT content,statusCode,nextUpdate,worker
FROM robotstxt__cache0
WHERE base = :base;
SQL
    );
    $query->bindParam(':base', $base, PDO::PARAM_STR);
    $query->execute();
    // Test the fetched row itself: rowCount() is not guaranteed to report
    // the number of rows of a SELECT on every driver.
    $row = $query->fetch(PDO::FETCH_ASSOC);
    if (is_array($row) && $row['nextUpdate'] >= (time() - $this->clientNextUpdateMargin)) {
        $this->markAsActive($base, $row['worker']);
        // The selected column is named statusCode; there is no "code" key.
        return new Client($base, $row['statusCode'], $row['content'], self::ENCODING, $this->byteLimit);
    }
    // No usable cached copy: request it now and cache the outcome.
    $request = new Request($base, $this->guzzleConfig, $this->byteLimit);
    $this->push($request);
    $this->markAsActive($base);
    return $request;
}
||
189 | |||
190 | /** |
||
191 | * Mark robots.txt as active |
||
192 | * |
||
193 | * @param string $base |
||
194 | * @param int|null $workerID |
||
195 | * @return bool |
||
196 | */ |
||
197 | protected function markAsActive($base, $workerID = 0) |
||
211 | |||
212 | /** |
||
213 | * Delay |
||
214 | * |
||
215 | * @param float|int $delay |
||
216 | * @param string $baseUri |
||
217 | * @param string $userAgent |
||
218 | * @return Delay |
||
219 | */ |
||
220 | public function delay($delay, $baseUri, $userAgent) |
||
224 | } |
||
225 |