Completed
Branch 2.0-dev (131e57)
by Jan-Petter
02:08
created

Cache::cron()   B

Complexity

Conditions 4
Paths 4

Size

Total Lines 30
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 30
rs 8.5806
cc 4
eloc 15
nc 4
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser\Cache\SQL;
3
4
use PDO;
5
use vipnytt\RobotsTxtParser\Client;
6
use vipnytt\RobotsTxtParser\Core\RobotsTxtInterface;
7
use vipnytt\RobotsTxtParser\Core\UrlParser;
8
use vipnytt\RobotsTxtParser\Request;
9
10
/**
11
 * Class Cache
12
 *
13
 * @package vipnytt\RobotsTxtParser\Cache
14
 */
15
class Cache implements RobotsTxtInterface
16
{
17
    use UrlParser;
18
19
    /**
20
     * Database connection
21
     * @var PDO
22
     */
23
    protected $pdo;
24
25
    /**
26
     * GuzzleHTTP config
27
     * @var array
28
     */
29
    protected $guzzleConfig = [];
30
31
    /**
32
     * Byte limit
33
     * @var int
34
     */
35
    protected $byteLimit = self::BYTE_LIMIT;
36
37
    /**
38
     * Client nextUpdate margin in seconds
39
     * @var int
40
     */
41
    protected $clientNextUpdateMargin = 300;
42
43
    /**
     * Create a cache bound to a database connection.
     *
     * @param PDO $pdo Database connection used for every query
     * @param array $guzzleConfig GuzzleHTTP config
     * @param int $byteLimit Byte limit
     */
    public function __construct(PDO $pdo, array $guzzleConfig = [], $byteLimit = self::BYTE_LIMIT)
    {
        // The three assignments are independent of each other.
        $this->byteLimit = $byteLimit;
        $this->guzzleConfig = $guzzleConfig;
        $this->pdo = $pdo;
    }
56
57
    /**
     * Process the update queue
     *
     * Repeatedly claims the most overdue unclaimed row for this worker and
     * refreshes every row currently assigned to the worker, until there is
     * nothing left to refresh.
     *
     * @param int|null $workerID Worker ID (1-255); a random one is picked when null or invalid
     * @return bool True once no rows are left, false if a query or an update failed
     */
    public function cron($workerID = null)
    {
        $workerID = $this->setWorkerID($workerID);
        // One statement per prepare(): with a combined "UPDATE; SELECT" the
        // statement handle only exposes the UPDATE, so the selected rows were
        // never fetched and nothing was ever refreshed.
        // nextUpdate is handled as a Unix timestamp elsewhere in this class
        // (bound as an integer and compared with time()), hence
        // UNIX_TIMESTAMP() instead of NOW().
        $claim = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET workerID = :workerID
WHERE workerID IS NULL AND nextUpdate <= UNIX_TIMESTAMP()
ORDER BY nextUpdate ASC
LIMIT 1;
SQL
        );
        $claim->bindParam(':workerID', $workerID, PDO::PARAM_INT);
        $assigned = $this->pdo->prepare(<<<SQL
SELECT
  base,
  validUntil
FROM robotstxt__cache0
WHERE workerID = :workerID;
SQL
        );
        $assigned->bindParam(':workerID', $workerID, PDO::PARAM_INT);
        while (true) {
            if (!$claim->execute() || !$assigned->execute()) {
                return false;
            }
            // Read the whole result set before push() issues further queries
            // on the same connection.
            $rows = $assigned->fetchAll(PDO::FETCH_ASSOC);
            if (empty($rows)) {
                return true;
            }
            $result = true;
            foreach ($rows as $row) {
                $request = new Request($row['base'], $this->guzzleConfig, $this->byteLimit);
                if (!$this->push($request, $row['validUntil'])) {
                    $result = false;
                }
            }
            if (!$result) {
                return false;
            }
        }
    }
93
94
    /**
     * Resolve the worker ID to use
     *
     * An integer between 1 and 255 is returned unchanged. Null yields a
     * random ID; any other value raises an E_USER_WARNING first and then
     * also yields a random ID.
     *
     * @param int|null $workerID
     * @return int
     */
    protected function setWorkerID($workerID = null)
    {
        if ($workerID === null) {
            return rand(1, 255);
        }
        $isValid = is_int($workerID) && $workerID >= 1 && $workerID <= 255;
        if ($isValid) {
            return $workerID;
        }
        trigger_error('WorkerID out of range (1-255)', E_USER_WARNING);
        return rand(1, 255);
    }
113
114
    /**
     * Update a robots.txt in the database
     *
     * When the request ended in a 5xx status over an http(s) base URI and the
     * existing record is still valid, only the next update time is adjusted
     * and the worker ID is reset to NULL. Otherwise the record is inserted or
     * overwritten and its worker ID is set to 0.
     *
     * @param Request $request
     * @param int $existingValidUntil validUntil of the existing record, 0 if none
     * @return bool Result of executing the statement
     */
    public function push(Request $request, $existingValidUntil = 0)
    {
        $time = time();
        $base = $request->getBaseUri();
        $statusCode = $request->getStatusCode();
        $nextUpdate = $request->nextUpdate();
        if (
            $existingValidUntil > $time &&
            $statusCode >= 500 &&
            $statusCode < 600 &&
            // Cast: parse_url() does not return a string when the scheme is missing.
            mb_strpos((string)parse_url($base, PHP_URL_SCHEME), 'http') === 0
        ) {
            // Never postpone the retry beyond the point where the existing record expires.
            $nextUpdate = min($existingValidUntil, $nextUpdate);
            $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET nextUpdate = :nextUpdate, workerID = NULL
WHERE base = :base;
SQL
            );
            $query->bindParam(':base', $base, PDO::PARAM_STR);
            $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
            return $query->execute();
        }
        $validUntil = $request->validUntil();
        $content = $request->getContents();
        // VALUES(col) refers to the value given in the INSERT part, so every
        // named placeholder appears only once. PDO does not allow re-using a
        // named placeholder unless prepared statements are emulated.
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache0 (base, content, statusCode, validUntil, nextUpdate)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate)
ON DUPLICATE KEY UPDATE content = VALUES(content), statusCode = VALUES(statusCode), validUntil = VALUES(validUntil), nextUpdate = VALUES(nextUpdate), workerID = 0;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->bindParam(':content', $content, PDO::PARAM_STR);
        $query->bindParam(':statusCode', $statusCode, PDO::PARAM_INT);
        $query->bindParam(':validUntil', $validUntil, PDO::PARAM_INT);
        $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        return $query->execute();
    }
159
160
    /**
     * Parser client
     *
     * Serves the robots.txt from the cache when a record exists whose
     * nextUpdate is not older than the client margin; otherwise performs a
     * fresh request, stores it and returns the request itself.
     *
     * @param string $baseUri
     * @return Client|Request
     */
    public function client($baseUri)
    {
        $base = $this->urlBase($this->urlEncode($baseUri));
        $query = $this->pdo->prepare(<<<SQL
SELECT content,statusCode,nextUpdate,workerID
FROM robotstxt__cache0
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // fetch() returns false when there is no row; rowCount() is not
        // reliable for SELECT statements.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        if (
            $row !== false &&
            $row['nextUpdate'] >= (time() - $this->clientNextUpdateMargin)
        ) {
            $this->markAsActive($base, $row['workerID']);
            // The selected column is named "statusCode"; there is no "code" column in the result.
            return new Client($base, $row['statusCode'], $row['content'], self::ENCODING, $this->byteLimit);
        }
        $request = new Request($base, $this->guzzleConfig, $this->byteLimit);
        $this->push($request);
        $this->markAsActive($base);
        return $request;
    }
189
190
    /**
     * Mark robots.txt as active
     *
     * Resets the worker ID of the record to NULL, but only when the given
     * worker ID loosely equals 0 (which includes null). For any other worker
     * ID nothing is written.
     *
     * @param string $base
     * @param int|null $workerID
     * @return bool Result of the update, or true when no update was needed
     */
    protected function markAsActive($base, $workerID = 0)
    {
        if ($workerID != 0) {
            return true;
        }
        $statement = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET workerID = NULL
WHERE base = :base;
SQL
        );
        $statement->bindParam(':base', $base, PDO::PARAM_STR);
        return $statement->execute();
    }
211
212
    /**
     * Database maintenance
     *
     * Deletes cache records with worker ID 0 whose nextUpdate is older than
     * the cache time, and delay records whose timestamp has passed.
     *
     * @return bool True only if both delete statements executed successfully
     */
    public function cleanup()
    {
        $nextUpdate = time() - self::CACHE_TIME;
        // Delay timestamps are stored in microseconds.
        $microTime = microtime(true) * 1000000;
        // One statement per prepare(): a combined "DELETE; DELETE" depends on
        // multi-statement emulation and only reports the first statement's result.
        $cache = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__cache0
WHERE workerID = 0 AND nextUpdate < :nextUpdate;
SQL
        );
        $cache->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        $delay = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__delay0
WHERE microTime < :microTime;
SQL
        );
        $delay->bindParam(':microTime', $microTime, PDO::PARAM_INT);
        // Run both deletes even if the first one fails.
        $cacheDeleted = $cache->execute();
        $delayDeleted = $delay->execute();
        return $cacheDeleted && $delayDeleted;
    }
232
233
    /**
     * Honor the Crawl-delay rules
     *
     * Sleeps until the timestamp reported by delayUntil(), unless that
     * timestamp has already passed.
     *
     * @param int|float $delay
     * @param string $baseUri
     * @param string $userAgent
     * @return true
     */
    public function delaySleep($delay, $baseUri, $userAgent = self::USER_AGENT)
    {
        $wakeAt = $this->delayUntil($delay, $baseUri, $userAgent);
        if (microtime(true) <= $wakeAt) {
            try {
                time_sleep_until($wakeAt);
            } catch (\Exception $ignored) {
                // Timestamp already in the past
            }
        }
        return true;
    }
254
255
    /**
     * Get the timestamp to wait for, and register a new delay
     *
     * Returns the timestamp stored before this call; the stored value is
     * then pushed forward by setDelay().
     *
     * @param int|float $delay Delay in seconds
     * @param string $baseUri
     * @param string $userAgent
     * @return int|float|false Timestamp in seconds, 0 if none was stored, false if the delay is not positive
     */
    public function delayUntil($delay, $baseUri, $userAgent = self::USER_AGENT)
    {
        if ($delay <= 0) {
            return false;
        }
        $base = $this->urlBase($this->urlEncode($baseUri));
        $query = $this->pdo->prepare(<<<SQL
SELECT microTime
FROM robotstxt__delay0
WHERE base = :base AND userAgent = :userAgent;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->bindParam(':userAgent', $userAgent, PDO::PARAM_STR);
        $query->execute();
        // Read the stored value before setDelay() runs another statement on
        // the same connection. fetch() returns false when there is no row;
        // rowCount() is not reliable for SELECT statements.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        $this->setDelay($delay, $base, $userAgent);
        if ($row !== false) {
            // Stored in microseconds, returned in seconds.
            return $row['microTime'] / 1000000;
        }
        return 0;
    }
283
284
    /**
     * Set new delayUntil timestamp
     *
     * Stores now + delay for a new base/user-agent pair. For an existing
     * pair the stored timestamp becomes the greater of now + delay and the
     * stored timestamp + delay.
     *
     * @param int|float $delay Delay in seconds
     * @param string $baseUri
     * @param string $userAgent
     * @return bool Result of executing the statement
     */
    protected function setDelay($delay, $baseUri, $userAgent = self::USER_AGENT)
    {
        // Timestamps are stored in microseconds.
        $delay = $delay * 1000000;
        $microTime = (microtime(true) * 1000000) + $delay;
        // VALUES(microTime) refers to the value given in the INSERT part, so
        // :microTime appears only once. PDO does not allow re-using a named
        // placeholder unless prepared statements are emulated.
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__delay0 (base, userAgent, microTime)
VALUES (:base, :userAgent, :microTime)
ON DUPLICATE KEY UPDATE microTime = GREATEST(VALUES(microTime), microTime + :delay);
SQL
        );
        $query->bindParam(':base', $baseUri, PDO::PARAM_STR);
        $query->bindParam(':userAgent', $userAgent, PDO::PARAM_STR);
        $query->bindParam(':microTime', $microTime, PDO::PARAM_INT);
        $query->bindParam(':delay', $delay, PDO::PARAM_INT);
        return $query->execute();
    }
308
}
309