Completed
Push — master ( cb65dd...97cb3a )
by Jan-Petter
02:28
created

Cache::cron()   C

Complexity

Conditions 7
Paths 5

Size

Total Lines 38
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 1 Features 0
Metric Value
c 4
b 1
f 0
dl 0
loc 38
rs 6.7272
cc 7
eloc 21
nc 5
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use PDO;
5
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
6
use vipnytt\RobotsTxtParser\Exceptions\SQLException;
7
use vipnytt\RobotsTxtParser\Parser\UriParser;
8
9
/**
10
 * Class Cache
11
 *
12
 * @see https://github.com/VIPnytt/RobotsTxtParser/blob/master/docs/methods/Cache.md for documentation
13
 * @package vipnytt\RobotsTxtParser
14
 */
15
class Cache implements RobotsTxtInterface, SQLInterface
16
{
17
    use UriParser;
18
19
    /**
20
     * Supported database drivers
21
     */
22
    const SUPPORTED_DRIVERS = [
23
        self::DRIVER_MYSQL,
24
    ];
25
26
    /**
27
     * Client nextUpdate margin in seconds
28
     * @var int
29
     */
30
    protected $clientUpdateMargin = 300;
31
32
    /**
33
     * Database handler
34
     * @var PDO
35
     */
36
    private $pdo;
37
38
    /**
39
     * cURL options
40
     * @var array
41
     */
42
    private $curlOptions = [];
43
44
    /**
45
     * Byte limit
46
     * @var int|null
47
     */
48
    private $byteLimit = self::BYTE_LIMIT;
49
50
    /**
51
     * PDO driver
52
     * @var string
53
     */
54
    private $driver;
55
56
    /**
57
     * Cache constructor.
58
     *
59
     * @param PDO $pdo
60
     * @param array $curlOptions
61
     * @param int|null $byteLimit
62
     */
63
    public function __construct(PDO $pdo, array $curlOptions = [], $byteLimit = self::BYTE_LIMIT)
64
    {
65
        $this->pdo = $this->pdoInitialize($pdo);
66
        $this->curlOptions = $curlOptions;
67
        $this->byteLimit = $byteLimit;
68
    }
69
70
    /**
     * Initialize PDO connection
     *
     * Verifies that the driver is supported, normalises the connection
     * attributes, sets the connection encoding and makes sure the cache table
     * can be queried. When the probe query fails, the bundled `res/cache.sql`
     * is executed in an attempt to create the table.
     *
     * @param PDO $pdo
     * @return PDO the same handle, configured
     * @throws SQLException on an unsupported driver, or when the cache table is missing and could not be created
     */
    private function pdoInitialize(PDO $pdo)
    {
        // Check the driver first, so an unsupported connection is neither
        // reconfigured nor sent a MySQL-specific `SET NAMES` statement.
        $this->driver = $pdo->getAttribute(PDO::ATTR_DRIVER_NAME);
        if (!in_array($this->driver, self::SUPPORTED_DRIVERS, true)) {
            throw new SQLException('Unsupported database. ' . self::README_SQL_CACHE);
        }
        if ($pdo->getAttribute(PDO::ATTR_ERRMODE) === PDO::ERRMODE_SILENT) {
            $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
        }
        $pdo->setAttribute(PDO::ATTR_CASE, PDO::CASE_NATURAL);
        $pdo->setAttribute(PDO::ATTR_ORACLE_NULLS, PDO::NULL_NATURAL);
        $pdo->exec('SET NAMES ' . self::SQL_ENCODING);

        // Probe for the cache table. In ERRMODE_WARNING a failing query()
        // returns false instead of throwing, so both outcomes are handled.
        try {
            $probe = $pdo->query("SELECT 1 FROM robotstxt__cache1 LIMIT 1;");
            $tableExists = $probe !== false;
            if ($tableExists) {
                $probe->closeCursor();
            }
        } catch (\Exception $exception1) {
            $tableExists = false;
        }
        if ($tableExists) {
            return $pdo;
        }

        // Table not reachable: try to create it from the bundled schema.
        try {
            $schema = file_get_contents(__DIR__ . '/../res/cache.sql');
            $created = $schema !== false && $pdo->query($schema) !== false;
        } catch (\Exception $exception2) {
            $created = false;
        }
        if (!$created) {
            throw new SQLException('Missing table `' . self::TABLE_CACHE . '`. Setup instructions: ' . self::README_SQL_CACHE);
        }
        return $pdo;
    }
100
101
    /**
     * Parser client
     *
     * Returns a TxtClient for the given base URI. A cached row is used as long
     * as its `nextUpdate` is later than the database time minus
     * `$clientUpdateMargin` seconds. Otherwise the robots.txt is requested
     * through UriClient, stored with push() and returned.
     *
     * @param string $baseUri
     * @return TxtClient
     * @throws SQLException if the PHP and SQL server clocks are out of sync
     */
    public function client($baseUri)
    {
        $base = $this->urlBase($baseUri);
        $query = $this->pdo->prepare(<<<SQL
SELECT
  content,
  statusCode,
  nextUpdate,
  effective,
  worker,
  UNIX_TIMESTAMP()
FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // Test the fetched row instead of rowCount(): the row count of a
        // SELECT is not guaranteed by PDO (e.g. unbuffered MySQL queries).
        $row = $query->fetch(PDO::FETCH_ASSOC);
        // Release the result set before any further statement is issued.
        $query->closeCursor();
        if (is_array($row)) {
            $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
            if ($row['nextUpdate'] > ($row['UNIX_TIMESTAMP()'] - $this->clientUpdateMargin)) {
                $this->markAsActive($base, $row['worker']);
                return new TxtClient($base, $row['statusCode'], $row['content'], self::ENCODING, $row['effective'], $this->byteLimit);
            }
        }
        // No usable cache entry: fetch, store and flag the entry as active.
        $request = new UriClient($base, $this->curlOptions, $this->byteLimit);
        $this->push($request);
        $this->markAsActive($base);
        return new TxtClient($base, $request->getStatusCode(), $request->getContents(), $request->getEncoding(), $request->getEffectiveUri(), $this->byteLimit);
    }
137
138
    /**
     * Clock sync check
     *
     * Compares the given timestamp with PHP's own time() and refuses to
     * continue when the two differ by 10 seconds or more.
     *
     * @param int $time - timestamp reported by the SQL server
     * @throws SQLException
     */
    private function clockSyncCheck($time)
    {
        $drift = abs(time() - $time);
        if ($drift < 10) {
            return;
        }
        throw new SQLException('`PHP server` and `SQL server` timestamps are out of sync. Please fix!');
    }
150
151
    /**
     * Mark robots.txt as active
     *
     * When the supplied worker ID compares equal to 0 (which includes null),
     * the row for `$base` whose `worker` column is 0 gets that column reset to
     * NULL. For any other worker ID nothing is written.
     *
     * @param string $base
     * @param int|null $workerID
     * @return bool
     */
    private function markAsActive($base, $workerID = 0)
    {
        // Nothing to do for rows that carry a non-zero worker ID.
        if ($workerID != 0) {
            return true;
        }
        $statement = $this->pdo->prepare(<<<'SQL'
UPDATE robotstxt__cache1
SET worker = NULL
WHERE base = :base AND worker = 0;
SQL
        );
        $statement->bindParam(':base', $base, PDO::PARAM_STR);
        return $statement->execute();
    }
172
173
    /**
     * Update a robots.txt in the database
     *
     * For http(s) bases whose request produced no status code or a 5xx
     * status, displacePush() is tried first; when it succeeds the stored
     * record is kept and only its schedule is moved. In every other case the
     * rendered content is inserted, or the existing row is overwritten and
     * its `worker` column reset to 0.
     *
     * @param UriClient $client
     * @return bool
     */
    private function push(UriClient $client)
    {
        $base = $client->getBaseUri();
        $statusCode = $client->getStatusCode();
        $nextUpdate = $client->nextUpdate();
        // The effective URI is only stored when it differs from the base.
        $effective = $client->getEffectiveUri();
        if ($effective === $base) {
            $effective = null;
        }
        $serverFailure = $statusCode === null || ($statusCode >= 500 && $statusCode < 600);
        if (
            stripos($base, 'http') === 0 &&
            $serverFailure &&
            $this->displacePush($base, $nextUpdate)
        ) {
            return true;
        }
        $validUntil = $client->validUntil();
        $content = $client->render();
        // VALUES(col) refers to the value supplied in the INSERT part, so
        // every named placeholder appears exactly once. Repeating a named
        // placeholder only works while PDO emulates prepared statements.
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache1 (base, content, statusCode, validUntil, nextUpdate, effective)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate, :effective)
ON DUPLICATE KEY UPDATE content = VALUES(content), statusCode = VALUES(statusCode), validUntil = VALUES(validUntil),
  nextUpdate = VALUES(nextUpdate), effective = VALUES(effective), worker = 0;
SQL
        );
        // PHP null values are sent as SQL NULL regardless of the declared type
        // (PDO::PARAM_NULL is 0, so OR-ing it into the type had no effect).
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->bindParam(':content', $content, PDO::PARAM_STR);
        $query->bindParam(':statusCode', $statusCode, PDO::PARAM_INT);
        $query->bindParam(':validUntil', $validUntil, PDO::PARAM_INT);
        $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        $query->bindParam(':effective', $effective, PDO::PARAM_STR);
        return $query->execute();
    }
215
216
    /**
     * Displace push timestamp
     *
     * Looks up the stored row for `$base`. While its `validUntil` is still in
     * the future (by the database clock), the row is kept: `nextUpdate` is set
     * to the earlier of `validUntil` and the proposed `$nextUpdate`, and
     * `worker` is reset to NULL. An expired row is removed via invalidate().
     *
     * @param string $base
     * @param int $nextUpdate
     * @return bool true when an existing row was rescheduled, false otherwise
     * @throws SQLException if the PHP and SQL server clocks are out of sync
     */
    private function displacePush($base, $nextUpdate)
    {
        $query = $this->pdo->prepare(<<<SQL
SELECT
  validUntil,
  UNIX_TIMESTAMP()
FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // Test the fetched row instead of rowCount(): the row count of a
        // SELECT is not guaranteed by PDO (e.g. unbuffered MySQL queries).
        $row = $query->fetch(PDO::FETCH_ASSOC);
        // Release the result set before the follow-up statement is issued.
        $query->closeCursor();
        if (!is_array($row)) {
            return false;
        }
        $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
        if ($row['validUntil'] > $row['UNIX_TIMESTAMP()']) {
            $nextUpdate = min($row['validUntil'], $nextUpdate);
            $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET nextUpdate = :nextUpdate, worker = NULL
WHERE base = :base;
SQL
            );
            $query->bindParam(':base', $base, PDO::PARAM_STR);
            $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
            return $query->execute();
        }
        // The stored copy has expired; drop it so a fresh record can be written.
        $this->invalidate($base);
        return false;
    }
254
255
    /**
     * Invalidate cache
     *
     * Deletes the cached row belonging to the base of the given URI.
     *
     * @param string $baseUri
     * @return bool result of executing the DELETE statement
     */
    public function invalidate($baseUri)
    {
        $target = $this->urlBase($baseUri);
        $statement = $this->pdo->prepare(<<<'SQL'
DELETE FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $statement->bindParam(':base', $target, PDO::PARAM_STR);
        return $statement->execute();
    }
272
273
    /**
     * Process the update queue
     *
     * Repeatedly claims the most overdue unassigned row (worker IS NULL and
     * nextUpdate not in the future) for this worker, then refreshes every row
     * currently assigned to the worker. Stops when nothing more can be claimed
     * or when `$targetTime` seconds have elapsed.
     *
     * @param float|int $targetTime - time budget in seconds
     * @param int|null $workerID - 1-255, a random ID is used when null
     * @return string[] bases that were updated
     * @throws ClientException when a robots.txt could not be stored
     */
    public function cron($targetTime = 60, $workerID = null)
    {
        $start = microtime(true);
        $worker = $this->setWorkerID($workerID);
        $log = [];
        // Claim and select are separate statements. Sent as one multi-statement
        // query, rowCount()/fetch() would address the UPDATE's result only,
        // and the call would fail altogether without emulated prepares.
        $claim = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET worker = :workerID
WHERE worker IS NULL AND nextUpdate <= UNIX_TIMESTAMP()
ORDER BY nextUpdate ASC
LIMIT 1;
SQL
        );
        $claim->bindParam(':workerID', $worker, PDO::PARAM_INT);
        $select = $this->pdo->prepare(<<<SQL
SELECT base
FROM robotstxt__cache1
WHERE worker = :workerID
LIMIT 100;
SQL
        );
        $select->bindParam(':workerID', $worker, PDO::PARAM_INT);
        $count = 1;
        while (
            $count > 0 &&
            $targetTime > microtime(true) - $start
        ) {
            $claim->execute();
            $count = $claim->rowCount();
            if ($count <= 0) {
                break;
            }
            $select->execute();
            // Read the whole list up front, so no result set is left open
            // while push() runs its own statements on the same connection.
            $bases = $select->fetchAll(PDO::FETCH_COLUMN);
            if (!is_array($bases)) {
                $bases = [];
            }
            foreach ($bases as $base) {
                if ($targetTime <= microtime(true) - $start) {
                    break;
                }
                if (!$this->push(new UriClient($base, $this->curlOptions, $this->byteLimit))) {
                    throw new ClientException('Unable to update `' . $base . '`');
                }
                $log[] = $base;
            }
        }
        return $log;
    }
319
320
    /**
     * Set WorkerID
     *
     * Accepts an integer between 1 and 255. Any other non-null value raises
     * an E_USER_WARNING; in that case, and for null, a random ID in the same
     * range is returned instead.
     *
     * @param int|null $workerID
     * @return int
     */
    protected function setWorkerID($workerID = null)
    {
        $inRange = is_int($workerID) && $workerID <= 255 && $workerID >= 1;
        if ($inRange) {
            return $workerID;
        }
        if ($workerID !== null) {
            trigger_error('WorkerID out of range (1-255)', E_USER_WARNING);
        }
        return rand(1, 255);
    }
339
340
    /**
     * Clean the cache table
     *
     * Deletes rows whose `worker` column is 0 and whose `nextUpdate` lies more
     * than CACHE_TIME plus `$delay` seconds in the past (database clock).
     *
     * @param int $delay - in seconds
     * @return bool result of executing the DELETE statement
     */
    public function clean($delay = 600)
    {
        $threshold = self::CACHE_TIME + $delay;
        $statement = $this->pdo->prepare(<<<'SQL'
DELETE FROM robotstxt__cache1
WHERE worker = 0 AND nextUpdate < (UNIX_TIMESTAMP() - :delay);
SQL
        );
        $statement->bindParam(':delay', $threshold, PDO::PARAM_INT);
        return $statement->execute();
    }
357
}
358