Completed
Push — master ( f17301...aeaf79 )
by Jan-Petter
02:43
created

Cache::pdoInitialize()   B

Complexity

Conditions 5
Paths 8

Size

Total Lines 23
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 1 Features 0
Metric Value
c 2
b 1
f 0
dl 0
loc 23
rs 8.5906
cc 5
eloc 17
nc 8
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use PDO;
5
use vipnytt\RobotsTxtParser\Exceptions\SQLException;
6
use vipnytt\RobotsTxtParser\Parser\UrlParser;
7
use vipnytt\RobotsTxtParser\SQL\SQLInterface;
8
9
/**
10
 * Class Cache
11
 *
12
 * @package vipnytt\RobotsTxtParser
13
 */
14
class Cache implements RobotsTxtInterface, SQLInterface
15
{
16
    use UrlParser;
17
18
    /**
     * PDO driver names this cache accepts.
     *
     * pdoInitialize() compares the driver name of the supplied connection against
     * this list and throws an SQLException for any driver that is not listed.
     */
21
    const SUPPORTED_DRIVERS = [
22
        self::DRIVER_MYSQL,
23
    ];
24
25
    /**
     * Database connection, as returned by pdoInitialize()
     * @var PDO
     */
29
    private $pdo;
30
31
    /**
     * GuzzleHTTP config, passed on to every UriClient created by this class
     * @var array
     */
35
    private $guzzleConfig = [];
36
37
    /**
     * Byte limit, passed on to every UriClient and TxtClient created by this class
     * @var int|null
     */
41
    private $byteLimit = self::BYTE_LIMIT;
42
43
    /**
     * Client nextUpdate margin in seconds
     *
     * client() serves a cached record as long as its `nextUpdate` is later than
     * the database time minus this margin.
     * @var int
     */
47
    private $clientUpdateMargin = 300;
48
49
    /**
     * PDO driver name, read from PDO::ATTR_DRIVER_NAME in pdoInitialize()
     * @var string
     */
53
    private $driver;
54
55
    /**
     * Cache constructor.
     *
     * Stores the client settings and hands the connection to pdoInitialize(),
     * which configures and validates it before it is kept.
     *
     * @param PDO $pdo Database connection
     * @param array $guzzleConfig GuzzleHTTP config passed on to the UriClient instances
     * @param int|null $byteLimit Byte limit passed on to the client instances
     * @throws SQLException
     */
    public function __construct(PDO $pdo, array $guzzleConfig = [], $byteLimit = self::BYTE_LIMIT)
    {
        $this->guzzleConfig = $guzzleConfig;
        $this->byteLimit = $byteLimit;
        $this->pdo = $this->pdoInitialize($pdo);
    }
68
69
    /**
     * Initialize PDO connection
     *
     * Raises a silent error mode to warnings, normalizes the case and NULL
     * handling attributes, verifies that the driver is supported, and makes sure
     * the cache table can be queried. When the table cannot be queried, the
     * bundled `SQL/cache.sql` is executed in an attempt to create it.
     *
     * @param PDO $pdo
     * @return PDO The same connection, configured
     * @throws SQLException If the driver is unsupported or the table is missing and cannot be created
     */
    private function pdoInitialize(PDO $pdo)
    {
        if ($pdo->getAttribute(PDO::ATTR_ERRMODE) === PDO::ERRMODE_SILENT) {
            $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
        }
        $pdo->setAttribute(PDO::ATTR_CASE, PDO::CASE_NATURAL);
        $pdo->setAttribute(PDO::ATTR_ORACLE_NULLS, PDO::NULL_NATURAL);
        // Reject unsupported drivers before any SQL is sent to the server.
        $this->driver = $pdo->getAttribute(PDO::ATTR_DRIVER_NAME);
        if (!in_array($this->driver, self::SUPPORTED_DRIVERS)) {
            throw new SQLException('Unsupported database. ' . self::README_SQL_CACHE);
        }
        $pdo->exec('SET NAMES ' . self::SQL_ENCODING);
        // Outside of ERRMODE_EXCEPTION a failing query returns false instead of
        // throwing, so both failure channels have to be checked.
        try {
            $tableExists = $pdo->query("SELECT 1 FROM robotstxt__cache0 LIMIT 1;") !== false;
        } catch (\Exception $exception1) {
            $tableExists = false;
        }
        if (!$tableExists) {
            try {
                $schema = file_get_contents(__DIR__ . '/SQL/cache.sql');
                $created = $schema !== false && $pdo->query($schema) !== false;
            } catch (\Exception $exception2) {
                $created = false;
            }
            if (!$created) {
                throw new SQLException('Missing table `' . self::TABLE_CACHE . '`. Setup instructions: ' . self::README_SQL_CACHE);
            }
        }
        return $pdo;
    }
99
100
    /**
     * Parser client
     *
     * Looks up the cached robots.txt for the base of the given URI. A cached
     * record is served while its `nextUpdate` is later than the database time
     * minus the client update margin; otherwise the file is requested through a
     * UriClient, stored with push() and then served.
     *
     * @param string $baseUri
     * @return TxtClient
     * @throws SQLException If the PHP and SQL server clocks are out of sync
     */
    public function client($baseUri)
    {
        $base = $this->urlBase($this->urlEncode($baseUri));
        $query = $this->pdo->prepare(<<<SQL
SELECT
  content,
  statusCode,
  nextUpdate,
  worker,
  UNIX_TIMESTAMP()
FROM robotstxt__cache0
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // rowCount() is not reliable for SELECT statements; test the fetched row instead.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        if (is_array($row)) {
            $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
            if ($row['nextUpdate'] > ($row['UNIX_TIMESTAMP()'] - $this->clientUpdateMargin)) {
                $this->markAsActive($base, $row['worker']);
                return new TxtClient($base, $row['statusCode'], $row['content'], self::ENCODING, $this->byteLimit);
            }
        }
        // No usable cache record: fetch, store and serve a fresh copy.
        $request = new UriClient($base, $this->guzzleConfig, $this->byteLimit);
        $this->push($request);
        $this->markAsActive($base);
        return new TxtClient($base, $request->getStatusCode(), $request->getContents(), self::ENCODING, $this->byteLimit);
    }
135
136
    /**
     * Clock sync check
     *
     * Compares the given timestamp with the local time() and refuses to
     * continue when the two differ by more than 10 seconds.
     *
     * @param int $time Timestamp to compare against the PHP server clock
     * @throws SQLException
     */
    private function clockSyncCheck($time)
    {
        $outOfSync = abs(time() - $time) > 10;
        if ($outOfSync) {
            throw new SQLException('`PHP server` and `SQL server` timestamps are out of sync. Please fix!');
        }
    }
148
149
    /**
     * Mark robots.txt as active
     *
     * When the worker value compares equal to 0, the record for the given base
     * has its `worker` column switched from 0 to NULL. Records with a NULL
     * worker are the ones cron() picks up, while clean() only deletes records
     * whose worker is 0. For any other worker value nothing is written.
     *
     * @param string $base
     * @param int|null $workerID
     * @return bool
     */
    private function markAsActive($base, $workerID = 0)
    {
        if ($workerID != 0) {
            return true;
        }
        $statement = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET worker = NULL
WHERE base = :base AND worker = 0;
SQL
        );
        $statement->bindParam(':base', $base, PDO::PARAM_STR);
        return $statement->execute();
    }
170
171
    /**
     * Update a robots.txt in the database
     *
     * For a 5xx response on a base starting with `http`, displacePush() is tried
     * first; when it succeeds the stored record is kept and only rescheduled.
     * In every other case the rendered content is inserted, or the existing
     * record for the base is overwritten and its worker reset to 0.
     *
     * @param UriClient $client
     * @return bool
     */
    private function push(UriClient $client)
    {
        $base = $client->getBaseUri();
        $statusCode = $client->getStatusCode();
        $nextUpdate = $client->nextUpdate();
        if (
            $statusCode >= 500 &&
            $statusCode < 600 &&
            mb_stripos($base, 'http') === 0 &&
            $this->displacePush($base, $nextUpdate)
        ) {
            return true;
        }
        $validUntil = $client->validUntil();
        $content = $client->render();
        // A named placeholder may only appear once per statement unless prepares
        // are emulated, so the UPDATE clause gets its own set of placeholders.
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache0 (base, content, statusCode, validUntil, nextUpdate)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate)
ON DUPLICATE KEY UPDATE content = :newContent, statusCode = :newStatusCode, validUntil = :newValidUntil,
  nextUpdate = :newNextUpdate, worker = 0;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->bindParam(':content', $content, PDO::PARAM_STR);
        $query->bindParam(':statusCode', $statusCode, PDO::PARAM_INT);
        $query->bindParam(':validUntil', $validUntil, PDO::PARAM_INT);
        $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        $query->bindParam(':newContent', $content, PDO::PARAM_STR);
        $query->bindParam(':newStatusCode', $statusCode, PDO::PARAM_INT);
        $query->bindParam(':newValidUntil', $validUntil, PDO::PARAM_INT);
        $query->bindParam(':newNextUpdate', $nextUpdate, PDO::PARAM_INT);
        return $query->execute();
    }
206
207
    /**
     * Displace push timestamp
     *
     * If a record for the base exists and its `validUntil` is still later than
     * the database time, the record content is left alone: only `nextUpdate` is
     * rewritten (to the earlier of `validUntil` and the given value) and the
     * worker is reset to NULL.
     *
     * @param string $base
     * @param int $nextUpdate
     * @return bool Result of the update, or false when there is no still-valid record
     * @throws SQLException If the PHP and SQL server clocks are out of sync
     */
    private function displacePush($base, $nextUpdate)
    {
        $query = $this->pdo->prepare(<<<SQL
SELECT
  validUntil,
  UNIX_TIMESTAMP()
FROM robotstxt__cache0
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // rowCount() is not reliable for SELECT statements; test the fetched row instead.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        if (!is_array($row)) {
            return false;
        }
        $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
        if ($row['validUntil'] > $row['UNIX_TIMESTAMP()']) {
            $nextUpdate = min($row['validUntil'], $nextUpdate);
            $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET nextUpdate = :nextUpdate, worker = NULL
WHERE base = :base;
SQL
            );
            $query->bindParam(':base', $base, PDO::PARAM_STR);
            $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
            return $query->execute();
        }
        return false;
    }
244
245
    /**
     * Process the update queue
     *
     * Repeatedly claims the unassigned record with the oldest due `nextUpdate`
     * for this worker, then re-fetches and stores every record currently
     * assigned to the worker. Stops once no further record can be claimed.
     *
     * @param int|null $workerID
     * @return bool True when the queue has been drained, false when a statement or a push failed
     */
    public function cron($workerID = null)
    {
        $worker = $this->setWorkerID($workerID);
        // Claiming and reading are separate statements: sent as one multi-statement
        // query, rowCount() and fetch() would both refer to the UPDATE and the
        // SELECT result would never be read.
        $claim = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache0
SET worker = :workerID
WHERE worker IS NULL AND nextUpdate <= UNIX_TIMESTAMP()
ORDER BY nextUpdate ASC
LIMIT 1;
SQL
        );
        $claim->bindParam(':workerID', $worker, PDO::PARAM_INT);
        $select = $this->pdo->prepare(<<<SQL
SELECT base
FROM robotstxt__cache0
WHERE worker = :workerID;
SQL
        );
        $select->bindParam(':workerID', $worker, PDO::PARAM_INT);
        while (true) {
            if (!$claim->execute()) {
                return false;
            }
            if ($claim->rowCount() < 1) {
                // Nothing left to claim.
                return true;
            }
            if (!$select->execute()) {
                return false;
            }
            // Read the complete result before push() issues further queries on
            // the same connection.
            $bases = $select->fetchAll(PDO::FETCH_COLUMN);
            $result = true;
            foreach ($bases as $base) {
                if (!$this->push(new UriClient($base, $this->guzzleConfig, $this->byteLimit))) {
                    // Remember the failure instead of letting a later success hide it.
                    $result = false;
                }
            }
            if (!$result) {
                return false;
            }
        }
    }
279
280
    /**
     * Set WorkerID
     *
     * An integer from 1 to 255 is returned unchanged. Any other non-null value
     * raises an E_USER_WARNING; in that case, and for null, a random ID in the
     * same range is returned instead.
     *
     * @param int|null $workerID
     * @return int
     */
    protected function setWorkerID($workerID = null)
    {
        $inRange = is_int($workerID) && $workerID >= 1 && $workerID <= 255;
        if ($inRange) {
            return $workerID;
        }
        if ($workerID !== null) {
            trigger_error('WorkerID out of range (1-255)', E_USER_WARNING);
        }
        return rand(1, 255);
    }
299
300
    /**
     * Clean the cache table
     *
     * Deletes every record whose worker is 0 and whose `nextUpdate` lies more
     * than CACHE_TIME plus the given delay behind the database time.
     *
     * @param int $delay - in seconds
     * @return bool
     */
    public function clean($delay = 600)
    {
        $threshold = self::CACHE_TIME + $delay;
        $statement = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__cache0
WHERE worker = 0 AND nextUpdate < (UNIX_TIMESTAMP() - :delay);
SQL
        );
        $statement->bindParam(':delay', $threshold, PDO::PARAM_INT);
        return $statement->execute();
    }
317
318
    /**
     * Invalidate cache
     *
     * Deletes the cached record that belongs to the base of the given URI.
     *
     * @param string $baseUri
     * @return bool
     */
    public function invalidate($baseUri)
    {
        $encodedUri = $this->urlEncode($baseUri);
        $base = $this->urlBase($encodedUri);
        $statement = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__cache0
WHERE base = :base;
SQL
        );
        $statement->bindParam(':base', $base, PDO::PARAM_STR);
        return $statement->execute();
    }
335
}
336