Completed
Push — master ( 97cb3a...821cb6 )
by Jan-Petter
02:15
created

Cache   B

Complexity

Total Complexity 36

Size/Duplication

Total Lines 342
Duplicated Lines 7.31 %

Coupling/Cohesion

Components 2
Dependencies 5

Importance

Changes 9
Bugs 6 Features 0
Metric Value
wmc 36
c 9
b 6
f 0
lcom 2
cbo 5
dl 25
loc 342
rs 8.8

11 Methods

Rating   Name   Duplication   Size   Complexity  
A clockSyncCheck() 0 6 2
A markAsActive() 14 14 2
A __construct() 0 6 1
B pdoInitialize() 0 23 5
B client() 0 29 3
C push() 0 37 7
B displacePush() 0 31 3
A invalidate() 0 11 1
B cron() 0 36 6
B setWorkerID() 0 13 5
A clean() 11 11 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use PDO;
5
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
6
use vipnytt\RobotsTxtParser\Exceptions\SQLException;
7
use vipnytt\RobotsTxtParser\Parser\UriParser;
8
9
/**
10
 * Class Cache
11
 *
12
 * @see https://github.com/VIPnytt/RobotsTxtParser/blob/master/docs/methods/Cache.md for documentation
13
 * @package vipnytt\RobotsTxtParser
14
 */
15
class Cache implements RobotsTxtInterface, SQLInterface
16
{
17
    use UriParser;
18
19
    /**
20
     * Supported database drivers
21
     */
22
    const SUPPORTED_DRIVERS = [
23
        self::DRIVER_MYSQL,
24
    ];
25
26
    /**
27
     * Client nextUpdate margin in seconds
28
     * @var int
29
     */
30
    protected $clientUpdateMargin = 300;
31
32
    /**
33
     * Database handler
34
     * @var PDO
35
     */
36
    private $pdo;
37
38
    /**
39
     * cURL options
40
     * @var array
41
     */
42
    private $curlOptions = [];
43
44
    /**
45
     * Byte limit
46
     * @var int|null
47
     */
48
    private $byteLimit = self::BYTE_LIMIT;
49
50
    /**
51
     * PDO driver
52
     * @var string
53
     */
54
    private $driver;
55
56
    /**
     * Cache constructor.
     *
     * Runs the given connection through pdoInitialize() and stores the
     * request settings that are handed to every client created by this class.
     *
     * @param PDO $pdo - database connection used as cache storage
     * @param array $curlOptions - cURL options forwarded to UriClient
     * @param int|null $byteLimit - byte limit forwarded to UriClient and TxtClient
     * @throws SQLException
     */
    public function __construct(PDO $pdo, array $curlOptions = [], $byteLimit = self::BYTE_LIMIT)
    {
        // Initialization comes first: it may throw, in which case nothing else is stored
        $handler = $this->pdoInitialize($pdo);
        $this->pdo = $handler;
        $this->byteLimit = $byteLimit;
        $this->curlOptions = $curlOptions;
    }
69
70
    /**
     * Initialize PDO connection
     *
     * Verifies that the driver is supported, applies the connection attributes
     * used by this class and makes sure the cache table can be queried. When
     * the probe query fails, the bundled `cache.sql` is executed to create it.
     *
     * @param PDO $pdo
     * @return PDO
     * @throws SQLException if the driver is unsupported or the table is missing and could not be created
     */
    private function pdoInitialize(PDO $pdo)
    {
        $this->driver = $pdo->getAttribute(PDO::ATTR_DRIVER_NAME);
        // Reject unsupported drivers before any driver specific SQL is sent
        if (!in_array($this->driver, self::SUPPORTED_DRIVERS, true)) {
            throw new SQLException('Unsupported database. ' . self::README_SQL_CACHE);
        }
        if ($pdo->getAttribute(PDO::ATTR_ERRMODE) === PDO::ERRMODE_SILENT) {
            $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
        }
        $pdo->setAttribute(PDO::ATTR_CASE, PDO::CASE_NATURAL);
        $pdo->setAttribute(PDO::ATTR_ORACLE_NULLS, PDO::NULL_NATURAL);
        $pdo->exec('SET NAMES ' . self::SQL_ENCODING);
        // In warning mode query() returns false instead of throwing, so both
        // outcomes have to be treated as "table not available".
        try {
            $tableExists = $pdo->query("SELECT 1 FROM robotstxt__cache1 LIMIT 1;") !== false;
        } catch (\Exception $exception1) {
            $tableExists = false;
        }
        if (!$tableExists) {
            $schema = file_get_contents(__DIR__ . '/../res/cache.sql');
            try {
                // An unreadable schema file counts as a failed setup as well
                $created = $schema !== false && $pdo->query($schema) !== false;
            } catch (\Exception $exception2) {
                $created = false;
            }
            if (!$created) {
                throw new SQLException('Missing table `' . self::TABLE_CACHE . '`. Setup instructions: ' . self::README_SQL_CACHE);
            }
        }
        return $pdo;
    }
100
101
    /**
     * Parser client
     *
     * Looks up the cached record for the base of the given URI. If a record
     * exists and its `nextUpdate` is later than the database time minus the
     * client update margin, the cached content is returned. Otherwise the
     * robots.txt is requested through UriClient, pushed to the cache and
     * returned.
     *
     * @param string $baseUri
     * @return TxtClient
     * @throws SQLException
     */
    public function client($baseUri)
    {
        $base = $this->urlBase($baseUri);
        $query = $this->pdo->prepare(<<<SQL
SELECT
  content,
  statusCode,
  nextUpdate,
  effective,
  worker,
  UNIX_TIMESTAMP()
FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // rowCount() is not guaranteed to be meaningful for SELECT statements,
        // so the presence of a row is decided by the fetch result instead.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        if ($row !== false) {
            $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
            if ($row['nextUpdate'] > ($row['UNIX_TIMESTAMP()'] - $this->clientUpdateMargin)) {
                $this->markAsActive($base, $row['worker']);
                return new TxtClient($base, $row['statusCode'], $row['content'], self::ENCODING, $row['effective'], $this->byteLimit);
            }
        }
        // No usable cache record: request the file and store the result
        $request = new UriClient($base, $this->curlOptions, $this->byteLimit);
        $this->push($request, null);
        return new TxtClient($base, $request->getStatusCode(), $request->getContents(), $request->getEncoding(), $request->getEffectiveUri(), $this->byteLimit);
    }
136
137
    /**
     * Clock sync check
     *
     * Compares the given timestamp with the local time() and refuses to
     * continue when the two differ by 10 seconds or more.
     *
     * @param int $time - timestamp to compare against the local clock
     * @throws SQLException
     */
    private function clockSyncCheck($time)
    {
        $drift = abs(time() - $time);
        if ($drift < 10) {
            return;
        }
        throw new SQLException('`PHP server` and `SQL server` timestamps are out of sync. Please fix!');
    }
149
150
    /**
     * Mark robots.txt as active
     *
     * When the given worker ID loosely equals 0, the `worker` column of the
     * record for `$base` is reset from 0 to NULL. Any other worker ID leaves
     * the table untouched.
     *
     * @param string $base
     * @param int|null $workerID
     * @return bool
     */
    private function markAsActive($base, $workerID = 0)
    {
        // Loose comparison on purpose: null and 0 are handled alike
        if ($workerID != 0) {
            return true;
        }
        $statement = $this->pdo->prepare(<<<'SQL'
UPDATE robotstxt__cache1
SET worker = NULL
WHERE base = :base AND worker = 0;
SQL
        );
        $statement->bindValue(':base', $base, PDO::PARAM_STR);
        return $statement->execute();
    }
171
172
    /**
     * Update a robots.txt in the database
     *
     * For HTTP(S) bases whose request returned no status code or a 5xx status,
     * displacePush() is tried first; if it succeeds, nothing is overwritten.
     * In every other case the rendered content is inserted, or the existing
     * record is updated.
     *
     * @param UriClient $client
     * @param int|null $worker - value written to the `worker` column when an existing record is updated
     * @return bool
     * @throws SQLException
     */
    private function push(UriClient $client, $worker = 0)
    {
        $base = $client->getBaseUri();
        $statusCode = $client->getStatusCode();
        $nextUpdate = $client->nextUpdate();
        // The effective URI is only stored when it differs from the base
        $effective = $client->getEffectiveUri();
        if ($effective === $base) {
            $effective = null;
        }
        $isServerFailure = $statusCode === null || ($statusCode >= 500 && $statusCode < 600);
        if (
            stripos($base, 'http') === 0 &&
            $isServerFailure &&
            $this->displacePush($base, $nextUpdate)
        ) {
            return true;
        }
        $validUntil = $client->validUntil();
        $content = $client->render();
        // VALUES(col) is used in the update part so that every named
        // placeholder appears exactly once; reusing a named placeholder is
        // only possible while prepared statements are emulated.
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache1 (base, content, statusCode, validUntil, nextUpdate, effective)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate, :effective)
ON DUPLICATE KEY UPDATE content = VALUES(content), statusCode = VALUES(statusCode), validUntil = VALUES(validUntil),
  nextUpdate = VALUES(nextUpdate), effective = VALUES(effective), worker = :worker;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->bindParam(':content', $content, PDO::PARAM_STR);
        $query->bindParam(':statusCode', $statusCode, PDO::PARAM_INT | PDO::PARAM_NULL);
        $query->bindParam(':validUntil', $validUntil, PDO::PARAM_INT);
        $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        $query->bindParam(':effective', $effective, PDO::PARAM_STR | PDO::PARAM_NULL);
        $query->bindParam(':worker', $worker, PDO::PARAM_INT | PDO::PARAM_NULL);
        return $query->execute();
    }
216
217
    /**
     * Displace push timestamp
     *
     * If a record for `$base` exists and its `validUntil` is still in the
     * future (according to the database clock), only `nextUpdate` is moved to
     * the smaller of `validUntil` and `$nextUpdate`, and `worker` is reset to
     * NULL. If the record exists but is no longer valid, it is invalidated.
     *
     * @param string $base
     * @param int $nextUpdate
     * @return bool - true only when the existing record was kept and rescheduled
     * @throws SQLException
     */
    private function displacePush($base, $nextUpdate)
    {
        $query = $this->pdo->prepare(<<<SQL
SELECT
  validUntil,
  UNIX_TIMESTAMP()
FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->execute();
        // rowCount() is not guaranteed to be meaningful for SELECT statements,
        // so the presence of a row is decided by the fetch result instead.
        $row = $query->fetch(PDO::FETCH_ASSOC);
        if ($row === false) {
            return false;
        }
        $this->clockSyncCheck($row['UNIX_TIMESTAMP()']);
        if ($row['validUntil'] > $row['UNIX_TIMESTAMP()']) {
            $nextUpdate = min($row['validUntil'], $nextUpdate);
            $query = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET nextUpdate = :nextUpdate, worker = NULL
WHERE base = :base;
SQL
            );
            $query->bindParam(':base', $base, PDO::PARAM_STR);
            $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
            return $query->execute();
        }
        // The stored record has expired: drop it and let the caller store the new result
        $this->invalidate($base);
        return false;
    }
255
256
    /**
     * Invalidate cache
     *
     * Deletes the cache record belonging to the base of the given URI.
     *
     * @param string $baseUri
     * @return bool
     */
    public function invalidate($baseUri)
    {
        $statement = $this->pdo->prepare(<<<'SQL'
DELETE FROM robotstxt__cache1
WHERE base = :base;
SQL
        );
        $statement->bindValue(':base', $this->urlBase($baseUri), PDO::PARAM_STR);
        return $statement->execute();
    }
273
274
    /**
     * Process the update queue
     *
     * Repeatedly assigns the most overdue unassigned record to this worker,
     * then refreshes every record assigned to the worker. The loop ends when
     * the target time is exceeded or an iteration updated nothing.
     *
     * @param float|int $targetTime - time budget in seconds
     * @param int|null $workerID
     * @return string[] - the bases that were updated
     * @throws ClientException
     * @throws SQLException
     */
    public function cron($targetTime = 60, $workerID = null)
    {
        $start = microtime(true);
        $worker = $this->setWorkerID($workerID);
        $log = [];
        $lastCount = -1;
        // The two statements are prepared separately: a combined
        // "UPDATE ...; SELECT ..." statement exposes the UPDATE as its first
        // rowset, so fetch() would not return the selected bases.
        $claim = $this->pdo->prepare(<<<SQL
UPDATE robotstxt__cache1
SET worker = :workerID
WHERE worker IS NULL AND nextUpdate <= UNIX_TIMESTAMP()
ORDER BY nextUpdate ASC
LIMIT 1;
SQL
        );
        $claim->bindParam(':workerID', $worker, PDO::PARAM_INT);
        $select = $this->pdo->prepare(<<<SQL
SELECT base
FROM robotstxt__cache1
WHERE worker = :workerID
LIMIT 10;
SQL
        );
        $select->bindParam(':workerID', $worker, PDO::PARAM_INT);
        while (
            $targetTime > microtime(true) - $start &&
            count($log) > $lastCount
        ) {
            $lastCount = count($log);
            $claim->execute();
            $select->execute();
            // Read all rows up front so no result set is open while push() runs its own queries
            $bases = $select->fetchAll(PDO::FETCH_COLUMN) ?: [];
            foreach ($bases as $base) {
                if (!$this->push(new UriClient($base, $this->curlOptions, $this->byteLimit))) {
                    throw new ClientException('Unable to update `' . $base . '`');
                }
                $log[] = $base;
            }
        }
        return $log;
    }
318
319
    /**
     * Set WorkerID
     *
     * Returns the given ID when it is an integer between 1 and 255. Any other
     * non-null value triggers a warning, and a random ID in that range is
     * returned instead. Null silently results in a random ID.
     *
     * @param int|null $workerID
     * @return int
     */
    protected function setWorkerID($workerID = null)
    {
        $isUsable = is_int($workerID) && $workerID <= 255 && $workerID >= 1;
        if ($isUsable) {
            return $workerID;
        }
        if ($workerID !== null) {
            trigger_error('WorkerID out of range (1-255)', E_USER_WARNING);
        }
        return rand(1, 255);
    }
338
339
    /**
     * Clean the cache table
     *
     * Deletes records whose `worker` is 0 and whose `nextUpdate` lies more
     * than CACHE_TIME plus the given delay behind the database clock.
     *
     * @param int $delay - in seconds
     * @return bool
     */
    public function clean($delay = 600)
    {
        $maxAge = self::CACHE_TIME + $delay;
        $statement = $this->pdo->prepare(<<<'SQL'
DELETE FROM robotstxt__cache1
WHERE worker = 0 AND nextUpdate < (UNIX_TIMESTAMP() - :delay);
SQL
        );
        $statement->bindValue(':delay', $maxAge, PDO::PARAM_INT);
        return $statement->execute();
    }
356
}
357