Issues (9)

_process.php (4 issues)

Labels
Severity
1
<?php
2
3
use DenisBeliaev\SitemapParser\Link;
4
use DenisBeliaev\SitemapParser\Page;
5
6
require_once __DIR__ . '/config.php';
7
8
// Command-line options:
//   -s <scope>  scope whose pages are processed (required)
//   -p <pid>    PID of the parent process; the worker stops once it is gone (optional)
//   -t <task>   'check' (default) or 'parse-links'
$opts = getopt('s:p:t:') ?: [];

$scope = $opts['s'] ?? null;
$pPID = $opts['p'] ?? null;
$task = $opts['t'] ?? 'check';

if (!is_string($scope)) {
    fwrite(STDERR, 'Missing required option: -s <scope>' . PHP_EOL);
    exit(1);
}

$pdo = new PDO(DSN);
// Set once for the whole run instead of before every write.
$pdo->setAttribute(PDO::ATTR_TIMEOUT, 100);

// Prepare a statement, or report the driver error and stop when PDO runs in
// silent error mode and prepare() returns false.
$prepare = static function (string $sql) use ($pdo): PDOStatement {
    $statement = $pdo->prepare($sql);
    if ($statement === false) {
        fwrite(STDERR, $pdo->errorInfo()[2] . PHP_EOL);
        exit(1);
    }

    return $statement;
};

$selectStm = $prepare('SELECT url FROM pages WHERE scope=:scope');
$selectStm->execute([':scope' => $scope]);
$items = $selectStm->fetchAll(PDO::FETCH_COLUMN);
unset($selectStm);

// Statements reused on every iteration are prepared once, outside the loop.
$updateStm = $prepare('UPDATE pages SET status=:status WHERE url=:url');
$existsStm = $prepare('SELECT COUNT(url) FROM pages WHERE url=:url');

$ch = curl_init();
if ($ch === false) {
    fwrite(STDERR, 'Unable to initialise cURL' . PHP_EOL);
    exit(1);
}

if ($task === 'check') {
    // Status check only: request headers without the body.
    curl_setopt($ch, CURLOPT_NOBODY, true);
} elseif ($task === 'parse-links') {
    // The body is needed for link extraction.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
}

// Rows per INSERT: 3 bound values each, kept below SQLite's historical
// default limit of 999 bound parameters per statement.
$insertBatchSize = 300;

foreach ($items as $item) {
    curl_setopt($ch, CURLOPT_URL, $item);
    $httpCode = 'ERR';

    // curl_exec() returns false only on failure; an empty body ('') is still
    // a completed request and must not be recorded as 'ERR'.
    $content = curl_exec($ch);
    if ($content !== false) {
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        if ($httpCode === 501 && $task === 'check') {
            // 501 on the body-less request: retry once with the body enabled,
            // then restore the body-less mode for the next URL.
            curl_setopt($ch, CURLOPT_NOBODY, false);
            $content = curl_exec($ch);
            if ($content !== false) {
                $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            }
            curl_setopt($ch, CURLOPT_NOBODY, true);
        }
    }

    $updateStm->execute([':status' => $httpCode, ':url' => $item]);

    // Links are only parsed when a body was actually fetched.
    if ($task === 'parse-links' && is_string($content)) {
        $Page = new Page($content, $item);
        $rawLinks = $Page->links;

        // Normalise every link against the page base. No by-reference loops:
        // a dangling reference previously overwrote the last collected link.
        $normalized = [];
        foreach ($rawLinks as $key => $rawLink) {
            $normalized[$key] = Link::normalize($rawLink, $Page->base);
        }
        // Drop empty results and duplicates; keys still point at $rawLinks.
        $normalized = array_unique(array_filter($normalized));

        // Keep only targets that are not already listed in `pages`.
        $rows = [];
        foreach ($normalized as $key => $url) {
            $existsStm->execute([':url' => $url]);
            $known = (int) $existsStm->fetchColumn();
            $existsStm->closeCursor();
            if ($known === 0) {
                // Row layout: source page, link as written, normalised URL.
                $rows[] = [$item, $rawLinks[$key], $url];
            }
        }

        foreach (array_chunk($rows, $insertBatchSize) as $batch) {
            $placeholders = implode(',', array_fill(0, count($batch), '(?,?,?)'));
            $insertStm = $prepare('INSERT OR IGNORE INTO links (`page`, link, url) VALUES ' . $placeholders);
            $insertStm->execute(array_merge(...$batch));
            unset($insertStm);
        }
    }

    // Stop as soon as the parent process (when one was given) is gone.
    if ($pPID && !isRunning($pPID)) {
        exit(1);
    }
}

unset($updateStm, $existsStm, $pdo);
curl_close($ch);
89
90
/**
 * Tell whether a process with the given PID currently exists.
 *
 * Uses posix_kill() with signal 0 when the POSIX extension is available and
 * falls back to counting the output lines of `ps -W -p <pid>` otherwise.
 *
 * @param int|string $pid Process ID; integer strings (e.g. from getopt()) are accepted.
 *
 * @return bool True when the process was found; false when it was not, or
 *              when $pid is not a positive integer.
 */
function isRunning($pid): bool
{
    // Only a positive integer is a single-process PID: 0 and negative values
    // address process groups in kill(2), and anything else must never reach
    // the shell command below.
    $pid = filter_var($pid, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
    if ($pid === false) {
        return false;
    }

    if (function_exists('posix_kill')) {
        // Signal 0 performs the existence/permission check without sending a signal.
        return posix_kill($pid, 0);
    }

    $out = [];
    exec('ps -W -p ' . $pid, $out);

    // More than one output line is taken to mean header plus a process row.
    return count($out) > 1;
}
98