1 | <?php |
||||
2 | |||||
3 | use DenisBeliaev\SitemapParser\Link; |
||||
4 | use DenisBeliaev\SitemapParser\Page; |
||||
5 | |||||
require_once __DIR__ . '/config.php';

/*
 * Worker script: re-checks (or crawls) every page URL stored for one scope.
 *
 * Command-line options:
 *   -s <scope>  required; selects the rows of `pages` to process
 *   -p <pid>    optional; PID of a parent process. The worker exits with
 *               status 1 as soon as that process is no longer running.
 *   -t <task>   optional; "check" (default) issues body-less requests and
 *               stores the HTTP status, "parse-links" downloads the page and
 *               additionally records links that are not yet known pages.
 */
$opts = getopt('s:p:t:');

if (!isset($opts['s'])) {
    fwrite(STDERR, 'Missing required option: -s <scope>' . PHP_EOL);
    exit(1);
}

$scope = $opts['s'];
$pPID = $opts['p'] ?? null;
$task = $opts['t'] ?? 'check';

$pdo = new PDO(DSN);

$stm = $pdo->prepare('SELECT url FROM pages WHERE scope=:scope');
$stm->execute([':scope' => $scope]);
$items = $stm->fetchAll(PDO::FETCH_COLUMN);
// Statements are released right after use throughout this script.
// NOTE(review): presumably so no cursor/lock outlives the query - confirm.
unset($stm);

// Loop-invariant, so it is set once here instead of before every write.
$pdo->setAttribute(PDO::ATTR_TIMEOUT, 100);

$ch = curl_init();
if ($ch === false) {
    fwrite(STDERR, 'Unable to initialise cURL' . PHP_EOL);
    exit(1);
}

if ($task === 'check') {
    // Only the status line is needed, so skip the response body.
    curl_setopt($ch, CURLOPT_NOBODY, true);
} elseif ($task === 'parse-links') {
    // The body is needed as a string for link extraction.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
}

// Rows per INSERT statement. Three parameters are bound per row, so this
// keeps each statement below 999 bound variables.
$rowsPerInsert = 300;

foreach ($items as $item) {
    curl_setopt($ch, CURLOPT_URL, $item);

    // Stored as the page status when the transfer itself fails.
    $httpCode = 'ERR';

    // curl_exec() returns false only on failure. A successful transfer may
    // legitimately yield an empty string, so compare strictly against false.
    $content = curl_exec($ch);
    if ($content !== false) {
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        if ($httpCode == 501 && $task === 'check') {
            // The body-less request was answered with 501 Not Implemented:
            // retry once with the body enabled, then restore body-less mode.
            // NOTE(review): CURLOPT_RETURNTRANSFER is not set in this mode, so
            // the body of this retry is written to stdout.
            curl_setopt($ch, CURLOPT_NOBODY, false);
            if (curl_exec($ch) !== false) {
                $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            }
            curl_setopt($ch, CURLOPT_NOBODY, true);
        }
    }

    $stm = $pdo->prepare('UPDATE pages SET status=:status WHERE url=:url');
    $stm->execute([':status' => $httpCode, ':url' => $item]);
    unset($stm);

    // Links can only be extracted when a body was actually downloaded.
    if ($task === 'parse-links' && is_string($content)) {
        $page = new Page($content, $item);
        $rawLinks = $page->links;

        // Normalise every link, then drop empty results and duplicates.
        // Keys are preserved so the raw link text can be looked up afterwards.
        $normalized = [];
        foreach ($rawLinks as $key => $rawLink) {
            $normalized[$key] = Link::normalize($rawLink, $page->base);
        }
        $normalized = array_unique(array_filter($normalized));

        // Keep only links whose target is not already a known page. Rows are
        // built by value (no by-reference loops), so no dangling reference can
        // overwrite an entry later on.
        $rows = [];
        foreach ($normalized as $key => $url) {
            $stm = $pdo->prepare('SELECT COUNT(url) FROM pages WHERE url=:url');
            $stm->execute([':url' => $url]);
            $known = $stm->fetchColumn();
            unset($stm);

            if ((int) $known === 0) {
                // Row layout: [source page, link as found, normalised URL].
                $rows[] = [$item, $rawLinks[$key], $url];
            }
        }

        // Insert in bounded batches so a link-heavy page cannot exceed the
        // database's limit on bound parameters per statement.
        foreach (array_chunk($rows, $rowsPerInsert) as $batch) {
            $placeholders = implode(',', array_fill(0, count($batch), '(?,?,?)'));
            $stm = $pdo->prepare('INSERT OR IGNORE INTO links (`page`, link, url) VALUES ' . $placeholders);
            if ($stm === false) {
                fwrite(STDERR, $pdo->errorInfo()[2] . PHP_EOL);
                // Report the failure through a non-zero exit status.
                exit(1);
            }
            // Flatten [[page, link, url], ...] into one positional list.
            $stm->execute(array_merge(...$batch));
            unset($stm);
        }
    }

    // Stop early when the parent process that started this worker is gone.
    if ($pPID && !isRunning($pPID)) {
        exit(1);
    }
}

unset($pdo);
curl_close($ch);
89 | |||||
/**
 * Tells whether a process with the given PID currently exists.
 *
 * Uses posix_kill() with signal 0 (an existence probe that delivers no
 * signal) when the POSIX extension is loaded; otherwise falls back to
 * counting the output lines of `ps -W -p <pid>`.
 *
 * @param int|string $pid Process ID; numeric strings (e.g. taken from the
 *                        command line) are accepted.
 *
 * @return bool True when the process appears to be running. False for
 *              non-numeric, zero or negative IDs.
 */
function isRunning($pid)
{
    // The value originates from the command line: force it to an integer so
    // it can neither break posix_kill()'s type expectations nor inject shell
    // syntax into the fallback command below.
    $pid = (int) $pid;
    if ($pid <= 0) {
        // 0 and negative values address process groups, not a single process.
        return false;
    }

    if (function_exists('posix_kill')) {
        return posix_kill($pid, 0);
    }

    $out = [];
    exec('ps -W -p ' . $pid, $out);

    // More than one line means a process row followed the header line.
    return count($out) > 1;
}
||||
98 |