These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
0 ignored issues
–
show
|
|||
2 | require_once __DIR__ . '/config.php'; |
||
3 | |||
4 | use Symfony\Component\Console\Helper\ProgressBar; |
||
5 | use Symfony\Component\Console\Output\OutputInterface; |
||
6 | |||
// Process ID of this launcher script; runProcesses() forwards it to every worker via `-p`.
define('PID', getmypid());

/**
 * Distributes the given URLs across PROCESSES background workers and draws a
 * progress bar until every queued URL has received a status.
 *
 * The `pages` table is dropped and recreated on every call. Each URL is stored
 * with a `scope` equal to the index of the worker slice it belongs to, and one
 * `_process.php` worker is started per scope with `-s <scope> -p <PID> -t <task>`.
 * Progress is measured by polling the table for rows whose `status` is no
 * longer NULL.
 *
 * DSN and PROCESSES are constants that are not defined in this file.
 *
 * @param array           $pages  URLs to process
 * @param string          $task   task name handed to every worker via `-t`
 * @param OutputInterface $output console output the progress bar is rendered on
 */
function runProcesses($pages, $task, &$output)
{
    $pdo = new PDO(DSN);
    $pdo->query('DROP TABLE IF EXISTS pages');
    $pdo->query('CREATE TABLE pages (url TEXT NOT NULL, status TEXT, scope INTEGER NOT NULL)');

    // Loop-invariant values: size of one worker's share and the escaped command parts.
    $sliceSize = (int) ceil(count($pages) / PROCESSES);
    $workerScript = escapeshellarg(__DIR__ . '/_process.php');
    $taskArgument = escapeshellarg($task);

    // Number of rows that really made it into the table; the wait loop below
    // targets this value so that a failed insert cannot make it spin forever.
    $queued = 0;

    for ($i = 0; $i < PROCESSES; $i++) {
        $slice = array_slice($pages, $i * $sliceSize, $sliceSize);

        // One bound parameter per URL, at most 999 per statement
        // (presumably the driver's bound-variable limit - TODO confirm).
        foreach (array_chunk($slice, 999) as $part) {
            $values = implode(',', array_fill(0, count($part), "(?,$i)"));
            $stmt = $pdo->prepare("INSERT INTO pages (url, scope) VALUES $values");
            if (!$stmt) {
                // prepare() failed: report it and skip this chunk instead of
                // calling execute() on a non-object.
                fwrite(STDERR, $pdo->errorInfo()[2] . PHP_EOL);
                continue;
            }
            if ($stmt->execute($part)) {
                $queued += count($part);
            } else {
                fwrite(STDERR, $stmt->errorInfo()[2] . PHP_EOL);
            }
            unset($stmt);
        }

        execInBackground("php $workerScript -s $i -p " . PID . " -t $taskArgument");
    }

    $progress = new ProgressBar($output, $queued);
    $progress->setRedrawFrequency(10);
    $progress->setFormatDefinition('custom', ' %current%/%max% %bar% %message%');
    $progress->setFormat('custom');
    $progress->setMessage('');
    $progress->start();

    $done = 0;
    while ($done < $queued) {
        $done = (int) $pdo->query('SELECT COUNT(status) FROM pages WHERE status IS NOT NULL')->fetchColumn();
        $codes = $pdo->query('SELECT status, COUNT(status) AS count FROM pages WHERE status IS NOT NULL GROUP BY status')->fetchAll(PDO::FETCH_ASSOC);

        // Per-status summary shown next to the bar, e.g. "ok: 12 404: 3".
        $message = [];
        foreach ($codes as $code) {
            $message[] = str_replace('200', 'ok', $code['status']) . ': ' . $code['count'];
        }
        $progress->setMessage(implode(' ', $message));
        $progress->setProgress($done);

        // Poll twice per second.
        usleep(500000);
    }
    $progress->finish();
}
||
58 | |||
/**
 * Loads `<website_url>/sitemap.xml` and returns the URLs listed in it.
 *
 * @param string          $website_url base URL of the site; '/sitemap.xml' is appended to it verbatim
 * @param OutputInterface $output      console output used for the status line
 * @return array URL-decoded `loc` value of every direct child of the sitemap root
 * @throws Exception when the sitemap cannot be loaded or parsed
 */
function getPages($website_url, &$output)
{
    $sitemapUrl = $website_url . '/sitemap.xml';

    $output->writeln(" Getting sitemap of $website_url ...");
    $sitemap = simplexml_load_file($sitemapUrl);

    if ($sitemap === false) {
        // Report the address that was actually requested.
        throw new Exception("Can't get $sitemapUrl");
    }

    $pages = [];
    foreach ($sitemap as $url) {
        // $url->loc is a SimpleXMLElement; convert it explicitly before decoding.
        $pages[] = urldecode((string) $url->loc);
    }
    return $pages;
}
||
80 | |||
/**
 * Starts a shell command without waiting for it to finish.
 *
 * On Windows the command is launched through `start /B`; on every other
 * system it is sent to the background with `&` and its stdout and stderr are
 * redirected to /dev/null.
 *
 * @param $cmd string complete command line; it is handed to the shell as-is,
 *                    so the caller is responsible for escaping its arguments
 */
function execInBackground($cmd)
{
    if (substr(php_uname(), 0, 7) === 'Windows') {
        $handle = popen('start /B ' . $cmd, 'r');
        if ($handle === false) {
            // popen() returns false on failure; pclose() must not receive that.
            fwrite(STDERR, "Can't start background process: $cmd" . PHP_EOL);
            return;
        }
        pclose($handle);
        return;
    }

    exec($cmd . ' > /dev/null 2>/dev/null &');
}
||
92 | |||
93 | |||
$app = new Silly\Application();

// `check <website_url>`: reads the site's sitemap and runs the 'check' task on every listed page.
$app->command('check website_url', function ($website_url, OutputInterface $output) {
    try {
        $pages = getPages($website_url, $output);
    } catch (Exception $exception) {
        $output->writeln($exception->getMessage());
        // Exit with a non-zero status so the shell can tell the run failed.
        exit(1);
    }
    runProcesses($pages, 'check', $output);
});
||
105 | |||
// `links <website_url>`: runs the 'parse-links' task on every sitemap page,
// then runs the 'check' task on every URL found in the `links` table.
$app->command('links website_url', function ($website_url, OutputInterface $output) {
    try {
        $pages = getPages($website_url, $output);
    } catch (Exception $exception) {
        $output->writeln($exception->getMessage());
        // Exit with a non-zero status so the shell can tell the run failed.
        exit(1);
    }

    // Start from an empty `links` table; the unique index allows each URL only once.
    $pdo = new PDO(DSN);
    $pdo->query('DROP TABLE IF EXISTS links');
    $pdo->query('CREATE TABLE links (url TEXT NOT NULL, status TEXT)');
    $pdo->query('CREATE UNIQUE INDEX links_url_uindex ON links (url);');
    unset($pdo);

    runProcesses($pages, 'parse-links', $output);

    // The table is presumably filled by the 'parse-links' workers (not visible here).
    // NOTE(review): a statement deleting every link whose URL does not start
    // with 'http' used to sit here, commented out. Confirm whether non-http
    // links are meant to be checked; if not, reinstate that filter.
    $pdo = new PDO(DSN);
    $pages = $pdo->query('SELECT url FROM links')->fetchAll(PDO::FETCH_COLUMN);
    unset($pdo);

    $output->writeln('');
    $output->writeln(' Checking founded links...');

    runProcesses($pages, 'check', $output);
});
||
132 | |||
// `metadata <website_url>`: fetches every page listed in the sitemap and writes
// its title, keywords and description to runtime/<host>-metadata.csv.
$app->command('metadata website_url', function ($website_url, OutputInterface $output) {
    // Guarded so that invoking this closure more than once cannot redeclare the function.
    if (!function_exists('walker')) {
        /**
         * Walks the path tree depth-first. For every node that has a `_self`
         * entry the callback is invoked before the node's children are visited.
         *
         * @param array    $tree     nested array keyed by path segment
         * @param callable $function called as $function($node['_self'], $level, $path)
         * @param int      $level    depth of the nodes in $tree
         * @param string   $path     slash-joined names of the ancestors of $tree
         */
        function walker($tree, $function, $level = 0, $path = '')
        {
            foreach ($tree as $branchName => $branch) {
                if (isset($branch['_self'])) {
                    $function($branch['_self'], $level, $path);
                    unset($branch['_self']);
                }
                if (count($branch) > 0) {
                    walker($branch, $function, $level + 1, "$path/$branchName");
                }
            }
        }
    }

    $sitemapUrl = $website_url . '/sitemap.xml';
    $output->writeln(" Getting sitemap of $website_url ...");
    $sitemap = simplexml_load_file($sitemapUrl);
    if ($sitemap === false) {
        // Same failure handling as the other commands: report and stop.
        $output->writeln("Can't get $sitemapUrl");
        exit(1);
    }

    $resultFile = __DIR__ . '/runtime/' . parse_url($website_url, PHP_URL_HOST) . '-metadata.csv';

    $paths = [];
    foreach ($sitemap as $url) {
        $paths[] = parse_url((string) $url->loc, PHP_URL_PATH);
    }
    natsort($paths);

    // Build a tree keyed by path segment; the node a path ends on gets a `_self` entry.
    $tree = [];
    foreach ($paths as $path) {
        // Strict comparison keeps a "0" segment, and dropping empty segments
        // up front lets a path with a trailing slash still reach its own node.
        $segments = array_values(array_filter(
            explode('/', (string) $path),
            function ($segment) {
                return $segment !== '';
            }
        ));
        if ($segments === []) {
            // The home page has no segments; it is stored under the key '/'.
            $segments = ['/'];
        }

        $node = &$tree;
        foreach ($segments as $segment) {
            if (!isset($node[$segment])) {
                $node[$segment] = [];
            }
            $node = &$node[$segment];
        }
        $node['_self'] = ['path' => $path];
        unset($node);
    }

    file_put_contents($resultFile, 'URL, Title, Keywords, Description, "Build Time: ' . date('r') . '"' . PHP_EOL);
    $previous = '';
    $progress = new \cli\progress\Bar(' Getting meta data', count($paths), 1000);
    walker($tree, function (&$self, $level, $path) use ($website_url, &$previous, $resultFile, &$progress) {
        $pageUrl = $website_url . $self['path'];
        $page = file_get_contents($pageUrl);
        if ($page === false) {
            // A page that cannot be fetched yields empty metadata cells.
            $page = '';
        }

        $patterns = [
            '~<title>(.*?)</title>~',
            '~<meta name="keywords" content="(.*?)">~',
            '~<meta name="description" content="(.*?)">~',
        ];
        $data = [];
        foreach ($patterns as $pattern) {
            preg_match($pattern, $page, $match);
            $data[] = $match[1] ?? '';
        }

        // Metadata identical to the previously written page is replaced by ditto marks.
        if ($data == $previous) {
            $data = array_fill(0, count($data), '--//--');
        } else {
            $previous = $data;
        }

        // Quote every cell and double embedded quotes so the row stays valid CSV.
        $row = array_map(function ($item) {
            return '"' . str_replace('"', '""', $item) . '"';
        }, array_merge([$pageUrl], $data));

        file_put_contents($resultFile, implode(',', $row) . PHP_EOL, FILE_APPEND);
        $progress->tick();
    });

    $progress->finish();
});

// Hand control to the console application, which dispatches to the commands registered above.
/** @noinspection PhpUnhandledExceptionInspection */
$app->run();
The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.