Completed
Push — master ( 9dc7a5...9e19cf )
by Denis
01:42
created

sp.php (5 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
require_once __DIR__ . '/config.php';
3
4
use Symfony\Component\Console\Helper\ProgressBar;
5
use Symfony\Component\Console\Output\OutputInterface;
6
7
define('PID', getmypid());
8
/**
 * Splits the given pages between background workers and waits for them to finish.
 *
 * The `pages` table is recreated, every URL is stored together with the index
 * of the worker ("scope") it is assigned to, and one `_process.php` worker is
 * started per scope. The table is then polled every half second until the
 * number of rows with a status reaches the number of pages, while a progress
 * bar shows the per-status counts.
 *
 * @param array $pages URLs to process
 * @param string $task task name handed to each worker through the `-t` option
 * @param OutputInterface $output console output the progress bar is drawn on
 * @throws \RuntimeException when an INSERT statement cannot be prepared
 */
function runProcesses($pages, $task, &$output)
{
    $pdo = new PDO(DSN);
    $pdo->query('DROP TABLE IF EXISTS pages');
    $pdo->query('CREATE TABLE pages (url TEXT NOT NULL, status TEXT, scope INTEGER NOT NULL)');

    $total = count($pages);
    // Loop-invariant. ceil() returns a float, array_slice() wants integers.
    $sliceSize = (int) ceil($total / PROCESSES);
    // Escaped so that a project directory containing spaces does not break the command line.
    $worker = escapeshellarg(__DIR__ . '/_process.php');

    for ($i = 0; $i < PROCESSES; $i++) {
        $slice = array_slice($pages, $i * $sliceSize, $sliceSize);

        // 999 rows per INSERT — presumably to stay below the database's limit on
        // bound parameters; confirm against the DSN configured in config.php.
        foreach (array_chunk($slice, 999) as $part) {
            $values = implode(',', array_fill(0, count($part), "(?,$i)"));
            $stmt = $pdo->prepare("INSERT INTO pages (url, scope) VALUES $values");
            if (!$stmt) {
                // Without the rows the polling loop below could never complete,
                // so fail loudly instead of calling execute() on false.
                throw new \RuntimeException('Cannot prepare INSERT into pages: ' . $pdo->errorInfo()[2]);
            }
            $stmt->execute($part);
        }

        execInBackground("php $worker -s $i -p " . PID . ' -t ' . escapeshellarg($task));
    }

    $progress = new ProgressBar($output, $total);
    $progress->setRedrawFrequency(10);
    $progress->setFormatDefinition('custom', ' %current%/%max% %bar% %message%');
    $progress->setFormat('custom');
    $progress->setMessage('');
    $progress->start();

    $done = 0;
    while ($done < $total) {
        // fetchColumn() returns the count as a string; normalise it before comparing.
        $done = (int) $pdo->query('SELECT COUNT(status) FROM pages WHERE status IS NOT NULL')->fetchColumn();
        $codes = $pdo->query('SELECT status, COUNT(status) AS count FROM pages WHERE status IS NOT NULL GROUP BY status')->fetchAll(PDO::FETCH_ASSOC);

        $message = [];
        foreach ($codes as $code) {
            // Show "ok" in place of 200 in the per-status summary.
            $message[] = str_replace('200', 'ok', $code['status']) . ': ' . $code['count'];
        }
        $progress->setMessage(implode(' ', $message));
        $progress->setProgress($done);
        usleep(500000);
    }
    $progress->finish();
}
58
59
/**
 * Loads the site's sitemap and returns the URLs it lists.
 *
 * The sitemap location is built by appending `/sitemap.xml` to $website_url.
 *
 * @param string $website_url base URL of the website
 * @param OutputInterface $output console output for the status message
 * @return array URL-decoded text of the `<loc>` element of every sitemap entry
 * @throws \RuntimeException when the sitemap cannot be loaded or parsed
 */
function getPages($website_url, &$output)
{
    $sitemapUrl = $website_url . '/sitemap.xml';

    $output->writeln("  Getting sitemap of $website_url ...");
    $sitemap = simplexml_load_file($sitemapUrl);

    if ($sitemap === false) {
        // Throwing (rather than exit) keeps the function testable and makes the
        // console application finish with a non-zero status.
        throw new \RuntimeException("Can't get $sitemapUrl");
    }

    $pages = [];
    foreach ($sitemap as $url) {
        // $url->loc is a SimpleXMLElement; convert it to its text explicitly.
        $pages[] = urldecode((string) $url->loc);
    }
    return $pages;
}
80
81
/**
 * Starts a shell command without waiting for it to finish.
 *
 * On Windows the command is launched through `start /B`; elsewhere it is run
 * with both output streams redirected to /dev/null and a trailing `&`.
 *
 * @param string $cmd complete command line to run
 */
function execInBackground($cmd)
{
    if (substr(php_uname(), 0, 7) === 'Windows') {
        $handle = popen('start /B ' . $cmd, 'r');
        if ($handle === false) {
            // popen() failed; pclose(false) would be an error of its own.
            fwrite(STDERR, "Can't start background process: $cmd" . PHP_EOL);
            return;
        }
        pclose($handle);
    } else {
        exec($cmd . ' > /dev/null 2>/dev/null &');
    }
}
92
93
94
$app = new Silly\Application();
95
96
$app->command('check website_url', function ($website_url, OutputInterface $output) {
97
    $pages = getPages($website_url, $output);
98
    runProcesses($pages, 'check', $output);
0 ignored issues
show
It seems that $pages, defined by getPages($website_url, $output) on line 97, can also be of type null; however, runProcesses() only seems to accept array. Maybe add an additional type check?

If a method or function can return values of several different types, and you are not sure that only one of them can occur in this context, we recommend adding an additional type check:

/**
 * @return array|string
 */
function returnsDifferentValues($x) {
    if ($x) {
        return 'foo';
    }

    return array();
}

$x = returnsDifferentValues($y);
if (is_array($x)) {
    // $x is an array.
}

If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.

Loading history...
99
});
100
101
$app->command('links website_url', function ($website_url, OutputInterface $output) {
102
    $pages = getPages($website_url, $output);
103
104
    $pdo = new PDO(DSN);
105
    $pdo->query('DROP TABLE IF EXISTS links');
106
    $pdo->query('CREATE TABLE links (url TEXT NOT NULL, status TEXT)');
107
    $pdo->query('CREATE UNIQUE INDEX links_url_uindex ON links (url);');
108
    unset($pdo);
109
110
    runProcesses($pages, 'parse-links', $output);
0 ignored issues
show
It seems that $pages, defined by getPages($website_url, $output) on line 102, can also be of type null; however, runProcesses() only seems to accept array. Maybe add an additional type check?

If a method or function can return values of several different types, and you are not sure that only one of them can occur in this context, we recommend adding an additional type check:

/**
 * @return array|string
 */
function returnsDifferentValues($x) {
    if ($x) {
        return 'foo';
    }

    return array();
}

$x = returnsDifferentValues($y);
if (is_array($x)) {
    // $x is an array.
}

If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.

Loading history...
111
112
    $pdo = new PDO(DSN);
113
//    $pdo->query("DELETE FROM links WHERE url NOT LIKE 'http%'");
114
    $pages = $pdo->query('SELECT url FROM links')->fetchAll(PDO::FETCH_COLUMN);
115
    unset($pdo);
116
117
    $output->writeln('');
118
    $output->writeln('  Checking founded links...');
119
120
    runProcesses($pages, 'check', $output);
121
});
122
123
/*
 * `metadata <website_url>`: fetches every page listed in the site's sitemap and
 * writes its title, keywords and description to
 * runtime/<host>-metadata.csv, one row per page.
 *
 * Pages are grouped by URL hierarchy: the paths are arranged into a tree keyed
 * by path segment and written out depth-first, a page before the pages below it.
 * A row whose three metadata values repeat those of the last row that was
 * written in full gets "--//--" in place of the values.
 */
$app->command('metadata website_url', function ($website_url, OutputInterface $output) {
    // Tree keys that contain "/" can never clash with a path segment, because
    // segments are obtained by splitting the path on "/".
    $selfKey = '/self'; // holds the node's own record: ['path' => <path>]
    $slashKey = '/';    // child node standing for a trailing slash (this includes the root "/")

    $output->writeln("  Getting sitemap of $website_url ...");
    $sitemap = simplexml_load_file($website_url . '/sitemap.xml');
    if ($sitemap === false) {
        throw new \RuntimeException("Can't get $website_url/sitemap.xml");
    }

    $resultFile = __DIR__ . '/runtime/' . parse_url($website_url, PHP_URL_HOST) . '-metadata.csv';

    $paths = [];
    foreach ($sitemap as $url) {
        // parse_url() yields null for a URL without a path; treat that as the empty path.
        $paths[] = (string) parse_url((string) $url->loc, PHP_URL_PATH);
    }
    natsort($paths);

    // Build the tree. Every path ends at exactly one node, so no page is lost:
    //   ""        -> the root node itself
    //   "/"       -> root > "/"
    //   "/a/b"    -> root > "a" > "b"
    //   "/a/b/"   -> root > "a" > "b" > "/"
    $tree = [];
    foreach ($paths as $path) {
        $segments = explode('/', $path);
        $lastIndex = count($segments) - 1;
        $node = &$tree;
        foreach ($segments as $index => $segment) {
            if ($segment === '') {
                if ($index === 0 || $index !== $lastIndex) {
                    continue; // leading slash or doubled slash
                }
                $segment = $slashKey; // trailing slash gets a node of its own
            }
            if (!isset($node[$segment])) {
                $node[$segment] = [];
            }
            $node = &$node[$segment];
        }
        $node[$selfKey] = ['path' => $path];
        unset($node); // drop the reference before the next path starts from the root again
    }

    file_put_contents($resultFile, 'URL, Title, Keywords, Description, "Build Time: ' . date('r') . '"' . PHP_EOL);
    $progress = new \cli\progress\Bar(' Getting meta data', count($paths), 1000);

    $patterns = [
        '~<title>(.*?)</title>~',
        '~<meta name="keywords" content="(.*?)">~',
        '~<meta name="description" content="(.*?)">~',
    ];
    $previous = null;

    // Fetches one page and appends its CSV row.
    $visit = function (array $self) use ($website_url, $resultFile, $progress, $patterns, &$previous) {
        $pageUrl = $website_url . $self['path'];
        $page = file_get_contents($pageUrl);
        if ($page === false) {
            $page = ''; // unreachable page: the row is written with empty values
        }

        $data = [];
        foreach ($patterns as $pattern) {
            $data[] = preg_match($pattern, $page, $match) ? $match[1] : '';
        }

        if ($data === $previous) {
            $cells = array_fill(0, count($data), '--//--');
        } else {
            $previous = $data;
            $cells = $data;
        }

        $quoted = array_map(
            static function ($cell) {
                // Double embedded quotes so that the CSV stays well-formed.
                return '"' . str_replace('"', '""', $cell) . '"';
            },
            array_merge([$pageUrl], $cells)
        );

        file_put_contents($resultFile, implode(',', $quoted) . PHP_EOL, FILE_APPEND);
        $progress->tick();
    };

    // Depth-first walk: a node's own page first, then its children in insertion order.
    $walk = function (array $node) use (&$walk, $visit, $selfKey) {
        if (isset($node[$selfKey])) {
            $visit($node[$selfKey]);
            unset($node[$selfKey]);
        }
        foreach ($node as $child) {
            $walk($child);
        }
    };
    $walk($tree);

    $progress->finish();
});
205
206
/** @noinspection PhpUnhandledExceptionInspection */
207
$app->run();