Completed
Push — master ( 1b5be9...a2b567 )
by Dev
02:00
created

Crawler.php (1 issue)

Labels
Severity
1
<?php
2
3
include 'vendor/autoload.php';
4
5
// ---------------
6
// Configure
7
// ---------------
8
// Command-line option definitions handed to CliArgs below.
// Each entry maps a long option name to its short alias, an optional default,
// the help text shown to the user and, where present, a value filter.
$config = [
    // URL the crawl begins from (checked for presence further down).
    'start' => [
        'alias' => 's',
        'help' => 'Define where the crawl starts.',
    ],
    // Depth limit passed to the crawler.
    'limit' => [
        'alias' => 'l',
        'default' => 5,
        'help' => 'Define a depth limit for the crawler (default 5).',
        'filter' => 'int',
    ],
    // Robots.txt-style rules, given inline or as a URL to fetch.
    'ignore' => [
        'alias' => 'i',
        'default' => null,
        'help' => 'Virtual Robots.txt which will be interpreted for this crawl (could be a string or an URL).',
    ],
    // NOTE(review): 'PockectCr' looks like a typo of 'PocketCr'; left unchanged
    // because this string is passed to the crawler as the user-agent.
    'user-agent' => [
        'alias' => 'u',
        'default' => 'PockectCr. From PiedWeb',
        'help' => 'Define the user-agent used during the crawl',
    ],
    // Toggles the progress/summary output printed by this script.
    'verbose' => [
        'alias' => 'v',
        'default' => 1,
        'help' => 'Display debugging information (0/1, default 1).',
        'filter' => 'int',
    ],
];
37
38
// Parse the command line according to the option definitions in $config.
$CliArgs = new CliArgs\CliArgs($config);

// When help is requested, print the usage text and stop: without the exit the
// script would fall through and either throw "--start is required" or start
// a crawl the user did not ask for.
// NOTE(review): the php-cli-args README appears to spell this method
// `isFlagExist` — confirm the installed version exposes `isFlagExists`.
if ($CliArgs->isFlagExists('help', 'h')) {
    echo $CliArgs->getHelp('help');
    exit(0);
}
40
41
// Pull the parsed option values into plain variables for the crawler.
$startUrl = $CliArgs->getArg('start');
$limit = (int) $CliArgs->getArg('limit');

// --ignore may hold the rules themselves or a URL pointing at them; in the
// latter case it is replaced by whatever the HTTP request helper returns.
$ignore = $CliArgs->getArg('ignore') ?? '';
$ignore = filter_var($ignore, FILTER_VALIDATE_URL)
    ? \PiedWeb\Curl\Request::get($ignore)
    : $ignore;

$userAgent = (string) $CliArgs->getArg('user-agent');
// Truthy value enables the progress and summary output below.
$debug = $CliArgs->getArg('verbose');
49
50
// ---------------
51
// Crawler working
52
// ---------------
53
// A crawl cannot begin without a start URL.
if (null === $startUrl) {
    throw new \Exception('--start is required');
}

// In verbose mode, print a banner naming the URL about to be crawled.
if ($debug) {
    $indent = '    ';
    echo str_repeat(PHP_EOL, 3)
        .$indent.'Crawl starting'.PHP_EOL
        .$indent.$startUrl
        .str_repeat(PHP_EOL, 4);
}

// Build the crawler from the start URL, ignore rules, depth limit and user-agent.
$crawl = new \PiedWeb\SeoPocketCrawler\Crawler($startUrl, $ignore, $limit, $userAgent);
0 ignored issues
show
The type PiedWeb\SeoPocketCrawler\Crawler was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
60
// Run the crawl, forwarding the verbose flag.
$crawl->crawl($debug);

// In verbose mode, finish with a summary of where the results were written.
if ($debug) {
    $blankLines = PHP_EOL.PHP_EOL;

    echo $blankLines.'---------------'.$blankLines.'    Crawl succeed'.PHP_EOL;

    $indexFile = $crawl->getDataFolder().'/index.csv';
    echo '    You can find your data in '.PHP_EOL.$indexFile.PHP_EOL;

    $cacheFolder = $crawl->getCacheFolder();
    echo '    You can find source code from html crawled pages in '.$cacheFolder;

    echo $blankLines;
}
68