1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Spatie\HttpStatusCheck; |
4
|
|
|
|
5
|
|
|
use Spatie\Crawler\Crawler; |
6
|
|
|
use GuzzleHttp\RequestOptions; |
7
|
|
|
use Spatie\Crawler\CrawlAllUrls; |
8
|
|
|
use Spatie\Crawler\CrawlInternalUrls; |
9
|
|
|
use Symfony\Component\Console\Command\Command; |
10
|
|
|
use Symfony\Component\Console\Input\InputOption; |
11
|
|
|
use Symfony\Component\Console\Input\InputArgument; |
12
|
|
|
use Symfony\Component\Console\Input\InputInterface; |
13
|
|
|
use Symfony\Component\Console\Output\OutputInterface; |
14
|
|
|
use Symfony\Component\Console\Question\ConfirmationQuestion; |
15
|
|
|
|
16
|
|
|
/**
 * Console command `scan`: crawls a website starting at the given url and
 * hands every crawled response to a CrawlLogger.
 */
class ScanCommand extends Command
{
    /**
     * Declares the command name, its description, the required `url`
     * argument and every supported option together with its default.
     */
    protected function configure()
    {
        $this->setName('scan');
        $this->setDescription('Check the http status code of all links on a website.');

        // The single positional argument: the url the crawl starts from.
        $this->addArgument('url', InputArgument::REQUIRED, 'The url to check');

        $this->addOption(
            'concurrency',
            'c',
            InputOption::VALUE_REQUIRED,
            'The amount of concurrent connections to use',
            10
        );

        $this->addOption(
            'output',
            'o',
            InputOption::VALUE_REQUIRED,
            'Log all non-2xx and non-3xx responses in this file'
        );

        $this->addOption(
            'dont-crawl-external-links',
            'x',
            InputOption::VALUE_NONE,
            'Dont crawl external links'
        );

        // NOTE(review): VALUE_OPTIONAL means `--timeout` may be passed without a
        // value; confirm what the HTTP client does with the resulting value.
        $this->addOption(
            'timeout',
            't',
            InputOption::VALUE_OPTIONAL,
            'The maximum number of seconds the request can take',
            10
        );

        $this->addOption(
            'user-agent',
            'u',
            InputOption::VALUE_OPTIONAL,
            'The User Agent to pass for the request',
            ''
        );

        $this->addOption(
            'skip-verification',
            's',
            InputOption::VALUE_NONE,
            'Skips checking the SSL certificate'
        );

        // NOTE(review): the shortcut `opt` starts with the same letter as the
        // `o` shortcut of `output`; verify that `-opt` is parsed as intended.
        $this->addOption(
            'options',
            'opt',
            InputOption::VALUE_IS_ARRAY | InputOption::VALUE_OPTIONAL,
            'Additional options to the request',
            []
        );
    }

    /**
     * Runs the scan: announces the start, optionally attaches an output file
     * to the logger, builds the HTTP client options and starts the crawl.
     *
     * @param \Symfony\Component\Console\Input\InputInterface $input
     * @param \Symfony\Component\Console\Output\OutputInterface $output
     *
     * @return int 0 after the crawl has been started and has returned, and also
     *             0 when the user declines to overwrite an existing output file
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $baseUrl = $input->getArgument('url');

        // The profile is created before anything is printed, so a failure while
        // constructing it surfaces before the "Start scanning" line.
        if ($input->getOption('dont-crawl-external-links')) {
            $crawlProfile = new CrawlInternalUrls($baseUrl);
        } else {
            $crawlProfile = new CrawlAllUrls();
        }

        $output->writeln("Start scanning {$baseUrl}");
        $output->writeln('');

        $crawlLogger = new CrawlLogger($output);

        $outputFile = $input->getOption('output');

        if ($outputFile) {
            // Only an already existing file triggers the confirmation prompt.
            if (file_exists($outputFile) && ! $this->confirmOverwrite($input, $output, $outputFile)) {
                $output->writeln('Aborting...');

                return 0;
            }

            $crawlLogger->setOutputFile($outputFile);
        }

        $clientOptions = $this->buildClientOptions($input);
        $concurrency = $input->getOption('concurrency');

        Crawler::create($clientOptions)
            ->setConcurrency($concurrency)
            ->setCrawlObserver($crawlLogger)
            ->setCrawlProfile($crawlProfile)
            ->startCrawling($baseUrl);

        return 0;
    }

    /**
     * Asks the user whether the existing output file may be overwritten.
     *
     * @param \Symfony\Component\Console\Input\InputInterface $input
     * @param \Symfony\Component\Console\Output\OutputInterface $output
     * @param mixed $outputFile value of the `output` option, interpolated into the question text
     *
     * @return mixed the answer returned by the question helper; the caller
     *               treats a falsy answer as "do not overwrite"
     */
    private function confirmOverwrite(InputInterface $input, OutputInterface $output, $outputFile)
    {
        $questionHelper = $this->getHelper('question');

        // The second constructor argument (`false`) is passed as the question's
        // default; see the ConfirmationQuestion documentation for its exact effect.
        $overwriteQuestion = new ConfirmationQuestion(
            "The output file `{$outputFile}` already exists. Overwrite it? (y/n)",
            false
        );

        return $questionHelper->ask($input, $output, $overwriteQuestion);
    }

    /**
     * Assembles the option array handed to Crawler::create().
     *
     * Starts from the timeout and certificate-verification settings, merges
     * the values of the `options` option on top of them, and finally sets the
     * `user-agent` header when a truthy user agent was supplied.
     *
     * @param \Symfony\Component\Console\Input\InputInterface $input
     *
     * @return array
     */
    private function buildClientOptions(InputInterface $input)
    {
        $baseOptions = [
            RequestOptions::TIMEOUT => $input->getOption('timeout'),
            // Verification is on unless --skip-verification was given.
            RequestOptions::VERIFY => ! $input->getOption('skip-verification'),
        ];

        // NOTE(review): `options` is declared VALUE_IS_ARRAY, so it presumably
        // arrives as a plain list; it is merged as-is - confirm that the
        // resulting keys are what the HTTP client expects.
        $clientOptions = array_merge($baseOptions, $input->getOption('options'));

        $userAgent = $input->getOption('user-agent');

        if ($userAgent) {
            $clientOptions[RequestOptions::HEADERS]['user-agent'] = $userAgent;
        }

        return $clientOptions;
    }
}
131
|
|
|
|