1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Spatie\HttpStatusCheck; |
4
|
|
|
|
5
|
|
|
use GuzzleHttp\RequestOptions; |
6
|
|
|
use Spatie\Crawler\CrawlAllUrls; |
7
|
|
|
use Spatie\Crawler\Crawler; |
8
|
|
|
use Spatie\Crawler\CrawlInternalUrls; |
9
|
|
|
use Symfony\Component\Console\Command\Command; |
10
|
|
|
use Symfony\Component\Console\Input\InputArgument; |
11
|
|
|
use Symfony\Component\Console\Input\InputInterface; |
12
|
|
|
use Symfony\Component\Console\Input\InputOption; |
13
|
|
|
use Symfony\Component\Console\Output\OutputInterface; |
14
|
|
|
use Symfony\Component\Console\Question\ConfirmationQuestion; |
15
|
|
|
|
16
|
|
|
/**
 * Console command `scan`: crawls the given url and hands every crawled
 * response to a CrawlLogger, optionally logging failures to a file.
 */
class ScanCommand extends Command
{
    /**
     * Declares the command name, its description, the required `url`
     * argument and all supported options.
     */
    protected function configure()
    {
        $this->setName('scan')
            ->setDescription('Check the http status code of all links on a website.')
            ->addArgument(
                'url',
                InputArgument::REQUIRED,
                'The url to check'
            )
            ->addOption(
                'concurrency',
                'c',
                InputOption::VALUE_REQUIRED,
                'The amount of concurrent connections to use',
                10
            )
            ->addOption(
                'output',
                'o',
                InputOption::VALUE_REQUIRED,
                'Log all non-2xx and non-3xx responses in this file'
            )
            ->addOption(
                'dont-crawl-external-links',
                'x',
                InputOption::VALUE_NONE,
                'Dont crawl external links'
            )
            ->addOption(
                'timeout',
                't',
                InputOption::VALUE_OPTIONAL,
                'The maximum number of seconds the request can take',
                10
            )
            ->addOption(
                'user-agent',
                'u',
                InputOption::VALUE_OPTIONAL,
                'The User Agent to pass for the request',
                ''
            )
            ->addOption(
                'skip-verification',
                's',
                InputOption::VALUE_NONE,
                'Skips checking the SSL certificate'
            )
            // NOTE(review): `opt` is a multi-character shortcut; Symfony's argv parser
            // may read `-opt` as `-o pt` (the `output` option) — verify it is usable.
            ->addOption(
                'options',
                'opt',
                InputOption::VALUE_IS_ARRAY | InputOption::VALUE_OPTIONAL,
                'Additional options to the request',
                []
            )
            ->addOption(
                'ignore-robots',
                null,
                InputOption::VALUE_NONE,
                'Ignore robots checks'
            );
    }

    /**
     * Runs the crawl for the url given on the command line.
     *
     * When an output file is requested and already exists, the user is asked
     * to confirm overwriting it; the existing file is removed before crawling.
     *
     * @param \Symfony\Component\Console\Input\InputInterface $input
     * @param \Symfony\Component\Console\Output\OutputInterface $output
     *
     * @return int 0 when the crawl finished or the user declined to overwrite
     *             the output file, 1 when the existing output file could not
     *             be removed
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $baseUrl = $input->getArgument('url');

        $crawlProfile = $input->getOption('dont-crawl-external-links')
            ? new CrawlInternalUrls($baseUrl)
            : new CrawlAllUrls();

        $output->writeln("Start scanning {$baseUrl}");
        $output->writeln('');

        $crawlLogger = new CrawlLogger($output);

        // Read the option once and reuse the value everywhere below.
        $outputFile = $input->getOption('output');

        if ($outputFile) {
            if (file_exists($outputFile)) {
                $helper = $this->getHelper('question');
                $question = new ConfirmationQuestion(
                    "The output file `{$outputFile}` already exists. Overwrite it? (y/n)",
                    false
                );

                if (! $helper->ask($input, $output, $question)) {
                    $output->writeln('Aborting...');

                    return 0;
                }

                // Do not start a scan on top of a file the user asked to replace
                // when that file cannot actually be removed (e.g. missing
                // permissions, or the path is a directory).
                if (! unlink($outputFile)) {
                    $output->writeln("Could not remove the existing output file `{$outputFile}`.");

                    return 1;
                }
            }

            $crawlLogger->setOutputFile($outputFile);
        }

        $clientOptions = [
            RequestOptions::TIMEOUT => $input->getOption('timeout'),
            RequestOptions::VERIFY => ! $input->getOption('skip-verification'),
            RequestOptions::ALLOW_REDIRECTS => [
                'track_redirects' => true,
            ],
        ];

        // NOTE(review): `options` is declared VALUE_IS_ARRAY, so this presumably merges
        // a plain list of strings under numeric keys — confirm the intended format.
        $clientOptions = array_merge($clientOptions, $input->getOption('options'));

        $userAgent = $input->getOption('user-agent');

        if ($userAgent) {
            $clientOptions[RequestOptions::HEADERS]['user-agent'] = $userAgent;
        }

        $crawler = Crawler::create($clientOptions)
            ->setConcurrency($input->getOption('concurrency'))
            ->setCrawlObserver($crawlLogger)
            ->setCrawlProfile($crawlProfile);

        if ($input->getOption('ignore-robots')) {
            $crawler->ignoreRobots();
        }

        $crawler->startCrawling($baseUrl);

        return 0;
    }
}
147
|
|
|
|
If a method or function can return several different types of values, and you are not certain that only one of them can occur in this context, we recommend adding an additional type check:
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.