1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Spatie\HttpStatusCheck; |
4
|
|
|
|
5
|
|
|
use GuzzleHttp\RequestOptions; |
6
|
|
|
use Spatie\Crawler\CrawlAllUrls; |
7
|
|
|
use Spatie\Crawler\Crawler; |
8
|
|
|
use Spatie\Crawler\CrawlInternalUrls; |
9
|
|
|
use Symfony\Component\Console\Command\Command; |
10
|
|
|
use Symfony\Component\Console\Input\InputArgument; |
11
|
|
|
use Symfony\Component\Console\Input\InputInterface; |
12
|
|
|
use Symfony\Component\Console\Input\InputOption; |
13
|
|
|
use Symfony\Component\Console\Output\OutputInterface; |
14
|
|
|
use Symfony\Component\Console\Question\ConfirmationQuestion; |
15
|
|
|
|
16
|
|
|
/**
 * Console command `scan`: crawls a website starting at the given url and
 * hands every crawled response to a CrawlLogger, which can additionally
 * write to an output file and/or a csv file.
 */
class ScanCommand extends Command
{
    /**
     * Declare the command name, its description, the required `url`
     * argument and all supported options with their defaults.
     */
    protected function configure()
    {
        $this->setName('scan')
            ->setDescription('Check the http status code of all links on a website.')
            ->addArgument(
                'url',
                InputArgument::REQUIRED,
                'The url to check'
            )
            ->addOption(
                'concurrency',
                'c',
                InputOption::VALUE_REQUIRED,
                'The amount of concurrent connections to use',
                10
            )
            ->addOption(
                'output',
                'o',
                InputOption::VALUE_REQUIRED,
                'Log all non-2xx and non-3xx responses in this file'
            )
            ->addOption(
                'csv',
                'f',
                InputOption::VALUE_REQUIRED,
                'Log all responses in this csv file'
            )
            ->addOption(
                'dont-crawl-external-links',
                'x',
                InputOption::VALUE_NONE,
                'Dont crawl external links'
            )
            ->addOption(
                'timeout',
                't',
                InputOption::VALUE_OPTIONAL,
                'The maximum number of seconds the request can take',
                10
            )
            ->addOption(
                'user-agent',
                'u',
                InputOption::VALUE_OPTIONAL,
                'The User Agent to pass for the request',
                ''
            )
            ->addOption(
                'skip-verification',
                's',
                InputOption::VALUE_NONE,
                'Skips checking the SSL certificate'
            )
            ->addOption(
                'options',
                'opt',
                InputOption::VALUE_IS_ARRAY | InputOption::VALUE_OPTIONAL,
                'Additional options to the request',
                []
            )
            ->addOption(
                'ignore-robots',
                null,
                InputOption::VALUE_NONE,
                'Ignore robots checks'
            );
    }

    /**
     * Run the scan: configure the logger and HTTP client from the command
     * line input, then crawl the given url.
     *
     * @param \Symfony\Component\Console\Input\InputInterface $input
     * @param \Symfony\Component\Console\Output\OutputInterface $output
     *
     * @return int Always 0, both after a finished crawl and when the user
     *             declines to overwrite an existing output/csv file.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $baseUrl = $input->getArgument('url');
        $crawlProfile = $input->getOption('dont-crawl-external-links')
            ? new CrawlInternalUrls($baseUrl)
            : new CrawlAllUrls();

        $output->writeln("Start scanning {$baseUrl}");
        $output->writeln('');

        $crawlLogger = new CrawlLogger($output);

        // The output file is checked (and possibly confirmed) before the csv
        // file, so at most one prompt is shown before an abort.
        $outputFile = $input->getOption('output');
        if ($outputFile) {
            if (! $this->confirmOverwrite($input, $output, $outputFile, 'output')) {
                return 0;
            }

            $crawlLogger->setOutputFile($outputFile);
        }

        $csvFile = $input->getOption('csv');
        if ($csvFile) {
            if (! $this->confirmOverwrite($input, $output, $csvFile, 'csv')) {
                return 0;
            }

            $crawlLogger->setCsvFile($csvFile);
        }

        $clientOptions = [
            RequestOptions::TIMEOUT => $input->getOption('timeout'),
            RequestOptions::VERIFY => ! $input->getOption('skip-verification'),
            RequestOptions::ALLOW_REDIRECTS => false,
        ];

        // Merged last, so entries from --options with the same string key
        // replace the defaults above.
        $clientOptions = array_merge($clientOptions, $input->getOption('options'));

        // Applied after the merge, so an explicit --user-agent is always set
        // on the request headers.
        if ($input->getOption('user-agent')) {
            $clientOptions[RequestOptions::HEADERS]['user-agent'] = $input->getOption('user-agent');
        }

        $crawler = Crawler::create($clientOptions)
            ->setConcurrency($input->getOption('concurrency'))
            ->setCrawlObserver($crawlLogger)
            ->setCrawlProfile($crawlProfile);

        if ($input->getOption('ignore-robots')) {
            $crawler->ignoreRobots();
        }

        $crawler->startCrawling($baseUrl);

        return 0;
    }

    /**
     * Decide whether the given file may be written to.
     *
     * When the file does not exist yet, no question is asked. When it does,
     * the user is asked to confirm overwriting it (the question defaults to
     * "no"); on refusal 'Aborting...' is written to the output.
     *
     * @param \Symfony\Component\Console\Input\InputInterface $input
     * @param \Symfony\Component\Console\Output\OutputInterface $output
     * @param string $path  Path of the file that is about to be written.
     * @param string $label Kind of file as shown in the prompt ('output' or 'csv').
     *
     * @return bool True when writing may proceed, false when the user declined.
     */
    private function confirmOverwrite(InputInterface $input, OutputInterface $output, string $path, string $label): bool
    {
        if (! file_exists($path)) {
            return true;
        }

        $question = new ConfirmationQuestion(
            "The {$label} file `{$path}` already exists. Overwrite it? (y/n)",
            false
        );

        if ($this->getHelper('question')->ask($input, $output, $question)) {
            return true;
        }

        $output->writeln('Aborting...');

        return false;
    }
}
169
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.