1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Spatie\MixedContentScannerCli; |
4
|
|
|
|
5
|
|
|
use GuzzleHttp\RequestOptions; |
6
|
|
|
use Spatie\Crawler\Crawler; |
7
|
|
|
use Spatie\MixedContentScanner\MixedContentScanner; |
8
|
|
|
use Symfony\Component\Console\Command\Command; |
9
|
|
|
use Symfony\Component\Console\Input\InputArgument; |
10
|
|
|
use Symfony\Component\Console\Input\InputInterface; |
11
|
|
|
use Symfony\Component\Console\Input\InputOption; |
12
|
|
|
use Symfony\Component\Console\Output\OutputInterface; |
13
|
|
|
use Symfony\Component\Console\Style\SymfonyStyle; |
14
|
|
|
|
15
|
|
|
/**
 * Console command `scan`: crawls the given url and reports mixed content
 * through a MixedContentLogger that writes to the console.
 */
class ScanCommand extends Command
{
    /**
     * Declares the command name, description, the required `url` argument
     * and the `filter`, `ignore`, `ignore-robots`, `verify-ssl` and
     * `user-agent` options.
     */
    protected function configure()
    {
        $this
            ->setName('scan')
            ->setDescription('Scan a site for mixed content.')
            ->addArgument('url', InputArgument::REQUIRED, 'Which url do you want to scan')
            ->addOption(
                'filter',
                null,
                InputOption::VALUE_REQUIRED | InputOption::VALUE_IS_ARRAY,
                'urls whose path pass the regex will be scanned'
            )
            ->addOption(
                'ignore',
                null,
                InputOption::VALUE_REQUIRED | InputOption::VALUE_IS_ARRAY,
                'urls whose path pass the regex will not be scanned'
            )
            ->addOption(
                'ignore-robots',
                null,
                InputOption::VALUE_NONE,
                'Ignore robots.txt, robots meta tags and -headers.'
            )
            ->addOption(
                'verify-ssl',
                null,
                InputOption::VALUE_NONE,
                'Verify the crawled urls have a valid certificate. If they do not an empty response will be the result of the crawl'
            )
            ->addOption('user-agent', null, InputOption::VALUE_REQUIRED, 'User agent string to use for requests');
    }

    /**
     * Runs the scan for the url given on the command line.
     *
     * @param InputInterface  $input  Provides the `url` argument and the options declared in configure().
     * @param OutputInterface $output Console output, wrapped in a SymfonyStyle for the title and the logger.
     *
     * @return int Exit code; 0 once the scan call has returned.
     *
     * @throws \InvalidArgumentException When the `url` argument is not a string.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $scanUrl = $input->getArgument('url');

        // getArgument() is not guaranteed to return a string by its contract,
        // while the url is interpolated and handed to the scanner below.
        if (! is_string($scanUrl)) {
            throw new \InvalidArgumentException('The url argument must be a string.');
        }

        $styledOutput = new SymfonyStyle($input, $output);

        $styledOutput->title("Start scanning {$scanUrl} for mixed content");

        $mixedContentLogger = new MixedContentLogger($styledOutput);

        $crawlProfile = new CrawlProfile(
            $scanUrl,
            $input->getOption('filter'),
            $input->getOption('ignore')
        );

        $ignoreRobots = $input->getOption('ignore-robots');
        $userAgent = $input->getOption('user-agent');

        (new MixedContentScanner($mixedContentLogger))
            ->configureCrawler(function (Crawler $crawler) use ($ignoreRobots, $userAgent) {
                if ($ignoreRobots) {
                    $crawler->ignoreRobots();
                }

                // Explicit check instead of truthiness so that a user agent
                // of '0' is not silently discarded.
                if (is_string($userAgent) && $userAgent !== '') {
                    $crawler->setUserAgent($userAgent);
                }
            })
            ->setCrawlProfile($crawlProfile)
            ->scan($scanUrl, $this->getClientOptions($input));

        // Symfony Console expects execute() to return an integer exit code;
        // returning nothing is deprecated in 4.4 and a TypeError from 5.0 on.
        return 0;
    }

    /**
     * Builds the Guzzle request options passed to the scanner.
     *
     * Certificate verification is off unless the `verify-ssl` flag is given;
     * cookies are enabled, both timeouts are 10 and redirects are not followed.
     *
     * @param InputInterface $input Source of the `verify-ssl` flag.
     *
     * @return array<string, mixed> Options keyed by GuzzleHttp\RequestOptions constants.
     */
    protected function getClientOptions(InputInterface $input): array
    {
        return [
            RequestOptions::VERIFY => (bool) $input->getOption('verify-ssl'),
            RequestOptions::COOKIES => true,
            RequestOptions::CONNECT_TIMEOUT => 10,
            RequestOptions::TIMEOUT => 10,
            RequestOptions::ALLOW_REDIRECTS => false,
        ];
    }
}
79
|
|
|
|
This check looks at variables that are passed out again to other methods.
If the outgoing method call has stricter type requirements than the method itself, an issue is raised.
An additional type check may prevent trouble.