1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | /* |
||
6 | * Copyright (c) 2022 Ne-Lexa <[email protected]> |
||
7 | * |
||
8 | * For the full copyright and license information, please view |
||
9 | * the LICENSE file that was distributed with this source code. |
||
10 | * |
||
11 | * @see https://github.com/Ne-Lexa/roach-php-bundle |
||
12 | */ |
||
13 | |||
14 | namespace Nelexa\RoachPhpBundle\Command; |
||
15 | |||
16 | use RoachPHP\Roach; |
||
17 | use RoachPHP\Spider\Configuration\Overrides; |
||
18 | use Symfony\Component\Console\Command\Command; |
||
19 | use Symfony\Component\Console\Input\InputArgument; |
||
20 | use Symfony\Component\Console\Input\InputInterface; |
||
21 | use Symfony\Component\Console\Input\InputOption; |
||
22 | use Symfony\Component\Console\Output\OutputInterface; |
||
23 | use Symfony\Component\Console\Style\OutputStyle; |
||
24 | use Symfony\Component\Console\Style\SymfonyStyle; |
||
25 | use Symfony\Component\DependencyInjection\ServiceLocator; |
||
26 | |||
/**
 * Console command `roach:run`: starts one of the spiders exposed by the
 * injected service locator, optionally overriding its request delay and
 * concurrency.
 *
 * A spider can be addressed by its fully-qualified class name or by one of
 * the aliases derived from it (see {@see RunSpiderCommand::buildSpiderNameAliases()}).
 */
final class RunSpiderCommand extends Command
{
    protected static $defaultName = 'roach:run';

    protected static $defaultDescription = 'Run the provided spider';

    /**
     * Spider class name => aliases accepted for it on the command line.
     *
     * @var array<class-string<\RoachPHP\Spider\SpiderInterface>, array<string>>
     */
    private array $spiderNames;

    /**
     * @param ServiceLocator $serviceLocator locator whose provided services are the runnable spider classes
     */
    public function __construct(private ServiceLocator $serviceLocator)
    {
        /** @var array<class-string<\RoachPHP\Spider\SpiderInterface>> $providedServices */
        $providedServices = $this->serviceLocator->getProvidedServices();

        // The alias map has to exist before the parent constructor runs:
        // Symfony's Command constructor invokes configure(), which reads it.
        $this->spiderNames = $this->buildSpiderNameAliases($providedServices);

        parent::__construct();
    }

    /**
     * Declares the optional `spider` argument (its help text lists every known
     * spider with its aliases) and the `--delay` / `--concurrency` options.
     */
    protected function configure(): void
    {
        $spiderArgDescription = "Spider class name\nSupport spiders:\n";

        foreach ($this->spiderNames as $className => $aliases) {
            $spiderArgDescription .= '[*] <comment>' . $className . '</comment> or aliases <info>'
                . implode('</info>, <info>', $aliases)
                . '</info>' . \PHP_EOL;
        }

        $this
            // rtrim() drops the line break left behind by the last list entry.
            ->addArgument('spider', InputArgument::OPTIONAL, rtrim($spiderArgDescription))
            ->addOption('delay', 't', InputOption::VALUE_OPTIONAL, 'The delay (in seconds) between requests.')
            ->addOption('concurrency', 'p', InputOption::VALUE_OPTIONAL, 'The number of concurrent requests.')
        ;
    }

    /**
     * Derives the command-line aliases for every spider class.
     *
     * For `Vendor\Package\ExampleSiteSpider` the aliases are the short class
     * name (`ExampleSiteSpider`), its snake_case form (`example_site_spider`)
     * and, when that form ends in `_spider`, the form without the suffix
     * (`example_site`). A class without a namespace gets no separate short-name
     * alias because the short name is the class name itself.
     *
     * @param array<class-string<\RoachPHP\Spider\SpiderInterface>> $services
     *
     * @return array<class-string<\RoachPHP\Spider\SpiderInterface>, array<string>>
     */
    private function buildSpiderNameAliases(array $services): array
    {
        $aliasServices = [];

        foreach ($services as $className) {
            $aliases = [];

            if (($lastPosDelim = strrpos($className, '\\')) !== false) {
                $shortClassName = substr($className, $lastPosDelim + 1);
                $aliases[] = $shortClassName;
            } else {
                $shortClassName = $className;
            }

            // Prefix every capitalised word (or run of capitals) with "_".
            // preg_replace() yields null on a PCRE failure; fall back to the
            // untouched short name instead of handing null to ltrim().
            $underscored = preg_replace('/[A-Z]([A-Z](?![a-z]))*/', '_$0', $shortClassName) ?? $shortClassName;
            $snakeCaseClass = strtolower(ltrim($underscored, '_'));
            $aliases[] = $snakeCaseClass;

            if (preg_match('~^(.*?)_spider$~', $snakeCaseClass, $matches)) {
                $aliases[] = $matches[1];
            }

            // An all-lowercase short name equals its snake_case form; list it once.
            $aliasServices[$className] = array_values(array_unique($aliases));
        }

        return $aliasServices;
    }

    /**
     * Asks the user to pick a spider when none was passed as an argument.
     */
    protected function interact(InputInterface $input, OutputInterface $output): void
    {
        if ($input->getArgument('spider') !== null) {
            return;
        }

        // With no registered spiders there is nothing to choose from, so the
        // prompt is skipped and execute() reports the missing spider name.
        if ($this->spiderNames === []) {
            return;
        }

        $spiderName = $this->selectSpiderClassName(new SymfonyStyle($input, $output));
        $input->setArgument('spider', $spiderName);
    }

    /**
     * Prompts for one of the spider classes provided by the service locator.
     */
    private function selectSpiderClassName(OutputStyle $io): string
    {
        return (string) $io->choice('Choose a spider class', array_values($this->serviceLocator->getProvidedServices()));
    }

    /**
     * Resolves the requested spider and starts it with the requested overrides.
     *
     * @return int self::SUCCESS once the spider run was started and returned,
     *             self::FAILURE when no spider was given or the name is unknown
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $spiderName = $input->getArgument('spider');

        // The argument is optional and interact() may not have filled it in,
        // so a missing name is reported as a normal error rather than asserted.
        if (!\is_string($spiderName)) {
            $io = new SymfonyStyle($input, $output);
            $io->error('No spider specified');

            return self::FAILURE;
        }

        $spiderClassName = $this->findSpiderClass($spiderName);

        if ($spiderClassName === null) {
            $io = new SymfonyStyle($input, $output);
            $io->error('Unknown spider ' . $spiderName);

            return self::FAILURE;
        }

        // Options that were not supplied stay null and are passed on as null.
        $delay = $input->getOption('delay');

        if ($delay !== null) {
            // Negative delays are clamped to 0.
            $delay = max(0, (int) $delay);
        }

        $concurrency = $input->getOption('concurrency');

        if ($concurrency !== null) {
            // Concurrency is clamped to at least 1.
            $concurrency = max(1, (int) $concurrency);
        }

        $overrides = new Overrides(
            concurrency: $concurrency,
            requestDelay: $delay,
        );

        Roach::startSpider($spiderClassName, $overrides);

        return self::SUCCESS;
    }

    /**
     * Looks up a spider by fully-qualified class name or by alias.
     *
     * The first spider in the alias map that matches wins.
     *
     * @return class-string<\RoachPHP\Spider\SpiderInterface>|null null when $spiderName is null or matches nothing
     */
    private function findSpiderClass(?string $spiderName): ?string
    {
        if ($spiderName === null) {
            return null;
        }

        foreach ($this->spiderNames as $className => $aliases) {
            if ($className === $spiderName || \in_array($spiderName, $aliases, true)) {
                return $className;
            }
        }

        return null;
    }
}
||
159 |