Ne-Lexa /
roach-php-bundle
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | /* |
||
| 6 | * Copyright (c) 2022 Ne-Lexa <[email protected]> |
||
| 7 | * |
||
| 8 | * For the full copyright and license information, please view |
||
| 9 | * the LICENSE file that was distributed with this source code. |
||
| 10 | * |
||
| 11 | * @see https://github.com/Ne-Lexa/roach-php-bundle |
||
| 12 | */ |
||
| 13 | |||
| 14 | namespace Nelexa\RoachPhpBundle\Command; |
||
| 15 | |||
| 16 | use RoachPHP\Roach; |
||
| 17 | use RoachPHP\Spider\Configuration\Overrides; |
||
| 18 | use Symfony\Component\Console\Command\Command; |
||
| 19 | use Symfony\Component\Console\Input\InputArgument; |
||
| 20 | use Symfony\Component\Console\Input\InputInterface; |
||
| 21 | use Symfony\Component\Console\Input\InputOption; |
||
| 22 | use Symfony\Component\Console\Output\OutputInterface; |
||
| 23 | use Symfony\Component\Console\Style\OutputStyle; |
||
| 24 | use Symfony\Component\Console\Style\SymfonyStyle; |
||
| 25 | use Symfony\Component\DependencyInjection\ServiceLocator; |
||
| 26 | |||
/**
 * Console command `roach:run`: runs one of the spiders exposed by the injected service locator.
 *
 * The spider can be given by its fully-qualified class name or by one of the
 * aliases derived from that class name (short class name, snake_case name and
 * the snake_case name without a trailing `_spider`).
 */
final class RunSpiderCommand extends Command
{
    protected static $defaultName = 'roach:run';

    protected static $defaultDescription = 'Run the provided spider';

    /**
     * Spider class name => aliases accepted for the "spider" argument.
     *
     * @var array<class-string<\RoachPHP\Spider\SpiderInterface>, array<string>>
     */
    private array $spiderNames;

    public function __construct(private ServiceLocator $serviceLocator)
    {
        /** @var array<class-string<\RoachPHP\Spider\SpiderInterface>> $providedServices */
        $providedServices = $this->serviceLocator->getProvidedServices();

        // The alias map is filled in before calling the parent constructor
        // because configure() reads it to build the argument description.
        $this->spiderNames = $this->buildSpiderNameAliases($providedServices);

        parent::__construct();
    }

    /**
     * Declares the optional "spider" argument and the "delay" / "concurrency" options.
     *
     * The argument description lists every known spider class with its aliases.
     */
    protected function configure(): void
    {
        $spiderArgDescription = "Spider class name\nSupport spiders:\n";

        foreach ($this->spiderNames as $className => $aliases) {
            $spiderArgDescription .= '[*] <comment>' . $className . '</comment> or aliases <info>'
                . implode('</info>, <info>', $aliases)
                . '</info>' . \PHP_EOL;
        }

        $this
            ->addArgument('spider', InputArgument::OPTIONAL, rtrim($spiderArgDescription))
            ->addOption('delay', 't', InputOption::VALUE_OPTIONAL, 'The delay (in seconds) between requests.')
            ->addOption('concurrency', 'p', InputOption::VALUE_OPTIONAL, 'The number of concurrent requests.')
        ;
    }

    /**
     * Builds the list of command-line aliases for every spider class.
     *
     * For `App\Spider\FooBarSpider` the aliases are `FooBarSpider`,
     * `foo_bar_spider` and `foo_bar`.
     *
     * @param array<class-string<\RoachPHP\Spider\SpiderInterface>> $services
     *
     * @return array<class-string<\RoachPHP\Spider\SpiderInterface>, array<string>>
     */
    private function buildSpiderNameAliases(array $services): array
    {
        $aliasServices = [];

        foreach ($services as $className) {
            $aliases = [];

            if (($lastPosDelim = strrpos($className, '\\')) !== false) {
                $shortClassName = substr($className, $lastPosDelim + 1);
                $aliases[] = $shortClassName;
            } else {
                // Without a namespace the short name equals the class name,
                // which findSpiderClass() already matches directly.
                $shortClassName = $className;
            }

            // preg_replace() returns null on a PCRE error; fall back to the
            // unmodified short name instead of passing null to ltrim().
            $underscored = preg_replace('/[A-Z]([A-Z](?![a-z]))*/', '_$0', $shortClassName) ?? $shortClassName;
            $snakeCaseClass = strtolower(ltrim($underscored, '_'));
            $aliases[] = $snakeCaseClass;

            if (preg_match('~^(.*?)_spider$~', $snakeCaseClass, $matches) === 1) {
                $aliases[] = $matches[1];
            }

            // An all-lowercase short name is identical to its snake_case form;
            // drop such duplicates so the help text lists every alias once.
            $aliasServices[$className] = array_values(array_unique($aliases));
        }

        return $aliasServices;
    }

    /**
     * Asks the user to pick a spider when the "spider" argument was omitted.
     *
     * Nothing is asked when no spiders are known: a choice question needs at
     * least one choice, and execute() reports the missing argument instead.
     */
    protected function interact(InputInterface $input, OutputInterface $output): void
    {
        if ($input->getArgument('spider') !== null || $this->spiderNames === []) {
            return;
        }

        $spiderName = $this->selectSpiderClassName(new SymfonyStyle($input, $output));
        $input->setArgument('spider', $spiderName);
    }

    /**
     * Prompts for one of the classes provided by the service locator and returns the answer.
     */
    private function selectSpiderClassName(OutputStyle $io): string
    {
        return (string) $io->choice('Choose a spider class', array_values($this->serviceLocator->getProvidedServices()));
    }

    /**
     * Resolves the requested spider, builds the configuration overrides from
     * the options and passes both to Roach::startSpider().
     *
     * @return int self::SUCCESS after the spider was started, self::FAILURE
     *             when no spider was given or the given name is unknown
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $spiderName = $input->getArgument('spider');

        // interact() is not called in non-interactive mode, so the optional
        // argument can still be missing here. Report that explicitly rather
        // than relying on assert(), which may be disabled at runtime.
        if ($spiderName === null) {
            $io = new SymfonyStyle($input, $output);
            $io->error('No spider specified. Pass a spider class name or one of its aliases.');

            return self::FAILURE;
        }

        $spiderClassName = $this->findSpiderClass($spiderName);

        if ($spiderClassName === null) {
            $io = new SymfonyStyle($input, $output);
            $io->error('Unknown spider ' . $spiderName);

            return self::FAILURE;
        }

        $delay = $input->getOption('delay');

        if ($delay !== null) {
            // Negative delays are clamped to zero.
            $delay = max(0, (int) $delay);
        }

        $concurrency = $input->getOption('concurrency');

        if ($concurrency !== null) {
            // Concurrency is clamped to at least one request at a time.
            $concurrency = max(1, (int) $concurrency);
        }

        $overrides = new Overrides(
            concurrency: $concurrency,
            requestDelay: $delay,
        );

        Roach::startSpider($spiderClassName, $overrides);

        return self::SUCCESS;
    }

    /**
     * Finds the spider class whose class name or alias equals the given name.
     *
     * When several spiders share an alias, the first one in the map wins.
     *
     * @return class-string<\RoachPHP\Spider\SpiderInterface>|null null when the name is null or matches nothing
     */
    private function findSpiderClass(?string $spiderName): ?string
    {
        if ($spiderName === null) {
            return null;
        }

        foreach ($this->spiderNames as $className => $aliases) {
            if ($className === $spiderName || \in_array($spiderName, $aliases, true)) {
                return $className;
            }
        }

        return null;
    }
}
||
| 159 |