ScrapeStartCommand::findScrapers()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 10

Duplication

Lines 10
Ratio 100 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
dl 10
loc 10
ccs 0
cts 5
cp 0
rs 9.9332
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 6
1
<?php
2
3
namespace TreeHouse\IoBundle\Command;
4
5
use Doctrine\Common\Persistence\ManagerRegistry;
6
use Symfony\Component\Console\Command\Command;
7
use Symfony\Component\Console\Input\InputArgument;
8
use Symfony\Component\Console\Input\InputInterface;
9
use Symfony\Component\Console\Input\InputOption;
10
use Symfony\Component\Console\Output\OutputInterface;
11
use TreeHouse\IoBundle\Entity\Scraper;
12
use TreeHouse\IoBundle\Entity\Scraper as ScraperEntity;
13
use TreeHouse\IoBundle\Scrape\Crawler\RateLimit\EnablingRateLimitInterface;
14
use TreeHouse\IoBundle\Scrape\EventListener\ScrapeOutputSubscriber;
15
use TreeHouse\IoBundle\Scrape\Exception\CrawlException;
16
use TreeHouse\IoBundle\Scrape\ScraperFactory;
17
18
/**
 * Console command `io:scrape:start`.
 *
 * Looks up the requested scrapers (or all of them when no id is given),
 * reports their schedule and starts every scraper that is due.
 */
class ScrapeStartCommand extends Command
{
    /**
     * Used to look up scraper entities and to persist their last-started time.
     *
     * @var ManagerRegistry
     */
    protected $doctrine;

    /**
     * Creates the scraper service for a scraper entity and exposes the event dispatcher.
     *
     * @var ScraperFactory
     */
    protected $factory;

    /**
     * @param ManagerRegistry $doctrine
     * @param ScraperFactory  $factory
     */
    public function __construct(ManagerRegistry $doctrine, ScraperFactory $factory)
    {
        $this->doctrine = $doctrine;
        $this->factory = $factory;

        parent::__construct();
    }

    /**
     * Declares the command name, the optional list of scraper ids and the
     * `--async` / `--no-limit` options.
     *
     * @inheritdoc
     */
    protected function configure()
    {
        $this->setName('io:scrape:start');
        $this->setDescription('Starts scraper(s)');
        $this->addArgument('scraper', InputArgument::IS_ARRAY | InputArgument::OPTIONAL, 'The scraper id');
        $this->addOption(
            'async',
            'a',
            InputOption::VALUE_NONE,
            'Whether to scrape asynchronous. Doing so will queue next pages, rather them processing them right away'
        );
        $this->addOption('no-limit', null, InputOption::VALUE_NONE, 'Disables the rate limit');
    }

    /**
     * Walks over the selected scrapers, prints their schedule and starts the
     * ones whose next start time has been reached (or that never ran).
     *
     * A CrawlException thrown while scraping is printed as an error and the
     * loop continues with the next scraper.
     *
     * @inheritdoc
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Only attach the progress output when the command is not run in quiet mode.
        if ($output->getVerbosity() >= OutputInterface::VERBOSITY_NORMAL) {
            $dispatcher = $this->factory->getEventDispatcher();
            $dispatcher->addSubscriber(new ScrapeOutputSubscriber($output));
        }

        $scrapers = $this->findScrapers($input->getArgument('scraper'));
        foreach ($scrapers as $scraperEntity) {
            $url = $scraperEntity->getUrl();
            $frequency = $scraperEntity->getStartFrequency();

            $output->writeln(sprintf('Found scraper: <info>%s</info>', $url));
            $output->writeln(sprintf('- Start every <info>%s</info> hours', $frequency));

            if ($date = $scraperEntity->getDatetimeLastStarted()) {
                $output->writeln(sprintf('- Last started at <info>%s</info>', $date->format(DATE_RFC2822)));

                // Work on a copy: DateTime::add() modifies the object it is called on,
                // which would otherwise shift the entity's own last-started value.
                // The result is reassigned so an immutable date object works as well.
                $nextStart = clone $date;
                $nextStart = $nextStart->add(new \DateInterval(sprintf('PT%sH', $frequency)));

                if ($nextStart > new \DateTime()) {
                    $output->writeln(sprintf('- Next start time at <info>%s</info>', $nextStart->format(DATE_RFC2822)));

                    // Not due yet, move on to the next scraper.
                    continue;
                }
            } else {
                $output->writeln('- Scraper has <info>never</info> started');
            }

            $output->writeln('Starting scraper...');
            $output->writeln('-------------------');

            try {
                $this->scrape($input, $scraperEntity);
            } catch (CrawlException $e) {
                $output->writeln(sprintf('<error>%s</error>', $e->getMessage()));
            }

            $output->writeln('-------------------');
            $output->writeln('');
        }
    }

    /**
     * Fetches the scrapers with the given ids, or every scraper when the list is empty.
     *
     * @param array $ids Scraper ids; they arrive as strings when taken from the console argument
     *
     * @return ScraperEntity[]
     */
    protected function findScrapers(array $ids)
    {
        $repo = $this->doctrine->getRepository('TreeHouseIoBundle:Scraper');

        if (!empty($ids)) {
            return $repo->findBy(['id' => $ids]);
        }

        return $repo->findAll();
    }

    /**
     * Runs a single scraper, honouring the `async` and `no-limit` options, and
     * stores the current time as its last-started time afterwards.
     *
     * The timestamp is written after the scrape call returns, so a scrape that
     * throws leaves the stored value unchanged.
     *
     * @param InputInterface $input
     * @param Scraper        $scraperEntity
     */
    protected function scrape(InputInterface $input, $scraperEntity)
    {
        $scraper = $this->factory->createScraper($scraperEntity);

        if ($input->getOption('async')) {
            $scraper->setAsync(true);
        }

        if ($input->getOption('no-limit')) {
            // Only rate limits that support being switched off can be disabled.
            $limit = $scraper->getCrawler()->getRateLimit();
            if ($limit instanceof EnablingRateLimitInterface) {
                $limit->disable();
            }
        }

        $scraper->scrape($scraperEntity, $scraperEntity->getUrl());

        $scraperEntity->setDatetimeLastStarted(new \DateTime());
        $this->doctrine->getManager()->flush($scraperEntity);
    }
}
144