ScrapeStartCommand   A
last analyzed

Complexity

Total Complexity 14

Size/Duplication

Total Lines 126
Duplicated Lines 18.25 %

Coupling/Cohesion

Components 2
Dependencies 13

Test Coverage

Coverage 0%

Importance

Changes 0
Metric Value
wmc 14
lcom 2
cbo 13
dl 23
loc 126
ccs 0
cts 57
cp 0
rs 10
c 0
b 0
f 0

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 1
A configure() 13 13 1
A findScrapers() 10 10 2
B execute() 0 40 6
A scrape() 0 20 4

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
namespace TreeHouse\IoBundle\Command;
4
5
use Doctrine\Common\Persistence\ManagerRegistry;
6
use Symfony\Component\Console\Command\Command;
7
use Symfony\Component\Console\Input\InputArgument;
8
use Symfony\Component\Console\Input\InputInterface;
9
use Symfony\Component\Console\Input\InputOption;
10
use Symfony\Component\Console\Output\OutputInterface;
11
use TreeHouse\IoBundle\Entity\Scraper;
12
use TreeHouse\IoBundle\Entity\Scraper as ScraperEntity;
13
use TreeHouse\IoBundle\Scrape\Crawler\RateLimit\EnablingRateLimitInterface;
14
use TreeHouse\IoBundle\Scrape\EventListener\ScrapeOutputSubscriber;
15
use TreeHouse\IoBundle\Scrape\Exception\CrawlException;
16
use TreeHouse\IoBundle\Scrape\ScraperFactory;
17
18
class ScrapeStartCommand extends Command
19
{
20
    /**
21
     * @var ManagerRegistry
22
     */
23
    protected $doctrine;
24
25
    /**
26
     * @var ScraperFactory
27
     */
28
    protected $factory;
29
30
    /**
     * Wires the command to the services it needs.
     *
     * @param ManagerRegistry $doctrine Registry used to look up scraper entities and flush changes to them
     * @param ScraperFactory  $factory  Factory that builds a runnable scraper for a scraper entity
     */
    public function __construct(ManagerRegistry $doctrine, ScraperFactory $factory)
    {
        // Store the collaborators first, then hand over to the base command constructor.
        $this->factory = $factory;
        $this->doctrine = $doctrine;

        parent::__construct();
    }
41
42
    /**
     * Declares the command name, description, arguments and options.
     *
     * - argument `scraper`: optional list of scraper ids
     * - option `--async` / `-a`: flag, scrape asynchronously
     * - option `--no-limit`: flag, disables the rate limit
     *
     * @inheritdoc
     */
    protected function configure()
    {
        $this
            ->setName('io:scrape:start')
            ->setDescription('Starts scraper(s)')
            ->addArgument('scraper', InputArgument::IS_ARRAY | InputArgument::OPTIONAL, 'The scraper id')
            ->addOption(
                'async',
                'a',
                InputOption::VALUE_NONE,
                'Whether to scrape asynchronously. Doing so will queue next pages, rather than processing them right away'
            )
            ->addOption('no-limit', null, InputOption::VALUE_NONE, 'Disables the rate limit');
    }
58
59
    /**
     * Starts every requested scraper that is due to run.
     *
     * For each scraper found, prints its url, start frequency and last start
     * time. A scraper whose next start time still lies in the future is
     * skipped; every other scraper is started. A CrawlException raised while
     * scraping is written to the output and does not stop the remaining
     * scrapers.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return int Always 0
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Only hook the output subscriber in when verbosity is normal or higher.
        if ($output->getVerbosity() >= OutputInterface::VERBOSITY_NORMAL) {
            $dispatcher = $this->factory->getEventDispatcher();
            $dispatcher->addSubscriber(new ScrapeOutputSubscriber($output));
        }

        // findScrapers() requires an array; the cast guards against a null or scalar argument value.
        $scrapers = $this->findScrapers((array) $input->getArgument('scraper'));
        foreach ($scrapers as $scraperEntity) {
            $url = $scraperEntity->getUrl();

            $output->writeln(sprintf('Found scraper: <info>%s</info>', $url));
            $output->writeln(sprintf('- Start every <info>%s</info> hours', $scraperEntity->getStartFrequency()));

            if ($date = $scraperEntity->getDatetimeLastStarted()) {
                $output->writeln(sprintf('- Last started at <info>%s</info>', $date->format(DATE_RFC2822)));

                // Work on a copy: DateTime::add() modifies the object in place, which would
                // otherwise shift the entity's own "last started" value forward.
                $nextStart = clone $date;
                $nextStart = $nextStart->add(
                    new \DateInterval(sprintf('PT%sH', $scraperEntity->getStartFrequency()))
                );

                if ($nextStart > new \DateTime()) {
                    $output->writeln(sprintf('- Next start time at <info>%s</info>', $nextStart->format(DATE_RFC2822)));

                    // Not due yet, move on to the next scraper.
                    continue;
                }
            } else {
                $output->writeln('- Scraper has <info>never</info> started');
            }

            $output->writeln('Starting scraper...');
            $output->writeln('-------------------');

            try {
                $this->scrape($input, $scraperEntity);
            } catch (CrawlException $e) {
                // Report the failure and carry on with the remaining scrapers.
                $output->writeln(sprintf('<error>%s</error>', $e->getMessage()));
            }

            $output->writeln('-------------------');
            $output->writeln('');
        }

        return 0;
    }
102
103
    /**
     * Looks up scraper entities, either by id or all of them.
     *
     * @param integer[] $ids Ids to look up; when empty, every scraper is returned
     *
     * @return ScraperEntity[]
     */
    protected function findScrapers(array $ids)
    {
        $repository = $this->doctrine->getRepository('TreeHouseIoBundle:Scraper');

        // No ids given: fall back to the full set.
        if (empty($ids)) {
            return $repository->findAll();
        }

        return $repository->findBy(['id' => $ids]);
    }
118
119
    /**
     * Runs a single scraper and records the moment it was started.
     *
     * Applies the `async` and `no-limit` input options to the scraper before
     * running it against the entity's url. Afterwards the entity's last
     * started datetime is set to the current time and flushed.
     *
     * @param InputInterface $input
     * @param Scraper        $scraperEntity
     */
    protected function scrape(InputInterface $input, $scraperEntity)
    {
        $runner = $this->factory->createScraper($scraperEntity);

        if ($input->getOption('async')) {
            $runner->setAsync(true);
        }

        // The rate limit is only fetched when --no-limit was passed, and only
        // disabled when it supports being switched off.
        $rateLimit = $input->getOption('no-limit') ? $runner->getCrawler()->getRateLimit() : null;
        if ($rateLimit instanceof EnablingRateLimitInterface) {
            $rateLimit->disable();
        }

        $url = $scraperEntity->getUrl();
        $runner->scrape($scraperEntity, $url);

        $scraperEntity->setDatetimeLastStarted(new \DateTime());

        $manager = $this->doctrine->getManager();
        $manager->flush($scraperEntity);
    }
143
}
144