ScrapeUrlExecutor   A
last analyzed

Complexity

Total Complexity 9

Size/Duplication

Total Lines 99
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 11

Test Coverage

Coverage 0%

Importance

Changes 0
Metric Value
wmc 9
c 0
b 0
f 0
lcom 2
cbo 11
dl 0
loc 99
ccs 0
cts 46
cp 0
rs 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 1
A getName() 0 4 1
A configurePayload() 0 15 2
A execute() 0 27 4
A findScraper() 0 4 1
1
<?php
2
3
namespace TreeHouse\IoBundle\Bridge\WorkerBundle\Executor;
4
5
use Doctrine\Common\Persistence\ManagerRegistry;
6
use Psr\Log\LoggerInterface;
7
use Symfony\Component\OptionsResolver\Exception\InvalidArgumentException;
8
use Symfony\Component\OptionsResolver\Options;
9
use Symfony\Component\OptionsResolver\OptionsResolver;
10
use TreeHouse\IoBundle\Entity\Scraper as ScraperEntity;
11
use TreeHouse\IoBundle\Scrape\Exception\CrawlException;
12
use TreeHouse\IoBundle\Scrape\Exception\RateLimitException;
13
use TreeHouse\IoBundle\Scrape\ScraperFactory;
14
use TreeHouse\WorkerBundle\Exception\RescheduleException;
15
use TreeHouse\WorkerBundle\Executor\AbstractExecutor;
16
17
/**
 * Worker executor that scrapes a single url with the scraper belonging to a
 * stored Scraper entity.
 *
 * The payload consists of two positional values:
 *   0 => the id of the Scraper entity (resolved to the entity itself)
 *   1 => the url to scrape
 */
class ScrapeUrlExecutor extends AbstractExecutor
{
    const NAME = 'scrape.url';

    /**
     * Used to look up Scraper entities by id.
     *
     * @var ManagerRegistry
     */
    protected $doctrine;

    /**
     * Creates the scraper instance for a Scraper entity.
     *
     * @var ScraperFactory
     */
    protected $factory;

    /**
     * Receives an error entry when a crawl fails.
     *
     * @var LoggerInterface
     */
    protected $logger;

    /**
     * @param ManagerRegistry $doctrine
     * @param ScraperFactory  $factory
     * @param LoggerInterface $logger
     */
    public function __construct(ManagerRegistry $doctrine, ScraperFactory $factory, LoggerInterface $logger)
    {
        $this->logger = $logger;
        $this->factory = $factory;
        $this->doctrine = $doctrine;
    }

    /**
     * Returns the name this executor is registered under (see self::NAME).
     *
     * @inheritdoc
     */
    public function getName()
    {
        return self::NAME;
    }

    /**
     * Declares the two positional payload options.
     *
     * Option 0 is required, must be numeric, and is normalized from a scraper
     * id into the matching Scraper entity. Option 1 is required and must be
     * a string.
     *
     * @inheritdoc
     */
    public function configurePayload(OptionsResolver $resolver)
    {
        // Swaps the scraper id for the entity, rejecting ids without a match.
        $idToEntity = function (Options $options, $value) {
            $scraper = $this->findScraper($value);

            if (null !== $scraper) {
                return $scraper;
            }

            throw new InvalidArgumentException(sprintf('Could not find scraper with id %d', $value));
        };

        $resolver->setRequired(0);
        $resolver->setAllowedTypes(0, 'numeric');
        $resolver->setNormalizer(0, $idToEntity);

        $resolver->setRequired(1);
        $resolver->setAllowedTypes(1, 'string');
    }

    /**
     * Scrapes the url in the payload using the scraper created for the
     * payload's Scraper entity.
     *
     * @inheritdoc
     *
     * @throws RescheduleException When the scraper reports a rate limit; the
     *                             reschedule date is set when the rate limit
     *                             exception supplies a retry date.
     *
     * @return bool True when scraping finished, false when a crawl error was
     *              caught and logged.
     */
    public function execute(array $payload)
    {
        /** @var ScraperEntity $entity */
        $entity = $payload[0];
        /** @var string $url */
        $url = $payload[1];

        $scraper = $this->factory->createScraper($entity);
        $scraper->setAsync(true);

        try {
            $scraper->scrape($entity, $url);
        } catch (RateLimitException $rateLimited) {
            $reschedule = new RescheduleException();
            $retryDate = $rateLimited->getRetryDate();

            // Only pass a date along when the rate limit exception has one.
            if ($retryDate) {
                $reschedule->setRescheduleDate($retryDate);
            }

            throw $reschedule;
        } catch (CrawlException $crawlFailure) {
            $this->logger->error($crawlFailure->getMessage(), ['url' => $crawlFailure->getUrl()]);

            return false;
        }

        return true;
    }

    /**
     * Looks up a Scraper entity by its id.
     *
     * @param int $scraperId
     *
     * @return ScraperEntity|null Null when no entity was found (callers check for this)
     */
    protected function findScraper($scraperId)
    {
        $repository = $this->doctrine->getRepository('TreeHouseIoBundle:Scraper');

        return $repository->find($scraperId);
    }
}
116