1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace TreeHouse\IoBundle\Bridge\WorkerBundle\Executor; |
4
|
|
|
|
5
|
|
|
use Psr\Log\LoggerInterface; |
6
|
|
|
use Symfony\Component\OptionsResolver\Exception\InvalidArgumentException; |
7
|
|
|
use Symfony\Component\OptionsResolver\Options; |
8
|
|
|
use Symfony\Component\OptionsResolver\OptionsResolver; |
9
|
|
|
use TreeHouse\IoBundle\Model\SourceInterface; |
10
|
|
|
use TreeHouse\IoBundle\Scrape\Exception\CrawlException; |
11
|
|
|
use TreeHouse\IoBundle\Scrape\Exception\RateLimitException; |
12
|
|
|
use TreeHouse\IoBundle\Scrape\SourceRevisitor; |
13
|
|
|
use TreeHouse\IoBundle\Source\SourceManagerInterface; |
14
|
|
|
use TreeHouse\WorkerBundle\Exception\RescheduleException; |
15
|
|
|
use TreeHouse\WorkerBundle\Executor\AbstractExecutor; |
16
|
|
|
use TreeHouse\WorkerBundle\Executor\ObjectPayloadInterface; |
17
|
|
|
|
18
|
|
|
/**
 * Worker executor registered under the name "scrape.source.revisit".
 *
 * The payload carries a single source id at index 0. That id is normalized
 * into a source via the source manager, and the resulting source is then
 * handed to the SourceRevisitor.
 */
class ScrapeRevisitSourceExecutor extends AbstractExecutor implements ObjectPayloadInterface
{
    const NAME = 'scrape.source.revisit';

    /**
     * Used to look up sources by their id.
     *
     * @var SourceManagerInterface
     */
    protected $sourceManager;

    /**
     * Performs the actual revisit of a source.
     *
     * @var SourceRevisitor
     */
    protected $revisitor;

    /**
     * Receives the error entry written when a crawl fails.
     *
     * @var LoggerInterface
     */
    protected $logger;

    /**
     * Stores the collaborators this executor works with.
     *
     * @param SourceManagerInterface $sourceManager
     * @param SourceRevisitor        $revisitor
     * @param LoggerInterface        $logger
     */
    public function __construct(SourceManagerInterface $sourceManager, SourceRevisitor $revisitor, LoggerInterface $logger)
    {
        $this->logger        = $logger;
        $this->revisitor     = $revisitor;
        $this->sourceManager = $sourceManager;
    }

    /**
     * @inheritdoc
     *
     * @return string The value of self::NAME
     */
    public function getName()
    {
        return self::NAME;
    }

    /**
     * @inheritdoc
     *
     * @return bool True only for objects implementing SourceInterface
     */
    public function supportsObject($object)
    {
        $isSource = $object instanceof SourceInterface;

        return $isSource;
    }

    /**
     * @inheritdoc
     *
     * @param SourceInterface $object
     *
     * @return array A one-element list holding the id of the given source
     */
    public function getObjectPayload($object)
    {
        $sourceId = $object->getId();

        return [$sourceId];
    }

    /**
     * @inheritdoc
     *
     * Declares payload index 0 as a required, numeric option and normalizes
     * it from an id into the matching source.
     */
    public function configurePayload(OptionsResolver $resolver)
    {
        // Not static: the closure needs $this to reach findSource().
        $idToSource = function (Options $options, $sourceId) {
            $found = $this->findSource($sourceId);

            if (null !== $found) {
                return $found;
            }

            throw new InvalidArgumentException(sprintf('Could not find source with id %d', $sourceId));
        };

        $resolver->setRequired(0);
        $resolver->setAllowedTypes(0, 'numeric');
        $resolver->setNormalizer(0, $idToSource);
    }

    /**
     * @inheritdoc
     *
     * @return bool True when the revisit finished without an exception,
     *              false when a CrawlException was caught and logged
     *
     * @throws RescheduleException When a RateLimitException is caught; the
     *                             retry date is copied over if one is set
     */
    public function execute(array $payload)
    {
        /** @var SourceInterface $source */
        $source = $payload[0];

        try {
            $this->revisitor->revisit($source, true);
        } catch (RateLimitException $rateLimited) {
            // Must be caught before CrawlException so a rate limit leads to
            // a reschedule instead of being logged as a crawl failure.
            $reschedule = new RescheduleException();
            $retryDate  = $rateLimited->getRetryDate();

            if ($retryDate) {
                $reschedule->setRescheduleDate($retryDate);
            }

            throw $reschedule;
        } catch (CrawlException $crawlFailure) {
            $context = ['url' => $crawlFailure->getUrl()];
            $this->logger->error($crawlFailure->getMessage(), $context);

            return false;
        }

        return true;
    }

    /**
     * Asks the source manager for the source with the given id.
     *
     * @param int $sourceId
     *
     * @return SourceInterface|null Whatever findById() yields; the payload
     *                              normalizer treats null as "not found"
     */
    protected function findSource($sourceId)
    {
        $source = $this->sourceManager->findById($sourceId);

        return $source;
    }
}
128
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.