1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace TreeHouse\IoBundle\Source\Cleaner; |
4
|
|
|
|
5
|
|
|
use Doctrine\Common\Persistence\ManagerRegistry; |
6
|
|
|
use Psr\Log\LoggerInterface; |
7
|
|
|
use Symfony\Component\EventDispatcher\EventDispatcherInterface; |
8
|
|
|
use TreeHouse\IoBundle\Entity\Feed; |
9
|
|
|
use TreeHouse\IoBundle\Entity\ImportRepository; |
10
|
|
|
use TreeHouse\IoBundle\Event\FeedCleanupEvent; |
11
|
|
|
use TreeHouse\IoBundle\Event\FeedCleanupHaltEvent; |
12
|
|
|
use TreeHouse\IoBundle\Event\FeedEvent; |
13
|
|
|
use TreeHouse\IoBundle\IoEvents; |
14
|
|
|
use TreeHouse\IoBundle\Source\SourceManagerInterface; |
15
|
|
|
|
16
|
|
|
/**
 * Source cleaner that removes "idle" sources: sources of a feed that have not
 * been visited since that feed's last full import.
 *
 * Every feed is processed separately. Before anything is cleaned, the number
 * of idle sources is checked against a threshold (see getThreshold()); when
 * the threshold is exceeded the supplied voter decides whether the cleanup of
 * that feed may proceed.
 */
class IdleSourceCleaner implements SourceCleanerInterface
{
    /**
     * @var ManagerRegistry
     */
    protected $doctrine;

    /**
     * @var SourceManagerInterface
     */
    protected $sourceManager;

    /**
     * @var EventDispatcherInterface
     */
    protected $eventDispatcher;

    /**
     * @var LoggerInterface
     */
    protected $logger;

    /**
     * @param ManagerRegistry          $doctrine      Used to query feeds and imports, and to clear the manager
     * @param SourceManagerInterface   $sourceManager Provides the source repository
     * @param EventDispatcherInterface $dispatcher    Receives the feed cleanup events
     * @param LoggerInterface          $logger
     */
    public function __construct(
        ManagerRegistry $doctrine,
        SourceManagerInterface $sourceManager,
        EventDispatcherInterface $dispatcher,
        LoggerInterface $logger
    ) {
        $this->doctrine = $doctrine;
        $this->sourceManager = $sourceManager;
        $this->eventDispatcher = $dispatcher;
        $this->logger = $logger;
    }

    /**
     * Cleans the idle sources of every feed and returns the accumulated
     * number of cleaned sources. Feeds that are skipped or halted (see
     * cleanFeed()) do not contribute to the total.
     *
     * @inheritdoc
     */
    public function clean(DelegatingSourceCleaner $cleaner, ThresholdVoterInterface $voter)
    {
        $numCleaned = 0;

        $query = $this->doctrine
            ->getRepository('TreeHouseIoBundle:Feed')
            ->createQueryBuilder('f')
            ->getQuery()
        ;

        /** @var Feed $feed */
        foreach ($query->iterate() as list($feed)) {
            // cleanFeed() accepts exactly three arguments; the running total
            // is kept here and must not be passed along.
            $cleaned = $this->cleanFeed($cleaner, $feed, $voter);

            // false means the feed was skipped or its cleanup was halted
            if (false !== $cleaned) {
                $numCleaned += $cleaned;
            }

            // cleanup uow after cleaning the feed
            $this->doctrine->getManager()->clear();
        }

        return $numCleaned;
    }

    /**
     * Cleans the sources of a single feed that have not been visited since
     * the feed's last full import.
     *
     * Dispatched events:
     *  - IoEvents::FEED_CLEANUP_SKIP when the feed has no usable full import
     *  - IoEvents::PRE_CLEAN_FEED    before the idle sources are counted
     *  - IoEvents::FEED_CLEANUP_HALT when the threshold is exceeded and the
     *                                voter rejects the cleanup
     *  - IoEvents::POST_CLEAN_FEED   after the cleaner has run
     *
     * @param DelegatingSourceCleaner $cleaner Performs the actual cleaning, given a query
     * @param Feed                    $feed
     * @param ThresholdVoterInterface $voter   Consulted only when the threshold is exceeded
     *
     * @return int|false The result of the cleaner's cleanByQuery() (the number of
     *                   cleaned sources), or false when the feed was skipped or halted
     */
    public function cleanFeed(DelegatingSourceCleaner $cleaner, Feed $feed, ThresholdVoterInterface $voter)
    {
        if (null === $expireDate = $this->getLastFullImportDate($feed)) {
            $this->logger->debug(
                sprintf('Skipping %s, because it has no recent imports', $feed)
            );

            $this->eventDispatcher->dispatch(IoEvents::FEED_CLEANUP_SKIP, new FeedCleanupEvent($feed, 0));

            return false;
        }

        $this->eventDispatcher->dispatch(IoEvents::PRE_CLEAN_FEED, new FeedEvent($feed));

        $this->logger->debug(
            sprintf(
                'Checking sources of %s that have not been visited since %s',
                $feed,
                $expireDate->format('Y-m-d H:i:s')
            )
        );

        // get sources that haven't been visited since $expireDate
        $sourceRepo = $this->sourceManager->getRepository();
        $count = $sourceRepo->countByFeedAndUnvisitedSince($feed, $expireDate);

        // fail safe: see if percentage of sources to be removed is not too high
        $total = $sourceRepo->countByFeed($feed);
        $max = $this->getThreshold($total);

        // see if threshold is reached
        if ($count > $max) {
            $message = sprintf(
                'Stopping cleanup for %s, because %s of %s sources were to be deleted, %s is the maximum.',
                $feed,
                $count,
                $total,
                $max
            );

            // the voter has the final say: a negative vote halts this feed
            if (!$voter->vote($count, $total, $max, $message)) {
                $this->eventDispatcher->dispatch(
                    IoEvents::FEED_CLEANUP_HALT,
                    new FeedCleanupHaltEvent($feed, $count, $total, $max)
                );

                return false;
            }
        }

        $this->logger->debug(
            sprintf('Cleaning %d sources for %s', $count, $feed)
        );

        $builder = $sourceRepo->queryByFeedAndUnvisitedSince($feed, $expireDate);
        $numCleaned = $cleaner->cleanByQuery($builder->getQuery());

        $this->eventDispatcher->dispatch(IoEvents::POST_CLEAN_FEED, new FeedCleanupEvent($feed, $numCleaned));

        return $numCleaned;
    }

    /**
     * Returns the start date of the most recent full import of the given feed.
     *
     * Only completed imports are considered, and of those only the ones that
     * have no errors, are not partial and contain at least one item.
     *
     * @param Feed $feed
     *
     * @return \DateTime|null The latest qualifying start date, or null when the
     *                        feed is partial or has no qualifying import
     */
    public function getLastFullImportDate(Feed $feed)
    {
        // we can only have a full import when the feed is not partial
        if ($feed->isPartial()) {
            return null;
        }

        $imports = $this->getImportRepository()->findCompletedByFeed($feed);

        // find the import dates for this feed, but only non-partial imports
        $dates = [];
        foreach ($imports as $import) {
            // don't count imports with errors
            if ($import->hasErrors()) {
                continue;
            }

            // don't count partial imports
            if ($import->isPartial()) {
                continue;
            }

            // imports without any items are excluded also
            if ($import->getTotalNumberOfItems() === 0) {
                continue;
            }

            $dates[] = $import->getDatetimeStarted();
        }

        // if we have no date for this feed, we can't consider it to be fully imported
        if (empty($dates)) {
            return null;
        }

        // return the latest of the dates
        return max($dates);
    }

    /**
     * Calculates maximum number of cleanups that may take place.
     *
     * The allowed ratio shrinks as the total grows: the formula yields a
     * ratio of 1 (everything may be cleaned) for a total of 1 and a ratio
     * of 3/20 for a total of 100. The result is rounded up.
     *
     * @param int $total  Total number of sources
     * @param int $factor Root applied to the totals in the formula; must be non-zero
     *
     * @return float
     *
     * @see http://math.stackexchange.com/a/398263/78794
     */
    protected function getThreshold($total, $factor = 6)
    {
        // nothing can be cleaned when there are no sources; this also avoids
        // the "-0" the formula would otherwise produce for an empty feed
        if ($total < 1) {
            return 0.0;
        }

        $exponent = 1 / $factor;
        $base = pow(100, $exponent);

        $ratio = (3 * $base - 3) / ((17 * pow($total, $exponent)) + (3 * $base) - 20);

        return ceil($total * $ratio);
    }

    /**
     * @return ImportRepository
     */
    protected function getImportRepository()
    {
        return $this->doctrine->getRepository('TreeHouseIoBundle:Import');
    }
}
222
|
|
|
|
This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.
If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is WordPress.
In this case you can add the `@ignore` PhpDoc annotation to the duplicate definition and it will be ignored.