1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace TreeHouse\IoBundle\Source\Manager; |
4
|
|
|
|
5
|
|
|
use TreeHouse\IoBundle\Entity\Feed; |
6
|
|
|
use TreeHouse\IoBundle\Entity\Scraper; |
7
|
|
|
use TreeHouse\IoBundle\Model\SourceInterface; |
8
|
|
|
use TreeHouse\IoBundle\Source\SourceManagerInterface; |
9
|
|
|
|
10
|
|
|
/** |
11
|
|
|
* Wrapper around the main source manager implementation. This source manager is used |
12
|
|
|
* during imports and has one main purpose besides the regular implementation: to |
13
|
|
|
* cache sources and touch the visited timestamp of a source when it's searched. |
14
|
|
|
* |
15
|
|
|
* This is necessary because during imports we want to keep track of sources that |
16
|
|
|
* we encounter, but not necessarily handle or modify. |
17
|
|
|
*/ |
18
|
|
View Code Duplication |
class ImportSourceManager implements SourceManagerInterface
{
    /**
     * The wrapped source manager; every operation is ultimately delegated to it.
     *
     * @var SourceManagerInterface
     */
    protected $sourceManager;

    /**
     * Cached sources, keyed by source id.
     *
     * @var array
     */
    protected $sources = [];

    /**
     * Cached sources, keyed first by origin hash (see getFeedHash() and
     * getScraperHash()) and then by the source's original id.
     *
     * @var array
     */
    protected $originSources = [];

    /**
     * @param SourceManagerInterface $sourceManager The manager to wrap
     */
    public function __construct(SourceManagerInterface $sourceManager)
    {
        $this->sourceManager = $sourceManager;
    }

    /**
     * @inheritdoc
     */
    public function getRepository()
    {
        return $this->sourceManager->getRepository();
    }

    /**
     * Looks in the id cache first; on a miss the wrapped manager is asked and
     * the result (if any) is cached and marked as visited.
     *
     * @inheritdoc
     */
    public function findById($sourceId)
    {
        $source = $this->findCachedById($sourceId);

        if (null === $source) {
            $source = $this->sourceManager->findById($sourceId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Looks in the origin cache first; on a miss the wrapped manager is asked
     * and the result (if any) is cached and marked as visited.
     *
     * @inheritdoc
     */
    public function findSourceByFeed(Feed $feed, $originalId)
    {
        $source = $this->findCachedByFeed($feed, $originalId);

        if (null === $source) {
            $source = $this->sourceManager->findSourceByFeed($feed, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Looks in the origin cache first; on a miss the wrapped manager is asked
     * and the result (if any) is cached and marked as visited.
     *
     * @inheritdoc
     */
    public function findSourceByScraper(Scraper $scraper, $originalId)
    {
        $source = $this->findCachedByScraper($scraper, $originalId);

        if (null === $source) {
            $source = $this->sourceManager->findSourceByScraper($scraper, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Looks in the origin cache first; on a miss the wrapped manager finds or
     * creates the source, which is then cached and marked as visited.
     *
     * @inheritdoc
     */
    public function findSourceByFeedOrCreate(Feed $feed, $originalId, $originalUrl = null)
    {
        $source = $this->findCachedByFeed($feed, $originalId);

        if (null === $source) {
            $source = $this->sourceManager->findSourceByFeedOrCreate($feed, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Looks in the origin cache first; on a miss the wrapped manager finds or
     * creates the source, which is then cached and marked as visited.
     *
     * @inheritdoc
     */
    public function findSourceByScraperOrCreate(Scraper $scraper, $originalId, $originalUrl)
    {
        $source = $this->findCachedByScraper($scraper, $originalId);

        if (null === $source) {
            $source = $this->sourceManager->findSourceByScraperOrCreate($scraper, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * @inheritdoc
     */
    public function persist(SourceInterface $source)
    {
        $this->sourceManager->persist($source);
    }

    /**
     * Removes the source through the wrapped manager and drops it from the
     * internal caches, so later lookups cannot return the removed instance.
     *
     * @inheritdoc
     */
    public function remove(SourceInterface $source)
    {
        // Evict before delegating, so the cache keys are computed from the
        // source's state as it was before the wrapped manager touches it.
        $this->evict($source);

        $this->sourceManager->remove($source);
    }

    /**
     * Detaches the source through the wrapped manager and drops it from the
     * internal caches, so later lookups cannot return the detached instance.
     *
     * @inheritdoc
     */
    public function detach(SourceInterface $source)
    {
        // Evict before delegating, for the same reason as in remove().
        $this->evict($source);

        $this->sourceManager->detach($source);
    }

    /**
     * @inheritdoc
     */
    public function flush(SourceInterface $source = null)
    {
        $this->sourceManager->flush($source);
    }

    /**
     * Clears the wrapped manager and empties both internal caches.
     *
     * @inheritdoc
     */
    public function clear()
    {
        $this->sourceManager->clear();
        $this->sources = [];
        $this->originSources = [];
    }

    /**
     * Adds source to the internal cache and marks it as visited.
     *
     * A null source is ignored. The source is indexed by id when it has one,
     * and by origin (feed/scraper hash + original id) when both are available.
     *
     * @param SourceInterface $source
     */
    protected function cache(SourceInterface $source = null)
    {
        if (null === $source) {
            return;
        }

        // mark as visited
        $source->setDatetimeLastVisited(new \DateTime());

        // cache by id
        if ($source->getId()) {
            $this->sources[$source->getId()] = $source;
        }

        $hash = $this->getOriginHash($source);

        // must have hash and original id
        if (!$hash || !$source->getOriginalId()) {
            return;
        }

        $this->originSources[$hash][$source->getOriginalId()] = $source;
    }

    /**
     * @param int $sourceId
     *
     * @return SourceInterface|null The cached source, or null when not cached
     */
    protected function findCachedById($sourceId)
    {
        if (!array_key_exists($sourceId, $this->sources)) {
            return null;
        }

        return $this->sources[$sourceId];
    }

    /**
     * @param string $hash       Origin hash as returned by getFeedHash()/getScraperHash()
     * @param string $originalId
     *
     * @return SourceInterface|null The cached source, or null when not cached
     */
    protected function findCachedByOrigin($hash, $originalId)
    {
        // create origin cache if necessary
        if (!isset($this->originSources[$hash])) {
            $this->originSources[$hash] = [];
        }

        // see if we have a cached mapping, return the cached entry
        if (!array_key_exists($originalId, $this->originSources[$hash])) {
            return null;
        }

        return $this->originSources[$hash][$originalId];
    }

    /**
     * @param Feed   $feed
     * @param string $originalId
     *
     * @return SourceInterface|null
     */
    protected function findCachedByFeed(Feed $feed, $originalId)
    {
        return $this->findCachedByOrigin($this->getFeedHash($feed), $originalId);
    }

    /**
     * @param Scraper $scraper
     * @param string  $originalId
     *
     * @return SourceInterface|null
     */
    protected function findCachedByScraper(Scraper $scraper, $originalId)
    {
        return $this->findCachedByOrigin($this->getScraperHash($scraper), $originalId);
    }

    /**
     * Returns a hash for a feed, derived from the feed's id.
     *
     * @param Feed $feed
     *
     * @return string
     */
    protected function getFeedHash(Feed $feed)
    {
        return md5('feed' . $feed->getId());
    }

    /**
     * Returns a hash for a scraper, derived from the scraper's id.
     *
     * @param Scraper $scraper
     *
     * @return string
     */
    protected function getScraperHash(Scraper $scraper)
    {
        return md5('scraper' . $scraper->getId());
    }

    /**
     * Returns the origin hash for a source: the feed hash when the source has
     * a feed, otherwise the scraper hash when it has a scraper.
     *
     * @param SourceInterface $source
     *
     * @return string|null Null when the source has neither a feed nor a scraper
     */
    private function getOriginHash(SourceInterface $source)
    {
        if (null !== $feed = $source->getFeed()) {
            return $this->getFeedHash($feed);
        }

        if (null !== $scraper = $source->getScraper()) {
            return $this->getScraperHash($scraper);
        }

        return null;
    }

    /**
     * Removes a source from both internal caches, using the same keys that
     * cache() stores it under. Entries that are not cached are ignored.
     *
     * @param SourceInterface $source
     */
    private function evict(SourceInterface $source)
    {
        if ($source->getId()) {
            unset($this->sources[$source->getId()]);
        }

        $hash = $this->getOriginHash($source);

        if ($hash && $source->getOriginalId()) {
            unset($this->originSources[$hash][$source->getOriginalId()]);
        }
    }
}
277
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.