| @@ 18-276 (lines=259) @@ | ||
| 15 | * This is necessary because during imports/scrapes we want to keep track of sources |
|
| 16 | * that we encounter, but not necessarily handle or modify. |
|
| 17 | */ |
|
class CachedSourceManager implements SourceManagerInterface
{
    /**
     * The wrapped manager; every operation is ultimately delegated to it.
     *
     * @var SourceManagerInterface
     */
    protected $sourceManager;

    /**
     * Cached sources, indexed by source id.
     *
     * @var SourceInterface[]
     */
    protected $sources = [];

    /**
     * Cached sources, indexed first by origin hash (see getFeedHash() and
     * getScraperHash()) and then by the source's original id.
     *
     * @var array
     */
    protected $originSources = [];

    /**
     * @param SourceManagerInterface $sourceManager The manager to wrap
     */
    public function __construct(SourceManagerInterface $sourceManager)
    {
        $this->sourceManager = $sourceManager;
    }

    /**
     * Returns the repository of the wrapped manager (not cached).
     *
     * @inheritdoc
     */
    public function getRepository()
    {
        return $this->sourceManager->getRepository();
    }

    /**
     * Returns the cached source for this id, or asks the wrapped manager
     * and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findById($sourceId)
    {
        if (null === $source = $this->findCachedById($sourceId)) {
            $source = $this->sourceManager->findById($sourceId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this feed/original id, or asks the
     * wrapped manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByFeed(Feed $feed, $originalId)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeed($feed, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this scraper/original id, or asks the
     * wrapped manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByScraper(Scraper $scraper, $originalId)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraper($scraper, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByFeed(), but a cache miss is delegated to the
     * wrapped manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByFeedOrCreate(Feed $feed, $originalId, $originalUrl = null)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeedOrCreate($feed, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByScraper(), but a cache miss is delegated to the
     * wrapped manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByScraperOrCreate(Scraper $scraper, $originalId, $originalUrl)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraperOrCreate($scraper, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * @inheritdoc
     */
    public function persist(SourceInterface $source)
    {
        $this->sourceManager->persist($source);
    }

    /**
     * Removes the source via the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out a removed source.
     *
     * @inheritdoc
     */
    public function remove(SourceInterface $source)
    {
        $this->sourceManager->remove($source);

        // Evict only after the delegate succeeded: if it throws, the cache is left untouched.
        $this->evict($source);
    }

    /**
     * Detaches the source via the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out a detached source.
     *
     * @inheritdoc
     */
    public function detach(SourceInterface $source)
    {
        $this->sourceManager->detach($source);

        // Evict only after the delegate succeeded: if it throws, the cache is left untouched.
        $this->evict($source);
    }

    /**
     * @inheritdoc
     */
    public function flush(SourceInterface $source = null)
    {
        $this->sourceManager->flush($source);
    }

    /**
     * Clears the wrapped manager and empties both internal caches.
     *
     * @inheritdoc
     */
    public function clear()
    {
        $this->sourceManager->clear();
        $this->sources = [];
        $this->originSources = [];
    }

    /**
     * Adds source to the internal cache and marks it as visited.
     *
     * A null source (lookup miss in the wrapped manager) is ignored. The
     * source is cached by id when it has one, and by origin when it has both
     * a feed/scraper and an original id.
     *
     * @param SourceInterface|null $source
     */
    protected function cache(SourceInterface $source = null)
    {
        if (null === $source) {
            return;
        }

        // mark as visited
        $source->setDatetimeLastVisited(new \DateTime());

        // cache by id
        if ($source->getId()) {
            $this->sources[$source->getId()] = $source;
        }

        // must have hash and original id; compare strictly so that an
        // original id of "0" is still cached instead of being treated as empty
        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null === $hash || null === $originalId || '' === $originalId) {
            return;
        }

        $this->originSources[$hash][$originalId] = $source;
    }

    /**
     * Removes source from the internal cache (both the id index and the
     * origin index). Does nothing for entries that are not cached.
     *
     * @param SourceInterface $source
     */
    protected function evict(SourceInterface $source)
    {
        if ($source->getId()) {
            unset($this->sources[$source->getId()]);
        }

        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null === $hash || null === $originalId || '' === $originalId) {
            return;
        }

        unset($this->originSources[$hash][$originalId]);
    }

    /**
     * Returns the origin hash for a source: the feed hash when the source
     * has a feed, otherwise the scraper hash when it has a scraper.
     *
     * @param SourceInterface $source
     *
     * @return string|null Null when the source has neither feed nor scraper
     */
    protected function getOriginHash(SourceInterface $source)
    {
        if (null !== $feed = $source->getFeed()) {
            return $this->getFeedHash($feed);
        }

        if (null !== $scraper = $source->getScraper()) {
            return $this->getScraperHash($scraper);
        }

        return null;
    }

    /**
     * @param int $sourceId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedById($sourceId)
    {
        // cache() never stores null, so isset() is a sufficient existence check
        return isset($this->sources[$sourceId]) ? $this->sources[$sourceId] : null;
    }

    /**
     * @param string $hash
     * @param string $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByOrigin($hash, $originalId)
    {
        // a pure read: isset() handles a missing hash bucket without creating it
        if (!isset($this->originSources[$hash][$originalId])) {
            return null;
        }

        return $this->originSources[$hash][$originalId];
    }

    /**
     * @param Feed   $feed
     * @param string $originalId
     *
     * @return SourceInterface|null
     */
    protected function findCachedByFeed(Feed $feed, $originalId)
    {
        return $this->findCachedByOrigin($this->getFeedHash($feed), $originalId);
    }

    /**
     * @param Scraper $scraper
     * @param string  $originalId
     *
     * @return SourceInterface|null
     */
    protected function findCachedByScraper(Scraper $scraper, $originalId)
    {
        return $this->findCachedByOrigin($this->getScraperHash($scraper), $originalId);
    }

    /**
     * Returns a hash for a feed, derived from its id. The "feed" prefix
     * keeps it distinct from the hash of a scraper with the same id.
     *
     * @param Feed $feed
     *
     * @return string
     */
    protected function getFeedHash(Feed $feed)
    {
        return md5('feed' . $feed->getId());
    }

    /**
     * Returns a hash for a scraper, derived from its id. The "scraper"
     * prefix keeps it distinct from the hash of a feed with the same id.
     *
     * @param Scraper $scraper
     *
     * @return string
     */
    protected function getScraperHash(Scraper $scraper)
    {
        return md5('scraper' . $scraper->getId());
    }
}
|
| 277 | ||
| @@ 18-276 (lines=259) @@ | ||
| 15 | * This is necessary because during imports we want to keep track of sources that |
|
| 16 | * we encounter, but not necessarily handle or modify. |
|
| 17 | */ |
|
class ImportSourceManager implements SourceManagerInterface
{
    /**
     * The wrapped manager; every operation is ultimately delegated to it.
     *
     * @var SourceManagerInterface
     */
    protected $sourceManager;

    /**
     * Cached sources, indexed by source id.
     *
     * @var SourceInterface[]
     */
    protected $sources = [];

    /**
     * Cached sources, indexed first by origin hash (see getFeedHash() and
     * getScraperHash()) and then by the source's original id.
     *
     * @var array
     */
    protected $originSources = [];

    /**
     * @param SourceManagerInterface $sourceManager The manager to wrap
     */
    public function __construct(SourceManagerInterface $sourceManager)
    {
        $this->sourceManager = $sourceManager;
    }

    /**
     * Returns the repository of the wrapped manager (not cached).
     *
     * @inheritdoc
     */
    public function getRepository()
    {
        return $this->sourceManager->getRepository();
    }

    /**
     * Returns the cached source for this id, or asks the wrapped manager
     * and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findById($sourceId)
    {
        if (null === $source = $this->findCachedById($sourceId)) {
            $source = $this->sourceManager->findById($sourceId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this feed/original id, or asks the
     * wrapped manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByFeed(Feed $feed, $originalId)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeed($feed, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this scraper/original id, or asks the
     * wrapped manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByScraper(Scraper $scraper, $originalId)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraper($scraper, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByFeed(), but a cache miss is delegated to the
     * wrapped manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByFeedOrCreate(Feed $feed, $originalId, $originalUrl = null)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeedOrCreate($feed, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByScraper(), but a cache miss is delegated to the
     * wrapped manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByScraperOrCreate(Scraper $scraper, $originalId, $originalUrl)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraperOrCreate($scraper, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * @inheritdoc
     */
    public function persist(SourceInterface $source)
    {
        $this->sourceManager->persist($source);
    }

    /**
     * Removes the source via the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out a removed source.
     *
     * @inheritdoc
     */
    public function remove(SourceInterface $source)
    {
        $this->sourceManager->remove($source);

        // Evict only after the delegate succeeded: if it throws, the cache is left untouched.
        $this->evict($source);
    }

    /**
     * Detaches the source via the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out a detached source.
     *
     * @inheritdoc
     */
    public function detach(SourceInterface $source)
    {
        $this->sourceManager->detach($source);

        // Evict only after the delegate succeeded: if it throws, the cache is left untouched.
        $this->evict($source);
    }

    /**
     * @inheritdoc
     */
    public function flush(SourceInterface $source = null)
    {
        $this->sourceManager->flush($source);
    }

    /**
     * Clears the wrapped manager and empties both internal caches.
     *
     * @inheritdoc
     */
    public function clear()
    {
        $this->sourceManager->clear();
        $this->sources = [];
        $this->originSources = [];
    }

    /**
     * Adds source to the internal cache and marks it as visited.
     *
     * A null source (lookup miss in the wrapped manager) is ignored. The
     * source is cached by id when it has one, and by origin when it has both
     * a feed/scraper and an original id.
     *
     * @param SourceInterface|null $source
     */
    protected function cache(SourceInterface $source = null)
    {
        if (null === $source) {
            return;
        }

        // mark as visited
        $source->setDatetimeLastVisited(new \DateTime());

        // cache by id
        if ($source->getId()) {
            $this->sources[$source->getId()] = $source;
        }

        // must have hash and original id; compare strictly so that an
        // original id of "0" is still cached instead of being treated as empty
        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null === $hash || null === $originalId || '' === $originalId) {
            return;
        }

        $this->originSources[$hash][$originalId] = $source;
    }

    /**
     * Removes source from the internal cache (both the id index and the
     * origin index). Does nothing for entries that are not cached.
     *
     * @param SourceInterface $source
     */
    protected function evict(SourceInterface $source)
    {
        if ($source->getId()) {
            unset($this->sources[$source->getId()]);
        }

        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null === $hash || null === $originalId || '' === $originalId) {
            return;
        }

        unset($this->originSources[$hash][$originalId]);
    }

    /**
     * Returns the origin hash for a source: the feed hash when the source
     * has a feed, otherwise the scraper hash when it has a scraper.
     *
     * @param SourceInterface $source
     *
     * @return string|null Null when the source has neither feed nor scraper
     */
    protected function getOriginHash(SourceInterface $source)
    {
        if (null !== $feed = $source->getFeed()) {
            return $this->getFeedHash($feed);
        }

        if (null !== $scraper = $source->getScraper()) {
            return $this->getScraperHash($scraper);
        }

        return null;
    }

    /**
     * @param int $sourceId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedById($sourceId)
    {
        // cache() never stores null, so isset() is a sufficient existence check
        return isset($this->sources[$sourceId]) ? $this->sources[$sourceId] : null;
    }

    /**
     * @param string $hash
     * @param string $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByOrigin($hash, $originalId)
    {
        // a pure read: isset() handles a missing hash bucket without creating it
        if (!isset($this->originSources[$hash][$originalId])) {
            return null;
        }

        return $this->originSources[$hash][$originalId];
    }

    /**
     * @param Feed   $feed
     * @param string $originalId
     *
     * @return SourceInterface|null
     */
    protected function findCachedByFeed(Feed $feed, $originalId)
    {
        return $this->findCachedByOrigin($this->getFeedHash($feed), $originalId);
    }

    /**
     * @param Scraper $scraper
     * @param string  $originalId
     *
     * @return SourceInterface|null
     */
    protected function findCachedByScraper(Scraper $scraper, $originalId)
    {
        return $this->findCachedByOrigin($this->getScraperHash($scraper), $originalId);
    }

    /**
     * Returns a hash for a feed, derived from its id. The "feed" prefix
     * keeps it distinct from the hash of a scraper with the same id.
     *
     * @param Feed $feed
     *
     * @return string
     */
    protected function getFeedHash(Feed $feed)
    {
        return md5('feed' . $feed->getId());
    }

    /**
     * Returns a hash for a scraper, derived from its id. The "scraper"
     * prefix keeps it distinct from the hash of a feed with the same id.
     *
     * @param Scraper $scraper
     *
     * @return string
     */
    protected function getScraperHash(Scraper $scraper)
    {
        return md5('scraper' . $scraper->getId());
    }
}
|
| 277 | ||