| @@ 18-276 (lines=259) @@ | ||
| 15 | * This is necessary because during imports/scrapes we want to keep track of sources | |
| 16 | * that we encounter, but not necessarily handle or modify. | |
| 17 | */ | |
class CachedSourceManager implements SourceManagerInterface
{
    /**
     * The wrapped manager; every lookup that misses the cache and every
     * persistence operation is delegated to it.
     *
     * @var SourceManagerInterface
     */
    protected $sourceManager;

    /**
     * Cached sources, keyed by source id.
     *
     * @var SourceInterface[]
     */
    protected $sources = [];

    /**
     * Cached sources, keyed by origin hash (see getFeedHash()/getScraperHash())
     * and then by original id.
     *
     * @var array
     */
    protected $originSources = [];

    /**
     * @param SourceManagerInterface $sourceManager The manager to delegate to
     */
    public function __construct(SourceManagerInterface $sourceManager)
    {
        $this->sourceManager = $sourceManager;
    }

    /**
     * @inheritdoc
     */
    public function getRepository()
    {
        return $this->sourceManager->getRepository();
    }

    /**
     * Returns the cached source for this id, or asks the wrapped manager and
     * caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findById($sourceId)
    {
        if (null === $source = $this->findCachedById($sourceId)) {
            $source = $this->sourceManager->findById($sourceId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this feed/original id, or asks the wrapped
     * manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByFeed(Feed $feed, $originalId)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeed($feed, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this scraper/original id, or asks the
     * wrapped manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByScraper(Scraper $scraper, $originalId)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraper($scraper, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByFeed(), but a cache miss is delegated to the wrapped
     * manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByFeedOrCreate(Feed $feed, $originalId, $originalUrl = null)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeedOrCreate($feed, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByScraper(), but a cache miss is delegated to the
     * wrapped manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByScraperOrCreate(Scraper $scraper, $originalId, $originalUrl)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraperOrCreate($scraper, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * @inheritdoc
     */
    public function persist(SourceInterface $source)
    {
        $this->sourceManager->persist($source);
    }

    /**
     * Removes the source through the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out the removed instance.
     *
     * @inheritdoc
     */
    public function remove(SourceInterface $source)
    {
        // Evict first: the identifiers are read before the delegate touches the source.
        $this->evict($source);
        $this->sourceManager->remove($source);
    }

    /**
     * Detaches the source through the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out the detached instance.
     *
     * @inheritdoc
     */
    public function detach(SourceInterface $source)
    {
        // Evict first: the identifiers are read before the delegate touches the source.
        $this->evict($source);
        $this->sourceManager->detach($source);
    }

    /**
     * @inheritdoc
     */
    public function flush(SourceInterface $source = null)
    {
        $this->sourceManager->flush($source);
    }

    /**
     * Clears the wrapped manager and empties both internal caches.
     *
     * @inheritdoc
     */
    public function clear()
    {
        $this->sourceManager->clear();
        $this->sources = [];
        $this->originSources = [];
    }

    /**
     * Adds source to the internal cache and stamps it as visited.
     *
     * A null source is ignored. The source is indexed by id when it has one,
     * and by origin (feed or scraper) plus original id when both are known.
     *
     * @param SourceInterface|null $source
     */
    protected function cache(SourceInterface $source = null)
    {
        if (null === $source) {
            return;
        }

        // mark as visited
        $source->setDatetimeLastVisited(new \DateTime());

        // cache by id
        if ($source->getId()) {
            $this->sources[$source->getId()] = $source;
        }

        // must have hash and original id
        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null === $hash || !$this->isUsableOriginalId($originalId)) {
            return;
        }

        $this->originSources[$hash][$originalId] = $source;
    }

    /**
     * Drops a source from both internal caches, using the same keys that
     * cache() files it under.
     *
     * @param SourceInterface $source
     */
    protected function evict(SourceInterface $source)
    {
        $sourceId = $source->getId();
        if ($sourceId) {
            unset($this->sources[$sourceId]);
        }

        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null !== $hash && $this->isUsableOriginalId($originalId)) {
            // unset() is a no-op when the entry (or the whole bucket) is missing
            unset($this->originSources[$hash][$originalId]);
        }
    }

    /**
     * Returns the origin hash for a source: the feed hash when the source has
     * a feed, otherwise the scraper hash when it has a scraper.
     *
     * @param SourceInterface $source
     *
     * @return string|null Null when the source has neither a feed nor a scraper
     */
    protected function getOriginHash(SourceInterface $source)
    {
        if (null !== $feed = $source->getFeed()) {
            return $this->getFeedHash($feed);
        }

        if (null !== $scraper = $source->getScraper()) {
            return $this->getScraperHash($scraper);
        }

        return null;
    }

    /**
     * Tells whether an original id can be used as a cache key.
     *
     * Only null and values that cast to an empty string are rejected. A plain
     * truthiness check would also reject 0 and "0", which are legitimate ids.
     *
     * @param mixed $originalId
     *
     * @return bool
     */
    protected function isUsableOriginalId($originalId)
    {
        return null !== $originalId && '' !== (string) $originalId;
    }

    /**
     * @param int $sourceId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedById($sourceId)
    {
        if (!array_key_exists($sourceId, $this->sources)) {
            return null;
        }

        return $this->sources[$sourceId];
    }

    /**
     * @param string $hash       Origin hash of a feed or scraper
     * @param string $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByOrigin($hash, $originalId)
    {
        // create origin cache if necessary
        if (!isset($this->originSources[$hash])) {
            $this->originSources[$hash] = [];
        }

        // see if we have a cached mapping, return the cached entry
        if (!array_key_exists($originalId, $this->originSources[$hash])) {
            return null;
        }

        return $this->originSources[$hash][$originalId];
    }

    /**
     * @param Feed   $feed
     * @param string $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByFeed(Feed $feed, $originalId)
    {
        return $this->findCachedByOrigin($this->getFeedHash($feed), $originalId);
    }

    /**
     * @param Scraper $scraper
     * @param string  $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByScraper(Scraper $scraper, $originalId)
    {
        return $this->findCachedByOrigin($this->getScraperHash($scraper), $originalId);
    }

    /**
     * Returns a hash for a feed, derived from its id. The "feed" prefix keeps
     * it distinct from the hash of a scraper with the same id.
     *
     * @param Feed $feed
     *
     * @return string
     */
    protected function getFeedHash(Feed $feed)
    {
        return md5('feed' . $feed->getId());
    }

    /**
     * Returns a hash for a scraper, derived from its id. The "scraper" prefix
     * keeps it distinct from the hash of a feed with the same id.
     *
     * @param Scraper $scraper
     *
     * @return string
     */
    protected function getScraperHash(Scraper $scraper)
    {
        return md5('scraper' . $scraper->getId());
    }
}
| 277 | ||
| @@ 18-276 (lines=259) @@ | ||
| 15 | * This is necessary because during imports we want to keep track of sources that | |
| 16 | * we encounter, but not necessarily handle or modify. | |
| 17 | */ | |
class ImportSourceManager implements SourceManagerInterface
{
    /**
     * The wrapped manager; every lookup that misses the cache and every
     * persistence operation is delegated to it.
     *
     * @var SourceManagerInterface
     */
    protected $sourceManager;

    /**
     * Cached sources, keyed by source id.
     *
     * @var SourceInterface[]
     */
    protected $sources = [];

    /**
     * Cached sources, keyed by origin hash (see getFeedHash()/getScraperHash())
     * and then by original id.
     *
     * @var array
     */
    protected $originSources = [];

    /**
     * @param SourceManagerInterface $sourceManager The manager to delegate to
     */
    public function __construct(SourceManagerInterface $sourceManager)
    {
        $this->sourceManager = $sourceManager;
    }

    /**
     * @inheritdoc
     */
    public function getRepository()
    {
        return $this->sourceManager->getRepository();
    }

    /**
     * Returns the cached source for this id, or asks the wrapped manager and
     * caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findById($sourceId)
    {
        if (null === $source = $this->findCachedById($sourceId)) {
            $source = $this->sourceManager->findById($sourceId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this feed/original id, or asks the wrapped
     * manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByFeed(Feed $feed, $originalId)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeed($feed, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Returns the cached source for this scraper/original id, or asks the
     * wrapped manager and caches whatever it returns.
     *
     * @inheritdoc
     */
    public function findSourceByScraper(Scraper $scraper, $originalId)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraper($scraper, $originalId);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByFeed(), but a cache miss is delegated to the wrapped
     * manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByFeedOrCreate(Feed $feed, $originalId, $originalUrl = null)
    {
        if (null === $source = $this->findCachedByFeed($feed, $originalId)) {
            $source = $this->sourceManager->findSourceByFeedOrCreate($feed, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * Same as findSourceByScraper(), but a cache miss is delegated to the
     * wrapped manager's find-or-create method.
     *
     * @inheritdoc
     */
    public function findSourceByScraperOrCreate(Scraper $scraper, $originalId, $originalUrl)
    {
        if (null === $source = $this->findCachedByScraper($scraper, $originalId)) {
            $source = $this->sourceManager->findSourceByScraperOrCreate($scraper, $originalId, $originalUrl);
            $this->cache($source);
        }

        return $source;
    }

    /**
     * @inheritdoc
     */
    public function persist(SourceInterface $source)
    {
        $this->sourceManager->persist($source);
    }

    /**
     * Removes the source through the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out the removed instance.
     *
     * @inheritdoc
     */
    public function remove(SourceInterface $source)
    {
        // Evict first: the identifiers are read before the delegate touches the source.
        $this->evict($source);
        $this->sourceManager->remove($source);
    }

    /**
     * Detaches the source through the wrapped manager and drops it from the
     * internal cache, so later lookups do not hand out the detached instance.
     *
     * @inheritdoc
     */
    public function detach(SourceInterface $source)
    {
        // Evict first: the identifiers are read before the delegate touches the source.
        $this->evict($source);
        $this->sourceManager->detach($source);
    }

    /**
     * @inheritdoc
     */
    public function flush(SourceInterface $source = null)
    {
        $this->sourceManager->flush($source);
    }

    /**
     * Clears the wrapped manager and empties both internal caches.
     *
     * @inheritdoc
     */
    public function clear()
    {
        $this->sourceManager->clear();
        $this->sources = [];
        $this->originSources = [];
    }

    /**
     * Adds source to the internal cache and stamps it as visited.
     *
     * A null source is ignored. The source is indexed by id when it has one,
     * and by origin (feed or scraper) plus original id when both are known.
     *
     * @param SourceInterface|null $source
     */
    protected function cache(SourceInterface $source = null)
    {
        if (null === $source) {
            return;
        }

        // mark as visited
        $source->setDatetimeLastVisited(new \DateTime());

        // cache by id
        if ($source->getId()) {
            $this->sources[$source->getId()] = $source;
        }

        // must have hash and original id
        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null === $hash || !$this->isUsableOriginalId($originalId)) {
            return;
        }

        $this->originSources[$hash][$originalId] = $source;
    }

    /**
     * Drops a source from both internal caches, using the same keys that
     * cache() files it under.
     *
     * @param SourceInterface $source
     */
    protected function evict(SourceInterface $source)
    {
        $sourceId = $source->getId();
        if ($sourceId) {
            unset($this->sources[$sourceId]);
        }

        $hash = $this->getOriginHash($source);
        $originalId = $source->getOriginalId();
        if (null !== $hash && $this->isUsableOriginalId($originalId)) {
            // unset() is a no-op when the entry (or the whole bucket) is missing
            unset($this->originSources[$hash][$originalId]);
        }
    }

    /**
     * Returns the origin hash for a source: the feed hash when the source has
     * a feed, otherwise the scraper hash when it has a scraper.
     *
     * @param SourceInterface $source
     *
     * @return string|null Null when the source has neither a feed nor a scraper
     */
    protected function getOriginHash(SourceInterface $source)
    {
        if (null !== $feed = $source->getFeed()) {
            return $this->getFeedHash($feed);
        }

        if (null !== $scraper = $source->getScraper()) {
            return $this->getScraperHash($scraper);
        }

        return null;
    }

    /**
     * Tells whether an original id can be used as a cache key.
     *
     * Only null and values that cast to an empty string are rejected. A plain
     * truthiness check would also reject 0 and "0", which are legitimate ids.
     *
     * @param mixed $originalId
     *
     * @return bool
     */
    protected function isUsableOriginalId($originalId)
    {
        return null !== $originalId && '' !== (string) $originalId;
    }

    /**
     * @param int $sourceId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedById($sourceId)
    {
        if (!array_key_exists($sourceId, $this->sources)) {
            return null;
        }

        return $this->sources[$sourceId];
    }

    /**
     * @param string $hash       Origin hash of a feed or scraper
     * @param string $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByOrigin($hash, $originalId)
    {
        // create origin cache if necessary
        if (!isset($this->originSources[$hash])) {
            $this->originSources[$hash] = [];
        }

        // see if we have a cached mapping, return the cached entry
        if (!array_key_exists($originalId, $this->originSources[$hash])) {
            return null;
        }

        return $this->originSources[$hash][$originalId];
    }

    /**
     * @param Feed   $feed
     * @param string $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByFeed(Feed $feed, $originalId)
    {
        return $this->findCachedByOrigin($this->getFeedHash($feed), $originalId);
    }

    /**
     * @param Scraper $scraper
     * @param string  $originalId
     *
     * @return SourceInterface|null The cached source, or null on a cache miss
     */
    protected function findCachedByScraper(Scraper $scraper, $originalId)
    {
        return $this->findCachedByOrigin($this->getScraperHash($scraper), $originalId);
    }

    /**
     * Returns a hash for a feed, derived from its id. The "feed" prefix keeps
     * it distinct from the hash of a scraper with the same id.
     *
     * @param Feed $feed
     *
     * @return string
     */
    protected function getFeedHash(Feed $feed)
    {
        return md5('feed' . $feed->getId());
    }

    /**
     * Returns a hash for a scraper, derived from its id. The "scraper" prefix
     * keeps it distinct from the hash of a feed with the same id.
     *
     * @param Scraper $scraper
     *
     * @return string
     */
    protected function getScraperHash(Scraper $scraper)
    {
        return md5('scraper' . $scraper->getId());
    }
}
| 277 | ||