| Total Complexity | 44 |
| Total Lines | 349 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like CrawlController often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use CrawlController, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 14 | class CrawlController |
||
| 15 | { |
||
| 16 | protected $crawler; |
||
| 17 | protected $crawler_id; |
||
| 18 | protected $override_fail_status = false; |
||
| 19 | protected $logging = false; |
||
| 20 | protected $logs = []; |
||
| 21 | |||
| 22 | |||
/**
 * Create the controller and switch logging on right away via startLogging().
 */
public function __construct()
{
    $this->startLogging();
}
||
/**
 * Switch logging off when the controller is destroyed.
 *
 * No crawler-status id is passed, so stopLogging() does not call saveLog() from here.
 */
public function __destruct()
{
    $this->stopLogging();
}
||
| 31 | |||
/**
 * Choose whether a crawler whose latest run failed may be started again.
 *
 * setupCrawler() consults this flag when the crawler's latest status equals 3.
 *
 * @param bool $state true to allow a rerun after a failed run, false to refuse it
 */
public function overrideFailStatus(bool $state)
{
    // A bare bool concatenates as '1' or '' (empty), which made the "false" case
    // unreadable in the log, so the value is spelled out explicitly.
    $this->log('Setup overrideFailStatus to: ' . ($state ? 'true' : 'false'));

    $this->override_fail_status = $state;
}
||
| 37 | |||
/**
 * Load or refresh the crawler record for the current crawler id.
 *
 * The record is stored on $this->crawler; nothing is returned.
 *
 * When no record is cached, or the cached record belongs to a different id, it is
 * looked up with Crawlers::findOrFail(). Otherwise the cached record is re-read
 * with fresh().
 */
protected function getCrawler()
{
    $cached = $this->crawler;

    if (!empty($cached) && $cached->id == $this->crawler_id) {
        $this->log('Refreshing crawler data');
        $refreshed = $cached->fresh();

        if (!is_null($refreshed)) {
            $this->crawler = $refreshed;
            $this->log('Refreshed crawler data');

            return;
        }

        // NOTE(review): assuming Crawlers is an Eloquent model, fresh() yields null
        // once the row is gone. Falling through to findOrFail() reports that the same
        // way a first-time lookup would, instead of leaving $this->crawler as null.
    }

    $this->log('Loading new crawler data');
    $this->crawler = Crawlers::findOrFail($this->crawler_id);
    $this->log('Loaded new crawler data');
}
||
| 53 | |||
/**
 * Remember which crawler this controller works on.
 *
 * A log entry is written before and after the id is stored.
 *
 * @param mixed $crawler_id id of the crawler record
 */
public function setCrawlerId($crawler_id)
{
    $this->log('Setting crawler_id');

    $this->crawler_id = $crawler_id;

    $this->log('Set crawler_id');
}
||
| 65 | |||
/**
 * Give back the crawler id currently stored on the controller.
 *
 * The access itself is logged.
 *
 * @return mixed the stored id, or null when none has been set
 */
public function getCrawlerId()
{
    $this->log('Getting crawler_id');

    $currentId = $this->crawler_id;

    return $currentId;
}
||
| 77 | |||
/**
 * Tell whether the controller has what it needs to run.
 *
 * The only requirement checked is that a crawler id has been set.
 *
 * @return bool true when crawler_id is not null
 */
protected function controllerIsSetup()
{
    $this->log('Check if controllerIsSetup');

    return $this->crawler_id !== null;
}
||
| 92 | |||
/**
 * Wait until the crawler is allowed to run, then mark it as started.
 *
 * On every pass the crawler record is re-read and checked in this order:
 * it must be enabled, the time_between period must have elapsed, and its latest
 * status must allow a start (null = never ran, 2 = finished, 3 = failed and only
 * allowed with override_fail_status). If none of those checks ends the loop, the
 * method sleeps and retries while the record reports status 1, unless the crawler
 * permits multiple crawlers at once.
 *
 * @param mixed $crawler_id optional id; when given it replaces the stored crawler id
 *
 * @throws CrawlerException when no crawler id is set, the crawler is disabled,
 *                          the latest run failed and override is off, or the retry
 *                          passes are used up (raised through failCrawler())
 * @throws CrawlerNotReachedTimeBetweenJobsException when the crawler has to wait
 *                          longer before it may run again
 */
public function setupCrawler($crawler_id = null)
{
    $this->log('Setup crawler');

    if (!is_null($crawler_id)) {
        // This branch runs when an id WAS passed in; the old message claimed the opposite.
        $this->log('Setup crawler, crawler_id is given');
        $this->setCrawlerId($crawler_id);
    }

    if (!$this->controllerIsSetup()) {
        throw new CrawlerException('CrawlController is not setup correctly.');
    }

    $times = config('laravel-job-handler.run_times', 10);

    // Runs $times + 1 passes: the pass where $x == $times fails the crawler
    // unless one of the earlier checks in that pass already left the loop.
    for ($x = 0; $x <= $times; $x++) {
        //fetch the last data
        $this->getCrawler();

        $this->log('Checking if crawler is enabled');
        if (!$this->crawler->enabled) {
            $this->log('Crawler is not enabled');
            throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - crawler isnt enabled in database');
        }

        $this->log('Checking if crawler can be runned');
        $checkIfCrawlerCanBeRunned = $this->canCrawlerRunAfterPeriod();

        if (!$checkIfCrawlerCanBeRunned['status']) {
            $this->log('Crawler needs to wait ('.$checkIfCrawlerCanBeRunned['retry_in'].' seconds) before running again');
            throw new CrawlerNotReachedTimeBetweenJobsException('Has to wait ' . $checkIfCrawlerCanBeRunned['retry_in'] . ' more seconds to run');
        }

        $this->log('Checked if crawler can runned');

        if (is_null($this->crawler->latest_status)) {
            $this->log('Crawler can be runned, it the first time');

            //first time it runs...
            break;
        }

        if ($this->crawler->latest_status == 2) {
            $this->log('Crawler can be runned, last crawler runned successfully');

            //Done running...
            break;
        }

        if ($this->crawler->latest_status == 3) {
            if ($this->override_fail_status) {
                $this->log('Last crawler failed, but it is forced to run');

                //override the failed state, this will force to rerun...
                break;
            }

            $this->log('Last crawler failed, force run is not enabled');
            throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - last run had an error and override_fail_status is not enabled');
        }

        if ($x == $times) {
            $this->log('Crawler exceeded the max execution time');
            // failCrawler() stores status 3 and throws, so the loop ends here.
            $this->failCrawler('Crawler (#' . $this->crawler_id . ') - max execution time');
        }

        // NOTE(review): this reads `status` while the checks above read
        // `latest_status` - confirm both attributes exist on the crawler record.
        if ($this->crawler->status == 1) {
            if ($this->crawler->multiple_crawlers) {
                $this->log('Crawler can run multiple crawlers at the same time');
                break;
            }

            $wait = config('laravel-job-handler.retry_in_seconds', 3);

            $this->log('Waiting for rechecking ('.$wait.' seconds) if crawler can be runned');

            sleep($wait);
        }
    }

    $this->log('All setup, starting crawler');
    $this->startCrawler();
}
||
/**
 * Record that the crawler has started by adding status 1.
 *
 * @param string $output text handed to addStatus() together with the status
 *
 * @return mixed whatever addStatus() returns (its docblock declares bool)
 */
public function startCrawler($output = '')
{
    $this->log('Starting crawler');

    // status 1: start running
    $result = $this->addStatus(1, $output);

    return $result;
}
||
/**
 * Record that the crawler is done by adding status 2, so other scripts can run.
 *
 * @param string $output text handed to addStatus() together with the status
 *
 * @return mixed whatever addStatus() returns (its docblock declares bool)
 */
public function doneCrawler($output = '')
{
    $this->log('Crawler done');

    // status 2: done running
    $result = $this->addStatus(2, $output);

    return $result;
}
||
| 203 | |||
/**
 * Finish the crawler run; logs the call and delegates to doneCrawler().
 *
 * @param string $output text passed through to doneCrawler()
 *
 * @return bool result of doneCrawler()
 */
public function finish($output = '')
{
    $this->log('Finishing crawler');

    $finished = $this->doneCrawler($output);

    return $finished;
}
||
/**
 * Record that the crawler failed by adding status 3, then raise an exception.
 *
 * @param string $output text handed to addStatus() and used as the start of the
 *                       exception message
 *
 * @throws CrawlerException always, once addStatus() has been called
 */
public function failCrawler($output = '')
{
    $this->log('Crawler failed');

    // status 3: failed
    $this->addStatus(3, $output);

    $message = $output . ' - status 3';

    throw new CrawlerException($message);
}
||
| 229 | /** |
||
| 230 | * Save the latest crawler status to the database |
||
| 231 | * |
||
| 232 | * @param $status |
||
| 233 | * @param string $output |
||
| 234 | * @return bool |
||
| 235 | */ |
||
| 236 | protected function addStatus($status, $output = '') |
||
| 273 | } |
||
| 274 | } |
||
| 275 | protected function saveLog($crawlerstatus_id) { |
||
| 292 | } |
||
| 293 | } |
||
| 294 | |||
/**
 * Decide whether enough time has passed since the last run for the crawler to run again.
 *
 * The crawler record is re-read first. The crawler may run when it has no
 * time_between value, when it has never run (last_runned_at is null), or when
 * last_runned_at lies at or before "now minus time_between seconds".
 *
 * @return array result of canCrawlerRunAfterPeriodStatus(): 'status' says whether
 *               the crawler may run, 'retry_in' is the number of seconds left to
 *               wait (0 when it may run)
 */
public function canCrawlerRunAfterPeriod()
{
    $this->getCrawler();

    $seconds = $this->crawler->time_between;

    if (is_null($seconds)) {
        $this->log('Not time_between specified');

        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    if (is_null($this->crawler->last_runned_at)) {
        //crawler never runned, so it can run now
        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    // Parse once and read the clock once, so the comparison and the remaining-time
    // calculation work on exactly the same two instants.
    $lastRun = Carbon::parse($this->crawler->last_runned_at);
    $cutoff = Carbon::now()->subSeconds($seconds);

    if ($lastRun->lte($cutoff)) {
        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    // $lastRun is later than $cutoff here; the gap between them is the time still to
    // wait. The absolute flag is passed explicitly so the value is never negative,
    // whatever the library's default for that argument is.
    return $this->canCrawlerRunAfterPeriodStatus(false, $lastRun->diffInSeconds($cutoff, true));
}
||
| 323 | |||
/**
 * Build the result array handed back by canCrawlerRunAfterPeriod().
 *
 * @param mixed $status   whether the crawler may run
 * @param int   $retry_in seconds to wait before trying again (defaults to 0)
 *
 * @return array array with the keys 'status' and 'retry_in', in that order
 */
public function canCrawlerRunAfterPeriodStatus($status, $retry_in = 0)
{
    // compact() builds the array from the two parameters, keyed by their names.
    return compact('status', 'retry_in');
}
||
| 338 | |||
| 339 | |||
| 340 | |||
/**
 * Switch logging on and write the first log entry.
 *
 * The flag is raised before log() is called.
 */
protected function startLogging()
{
    $this->logging = true;

    $this->log('Started logging');
}
||
/**
 * Write a final log entry, switch logging off and optionally store the log.
 *
 * @param mixed $crawlerstatus_id when not null, it is handed to saveLog();
 *                                when null, saveLog() is not called
 */
protected function stopLogging($crawlerstatus_id = null)
{
    $this->log('Stop logging');
    $this->logging = false;

    if (is_null($crawlerstatus_id)) {
        return;
    }

    $this->saveLog($crawlerstatus_id);
}
||
| 355 | protected function log($item = '') |
||
| 363 | } |
||
| 364 | } |
||
| 365 | } |
||
| 366 |