| Total Complexity | 46 |
| Total Lines | 357 |
| Duplicated Lines | 0 % |
| Changes | 5 | ||
| Bugs | 2 | Features | 0 |
Complex classes like CrawlController often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use CrawlController, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 14 | class CrawlController |
||
| 15 | { |
||
| 16 | protected $crawler; |
||
| 17 | protected $crawler_id; |
||
| 18 | protected $override_fail_status = false; |
||
| 19 | protected $logging = false; |
||
| 20 | protected $logs = []; |
||
| 21 | |||
| 22 | |||
| 23 | public function __construct() |
||
| 26 | } |
||
| 27 | public function __destruct() |
||
| 30 | } |
||
| 31 | |||
/**
 * Set whether a crawler whose last run failed may be started again.
 *
 * The flag is consulted by setupCrawler() when the latest status is 3 (failed)
 * and when the maximum number of attempts has been reached.
 *
 * @param bool $state True to allow a rerun after a failure, false to refuse it.
 */
public function overrideFailStatus(bool $state)
{
    // A bare boolean concatenates as '1' or '' in PHP, which left the "false"
    // case blank in the log; spell the value out instead.
    $this->log('Setup overrideFailStatus to: ' . ($state ? 'true' : 'false'));

    $this->override_fail_status = $state;
}
||
| 38 | |||
| 39 | /** |
||
| 40 | * Get the latest crawler data from the database |
||
| 41 | */ |
||
| 42 | protected function getCrawler() |
||
| 52 | } |
||
| 53 | } |
||
| 54 | |||
/**
 * Store the id of the crawler this controller works with.
 *
 * A log entry is written before and after the assignment.
 *
 * @param mixed $crawler_id Identifier of the crawler; stored exactly as given.
 */
public function setCrawlerId($crawler_id)
{
    $this->log('Setting crawler_id');

    $this->crawler_id = $crawler_id;

    $this->log('Set crawler_id');
}
||
| 66 | |||
/**
 * Give back the crawler id currently held by this controller.
 *
 * @return mixed Whatever value was last assigned to the crawler id.
 */
public function getCrawlerId()
{
    $this->log('Getting crawler_id');

    $currentId = $this->crawler_id;

    return $currentId;
}
||
| 78 | |||
/**
 * Tell whether the controller is ready to be used, i.e. a crawler id is present.
 *
 * @return bool True when the crawler id is not null, false otherwise.
 */
protected function controllerIsSetup()
{
    $this->log('Check if controllerIsSetup');

    return $this->crawler_id !== null;
}
||
| 94 | |||
/**
 * Wait until the crawler is allowed to run, then register it as running.
 *
 * The crawler record is re-read on every attempt. An attempt ends in one of
 * three ways: the loop is left and the crawler is started (first run, previous
 * run finished, forced rerun after a failure, or parallel runs allowed), an
 * exception is thrown, or the method sleeps and checks again while another run
 * is still marked as running.
 *
 * Status values as used in this class: 1 = running, 2 = done, 3 = failed.
 *
 * @param mixed $crawler_id Optional crawler id; when given it replaces the stored id.
 *
 * @throws CrawlerException When no crawler id is set, the crawler is disabled, or
 *                          the last run failed and override_fail_status is off.
 * @throws CrawlerNotReachedTimeBetweenJobsException When the required pause between
 *                                                   two runs has not elapsed yet.
 * @throws CrawlerSaveException When the "running" status cannot be saved.
 */
public function setupCrawler($crawler_id = null)
{
    $this->log('Setup crawler');

    if (!is_null($crawler_id)) {
        // Reached only when an id WAS supplied; the log text used to claim the opposite.
        $this->log('Setup crawler, crawler_id is set');
        $this->setCrawlerId($crawler_id);
    }

    if (!$this->controllerIsSetup()) {
        throw new CrawlerException('CrawlController is not setup correctly.');
    }

    $times = config('laravel-job-handler.run_times', 10);

    // Inclusive bound on purpose: the extra pass at $x == $times is the one
    // that reports the crawler as having exceeded its allowed attempts.
    for ($x = 0; $x <= $times; $x++) {
        //fetch the last data
        $this->getCrawler();

        $this->log('Checking if crawler is enabled');
        if (!$this->crawler->enabled) {
            $this->log('Crawler is not enabled');
            throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - crawler isnt enabled in database');
        }

        $this->log('Checking if crawler can be runned');
        $period = $this->canCrawlerRunAfterPeriod();

        if (!$period['status']) {
            $this->log('Crawler needs to wait ('.$period['retry_in'].' seconds) before running again');
            throw new CrawlerNotReachedTimeBetweenJobsException('Has to wait ' . $period['retry_in'] . ' more seconds to run');
        }

        $this->log('Checked if crawler can runned');

        // Loose comparisons are kept deliberately: the status attributes may
        // come back from the database as numeric strings.
        if (is_null($this->crawler->latest_status)) {
            $this->log('Crawler can be runned, it the first time');

            //first time it runs...
            break;
        }

        if ($this->crawler->latest_status == 2) {
            $this->log('Crawler can be runned, last crawler runned successfully');

            //Done running...
            break;
        }

        if ($this->crawler->latest_status == 3) {
            if ($this->override_fail_status) {
                $this->log('Last crawler failed, but it is forced to run');

                //override the failed state, this will force to rerun...
                break;
            }

            $this->log('Last crawler failed, force run is not enabled');
            throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - last run had an error and override_fail_status is not enabled');
        }

        if ($x == $times && !$this->override_fail_status) {
            $this->log('Crawler exceeded the max execution time');
            // NOTE(review): failCrawler() is defined outside this view. If it does
            // not throw, execution continues and the crawler is started below - confirm.
            $this->failCrawler('Crawler (#' . $this->crawler_id . ') - max execution time');
        }

        // NOTE(review): this reads `status`, while the checks above read
        // `latest_status` - confirm that both attributes exist and that this is intended.
        if ($this->crawler->status == 1) {
            if ($this->crawler->multiple_crawlers) {
                $this->log('Crawler can run multiple crawlers at the same time');
                break;
            }

            $wait = config('laravel-job-handler.retry_in_seconds', 3);

            $this->log('Waiting for rechecking ('.$wait.' seconds) if crawler can be runned');

            sleep($wait);
        }
    }

    $this->log('All setup, starting crawler');
    $this->startCrawler();
}
||
/**
 * Register the crawler as running by saving status 1.
 *
 * @param string $output Optional output to store together with the status record.
 *
 * @return bool True once the status has been saved.
 */
public function startCrawler($output = '')
{
    $this->log('Starting crawler');

    // 1 = running
    $runningStatus = 1;

    return $this->addStatus($runningStatus, $output);
}
||
/**
 * Register the crawler as done by saving status 2, so other scripts can run.
 *
 * @param string $output Optional output to store together with the status record.
 *
 * @return bool True once the status has been saved.
 */
public function doneCrawler($output = '')
{
    $this->log('Crawler done');

    // 2 = done running
    $doneStatus = 2;

    return $this->addStatus($doneStatus, $output);
}
||
| 205 | |||
/**
 * Finish the crawler run; logs the event and delegates to doneCrawler().
 *
 * @param string $output Optional output to store together with the status record.
 *
 * @return bool The result of doneCrawler().
 */
public function finish($output = '')
{
    $this->log('Finishing crawler');

    $saved = $this->doneCrawler($output);

    return $saved;
}
||
| 218 | /** |
||
| 219 | * crawler failed... |
||
| 220 | * |
||
| 221 | * @param string $output |
||
| 222 | */ |
||
| 223 | public function failCrawler($output = '') |
||
| 230 | } |
||
/**
 * Persist a new status record for the crawler and refresh the crawler data.
 *
 * After the record is saved, the loaded crawler (if any) gets its
 * latest_status updated, a non-empty $output is stored as a status log row,
 * and for status 2 logging is stopped with the new record's id.
 *
 * @param mixed  $status Status value to register (this class uses 1 and 2).
 * @param string $output Optional output to attach to the status record.
 *
 * @return bool Always true when the status record was saved.
 *
 * @throws CrawlerSaveException When the status record cannot be saved.
 */
protected function addStatus($status, $output = '')
{
    $this->log('Registering status ('.$status.')');

    $record = new CrawlerStatus();
    $record->crawler_id = $this->crawler_id;
    $record->status = $status;

    // Bail out first so the success path below reads top to bottom.
    if (!$record->save()) {
        throw new CrawlerSaveException('Cannot save crawlerstatus to database...');
    }

    $this->log('Registered status ('.$status.')');

    if ($this->crawler) {
        $this->log('Setting crawler latest status (' . $status . ') attribute');

        $this->crawler->latest_status = $status;
        $this->crawler->save();

        $this->log('Set crawler latest status (' . $status . ') attribute');
    }

    if (!empty($output)) {
        $rows = [
            [
                'status_id' => $record->id,
                'output' => $output,
                'created_at' => Carbon::now(),
                'updated_at' => Carbon::now(),
            ],
        ];

        CrawlerStatusLogs::insert($rows);
    }

    // Status 2 means the run is done: stop logging and hand over the record id.
    if ($status == 2) {
        $this->stopLogging($record->id);
    }

    $this->getCrawler();

    return true;
}
||
| 281 | protected function saveLog($crawlerstatus_id) |
||
| 301 | } |
||
| 302 | } |
||
| 303 | |||
/**
 * Decide whether enough time has passed since the last run to run again.
 *
 * The crawler record is refreshed first. The crawler may run when it has no
 * time_between configured, when it has never run, or when its last run is at
 * least time_between seconds in the past.
 *
 * @return array Result of canCrawlerRunAfterPeriodStatus(): 'status' (bool) and
 *               'retry_in' (seconds left to wait, 0 when the crawler may run).
 */
public function canCrawlerRunAfterPeriod()
{
    $this->getCrawler();

    $seconds = $this->crawler->time_between;

    if (is_null($seconds)) {
        $this->log('Not a time_between specified');

        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    if (is_null($this->crawler->last_runned_at)) {
        //crawler never runned, so it can run now
        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    // Normalise to Carbon once so the comparison never falls back to comparing
    // a raw attribute value with an object, and take "now" a single time so the
    // check and the remaining-time calculation use the same reference instant.
    $lastRun = Carbon::parse($this->crawler->last_runned_at);
    $latestAllowedLastRun = Carbon::now()->subSeconds($seconds);

    if ($lastRun->lte($latestAllowedLastRun)) {
        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    // $lastRun is later than the threshold here; diffing from the threshold to
    // the last run stays positive whether diffInSeconds() is absolute or signed.
    $retryIn = (int) $latestAllowedLastRun->diffInSeconds($lastRun);

    return $this->canCrawlerRunAfterPeriodStatus(false, $retryIn);
}
||
| 332 | |||
/**
 * Build the result array returned by canCrawlerRunAfterPeriod().
 *
 * @param mixed $status   Whether the crawler may run.
 * @param int   $retry_in Seconds to wait before trying again; defaults to 0.
 *
 * @return array Array with the keys 'status' and 'retry_in', in that order.
 */
public function canCrawlerRunAfterPeriodStatus($status, $retry_in = 0)
{
    // compact() keys the values by variable name, in the order listed.
    return compact('status', 'retry_in');
}
||
| 347 | |||
| 348 | |||
| 349 | |||
/**
 * Switch the logging flag on and write a first log entry.
 *
 * The flag is raised before the entry is written.
 * NOTE(review): presumably log() only records entries while the flag is on - confirm.
 */
protected function startLogging()
{
    $this->logging = true;

    $this->log('Started logging');
}
||
/**
 * Switch the logging flag off and, when a status id is given, save the log.
 *
 * The closing entry is written before the flag is cleared.
 *
 * @param mixed $crawlerstatus_id Id of the status record passed on to saveLog();
 *                                null skips saving.
 */
protected function stopLogging($crawlerstatus_id = null)
{
    $this->log('Stop logging');
    $this->logging = false;

    if ($crawlerstatus_id === null) {
        return;
    }

    $this->saveLog($crawlerstatus_id);
}
||
| 364 | protected function log($item = '') |
||
| 371 | } |
||
| 372 | } |
||
| 373 | } |
||
| 374 |