Total Complexity | 44 |
Total Lines | 349 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like CrawlController often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use CrawlController, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
14 | class CrawlController |
||
15 | { |
||
/** Crawler record last loaded by getCrawler(); not set until the first load. */
protected $crawler;

/** Id of the crawler record this controller operates on (assigned by setCrawlerId()). */
protected $crawler_id;

/** When true, setupCrawler() proceeds even if the crawler's latest_status is 3 (failed). */
protected $override_fail_status = false;

/** Logging switch: set to true by startLogging() and to false by stopLogging(). */
protected $logging = false;

/** NOTE(review): presumably the buffer filled by log(); its use is not visible here - confirm. */
protected $logs = [];
||
21 | |||
22 | |||
/**
 * Create the controller with logging switched on from the start,
 * so that every later setup call is recorded.
 */
public function __construct()
{
    $this->startLogging();
}
||
/**
 * Switch logging off when the controller is destroyed.
 *
 * stopLogging() is called without a crawler status id, so it does not
 * trigger saveLog() from here.
 */
public function __destruct()
{
    $this->stopLogging();
}
||
31 | |||
/**
 * Enable or disable running the crawler even though its previous run failed.
 *
 * setupCrawler() consults this flag when the crawler's latest_status is 3.
 *
 * @param bool $state true to allow a run after a failed run, false to refuse it
 */
public function overrideFailStatus(bool $state)
{
    // var_export() renders the flag as "true"/"false"; plain concatenation
    // would turn false into an empty string and true into "1".
    $this->log('Setup overrideFailStatus to: ' . var_export($state, true));

    $this->override_fail_status = $state;
}
||
37 | |||
/**
 * Bring $this->crawler up to date with the database.
 *
 * When a crawler with the configured id is already held, it is reloaded via
 * fresh(); otherwise the record is looked up with Crawlers::findOrFail().
 */
protected function getCrawler()
{
    $sameCrawlerLoaded = !empty($this->crawler) && $this->crawler->id == $this->crawler_id;

    if ($sameCrawlerLoaded) {
        $this->log('Refreshing crawler data');
        $this->crawler = $this->crawler->fresh();
        $this->log('Refreshed crawler data');

        return;
    }

    // Nothing loaded yet, or the configured id changed since the last load.
    $this->log('Loading new crawler data');
    $this->crawler = Crawlers::findOrFail($this->crawler_id);
    $this->log('Loaded new crawler data');
}
||
53 | |||
/**
 * Remember which crawler this controller works on.
 *
 * A log entry is written before and after the assignment.
 *
 * @param mixed $crawler_id id of the crawler record
 */
public function setCrawlerId($crawler_id)
{
    $this->log('Setting crawler_id');

    $this->crawler_id = $crawler_id;

    $this->log('Set crawler_id');
}
||
65 | |||
/**
 * Give back the crawler id currently configured on this controller.
 *
 * The access itself is logged.
 *
 * @return mixed the stored crawler id (not set until setCrawlerId() was called)
 */
public function getCrawlerId()
{
    $this->log('Getting crawler_id');

    $currentId = $this->crawler_id;

    return $currentId;
}
||
77 | |||
/**
 * Tell whether the controller has what it needs to run.
 *
 * The only requirement checked is that a crawler id has been set.
 *
 * @return bool true when crawler_id is not null
 */
protected function controllerIsSetup()
{
    $this->log('Check if controllerIsSetup');

    return $this->crawler_id !== null;
}
||
92 | |||
/**
 * Setup the crawler so it won't run twice at the same time.
 *
 * Optionally stores the given crawler id, then polls the crawler record up to
 * the configured number of times ('laravel-job-handler.run_times', default 10)
 * until it may start. Once the loop is left, the crawler is marked as started
 * through startCrawler().
 *
 * Status values as used in this class: 1 = running, 2 = done, 3 = failed.
 *
 * @param mixed $crawler_id optional crawler id; when null the id set earlier is used
 *
 * @throws CrawlerException when the controller has no crawler id, the crawler is
 *                          disabled, the previous run failed without the override
 *                          flag, or the last polling attempt was reached
 *                          (raised via failCrawler())
 * @throws CrawlerNotReachedTimeBetweenJobsException when the waiting period
 *                          reported by canCrawlerRunAfterPeriod() has not elapsed
 */
public function setupCrawler($crawler_id = null)
{
    $this->log('Setup crawler');

    if ($crawler_id !== null) {
        // NOTE(review): the message says "not set" although this branch runs
        // when an id WAS passed in - confirm the intended wording.
        $this->log('Setup crawler, crawler_id is not set');
        $this->setCrawlerId($crawler_id);
    }

    if (!$this->controllerIsSetup()) {
        throw new CrawlerException('CrawlController is not setup correctly.');
    }

    $maxAttempts = config('laravel-job-handler.run_times', 10);

    for ($attempt = 0; $attempt <= $maxAttempts; $attempt++) {
        // Always work on the latest database state.
        $this->getCrawler();

        $this->log('Checking if crawler is enabled');
        if (!$this->crawler->enabled) {
            $this->log('Crawler is not enabled');
            throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - crawler isnt enabled in database');
        }

        $this->log('Checking if crawler can be runned');
        $periodCheck = $this->canCrawlerRunAfterPeriod();

        if (!$periodCheck['status']) {
            $this->log('Crawler needs to wait ('.$periodCheck['retry_in'].' seconds) before running again');
            throw new CrawlerNotReachedTimeBetweenJobsException('Has to wait ' . $periodCheck['retry_in'] . ' more seconds to run');
        }

        $this->log('Checked if crawler can runned');

        if (is_null($this->crawler->latest_status)) {
            // No previous run recorded.
            $this->log('Crawler can be runned, it the first time');
            break;
        }

        if ($this->crawler->latest_status == 2) {
            // Previous run finished.
            $this->log('Crawler can be runned, last crawler runned successfully');
            break;
        }

        if ($this->crawler->latest_status == 3) {
            if (!$this->override_fail_status) {
                $this->log('Last crawler failed, force run is not enabled');
                throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - last run had an error and override_fail_status is not enabled');
            }

            // The failed state is overridden, which forces a rerun.
            $this->log('Last crawler failed, but it is forced to run');
            break;
        }

        if ($attempt == $maxAttempts) {
            // Last attempt reached; failCrawler() throws, so the loop ends here.
            $this->log('Crawler exceeded the max execution time');
            $this->failCrawler('Crawler (#' . $this->crawler_id . ') - max execution time');
        }

        if ($this->crawler->status == 1) {
            if ($this->crawler->multiple_crawlers) {
                $this->log('Crawler can run multiple crawlers at the same time');
                break;
            }

            // Another run is in progress: pause before polling again.
            $pauseSeconds = config('laravel-job-handler.retry_in_seconds', 3);

            $this->log('Waiting for rechecking ('.$pauseSeconds.' seconds) if crawler can be runned');

            sleep($pauseSeconds);
        }
    }

    $this->log('All setup, starting crawler');
    $this->startCrawler();
}
||
/**
 * Record in the database that the crawler has started (status 1).
 *
 * @param string $output optional text stored together with the status
 * @return bool result of addStatus()
 */
public function startCrawler($output = '')
{
    $this->log('Starting crawler');

    // 1 = running
    $saved = $this->addStatus(1, $output);

    return $saved;
}
||
/**
 * Record in the database that the crawler is done (status 2), so other
 * scripts can run.
 *
 * @param string $output optional text stored together with the status
 * @return bool result of addStatus()
 */
public function doneCrawler($output = '')
{
    $this->log('Crawler done');

    // 2 = done running
    $saved = $this->addStatus(2, $output);

    return $saved;
}
||
203 | |||
/**
 * Finish the crawler run; a logged alias for doneCrawler().
 *
 * @param string $output optional text stored together with the status
 * @return bool result of doneCrawler()
 */
public function finish($output = '')
{
    $this->log('Finishing crawler');

    $finished = $this->doneCrawler($output);

    return $finished;
}
||
/**
 * Record that the crawler failed (status 3) and abort.
 *
 * The status is saved first; afterwards this method always throws.
 *
 * @param string $output failure description, stored and reused in the exception message
 *
 * @throws CrawlerException always, with " - status 3" appended to $output
 */
public function failCrawler($output = '')
{
    $this->log('Crawler failed');

    // 3 = failed
    $this->addStatus(3, $output);

    $message = $output.' - status 3';

    throw new CrawlerException($message);
}
||
229 | /** |
||
230 | * Save the latest crawler status to the database |
||
231 | * |
||
232 | * @param $status |
||
233 | * @param string $output |
||
234 | * @return bool |
||
235 | */ |
||
236 | protected function addStatus($status, $output = '') |
||
273 | } |
||
274 | } |
||
275 | protected function saveLog($crawlerstatus_id) { |
||
292 | } |
||
293 | } |
||
294 | |||
/**
 * This will define when the job can be runned again.
 *
 * Refreshes the crawler and compares its last_runned_at with the configured
 * time_between (in seconds). The crawler may run when no time_between is set,
 * when it never ran, or when at least time_between seconds have passed.
 *
 * @return array result of canCrawlerRunAfterPeriodStatus(): 'status' (bool) and
 *               'retry_in' (whole seconds still to wait, 0 when it may run)
 */
public function canCrawlerRunAfterPeriod()
{
    $this->getCrawler();

    $seconds = $this->crawler->time_between;

    if (is_null($seconds)) {
        $this->log('Not time_between specified');

        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    if (is_null($this->crawler->last_runned_at)) {
        //crawler never runned, so it can run now
        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    // Parse once and take "now" once, so the comparison and the remaining-time
    // calculation use the same two date objects instead of a raw attribute
    // and two slightly different timestamps.
    $lastRun = Carbon::parse($this->crawler->last_runned_at);
    $threshold = Carbon::now()->subSeconds($seconds);

    if ($lastRun->lte($threshold)) {
        return $this->canCrawlerRunAfterPeriodStatus(true);
    }

    // Depending on the Carbon version diffInSeconds() is either an absolute
    // integer or a signed float; normalise to a non-negative whole number.
    $retryIn = (int) abs($lastRun->diffInSeconds($threshold));

    return $this->canCrawlerRunAfterPeriodStatus(false, $retryIn);
}
||
323 | |||
/**
 * Build the result array handed out by canCrawlerRunAfterPeriod().
 *
 * @param mixed $status   whether the crawler may run
 * @param int   $retry_in seconds to wait before trying again
 * @return array array with the keys 'status' and 'retry_in', in that order
 */
public function canCrawlerRunAfterPeriodStatus($status, $retry_in = 0)
{
    // compact() produces ['status' => $status, 'retry_in' => $retry_in].
    return compact('status', 'retry_in');
}
||
338 | |||
339 | |||
340 | |||
/**
 * Switch logging on and write the first log entry.
 *
 * The flag is raised before log() is called.
 */
protected function startLogging()
{
    $this->logging = true;

    $this->log('Started logging');
}
||
/**
 * Write a final log entry, switch logging off and optionally save the log.
 *
 * @param mixed $crawlerstatus_id when not null, it is passed on to saveLog()
 */
protected function stopLogging($crawlerstatus_id = null)
{
    // The closing entry is written while logging is still switched on.
    $this->log('Stop logging');
    $this->logging = false;

    if ($crawlerstatus_id === null) {
        return;
    }

    $this->saveLog($crawlerstatus_id);
}
||
355 | protected function log($item = '') |
||
363 | } |
||
364 | } |
||
365 | } |
||
366 |