Passed
Push — master ( 534ea0...bf2ec8 )
by Robin
03:24
created

CrawlController::log()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
1
<?php
2
3
namespace Famdirksen\LaravelJobHandler\Http\Controllers;
4
5
use Carbon\Carbon;
6
use Famdirksen\LaravelJobHandler\Exceptions\CrawlerException;
7
use Famdirksen\LaravelJobHandler\Exceptions\CrawlerNotReachedTimeBetweenJobsException;
8
use Famdirksen\LaravelJobHandler\Exceptions\CrawlerSaveException;
9
use Famdirksen\LaravelJobHandler\Models\Crawlers;
10
use Famdirksen\LaravelJobHandler\Models\CrawlerStatus;
11
use Famdirksen\LaravelJobHandler\Models\CrawlerStatusLogs;
12
use Illuminate\Support\Facades\Log;
13
14
class CrawlController
15
{
16
    /**
     * Crawler model most recently loaded by getCrawler().
     *
     * @var Crawlers|null
     */
    protected $crawler;

    /**
     * Id of the crawler this controller operates on.
     *
     * @var mixed
     */
    protected $crawler_id;

    /**
     * When true, setupCrawler() lets a crawler run although its latest status is 3 (failed).
     *
     * @var bool
     */
    protected $override_fail_status = false;

    /**
     * Whether log() currently records messages.
     *
     * @var bool
     */
    protected $logging = false;

    /**
     * Messages collected by log() while logging is switched on.
     *
     * @var string[]
     */
    protected $logs = [];
21
22
23
    /**
     * Switch message collection on as soon as the controller exists.
     */
    public function __construct()
    {
        $this->startLogging();
    }
27
    /**
     * Switch message collection off when the controller is destroyed.
     *
     * stopLogging() is called without a status id here.
     */
    public function __destruct()
    {
        $this->stopLogging();
    }
31
32
    /**
     * Allow or forbid a new run when the crawler's latest status is 3 (failed).
     *
     * @param bool $state true to let setupCrawler() proceed after a failed run
     */
    public function overrideFailStatus(bool $state)
    {
        // A bare boolean concatenates as "1" or "" - spell it out so "false" is visible in the log.
        $this->log('Setup overrideFailStatus to: ' . ($state ? 'true' : 'false'));

        $this->override_fail_status = $state;
    }
37
38
    /**
     * Get the latest crawler data from the database.
     *
     * Loads the model by id when nothing is cached yet or the cached model belongs
     * to another crawler id; otherwise the cached model is re-read via fresh().
     * In both cases a missing row ends in findOrFail(), which raises instead of
     * leaving the controller without a crawler model.
     */
    protected function getCrawler()
    {
        if (empty($this->crawler) || $this->crawler->id != $this->crawler_id) {
            $this->log('Loading new crawler data');
            $this->crawler = Crawlers::findOrFail($this->crawler_id);
            $this->log('Loaded new crawler data');

            return;
        }

        $this->log('Refreshing crawler data');
        $fresh = $this->crawler->fresh();

        // fresh() returns null once the row no longer exists; fail the same way the
        // initial load does rather than caching null and breaking later property reads.
        if (is_null($fresh)) {
            $fresh = Crawlers::findOrFail($this->crawler_id);
        }

        $this->crawler = $fresh;
        $this->log('Refreshed crawler data');
    }
53
54
    /**
     * Store the id of the crawler this controller should operate on.
     *
     * A message is logged before and after the assignment.
     *
     * @param mixed $crawler_id
     */
    public function setCrawlerId($crawler_id)
    {
        $this->log('Setting crawler_id');

        $this->crawler_id = $crawler_id;

        $this->log('Set crawler_id');
    }
65
66
    /**
     * Return the crawler id currently stored on the controller.
     *
     * @return mixed the stored id, or null when none was set
     */
    public function getCrawlerId()
    {
        $this->log('Getting crawler_id');

        return $this->crawler_id;
    }
77
78
    /**
     * Tell whether the controller has what it needs to run, i.e. a crawler id.
     *
     * @return bool true when a crawler id has been set
     */
    protected function controllerIsSetup()
    {
        $this->log('Check if controllerIsSetup');

        return $this->crawler_id !== null;
    }
92
93
    /**
     * Setup the crawler so it won't run twice at the same time.
     *
     * The crawler is re-read at most `laravel-job-handler.run_times` + 1 times
     * (config default 10). Waiting stops and the crawler is started when the
     * period check passes and the latest status is null (first run), 2 (done),
     * or 3 (failed) with the fail override enabled, or when the crawler's status
     * is 1 and it allows multiple crawlers. While the status is 1 without that
     * permission the method sleeps `laravel-job-handler.retry_in_seconds`
     * (config default 3) before checking again.
     *
     * @param mixed $crawler_id optional crawler id; when given it replaces the stored id
     *
     * @throws CrawlerException when no crawler id is known, the crawler is not enabled,
     *                          the last run failed without override, or the last attempt is reached
     * @throws CrawlerNotReachedTimeBetweenJobsException when the period check reports the crawler must wait
     */
    public function setupCrawler($crawler_id = null)
    {
        $this->log('Setup crawler');

        if (!is_null($crawler_id)) {
            // This branch runs when an id WAS passed in; the old message claimed the opposite.
            $this->log('Setup crawler, crawler_id is provided');
            $this->setCrawlerId($crawler_id);
        }

        if (!$this->controllerIsSetup()) {
            throw new CrawlerException('CrawlController is not setup correctly.');
        }

        $times = config('laravel-job-handler.run_times', 10);

        for ($x = 0; $x <= $times; $x++) {
            //fetch the last data
            $this->getCrawler();

            $this->log('Checking if crawler is enabled');
            if (!$this->crawler->enabled) {
                $this->log('Crawler is not enabled');
                throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - crawler isnt enabled in database');
            }

            $this->log('Checking if crawler can be runned');
            $checkIfCrawlerCanBeRunned = $this->canCrawlerRunAfterPeriod();

            if (!$checkIfCrawlerCanBeRunned['status']) {
                $this->log('Crawler needs to wait ('.$checkIfCrawlerCanBeRunned['retry_in'].' seconds) before running again');
                throw new CrawlerNotReachedTimeBetweenJobsException('Has to wait ' . $checkIfCrawlerCanBeRunned['retry_in'] . ' more seconds to run');
            }

            $this->log('Checked if crawler can runned');

            // Loose comparisons below are kept as they were - presumably the attribute
            // can arrive as a numeric string from the database (TODO confirm model casts).
            if (is_null($this->crawler->latest_status)) {
                $this->log('Crawler can be runned, it the first time');

                //first time it runs...
                break;
            }

            if ($this->crawler->latest_status == 2) {
                $this->log('Crawler can be runned, last crawler runned successfully');

                //Done running...
                break;
            }

            if ($this->crawler->latest_status == 3) {
                if (!$this->override_fail_status) {
                    $this->log('Last crawler failed, force run is not enabled');
                    throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - last run had an error and override_fail_status is not enabled');
                }

                $this->log('Last crawler failed, but it is forced to run');

                //override the failed state, this will force to rerun...
                break;
            }

            // Last attempt: failCrawler() records status 3 and always throws,
            // so execution never continues past this point on the final iteration.
            if ($x == $times) {
                $this->log('Crawler exceeded the max execution time');
                $this->failCrawler('Crawler (#' . $this->crawler_id . ') - max execution time');
            }

            if ($this->crawler->status == 1) {
                if ($this->crawler->multiple_crawlers) {
                    $this->log('Crawler can run multiple crawlers at the same time');
                    break;
                }

                $wait = config('laravel-job-handler.retry_in_seconds', 3);

                $this->log('Waiting for rechecking ('.$wait.' seconds) if crawler can be runned');

                sleep($wait);
            }
        }

        $this->log('All setup, starting crawler');
        $this->startCrawler();
    }
181
    /**
     * Mark the crawler as running by registering status 1.
     *
     * @param string $output optional text passed on to addStatus()
     * @return bool result of addStatus()
     */
    public function startCrawler($output = '')
    {
        $this->log('Starting crawler');

        $running = 1;

        return $this->addStatus($running, $output);
    }
192
    /**
     * Mark the crawler as done (status 2) so other scripts can run.
     *
     * @param string $output optional text passed on to addStatus()
     * @return bool result of addStatus()
     */
    public function doneCrawler($output = '')
    {
        $this->log('Crawler done');

        $done = 2;

        return $this->addStatus($done, $output);
    }
203
204
    /**
     * Finish the crawler; logs a message and delegates to doneCrawler().
     *
     * @param string $output optional text passed on to doneCrawler()
     * @return bool result of doneCrawler()
     */
    public function finish($output = '')
    {
        $this->log('Finishing crawler');

        $finished = $this->doneCrawler($output);

        return $finished;
    }
216
    /**
     * Register the crawler as failed (status 3) and raise the failure.
     *
     * This method never returns normally: after the status is stored it always throws.
     *
     * @param string $output text stored with the status and used as the exception message prefix
     *
     * @throws CrawlerException always, with the message "<output> - status 3"
     */
    public function failCrawler($output = '')
    {
        $this->log('Crawler failed');

        $failed = 3;
        $this->addStatus($failed, $output);

        throw new CrawlerException("{$output} - status 3");
    }
229
    /**
     * Save the latest crawler status to the database.
     *
     * Creates a CrawlerStatus row, copies the status onto the crawler's
     * latest_status attribute, stores $output as a status log row when it is not
     * empty, persists the collected log messages when the status is 2, and
     * finally re-reads the crawler.
     *
     * @param mixed $status status code to register (this class uses 1, 2 and 3)
     * @param string $output optional text stored as a log row for the new status
     * @return bool true when the status was stored
     *
     * @throws CrawlerSaveException when the CrawlerStatus row cannot be saved
     */
    protected function addStatus($status, $output = '')
    {
        $this->log('Registering status ('.$status.')');

        // The crawler model is written to below. Load it up-front when it was never
        // fetched (or belongs to another id) so a direct startCrawler()/doneCrawler()
        // call does not dereference null after the status row is already stored.
        if (empty($this->crawler) || $this->crawler->id != $this->crawler_id) {
            $this->getCrawler();
        }

        $crawlerstatus = new CrawlerStatus();

        $crawlerstatus->crawler_id = $this->crawler_id;
        $crawlerstatus->status = $status;

        if (!$crawlerstatus->save()) {
            throw new CrawlerSaveException('Cannot save crawlerstatus to database...');
        }

        $this->log('Registered status ('.$status.')');
        $this->log('Setting crawler latest status ('.$status.') attribute');

        $this->crawler->latest_status = $status;

        $this->crawler->save();
        $this->log('Set crawler latest status ('.$status.') attribute');

        if (!empty($output)) {
            // Single-row batch, built in place instead of appending to an undefined variable.
            CrawlerStatusLogs::insert([
                [
                    'status_id' => $crawlerstatus->id,
                    'output' => $output,
                ],
            ]);
        }

        if ($status == 2) {
            // Only a finished run persists the collected log messages.
            $this->stopLogging($crawlerstatus->id);
        }

        $this->getCrawler();

        return true;
    }
275
    /**
     * Store every collected log message as a log row of the given status.
     *
     * Nothing is inserted when no messages were collected.
     *
     * @param mixed $crawlerstatus_id id written to each row's status_id column
     */
    protected function saveLog($crawlerstatus_id)
    {
        $rows = array_values(array_map(
            function ($message) use ($crawlerstatus_id) {
                return [
                    'status_id' => $crawlerstatus_id,
                    'output' => $message,
                ];
            },
            $this->logs
        ));

        if (count($rows) === 0) {
            $this->log('Log output is not set, skipping inserting');

            return;
        }

        $this->log('Registering crawler logs');

        CrawlerStatusLogs::insert($rows);

        $this->log('Registered crawler logs (count: ' . count($rows) . ')');
    }
294
295
    /**
     * This will define when the job can be runned again.
     *
     * Re-reads the crawler, then answers "may run" when no time_between is
     * configured, when the crawler has no last_runned_at, or when last_runned_at
     * lies at or before now minus time_between seconds. Otherwise it answers
     * "may not run" together with the difference in seconds between
     * last_runned_at and that threshold.
     *
     * @return array ['status' => bool, 'retry_in' => seconds] as built by canCrawlerRunAfterPeriodStatus()
     */
    public function canCrawlerRunAfterPeriod()
    {
        $this->getCrawler();

        $seconds = $this->crawler->time_between;

        if (is_null($seconds)) {
            $this->log('Not time_between specified');

            return $this->canCrawlerRunAfterPeriodStatus(true);
        }

        if (is_null($this->crawler->last_runned_at)) {
            //crawler never runned, so it can run now
            return $this->canCrawlerRunAfterPeriodStatus(true);
        }

        if ($this->crawler->last_runned_at <= Carbon::now()->subSeconds($seconds)) {
            return $this->canCrawlerRunAfterPeriodStatus(true);
        }

        $retry_in = Carbon::parse($this->crawler->last_runned_at)
            ->diffInSeconds(Carbon::now()->subSeconds($seconds));

        return $this->canCrawlerRunAfterPeriodStatus(false, $retry_in);
    }
323
324
    /**
     * Build the result array returned by canCrawlerRunAfterPeriod().
     *
     * @param mixed $status value stored under the 'status' key
     * @param int $retry_in value stored under the 'retry_in' key
     * @return array array with the keys 'status' and 'retry_in', in that order
     */
    public function canCrawlerRunAfterPeriodStatus($status, $retry_in = 0)
    {
        $result = [];
        $result['status'] = $status;
        $result['retry_in'] = $retry_in;

        return $result;
    }
338
339
340
341
    /**
     * Switch message collection on and record that it started.
     *
     * The flag is set first so the "Started logging" message itself is recorded.
     */
    protected function startLogging()
    {
        $this->logging = true;

        $this->log('Started logging');
    }
346
    /**
     * Switch message collection off and optionally persist what was collected.
     *
     * The "Stop logging" message is recorded before the flag is cleared.
     *
     * @param mixed $crawlerstatus_id status id to store the collected messages under;
     *                                when null nothing is saved
     */
    protected function stopLogging($crawlerstatus_id = null)
    {
        $this->log('Stop logging');
        $this->logging = false;

        if ($crawlerstatus_id === null) {
            return;
        }

        $this->saveLog($crawlerstatus_id);
    }
355
    /**
     * Record a message while logging is switched on.
     *
     * The message is suffixed with the current crawler id, appended to the
     * in-memory list and written through the Log facade at info level.
     * Nothing happens while logging is switched off.
     *
     * @param string $item message text
     */
    protected function log($item = '')
    {
        if (!$this->logging) {
            return;
        }

        $entry = "{$item} (crawler_id: {$this->crawler_id})";

        $this->logs[] = $entry;
        Log::info($entry);
    }
365
}
366