CrawlController::stopLogging()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 7
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
1
<?php
2
3
namespace Famdirksen\LaravelJobHandler\Http\Controllers;
4
5
use Carbon\Carbon;
6
use Famdirksen\LaravelJobHandler\Exceptions\CrawlerException;
7
use Famdirksen\LaravelJobHandler\Exceptions\CrawlerNotReachedTimeBetweenJobsException;
8
use Famdirksen\LaravelJobHandler\Exceptions\CrawlerSaveException;
9
use Famdirksen\LaravelJobHandler\Models\Crawlers;
10
use Famdirksen\LaravelJobHandler\Models\CrawlerStatus;
11
use Famdirksen\LaravelJobHandler\Models\CrawlerStatusLogs;
12
use Illuminate\Support\Facades\Log;
13
14
class CrawlController
15
{
16
    /** @var Crawlers|null Crawler model cached by getCrawler(); empty until the first load. */
    protected $crawler;
17
    /** @var mixed Id of the crawler this controller works on; assigned through setCrawlerId(). */
    protected $crawler_id;
18
    /** @var bool When true, setupCrawler() proceeds even if the latest status is 3 (failed). */
    protected $override_fail_status = false;
19
    /** @var bool Whether log() currently records messages. */
    protected $logging = false;
20
    /** @var array Messages buffered by log(); written to the database by saveLog(). */
    protected $logs = [];
21
22
23
    /**
     * Switch log collection on as soon as the controller is created.
     */
    public function __construct()
    {
        $this->startLogging();
    }
27
    /**
     * Switch log collection off when the controller is destroyed.
     *
     * No status id is handed over, so the buffered messages are not persisted here.
     */
    public function __destruct()
    {
        $this->stopLogging();
    }
31
32
    /**
     * Choose whether setupCrawler() may start a run although the previous run failed.
     *
     * @param bool $state true to allow a new run after a failed (status 3) run
     */
    public function overrideFailStatus(bool $state)
    {
        // A bool concatenated into a string renders false as '', which made the
        // log line read "to: " — spell the value out instead.
        $this->log('Setup overrideFailStatus to: '.($state ? 'true' : 'false'));

        $this->override_fail_status = $state;
    }
38
39
    /**
     * Load or refresh the crawler model for the configured crawler id.
     *
     * When no model is cached, or the cached model belongs to another id, the
     * model is looked up by id. Otherwise the cached model is re-read via fresh().
     * fresh() can return null when the row has disappeared in the meantime; in
     * that case the id lookup is used as a fallback so the failure surfaces here
     * instead of as a null dereference in the caller.
     *
     * NOTE(review): findOrFail() is presumably Eloquent's and throws
     * ModelNotFoundException for an unknown id — confirm against the Crawlers model.
     */
    protected function getCrawler()
    {
        if (empty($this->crawler) || $this->crawler->id != $this->crawler_id) {
            $this->log('Loading new crawler data');
            $this->crawler = Crawlers::findOrFail($this->crawler_id);
            $this->log('Loaded new crawler data');

            return;
        }

        $this->log('Refreshing crawler data');
        $this->crawler = $this->crawler->fresh() ?? Crawlers::findOrFail($this->crawler_id);
        $this->log('Refreshed crawler data');
    }
54
55
    /**
     * Remember which crawler this controller operates on.
     *
     * @param mixed $crawler_id identifier used for later crawler lookups
     */
    public function setCrawlerId($crawler_id)
    {
        $this->log('Setting crawler_id');

        $this->crawler_id = $crawler_id;

        $this->log('Set crawler_id');
    }
66
67
    /**
     * Give back the crawler id currently stored on the controller.
     *
     * @return mixed the stored id, or null when none has been set
     */
    public function getCrawlerId()
    {
        $this->log('Getting crawler_id');

        $currentId = $this->crawler_id;

        return $currentId;
    }
78
79
    /**
     * Tell whether a crawler id has been configured.
     *
     * @return bool true when the stored crawler id is not null
     */
    protected function controllerIsSetup()
    {
        $this->log('Check if controllerIsSetup');

        return $this->crawler_id !== null;
    }
94
95
    /**
     * Wait until the crawler is allowed to run, then register it as running.
     *
     * The crawler row is re-read on every attempt; there are at most
     * `laravel-job-handler.run_times` + 1 attempts (default 10 + 1). An attempt
     * ends the waiting when the crawler never ran, finished successfully
     * (latest status 2), failed (latest status 3) while the fail override is on,
     * or is running but allows multiple crawlers. Otherwise the method sleeps
     * `laravel-job-handler.retry_in_seconds` seconds (default 3) and tries again.
     *
     * @param mixed|null $crawler_id optional crawler id; when given it replaces the stored id
     *
     * @throws CrawlerException when no crawler id is set, the crawler is disabled, the last run
     *                          failed without override, or all attempts are used up
     * @throws CrawlerNotReachedTimeBetweenJobsException when the required pause since the last run
     *                                                   has not elapsed yet
     * @throws CrawlerSaveException when the "running" status cannot be stored
     */
    public function setupCrawler($crawler_id = null)
    {
        $this->log('Setup crawler');

        if (!is_null($crawler_id)) {
            // This branch runs when an id WAS supplied; the previous message claimed the opposite.
            $this->log('Setup crawler, crawler_id is given');
            $this->setCrawlerId($crawler_id);
        }

        if (!$this->controllerIsSetup()) {
            throw new CrawlerException('CrawlController is not setup correctly.');
        }

        $times = config('laravel-job-handler.run_times', 10);

        for ($x = 0; $x <= $times; $x++) {
            //fetch the last data
            $this->getCrawler();

            $this->log('Checking if crawler is enabled');
            if (!$this->crawler->enabled) {
                $this->log('Crawler is not enabled');
                throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - crawler isnt enabled in database');
            }

            $this->log('Checking if crawler can be runned');
            $checkIfCrawlerCanBeRunned = $this->canCrawlerRunAfterPeriod();

            if (!$checkIfCrawlerCanBeRunned['status']) {
                $this->log('Crawler needs to wait ('.$checkIfCrawlerCanBeRunned['retry_in'].' seconds) before running again');
                throw new CrawlerNotReachedTimeBetweenJobsException('Has to wait ' . $checkIfCrawlerCanBeRunned['retry_in'] . ' more seconds to run');
            }

            $this->log('Checked if crawler can runned');

            if (is_null($this->crawler->latest_status)) {
                $this->log('Crawler can be runned, it the first time');

                //first time it runs...
                break;
            }

            if ($this->crawler->latest_status == 2) {
                $this->log('Crawler can be runned, last crawler runned successfully');

                //Done running...
                break;
            }

            if ($this->crawler->latest_status == 3) {
                if ($this->override_fail_status) {
                    $this->log('Last crawler failed, but it is forced to run');

                    //override the failed state, this will force to rerun...
                    break;
                }

                $this->log('Last crawler failed, force run is not enabled');
                throw new CrawlerException('Crawler (#' . $this->crawler_id . ') - last run had an error and override_fail_status is not enabled');
            }

            // Last attempt and still not startable: give up, unless the override is on.
            if ($x == $times && !$this->override_fail_status) {
                $this->log('Crawler exceeded the max execution time');
                $this->failCrawler('Crawler (#' . $this->crawler_id . ') - max execution time');
            }

            // NOTE(review): this reads `status` while the checks above read `latest_status` —
            // confirm the Crawlers model exposes both attributes.
            if ($this->crawler->status == 1) {
                if ($this->crawler->multiple_crawlers) {
                    $this->log('Crawler can run multiple crawlers at the same time');
                    break;
                }

                $wait = config('laravel-job-handler.retry_in_seconds', 3);

                $this->log('Waiting for rechecking ('.$wait.' seconds) if crawler can be runned');

                sleep($wait);
            }
        }

        $this->log('All setup, starting crawler');
        $this->startCrawler();
    }
183
    /**
     * Register the crawler as running (status 1).
     *
     * @param string $output optional text stored alongside the status
     *
     * @return bool result of storing the status
     */
    public function startCrawler($output = '')
    {
        $this->log('Starting crawler');

        // 1 = running
        $stored = $this->addStatus(1, $output);

        return $stored;
    }
194
    /**
     * Register the crawler as done (status 2) so a next run can start.
     *
     * @param string $output optional text stored alongside the status
     *
     * @return bool result of storing the status
     */
    public function doneCrawler($output = '')
    {
        $this->log('Crawler done');

        // 2 = finished
        $stored = $this->addStatus(2, $output);

        return $stored;
    }
205
206
    /**
     * Finish the current run; delegates to doneCrawler().
     *
     * @param string $output optional text stored alongside the status
     *
     * @return bool result of doneCrawler()
     */
    public function finish($output = '')
    {
        $this->log('Finishing crawler');

        $finished = $this->doneCrawler($output);

        return $finished;
    }
218
    /**
     * Register the crawler as failed (status 3) and abort with an exception.
     *
     * @param string $output text stored with the status and used as the exception message prefix
     *
     * @throws CrawlerException always, after the failed status has been stored
     */
    public function failCrawler($output = '')
    {
        $this->log('Crawler failed');

        // 3 = failed
        $this->addStatus(3, $output);

        $message = $output.' - status 3';

        throw new CrawlerException($message);
    }
231
    /**
     * Store a new status record for the crawler and mirror it on the crawler model.
     *
     * Steps, in order: save a CrawlerStatus row; copy the status to the cached
     * crawler's latest_status (when a crawler is loaded); store $output as a
     * status log row (when not empty); for status 2, stop logging and persist the
     * buffered log messages; finally reload the crawler.
     *
     * @param mixed  $status status code to store (this class uses 1 = running, 2 = done, 3 = failed)
     * @param string $output optional text stored as a log row for the new status
     *
     * @return bool always true when the status row was saved
     *
     * @throws CrawlerSaveException when the status row cannot be saved
     */
    protected function addStatus($status, $output = '')
    {
        $this->log('Registering status ('.$status.')');

        $crawlerstatus = new CrawlerStatus();

        $crawlerstatus->crawler_id = $this->crawler_id;
        $crawlerstatus->status = $status;

        if (!$crawlerstatus->save()) {
            throw new CrawlerSaveException('Cannot save crawlerstatus to database...');
        }

        $this->log('Registered status ('.$status.')');

        if ($this->crawler) {
            $this->log('Setting crawler latest status (' . $status . ') attribute');

            $this->crawler->latest_status = $status;

            $this->crawler->save();
            $this->log('Set crawler latest status (' . $status . ') attribute');
        }

        if (!empty($output)) {
            // One timestamp for both columns so created_at and updated_at cannot drift apart.
            $now = Carbon::now();

            // Explicit list with a single row, instead of appending to an undefined variable.
            CrawlerStatusLogs::insert([
                [
                    'status_id' => $crawlerstatus->id,
                    'output' => $output,
                    'created_at' => $now,
                    'updated_at' => $now,
                ],
            ]);
        }

        if ($status == 2) {
            $this->stopLogging($crawlerstatus->id);
        }

        $this->getCrawler();

        return true;
    }
281
    /**
     * Write every buffered log message to the database, linked to a status record.
     *
     * @param mixed $crawlerstatus_id id of the status record the messages belong to
     */
    protected function saveLog($crawlerstatus_id)
    {
        $rows = [];

        foreach ($this->logs as $message) {
            $rows[] = [
                'status_id' => $crawlerstatus_id,
                'output' => $message,
                'created_at' => Carbon::now(),
                'updated_at' => Carbon::now(),
            ];
        }

        // Nothing buffered: skip the insert altogether.
        if (count($rows) <= 0) {
            $this->log('Log output is not set, skipping inserting');

            return;
        }

        $this->log('Registering crawler logs');

        CrawlerStatusLogs::insert($rows);

        $this->log('Registered crawler logs (count: ' . count($rows) . ')');
    }
303
304
    /**
     * Decide whether enough time has passed since the last run.
     *
     * The crawler is reloaded first. A run is allowed when the crawler has no
     * time_between, has never run (last_runned_at is null), or its last run is at
     * least time_between seconds in the past. Otherwise the result carries the
     * number of seconds still to wait.
     *
     * @return array ['status' => bool, 'retry_in' => int] as built by canCrawlerRunAfterPeriodStatus()
     */
    public function canCrawlerRunAfterPeriod()
    {
        $this->getCrawler();

        $seconds = $this->crawler->time_between;

        if (is_null($seconds)) {
            $this->log('Not a time_between specified');

            return $this->canCrawlerRunAfterPeriodStatus(true);
        }

        if (is_null($this->crawler->last_runned_at)) {
            //crawler never runned, so it can run now
            return $this->canCrawlerRunAfterPeriodStatus(true);
        }

        // Parse once and sample "now" once, so the comparison and the remaining
        // time are computed from the same two instants.
        $lastRun = Carbon::parse($this->crawler->last_runned_at);
        $threshold = Carbon::now()->subSeconds($seconds);

        if ($lastRun->lte($threshold)) {
            return $this->canCrawlerRunAfterPeriodStatus(true);
        }

        // diffInSeconds() is an absolute int in older Carbon releases but a signed
        // float in Carbon 3; normalise to a non-negative whole number of seconds.
        $retryIn = (int) ceil(abs($lastRun->diffInSeconds($threshold)));

        return $this->canCrawlerRunAfterPeriodStatus(false, $retryIn);
    }
332
333
    /**
     * Build the result structure used by canCrawlerRunAfterPeriod().
     *
     * @param mixed $status   whether the crawler may run now
     * @param int   $retry_in seconds to wait before trying again (0 when it may run)
     *
     * @return array ['status' => $status, 'retry_in' => $retry_in]
     */
    public function canCrawlerRunAfterPeriodStatus($status, $retry_in = 0)
    {
        // compact() keys the array by variable name, in the order given.
        return compact('status', 'retry_in');
    }
347
348
349
350
    /**
     * Turn log collection on and record that it started.
     */
    protected function startLogging()
    {
        // The flag must be set first, otherwise the message below would be dropped.
        $this->logging = true;

        $this->log('Started logging');
    }
355
    /**
     * Turn log collection off and, when a status id is given, persist the buffer.
     *
     * @param mixed|null $crawlerstatus_id status record to attach the buffered messages to;
     *                                     null skips persisting
     */
    protected function stopLogging($crawlerstatus_id = null)
    {
        // Record the final message while logging is still active.
        $this->log('Stop logging');
        $this->logging = false;

        if ($crawlerstatus_id === null) {
            return;
        }

        $this->saveLog($crawlerstatus_id);
    }
364
    /**
     * Buffer a message and send it to the debug log, tagged with the crawler id.
     *
     * Does nothing while logging is switched off.
     *
     * @param string $item message text
     */
    protected function log($item = '')
    {
        if (!$this->logging) {
            return;
        }

        $entry = "{$item} (crawler_id: {$this->crawler_id})";

        $this->logs[] = $entry;
        Log::debug($entry);
    }
373
}
374