IRCScraper::_startScraping()   B
last analyzed

Complexity

Conditions 10
Paths 67

Size

Total Lines 59
Code Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 39
dl 0
loc 59
rs 7.6666
c 0
b 0
f 0
cc 10
nc 67
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace App\Services;
4
5
use App\Facades\Search;
6
use App\Models\Predb;
7
use App\Models\UsenetGroup;
8
use Illuminate\Support\Carbon;
9
use Illuminate\Support\Facades\DB;
10
11
/**
12
 * Class IRCScraper.
13
 */
14
class IRCScraper extends IRCClient
15
{
16
    /**
     * Pattern tested against a PRE's category; a match causes the PRE to be skipped.
     * False means no category filtering is done.
     */
    protected string|false $_categoryIgnoreRegex = false;

    /**
     * Fields of the PRE currently being processed.
     * The keys are the ones initialised by _resetPreVariables().
     */
    protected array $_curPre;

    /**
     * Cache of group name => group id, filled by _getGroupID().
     * An empty string is stored for names that were not found.
     */
    protected array $_groupList;

    /**
     * Map of source name => bool. A PRE is skipped only when its source maps to true.
     */
    protected array $_ignoredChannels;

    /**
     * True when the message being processed carried nuke information.
     * Only used to decide what to print.
     */
    protected bool $_nuked;

    /**
     * The existing predb row (category, size) found for the current title by
     * _checkForDupe(): null when there is none, an empty array after a reset.
     */
    protected $_oldPre;

    /**
     * When true, no text is printed.
     */
    protected bool $_silent;

    /**
     * Pattern tested against a PRE's title; a match causes the PRE to be skipped.
     * False means no title filtering is done.
     */
    protected string|false $_titleIgnoreRegex = false;
52
53
54
    /**
     * Load the ignore settings from the `irc_settings` config, reset the
     * per-PRE state and immediately start scraping via _startScraping().
     *
     * @param  bool  $silent  Run this in silent mode (no text output).
     * @param  bool  $debug  Turn on debug? Shows sent/received socket buffer messages.
     *
     * @throws \Exception
     */
    public function __construct(bool $silent, bool $debug)
    {
        $ignoredCfg = config('irc_settings.scrape_irc_source_ignore');
        if ($ignoredCfg) {
            // The setting holds a serialized array; anything else means "ignore nothing".
            try {
                $ignored = unserialize((string) $ignoredCfg, ['allowed_classes' => false]);
            } catch (\ValueError $e) {
                $ignored = false;
            }
            $this->_ignoredChannels = is_array($ignored) ? $ignored : [];
        } else {
            // Default source list; false means the source is NOT ignored.
            $this->_ignoredChannels = [
                '#a.b.cd.image' => false,
                '#a.b.console.ps3' => false,
                '#a.b.dvd' => false,
                '#a.b.erotica' => false,
                '#a.b.flac' => false,
                '#a.b.foreign' => false,
                '#a.b.games.nintendods' => false,
                '#a.b.inner-sanctum' => false,
                '#a.b.moovee' => false,
                '#a.b.movies.divx' => false,
                '#a.b.sony.psp' => false,
                '#a.b.sounds.mp3.complete_cd' => false,
                '#a.b.teevee' => false,
                '#a.b.games.wii' => false,
                '#a.b.warez' => false,
                '#a.b.games.xbox360' => false,
                '#pre@corrupt' => false,
                '#scnzb' => false,
                '#tvnzb' => false,
                'srrdb' => false,
            ];
        }

        // Cast before comparing: an unset setting comes back as null (or false),
        // which must leave the filter disabled instead of installing the empty
        // pattern '' that preg_match() rejects with a warning.
        $categoryIgnore = (string) config('irc_settings.scrape_irc_category_ignore');
        if ($categoryIgnore !== '') {
            $this->_categoryIgnoreRegex = $categoryIgnore;
        }

        $titleIgnore = (string) config('irc_settings.scrape_irc_title_ignore');
        if ($titleIgnore !== '') {
            $this->_titleIgnoreRegex = $titleIgnore;
        }

        $this->_groupList = [];
        $this->_silent = $silent;
        $this->_debug = $debug;
        $this->_resetPreVariables();
        $this->_startScraping();
    }
114
115
    /**
     * Main method for scraping: connect, log in, join the configured channels
     * and hand over to readIncoming().
     *
     * When connecting or logging in fails, the error is printed and the process
     * is terminated with exit status 1.
     */
    protected function _startScraping(): void
    {
        // "server:port" as configured, used in every message below.
        $endpoint = config('irc_settings.scrape_irc_server').':'.config('irc_settings.scrape_irc_port');
        $nickname = config('irc_settings.scrape_irc_nickname');

        // Connect to IRC.
        $connected = $this->connect(
            (string) config('irc_settings.scrape_irc_server'),
            (int) config('irc_settings.scrape_irc_port'),
            (bool) config('irc_settings.scrape_irc_tls')
        );
        if ($connected === false) {
            echo 'Error connecting to ('.$endpoint.'). Please verify your server information and try again.'.PHP_EOL;
            // exit() with a string argument would report status 0 (success).
            exit(1);
        }

        // Normalize password to ?string
        $password = config('irc_settings.scrape_irc_password');
        $password = ($password === false || $password === '' || $password === null) ? null : (string) $password;

        // Login to IRC. Note parameter order: nick, user, real, pass.
        $loggedIn = $this->login(
            (string) $nickname,
            (string) config('irc_settings.scrape_irc_username'),
            (string) config('irc_settings.scrape_irc_realname'),
            $password
        );
        if ($loggedIn === false) {
            echo 'Error logging in to: ('.$endpoint.') nickname: ('.$nickname.
                '). Verify your connection information, you might also be banned from this server or there might have been a connection issue.'.
                PHP_EOL;
            exit(1);
        }

        // Join channels. The setting holds a serialized array; fall back to the
        // default channel when it is missing or does not unserialize to an array.
        $channels = ['#PreNNTmux' => null];
        $channelsCfg = config('irc_settings.scrape_irc_channels');
        if ($channelsCfg) {
            try {
                $configured = unserialize((string) $channelsCfg, ['allowed_classes' => false]);
                if (is_array($configured)) {
                    $channels = $configured;
                }
            } catch (\ValueError $e) {
                // Keep the default channel list.
            }
        }
        $this->joinChannels($channels);

        if (! $this->_silent) {
            echo '['.date('r').'] [Scraping of IRC channels for ('.$endpoint.') ('.$nickname.') started.]'.PHP_EOL;
        }

        // Scan incoming IRC messages.
        $this->readIncoming();
    }
178
179
    /**
     * Parse the current channel message as a PRE announcement and insert or
     * update the matching PRE.
     *
     * Messages that do not match the announcement format, or whose source,
     * category or title is ignored, are dropped without touching the database.
     *
     * @throws \Exception
     */
    protected function processChannelMessages(): void
    {
        $verbose = $this->_debug && ! $this->_silent;
        $message = $this->_channelData['message'];

        if ($verbose) {
            echo '[DEBUG] Processing message: '.$message.PHP_EOL;
        }

        $matched = preg_match(
            '/^(NEW|UPD|NUK): \[DT: (?P<time>.+?)\]\s?\[TT: (?P<title>.+?)\]\s?\[SC: (?P<source>.+?)\]\s?\[CT: (?P<category>.+?)\]\s?\[RQ: (?P<req>.+?)\]'.
            '\s?\[SZ: (?P<size>.+?)\]\s?\[FL: (?P<files>.+?)\]\s?(\[FN: (?P<filename>.+?)\]\s?)?(\[(?P<nuked>(UN|MOD|RE|OLD)?NUKED?): (?P<reason>.+?)\])?$/i',
            $message,
            $hits
        );

        if (! $matched) {
            if ($verbose) {
                echo '[DEBUG] Message did not match PRE regex pattern'.PHP_EOL;
            }

            return;
        }

        if ($verbose) {
            echo '[DEBUG] Regex matched! Title: '.$hits['title'].' | Source: '.$hits['source'].' | Category: '.$hits['category'].PHP_EOL;
        }

        // A source is ignored only when it is explicitly mapped to true.
        if (($this->_ignoredChannels[$hits['source']] ?? null) === true) {
            if ($verbose) {
                echo '[DEBUG] Source '.$hits['source'].' is ignored, skipping...'.PHP_EOL;
            }

            return;
        }

        if ($this->_categoryIgnoreRegex !== false && preg_match((string) $this->_categoryIgnoreRegex, $hits['category'])) {
            if ($verbose) {
                echo '[DEBUG] Category '.$hits['category'].' is ignored by regex, skipping...'.PHP_EOL;
            }

            return;
        }

        if ($this->_titleIgnoreRegex !== false && preg_match((string) $this->_titleIgnoreRegex, $hits['title'])) {
            if ($verbose) {
                echo '[DEBUG] Title '.$hits['title'].' is ignored by regex, skipping...'.PHP_EOL;
            }

            return;
        }

        // The announced time is interpreted as UTC; the value stored is an SQL
        // expression built from the resulting integer timestamp.
        $utime = Carbon::createFromTimeString($hits['time'], 'UTC')->timestamp;

        $this->_curPre['predate'] = 'FROM_UNIXTIME('.$utime.')';
        $this->_curPre['title'] = $hits['title'];
        $this->_curPre['source'] = $hits['source'];

        // 'N/A' marks a field the announcement did not provide.
        if ($hits['category'] !== 'N/A') {
            $this->_curPre['category'] = $hits['category'];
        }
        if ($hits['req'] !== 'N/A' && preg_match('/^(?P<req>\d+):(?P<group>.+)$/i', $hits['req'], $matches2)) {
            $this->_curPre['reqid'] = $matches2['req'];
            $this->_curPre['group_id'] = $this->_getGroupID($matches2['group']);
        }
        if ($hits['size'] !== 'N/A') {
            $this->_curPre['size'] = $hits['size'];
        }
        if ($hits['files'] !== 'N/A') {
            $this->_curPre['files'] = substr($hits['files'], 0, 50);
        }
        if (isset($hits['filename']) && $hits['filename'] !== 'N/A') {
            $this->_curPre['filename'] = $hits['filename'];
        }

        if (isset($hits['nuked'])) {
            // The announcement regex is case-insensitive and accepts the tag with
            // or without the trailing "D", so normalise the case and map both
            // spellings; otherwise a matched tag could leave the status unset.
            $nukeStatus = match (strtoupper($hits['nuked'])) {
                'NUKE', 'NUKED' => Predb::PRE_NUKED,
                'UNNUKE', 'UNNUKED' => Predb::PRE_UNNUKED,
                'MODNUKE', 'MODNUKED' => Predb::PRE_MODNUKE,
                'RENUKE', 'RENUKED' => Predb::PRE_RENUKED,
                'OLDNUKE', 'OLDNUKED' => Predb::PRE_OLDNUKE,
                default => null,
            };
            if ($nukeStatus !== null) {
                $this->_curPre['nuked'] = $nukeStatus;
            }

            $this->_curPre['reason'] = isset($hits['reason']) ? substr($hits['reason'], 0, 255) : '';
            $this->_nuked = true; // flag for output
        }

        $this->_checkForDupe();
    }
275
276
    /**
     * Look the current title up in predb, then update the existing row or
     * insert a new one. The per-PRE state is reset afterwards in both cases.
     *
     * @throws \Exception
     */
    protected function _checkForDupe(): void
    {
        $verbose = $this->_debug && ! $this->_silent;
        $title = $this->_curPre['title'];

        // Only category and size of an existing row are needed later on.
        $this->_oldPre = Predb::query()
            ->where('title', $title)
            ->select(['category', 'size'])
            ->first();

        if ($this->_oldPre !== null) {
            if ($verbose) {
                echo '[DEBUG] PRE already exists, updating: '.$title.PHP_EOL;
            }
            $this->_updatePre();
        } else {
            if ($verbose) {
                echo '[DEBUG] New PRE found, inserting: '.$title.PHP_EOL;
            }
            $this->_insertNewPre();
        }

        $this->_resetPreVariables();
    }
297
298
    /**
     * Insert the current PRE into the predb table and hand it to the search index.
     *
     * Nothing is inserted when the title is empty or a row with the same title
     * already exists. Values are sent as bound parameters; only the predate is
     * an SQL expression (FROM_UNIXTIME(...) built from an integer timestamp, or
     * NOW() when no predate is set).
     *
     * Exceptions raised while inserting or indexing are caught here and only
     * reported in debug mode.
     *
     * @throws \Exception
     */
    protected function _insertNewPre(): void
    {
        $verbose = $this->_debug && ! $this->_silent;

        // Check if title is empty first
        if (empty($this->_curPre['title'])) {
            if ($verbose) {
                echo '[DEBUG] PRE title is empty, skipping insert'.PHP_EOL;
            }

            return;
        }

        // Double-check database to ensure we don't have stale search index data
        $existingPre = Predb::query()->where('title', $this->_curPre['title'])->first();
        if ($existingPre !== null) {
            if ($verbose) {
                echo '[DEBUG] PRE already exists in database (ID: '.$existingPre->id.'), skipping insert'.PHP_EOL;
            }

            return;
        }

        if ($verbose) {
            echo '[DEBUG] PRE not in database, proceeding with insert...'.PHP_EOL;
        }

        // predb column => key in $_curPre. A column is only written when the
        // corresponding value is non-empty.
        $optionalColumns = [
            'size' => 'size',
            'category' => 'category',
            'source' => 'source',
            'nukereason' => 'reason',
            'files' => 'files',
            'requestid' => 'reqid',
            'groups_id' => 'group_id',
            'nuked' => 'nuked',
            'filename' => 'filename',
        ];

        $row = [];
        foreach ($optionalColumns as $column => $key) {
            if (! empty($this->_curPre[$key])) {
                $row[$column] = $this->_curPre[$key];
            }
        }

        // The predate is evaluated by the database, so it is passed as a raw
        // expression rather than as a bound string.
        $row['predate'] = DB::raw(! empty($this->_curPre['predate']) ? $this->_curPre['predate'] : 'NOW()');
        $row['title'] = $this->_curPre['title'];

        if ($verbose) {
            echo '[DEBUG] Inserting PRE with columns: '.implode(', ', array_keys($row)).PHP_EOL;
        }

        try {
            // Bound parameters: a '%' or quote in an IRC-supplied value can no
            // longer corrupt the statement (the SQL used to be a sprintf format).
            DB::table('predb')->insert($row);

            $lastId = DB::connection()->getPdo()->lastInsertId();

            if ($verbose) {
                echo '[DEBUG] Successfully inserted PRE with ID: '.$lastId.PHP_EOL;
            }

            $parameters = [
                'id' => $lastId,
                'title' => $this->_curPre['title'],
                'filename' => $this->_curPre['filename'] ?? null,
                'source' => $this->_curPre['source'] ?? null,
            ];

            Search::insertPredb($parameters);

            $this->_doEcho(true);
        } catch (\Exception $e) {
            // Best effort: a failed insert must not stop the scraper.
            if ($verbose) {
                echo '[DEBUG] ERROR inserting PRE: '.$e->getMessage().PHP_EOL;
            }
        }
    }
390
391
    /**
     * Update the predb row of the current title with every non-empty field of
     * the current PRE, then refresh the search index entry.
     *
     * The category is only written when the existing row has none. Values are
     * sent as bound parameters; only the predate is an SQL expression
     * (FROM_UNIXTIME(...) built from an integer timestamp).
     *
     * @throws \Exception
     */
    protected function _updatePre(): void
    {
        if (empty($this->_curPre['title'])) {
            return;
        }

        $title = $this->_curPre['title'];

        // predb column => key in $_curPre. A column is only written when the
        // corresponding value is non-empty.
        $columns = [
            'size' => 'size',
            'source' => 'source',
            'files' => 'files',
            'nukereason' => 'reason',
            'requestid' => 'reqid',
            'groups_id' => 'group_id',
            'nuked' => 'nuked',
            'filename' => 'filename',
        ];

        $changes = [];
        foreach ($columns as $column => $key) {
            if (! empty($this->_curPre[$key])) {
                $changes[$column] = $this->_curPre[$key];
            }
        }

        if (! empty($this->_curPre['predate'])) {
            // Evaluated by the database, so passed as a raw expression.
            $changes['predate'] = DB::raw($this->_curPre['predate']);
        }

        // Never overwrite a category that is already stored.
        if (empty($this->_oldPre['category']) && ! empty($this->_curPre['category'])) {
            $changes['category'] = $this->_curPre['category'];
        }

        if ($changes === []) {
            return;
        }

        // Bound parameters instead of a concatenated statement: the values come
        // straight from IRC messages.
        DB::table('predb')->where('title', $title)->update($changes);

        // Look up the predb row ID by title for indexing backends.
        $predbId = Predb::query()->where('title', $title)->value('id');

        if (! empty($predbId)) {
            $parameters = [
                'id' => $predbId,
                'title' => $title,
                'filename' => $this->_curPre['filename'] ?? null,
                'source' => $this->_curPre['source'] ?? null,
            ];

            Search::updatePreDb($parameters);
        }

        $this->_doEcho(false);
    }
446
447
    /**
     * Print a one-line summary of the PRE that was just added or updated.
     * Nothing is printed in silent mode.
     *
     * @param  bool  $new  True for an inserted PRE, false for an updated one.
     */
    protected function _doEcho(bool $new = true): void
    {
        if ($this->_silent) {
            return;
        }

        $nukeString = '';
        if ($this->_nuked !== false) {
            // Nuke status => tag shown in the output; unknown statuses print no tag.
            $nukeTags = [
                Predb::PRE_NUKED => '[ NUKED ] ',
                Predb::PRE_UNNUKED => '[UNNUKED] ',
                Predb::PRE_MODNUKE => '[MODNUKE] ',
                Predb::PRE_OLDNUKE => '[OLDNUKE] ',
                Predb::PRE_RENUKED => '[RENUKED] ',
            ];
            $nukeString = $nukeTags[(int) $this->_curPre['nuked']] ?? '';
            $nukeString .= '['.($this->_curPre['reason'] ?? '').'] ';
        }

        $action = $new ? '] [ Added Pre ] [' : '] [Updated Pre] [';

        // Prefer the category of the current message, then the stored one.
        $categoryPart = '';
        if (! empty($this->_curPre['category'])) {
            $categoryPart = ' ['.$this->_curPre['category'].']';
        } elseif (! empty($this->_oldPre['category'])) {
            $categoryPart = ' ['.$this->_oldPre['category'].']';
        }

        $sizePart = '';
        if (! empty($this->_curPre['size'])) {
            $sizePart = ' ['.$this->_curPre['size'].']';
        }

        echo '['.date('r').$action.($this->_curPre['source'] ?? '').'] '.
            $nukeString.
            '['.$this->_curPre['title'].']'.
            $categoryPart.
            $sizePart.
            PHP_EOL;
    }
499
500
    /**
     * Resolve a group name to its id, caching the result per name.
     *
     * @param  string  $groupName  Name of the group to look up.
     * @return mixed  The group's id, or an empty string when no such group exists.
     */
    protected function _getGroupID(string $groupName): mixed
    {
        if (isset($this->_groupList[$groupName])) {
            return $this->_groupList[$groupName];
        }

        $group = UsenetGroup::query()->where('name', $groupName)->first(['id']);

        // Misses are cached as '' so the lookup is not repeated.
        $groupId = '';
        if ($group !== null) {
            $groupId = $group['id'];
        }
        $this->_groupList[$groupName] = $groupId;

        return $this->_groupList[$groupName];
    }
512
513
    /**
     * Clear the per-PRE state: the nuke flag, the previously stored row and
     * every field of the current PRE (each set back to an empty string).
     */
    protected function _resetPreVariables(): void
    {
        $fields = [
            'title',
            'size',
            'predate',
            'category',
            'source',
            'group_id',
            'reqid',
            'nuked',
            'reason',
            'files',
            'filename',
        ];

        $this->_curPre = array_fill_keys($fields, '');
        $this->_oldPre = [];
        $this->_nuked = false;
    }
535
}
536