IRCScraper::processChannelMessages()   F
last analyzed

Complexity

Conditions 34
Paths 1692

Size

Total Lines 88
Code Lines 59

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 59
dl 0
loc 88
rs 0
c 0
b 0
f 0
cc 34
nc 1692
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace App\Services;
4
5
use App\Facades\Search;
6
use App\Models\Predb;
7
use App\Models\UsenetGroup;
8
use Illuminate\Support\Carbon;
9
use Illuminate\Support\Facades\DB;
10
11
/**
12
 * Class IRCScraper.
13
 */
14
class IRCScraper extends IRCClient
15
{
16
    /**
17
     * Regex to ignore categories.
18
     */
19
    protected string|false $_categoryIgnoreRegex = false;
20
21
    /**
22
     * Array of current pre info.
23
     */
24
    protected array $_curPre;
25
26
    /**
27
     * Cache of group names mapped to their IDs.
28
     */
29
    protected array $_groupList;
30
31
    /**
32
     * Array of ignored channels.
33
     */
34
    protected array $_ignoredChannels;
35
36
    /**
37
     * Is this pre nuked or un nuked?
38
     */
39
    protected bool $_nuked;
40
41
    protected $_oldPre;
42
43
    /**
44
     * Run this in silent mode (no text output).
45
     */
46
    protected bool $_silent;
47
48
    /**
49
     * Regex to ignore PRE titles.
50
     */
51
    protected string|false $_titleIgnoreRegex = false;
52
53
    /**
     * Load the ignore settings from the `irc_settings` config, reset the PRE
     * state and start scraping via _startScraping().
     *
     * @param  bool  $silent  Run this in silent mode (no text output).
     * @param  bool  $debug  Turn on debug? Shows sent/received socket buffer messages.
     *
     * @throws \Exception
     */
    public function __construct(bool $silent, bool $debug)
    {
        $ignoreCfg = config('irc_settings.scrape_irc_source_ignore');
        if ($ignoreCfg) {
            try {
                $ignored = unserialize((string) $ignoreCfg, ['allowed_classes' => false]);
            } catch (\ValueError|\ErrorException) {
                // Malformed serialized data is reported by PHP as a warning/notice, which an
                // error handler may turn into an ErrorException; treat it like "nothing ignored".
                $ignored = false;
            }
            $this->_ignoredChannels = is_array($ignored) ? $ignored : [];
        } else {
            // Default source list; every entry is false, i.e. no source is ignored.
            $this->_ignoredChannels = [
                '#a.b.cd.image' => false,
                '#a.b.console.ps3' => false,
                '#a.b.dvd' => false,
                '#a.b.erotica' => false,
                '#a.b.flac' => false,
                '#a.b.foreign' => false,
                '#a.b.games.nintendods' => false,
                '#a.b.inner-sanctum' => false,
                '#a.b.moovee' => false,
                '#a.b.movies.divx' => false,
                '#a.b.sony.psp' => false,
                '#a.b.sounds.mp3.complete_cd' => false,
                '#a.b.teevee' => false,
                '#a.b.games.wii' => false,
                '#a.b.warez' => false,
                '#a.b.games.xbox360' => false,
                '#pre@corrupt' => false,
                '#scnzb' => false,
                '#tvnzb' => false,
                'srrdb' => false,
            ];
        }

        // Only enable an ignore regex when the setting is a non-empty string. A null/false
        // setting previously passed the `!== ''` check and was stored as an empty pattern,
        // which makes preg_match() emit a warning for every processed message.
        $categoryIgnore = (string) config('irc_settings.scrape_irc_category_ignore');
        if ($categoryIgnore !== '') {
            $this->_categoryIgnoreRegex = $categoryIgnore;
        }

        $titleIgnore = (string) config('irc_settings.scrape_irc_title_ignore');
        if ($titleIgnore !== '') {
            $this->_titleIgnoreRegex = $titleIgnore;
        }

        $this->_groupList = [];
        $this->_silent = $silent;
        $this->_debug = $debug;
        $this->_resetPreVariables();
        $this->_startScraping();
    }
112
113
    /**
     * Main scraping routine: connect to the configured IRC server, log in,
     * join the configured channels and start reading incoming messages.
     *
     * The script is terminated (exit with a message) when connecting or
     * logging in fails.
     */
    protected function _startScraping(): void
    {
        $server = config('irc_settings.scrape_irc_server');
        $port = config('irc_settings.scrape_irc_port');
        $useTls = (bool) config('irc_settings.scrape_irc_tls');

        // Connect to IRC.
        if ($this->connect((string) $server, (int) $port, $useTls) === false) {
            exit(
                "Error connecting to ({$server}:{$port}). Please verify your server information and try again.".PHP_EOL
            );
        }

        // An unset, false or empty password is passed on as null.
        $rawPassword = config('irc_settings.scrape_irc_password');
        $password = in_array($rawPassword, [false, '', null], true) ? null : (string) $rawPassword;

        $nickname = config('irc_settings.scrape_irc_nickname');
        $username = (string) config('irc_settings.scrape_irc_username');
        $realname = (string) config('irc_settings.scrape_irc_realname');

        // Login to IRC. Parameter order: nick, user, real, pass.
        if ($this->login((string) $nickname, $username, $realname, $password) === false) {
            exit(
                "Error logging in to: ({$server}:{$port}) nickname: ({$nickname}). ".
                'Verify your connection information, you might also be banned from this server or there might have been a connection issue.'.
                PHP_EOL
            );
        }

        // Join channels: use the serialized channel list from the config when it decodes
        // to an array, otherwise fall back to the default channel.
        $channels = null;
        $channelsCfg = config('irc_settings.scrape_irc_channels');
        if ($channelsCfg) {
            try {
                $channels = unserialize((string) $channelsCfg, ['allowed_classes' => false]);
            } catch (\ValueError) {
                $channels = null;
            }
        }
        if (! is_array($channels)) {
            $channels = ['#PreNNTmux' => null];
        }
        $this->joinChannels($channels);

        if (! $this->_silent) {
            echo '['.date('r')."] [Scraping of IRC channels for ({$server}:{$port}) ({$nickname}) started.]".PHP_EOL;
        }

        // Scan incoming IRC messages.
        $this->readIncoming();
    }
176
177
    /**
     * Process bot messages, insert/update PREs.
     *
     * Parses the current channel message ($this->_channelData['message']) as a
     * NEW/UPD/NUK announcement, skips it when its source, category or title is
     * ignored, copies the parsed fields into $this->_curPre and hands over to
     * _checkForDupe(). Fields announced as "N/A" are left at their reset value.
     *
     * @throws \Exception
     */
    protected function processChannelMessages(): void
    {
        $message = $this->_channelData['message'];
        $this->_debugPre('Processing message: '.$message);

        $pattern = '/^(NEW|UPD|NUK): \[DT: (?P<time>.+?)\]\s?\[TT: (?P<title>.+?)\]\s?\[SC: (?P<source>.+?)\]\s?\[CT: (?P<category>.+?)\]\s?\[RQ: (?P<req>.+?)\]'.
            '\s?\[SZ: (?P<size>.+?)\]\s?\[FL: (?P<files>.+?)\]\s?(\[FN: (?P<filename>.+?)\]\s?)?(\[(?P<nuked>(UN|MOD|RE|OLD)?NUKED?): (?P<reason>.+?)\])?$/i';

        if (! preg_match($pattern, $message, $hits)) {
            $this->_debugPre('Message did not match PRE regex pattern');

            return;
        }

        $this->_debugPre('Regex matched! Title: '.$hits['title'].' | Source: '.$hits['source'].' | Category: '.$hits['category']);

        if ($this->_isIgnoredPre($hits)) {
            return;
        }

        $utime = Carbon::createFromTimeString($hits['time'], 'UTC')->timestamp;

        // Stored as a SQL expression; the insert/update queries embed it verbatim.
        $this->_curPre['predate'] = 'FROM_UNIXTIME('.$utime.')';
        $this->_curPre['title'] = $hits['title'];
        $this->_curPre['source'] = $hits['source'];

        if ($hits['category'] !== 'N/A') {
            $this->_curPre['category'] = $hits['category'];
        }

        // The request field has the form "<request id>:<group name>".
        if ($hits['req'] !== 'N/A' && preg_match('/^(?P<req>\d+):(?P<group>.+)$/i', $hits['req'], $request)) {
            $this->_curPre['reqid'] = $request['req'];
            $this->_curPre['group_id'] = $this->_getGroupID($request['group']);
        }

        if ($hits['size'] !== 'N/A') {
            $this->_curPre['size'] = $hits['size'];
        }

        if ($hits['files'] !== 'N/A') {
            $this->_curPre['files'] = substr($hits['files'], 0, 50);
        }

        if (isset($hits['filename']) && $hits['filename'] !== 'N/A') {
            $this->_curPre['filename'] = $hits['filename'];
        }

        if (isset($hits['nuked'])) {
            $status = $this->_resolveNukeStatus($hits['nuked']);
            if ($status !== null) {
                $this->_curPre['nuked'] = $status;
            }
            $this->_curPre['reason'] = isset($hits['reason']) ? substr($hits['reason'], 0, 255) : '';
            $this->_nuked = true; // flag for output
        }

        $this->_checkForDupe();
    }

    /**
     * Print a "[DEBUG] ..." line when debugging is on and silent mode is off.
     */
    private function _debugPre(string $text): void
    {
        if ($this->_debug && ! $this->_silent) {
            echo '[DEBUG] '.$text.PHP_EOL;
        }
    }

    /**
     * Tell whether a parsed announcement is excluded by the ignored sources
     * or by the category/title ignore regexes.
     *
     * @param  array  $hits  Named matches of the PRE announcement regex.
     */
    private function _isIgnoredPre(array $hits): bool
    {
        if (($this->_ignoredChannels[$hits['source']] ?? null) === true) {
            $this->_debugPre('Source '.$hits['source'].' is ignored, skipping...');

            return true;
        }

        if ($this->_categoryIgnoreRegex !== false && preg_match((string) $this->_categoryIgnoreRegex, $hits['category'])) {
            $this->_debugPre('Category '.$hits['category'].' is ignored by regex, skipping...');

            return true;
        }

        if ($this->_titleIgnoreRegex !== false && preg_match((string) $this->_titleIgnoreRegex, $hits['title'])) {
            $this->_debugPre('Title '.$hits['title'].' is ignored by regex, skipping...');

            return true;
        }

        return false;
    }

    /**
     * Map the nuke keyword of an announcement to its Predb nuke status.
     *
     * The announcement regex is case-insensitive and accepts every keyword
     * with or without the trailing "D" (e.g. "MODNUKE", "oldnuked"), so the
     * keyword is normalised before the lookup instead of being compared with
     * a handful of exact spellings.
     *
     * @return mixed The matching Predb::PRE_* constant, or null for an unknown keyword.
     */
    private function _resolveNukeStatus(string $keyword): mixed
    {
        return match (rtrim(strtoupper($keyword), 'D')) {
            'NUKE' => Predb::PRE_NUKED,
            'UNNUKE' => Predb::PRE_UNNUKED,
            'MODNUKE' => Predb::PRE_MODNUKE,
            'RENUKE' => Predb::PRE_RENUKED,
            'OLDNUKE' => Predb::PRE_OLDNUKE,
            default => null,
        };
    }
273
274
    /**
     * Check if we already have the PRE, update if we have it, insert if not.
     *
     * The PRE is looked up by its title. Whatever the outcome, the per-message
     * state is reset afterwards: the reset sits in a finally block so that a
     * failing insert/update cannot leak fields of this PRE into the next one.
     *
     * @throws \Exception
     */
    protected function _checkForDupe(): void
    {
        try {
            $this->_oldPre = Predb::query()
                ->where('title', $this->_curPre['title'])
                ->select(['category', 'size'])
                ->first();

            if ($this->_oldPre === null) {
                if ($this->_debug && ! $this->_silent) {
                    echo '[DEBUG] New PRE found, inserting: '.$this->_curPre['title'].PHP_EOL;
                }
                $this->_insertNewPre();
            } else {
                if ($this->_debug && ! $this->_silent) {
                    echo '[DEBUG] PRE already exists, updating: '.$this->_curPre['title'].PHP_EOL;
                }
                $this->_updatePre();
            }
        } finally {
            $this->_resetPreVariables();
        }
    }
295
296
    /**
     * Insert new PRE into the DB.
     *
     * Skips the insert when the title is empty or a row with the same title
     * already exists. Only non-empty fields of $this->_curPre are written.
     * After a successful insert the PRE is pushed to the search index and
     * echoed to the CLI.
     *
     * All values coming from the IRC message are sent as bound parameters.
     * The query used to be assembled with sprintf(), so a "%" inside any
     * value (e.g. a nuke reason such as "50% missing") was interpreted as a
     * format specifier and corrupted the statement or raised an error.
     *
     * @throws \Exception
     */
    protected function _insertNewPre(): void
    {
        $title = $this->_curPre['title'] ?? '';

        // Check if title is empty first
        if (empty($title)) {
            if ($this->_debug && ! $this->_silent) {
                echo '[DEBUG] PRE title is empty, skipping insert'.PHP_EOL;
            }

            return;
        }

        // Double-check database to ensure we don't have stale search index data
        $existingPre = Predb::query()->where('title', $title)->first();
        if ($existingPre !== null) {
            if ($this->_debug && ! $this->_silent) {
                echo '[DEBUG] PRE already exists in database (ID: '.$existingPre->id.'), skipping insert'.PHP_EOL;
            }

            return;
        }

        if ($this->_debug && ! $this->_silent) {
            echo '[DEBUG] PRE not in database, proceeding with insert...'.PHP_EOL;
        }

        // Column => value for every optional field; empty values are left out of the statement.
        $fields = array_filter([
            'size' => $this->_curPre['size'] ?? '',
            'category' => $this->_curPre['category'] ?? '',
            'source' => $this->_curPre['source'] ?? '',
            'nukereason' => $this->_curPre['reason'] ?? '',
            'files' => $this->_curPre['files'] ?? '',
            'requestid' => $this->_curPre['reqid'] ?? '',
            'groups_id' => $this->_curPre['group_id'] ?? '',
            'nuked' => $this->_curPre['nuked'] ?? '',
            'filename' => $this->_curPre['filename'] ?? '',
        ]);

        $columns = array_keys($fields);
        $placeholders = array_fill(0, count($fields), '?');
        $bindings = array_values($fields);

        // predate holds a SQL expression (FROM_UNIXTIME(<timestamp>)) rather than a value,
        // so it is embedded as-is; NOW() is used when no predate is known.
        $columns[] = 'predate';
        $placeholders[] = ! empty($this->_curPre['predate']) ? $this->_curPre['predate'] : 'NOW()';

        $columns[] = 'title';
        $placeholders[] = '?';
        $bindings[] = $title;

        $query = 'INSERT INTO predb ('.implode(', ', $columns).') VALUES ('.implode(', ', $placeholders).')';

        if ($this->_debug && ! $this->_silent) {
            echo '[DEBUG] Executing SQL: '.substr($query, 0, 100).'...'.PHP_EOL;
        }

        try {
            DB::insert($query, $bindings);

            $lastId = DB::connection()->getPdo()->lastInsertId();

            if ($this->_debug && ! $this->_silent) {
                echo '[DEBUG] Successfully inserted PRE with ID: '.$lastId.PHP_EOL;
            }

            Search::insertPredb([
                'id' => $lastId,
                'title' => $title,
                'filename' => $this->_curPre['filename'] ?? null,
                'source' => $this->_curPre['source'] ?? null,
            ]);

            $this->_doEcho(true);
        } catch (\Exception $e) {
            // Best effort: a failed insert is only reported in debug mode.
            if ($this->_debug && ! $this->_silent) {
                echo '[DEBUG] ERROR inserting PRE: '.$e->getMessage().PHP_EOL;
            }
        }
    }
388
389
    /**
     * Updates PRE data in the DB.
     *
     * Writes every non-empty field of $this->_curPre to the row(s) with the
     * same title; the category is only written when the stored PRE has none.
     * Nothing is done when the title is empty or no field needs updating.
     * Afterwards the search index is refreshed and the update is echoed.
     *
     * Values coming from the IRC message are sent as bound parameters instead
     * of being concatenated into the SQL string.
     *
     * @throws \Exception
     */
    protected function _updatePre(): void
    {
        $title = $this->_curPre['title'] ?? '';
        if (empty($title)) {
            return;
        }

        // Column => value for the fields sent as bound parameters.
        $bound = [
            'size' => $this->_curPre['size'] ?? '',
            'source' => $this->_curPre['source'] ?? '',
            'files' => $this->_curPre['files'] ?? '',
            'nukereason' => $this->_curPre['reason'] ?? '',
            'requestid' => $this->_curPre['reqid'] ?? '',
            'groups_id' => $this->_curPre['group_id'] ?? '',
            'nuked' => $this->_curPre['nuked'] ?? '',
            'filename' => $this->_curPre['filename'] ?? '',
            // Never overwrite a category that is already stored.
            'category' => empty($this->_oldPre['category']) ? ($this->_curPre['category'] ?? '') : '',
        ];

        $assignments = [];
        $bindings = [];
        foreach ($bound as $column => $value) {
            if (! empty($value)) {
                $assignments[] = $column.' = ?';
                $bindings[] = $value;
            }
        }

        // predate holds a SQL expression (FROM_UNIXTIME(<timestamp>)), so it is embedded as-is.
        if (! empty($this->_curPre['predate'])) {
            $assignments[] = 'predate = '.$this->_curPre['predate'];
        }

        if ($assignments === []) {
            return;
        }

        // The title is re-assigned as before, so the stored title keeps following the announced one.
        $assignments[] = 'title = ?';
        $bindings[] = $title;

        $query = 'UPDATE predb SET '.implode(', ', $assignments).' WHERE title = ?';
        $bindings[] = $title;

        DB::update($query, $bindings);

        // Look up the predb row ID by title for indexing backends.
        $predbId = Predb::query()->where('title', $title)->value('id');

        if (! empty($predbId)) {
            Search::updatePreDb([
                'id' => $predbId,
                'title' => $title,
                'filename' => $this->_curPre['filename'] ?? null,
                'source' => $this->_curPre['source'] ?? null,
            ]);
        }

        $this->_doEcho(false);
    }
444
445
    /**
     * Print a one-line summary of the inserted or updated PRE to the CLI.
     * Nothing is printed in silent mode.
     *
     * @param  bool  $new  True for a newly added PRE, false for an updated one.
     */
    protected function _doEcho(bool $new = true): void
    {
        if ($this->_silent) {
            return;
        }

        $nukeInfo = '';
        if ($this->_nuked !== false) {
            $nukeCode = (int) $this->_curPre['nuked'];
            $labels = [
                [Predb::PRE_NUKED, '[ NUKED ] '],
                [Predb::PRE_UNNUKED, '[UNNUKED] '],
                [Predb::PRE_MODNUKE, '[MODNUKE] '],
                [Predb::PRE_OLDNUKE, '[OLDNUKE] '],
                [Predb::PRE_RENUKED, '[RENUKED] '],
            ];
            // First matching status wins; an unknown status gets no label.
            foreach ($labels as [$code, $label]) {
                if ($nukeCode == $code) {
                    $nukeInfo = $label;
                    break;
                }
            }
            $nukeInfo .= '['.($this->_curPre['reason'] ?? '').'] ';
        }

        // Prefer the category of the current PRE, fall back to the stored one.
        $categoryInfo = '';
        if (! empty($this->_curPre['category'])) {
            $categoryInfo = ' ['.$this->_curPre['category'].']';
        } elseif (! empty($this->_oldPre['category'])) {
            $categoryInfo = ' ['.$this->_oldPre['category'].']';
        }

        $sizeInfo = '';
        if (! empty($this->_curPre['size'])) {
            $sizeInfo = ' ['.$this->_curPre['size'].']';
        }

        $action = $new ? '] [ Added Pre ] [' : '] [Updated Pre] [';

        $line = '['.date('r').$action.($this->_curPre['source'] ?? '').'] ';
        $line .= $nukeInfo;
        $line .= '['.$this->_curPre['title'].']';
        $line .= $categoryInfo.$sizeInfo.PHP_EOL;

        echo $line;
    }
497
498
    /**
     * Resolve a group name to its id, caching the result per name.
     *
     * @return mixed The id of the group, or an empty string when no group with that name exists.
     */
    protected function _getGroupID(string $groupName): mixed
    {
        // Served from the cache when this name was resolved before.
        if (isset($this->_groupList[$groupName])) {
            return $this->_groupList[$groupName];
        }

        $row = UsenetGroup::query()->where('name', $groupName)->first(['id']);
        $groupId = $row === null ? '' : $row['id'];
        $this->_groupList[$groupName] = $groupId;

        return $groupId;
    }
510
511
    /**
     * Clear the per-message state (nuke flag, stored PRE, current PRE fields)
     * once a PRE has been inserted or updated.
     */
    protected function _resetPreVariables(): void
    {
        $this->_nuked = false;
        $this->_oldPre = [];

        // Every field of the current PRE starts out as an empty string.
        $this->_curPre = array_fill_keys(
            [
                'title',
                'size',
                'predate',
                'category',
                'source',
                'group_id',
                'reqid',
                'nuked',
                'reason',
                'files',
                'filename',
            ],
            ''
        );
    }
533
}
534