NfoService::extractGroupName()   B
last analyzed

Complexity

Conditions 11
Paths 13

Size

Total Lines 43
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 21
c 1
b 0
f 0
dl 0
loc 43
rs 7.3166
cc 11
nc 13
nop 1

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Services;
6
7
use App\Models\Release;
8
use App\Models\ReleaseNfo;
9
use App\Models\Settings;
10
use App\Models\UsenetGroup;
11
use App\Services\NNTP\NNTPService;
12
use App\Services\Nzb\NzbContentsService;
13
use dariusiii\rarinfo\Par2Info;
14
use dariusiii\rarinfo\SfvInfo;
15
use Illuminate\Support\Facades\Cache;
16
use Illuminate\Support\Facades\DB;
17
use Illuminate\Support\Facades\File;
18
use Illuminate\Support\Facades\Log;
19
use Throwable;
20
21
/**
22
 * Class NfoService - Handles NFO file processing, validation, and metadata extraction.
23
 *
24
 * NFO files are text files commonly used in the warez scene to provide information
25
 * about releases. This class handles detection, validation, parsing and storage of NFO content.
26
 */
27
class NfoService
28
{
29
    /**
30
     * Regex to detect common non-NFO file headers/signatures.
31
     * Matches XML, NZB, RIFF (media), PAR/RAR archives, and other binary formats.
32
     */
33
    protected string $_nonNfoHeaderRegex = '/\A(\s*<\?xml|=newz\[NZB\]=|RIFF|\s*[RP]AR|.{0,10}(JFIF|matroska|ftyp|ID3)|PK\x03\x04|\x1f\x8b\x08|MZ|%PDF|GIF8[79]a|\x89PNG)|;\s*Generated\s*by.*SF\w/i';
34
35
    /**
36
     * Regex to identify text encoding from the 'file' command output.
37
     */
38
    protected string $_textFileRegex = '/(ASCII|ISO-8859|UTF-(8|16|32).*?|Non-ISO extended-ASCII)\s*text/i';
39
40
    /**
41
     * Regex to identify common binary file types from the 'file' command output.
42
     */
43
    protected string $_binaryFileRegex = '/^(JPE?G|Parity|PNG|RAR|XML|(7-)?[Zz]ip|PDF|GIF|executable|archive|compressed|data|binary)/i';
44
45
    /**
46
     * Regex to detect binary characters within the content.
47
     * Excludes common control characters that may appear in NFOs (tab, newline, carriage return).
48
     */
49
    protected string $_binaryCharsRegex = '/[\x00-\x08\x0B\x0C\x0E-\x1F]/';
50
51
    /**
52
     * Common NFO keywords that help identify legitimate NFO files.
53
     */
54
    protected array $_nfoKeywords = [
55
        // Release information
56
        'release', 'group', 'date', 'size', 'format', 'source', 'genre', 'codec',
57
        'bitrate', 'resolution', 'language', 'subtitle', 'ripped', 'cracked',
58
        'keygen', 'serial', 'patch', 'trainer', 'install', 'notes', 'greets',
59
        'nfo', 'ascii', 'artwork', 'presents', 'proudly', 'brings', 'another',
60
        // Scene terminology
61
        'scene', 'rls', 'nuked', 'proper', 'repack', 'internal', 'retail',
62
        'webdl', 'webrip', 'bluray', 'bdrip', 'dvdrip', 'hdtv', 'pdtv',
63
        // Media info
64
        'video', 'audio', 'duration', 'runtime', 'aspect', 'fps', 'channels',
65
        'sample', 'encoder', 'x264', 'x265', 'hevc', 'avc', 'xvid', 'divx',
66
        'aac', 'ac3', 'dts', 'truehd', 'atmos', 'flac', 'mp3',
67
        // Content info
68
        'movie', 'film', 'episode', 'season', 'series', 'title', 'year',
69
        'director', 'cast', 'actors', 'plot', 'synopsis', 'imdb', 'rating',
70
        // Software
71
        'crack', 'readme', 'setup', 'installer', 'license', 'registration',
72
        'protection', 'requirements', 'platform', 'operating', 'system',
73
        // Contact/Group info
74
        'contact', 'irc', 'www', 'http', 'ftp', 'email', 'apply', 'join',
75
    ];
76
77
    /**
78
     * Scene group patterns for improved detection.
79
     */
80
    protected array $_sceneGroupPatterns = [
81
        '/(?:^|\n)\s*[-=*]{3,}.*?([A-Z0-9]{2,15})\s*[-=*]{3,}/i',
82
        '/(?:presents?|brought\s+(?:to\s+)?(?:you\s+)?by|from)\s*[:\-]?\s*([A-Z][A-Z0-9]{1,14})/i',
83
        '/(?:greets?\s+(?:go(?:es)?\s+)?(?:out\s+)?to|respect\s+to)\s*[:\-]?\s*([\w,\s&]+)/i',
84
        '/(?:^|\n)\s*([A-Z][A-Z0-9]{1,14})\s+(?:nfo|info|release)\s*(?:$|\n)/i',
85
        '/(?:released\s+by|rls\s+by)\s*[:\-]?\s*([A-Z][A-Z0-9]{1,14})/i',
86
    ];
87
88
    /**
89
     * Maximum NFO file size in bytes (64KB).
90
     */
91
    protected const MAX_NFO_SIZE = 65535;
92
93
    /**
94
     * Minimum NFO file size in bytes.
95
     */
96
    protected const MIN_NFO_SIZE = 12;
97
98
    /**
99
     * Cache TTL for settings in seconds.
100
     */
101
    protected const SETTINGS_CACHE_TTL = 300;
102
103
    /**
104
     * @var int Number of NFOs to process per batch.
105
     */
106
    private int $nzbs;
107
108
    /**
109
     * @var int Maximum release size to process NFO (in GB).
110
     */
111
    protected int $maxSize;
112
113
    /**
114
     * @var int Maximum retry attempts for failed NFO fetches.
115
     */
116
    private int $maxRetries;
117
118
    /**
119
     * @var int Minimum release size to process NFO (in MB).
120
     */
121
    protected int $minSize;
122
123
    /**
124
     * @var string Temporary path for processing files.
125
     */
126
    private string $tmpPath;
127
128
    /**
129
     * @var bool Whether to echo output to CLI.
130
     */
131
    protected bool $echo;
132
133
    public const NFO_FAILED = -9; // We failed to get a NFO after admin set max retries.
134
135
    public const NFO_UNPROC = -1; // Release has not been processed yet.
136
137
    public const NFO_NONFO = 0; // Release has no NFO.
138
139
    public const NFO_FOUND = 1; // Release has an NFO.
140
141
    /**
142
     * Default constructor.
143
     *
144
     * Initializes NFO processing settings from database/config with caching.
145
     *
146
     * @throws \Exception
147
     */
148
    public function __construct()
    {
        $this->echo = (bool) config('nntmux.echocli');

        // Each setting is read through the cache (SETTINGS_CACHE_TTL seconds) so that
        // constructing the service does not query the settings table every time.
        // The outer (int) casts guard against cache stores handing values back as strings.
        $ttl = self::SETTINGS_CACHE_TTL;

        $batchSize = Cache::remember('nfo_maxnfoprocessed', $ttl, function () {
            $configured = Settings::settingValue('maxnfoprocessed');

            // An empty setting falls back to a batch of 100.
            if ($configured === '') {
                return 100;
            }

            return (int) $configured;
        });
        $this->nzbs = (int) $batchSize;

        $retryLimit = (int) Cache::remember(
            'nfo_maxnforetries',
            $ttl,
            fn () => (int) Settings::settingValue('maxnforetries')
        );

        // The retry limit is stored as a negative nfostatus boundary: N retries becomes -(N + 1),
        // a negative setting collapses to NFO_UNPROC, and the result is never allowed below -8
        // (which keeps it above NFO_FAILED, -9).
        $lowestStatus = self::NFO_UNPROC;
        if ($retryLimit >= 0) {
            $lowestStatus = -($retryLimit + 1);
        }
        $this->maxRetries = max($lowestStatus, -8);

        $this->maxSize = (int) Cache::remember(
            'nfo_maxsizetoprocessnfo',
            $ttl,
            fn () => (int) Settings::settingValue('maxsizetoprocessnfo')
        );

        $this->minSize = (int) Cache::remember(
            'nfo_minsizetoprocessnfo',
            $ttl,
            fn () => (int) Settings::settingValue('minsizetoprocessnfo')
        );

        // Normalise the temp directory so it always ends in exactly one forward slash.
        $configuredTmp = (string) config('nntmux.tmp_unrar_path');
        $this->tmpPath = rtrim($configuredTmp, '/\\').'/';
    }
176
177
    /**
178
     * Look for a TV Show ID or Movie ID in a string.
179
     *
180
     * Supports: TVMaze, IMDB, TVDB (legacy & modern), TMDB, AniDB
181
     *
182
     * @param  string  $str  The string with a Show ID.
183
     * @return array{showid: string, site: string}|false Return array with show ID and site source or false on failure.
184
     */
185
    public function parseShowId(string $str): array|false
    {
        // Ordered [pattern, site] pairs; the first pattern that matches decides the result,
        // so the order below is the lookup priority. Capture group 1 is always the ID.
        $sources = [
            // TVMaze
            ['/tvmaze\.com\/shows\/(\d{1,6})/i', 'tvmaze'],
            // IMDB (movies and TV shows)
            ['/imdb\.com\/title\/(tt\d{7,8})/i', 'imdb'],
            // TVDB - legacy URL format
            ['/thetvdb\.com\/\?tab=series&id=(\d{1,8})/i', 'thetvdb'],
            // TVDB - modern URL format (series/slug or series/id).
            // A single [\w-]+ capture is used on purpose: an alternation that tries digits
            // first would cut a slug such as "9-1-1" down to its leading digits ("9").
            ['/thetvdb\.com\/series\/([\w-]+)/i', 'thetvdb'],
            // TMDB - movie
            ['/themoviedb\.org\/movie\/(\d{1,8})/i', 'tmdb_movie'],
            // TMDB - TV show
            ['/themoviedb\.org\/tv\/(\d{1,8})/i', 'tmdb_tv'],
            // AniDB (legacy perl-bin URL or modern /anime/ URL)
            ['/anidb\.net\/(?:perl-bin\/animedb\.pl\?show=anime&aid=|anime\/)(\d{1,6})/i', 'anidb'],
            // Trakt.tv (shows and movies share one site key)
            ['/trakt\.tv\/(?:shows|movies)\/([\w-]+)/i', 'trakt'],
        ];

        foreach ($sources as [$pattern, $site]) {
            if (preg_match($pattern, $str, $hits) === 1) {
                return ['showid' => trim($hits[1]), 'site' => $site];
            }
        }

        // No known ID source was found in the string.
        return false;
    }
229
230
    /**
231
     * Confirm this is an NFO file.
232
     *
233
     * Uses multiple validation strategies:
234
     * 1. Size validation (too large/small = not NFO)
235
     * 2. Binary header detection (known file signatures)
236
     * 3. File type detection via 'file' command
237
     * 4. PAR2/SFV structure detection
238
     * 5. Binary character content analysis
239
     * 6. NFO keyword/content heuristics
240
     *
241
     * @param  bool|string  $possibleNFO  The nfo content.
242
     * @param  string  $guid  The guid of the release.
243
     * @return bool True if it's likely an NFO, False otherwise.
244
     */
245
    public function isNFO(bool|string &$possibleNFO, string $guid): bool
    {
        // Only a non-empty string can be an NFO. The is_string() check also rejects a literal
        // true, which would otherwise reach strlen()/preg_match() and raise a TypeError
        // because this file declares strict_types.
        if (! is_string($possibleNFO) || $possibleNFO === '') {
            return false;
        }

        // Size limits: reject anything at/above MAX_NFO_SIZE or below MIN_NFO_SIZE.
        $size = \strlen($possibleNFO);
        if ($size >= self::MAX_NFO_SIZE || $size < self::MIN_NFO_SIZE) {
            return false;
        }

        // Quick check for known non-NFO file signatures.
        if (preg_match($this->_nonNfoHeaderRegex, $possibleNFO)) {
            return false;
        }

        // Additional magic-byte checks for binary formats.
        if ($this->detectBinaryFormat($possibleNFO)) {
            return false;
        }

        $tmpPath = $this->tmpPath.$guid.'.nfo';

        try {
            // fileInfo() works on a path, so the content is written to a temporary file first.
            File::put($tmpPath, $possibleNFO);

            // NOTE(review): fileInfo() is defined outside this file; it presumably wraps the
            // 'file' command and returns its description string - confirm.
            $result = fileInfo($tmpPath);
            if (! empty($result)) {
                // When a description is available it is authoritative: the content must be
                // reported as text AND pass the content heuristics. Anything else is rejected.
                return preg_match($this->_textFileRegex, $result) === 1
                    && $this->validateNfoContent($possibleNFO);
            }

            // Fallback checks when no file-type description is available.

            // A payload that parses cleanly as PAR2 is not an NFO.
            $par2info = new Par2Info;
            $par2info->setData($possibleNFO);
            if (! $par2info->error) {
                return false;
            }

            // A payload that parses cleanly as SFV is not an NFO.
            $sfv = new SfvInfo;
            $sfv->setData($possibleNFO);
            if (! $sfv->error) {
                return false;
            }

            // Control characters other than tab/LF/CR indicate binary content.
            if (preg_match($this->_binaryCharsRegex, $possibleNFO)) {
                return false;
            }

            // Final content-based validation.
            return $this->validateNfoContent($possibleNFO);
        } catch (Throwable $e) {
            // Any failure while inspecting the content is logged and treated as "not an NFO".
            Log::error("Error processing potential NFO for GUID {$guid}: ".$e->getMessage());

            return false;
        } finally {
            // Runs on every exit path above, so the temporary file never outlives this call.
            if (File::exists($tmpPath)) {
                try {
                    File::delete($tmpPath);
                } catch (Throwable $e) {
                    Log::error("Error deleting temporary NFO file {$tmpPath}: ".$e->getMessage());
                }
            }
        }
    }
332
333
    /**
334
     * Detect binary file formats by magic bytes.
335
     *
336
     * @param  string  $data  The file content to check.
337
     * @return bool True if binary format detected.
338
     */
339
    protected function detectBinaryFormat(string $data): bool
    {
        // Payloads shorter than four bytes are never classified as binary here.
        if (strlen($data) < 4) {
            return false;
        }

        // Leading byte signatures ("magic bytes") of common binary formats.
        $signatures = [
            "\x50\x4B\x03\x04", // ZIP/DOCX/XLSX etc.
            "\x50\x4B\x05\x06", // Empty ZIP
            "\x52\x61\x72\x21", // RAR
            "\x37\x7A\xBC\xAF", // 7-Zip
            "\x1F\x8B\x08",     // GZip
            "\x42\x5A\x68",     // BZip2
            "\xFD\x37\x7A\x58", // XZ
            "\x89\x50\x4E\x47", // PNG
            "\xFF\xD8\xFF",     // JPEG
            "\x47\x49\x46\x38", // GIF
            "\x25\x50\x44\x46", // PDF
            "\x49\x44\x33",     // MP3 with ID3
            "\xFF\xFB",         // MP3
            "\x4F\x67\x67\x53", // OGG
            "\x66\x4C\x61\x43", // FLAC
            "\x52\x49\x46\x46", // RIFF (WAV/AVI)
            "\x00\x00\x01\xBA", // MPEG video
            "\x00\x00\x01\xB3", // MPEG video
            "\x1A\x45\xDF\xA3", // Matroska/WebM
            "\x4D\x5A",         // Windows EXE
            "\x7F\x45\x4C\x46", // Linux executable (ELF)
            "\xCA\xFE\xBA\xBE", // Java class
            "\xD0\xCF\x11\xE0", // MS Office old format (OLE)
        ];

        foreach ($signatures as $signature) {
            // Byte-wise prefix comparison against the start of the payload.
            if (strncmp($data, $signature, strlen($signature)) === 0) {
                return true;
            }
        }

        // Nothing matched. A UTF-16 byte-order mark is deliberately not treated as a
        // binary signature; such content is left for the other checks to judge.
        return false;
    }
386
387
    /**
388
     * Validate NFO content using heuristics.
389
     *
390
     * @param  string  $content  The content to validate.
391
     * @return bool True if content appears to be a valid NFO.
392
     */
393
    protected function validateNfoContent(string $content): bool
    {
        $length = strlen($content);

        // Too short to be meaningful.
        if ($length < 50) {
            return false;
        }

        // NFOs should be mostly printable ASCII (at least 70% of all bytes).
        $printableCount = preg_match_all('/[\x20-\x7E]/', $content);
        if ($printableCount / $length < 0.7) {
            return false;
        }

        // Require some actual words (runs of 2+ letters), not just symbols or art.
        $wordCount = preg_match_all('/[A-Za-z]{2,}/', $content);
        if ($wordCount < 5) {
            return false;
        }

        // Accumulated score; 2 or more means the content looks like an NFO.
        $nfoIndicators = 0;

        // Each known keyword found anywhere in the content (case-insensitive substring)
        // adds one point; three keywords are enough on their own.
        foreach ($this->_nfoKeywords as $keyword) {
            if (stripos($content, $keyword) !== false) {
                $nfoIndicators++;
                if ($nfoIndicators >= 3) {
                    return true;
                }
            }
        }

        // Scene-style formatting: a run of five or more separator characters.
        if (preg_match('/[-=*]{5,}/', $content)) {
            $nfoIndicators++;
        }

        // URL presence (common in NFOs).
        if (preg_match('/https?:\/\/|www\./i', $content)) {
            $nfoIndicators++;
        }

        // Media database references count double. The host list covers every site that
        // parseShowId() can extract an ID from, including TVMaze and Trakt.
        if (preg_match('/imdb\.com|thetvdb\.com|themoviedb\.org|anidb\.net|tvmaze\.com|trakt\.tv/i', $content)) {
            $nfoIndicators += 2;
        }

        // "Field: value" style lines: every three matches add one point, capped at two.
        // The division is intentionally fractional, so one or two lines still contribute.
        if (preg_match_all('/^[A-Za-z\s]{2,20}\s*[:\.]\s*.+$/m', $content, $matches)) {
            $nfoIndicators += min(count($matches[0]) / 3, 2);
        }

        return $nfoIndicators >= 2;
    }
452
453
    /**
454
     * Add an NFO from alternate sources. ex.: PreDB, rar, zip, etc...
455
     *
456
     * @param  string  $nfo  The nfo.
457
     * @param  NNTPService  $nntp  Instance of class NNTPService.
458
     * @return bool True on success, False on failure.
459
     *
460
     * @throws \Exception
461
     */
462
    public function addAlternateNfo(string &$nfo, $release, NNTPService $nntp): bool
    {
        // Bail out unless the release has a positive ID and the content passes isNFO().
        if (! ($release->id > 0) || ! $this->isNFO($nfo, $release->guid)) {
            return false;
        }

        $releaseId = $release->id;

        // Store the NFO only when the release has no NFO row yet.
        $existing = ReleaseNfo::whereReleasesId($releaseId)->first(['releases_id']);
        if ($existing === null) {
            ReleaseNfo::query()->insert([
                'releases_id' => $releaseId,
                'nfo' => "\x1f\x8b\x08\x00".gzcompress($nfo),
            ]);
        }

        // The release is flagged as having an NFO whether or not a row was inserted just now.
        Release::whereId($releaseId)->update(['nfostatus' => self::NFO_FOUND]);

        // A missing completion value is treated as 0.
        if (! isset($release->completion)) {
            $release->completion = 0;
        }

        // Releases with a completion of exactly 0 get their NZB parsed, with this service
        // and the supplied NNTP connection handed to the NZB contents service.
        if ($release->completion === 0) {
            $contents = app(NzbContentsService::class);
            $contents->setNntp($nntp);
            $contents->setNfo($this);
            $contents->setEchoOutput($this->echo);
            $contents->parseNzb($release->guid, $release->id, $release->groups_id ?? 0);
        }

        return true;
    }
490
491
    /**
492
     * Attempt to find NFO files inside the NZB's of releases.
493
     *
494
     * @param  NNTPService  $nntp  The NNTP connection object
495
     * @param  string  $groupID  (optional) Group ID to filter releases by
496
     * @param  string  $guidChar  (optional) First character of the GUID for parallel processing
497
     * @param  bool  $processImdb  (optional) Process IMDB IDs (currently unused)
498
     * @param  bool  $processTv  (optional) Process TV IDs (currently unused)
499
     * @return int Count of successfully processed NFO files
500
     *
501
     * @throws \Exception If NNTP operations fail
502
     */
503
    public function processNfoFiles(NNTPService $nntp, string $groupID = '', string $guidChar = '', bool $processImdb = true, bool $processTv = true): int
    {
        // $processImdb and $processTv are not used by this method; they remain in the
        // signature so existing callers keep working.
        $processedCount = 0;

        // Base query with all shared filters (status range, guid prefix, group, size limits).
        $baseQuery = $this->buildNfoProcessingQuery($groupID, $guidChar);

        // Fetch one batch of releases: lowest nfostatus first, newest posts first within a status.
        $releases = $baseQuery->clone()
            ->orderBy('nfostatus')
            ->orderByDesc('postdate')
            ->limit($this->nzbs)
            ->get(['id', 'guid', 'groups_id', 'name']);

        $nfoCount = $releases->count();

        if ($nfoCount > 0) {
            $this->displayProcessingHeader($guidChar, $groupID, $nfoCount);

            // Per-status counts are only shown when CLI output is enabled.
            if ($this->echo) {
                $this->displayNfoStatusStats($baseQuery);
            }

            $nzbContentsService = app(NzbContentsService::class);
            $nzbContentsService->setNntp($nntp);
            $nzbContentsService->setNfo($this);

            foreach ($releases as $release) {
                try {
                    $groupName = UsenetGroup::getNameByID($release['groups_id']);
                    $fetchedBinary = $nzbContentsService->getNfoFromNzb($release['guid'], $release['id'], $release['groups_id'], $groupName);

                    // Nothing usable was fetched for this release; move on to the next one.
                    if ($fetchedBinary === false) {
                        continue;
                    }

                    DB::beginTransaction();
                    try {
                        // Only insert if no NFO row is present yet.
                        $exists = ReleaseNfo::whereReleasesId($release['id'])->exists();
                        if (! $exists) {
                            ReleaseNfo::query()->insert([
                                'releases_id' => $release['id'],
                                'nfo' => "\x1f\x8b\x08\x00".gzcompress($fetchedBinary),
                            ]);
                        }

                        Release::whereId($release['id'])->update(['nfostatus' => self::NFO_FOUND]);
                        DB::commit();
                        $processedCount++;
                    } catch (Throwable $e) {
                        // Always roll back, including for \Error subclasses, so a failure
                        // can never leave the transaction open for the following releases.
                        DB::rollBack();

                        // Only ordinary exceptions are absorbed per release; anything else
                        // keeps propagating to the caller after the rollback.
                        if (! $e instanceof \Exception) {
                            throw $e;
                        }

                        // Logged unconditionally so the failure is visible even without CLI output.
                        Log::error("Error saving NFO for release {$release['id']}: {$e->getMessage()}");
                        if ($this->echo) {
                            cli()->error("Error saving NFO for release {$release['id']}: {$e->getMessage()}");
                        }
                    }
                } catch (\Exception $e) {
                    // A failure on one release must not stop the rest of the batch.
                    Log::error("Error processing release {$release['id']}: {$e->getMessage()}");
                    if ($this->echo) {
                        cli()->error("Error processing release {$release['id']}: {$e->getMessage()}");
                    }
                }
            }
        }

        // Process failed NFO attempts (runs even when the batch was empty).
        $this->handleFailedNfoAttempts($groupID, $guidChar);

        // Output results.
        if ($this->echo) {
            if ($nfoCount > 0) {
                echo PHP_EOL;
            }
            if ($processedCount > 0) {
                cli()->primary($processedCount.' NFO file(s) found/processed.');
            }
        }

        return $processedCount;
    }
584
585
    /**
586
     * Build base query for NFO processing with all common filters
587
     */
588
    private function buildNfoProcessingQuery(string $groupID, string $guidChar): \Illuminate\Database\Eloquent\Builder
    {
        // maxSize is configured in GB and minSize in MB; both are compared against 'size' in bytes.
        $bytesPerGigabyte = 1073741824;
        $bytesPerMegabyte = 1048576;

        // Always restrict to releases whose nfostatus lies between the retry floor and NFO_UNPROC.
        // Each optional filter is appended only when its condition holds; an empty string or a
        // non-positive size limit means "no filter".
        return Release::query()
            ->whereBetween('nfostatus', [$this->maxRetries, self::NFO_UNPROC])
            ->when($guidChar !== '', function ($query) use ($guidChar) {
                return $query->where('leftguid', $guidChar);
            })
            ->when($groupID !== '', function ($query) use ($groupID) {
                return $query->where('groups_id', $groupID);
            })
            ->when($this->maxSize > 0, function ($query) use ($bytesPerGigabyte) {
                return $query->where('size', '<', $this->maxSize * $bytesPerGigabyte);
            })
            ->when($this->minSize > 0, function ($query) use ($bytesPerMegabyte) {
                return $query->where('size', '>', $this->minSize * $bytesPerMegabyte);
            });
    }
611
612
    /**
613
     * Display header information about the NFO processing
614
     */
615
    private function displayProcessingHeader(string $guidChar, string $groupID, int $nfoCount): void
    {
        // Optional "[x] " tags; each is omitted entirely when its value is an empty string.
        $guidTag = '';
        if ($guidChar !== '') {
            $guidTag = '['.$guidChar.'] ';
        }

        $groupTag = '';
        if ($groupID !== '') {
            $groupTag = '['.$groupID.'] ';
        }

        // Legend for the per-release progress symbols.
        $legend = ' * = hidden NFO, + = NFO, - = no NFO, f = download failed.';

        $summary = 'Processing '.$nfoCount.' NFO(s), starting at '.$this->nzbs;

        cli()->primary(PHP_EOL.$guidTag.$groupTag.$summary.$legend);
    }
626
627
    /**
628
     * Display statistics about NFO status counts
629
     */
630
    private function displayNfoStatusStats(\Illuminate\Database\Eloquent\Builder $baseQuery): void
631
    {
632
        $nfoStats = $baseQuery->clone()
633
            ->select(['nfostatus as status', DB::raw('COUNT(id) as count')])
634
            ->groupBy(['nfostatus'])
635
            ->orderBy('nfostatus')
0 ignored issues
show
Bug introduced by
'nfostatus' of type string is incompatible with the type Closure|Illuminate\Datab...\Database\Query\Builder expected by parameter $column of Illuminate\Database\Query\Builder::orderBy(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

635
            ->orderBy(/** @scrutinizer ignore-type */ 'nfostatus')
Loading history...
636
            ->get();
637
638
        if ($nfoStats instanceof \Traversable && $nfoStats->count() > 0) {
0 ignored issues
show
Bug introduced by
The method count() does not exist on Traversable. It seems like you code against a sub-type of Traversable such as parallel\Events or Yaf_Config_Simple or Yaf\Session or Threaded or Volatile or SimpleXMLElement or Thread or Yaf_Session or Ds\Collection or pq\Result or Yaf\Config\Simple or Yaf\Config\Ini or Worker or Intervention\Image\Interfaces\CollectionInterface or Yaf_Config_Ini or MongoGridFSCursor or Symfony\Component\Routing\RouteCollection or Predis\Connection\Cluster\PredisCluster or DOMNodeList or voku\helper\SimpleXmlDom or PHPUnit\Event\EventCollection or FFMpeg\FFProbe\DataMapping\StreamCollection or PharIo\Manifest\AuthorCollection or Predis\Cluster\SimpleSlotMap or GuzzleHttp\Ring\Future\FutureArrayInterface or Illuminate\Pagination\LengthAwarePaginator or RectorPrefix202512\Illum...ner\RewindableGenerator or RectorPrefix202512\Nette\Utils\ArrayList or Illuminate\Http\Resources\Json\ResourceCollection or Illuminate\Support\Fluent or GuzzleHttp\Cookie\CookieJarInterface or Symfony\Component\HttpFo...\Attribute\AttributeBag or Ramsey\Collection\ArrayInterface or SplFixedArray or Illuminate\Container\RewindableGenerator or MabeEnum\EnumSet or Predis\Cluster\SlotMap or voku\helper\SimpleXmlDomNodeInterface or MabeEnum\EnumMap or Nette\Utils\Html or voku\helper\SimpleXmlDomInterface or PHPUnit\TextUI\XmlConfiguration\SnapshotNodeList or League\CommonMark\Util\ArrayCollection or Symfony\Component\HttpFoundation\Session\Session or WeakMap or Predis\Client or Illuminate\Pagination\CursorPaginator or Intervention\Image\Interfaces\ImageInterface or DOMNamedNodeMap or TheSeer\Tokenizer\TokenCollection or voku\helper\SimpleHtmlDomInterface or Nette\Utils\ArrayHash or voku\helper\SimpleXmlDomBlank or Http\Message\CookieJar or FFMpeg\Filters\FiltersCollection or League\Uri\Contracts\SegmentedPathInterface or League\Uri\UriTemplate\VariableBag or Symfony\Component\Console\Helper\TreeNode or RectorPrefix202512\Symfony\Component\Finder\Finder or 
RectorPrefix202512\Symfo...viceCollectionInterface or PharIo\Manifest\BundledComponentCollection or Illuminate\Pagination\Paginator or League\Uri\Contracts\DomainHostInterface or Symfony\Contracts\Servic...viceCollectionInterface or Symfony\Component\Finder\Finder or SebastianBergmann\CodeCoverage\Node\Directory or Nette\Utils\ArrayList or Illuminate\View\InvokableComponentVariable or Predis\Connection\Cluster\RedisCluster or Intervention\Image\Geometry\Polygon or Intervention\Image\Geometry\Bezier or Illuminate\View\ComponentAttributeBag or Whoops\Exception\FrameCollection or voku\helper\SimpleHtmlDomBlank or League\Uri\Contracts\QueryInterface or Symfony\Component\HttpFoundation\ParameterBag or Intervention\Image\Typography\Line or League\CommonMark\Reference\ReferenceMapInterface or ArrayObject or Intervention\Image\Collection or PHPUnit\Framework\TestSuite or voku\helper\SimpleHtmlDom or Symfony\Component\HttpFoundation\HeaderBag or Illuminate\Routing\AbstractRouteCollection or PharIo\Manifest\RequirementCollection or voku\helper\SimpleHtmlDomNodeInterface or ResourceBundle or Illuminate\Support\Enumerable or RectorPrefix202512\Nette\Utils\ArrayHash or RectorPrefix202512\Nette\Utils\Html or JsonSchema\Iterator\ObjectIterator or SplDoublyLinkedList or Manticoresearch\ResultSet or HttpMessage or HttpRequestPool or RdKafka\Metadata\Collection or Yaf_Config_Simple or SplFixedArray or SplObjectStorage or Manticoresearch\Results\PercolateDocsResultSet or Yaf\Session or SQLiteResult or Cassandra\UserTypeValue or Imagick or Intervention\Image\Drivers\Imagick\Core or Cassandra\Collection or SimpleXMLElement or http\Message or Yaf_Session or SplPriorityQueue or Cassandra\Rows or Cassandra\Map or Yaf\Config\Simple or Elasticsearch\Helper\Iterators\SearchHitIterator or Yaf\Config\Ini or MongoCursor or Predis\Response\Iterator\MultiBulkIterator or Yaf_Config_Ini or SplHeap or Cassandra\Set or MongoGridFSCursor or Cassandra\Tuple or Predis\Response\Iterator\MultiBulkTuple or 
CachingIterator or Phar or ArrayIterator or GlobIterator or Phar or Symfony\Component\Proper...ss\PropertyPathIterator or Phar or RecursiveCachingIterator or SimpleXMLElement or RecursiveArrayIterator or SimpleXMLIterator or Phar or PhpCsFixer\Runner\FileCachingLintingFileIterator. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-call  annotation

638
        if ($nfoStats instanceof \Traversable && $nfoStats->/** @scrutinizer ignore-call */ count() > 0) {
Loading history...
639
            $outString = PHP_EOL.'Available to process';
640
            foreach ($nfoStats as $row) {
641
                $outString .= ', '.$row['status'].' = '.number_format($row['count']);
642
            }
643
            cli()->header($outString.'.');
644
        }
645
    }
646
647
    /**
     * Mark releases that have used up their NFO fetch attempts as failed.
     *
     * Selects releases whose `nfostatus` lies strictly between self::NFO_FAILED and
     * $this->maxRetries (optionally narrowed by group and by `leftguid`), removes their
     * ReleaseNfo rows and sets `nfostatus` to self::NFO_FAILED. Errors inside a batch are
     * rolled back and reported on the CLI when $this->echo is set; processing then
     * continues with the next batch.
     *
     * @param  string  $groupID  Value matched against `groups_id`; an empty string disables the filter.
     * @param  string  $guidChar  Value matched against `leftguid`; an empty string disables the filter.
     */
    private function handleFailedNfoAttempts(string $groupID, string $guidChar): void
    {
        $failedQuery = Release::query()
            ->where('nfostatus', '<', $this->maxRetries)
            ->where('nfostatus', '>', self::NFO_FAILED);

        if ($guidChar !== '') {
            $failedQuery->where('leftguid', $guidChar);
        }

        if ($groupID !== '') {
            $failedQuery->where('groups_id', $groupID);
        }

        // chunkById() pages on the primary key instead of OFFSET. The update below removes
        // rows from the filtered set, so an offset-based chunk() would silently skip
        // records on every page after the first.
        $failedQuery->select(['id'])->chunkById(100, function ($releases) {
            $releaseIds = $releases->pluck('id')->all();

            DB::beginTransaction();
            try {
                // Remove any stored NFO for the failed releases (one statement per batch).
                ReleaseNfo::query()->whereIn('releases_id', $releaseIds)->delete();

                // Flag the releases themselves as failed.
                Release::query()->whereIn('id', $releaseIds)->update(['nfostatus' => self::NFO_FAILED]);

                DB::commit();
            } catch (Throwable $e) {
                // Throwable (not just Exception) so an Error never leaves the transaction open.
                DB::rollBack();
                if ($this->echo) {
                    cli()->error("Error handling failed NFO attempts: {$e->getMessage()}");
                }
            }
        });
    }
684
685
    /**
     * Build the SQL fragment that restricts a release query to rows eligible for NFO processing.
     *
     * The fragment looks like:
     * "AND r.nfostatus BETWEEN -8 AND -1 AND r.size < 1073741824 AND r.size > 1048576"
     *
     * The lower status bound is derived from the `maxnforetries` setting and never goes
     * below -8. The size clauses are only emitted when the corresponding setting is
     * positive; `maxsizetoprocessnfo` is multiplied by 1073741824 and
     * `minsizetoprocessnfo` by 1048576.
     *
     * @throws \Exception
     */
    public static function NfoQueryString(): string
    {
        $sizeCeiling = (int) Settings::settingValue('maxsizetoprocessnfo');
        $sizeFloor = (int) Settings::settingValue('minsizetoprocessnfo');
        $retrySetting = (int) Settings::settingValue('maxnforetries');

        // A non-negative retry count N maps to status -(N + 1); anything else falls back to "unprocessed".
        $lowestStatus = self::NFO_UNPROC;
        if ($retrySetting >= 0) {
            $lowestStatus = -($retrySetting + 1);
        }
        if ($lowestStatus < -8) {
            $lowestStatus = -8;
        }

        $maxSizeClause = '';
        if ($sizeCeiling > 0) {
            $maxSizeClause = 'AND r.size < '.($sizeCeiling * 1073741824);
        }

        $minSizeClause = '';
        if ($sizeFloor > 0) {
            $minSizeClause = 'AND r.size > '.($sizeFloor * 1048576);
        }

        return sprintf(
            'AND r.nfostatus BETWEEN %d AND %d %s %s',
            $lowestStatus,
            self::NFO_UNPROC,
            $maxSizeClause,
            $minSizeClause
        );
    }
710
711
    /**
     * Extract URLs from NFO content.
     *
     * Finds explicit http/https URLs and bare "www." hosts (which are prefixed with
     * "http://"). Trailing sentence punctuation (. , ; : ! ?) is stripped so that
     * "see http://example.com." yields "http://example.com". Duplicates are removed.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return list<string> Unique URLs in order of first appearance, indexed from 0.
     */
    public function extractUrls(string $nfoContent): array
    {
        $urls = [];

        // Explicit HTTP/HTTPS URLs.
        if (preg_match_all('/https?:\/\/[^\s<>"\']+/i', $nfoContent, $matches)) {
            $urls = $matches[0];
        }

        // Bare www hosts; the lookbehind keeps this from re-matching the host part of an http:// URL.
        if (preg_match_all('/(?<![\/\.])\bwww\.[a-z0-9][-a-z0-9]*\.[^\s<>"\']+/i', $nfoContent, $matches)) {
            foreach ($matches[0] as $url) {
                $urls[] = 'http://'.$url;
            }
        }

        // The character classes above happily swallow punctuation that ends a sentence.
        $urls = array_map(
            static fn (string $url): string => rtrim($url, '.,;:!?'),
            $urls
        );

        // Drop anything that was reduced to an empty string or a bare scheme.
        $urls = array_filter(
            $urls,
            static fn (string $url): bool => $url !== '' && preg_match('/^https?:\/\/$/i', $url) !== 1
        );

        // array_unique() preserves keys; re-index so callers always receive a real list.
        return array_values(array_unique($urls));
    }
735
736
    /**
     * Extract release group name from NFO content.
     *
     * The configured scene-group patterns ($this->_sceneGroupPatterns) are tried first and
     * their capture is returned as written. If none yields an acceptable name, a set of
     * fallback patterns is tried (presentation phrases, "--- GROUP ---" footers, IRC
     * channels, website hosts, a lone token on a decorated line) and the capture is
     * returned upper-cased.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return string|null The group name if found, null otherwise.
     */
    public function extractGroupName(string $nfoContent): ?string
    {
        $configured = $this->firstAcceptableGroupName($this->_sceneGroupPatterns, $nfoContent);
        if ($configured !== null) {
            return $configured;
        }

        // Additional patterns for group name detection
        $additionalPatterns = [
            // "GROUP presents" or "GROUP brings you"
            '/\b([A-Z][A-Z0-9]{1,14})\s+(?:presents?|brings?\s+you)/i',
            // Common footer format: "--- GROUP ---"
            '/[-=]{2,}\s*([A-Z][A-Z0-9]{1,14})\s*[-=]{2,}$/mi',
            // Contact section: "irc.server.net #GROUP"
            '/irc\.[a-z0-9.-]+\s+#([A-Z][A-Z0-9]{1,14})/i',
            // Website: "www.GROUP.com/org/net"
            '/www\.([a-z][a-z0-9]{1,14})\.(?:com|org|net|info)/i',
            // ASCII art name extraction (common pattern at start)
            '/^\s*[^a-zA-Z0-9]*([A-Z][A-Z0-9]{2,14})[^a-zA-Z0-9]*\s*$/mi',
        ];

        $fallback = $this->firstAcceptableGroupName($additionalPatterns, $nfoContent);

        return $fallback === null ? null : strtoupper($fallback);
    }

    /**
     * Return the first capture (group 1) among the given patterns that passes the group-name sanity checks.
     *
     * A candidate is accepted when it is 2-20 bytes long after trimming and is not one of
     * the generic words that commonly appear in NFO headers.
     *
     * @param  iterable<string>  $patterns  PCRE patterns whose first capture group holds the candidate.
     * @param  string  $nfoContent  The NFO content to search.
     * @return string|null The trimmed candidate in its original casing, or null when nothing qualifies.
     */
    private function firstAcceptableGroupName(iterable $patterns, string $nfoContent): ?string
    {
        // False positives to filter out
        $falsePositives = [
            'THE', 'AND', 'FOR', 'NFO', 'INFO', 'DVD', 'BLU', 'RAY', 'WEB', 'HDTV',
            'RELEASE', 'GROUP', 'DATE', 'SIZE', 'CODEC', 'VIDEO', 'AUDIO', 'FORMAT',
            'NOTES', 'INSTALL', 'GREETS', 'PRESENTS', 'TEAM', 'SCENE', 'FILE', 'FILES',
        ];

        foreach ($patterns as $pattern) {
            if (! preg_match($pattern, $nfoContent, $matches)) {
                continue;
            }

            // A pattern without a capture group must not blow up trim() under strict types.
            $candidate = trim($matches[1] ?? '');
            $length = strlen($candidate);

            if ($length >= 2 && $length <= 20 && ! in_array(strtoupper($candidate), $falsePositives, true)) {
                return $candidate;
            }
        }

        return null;
    }
792
793
    /**
     * Extract release date from NFO content.
     *
     * Looks for a date following a "date", "release(d)" or "rls" label in three notations:
     * numeric day/month/year, ISO-like year-month-day, and "Month DD, YYYY". Numeric dates
     * are read as DD/MM/YYYY first and as MM/DD/YYYY when the first reading is not a real
     * calendar date. Two-digit years are interpreted as 20YY. Candidates that are not
     * valid calendar dates are discarded.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return string|null ISO date string (YYYY-MM-DD) if found, null otherwise.
     */
    public function extractReleaseDate(string $nfoContent): ?string
    {
        $patterns = [
            // DD/MM/YYYY or MM/DD/YYYY
            '/(?:date|released?|rls)\s*[:\-]?\s*(\d{1,2})[\/\-.](\d{1,2})[\/\-.](\d{2,4})/i',
            // YYYY-MM-DD
            '/(?:date|released?|rls)\s*[:\-]?\s*(\d{4})[\/\-.](\d{1,2})[\/\-.](\d{1,2})/i',
            // Month DD, YYYY
            '/(?:date|released?|rls)\s*[:\-]?\s*(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\.?\s+(\d{1,2}),?\s+(\d{4})/i',
        ];

        $months = ['jan' => 1, 'feb' => 2, 'mar' => 3, 'apr' => 4, 'may' => 5, 'jun' => 6, 'jul' => 7, 'aug' => 8, 'sep' => 9, 'oct' => 10, 'nov' => 11, 'dec' => 12];

        // Formats the parts only when they form a real calendar date.
        $toIso = static fn (int $year, int $month, int $day): ?string => checkdate($month, $day, $year)
            ? sprintf('%04d-%02d-%02d', $year, $month, $day)
            : null;

        foreach ($patterns as $index => $pattern) {
            if (! preg_match($pattern, $nfoContent, $matches)) {
                continue;
            }

            if ($index === 0) {
                $yearDigits = strlen($matches[3]);
                if ($yearDigits === 3) {
                    // Neither a short nor a full year; treat as noise.
                    continue;
                }

                $year = (int) ($yearDigits === 2 ? '20'.$matches[3] : $matches[3]);
                $first = (int) $matches[1];
                $second = (int) $matches[2];

                // Prefer DD/MM (more common internationally), fall back to MM/DD.
                $date = $toIso($year, $second, $first) ?? $toIso($year, $first, $second);
            } elseif ($index === 1) {
                $date = $toIso((int) $matches[1], (int) $matches[2], (int) $matches[3]);
            } else {
                $month = $months[strtolower(substr($matches[1], 0, 3))] ?? 1;
                $date = $toIso((int) $matches[3], $month, (int) $matches[2]);
            }

            if ($date !== null) {
                return $date;
            }
        }

        return null;
    }
837
838
    /**
     * Extract video/audio codec information from NFO content.
     *
     * For each of video codec, audio codec and resolution, a labelled pattern
     * ("Video: x264") is tried before a bare-token pattern. Codec names are returned
     * upper-cased (dots removed for video). Pixel dimensions are always returned as
     * "WIDTHxHEIGHT" with a lowercase "x", whether they were written with "x", "X" or the
     * multiplication sign; other resolution tokens (1080p, 4K, UHD, ...) are upper-cased.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return array{video?: string, audio?: string, resolution?: string} Array with codec info.
     */
    public function extractCodecInfo(string $nfoContent): array
    {
        $result = [];

        // Video codecs
        $videoPatterns = [
            '/(?:video|codec)\s*[:\-]?\s*(x264|x265|hevc|h\.?264|h\.?265|xvid|divx|av1|vp9|mpeg[24]?)/i',
            '/\b(x264|x265|HEVC|H\.?264|H\.?265|XviD|DivX|AV1|VP9)\b/i',
        ];
        foreach ($videoPatterns as $pattern) {
            if (preg_match($pattern, $nfoContent, $matches)) {
                $result['video'] = strtoupper(str_replace('.', '', $matches[1]));
                break;
            }
        }

        // Audio codecs
        $audioPatterns = [
            '/(?:audio|sound)\s*[:\-]?\s*(aac|ac3|dts(?:-(?:hd|ma|x))?|truehd|atmos|flac|mp3|eac3|dd[+p]?|dolby)/i',
            '/\b(AAC|AC3|DTS(?:-(?:HD|MA|X))?|TrueHD|Atmos|FLAC|EAC3|DD[+P]?)\b/i',
        ];
        foreach ($audioPatterns as $pattern) {
            if (preg_match($pattern, $nfoContent, $matches)) {
                $result['audio'] = strtoupper($matches[1]);
                break;
            }
        }

        // Resolution. The multiplication sign is multi-byte in UTF-8, so it is written as an
        // alternation rather than inside a character class: in byte mode a class only ever
        // matches one byte and would never match the sign. The "u" modifier is avoided on
        // purpose because preg_match() fails outright on subjects that are not valid UTF-8.
        $resolutionPatterns = [
            '/(?:resolution|quality)\s*[:\-]?\s*(\d{3,4}(?:x|×)\d{3,4}|\d{3,4}p|[48]K|UHD|FHD|HD)/i',
            '/\b(2160p|1080p|720p|480p|4K|UHD|FHD|HD)\b/i',
            '/\b(\d{3,4})\s*(?:[xX]|×)\s*(\d{3,4})\b/',
        ];
        foreach ($resolutionPatterns as $index => $pattern) {
            if (! preg_match($pattern, $nfoContent, $matches)) {
                continue;
            }

            if ($index === 2) {
                $result['resolution'] = $matches[1].'x'.$matches[2];
            } elseif (preg_match('/^(\d{3,4})\D+(\d{3,4})$/', $matches[1], $dimensions)) {
                // Labelled dimensions: normalise to the same shape the unlabelled pattern produces.
                $result['resolution'] = $dimensions[1].'x'.$dimensions[2];
            } else {
                $result['resolution'] = strtoupper($matches[1]);
            }
            break;
        }

        return $result;
    }
891
892
    /**
     * Find a file size mentioned in the NFO and convert it to bytes.
     *
     * A labelled size ("Size: 700 MB", "File size: 1,5 GiB") is preferred over a bare
     * "<number> <unit>" token. Both decimal-style (KB, MB, ...) and binary-style (KiB,
     * MiB, ...) unit names are converted with powers of 1024. A comma in the number is
     * read as a decimal separator.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return int|null File size in bytes if found, null otherwise.
     */
    public function extractFileSize(string $nfoContent): ?int
    {
        $sizePatterns = [
            '/(?:size|file\s*size)\s*[:\-]?\s*(\d+(?:[.,]\d+)?)\s*(bytes?|[KMGTP]B|[KMGTP]iB)/i',
            '/\b(\d+(?:[.,]\d+)?)\s*(GB|GiB|MB|MiB|TB|TiB)\b/i',
        ];

        // Unit name (upper-cased) => number of bytes per unit.
        $bytesPerUnit = ['B' => 1, 'BYTE' => 1, 'BYTES' => 1];
        foreach (['K', 'M', 'G', 'T', 'P'] as $position => $prefix) {
            $factor = 1024 ** ($position + 1);
            $bytesPerUnit[$prefix.'B'] = $factor;
            $bytesPerUnit[$prefix.'IB'] = $factor;
        }

        foreach ($sizePatterns as $regex) {
            if (! preg_match($regex, $nfoContent, $hit)) {
                continue;
            }

            $factor = $bytesPerUnit[strtoupper($hit[2])] ?? null;
            if ($factor === null) {
                continue;
            }

            $amount = (float) str_replace(',', '.', $hit[1]);

            return (int) ($amount * $factor);
        }

        return null;
    }
927
928
    /**
     * Collect every media database ID referenced by URL in the NFO.
     *
     * Recognised sources, in output order: IMDb, TheTVDB, TMDB (movie), TMDB (TV),
     * TVMaze, AniDB and MyAnimeList. Every occurrence is reported; repeated IDs are not
     * collapsed.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return array Array of ['id' => string, 'source' => string] entries.
     */
    public function extractAllMediaIds(string $nfoContent): array
    {
        // Source key => pattern whose first capture group is the ID.
        $patternsBySource = [
            'imdb' => '/imdb\.com\/title\/(tt\d{7,8})/i',
            'thetvdb' => '/thetvdb\.com\/(?:\?tab=series&id=|series\/)(\d{1,8})/i',
            'tmdb_movie' => '/themoviedb\.org\/movie\/(\d{1,8})/i',
            'tmdb_tv' => '/themoviedb\.org\/tv\/(\d{1,8})/i',
            'tvmaze' => '/tvmaze\.com\/shows\/(\d{1,6})/i',
            'anidb' => '/anidb\.net\/(?:perl-bin\/animedb\.pl\?show=anime&aid=|anime\/)(\d{1,6})/i',
            'mal' => '/myanimelist\.net\/anime\/(\d{1,6})/i',
        ];

        $found = [];

        foreach ($patternsBySource as $source => $regex) {
            if (! preg_match_all($regex, $nfoContent, $hits)) {
                continue;
            }

            foreach ($hits[1] as $id) {
                $found[] = ['id' => $id, 'source' => $source];
            }
        }

        return $found;
    }
989
990
    /**
     * Run every extractor of this service over the NFO and bundle the results.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return array Associative array keyed by: urls, group, release_date, codec_info,
     *               file_size, media_ids, show_id, language, runtime, genre,
     *               software_info, release_title.
     */
    public function parseNfoMetadata(string $nfoContent): array
    {
        $metadata = [];

        $metadata['urls'] = $this->extractUrls($nfoContent);
        $metadata['group'] = $this->extractGroupName($nfoContent);
        $metadata['release_date'] = $this->extractReleaseDate($nfoContent);
        $metadata['codec_info'] = $this->extractCodecInfo($nfoContent);
        $metadata['file_size'] = $this->extractFileSize($nfoContent);
        $metadata['media_ids'] = $this->extractAllMediaIds($nfoContent);
        $metadata['show_id'] = $this->parseShowId($nfoContent);
        $metadata['language'] = $this->extractLanguage($nfoContent);
        $metadata['runtime'] = $this->extractRuntime($nfoContent);
        $metadata['genre'] = $this->extractGenre($nfoContent);
        $metadata['software_info'] = $this->extractSoftwareInfo($nfoContent);
        $metadata['release_title'] = $this->extractReleaseTitle($nfoContent);

        return $metadata;
    }
1013
1014
    /**
     * Extract language information from NFO content.
     *
     * Reads the value following the first audio/language label and the first subtitle
     * label, splits it on whitespace, comma, slash and ampersand, and keeps the tokens
     * that are known language names or codes. Each language is reported once, with its
     * first letter capitalised, in order of first appearance.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return list<string> Detected languages without duplicates.
     */
    public function extractLanguage(string $nfoContent): array
    {
        // Common language patterns in NFOs. Longer label variants come first in each
        // alternation: otherwise "subs?" wins on "Subtitles:" and the capture becomes
        // "titles", and "language" on "Languages:" captures the stray "s".
        $patterns = [
            '/(?:languages?|audio|spoken?|dialogue)\s*[:\-]?\s*([A-Za-z]+(?:\s*[,\/&]\s*[A-Za-z]+)*)/i',
            '/(?:subtitles?|subs?)\s*[:\-]?\s*([A-Za-z]+(?:\s*[,\/&]\s*[A-Za-z]+)*)/i',
        ];

        // Known language names
        $knownLanguages = [
            'english', 'german', 'french', 'spanish', 'italian', 'dutch', 'portuguese',
            'russian', 'japanese', 'korean', 'chinese', 'mandarin', 'cantonese',
            'swedish', 'norwegian', 'danish', 'finnish', 'polish', 'czech', 'hungarian',
            'turkish', 'arabic', 'hindi', 'thai', 'vietnamese', 'indonesian', 'malay',
            'multi', 'dual', 'eng', 'ger', 'fre', 'spa', 'ita',
        ];

        // Keyed by the lowercase token so the duplicate check compares like with like.
        $languages = [];

        foreach ($patterns as $pattern) {
            if (! preg_match($pattern, $nfoContent, $matches)) {
                continue;
            }

            $tokens = preg_split('/[\s,\/&]+/', strtolower($matches[1])) ?: [];
            foreach ($tokens as $token) {
                if (! isset($languages[$token]) && in_array($token, $knownLanguages, true)) {
                    $languages[$token] = ucfirst($token);
                }
            }
        }

        return array_values($languages);
    }
1053
1054
    /**
     * Extract runtime/duration from NFO content.
     *
     * Tried in order:
     *  1. a labelled value with units ("Runtime: 1h 30m", "Duration: 90min", "Runtime: 2h"),
     *  2. a labelled clock value ("Runtime: 01:30:00"), read as hours:minutes[:seconds],
     *  3. a standalone "<2-3 digits> min/mins/minutes" token.
     *
     * Unit letters must end at a word boundary, so a size such as "Length: 700 MB" is not
     * mistaken for 700 minutes.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return int|null Runtime in minutes, or null if not found.
     */
    public function extractRuntime(string $nfoContent): ?int
    {
        // Hours with optional minutes (groups 1 and 2), or minutes alone (group 3).
        $labelledUnits = '/(?:runtime|duration|length|playtime)\s*[:\-]?\s*(?:(\d{1,2})\s*h(?:ours?|rs?)?\s*(?:(\d{1,3})\s*m(?:in(?:ute)?s?|n)?)?|(\d{1,3})\s*m(?:in(?:ute)?s?|n)?)\b/i';
        if (preg_match($labelledUnits, $nfoContent, $matches)) {
            // Groups that did not participate are '' or absent; both cast to 0.
            $hours = (int) ($matches[1] ?? 0);
            $minutes = ($matches[2] ?? '') !== ''
                ? (int) $matches[2]
                : (int) ($matches[3] ?? 0);

            return ($hours * 60) + $minutes;
        }

        // "Runtime: 01:30:00" or "1:30:00". NOTE(review): a two-part value such as "45:30"
        // is read as hours:minutes, not minutes:seconds; unchanged from the previous behaviour.
        $labelledClock = '/(?:runtime|duration|length|playtime)\s*[:\-]?\s*(\d{1,2}):(\d{2})(?::(\d{2}))?/i';
        if (preg_match($labelledClock, $nfoContent, $matches)) {
            return ((int) $matches[1] * 60) + (int) $matches[2];
        }

        // "90 minutes" standalone
        if (preg_match('/\b(\d{2,3})\s*(?:min(?:utes?)?|mins)\b/i', $nfoContent, $matches)) {
            return (int) $matches[1];
        }

        return null;
    }
1091
1092
    /**
     * Pull recognised genre names out of the first "genre", "category" or "type" line.
     *
     * The rest of that line is split on whitespace, comma, slash, ampersand and pipe;
     * tokens found in the built-in genre list are returned with their first letter
     * capitalised, without duplicates, in order of appearance.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return array Array of detected genres.
     */
    public function extractGenre(string $nfoContent): array
    {
        if (! preg_match('/(?:genre|category|type)\s*[:\-]?\s*([^\n\r]+)/i', $nfoContent, $hit)) {
            return [];
        }

        // Known valid genres (film, adult, games and music).
        $recognised = [
            'action', 'adventure', 'animation', 'biography', 'comedy', 'crime',
            'documentary', 'drama', 'family', 'fantasy', 'history', 'horror',
            'music', 'musical', 'mystery', 'romance', 'sci-fi', 'scifi', 'sport',
            'thriller', 'war', 'western', 'adult', 'xxx', 'erotic', 'anime',
            'rpg', 'fps', 'strategy', 'simulation', 'puzzle', 'racing', 'sports',
            'rock', 'pop', 'electronic', 'hip-hop', 'rap', 'classical', 'jazz',
        ];

        // Keyed by the lowercase token: first occurrence wins and fixes the output order.
        $seen = [];

        foreach (preg_split('/[\s,\/&|]+/', $hit[1]) as $token) {
            $key = strtolower(trim($token));
            if (in_array($key, $recognised, true)) {
                $seen[$key] = ucfirst($key);
            }
        }

        return array_values($seen);
    }
1127
1128
    /**
     * Extract software-specific information from NFO content.
     *
     * Keys are only present when the corresponding detail was found:
     *  - platform:   OS name following a platform/os/system/requires label,
     *  - version:    dotted version number (optionally with a build number) following
     *                "version", "ver" or "v",
     *  - protection: rest of the line following a protection/drm label (3-49 bytes),
     *  - has_crack:  true when a crack-related keyword appears anywhere.
     *
     * The short labels "os", "ver" and "v" must not be preceded by a letter or digit, so
     * words such as "Videos", "Server" or the codec name "AV1" no longer trigger a match.
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return array{platform?: string, version?: string, protection?: string, has_crack?: true} Software info.
     */
    public function extractSoftwareInfo(string $nfoContent): array
    {
        $info = [];

        // Platform/OS detection
        $platformPattern = '/(?:platform|(?<![a-z0-9])os|system|requires?)\s*[:\-]?\s*(windows?|linux|mac(?:os)?|unix|android|ios)/i';
        if (preg_match($platformPattern, $nfoContent, $matches)) {
            $info['platform'] = ucfirst(strtolower($matches[1]));
        }

        // Version detection
        $versionPattern = '/(?:version|(?<![a-z0-9])(?:ver|v))\s*[:\-]?\s*(\d+(?:\.\d+)*(?:\s*(?:build|b)\s*\d+)?)/i';
        if (preg_match($versionPattern, $nfoContent, $matches)) {
            $info['version'] = trim($matches[1]);
        }

        // Protection type: only the first labelled line is considered.
        if (preg_match('/(?:protection|drm|copy[ -]?protection)\s*[:\-]?\s*([^\n\r]+)/i', $nfoContent, $matches)) {
            $protection = trim($matches[1]);
            if (strlen($protection) > 2 && strlen($protection) < 50) {
                $info['protection'] = $protection;
            }
        }

        // Crack/Keygen/Serial info
        if (preg_match('/\b(cracked|keygen|serial|patch|loader|activator)\b/i', $nfoContent)) {
            $info['has_crack'] = true;
        }

        return $info;
    }
1175
1176
    /**
     * Guess the release title from the NFO.
     *
     * First looks for the text following a "title", "release" or "name" label; failing
     * that, for a line of 11-81 title-like characters, optionally framed by decorative
     * rules. A candidate is rejected when it is shorter than 5 or longer than 100 bytes
     * after trimming, or when it itself starts with a metadata label such as "Date:" or
     * "Size:".
     *
     * @param  string  $nfoContent  The NFO content to parse.
     * @return string|null The release title if found.
     */
    public function extractReleaseTitle(string $nfoContent): ?string
    {
        $titlePatterns = [
            // "Title: Movie Name" or "Release: Title.Goes.Here"
            '/(?:title|release|name)\s*[:\-]?\s*([^\n\r]{5,100})/i',
            // Scene-style title in header
            '/(?:^|\n)\s*(?:[\-=*~]{3,}\s*)?([A-Za-z0-9][\w.\-\s]{10,80}?)(?:\s*[\-=*~]{3,})?\s*(?:\n|$)/m',
        ];

        $metadataLabel = '/^(?:date|size|codec|format|video|audio|language|runtime|genre)\s*:/i';

        foreach ($titlePatterns as $regex) {
            if (preg_match($regex, $nfoContent, $hit) !== 1) {
                continue;
            }

            $candidate = trim($hit[1]);
            $length = strlen($candidate);

            if ($length < 5 || $length > 100) {
                continue;
            }

            // Filter out captures that are really another labelled field.
            if (preg_match($metadataLabel, $candidate)) {
                continue;
            }

            return $candidate;
        }

        return null;
    }
1204
1205
    /**
     * Normalise raw NFO text for storage or display.
     *
     * Passes the content through cp437toUTF(), converts CRLF and lone CR line endings to
     * LF, strips trailing whitespace from every non-blank line (leading whitespace is
     * kept so NFO art stays aligned) and collapses runs of blank lines to at most two.
     *
     * @param  string  $nfoContent  Raw NFO content.
     * @return string Cleaned NFO content.
     */
    public function cleanNfoContent(string $nfoContent): string
    {
        $normalised = str_replace(["\r\n", "\r"], "\n", cp437toUTF($nfoContent));

        $kept = [];
        $blankRun = 0;

        foreach (explode("\n", $normalised) as $rawLine) {
            if (trim($rawLine) !== '') {
                $blankRun = 0;
                $kept[] = rtrim($rawLine);

                continue;
            }

            // Whitespace-only line: keep only the first two of any consecutive run.
            $blankRun++;
            if ($blankRun <= 2) {
                $kept[] = '';
            }
        }

        return implode("\n", $kept);
    }
1239
1240
    /**
     * Score how informative an NFO looks, on a 0-100 scale.
     *
     * Starts from 50 and applies:
     *  - length: -20 below 100 bytes, +15 between 501 and 19999 bytes, +5 from 20000 bytes,
     *  - keywords from $this->_nfoKeywords: +2 each, capped at +20,
     *  - media IDs: +5 each, capped at +15,
     *  - URLs: +2 each, capped at +10,
     *  - codec details (video / audio / resolution): +3 each,
     *  - ASCII art present: +10,
     *  - structure score from analyzeStructure(),
     *  - group name found: +8, release date found: +5, runtime found: +4,
     *  - languages: +2 each, capped at +6,
     *  - control characters left in the text: -5 each, capped at -20.
     *
     * @param  string  $nfoContent  The NFO content to analyze.
     * @return int Quality score from 0-100.
     */
    public function calculateNfoQuality(string $nfoContent): int
    {
        $byteLength = strlen($nfoContent);

        // Base score plus the length adjustment.
        $score = 50 + match (true) {
            $byteLength < 100 => -20,
            $byteLength > 500 && $byteLength < 20000 => 15,
            $byteLength >= 20000 => 5, // Longer NFOs might have too much filler
            default => 0,
        };

        // Keyword matching (case-insensitive substring search).
        $keywordHits = 0;
        foreach ($this->_nfoKeywords as $keyword) {
            $keywordHits += stripos($nfoContent, $keyword) !== false ? 1 : 0;
        }
        $score += min($keywordHits * 2, 20);

        // Media IDs and URLs: an empty result simply contributes zero.
        $score += min(count($this->extractAllMediaIds($nfoContent)) * 5, 15);
        $score += min(count($this->extractUrls($nfoContent)) * 2, 10);

        // Codec info presence
        $score += count(array_filter($this->extractCodecInfo($nfoContent))) * 3;

        // ASCII art detection (scene NFOs often have decorative borders)
        $score += $this->hasAsciiArt($nfoContent) ? 10 : 0;

        // Structural elements bonus
        $score += $this->analyzeStructure($nfoContent);

        // Group name and release date detection bonuses
        $score += $this->extractGroupName($nfoContent) !== null ? 8 : 0;
        $score += $this->extractReleaseDate($nfoContent) !== null ? 5 : 0;

        // Language info bonus
        $score += min(count($this->extractLanguage($nfoContent)) * 2, 6);

        // Runtime detection bonus
        $score += $this->extractRuntime($nfoContent) !== null ? 4 : 0;

        // Penalty for binary content remnants
        $controlChars = preg_match_all('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', $nfoContent);
        if ($controlChars) {
            $score -= min($controlChars * 5, 20);
        }

        return max(0, min(100, $score));
    }
1332
1333
    /**
     * Detect ASCII art in NFO content.
     *
     * Returns true when the text contains any of: a run of five or more border characters
     * (- = * ~ # @), three or more consecutive box-drawing characters, three or more
     * consecutive block/shade characters, two runs of three or more slash, backslash,
     * pipe or underscore characters on one line, or any non-space byte repeated five or
     * more times.
     *
     * The multi-byte characters are written as alternations rather than character
     * classes. Without the "u" modifier a class matches single bytes, so "{3,}" would be
     * satisfied by the three bytes of one box-drawing character. The "u" modifier is not
     * used because preg_match() fails on subjects that are not valid UTF-8.
     *
     * @param  string  $nfoContent  The NFO content to analyze.
     * @return bool True if ASCII art is detected.
     */
    protected function hasAsciiArt(string $nfoContent): bool
    {
        $asciiArtPatterns = [
            // Decorative borders
            '/[-=*~#@]{5,}/',
            // Box drawing characters
            '/(?:┌|┐|└|┘|├|┤|┬|┴|┼|│|─|╔|╗|╚|╝|║|═){3,}/',
            // Extended ASCII art characters
            '/(?:░|▒|▓|█|▄|▀|■|□|▪|▫){3,}/',
            // Common ASCII art patterns. Four backslashes in this single-quoted string yield
            // the two the regex needs to put a literal backslash into the class.
            '/[\/\\\\|_]{3,}.*[\/\\\\|_]{3,}/',
            // Repeated special chars in artistic patterns
            '/(\S)\1{4,}/',
        ];

        foreach ($asciiArtPatterns as $pattern) {
            if (preg_match($pattern, $nfoContent)) {
                return true;
            }
        }

        return false;
    }
1363
1364
    /**
     * Rate how structured the NFO text is.
     *
     * Points are awarded for:
     *  - section header lines of three kinds (framed by -, = or * rules; wrapped in
     *    [brackets]; wrapped in <angle brackets>): one point per line, at most 3 per kind,
     *  - "Label: value" style lines: one point per two lines, at most 5,
     *  - having at least ten non-blank lines: 2 points.
     *
     * @param  string  $nfoContent  The NFO content to analyze.
     * @return int Score based on structural quality (0-15).
     */
    protected function analyzeStructure(string $nfoContent): int
    {
        $headerPatterns = [
            '/^[ \t]*[-=*]{2,}.*[-=*]{2,}[ \t]*$/m', // Decorative section dividers
            '/^[ \t]*\[.*\][ \t]*$/m',                 // [Section Name]
            '/^[ \t]*<.*>[ \t]*$/m',                   // <Section Name>
        ];

        $points = 0;

        foreach ($headerPatterns as $regex) {
            $headerLines = preg_match_all($regex, $nfoContent);
            if ($headerLines) {
                $points += min($headerLines, 3);
            }
        }

        // Labeled fields (Field: Value format); every second one is worth a point.
        $labelledLines = preg_match_all('/^[ \t]*[A-Za-z][A-Za-z\s]{2,20}\s*[:\.].*$/m', $nfoContent);
        if ($labelledLines) {
            $points += min(intdiv($labelledLines, 2), 5);
        }

        // A reasonable body of text: count the lines that are not just whitespace.
        $filledLines = 0;
        foreach (explode("\n", $nfoContent) as $line) {
            if (trim($line) !== '') {
                $filledLines++;
            }
        }
        if ($filledLines >= 10) {
            $points += 2;
        }

        return min(15, $points);
    }
1402
1403
    /**
     * Retrieve the stored NFO content for a release.
     *
     * The record is looked up through ReleaseNfo::getReleaseNfo() and its
     * `nfo` value is returned unchanged; this method performs no decoding of
     * its own. NOTE(review): any decompression presumably happens inside the
     * model lookup - confirm against ReleaseNfo.
     *
     * @param  int  $releaseId  The release ID.
     * @return string|null The NFO content, or null when no record exists or its content is empty.
     */
    public function getNfoContent(int $releaseId): ?string
    {
        $record = ReleaseNfo::getReleaseNfo($releaseId);

        // Usable only when a record was found and it carries non-empty content.
        $hasContent = $record !== null && ! empty($record->nfo);

        return $hasContent ? $record->nfo : null;
    }
1419
1420
    /**
     * Persist NFO content for a release and flag the release as having an NFO.
     *
     * When compression is requested the stored value is the gzcompress()
     * output prefixed with the four bytes 1F 8B 08 00. NOTE(review): the
     * prefix is presumably what the read side expects - confirm before
     * changing it. Any error is logged and reported as a false return value
     * rather than propagated.
     *
     * @param  int  $releaseId  The release ID.
     * @param  string  $nfoContent  The NFO content to store.
     * @param  bool  $compress  Whether to compress the content.
     * @return bool True on success, false on failure.
     */
    public function storeNfoContent(int $releaseId, string $nfoContent, bool $compress = true): bool
    {
        try {
            $payload = $nfoContent;
            if ($compress) {
                $payload = "\x1f\x8b\x08\x00".gzcompress($nfoContent);
            }

            // Insert or overwrite the NFO row keyed by the release ID.
            ReleaseNfo::updateOrCreate(
                ['releases_id' => $releaseId],
                ['nfo' => $payload]
            );

            // Mark the release itself as having an NFO.
            Release::whereId($releaseId)->update(['nfostatus' => self::NFO_FOUND]);
        } catch (Throwable $e) {
            Log::error("Failed to store NFO for release {$releaseId}: ".$e->getMessage());

            return false;
        }

        return true;
    }
1447
1448
    /**
     * Drop the cached NFO processing settings.
     *
     * Call this after the settings have been changed so that fresh values
     * are loaded the next time they are needed.
     */
    public function clearSettingsCache(): void
    {
        $cacheKeys = [
            'nfo_maxnfoprocessed',
            'nfo_maxnforetries',
            'nfo_maxsizetoprocessnfo',
            'nfo_minsizetoprocessnfo',
        ];

        foreach ($cacheKeys as $cacheKey) {
            Cache::forget($cacheKey);
        }
    }
1460
}
1461