|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace App\Services; |
|
6
|
|
|
|
|
7
|
|
|
use App\Models\Release; |
|
8
|
|
|
use App\Models\ReleaseNfo; |
|
9
|
|
|
use App\Models\Settings; |
|
10
|
|
|
use App\Models\UsenetGroup; |
|
11
|
|
|
use App\Services\NNTP\NNTPService; |
|
12
|
|
|
use App\Services\Nzb\NzbContentsService; |
|
13
|
|
|
use dariusiii\rarinfo\Par2Info; |
|
14
|
|
|
use dariusiii\rarinfo\SfvInfo; |
|
15
|
|
|
use Illuminate\Support\Facades\Cache; |
|
16
|
|
|
use Illuminate\Support\Facades\DB; |
|
17
|
|
|
use Illuminate\Support\Facades\File; |
|
18
|
|
|
use Illuminate\Support\Facades\Log; |
|
19
|
|
|
use Throwable; |
|
20
|
|
|
|
|
21
|
|
|
/** |
|
22
|
|
|
* Class NfoService - Handles NFO file processing, validation, and metadata extraction. |
|
23
|
|
|
* |
|
24
|
|
|
* NFO files are text files commonly used in the warez scene to provide information |
|
25
|
|
|
* about releases. This class handles detection, validation, parsing and storage of NFO content. |
|
26
|
|
|
*/ |
|
27
|
|
|
class NfoService |
|
28
|
|
|
{ |
|
29
|
|
|
/** |
|
30
|
|
|
* Regex to detect common non-NFO file headers/signatures. |
|
31
|
|
|
* Matches XML, NZB, RIFF (media), PAR/RAR archives, and other binary formats. |
|
32
|
|
|
*/ |
|
33
|
|
|
protected string $_nonNfoHeaderRegex = '/\A(\s*<\?xml|=newz\[NZB\]=|RIFF|\s*[RP]AR|.{0,10}(JFIF|matroska|ftyp|ID3)|PK\x03\x04|\x1f\x8b\x08|MZ|%PDF|GIF8[79]a|\x89PNG)|;\s*Generated\s*by.*SF\w/i'; |
|
34
|
|
|
|
|
35
|
|
|
/** |
|
36
|
|
|
* Regex to identify text encoding from the 'file' command output. |
|
37
|
|
|
*/ |
|
38
|
|
|
protected string $_textFileRegex = '/(ASCII|ISO-8859|UTF-(8|16|32).*?|Non-ISO extended-ASCII)\s*text/i'; |
|
39
|
|
|
|
|
40
|
|
|
/** |
|
41
|
|
|
* Regex to identify common binary file types from the 'file' command output. |
|
42
|
|
|
*/ |
|
43
|
|
|
protected string $_binaryFileRegex = '/^(JPE?G|Parity|PNG|RAR|XML|(7-)?[Zz]ip|PDF|GIF|executable|archive|compressed|data|binary)/i'; |
|
44
|
|
|
|
|
45
|
|
|
/** |
|
46
|
|
|
* Regex to detect binary characters within the content. |
|
47
|
|
|
* Excludes common control characters that may appear in NFOs (tab, newline, carriage return). |
|
48
|
|
|
*/ |
|
49
|
|
|
protected string $_binaryCharsRegex = '/[\x00-\x08\x0B\x0C\x0E-\x1F]/'; |
|
50
|
|
|
|
|
51
|
|
|
/** |
|
52
|
|
|
* Common NFO keywords that help identify legitimate NFO files. |
|
53
|
|
|
*/ |
|
54
|
|
|
protected array $_nfoKeywords = [ |
|
55
|
|
|
// Release information |
|
56
|
|
|
'release', 'group', 'date', 'size', 'format', 'source', 'genre', 'codec', |
|
57
|
|
|
'bitrate', 'resolution', 'language', 'subtitle', 'ripped', 'cracked', |
|
58
|
|
|
'keygen', 'serial', 'patch', 'trainer', 'install', 'notes', 'greets', |
|
59
|
|
|
'nfo', 'ascii', 'artwork', 'presents', 'proudly', 'brings', 'another', |
|
60
|
|
|
// Scene terminology |
|
61
|
|
|
'scene', 'rls', 'nuked', 'proper', 'repack', 'internal', 'retail', |
|
62
|
|
|
'webdl', 'webrip', 'bluray', 'bdrip', 'dvdrip', 'hdtv', 'pdtv', |
|
63
|
|
|
// Media info |
|
64
|
|
|
'video', 'audio', 'duration', 'runtime', 'aspect', 'fps', 'channels', |
|
65
|
|
|
'sample', 'encoder', 'x264', 'x265', 'hevc', 'avc', 'xvid', 'divx', |
|
66
|
|
|
'aac', 'ac3', 'dts', 'truehd', 'atmos', 'flac', 'mp3', |
|
67
|
|
|
// Content info |
|
68
|
|
|
'movie', 'film', 'episode', 'season', 'series', 'title', 'year', |
|
69
|
|
|
'director', 'cast', 'actors', 'plot', 'synopsis', 'imdb', 'rating', |
|
70
|
|
|
// Software |
|
71
|
|
|
'crack', 'readme', 'setup', 'installer', 'license', 'registration', |
|
72
|
|
|
'protection', 'requirements', 'platform', 'operating', 'system', |
|
73
|
|
|
// Contact/Group info |
|
74
|
|
|
'contact', 'irc', 'www', 'http', 'ftp', 'email', 'apply', 'join', |
|
75
|
|
|
]; |
|
76
|
|
|
|
|
77
|
|
|
/** |
|
78
|
|
|
* Scene group patterns for improved detection. |
|
79
|
|
|
*/ |
|
80
|
|
|
protected array $_sceneGroupPatterns = [ |
|
81
|
|
|
'/(?:^|\n)\s*[-=*]{3,}.*?([A-Z0-9]{2,15})\s*[-=*]{3,}/i', |
|
82
|
|
|
'/(?:presents?|brought\s+(?:to\s+)?(?:you\s+)?by|from)\s*[:\-]?\s*([A-Z][A-Z0-9]{1,14})/i', |
|
83
|
|
|
'/(?:greets?\s+(?:go(?:es)?\s+)?(?:out\s+)?to|respect\s+to)\s*[:\-]?\s*([\w,\s&]+)/i', |
|
84
|
|
|
'/(?:^|\n)\s*([A-Z][A-Z0-9]{1,14})\s+(?:nfo|info|release)\s*(?:$|\n)/i', |
|
85
|
|
|
'/(?:released\s+by|rls\s+by)\s*[:\-]?\s*([A-Z][A-Z0-9]{1,14})/i', |
|
86
|
|
|
]; |
|
87
|
|
|
|
|
88
|
|
|
/** |
|
89
|
|
|
* Maximum NFO file size in bytes (65535 = 64 KiB - 1); content of this size or larger is rejected by isNFO(). |
|
90
|
|
|
*/ |
|
91
|
|
|
protected const MAX_NFO_SIZE = 65535; |
|
92
|
|
|
|
|
93
|
|
|
/** |
|
94
|
|
|
* Minimum NFO file size in bytes. |
|
95
|
|
|
*/ |
|
96
|
|
|
protected const MIN_NFO_SIZE = 12; |
|
97
|
|
|
|
|
98
|
|
|
/** |
|
99
|
|
|
* Cache TTL for settings in seconds. |
|
100
|
|
|
*/ |
|
101
|
|
|
protected const SETTINGS_CACHE_TTL = 300; |
|
102
|
|
|
|
|
103
|
|
|
/** |
|
104
|
|
|
* @var int Number of NFOs to process per batch. |
|
105
|
|
|
*/ |
|
106
|
|
|
private int $nzbs; |
|
107
|
|
|
|
|
108
|
|
|
/** |
|
109
|
|
|
* @var int Maximum release size to process NFO (in GB). |
|
110
|
|
|
*/ |
|
111
|
|
|
protected int $maxSize; |
|
112
|
|
|
|
|
113
|
|
|
/** |
|
114
|
|
|
* @var int Maximum retry attempts for failed NFO fetches. |
|
115
|
|
|
*/ |
|
116
|
|
|
private int $maxRetries; |
|
117
|
|
|
|
|
118
|
|
|
/** |
|
119
|
|
|
* @var int Minimum release size to process NFO (in MB). |
|
120
|
|
|
*/ |
|
121
|
|
|
protected int $minSize; |
|
122
|
|
|
|
|
123
|
|
|
/** |
|
124
|
|
|
* @var string Temporary path for processing files. |
|
125
|
|
|
*/ |
|
126
|
|
|
private string $tmpPath; |
|
127
|
|
|
|
|
128
|
|
|
/** |
|
129
|
|
|
* @var bool Whether to echo output to CLI. |
|
130
|
|
|
*/ |
|
131
|
|
|
protected bool $echo; |
|
132
|
|
|
|
|
133
|
|
|
public const NFO_FAILED = -9; // We failed to get a NFO after admin set max retries. |
|
134
|
|
|
|
|
135
|
|
|
public const NFO_UNPROC = -1; // Release has not been processed yet. |
|
136
|
|
|
|
|
137
|
|
|
public const NFO_NONFO = 0; // Release has no NFO. |
|
138
|
|
|
|
|
139
|
|
|
public const NFO_FOUND = 1; // Release has an NFO. |
|
140
|
|
|
|
|
141
|
|
|
/**
 * Build the service and load its tunables.
 *
 * Reads the CLI echo flag and temp path from config, and the batch size,
 * retry limit and release size bounds from Settings (cached for
 * SETTINGS_CACHE_TTL seconds to avoid a query per instantiation).
 *
 * The retry limit is stored as the lowest `nfostatus` still eligible for
 * processing: -(retries + 1), never lower than -8.
 *
 * @throws \Exception
 */
public function __construct()
{
    $ttl = self::SETTINGS_CACHE_TTL;

    // Reads one integer setting through the cache. The outer cast is needed
    // because a cache store may hand the value back as a string.
    $cachedInt = static fn (string $cacheKey, string $setting): int => (int) Cache::remember(
        $cacheKey,
        $ttl,
        static fn () => (int) Settings::settingValue($setting)
    );

    $this->echo = (bool) config('nntmux.echocli');

    // Batch size falls back to 100 when the setting is an empty string.
    $this->nzbs = (int) Cache::remember('nfo_maxnfoprocessed', $ttl, static function () {
        $configured = Settings::settingValue('maxnfoprocessed');
        if ($configured === '') {
            return 100;
        }

        return (int) $configured;
    });

    $retries = $cachedInt('nfo_maxnforetries', 'maxnforetries');
    $lowestStatus = $retries >= 0 ? -($retries + 1) : self::NFO_UNPROC;
    $this->maxRetries = max($lowestStatus, -8);

    $this->maxSize = $cachedInt('nfo_maxsizetoprocessnfo', 'maxsizetoprocessnfo');
    $this->minSize = $cachedInt('nfo_minsizetoprocessnfo', 'minsizetoprocessnfo');

    // Normalise to exactly one trailing forward slash.
    $this->tmpPath = rtrim((string) config('nntmux.tmp_unrar_path'), '/\\').'/';
}
|
176
|
|
|
|
|
177
|
|
|
/**
 * Look for a TV show ID or movie ID in a string.
 *
 * Recognised URL forms, tried in this order (first hit wins):
 * TVMaze, IMDB, TVDB (legacy query-string form, then modern /series/ form),
 * TMDB movie, TMDB TV, AniDB, Trakt.tv.
 *
 * For the modern TVDB form and for Trakt the captured value may be either a
 * numeric ID or a slug, so the whole path segment is returned.
 *
 * @param  string  $str  The string with a Show ID.
 * @return array{showid: string, site: string}|false Array with the ID and the site key, or false when nothing matched.
 */
public function parseShowId(string $str): array|false
{
    // [site key, pattern] pairs; capture group 1 is always the ID.
    $sitePatterns = [
        ['tvmaze', '/tvmaze\.com\/shows\/(\d{1,6})/i'],
        ['imdb', '/imdb\.com\/title\/(tt\d{7,8})/i'],
        ['thetvdb', '/thetvdb\.com\/\?tab=series&id=(\d{1,8})/i'],
        // A single [\w-]+ capture covers both numeric IDs and slugs. Trying
        // \d{1,8} first (as before) cut slugs such as "24-legacy" down to "24".
        ['thetvdb', '/thetvdb\.com\/series\/([\w-]+)/i'],
        ['tmdb_movie', '/themoviedb\.org\/movie\/(\d{1,8})/i'],
        ['tmdb_tv', '/themoviedb\.org\/tv\/(\d{1,8})/i'],
        ['anidb', '/anidb\.net\/(?:perl-bin\/animedb\.pl\?show=anime&aid=|anime\/)(\d{1,6})/i'],
        ['trakt', '/trakt\.tv\/(?:shows|movies)\/([\w-]+)/i'],
    ];

    foreach ($sitePatterns as [$site, $pattern]) {
        if (preg_match($pattern, $str, $hits)) {
            return ['showid' => $hits[1], 'site' => $site];
        }
    }

    return false;
}
|
229
|
|
|
|
|
230
|
|
|
/**
 * Confirm this is an NFO file.
 *
 * Checks, in order:
 *  1. Type/size validation (non-string, empty, too small or too large = not NFO)
 *  2. Known non-NFO header signatures (regex, then magic bytes)
 *  3. File type detection via fileInfo() on a temporary copy
 *  4. When fileInfo() yields nothing: PAR2 / SFV structure detection,
 *     binary character scan
 *  5. NFO keyword/content heuristics
 *
 * The argument is taken by reference for backward compatibility with existing
 * callers; it is never modified here.
 *
 * @param  bool|string  $possibleNFO  The nfo content.
 * @param  string  $guid  The guid of the release (used to name the temporary file).
 * @return bool True if it's likely an NFO, False otherwise.
 */
public function isNFO(bool|string &$possibleNFO, string $guid): bool
{
    // The signature admits `true`; under strict_types that would reach
    // strlen() and throw a TypeError, so treat every non-string as "not an NFO".
    if (! is_string($possibleNFO) || $possibleNFO === '') {
        return false;
    }

    $size = \strlen($possibleNFO);
    if ($size < self::MIN_NFO_SIZE || $size >= self::MAX_NFO_SIZE) {
        return false;
    }

    // Cheap in-memory rejections before touching the disk.
    if (preg_match($this->_nonNfoHeaderRegex, $possibleNFO) || $this->detectBinaryFormat($possibleNFO)) {
        return false;
    }

    $tmpPath = $this->tmpPath.$guid.'.nfo';

    try {
        // fileInfo() works on files, so save to disk.
        File::put($tmpPath, $possibleNFO);

        $result = fileInfo($tmpPath);
        if (! empty($result)) {
            // fileInfo() gave an answer: only a text verdict can pass, and it
            // must still survive the content heuristics. Anything else is rejected.
            return preg_match($this->_textFileRegex, $result) === 1
                && $this->validateNfoContent($possibleNFO);
        }

        // Fallback checks when fileInfo() is unavailable or inconclusive.
        // A clean parse as PAR2 means the payload is a PAR2 file.
        $par2info = new Par2Info;
        $par2info->setData($possibleNFO);
        if (! $par2info->error) {
            return false;
        }

        // Likewise for SFV.
        $sfv = new SfvInfo;
        $sfv->setData($possibleNFO);
        if (! $sfv->error) {
            return false;
        }

        if (preg_match($this->_binaryCharsRegex, $possibleNFO)) {
            return false;
        }

        return $this->validateNfoContent($possibleNFO);
    } catch (Throwable $e) {
        Log::error("Error processing potential NFO for GUID {$guid}: ".$e->getMessage());

        return false;
    } finally {
        // Ensure the temporary file is always deleted, whatever path returned above.
        if (File::exists($tmpPath)) {
            try {
                File::delete($tmpPath);
            } catch (Throwable $e) {
                Log::error("Error deleting temporary NFO file {$tmpPath}: ".$e->getMessage());
            }
        }
    }
}
|
332
|
|
|
|
|
333
|
|
|
/**
 * Detect binary file formats by their leading magic bytes.
 *
 * Inputs shorter than 4 bytes are never reported as binary. A UTF-16 BOM is
 * deliberately not in the list: such content is left for the other checks.
 *
 * @param  string  $data  The file content to check.
 * @return bool True if the content starts with a known binary signature.
 */
protected function detectBinaryFormat(string $data): bool
{
    if (strlen($data) < 4) {
        return false;
    }

    $signatures = [
        "\x50\x4B\x03\x04", // ZIP/DOCX/XLSX etc.
        "\x50\x4B\x05\x06", // Empty ZIP
        "\x52\x61\x72\x21", // RAR
        "\x37\x7A\xBC\xAF", // 7-Zip
        "\x1F\x8B\x08",     // GZip
        "\x42\x5A\x68",     // BZip2
        "\xFD\x37\x7A\x58", // XZ
        "\x89\x50\x4E\x47", // PNG
        "\xFF\xD8\xFF",     // JPEG
        "\x47\x49\x46\x38", // GIF
        "\x25\x50\x44\x46", // PDF
        "\x49\x44\x33",     // MP3 with ID3
        "\xFF\xFB",         // MP3
        "\x4F\x67\x67\x53", // OGG
        "\x66\x4C\x61\x43", // FLAC
        "\x52\x49\x46\x46", // RIFF (WAV/AVI)
        "\x00\x00\x01\xBA", // MPEG video
        "\x00\x00\x01\xB3", // MPEG video
        "\x1A\x45\xDF\xA3", // Matroska/WebM
        "\x4D\x5A",         // Windows EXE
        "\x7F\x45\x4C\x46", // ELF executable
        "\xCA\xFE\xBA\xBE", // Java class
        "\xD0\xCF\x11\xE0", // MS Office old (OLE) format
    ];

    foreach ($signatures as $signature) {
        if (str_starts_with($data, $signature)) {
            return true;
        }
    }

    return false;
}
|
386
|
|
|
|
|
387
|
|
|
/**
 * Validate NFO content using heuristics.
 *
 * Hard requirements: at least 50 bytes, at least 70% printable ASCII, and at
 * least five runs of two or more letters. After that the content is scored:
 * three keyword hits accept immediately; otherwise keyword hits plus bonuses
 * for divider lines, URLs, media-database hosts and "field: value" lines must
 * reach a score of 2.
 *
 * @param  string  $content  The content to validate.
 * @return bool True if content appears to be a valid NFO.
 */
protected function validateNfoContent(string $content): bool
{
    $byteLength = strlen($content);

    // Too short to be meaningful.
    if ($byteLength < 50) {
        return false;
    }

    // NFOs should be mostly printable ASCII.
    $printableBytes = preg_match_all('/[\x20-\x7E]/', $content);
    if ($printableBytes / $byteLength < 0.7) {
        return false;
    }

    // Require some actual words, not just symbols.
    if (preg_match_all('/[A-Za-z]{2,}/', $content) < 5) {
        return false;
    }

    $score = 0;

    // Each distinct keyword present counts once; three is enough on its own.
    foreach ($this->_nfoKeywords as $keyword) {
        if (stripos($content, $keyword) === false) {
            continue;
        }
        if (++$score >= 3) {
            return true;
        }
    }

    // Scene-style divider lines.
    if (preg_match('/[-=*]{5,}/', $content)) {
        $score++;
    }

    // Any URL.
    if (preg_match('/https?:\/\/|www\./i', $content)) {
        $score++;
    }

    // Media database hosts are a stronger signal.
    if (preg_match('/imdb\.com|thetvdb\.com|themoviedb\.org|anidb\.net/i', $content)) {
        $score += 2;
    }

    // "Field: value" lines: one third of a point per line, capped at 2.
    $fieldLines = preg_match_all('/^[A-Za-z\s]{2,20}\s*[:\.]\s*.+$/m', $content);
    if ($fieldLines) {
        $score += min($fieldLines / 3, 2);
    }

    return $score >= 2;
}
|
452
|
|
|
|
|
453
|
|
|
/**
 * Add an NFO from alternate sources. ex.: PreDB, rar, zip, etc...
 *
 * When the content passes isNFO(), it is stored (unless the release already
 * has an NFO row), the release is flagged NFO_FOUND, and, if the release's
 * completion is unset or 0, its NZB is parsed via NzbContentsService.
 *
 * @param  string  $nfo  The nfo.
 * @param  mixed  $release  Release record; the code reads ->id, ->guid, ->completion and ->groups_id.
 * @param  NNTPService  $nntp  Instance of class NNTPService.
 * @return bool True on success, False on failure.
 *
 * @throws \Exception
 */
public function addAlternateNfo(string &$nfo, $release, NNTPService $nntp): bool
{
    if (! ($release->id > 0) || ! $this->isNFO($nfo, $release->guid)) {
        return false;
    }

    $releaseId = $release->id;

    // Only insert when no NFO row exists yet for this release.
    $existing = ReleaseNfo::whereReleasesId($releaseId)->first(['releases_id']);
    if ($existing === null) {
        ReleaseNfo::query()->insert([
            'releases_id' => $releaseId,
            'nfo' => "\x1f\x8b\x08\x00".gzcompress($nfo),
        ]);
    }

    Release::whereId($releaseId)->update(['nfostatus' => self::NFO_FOUND]);

    // A missing completion value is treated as 0.
    $release->completion ??= 0;

    if ($release->completion === 0) {
        $parser = app(NzbContentsService::class);
        $parser->setNntp($nntp);
        $parser->setNfo($this);
        $parser->setEchoOutput($this->echo);
        $parser->parseNzb($release->guid, $release->id, $release->groups_id ?? 0);
    }

    return true;
}
|
490
|
|
|
|
|
491
|
|
|
/**
 * Attempt to find NFO files inside the NZB's of releases.
 *
 * Selects up to the configured batch size of eligible releases (lowest
 * nfostatus first, then newest postdate), asks NzbContentsService for each
 * release's NFO, and stores every NFO found together with the NFO_FOUND
 * status inside one transaction per release. Afterwards releases that have
 * exhausted their retries are marked as failed.
 *
 * @param  NNTPService  $nntp  The NNTP connection object
 * @param  string  $groupID  (optional) Group ID to filter releases by
 * @param  string  $guidChar  (optional) First character of the GUID for parallel processing
 * @param  bool  $processImdb  (optional) Process IMDB IDs (currently unused)
 * @param  bool  $processTv  (optional) Process TV IDs (currently unused)
 * @return int Count of successfully processed NFO files
 *
 * @throws \Exception If NNTP operations fail
 */
public function processNfoFiles(NNTPService $nntp, string $groupID = '', string $guidChar = '', bool $processImdb = true, bool $processTv = true): int
{
    $processedCount = 0;

    // Build base query with all filters
    $baseQuery = $this->buildNfoProcessingQuery($groupID, $guidChar);

    // Fetch releases to process
    $releases = $baseQuery->clone()
        ->orderBy('nfostatus')
        ->orderByDesc('postdate')
        ->limit($this->nzbs)
        ->get(['id', 'guid', 'groups_id', 'name']);

    $nfoCount = $releases->count();

    if ($nfoCount > 0) {
        $this->displayProcessingHeader($guidChar, $groupID, $nfoCount);

        // Show detailed stats if echo is enabled
        if ($this->echo) {
            $this->displayNfoStatusStats($baseQuery);
        }

        $nzbContentsService = app(NzbContentsService::class);
        $nzbContentsService->setNntp($nntp);
        $nzbContentsService->setNfo($this);

        foreach ($releases as $release) {
            try {
                $groupName = UsenetGroup::getNameByID($release['groups_id']);
                $fetchedBinary = $nzbContentsService->getNfoFromNzb($release['guid'], $release['id'], $release['groups_id'], $groupName);

                if ($fetchedBinary === false) {
                    continue;
                }

                DB::beginTransaction();
                try {
                    // Only insert if not already present
                    if (! ReleaseNfo::whereReleasesId($release['id'])->exists()) {
                        ReleaseNfo::query()->insert([
                            'releases_id' => $release['id'],
                            'nfo' => "\x1f\x8b\x08\x00".gzcompress($fetchedBinary),
                        ]);
                    }

                    Release::whereId($release['id'])->update(['nfostatus' => self::NFO_FOUND]);
                    DB::commit();
                    $processedCount++;
                } catch (Throwable $e) {
                    // Roll back for every failure type. Catching only \Exception
                    // left the transaction open when an Error (e.g. TypeError)
                    // was thrown between beginTransaction() and commit().
                    DB::rollBack();

                    // Errors keep propagating to the caller exactly as before.
                    if (! $e instanceof \Exception) {
                        throw $e;
                    }

                    if ($this->echo) {
                        cli()->error("Error saving NFO for release {$release['id']}: {$e->getMessage()}");
                    }
                }
            } catch (\Exception $e) {
                // A failure on one release must not stop the rest of the batch.
                if ($this->echo) {
                    cli()->error("Error processing release {$release['id']}: {$e->getMessage()}");
                }
            }
        }
    }

    // Process failed NFO attempts
    $this->handleFailedNfoAttempts($groupID, $guidChar);

    // Output results
    if ($this->echo) {
        if ($nfoCount > 0) {
            echo PHP_EOL;
        }
        if ($processedCount > 0) {
            cli()->primary($processedCount.' NFO file(s) found/processed.');
        }
    }

    return $processedCount;
}
|
584
|
|
|
|
|
585
|
|
|
/**
 * Build the base query for NFO processing with all common filters.
 *
 * Always restricts nfostatus to the range [maxRetries, NFO_UNPROC]. The GUID
 * prefix and group filters apply only when non-empty; the size bounds apply
 * only when the corresponding setting is greater than zero (max is in GB,
 * min is in MB).
 */
private function buildNfoProcessingQuery(string $groupID, string $guidChar): \Illuminate\Database\Eloquent\Builder
{
    // Collect the optional [column, operator, value] conditions first.
    $conditions = [];

    if ($guidChar !== '') {
        $conditions[] = ['leftguid', '=', $guidChar];
    }
    if ($groupID !== '') {
        $conditions[] = ['groups_id', '=', $groupID];
    }
    if ($this->maxSize > 0) {
        $conditions[] = ['size', '<', $this->maxSize * 1073741824];
    }
    if ($this->minSize > 0) {
        $conditions[] = ['size', '>', $this->minSize * 1048576];
    }

    $builder = Release::query()->whereBetween('nfostatus', [$this->maxRetries, self::NFO_UNPROC]);

    foreach ($conditions as [$column, $operator, $value]) {
        $builder->where($column, $operator, $value);
    }

    return $builder;
}
|
611
|
|
|
|
|
612
|
|
|
/**
 * Print the one-line banner for an NFO processing run.
 *
 * The GUID character and group ID are shown in square brackets only when
 * they are non-empty.
 */
private function displayProcessingHeader(string $guidChar, string $groupID, int $nfoCount): void
{
    $guidTag = $guidChar === '' ? '' : '['.$guidChar.'] ';
    $groupTag = $groupID === '' ? '' : '['.$groupID.'] ';

    $banner = PHP_EOL.$guidTag.$groupTag
        .'Processing '.$nfoCount
        .' NFO(s), starting at '.$this->nzbs
        .' * = hidden NFO, + = NFO, - = no NFO, f = download failed.';

    cli()->primary($banner);
}
|
626
|
|
|
|
|
627
|
|
|
/**
 * Print how many releases are waiting at each nfostatus value.
 *
 * Runs a grouped count on a clone of the base query, so the caller's query
 * object is left untouched. Prints nothing when there are no rows.
 */
private function displayNfoStatusStats(\Illuminate\Database\Eloquent\Builder $baseQuery): void
{
    $statusRows = $baseQuery->clone()
        ->select(['nfostatus as status', DB::raw('COUNT(id) as count')])
        ->groupBy(['nfostatus'])
        ->orderBy('nfostatus')
        ->get();

    if (! ($statusRows instanceof \Traversable) || ! ($statusRows->count() > 0)) {
        return;
    }

    // Assemble "Available to process, <status> = <count>, ..." and end with a period.
    $segments = [PHP_EOL.'Available to process'];
    foreach ($statusRows as $row) {
        $segments[] = $row['status'].' = '.number_format($row['count']);
    }

    cli()->header(implode(', ', $segments).'.');
}
|
646
|
|
|
|
|
647
|
|
|
/**
 * Handle releases that have failed too many NFO fetch attempts.
 *
 * Targets releases whose nfostatus is below the retry floor but above
 * NFO_FAILED (optionally narrowed by GUID prefix and group), deletes any
 * stored NFO rows for them and sets their nfostatus to NFO_FAILED.
 *
 * Work is done in chunks of 100, each in its own transaction. A chunk that
 * fails with an exception is rolled back and reported (when echo is on)
 * without stopping later chunks.
 */
private function handleFailedNfoAttempts(string $groupID, string $guidChar): void
{
    $failedQuery = Release::query()
        ->where('nfostatus', '<', $this->maxRetries)
        ->where('nfostatus', '>', self::NFO_FAILED);

    if ($guidChar !== '') {
        $failedQuery->where('leftguid', $guidChar);
    }

    if ($groupID !== '') {
        $failedQuery->where('groups_id', $groupID);
    }

    // chunkById() pages on the primary key rather than an offset. The callback
    // changes nfostatus, the very column this query filters on, so offset
    // paging with chunk() would skip rows as the result set shrinks.
    $failedQuery->select(['id'])->chunkById(100, function ($releases) {
        $releaseIds = $releases->pluck('id')->all();

        DB::beginTransaction();
        try {
            // Remove any releasenfo rows for the failed releases.
            ReleaseNfo::query()->whereIn('releases_id', $releaseIds)->delete();

            // Set release.nfostatus to failed.
            Release::query()->whereIn('id', $releaseIds)->update(['nfostatus' => self::NFO_FAILED]);

            DB::commit();
        } catch (Throwable $e) {
            // Always roll back so no transaction is left open.
            DB::rollBack();

            // Non-Exception errors propagate as they did before.
            if (! $e instanceof \Exception) {
                throw $e;
            }

            if ($this->echo) {
                cli()->error("Error handling failed NFO attempts: {$e->getMessage()}");
            }
        }
    });
}
|
684
|
|
|
|
|
685
|
|
|
/**
 * Build the SQL fragment that selects releases eligible for NFO processing,
 * for example:
 * "AND r.nfostatus BETWEEN -8 AND -1 AND r.size < 1073741824 AND r.size > 1048576"
 *
 * Values are read straight from Settings (no caching). The lower status
 * bound is -(maxnforetries + 1), never below -8; each size clause is
 * included only when its setting is greater than zero.
 *
 * @throws \Exception
 *
 * @static
 */
public static function NfoQueryString(): string
{
    $maxGigabytes = (int) Settings::settingValue('maxsizetoprocessnfo');
    $minMegabytes = (int) Settings::settingValue('minsizetoprocessnfo');
    $retries = (int) Settings::settingValue('maxnforetries');

    $lowestStatus = $retries >= 0 ? -($retries + 1) : self::NFO_UNPROC;

    $upperSizeClause = '';
    if ($maxGigabytes > 0) {
        $upperSizeClause = 'AND r.size < '.($maxGigabytes * 1073741824);
    }

    $lowerSizeClause = '';
    if ($minMegabytes > 0) {
        $lowerSizeClause = 'AND r.size > '.($minMegabytes * 1048576);
    }

    return sprintf(
        'AND r.nfostatus BETWEEN %d AND %d %s %s',
        max($lowestStatus, -8),
        self::NFO_UNPROC,
        $upperSizeClause,
        $lowerSizeClause
    );
}
|
710
|
|
|
|
|
711
|
|
|
/**
 * Extract URLs from NFO content.
 *
 * Collects http(s) URLs, plus bare "www." hosts (returned with an "http://"
 * prefix). Trailing sentence punctuation (. , ; : ! ?) is stripped from each
 * match, duplicates are removed, and the result is re-indexed from 0.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return list<string> Unique URLs in order of first appearance (explicit-scheme URLs first).
 */
public function extractUrls(string $nfoContent): array
{
    $urls = [];

    // Match HTTP/HTTPS URLs
    if (preg_match_all('/https?:\/\/[^\s<>"\']+/i', $nfoContent, $matches)) {
        $urls = $matches[0];
    }

    // Match www URLs without protocol. The lookbehind skips hosts that are
    // already part of a scheme-prefixed URL or a longer host name.
    if (preg_match_all('/(?<![\/\.])\bwww\.[a-z0-9][-a-z0-9]*\.[^\s<>"\']+/i', $nfoContent, $matches)) {
        foreach ($matches[0] as $bareHost) {
            $urls[] = 'http://'.$bareHost;
        }
    }

    // "See http://example.com." should yield the URL without the full stop.
    $trimmed = array_map(
        static fn (string $url): string => rtrim($url, '.,;:!?'),
        $urls
    );

    // array_filter/array_unique preserve keys; re-index so callers get a real list.
    return array_values(array_unique(array_filter($trimmed)));
}
|
735
|
|
|
|
|
736
|
|
|
/**
 * Extract release group name from NFO content.
 *
 * Two pattern sets are tried in order: the configured scene group patterns
 * (bordered headers, presentation phrases, greetings, "released by"), then a
 * set of additional patterns ("GROUP presents", footer signatures, IRC
 * channels, www hosts, lone words on a line). For each pattern only its first
 * match is considered; a candidate is accepted when it is 2-20 bytes long and
 * is not in the false-positive word list.
 *
 * Names found by the configured patterns are returned as written; names
 * found by the additional patterns are returned upper-cased.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return string|null The group name if found, null otherwise.
 */
public function extractGroupName(string $nfoContent): ?string
{
    // Common words that the patterns tend to capture by mistake.
    $falsePositives = [
        'THE', 'AND', 'FOR', 'NFO', 'INFO', 'DVD', 'BLU', 'RAY', 'WEB', 'HDTV',
        'RELEASE', 'GROUP', 'DATE', 'SIZE', 'CODEC', 'VIDEO', 'AUDIO', 'FORMAT',
        'NOTES', 'INSTALL', 'GREETS', 'PRESENTS', 'TEAM', 'SCENE', 'FILE', 'FILES',
    ];

    $additionalPatterns = [
        // "GROUP presents" or "GROUP brings you"
        '/\b([A-Z][A-Z0-9]{1,14})\s+(?:presents?|brings?\s+you)/i',
        // Common footer format: "--- GROUP ---"
        '/[-=]{2,}\s*([A-Z][A-Z0-9]{1,14})\s*[-=]{2,}$/mi',
        // Contact section: "irc.server.net #GROUP"
        '/irc\.[a-z0-9.-]+\s+#([A-Z][A-Z0-9]{1,14})/i',
        // Website: "www.GROUP.com/org/net"
        '/www\.([a-z][a-z0-9]{1,14})\.(?:com|org|net|info)/i',
        // ASCII art name extraction (common pattern at start)
        '/^\s*[^a-zA-Z0-9]*([A-Z][A-Z0-9]{2,14})[^a-zA-Z0-9]*\s*$/mi',
    ];

    $isPlausible = static function (string $candidate) use ($falsePositives): bool {
        $bytes = strlen($candidate);

        return ! in_array(strtoupper($candidate), $falsePositives, true)
            && $bytes >= 2
            && $bytes <= 20;
    };

    // [patterns, upper-case the result?], in priority order.
    $stages = [
        [$this->_sceneGroupPatterns, false],
        [$additionalPatterns, true],
    ];

    foreach ($stages as [$patterns, $upperCase]) {
        foreach ($patterns as $pattern) {
            if (! preg_match($pattern, $nfoContent, $matches)) {
                continue;
            }

            $candidate = trim($matches[1]);
            if ($isPlausible($candidate)) {
                return $upperCase ? strtoupper($candidate) : $candidate;
            }
        }
    }

    return null;
}
|
792
|
|
|
|
|
793
|
|
|
/**
 * Extract release date from NFO content.
 *
 * A date is only recognised when it directly follows a "date", "release(d)" or "rls"
 * label. Three notations are tried in this order: numeric day/month/year, numeric
 * year-month-day, and "Month DD, YYYY". Within a notation every labelled occurrence is
 * examined and the first one that is a real calendar date is returned.
 *
 * Numeric day/month/year values are read as DD/MM/YYYY; when that is impossible
 * (second number greater than 12) and the reverse order is possible, MM/DD/YYYY is used.
 * Two-digit years are mapped to the current century unless that would lie in the
 * future, in which case the previous century is used.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return string|null ISO date string (YYYY-MM-DD) if found, null otherwise.
 */
public function extractReleaseDate(string $nfoContent): ?string
{
    $patterns = [
        // DD/MM/YYYY or MM/DD/YYYY
        'numeric' => '/(?:date|released?|rls)\s*[:\-]?\s*(\d{1,2})[\/\-.](\d{1,2})[\/\-.](\d{2,4})/i',
        // YYYY-MM-DD
        'iso' => '/(?:date|released?|rls)\s*[:\-]?\s*(\d{4})[\/\-.](\d{1,2})[\/\-.](\d{1,2})/i',
        // Month DD, YYYY
        'named' => '/(?:date|released?|rls)\s*[:\-]?\s*(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\.?\s+(\d{1,2}),?\s+(\d{4})/i',
    ];

    $monthNumbers = [
        'jan' => 1, 'feb' => 2, 'mar' => 3, 'apr' => 4, 'may' => 5, 'jun' => 6,
        'jul' => 7, 'aug' => 8, 'sep' => 9, 'oct' => 10, 'nov' => 11, 'dec' => 12,
    ];

    foreach ($patterns as $format => $pattern) {
        if (! preg_match_all($pattern, $nfoContent, $hits, PREG_SET_ORDER)) {
            continue;
        }

        foreach ($hits as $hit) {
            if ($format === 'numeric') {
                $yearDigits = strlen($hit[3]);
                if ($yearDigits === 3) {
                    // A three-digit year is a truncated or unrelated number, not a date.
                    continue;
                }

                $year = (int) $hit[3];
                if ($yearDigits === 2) {
                    // A release cannot be dated in the future, so "99" means 1999, not 2099.
                    $year += $year <= (int) date('y') ? 2000 : 1900;
                }

                $day = (int) $hit[1];
                $month = (int) $hit[2];
                if ($month > 12 && $day <= 12) {
                    // Only MM/DD/YYYY can be meant when the second number is not a month.
                    [$day, $month] = [$month, $day];
                }
            } elseif ($format === 'iso') {
                $year = (int) $hit[1];
                $month = (int) $hit[2];
                $day = (int) $hit[3];
            } else {
                // The capture group holds exactly the three-letter month prefix.
                $year = (int) $hit[3];
                $month = $monthNumbers[strtolower($hit[1])];
                $day = (int) $hit[2];
            }

            // Reject impossible dates such as 31 February instead of emitting them.
            if (checkdate($month, $day, $year)) {
                return sprintf('%04d-%02d-%02d', $year, $month, $day);
            }
        }
    }

    return null;
}
|
837
|
|
|
|
|
838
|
|
|
/**
 * Extract video/audio codec information from NFO content.
 *
 * For each of video codec, audio codec and resolution a labelled pattern
 * ("Video: x264", "Audio: AC3", "Resolution: 1920x1080") is tried first and a bare
 * keyword search second; the first hit wins. Keys are only present when something
 * was found.
 *
 * Codec names and resolution labels are upper-cased and dots are removed from video
 * codecs ("H.264" becomes "H264"). Pixel dimensions are always reported as
 * "<width>x<height>" with a lowercase "x", whether or not they were labelled and
 * whether they were written with "x", "X" or "×".
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return array{video?: string, audio?: string, resolution?: string} Array with codec info.
 */
public function extractCodecInfo(string $nfoContent): array
{
    $result = [];

    // Video codecs
    $videoPatterns = [
        '/(?:video|codec)\s*[:\-]?\s*(x264|x265|hevc|h\.?264|h\.?265|xvid|divx|av1|vp9|mpeg[24]?)/i',
        '/\b(x264|x265|HEVC|H\.?264|H\.?265|XviD|DivX|AV1|VP9)\b/i',
    ];
    foreach ($videoPatterns as $pattern) {
        if (preg_match($pattern, $nfoContent, $matches)) {
            $result['video'] = strtoupper(str_replace('.', '', $matches[1]));
            break;
        }
    }

    // Audio codecs
    $audioPatterns = [
        '/(?:audio|sound)\s*[:\-]?\s*(aac|ac3|dts(?:-(?:hd|ma|x))?|truehd|atmos|flac|mp3|eac3|dd[+p]?|dolby)/i',
        '/\b(AAC|AC3|DTS(?:-(?:HD|MA|X))?|TrueHD|Atmos|FLAC|EAC3|DD[+P]?)\b/i',
    ];
    foreach ($audioPatterns as $pattern) {
        if (preg_match($pattern, $nfoContent, $matches)) {
            $result['audio'] = strtoupper($matches[1]);
            break;
        }
    }

    // Resolution.
    // "×" is written as an alternative rather than inside a character class: these
    // patterns run in byte mode, where a multibyte character inside [...] is split into
    // single bytes and can never match the whole character.
    $resolutionPatterns = [
        '/(?:resolution|quality)\s*[:\-]?\s*(\d{3,4}(?:[xX]|×)\d{3,4}|\d{3,4}p|[48]K|UHD|FHD|HD)/i',
        // The look-behind keeps the "HD" of the audio format "DTS-HD" from counting as a resolution.
        '/\b(?<!DTS[\s.\-])(2160p|1080p|720p|480p|4K|UHD|FHD|HD)\b/i',
        '/\b(\d{3,4})\s*(?:[xX]|×)\s*(\d{3,4})\b/',
    ];
    foreach ($resolutionPatterns as $pattern) {
        if (! preg_match($pattern, $nfoContent, $matches)) {
            continue;
        }

        if (isset($matches[2])) {
            // Bare "W x H": width and height were captured separately.
            $result['resolution'] = $matches[1].'x'.$matches[2];
        } elseif (preg_match('/^(\d{3,4})(?:[xX]|×)(\d{3,4})$/', $matches[1], $dimensions)) {
            // Labelled "WxH": normalise to the same form as the bare variant.
            $result['resolution'] = $dimensions[1].'x'.$dimensions[2];
        } else {
            $result['resolution'] = strtoupper($matches[1]);
        }
        break;
    }

    return $result;
}
|
891
|
|
|
|
|
892
|
|
|
/**
 * Extract file size information from NFO content.
 *
 * A labelled size ("Size: 1.5 GB", "File size: 700 MB") is preferred; otherwise the
 * first bare "<number> MB/GB/TB" (or MiB/GiB/TiB) in the text is used. A comma is
 * accepted as the decimal separator. All units are treated as powers of 1024.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return int|null File size in bytes if found, null otherwise.
 */
public function extractFileSize(string $nfoContent): ?int
{
    // Power of 1024 that each recognised unit stands for.
    $unitExponents = [
        'B' => 0, 'BYTE' => 0, 'BYTES' => 0,
        'KB' => 1, 'KIB' => 1,
        'MB' => 2, 'MIB' => 2,
        'GB' => 3, 'GIB' => 3,
        'TB' => 4, 'TIB' => 4,
        'PB' => 5, 'PIB' => 5,
    ];

    $sizePatterns = [
        '/(?:size|file\s*size)\s*[:\-]?\s*(\d+(?:[.,]\d+)?)\s*(bytes?|[KMGTP]B|[KMGTP]iB)/i',
        '/\b(\d+(?:[.,]\d+)?)\s*(GB|GiB|MB|MiB|TB|TiB)\b/i',
    ];

    foreach ($sizePatterns as $regex) {
        if (preg_match($regex, $nfoContent, $found) !== 1) {
            continue;
        }

        $unitKey = strtoupper($found[2]);
        if (! array_key_exists($unitKey, $unitExponents)) {
            continue;
        }

        $amount = (float) str_replace(',', '.', $found[1]);

        return (int) ($amount * (1024 ** $unitExponents[$unitKey]));
    }

    return null;
}
|
927
|
|
|
|
|
928
|
|
|
/**
 * Extract all media IDs (IMDB, TVDB, TMDB, etc.) from NFO content.
 *
 * The text is scanned for links to the supported sites. Results are grouped by site
 * in the order listed below and, within a site, in order of appearance. Repeated
 * links are not de-duplicated.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return array Array of media IDs with their sources; each entry is
 *               ['id' => string, 'source' => string].
 */
public function extractAllMediaIds(string $nfoContent): array
{
    // Source label => pattern whose first capture group is the ID on that site.
    $linkPatterns = [
        'imdb' => '/imdb\.com\/title\/(tt\d{7,8})/i',
        'thetvdb' => '/thetvdb\.com\/(?:\?tab=series&id=|series\/)(\d{1,8})/i',
        'tmdb_movie' => '/themoviedb\.org\/movie\/(\d{1,8})/i',
        'tmdb_tv' => '/themoviedb\.org\/tv\/(\d{1,8})/i',
        'tvmaze' => '/tvmaze\.com\/shows\/(\d{1,6})/i',
        'anidb' => '/anidb\.net\/(?:perl-bin\/animedb\.pl\?show=anime&aid=|anime\/)(\d{1,6})/i',
        'mal' => '/myanimelist\.net\/anime\/(\d{1,6})/i',
    ];

    $collected = [];

    foreach ($linkPatterns as $sourceLabel => $regex) {
        if (! preg_match_all($regex, $nfoContent, $found)) {
            continue;
        }

        foreach ($found[1] as $mediaId) {
            $collected[] = ['id' => $mediaId, 'source' => $sourceLabel];
        }
    }

    return $collected;
}
|
989
|
|
|
|
|
990
|
|
|
/**
 * Parse and extract comprehensive metadata from NFO content.
 *
 * Runs every individual extractor of this class on the same text and collects the
 * results under fixed keys. Each extractor decides on its own "not found" value
 * (null or an empty array), which is passed through unchanged.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return array Associative array with the keys urls, group, release_date, codec_info,
 *               file_size, media_ids, show_id, language, runtime, genre, software_info
 *               and release_title.
 */
public function parseNfoMetadata(string $nfoContent): array
{
    $metadata = [];

    $metadata['urls'] = $this->extractUrls($nfoContent);
    $metadata['group'] = $this->extractGroupName($nfoContent);
    $metadata['release_date'] = $this->extractReleaseDate($nfoContent);
    $metadata['codec_info'] = $this->extractCodecInfo($nfoContent);
    $metadata['file_size'] = $this->extractFileSize($nfoContent);
    $metadata['media_ids'] = $this->extractAllMediaIds($nfoContent);
    $metadata['show_id'] = $this->parseShowId($nfoContent);
    $metadata['language'] = $this->extractLanguage($nfoContent);
    $metadata['runtime'] = $this->extractRuntime($nfoContent);
    $metadata['genre'] = $this->extractGenre($nfoContent);
    $metadata['software_info'] = $this->extractSoftwareInfo($nfoContent);
    $metadata['release_title'] = $this->extractReleaseTitle($nfoContent);

    return $metadata;
}
|
1013
|
|
|
|
|
1014
|
|
|
/**
 * Extract language information from NFO content.
 *
 * Looks at the value following every audio-style label ("Language(s)", "Audio",
 * "Spoken", "Dialogue") and then every subtitle label ("Subtitle(s)", "Sub(s)").
 * Values may list several languages separated by ",", "/" or "&". Only names or
 * codes from the known-language list are kept.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return array Array of detected languages, capitalised (e.g. "English"), without
 *               duplicates, in order of first appearance (audio labels before
 *               subtitle labels).
 */
public function extractLanguage(string $nfoContent): array
{
    // One or more words separated by ",", "/" or "&".
    $valueList = '([A-Za-z]+(?:\s*[,\/&]\s*[A-Za-z]+)*)';

    // The longer label alternative must come first ("subtitles?" before "subs?"),
    // otherwise "Subtitles: English" is read as label "Sub" with value "titles".
    $patterns = [
        '/(?:languages?|audio|spoken?|dialogue)\s*[:\-]?\s*'.$valueList.'/i',
        '/(?:subtitles?|subs?)\s*[:\-]?\s*'.$valueList.'/i',
    ];

    // Known language names and abbreviations
    $knownLanguages = [
        'english', 'german', 'french', 'spanish', 'italian', 'dutch', 'portuguese',
        'russian', 'japanese', 'korean', 'chinese', 'mandarin', 'cantonese',
        'swedish', 'norwegian', 'danish', 'finnish', 'polish', 'czech', 'hungarian',
        'turkish', 'arabic', 'hindi', 'thai', 'vietnamese', 'indonesian', 'malay',
        'multi', 'dual', 'eng', 'ger', 'fre', 'spa', 'ita',
    ];

    $languages = [];

    foreach ($patterns as $pattern) {
        // Inspect every labelled value: an earlier label with a non-language value
        // (e.g. "Audio: AC3") must not hide a later "Language: English".
        if (! preg_match_all($pattern, $nfoContent, $hits)) {
            continue;
        }

        foreach ($hits[1] as $value) {
            $names = preg_split('/[\s,\/&]+/', strtolower($value), -1, PREG_SPLIT_NO_EMPTY);

            foreach ($names ?: [] as $name) {
                // Compare in the same capitalised form that is stored, so duplicates are caught.
                $label = ucfirst($name);

                if (in_array($name, $knownLanguages, true) && ! in_array($label, $languages, true)) {
                    $languages[] = $label;
                }
            }
        }
    }

    return $languages;
}
|
1053
|
|
|
|
|
1054
|
|
|
/**
 * Extract runtime/duration from NFO content.
 *
 * Recognised notations, tried in this order:
 * - labelled hours/minutes: "Runtime: 1h 30m", "Duration: 90min"
 * - labelled clock time: "Runtime: 01:30:00" or "1:30" (read as hours:minutes;
 *   seconds are ignored)
 * - an unlabelled "<2-3 digits> min/mins/minutes"
 *
 * Accepted labels are "runtime", "duration", "length" and "playtime".
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return int|null Runtime in minutes, or null if not found.
 */
public function extractRuntime(string $nfoContent): ?int
{
    $patterns = [
        // "Runtime: 1h 30m" or "Duration: 90min".
        // The look-ahead after "m" rejects byte units, so "Length: 700 MB" is not
        // mistaken for 700 minutes.
        '/(?:runtime|duration|length|playtime)\s*[:\-]?\s*(?:(\d{1,2})\s*h(?:ours?)?\s*)?(\d{1,3})\s*m(?!i?b\b)(?:in(?:utes?)?)?/i',
        // "Runtime: 01:30:00" or "1:30:00"
        '/(?:runtime|duration|length|playtime)\s*[:\-]?\s*(\d{1,2}):(\d{2})(?::(\d{2}))?/i',
        // "90 minutes" standalone
        '/\b(\d{2,3})\s*(?:min(?:utes?)?|mins)\b/i',
    ];

    foreach ($patterns as $index => $pattern) {
        if (! preg_match($pattern, $nfoContent, $matches)) {
            continue;
        }

        if ($index === 2) {
            return (int) $matches[1];
        }

        // Both labelled forms capture hours in group 1 and minutes in group 2.
        // An absent hours part is an empty string, which casts to 0.
        return ((int) $matches[1] * 60) + (int) $matches[2];
    }

    return null;
}
|
1091
|
|
|
|
|
1092
|
|
|
/**
 * Extract genre information from NFO content.
 *
 * Takes the rest of the line after the first "genre", "category" or "type" label,
 * splits it on whitespace and the separators , / & | and keeps the words that appear
 * in the list of known genres.
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return array Array of detected genres, capitalised, without duplicates, in order of
 *               appearance. Empty when no label or no known genre is found.
 */
public function extractGenre(string $nfoContent): array
{
    if (! preg_match('/(?:genre|category|type)\s*[:\-]?\s*([^\n\r]+)/i', $nfoContent, $found)) {
        return [];
    }

    // Accepted genre words (movies/TV, games and music).
    $validGenres = [
        'action', 'adventure', 'animation', 'biography', 'comedy', 'crime',
        'documentary', 'drama', 'family', 'fantasy', 'history', 'horror',
        'music', 'musical', 'mystery', 'romance', 'sci-fi', 'scifi', 'sport',
        'thriller', 'war', 'western', 'adult', 'xxx', 'erotic', 'anime',
        'rpg', 'fps', 'strategy', 'simulation', 'puzzle', 'racing', 'sports',
        'rock', 'pop', 'electronic', 'hip-hop', 'rap', 'classical', 'jazz',
    ];

    $detected = [];

    foreach (preg_split('/[\s,\/&|]+/', $found[1]) as $token) {
        $normalized = strtolower(trim($token));
        if (! in_array($normalized, $validGenres, true)) {
            continue;
        }

        $label = ucfirst($normalized);
        if (! in_array($label, $detected, true)) {
            $detected[] = $label;
        }
    }

    return $detected;
}
|
1127
|
|
|
|
|
1128
|
|
|
/**
 * Extract software-specific information from NFO content.
 *
 * Each key is only present when the corresponding detail was found:
 * - platform:   operating system named after a "platform", "os", "system" or
 *               "require(s)" label, lower-cased with a capital first letter
 * - version:    number after "version", "ver" or a standalone "v"
 *               (e.g. "2.5.1" or "2.5.1 build 42")
 * - protection: rest of the line after a "protection", "drm" or "copy protection"
 *               label, if it is between 3 and 49 characters long
 * - has_crack:  true when a crack-related word (cracked, keygen, serial, patch,
 *               loader, activator) occurs anywhere in the text
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return array Software info including platform, version, protection, etc.
 */
public function extractSoftwareInfo(string $nfoContent): array
{
    $info = [];

    // Platform/OS detection
    if (preg_match('/(?:platform|os|system|requires?)\s*[:\-]?\s*(windows?|linux|mac(?:os)?|unix|android|ios)/i', $nfoContent, $matches)) {
        $info['platform'] = ucfirst(strtolower($matches[1]));
    }

    // Version detection.
    // The look-behind requires the label to start a word; without it the "V" at the
    // end of e.g. "HDTV 720p" would be read as a version label and yield "720".
    if (preg_match('/(?<![a-z])(?:version|ver|v)\s*[:\-]?\s*(\d+(?:\.\d+)*(?:\s*(?:build|b)\s*\d+)?)/i', $nfoContent, $matches)) {
        $info['version'] = trim($matches[1]);
    }

    // Protection type: only the first labelled line is considered.
    if (preg_match('/(?:protection|drm|copy[ -]?protection)\s*[:\-]?\s*([^\n\r]+)/i', $nfoContent, $matches)) {
        $protection = trim($matches[1]);
        // Very short or very long values are most likely not a protection name.
        if (strlen($protection) > 2 && strlen($protection) < 50) {
            $info['protection'] = $protection;
        }
    }

    // Crack/Keygen/Serial info
    if (preg_match('/\b(cracked|keygen|serial|patch|loader|activator)\b/i', $nfoContent)) {
        $info['has_crack'] = true;
    }

    return $info;
}
|
1175
|
|
|
|
|
1176
|
|
|
/**
 * Extract release title from NFO content.
 *
 * Two strategies are tried in order:
 * 1. the rest of the line after a "title", "release" or "name" label;
 * 2. a header-style line consisting only of letters, digits, dots, dashes,
 *    underscores and whitespace, optionally framed by runs of - = * ~.
 *
 * Every hit of a strategy is examined. A hit is rejected when, after trimming, it is
 * shorter than 5 or longer than 100 characters, or when it is itself another labelled
 * field (e.g. the "Date: ..." captured from "Release Date: ...").
 *
 * @param  string  $nfoContent  The NFO content to parse.
 * @return string|null The release title if found.
 */
public function extractReleaseTitle(string $nfoContent): ?string
{
    $patterns = [
        // "Title: Movie Name" or "Release: Title.Goes.Here"
        '/(?:title|release|name)\s*[:\-]?\s*([^\n\r]{5,100})/i',
        // Scene-style title in header
        '/(?:^|\n)\s*(?:[\-=*~]{3,}\s*)?([A-Za-z0-9][\w.\-\s]{10,80}?)(?:\s*[\-=*~]{3,})?\s*(?:\n|$)/m',
    ];

    foreach ($patterns as $pattern) {
        if (! preg_match_all($pattern, $nfoContent, $hits)) {
            continue;
        }

        // Walk through all hits so a rejected first one does not hide a later valid title.
        foreach ($hits[1] as $candidate) {
            $title = trim($candidate);
            $length = strlen($title);

            if ($length < 5 || $length > 100) {
                continue;
            }

            // Filter out common non-title content
            if (preg_match('/^(?:date|size|codec|format|video|audio|language|runtime|genre)\s*:/i', $title)) {
                continue;
            }

            return $title;
        }
    }

    return null;
}
|
1204
|
|
|
|
|
1205
|
|
|
/**
 * Clean and normalize NFO content.
 *
 * Steps, in order:
 * 1. pass the text through the cp437toUTF() helper (presumably CP437 to UTF-8; the
 *    helper is defined outside this class);
 * 2. convert CRLF and lone CR line endings to LF;
 * 3. strip trailing whitespace from every non-blank line (leading whitespace is kept
 *    so that NFO art stays aligned);
 * 4. keep at most two blank lines in a row.
 *
 * @param  string  $nfoContent  Raw NFO content.
 * @return string Cleaned NFO content.
 */
public function cleanNfoContent(string $nfoContent): string
{
    $normalized = str_replace(["\r\n", "\r"], "\n", cp437toUTF($nfoContent));

    $output = [];
    $blankRun = 0; // number of blank lines seen since the last non-blank line

    foreach (explode("\n", $normalized) as $rawLine) {
        if (trim($rawLine) !== '') {
            $blankRun = 0;
            $output[] = rtrim($rawLine);

            continue;
        }

        // Blank line: the third and later ones of a run are dropped.
        if (++$blankRun <= 2) {
            $output[] = '';
        }
    }

    return implode("\n", $output);
}
|
1239
|
|
|
|
|
1240
|
|
|
/**
 * Calculate an NFO quality score based on content analysis.
 *
 * Starting from a base of 50, the score is adjusted as follows:
 * - length:         -20 below 100 bytes, +15 for 501..19999 bytes, +5 from 20000 bytes
 * - keywords:       +2 per keyword from $this->_nfoKeywords found, at most +20
 * - media IDs:      +5 per ID (IMDB, TVDB, ...), at most +15
 * - URLs:           +2 per URL, at most +10
 * - codec info:     +3 per detected item (video, audio, resolution)
 * - ASCII art:      +10 when detected
 * - structure:      the result of analyzeStructure()
 * - group name:     +8 when detected
 * - release date:   +5 when detected
 * - languages:      +2 per language, at most +6
 * - runtime:        +4 when detected
 * - control bytes:  -5 per control character left in the text, at most -20
 *
 * @param  string  $nfoContent  The NFO content to analyze.
 * @return int Quality score from 0-100.
 */
public function calculateNfoQuality(string $nfoContent): int
{
    $byteLength = strlen($nfoContent);

    // Base score plus length bonus/penalty.
    $total = 50 + match (true) {
        $byteLength < 100 => -20,
        $byteLength > 500 && $byteLength < 20000 => 15,
        $byteLength >= 20000 => 5, // Longer NFOs might have too much filler
        default => 0,
    };

    // Keyword matching (case-insensitive substring search).
    $keywordHits = 0;
    foreach ($this->_nfoKeywords as $keyword) {
        $keywordHits += stripos($nfoContent, $keyword) !== false ? 1 : 0;
    }
    $total += min($keywordHits * 2, 20);

    // Capped per-item bonuses.
    $total += min(count($this->extractAllMediaIds($nfoContent)) * 5, 15);
    $total += min(count($this->extractUrls($nfoContent)) * 2, 10);
    $total += min(count($this->extractLanguage($nfoContent)) * 2, 6);

    // Codec info presence: one step per non-empty entry.
    $total += count(array_filter($this->extractCodecInfo($nfoContent))) * 3;

    // Flat bonuses for features that are either present or not.
    $total += $this->hasAsciiArt($nfoContent) ? 10 : 0;
    $total += $this->extractGroupName($nfoContent) !== null ? 8 : 0;
    $total += $this->extractReleaseDate($nfoContent) !== null ? 5 : 0;
    $total += $this->extractRuntime($nfoContent) !== null ? 4 : 0;

    // Structural elements bonus
    $total += $this->analyzeStructure($nfoContent);

    // Penalty for binary content remnants (control characters other than TAB, LF, CR).
    $controlChars = (int) preg_match_all('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', $nfoContent);
    $total -= min($controlChars * 5, 20);

    return max(0, min(100, $total));
}
|
1332
|
|
|
|
|
1333
|
|
|
/**
 * Detect ASCII art in NFO content.
 *
 * Returns true as soon as one of these is found:
 * - five or more border characters (- = * ~ # @) in a row
 * - three or more box-drawing characters in a row
 * - three or more block/shade characters in a row
 * - two runs of at least three / \ | _ characters on the same line
 * - any non-whitespace character repeated five or more times in a row
 *
 * The box-drawing and block checks need valid UTF-8 input; on other input they
 * simply do not match and the remaining checks still apply.
 *
 * @param  string  $nfoContent  The NFO content to analyze.
 * @return bool True if ASCII art is detected.
 */
protected function hasAsciiArt(string $nfoContent): bool
{
    $asciiArtPatterns = [
        // Decorative borders
        '/[-=*~#@]{5,}/',
        // Box drawing characters. The "u" modifier makes each glyph count as one
        // character; in byte mode the class is a set of single bytes, so one glyph
        // (or an unrelated character such as "€") would already satisfy {3,}.
        '/[┌┐└┘├┤┬┴┼│─╔╗╚╝║═]{3,}/u',
        // Extended ASCII art characters (same reason for the "u" modifier)
        '/[░▒▓█▄▀■□▪▫]{3,}/u',
        // Slash, backslash, pipe and underscore runs. Four backslashes in the PHP
        // literal are needed to put one literal backslash into the character class.
        '/[\/\\\\|_]{3,}.*[\/\\\\|_]{3,}/',
        // Repeated special chars in artistic patterns
        '/(\S)\1{4,}/',
    ];

    foreach ($asciiArtPatterns as $pattern) {
        // preg_match() yields false (not 1) when a "u" pattern meets invalid UTF-8,
        // which is treated here like "no match".
        if (preg_match($pattern, $nfoContent) === 1) {
            return true;
        }
    }

    return false;
}
|
1363
|
|
|
|
|
1364
|
|
|
/**
 * Analyze structural elements of NFO content.
 *
 * The score is the sum of:
 * - section headers: for each of three header styles (decorated divider line,
 *   "[Section]" line, "<Section>" line) one point per matching line, at most 3 per style
 * - labelled fields: half a point per "Label: value" / "Label. value" line,
 *   rounded down, at most 5
 * - volume: 2 points when there are at least 10 non-blank lines
 *
 * @param  string  $nfoContent  The NFO content to analyze.
 * @return int Score based on structural quality (0-15).
 */
protected function analyzeStructure(string $nfoContent): int
{
    $headerStyles = [
        '/^[ \t]*[-=*]{2,}.*[-=*]{2,}[ \t]*$/m', // Decorative section dividers
        '/^[ \t]*\[.*\][ \t]*$/m', // [Section Name]
        '/^[ \t]*<.*>[ \t]*$/m', // <Section Name>
    ];

    $headerPoints = 0;
    foreach ($headerStyles as $regex) {
        $lineHits = (int) preg_match_all($regex, $nfoContent);
        $headerPoints += min($lineHits, 3);
    }

    // Labeled fields (Field: Value format); two fields are worth one point.
    $fieldHits = (int) preg_match_all('/^[ \t]*[A-Za-z][A-Za-z\s]{2,20}\s*[:\.].*$/m', $nfoContent);
    $fieldPoints = min(intdiv($fieldHits, 2), 5);

    // Count lines that contain something other than whitespace.
    $nonBlankLines = 0;
    foreach (explode("\n", $nfoContent) as $line) {
        if (trim($line) !== '') {
            $nonBlankLines++;
        }
    }
    $volumePoints = $nonBlankLines >= 10 ? 2 : 0;

    return min(15, $headerPoints + $fieldPoints + $volumePoints);
}
|
1402
|
|
|
|
|
1403
|
|
|
/**
 * Retrieve NFO content for a release.
 *
 * Looks the record up through ReleaseNfo::getReleaseNfo() and returns its "nfo"
 * attribute as-is. NOTE(review): this method performs no decompression itself; if
 * stored NFOs are compressed, that is presumably handled by
 * ReleaseNfo::getReleaseNfo() - confirm there.
 *
 * @param  int  $releaseId  The release ID.
 * @return string|null The NFO content, or null when there is no record or its content is empty.
 */
public function getNfoContent(int $releaseId): ?string
{
    $record = ReleaseNfo::getReleaseNfo($releaseId);

    $hasContent = $record !== null && ! empty($record->nfo);

    return $hasContent ? $record->nfo : null;
}
|
1419
|
|
|
|
|
1420
|
|
|
/**
 * Store NFO content for a release.
 *
 * Creates or updates the ReleaseNfo row of the release and then sets the release's
 * "nfostatus" to self::NFO_FOUND. Any error raised along the way is logged and
 * reported through the return value instead of being thrown.
 *
 * @param  int  $releaseId  The release ID.
 * @param  string  $nfoContent  The NFO content to store.
 * @param  bool  $compress  Whether to compress the content. When true, the stored value
 *                          is the gzcompress() output preceded by the four bytes
 *                          1F 8B 08 00.
 * @return bool True on success, false on failure.
 */
public function storeNfoContent(int $releaseId, string $nfoContent, bool $compress = true): bool
{
    try {
        $payload = $nfoContent;
        if ($compress) {
            $payload = "\x1f\x8b\x08\x00".gzcompress($nfoContent);
        }

        ReleaseNfo::updateOrCreate(
            ['releases_id' => $releaseId],
            ['nfo' => $payload]
        );

        Release::whereId($releaseId)->update(['nfostatus' => self::NFO_FOUND]);
    } catch (Throwable $e) {
        Log::error("Failed to store NFO for release {$releaseId}: ".$e->getMessage());

        return false;
    }

    return true;
}
|
1447
|
|
|
|
|
1448
|
|
|
/**
 * Clear the settings cache.
 *
 * Removes the four cached NFO settings entries so that they are loaded afresh the
 * next time they are needed, e.g. after the settings have been changed.
 */
public function clearSettingsCache(): void
{
    $cachedSettingKeys = [
        'nfo_maxnfoprocessed',
        'nfo_maxnforetries',
        'nfo_maxsizetoprocessnfo',
        'nfo_minsizetoprocessnfo',
    ];

    foreach ($cachedSettingKeys as $cacheKey) {
        Cache::forget($cacheKey);
    }
}
|
1460
|
|
|
} |
|
1461
|
|
|
|