|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace App\Services\Nzb; |
|
6
|
|
|
|
|
7
|
|
|
/**
 * Service for parsing and extracting information from NZB file contents.
 *
 * Handles parsing the NZB XML structure and extracting file lists, extensions,
 * sizes, segment/part counts and group names, and offers subject-line
 * heuristics (NFO / hidden NFO / PAR2 detection, total part extraction).
 *
 * The service is stateless; every method works only on its arguments.
 */
class NzbParserService
{
    /**
     * Matches a known file extension inside a subject line.
     *
     * The extension must be followed by a double quote or a space, and that
     * delimiter must not be followed by ")", "|" or "-". Capture group 1 holds
     * the extension without the leading dot.
     */
    private const FILE_EXTENSION_PATTERN =
        '/\.(\d{2,3}|7z|ace|ai7|srr|srt|sub|aiff|asc|avi|audio|bin|bz2|'
        .'c|cfc|cfm|chm|class|conf|cpp|cs|css|csv|cue|deb|divx|doc|dot|'
        .'eml|enc|exe|file|gif|gz|hlp|htm|html|image|iso|jar|java|jpeg|'
        .'jpg|js|lua|m|m3u|mkv|mm|mov|mp3|mp4|mpg|nfo|nzb|odc|odf|odg|odi|odp|'
        .'ods|odt|ogg|par2|parity|pdf|pgp|php|pl|png|ppt|ps|py|r\d{2,3}|'
        .'ram|rar|rb|rm|rpm|rtf|sfv|sig|sql|srs|swf|sxc|sxd|sxi|sxw|tar|'
        .'tex|tgz|txt|vcf|video|vsd|wav|wma|wmv|xls|xml|xpi|xvid|zip7|zip)'
        .'[" ](?!([\)|\-]))/i';

    /**
     * Extensions that rule a subject out as a hidden NFO candidate
     * (executables, archives, media, documents, code, data, verification files, misc).
     */
    private const NON_NFO_EXTENSION_PATTERN = '/\.(?:'
        // Executables
        .'exe|com|bat|cmd|scr|dll|msi|pkg|deb|rpm|apk|ipa|app|'
        // Archives
        .'zip|rar|[rst]\d{2}|7z|ace|tar|gz|bz2|xz|lzma|cab|iso|bin|cue|img|mdf|nrg|dmg|vhd|'
        // Audio
        .'mp3|flac|ogg|aac|wav|wma|m4a|opus|ape|wv|mpc|'
        // Video
        .'avi|mkv|mp4|mov|wmv|mpg|mpeg|ts|vob|m2ts|webm|flv|ogv|divx|xvid|'
        // Images
        .'jpg|jpeg|png|gif|bmp|tif|tiff|psd|webp|svg|ico|raw|cr2|nef|'
        // Documents
        .'pdf|doc|docx|xls|xlsx|ppt|pptx|odt|ods|odp|rtf|epub|mobi|azw|'
        // Code
        .'html|htm|css|js|php|py|java|c|cpp|h|cs|sql|json|xml|yml|yaml|'
        // Data
        .'db|dbf|mdb|accdb|sqlite|csv|'
        // Verification
        .'par2?|sfv|md5|sha1|sha256|sha512|crc|'
        // Misc
        .'url|lnk|cfg|ini|inf|sys|tmp|bak|log|srt|sub|idx|ass|ssa|vtt'
        .')\b/i';

    /**
     * Retrieve various information on an NZB file (the subject, file extension,
     * file size, file completion, group names, number of parts) for every
     * <file> element.
     *
     * When subject keys are used and two files resolve to the same key, their
     * segments and groups are appended to the same entry while the scalar
     * fields (title, ext, size, partsactual) reflect the last file seen.
     *
     * @param  string  $nzb  The NZB contents in a string.
     * @param  array  $options  Optional settings:
     *                          - 'no-file-key': Use numeric keys instead of subject (default: true)
     *                          - 'strip-count': Strip file/part count from subject for sorting (default: false);
     *                            only applied when 'no-file-key' is disabled
     * @return array<int|string, array{title: string, ext: string, segments: list<string>, size: int, partstotal?: string, partsactual: int, groups: list<string>}>
     *                Empty if the input is not an NZB, otherwise one entry per file.
     */
    public function parseNzbFileList(string $nzb, array $options = []): array
    {
        $options += [
            'no-file-key' => true,
            'strip-count' => false,
        ];

        // Normalise once so a falsy non-boolean value (0, null, '') consistently
        // selects subject keys instead of leaving the key stuck at 0.
        $useNumericKeys = (bool) $options['no-file-key'];
        $stripCount = (bool) $options['strip-count'];

        if ($nzb === '') {
            return [];
        }

        // 0x0F (shift-in) control characters are stripped before parsing, as libxml rejects them.
        [$xml] = $this->loadXmlQuietly(str_replace("\x0F", '', $nzb));
        if ($xml === false || strtolower($xml->getName()) !== 'nzb') {
            return [];
        }

        $result = [];
        $index = 0;

        foreach ($xml->file as $file) {
            // Subject.
            $title = (string) $file['subject'];

            if ($useNumericKeys) {
                $key = $index++;
            } else {
                $key = $stripCount ? $this->buildSortKey($title) : $title;
            }

            $result[$key]['title'] = $title;

            // Extension.
            $result[$key]['ext'] = $this->detectFileExtension($title);

            // Segments (message ids) and accumulated file size.
            $result[$key]['segments'] ??= [];
            $fileSize = 0;
            $numSegments = 0;
            foreach ($file->segments->segment ?? [] as $segment) {
                $result[$key]['segments'][] = (string) $segment;
                $fileSize += (int) $segment['bytes'];
                $numSegments++;
            }
            $result[$key]['size'] = $fileSize;

            // File completion: expected total comes from a trailing "...N)" in the subject.
            if (preg_match('/(\d+)\)$/', $title, $parts)) {
                $result[$key]['partstotal'] = $parts[1];
            }
            $result[$key]['partsactual'] = $numSegments;

            // Groups.
            $result[$key]['groups'] ??= [];
            foreach ($file->groups->group ?? [] as $group) {
                $result[$key]['groups'][] = (string) $group;
            }
        }

        return $result;
    }

    /**
     * Load and parse an NZB file from its contents string.
     *
     * libxml's error handling mode is restored and its error buffer cleared
     * before returning, so repeated calls neither leak buffered errors nor
     * report an error that belongs to an earlier document.
     *
     * @param  string  $nzbContents  The decompressed NZB file contents
     * @param  bool  $echoErrors  Whether to echo errors on parse failure
     * @param  string  $guid  Optional GUID for error messages
     * @return \SimpleXMLElement|false The parsed NZB file as SimpleXMLElement or false on failure
     */
    public function parseNzbXml(string $nzbContents, bool $echoErrors = false, string $guid = ''): \SimpleXMLElement|false
    {
        // Same inputs the previous empty() check rejected silently.
        if ($nzbContents === '' || $nzbContents === '0') {
            return false;
        }

        [$nzbFile, $errors] = $this->loadXmlQuietly($nzbContents);
        if ($nzbFile !== false) {
            return $nzbFile;
        }

        if ($echoErrors) {
            // libxml messages end with a newline; trim it so the sentence stays on one line.
            $errorMsg = $errors !== [] ? ' - XML error: '.trim($errors[0]->message) : '';
            echo PHP_EOL."Unable to load NZB: {$guid} appears to be an invalid NZB{$errorMsg}, skipping.".PHP_EOL;
        }

        return false;
    }

    /**
     * Get the file extension from a subject line.
     *
     * Split-archive volumes (".r00", ".r001", ...) are reported as "rar".
     *
     * @param  string  $subject  The file subject/name
     * @return string The detected extension in lower case, or empty string if none found
     */
    public function detectFileExtension(string $subject): string
    {
        if (! preg_match(self::FILE_EXTENSION_PATTERN, $subject, $ext)) {
            return '';
        }

        if (preg_match('/\.r\d{2,3}/i', $ext[0])) {
            return 'rar';
        }

        return strtolower($ext[1]);
    }

    /**
     * Check if a subject indicates an NFO file.
     *
     * @param  string  $subject  The file subject
     * @return array|false ['hidden' => false, 'priority' => 1] for a standard NFO extension,
     *                     ['hidden' => false, 'priority' => 2] for an alternative naming pattern,
     *                     or false if not an NFO
     */
    public function detectNfoFile(string $subject): array|false
    {
        // Standard NFO extensions
        if (preg_match('/\.\b(nfo|diz|info?)\b(?![.-])/i', $subject)) {
            return ['hidden' => false, 'priority' => 1];
        }

        // Alternative NFO naming patterns (group-specific or obfuscated)
        if (preg_match('/(?:^|["\s])(?:file(?:_?id)?|readme|release|info(?:rmation)?|about|desc(?:ription)?|notes?|read\.?me|00-|000-|0-|_-_).*?\.(?:txt|nfo|diz)(?:["\s]|$)/i', $subject)) {
            return ['hidden' => false, 'priority' => 2];
        }

        return false;
    }

    /**
     * Check if a subject might indicate a hidden NFO file.
     *
     * A subject is a candidate when any of the four heuristics below matches,
     * and is then rejected if it carries a well-known non-NFO extension.
     *
     * @param  string  $subject  The file subject
     * @param  int  $segmentCount  The number of segments in the file
     * @return array|false ['hidden' => true, 'priority' => 10] or false if not a hidden NFO
     */
    public function detectHiddenNfoFile(string $subject, int $segmentCount): array|false
    {
        $isSingleSegment = $segmentCount === 1;

        $isHiddenNfoCandidate =
            // Pattern 1: Single segment files ending with (1/1)
            ($isSingleSegment && preg_match('/\(1\/1\)$/i', $subject))
            // Pattern 2: Small segment count (at most 2) with NFO-like names but no extension
            || ($segmentCount <= 2 && preg_match('/(?:^|["\s])(?:nfo|info|readme|release|file_?id|about)(?:["\s]|$)/i', $subject))
            // Pattern 3: Scene-style naming (name, separator, name) directly followed by (1/1)
            || ($isSingleSegment && preg_match('/^[a-z0-9]+[._-][a-z0-9._-]+["\s]*\(1\/1\)/i', $subject))
            // Pattern 4: yEnc subject with a size in parentheses followed by [1/1]
            || ($isSingleSegment && preg_match('/yEnc\s*\(\d+\)\s*\[1\/1\]/i', $subject));

        if (! $isHiddenNfoCandidate) {
            return false;
        }

        // Exclusion: the subject must NOT look like another common file type.
        if (preg_match(self::NON_NFO_EXTENSION_PATTERN, $subject)) {
            return false;
        }

        return ['hidden' => true, 'priority' => 10];
    }

    /**
     * Check if a subject indicates a PAR2 index file.
     *
     * NOTE(review): this only matches when the subject *ends* in ".par2"
     * (case-insensitive), so volume files such as "x.vol0+1.par2" also match and
     * subjects with a trailing part counter do not — confirm this is what callers expect.
     *
     * @param  string  $subject  The file subject
     * @return bool True if the subject ends with ".par2"
     */
    public function detectPar2IndexFile(string $subject): bool
    {
        return preg_match('/\.par2$/i', $subject) === 1;
    }

    /**
     * Calculate artificial parts from a subject line.
     *
     * @param  string  $subject  The file subject
     * @return int The estimated total parts, or 0 if not determinable
     */
    public function extractPartsTotal(string $subject): int
    {
        // Trailing "current/total" counter, optionally bracketed (e.g., "[15/20]", "(15/20)").
        if (
            preg_match('/(?:[(\[])?(\d+)[\/)\\]](\d+)[)\]]?$/', $subject, $parts)
            && (int) $parts[2] > 0
        ) {
            return (int) $parts[2];
        }

        // Fallback: any number immediately before a closing parenthesis at the end.
        if (preg_match('/(\d+)\)$/', $subject, $parts)) {
            return (int) $parts[1];
        }

        return 0;
    }

    /**
     * Parse an XML string while capturing libxml diagnostics instead of emitting warnings.
     *
     * The caller's libxml error mode is restored afterwards and the error
     * buffer is emptied both before and after parsing, so the returned errors
     * belong to this document only.
     *
     * @return array{0: \SimpleXMLElement|false, 1: list<\LibXMLError>}
     */
    private function loadXmlQuietly(string $contents): array
    {
        $previousMode = libxml_use_internal_errors(true);
        libxml_clear_errors();

        try {
            $xml = simplexml_load_string($contents);
            $errors = libxml_get_errors();
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousMode);
        }

        return [$xml, $errors];
    }

    /**
     * Build a sortable array key from a subject: drop the file/part counter and
     * rename plain ".par2" / ".rar" so they sort before ".volNNN+NNN.par2" / ".partNN.rar".
     */
    private function buildSortKey(string $subject): string
    {
        // Strip file / part count to get proper sorting.
        $key = preg_replace('#\d+[- ._]?(/|\||[o0]f)[- ._]?\d+?(?![- ._]\d)#i', '', $subject) ?? $subject;

        if (str_contains($key, '.par2') && ! preg_match('#\.vol\d+\+\d+\.par2#i', $key)) {
            return str_replace('.par2', '.vol0.par2', $key);
        }

        if (preg_match('#\.rar[^a-z0-9]#i', $key) && ! preg_match('#\.part\d+\.rar$#i', $key)) {
            return preg_replace('#\.rar(?:[^a-z0-9])#i', '.part0.rar', $key) ?? $key;
        }

        return $key;
    }
}
|
310
|
|
|
|
|
311
|
|
|
|
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.