|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace App\Services\AdditionalProcessing; |
|
4
|
|
|
|
|
5
|
|
|
use Blacklight\NZB; |
|
6
|
|
|
use Blacklight\utility\Utility; |
|
7
|
|
|
use Illuminate\Support\Facades\File; |
|
8
|
|
|
use Illuminate\Support\Facades\Log; |
|
9
|
|
|
|
|
10
|
|
|
/**
 * Service for parsing NZB file contents and extracting file metadata.
 *
 * Covers three concerns:
 *  - loading the NZB belonging to a release GUID and turning it into a file list,
 *  - best-effort repair of damaged NZB XML (optionally persisting the repaired copy),
 *  - scanning a parsed file list for samples, JPGs, media/audio files,
 *    compressed archives and book files.
 */
class NzbContentParser
{
    /** Options passed to NZB::nzbFileList() on every parse attempt. */
    private const FILE_LIST_OPTIONS = ['no-file-key' => false, 'strip-count' => true];

    public function __construct(
        private readonly NZB $nzb,
        private readonly bool $debugMode = false,
        private readonly bool $echoCLI = false
    ) {}

    /**
     * Parse an NZB file and return its contents as an array of files.
     *
     * The NZB is first read through Utility::unzipGzipFile(); if that yields nothing,
     * the raw file is read and repaired. If the resulting XML produces an empty file
     * list, one repair pass is attempted before giving up.
     *
     * @param  string  $guid  The release GUID to find the NZB for
     * @return array{contents: array, error: string|null} File list sorted naturally by key,
     *                                                    or an empty list plus an error message
     */
    public function parseNzb(string $guid): array
    {
        $nzbPath = $this->nzb->NZBPath($guid);
        if ($nzbPath === false) {
            return ['contents' => [], 'error' => 'NZB not found for GUID: '.$guid];
        }

        $nzbContents = Utility::unzipGzipFile($nzbPath);
        if (! $nzbContents) {
            // Normal loading failed: fall back to the raw bytes on disk and try to repair them.
            $nzbContents = $this->attemptRawRepair($nzbPath, $guid);
            if (! $nzbContents) {
                return ['contents' => [], 'error' => 'NZB is empty or broken for GUID: '.$guid];
            }
        }

        $fileList = $this->nzb->nzbFileList($nzbContents, self::FILE_LIST_OPTIONS);
        if (count($fileList) === 0) {
            // The XML loaded but listed no files: attempt a repair and parse once more.
            $repaired = $this->repairNzb($nzbContents, $nzbPath, $guid);
            if ($repaired !== null) {
                $fileList = $this->nzb->nzbFileList($repaired, self::FILE_LIST_OPTIONS);
            }
            if (count($fileList) === 0) {
                return ['contents' => [], 'error' => 'NZB is potentially broken for GUID: '.$guid];
            }
        }

        ksort($fileList, SORT_NATURAL);

        return ['contents' => $fileList, 'error' => null];
    }

    /**
     * Read the NZB straight from disk (gunzipping when the path ends in ".gz")
     * and run it through repairNzb().
     *
     * @param  string  $nzbPath  Path of the NZB file
     * @param  string  $guid  Release GUID, only used to label debug output
     * @return string|null Repaired XML, or null when the file is unreadable, cannot be
     *                     decompressed, or cannot be repaired
     */
    private function attemptRawRepair(string $nzbPath, string $guid = ''): ?string
    {
        try {
            $rawFile = @File::get($nzbPath);
            if (! $rawFile) {
                return null;
            }

            if (! str_ends_with(strtolower($nzbPath), '.gz')) {
                return $this->repairNzb($rawFile, $nzbPath, $guid);
            }

            $decompressed = @gzdecode($rawFile);
            if ($decompressed !== false) {
                return $this->repairNzb($decompressed, $nzbPath, $guid);
            }
        } catch (\Throwable $e) {
            // Best effort: a failed repair must never abort processing, but leave a trace when debugging.
            if ($this->debugMode) {
                Log::debug('Raw NZB repair failed for '.$nzbPath.': '.$e->getMessage());
            }
        }

        return null;
    }

    /**
     * Attempt to repair a potentially broken NZB XML string.
     *
     * Steps: strip control characters (tab, LF and CR are kept), add a missing <nzb>
     * wrapper or a missing closing </nzb> tag, then verify that the result parses and
     * contains at least one <file> element. When the repaired text differs from the
     * input it is written back to $originalPath (gzip-encoded for ".gz" paths).
     *
     * @param  string  $raw  NZB XML as loaded
     * @param  string  $originalPath  File the repaired XML is persisted to
     * @param  string  $guid  Release GUID, only used to label debug output (may be empty)
     * @return string|null The repaired XML, or null when it still does not parse / has no files
     */
    public function repairNzb(string $raw, string $originalPath, string $guid): ?string
    {
        // preg_replace() returns null on a PCRE failure; fall back to the untouched input then.
        $fixed = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $raw) ?? $raw;

        if (stripos($fixed, '<nzb') === false) {
            // No opening <nzb ...> tag at all: wrap the whole content in a fresh document.
            $fixed = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<nzb xmlns=\"http://www.newzbin.com/DTD/2003/nzb\">\n".$fixed."\n</nzb>";
        } elseif (! preg_match('/<\/nzb>\s*$/i', $fixed)) {
            // Opening tag present but the document does not end with </nzb>.
            $fixed .= "\n</nzb>";
        }

        // Parse with libxml errors collected internally, then restore the previous error mode.
        $opts = LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_COMPACT | LIBXML_NONET | LIBXML_NOCDATA | LIBXML_PARSEHUGE;
        $prev = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($fixed, 'SimpleXMLElement', $opts);
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors($prev);

        if ($xml === false || empty($xml->file)) {
            if ($this->debugMode && $this->echoCLI) {
                $guidInfo = $guid ? ' for GUID: '.$guid : '';
                echo 'NZB repair failed'.$guidInfo.' ('.count($errors).' XML errors)'.PHP_EOL;
            }

            return null;
        }

        if ($fixed !== $raw) {
            $this->persistRepairedNzb($originalPath, $fixed);
        }

        return $fixed;
    }

    /**
     * Write repaired NZB XML back to disk, gzip-encoding it when the path ends in ".gz".
     * Failures are logged in debug mode and otherwise ignored.
     */
    private function persistRepairedNzb(string $path, string $xml): void
    {
        try {
            $payload = $xml;
            if (str_ends_with(strtolower($path), '.gz')) {
                $payload = gzencode($xml);
                if ($payload === false) {
                    // Never overwrite the stored NZB with an empty file when compression fails.
                    if ($this->debugMode) {
                        Log::debug('Failed to persist repaired NZB: gzencode() failed for '.$path);
                    }

                    return;
                }
            }

            @File::put($path, $payload);
        } catch (\Throwable $e) {
            if ($this->debugMode) {
                Log::debug('Failed to persist repaired NZB: '.$e->getMessage());
            }
        }
    }

    /**
     * Process NZB contents to extract message IDs for different file types.
     *
     * Each entry of $nzbContents is read through its 'title' and 'segments' keys.
     * Files whose title looks like a support/NFO file are skipped entirely. For the
     * remaining files the first match per category wins; later files never overwrite
     * an already-filled category.
     *
     * @param  array  $nzbContents  Parsed file list (as returned in parseNzb()['contents'])
     * @param  string  $groupName  Group name; JPG lookup is skipped when it matches
     *                             flac|lossless|mp3|music|inner-sanctum|sound
     * @param  int  $segmentsToDownload  Maximum number of segment IDs collected for sample/JPG
     * @param  bool  $processThumbnails  Collect segments of the first non-JPG "sample" file
     * @param  bool  $processJPGSample  Collect segments of the first JPG file
     * @param  bool  $processMediaInfo  Record the first segment of the first sample video
     * @param  bool  $processAudioInfo  Record the first segment of the first audio file
     * @param  string  $audioFileRegex  Pattern fragment (no delimiters); capture group 1 must be the extension
     * @param  string  $videoFileRegex  Pattern fragment (no delimiters)
     * @param  string  $supportFileRegex  Pattern fragment (no delimiters), embedded in a non-capturing group
     * @param  string  $ignoreBookRegex  Complete pattern including delimiters; matching titles are counted
     * @return array{
     *   hasCompressedFile: bool,
     *   sampleMessageIDs: array,
     *   jpgMessageIDs: array,
     *   mediaInfoMessageID: string,
     *   audioInfoMessageID: string,
     *   audioInfoExtension: string,
     *   bookFileCount: int
     * }
     */
    public function extractMessageIDs(
        array $nzbContents,
        string $groupName,
        int $segmentsToDownload,
        bool $processThumbnails,
        bool $processJPGSample,
        bool $processMediaInfo,
        bool $processAudioInfo,
        string $audioFileRegex,
        string $videoFileRegex,
        string $supportFileRegex,
        string $ignoreBookRegex
    ): array {
        $result = [
            'hasCompressedFile' => false,
            'sampleMessageIDs' => [],
            'jpgMessageIDs' => [],
            'mediaInfoMessageID' => '',
            'audioInfoMessageID' => '',
            'audioInfoExtension' => '',
            'bookFileCount' => 0,
        ];

        // Patterns that do not depend on the current file are built once.
        // The terminator class is [ ")\]-]; an unescaped "]" would end the class early and
        // turn the tail into the literal alternative "-]".
        $supportPattern = '/(?:'.$supportFileRegex.'|nfo\\b|inf\\b|ofn\\b)($|[ ")\\]-])(?!.{20,})/i';
        $compressedPattern = '/(\\.(part\\d+|[rz]\\d+|rar|0+|0*10?|zipr\\d{2,3}|zipx?|7z(?:\\.\\d{3})?|(?:tar\\.)?(?:gz|bz2|xz))("|\\s*\\.rar)*($|[ ")\\]-])|"[a-f0-9]{32}\\.[1-9]\\d{1,2}".*\\(\\d+\\/\\d{2,}\\)$)/i';
        $videoPattern = '/'.$videoFileRegex.'[. ")\]]/i';
        $audioPattern = '/'.$audioFileRegex.'[. ")\]]/i';
        $isAudioGroup = (bool) preg_match('/flac|lossless|mp3|music|inner-sanctum|sound/i', $groupName);

        foreach ($nzbContents as $file) {
            try {
                $title = $file['title'] ?? '';
                $segments = $file['segments'] ?? [];
                $hasSegments = ! empty($segments);
                $hasFirstSegment = ! empty($segments[0]);
                $isSample = stripos($title, 'sample') !== false;

                // Skip support/nfo files
                if (preg_match($supportPattern, $title)) {
                    continue;
                }

                // Compressed file detection
                if (! $result['hasCompressedFile'] && preg_match($compressedPattern, $title)) {
                    $result['hasCompressedFile'] = true;
                }

                // Look for a video sample (not an image)
                if ($processThumbnails && empty($result['sampleMessageIDs']) && $hasSegments
                    && $isSample
                    && ! preg_match('/\.jpe?g$/i', $title)
                ) {
                    $result['sampleMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                // Look for a JPG picture (not a CD cover, hence the audio-group exclusion)
                if ($processJPGSample && empty($result['jpgMessageIDs']) && $hasSegments
                    && ! $isAudioGroup
                    && preg_match('/\.jpe?g[. ")\]]/i', $title)
                ) {
                    $result['jpgMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                // Look for a video file for MediaInfo (sample video)
                if ($processMediaInfo && empty($result['mediaInfoMessageID']) && $hasFirstSegment
                    && $isSample
                    && preg_match($videoPattern, $title)
                ) {
                    $result['mediaInfoMessageID'] = (string) $segments[0];
                }

                // Look for an audio file. Both the first segment and the captured extension
                // must exist so the two audio fields are always filled together.
                if ($processAudioInfo && empty($result['audioInfoMessageID']) && $hasFirstSegment
                    && preg_match($audioPattern, $title, $type)
                    && isset($type[1])
                ) {
                    $result['audioInfoExtension'] = $type[1];
                    $result['audioInfoMessageID'] = (string) $segments[0];
                }

                // Count book files (an empty pattern would only raise a warning per file)
                if ($ignoreBookRegex !== '' && preg_match($ignoreBookRegex, $title)) {
                    $result['bookFileCount']++;
                }
            } catch (\ErrorException $e) {
                // NOTE(review): presumably raised when the host error handler converts PHP
                // warnings (e.g. an invalid caller-supplied regex) into exceptions - confirm.
                Log::debug($e->getTraceAsString());
            }
        }

        return $result;
    }

    /**
     * Extract segment message IDs up to a limit.
     *
     * @param  array  $segments  Segment message IDs of one file
     * @param  int  $limit  Maximum number of IDs to return; values below 1 yield an empty list
     * @return array The first $limit entries, each cast to string
     */
    private function extractSegments(array $segments, int $limit): array
    {
        $firstSegments = array_slice(array_values($segments), 0, max(0, $limit));

        return array_map(static fn ($id): string => (string) $id, $firstSegments);
    }

    /**
     * Get the NZB path for a GUID.
     *
     * @return string|false Whatever NZB::NZBPath() returns for the GUID (false is treated
     *                      as "not found" by parseNzb())
     */
    public function getNzbPath(string $guid): string|false
    {
        return $this->nzb->NZBPath($guid);
    }
}
|
264
|
|
|
|
|
265
|
|
|
|