NzbContentParser::repairNzb()   C
last analyzed

Complexity

Conditions 12
Paths 36

Size

Total Lines 47
Code Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 27
dl 0
loc 47
rs 6.9666
c 0
b 0
f 0
cc 12
nc 36
nop 3

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace App\Services\AdditionalProcessing;
4
5
use Blacklight\NZB;
6
use Illuminate\Support\Facades\File;
7
use Illuminate\Support\Facades\Log;
8
9
/**
10
 * Service for parsing NZB file contents and extracting file metadata.
11
 * Handles NZB repair, file listing, and message ID extraction.
12
 */
13
class NzbContentParser
14
{
15
    /**
     * @param NZB  $nzb       NZB helper used to resolve NZB paths and to list the files inside an NZB.
     * @param bool $debugMode When true, a failure to persist a repaired NZB is written to the debug log.
     * @param bool $echoCLI   When true together with $debugMode, failed repairs are echoed to stdout.
     */
    public function __construct(
        private readonly NZB $nzb,
        private readonly bool $debugMode = false,
        private readonly bool $echoCLI = false,
    ) {}
20
21
    /**
     * Load the NZB that belongs to a release and list the files it describes.
     *
     * Repair is attempted at two points: on the raw file when the regular reader
     * yields nothing, and on the XML when it reads fine but lists no files.
     *
     * @param string $guid The release GUID to find the NZB for
     * @return array{contents: array, error: string|null} Naturally key-sorted file list, or an empty list plus an error message
     */
    public function parseNzb(string $guid): array
    {
        // Every failure has the same shape; only the message prefix differs.
        $fail = static fn (string $reason): array => ['contents' => [], 'error' => $reason.$guid];

        $path = $this->nzb->NZBPath($guid);
        if ($path === false) {
            return $fail('NZB not found for GUID: ');
        }

        // Fall back to repairing the raw file contents when the reader returns nothing usable.
        $xml = unzipGzipFile($path) ?: $this->attemptRawRepair($path);
        if (! $xml) {
            return $fail('NZB is empty or broken for GUID: ');
        }

        $listOptions = ['no-file-key' => false, 'strip-count' => true];
        $files = $this->nzb->nzbFileList($xml, $listOptions);

        // An empty listing gets one repair attempt before giving up.
        if (count($files) === 0) {
            $repairedXml = $this->repairNzb($xml, $path, $guid);
            if ($repairedXml !== null) {
                $files = $this->nzb->nzbFileList($repairedXml, $listOptions);
            }
        }

        if (count($files) === 0) {
            return $fail('NZB is potentially broken for GUID: ');
        }

        ksort($files, SORT_NATURAL);

        return ['contents' => $files, 'error' => null];
    }
61
62
    /**
     * Read the NZB file straight from disk and run it through repairNzb().
     *
     * Paths ending in ".gz" are gzip-decoded first; any other file is repaired as-is.
     * This is best-effort: an unreadable, empty or undecodable file yields null
     * instead of an exception.
     *
     * @param string $nzbPath Path of the NZB file on disk.
     * @return string|null The repaired XML, or null when the file could not be read, decoded or repaired.
     */
    private function attemptRawRepair(string $nzbPath): ?string
    {
        try {
            $rawFile = @File::get($nzbPath);
            if (! $rawFile) {
                return null;
            }

            if (! str_ends_with(strtolower($nzbPath), '.gz')) {
                return $this->repairNzb($rawFile, $nzbPath, '');
            }

            // gzdecode() emits a warning and returns false on corrupt data.
            $decompressed = @gzdecode($rawFile);
            if ($decompressed === false) {
                return null;
            }

            return $this->repairNzb($decompressed, $nzbPath, '');
        } catch (\Throwable $e) {
            // Still best-effort, but leave a trace when debugging instead of failing silently.
            if ($this->debugMode) {
                Log::debug('Failed to read NZB for raw repair ('.$nzbPath.'): '.$e->getMessage());
            }
        }

        return null;
    }
88
89
    /**
     * Attempt to repair a potentially broken NZB XML string.
     *
     * The markup is normalised (control characters stripped, missing <nzb> wrapper or
     * closing tag added) and then parsed. The repair counts as successful only when the
     * result parses and contains at least one <file> element. When the normalised
     * markup differs from the input, it is written back to $originalPath (gzip-encoded
     * if the path ends in ".gz"); a failed write never fails the repair itself.
     *
     * @param string $raw          The NZB XML as read from disk.
     * @param string $originalPath Path the NZB was read from; overwritten when the content changed.
     * @param string $guid         Release GUID, used only in the CLI failure message (may be empty).
     * @return string|null The normalised XML, or null when it still cannot be parsed or lists no files.
     */
    public function repairNzb(string $raw, string $originalPath, string $guid): ?string
    {
        $fixed = $this->normalizeNzbMarkup($raw);

        // Parse with libxml errors collected internally rather than emitted as warnings.
        $opts = LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_COMPACT | LIBXML_NONET | LIBXML_NOCDATA | LIBXML_PARSEHUGE;
        $prev = libxml_use_internal_errors(true);
        try {
            $xml = simplexml_load_string($fixed, 'SimpleXMLElement', $opts);
            $errors = libxml_get_errors();
        } finally {
            // Always restore the caller's libxml error mode, even if parsing throws.
            libxml_clear_errors();
            libxml_use_internal_errors($prev);
        }

        if ($xml === false || empty($xml->file)) {
            if ($this->debugMode && $this->echoCLI) {
                $guidInfo = $guid ? ' for GUID: '.$guid : '';
                echo 'NZB repair failed'.$guidInfo.' ('.count($errors).' XML errors)'.PHP_EOL;
            }

            return null;
        }

        if ($fixed !== $raw) {
            $this->persistRepairedNzb($originalPath, $fixed);
        }

        return $fixed;
    }

    /**
     * Strip control characters and make sure the markup is wrapped in an <nzb> element.
     */
    private function normalizeNzbMarkup(string $raw): string
    {
        // Remove binary / control chars except tab, newline and carriage return.
        // preg_replace() returns null on a PCRE failure; keep the input untouched in that case.
        $fixed = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $raw) ?? $raw;

        // No opening <nzb ...> tag at all: wrap the whole content.
        if (stripos($fixed, '<nzb') === false) {
            return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<nzb xmlns=\"http://www.newzbin.com/DTD/2003/nzb\">\n".$fixed."\n</nzb>";
        }

        // Opening tag present: only make sure the document ends with the closing tag.
        if (! preg_match('/<\/nzb>\s*$/i', $fixed)) {
            $fixed .= "\n</nzb>";
        }

        return $fixed;
    }

    /**
     * Write repaired XML back to disk, gzip-encoding it when the path ends in ".gz".
     *
     * Failures are logged in debug mode and otherwise ignored.
     */
    private function persistRepairedNzb(string $path, string $xml): void
    {
        try {
            $payload = $xml;
            if (str_ends_with(strtolower($path), '.gz')) {
                $payload = gzencode($xml);
                // Never hand `false` to File::put(): that would replace the NZB with an empty file.
                if ($payload === false) {
                    throw new \RuntimeException('gzencode() failed for '.$path);
                }
            }

            if (@File::put($path, $payload) === false) {
                throw new \RuntimeException('could not write '.$path);
            }
        } catch (\Throwable $e) {
            if ($this->debugMode) {
                Log::debug('Failed to persist repaired NZB: '.$e->getMessage());
            }
        }
    }
140
141
    /**
     * Scan the NZB file list and pick the message IDs needed for additional processing.
     *
     * Each entry is read via its 'title' and 'segments' keys. Support/NFO files are
     * skipped entirely. For every kind of sample only the first matching file is used.
     *
     * @param array  $nzbContents        File entries, each with 'title' and 'segments'.
     * @param string $groupName          Group name; JPG samples are skipped when it matches the music-related pattern below.
     * @param int    $segmentsToDownload Maximum number of segments collected for video/JPG samples.
     * @param bool   $processThumbnails  Collect segments of a "sample" file that is not a JPEG.
     * @param bool   $processJPGSample   Collect segments of a JPEG file.
     * @param bool   $processMediaInfo   Pick the first segment of a "sample" video file.
     * @param bool   $processAudioInfo   Pick the first segment of an audio file.
     * @param string $audioFileRegex     Pattern fragment (no delimiters); its first capture group is stored as the extension.
     * @param string $videoFileRegex     Pattern fragment (no delimiters) matching video files.
     * @param string $supportFileRegex   Pattern fragment (no delimiters) matching support files to skip.
     * @param string $ignoreBookRegex    Complete pattern (with delimiters); every matching title increments bookFileCount.
     *
     * @return array{
     *     hasCompressedFile: bool,
     *     sampleMessageIDs: array,
     *     jpgMessageIDs: array,
     *     mediaInfoMessageID: string,
     *     audioInfoMessageID: string,
     *     audioInfoExtension: string,
     *     bookFileCount: int
     * }
     */
    public function extractMessageIDs(
        array $nzbContents,
        string $groupName,
        int $segmentsToDownload,
        bool $processThumbnails,
        bool $processJPGSample,
        bool $processMediaInfo,
        bool $processAudioInfo,
        string $audioFileRegex,
        string $videoFileRegex,
        string $supportFileRegex,
        string $ignoreBookRegex
    ): array {
        $result = [
            'hasCompressedFile' => false,
            'sampleMessageIDs' => [],
            'jpgMessageIDs' => [],
            'mediaInfoMessageID' => '',
            'audioInfoMessageID' => '',
            'audioInfoExtension' => '',
            'bookFileCount' => 0,
        ];

        foreach ($nzbContents as $file) {
            try {
                $title = $file['title'] ?? '';
                $segments = $file['segments'] ?? [];

                // Skip support/nfo files
                if (preg_match('/(?:'.$supportFileRegex.'|nfo\\b|inf\\b|ofn\\b)($|[ ")]|-])(?!.{20,})/i', $title)) {
                    continue;
                }

                // Compressed file detection
                if (! $result['hasCompressedFile'] && preg_match(
                    '/(\\.(part\\d+|[rz]\\d+|rar|0+|0*10?|zipr\\d{2,3}|zipx?|7z(?:\\.\\d{3})?|(?:tar\\.)?(?:gz|bz2|xz))("|\\s*\\.rar)*($|[ ")]|-])|"[a-f0-9]{32}\\.[1-9]\\d{1,2}".*\\(\\d+\\/\\d{2,}\\)$)/i',
                    $title
                )) {
                    $result['hasCompressedFile'] = true;
                }

                // Look for a video sample (not an image)
                if ($processThumbnails && empty($result['sampleMessageIDs']) && ! empty($segments)
                    && stripos($title, 'sample') !== false
                    && ! preg_match('/\.jpe?g$/i', $title)
                ) {
                    $result['sampleMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                // Look for a JPG picture (not a CD cover)
                if ($processJPGSample && empty($result['jpgMessageIDs']) && ! empty($segments)
                    && ! preg_match('/flac|lossless|mp3|music|inner-sanctum|sound/i', $groupName)
                    && preg_match('/\.jpe?g[. ")\]]/i', $title)
                ) {
                    $result['jpgMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                // Look for a video file for MediaInfo (sample video)
                if ($processMediaInfo && empty($result['mediaInfoMessageID']) && ! empty($segments[0])
                    && stripos($title, 'sample') !== false
                    && preg_match('/'.$videoFileRegex.'[. ")\]]/i', $title)
                ) {
                    $result['mediaInfoMessageID'] = (string) $segments[0];
                }

                // Look for an audio file. Require the first segment up front (as the MediaInfo
                // branch does) so the extension is never recorded without a message ID.
                if ($processAudioInfo && empty($result['audioInfoMessageID']) && ! empty($segments[0])
                    && preg_match('/'.$audioFileRegex.'[. ")\]]/i', $title, $type)
                ) {
                    $result['audioInfoMessageID'] = (string) $segments[0];
                    // The caller-supplied pattern may lack a capture group.
                    $result['audioInfoExtension'] = $type[1] ?? '';
                }

                // Count book files
                if (preg_match($ignoreBookRegex, $title)) {
                    $result['bookFileCount']++;
                }
            } catch (\ErrorException $e) {
                Log::debug($e->getTraceAsString());
            }
        }

        return $result;
    }
238
239
    /**
     * Return the first $limit segment message IDs as strings.
     *
     * Segments are read by integer position starting at 0; fewer IDs are returned
     * when the list is shorter than the limit.
     */
    private function extractSegments(array $segments, int $limit): array
    {
        $take = min($limit, count($segments));
        $messageIds = [];

        $position = 0;
        while ($position < $take) {
            $messageIds[] = (string) $segments[$position];
            $position++;
        }

        return $messageIds;
    }
254
255
    /**
     * Resolve the NZB path for a release GUID.
     *
     * Thin pass-through to NZB::NZBPath(); the result is returned untouched.
     *
     * @param string $guid Release GUID.
     * @return string|false The value returned by NZB::NZBPath().
     */
    public function getNzbPath(string $guid): string|false
    {
        $resolvedPath = $this->nzb->NZBPath($guid);

        return $resolvedPath;
    }
262
}
263
264