NzbContentParser::extractMessageIDs()   F
last analyzed

Complexity

Conditions 26
Paths 211

Size

Total Lines 83
Code Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 39
dl 0
loc 83
rs 3.1958
c 0
b 0
f 0
cc 26
nc 211
nop 11

How to fix   Long Method    Complexity    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

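Commonly applied refactorings include Extract Method and, when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.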

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

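There are several approaches to avoid long parameter lists: replace a parameter with a method call, pass the whole object instead of several of its fields, or introduce a parameter object that groups related values.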

1
<?php
2
3
namespace App\Services\AdditionalProcessing;
4
5
use Blacklight\NZB;
6
use Illuminate\Support\Facades\File;
7
use Illuminate\Support\Facades\Log;
8
9
/**
10
 * Service for parsing NZB file contents and extracting file metadata.
11
 * Handles NZB repair, file listing, and message ID extraction.
12
 */
13
class NzbContentParser
14
{
15
    /**
     * @param NZB  $nzb       Used to resolve a GUID to an NZB path (NZBPath) and to list the files of an NZB (nzbFileList)
     * @param bool $debugMode When true, failures to persist a repaired NZB are written to the debug log
     * @param bool $echoCLI   When true together with $debugMode, failed repairs are echoed to the console
     */
    public function __construct(
15
        private readonly NZB $nzb,
16
        private readonly bool $debugMode = false,
17
        private readonly bool $echoCLI = false
18
    ) {}
20
21
    /**
     * Load the NZB belonging to a release and return its file list.
     *
     * The NZB is first read through unzipGzipFile(); if that yields nothing a raw
     * repair of the file on disk is tried. When the file list comes back empty a
     * second repair pass on the loaded contents is attempted before giving up.
     *
     * @param string $guid The release GUID to find the NZB for
     * @return array{contents: array, error: string|null} File list sorted naturally by key
     *                                                    with a null error, or an empty
     *                                                    list with an error message
     */
    public function parseNzb(string $guid): array
    {
        $path = $this->nzb->NZBPath($guid);
        if ($path === false) {
            return ['contents' => [], 'error' => 'NZB not found for GUID: '.$guid];
        }

        // NOTE(review): static analysis reported this emptiness check as constant,
        // presumably because of unzipGzipFile()'s declared return type - confirm.
        $xml = unzipGzipFile($path);
        if (! $xml) {
            // Nothing usable was read: try repairing the raw file contents instead
            $xml = $this->attemptRawRepair($path);
        }
        if (! $xml) {
            return ['contents' => [], 'error' => 'NZB is empty or broken for GUID: '.$guid];
        }

        $listOptions = ['no-file-key' => false, 'strip-count' => true];

        $files = $this->nzb->nzbFileList($xml, $listOptions);
        if (count($files) === 0) {
            // The XML parsed to zero files: repair it and list again
            $repairedXml = $this->repairNzb($xml, $path, $guid);
            if ($repairedXml !== null) {
                $files = $this->nzb->nzbFileList($repairedXml, $listOptions);
            }

            if (count($files) === 0) {
                return ['contents' => [], 'error' => 'NZB is potentially broken for GUID: '.$guid];
            }
        }

        ksort($files, SORT_NATURAL);

        return ['contents' => $files, 'error' => null];
    }
61
62
    /**
     * Read the NZB straight from disk and run it through repairNzb().
     *
     * Fallback for parseNzb() when the regular loader returned nothing. Paths ending
     * in ".gz" (case-insensitive) are gzip-decoded before the repair is attempted.
     * This is a best-effort step: no exception leaves this method.
     *
     * @param string $nzbPath Path of the NZB file on disk
     * @return string|null Repaired XML, or null when the file is empty/unreadable,
     *                     cannot be decompressed, or cannot be repaired
     */
    private function attemptRawRepair(string $nzbPath): ?string
    {
        try {
            $rawFile = @File::get($nzbPath);
            if (! $rawFile) {
                return null;
            }

            if (str_ends_with(strtolower($nzbPath), '.gz')) {
                $rawFile = @gzdecode($rawFile);
                if ($rawFile === false) {
                    return null;
                }
            }

            // No GUID is available at this level, so repair diagnostics omit it
            return $this->repairNzb($rawFile, $nzbPath, '');
        } catch (\Throwable $e) {
            // Never propagate, but leave a trace when debugging (mirrors repairNzb())
            if ($this->debugMode) {
                Log::debug('Raw NZB repair failed for '.$nzbPath.': '.$e->getMessage());
            }
        }

        return null;
    }
88
89
    /**
     * Attempt to repair a potentially broken NZB XML string.
     *
     * Steps: strip control characters (tab, LF and CR are kept), wrap the content in
     * an <nzb> root when none is present or append a missing closing </nzb>, then
     * verify the result parses and contains at least one <file> element. When the
     * repaired text differs from the input it is written back to $originalPath
     * (gzip-encoded if the path ends in ".gz"). A failed write does not fail the repair.
     *
     * @param string $raw          NZB XML as read from disk
     * @param string $originalPath File the repaired XML is persisted to
     * @param string $guid         Release GUID, only used in the console diagnostic (may be empty)
     * @return string|null The repaired XML, or null when it still cannot be parsed
     *                     or contains no <file> element
     */
    public function repairNzb(string $raw, string $originalPath, string $guid): ?string
    {
        // Remove common binary / control chars except tab, newline, carriage return.
        // preg_replace() returns null on a PCRE error; continue with the raw input then.
        $fixed = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $raw) ?? $raw;

        if (! str_contains(strtolower($fixed), '<nzb')) {
            // Missing opening <nzb ...> tag: wrap the whole content
            $fixed = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<nzb xmlns=\"http://www.newzbin.com/DTD/2003/nzb\">\n".$fixed."\n</nzb>";
        } elseif (! preg_match('/<\/nzb>\s*$/i', $fixed)) {
            // Opening tag present but the document is not closed
            $fixed .= "\n</nzb>";
        }

        // Parse with libxml errors collected internally, restoring the previous setting afterwards
        $opts = LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_COMPACT | LIBXML_NONET | LIBXML_NOCDATA | LIBXML_PARSEHUGE;
        $prev = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($fixed, 'SimpleXMLElement', $opts);
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors($prev);

        if ($xml === false || empty($xml->file)) {
            if ($this->debugMode && $this->echoCLI) {
                $guidInfo = $guid ? ' for GUID: '.$guid : '';
                echo 'NZB repair failed'.$guidInfo.' ('.count($errors).' XML errors)'.PHP_EOL;
            }

            return null;
        }

        // Persist a repaired version if content changed
        if ($fixed !== $raw) {
            try {
                $payload = str_ends_with(strtolower($originalPath), '.gz') ? gzencode($fixed) : $fixed;
                if ($payload === false) {
                    // Writing `false` would replace the NZB with an empty file
                    throw new \RuntimeException('gzencode() failed for '.$originalPath);
                }

                // With warnings suppressed a failed write is only visible through the return value
                if (@File::put($originalPath, $payload) === false) {
                    throw new \RuntimeException('could not write '.$originalPath);
                }
            } catch (\Throwable $e) {
                if ($this->debugMode) {
                    Log::debug('Failed to persist repaired NZB: '.$e->getMessage());
                }
            }
        }

        return $fixed;
    }
140
141
    /**
     * Scan the files of an NZB and pick the message IDs needed for further processing.
     *
     * Every entry of $nzbContents is read through its 'title' and 'segments' keys
     * (missing keys default to '' and []). Support/NFO files are skipped entirely.
     * For each remaining file the method records, first match wins:
     *  - whether any title looks like a compressed archive part,
     *  - segment IDs of a "sample" file that is not a JPEG (video sample),
     *  - segment IDs of a JPEG, unless the group name looks music related,
     *  - the first segment ID of a "sample" video file (for MediaInfo),
     *  - the first segment ID and captured extension of an audio file,
     * and counts every title matching $ignoreBookRegex.
     *
     * @param array  $nzbContents        File entries, e.g. as returned in parseNzb()['contents']
     * @param string $groupName          Group name, checked for music related terms
     * @param int    $segmentsToDownload Maximum number of segment IDs collected for sample/JPG
     * @param bool   $processThumbnails  Enable video sample lookup
     * @param bool   $processJPGSample   Enable JPEG lookup
     * @param bool   $processMediaInfo   Enable MediaInfo video lookup
     * @param bool   $processAudioInfo   Enable audio file lookup
     * @param string $audioFileRegex     Regex fragment (no delimiters); its first capture
     *                                   group, if any, becomes audioInfoExtension
     * @param string $videoFileRegex     Regex fragment (no delimiters) for video extensions
     * @param string $supportFileRegex   Regex fragment (no delimiters) for support files to skip
     * @param string $ignoreBookRegex    Complete pattern including delimiters; matching titles
     *                                   are counted. NOTE(review): the name suggests "ignore",
     *                                   yet matches are counted as book files - confirm with caller
     *
     * @return array{
     *     hasCompressedFile: bool,
     *     sampleMessageIDs: array,
     *     jpgMessageIDs: array,
     *     mediaInfoMessageID: string,
     *     audioInfoMessageID: string,
     *     audioInfoExtension: string,
     *     bookFileCount: int
     * }
     */
    public function extractMessageIDs(
        array $nzbContents,
        string $groupName,
        int $segmentsToDownload,
        bool $processThumbnails,
        bool $processJPGSample,
        bool $processMediaInfo,
        bool $processAudioInfo,
        string $audioFileRegex,
        string $videoFileRegex,
        string $supportFileRegex,
        string $ignoreBookRegex
    ): array {
        $result = [
            'hasCompressedFile' => false,
            'sampleMessageIDs' => [],
            'jpgMessageIDs' => [],
            'mediaInfoMessageID' => '',
            'audioInfoMessageID' => '',
            'audioInfoExtension' => '',
            'bookFileCount' => 0,
        ];

        // What may follow an extension: end of title, space, quote, ")", "]" or "-".
        // The "]" has to be escaped, otherwise it closes the character class early.
        $extensionEnd = '($|[ ")\\]-])';

        // Patterns do not depend on the current file, so build them once
        $supportPattern = '/(?:'.$supportFileRegex.'|nfo\\b|inf\\b|ofn\\b)'.$extensionEnd.'(?!.{20,})/i';
        $compressedPattern = '/(\\.(part\\d+|[rz]\\d+|rar|0+|0*10?|zipr\\d{2,3}|zipx?|7z(?:\\.\\d{3})?|(?:tar\\.)?(?:gz|bz2|xz))("|\\s*\\.rar)*'
            .$extensionEnd
            .'|"[a-f0-9]{32}\\.[1-9]\\d{1,2}".*\\(\\d+\\/\\d{2,}\\)$)/i';
        $videoPattern = '/'.$videoFileRegex.'[. ")\]]/i';
        $audioPattern = '/'.$audioFileRegex.'[. ")\]]/i';

        // JPEGs posted in music related groups are treated as CD covers and skipped
        $isMusicGroup = (bool) preg_match('/flac|lossless|mp3|music|inner-sanctum|sound/i', $groupName);

        foreach ($nzbContents as $file) {
            try {
                $title = $file['title'] ?? '';
                $segments = $file['segments'] ?? [];

                // Skip support/nfo files
                if (preg_match($supportPattern, $title)) {
                    continue;
                }

                // Compressed file detection
                if (! $result['hasCompressedFile'] && preg_match($compressedPattern, $title)) {
                    $result['hasCompressedFile'] = true;
                }

                // Look for a video sample (not an image)
                if ($processThumbnails && empty($result['sampleMessageIDs']) && ! empty($segments)
                    && stripos($title, 'sample') !== false
                    && ! preg_match('/\.jpe?g$/i', $title)
                ) {
                    $result['sampleMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                // Look for a JPG picture (not a CD cover)
                if ($processJPGSample && empty($result['jpgMessageIDs']) && ! empty($segments)
                    && ! $isMusicGroup
                    && preg_match('/\.jpe?g[. ")\]]/i', $title)
                ) {
                    $result['jpgMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                // Look for a video file for MediaInfo (sample video)
                if ($processMediaInfo && empty($result['mediaInfoMessageID']) && ! empty($segments[0])
                    && stripos($title, 'sample') !== false
                    && preg_match($videoPattern, $title)
                ) {
                    $result['mediaInfoMessageID'] = (string) $segments[0];
                }

                // Look for an audio file; the first segment must exist because its ID is stored
                if ($processAudioInfo && empty($result['audioInfoMessageID']) && ! empty($segments[0])
                    && preg_match($audioPattern, $title, $type)
                ) {
                    // The caller's regex is not guaranteed to define a capture group
                    $result['audioInfoExtension'] = $type[1] ?? '';
                    $result['audioInfoMessageID'] = (string) $segments[0];
                }

                // Count book files
                if (preg_match($ignoreBookRegex, $title)) {
                    $result['bookFileCount']++;
                }
            } catch (\ErrorException $e) {
                // A single bad entry (or pattern warning turned exception) must not abort the scan
                Log::debug($e->getTraceAsString());
            }
        }

        return $result;
    }
238
239
    /**
     * Collect the leading segment message IDs of a file as strings.
     *
     * Walks the segments by numeric index starting at 0 and stops as soon as either
     * $limit IDs were taken or the number of available segments is reached.
     *
     * @param array $segments Segment message IDs, indexed from 0
     * @param int   $limit    Maximum number of IDs to return
     * @return array The first IDs, each cast to string
     */
    private function extractSegments(array $segments, int $limit): array
    {
        $collected = [];
        $available = count($segments);

        $position = 0;
        while ($position < $limit && $position < $available) {
            $collected[] = (string) $segments[$position];
            $position++;
        }

        return $collected;
    }
254
255
    /**
     * Resolve the NZB path of a release by delegating to the injected NZB helper.
     *
     * @param string $guid The release GUID
     * @return string|false Whatever NZB::NZBPath() returns; parseNzb() treats false as "NZB not found"
     */
    public function getNzbPath(string $guid): string|false
    {
        $resolvedPath = $this->nzb->NZBPath($guid);

        return $resolvedPath;
    }
262
}
263
264