Passed
Push — master ( 25b802...040228 )
by Darko
10:52
created

NzbContentParser::extractMessageIDs()   F

Complexity

Conditions 26
Paths 211

Size

Total Lines 83
Code Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 39
dl 0
loc 83
rs 3.1958
c 0
b 0
f 0
cc 26
nc 211
nop 11

How to fix   Long Method    Complexity    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

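Commonly applied refactorings include Extract Method and, when many parameters or temporary variables get in the way of extracting, Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.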

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists, such as Replace Parameter with Method Call, Introduce Parameter Object, or Preserve Whole Object.

1
<?php
2
3
namespace App\Services\AdditionalProcessing;
4
5
use Blacklight\NZB;
6
use Blacklight\utility\Utility;
7
use Illuminate\Support\Facades\File;
8
use Illuminate\Support\Facades\Log;
9
10
/**
11
 * Service for parsing NZB file contents and extracting file metadata.
12
 * Handles NZB repair, file listing, and message ID extraction.
13
 */
14
class NzbContentParser
{
    /**
     * Options handed to NZB::nzbFileList() for every listing attempt.
     */
    private const FILE_LIST_OPTIONS = ['no-file-key' => false, 'strip-count' => true];

    /**
     * What may follow a file extension inside a subject line: end of string,
     * or one of space, double quote, ")", "]" or "-".
     *
     * The "]" must be escaped inside the character class; left unescaped it
     * closes the class early and the pattern only matches the literal "-]".
     */
    private const TITLE_TERMINATOR = '($|[ ")\\]-])';

    /**
     * Extensions (and the "md5.NNN" (x/yy) posting style) that mark a file as part of an archive.
     */
    private const COMPRESSED_FILE_REGEX =
        '/(\\.(part\\d+|[rz]\\d+|rar|0+|0*10?|zipr\\d{2,3}|zipx?|7z(?:\\.\\d{3})?|(?:tar\\.)?(?:gz|bz2|xz))("|\\s*\\.rar)*'
        .self::TITLE_TERMINATOR
        .'|"[a-f0-9]{32}\\.[1-9]\\d{1,2}".*\\(\\d+\\/\\d{2,}\\)$)/i';

    public function __construct(
        private readonly NZB $nzb,
        private readonly bool $debugMode = false,
        private readonly bool $echoCLI = false,
    ) {}

    /**
     * Parse an NZB file and return its contents as an array of files.
     *
     * If the file cannot be read or yields no files, one repair pass is
     * attempted before giving up.
     *
     * @param string $guid The release GUID to find the NZB for
     * @return array{contents: array, error: string|null}
     */
    public function parseNzb(string $guid): array
    {
        $nzbPath = $this->getNzbPath($guid);
        if ($nzbPath === false) {
            return ['contents' => [], 'error' => 'NZB not found for GUID: '.$guid];
        }

        $nzbContents = Utility::unzipGzipFile($nzbPath);
        if (! $nzbContents) {
            // Normal decompression produced nothing: retry from the raw bytes on disk.
            $nzbContents = $this->attemptRawRepair($nzbPath, $guid);
            if ($nzbContents === null) {
                return ['contents' => [], 'error' => 'NZB is empty or broken for GUID: '.$guid];
            }
        }

        $fileList = $this->nzb->nzbFileList($nzbContents, self::FILE_LIST_OPTIONS);
        if (count($fileList) === 0) {
            // The XML parsed to nothing; repair it and list again.
            $repaired = $this->repairNzb($nzbContents, $nzbPath, $guid);
            if ($repaired !== null) {
                $fileList = $this->nzb->nzbFileList($repaired, self::FILE_LIST_OPTIONS);
            }
            if (count($fileList) === 0) {
                return ['contents' => [], 'error' => 'NZB is potentially broken for GUID: '.$guid];
            }
        }

        ksort($fileList, SORT_NATURAL);

        return ['contents' => $fileList, 'error' => null];
    }

    /**
     * Read the NZB straight from disk (gunzipping when the path ends in ".gz")
     * and run it through repairNzb().
     *
     * @param string $nzbPath Path of the NZB on disk
     * @param string $guid    Release GUID, only used in diagnostics
     * @return string|null The repaired XML, or null when nothing usable could be recovered
     */
    private function attemptRawRepair(string $nzbPath, string $guid = ''): ?string
    {
        try {
            $rawFile = @File::get($nzbPath);
            if ($rawFile === false || $rawFile === '' || $rawFile === '0') {
                return null;
            }

            if ($this->isGzipPath($nzbPath)) {
                $rawFile = @gzdecode($rawFile);
                if ($rawFile === false) {
                    return null;
                }
            }

            return $this->repairNzb($rawFile, $nzbPath, $guid);
        } catch (\Throwable $e) {
            // Best effort: an unreadable file simply means there is nothing to repair.
            if ($this->debugMode) {
                Log::debug('Raw NZB repair failed: '.$e->getMessage());
            }
        }

        return null;
    }

    /**
     * Attempt to repair a potentially broken NZB XML string.
     *
     * Control characters are stripped, a missing <nzb> wrapper or closing tag
     * is added, and the result is accepted only if it parses and contains at
     * least one <file> element. When the repair changed the content, the fixed
     * version is written back to $originalPath (gzip-encoded for ".gz" paths);
     * a failed write is logged in debug mode but does not fail the repair.
     *
     * @param string $raw          The NZB XML as read
     * @param string $originalPath Where the NZB lives on disk
     * @param string $guid         Release GUID, only used in diagnostics (may be empty)
     * @return string|null The repaired XML, or null when it still cannot be parsed
     */
    public function repairNzb(string $raw, string $originalPath, string $guid): ?string
    {
        // Remove common binary / control chars except tab, newline, carriage return.
        // preg_replace() returns null on a PCRE error; fall back to the untouched input.
        $fixed = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $raw) ?? $raw;

        if (stripos($fixed, '<nzb') === false) {
            // No opening <nzb ...> tag at all: wrap the whole content.
            $fixed = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<nzb xmlns=\"http://www.newzbin.com/DTD/2003/nzb\">\n".$fixed."\n</nzb>";
        } elseif (! preg_match('/<\/nzb>\s*$/i', $fixed)) {
            $fixed .= "\n</nzb>";
        }

        $opts = LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_COMPACT | LIBXML_NONET | LIBXML_NOCDATA | LIBXML_PARSEHUGE;
        $prev = libxml_use_internal_errors(true);
        try {
            $xml = simplexml_load_string($fixed, 'SimpleXMLElement', $opts);
            $errors = libxml_get_errors();
        } finally {
            // Always hand the process-wide libxml error state back as we found it.
            libxml_clear_errors();
            libxml_use_internal_errors($prev);
        }

        if ($xml === false || empty($xml->file)) {
            if ($this->debugMode && $this->echoCLI) {
                $guidInfo = $guid ? ' for GUID: '.$guid : '';
                echo 'NZB repair failed'.$guidInfo.' ('.count($errors).' XML errors)'.PHP_EOL;
            }

            return null;
        }

        if ($fixed !== $raw) {
            $this->persistRepairedNzb($originalPath, $fixed);
        }

        return $fixed;
    }

    /**
     * Write repaired NZB content back to disk, gzip-encoding it for ".gz" paths.
     *
     * Failures are reported through the debug log only; nothing is written
     * when compression fails, so the file on disk is never replaced by an
     * empty payload.
     */
    private function persistRepairedNzb(string $path, string $contents): void
    {
        try {
            $payload = $contents;
            if ($this->isGzipPath($path)) {
                $payload = gzencode($contents);
                if ($payload === false) {
                    $this->logPersistFailure('gzencode() failed');

                    return;
                }
            }

            // put() reports failure through its return value, not an exception.
            if (@File::put($path, $payload) === false) {
                $this->logPersistFailure('could not write '.$path);
            }
        } catch (\Throwable $e) {
            $this->logPersistFailure($e->getMessage());
        }
    }

    /**
     * Record a failed write of a repaired NZB when debug mode is on.
     */
    private function logPersistFailure(string $reason): void
    {
        if ($this->debugMode) {
            Log::debug('Failed to persist repaired NZB: '.$reason);
        }
    }

    /**
     * Whether the path names a gzip file (case-insensitive ".gz" suffix).
     */
    private function isGzipPath(string $path): bool
    {
        return str_ends_with(strtolower($path), '.gz');
    }

    /**
     * Process NZB contents to extract message IDs for different file types.
     *
     * Each entry of $nzbContents is read through its 'title' and 'segments'
     * keys. Support files are skipped entirely; for every other file the first
     * match of each requested kind wins. A file whose processing raises an
     * ErrorException is logged at debug level and skipped.
     *
     * @param array  $nzbContents        File entries, each with 'title' and 'segments'
     * @param string $groupName          Group name; groups matching flac, lossless, mp3, music, inner-sanctum or sound are excluded from JPG sampling
     * @param int    $segmentsToDownload Maximum number of message IDs taken for a sample or JPG
     * @param bool   $processThumbnails  Collect message IDs of a video sample
     * @param bool   $processJPGSample   Collect message IDs of a JPG picture
     * @param bool   $processMediaInfo   Pick the first segment of a sample video
     * @param bool   $processAudioInfo   Pick the first segment of an audio file
     * @param string $audioFileRegex     Pattern fragment (no delimiters); capture group 1 becomes the extension
     * @param string $videoFileRegex     Pattern fragment (no delimiters) for video extensions
     * @param string $supportFileRegex   Pattern fragment (no delimiters) for files to skip
     * @param string $ignoreBookRegex    Complete pattern (with delimiters); each matching title is counted
     * @return array{
     *     hasCompressedFile: bool,
     *     sampleMessageIDs: list<string>,
     *     jpgMessageIDs: list<string>,
     *     mediaInfoMessageID: string,
     *     audioInfoMessageID: string,
     *     audioInfoExtension: string,
     *     bookFileCount: int
     * }
     */
    public function extractMessageIDs(
        array $nzbContents,
        string $groupName,
        int $segmentsToDownload,
        bool $processThumbnails,
        bool $processJPGSample,
        bool $processMediaInfo,
        bool $processAudioInfo,
        string $audioFileRegex,
        string $videoFileRegex,
        string $supportFileRegex,
        string $ignoreBookRegex
    ): array {
        $result = [
            'hasCompressedFile' => false,
            'sampleMessageIDs' => [],
            'jpgMessageIDs' => [],
            'mediaInfoMessageID' => '',
            'audioInfoMessageID' => '',
            'audioInfoExtension' => '',
            'bookFileCount' => 0,
        ];

        foreach ($nzbContents as $file) {
            try {
                $title = (string) ($file['title'] ?? '');
                $segments = $file['segments'] ?? [];

                if ($this->isSupportFile($title, $supportFileRegex)) {
                    continue;
                }

                if (! $result['hasCompressedFile'] && $this->isCompressedFile($title)) {
                    $result['hasCompressedFile'] = true;
                }

                if ($processThumbnails && $result['sampleMessageIDs'] === [] && ! empty($segments)
                    && $this->isVideoSample($title)
                ) {
                    $result['sampleMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                if ($processJPGSample && $result['jpgMessageIDs'] === [] && ! empty($segments)
                    && $this->isJpgPicture($title, $groupName)
                ) {
                    $result['jpgMessageIDs'] = $this->extractSegments($segments, $segmentsToDownload);
                }

                if ($processMediaInfo && $result['mediaInfoMessageID'] === '' && ! empty($segments[0])
                    && $this->isSampleVideoFile($title, $videoFileRegex)
                ) {
                    $result['mediaInfoMessageID'] = (string) $segments[0];
                }

                // Guard $segments[0] exactly like the MediaInfo branch: a non-empty
                // segment array does not guarantee that index 0 exists.
                if ($processAudioInfo && $result['audioInfoMessageID'] === '' && ! empty($segments[0])
                    && preg_match('/'.$audioFileRegex.'[. ")\]]/i', $title, $type)
                ) {
                    // The caller's fragment may lack a capture group.
                    $result['audioInfoExtension'] = $type[1] ?? '';
                    $result['audioInfoMessageID'] = (string) $segments[0];
                }

                if (preg_match($ignoreBookRegex, $title)) {
                    $result['bookFileCount']++;
                }
            } catch (\ErrorException $e) {
                Log::debug($e->getMessage().PHP_EOL.$e->getTraceAsString());
            }
        }

        return $result;
    }

    /**
     * Whether the title names a support file (caller pattern, or nfo/inf/ofn) near the end of the subject.
     */
    private function isSupportFile(string $title, string $supportFileRegex): bool
    {
        $pattern = '/(?:'.$supportFileRegex.'|nfo\\b|inf\\b|ofn\\b)'.self::TITLE_TERMINATOR.'(?!.{20,})/i';

        return (bool) preg_match($pattern, $title);
    }

    /**
     * Whether the title looks like (part of) an archive.
     */
    private function isCompressedFile(string $title): bool
    {
        return (bool) preg_match(self::COMPRESSED_FILE_REGEX, $title);
    }

    /**
     * Whether the title mentions "sample" and does not end in .jpg/.jpeg.
     */
    private function isVideoSample(string $title): bool
    {
        return stripos($title, 'sample') !== false && ! preg_match('/\.jpe?g$/i', $title);
    }

    /**
     * Whether the title names a JPG posted outside a music-related group.
     */
    private function isJpgPicture(string $title, string $groupName): bool
    {
        return ! preg_match('/flac|lossless|mp3|music|inner-sanctum|sound/i', $groupName)
            && preg_match('/\.jpe?g[. ")\]]/i', $title);
    }

    /**
     * Whether the title mentions "sample" and carries one of the given video extensions.
     */
    private function isSampleVideoFile(string $title, string $videoFileRegex): bool
    {
        return stripos($title, 'sample') !== false
            && preg_match('/'.$videoFileRegex.'[. ")\]]/i', $title);
    }

    /**
     * Extract segment message IDs up to a limit.
     *
     * Takes the first $limit entries in iteration order, whatever their keys,
     * and casts each to string.
     *
     * @return list<string>
     */
    private function extractSegments(array $segments, int $limit): array
    {
        if ($limit <= 0) {
            return [];
        }

        return array_map(
            static fn ($segment): string => (string) $segment,
            array_values(array_slice($segments, 0, $limit)),
        );
    }

    /**
     * Get the NZB path for a GUID.
     *
     * @return string|false Whatever NZB::NZBPath() returns; parseNzb() treats false as "not found"
     */
    public function getNzbPath(string $guid): string|false
    {
        return $this->nzb->NZBPath($guid);
    }
}
264
265