NzbParserService::extractPartsTotal()   A
last analyzed

Complexity

Conditions 5
Paths 5

Size

Total Lines 15
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 6
c 1
b 0
f 0
dl 0
loc 15
rs 9.6111
cc 5
nc 5
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Services\Nzb;
6
7
/**
8
 * Service for parsing and extracting information from NZB file contents.
9
 * Handles parsing NZB XML structure and extracting file lists, extensions, sizes, etc.
10
 */
11
class NzbParserService
12
{
13
    /**
     * Parse an NZB document and summarise every <file> entry it contains.
     *
     * Each entry of the returned array holds:
     *  - 'title':       the file's raw "subject" attribute
     *  - 'ext':         lower-cased extension detected in the subject ('' if none)
     *  - 'segments':    list of the segment contents (as strings)
     *  - 'size':        sum of the "bytes" attributes of the file's segments
     *  - 'partstotal':  digits of a trailing "N)" in the subject (only set when present)
     *  - 'partsactual': number of <segment> elements found
     *  - 'groups':      list of the group names (as strings)
     *
     * When subjects are used as keys and two files map to the same key, their
     * segments and groups are appended to the same entry while 'title', 'ext',
     * 'size' and 'partsactual' reflect the last file seen.
     *
     * @param  string  $nzb  The NZB contents in a string.
     * @param  array  $options  Optional settings:
     *                          - 'no-file-key': Use numeric keys instead of subject (default: true)
     *                          - 'strip-count': Strip file/part count from subject for sorting (default: false)
     * @return array $result Empty if not an NZB or the contents of the NZB.
     */
    public function parseNzbFileList(string $nzb, array $options = []): array
    {
        // Caller-supplied options win; "+" only fills in the keys that are missing.
        $options += [
            'no-file-key' => true,
            'strip-count' => false,
        ];

        $result = [];

        if (! $nzb) {
            return $result;
        }

        // 0x0F is stripped before parsing; parse warnings are suppressed on
        // purpose because a failed parse is reported by returning an empty array.
        $xml = @simplexml_load_string(str_replace("\x0F", '', $nzb));
        if (! $xml || strtolower($xml->getName()) !== 'nzb') {
            return $result;
        }

        $key = 0;
        foreach ($xml->file as $file) {
            // Subject.
            $title = (string) $file['subject'];

            if ($options['no-file-key'] === false) {
                $key = $title;
                if ($options['strip-count']) {
                    // Strip file / part count to get proper sorting.
                    $key = preg_replace('#\d+[- ._]?(/|\||[o0]f)[- ._]?\d+?(?![- ._]\d)#i', '', $key);
                    // Change .rar and .par2 to be sorted before .part0x.rar and .volxxx+xxx.par2
                    if (str_contains($key, '.par2') && ! preg_match('#\.vol\d+\+\d+\.par2#i', $key)) {
                        $key = str_replace('.par2', '.vol0.par2', $key);
                    } elseif (preg_match('#\.rar[^a-z0-9]#i', $key) && ! preg_match('#\.part\d+\.rar$#i', $key)) {
                        $key = preg_replace('#\.rar(?:[^a-z0-9])#i', '.part0.rar', $key);
                    }
                }
            }

            $result[$key]['title'] = $title;

            // Extensions: same detection rules as detectFileExtension(), so reuse it
            // instead of keeping a second copy of the pattern in sync.
            $result[$key]['ext'] = $this->detectFileExtension($title);

            $fileSize = $numSegments = 0;

            // Parts.
            if (! isset($result[$key]['segments'])) {
                $result[$key]['segments'] = [];
            }

            // File size. "?? []" tolerates a <file> without a <segments> container,
            // which would otherwise hand a non-iterable value to foreach.
            foreach ($file->segments->segment ?? [] as $segment) {
                $result[$key]['segments'][] = (string) $segment;
                // A missing "bytes" attribute counts as 0.
                $fileSize += (int) $segment['bytes'];
                $numSegments++;
            }
            $result[$key]['size'] = $fileSize;

            // File completion.
            if (preg_match('/(\d+)\)$/', $title, $parts)) {
                $result[$key]['partstotal'] = $parts[1];
            }
            $result[$key]['partsactual'] = $numSegments;

            // Groups.
            if (! isset($result[$key]['groups'])) {
                $result[$key]['groups'] = [];
            }
            foreach ($file->groups->group ?? [] as $group) {
                $result[$key]['groups'][] = (string) $group;
            }

            // Numeric keys advance once per file; subject keys are recomputed above.
            if ($options['no-file-key']) {
                $key++;
            }
        }

        return $result;
    }
121
122
    /**
     * Load and parse an NZB file from its contents string.
     *
     * libxml errors are collected internally instead of being raised as PHP
     * warnings. The libxml error buffer is emptied before parsing and drained
     * afterwards on every path, so the reported message always belongs to this
     * document and the buffer does not grow across repeated calls.
     *
     * Note: libxml's internal error handling is switched on and left on.
     *
     * @param  string  $nzbContents  The decompressed NZB file contents
     * @param  bool  $echoErrors  Whether to echo errors on parse failure
     * @param  string  $guid  Optional GUID for error messages
     * @return \SimpleXMLElement|false The parsed NZB file as SimpleXMLElement or false on failure
     */
    public function parseNzbXml(string $nzbContents, bool $echoErrors = false, string $guid = ''): \SimpleXMLElement|false
    {
        if (empty($nzbContents)) {
            return false;
        }

        // Safely parse the XML content
        libxml_use_internal_errors(true);
        // Drop anything left over from earlier parses so $errors[0] below
        // cannot describe a different document.
        libxml_clear_errors();

        $nzbFile = simplexml_load_string($nzbContents);

        // Take a copy of the errors, then always release libxml's buffer.
        $errors = libxml_get_errors();
        libxml_clear_errors();

        if ($nzbFile !== false) {
            return $nzbFile;
        }

        if ($echoErrors) {
            $errorMsg = ! empty($errors) ? ' - XML error: '.$errors[0]->message : '';
            echo PHP_EOL."Unable to load NZB: {$guid} appears to be an invalid NZB{$errorMsg}, skipping.".PHP_EOL;
        }

        return false;
    }
153
154
    /**
     * Detect a known file extension inside a subject line.
     *
     * The extension must be followed by a double quote or a space, and that
     * character must not be followed by ")", "|" or "-". Split-archive
     * extensions such as ".r01" are reported as "rar"; everything else is
     * returned lower-cased.
     *
     * @param  string  $subject  The file subject/name
     * @return string The detected extension, or empty string if none found
     */
    public function detectFileExtension(string $subject): string
    {
        $knownExtensionPattern = '/\.(\d{2,3}|7z|ace|ai7|srr|srt|sub|aiff|asc|avi|audio|bin|bz2|'
            .'c|cfc|cfm|chm|class|conf|cpp|cs|css|csv|cue|deb|divx|doc|dot|'
            .'eml|enc|exe|file|gif|gz|hlp|htm|html|image|iso|jar|java|jpeg|'
            .'jpg|js|lua|m|m3u|mkv|mm|mov|mp3|mp4|mpg|nfo|nzb|odc|odf|odg|odi|odp|'
            .'ods|odt|ogg|par2|parity|pdf|pgp|php|pl|png|ppt|ps|py|r\d{2,3}|'
            .'ram|rar|rb|rm|rpm|rtf|sfv|sig|sql|srs|swf|sxc|sxd|sxi|sxw|tar|'
            .'tex|tgz|txt|vcf|video|vsd|wav|wma|wmv|xls|xml|xpi|xvid|zip7|zip)'
            .'[" ](?!([\)|\-]))/i';

        $found = [];
        if (preg_match($knownExtensionPattern, $subject, $found) !== 1) {
            return '';
        }

        // $found[0] is the whole match (dot, extension, trailing quote/space);
        // $found[1] is the bare extension.
        $isSplitRarVolume = preg_match('/\.r\d{2,3}/i', $found[0]) === 1;

        return $isSplitRarVolume ? 'rar' : strtolower($found[1]);
    }
184
185
    /**
     * Decide whether a subject names an NFO-style file.
     *
     * Two patterns are tried in order; the first one that matches determines
     * the reported priority:
     *  - priority 1: an explicit ".nfo", ".diz", ".inf" or ".info" extension
     *    that is not directly followed by "." or "-";
     *  - priority 2: an NFO-like base name (file_id, readme, release, info,
     *    about, desc, notes, 00-, ...) that ends in ".txt", ".nfo" or ".diz".
     *
     * @param  string  $subject  The file subject
     * @return array|false Returns array with detection info or false if not an NFO
     */
    public function detectNfoFile(string $subject): array|false
    {
        // priority => pattern, evaluated top to bottom.
        $patternsByPriority = [
            // Standard NFO extensions
            1 => '/\.\b(nfo|diz|info?)\b(?![.-])/i',
            // Alternative NFO naming patterns (group-specific or obfuscated)
            2 => '/(?:^|["\s])(?:file(?:_?id)?|readme|release|info(?:rmation)?|about|desc(?:ription)?|notes?|read\.?me|00-|000-|0-|_-_).*?\.(?:txt|nfo|diz)(?:["\s]|$)/i',
        ];

        foreach ($patternsByPriority as $priority => $pattern) {
            if (preg_match($pattern, $subject) === 1) {
                return ['hidden' => false, 'priority' => $priority];
            }
        }

        return false;
    }
205
206
    /**
     * Decide whether a subject could be an NFO posted without a visible
     * NFO extension.
     *
     * A subject is a candidate when any of these holds:
     *  1. exactly one segment and the subject ends in "(1/1)";
     *  2. at most two segments and an NFO-like word (nfo, info, readme,
     *     release, file_id, about) delimited by quotes, whitespace or the
     *     string boundaries;
     *  3. exactly one segment and the subject starts with a dotted/dashed
     *     name immediately followed by "(1/1)";
     *  4. exactly one segment and a 'yEnc (<digits>) [1/1]' marker.
     *
     * A candidate is rejected again when the subject contains the extension
     * of another common file type.
     *
     * @param  string  $subject  The file subject
     * @param  int  $segmentCount  The number of segments in the file
     * @return array|false Returns array with detection info or false if not a hidden NFO
     */
    public function detectHiddenNfoFile(string $subject, int $segmentCount): array|false
    {
        $hasSingleSegment = $segmentCount === 1;

        // The alternatives short-circuit left to right, so later patterns are
        // only evaluated while no earlier one has matched.
        $isCandidate =
            // Pattern 1: Single segment files with (1/1)
            ($hasSingleSegment && preg_match('/\(1\/1\)$/i', $subject))
            // Pattern 2: Small segment count (1-2) with NFO-like names but no extension
            || ($segmentCount <= 2 && preg_match('/(?:^|["\s])(?:nfo|info|readme|release|file_?id|about)(?:["\s]|$)/i', $subject))
            // Pattern 3: Scene-style naming; the looser prefix check runs first,
            // then the stricter one requiring a separator inside the name.
            || ($hasSingleSegment
                && preg_match('/^[a-z0-9._-]+["\s]*\(1\/1\)/i', $subject)
                && preg_match('/^[a-z0-9]+[._-][a-z0-9._-]+["\s]*\(1\/1\)/i', $subject))
            // Pattern 4: yEnc marker with a size in parentheses and a [1/1] counter
            || ($hasSingleSegment && preg_match('/yEnc\s*\(\d+\)\s*\[1\/1\]/i', $subject));

        if (! $isCandidate) {
            return false;
        }

        // Extensions of other common file types, grouped by kind. Joined with
        // "|" they form one alternation.
        $otherFileTypes = [
            // Executables
            'exe|com|bat|cmd|scr|dll|msi|pkg|deb|rpm|apk|ipa|app',
            // Archives
            'zip|rar|[rst]\d{2}|7z|ace|tar|gz|bz2|xz|lzma|cab|iso|bin|cue|img|mdf|nrg|dmg|vhd',
            // Audio
            'mp3|flac|ogg|aac|wav|wma|m4a|opus|ape|wv|mpc',
            // Video
            'avi|mkv|mp4|mov|wmv|mpg|mpeg|ts|vob|m2ts|webm|flv|ogv|divx|xvid',
            // Images
            'jpg|jpeg|png|gif|bmp|tif|tiff|psd|webp|svg|ico|raw|cr2|nef',
            // Documents
            'pdf|doc|docx|xls|xlsx|ppt|pptx|odt|ods|odp|rtf|epub|mobi|azw',
            // Code
            'html|htm|css|js|php|py|java|c|cpp|h|cs|sql|json|xml|yml|yaml',
            // Data
            'db|dbf|mdb|accdb|sqlite|csv',
            // Verification
            'par2?|sfv|md5|sha1|sha256|sha512|crc',
            // Misc
            'url|lnk|cfg|ini|inf|sys|tmp|bak|log|srt|sub|idx|ass|ssa|vtt',
        ];
        $excludedExtensions = '/\.(?:'.implode('|', $otherFileTypes).')\b/i';

        if (preg_match($excludedExtensions, $subject)) {
            return false;
        }

        return ['hidden' => true, 'priority' => 10];
    }
275
276
    /**
     * Check if a subject indicates a PAR2 index file.
     *
     * The check is purely a case-insensitive "ends in .par2" test, so any
     * subject with that suffix is accepted, and a subject with text after
     * the extension is not.
     *
     * @param  string  $subject  The file subject
     * @return bool True if the subject ends in ".par2"
     */
    public function detectPar2IndexFile(string $subject): bool
    {
        $endsWithPar2 = preg_match('/\.par2$/i', $subject);

        return $endsWithPar2 === 1;
    }
286
287
    /**
     * Read the total number of parts from the end of a subject line.
     *
     * A trailing "<current><sep><total>" counter is tried first, optionally
     * wrapped in round or square brackets (e.g. "[15/20]", "(15/20)"); its
     * total is used when it is greater than zero. Otherwise a plain trailing
     * "<number>)" is used.
     *
     * @param  string  $subject  The file subject
     * @return int The estimated total parts, or 0 if not determinable
     */
    public function extractPartsTotal(string $subject): int
    {
        $matches = [];

        // Counter form: the second capture group is the total.
        if (preg_match('/(?:[(\[])?(\d+)[\/)\\]](\d+)[)\]]?$/', $subject, $matches) === 1) {
            $total = (int) ($matches[2] ?? 0);
            if ($total > 0) {
                return $total;
            }
        }

        // Fallback: the subject simply ends in "<number>)".
        return preg_match('/(\d+)\)$/', $subject, $matches) === 1
            ? (int) $matches[1]
            : 0;
    }
309
}
310
311