MusicCategorizer   F
last analyzed

Complexity

Total Complexity 73

Size/Duplication

Total Lines 228
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 73
eloc 101
c 1
b 0
f 0
dl 0
loc 228
rs 2.56

10 Methods

Rating   Name   Duplication   Size   Complexity  
B checkMusicVideo() 0 24 10
C checkOther() 0 29 12
B checkAudiobook() 0 23 8
A shouldSkip() 0 6 3
C checkMP3() 0 32 13
A checkForeign() 0 3 1
A getName() 0 3 1
C checkLossless() 0 33 13
B categorize() 0 30 7
A checkPodcast() 0 18 5

How to fix   Complexity   

Complex Class

Complex classes like MusicCategorizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use MusicCategorizer, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace App\Services\Categorization\Categorizers;
4
5
use App\Models\Category;
6
use App\Services\Categorization\CategorizationResult;
7
use App\Services\Categorization\ReleaseContext;
8
9
/**
 * Categorizer for Music content (MP3, Lossless, Video, Audiobook, Podcast).
 *
 * Classification is purely regex-driven on the release name. Each check*()
 * method tries its rules from most to least specific and returns the first
 * match, or null when none of its rules apply. categorize() runs the checks
 * in a fixed order and falls back to noMatch().
 */
class MusicCategorizer extends AbstractCategorizer
{
    // NOTE(review): presumably the ordering weight used by the categorization
    // pipeline; its exact meaning is defined in AbstractCategorizer - confirm.
    protected int $priority = 40;

    // Language names (regex alternation) that mark a release as foreign music.
    protected const FOREIGN_LANGUAGES = 'arabic|brazilian|bulgarian|cantonese|chinese|croatian|czech|danish|deutsch|dutch|estonian|finnish|flemish|french|german|greek|hebrew|hungarian|icelandic|indian|iranian|italian|japanese|korean|latin|latvian|lithuanian|macedonian|mandarin|nordic|norwegian|persian|polish|portuguese|romanian|russian|serbian|slovenian|spanish|spanisch|swedish|thai|turkish|ukrainian|vietnamese';

    // Short language/country codes (regex alternation) that mark a release as foreign music.
    // NOTE(review): several codes are also common English words ("is", "it", "no", "de"),
    // so a delimited token such as ".It." in a title will be treated as foreign.
    protected const LANGUAGE_CODES = 'ar|bg|bl|cs|cz|da|de|dk|el|es|et|fi|fr|ger|gr|heb|hr|hu|hun|is|it|ita|jp|jap|ko|kor|lt|lv|mk|nl|no|pl|pt|ro|rs|ru|se|sk|sl|sr|sv|th|tr|ua|vi|zh';

    /**
     * Name of this categorizer.
     */
    public function getName(): string
    {
        return 'Music';
    }

    /**
     * Decide whether this categorizer should be bypassed for the release.
     *
     * @return bool true when the context reports adult markers or the release
     *              name contains a TV-style season token (e.g. ".S01E02", ".S02.1080").
     */
    public function shouldSkip(ReleaseContext $context): bool
    {
        if ($context->hasAdultMarkers()) {
            return true;
        }

        // Skip TV shows (season patterns)
        if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) {
            return true;
        }

        return false;
    }

    /**
     * Run every music check in order and return the first match.
     *
     * Order: audiobook, podcast, music video, lossless, MP3, other. Each
     * check*() returns null when it does not match, so the null-coalescing
     * chain stops at the first hit and later checks are not evaluated.
     *
     * @return CategorizationResult the first matching result, or the value of
     *                              noMatch() when no check matched.
     */
    public function categorize(ReleaseContext $context): CategorizationResult
    {
        $name = $context->releaseName;

        return $this->checkAudiobook($name)
            ?? $this->checkPodcast($name)
            ?? $this->checkMusicVideo($name, $context->categorizeForeign)
            ?? $this->checkLossless($name, $context->categorizeForeign)
            ?? $this->checkMP3($name, $context->categorizeForeign)
            ?? $this->checkOther($name, $context->categorizeForeign)
            ?? $this->noMatch();
    }

    /**
     * Tell whether the name contains a foreign-language token.
     *
     * A token is any entry of FOREIGN_LANGUAGES or LANGUAGE_CODES that is
     * delimited on both sides by start/end of string, whitespace, '.', '-' or '_'.
     * Matching is case-insensitive.
     */
    protected function checkForeign(string $name): bool
    {
        return (bool) preg_match('/(?:^|[\s\.\-_])(?:' . self::FOREIGN_LANGUAGES . '|' . self::LANGUAGE_CODES . ')(?:$|[\s\.\-_])/i', $name);
    }

    /**
     * Build the result for a rule that has a foreign variant.
     *
     * When foreign categorization is enabled and the name carries a foreign
     * token, the MUSIC_FOREIGN result is returned; otherwise the regular one.
     * checkForeign() is only evaluated when $categorizeForeign is true.
     *
     * @param string $name              Release name being classified.
     * @param bool   $categorizeForeign Whether foreign releases get their own category.
     * @param mixed  $category          Category constant for the regular match, forwarded to matched().
     * @param float  $confidence        Confidence for the regular match.
     * @param string $reason            Label forwarded as third argument of matched() for the regular match.
     * @param float  $foreignConfidence Confidence for the foreign match.
     * @param string $foreignReason     Label forwarded as third argument of matched() for the foreign match.
     */
    private function matchedOrForeign(
        string $name,
        bool $categorizeForeign,
        $category,
        float $confidence,
        string $reason,
        float $foreignConfidence,
        string $foreignReason
    ): ?CategorizationResult {
        if ($categorizeForeign && $this->checkForeign($name)) {
            return $this->matched(Category::MUSIC_FOREIGN, $foreignConfidence, $foreignReason);
        }

        return $this->matched($category, $confidence, $reason);
    }

    /**
     * Detect audiobooks.
     *
     * Rules, in order: an explicit audiobook keyword backed by a supporting
     * marker (0.95), a bracketed tag or "Read By <two words>" (0.9), and a bare
     * "Audiobook"/"Audio?Book" substring (0.85).
     *
     * @return CategorizationResult|null null when no audiobook rule matches.
     */
    protected function checkAudiobook(string $name): ?CategorizationResult
    {
        // Explicit audiobook indicators
        if (preg_match('/(?:^|[^a-zA-Z0-9])(?:Audiobook|Audio\s*Book|Talking\s*Book|ABEE|Audible)/i', $name)) {
            // The keyword alone is not enough here; it must be backed by a
            // narration/publisher/format word, a length marker, or an audio extension.
            if (preg_match('/\b(?:Unabridged|Abridged|Narrated|Narrator|MP3|M4A|M4B|AAC|Read\s+By|Tantor|Blackstone|Brilliance|GraphicAudio|Penguin|Audible)\b/i', $name)
                || preg_match('/\d+\s*CDs|\d+\s*Hours|Spoken\s+Word/i', $name)
                || preg_match('/\.(mp3|m4a|m4b|aac|flac|ogg|wma)$/i', $name)
            ) {
                return $this->matched(Category::MUSIC_AUDIOBOOK, 0.95, 'audiobook');
            }
        }

        // Audiobook patterns
        if (preg_match('/(?:[\(_\[])(?:Audiobook|AB|Unabridged)(?:[\)_\]])/i', $name)
            || preg_match('/Read\s+By\s+[A-Z][a-z]+\s+[A-Z][a-z]+/i', $name)
        ) {
            return $this->matched(Category::MUSIC_AUDIOBOOK, 0.9, 'audiobook_pattern');
        }

        // Legacy pattern
        if (preg_match('/(Audiobook|Audio.?Book)/i', $name)) {
            return $this->matched(Category::MUSIC_AUDIOBOOK, 0.85, 'audiobook_legacy');
        }

        return null;
    }

    /**
     * Detect podcasts.
     *
     * Rules, in order: a delimited podcast keyword (0.9), a known network or
     * show name combined with an episode-style word (0.85), and any
     * "podcast" substring (0.8).
     *
     * @return CategorizationResult|null null when no podcast rule matches.
     */
    protected function checkPodcast(string $name): ?CategorizationResult
    {
        if (preg_match('/(?:^|[^a-zA-Z0-9])(?:Podcast|Pod[._ -]?cast|Pod[._ -]Show)/i', $name)) {
            return $this->matched(Category::MUSIC_PODCAST, 0.9, 'podcast');
        }

        // Known podcast networks with episode indicators
        if (preg_match('/\b(?:NPR|BBC[._ -]Sounds|Gimlet|Wondery|Stitcher|iHeart[._ -]?Radio|Joe[._ -]Rogan|RadioLab|Serial)\b/i', $name)
            && preg_match('/\b(?:Podcast|Episode|EP?[._ -]?\d+|Show)\b/i', $name)
        ) {
            return $this->matched(Category::MUSIC_PODCAST, 0.85, 'podcast_network');
        }

        // Simple podcast match
        if (preg_match('/podcast/i', $name)) {
            return $this->matched(Category::MUSIC_PODCAST, 0.8, 'podcast_simple');
        }

        return null;
    }

    /**
     * Detect music videos and concerts.
     *
     * Rules, in order: a music-video keyword backed by a video quality tag,
     * a live/performance word, or a video extension (0.9, foreign 0.85); then
     * an "Artist - Title ... <video tag>" shaped name (0.8).
     *
     * @return CategorizationResult|null null when no music-video rule matches.
     */
    protected function checkMusicVideo(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // Music video indicators
        if (preg_match('/(?:^|[^a-zA-Z0-9])(?:Music\s*Video|Concert|Live\s*Show|Tour|Festival|MV|MTV)|\b(?:MVID|MVid)\b/i', $name)) {
            if (preg_match('/\b(?:720p|1080[pi]|2160p|BDRip|BluRay|DVDRip|HDTV|WebRip|WEB-DL|x264|x265)\b/i', $name)
                || preg_match('/\b(?:Live|Unplugged|Acoustic|World\s*Tour|in\s*Concert|Official\s*Video|Bootleg|Remastered)\b/i', $name)
                || preg_match('/\.(mkv|mp4|avi|ts|m2ts|mpg|mpeg|mov|wmv|vob|m4v)$/i', $name)
            ) {
                return $this->matchedOrForeign(
                    $name,
                    $categorizeForeign,
                    Category::MUSIC_VIDEO,
                    0.9,
                    'music_video',
                    0.85,
                    'music_video_foreign'
                );
            }
        }

        // Artist-title pattern with video format
        if (preg_match('/^[A-Z0-9][A-Za-z0-9\.\s\&\'\(\)\-]+\s+\-\s+[A-Z0-9][A-Za-z0-9\.\s\&\'\(\)\-]+.*?\b(720p|1080[pi]|2160p|Bluray|x264|x265)\b/i', $name)) {
            // The foreign label is intentionally the same string as in the rule above.
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_VIDEO,
                0.8,
                'music_video_artist',
                0.8,
                'music_video_foreign'
            );
        }

        return null;
    }

    /**
     * Detect lossless audio releases.
     *
     * Rules, in order: a lossless format keyword backed by a quality/source
     * tag or lossless extension (0.9), FLAC-specific naming patterns (0.85),
     * and other lossless format names or extensions (0.85).
     *
     * @return CategorizationResult|null null when no lossless rule matches.
     */
    protected function checkLossless(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // Lossless format indicators
        if (preg_match('/(?:^|[^a-zA-Z0-9])(?:FLAC|APE|WAV|ALAC|DSD|DSF|AIFF|PCM|Lossless)|\b(?:FLAC|APE|WAV|ALAC|DSD|DSF|AIFF|PCM)\b/i', $name)) {
            if (preg_match('/\b(?:24[Bb]it|96kHz|192kHz|Hi[- ]?Res|HD[- ]?Tracks|Vinyl[- ]?Rip|CD[- ]?Rip|WEB[- ]?Rip|HDtracks|Qobuz|Tidal|MQA|SACD)\b/i', $name)
                || preg_match('/\.(flac|ape|wav|aiff|dsf|dff|m4a|tak)$/i', $name)
            ) {
                return $this->matchedOrForeign(
                    $name,
                    $categorizeForeign,
                    Category::MUSIC_LOSSLESS,
                    0.9,
                    'lossless',
                    0.9,
                    'lossless_foreign'
                );
            }
        }

        // FLAC patterns
        if (preg_match('/\[(19|20)\d\d\][._ -]\[FLAC\]|([\(\[])flac([\)\]])|FLAC\-(19|20)\d\d\-[a-z0-9]{1,12}|\.flac"|(19|20)\d\d\sFLAC|[._ -]FLAC.+(19|20)\d\d[._ -]| FLAC$/i', $name)
            || preg_match('/\d{3,4}kbps[._ -]FLAC|\[FLAC\]|\(FLAC\)|FLACME|FLAC[._ -]\d{3,4}(kbps)?|WEB[._ -]FLAC/i', $name)
        ) {
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_LOSSLESS,
                0.85,
                'flac',
                0.85,
                'flac_foreign'
            );
        }

        // Other lossless formats
        if (preg_match('/\b(?:APE|Monkey\'s[._ -]Audio|WavPack|WV|TAK|TTA|ALAC|Apple[._ -]Lossless)\b|\.(ape|wv|tak|tta)$/i', $name)) {
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_LOSSLESS,
                0.85,
                'lossless_format',
                0.85,
                'lossless_format_foreign'
            );
        }

        return null;
    }

    /**
     * Detect MP3 releases.
     *
     * Rules, in order: an MP3/bitrate keyword backed by a bitrate, source or
     * playlist marker (0.85), scene-style "name-YEAR-group" naming (0.75),
     * and loose bitrate patterns (0.8).
     *
     * @return CategorizationResult|null null when no MP3 rule matches.
     */
    protected function checkMP3(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // MP3 indicators
        if (preg_match('/(?:^|[^a-zA-Z0-9])(?:MP3|320kbps|256kbps|192kbps|128kbps|CBR|VBR)|\b(?:MP3)\b|[\._-](?:MP3)[\._-]|\.mp3$/i', $name)) {
            if (preg_match('/\b(?:320|256|192|128)[._-]?kbps|\b(?:320|256|192|128)[._-]?K|\((?:320|256|192|128)\)|\[(?:320|256|192|128)\]|V0|V2|VBR/i', $name)
                || preg_match('/\b(?:CD[._-]?Rip|Web[._-]?Rip|WEB|iTunes|AmazonRip|Spotify[._-]?Rip|MP3\s*\-\s*\d{3}kbps)\b/i', $name)
                || preg_match('/\.(m3u|mp3)"|rip(?:192|256|320)|[._-]FM[._-].+MP3/i', $name)
            ) {
                return $this->matchedOrForeign(
                    $name,
                    $categorizeForeign,
                    Category::MUSIC_MP3,
                    0.85,
                    'mp3',
                    0.85,
                    'mp3_foreign'
                );
            }
        }

        // MP3 scene patterns
        if (preg_match('/^[a-zA-Z0-9]{1,12}[._-](19|20)\d\d[._-][a-zA-Z0-9]{1,12}$|[a-z0-9]{1,12}\-(19|20)\d\d\-[a-z0-9]{1,12}/i', $name)) {
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_MP3,
                0.75,
                'mp3_scene',
                0.75,
                'mp3_scene_foreign'
            );
        }

        // Bitrate patterns
        if (preg_match('/[\.\-\(\[_ ]\d{2,3}k[\.\-\)\]_ ]|\((192|256|320)\)|(320|cd|eac|vbr)[._-]+mp3|(cd|eac|mp3|vbr)[._-]+320/i', $name)) {
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_MP3,
                0.8,
                'mp3_bitrate',
                0.8,
                'mp3_bitrate_foreign'
            );
        }

        return null;
    }

    /**
     * Detect remaining music releases that carry no format marker.
     *
     * Rules, in order: compilation/soundtrack/VA keywords (0.8), album/CD/
     * vinyl/EP/live naming (0.75), and DJ mix or label names (0.75).
     *
     * @return CategorizationResult|null null when no rule matches.
     */
    protected function checkOther(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // Compilation and VA indicators
        if (preg_match('/(?:^|[^a-zA-Z0-9])(?:Compilation|Various[._ -]Artists|OST|Soundtrack|B-Sides|Greatest[._ -]Hits|Anthology)|\b(?:VA|V\.A|Bonus[._ -]Track|Discography|Box[._ -]Set)\b/i', $name)) {
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_OTHER,
                0.8,
                'music_other',
                0.8,
                'music_other_foreign'
            );
        }

        // Album/CD patterns
        if (preg_match('/(?:\d)[._ -](?:CD|Albums|LP)[._ -](?:Set|Compilation)|CD[._ -](Collection|Box|SET)|(\d)-?CD[._ -]/i', $name)
            || preg_match('/Vinyl[._ -](?:24[._ -]96|2496|Collection|RIP)|WEB[._ -](?:Single|Album)|EP[._ -]\d{4}|\bEP\b.+(?:19|20)\d\d|Live[._ -](?:at|At|@)/i', $name)
        ) {
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_OTHER,
                0.75,
                'music_album',
                0.75,
                'music_album_foreign'
            );
        }

        // DJ mixes and labels
        if (preg_match('/\b(?:Ministry[._ -]of[._ -]Sound|Hed[._ -]Kandi|Cream|Fabric[._ -]Live|Ultra[._ -]Music)\b/i', $name)
            || preg_match('/\b(?:DJ[._ -]Mix|Mixed[._ -]By|Tiesto[._ -]Club|Radio[._ -]Show|Club[._ -]Hits)\b/i', $name)
        ) {
            return $this->matchedOrForeign(
                $name,
                $categorizeForeign,
                Category::MUSIC_OTHER,
                0.75,
                'music_dj',
                0.75,
                'music_dj_foreign'
            );
        }

        return null;
    }
}
242
243