| Total Complexity | 73 |
| Total Lines | 228 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like MusicCategorizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use MusicCategorizer, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 12 | class MusicCategorizer extends AbstractCategorizer |
||
| 13 | { |
||
| 14 | protected int $priority = 40; |
||
| 15 | |||
| 16 | // Language patterns for foreign music |
||
| 17 | protected const FOREIGN_LANGUAGES = 'arabic|brazilian|bulgarian|cantonese|chinese|croatian|czech|danish|deutsch|dutch|estonian|finnish|flemish|french|german|greek|hebrew|hungarian|icelandic|indian|iranian|italian|japanese|korean|latin|latvian|lithuanian|macedonian|mandarin|nordic|norwegian|persian|polish|portuguese|romanian|russian|serbian|slovenian|spanish|spanisch|swedish|thai|turkish|ukrainian|vietnamese'; |
||
| 18 | |||
| 19 | protected const LANGUAGE_CODES = 'ar|bg|bl|cs|cz|da|de|dk|el|es|et|fi|fr|ger|gr|heb|hr|hu|hun|is|it|ita|jp|jap|ko|kor|lt|lv|mk|nl|no|pl|pt|ro|rs|ru|se|sk|sl|sr|sv|th|tr|ua|vi|zh'; |
||
| 20 | |||
| 21 | public function getName(): string |
||
| 22 | { |
||
| 23 | return 'Music'; |
||
| 24 | } |
||
| 25 | |||
| 26 | public function shouldSkip(ReleaseContext $context): bool |
||
| 27 | { |
||
| 28 | if ($context->hasAdultMarkers()) return true; |
||
| 29 | // Skip TV shows (season patterns) |
||
| 30 | if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) return true; |
||
| 31 | return false; |
||
| 32 | } |
||
| 33 | |||
| 34 | public function categorize(ReleaseContext $context): CategorizationResult |
||
| 35 | { |
||
| 36 | $name = $context->releaseName; |
||
| 37 | |||
| 38 | // Try each music category |
||
| 39 | if ($result = $this->checkAudiobook($name)) { |
||
| 40 | return $result; |
||
| 41 | } |
||
| 42 | |||
| 43 | if ($result = $this->checkPodcast($name)) { |
||
| 44 | return $result; |
||
| 45 | } |
||
| 46 | |||
| 47 | if ($result = $this->checkMusicVideo($name, $context->categorizeForeign)) { |
||
| 48 | return $result; |
||
| 49 | } |
||
| 50 | |||
| 51 | if ($result = $this->checkLossless($name, $context->categorizeForeign)) { |
||
| 52 | return $result; |
||
| 53 | } |
||
| 54 | |||
| 55 | if ($result = $this->checkMP3($name, $context->categorizeForeign)) { |
||
| 56 | return $result; |
||
| 57 | } |
||
| 58 | |||
| 59 | if ($result = $this->checkOther($name, $context->categorizeForeign)) { |
||
| 60 | return $result; |
||
| 61 | } |
||
| 62 | |||
| 63 | return $this->noMatch(); |
||
| 64 | } |
||
| 65 | |||
| 66 | protected function checkForeign(string $name): bool |
||
| 67 | { |
||
| 68 | return (bool) preg_match('/(?:^|[\s\.\-_])(?:' . self::FOREIGN_LANGUAGES . '|' . self::LANGUAGE_CODES . ')(?:$|[\s\.\-_])/i', $name); |
||
| 69 | } |
||
| 70 | |||
| 71 | protected function checkAudiobook(string $name): ?CategorizationResult |
||
| 72 | { |
||
| 73 | // Explicit audiobook indicators |
||
| 74 | if (preg_match('/(?:^|[^a-zA-Z0-9])(?:Audiobook|Audio\s*Book|Talking\s*Book|ABEE|Audible)/i', $name)) { |
||
| 75 | if (preg_match('/\b(?:Unabridged|Abridged|Narrated|Narrator|MP3|M4A|M4B|AAC|Read\s+By|Tantor|Blackstone|Brilliance|GraphicAudio|Penguin|Audible)\b/i', $name) || |
||
| 76 | preg_match('/\d+\s*CDs|\d+\s*Hours|Spoken\s+Word/i', $name) || |
||
| 77 | preg_match('/\.(mp3|m4a|m4b|aac|flac|ogg|wma)$/i', $name)) { |
||
| 78 | return $this->matched(Category::MUSIC_AUDIOBOOK, 0.95, 'audiobook'); |
||
| 79 | } |
||
| 80 | } |
||
| 81 | |||
| 82 | // Audiobook patterns |
||
| 83 | if (preg_match('/(?:[\(_\[])(?:Audiobook|AB|Unabridged)(?:[\)_\]])/i', $name) || |
||
| 84 | preg_match('/Read\s+By\s+[A-Z][a-z]+\s+[A-Z][a-z]+/i', $name)) { |
||
| 85 | return $this->matched(Category::MUSIC_AUDIOBOOK, 0.9, 'audiobook_pattern'); |
||
| 86 | } |
||
| 87 | |||
| 88 | // Legacy pattern |
||
| 89 | if (preg_match('/(Audiobook|Audio.?Book)/i', $name)) { |
||
| 90 | return $this->matched(Category::MUSIC_AUDIOBOOK, 0.85, 'audiobook_legacy'); |
||
| 91 | } |
||
| 92 | |||
| 93 | return null; |
||
| 94 | } |
||
| 95 | |||
| 96 | protected function checkPodcast(string $name): ?CategorizationResult |
||
| 97 | { |
||
| 98 | if (preg_match('/(?:^|[^a-zA-Z0-9])(?:Podcast|Pod[._ -]?cast|Pod[._ -]Show)/i', $name)) { |
||
| 99 | return $this->matched(Category::MUSIC_PODCAST, 0.9, 'podcast'); |
||
| 100 | } |
||
| 101 | |||
| 102 | // Known podcast networks with episode indicators |
||
| 103 | if (preg_match('/\b(?:NPR|BBC[._ -]Sounds|Gimlet|Wondery|Stitcher|iHeart[._ -]?Radio|Joe[._ -]Rogan|RadioLab|Serial)\b/i', $name) && |
||
| 104 | preg_match('/\b(?:Podcast|Episode|EP?[._ -]?\d+|Show)\b/i', $name)) { |
||
| 105 | return $this->matched(Category::MUSIC_PODCAST, 0.85, 'podcast_network'); |
||
| 106 | } |
||
| 107 | |||
| 108 | // Simple podcast match |
||
| 109 | if (preg_match('/podcast/i', $name)) { |
||
| 110 | return $this->matched(Category::MUSIC_PODCAST, 0.8, 'podcast_simple'); |
||
| 111 | } |
||
| 112 | |||
| 113 | return null; |
||
| 114 | } |
||
| 115 | |||
| 116 | protected function checkMusicVideo(string $name, bool $categorizeForeign): ?CategorizationResult |
||
| 140 | } |
||
| 141 | |||
| 142 | protected function checkLossless(string $name, bool $categorizeForeign): ?CategorizationResult |
||
| 143 | { |
||
| 144 | // Lossless format indicators |
||
| 145 | if (preg_match('/(?:^|[^a-zA-Z0-9])(?:FLAC|APE|WAV|ALAC|DSD|DSF|AIFF|PCM|Lossless)|\b(?:FLAC|APE|WAV|ALAC|DSD|DSF|AIFF|PCM)\b/i', $name)) { |
||
| 146 | if (preg_match('/\b(?:24[Bb]it|96kHz|192kHz|Hi[- ]?Res|HD[- ]?Tracks|Vinyl[- ]?Rip|CD[- ]?Rip|WEB[- ]?Rip|HDtracks|Qobuz|Tidal|MQA|SACD)\b/i', $name) || |
||
| 147 | preg_match('/\.(flac|ape|wav|aiff|dsf|dff|m4a|tak)$/i', $name)) { |
||
| 148 | |||
| 149 | if ($categorizeForeign && $this->checkForeign($name)) { |
||
| 150 | return $this->matched(Category::MUSIC_FOREIGN, 0.9, 'lossless_foreign'); |
||
| 151 | } |
||
| 152 | return $this->matched(Category::MUSIC_LOSSLESS, 0.9, 'lossless'); |
||
| 153 | } |
||
| 154 | } |
||
| 155 | |||
| 156 | // FLAC patterns |
||
| 157 | if (preg_match('/\[(19|20)\d\d\][._ -]\[FLAC\]|([\(\[])flac([\)\]])|FLAC\-(19|20)\d\d\-[a-z0-9]{1,12}|\.flac"|(19|20)\d\d\sFLAC|[._ -]FLAC.+(19|20)\d\d[._ -]| FLAC$/i', $name) || |
||
| 158 | preg_match('/\d{3,4}kbps[._ -]FLAC|\[FLAC\]|\(FLAC\)|FLACME|FLAC[._ -]\d{3,4}(kbps)?|WEB[._ -]FLAC/i', $name)) { |
||
| 159 | |||
| 160 | if ($categorizeForeign && $this->checkForeign($name)) { |
||
| 161 | return $this->matched(Category::MUSIC_FOREIGN, 0.85, 'flac_foreign'); |
||
| 162 | } |
||
| 163 | return $this->matched(Category::MUSIC_LOSSLESS, 0.85, 'flac'); |
||
| 164 | } |
||
| 165 | |||
| 166 | // Other lossless formats |
||
| 167 | if (preg_match('/\b(?:APE|Monkey\'s[._ -]Audio|WavPack|WV|TAK|TTA|ALAC|Apple[._ -]Lossless)\b|\.(ape|wv|tak|tta)$/i', $name)) { |
||
| 168 | if ($categorizeForeign && $this->checkForeign($name)) { |
||
| 169 | return $this->matched(Category::MUSIC_FOREIGN, 0.85, 'lossless_format_foreign'); |
||
| 170 | } |
||
| 171 | return $this->matched(Category::MUSIC_LOSSLESS, 0.85, 'lossless_format'); |
||
| 172 | } |
||
| 173 | |||
| 174 | return null; |
||
| 175 | } |
||
| 176 | |||
| 177 | protected function checkMP3(string $name, bool $categorizeForeign): ?CategorizationResult |
||
| 178 | { |
||
| 179 | // MP3 indicators |
||
| 180 | if (preg_match('/(?:^|[^a-zA-Z0-9])(?:MP3|320kbps|256kbps|192kbps|128kbps|CBR|VBR)|\b(?:MP3)\b|[\._-](?:MP3)[\._-]|\.mp3$/i', $name)) { |
||
| 181 | if (preg_match('/\b(?:320|256|192|128)[._-]?kbps|\b(?:320|256|192|128)[._-]?K|\((?:320|256|192|128)\)|\[(?:320|256|192|128)\]|V0|V2|VBR/i', $name) || |
||
| 182 | preg_match('/\b(?:CD[._-]?Rip|Web[._-]?Rip|WEB|iTunes|AmazonRip|Spotify[._-]?Rip|MP3\s*\-\s*\d{3}kbps)\b/i', $name) || |
||
| 183 | preg_match('/\.(m3u|mp3)"|rip(?:192|256|320)|[._-]FM[._-].+MP3/i', $name)) { |
||
| 184 | |||
| 185 | if ($categorizeForeign && $this->checkForeign($name)) { |
||
| 186 | return $this->matched(Category::MUSIC_FOREIGN, 0.85, 'mp3_foreign'); |
||
| 187 | } |
||
| 188 | return $this->matched(Category::MUSIC_MP3, 0.85, 'mp3'); |
||
| 189 | } |
||
| 190 | } |
||
| 191 | |||
| 192 | // MP3 scene patterns |
||
| 193 | if (preg_match('/^[a-zA-Z0-9]{1,12}[._-](19|20)\d\d[._-][a-zA-Z0-9]{1,12}$|[a-z0-9]{1,12}\-(19|20)\d\d\-[a-z0-9]{1,12}/i', $name)) { |
||
| 194 | if ($categorizeForeign && $this->checkForeign($name)) { |
||
| 195 | return $this->matched(Category::MUSIC_FOREIGN, 0.75, 'mp3_scene_foreign'); |
||
| 196 | } |
||
| 197 | return $this->matched(Category::MUSIC_MP3, 0.75, 'mp3_scene'); |
||
| 198 | } |
||
| 199 | |||
| 200 | // Bitrate patterns |
||
| 201 | if (preg_match('/[\.\-\(\[_ ]\d{2,3}k[\.\-\)\]_ ]|\((192|256|320)\)|(320|cd|eac|vbr)[._-]+mp3|(cd|eac|mp3|vbr)[._-]+320/i', $name)) { |
||
| 202 | if ($categorizeForeign && $this->checkForeign($name)) { |
||
| 203 | return $this->matched(Category::MUSIC_FOREIGN, 0.8, 'mp3_bitrate_foreign'); |
||
| 204 | } |
||
| 205 | return $this->matched(Category::MUSIC_MP3, 0.8, 'mp3_bitrate'); |
||
| 206 | } |
||
| 207 | |||
| 208 | return null; |
||
| 209 | } |
||
| 210 | |||
| 211 | protected function checkOther(string $name, bool $categorizeForeign): ?CategorizationResult |
||
| 240 | } |
||
| 241 | } |
||
| 242 | |||
| 243 |