|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace App\Services\Categorization\Categorizers; |
|
4
|
|
|
|
|
5
|
|
|
use App\Models\Category; |
|
6
|
|
|
use App\Services\Categorization\CategorizationResult; |
|
7
|
|
|
use App\Services\Categorization\ReleaseContext; |
|
8
|
|
|
|
|
9
|
|
|
/**
 * Release-name categorizer for the music family of categories.
 *
 * Checks run in a fixed order - audiobook, podcast, music video, lossless,
 * MP3, then "other" - and the first check that yields a result wins.
 * When the caller enables foreign categorization, every check except
 * audiobook and podcast reports Category::MUSIC_FOREIGN for names that
 * carry a language marker (see checkForeign()).
 */
class MusicCategorizer extends AbstractCategorizer
{
    // NOTE(review): how this value is consumed is decided outside this class
    // (presumably ordering relative to other categorizers) - confirm in AbstractCategorizer.
    protected int $priority = 40;

    // Full language names that flag a release as foreign music.
    protected const FOREIGN_LANGUAGES = 'arabic|brazilian|bulgarian|cantonese|chinese|croatian|czech|danish|deutsch|dutch|estonian|finnish|flemish|french|german|greek|hebrew|hungarian|icelandic|indian|iranian|italian|japanese|korean|latin|latvian|lithuanian|macedonian|mandarin|nordic|norwegian|persian|polish|portuguese|romanian|russian|serbian|slovenian|spanish|spanisch|swedish|thai|turkish|ukrainian|vietnamese';

    // Short language codes that flag a release as foreign music.
    protected const LANGUAGE_CODES = 'ar|bg|bl|cs|cz|da|de|dk|el|es|et|fi|fr|ger|gr|heb|hr|hu|hun|is|it|ita|jp|jap|ko|kor|lt|lv|mk|nl|no|pl|pt|ro|rs|ru|se|sk|sl|sr|sv|th|tr|ua|vi|zh';

    /**
     * Human-readable name of this categorizer.
     */
    public function getName(): string
    {
        return 'Music';
    }

    /**
     * Decide whether this categorizer should be bypassed for the release.
     *
     * A release is skipped when the context reports adult markers, or when
     * its name contains a TV-style season token (S01E02, S01.Complete, ...).
     */
    public function shouldSkip(ReleaseContext $context): bool
    {
        $seasonToken = '/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i';

        // Short-circuit: the season regex only runs when no adult markers were found.
        return $context->hasAdultMarkers() || preg_match($seasonToken, $context->releaseName);
    }

    /**
     * Run the music checks in order and return the first result found,
     * falling back to noMatch() when none of them recognises the name.
     */
    public function categorize(ReleaseContext $context): CategorizationResult
    {
        $name = $context->releaseName;

        // `??` evaluates lazily, so a later check only runs if every earlier one returned null.
        return $this->checkAudiobook($name)
            ?? $this->checkPodcast($name)
            ?? $this->checkMusicVideo($name, $context->categorizeForeign)
            ?? $this->checkLossless($name, $context->categorizeForeign)
            ?? $this->checkMP3($name, $context->categorizeForeign)
            ?? $this->checkOther($name, $context->categorizeForeign)
            ?? $this->noMatch();
    }

    /**
     * Tell whether the name contains a language name or language code that is
     * delimited by whitespace, dot, dash, underscore or a string boundary.
     */
    protected function checkForeign(string $name): bool
    {
        $pattern = sprintf(
            '/(?:^|[\s\.\-_])(?:%s|%s)(?:$|[\s\.\-_])/i',
            self::FOREIGN_LANGUAGES,
            self::LANGUAGE_CODES
        );

        return preg_match($pattern, $name) === 1;
    }

    /**
     * Detect audiobooks, trying three tiers of decreasing confidence.
     *
     * @return CategorizationResult|null null when no audiobook pattern applies
     */
    protected function checkAudiobook(string $name): ?CategorizationResult
    {
        // Tier 1: an explicit audiobook keyword backed by a narration, length or audio-file hint.
        $keyword = '/(?:^|[^a-zA-Z0-9])(?:Audiobook|Audio\s*Book|Talking\s*Book|ABEE|Audible)/i';
        $supportingHints = [
            '/\b(?:Unabridged|Abridged|Narrated|Narrator|MP3|M4A|M4B|AAC|Read\s+By|Tantor|Blackstone|Brilliance|GraphicAudio|Penguin|Audible)\b/i',
            '/\d+\s*CDs|\d+\s*Hours|Spoken\s+Word/i',
            '/\.(mp3|m4a|m4b|aac|flac|ogg|wma)$/i',
        ];
        if (preg_match($keyword, $name) && $this->anyPatternHits($name, $supportingHints)) {
            return $this->matched(Category::MUSIC_AUDIOBOOK, 0.95, 'audiobook');
        }

        // Tier 2: a bracketed/underscored tag, or a "Read By <first> <last>" credit.
        $taggedForms = [
            '/(?:[\(_\[])(?:Audiobook|AB|Unabridged)(?:[\)_\]])/i',
            '/Read\s+By\s+[A-Z][a-z]+\s+[A-Z][a-z]+/i',
        ];
        if ($this->anyPatternHits($name, $taggedForms)) {
            return $this->matched(Category::MUSIC_AUDIOBOOK, 0.9, 'audiobook_pattern');
        }

        // Tier 3: legacy catch-all - the bare keyword anywhere in the name.
        if (preg_match('/(Audiobook|Audio.?Book)/i', $name)) {
            return $this->matched(Category::MUSIC_AUDIOBOOK, 0.85, 'audiobook_legacy');
        }

        return null;
    }

    /**
     * Detect podcasts, trying three tiers of decreasing confidence.
     *
     * @return CategorizationResult|null null when no podcast pattern applies
     */
    protected function checkPodcast(string $name): ?CategorizationResult
    {
        // Tier 1: a podcast keyword at the start of the name or after a non-alphanumeric character.
        if (preg_match('/(?:^|[^a-zA-Z0-9])(?:Podcast|Pod[._ -]?cast|Pod[._ -]Show)/i', $name)) {
            return $this->matched(Category::MUSIC_PODCAST, 0.9, 'podcast');
        }

        // Tier 2: a known network/show name together with an episode-style marker.
        $network = '/\b(?:NPR|BBC[._ -]Sounds|Gimlet|Wondery|Stitcher|iHeart[._ -]?Radio|Joe[._ -]Rogan|RadioLab|Serial)\b/i';
        $episodeMarker = '/\b(?:Podcast|Episode|EP?[._ -]?\d+|Show)\b/i';
        if (preg_match($network, $name) && preg_match($episodeMarker, $name)) {
            return $this->matched(Category::MUSIC_PODCAST, 0.85, 'podcast_network');
        }

        // Tier 3: the word "podcast" embedded anywhere.
        if (preg_match('/podcast/i', $name)) {
            return $this->matched(Category::MUSIC_PODCAST, 0.8, 'podcast_simple');
        }

        return null;
    }

    /**
     * Detect music videos (concerts, tours, MVIDs, artist - title video rips).
     *
     * @return CategorizationResult|null null when no music-video pattern applies
     */
    protected function checkMusicVideo(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // A music-video keyword must be backed by a video-quality tag,
        // a performance term, or a video file extension.
        $keyword = '/(?:^|[^a-zA-Z0-9])(?:Music\s*Video|Concert|Live\s*Show|Tour|Festival|MV|MTV)|\b(?:MVID|MVid)\b/i';
        $supportingHints = [
            '/\b(?:720p|1080[pi]|2160p|BDRip|BluRay|DVDRip|HDTV|WebRip|WEB-DL|x264|x265)\b/i',
            '/\b(?:Live|Unplugged|Acoustic|World\s*Tour|in\s*Concert|Official\s*Video|Bootleg|Remastered)\b/i',
            '/\.(mkv|mp4|avi|ts|m2ts|mpg|mpeg|mov|wmv|vob|m4v)$/i',
        ];
        if (preg_match($keyword, $name) && $this->anyPatternHits($name, $supportingHints)) {
            // The foreign variant of this tier is reported at 0.85 rather than 0.9.
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_VIDEO,
                0.9,
                'music_video',
                'music_video_foreign',
                0.85
            );
        }

        // "Artist - Title" layout followed somewhere by a video resolution/codec tag.
        $artistTitle = '/^[A-Z0-9][A-Za-z0-9\.\s\&\'\(\)\-]+\s+\-\s+[A-Z0-9][A-Za-z0-9\.\s\&\'\(\)\-]+.*?\b(720p|1080[pi]|2160p|Bluray|x264|x265)\b/i';
        if (preg_match($artistTitle, $name)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_VIDEO,
                0.8,
                'music_video_artist',
                'music_video_foreign'
            );
        }

        return null;
    }

    /**
     * Detect lossless audio releases (FLAC, APE, WAV, ALAC, ...).
     *
     * @return CategorizationResult|null null when no lossless pattern applies
     */
    protected function checkLossless(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // A lossless format keyword backed by a quality/source hint or a lossless file extension.
        $formatKeyword = '/(?:^|[^a-zA-Z0-9])(?:FLAC|APE|WAV|ALAC|DSD|DSF|AIFF|PCM|Lossless)|\b(?:FLAC|APE|WAV|ALAC|DSD|DSF|AIFF|PCM)\b/i';
        $supportingHints = [
            '/\b(?:24[Bb]it|96kHz|192kHz|Hi[- ]?Res|HD[- ]?Tracks|Vinyl[- ]?Rip|CD[- ]?Rip|WEB[- ]?Rip|HDtracks|Qobuz|Tidal|MQA|SACD)\b/i',
            '/\.(flac|ape|wav|aiff|dsf|dff|m4a|tak)$/i',
        ];
        if (preg_match($formatKeyword, $name) && $this->anyPatternHits($name, $supportingHints)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_LOSSLESS,
                0.9,
                'lossless',
                'lossless_foreign'
            );
        }

        // FLAC-specific naming layouts (year/FLAC tags, bitrate + FLAC, WEB FLAC, ...).
        $flacLayouts = [
            '/\[(19|20)\d\d\][._ -]\[FLAC\]|([\(\[])flac([\)\]])|FLAC\-(19|20)\d\d\-[a-z0-9]{1,12}|\.flac"|(19|20)\d\d\sFLAC|[._ -]FLAC.+(19|20)\d\d[._ -]| FLAC$/i',
            '/\d{3,4}kbps[._ -]FLAC|\[FLAC\]|\(FLAC\)|FLACME|FLAC[._ -]\d{3,4}(kbps)?|WEB[._ -]FLAC/i',
        ];
        if ($this->anyPatternHits($name, $flacLayouts)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_LOSSLESS,
                0.85,
                'flac',
                'flac_foreign'
            );
        }

        // Remaining lossless codecs, by keyword or by file extension.
        $otherCodecs = '/\b(?:APE|Monkey\'s[._ -]Audio|WavPack|WV|TAK|TTA|ALAC|Apple[._ -]Lossless)\b|\.(ape|wv|tak|tta)$/i';
        if (preg_match($otherCodecs, $name)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_LOSSLESS,
                0.85,
                'lossless_format',
                'lossless_format_foreign'
            );
        }

        return null;
    }

    /**
     * Detect MP3 releases by keyword, scene naming layout, or bitrate tag.
     *
     * @return CategorizationResult|null null when no MP3 pattern applies
     */
    protected function checkMP3(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // An MP3/bitrate keyword backed by a bitrate value, a rip source, or a playlist/file hint.
        $keyword = '/(?:^|[^a-zA-Z0-9])(?:MP3|320kbps|256kbps|192kbps|128kbps|CBR|VBR)|\b(?:MP3)\b|[\._-](?:MP3)[\._-]|\.mp3$/i';
        $supportingHints = [
            '/\b(?:320|256|192|128)[._-]?kbps|\b(?:320|256|192|128)[._-]?K|\((?:320|256|192|128)\)|\[(?:320|256|192|128)\]|V0|V2|VBR/i',
            '/\b(?:CD[._-]?Rip|Web[._-]?Rip|WEB|iTunes|AmazonRip|Spotify[._-]?Rip|MP3\s*\-\s*\d{3}kbps)\b/i',
            '/\.(m3u|mp3)"|rip(?:192|256|320)|[._-]FM[._-].+MP3/i',
        ];
        if (preg_match($keyword, $name) && $this->anyPatternHits($name, $supportingHints)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_MP3,
                0.85,
                'mp3',
                'mp3_foreign'
            );
        }

        // Scene layout: <token><sep><year><sep><token>.
        $sceneLayout = '/^[a-zA-Z0-9]{1,12}[._-](19|20)\d\d[._-][a-zA-Z0-9]{1,12}$|[a-z0-9]{1,12}\-(19|20)\d\d\-[a-z0-9]{1,12}/i';
        if (preg_match($sceneLayout, $name)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_MP3,
                0.75,
                'mp3_scene',
                'mp3_scene_foreign'
            );
        }

        // Stand-alone bitrate tags (e.g. "-320k-", "(256)") or source/bitrate pairs around "mp3".
        $bitrateTag = '/[\.\-\(\[_ ]\d{2,3}k[\.\-\)\]_ ]|\((192|256|320)\)|(320|cd|eac|vbr)[._-]+mp3|(cd|eac|mp3|vbr)[._-]+320/i';
        if (preg_match($bitrateTag, $name)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_MP3,
                0.8,
                'mp3_bitrate',
                'mp3_bitrate_foreign'
            );
        }

        return null;
    }

    /**
     * Detect music that fits no specific format: compilations, album/CD sets,
     * DJ mixes and label series.
     *
     * @return CategorizationResult|null null when no pattern applies
     */
    protected function checkOther(string $name, bool $categorizeForeign): ?CategorizationResult
    {
        // Compilation / various-artists style keywords.
        $compilation = '/(?:^|[^a-zA-Z0-9])(?:Compilation|Various[._ -]Artists|OST|Soundtrack|B-Sides|Greatest[._ -]Hits|Anthology)|\b(?:VA|V\.A|Bonus[._ -]Track|Discography|Box[._ -]Set)\b/i';
        if (preg_match($compilation, $name)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_OTHER,
                0.8,
                'music_other',
                'music_other_foreign'
            );
        }

        // Album, CD-set, vinyl, EP and "Live at" layouts.
        $albumLayouts = [
            '/(?:\d)[._ -](?:CD|Albums|LP)[._ -](?:Set|Compilation)|CD[._ -](Collection|Box|SET)|(\d)-?CD[._ -]/i',
            '/Vinyl[._ -](?:24[._ -]96|2496|Collection|RIP)|WEB[._ -](?:Single|Album)|EP[._ -]\d{4}|\bEP\b.+(?:19|20)\d\d|Live[._ -](?:at|At|@)/i',
        ];
        if ($this->anyPatternHits($name, $albumLayouts)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_OTHER,
                0.75,
                'music_album',
                'music_album_foreign'
            );
        }

        // Well-known dance labels and DJ-mix phrasing.
        $djAndLabels = [
            '/\b(?:Ministry[._ -]of[._ -]Sound|Hed[._ -]Kandi|Cream|Fabric[._ -]Live|Ultra[._ -]Music)\b/i',
            '/\b(?:DJ[._ -]Mix|Mixed[._ -]By|Tiesto[._ -]Club|Radio[._ -]Show|Club[._ -]Hits)\b/i',
        ];
        if ($this->anyPatternHits($name, $djAndLabels)) {
            return $this->foreignAware(
                $name,
                $categorizeForeign,
                Category::MUSIC_OTHER,
                0.75,
                'music_dj',
                'music_dj_foreign'
            );
        }

        return null;
    }

    /**
     * Report whether at least one of the given regexes matches the name.
     *
     * Patterns are tried in order and evaluation stops at the first hit,
     * mirroring a chain of `preg_match(...) || preg_match(...)`.
     *
     * @param string[] $patterns complete PCRE patterns, delimiters included
     */
    private function anyPatternHits(string $name, array $patterns): bool
    {
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $name)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Build the result for a recognised release, switching to the foreign
     * category when foreign categorization is enabled and the name carries
     * a language marker.
     *
     * @param mixed      $category          Category constant used for the non-foreign result
     * @param float      $confidence        confidence of the non-foreign result
     * @param string     $reason            reason tag of the non-foreign result
     * @param string     $foreignReason     reason tag of the foreign result
     * @param float|null $foreignConfidence confidence of the foreign result; defaults to $confidence
     */
    private function foreignAware(
        string $name,
        bool $categorizeForeign,
        $category,
        float $confidence,
        string $reason,
        string $foreignReason,
        ?float $foreignConfidence = null
    ): ?CategorizationResult {
        // The language scan is skipped entirely when foreign categorization is disabled.
        if ($categorizeForeign && $this->checkForeign($name)) {
            return $this->matched(Category::MUSIC_FOREIGN, $foreignConfidence ?? $confidence, $foreignReason);
        }

        return $this->matched($category, $confidence, $reason);
    }
}
|
242
|
|
|
|
|
243
|
|
|
|