XxxCategorizer::checkX264()   A
last analyzed

Complexity

Conditions 5
Paths 5

Size

Total Lines 25
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 10
c 1
b 0
f 0
dl 0
loc 25
rs 9.6111
cc 5
nc 5
nop 1
1
<?php
2
3
namespace App\Services\Categorization\Categorizers;
4
5
use App\Models\Category;
6
use App\Services\Categorization\CategorizationResult;
7
use App\Services\Categorization\ReleaseContext;
8
9
/**
10
 * Categorizer for Adult/XXX content.
11
 */
12
class XxxCategorizer extends AbstractCategorizer
13
{
14
    /**
     * Ordering weight declared for this categorizer.
     *
     * NOTE(review): the inline remark says this value makes the categorizer run early;
     * how the number is interpreted is decided outside this class — confirm there.
     */
    protected int $priority = 10; // High priority - should run early
15
16
    /**
     * Regex alternation (no delimiters, no anchors) of known adult studio/site tokens.
     *
     * Callers in this class embed it inside a group, e.g. `\b(...)\b` or `^(...)`, with the `i` flag.
     * Multi-word names use an optional `[._ -]?` separator between their words.
     */
    protected const KNOWN_STUDIOS = 'Brazzers|NaughtyAmerica|RealityKings|Bangbros|BangBros18|TeenFidelity|PornPros|SexArt|WowGirls|Vixen|Blacked|Tushy|Deeper|Bellesa|Defloration|MetArt|MetArtX|TheLifeErotic|VivThomas|JoyMii|Nubiles|NubileFilms|Anilos|FamilyStrokes|X-Art|Babes|Twistys|WetAndPuffy|WowPorn|MomsTeachSex|Mofos|BangBus|Passion-HD|EvilAngel|DorcelClub|Private|Hustler|CherryPimps|PureTaboo|LadyLyne|TeamSkeet|GirlsWay|SweetSinner|NewSensations|Digital[._ -]?Playground|Wicked|Penthouse|Playboy|Kink|HardX|ArchAngel|JulesJordan|ManuelFerrara|LesbianX|AllAnal|DarkX|Elegant[._ -]?Angel|ZeroTolerance|Score|PornFidelity|Kelly[._ -]?Madison|DDF[._ -]?Network|21Sextury|21Naturals|Colette|SexMex|Bang|SpankBang|PornWorld|LegalPorno|AnalVids|GonzoXXX|RoccoSiffredi|Fake[._ -]?Hub|FakeAgent|FakeTaxi|FakeHostel|PublicAgent|StrandedTeens|Property[._ -]?Sex|Dane[._ -]?Jones|Lets[._ -]?Doe[._ -]?It|Office[._ -]?Obsession|SexyHub|Massage[._ -]?Rooms|Fitness[._ -]?Rooms|Female[._ -]?Agent|MissaX|All[._ -]?Girl[._ -]?Massage|Fantasy[._ -]?Massage|Nurumassage|Soapymassage|Reality[._ -]?Junkies|Perv[._ -]?Mom|Bad[._ -]?Milfs|Milf[._ -]?Body|Step[._ -]?Siblings|Sis[._ -]?Loves[._ -]?Me|Brother[._ -]?Crush|Dad[._ -]?Crush|Mom[._ -]?Knows[._ -]?Best|Bratty[._ -]?Sis|My[._ -]?Family[._ -]?Pies|Family[._ -]?Therapy|Nubiles[._ -]?Porn|Step[._ -]?Fantasy|Caught[._ -]?Fapping|She[._ -]?Will[._ -]?Cheat|Dirty[._ -]?Wives[._ -]?Club|Big[._ -]?Tits[._ -]?Round[._ -]?Asses|Ass[._ -]?Parade|Monsters[._ -]?Of[._ -]?Cock|Brown[._ -]?Bunnies|Teens[._ -]?Love[._ -]?Huge[._ -]?Cocks|Ass[._ -]?Masterpiece|Bang[._ -]?Casting|Holed|Tiny4K|Lubed|POVD|Exotic4K|CastingCouch[._ -]?X|Casting[._ -]?Couch|Creampie[._ -]?Angels|Digital[._ -]?Desire|Femjoy|Hegre|Joymii|Met[._ -]?Art|MPL[._ -]?Studios|Rylsky[._ -]?Art|Showy[._ -]?Beauty|Stunning18|Photodromm|Watch4Beauty|Wow[._ -]?Girls|Yonitale|Mommys[._ -]?Boy|AllOver30|MyFirst|10musume|Caribbeancom|Heyzo|Pacopacomama|1Pondo|TokyoHot';
18
19
    /**
     * Regex alternation (no delimiters, no anchors) of adult keywords.
     *
     * Callers in this class wrap it as `\b(...)\b` with the `i` flag, so every entry must match a
     * whole word. The three entries that are word stems rather than words (Masturbat, Penetrat,
     * Seduct) therefore carry a trailing `\w*`; without it the closing `\b` could never be satisfied
     * by "Masturbation", "Penetration", "Seduction" and the like.
     */
    protected const ADULT_KEYWORDS = 'Anal|Ass|BBW|BDSM|Blow|Boob|Bukkake|Casting|Couch|Cock|Compilation|Creampie|Cum|Dick|Dildo|Facial|Fetish|Fuck|Gang|Hardcore|Homemade|Horny|Interracial|Lesbian|MILF|Masturbat\w*|Nympho|Oral|Orgasm|Penetrat\w*|Pornstar|POV|Pussy|Riding|Seduct\w*|Shaved|Slut|Squirt|Suck|Swallow|Threesome|Tits|Titty|Toy|Virgin|Whore';
21
22
    // VR sites
23
    /**
     * Regex alternation (no delimiters, no anchors) of VR site/brand tokens.
     *
     * looksLikeXxx() and checkVR() wrap it as `\b(...)\b` with the `i` flag.
     */
    protected const VR_SITES = 'SexBabesVR|LittleCapriceVR|VRoomed|VRMagic|TonightsGirlfriend|NaughtyAmericaVR|BaDoinkVR|WankzVR|VRBangers|StripzVR|RealJamVR|TmwVRnet|MilfVR|KinkVR|CzechVR(?:Fetish)?|HoloGirlsVR|WetVR|XSinsVR|VRCosplayX|BIBIVR|SLR|SexLikeReal';
24
25
    /**
     * Identify this categorizer.
     *
     * @return string Always the literal label 'XXX'.
     */
    public function getName(): string
    {
        return 'XXX';
    }
29
30
    /**
     * Assign an adult subcategory to a release.
     *
     * The release name is first screened with looksLikeXxx(); names that fail the screen get
     * noMatch(). Otherwise the subcategory checks run in a fixed order and the first non-null
     * result is returned. checkWebDL() is only consulted when $context->catWebDL is truthy.
     *
     * @param ReleaseContext $context Supplies releaseName, poster and the catWebDL switch.
     * @return CategorizationResult Result of the first matching check, or noMatch().
     */
    public function categorize(ReleaseContext $context): CategorizationResult
    {
        $releaseName = $context->releaseName;

        if (!$this->looksLikeXxx($releaseName)) {
            return $this->noMatch();
        }

        // `??` evaluates lazily from left to right, so a check only runs when every
        // earlier check returned null — the same order and short-circuiting as a chain of ifs.
        $hit = $this->checkOnlyFans($releaseName)
            ?? $this->checkVR($releaseName)
            ?? $this->checkUHD($releaseName)
            ?? $this->checkClipHD($releaseName)
            ?? $this->checkPack($releaseName)
            ?? $this->checkClipSD($releaseName, $context->poster)
            ?? $this->checkSD($releaseName)
            ?? ($context->catWebDL ? $this->checkWebDL($releaseName) : null)
            ?? $this->checkX264($releaseName)
            ?? $this->checkXvid($releaseName)
            ?? $this->checkImageset($releaseName)
            ?? $this->checkWMV($releaseName)
            ?? $this->checkDVD($releaseName)
            ?? $this->checkOther($releaseName);

        return $hit ?? $this->noMatch();
    }
98
99
    /**
     * Decide whether a release name looks like adult content.
     *
     * Signals are tried in order; the first one that fires returns true:
     *  1. a standalone "XXX" token;
     *  2. a KNOWN_STUDIOS token;
     *  3. a VR_SITES token;
     *  4. an ADULT_KEYWORDS token together with a resolution/container token;
     *  5. a name that starts "<letters><sep><date><sep><letter>" and either contains an adult
     *     keyword, or has two letter-only tokens after a two-digit group and no TV marker.
     *
     * NOTE(review): an earlier "AV/JAV marker + known studio" test was dropped because it could
     * never fire — any name with a known studio is already accepted by signal 2. If a bare
     * AV/JAV marker is meant to count on its own, add that deliberately.
     *
     * @param string $name Release name to inspect.
     * @return bool True when any signal fires, false otherwise.
     */
    protected function looksLikeXxx(string $name): bool
    {
        if (preg_match('/\bXXX\b/i', $name)) {
            return true;
        }

        if (preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name)) {
            return true;
        }

        if (preg_match('/\b(' . self::VR_SITES . ')\b/i', $name)) {
            return true;
        }

        // The keyword test is needed by two signals below, so evaluate it once.
        $hasKeyword = preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name) === 1;

        // A keyword alone is too weak; require a video resolution/container token as well.
        if ($hasKeyword && preg_match('/\b(720p|1080p|2160p|4k|mp4|mkv|avi|wmv)\b/i', $name)) {
            return true;
        }

        // Remaining signals only apply to names shaped like sitename.YYYY.MM.DD.x or sitename.YY.MM.DD.x
        if (!preg_match('/^[A-Za-z]+[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ][A-Za-z]/i', $name)) {
            return false;
        }

        if ($hasKeyword) {
            return true;
        }

        // Two letter-only tokens after a two-digit group (a "firstname.lastname" shape),
        // unless the name carries an obvious TV marker.
        return preg_match('/\d{2}[.\-_ ]([a-z]+)[.\-_ ]([a-z]+)[.\-_ ]/i', $name)
            && !preg_match('/\b(S\d{1,2}E\d{1,2}|Episode|Season|HDTV|PDTV)\b/i', $name);
    }
149
150
    /**
     * Detect OnlyFans releases.
     *
     * Names that mention a photo/image/pack style word are skipped unless they also carry a video
     * container or codec token. A match is "OnlyFans" written as one word or split by a single
     * `.`, `_`, space or `-` (the separator set used for multi-word tokens elsewhere in this
     * class), or a name that starts with "OF.".
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_ONLYFANS result (0.95, 'onlyfans') or null.
     */
    protected function checkOnlyFans(string $name): ?CategorizationResult
    {
        $mentionsStills = preg_match('/\b(photo(set)?|image(set)?|pics?|wallpapers?|collection|pack)\b/i', $name);

        // Skip photo packs unless there is a video hint.
        if ($mentionsStills && !preg_match('/\b(mp4|mkv|mov|wmv|avi|webm|h\.?264|x264|h\.?265|x265)\b/i', $name)) {
            return null;
        }

        if (preg_match('/\bOnly[._ -]?Fans\b|^OF\./i', $name)) {
            return $this->matched(Category::XXX_ONLYFANS, 0.95, 'onlyfans');
        }

        return null;
    }
164
165
    /**
     * Detect VR releases.
     *
     * Flow:
     *  - names containing none of the substrings "vr", "oculus", "quest" (case-insensitive) are rejected;
     *  - a VR_SITES token yields XXX_VR (0.95, 'vr_site');
     *  - otherwise a VR180/VR360 token or a headset token is required, and the name must also
     *    contain "XXX" or an ADULT_KEYWORDS token to yield XXX_VR (0.9, 'vr_device').
     *
     * A second combined pattern test that used to follow the guard was removed: every alternative
     * that lets a name past the guard was also in that pattern, so it was always true.
     *
     * NOTE(review): VR_SITES entries that do not contain "vr" (SLR, SexLikeReal,
     * TonightsGirlfriend) are rejected by the substring gate unless "vr", "oculus" or "quest"
     * appears elsewhere in the name — confirm that is intended.
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_VR result or null.
     */
    protected function checkVR(string $name): ?CategorizationResult
    {
        // Cheap substring gate before any regex work.
        if (stripos($name, 'vr') === false && stripos($name, 'oculus') === false && stripos($name, 'quest') === false) {
            return null;
        }

        // A listed VR site is sufficient on its own.
        if (preg_match('/\b(' . self::VR_SITES . ')\b/i', $name)) {
            return $this->matched(Category::XXX_VR, 0.95, 'vr_site');
        }

        $hasVRHint = preg_match('/\bVR(?:180|360)\b/i', $name)
            || preg_match('/\b(?:GearVR|Oculus|Quest[123]?|PSVR|Vive|Index|Pimax)\b/i', $name);

        if (!$hasVRHint) {
            return null;
        }

        // A format/headset token only counts alongside an explicit adult marker.
        if (preg_match('/\bXXX\b/i', $name) || preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name)) {
            return $this->matched(Category::XXX_VR, 0.9, 'vr_device');
        }

        return null;
    }
197
198
    /**
     * Detect UHD/4K adult releases.
     *
     * Requires a UHD token (2160p, 4k, UHD, Ultra HD) and an adult marker ("XXX", a KNOWN_STUDIOS
     * token, or one of a short list of explicit words). Names matching the release-group pattern
     * get confidence 0.95 ('uhd_group'); all others get 0.9 ('uhd').
     *
     * The studio test runs directly on $name: the pattern already carries the `i` flag, so
     * lower-casing the subject first was redundant.
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_UHD result or null.
     */
    protected function checkUHD(string $name): ?CategorizationResult
    {
        if (!preg_match('/\b(2160p|4k|UHD|Ultra[._ -]?HD)\b/i', $name)) {
            return null;
        }

        $looksAdult = preg_match('/\bXXX\b/i', $name)
            || preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name)
            || preg_match('/\b(Hardcore|Porn|Sex|Anal|Creampie|MILF|Lesbian|Teen|Interracial)\b/i', $name);

        if (!$looksAdult) {
            return null;
        }

        // XXX ... 2160p ... MP4/MOV-<group> for a fixed list of release groups.
        $fromKnownGroup = preg_match('/XXX.+2160p[\w\-.]+M[PO][V4]-(KTR|GUSH|FaiLED|SEXORS|hUSHhUSH|YAPG|WRB|NBQ|FETiSH)/i', $name);

        return $fromKnownGroup
            ? $this->matched(Category::XXX_UHD, 0.95, 'uhd_group')
            : $this->matched(Category::XXX_UHD, 0.9, 'uhd');
    }
220
221
    /**
     * Detect single HD clips (and, for dated studio names without an HD token, x264 clips).
     *
     * Names that start with a pack/collection word or contain a season/episode tag are rejected.
     * The remaining rules are tried in order; the first that fits decides the result:
     *  - studio + "." + word + HD token            -> XXX_CLIPHD 0.9  'clip_hd_studio'
     *  - studio + date                             -> XXX_CLIPHD 0.95 'clip_hd_studio_date' with an HD token,
     *                                                 else XXX_X264 0.85 'studio_date'
     *  - any site + 4-digit-year date, no TV/doc   -> XXX_CLIPHD 0.85 'clip_hd_date_4digit'
     *    word, and an HD token or adult keyword
     *  - any site + 2-digit-year date + HD token,  -> XXX_CLIPHD 0.85 'clip_hd_date'
     *    no TV/doc word
     *  - studio + six digits                       -> XXX_CLIPHD 0.9  'clip_hd_jav_date' with an HD token,
     *                                                 else XXX_X264 0.85 'jav_date'
     *  - studio ... XXX ... HD token               -> XXX_CLIPHD 0.9  'clip_hd_studio_xxx'
     *  - explicit word adjacent to an HD token     -> XXX_CLIPHD 0.8  'clip_hd_xxx'
     *
     * @param string $name Release name.
     * @return CategorizationResult|null Matching result or null.
     */
    protected function checkClipHD(string $name): ?CategorizationResult
    {
        // Packs/collections and TV-style names are not single clips.
        if (preg_match('/^(Complete|Pack|Collection|Anthology|Siterip|SiteRip)\b/i', $name)
            || preg_match('/\b(S\d{1,2}E\d{1,2}|S\d{1,2}|Season\s\d{1,2})\b/i', $name)) {
            return null;
        }

        $studios = self::KNOWN_STUDIOS;
        $isHighDef = (bool) preg_match('/\b(720p|1080p|2160p|HD|4K)\b/i', $name);

        // Studio, a dot, a word, then an HD token somewhere later.
        if (preg_match('/^(' . $studios . ')\.([A-Z][a-z]+).*?(720p|1080p|2160p|HD|4K)/i', $name)) {
            return $this->matched(Category::XXX_CLIPHD, 0.9, 'clip_hd_studio');
        }

        // Studio followed by a date: site.YYYY.MM.DD or site.YY.MM.DD.
        if (preg_match('/^(' . $studios . ')[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}/i', $name)) {
            return $isHighDef
                ? $this->matched(Category::XXX_CLIPHD, 0.95, 'clip_hd_studio_date')
                : $this->matched(Category::XXX_X264, 0.85, 'studio_date');
        }

        // Any site name followed by a 4-digit-year date; needs an HD token or adult keyword.
        if (preg_match('/^([A-Z][a-zA-Z0-9]+)[.\-_ ](19|20)\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ]/i', $name)
            && !preg_match('/\b(S\d{2}E\d{2}|Documentary|Series)\b/i', $name)
            && ($isHighDef || preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name))) {
            return $this->matched(Category::XXX_CLIPHD, 0.85, 'clip_hd_date_4digit');
        }

        // Any site name followed by a dotted 2-digit-year date and a later HD token.
        if (preg_match('/^([A-Z][a-zA-Z0-9]+)\.(\d{2})\.(\d{2})\.(\d{2})\..*?(720p|1080p|2160p|HD|4K)/i', $name)
            && !preg_match('/\b(S\d{2}E\d{2}|Documentary|Series)\b/i', $name)) {
            return $this->matched(Category::XXX_CLIPHD, 0.85, 'clip_hd_date');
        }

        // Compact date: studio followed by six digits (e.g. 10musume.121025).
        if (preg_match('/^(' . $studios . ')[.\-_ ](\d{6})/i', $name)) {
            return $isHighDef
                ? $this->matched(Category::XXX_CLIPHD, 0.9, 'clip_hd_jav_date')
                : $this->matched(Category::XXX_X264, 0.85, 'jav_date');
        }

        // Studio, then an XXX token, then an HD token.
        if (preg_match('/^(' . $studios . ')[.\-_ ].*\bXXX\b.*?(720p|1080p|2160p|HD|4K)/i', $name)) {
            return $this->matched(Category::XXX_CLIPHD, 0.9, 'clip_hd_studio_xxx');
        }

        // An explicit word directly next to an HD token, in either order.
        $wordThenHd = preg_match('/\b(XXX|MILF|Anal|Sex|Porn)[._ -]+(720p|1080p|2160p|HD|4K)\b/i', $name);
        if ($wordThenHd || preg_match('/\b(720p|1080p|2160p|HD|4K)[._ -]+(XXX|MILF|Anal|Sex|Porn)\b/i', $name)) {
            return $this->matched(Category::XXX_CLIPHD, 0.8, 'clip_hd_xxx');
        }

        return null;
    }
286
287
    /**
     * Detect packs: the word "PACK" (any case) with a space or dot on both sides.
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_PACK result (0.85, 'pack') or null.
     */
    protected function checkPack(string $name): ?CategorizationResult
    {
        return preg_match('/[ .]PACK[ .]/i', $name)
            ? $this->matched(Category::XXX_PACK, 0.85, 'pack')
            : null;
    }
295
296
    /**
     * Detect SD clips from the poster address or from fixed name tags.
     *
     * The poster is tested first against three hard-coded address fragments; failing that, the
     * name is tested for "iPT Team", "KLEENEX" or the substring "SDPORN" (all case-insensitive).
     *
     * @param string $name   Release name.
     * @param string $poster Poster string of the release.
     * @return CategorizationResult|null XXX_CLIPSD result (0.85) with reason 'clip_sd_poster' or 'clip_sd', or null.
     */
    protected function checkClipSD(string $name, string $poster): ?CategorizationResult
    {
        $fromKnownPoster = preg_match('/anon@y[.]com|@md-hobbys[.]com|oz@lot[.]com/i', $poster);
        if ($fromKnownPoster) {
            return $this->matched(Category::XXX_CLIPSD, 0.85, 'clip_sd_poster');
        }

        $carriesSdTag = stripos($name, 'SDPORN') !== false
            || preg_match('/(iPT\sTeam|KLEENEX)/i', $name);

        return $carriesSdTag
            ? $this->matched(Category::XXX_CLIPSD, 0.85, 'clip_sd')
            : null;
    }
308
309
    /**
     * Detect SD releases by the tags "SDX264XXX" or "XXX.HR." (case-insensitive).
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_SD result (0.85, 'sd') or null.
     */
    protected function checkSD(string $name): ?CategorizationResult
    {
        if (!preg_match('/SDX264XXX|XXX\.HR\./i', $name)) {
            return null;
        }

        return $this->matched(Category::XXX_SD, 0.85, 'sd');
    }
317
318
    /**
     * Detect adult WEB-DL / WEBRip releases.
     *
     * Names with an SxxEyy tag are rejected. Otherwise the name needs a web-dl/webrip token plus
     * an adult signal: an ADULT_KEYWORDS token, a KNOWN_STUDIOS token, or one of
     * XXX/Porn/Adult/JAV/Hentai.
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_WEBDL result (0.85, 'webdl') or null.
     */
    protected function checkWebDL(string $name): ?CategorizationResult
    {
        // Season/episode tags mark TV content.
        if (preg_match('/\b(S\d{1,2}E\d{1,2})\b/i', $name)) {
            return null;
        }

        if (!preg_match('/web[._ -]dl|web-?rip/i', $name)) {
            return null;
        }

        $looksAdult = preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name)
            || preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name)
            || preg_match('/\b(XXX|Porn|Adult|JAV|Hentai)\b/i', $name);

        return $looksAdult
            ? $this->matched(Category::XXX_WEBDL, 0.85, 'webdl')
            : null;
    }
334
335
    /**
     * Detect adult H.264 releases.
     *
     * Rejects names with an x265/HEVC token, names without an x264/h264/AVC token, and names that
     * contain "wmv", an SxxEyy tag or a digits-x-digits run. What remains must match the adult
     * pattern (an XXX token or one of a fixed list of words/site names).
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_X264 result (0.85, 'x264') or null.
     */
    protected function checkX264(string $name): ?CategorizationResult
    {
        $disqualified = preg_match('/\b(x265|hevc)\b/i', $name)                // HEVC is not x264
            || !preg_match('/\b((x|h)[\.\-_ ]?264|AVC)\b/i', $name)           // an H.264 token is mandatory
            || preg_match('/\bwmv\b|S\d{1,2}E\d{1,2}|\d+x\d+/i', $name);      // obvious non-targets

        if ($disqualified) {
            return null;
        }

        $isAdult = preg_match(
            '/\bXXX\b|a\.b\.erotica|BangBros|Cum|Defloration|Err?oticax?|JoyMii|MetArt|Nubiles|Porn|SexArt|Tushy|Vixen|JAV|Brazzers|NaughtyAmerica|RealityKings/i',
            $name
        );

        return $isAdult
            ? $this->matched(Category::XXX_X264, 0.85, 'x264')
            : null;
    }
361
362
    /**
     * Detect XviD-era releases via rip/codec tokens and a few fixed tags (case-insensitive,
     * matched anywhere in the name).
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_XVID result (0.8, 'xvid') or null.
     */
    protected function checkXvid(string $name): ?CategorizationResult
    {
        $matchesXvid = preg_match('/(b[dr]|dvd)rip|detoxication|divx|nympho|pornolation|swe6|tesoro|xvid/i', $name);

        return $matchesXvid
            ? $this->matched(Category::XXX_XVID, 0.8, 'xvid')
            : null;
    }
370
371
    /**
     * Detect image sets by the tokens IMAGESET, PICTURESET or ABPEA (case-insensitive,
     * matched anywhere in the name).
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_IMAGESET result (0.9, 'imageset') or null.
     */
    protected function checkImageset(string $name): ?CategorizationResult
    {
        if (!preg_match('/IMAGESET|PICTURESET|ABPEA/i', $name)) {
            return null;
        }

        return $this->matched(Category::XXX_IMAGESET, 0.9, 'imageset');
    }
379
380
    /**
     * Detect WMV releases.
     *
     * Names that also carry a modern resolution/codec/container token are rejected first. The
     * exclusion accepts such a token when it is followed by `.`, `_`, space, `-` or the end of the
     * name; "MP4-" (as in "MP4-GROUP") and ".mp4" are handled as separate alternatives because a
     * mandatory separator after "MP4-" could never match a group suffix.
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_WMV result (0.8, 'wmv') or null.
     */
    protected function checkWMV(string $name): ?CategorizationResult
    {
        // Exclude modern formats.
        $modernFormat = '/\b(720p|1080p|2160p|x264|x265|h264|h265|hevc|XviD)(?:[._ -]|$)|\bMP4-|\.mp4(?:[._ -]|$)/i';
        if (preg_match($modernFormat, $name)) {
            return null;
        }

        if (preg_match('/\b(WMV|Windows\s?Media\s?Video)\b|\.wmv$|[._ -]wmv[._ -]/i', $name)) {
            return $this->matched(Category::XXX_WMV, 0.8, 'wmv');
        }

        return null;
    }
393
394
    /**
     * Detect DVD releases: "dvdr" not followed by "i" (so "dvdrip" is left alone), or "dvd5"/"dvd9".
     *
     * The "not followed by i" condition is a lookahead rather than a consumed character, so a
     * name that ends in "DVDR" is recognised as well.
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_DVD result (0.85, 'dvd') or null.
     */
    protected function checkDVD(string $name): ?CategorizationResult
    {
        if (preg_match('/dvdr(?!i)|dvd[59]/i', $name)) {
            return $this->matched(Category::XXX_DVD, 0.85, 'dvd');
        }

        return null;
    }
402
403
    /**
     * Last-resort bucket: a handful of fixed tokens that mark adult content without telling the
     * format (case-insensitive).
     *
     * @param string $name Release name.
     * @return CategorizationResult|null XXX_OTHER result (0.7, 'other') or null.
     */
    protected function checkOther(string $name): ?CategorizationResult
    {
        $hasFallbackToken = preg_match(
            '/[._ -]Brazzers|Creampie|[._ -]JAV[._ -]|North\.Pole|She[._ -]?Male|Transsexual|OLDER ANGELS/i',
            $name
        );

        return $hasFallbackToken
            ? $this->matched(Category::XXX_OTHER, 0.7, 'other')
            : null;
    }
411
}
412
413