Passed
Push — master ( 03c468...179943 )
by Darko
11:56
created

XxxCategorizer::checkClipHD()   C

Complexity

Conditions 14
Paths 12

Size

Total Lines 51
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 22
c 1
b 0
f 0
dl 0
loc 51
rs 6.2666
cc 14
nc 12
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace App\Services\Categorization\Categorizers;
4
5
use App\Models\Category;
6
use App\Services\Categorization\CategorizationResult;
7
use App\Services\Categorization\ReleaseContext;
8
9
/**
10
 * Categorizer for Adult/XXX content.
11
 */
12
class XxxCategorizer extends AbstractCategorizer
13
{
14
    protected int $priority = 10; // High priority - should run early

    /**
     * Known adult studios/sites as a regex alternation (no delimiters, no anchors).
     *
     * Callers embed it as '(' . self::KNOWN_STUDIOS . ')' and apply the /i flag,
     * so entries are effectively case-insensitive. '[._ -]?' allows an optional
     * separator between the words of a multi-word site name.
     */
    protected const KNOWN_STUDIOS = 'Brazzers|NaughtyAmerica|RealityKings|Bangbros|BangBros18|TeenFidelity|PornPros|SexArt|WowGirls|Vixen|Blacked|Tushy|Deeper|Bellesa|Defloration|MetArt|MetArtX|TheLifeErotic|VivThomas|JoyMii|Nubiles|NubileFilms|FamilyStrokes|X-Art|Babes|Twistys|WetAndPuffy|WowPorn|MomsTeachSex|Mofos|BangBus|Passion-HD|EvilAngel|DorcelClub|Private|Hustler|CherryPimps|PureTaboo|LadyLyne|TeamSkeet|GirlsWay|SweetSinner|NewSensations|Digital[._ -]?Playground|Wicked|Penthouse|Playboy|Kink|HardX|ArchAngel|JulesJordan|ManuelFerrara|LesbianX|AllAnal|DarkX|Elegant[._ -]?Angel|ZeroTolerance|Score|PornFidelity|Kelly[._ -]?Madison|DDF[._ -]?Network|21Sextury|21Naturals|Colette|SexMex|Bang|SpankBang|PornWorld|LegalPorno|AnalVids|GonzoXXX|RoccoSiffredi|Fake[._ -]?Hub|FakeAgent|FakeTaxi|FakeHostel|PublicAgent|StrandedTeens|Property[._ -]?Sex|Dane[._ -]?Jones|Lets[._ -]?Doe[._ -]?It|Office[._ -]?Obsession|SexyHub|Massage[._ -]?Rooms|Fitness[._ -]?Rooms|Female[._ -]?Agent|MissaX|All[._ -]?Girl[._ -]?Massage|Fantasy[._ -]?Massage|Nurumassage|Soapymassage|Reality[._ -]?Junkies|Perv[._ -]?Mom|Bad[._ -]?Milfs|Milf[._ -]?Body|Step[._ -]?Siblings|Sis[._ -]?Loves[._ -]?Me|Brother[._ -]?Crush|Dad[._ -]?Crush|Mom[._ -]?Knows[._ -]?Best|Bratty[._ -]?Sis|My[._ -]?Family[._ -]?Pies|Family[._ -]?Therapy|Nubiles[._ -]?Porn|Step[._ -]?Fantasy|Caught[._ -]?Fapping|She[._ -]?Will[._ -]?Cheat|Dirty[._ -]?Wives[._ -]?Club|Big[._ -]?Tits[._ -]?Round[._ -]?Asses|Ass[._ -]?Parade|Monsters[._ -]?Of[._ -]?Cock|Brown[._ -]?Bunnies|Teens[._ -]?Love[._ -]?Huge[._ -]?Cocks|Ass[._ -]?Masterpiece|Bang[._ -]?Casting|Holed|Tiny4K|Lubed|POVD|Exotic4K|CastingCouch[._ -]?X|Casting[._ -]?Couch|Creampie[._ -]?Angels|Digital[._ -]?Desire|Femjoy|Hegre|Joymii|Met[._ -]?Art|MPL[._ -]?Studios|Rylsky[._ -]?Art|Showy[._ -]?Beauty|Stunning18|Photodromm|Watch4Beauty|Wow[._ -]?Girls|Yonitale';

    /**
     * Adult keywords as a regex alternation (no delimiters, no anchors).
     *
     * Every consumer in this class wraps the list as '\b(' . ... . ')\b', i.e. each
     * entry must match a whole word. Entries that are only word stems therefore
     * carry an explicit '\w*' suffix (Masturbat\w*, Penetrat\w*, Seduct\w*);
     * without it they could never match words such as "Masturbation".
     */
    protected const ADULT_KEYWORDS = 'Anal|Ass|BBW|BDSM|Blow|Boob|Bukkake|Casting|Couch|Cock|Compilation|Creampie|Cum|Dick|Dildo|Facial|Fetish|Fuck|Gang|Hardcore|Homemade|Horny|Interracial|Lesbian|MILF|Masturbat\w*|Nympho|Oral|Orgasm|Penetrat\w*|Pornstar|POV|Pussy|Riding|Seduct\w*|Sex|Shaved|Slut|Squirt|Suck|Swallow|Threesome|Tits|Titty|Toy|Virgin|Whore';

    /**
     * VR sites as a regex alternation (no delimiters, no anchors); used by checkVR().
     */
    protected const VR_SITES = 'SexBabesVR|LittleCapriceVR|VRoomed|VRMagic|TonightsGirlfriend|NaughtyAmericaVR|BaDoinkVR|WankzVR|VRBangers|StripzVR|RealJamVR|TmwVRnet|MilfVR|KinkVR|CzechVR(?:Fetish)?|HoloGirlsVR|WetVR|XSinsVR|VRCosplayX|BIBIVR|SLR|SexLikeReal';
24
25
    /**
     * Name of this categorizer.
     *
     * @return string Always the literal 'XXX'.
     */
    public function getName(): string
    {
        return 'XXX';
    }
29
30
    /**
     * Pick an adult sub-category for a release, or report no match.
     *
     * The release name is screened with looksLikeXxx() first. If it passes, the
     * sub-category checks run in a fixed order and the first non-null result is
     * returned. The WEB-DL check only takes part when $context->catWebDL is truthy.
     *
     * @param ReleaseContext $context Supplies releaseName, poster and catWebDL.
     *
     * @return CategorizationResult Result of the first matching check, otherwise noMatch().
     */
    public function categorize(ReleaseContext $context): CategorizationResult
    {
        $releaseName = $context->releaseName;

        if (!$this->looksLikeXxx($releaseName)) {
            return $this->noMatch();
        }

        // `??` short-circuits, so later checks are not evaluated once one has matched.
        return $this->checkOnlyFans($releaseName)
            ?? $this->checkVR($releaseName)
            ?? $this->checkUHD($releaseName)
            ?? $this->checkClipHD($releaseName)
            ?? $this->checkPack($releaseName)
            ?? $this->checkClipSD($releaseName, $context->poster)
            ?? $this->checkSD($releaseName)
            ?? ($context->catWebDL ? $this->checkWebDL($releaseName) : null)
            ?? $this->checkX264($releaseName)
            ?? $this->checkXvid($releaseName)
            ?? $this->checkImageset($releaseName)
            ?? $this->checkWMV($releaseName)
            ?? $this->checkDVD($releaseName)
            ?? $this->checkOther($releaseName)
            ?? $this->noMatch();
    }
98
99
    /**
     * Decide whether a release name plausibly refers to adult content.
     *
     * A name qualifies when any of these hold:
     *  - it contains the standalone token "XXX";
     *  - it contains a known studio/site name;
     *  - it contains an adult keyword together with a video marker
     *    (resolution or container extension);
     *  - it starts with "site<sep>date<sep>..." and either contains an adult
     *    keyword, or has two alphabetic words after a two-digit group and
     *    carries no TV marker.
     *
     * @param string $name Release name to inspect.
     *
     * @return bool True when the name looks like adult content.
     */
    protected function looksLikeXxx(string $name): bool
    {
        // Explicit marker or a known studio/site is sufficient on its own.
        if (preg_match('/\bXXX\b/i', $name)
            || preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name)) {
            return true;
        }

        $keywordRegex = '/\b(' . self::ADULT_KEYWORDS . ')\b/i';
        $mentionsKeyword = (bool) preg_match($keywordRegex, $name);

        // A bare keyword is too weak; require a video marker next to it.
        if ($mentionsKeyword && preg_match('/\b(720p|1080p|2160p|4k|mp4|mkv|avi|wmv)\b/i', $name)) {
            return true;
        }

        // Site with date pattern: sitename.YYYY.MM.DD or sitename.YY.MM.DD.
        // Without that prefix there is nothing further to go on.
        if (!preg_match('/^[A-Za-z]+[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ][A-Za-z]/i', $name)) {
            return false;
        }

        // A dated release that also mentions an adult keyword is not a daily TV show.
        if ($mentionsKeyword) {
            return true;
        }

        // "firstname.lastname" style pair after a two-digit group, unless obvious TV markers exist.
        return preg_match('/\d{2}[.\-_ ]([a-z]+)[.\-_ ]([a-z]+)[.\-_ ]/i', $name)
            && !preg_match('/\b(S\d{1,2}E\d{1,2}|Episode|Season|HDTV|PDTV)\b/i', $name);
    }
139
140
    /**
     * Detect OnlyFans video releases.
     *
     * The name must contain "OnlyFans" (optionally separated by '-', '_' or a
     * space) or start with "OF.". Names that use photo/pack wording without any
     * video container or codec hint are rejected.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_ONLYFANS match, or null.
     */
    protected function checkOnlyFans(string $name): ?CategorizationResult
    {
        if (!preg_match('/\bOnly[-_ ]?Fans\b|^OF\./i', $name)) {
            return null;
        }

        $photoWording = '/\b(photo(set)?|image(set)?|pics?|wallpapers?|collection|pack)\b/i';
        $videoHint = '/\b(mp4|mkv|mov|wmv|avi|webm|h\.?264|x264|h\.?265|x265)\b/i';

        // Skip photo packs unless there's a video hint.
        if (preg_match($photoWording, $name) && !preg_match($videoHint, $name)) {
            return null;
        }

        return $this->matched(Category::XXX_ONLYFANS, 0.95, 'onlyfans');
    }
154
155
    /**
     * Detect adult VR releases.
     *
     * Requires the substring "vr" (case-insensitive) plus either a VR site
     * token or an explicit VR180/VR360 tag. The match is accepted when the
     * name contains a VR site token or the standalone token "XXX".
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_VR match, or null.
     */
    protected function checkVR(string $name): ?CategorizationResult
    {
        // Cheap pre-filter before running any regex.
        if (stripos($name, 'vr') === false) {
            return null;
        }

        $siteRegex = '/\b(' . self::VR_SITES . ')\b/i';
        $fromVrSite = (bool) preg_match($siteRegex, $name);

        // Require either a VR site token or explicit VR180/VR360.
        if (!$fromVrSite && !preg_match('/\bVR(?:180|360)\b/i', $name)) {
            return null;
        }

        // NOTE(review): after the guard above the name already satisfies the first
        // or second alternative of this pattern, so this test always succeeds and
        // the resolution/headset alternatives cannot decide the outcome — confirm
        // whether the guard or the pattern reflects the intended rule.
        $vrPattern = '/\b(' . self::VR_SITES . ')\b|\bVR(?:180|360)\b|\b(?:5K|6K|7K|8K)\b.*\bVR\b|\b(?:GearVR|Oculus|Quest[123]?|PSVR|Vive|Index|Pimax)\b/i';
        if (!preg_match($vrPattern, $name)) {
            return null;
        }

        // Verify XXX content: a VR site token, or an explicit XXX marker.
        if ($fromVrSite || preg_match('/\bXXX\b/i', $name)) {
            return $this->matched(Category::XXX_VR, 0.95, 'vr');
        }

        return null;
    }
179
180
    /**
     * Detect UHD (2160p / 4K) adult releases.
     *
     * The name must carry a UHD token and at least one adult marker: the
     * standalone token "XXX", a known studio/site, or one of a short list of
     * explicit keywords. Names matching the known UHD release-group pattern are
     * reported with the 'uhd_group' tag, all others with 'uhd'.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_UHD match, or null.
     */
    protected function checkUHD(string $name): ?CategorizationResult
    {
        if (!preg_match('/\b(2160p|4k|UHD|Ultra[._ -]?HD)\b/i', $name)) {
            return null;
        }

        // All patterns use /i, so the name is matched as-is; no lower-cased
        // copy is needed (and the other checks in this class do not make one).
        $adultMarkers = [
            '/\bXXX\b/i',
            '/\b(' . self::KNOWN_STUDIOS . ')\b/i',
            '/\b(Hardcore|Porn|Sex|Anal|Creampie|MILF|Lesbian|Teen|Interracial)\b/i',
        ];

        $hasAdultMarker = false;
        foreach ($adultMarkers as $pattern) {
            if (preg_match($pattern, $name) === 1) {
                $hasAdultMarker = true;
                break;
            }
        }

        if (!$hasAdultMarker) {
            return null;
        }

        // Known UHD release groups.
        if (preg_match('/XXX.+2160p[\w\-.]+M[PO][V4]-(KTR|GUSH|FaiLED|SEXORS|hUSHhUSH|YAPG|WRB|NBQ|FETiSH)/i', $name)) {
            return $this->matched(Category::XXX_UHD, 0.95, 'uhd_group');
        }

        return $this->matched(Category::XXX_UHD, 0.9, 'uhd');
    }
202
203
    /**
     * Detect single-scene HD clips.
     *
     * Names starting with a pack/collection word and names carrying
     * season/episode numbering are rejected up front. The remaining rules are
     * tried in order and the first hit wins:
     *  1. known studio prefix (see clipFromKnownStudio());
     *  2. generic "site.date" prefix (see clipFromDatedRelease());
     *  3. an adult tag directly next to an HD token (see clipFromAdjacentAdultTag()).
     *
     * Note that rule 1 can return XXX_X264 rather than XXX_CLIPHD for a
     * known studio + date name that has no HD token.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null Match from the first rule that applies, or null.
     */
    protected function checkClipHD(string $name): ?CategorizationResult
    {
        // Exclude packs and collections (only when the name starts with the word).
        if (preg_match('/^(Complete|Pack|Collection|Anthology|Siterip)\b/i', $name)) {
            return null;
        }

        // Exclude TV shows.
        if (preg_match('/\b(S\d{1,2}E\d{1,2}|S\d{1,2}|Season\s\d{1,2})\b/i', $name)) {
            return null;
        }

        // Standalone HD token anywhere in the name.
        $hasHD = preg_match('/\b(' . self::CLIP_HD_MARKERS . ')\b/i', $name) === 1;

        return $this->clipFromKnownStudio($name, $hasHD)
            ?? $this->clipFromDatedRelease($name, $hasHD)
            ?? $this->clipFromAdjacentAdultTag($name);
    }

    /**
     * Resolution tokens the clip checks treat as "HD" (regex alternation, no delimiters).
     */
    private const CLIP_HD_MARKERS = '720p|1080p|2160p|HD|4K';

    /**
     * Clip rules for names that start with a known studio/site.
     *
     * @param string $name  Release name.
     * @param bool   $hasHD Whether the name contains a standalone HD token.
     *
     * @return CategorizationResult|null Match, or null when the name does not start with a known studio pattern.
     */
    private function clipFromKnownStudio(string $name, bool $hasHD): ?CategorizationResult
    {
        $studios = self::KNOWN_STUDIOS;

        // Studio + performer + HD resolution.
        if (preg_match('/^(' . $studios . ')\.([A-Z][a-z]+).*?(' . self::CLIP_HD_MARKERS . ')/i', $name)) {
            return $this->matched(Category::XXX_CLIPHD, 0.9, 'clip_hd_studio');
        }

        // Known studio with date pattern: site.YYYY.MM.DD or site.YY.MM.DD.
        if (!preg_match('/^(' . $studios . ')[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}/i', $name)) {
            return null;
        }

        // Even without an HD marker, a known studio with a date pattern is still reported.
        return $hasHD
            ? $this->matched(Category::XXX_CLIPHD, 0.95, 'clip_hd_studio_date')
            : $this->matched(Category::XXX_X264, 0.85, 'studio_date');
    }

    /**
     * Clip rules for names of the form "site.date.rest" where the site is not
     * required to be a known studio.
     *
     * @param string $name  Release name.
     * @param bool   $hasHD Whether the name contains a standalone HD token.
     *
     * @return CategorizationResult|null XXX_CLIPHD match, or null.
     */
    private function clipFromDatedRelease(string $name, bool $hasHD): ?CategorizationResult
    {
        // Both date rules are skipped for names that look like TV/documentary content.
        if (preg_match('/\b(S\d{2}E\d{2}|Documentary|Series)\b/i', $name)) {
            return null;
        }

        // Date pattern with 4-digit year: site.YYYY.MM.DD.performer.title.1080p
        // Needs an HD token or an adult keyword to count.
        if (preg_match('/^([A-Z][a-zA-Z0-9]+)[.\-_ ](19|20)\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ]/i', $name)
            && ($hasHD || preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name))) {
            return $this->matched(Category::XXX_CLIPHD, 0.85, 'clip_hd_date_4digit');
        }

        // Date pattern with 2-digit year: site.YY.MM.DD.performer.title.1080p
        // The HD token here is matched as a substring (no word boundary).
        if (preg_match('/^([A-Z][a-zA-Z0-9]+)\.(\d{2})\.(\d{2})\.(\d{2})\..*?(' . self::CLIP_HD_MARKERS . ')/i', $name)) {
            return $this->matched(Category::XXX_CLIPHD, 0.85, 'clip_hd_date');
        }

        return null;
    }

    /**
     * Clip rule for an adult tag placed directly before or after an HD token,
     * separated only by '.', '_', ' ' or '-'.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_CLIPHD match, or null.
     */
    private function clipFromAdjacentAdultTag(string $name): ?CategorizationResult
    {
        $tags = 'XXX|MILF|Anal|Sex|Porn';
        $hd = self::CLIP_HD_MARKERS;

        if (preg_match('/\b(' . $tags . ')[._ -]+(' . $hd . ')\b/i', $name)
            || preg_match('/\b(' . $hd . ')[._ -]+(' . $tags . ')\b/i', $name)) {
            return $this->matched(Category::XXX_CLIPHD, 0.8, 'clip_hd_xxx');
        }

        return null;
    }
255
256
    /**
     * Detect pack releases: the word "PACK" (any case) with a space or dot on
     * both sides.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_PACK match, or null.
     */
    protected function checkPack(string $name): ?CategorizationResult
    {
        return preg_match('/[ .]PACK[ .]/i', $name)
            ? $this->matched(Category::XXX_PACK, 0.85, 'pack')
            : null;
    }
264
265
    /**
     * Detect SD clips, either by a known poster address or by a tag in the name.
     *
     * The poster is checked first and yields the 'clip_sd_poster' tag; otherwise
     * the name must contain "iPT Team" (any single whitespace between the words),
     * "KLEENEX" or "SDPORN", all case-insensitive.
     *
     * @param string $name   Release name.
     * @param string $poster Poster string of the release.
     *
     * @return CategorizationResult|null XXX_CLIPSD match, or null.
     */
    protected function checkClipSD(string $name, string $poster): ?CategorizationResult
    {
        if (preg_match('/anon@y[.]com|@md-hobbys[.]com|oz@lot[.]com/i', $poster)) {
            return $this->matched(Category::XXX_CLIPSD, 0.85, 'clip_sd_poster');
        }

        $taggedInName = stripos($name, 'SDPORN') !== false
            || preg_match('/(iPT\sTeam|KLEENEX)/i', $name);

        return $taggedInName
            ? $this->matched(Category::XXX_CLIPSD, 0.85, 'clip_sd')
            : null;
    }
277
278
    /**
     * Detect SD releases marked with "SDX264XXX" or "XXX.HR." (case-insensitive).
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_SD match, or null.
     */
    protected function checkSD(string $name): ?CategorizationResult
    {
        return preg_match('/SDX264XXX|XXX\.HR\./i', $name)
            ? $this->matched(Category::XXX_SD, 0.85, 'sd')
            : null;
    }
286
287
    /**
     * Detect adult WEB-DL / WEBRip releases.
     *
     * Names with SxxEyy numbering are rejected. Otherwise the name needs a
     * web-dl / webrip token and at least one piece of adult evidence: an adult
     * keyword, a known studio/site, or a generic adult tag.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_WEBDL match, or null.
     */
    protected function checkWebDL(string $name): ?CategorizationResult
    {
        // Exclude TV shows.
        if (preg_match('/\b(S\d{1,2}E\d{1,2})\b/i', $name)) {
            return null;
        }

        if (!preg_match('/web[._ -]dl|web-?rip/i', $name)) {
            return null;
        }

        // Tried in this order; the first hit is enough.
        $adultEvidence = [
            '/\b(' . self::ADULT_KEYWORDS . ')\b/i',
            '/\b(' . self::KNOWN_STUDIOS . ')\b/i',
            '/\b(XXX|Porn|Adult|JAV|Hentai)\b/i',
        ];

        foreach ($adultEvidence as $pattern) {
            if (preg_match($pattern, $name)) {
                return $this->matched(Category::XXX_WEBDL, 0.85, 'webdl');
            }
        }

        return null;
    }
303
304
    /**
     * Detect adult H.264 / x264 / AVC releases.
     *
     * Rejected up front: names with an x265/HEVC token, names without an
     * H.264-family token, and names containing "wmv", SxxEyy numbering or a
     * digits-x-digits group. What remains must match the adult pattern below.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_X264 match, or null.
     */
    protected function checkX264(string $name): ?CategorizationResult
    {
        $isHevc = '/\b(x265|hevc)\b/i';
        $isAvc = '/\b((x|h)[\.\-_ ]?264|AVC)\b/i';
        $nonTarget = '/\bwmv\b|S\d{1,2}E\d{1,2}|\d+x\d+/i';

        // Same three guards as separate early exits, evaluated left to right.
        if (preg_match($isHevc, $name) || !preg_match($isAvc, $name) || preg_match($nonTarget, $name)) {
            return null;
        }

        // Check for adult content.
        $adultPattern = '/\bXXX\b|a\.b\.erotica|BangBros|Cum|Defloration|Err?oticax?|JoyMii|MetArt|Nubiles|Porn|SexArt|Tushy|Vixen|JAV|Brazzers|NaughtyAmerica|RealityKings/i';

        return preg_match($adultPattern, $name)
            ? $this->matched(Category::XXX_X264, 0.85, 'x264')
            : null;
    }
330
331
    /**
     * Detect XviD-era releases: BD/BR/DVD rips, DivX/XviD tokens and a few
     * fixed name fragments, matched case-insensitively anywhere in the name.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_XVID match, or null.
     */
    protected function checkXvid(string $name): ?CategorizationResult
    {
        $xvidPattern = '/(b[dr]|dvd)rip|detoxication|divx|nympho|pornolation|swe6|tesoro|xvid/i';

        return preg_match($xvidPattern, $name)
            ? $this->matched(Category::XXX_XVID, 0.8, 'xvid')
            : null;
    }
339
340
    /**
     * Detect image sets: names containing "IMAGESET", "PICTURESET" or "ABPEA"
     * (case-insensitive, anywhere in the name).
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_IMAGESET match, or null.
     */
    protected function checkImageset(string $name): ?CategorizationResult
    {
        return preg_match('/IMAGESET|PICTURESET|ABPEA/i', $name)
            ? $this->matched(Category::XXX_IMAGESET, 0.9, 'imageset')
            : null;
    }
348
349
    /**
     * Detect WMV releases.
     *
     * Names carrying a modern resolution/codec token followed by a separator
     * are rejected; otherwise the name must contain a WMV marker.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_WMV match, or null.
     */
    protected function checkWMV(string $name): ?CategorizationResult
    {
        // Exclude modern formats.
        // NOTE(review): every alternative must be followed by one of [._ -], so
        // "MP4-" only matches when another separator follows the hyphen, and a
        // token at the very end of the name is not excluded — confirm intent.
        $modernFormat = '/\b(720p|1080p|2160p|x264|x265|h264|h265|hevc|XviD|MP4-|\.mp4)[._ -]/i';
        $wmvMarker = '/\b(WMV|Windows\s?Media\s?Video)\b|\.wmv$|[._ -]wmv[._ -]/i';

        if (preg_match($modernFormat, $name) || !preg_match($wmvMarker, $name)) {
            return null;
        }

        return $this->matched(Category::XXX_WMV, 0.8, 'wmv');
    }
362
363
    /**
     * Detect DVD releases: "DVDR" not followed by "i" (so "DVDRip" is left
     * out), or "DVD5" / "DVD9". Matching is case-insensitive.
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_DVD match, or null.
     */
    protected function checkDVD(string $name): ?CategorizationResult
    {
        // Negative lookahead instead of [^i]: a character class needs a
        // following character, so a name ending in "DVDR" would not match.
        if (preg_match('/dvdr(?!i)|dvd[59]/i', $name)) {
            return $this->matched(Category::XXX_DVD, 0.85, 'dvd');
        }

        return null;
    }
371
372
    /**
     * Last-resort check for a handful of fixed adult name fragments.
     *
     * It is the final check tried by categorize() and reports the lowest
     * score of all checks in this class (0.7).
     *
     * @param string $name Release name.
     *
     * @return CategorizationResult|null XXX_OTHER match, or null.
     */
    protected function checkOther(string $name): ?CategorizationResult
    {
        $fallbackPattern = '/[._ -]Brazzers|Creampie|[._ -]JAV[._ -]|North\.Pole|^Nubiles|She[._ -]?Male|Transsexual|OLDER ANGELS/i';

        return preg_match($fallbackPattern, $name)
            ? $this->matched(Category::XXX_OTHER, 0.7, 'other')
            : null;
    }
380
}
381
382