@@ -15,7 +15,7 @@ |
||
| 15 | 15 | public function register(): void |
| 16 | 16 | { |
| 17 | 17 | // Register the pipeline as a singleton |
| 18 | - $this->app->singleton(CategorizationPipeline::class, function ($app) { |
|
| 18 | + $this->app->singleton(CategorizationPipeline::class, function($app) { |
|
| 19 | 19 | return CategorizationPipeline::createDefault(); |
| 20 | 20 | }); |
| 21 | 21 | |
@@ -85,7 +85,7 @@ |
||
| 85 | 85 | { |
| 86 | 86 | $categorizers = $this->pipeline->getCategorizers(); |
| 87 | 87 | |
| 88 | - return $categorizers->map(function ($categorizer) { |
|
| 88 | + return $categorizers->map(function($categorizer) { |
|
| 89 | 89 | return [ |
| 90 | 90 | 'name' => $categorizer->getName(), |
| 91 | 91 | 'priority' => $categorizer->getPriority(), |
@@ -38,9 +38,9 @@ discard block |
||
| 38 | 38 | protected function checkTechnical(string $name): ?CategorizationResult |
| 39 | 39 | { |
| 40 | 40 | $publishers = 'Apress|Addison[._ -]Wesley|Manning|No[._ -]Starch|OReilly|Packt|Pragmatic|Wiley|Wrox'; |
| 41 | - if (preg_match('/\b(' . $publishers . ')\b/i', $name)) return $this->matched(Category::BOOKS_TECHNICAL, 0.9, 'technical_publisher'); |
|
| 41 | + if (preg_match('/\b('.$publishers.')\b/i', $name)) return $this->matched(Category::BOOKS_TECHNICAL, 0.9, 'technical_publisher'); |
|
| 42 | 42 | $subjects = 'Programming|Python|JavaScript|Java|Database|Linux|DevOps|Machine[._ -]Learning|Data[._ -]Science'; |
| 43 | - if (preg_match('/\b(' . $subjects . ')\b/i', $name) && preg_match('/\b(Book|Guide|Tutorial|Learn)\b/i', $name)) { |
|
| 43 | + if (preg_match('/\b('.$subjects.')\b/i', $name) && preg_match('/\b(Book|Guide|Tutorial|Learn)\b/i', $name)) { |
|
| 44 | 44 | return $this->matched(Category::BOOKS_TECHNICAL, 0.85, 'technical_subject'); |
| 45 | 45 | } |
| 46 | 46 | return null; |
@@ -49,14 +49,14 @@ discard block |
||
| 49 | 49 | { |
| 50 | 50 | if (preg_match('/[._ -](Monthly|Weekly|Annual|Quarterly|Issue)[._ -]/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.9, 'magazine_frequency'); |
| 51 | 51 | $magazines = 'Forbes|Fortune|GQ|National[._ -]Geographic|Newsweek|Time|Vogue|Wired|PC[._ -]Gamer'; |
| 52 | - if (preg_match('/\b(' . $magazines . ')\b/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.85, 'magazine_title'); |
|
| 52 | + if (preg_match('/\b('.$magazines.')\b/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.85, 'magazine_title'); |
|
| 53 | 53 | return null; |
| 54 | 54 | } |
| 55 | 55 | protected function checkEbook(string $name): ?CategorizationResult |
| 56 | 56 | { |
| 57 | 57 | $formats = 'EPUB|MOBI|AZW\d?|PDF|FB2|DJVU|LIT'; |
| 58 | - if (preg_match('/\.(' . $formats . ')$/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.9, 'ebook_format'); |
|
| 59 | - if (preg_match('/\b(' . $formats . ')\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.85, 'ebook_indicator'); |
|
| 58 | + if (preg_match('/\.('.$formats.')$/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.9, 'ebook_format'); |
|
| 59 | + if (preg_match('/\b('.$formats.')\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.85, 'ebook_indicator'); |
|
| 60 | 60 | if (preg_match('/\b(E-?book|Kindle|Kobo|Nook)\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.8, 'ebook_platform'); |
| 61 | 61 | return null; |
| 62 | 62 | } |
@@ -9,36 +9,62 @@ discard block |
||
| 9 | 9 | public function getName(): string { return 'Book'; } |
| 10 | 10 | public function shouldSkip(ReleaseContext $context): bool |
| 11 | 11 | { |
| 12 | - if ($context->hasAdultMarkers()) return true; |
|
| 13 | - if (preg_match('/\.PS4-[A-Z0-9]+$/i', $context->releaseName)) return true; |
|
| 14 | - if (preg_match('/\b(?:PS[1-5]|PlayStation|Xbox|Switch|Nintendo|Wii|3DS|GameCube)\b/i', $context->releaseName)) return true; |
|
| 12 | + if ($context->hasAdultMarkers()) { |
|
| 13 | + return true; |
|
| 14 | + } |
|
| 15 | + if (preg_match('/\.PS4-[A-Z0-9]+$/i', $context->releaseName)) { |
|
| 16 | + return true; |
|
| 17 | + } |
|
| 18 | + if (preg_match('/\b(?:PS[1-5]|PlayStation|Xbox|Switch|Nintendo|Wii|3DS|GameCube)\b/i', $context->releaseName)) { |
|
| 19 | + return true; |
|
| 20 | + } |
|
| 15 | 21 | // Skip TV shows (season patterns) |
| 16 | - if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) return true; |
|
| 22 | + if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) { |
|
| 23 | + return true; |
|
| 24 | + } |
|
| 17 | 25 | // Skip movies (year + quality patterns) |
| 18 | - if (preg_match('/\b(19|20)\d{2}\b.*\b(1080p|720p|2160p|BluRay|WEB-DL|BDRip|DVDRip)\b/i', $context->releaseName)) return true; |
|
| 26 | + if (preg_match('/\b(19|20)\d{2}\b.*\b(1080p|720p|2160p|BluRay|WEB-DL|BDRip|DVDRip)\b/i', $context->releaseName)) { |
|
| 27 | + return true; |
|
| 28 | + } |
|
| 19 | 29 | return false; |
| 20 | 30 | } |
| 21 | 31 | public function categorize(ReleaseContext $context): CategorizationResult |
| 22 | 32 | { |
| 23 | 33 | $name = $context->releaseName; |
| 24 | - if ($result = $this->checkComic($name)) return $result; |
|
| 25 | - if ($result = $this->checkTechnical($name)) return $result; |
|
| 26 | - if ($result = $this->checkMagazine($name)) return $result; |
|
| 27 | - if ($result = $this->checkEbook($name)) return $result; |
|
| 34 | + if ($result = $this->checkComic($name)) { |
|
| 35 | + return $result; |
|
| 36 | + } |
|
| 37 | + if ($result = $this->checkTechnical($name)) { |
|
| 38 | + return $result; |
|
| 39 | + } |
|
| 40 | + if ($result = $this->checkMagazine($name)) { |
|
| 41 | + return $result; |
|
| 42 | + } |
|
| 43 | + if ($result = $this->checkEbook($name)) { |
|
| 44 | + return $result; |
|
| 45 | + } |
|
| 28 | 46 | return $this->noMatch(); |
| 29 | 47 | } |
| 30 | 48 | protected function checkComic(string $name): ?CategorizationResult |
| 31 | 49 | { |
| 32 | - if (preg_match('/\b(?:CBR|CBZ|C2C)\b|\.(?:cbr|cbz)$/i', $name)) return $this->matched(Category::BOOKS_COMICS, 0.9, 'comic_format'); |
|
| 50 | + if (preg_match('/\b(?:CBR|CBZ|C2C)\b|\.(?:cbr|cbz)$/i', $name)) { |
|
| 51 | + return $this->matched(Category::BOOKS_COMICS, 0.9, 'comic_format'); |
|
| 52 | + } |
|
| 33 | 53 | if (preg_match('/\b(?:Marvel|DC[._ -]Comics|Image[._ -]Comics|Dark[._ -]Horse|IDW)\b/i', $name) && |
| 34 | - preg_match('/\b(?:Comics?|Annual|Issue|Vol|TPB)\b/i', $name)) return $this->matched(Category::BOOKS_COMICS, 0.85, 'comic_publisher'); |
|
| 35 | - if (preg_match('/\b(?:Manga|Manhwa|Manhua|Webtoon)\b/i', $name)) return $this->matched(Category::BOOKS_COMICS, 0.85, 'manga'); |
|
| 54 | + preg_match('/\b(?:Comics?|Annual|Issue|Vol|TPB)\b/i', $name)) { |
|
| 55 | + return $this->matched(Category::BOOKS_COMICS, 0.85, 'comic_publisher'); |
|
| 56 | + } |
|
| 57 | + if (preg_match('/\b(?:Manga|Manhwa|Manhua|Webtoon)\b/i', $name)) { |
|
| 58 | + return $this->matched(Category::BOOKS_COMICS, 0.85, 'manga'); |
|
| 59 | + } |
|
| 36 | 60 | return null; |
| 37 | 61 | } |
| 38 | 62 | protected function checkTechnical(string $name): ?CategorizationResult |
| 39 | 63 | { |
| 40 | 64 | $publishers = 'Apress|Addison[._ -]Wesley|Manning|No[._ -]Starch|OReilly|Packt|Pragmatic|Wiley|Wrox'; |
| 41 | - if (preg_match('/\b(' . $publishers . ')\b/i', $name)) return $this->matched(Category::BOOKS_TECHNICAL, 0.9, 'technical_publisher'); |
|
| 65 | + if (preg_match('/\b(' . $publishers . ')\b/i', $name)) { |
|
| 66 | + return $this->matched(Category::BOOKS_TECHNICAL, 0.9, 'technical_publisher'); |
|
| 67 | + } |
|
| 42 | 68 | $subjects = 'Programming|Python|JavaScript|Java|Database|Linux|DevOps|Machine[._ -]Learning|Data[._ -]Science'; |
| 43 | 69 | if (preg_match('/\b(' . $subjects . ')\b/i', $name) && preg_match('/\b(Book|Guide|Tutorial|Learn)\b/i', $name)) { |
| 44 | 70 | return $this->matched(Category::BOOKS_TECHNICAL, 0.85, 'technical_subject'); |
@@ -47,17 +73,27 @@ discard block |
||
| 47 | 73 | } |
| 48 | 74 | protected function checkMagazine(string $name): ?CategorizationResult |
| 49 | 75 | { |
| 50 | - if (preg_match('/[._ -](Monthly|Weekly|Annual|Quarterly|Issue)[._ -]/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.9, 'magazine_frequency'); |
|
| 76 | + if (preg_match('/[._ -](Monthly|Weekly|Annual|Quarterly|Issue)[._ -]/i', $name)) { |
|
| 77 | + return $this->matched(Category::BOOKS_MAGAZINES, 0.9, 'magazine_frequency'); |
|
| 78 | + } |
|
| 51 | 79 | $magazines = 'Forbes|Fortune|GQ|National[._ -]Geographic|Newsweek|Time|Vogue|Wired|PC[._ -]Gamer'; |
| 52 | - if (preg_match('/\b(' . $magazines . ')\b/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.85, 'magazine_title'); |
|
| 80 | + if (preg_match('/\b(' . $magazines . ')\b/i', $name)) { |
|
| 81 | + return $this->matched(Category::BOOKS_MAGAZINES, 0.85, 'magazine_title'); |
|
| 82 | + } |
|
| 53 | 83 | return null; |
| 54 | 84 | } |
| 55 | 85 | protected function checkEbook(string $name): ?CategorizationResult |
| 56 | 86 | { |
| 57 | 87 | $formats = 'EPUB|MOBI|AZW\d?|PDF|FB2|DJVU|LIT'; |
| 58 | - if (preg_match('/\.(' . $formats . ')$/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.9, 'ebook_format'); |
|
| 59 | - if (preg_match('/\b(' . $formats . ')\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.85, 'ebook_indicator'); |
|
| 60 | - if (preg_match('/\b(E-?book|Kindle|Kobo|Nook)\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.8, 'ebook_platform'); |
|
| 88 | + if (preg_match('/\.(' . $formats . ')$/i', $name)) { |
|
| 89 | + return $this->matched(Category::BOOKS_EBOOK, 0.9, 'ebook_format'); |
|
| 90 | + } |
|
| 91 | + if (preg_match('/\b(' . $formats . ')\b/i', $name)) { |
|
| 92 | + return $this->matched(Category::BOOKS_EBOOK, 0.85, 'ebook_indicator'); |
|
| 93 | + } |
|
| 94 | + if (preg_match('/\b(E-?book|Kindle|Kobo|Nook)\b/i', $name)) { |
|
| 95 | + return $this->matched(Category::BOOKS_EBOOK, 0.8, 'ebook_platform'); |
|
| 96 | + } |
|
| 61 | 97 | return null; |
| 62 | 98 | } |
| 63 | 99 | } |
@@ -10,14 +10,30 @@ |
||
| 10 | 10 | public function categorize(ReleaseContext $context): CategorizationResult |
| 11 | 11 | { |
| 12 | 12 | $groupName = $context->groupName; |
| 13 | - if (empty($groupName)) return $this->noMatch(); |
|
| 14 | - if (preg_match('/alt\.binaries\..*?(tv|hdtv|tvseries)/i', $groupName)) return $this->matched(Category::TV_OTHER, 0.6, 'group_tv'); |
|
| 15 | - if (preg_match('/alt\.binaries\..*?(movies?|dvd|bluray|x264)/i', $groupName)) return $this->matched(Category::MOVIE_OTHER, 0.6, 'group_movie'); |
|
| 16 | - if (preg_match('/alt\.binaries\..*?(erotica|pictures\.erotica|xxx)/i', $groupName)) return $this->matched(Category::XXX_OTHER, 0.7, 'group_xxx'); |
|
| 17 | - if (preg_match('/alt\.binaries\..*?(sounds?|mp3|music|lossless)/i', $groupName)) return $this->matched(Category::MUSIC_OTHER, 0.6, 'group_music'); |
|
| 18 | - if (preg_match('/alt\.binaries\..*?(games?|console|psx|nintendo)/i', $groupName)) return $this->matched(Category::GAME_OTHER, 0.6, 'group_game'); |
|
| 19 | - if (preg_match('/alt\.binaries\..*?(warez|0day|apps?|software)/i', $groupName)) return $this->matched(Category::PC_0DAY, 0.6, 'group_pc'); |
|
| 20 | - if (preg_match('/alt\.binaries\..*?(e-?book|ebook|comics?)/i', $groupName)) return $this->matched(Category::BOOKS_EBOOK, 0.6, 'group_book'); |
|
| 13 | + if (empty($groupName)) { |
|
| 14 | + return $this->noMatch(); |
|
| 15 | + } |
|
| 16 | + if (preg_match('/alt\.binaries\..*?(tv|hdtv|tvseries)/i', $groupName)) { |
|
| 17 | + return $this->matched(Category::TV_OTHER, 0.6, 'group_tv'); |
|
| 18 | + } |
|
| 19 | + if (preg_match('/alt\.binaries\..*?(movies?|dvd|bluray|x264)/i', $groupName)) { |
|
| 20 | + return $this->matched(Category::MOVIE_OTHER, 0.6, 'group_movie'); |
|
| 21 | + } |
|
| 22 | + if (preg_match('/alt\.binaries\..*?(erotica|pictures\.erotica|xxx)/i', $groupName)) { |
|
| 23 | + return $this->matched(Category::XXX_OTHER, 0.7, 'group_xxx'); |
|
| 24 | + } |
|
| 25 | + if (preg_match('/alt\.binaries\..*?(sounds?|mp3|music|lossless)/i', $groupName)) { |
|
| 26 | + return $this->matched(Category::MUSIC_OTHER, 0.6, 'group_music'); |
|
| 27 | + } |
|
| 28 | + if (preg_match('/alt\.binaries\..*?(games?|console|psx|nintendo)/i', $groupName)) { |
|
| 29 | + return $this->matched(Category::GAME_OTHER, 0.6, 'group_game'); |
|
| 30 | + } |
|
| 31 | + if (preg_match('/alt\.binaries\..*?(warez|0day|apps?|software)/i', $groupName)) { |
|
| 32 | + return $this->matched(Category::PC_0DAY, 0.6, 'group_pc'); |
|
| 33 | + } |
|
| 34 | + if (preg_match('/alt\.binaries\..*?(e-?book|ebook|comics?)/i', $groupName)) { |
|
| 35 | + return $this->matched(Category::BOOKS_EBOOK, 0.6, 'group_book'); |
|
| 36 | + } |
|
| 21 | 37 | return $this->noMatch(); |
| 22 | 38 | } |
| 23 | 39 | } |
@@ -107,12 +107,12 @@ discard block |
||
| 107 | 107 | } |
| 108 | 108 | |
| 109 | 109 | // Check for known studios/sites |
| 110 | - if (preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name)) { |
|
| 110 | + if (preg_match('/\b('.self::KNOWN_STUDIOS.')\b/i', $name)) { |
|
| 111 | 111 | return true; |
| 112 | 112 | } |
| 113 | 113 | |
| 114 | 114 | // Check for adult content indicators combined with video markers |
| 115 | - if (preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name) && |
|
| 115 | + if (preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name) && |
|
| 116 | 116 | preg_match('/\b(720p|1080p|2160p|4k|mp4|mkv|avi|wmv)\b/i', $name)) { |
| 117 | 117 | return true; |
| 118 | 118 | } |
@@ -121,7 +121,7 @@ discard block |
||
| 121 | 121 | // This pattern is very common for adult sites but rare for regular content |
| 122 | 122 | if (preg_match('/^[A-Za-z]+[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ][A-Za-z]/i', $name)) { |
| 123 | 123 | // Check it's not a TV daily show by checking for adult keywords or specific patterns |
| 124 | - if (preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name)) { |
|
| 124 | + if (preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name)) { |
|
| 125 | 125 | return true; |
| 126 | 126 | } |
| 127 | 127 | // Check for performer name patterns (firstname.lastname) after the date |
@@ -160,16 +160,16 @@ discard block |
||
| 160 | 160 | |
| 161 | 161 | // Require either a VR site token or explicit VR180/VR360 |
| 162 | 162 | if (!preg_match('/\bVR(?:180|360)\b/i', $name) && |
| 163 | - !preg_match('/\b(' . self::VR_SITES . ')\b/i', $name)) { |
|
| 163 | + !preg_match('/\b('.self::VR_SITES.')\b/i', $name)) { |
|
| 164 | 164 | return null; |
| 165 | 165 | } |
| 166 | 166 | |
| 167 | 167 | // VR pattern matching |
| 168 | - $vrPattern = '/\b(' . self::VR_SITES . ')\b|\bVR(?:180|360)\b|\b(?:5K|6K|7K|8K)\b.*\bVR\b|\b(?:GearVR|Oculus|Quest[123]?|PSVR|Vive|Index|Pimax)\b/i'; |
|
| 168 | + $vrPattern = '/\b('.self::VR_SITES.')\b|\bVR(?:180|360)\b|\b(?:5K|6K|7K|8K)\b.*\bVR\b|\b(?:GearVR|Oculus|Quest[123]?|PSVR|Vive|Index|Pimax)\b/i'; |
|
| 169 | 169 | |
| 170 | 170 | if (preg_match($vrPattern, $name)) { |
| 171 | 171 | // Verify XXX content |
| 172 | - if (preg_match('/\b(' . self::VR_SITES . ')\b/i', $name) || preg_match('/\bXXX\b/i', $name)) { |
|
| 172 | + if (preg_match('/\b('.self::VR_SITES.')\b/i', $name) || preg_match('/\bXXX\b/i', $name)) { |
|
| 173 | 173 | return $this->matched(Category::XXX_VR, 0.95, 'vr'); |
| 174 | 174 | } |
| 175 | 175 | } |
@@ -185,7 +185,7 @@ discard block |
||
| 185 | 185 | |
| 186 | 186 | // Check for adult markers |
| 187 | 187 | $hasAdultMarker = preg_match('/\bXXX\b/i', $name) || |
| 188 | - preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', strtolower($name)) || |
|
| 188 | + preg_match('/\b('.self::KNOWN_STUDIOS.')\b/i', strtolower($name)) || |
|
| 189 | 189 | preg_match('/\b(Hardcore|Porn|Sex|Anal|Creampie|MILF|Lesbian|Teen|Interracial)\b/i', $name); |
| 190 | 190 | |
| 191 | 191 | if (!$hasAdultMarker) { |
@@ -216,12 +216,12 @@ discard block |
||
| 216 | 216 | $hasHD = preg_match('/\b(720p|1080p|2160p|HD|4K)\b/i', $name); |
| 217 | 217 | |
| 218 | 218 | // Studio + performer + HD resolution |
| 219 | - if (preg_match('/^(' . self::KNOWN_STUDIOS . ')\.([A-Z][a-z]+).*?(720p|1080p|2160p|HD|4K)/i', $name)) { |
|
| 219 | + if (preg_match('/^('.self::KNOWN_STUDIOS.')\.([A-Z][a-z]+).*?(720p|1080p|2160p|HD|4K)/i', $name)) { |
|
| 220 | 220 | return $this->matched(Category::XXX_CLIPHD, 0.9, 'clip_hd_studio'); |
| 221 | 221 | } |
| 222 | 222 | |
| 223 | 223 | // Known studio with date pattern: site.YYYY.MM.DD or site.YY.MM.DD |
| 224 | - if (preg_match('/^(' . self::KNOWN_STUDIOS . ')[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}/i', $name)) { |
|
| 224 | + if (preg_match('/^('.self::KNOWN_STUDIOS.')[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}/i', $name)) { |
|
| 225 | 225 | if ($hasHD) { |
| 226 | 226 | return $this->matched(Category::XXX_CLIPHD, 0.95, 'clip_hd_studio_date'); |
| 227 | 227 | } |
@@ -233,7 +233,7 @@ discard block |
||
| 233 | 233 | if (preg_match('/^([A-Z][a-zA-Z0-9]+)[.\-_ ](19|20)\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ]/i', $name) && |
| 234 | 234 | !preg_match('/\b(S\d{2}E\d{2}|Documentary|Series)\b/i', $name)) { |
| 235 | 235 | // Check if it has adult keywords or HD resolution |
| 236 | - if ($hasHD || preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name)) { |
|
| 236 | + if ($hasHD || preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name)) { |
|
| 237 | 237 | return $this->matched(Category::XXX_CLIPHD, 0.85, 'clip_hd_date_4digit'); |
| 238 | 238 | } |
| 239 | 239 | } |
@@ -292,8 +292,8 @@ discard block |
||
| 292 | 292 | } |
| 293 | 293 | |
| 294 | 294 | if (preg_match('/web[._ -]dl|web-?rip/i', $name) && |
| 295 | - (preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name) || |
|
| 296 | - preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name) || |
|
| 295 | + (preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name) || |
|
| 296 | + preg_match('/\b('.self::KNOWN_STUDIOS.')\b/i', $name) || |
|
| 297 | 297 | preg_match('/\b(XXX|Porn|Adult|JAV|Hentai)\b/i', $name))) { |
| 298 | 298 | return $this->matched(Category::XXX_WEBDL, 0.85, 'webdl'); |
| 299 | 299 | } |
@@ -8,26 +8,54 @@ discard block |
||
| 8 | 8 | protected int $priority = 35; |
| 9 | 9 | public function getName(): string { return 'Console'; } |
| 10 | 10 | public function shouldSkip(ReleaseContext $context): bool { |
| 11 | - if ($context->hasAdultMarkers()) return true; |
|
| 11 | + if ($context->hasAdultMarkers()) { |
|
| 12 | + return true; |
|
| 13 | + } |
|
| 12 | 14 | // Skip TV shows (season patterns) |
| 13 | - if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) return true; |
|
| 15 | + if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) { |
|
| 16 | + return true; |
|
| 17 | + } |
|
| 14 | 18 | return false; |
| 15 | 19 | } |
| 16 | 20 | public function categorize(ReleaseContext $context): CategorizationResult |
| 17 | 21 | { |
| 18 | 22 | $name = $context->releaseName; |
| 19 | - if ($result = $this->checkPS4($name)) return $result; |
|
| 20 | - if ($result = $this->checkPS3($name)) return $result; |
|
| 21 | - if ($result = $this->checkPSVita($name)) return $result; |
|
| 22 | - if ($result = $this->checkPSP($name)) return $result; |
|
| 23 | - if ($result = $this->checkXboxOne($name)) return $result; |
|
| 24 | - if ($result = $this->checkXbox360($name)) return $result; |
|
| 25 | - if ($result = $this->checkXbox($name)) return $result; |
|
| 26 | - if ($result = $this->checkWiiU($name)) return $result; |
|
| 27 | - if ($result = $this->checkWii($name)) return $result; |
|
| 28 | - if ($result = $this->check3DS($name)) return $result; |
|
| 29 | - if ($result = $this->checkNDS($name)) return $result; |
|
| 30 | - if ($result = $this->checkOther($name)) return $result; |
|
| 23 | + if ($result = $this->checkPS4($name)) { |
|
| 24 | + return $result; |
|
| 25 | + } |
|
| 26 | + if ($result = $this->checkPS3($name)) { |
|
| 27 | + return $result; |
|
| 28 | + } |
|
| 29 | + if ($result = $this->checkPSVita($name)) { |
|
| 30 | + return $result; |
|
| 31 | + } |
|
| 32 | + if ($result = $this->checkPSP($name)) { |
|
| 33 | + return $result; |
|
| 34 | + } |
|
| 35 | + if ($result = $this->checkXboxOne($name)) { |
|
| 36 | + return $result; |
|
| 37 | + } |
|
| 38 | + if ($result = $this->checkXbox360($name)) { |
|
| 39 | + return $result; |
|
| 40 | + } |
|
| 41 | + if ($result = $this->checkXbox($name)) { |
|
| 42 | + return $result; |
|
| 43 | + } |
|
| 44 | + if ($result = $this->checkWiiU($name)) { |
|
| 45 | + return $result; |
|
| 46 | + } |
|
| 47 | + if ($result = $this->checkWii($name)) { |
|
| 48 | + return $result; |
|
| 49 | + } |
|
| 50 | + if ($result = $this->check3DS($name)) { |
|
| 51 | + return $result; |
|
| 52 | + } |
|
| 53 | + if ($result = $this->checkNDS($name)) { |
|
| 54 | + return $result; |
|
| 55 | + } |
|
| 56 | + if ($result = $this->checkOther($name)) { |
|
| 57 | + return $result; |
|
| 58 | + } |
|
| 31 | 59 | return $this->noMatch(); |
| 32 | 60 | } |
| 33 | 61 | protected function checkPS4(string $name): ?CategorizationResult |
@@ -40,17 +68,23 @@ discard block |
||
| 40 | 68 | } |
| 41 | 69 | protected function checkPS3(string $name): ?CategorizationResult |
| 42 | 70 | { |
| 43 | - if (preg_match('/\bPS3\b|PlayStation\s*3/i', $name)) return $this->matched(Category::GAME_PS3, 0.9, 'ps3'); |
|
| 71 | + if (preg_match('/\bPS3\b|PlayStation\s*3/i', $name)) { |
|
| 72 | + return $this->matched(Category::GAME_PS3, 0.9, 'ps3'); |
|
| 73 | + } |
|
| 44 | 74 | return null; |
| 45 | 75 | } |
| 46 | 76 | protected function checkPSVita(string $name): ?CategorizationResult |
| 47 | 77 | { |
| 48 | - if (preg_match('/\bPS\s?Vita\b|PSV(ita)?\b/i', $name)) return $this->matched(Category::GAME_PSVITA, 0.9, 'psvita'); |
|
| 78 | + if (preg_match('/\bPS\s?Vita\b|PSV(ita)?\b/i', $name)) { |
|
| 79 | + return $this->matched(Category::GAME_PSVITA, 0.9, 'psvita'); |
|
| 80 | + } |
|
| 49 | 81 | return null; |
| 50 | 82 | } |
| 51 | 83 | protected function checkPSP(string $name): ?CategorizationResult |
| 52 | 84 | { |
| 53 | - if (preg_match('/\bPSP\b|PlayStation\s*Portable/i', $name)) return $this->matched(Category::GAME_PSP, 0.9, 'psp'); |
|
| 85 | + if (preg_match('/\bPSP\b|PlayStation\s*Portable/i', $name)) { |
|
| 86 | + return $this->matched(Category::GAME_PSP, 0.9, 'psp'); |
|
| 87 | + } |
|
| 54 | 88 | return null; |
| 55 | 89 | } |
| 56 | 90 | protected function checkXboxOne(string $name): ?CategorizationResult |
@@ -62,7 +96,9 @@ discard block |
||
| 62 | 96 | } |
| 63 | 97 | protected function checkXbox360(string $name): ?CategorizationResult |
| 64 | 98 | { |
| 65 | - if (preg_match('/\b(Xbox360|XBOX360|X360)\b/i', $name)) return $this->matched(Category::GAME_XBOX360, 0.9, 'xbox360'); |
|
| 99 | + if (preg_match('/\b(Xbox360|XBOX360|X360)\b/i', $name)) { |
|
| 100 | + return $this->matched(Category::GAME_XBOX360, 0.9, 'xbox360'); |
|
| 101 | + } |
|
| 66 | 102 | return null; |
| 67 | 103 | } |
| 68 | 104 | protected function checkXbox(string $name): ?CategorizationResult |
@@ -74,22 +110,30 @@ discard block |
||
| 74 | 110 | } |
| 75 | 111 | protected function checkWiiU(string $name): ?CategorizationResult |
| 76 | 112 | { |
| 77 | - if (preg_match('/\bWii\s*U\b|WiiU/i', $name)) return $this->matched(Category::GAME_WIIU, 0.9, 'wiiu'); |
|
| 113 | + if (preg_match('/\bWii\s*U\b|WiiU/i', $name)) { |
|
| 114 | + return $this->matched(Category::GAME_WIIU, 0.9, 'wiiu'); |
|
| 115 | + } |
|
| 78 | 116 | return null; |
| 79 | 117 | } |
| 80 | 118 | protected function checkWii(string $name): ?CategorizationResult |
| 81 | 119 | { |
| 82 | - if (preg_match('/\bWii\b/i', $name) && !preg_match('/WiiU/i', $name)) return $this->matched(Category::GAME_WII, 0.85, 'wii'); |
|
| 120 | + if (preg_match('/\bWii\b/i', $name) && !preg_match('/WiiU/i', $name)) { |
|
| 121 | + return $this->matched(Category::GAME_WII, 0.85, 'wii'); |
|
| 122 | + } |
|
| 83 | 123 | return null; |
| 84 | 124 | } |
| 85 | 125 | protected function check3DS(string $name): ?CategorizationResult |
| 86 | 126 | { |
| 87 | - if (preg_match('/\b3DS\b|Nintendo\s*3DS/i', $name)) return $this->matched(Category::GAME_3DS, 0.9, '3ds'); |
|
| 127 | + if (preg_match('/\b3DS\b|Nintendo\s*3DS/i', $name)) { |
|
| 128 | + return $this->matched(Category::GAME_3DS, 0.9, '3ds'); |
|
| 129 | + } |
|
| 88 | 130 | return null; |
| 89 | 131 | } |
| 90 | 132 | protected function checkNDS(string $name): ?CategorizationResult |
| 91 | 133 | { |
| 92 | - if (preg_match('/\bNDS\b|Nintendo\s*DS/i', $name)) return $this->matched(Category::GAME_NDS, 0.9, 'nds'); |
|
| 134 | + if (preg_match('/\bNDS\b|Nintendo\s*DS/i', $name)) { |
|
| 135 | + return $this->matched(Category::GAME_NDS, 0.9, 'nds'); |
|
| 136 | + } |
|
| 93 | 137 | return null; |
| 94 | 138 | } |
| 95 | 139 | protected function checkOther(string $name): ?CategorizationResult |
@@ -108,7 +108,7 @@ |
||
| 108 | 108 | } |
| 109 | 109 | |
| 110 | 110 | // Check for PC game patterns |
| 111 | - $pattern = '/(?:(?:^|[\s\._-])(?:' . self::PC_GROUPS . ')(?:$|[\s\._-])|' . self::PC_KEYWORDS . ')/i'; |
|
| 111 | + $pattern = '/(?:(?:^|[\s\._-])(?:'.self::PC_GROUPS.')(?:$|[\s\._-])|'.self::PC_KEYWORDS.')/i'; |
|
| 112 | 112 | |
| 113 | 113 | if (preg_match($pattern, $name)) { |
| 114 | 114 | return $this->matched(Category::PC_GAMES, 0.9, 'pc_game'); |
@@ -26,9 +26,13 @@ |
||
| 26 | 26 | |
| 27 | 27 | public function shouldSkip(ReleaseContext $context): bool |
| 28 | 28 | { |
| 29 | - if ($context->hasAdultMarkers()) return true; |
|
| 29 | + if ($context->hasAdultMarkers()) { |
|
| 30 | + return true; |
|
| 31 | + } |
|
| 30 | 32 | // Skip TV shows (season patterns) |
| 31 | - if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) return true; |
|
| 33 | + if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) { |
|
| 34 | + return true; |
|
| 35 | + } |
|
| 32 | 36 | return false; |
| 33 | 37 | } |
| 34 | 38 | |
@@ -65,7 +65,7 @@ |
||
| 65 | 65 | |
| 66 | 66 | protected function checkForeign(string $name): bool |
| 67 | 67 | { |
| 68 | - return (bool) preg_match('/(?:^|[\s\.\-_])(?:' . self::FOREIGN_LANGUAGES . '|' . self::LANGUAGE_CODES . ')(?:$|[\s\.\-_])/i', $name); |
|
| 68 | + return (bool) preg_match('/(?:^|[\s\.\-_])(?:'.self::FOREIGN_LANGUAGES.'|'.self::LANGUAGE_CODES.')(?:$|[\s\.\-_])/i', $name); |
|
| 69 | 69 | } |
| 70 | 70 | |
| 71 | 71 | protected function checkAudiobook(string $name): ?CategorizationResult |
@@ -25,9 +25,13 @@ |
||
| 25 | 25 | |
| 26 | 26 | public function shouldSkip(ReleaseContext $context): bool |
| 27 | 27 | { |
| 28 | - if ($context->hasAdultMarkers()) return true; |
|
| 28 | + if ($context->hasAdultMarkers()) { |
|
| 29 | + return true; |
|
| 30 | + } |
|
| 29 | 31 | // Skip TV shows (season patterns) |
| 30 | - if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) return true; |
|
| 32 | + if (preg_match('/[._ -]S\d{1,3}[._ -]?(E\d|Complete|Full|1080|720|480|2160|WEB|HDTV|BluRay)/i', $context->releaseName)) { |
|
| 33 | + return true; |
|
| 34 | + } |
|
| 31 | 35 | return false; |
| 32 | 36 | } |
| 33 | 37 | |
@@ -60,7 +60,7 @@ discard block |
||
| 60 | 60 | |
| 61 | 61 | if (isset($result['debug'])) { |
| 62 | 62 | $this->info("Matched By: {$result['debug']['matched_by']}"); |
| 63 | - $this->info("Confidence: " . ($result['debug']['final_confidence'] ?? 'N/A')); |
|
| 63 | + $this->info("Confidence: ".($result['debug']['final_confidence'] ?? 'N/A')); |
|
| 64 | 64 | } |
| 65 | 65 | } |
| 66 | 66 | |
@@ -121,7 +121,7 @@ discard block |
||
| 121 | 121 | foreach ($samples as $sample) { |
| 122 | 122 | $comparison = $service->compare(0, $sample); |
| 123 | 123 | $results[] = [ |
| 124 | - substr($sample, 0, 50) . (strlen($sample) > 50 ? '...' : ''), |
|
| 124 | + substr($sample, 0, 50).(strlen($sample) > 50 ? '...' : ''), |
|
| 125 | 125 | $comparison['pipeline']['category_name'], |
| 126 | 126 | $comparison['legacy']['category_name'], |
| 127 | 127 | $comparison['match'] ? '✓' : '✗', |