@@ -15,7 +15,7 @@ |
||
| 15 | 15 | public function register(): void |
| 16 | 16 | { |
| 17 | 17 | // Register the pipeline as a singleton |
| 18 | - $this->app->singleton(CategorizationPipeline::class, function ($app) { |
|
| 18 | + $this->app->singleton(CategorizationPipeline::class, function($app) { |
|
| 19 | 19 | return CategorizationPipeline::createDefault(); |
| 20 | 20 | }); |
| 21 | 21 | |
@@ -85,7 +85,7 @@ |
||
| 85 | 85 | { |
| 86 | 86 | $categorizers = $this->pipeline->getCategorizers(); |
| 87 | 87 | |
| 88 | - return $categorizers->map(function ($categorizer) { |
|
| 88 | + return $categorizers->map(function($categorizer) { |
|
| 89 | 89 | return [ |
| 90 | 90 | 'name' => $categorizer->getName(), |
| 91 | 91 | 'priority' => $categorizer->getPriority(), |
@@ -38,9 +38,9 @@ discard block |
||
| 38 | 38 | protected function checkTechnical(string $name): ?CategorizationResult |
| 39 | 39 | { |
| 40 | 40 | $publishers = 'Apress|Addison[._ -]Wesley|Manning|No[._ -]Starch|OReilly|Packt|Pragmatic|Wiley|Wrox'; |
| 41 | - if (preg_match('/\b(' . $publishers . ')\b/i', $name)) return $this->matched(Category::BOOKS_TECHNICAL, 0.9, 'technical_publisher'); |
|
| 41 | + if (preg_match('/\b('.$publishers.')\b/i', $name)) return $this->matched(Category::BOOKS_TECHNICAL, 0.9, 'technical_publisher'); |
|
| 42 | 42 | $subjects = 'Programming|Python|JavaScript|Java|Database|Linux|DevOps|Machine[._ -]Learning|Data[._ -]Science'; |
| 43 | - if (preg_match('/\b(' . $subjects . ')\b/i', $name) && preg_match('/\b(Book|Guide|Tutorial|Learn)\b/i', $name)) { |
|
| 43 | + if (preg_match('/\b('.$subjects.')\b/i', $name) && preg_match('/\b(Book|Guide|Tutorial|Learn)\b/i', $name)) { |
|
| 44 | 44 | return $this->matched(Category::BOOKS_TECHNICAL, 0.85, 'technical_subject'); |
| 45 | 45 | } |
| 46 | 46 | return null; |
@@ -49,14 +49,14 @@ discard block |
||
| 49 | 49 | { |
| 50 | 50 | if (preg_match('/[._ -](Monthly|Weekly|Annual|Quarterly|Issue)[._ -]/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.9, 'magazine_frequency'); |
| 51 | 51 | $magazines = 'Forbes|Fortune|GQ|National[._ -]Geographic|Newsweek|Time|Vogue|Wired|PC[._ -]Gamer'; |
| 52 | - if (preg_match('/\b(' . $magazines . ')\b/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.85, 'magazine_title'); |
|
| 52 | + if (preg_match('/\b('.$magazines.')\b/i', $name)) return $this->matched(Category::BOOKS_MAGAZINES, 0.85, 'magazine_title'); |
|
| 53 | 53 | return null; |
| 54 | 54 | } |
| 55 | 55 | protected function checkEbook(string $name): ?CategorizationResult |
| 56 | 56 | { |
| 57 | 57 | $formats = 'EPUB|MOBI|AZW\d?|PDF|FB2|DJVU|LIT'; |
| 58 | - if (preg_match('/\.(' . $formats . ')$/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.9, 'ebook_format'); |
|
| 59 | - if (preg_match('/\b(' . $formats . ')\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.85, 'ebook_indicator'); |
|
| 58 | + if (preg_match('/\.('.$formats.')$/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.9, 'ebook_format'); |
|
| 59 | + if (preg_match('/\b('.$formats.')\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.85, 'ebook_indicator'); |
|
| 60 | 60 | if (preg_match('/\b(E-?book|Kindle|Kobo|Nook)\b/i', $name)) return $this->matched(Category::BOOKS_EBOOK, 0.8, 'ebook_platform'); |
| 61 | 61 | return null; |
| 62 | 62 | } |
@@ -107,12 +107,12 @@ discard block |
||
| 107 | 107 | } |
| 108 | 108 | |
| 109 | 109 | // Check for known studios/sites |
| 110 | - if (preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name)) { |
|
| 110 | + if (preg_match('/\b('.self::KNOWN_STUDIOS.')\b/i', $name)) { |
|
| 111 | 111 | return true; |
| 112 | 112 | } |
| 113 | 113 | |
| 114 | 114 | // Check for adult content indicators combined with video markers |
| 115 | - if (preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name) && |
|
| 115 | + if (preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name) && |
|
| 116 | 116 | preg_match('/\b(720p|1080p|2160p|4k|mp4|mkv|avi|wmv)\b/i', $name)) { |
| 117 | 117 | return true; |
| 118 | 118 | } |
@@ -121,7 +121,7 @@ discard block |
||
| 121 | 121 | // This pattern is very common for adult sites but rare for regular content |
| 122 | 122 | if (preg_match('/^[A-Za-z]+[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ][A-Za-z]/i', $name)) { |
| 123 | 123 | // Check it's not a TV daily show by checking for adult keywords or specific patterns |
| 124 | - if (preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name)) { |
|
| 124 | + if (preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name)) { |
|
| 125 | 125 | return true; |
| 126 | 126 | } |
| 127 | 127 | // Check for performer name patterns (firstname.lastname) after the date |
@@ -160,16 +160,16 @@ discard block |
||
| 160 | 160 | |
| 161 | 161 | // Require either a VR site token or explicit VR180/VR360 |
| 162 | 162 | if (!preg_match('/\bVR(?:180|360)\b/i', $name) && |
| 163 | - !preg_match('/\b(' . self::VR_SITES . ')\b/i', $name)) { |
|
| 163 | + !preg_match('/\b('.self::VR_SITES.')\b/i', $name)) { |
|
| 164 | 164 | return null; |
| 165 | 165 | } |
| 166 | 166 | |
| 167 | 167 | // VR pattern matching |
| 168 | - $vrPattern = '/\b(' . self::VR_SITES . ')\b|\bVR(?:180|360)\b|\b(?:5K|6K|7K|8K)\b.*\bVR\b|\b(?:GearVR|Oculus|Quest[123]?|PSVR|Vive|Index|Pimax)\b/i'; |
|
| 168 | + $vrPattern = '/\b('.self::VR_SITES.')\b|\bVR(?:180|360)\b|\b(?:5K|6K|7K|8K)\b.*\bVR\b|\b(?:GearVR|Oculus|Quest[123]?|PSVR|Vive|Index|Pimax)\b/i'; |
|
| 169 | 169 | |
| 170 | 170 | if (preg_match($vrPattern, $name)) { |
| 171 | 171 | // Verify XXX content |
| 172 | - if (preg_match('/\b(' . self::VR_SITES . ')\b/i', $name) || preg_match('/\bXXX\b/i', $name)) { |
|
| 172 | + if (preg_match('/\b('.self::VR_SITES.')\b/i', $name) || preg_match('/\bXXX\b/i', $name)) { |
|
| 173 | 173 | return $this->matched(Category::XXX_VR, 0.95, 'vr'); |
| 174 | 174 | } |
| 175 | 175 | } |
@@ -185,7 +185,7 @@ discard block |
||
| 185 | 185 | |
| 186 | 186 | // Check for adult markers |
| 187 | 187 | $hasAdultMarker = preg_match('/\bXXX\b/i', $name) || |
| 188 | - preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', strtolower($name)) || |
|
| 188 | + preg_match('/\b('.self::KNOWN_STUDIOS.')\b/i', strtolower($name)) || |
|
| 189 | 189 | preg_match('/\b(Hardcore|Porn|Sex|Anal|Creampie|MILF|Lesbian|Teen|Interracial)\b/i', $name); |
| 190 | 190 | |
| 191 | 191 | if (!$hasAdultMarker) { |
@@ -216,12 +216,12 @@ discard block |
||
| 216 | 216 | $hasHD = preg_match('/\b(720p|1080p|2160p|HD|4K)\b/i', $name); |
| 217 | 217 | |
| 218 | 218 | // Studio + performer + HD resolution |
| 219 | - if (preg_match('/^(' . self::KNOWN_STUDIOS . ')\.([A-Z][a-z]+).*?(720p|1080p|2160p|HD|4K)/i', $name)) { |
|
| 219 | + if (preg_match('/^('.self::KNOWN_STUDIOS.')\.([A-Z][a-z]+).*?(720p|1080p|2160p|HD|4K)/i', $name)) { |
|
| 220 | 220 | return $this->matched(Category::XXX_CLIPHD, 0.9, 'clip_hd_studio'); |
| 221 | 221 | } |
| 222 | 222 | |
| 223 | 223 | // Known studio with date pattern: site.YYYY.MM.DD or site.YY.MM.DD |
| 224 | - if (preg_match('/^(' . self::KNOWN_STUDIOS . ')[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}/i', $name)) { |
|
| 224 | + if (preg_match('/^('.self::KNOWN_STUDIOS.')[.\-_ ](19|20)?\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}/i', $name)) { |
|
| 225 | 225 | if ($hasHD) { |
| 226 | 226 | return $this->matched(Category::XXX_CLIPHD, 0.95, 'clip_hd_studio_date'); |
| 227 | 227 | } |
@@ -233,7 +233,7 @@ discard block |
||
| 233 | 233 | if (preg_match('/^([A-Z][a-zA-Z0-9]+)[.\-_ ](19|20)\d{2}[.\-_ ]\d{2}[.\-_ ]\d{2}[.\-_ ]/i', $name) && |
| 234 | 234 | !preg_match('/\b(S\d{2}E\d{2}|Documentary|Series)\b/i', $name)) { |
| 235 | 235 | // Check if it has adult keywords or HD resolution |
| 236 | - if ($hasHD || preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name)) { |
|
| 236 | + if ($hasHD || preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name)) { |
|
| 237 | 237 | return $this->matched(Category::XXX_CLIPHD, 0.85, 'clip_hd_date_4digit'); |
| 238 | 238 | } |
| 239 | 239 | } |
@@ -292,8 +292,8 @@ discard block |
||
| 292 | 292 | } |
| 293 | 293 | |
| 294 | 294 | if (preg_match('/web[._ -]dl|web-?rip/i', $name) && |
| 295 | - (preg_match('/\b(' . self::ADULT_KEYWORDS . ')\b/i', $name) || |
|
| 296 | - preg_match('/\b(' . self::KNOWN_STUDIOS . ')\b/i', $name) || |
|
| 295 | + (preg_match('/\b('.self::ADULT_KEYWORDS.')\b/i', $name) || |
|
| 296 | + preg_match('/\b('.self::KNOWN_STUDIOS.')\b/i', $name) || |
|
| 297 | 297 | preg_match('/\b(XXX|Porn|Adult|JAV|Hentai)\b/i', $name))) { |
| 298 | 298 | return $this->matched(Category::XXX_WEBDL, 0.85, 'webdl'); |
| 299 | 299 | } |
@@ -108,7 +108,7 @@ |
||
| 108 | 108 | } |
| 109 | 109 | |
| 110 | 110 | // Check for PC game patterns |
| 111 | - $pattern = '/(?:(?:^|[\s\._-])(?:' . self::PC_GROUPS . ')(?:$|[\s\._-])|' . self::PC_KEYWORDS . ')/i'; |
|
| 111 | + $pattern = '/(?:(?:^|[\s\._-])(?:'.self::PC_GROUPS.')(?:$|[\s\._-])|'.self::PC_KEYWORDS.')/i'; |
|
| 112 | 112 | |
| 113 | 113 | if (preg_match($pattern, $name)) { |
| 114 | 114 | return $this->matched(Category::PC_GAMES, 0.9, 'pc_game'); |
@@ -65,7 +65,7 @@ |
||
| 65 | 65 | |
| 66 | 66 | protected function checkForeign(string $name): bool |
| 67 | 67 | { |
| 68 | - return (bool) preg_match('/(?:^|[\s\.\-_])(?:' . self::FOREIGN_LANGUAGES . '|' . self::LANGUAGE_CODES . ')(?:$|[\s\.\-_])/i', $name); |
|
| 68 | + return (bool) preg_match('/(?:^|[\s\.\-_])(?:'.self::FOREIGN_LANGUAGES.'|'.self::LANGUAGE_CODES.')(?:$|[\s\.\-_])/i', $name); |
|
| 69 | 69 | } |
| 70 | 70 | |
| 71 | 71 | protected function checkAudiobook(string $name): ?CategorizationResult |
@@ -60,7 +60,7 @@ discard block |
||
| 60 | 60 | |
| 61 | 61 | if (isset($result['debug'])) { |
| 62 | 62 | $this->info("Matched By: {$result['debug']['matched_by']}"); |
| 63 | - $this->info("Confidence: " . ($result['debug']['final_confidence'] ?? 'N/A')); |
|
| 63 | + $this->info("Confidence: ".($result['debug']['final_confidence'] ?? 'N/A')); |
|
| 64 | 64 | } |
| 65 | 65 | } |
| 66 | 66 | |
@@ -121,7 +121,7 @@ discard block |
||
| 121 | 121 | foreach ($samples as $sample) { |
| 122 | 122 | $comparison = $service->compare(0, $sample); |
| 123 | 123 | $results[] = [ |
| 124 | - substr($sample, 0, 50) . (strlen($sample) > 50 ? '...' : ''), |
|
| 124 | + substr($sample, 0, 50).(strlen($sample) > 50 ? '...' : ''), |
|
| 125 | 125 | $comparison['pipeline']['category_name'], |
| 126 | 126 | $comparison['legacy']['category_name'], |
| 127 | 127 | $comparison['match'] ? '✓' : '✗', |