NNTmux /
newznab-tmux
| 1 | <?php |
||||
| 2 | |||||
| 3 | namespace App\Console\Commands; |
||||
| 4 | |||||
| 5 | use App\Models\AnidbInfo; |
||||
| 6 | use App\Models\Category; |
||||
| 7 | use App\Models\Release; |
||||
| 8 | use Blacklight\PopulateAniList; |
||||
| 9 | use Illuminate\Console\Command; |
||||
| 10 | use Illuminate\Support\Facades\DB; |
||||
| 11 | |||||
| 12 | class RefreshAnimeData extends Command |
||||
| 13 | { |
||||
| 14 | /** |
||||
| 15 | * Conservative rate limit: 20 requests per minute (to stay well below AniList's 90/min limit). |
||||
| 16 | * This allows for multiple API calls per release (search + getById). |
||||
| 17 | */ |
||||
| 18 | private const RATE_LIMIT_PER_MINUTE = 20; |
||||
| 19 | |||||
| 20 | /** |
||||
| 21 | * Track API request timestamps for rate limiting. |
||||
| 22 | * |
||||
| 23 | * @var array<int> |
||||
| 24 | */ |
||||
| 25 | private array $requestTimestamps = []; |
||||
| 26 | |||||
| 27 | /** |
||||
| 28 | * The name and signature of the console command. |
||||
| 29 | * |
||||
| 30 | * @var string |
||||
| 31 | */ |
||||
| 32 | protected $signature = 'anime:refresh |
||||
| 33 | {--limit=0 : Maximum number of releases to process (0 = all)} |
||||
| 34 | {--chunk=100 : Process releases in chunks of this size} |
||||
| 35 | {--missing-only : Only refresh releases missing AniList data (no anilist_id)} |
||||
| 36 | {--retry-failed : Only refresh releases with anidbid <= 0 (failed processing: -1, -2, etc.)} |
||||
| 37 | {--force : Force refresh even if data exists}'; |
||||
| 38 | |||||
| 39 | /** |
||||
| 40 | * The console command description. |
||||
| 41 | * |
||||
| 42 | * @var string |
||||
| 43 | */ |
||||
| 44 | protected $description = 'Fetch and refresh AniList data for existing anime releases in TV->Anime category by matching release searchname'; |
||||
| 45 | |||||
/**
 * Execute the console command.
 *
 * Selects releases in the TV->Anime category (optionally narrowed by
 * --retry-failed / --missing-only), derives a clean title from each
 * release's searchname, searches AniList for that title, stores the AniList
 * data through PopulateAniList and finally writes the resulting anidbid back
 * to the release. A summary table is printed at the end.
 *
 * @return int self::SUCCESS when the run completes (including "nothing to do"),
 *             self::FAILURE when an AniList rate-limit (429) error aborts the run.
 */
public function handle(): int
{
    $limit = (int) $this->option('limit');
    $chunkSize = (int) $this->option('chunk');
    $missingOnly = $this->option('missing-only');
    $retryFailed = $this->option('retry-failed');
    $force = $this->option('force');

    // Collection::chunk() returns an empty collection for sizes below 1, which
    // would make the command "succeed" without processing a single release.
    if ($chunkSize < 1) {
        $this->warn('Invalid --chunk value; using the default of 100.');
        $chunkSize = 100;
    }

    $this->info('Starting AniList data refresh for anime releases...');
    if ($retryFailed) {
        $this->info('Mode: Retrying failed releases (anidbid <= 0)...');
    } elseif ($missingOnly) {
        $this->info('Mode: Missing AniList data only...');
    } else {
        $this->info('Mode: All releases...');
    }
    $this->info('Matching releases by searchname to AniList API...');
    $this->newLine();

    // Build query for releases in TV_ANIME category
    $query = Release::query()
        ->select(['releases.id', 'releases.anidbid', 'releases.searchname'])
        ->where('categories_id', Category::TV_ANIME);

    // If retry-failed, only get releases with anidbid <= 0 (failed processing)
    if ($retryFailed) {
        $query->where('releases.anidbid', '<=', 0);
    }

    // If missing-only, only get releases without anilist_id
    if ($missingOnly) {
        $query->leftJoin('anidb_info as ai', 'ai.anidbid', '=', 'releases.anidbid')
            ->whereNull('ai.anilist_id');
    }

    // Get releases (not distinct anidbids, since we're matching by searchname)
    $releases = $query->orderBy('releases.id')
        ->get();

    $totalCount = $releases->count();

    if ($totalCount === 0) {
        $this->warn('No anime releases found to process.');

        return self::SUCCESS;
    }

    $this->info("Found {$totalCount} anime releases to process.");

    if ($limit > 0) {
        $releases = $releases->take($limit);
        $totalCount = $releases->count();
        $this->info("Processing {$totalCount} releases (limited).");
    }

    $this->newLine();

    $populateAniList = new PopulateAniList;
    $processed = 0;
    $successful = 0;
    $failed = 0;
    $skipped = 0;
    $notFound = 0;
    $failedSearchnames = []; // Track failed searchnames for summary

    // Process in chunks
    $chunks = $releases->chunk($chunkSize);
    $progressBar = $this->output->createProgressBar($totalCount);
    $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% %elapsed:6s%/%estimated:-6s% %memory:6s% -- %message%');
    $progressBar->setMessage('Starting...');
    $progressBar->start();

    foreach ($chunks as $chunk) {
        foreach ($chunk as $release) {
            $searchname = $release->searchname ?? '';
            // mb_substr keeps multi-byte characters intact; a byte-wise cut could
            // leave half a character in the progress bar message.
            $progressBar->setMessage('Processing: ' . mb_substr($searchname, 0, 50, 'UTF-8') . '...');

            try {
                // Extract clean title from searchname
                $titleData = $this->extractTitleFromSearchname($searchname);

                if (empty($titleData) || empty($titleData['title'])) {
                    $notFound++;
                    $failedSearchnames[] = [
                        'searchname' => $searchname,
                        'reason' => 'Failed to extract title',
                        'cleaned_title' => null,
                    ];
                    if ($this->getOutput()->isVerbose()) {
                        $this->newLine();
                        $this->warn("Failed to extract title from searchname: {$searchname}");
                    }
                    $processed++;
                    $progressBar->advance();

                    continue;
                }

                $cleanTitle = $titleData['title'];

                // Check if we should skip (if not forcing and data exists)
                // Don't skip if we're retrying failed releases (anidbid <= 0)
                if (! $force && ! $missingOnly && ! $retryFailed) {
                    // Check if release already has complete AniList data
                    if ($release->anidbid > 0) {
                        $anidbInfo = DB::table('anidb_info')
                            ->where('anidbid', $release->anidbid)
                            ->whereNotNull('anilist_id')
                            ->whereNotNull('country')
                            ->whereNotNull('media_type')
                            ->first();

                        if ($anidbInfo) {
                            $skipped++;
                            $processed++;
                            $progressBar->advance();

                            continue;
                        }
                    }
                }

                // Search AniList for this title (with rate limiting)
                $this->enforceRateLimit();
                $searchResults = $populateAniList->searchAnime($cleanTitle, 1);

                if (empty($searchResults)) {
                    // Retry with collapsed whitespace.
                    // NOTE(review): cleanTitle() already collapses whitespace, so this
                    // retry looks unreachable in practice — confirm the intended
                    // alternative spelling before relying on it.
                    $altTitle = preg_replace('/\s+/', ' ', $cleanTitle);
                    if ($altTitle !== $cleanTitle) {
                        $this->enforceRateLimit();
                        $searchResults = $populateAniList->searchAnime($altTitle, 1);
                    }
                }

                if (empty($searchResults)) {
                    $notFound++;
                    $failedSearchnames[] = [
                        'searchname' => $searchname,
                        'reason' => 'No AniList match found',
                        'cleaned_title' => $cleanTitle,
                    ];
                    if ($this->getOutput()->isVerbose()) {
                        $this->newLine();
                        $this->warn("No AniList match found for:");
                        $this->line("  Searchname: {$searchname}");
                        $this->line("  Cleaned title: {$cleanTitle}");
                    }
                    $processed++;
                    $progressBar->advance();

                    continue;
                }

                $anilistData = $searchResults[0];
                $anilistId = $anilistData['id'] ?? null;

                if (! $anilistId) {
                    $notFound++;
                    $failedSearchnames[] = [
                        'searchname' => $searchname,
                        'reason' => 'AniList result missing ID',
                        'cleaned_title' => $cleanTitle,
                    ];
                    if ($this->getOutput()->isVerbose()) {
                        $this->newLine();
                        $this->warn("AniList search returned result but no ID for:");
                        $this->line("  Searchname: {$searchname}");
                        $this->line("  Cleaned title: {$cleanTitle}");
                    }
                    $processed++;
                    $progressBar->advance();

                    continue;
                }

                // Fetch full data from AniList and insert/update (with rate limiting)
                // This will create/update anidb_info entry using anilist_id as anidbid if needed
                $this->enforceRateLimit();
                $populateAniList->populateTable('info', $anilistId);

                // Get the anidbid that was created/updated (it uses anilist_id as anidbid)
                $anidbid = AnidbInfo::query()
                    ->where('anilist_id', $anilistId)
                    ->value('anidbid');

                if (! $anidbid) {
                    // Fallback: use anilist_id as anidbid
                    $anidbid = (int) $anilistId;
                }

                // Update release with the anidbid
                Release::query()
                    ->where('id', $release->id)
                    ->update(['anidbid' => $anidbid]);

                $successful++;
            } catch (\Exception $e) {
                // Check if this is a 429 rate limit error
                if (str_contains($e->getMessage(), '429') || str_contains($e->getMessage(), 'rate limit exceeded')) {
                    $this->newLine();
                    $this->error('AniList API rate limit exceeded (429). Stopping processing for 15 minutes.');
                    $this->warn('Please wait 15 minutes before running this command again.');
                    $progressBar->finish();
                    $this->newLine();

                    // Show summary of what was processed before the error
                    $this->printSummaryTable(
                        'Summary (before rate limit error):',
                        $processed,
                        $successful,
                        $failed,
                        $notFound,
                        $skipped
                    );

                    // Show failed searchnames if any
                    if (! empty($failedSearchnames)) {
                        $this->newLine();
                        $this->warn("Failed searchnames (before rate limit error):");
                        $this->line("Showing up to 10 examples:");
                        $examples = array_slice($failedSearchnames, 0, 10);
                        foreach ($examples as $item) {
                            $cleanedTitle = $item['cleaned_title'] ?? '(extraction failed)';
                            $this->line("  - {$item['searchname']} -> {$cleanedTitle} ({$item['reason']})");
                        }
                        if (count($failedSearchnames) > 10) {
                            $this->line("  ... and " . (count($failedSearchnames) - 10) . " more.");
                        }
                    }

                    return self::FAILURE;
                }

                $failed++;
                if ($this->getOutput()->isVerbose()) {
                    $this->newLine();
                    $this->error("Error processing release ID {$release->id}: " . $e->getMessage());
                }
            }

            $processed++;
            $progressBar->advance();
        }
    }

    $progressBar->setMessage('Complete!');
    $progressBar->finish();
    $this->newLine(2);

    // Summary
    $this->printSummaryTable('Summary:', $processed, $successful, $failed, $notFound, $skipped);

    // Show failed searchnames if any
    if (! empty($failedSearchnames) && $notFound > 0) {
        $this->newLine();
        $this->warn("Failed to fetch data for {$notFound} release(s):");
        $this->newLine();

        // Show up to 20 examples
        $examples = array_slice($failedSearchnames, 0, 20);
        $rows = [];
        foreach ($examples as $item) {
            $cleanedTitle = $item['cleaned_title'] ?? '(extraction failed)';
            $rows[] = [
                $this->truncateForDisplay($item['searchname'], 60),
                $this->truncateForDisplay($cleanedTitle, 40),
                $item['reason'],
            ];
        }

        $this->table(
            ['Searchname', 'Cleaned Title', 'Reason'],
            $rows
        );

        if (count($failedSearchnames) > 20) {
            $this->line("... and " . (count($failedSearchnames) - 20) . " more. Use --verbose to see all.");
        }
    }

    return self::SUCCESS;
}

/**
 * Print a heading followed by the per-status counters as a two-column table.
 */
private function printSummaryTable(
    string $heading,
    int $processed,
    int $successful,
    int $failed,
    int $notFound,
    int $skipped
): void {
    $this->info($heading);
    $this->table(
        ['Status', 'Count'],
        [
            ['Total Processed', $processed],
            ['Successful', $successful],
            ['Failed', $failed],
            ['Not Found', $notFound],
            ['Skipped', $skipped],
        ]
    );
}

/**
 * Shorten $text to at most $maxChars characters (not bytes), appending "..."
 * only when something was actually cut off.
 */
private function truncateForDisplay(string $text, int $maxChars): string
{
    if (mb_strlen($text, 'UTF-8') <= $maxChars) {
        return $text;
    }

    return mb_substr($text, 0, $maxChars, 'UTF-8') . '...';
}
||||
| 340 | |||||
/**
 * Derive a searchable anime title from a release searchname.
 *
 * The name is passed through fixEncoding(), separators are normalised,
 * leading [Group] tags and bracketed/parenthesised language codes are
 * removed, and everything from the first episode/format marker onwards is
 * cut off. The remainder is handed to cleanTitle().
 *
 * @return array{title: string}|array{} Empty array when no title is left.
 */
private function extractTitleFromSearchname(string $searchname): array
{
    if ($searchname === '' || $searchname === '0') {
        return [];
    }

    // Repair encoding artifacts before any pattern matching.
    $s = $this->fixEncoding($searchname);

    // Underscores and dots act as word separators in release names.
    $s = trim(preg_replace('/\s+/', ' ', str_replace(['_', '.'], ' ', $s)));

    // Drop one or more leading group tags such as "[Group]".
    $s = trim(preg_replace('/^(?:\[[^\]]+\]\s*)+/', '', $s));

    // Blank out language codes wrapped in square brackets, then in parentheses.
    $s = preg_replace('/\[(?:ENG|JAP|JPN|SUB|DUB|MULTI|RAW|HARDSUB|SOFTSUB|HARDDUB|SOFTDUB|ITA|SPA|FRE|GER|RUS|CHI|KOR)\]/i', ' ', $s);
    $s = preg_replace('/\((?:ENG|JAP|JPN|SUB|DUB|MULTI|RAW|HARDSUB|SOFTSUB|HARDDUB|SOFTDUB|ITA|SPA|FRE|GER|RUS|CHI|KOR)\)/i', ' ', $s);

    // Markers that end the title, tried in this order; the first one that
    // matches decides where the title is cut.
    $titleEndMarkers = [
        // 1) " S01E01" / " S1E1"
        '/\sS\d+E\d+/i',
        // 2) " 1x18" / " 2x05" (season x episode)
        '/\s\d+x\d+/i',
        // 3) " - NNN"
        '/\s-\s*(\d{1,3})\b/',
        // 4) " E0*NNN" / " Ep NNN" / " Episode NNN"
        '/\sE(?:p(?:isode)?)?\s*0*(\d{1,3})\b/i',
        // 5) Keywords Movie/OVA/Complete Series
        '/\b(Movie|OVA|Complete Series|Complete|Full Series)\b/i',
        // 6) BD/resolution releases: title ends at the first such bracket token
        '/\[(?:BD|BDRip|BluRay|Blu-Ray|\d{3,4}[ipx]|HEVC|x264|x265|H264|H265)\]/i',
    ];

    // Without any marker the whole string is the title.
    $title = $s;
    foreach ($titleEndMarkers as $marker) {
        if (preg_match($marker, $s, $m, PREG_OFFSET_CAPTURE)) {
            $title = substr($s, 0, (int) $m[0][1]);
            break;
        }
    }

    $title = $this->cleanTitle($title);

    return $title === '' ? [] : ['title' => $title];
}
||||
| 409 | |||||
/**
 * Remove encoding artifacts (mojibake remnants, control characters) from a
 * string and return it NFC-normalised when the intl extension is available.
 *
 * Input that is not valid UTF-8 is scrubbed first: the /u patterns below
 * return null for invalid UTF-8, which previously collapsed the whole name
 * to an empty string.
 *
 * @param  string  $text  Raw text, e.g. a release searchname.
 * @return string  Cleaned text; never null, and valid UTF-8.
 */
private function fixEncoding(string $text): string
{
    // Replace invalid byte sequences so every /u pattern below can run.
    if (! mb_check_encoding($text, 'UTF-8')) {
        $text = mb_scrub($text, 'UTF-8');
    }

    // Applied in order; each match is deleted.
    $artifactPatterns = [
        // Common corrupted character sequences: âÂ_Â, â Â, âÂ, etc.
        '/âÂ[_\sÂ]*/u',
        '/Ã[¢Â©€£]/u',
        // Standalone Â characters (common encoding artifact)
        '/Â+/u',
        // Any remaining Ã together with the non-whitespace run that follows it.
        // Since every Ã is removed here, no separate double-encoding repair
        // step can ever find one afterwards.
        '/Ã[^\s]*/u',
        // Non-printable/control characters except tab, LF and CR
        '/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/u',
    ];

    foreach ($artifactPatterns as $pattern) {
        // preg_replace() returns null on a PCRE error; keep the text in that case.
        $text = preg_replace($pattern, '', $text) ?? $text;
    }

    // Normalize Unicode (NFD -> NFC) if available
    if (function_exists('normalizer_normalize')) {
        $normalized = normalizer_normalize($text, \Normalizer::FORM_C);
        // normalizer_normalize() returns false on failure; keep the text then.
        if (is_string($normalized)) {
            $text = $normalized;
        }
    }

    // Final cleanup. With the /u modifier \xC0-\xC5 denotes the code points
    // U+00C0-U+00C5 (À Á Â Ã Ä Å), i.e. what UTF-8 lead bytes 0xC0-0xC5 look
    // like after being mis-decoded as Latin-1. Note that this also removes
    // those letters when they are a legitimate part of a title.
    return preg_replace('/[\xC0-\xC1\xC2-\xC5]/u', '', $text) ?? $text;
}
||||
| 450 | |||||
/**
 * Reduce a title candidate to the bare series name by removing bracketed
 * tags, language codes, episode markers, quality tags and stray separators.
 *
 * The rules run strictly in the order listed; later rules rely on what the
 * earlier ones have already removed.
 */
private function cleanTitle(string $title): string
{
    // Encoding repair comes first so the rules below see clean text.
    $title = $this->fixEncoding($title);

    // Ordered [pattern, replacement] pairs.
    $rules = [
        // All bracketed tags (language, quality, etc.)
        ['/\[[^\]]+\]/', ' '],
        // All parenthesized tags
        ['/\([^)]+\)/', ' '],
        // Language codes (standalone or with separators)
        ['/\b(ENG|JAP|JPN|SUB|DUB|MULTI|RAW|HARDSUB|SOFTSUB|HARDDUB|SOFTDUB|ITA|SPA|FRE|GER|RUS|CHI|KOR)\b/i', ' '],
        // Metadata words (JAV, Uncensored, Censored, etc.)
        ['/\b(JAV|Uncensored|Censored|Mosaic|Mosaic-less|HD|SD|FHD|UHD)\b/i', ' '],
        // 6-digit date patterns such as 091919 or 200101
        ['/\b\d{6}\b/', ' '],
        // Trailing numbers, with or without a leading dash/underscore (_01, 01, _001)
        ['/[-_]\s*\d{1,4}\s*$/i', ''],
        ['/\s+\d{1,4}\s*$/i', ''],
        // " - 1x18" or " - 1x18 - Episode Title"
        ['/\s*-\s*\d+x\d+.*$/i', ''],
        // " S01E01" / " S1E1" and everything after it
        ['/\s+S\d+E\d+.*$/i', ''],
        // " - NNN" or " - NNN - Episode Title"
        ['/\s*-\s*\d{1,4}(?:\s*-\s*.*)?\s*$/i', ''],
        ['/\s*-\s*$/i', ''],
        // " E0*NNN" / " Ep NNN" / " Episode NNN"
        ['/\s+E(?:p(?:isode)?)?\s*0*\d{1,4}\s*$/i', ''],
        // Quality/resolution tags
        ['/\b(480p|720p|1080p|2160p|4K|BD|BDRip|BluRay|Blu-Ray|HEVC|x264|x265|H264|H265|WEB|WEBRip|DVDRip|TVRip)\b/i', ' '],
        // Common release tags
        ['/\b(PROPER|REPACK|RIP|ISO|CRACK|BETA|ALPHA|FINAL|COMPLETE|FULL)\b/i', ' '],
        // Trailing volume/chapter markers
        ['/\s+Vol\.?\s*\d*\s*$/i', ''],
        ['/\s+Ch\.?\s*\d*\s*$/i', ''],
        // Trailing dashes and separators
        ['/\s*[-_]\s*$/', ''],
        // Collapse whitespace runs
        ['/\s+/', ' '],
    ];

    foreach ($rules as [$pattern, $replacement]) {
        $title = preg_replace($pattern, $replacement, $title);
    }

    return trim($title);
}
||||
| 507 | |||||
/**
 * Throttle outgoing AniList API calls to self::RATE_LIMIT_PER_MINUTE per minute.
 *
 * Call this immediately before every API request. It blocks (sleeps) when
 * needed and then records the request time in $this->requestTimestamps.
 *
 * Two limits are applied:
 *  - a sliding 60-second window that never holds more than
 *    self::RATE_LIMIT_PER_MINUTE requests, and
 *  - a minimum spacing of 60 / self::RATE_LIMIT_PER_MINUTE seconds between
 *    consecutive requests, so the allowance is not spent in one burst.
 *
 * Timestamps have one-second resolution (time()), so the spacing is accurate
 * to within about a second.
 */
private function enforceRateLimit(): void
{
    $windowSeconds = 60;

    // Forget requests that have aged out of the sliding window.
    $pruneWindow = function (int $now) use ($windowSeconds): void {
        $this->requestTimestamps = array_values(array_filter(
            $this->requestTimestamps,
            static fn ($timestamp): bool => ($now - $timestamp) < $windowSeconds
        ));
    };

    $now = time();
    $pruneWindow($now);

    // Window full: sleep until the oldest request has left it. Every remaining
    // timestamp is less than 60s old, so the wait is always between 2 and 61
    // seconds; it must not be skipped at the upper end, otherwise a burst
    // issued within one second would bypass the limit. The loop re-checks in
    // case sleep() returned early.
    while (
        $this->requestTimestamps !== []
        && count($this->requestTimestamps) >= self::RATE_LIMIT_PER_MINUTE
    ) {
        $requestCount = count($this->requestTimestamps);
        $oldestRequest = min($this->requestTimestamps);
        $waitTime = $windowSeconds - ($now - $oldestRequest) + 1; // +1 for safety margin

        if ($this->getOutput()->isVerbose()) {
            $this->newLine();
            $this->warn("Rate limit reached ({$requestCount}/" . self::RATE_LIMIT_PER_MINUTE . "). Waiting {$waitTime} seconds...");
        }
        sleep($waitTime);

        $now = time();
        $pruneWindow($now);
    }

    // Minimum spacing between consecutive requests,
    // e.g. 60 seconds / 20 requests = 3 seconds per request.
    $minDelay = 60.0 / self::RATE_LIMIT_PER_MINUTE;

    if ($this->requestTimestamps !== []) {
        $timeSinceLastRequest = $now - max($this->requestTimestamps);
        $remainingDelay = $minDelay - $timeSinceLastRequest;

        // Wait out whatever is left of the spacing. Waiting only for short
        // remainders would skip the delay exactly when requests are closest
        // together.
        if ($remainingDelay > 0) {
            usleep((int) round($remainingDelay * 1_000_000));
            $now = time();
        }
    }

    // Record this request timestamp (after all delays)
    $this->requestTimestamps[] = $now;
}
||||
| 567 | } |
||||
| 568 | |||||
| 569 |