| Total Complexity | 64 |
| Total Lines | 554 |
| Duplicated Lines | 0 % |
| Changes | 5 | ||
| Bugs | 0 | Features | 0 |
Complex classes like RefreshAnimeData often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use RefreshAnimeData, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 12 | class RefreshAnimeData extends Command |
||
| 13 | { |
||
| 14 | /** |
||
| 15 | * Conservative rate limit: 20 requests per minute (to stay well below AniList's 90/min limit). |
||
| 16 | * This allows for multiple API calls per release (search + getById). |
||
| 17 | */ |
||
| 18 | private const RATE_LIMIT_PER_MINUTE = 20; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * Track API request timestamps for rate limiting. |
||
| 22 | * |
||
| 23 | * @var array<int> |
||
| 24 | */ |
||
| 25 | private array $requestTimestamps = []; |
||
| 26 | |||
| 27 | /** |
||
| 28 | * The name and signature of the console command. |
||
| 29 | * |
||
| 30 | * @var string |
||
| 31 | */ |
||
| 32 | protected $signature = 'anime:refresh |
||
| 33 | {--limit=0 : Maximum number of releases to process (0 = all)} |
||
| 34 | {--chunk=100 : Process releases in chunks of this size} |
||
| 35 | {--missing-only : Only refresh releases missing AniList data (no anilist_id)} |
||
| 36 | {--retry-failed : Only refresh releases with anidbid <= 0 (failed processing: -1, -2, etc.)} |
||
| 37 | {--force : Force refresh even if data exists}'; |
||
| 38 | |||
| 39 | /** |
||
| 40 | * The console command description. |
||
| 41 | * |
||
| 42 | * @var string |
||
| 43 | */ |
||
| 44 | protected $description = 'Fetch and refresh AniList data for existing anime releases in TV->Anime category by matching release searchname'; |
||
| 45 | |||
| 46 | /** |
||
| 47 | * Execute the console command. |
||
| 48 | */ |
||
| 49 | public function handle(): int |
||
| 339 | } |
||
| 340 | |||
/**
 * Derive a bare anime title from a release searchname.
 *
 * The searchname is encoding-repaired, has "_" and "." turned into spaces,
 * and loses its leading [Group] tags plus any bracketed or parenthesised
 * language code. The text is then cut at the first marker that matches,
 * tried in this order: "S01E01", "1x18", " - NNN", "E/Ep/Episode NNN",
 * Movie/OVA/Complete keywords, and finally a bracketed BD/resolution/codec
 * tag. When no marker matches, the whole normalised string is the candidate.
 * The candidate is passed through cleanTitle() before being returned.
 *
 * @return array{title: string}|array{} Empty array when no title is left.
 */
private function extractTitleFromSearchname(string $searchname): array
{
    // Exactly the string values empty() treats as empty: '' and '0'.
    if ($searchname === '' || $searchname === '0') {
        return [];
    }

    // Repair encoding artifacts, then collapse separators into single spaces.
    $normalized = $this->fixEncoding($searchname);
    $normalized = str_replace(['_', '.'], ' ', $normalized);
    $normalized = trim(preg_replace('/\s+/', ' ', $normalized));

    // Drop one or more leading release-group tags such as "[Group] ".
    $normalized = trim(preg_replace('/^(?:\[[^\]]+\]\s*)+/', '', $normalized));

    // Blank out language markers, first in [brackets], then in (parentheses).
    $languageTagPatterns = [
        '/\[(?:ENG|JAP|JPN|SUB|DUB|MULTI|RAW|HARDSUB|SOFTSUB|HARDDUB|SOFTDUB|ITA|SPA|FRE|GER|RUS|CHI|KOR)\]/i',
        '/\((?:ENG|JAP|JPN|SUB|DUB|MULTI|RAW|HARDSUB|SOFTSUB|HARDDUB|SOFTDUB|ITA|SPA|FRE|GER|RUS|CHI|KOR)\)/i',
    ];
    foreach ($languageTagPatterns as $languageTagPattern) {
        $normalized = preg_replace($languageTagPattern, ' ', $normalized);
    }

    // Markers that end the title, in priority order; only the first hit is used.
    $cutOffPatterns = [
        // 1) " S01E01" or " S1E1"
        '/\sS\d+E\d+/i',
        // 2) " 1x18" or " 2x05" (season x episode)
        '/\s\d+x\d+/i',
        // 3) " - NNN"
        '/\s-\s*(\d{1,3})\b/',
        // 4) " E0*NNN", " Ep NNN" or " Episode NNN"
        '/\sE(?:p(?:isode)?)?\s*0*(\d{1,3})\b/i',
        // 5) Movie / OVA / Complete Series keywords
        '/\b(Movie|OVA|Complete Series|Complete|Full Series)\b/i',
        // 6) Bracketed BD / resolution / codec tag
        '/\[(?:BD|BDRip|BluRay|Blu-Ray|\d{3,4}[ipx]|HEVC|x264|x265|H264|H265)\]/i',
    ];

    // Without any marker the whole normalised string is the title candidate.
    $candidate = $normalized;
    foreach ($cutOffPatterns as $cutOffPattern) {
        if (preg_match($cutOffPattern, $normalized, $hit, PREG_OFFSET_CAPTURE)) {
            // $hit[0][1] is the byte offset of the match; keep what precedes it.
            $candidate = substr($normalized, 0, (int) $hit[0][1]);
            break;
        }
    }

    $candidate = $this->cleanTitle($candidate);

    return $candidate === '' ? [] : ['title' => $candidate];
}
||
| 409 | |||
/**
 * Repair or strip UTF-8 encoding damage in a string.
 *
 * Steps, in order:
 *  1. Input that is not valid UTF-8 is reinterpreted as ISO-8859-1, so the
 *     /u regexes below cannot fail on malformed bytes.
 *  2. Genuine double-encoding (UTF-8 bytes that were read as ISO-8859-1 and
 *     encoded again, e.g. "Ã©" for "é") is reversed. This is only done when
 *     the round trip is lossless and yields valid UTF-8.
 *  3. Leftover mojibake fragments that could not be reversed are removed.
 *  4. ASCII control characters are removed (tab, LF and CR are kept).
 *  5. The text is normalised to Unicode NFC when ext-intl is available.
 *  6. Remaining "lead byte + continuation byte" pairs shown as U+00C0..U+00C5
 *     followed by U+0080..U+00BF are removed. Standalone letters such as
 *     "Ä" or "Å" are kept.
 *
 * @param string $text Raw text, possibly mis-encoded.
 *
 * @return string Valid UTF-8 text with encoding artifacts repaired or removed.
 */
private function fixEncoding(string $text): string
{
    if ($text === '') {
        return '';
    }

    // 1. Every /u regex returns null on malformed UTF-8, so normalise first.
    if (!mb_check_encoding($text, 'UTF-8')) {
        $text = mb_convert_encoding($text, 'UTF-8', 'ISO-8859-1');
    }

    // 2. Reverse double-encoding before anything destructive runs.
    //    Trigger: a character that looks like a UTF-8 lead byte (U+00C2..U+00F4)
    //    followed by one that looks like a continuation byte (U+0080..U+00BF).
    //    Guard: every character must fit in ISO-8859-1, otherwise the
    //    conversion would be lossy.
    if (
        preg_match('/[\x{C2}-\x{F4}][\x{80}-\x{BF}]/u', $text) === 1
        && preg_match('/[^\x{00}-\x{FF}]/u', $text) === 0
    ) {
        $decoded = mb_convert_encoding($text, 'ISO-8859-1', 'UTF-8');
        // Accept only if the recovered bytes form valid UTF-8.
        if (is_string($decoded) && mb_check_encoding($decoded, 'UTF-8')) {
            $text = $decoded;
        }
    }

    // 3. Strip fragments that could not be reversed. Code points are written
    //    as escapes so the patterns survive any re-encoding of this file:
    //    U+00E2 = "â", U+00C2 = "Â", U+00C3 = "Ã".
    $artifactPatterns = [
        // "â" + "Â" followed by any run of "_", whitespace or "Â"
        '/\x{E2}\x{C2}[_\s\x{C2}]*/u',
        // "Ã" followed by one of "¢", "Â", "©", "€", "£"
        '/\x{C3}[\x{A2}\x{C2}\x{A9}\x{20AC}\x{A3}]/u',
        // any run of "Â"
        '/\x{C2}+/u',
        // "Ã" and everything up to the next whitespace
        '/\x{C3}[^\s]*/u',
        // 4. ASCII control characters except tab, LF and CR
        '/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/u',
    ];
    foreach ($artifactPatterns as $artifactPattern) {
        // preg_replace() returns null on a PCRE error; keep the text in that case.
        $text = preg_replace($artifactPattern, '', $text) ?? $text;
    }

    // 5. NFD -> NFC; normalizer_normalize() returns false on failure.
    if (function_exists('normalizer_normalize')) {
        $normalizedText = normalizer_normalize($text, \Normalizer::FORM_C);
        if (is_string($normalizedText)) {
            $text = $normalizedText;
        }
    }

    // 6. In /u mode \x{C0}-\x{C5} are the letters "À".."Å", not raw bytes, so
    //    only remove them when followed by a continuation-byte look-alike.
    $text = preg_replace('/[\x{C0}-\x{C5}][\x{80}-\x{BF}]/u', '', $text) ?? $text;

    return $text;
}
||
| 450 | |||
/**
 * Strip stray separators, language codes, episode numbers and other release
 * tags from a title candidate.
 *
 * The title is first passed through fixEncoding(), then an ordered list of
 * regex substitutions is applied. Order matters: the end-anchored patterns
 * only see what the earlier steps have left at the end of the string.
 *
 * @param string $title Title candidate, possibly still carrying release tags.
 *
 * @return string Cleaned, whitespace-normalised title ('' if nothing is left).
 */
private function cleanTitle(string $title): string
{
    // Fix encoding issues first.
    $title = $this->fixEncoding($title);

    // Ordered [pattern, replacement] pairs.
    $steps = [
        // All bracketed tags (language, quality, etc.)
        ['/\[[^\]]+\]/', ' '],
        // All parenthesised tags
        ['/\([^)]+\)/', ' '],
        // Language codes as whole words
        ['/\b(ENG|JAP|JPN|SUB|DUB|MULTI|RAW|HARDSUB|SOFTSUB|HARDDUB|SOFTDUB|ITA|SPA|FRE|GER|RUS|CHI|KOR)\b/i', ' '],
        // Metadata words. "Mosaic-less" must come before "Mosaic": the first
        // alternative that matches wins, and "Mosaic" matches at the "-"
        // word boundary, which would leave "-less" behind.
        ['/\b(JAV|Uncensored|Censored|Mosaic-less|Mosaic|HD|SD|FHD|UHD)\b/i', ' '],
        // 6-digit date-like numbers (091919, 200101, ...)
        ['/\b\d{6}\b/', ' '],
        // Trailing numbers such as "_01", "-001" or " 01"
        ['/[-_]\s*\d{1,4}\s*$/i', ''],
        ['/\s+\d{1,4}\s*$/i', ''],
        // " - 1x18" and anything after it (episode title)
        ['/\s*-\s*\d+x\d+.*$/i', ''],
        // " S01E01" and anything after it
        ['/\s+S\d+E\d+.*$/i', ''],
        // " - NNN" or " - NNN - Episode Title"
        ['/\s*-\s*\d{1,4}(?:\s*-\s*.*)?\s*$/i', ''],
        // Dangling trailing dash
        ['/\s*-\s*$/i', ''],
        // Trailing " E01", " Ep 01", " Episode 01"
        ['/\s+E(?:p(?:isode)?)?\s*0*\d{1,4}\s*$/i', ''],
        // Quality / resolution / source tags
        ['/\b(480p|720p|1080p|2160p|4K|BD|BDRip|BluRay|Blu-Ray|HEVC|x264|x265|H264|H265|WEB|WEBRip|DVDRip|TVRip)\b/i', ' '],
        // Common release tags
        ['/\b(PROPER|REPACK|RIP|ISO|CRACK|BETA|ALPHA|FINAL|COMPLETE|FULL)\b/i', ' '],
        // Trailing volume / chapter markers
        ['/\s+Vol\.?\s*\d*\s*$/i', ''],
        ['/\s+Ch\.?\s*\d*\s*$/i', ''],
        // Trailing dashes and underscores
        ['/\s*[-_]\s*$/', ''],
        // Collapse runs of whitespace
        ['/\s+/', ' '],
    ];

    foreach ($steps as [$pattern, $replacement]) {
        // preg_replace() returns null on a PCRE error; keep the title in that case.
        $title = preg_replace($pattern, $replacement, $title) ?? $title;
    }

    return trim($title);
}
||
| 507 | |||
| 508 | /** |
||
| 509 | * Enforce rate limiting: RATE_LIMIT_PER_MINUTE (20) requests per minute (conservative limit). |
||
| 510 | * Adds delays between API calls to prevent hitting AniList's 90/min limit. |
||
| 511 | */ |
||
| 512 | private function enforceRateLimit(): void |
||
| 566 | } |
||
| 567 | } |
||
| 568 | |||
| 569 |