Passed
Push — master ( ed202f...f50875 )
by Darko
10:55
created

AbstractAdultProviderPipe::getColorCli()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 3
c 1
b 0
f 0
dl 0
loc 6
rs 10
cc 2
nc 2
nop 0
1
<?php
2
3
namespace App\Services\AdultProcessing\Pipes;
4
5
use App\Services\AdultProcessing\AdultProcessingPassable;
6
use App\Services\AdultProcessing\AdultProcessingResult;
7
use App\Services\AdultProcessing\AgeVerificationManager;
8
use Closure;
9
use GuzzleHttp\Client;
10
use GuzzleHttp\Cookie\CookieJar;
11
use GuzzleHttp\Cookie\FileCookieJar;
12
use GuzzleHttp\Cookie\SetCookie;
13
use GuzzleHttp\Exception\ConnectException;
14
use GuzzleHttp\Exception\RequestException;
15
use GuzzleHttp\HandlerStack;
16
use GuzzleHttp\Middleware;
17
use GuzzleHttp\Psr7\Request;
18
use GuzzleHttp\Psr7\Response;
19
use Illuminate\Support\Facades\Cache;
20
use Illuminate\Support\Facades\Log;
21
use voku\helper\HtmlDomParser;
0 ignored issues
show
Bug introduced by
The type voku\helper\HtmlDomParser was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
22
23
/**
24
 * Base class for adult movie processing pipe handlers.
25
 *
26
 * Each pipe is responsible for processing releases through a specific adult site provider.
27
 *
28
 * Note: This class intentionally uses lazy loading for HtmlDomParser to avoid
29
 * serialization issues with DOMDocument when using Laravel's Concurrency facade.
30
 */
31
abstract class AbstractAdultProviderPipe
32
{
33
    protected int $priority = 50;
34
    protected bool $echoOutput = true;
35
    protected ?HtmlDomParser $html = null;
36
    protected ?string $cookie = null;
37
38
    /**
39
     * Minimum similarity threshold for matching (percentage).
40
     */
41
    protected float $minimumSimilarity = 90.0;
42
43
    /**
44
     * HTTP client for making requests.
45
     */
46
    protected ?Client $httpClient = null;
47
48
    /**
49
     * Cookie jar for maintaining session cookies.
50
     */
51
    protected CookieJar|FileCookieJar|null $cookieJar = null;
52
53
    /**
54
     * Age verification manager for handling site-specific cookies.
55
     */
56
    protected ?AgeVerificationManager $ageVerificationManager = null;
57
58
    /**
59
     * Maximum number of retry attempts for failed requests.
60
     */
61
    protected int $maxRetries = 3;
62
63
    /**
64
     * Delay between retries in milliseconds.
65
     */
66
    protected int $retryDelay = 1000;
67
68
    /**
69
     * Rate limit delay between requests in milliseconds.
70
     */
71
    protected int $rateLimitDelay = 500;
72
73
    /**
74
     * Last request timestamp for rate limiting.
75
     */
76
    protected static array $lastRequestTime = [];
77
78
    /**
79
     * Cache duration for search results in minutes.
80
     */
81
    protected int $cacheDuration = 60;
82
83
    /**
84
     * Whether to use caching for this provider.
85
     */
86
    protected bool $useCache = true;
87
88
    public function __construct()
    {
        // Deliberately empty: the HtmlDomParser is only built on first use
        // (see getHtmlParser()) so that no parser instance exists on a freshly
        // constructed pipe.
    }
92
93
    /**
     * Return the HtmlDomParser for this pipe, creating it on first access.
     *
     * The same instance is reused by every later call.
     */
    protected function getHtmlParser(): HtmlDomParser
    {
        return $this->html ??= new HtmlDomParser();
    }
103
104
    /**
     * Run this provider as one stage of the adult-processing pipeline.
     *
     * Whatever happens in this stage, the passable is always handed on to
     * $next; the stage only records a result on it (skipped, the provider's
     * own result, or failed) before doing so.
     *
     * @param  AdultProcessingPassable  $passable  State travelling through the pipeline.
     * @param  Closure  $next  The next stage of the pipeline.
     */
    public function handle(AdultProcessingPassable $passable, Closure $next): AdultProcessingPassable
    {
        // The passable reports that processing should stop: pass straight through.
        if ($passable->shouldStopProcessing()) {
            return $next($passable);
        }

        // Keep the cookie carried by the passable on this pipe.
        $this->cookie = $passable->getCookie();

        if ($this->shouldSkip($passable)) {
            $skipped = AdultProcessingResult::skipped('Provider skipped', $this->getName());
            $passable->updateResult($skipped, $this->getName());

            return $next($passable);
        }

        if ($this->echoOutput) {
            // NOTE(review): getColorCli() is not defined in the visible part of
            // this class; presumably it is supplied elsewhere — confirm.
            $this->getColorCli()->info('Checking '.$this->getDisplayName().' for movie info');
        }

        try {
            // Throttle first, then let the concrete provider do its work.
            $this->applyRateLimit();
            $passable->updateResult($this->process($passable), $this->getName());
        } catch (\Exception $exception) {
            // A failing provider must not break the pipeline: log it and
            // record a failed result instead.
            Log::error('Adult provider '.$this->getName().' failed: '.$exception->getMessage(), [
                'provider' => $this->getName(),
                'title' => $passable->getCleanTitle(),
                'exception' => get_class($exception),
            ]);

            $failure = AdultProcessingResult::failed($exception->getMessage(), $this->getName());
            $passable->updateResult($failure, $this->getName());
        }

        return $next($passable);
    }
155
156
    /**
     * Sleep as needed so that consecutive requests to the same provider are at
     * least $rateLimitDelay milliseconds apart.
     *
     * Timestamps are kept per provider name in the static $lastRequestTime map.
     */
    protected function applyRateLimit(): void
    {
        $key = $this->getName();
        $nowMs = microtime(true) * 1000;
        $previousMs = self::$lastRequestTime[$key] ?? null;

        if ($previousMs !== null) {
            $remainingMs = $this->rateLimitDelay - ($nowMs - $previousMs);
            if ($remainingMs > 0) {
                // usleep() takes microseconds, the delay is in milliseconds.
                usleep((int) ($remainingMs * 1000));
            }
        }

        // Re-read the clock so the stored timestamp includes any time slept above.
        self::$lastRequestTime[$key] = microtime(true) * 1000;
    }
173
174
    /**
     * Priority of this provider; a lower number means a higher priority.
     */
    public function getPriority(): int
    {
        return $this->priority;
    }
181
182
    /**
     * Internal name of this provider.
     *
     * Within this class it is used in log messages, as the rate-limit key and
     * as part of the search cache key.
     */
    abstract public function getName(): string;

    /**
     * Name of this provider as shown in user-facing console output.
     */
    abstract public function getDisplayName(): string;

    /**
     * Base URL of the provider.
     *
     * Within this class it is used to obtain the cookie jar and to resolve
     * relative URLs found on age-verification pages.
     */
    abstract protected function getBaseUrl(): string;

    /**
     * Try to process the movie described by the passable with this provider.
     *
     * Called from handle() after rate limiting; an exception thrown here is
     * logged by handle() and recorded as a failed result.
     */
    abstract protected function process(AdultProcessingPassable $passable): AdultProcessingResult;

    /**
     * Search for a movie on this provider.
     *
     * @return array|false Array with 'title' and 'url' keys on success, false on failure
     */
    abstract protected function search(string $movie): array|false;

    /**
     * Collect all movie information from the provider.
     *
     * @return array|false The collected information, or false
     */
    abstract protected function getMovieInfo(): array|false;
213
214
    /**
     * Decide whether this provider should be skipped for the given passable.
     *
     * The base implementation skips when the passable's clean title is empty.
     */
    protected function shouldSkip(AdultProcessingPassable $passable): bool
    {
        $cleanTitle = $passable->getCleanTitle();

        return empty($cleanTitle);
    }
221
222
    /**
     * Enable or disable console output for this pipe.
     *
     * @return self This instance, to allow chaining
     */
    public function setEchoOutput(bool $echo): self
    {
        $this->echoOutput = $echo;

        return $this;
    }
230
231
    /**
     * Look up a previously cached search result for the given movie.
     *
     * @return array|false|null The cached value (which may itself be false),
     *                          or null when caching is disabled or nothing is cached
     */
    protected function getCachedSearch(string $movie): array|false|null
    {
        if (! $this->useCache) {
            return null;
        }

        $hit = Cache::get('adult_search_' . $this->getName() . '_' . md5(strtolower($movie)));
        if ($hit === null) {
            return null;
        }

        if ($this->echoOutput) {
            $this->getColorCli()->info('Using cached result for: ' . $movie);
        }

        return $hit;
    }
252
253
    /**
     * Store a search result (including a false "not found" result) in the
     * cache for $cacheDuration minutes. Does nothing when caching is disabled.
     */
    protected function cacheSearchResult(string $movie, array|false $result): void
    {
        if ($this->useCache) {
            $key = 'adult_search_' . $this->getName() . '_' . md5(strtolower($movie));
            $expiresAt = now()->addMinutes($this->cacheDuration);

            Cache::put($key, $result, $expiresAt);
        }
    }
265
266
    /**
     * Fetch raw HTML from a URL, retrying on transient failures.
     *
     * Up to $maxRetries attempts are made. An attempt is retried when the body
     * looks like an error page, on connection errors, and on request errors
     * other than non-429 4xx responses. When an age gate is detected, the
     * cookies are refreshed once and the request repeated; that single repeat
     * does not count against the retry budget. If the gate is still there,
     * handleAgeVerification() gets a chance to get past it.
     *
     * @param  string  $url  URL to fetch.
     * @param  string|null  $cookie  Optional raw Cookie header value to send.
     * @param  array|null  $postData  When not null, sent as a form-encoded POST; otherwise a GET is made.
     * @return string|false The response body, or false when no usable page could be fetched
     */
    protected function fetchHtml(string $url, ?string $cookie = null, ?array $postData = null): string|false
    {
        $attempt = 0;
        $lastException = null;
        $ageVerificationAttempted = false;

        while ($attempt < $this->maxRetries) {
            $attempt++;

            try {
                $client = $this->getHttpClient();

                $options = [
                    'headers' => $this->getDefaultHeaders(),
                ];

                // Add custom cookie if provided
                if ($cookie) {
                    $options['headers']['Cookie'] = $cookie;
                }

                // Handle POST data
                if ($postData !== null) {
                    $options['form_params'] = $postData;
                    $response = $client->post($url, $options);
                } else {
                    $response = $client->get($url, $options);
                }

                $body = $response->getBody()->getContents();

                // Check for common error pages
                if ($this->isErrorPage($body)) {
                    Log::warning('Received error page from ' . $this->getName() . ': ' . $url);
                    if ($attempt < $this->maxRetries) {
                        usleep($this->retryDelay * 1000);
                        continue;
                    }

                    return false;
                }

                // Check for age verification requirement
                if ($this->requiresAgeVerification($body)) {
                    // First sighting of the gate: refresh cookies and repeat the request.
                    if (! $ageVerificationAttempted) {
                        $ageVerificationAttempted = true;

                        // Refresh cookies using the manager
                        $this->getAgeVerificationManager()->refreshCookies($this->getBaseUrl());

                        // Reset HTTP client to pick up new cookies
                        $this->httpClient = null;
                        $this->cookieJar = null;

                        Log::info('Refreshed age verification cookies for ' . $this->getName() . ', retrying...');

                        // The repeat must not use up a retry slot; otherwise a
                        // gate hit on the final attempt would end the loop
                        // before the refreshed cookies are ever tried. This
                        // branch runs at most once, so the loop stays bounded.
                        $attempt--;

                        continue;
                    }

                    $body = $this->handleAgeVerification($url, $body);
                    if ($body === false) {
                        return false;
                    }
                }

                return $body;
            } catch (ConnectException $e) {
                $lastException = $e;
                Log::warning('Connection failed for ' . $this->getName() . ' (attempt ' . $attempt . '): ' . $e->getMessage());

                if ($attempt < $this->maxRetries) {
                    // Linear backoff: the wait grows with the attempt number.
                    usleep($this->retryDelay * 1000 * $attempt);
                }
            } catch (RequestException $e) {
                $lastException = $e;
                $statusCode = $e->hasResponse() ? $e->getResponse()->getStatusCode() : 0;

                // Don't retry on 4xx client errors (except 429 rate limit)
                if ($statusCode >= 400 && $statusCode < 500 && $statusCode !== 429) {
                    Log::error('HTTP ' . $statusCode . ' for ' . $this->getName() . ': ' . $url);

                    return false;
                }

                Log::warning('Request failed for ' . $this->getName() . ' (attempt ' . $attempt . '): ' . $e->getMessage());

                if ($attempt < $this->maxRetries) {
                    // Longer delay for rate limit errors
                    $delay = $statusCode === 429 ? $this->retryDelay * 5 : $this->retryDelay * $attempt;
                    usleep($delay * 1000);
                }
            } catch (\Exception $e) {
                $lastException = $e;
                Log::error('Unexpected error for ' . $this->getName() . ': ' . $e->getMessage());

                if ($attempt < $this->maxRetries) {
                    usleep($this->retryDelay * 1000);
                }
            }
        }

        if ($lastException) {
            Log::error('All retry attempts failed for ' . $this->getName() . ': ' . $lastException->getMessage());
        }

        return false;
    }
384
385
    /**
     * Build the browser-like request headers sent with every request.
     *
     * The User-Agent is picked anew on each call via getRandomUserAgent().
     *
     * @return array<string, string>
     */
    protected function getDefaultHeaders(): array
    {
        $headers = ['User-Agent' => $this->getRandomUserAgent()];

        $headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8';
        $headers['Accept-Language'] = 'en-US,en;q=0.9';
        // NOTE(review): 'br' is advertised here; presumably the HTTP handler
        // in use can decode Brotli — confirm for the deployment environment.
        $headers['Accept-Encoding'] = 'gzip, deflate, br';
        $headers['Cache-Control'] = 'no-cache';
        $headers['Pragma'] = 'no-cache';
        $headers['Sec-Fetch-Dest'] = 'document';
        $headers['Sec-Fetch-Mode'] = 'navigate';
        $headers['Sec-Fetch-Site'] = 'none';
        $headers['Sec-Fetch-User'] = '?1';
        $headers['Upgrade-Insecure-Requests'] = '1';

        return $headers;
    }
404
405
    /**
     * Pick one user agent string at random from a fixed list of desktop browsers.
     */
    protected function getRandomUserAgent(): string
    {
        $pool = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
        ];

        $index = array_rand($pool);

        return $pool[$index];
    }
421
422
    /**
     * Tell whether a response body looks like an error page.
     *
     * The body is searched case-insensitively for a fixed set of phrases; a
     * single hit anywhere in the body is enough.
     */
    protected function isErrorPage(string $html): bool
    {
        $markers = [
            'Access Denied',
            'Service Unavailable',
            '503 Service',
            '502 Bad Gateway',
            'temporarily unavailable',
            'maintenance mode',
            'rate limit exceeded',
        ];

        foreach ($markers as $marker) {
            if (stripos($html, $marker) === false) {
                continue;
            }

            return true;
        }

        return false;
    }
445
446
    /**
     * Heuristically decide whether a page is an age gate rather than content.
     *
     * Checks are applied in this order:
     *  1. any content indicator matches          -> not an age gate;
     *  2. the body is shorter than 500 bytes     -> age gate;
     *  3. the body is shorter than 10000 bytes   -> age gate if any age phrase occurs;
     *  4. otherwise                              -> age gate if at least two age phrases occur.
     */
    protected function requiresAgeVerification(string $html): bool
    {
        // Regex fragments that only show up on real content pages.
        $contentIndicators = [
            '<title>.*?DVD.*?</title>',
            'product-info',
            'movie-details',
            'cast-list',
            'genre-list',
            '"@type":\s*"Movie"',
            '"@type":\s*"Product"',
        ];

        foreach ($contentIndicators as $indicator) {
            // '#' is the delimiter because some fragments contain '/'.
            if (preg_match('#' . $indicator . '#is', $html)) {
                return false;
            }
        }

        $length = strlen($html);

        // A very short response is treated as a redirect stub / age gate.
        if ($length < 500) {
            return true;
        }

        // Phrases searched case-insensitively as plain substrings.
        $agePhrases = [
            'age verification',
            'are you 18',
            'are you over 18',
            'confirm your age',
            'enter your age',
            'must be 18',
            'age-gate',
            'ageGate',
            'AgeConfirmation', // PopPorn specific
            'ageConfirmationButton', // ADE specific
            'age-confirmation', // Generic
            'verify your age',
            'adult content warning',
            'I am 18 or older',
            'I am over 18',
            'this site contains adult',
        ];

        $hits = 0;
        foreach ($agePhrases as $phrase) {
            if (stripos($html, $phrase) === false) {
                continue;
            }

            // On a relatively short page a single phrase is decisive.
            if ($length < 10000) {
                return true;
            }

            $hits++;
        }

        // Longer pages need at least two distinct phrases.
        return $hits >= 2;
    }
510
511
    /**
     * Try to get past an age-verification page.
     *
     * Three strategies are tried in order, and the first response that no
     * longer looks like an age gate is returned:
     *  1. submit every form on the page that looks like an age confirmation;
     *  2. follow an "Enter" / "I Agree"-style link found in the page;
     *  3. request the original URL once more with the current cookies.
     *
     * @param  string  $url  URL that produced the age gate.
     * @param  string  $html  Body of the age-gate page.
     * @return string|false Body of a page that passed the gate, or false if every strategy failed
     */
    protected function handleAgeVerification(string $url, string $html): string|false
    {
        // parse_url() yields null/false when the base URL has no usable host;
        // never pass that on to preg_replace().
        $host = parse_url($this->getBaseUrl(), PHP_URL_HOST);
        $domain = is_string($host) ? (string) preg_replace('/^www\./', '', $host) : '';

        if ($this->cookieJar) {
            Log::info('Attempting to handle age verification for ' . $this->getName() . ' with domain: ' . $domain);
        }

        // Strategy 1: find and submit an age verification form.
        $this->getHtmlParser()->loadHtml($html);

        foreach ($this->getHtmlParser()->find('form') as $form) {
            $formHtml = $form->innerHtml ?? '';

            // Check if this looks like an age verification form
            $looksLikeAgeForm = stripos($formHtml, 'age') !== false
                || stripos($formHtml, '18') !== false
                || stripos($formHtml, 'adult') !== false
                || stripos($formHtml, 'enter') !== false
                || stripos($formHtml, 'confirm') !== false;
            if (! $looksLikeAgeForm) {
                continue;
            }

            $postData = $this->extractAgeVerificationFormData($form);
            if (empty($postData)) {
                continue;
            }

            $submitUrl = $form->action ?? '';
            if (! str_starts_with($submitUrl, 'http')) {
                $submitUrl = $this->getBaseUrl() . '/' . ltrim($submitUrl, '/');
            }

            // Anything that is not explicitly POST (including a missing or
            // empty method attribute) is submitted as GET, as a browser would.
            $method = strtoupper($form->method ?? 'GET') === 'POST' ? 'POST' : 'GET';

            // A GET form carries its fields in the query string; only a POST
            // sends them as a form-encoded body.
            $payloadOption = $method === 'POST' ? 'form_params' : 'query';

            try {
                $response = $this->getHttpClient()->request($method, $submitUrl, [
                    $payloadOption => $postData,
                    'headers' => $this->getDefaultHeaders(),
                ]);

                $body = $response->getBody()->getContents();

                // Check if we still get age verification after submit
                if (! $this->requiresAgeVerification($body)) {
                    return $body;
                }
            } catch (\Exception $e) {
                Log::warning('Age verification submission failed for ' . $this->getName() . ': ' . $e->getMessage());
            }
        }

        // Strategy 2: follow an "enter site" style link. Only the link's href
        // is actionable, so the link pattern is matched on its own; an onclick
        // handler elsewhere on the page must not prevent the href from being
        // captured.
        $enterLinkPattern = '/<a[^>]*href\s*=\s*["\']([^"\']*)["\'][^>]*>(Enter|I am over 18|Agree|Enter Site|I Agree)/i';
        if (preg_match($enterLinkPattern, $html, $matches) === 1 && ! empty($matches[1])) {
            $enterUrl = $matches[1];
            if (! str_starts_with($enterUrl, 'http')) {
                $enterUrl = $this->getBaseUrl() . '/' . ltrim($enterUrl, '/');
            }

            try {
                $response = $this->getHttpClient()->get($enterUrl, [
                    'headers' => $this->getDefaultHeaders(),
                ]);
                $body = $response->getBody()->getContents();

                if (! $this->requiresAgeVerification($body)) {
                    return $body;
                }
            } catch (\Exception $e) {
                Log::warning('Age verification link follow failed for ' . $this->getName() . ': ' . $e->getMessage());
            }
        }

        // Strategy 3: refetch the original URL
        // (sometimes the cookies from previous attempts work)
        try {
            $response = $this->getHttpClient()->get($url, [
                'headers' => $this->getDefaultHeaders(),
            ]);
            $body = $response->getBody()->getContents();

            if (! $this->requiresAgeVerification($body)) {
                return $body;
            }
        } catch (\Exception $e) {
            Log::warning('Age verification retry failed for ' . $this->getName() . ': ' . $e->getMessage());
        }

        // If we couldn't handle age verification, log and return false
        Log::warning('Could not handle age verification for ' . $this->getName() . ': ' . $url);

        return false;
    }
614
615
    /**
     * Build the field values to submit for an age-verification form.
     *
     * Unnamed inputs and selects are ignored. For the remaining ones:
     *  - hidden inputs keep their value;
     *  - checkboxes whose name mentions age/agree/confirm are ticked;
     *  - submit inputs with a non-empty value are included;
     *  - any other input whose name mentions age/year gets '1990';
     *  - selects whose name mentions year/month/day get '1990'/'01'/'01'.
     *
     * @param  mixed  $form  Form node; it must offer find() and expose its
     *                       children's name/type/value as properties
     * @return array<string, string> Field name => value to submit
     */
    protected function extractAgeVerificationFormData($form): array
    {
        $fields = [];

        foreach ($form->find('input') as $field) {
            $fieldName = $field->name ?? '';
            $fieldType = strtolower($field->type ?? 'text');
            $fieldValue = $field->value ?? '';

            if (empty($fieldName)) {
                continue;
            }

            if ($fieldType == 'hidden') {
                $fields[$fieldName] = $fieldValue;
            } elseif ($fieldType == 'checkbox') {
                // Usually age verification checkboxes need to be checked
                $isConsentBox = stripos($fieldName, 'age') !== false
                    || stripos($fieldName, 'agree') !== false
                    || stripos($fieldName, 'confirm') !== false;
                if ($isConsentBox) {
                    $fields[$fieldName] = $fieldValue ?: '1';
                }
            } elseif ($fieldType == 'submit') {
                // A named submit button is included only when it has a value
                if (! empty($fieldValue)) {
                    $fields[$fieldName] = $fieldValue;
                }
            } elseif (stripos($fieldName, 'age') !== false || stripos($fieldName, 'year') !== false) {
                // Text-like inputs that might be age/birthdate
                $fields[$fieldName] = '1990'; // Default to a valid birth year
            }
        }

        // Select elements (for birthdate selection)
        foreach ($form->find('select') as $dropdown) {
            $dropdownName = $dropdown->name ?? '';
            if (empty($dropdownName)) {
                continue;
            }

            if (stripos($dropdownName, 'year') !== false) {
                $fields[$dropdownName] = '1990';

                continue;
            }

            if (stripos($dropdownName, 'month') !== false || stripos($dropdownName, 'day') !== false) {
                $fields[$dropdownName] = '01';
            }
        }

        return $fields;
    }
675
676
    /**
     * Return the AgeVerificationManager for this pipe, creating it on first access.
     */
    protected function getAgeVerificationManager(): AgeVerificationManager
    {
        return $this->ageVerificationManager ??= new AgeVerificationManager();
    }
686
687
    /**
     * Return the Guzzle client for this pipe, building it on first access.
     *
     * Building the client also (re)loads the cookie jar for the provider's
     * base URL from the AgeVerificationManager. Setting $httpClient back to
     * null forces a rebuild on the next call.
     */
    protected function getHttpClient(): Client
    {
        if ($this->httpClient !== null) {
            return $this->httpClient;
        }

        // The jar comes from the AgeVerificationManager, keyed by the provider's base URL.
        $this->cookieJar = $this->getAgeVerificationManager()->getCookieJar($this->getBaseUrl());

        $client = new Client([
            'timeout' => 30,
            'connect_timeout' => 15,
            // NOTE(review): TLS certificate verification is switched off here;
            // confirm this is intended.
            'verify' => false,
            'cookies' => $this->cookieJar,
            'allow_redirects' => [
                'max' => 5,
                'strict' => false,
                'referer' => true,
                'track_redirects' => true,
            ],
            // 4xx/5xx responses are raised as exceptions.
            'http_errors' => true,
        ]);

        $this->httpClient = $client;

        return $client;
    }
713
714
    /**
     * Score how similar a search term and a result title are, as a percentage.
     *
     * Both strings are first normalised with cleanTitleForComparison(). Two
     * scores are then computed — similar_text()'s percentage and a
     * Levenshtein-distance-based percentage — and the higher one is returned.
     */
    protected function calculateSimilarity(string $searchTerm, string $resultTitle): float
    {
        $wanted = $this->cleanTitleForComparison($searchTerm);
        $candidate = $this->cleanTitleForComparison($resultTitle);

        similar_text($wanted, $candidate, $similarTextScore);

        // Levenshtein distance relative to the longer string; 0 when both are empty.
        $longest = max(strlen($wanted), strlen($candidate));
        $levenshteinScore = $longest > 0
            ? (1 - (levenshtein($wanted, $candidate) / $longest)) * 100
            : 0;

        return max($similarTextScore, $levenshteinScore);
    }
738
739
    /**
     * Normalise a title so two titles can be compared for similarity.
     *
     * The title is lower-cased, a literal "/xxx/" marker is dropped,
     * separators (".", "_", "-") become spaces, common release keywords and
     * anything in parentheses or square brackets are removed, and runs of
     * whitespace are collapsed to a single space.
     */
    protected function cleanTitleForComparison(string $title): string
    {
        $title = strtolower($title);

        // The title is already lower-case at this point, so the marker has to
        // be lower-case as well or it can never match.
        $title = str_replace('/xxx/', '', $title);

        // Applied in order. Separators are normalised first: "_" counts as a
        // word character, so the \b-anchored keyword pattern would otherwise
        // miss keywords in underscore-separated titles such as "title_xxx_1080p".
        $patterns = [
            '/[._-]+/',
            '/\b(xxx|adult|porn|erotic|hd|4k|1080p|720p|dvdrip|webrip|bluray)\b/i',
            '/\(.*?\)/',
            '/\[.*?\]/',
            '/\s+/',
        ];

        // preg_replace() returns null on a PCRE error; keep the title as-is then.
        $title = preg_replace($patterns, ' ', $title) ?? $title;

        return trim($title);
    }
762
763
    /**
     * Extraction hooks.
     *
     * Each base implementation below returns an empty array.
     */
    protected function extractCovers(): array
    {
        return [];
    }

    /**
     * Synopsis extraction hook; the base implementation returns an empty array.
     */
    protected function extractSynopsis(): array
    {
        return [];
    }

    /**
     * Cast extraction hook; the base implementation returns an empty array.
     */
    protected function extractCast(): array
    {
        return [];
    }

    /**
     * Genre extraction hook; the base implementation returns an empty array.
     */
    protected function extractGenres(): array
    {
        return [];
    }

    /**
     * Product-info extraction hook; the base implementation ignores $extras
     * and returns an empty array.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        return [];
    }

    /**
     * Trailer extraction hook; the base implementation returns an empty array.
     */
    protected function extractTrailers(): array
    {
        return [];
    }
795
796
    /**
     * Print a "match found" message for the given title, unless output is disabled.
     */
    protected function outputMatch(string $title): void
    {
        if ($this->echoOutput) {
            $this->getColorCli()->primary('Found match on '.$this->getDisplayName().': '.$title);
        }
    }
807
808
    /**
     * Print a "no match" message for this provider, unless output is disabled.
     */
    protected function outputNotFound(): void
    {
        if ($this->echoOutput) {
            $this->getColorCli()->notice('No match found on '.$this->getDisplayName());
        }
    }
819
820
    /**
     * Pull the first usable JSON-LD object out of an HTML document.
     *
     * Every <script type="application/ld+json"> block is decoded in document
     * order. The first block that is either an object with an "@type" key, or
     * a list whose first element has one, wins; in the latter case that first
     * element is returned.
     *
     * @return array|null The decoded object, or null when no block qualifies
     */
    protected function extractJsonLd(string $html): ?array
    {
        // '#' is the delimiter because the pattern contains '/' in </script>.
        $found = preg_match_all('#<script[^>]*type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#si', $html, $scripts);
        if (! $found) {
            return null;
        }

        foreach ($scripts[1] as $rawJson) {
            $decoded = json_decode(trim($rawJson), true);

            // Skip blocks that are invalid JSON or do not decode to an array.
            if (json_last_error() !== JSON_ERROR_NONE || ! is_array($decoded)) {
                continue;
            }

            if (isset($decoded['@type'])) {
                return $decoded;
            }

            if (isset($decoded[0]['@type'])) {
                return $decoded[0];
            }
        }

        return null;
    }
842
843
    /**
     * Read the Open Graph title, description, image and url from an HTML document.
     *
     * Loads $html into the shared parser as a side effect.
     *
     * @return array<string, string> Any of the keys 'title', 'description',
     *                               'image', 'url' whose meta tag was found,
     *                               with trimmed values
     */
    protected function extractOpenGraph(string $html): array
    {
        $this->getHtmlParser()->loadHtml($html);

        // Open Graph property => key used in the returned array.
        $wanted = [
            'og:title' => 'title',
            'og:description' => 'description',
            'og:image' => 'image',
            'og:url' => 'url',
        ];

        $found = [];
        foreach ($wanted as $property => $resultKey) {
            $node = $this->getHtmlParser()->findOne('meta[property="' . $property . '"]');
            if (! $node || ! isset($node->content)) {
                continue;
            }

            $found[$resultKey] = trim($node->content);
        }

        return $found;
    }
867
}
868
869