AbstractAdultProviderPipe::extractJsonLd()   B
last analyzed

Complexity

Conditions 7
Paths 4

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 9
c 2
b 0
f 0
dl 0
loc 18
rs 8.8333
cc 7
nc 4
nop 1
1
<?php
2
3
namespace App\Services\AdultProcessing\Pipes;
4
5
use App\Services\AdultProcessing\AdultProcessingPassable;
6
use App\Services\AdultProcessing\AdultProcessingResult;
7
use App\Services\AdultProcessing\AgeVerificationManager;
8
use Closure;
9
use GuzzleHttp\Client;
10
use GuzzleHttp\Cookie\CookieJar;
11
use GuzzleHttp\Cookie\FileCookieJar;
12
use GuzzleHttp\Exception\ConnectException;
13
use GuzzleHttp\Exception\RequestException;
14
use GuzzleHttp\Middleware;
15
use GuzzleHttp\Psr7\Request;
16
use GuzzleHttp\Psr7\Response;
17
use Illuminate\Support\Facades\Cache;
18
use Illuminate\Support\Facades\Log;
19
use voku\helper\HtmlDomParser;
0 ignored issues
show
Bug introduced by
The type voku\helper\HtmlDomParser was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
20
21
/**
22
 * Base class for adult movie processing pipe handlers.
23
 *
24
 * Each pipe is responsible for processing releases through a specific adult site provider.
25
 *
26
 * Note: This class intentionally uses lazy loading for HtmlDomParser to avoid
27
 * serialization issues with DOMDocument when using Laravel's Concurrency facade.
28
 */
29
abstract class AbstractAdultProviderPipe
30
{
31
    protected int $priority = 50;
32
33
    protected bool $echoOutput = true;
34
35
    protected ?HtmlDomParser $html = null;
36
37
    protected ?string $cookie = null;
38
39
    /**
40
     * Minimum similarity threshold for matching (percentage).
41
     */
42
    protected float $minimumSimilarity = 90.0;
43
44
    /**
45
     * HTTP client for making requests.
46
     */
47
    protected ?Client $httpClient = null;
48
49
    /**
50
     * Cookie jar for maintaining session cookies.
51
     */
52
    protected CookieJar|FileCookieJar|null $cookieJar = null;
53
54
    /**
55
     * Age verification manager for handling site-specific cookies.
56
     */
57
    protected ?AgeVerificationManager $ageVerificationManager = null;
58
59
    /**
60
     * Maximum number of retry attempts for failed requests.
61
     */
62
    protected int $maxRetries = 3;
63
64
    /**
65
     * Delay between retries in milliseconds.
66
     */
67
    protected int $retryDelay = 1000;
68
69
    /**
70
     * Rate limit delay between requests in milliseconds.
71
     */
72
    protected int $rateLimitDelay = 500;
73
74
    /**
75
     * Last request timestamp for rate limiting.
76
     */
77
    protected static array $lastRequestTime = [];
78
79
    /**
80
     * Cache duration for search results in minutes.
81
     */
82
    protected int $cacheDuration = 60;
83
84
    /**
85
     * Whether to use caching for this provider.
86
     */
87
    protected bool $useCache = true;
88
89
    /**
     * Create the pipe without building any collaborators up front.
     *
     * The body is intentionally empty: the HtmlDomParser is only created
     * when getHtmlParser() is first called.
     */
    public function __construct()
    {
        // Lazy load HtmlDomParser to avoid serialization issues
    }
93
94
    /**
     * Return the HtmlDomParser held by this pipe, creating it on first use.
     *
     * @return HtmlDomParser The instance cached in $this->html.
     */
    protected function getHtmlParser(): HtmlDomParser
    {
        // Null-coalescing assignment only instantiates when nothing is cached yet.
        return $this->html ??= new HtmlDomParser;
    }
105
106
    /**
     * Handle the adult movie processing request.
     *
     * Pipeline step: the passable is always forwarded to $next, whatever
     * happens here. The provider is only consulted when the passable does not
     * already ask to stop and shouldSkip() returns false. Exceptions thrown
     * while rate limiting, processing or recording the result are logged and
     * stored on the passable as a failed result instead of propagating.
     *
     * @param  AdultProcessingPassable  $passable  State carried through the pipeline.
     * @param  Closure  $next  Continuation that invokes the next pipe.
     * @return AdultProcessingPassable Whatever $next returns.
     */
    public function handle(AdultProcessingPassable $passable, Closure $next): AdultProcessingPassable
    {
        // If we already have a match, skip processing
        if ($passable->shouldStopProcessing()) {
            return $next($passable);
        }

        // Set the cookie from passable
        $this->cookie = $passable->getCookie();

        // Skip if this provider shouldn't process
        if ($this->shouldSkip($passable)) {
            $passable->updateResult(
                AdultProcessingResult::skipped('Provider skipped', $this->getName()),
                $this->getName()
            );

            return $next($passable);
        }

        // Output processing message
        if ($this->echoOutput) {
            cli()->info('Checking '.$this->getDisplayName().' for movie info');
        }

        try {
            // Apply rate limiting
            $this->applyRateLimit();

            // Attempt to process with this provider
            $result = $this->process($passable);

            // Update the result
            $passable->updateResult($result, $this->getName());
        } catch (\Exception $e) {
            // Only \Exception is caught here; \Error subclasses still propagate.
            Log::error('Adult provider '.$this->getName().' failed: '.$e->getMessage(), [
                'provider' => $this->getName(),
                'title' => $passable->getCleanTitle(),
                'exception' => get_class($e),
            ]);

            $passable->updateResult(
                AdultProcessingResult::failed($e->getMessage(), $this->getName()),
                $this->getName()
            );
        }

        return $next($passable);
    }
158
159
    /**
     * Pause so that consecutive requests to the same provider are spaced by
     * at least $rateLimitDelay milliseconds, then record the new timestamp.
     *
     * Timestamps are kept per provider name in the static $lastRequestTime
     * array, in milliseconds.
     */
    protected function applyRateLimit(): void
    {
        $key = $this->getName();
        $nowMs = microtime(true) * 1000;
        $previousMs = self::$lastRequestTime[$key] ?? null;

        if ($previousMs !== null) {
            // Time still owed before the next request may go out.
            $remainingMs = $this->rateLimitDelay - ($nowMs - $previousMs);

            if ($remainingMs > 0) {
                // usleep() takes microseconds.
                usleep((int) ($remainingMs * 1000));
            }
        }

        // Re-read the clock so the stored time reflects any sleep above.
        self::$lastRequestTime[$key] = microtime(true) * 1000;
    }
176
177
    /**
     * Get the priority of this provider (lower = higher priority).
     *
     * @return int The value of the $priority property.
     */
    public function getPriority(): int
    {
        return $this->priority;
    }
184
185
    /**
     * Get the internal name of this provider.
     *
     * Within this class the name is used as the provider label in results and
     * log messages, as the rate-limit key and as part of the search cache key.
     */
    abstract public function getName(): string;

    /**
     * Get the display name for user-facing output.
     *
     * Used in the CLI messages printed while checking a provider.
     */
    abstract public function getDisplayName(): string;

    /**
     * Get the base URL for the provider.
     *
     * Within this class it is used to resolve relative URLs and is passed to
     * the AgeVerificationManager when fetching or refreshing cookies.
     */
    abstract protected function getBaseUrl(): string;

    /**
     * Attempt to process the movie through this provider.
     *
     * Called from handle(); the returned result is recorded on the passable.
     */
    abstract protected function process(AdultProcessingPassable $passable): AdultProcessingResult;

    /**
     * Search for a movie on this provider.
     *
     * @return array|false Returns array with 'title' and 'url' keys on success, false on failure
     */
    abstract protected function search(string $movie): array|false;

    /**
     * Get all movie information from the provider.
     *
     * @return array|false NOTE(review): array shape and the meaning of false are
     *                     defined by the concrete providers - confirm there.
     */
    abstract protected function getMovieInfo(): array|false;
216
217
    /**
     * Decide whether this provider should be bypassed for the given passable.
     *
     * The base rule skips whenever the passable's clean title is empty by
     * PHP's empty() semantics.
     */
    protected function shouldSkip(AdultProcessingPassable $passable): bool
    {
        $cleanTitle = $passable->getCleanTitle();

        return empty($cleanTitle);
    }
224
225
    /**
     * Set echo output flag.
     *
     * @param  bool  $echo  When true, this class prints its CLI progress messages.
     * @return self The same instance, so calls can be chained.
     */
    public function setEchoOutput(bool $echo): self
    {
        $this->echoOutput = $echo;

        return $this;
    }
234
235
    /**
     * Look up a previously cached search result for a movie title.
     *
     * @param  string  $movie  Search term; lower-cased and hashed into the cache key.
     * @return array|false|null The cached value, or null when caching is
     *                          disabled or nothing is stored for this key.
     */
    protected function getCachedSearch(string $movie): array|false|null
    {
        if ($this->useCache === false) {
            return null;
        }

        $provider = $this->getName();
        $digest = md5(strtolower($movie));
        $hit = Cache::get('adult_search_'.$provider.'_'.$digest);

        if ($hit === null) {
            return null;
        }

        if ($this->echoOutput) {
            cli()->info('Using cached result for: '.$movie);
        }

        return $hit;
    }
257
258
    /**
     * Store a search result in the cache for $cacheDuration minutes.
     *
     * Does nothing when caching is disabled for this provider.
     *
     * @param  string  $movie  Search term; lower-cased and hashed into the cache key.
     * @param  array|false  $result  Value to store (false results are cached too).
     */
    protected function cacheSearchResult(string $movie, array|false $result): void
    {
        if ($this->useCache) {
            $provider = $this->getName();
            $digest = md5(strtolower($movie));
            $expiresAt = now()->addMinutes($this->cacheDuration);

            Cache::put('adult_search_'.$provider.'_'.$digest, $result, $expiresAt);
        }
    }
270
271
    /**
     * Fetch raw HTML from a URL with retry support.
     *
     * Makes up to $maxRetries requests. A response that looks like an error
     * page is retried after $retryDelay; a response that looks like an age
     * gate triggers one cookie refresh plus an immediate retry, and after that
     * is handed to handleAgeVerification(). Connection and request failures
     * are retried with a growing delay, except 4xx responses other than 429,
     * which abort immediately.
     *
     * @param  string  $url  Absolute URL to request.
     * @param  string|null  $cookie  Raw Cookie header value to send, if any.
     * @param  array|null  $postData  When not null, a POST is sent with these form fields.
     * @return string|false The response body, or false when every attempt failed.
     */
    protected function fetchHtml(string $url, ?string $cookie = null, ?array $postData = null): string|false
    {
        $attempt = 0;
        $lastException = null;
        $ageVerificationAttempted = false;

        while ($attempt < $this->maxRetries) {
            try {
                $attempt++;
                $client = $this->getHttpClient();

                $options = [
                    'headers' => $this->getDefaultHeaders(),
                ];

                // Add custom cookie if provided
                if ($cookie !== null && $cookie !== '') {
                    $options['headers']['Cookie'] = $cookie;
                }

                // Handle POST data
                if ($postData !== null) {
                    $options['form_params'] = $postData;
                    $response = $client->post($url, $options);
                } else {
                    $response = $client->get($url, $options);
                }

                $body = $response->getBody()->getContents();

                // Check for common error pages
                if ($this->isErrorPage($body)) {
                    Log::warning('Received error page from '.$this->getName().': '.$url);
                    if ($attempt < $this->maxRetries) {
                        usleep($this->retryDelay * 1000);

                        continue;
                    }

                    return false;
                }

                // Check for age verification requirement
                if ($this->requiresAgeVerification($body)) {
                    // If we haven't tried age verification yet, refresh cookies and retry
                    if (! $ageVerificationAttempted) {
                        $ageVerificationAttempted = true;

                        // Refresh cookies using the manager
                        $this->getAgeVerificationManager()->refreshCookies($this->getBaseUrl());

                        // Reset HTTP client to pick up new cookies
                        $this->httpClient = null;
                        $this->cookieJar = null;

                        Log::info('Refreshed age verification cookies for '.$this->getName().', retrying...');

                        // The refresh happens at most once (guarded by the flag above),
                        // so it must not use up a retry slot: otherwise a gate seen on
                        // the final attempt would exit the loop before the refreshed
                        // cookies were ever tried.
                        $attempt--;

                        continue;
                    }

                    $body = $this->handleAgeVerification($url, $body);
                    if ($body === false) {
                        return false;
                    }
                }

                return $body;
            } catch (ConnectException $e) {
                $lastException = $e;
                Log::warning('Connection failed for '.$this->getName().' (attempt '.$attempt.'): '.$e->getMessage());

                if ($attempt < $this->maxRetries) {
                    // Linear backoff: the delay grows with the attempt number.
                    usleep($this->retryDelay * 1000 * $attempt);
                }
            } catch (RequestException $e) {
                $lastException = $e;
                $statusCode = $e->hasResponse() ? $e->getResponse()->getStatusCode() : 0;

                // Don't retry on 4xx client errors (except 429 rate limit)
                if ($statusCode >= 400 && $statusCode < 500 && $statusCode !== 429) {
                    Log::error('HTTP '.$statusCode.' for '.$this->getName().': '.$url);

                    return false;
                }

                Log::warning('Request failed for '.$this->getName().' (attempt '.$attempt.'): '.$e->getMessage());

                if ($attempt < $this->maxRetries) {
                    // Longer delay for rate limit errors
                    $delay = $statusCode === 429 ? $this->retryDelay * 5 : $this->retryDelay * $attempt;
                    usleep($delay * 1000);
                }
            } catch (\Exception $e) {
                $lastException = $e;
                Log::error('Unexpected error for '.$this->getName().': '.$e->getMessage());

                if ($attempt < $this->maxRetries) {
                    usleep($this->retryDelay * 1000);
                }
            }
        }

        if ($lastException) {
            Log::error('All retry attempts failed for '.$this->getName().': '.$lastException->getMessage());
        }

        return false;
    }
393
394
    /**
     * Get default HTTP headers.
     *
     * Returns a browser-like header set; the User-Agent is picked by
     * getRandomUserAgent() on every call.
     *
     * NOTE(review): Accept-Encoding advertises "br"; whether a brotli response
     * can actually be decoded depends on the HTTP handler build - confirm.
     *
     * @return array<string, string> Header name => header value.
     */
    protected function getDefaultHeaders(): array
    {
        return [
            'User-Agent' => $this->getRandomUserAgent(),
            'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language' => 'en-US,en;q=0.9',
            'Accept-Encoding' => 'gzip, deflate, br',
            'Cache-Control' => 'no-cache',
            'Pragma' => 'no-cache',
            'Sec-Fetch-Dest' => 'document',
            'Sec-Fetch-Mode' => 'navigate',
            'Sec-Fetch-Site' => 'none',
            'Sec-Fetch-User' => '?1',
            'Upgrade-Insecure-Requests' => '1',
        ];
    }
413
414
    /**
     * Pick one of the built-in desktop browser User-Agent strings at random.
     *
     * @return string A User-Agent header value.
     */
    protected function getRandomUserAgent(): string
    {
        $pool = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
        ];

        // array_rand() returns a random key of the pool.
        $index = array_rand($pool);

        return $pool[$index];
    }
430
431
    /**
     * Tell whether a response body contains one of the known error phrases.
     *
     * Matching is a case-insensitive substring search.
     *
     * @param  string  $html  Response body to inspect.
     * @return bool True as soon as any phrase is found, false otherwise.
     */
    protected function isErrorPage(string $html): bool
    {
        $needles = [
            'Access Denied',
            'Service Unavailable',
            '503 Service',
            '502 Bad Gateway',
            'temporarily unavailable',
            'maintenance mode',
            'rate limit exceeded',
        ];

        foreach ($needles as $needle) {
            if (stripos($html, $needle) === false) {
                continue;
            }

            return true;
        }

        return false;
    }
454
455
    /**
     * Guess whether a response body is an age gate rather than real content.
     *
     * Decision order:
     *  1. any content marker present            => not an age gate;
     *  2. body shorter than 500 bytes           => age gate;
     *  3. body shorter than 10000 bytes with at
     *     least one age phrase                  => age gate;
     *  4. otherwise                             => age gate only when two or
     *                                              more age phrases match.
     *
     * @param  string  $html  Response body to inspect.
     */
    protected function requiresAgeVerification(string $html): bool
    {
        // Markers of a real content page (regex fragments).
        $contentMarkers = [
            '<title>.*?DVD.*?</title>',
            'product-info',
            'movie-details',
            'cast-list',
            'genre-list',
            '"@type":\s*"Movie"',
            '"@type":\s*"Product"',
        ];

        foreach ($contentMarkers as $marker) {
            // '#' is the delimiter because some fragments contain '/'.
            if (preg_match('#'.$marker.'#is', $html)) {
                return false;
            }
        }

        $length = strlen($html);

        // A very short response is treated as a redirect or gate stub.
        if ($length < 500) {
            return true;
        }

        // Phrases searched case-insensitively as plain substrings.
        $agePhrases = [
            'age verification',
            'are you 18',
            'are you over 18',
            'confirm your age',
            'enter your age',
            'must be 18',
            'age-gate',
            'ageGate',
            'AgeConfirmation', // PopPorn specific
            'ageConfirmationButton', // ADE specific
            'age-confirmation', // Generic
            'verify your age',
            'adult content warning',
            'I am 18 or older',
            'I am over 18',
            'this site contains adult',
        ];

        $isCompactPage = $length < 10000;
        $hits = 0;

        foreach ($agePhrases as $phrase) {
            if (stripos($html, $phrase) === false) {
                continue;
            }

            // On a relatively short page one phrase is enough.
            if ($isCompactPage) {
                return true;
            }

            $hits++;
        }

        // Longer pages need at least two matching phrases.
        return $hits >= 2;
    }
519
520
    /**
     * Try to get past an age gate and return the real page.
     *
     * Strategies, in order:
     *  1. submit every form that looks like an age confirmation form;
     *  2. follow an "Enter" / "I Agree" style link;
     *  3. request the original URL again with whatever cookies have
     *     accumulated.
     * The first response that no longer looks like an age gate is returned.
     *
     * Note: this loads $html into the parser returned by getHtmlParser(),
     * replacing whatever document it held before.
     *
     * @param  string  $url  URL that produced the age gate.
     * @param  string  $html  Body of the age gate page.
     * @return string|false Body of the unlocked page, or false if every strategy failed.
     */
    protected function handleAgeVerification(string $url, string $html): string|false
    {
        if ($this->cookieJar) {
            $domain = (string) parse_url($this->getBaseUrl(), PHP_URL_HOST);
            $domain = preg_replace('/^www\./', '', $domain);

            Log::info('Attempting to handle age verification for '.$this->getName().' with domain: '.$domain);
        }

        // Try to find and submit age verification form
        $this->getHtmlParser()->loadHtml($html);

        foreach ($this->getHtmlParser()->find('form') as $form) {
            // Check if this looks like an age verification form
            $formHtml = (string) ($form->innerHtml ?? '');
            $looksLikeAgeForm = stripos($formHtml, 'age') !== false
                || stripos($formHtml, '18') !== false
                || stripos($formHtml, 'adult') !== false
                || stripos($formHtml, 'enter') !== false
                || stripos($formHtml, 'confirm') !== false;

            if (! $looksLikeAgeForm) {
                continue;
            }

            $postData = $this->extractAgeVerificationFormData($form);
            if (empty($postData)) {
                continue;
            }

            // A missing, empty or unknown method attribute means GET for HTML forms.
            $method = strtoupper(trim((string) ($form->method ?? ''))) === 'POST' ? 'POST' : 'GET';

            // An empty action submits back to the page that served the form.
            $action = trim((string) ($form->action ?? ''));
            $submitUrl = $action === '' ? $url : $this->resolveAgeGateUrl($action);

            // GET forms carry their fields in the query string, not in a body.
            $payloadOption = $method === 'POST' ? 'form_params' : 'query';

            try {
                $response = $this->getHttpClient()->request($method, $submitUrl, [
                    $payloadOption => $postData,
                    'headers' => $this->getDefaultHeaders(),
                ]);

                $body = $response->getBody()->getContents();

                // Check if we still get age verification after submit
                if (! $this->requiresAgeVerification($body)) {
                    return $body;
                }
            } catch (\Exception $e) {
                Log::warning('Age verification submission failed for '.$this->getName().': '.$e->getMessage());
            }
        }

        // Look for a click-through link. Only the link pattern yields a URL, so
        // it is evaluated on its own; chaining it behind an onclick test with ||
        // would leave the href uncaptured whenever an onclick handler exists.
        $linkPattern = '/<a[^>]*href\s*=\s*["\']([^"\']*)["\'][^>]*>(Enter|I am over 18|Agree|Enter Site|I Agree)/i';
        if (preg_match($linkPattern, $html, $matches) && ! empty($matches[1])) {
            // The href comes straight from markup, so entities such as &amp; are still encoded.
            $href = trim(html_entity_decode($matches[1]));
            $isNavigable = $href !== ''
                && ! str_starts_with($href, '#')
                && stripos($href, 'javascript:') !== 0;

            if ($isNavigable) {
                try {
                    $response = $this->getHttpClient()->get($this->resolveAgeGateUrl($href), [
                        'headers' => $this->getDefaultHeaders(),
                    ]);
                    $body = $response->getBody()->getContents();

                    if (! $this->requiresAgeVerification($body)) {
                        return $body;
                    }
                } catch (\Exception $e) {
                    Log::warning('Age verification link follow failed for '.$this->getName().': '.$e->getMessage());
                }
            }
        }

        // If all else fails, try to just refetch the original URL
        // (sometimes the cookies from previous attempts work)
        try {
            $response = $this->getHttpClient()->get($url, [
                'headers' => $this->getDefaultHeaders(),
            ]);
            $body = $response->getBody()->getContents();

            if (! $this->requiresAgeVerification($body)) {
                return $body;
            }
        } catch (\Exception $e) {
            Log::warning('Age verification retry failed for '.$this->getName().': '.$e->getMessage());
        }

        // If we couldn't handle age verification, log and return false
        Log::warning('Could not handle age verification for '.$this->getName().': '.$url);

        return false;
    }

    /**
     * Turn a form action or link href found on an age gate into an absolute URL.
     *
     * Absolute http(s) URLs are returned unchanged, protocol-relative URLs
     * inherit the scheme of the provider base URL, and everything else is
     * appended to the base URL with exactly one slash in between.
     */
    private function resolveAgeGateUrl(string $target): string
    {
        if (preg_match('#^https?://#i', $target) === 1) {
            return $target;
        }

        $base = $this->getBaseUrl();

        if (str_starts_with($target, '//')) {
            $scheme = parse_url($base, PHP_URL_SCHEME) ?: 'https';

            return $scheme.':'.$target;
        }

        return rtrim($base, '/').'/'.ltrim($target, '/');
    }
624
625
    /**
     * Build the field set to submit for an age verification form.
     *
     * Input rules (inputs without a name are ignored):
     *  - hidden: copied as is;
     *  - checkbox: sent only when the name mentions age/agree/confirm, using
     *    the element's value or "1" when that is empty;
     *  - submit: sent only when it has a non-empty value;
     *  - anything else: sent as "1990" when the name mentions age/year.
     * Select rules: names mentioning year/month/day get "1990"/"01"/"01".
     *
     * @param  mixed  $form  Form node exposing find() plus name/type/value on its children.
     * @return array<string, string> Field name => value to submit.
     */
    protected function extractAgeVerificationFormData($form): array
    {
        $fields = [];

        // Case-insensitive "does the name contain any of these words" helper.
        $mentions = static function ($haystack, array $words): bool {
            foreach ($words as $word) {
                if (stripos($haystack, $word) !== false) {
                    return true;
                }
            }

            return false;
        };

        foreach ($form->find('input') as $input) {
            $name = $input->name ?? '';
            $type = strtolower($input->type ?? 'text');
            $value = $input->value ?? '';

            if (empty($name)) {
                continue;
            }

            if ($type === 'hidden') {
                $fields[$name] = $value;
            } elseif ($type === 'checkbox') {
                // Usually age verification checkboxes need to be checked
                if ($mentions($name, ['age', 'agree', 'confirm'])) {
                    $fields[$name] = $value ?: '1';
                }
            } elseif ($type === 'submit') {
                // Include submit button value if it has a name
                if (! empty($value)) {
                    $fields[$name] = $value;
                }
            } elseif ($mentions($name, ['age', 'year'])) {
                // For text inputs that might be age/birthdate
                $fields[$name] = '1990'; // Default to a valid birth year
            }
        }

        // Handle select elements (for birthdate selection)
        foreach ($form->find('select') as $select) {
            $name = $select->name ?? '';
            if (empty($name)) {
                continue;
            }

            if ($mentions($name, ['year'])) {
                $fields[$name] = '1990';
            } elseif ($mentions($name, ['month'])) {
                $fields[$name] = '01';
            } elseif ($mentions($name, ['day'])) {
                $fields[$name] = '01';
            }
        }

        return $fields;
    }
685
686
    /**
     * Return the AgeVerificationManager for this pipe, creating it on first use.
     *
     * @return AgeVerificationManager The instance cached in $this->ageVerificationManager.
     */
    protected function getAgeVerificationManager(): AgeVerificationManager
    {
        // Null-coalescing assignment only instantiates when nothing is cached yet.
        return $this->ageVerificationManager ??= new AgeVerificationManager;
    }
697
698
    /**
     * Return the Guzzle client for this provider, building it on first use.
     *
     * The client is created with the cookie jar supplied by the
     * AgeVerificationManager for the provider base URL; that jar is also kept
     * in $this->cookieJar. The client follows up to five redirects and throws
     * on HTTP error statuses.
     *
     * NOTE(review): 'verify' => false switches off TLS certificate
     * verification for every request made through this client - confirm that
     * this is intended.
     */
    protected function getHttpClient(): Client
    {
        if ($this->httpClient !== null) {
            return $this->httpClient;
        }

        // Use the AgeVerificationManager to get proper cookie jar with age verification cookies
        $jar = $this->getAgeVerificationManager()->getCookieJar($this->getBaseUrl());
        $this->cookieJar = $jar;

        $redirectPolicy = [
            'max' => 5,
            'strict' => false,
            'referer' => true,
            'track_redirects' => true,
        ];

        $this->httpClient = new Client([
            'timeout' => 30,
            'connect_timeout' => 15,
            'verify' => false,
            'cookies' => $this->cookieJar,
            'allow_redirects' => $redirectPolicy,
            'http_errors' => true,
        ]);

        return $this->httpClient;
    }
724
725
    /**
     * Score how alike a search term and a result title are, as a percentage.
     *
     * Both strings are normalised with cleanTitleForComparison(), then scored
     * with similar_text() and with a Levenshtein-based ratio; the larger of
     * the two scores is returned.
     *
     * @param  string  $searchTerm  Title that was searched for.
     * @param  string  $resultTitle  Title returned by the provider.
     * @return float Similarity percentage.
     */
    protected function calculateSimilarity(string $searchTerm, string $resultTitle): float
    {
        $wanted = $this->cleanTitleForComparison($searchTerm);
        $found = $this->cleanTitleForComparison($resultTitle);

        // similar_text() writes its percentage into the third argument.
        similar_text($wanted, $found, $bySimilarText);

        // Levenshtein distance relative to the longer string; 0 when both are empty.
        $longest = max(strlen($wanted), strlen($found));
        $byLevenshtein = $longest > 0
            ? (1 - (levenshtein($wanted, $found) / $longest)) * 100
            : 0;

        return max($bySimilarText, $byLevenshtein);
    }
749
750
    /**
     * Normalise a title so that two spellings of the same release compare equal.
     *
     * The title is lower-cased, bracketed/parenthesised segments are dropped,
     * separator runs (".", "_", "-") become spaces, common release tags
     * (xxx, 720p, webrip, ...) are removed and whitespace is collapsed.
     *
     * Separators are normalised before the tags are stripped: "_" counts as a
     * word character for "\b", so a tag embedded in "some_movie_xxx_720p"
     * would otherwise not be recognised.
     *
     * @param  string  $title  Raw title.
     * @return string Cleaned, trimmed, lower-case title.
     */
    protected function cleanTitleForComparison(string $title): string
    {
        $title = strtolower($title);

        // Applied in order, each pattern working on the previous result.
        $patterns = [
            '/\(.*?\)/',
            '/\[.*?\]/',
            '/[._-]+/',
            '/\b(xxx|adult|porn|erotic|hd|4k|1080p|720p|dvdrip|webrip|bluray)\b/i',
            '/\s+/',
        ];

        // preg_replace() returns null on a PCRE error; fall back to the lower-cased input.
        $cleaned = preg_replace($patterns, ' ', $title) ?? $title;

        return trim($cleaned);
    }
773
774
    /**
     * Extract cover images from the loaded HTML.
     *
     * Base implementation returns an empty array; presumably concrete
     * providers override it - confirm against the subclasses.
     */
    protected function extractCovers(): array
    {
        return [];
    }

    /**
     * Extract the synopsis. Base implementation returns an empty array.
     */
    protected function extractSynopsis(): array
    {
        return [];
    }

    /**
     * Extract the cast. Base implementation returns an empty array.
     */
    protected function extractCast(): array
    {
        return [];
    }

    /**
     * Extract the genres. Base implementation returns an empty array.
     */
    protected function extractGenres(): array
    {
        return [];
    }

    /**
     * Extract product information. Base implementation returns an empty array.
     *
     * @param  bool  $extras  Not used by this base implementation.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        return [];
    }

    /**
     * Extract trailers. Base implementation returns an empty array.
     */
    protected function extractTrailers(): array
    {
        return [];
    }
806
807
    /**
     * Print a "match found" line for this provider when echo output is on.
     *
     * @param  string  $title  Title of the matched movie.
     */
    protected function outputMatch(string $title): void
    {
        if ($this->echoOutput) {
            cli()->primary('Found match on '.$this->getDisplayName().': '.$title);
        }
    }
818
819
    /**
     * Print a "no match" notice for this provider when echo output is on.
     */
    protected function outputNotFound(): void
    {
        if ($this->echoOutput) {
            cli()->notice('No match found on '.$this->getDisplayName());
        }
    }
830
831
    /**
     * Parse JSON-LD structured data from HTML.
     *
     * Every <script type="application/ld+json"> block is decoded in document
     * order and the first typed node is returned. A block may be:
     *  - a single object carrying "@type" (returned as is);
     *  - a top-level list of objects (first element carrying "@type");
     *  - an object wrapping its nodes in "@graph" (first node carrying "@type").
     * Blocks that are not valid JSON or contain no typed node are skipped.
     *
     * @param  string  $html  Page markup.
     * @return array|null The decoded node, or null when none was found.
     */
    protected function extractJsonLd(string $html): ?array
    {
        // Note: Using # as delimiter because pattern contains / in </script>
        $pattern = '#<script[^>]*type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#si';
        if (! preg_match_all($pattern, $html, $matches)) {
            return null;
        }

        foreach ($matches[1] as $json) {
            $data = json_decode(trim($json), true);
            if (json_last_error() !== JSON_ERROR_NONE || ! is_array($data)) {
                continue;
            }

            if (isset($data['@type'])) {
                return $data;
            }

            if (isset($data['@graph']) && is_array($data['@graph'])) {
                $nodes = $data['@graph'];
            } elseif ($data !== [] && array_keys($data) === range(0, count($data) - 1)) {
                // Sequential 0..n-1 keys: a top-level list of nodes.
                $nodes = $data;
            } else {
                // Untyped object: its property values are not top-level nodes.
                continue;
            }

            foreach ($nodes as $node) {
                if (is_array($node) && isset($node['@type'])) {
                    return $node;
                }
            }
        }

        return null;
    }
853
854
    /**
     * Collect the basic Open Graph values (title, description, image, url).
     *
     * Loads $html into the parser returned by getHtmlParser(), replacing
     * whatever document it held before. Only properties whose meta tag has a
     * content attribute appear in the result.
     *
     * @param  string  $html  Page markup.
     * @return array<string, string> Trimmed values keyed by title/description/image/url.
     */
    protected function extractOpenGraph(string $html): array
    {
        $parser = $this->getHtmlParser();
        $parser->loadHtml($html);

        // Open Graph property => key used in the returned array.
        $wanted = [
            'og:title' => 'title',
            'og:description' => 'description',
            'og:image' => 'image',
            'og:url' => 'url',
        ];

        $collected = [];

        foreach ($wanted as $property => $key) {
            $tag = $parser->findOne('meta[property="'.$property.'"]');

            if (! $tag || ! isset($tag->content)) {
                continue;
            }

            $collected[$key] = trim($tag->content);
        }

        return $collected;
    }
878
}
879