PoppornPipe::extractGenres()   B
last analyzed

Complexity

Conditions 7
Paths 8

Size

Total Lines 35
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 19
c 1
b 0
f 0
dl 0
loc 35
rs 8.8333
cc 7
nc 8
nop 0
1
<?php
2
3
namespace App\Services\AdultProcessing\Pipes;
4
5
use App\Services\AdultProcessing\AdultProcessingPassable;
6
use App\Services\AdultProcessing\AdultProcessingResult;
7
8
/**
9
 * Popporn provider pipe.
10
 *
11
 * Handles movie information extraction from popporn.com
12
 */
13
class PoppornPipe extends AbstractAdultProviderPipe
14
{
15
    protected int $priority = 20;
16
17
    private const BASE_URL = 'https://www.popporn.com';
18
19
    private const SEARCH_ENDPOINT = '/search?q=';
20
21
    protected string $directUrl = '';
22
23
    protected string $title = '';
24
25
    protected string $response = '';
26
27
    /**
     * Short machine identifier of this provider pipe.
     *
     * Within this class the value is passed as the provider name to every
     * AdultProcessingResult factory call.
     *
     * @return string Always the literal 'pop'.
     */
    public function getName(): string
    {
        return 'pop';
    }
31
32
    /**
     * Human-readable name of this provider.
     *
     * @return string Always the literal 'PopPorn'.
     */
    public function getDisplayName(): string
    {
        return 'PopPorn';
    }
36
37
    /**
     * Root URL of the provider site.
     *
     * @return string The value of self::BASE_URL (scheme and host, no trailing slash).
     */
    protected function getBaseUrl(): string
    {
        return self::BASE_URL;
    }
41
42
    /**
     * Search the provider for the passable's clean title and build a result
     * from the matching details page.
     *
     * Flow: search() -> fetch the details page -> load it into the HTML parser
     * -> getMovieInfo(). The matched title, URL and raw page body are stored on
     * $this->title, $this->directUrl and $this->response because the extract*()
     * helpers read them from there.
     *
     * @param  AdultProcessingPassable  $passable  Source of the title to look up (via getCleanTitle()).
     * @return AdultProcessingResult notFound() when the search or the info extraction yields nothing,
     *                               failed() when the details page cannot be fetched,
     *                               matched() otherwise.
     */
    protected function process(AdultProcessingPassable $passable): AdultProcessingResult
    {
        $movie = $passable->getCleanTitle();

        $searchResult = $this->search($movie);

        if ($searchResult === false) {
            $this->outputNotFound();

            return AdultProcessingResult::notFound($this->getName());
        }

        $this->title = $searchResult['title'];
        $this->directUrl = $searchResult['url'];

        // Keep the fetch result in a local first. $this->response is declared `string`,
        // so assigning a `false` return directly would either be coerced to '' or throw a
        // TypeError (under strict_types) and a `=== false` check on the property could
        // never be true - a failed fetch would then be reported as a match.
        $html = $this->fetchHtml($this->directUrl, $this->cookie);

        if (! is_string($html) || $html === '') {
            // Do not leave a previous page body around for the extract*() helpers.
            $this->response = '';

            return AdultProcessingResult::failed('Failed to fetch movie details page', $this->getName());
        }

        $this->response = $html;

        $this->getHtmlParser()->loadHtml($this->response);

        $movieInfo = $this->getMovieInfo();

        if ($movieInfo === false) {
            return AdultProcessingResult::notFound($this->getName());
        }

        $this->outputMatch($this->title);

        return AdultProcessingResult::matched(
            $this->title,
            $this->getName(),
            $movieInfo
        );
    }
80
81
    /**
     * Query the site search and pick the result whose title is most similar to $movie.
     *
     * The age-verification flow is triggered first, then the search page is fetched
     * and loaded into the HTML parser. Several anchor selectors are tried in order;
     * only the first selector that yields any elements is evaluated.
     *
     * Result anchors without a usable link target (missing, empty or fragment-only
     * href) are skipped, because they would otherwise resolve to the site root and be
     * fetched as if they were a details page.
     *
     * @param  string  $movie  Title to search for.
     * @return array{title: string, url: string}|false Best match whose similarity reaches
     *                                                 $this->minimumSimilarity, or false.
     */
    protected function search(string $movie): array|false
    {
        if (empty($movie)) {
            return false;
        }

        // First, establish a session by visiting the AgeConfirmation endpoint to set cookies
        $this->acceptAgeVerification();

        $searchUrl = self::BASE_URL.self::SEARCH_ENDPOINT.urlencode($movie);
        $response = $this->fetchHtml($searchUrl, $this->cookie);

        if (empty($response)) {
            return false;
        }

        $this->getHtmlParser()->loadHtml($response);

        $bestMatch = null;
        $highestSimilarity = 0;

        // Try multiple selector patterns for search results
        $resultSelectors = [
            'div.product-info a, div.title a',
            'div.product-title a',
            'h3.product-title a',
        ];

        foreach ($resultSelectors as $selector) {
            $results = $this->getHtmlParser()->find($selector);

            if (empty($results)) {
                continue;
            }

            foreach ($results as $result) {
                // Prefer the title attribute, but fall back to the link text when the
                // attribute is absent *or* present-but-empty.
                $title = trim((string) ($result->title ?? ''));
                if ($title === '') {
                    $title = trim((string) ($result->plaintext ?? ''));
                }

                $href = trim((string) ($result->href ?? ''));

                if ($title === '' || $href === '' || str_starts_with($href, '#')) {
                    continue;
                }

                $similarity = $this->calculateSimilarity($movie, $title);

                if ($similarity > $highestSimilarity) {
                    $highestSimilarity = $similarity;
                    $bestMatch = [
                        'title' => $title,
                        'url' => $this->absoluteSearchResultUrl($href),
                    ];
                }
            }

            // Only the first selector that produced elements is considered.
            break;
        }

        if ($bestMatch !== null && $highestSimilarity >= $this->minimumSimilarity) {
            return $bestMatch;
        }

        return false;
    }

    /**
     * Turn a search-result href into an absolute URL on the provider site.
     *
     * @param  string  $href  Non-empty href as found in the markup.
     * @return string Absolute URL: unchanged when already http(s), scheme-prefixed when
     *                protocol-relative, otherwise joined to BASE_URL with exactly one slash.
     */
    private function absoluteSearchResultUrl(string $href): string
    {
        if (str_starts_with($href, 'http')) {
            return $href;
        }

        if (str_starts_with($href, '//')) {
            return 'https:'.$href;
        }

        return self::BASE_URL.'/'.ltrim($href, '/');
    }
140
141
    /**
     * Assemble all movie details from the currently loaded details page.
     *
     * Starts with the matched title / direct URL (when set) and merges in the
     * arrays returned by the extract*() helpers, in this order: synopsis,
     * product info (with extras), cast, genres, covers, trailers. Later helpers
     * win on duplicate string keys (e.g. 'director' is produced by both
     * extractProductInfo() and extractCast()).
     *
     * @return array|false The merged details, or false when nothing at all was collected.
     */
    protected function getMovieInfo(): array|false
    {
        $details = [];

        if (! empty($this->directUrl)) {
            if (! empty($this->title)) {
                $details['title'] = $this->title;
            }
            $details['directurl'] = $this->directUrl;
        }

        // Every extractor is declared to return an array, so they can be merged
        // directly; arguments are evaluated left to right.
        $details = array_merge(
            $details,
            $this->extractSynopsis(),
            $this->extractProductInfo(true),
            $this->extractCast(),
            $this->extractGenres(),
            $this->extractCovers(),
            $this->extractTrailers(),
        );

        return empty($details) ? false : $details;
    }
189
190
    /**
     * Extract the front cover URL and derive a back cover URL from it.
     *
     * The front cover is taken from an `"image": "..."` entry in the raw page body
     * (JSON string escapes such as `\/` are decoded), or, failing that, from the
     * href/src attribute of the first matching cover element.
     *
     * The back cover is a guess based on the front cover's file name: `_aa` becomes
     * `_bb`, otherwise `.jpg` becomes `_b.jpg`. When neither pattern occurs the back
     * cover equals the front cover.
     *
     * @return array{boxcover?: string, backcover?: string} Empty array when no cover URL was found.
     */
    protected function extractCovers(): array
    {
        $boxcover = '';

        // Method 1: Try structured data
        if (preg_match('/"image":\s*"(.*?)"/is', $this->response, $match)) {
            $raw = trim($match[1]);
            // The capture is a JSON string body; decode it so "https:\/\/..." becomes a usable URL.
            $decoded = json_decode('"'.$raw.'"');
            $boxcover = is_string($decoded) ? trim($decoded) : $raw;
        }

        // Method 2: Try multiple selectors
        if ($boxcover === '') {
            $selectors = [
                'div[id=box-art], a[rel=box-art]',
                'img.front',
                'div.box-cover img',
                'div.product-image img',
            ];

            foreach ($selectors as $selector) {
                $node = $this->getHtmlParser()->findOne($selector);

                if (! $node) {
                    continue;
                }

                // Links carry the image in href, images in src; skip elements that have neither.
                foreach (['href', 'src'] as $attribute) {
                    $candidate = $node->{$attribute} ?? null;

                    if (is_string($candidate) && trim($candidate) !== '') {
                        $boxcover = trim($candidate);
                        break 2;
                    }
                }
            }
        }

        if ($boxcover === '') {
            return [];
        }

        $backcover = stripos($boxcover, '_aa') !== false
            ? str_ireplace('_aa', '_bb', $boxcover)
            : str_ireplace('.jpg', '_b.jpg', $boxcover);

        return [
            'boxcover' => $boxcover,
            'backcover' => $backcover,
        ];
    }
232
233
    /**
     * Extract the movie synopsis.
     *
     * First looks for a `"description": "..."` entry in the raw page body. The
     * captured value is treated as a JSON string: escaped quotes no longer cut the
     * match short and escapes such as `\uXXXX`, `\"` or `\/` are decoded before HTML
     * entities are resolved. If that yields nothing, a list of selectors is tried,
     * using the element's text or, when that is empty, its `content` attribute.
     *
     * @return array{synopsis?: string} Empty array when no non-empty synopsis was found.
     */
    protected function extractSynopsis(): array
    {
        // Method 1: Try structured data
        // The capture group accepts backslash escapes so that `\"` inside the text does not end the match.
        if (preg_match('/"description":\s*"((?:\\\\.|[^"\\\\])*+)"/is', $this->response, $match)) {
            // json_decode() returns null for bodies that are not valid JSON strings
            // (e.g. raw line breaks); fall back to the raw capture in that case.
            $decoded = json_decode('"'.$match[1].'"');
            $synopsis = trim(html_entity_decode(is_string($decoded) ? $decoded : $match[1]));

            if ($synopsis !== '') {
                return ['synopsis' => $synopsis];
            }
        }

        // Method 2: Try multiple selectors
        $selectors = [
            'div[id=product-info] h3[class=highlight] + *',
            'div.product-description',
            'div.synopsis',
            'meta[name="description"]',
        ];

        foreach ($selectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);

            if (! $node) {
                continue;
            }

            $text = trim((string) ($node->plaintext ?? ''));

            // <meta> elements have no text; their description lives in the content attribute.
            if ($text === '') {
                $text = trim((string) ($node->content ?? ''));
            }

            // Filter out "POPPORN EXCLUSIVE" text
            if (stripos($text, 'POPPORN EXCLUSIVE') !== false) {
                $sibling = $node->next_sibling();

                if ($sibling) {
                    $text = trim((string) $sibling->plaintext);
                }
            }

            if ($text !== '') {
                return ['synopsis' => $text];
            }
        }

        return [];
    }
273
274
    /**
     * Extract a trailer URL.
     *
     * First looks for a `"contentUrl": "..."` entry in the raw page body (JSON string
     * escapes such as `\/` are decoded), then falls back to the src attribute of the
     * first matching video/iframe element.
     *
     * @return array{trailers?: array{url: string}} Empty array when no trailer URL was found.
     */
    protected function extractTrailers(): array
    {
        // Method 1: Try structured data
        if (preg_match('/"contentUrl":\s*"(.*?)"/is', $this->response, $match)) {
            $raw = trim($match[1]);
            // Decode the JSON string body so "https:\/\/..." becomes a usable URL.
            $decoded = json_decode('"'.$raw.'"');
            $url = is_string($decoded) ? trim($decoded) : $raw;

            if (! empty($url)) {
                return ['trailers' => ['url' => $url]];
            }
        }

        // Method 2: Modern video embeds
        $videoSelectors = [
            'video source',
            'iframe[src*="trailer"]',
            'video[src]',
        ];

        foreach ($videoSelectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);

            if (! $node || ! isset($node->src)) {
                continue;
            }

            $src = trim((string) $node->src);

            if (! empty($src)) {
                return ['trailers' => ['url' => $src]];
            }
        }

        return [];
    }
306
307
    /**
     * Extract product details, the director and (optionally) the feature list.
     *
     * Product details: within the first matching container, text nodes are collected
     * starting at the one containing "Country:" and stopping at one containing
     * "addthis_config"; the collected strings are then grouped in pairs.
     *
     * Extras: within the first matching list container, every non-empty `li` text that
     * follows an item reading exactly "Features:" is collected.
     *
     * @param  bool  $extras  When true, also look for the feature list.
     * @return array{productinfo: array, director: string, extras?: array} 'extras' is only
     *                                                                     present when items were found.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        // Method 1: Try structured data
        $director = preg_match('/"director":\s*{[^}]*"name":\s*"(.*?)"/is', $this->response, $match)
            ? trim($match[1])
            : '';

        // Method 2: Look for product details in various formats
        $productInfo = [];
        $containerSelectors = [
            'div#lside',
            'div.product-details',
            'div.product-info',
        ];

        foreach ($containerSelectors as $containerSelector) {
            $container = $this->getHtmlParser()->findOne($containerSelector);

            if (! $container) {
                continue;
            }

            $collecting = false;
            $fragments = [];

            foreach ($container->find('text') as $textNode) {
                $piece = str_replace([', ', '...', '&nbsp;'], '', trim($textNode->innertext));

                // Collection starts at (and includes) the "Country:" label.
                if (stripos($piece, 'Country:') !== false) {
                    $collecting = true;
                }

                if ($collecting !== true) {
                    continue;
                }

                // The "addthis_config" text marks the end of the details section.
                if (stripos($piece, 'addthis_config') !== false) {
                    break;
                }

                if (! empty($piece)) {
                    $fragments[] = $piece;
                }
            }

            if (! empty($fragments)) {
                $productInfo = array_chunk($fragments, 2, false);
                break;
            }
        }

        $res = [
            'productinfo' => $productInfo,
            'director' => $director,
        ];

        // Get extras if requested
        if ($extras !== true) {
            return $res;
        }

        // Both variables deliberately live outside the loop: the "Features:" marker,
        // once seen, stays in effect for later selectors as well.
        $inFeatures = false;
        $featureItems = [];

        $featureSelectors = [
            'ul.stock-information',
            'div.features',
            'div.extras',
        ];

        foreach ($featureSelectors as $featureSelector) {
            $list = $this->getHtmlParser()->findOne($featureSelector);

            if (! $list) {
                continue;
            }

            foreach ($list->find('li') as $item) {
                $label = trim($item->plaintext);

                if ($label === 'Features:') {
                    $inFeatures = true;

                    continue;
                }

                if ($inFeatures === true && ! empty($label)) {
                    $featureItems[] = $label;
                }
            }

            if (! empty($featureItems)) {
                $res['extras'] = $featureItems;
                break;
            }
        }

        return $res;
    }
396
397
    /**
     * Extract the cast list and the director.
     *
     * Names are taken from `"actor": {... "name": "..."}` / `"director": {...}` entries
     * in the raw page body, with JSON string escapes (e.g. `\u00e9`) decoded. When no
     * actor entry is found, the link texts of the first cast selector that yields
     * elements are used instead.
     *
     * @return array{cast: list<string>, director: string} 'cast' is de-duplicated, free of
     *                                                     empty names and sequentially indexed.
     */
    protected function extractCast(): array
    {
        $cast = [];
        $director = '';

        // Method 1: Try structured data
        if (preg_match_all('/"actor":\s*{[^}]*"name":\s*"(.*?)"/is', $this->response, $matches)) {
            foreach ($matches[1] as $actor) {
                $decoded = json_decode('"'.$actor.'"');
                $cast[] = trim(is_string($decoded) ? $decoded : $actor);
            }
        }

        if (preg_match('/"director":\s*{[^}]*"name":\s*"(.*?)"/is', $this->response, $match)) {
            $decoded = json_decode('"'.$match[1].'"');
            $director = trim(is_string($decoded) ? $decoded : $match[1]);
        }

        // Method 2: Try multiple selectors
        if (empty($cast)) {
            $castSelectors = [
                'div.cast a',
                'div.stars a',
                'div.performers a',
            ];

            foreach ($castSelectors as $selector) {
                $elements = $this->getHtmlParser()->find($selector);

                if (empty($elements)) {
                    continue;
                }

                foreach ($elements as $element) {
                    $cast[] = trim((string) $element->plaintext);
                }

                break;
            }
        }

        // array_filter()/array_unique() keep the original keys; reindex so the result is a
        // real list (a sparse array would be serialised as a JSON object, not an array).
        $cast = array_filter($cast, static fn (string $name): bool => $name !== '');

        return [
            'cast' => array_values(array_unique($cast)),
            'director' => $director,
        ];
    }
438
439
    /**
     * Extract the genre / keyword list.
     *
     * Genres are taken from `"genre": "..."` entries in the raw page body, with JSON
     * string escapes decoded. When none are found, the link texts of the first keyword
     * selector that yields elements are used instead.
     *
     * @return array{genres: list<string>} De-duplicated, free of empty entries and
     *                                     sequentially indexed.
     */
    protected function extractGenres(): array
    {
        $genres = [];

        // Method 1: Try structured data
        if (preg_match_all('/"genre":\s*"(.*?)"/is', $this->response, $matches)) {
            foreach ($matches[1] as $genre) {
                $decoded = json_decode('"'.$genre.'"');
                $genres[] = trim(is_string($decoded) ? $decoded : $genre);
            }
        }

        // Method 2: Try multiple selectors
        if (empty($genres)) {
            $selectors = [
                'div[id=thekeywords] a',
                'p[class=keywords] a',
                'div.categories a',
                'div.tags a',
            ];

            foreach ($selectors as $selector) {
                $elements = $this->getHtmlParser()->find($selector);

                if (empty($elements)) {
                    continue;
                }

                foreach ($elements as $element) {
                    $genres[] = trim((string) $element->plaintext);
                }

                break;
            }
        }

        // array_filter()/array_unique() keep the original keys; reindex so the result is a
        // real list (a sparse array would be serialised as a JSON object, not an array).
        $genres = array_filter($genres, static fn (string $genre): bool => $genre !== '');

        return ['genres' => array_values(array_unique($genres))];
    }
475
476
    /**
     * Accept age verification by visiting the confirmation endpoint.
     * PopPorn uses a redirect-based age verification system.
     *
     * The site root is requested with redirects disabled. If the response is a
     * redirect whose Location mentions "AgeConfirmation", that URL is requested with
     * redirects enabled. Any exception is logged at debug level and otherwise ignored,
     * so the caller can still attempt its search. The method always pauses for 500 ms
     * before returning.
     *
     * NOTE(review): whether the cookies set by these requests are reused by fetchHtml()
     * depends on how getHttpClient() is configured - confirm against the parent class.
     */
    protected function acceptAgeVerification(): void
    {
        try {
            $client = $this->getHttpClient();

            // First, make a request that disables redirects to see where we're being sent
            $response = $client->get(self::BASE_URL.'/', [
                'headers' => $this->getDefaultHeaders(),
                'allow_redirects' => false,
                'http_errors' => false,
            ]);

            // Accept every redirect status that carries a Location header, not just 301/302.
            $isRedirect = in_array($response->getStatusCode(), [301, 302, 303, 307, 308], true);
            $location = $isRedirect ? $response->getHeaderLine('Location') : '';

            // If redirected to AgeConfirmation, follow the flow
            if ($location !== '' && stripos($location, 'AgeConfirmation') !== false) {
                // The redirect URL includes ?url2= parameter, we need to visit it
                $ageConfirmUrl = str_starts_with($location, 'http')
                    ? $location
                    : self::BASE_URL.'/'.ltrim($location, '/');

                // Visit the age confirmation page; only the side effect of the request
                // matters here, the response body is not needed.
                $client->get($ageConfirmUrl, [
                    'headers' => $this->getDefaultHeaders(),
                    'allow_redirects' => true,
                    'http_errors' => false,
                ]);

                \Illuminate\Support\Facades\Log::debug('PopPorn age confirmation visited: '.$ageConfirmUrl);
            }
        } catch (\Exception $e) {
            // Log but don't fail - we'll try the search anyway
            \Illuminate\Support\Facades\Log::debug('PopPorn age verification setup: '.$e->getMessage());
        }

        // Wait a moment to simulate human behavior
        usleep(500000); // 500ms
    }
533
}
534