AebnPipe   F
last analyzed

Complexity

Total Complexity 107

Size/Duplication

Total Lines 529
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 107
eloc 280
c 1
b 0
f 0
dl 0
loc 529
rs 2

13 Methods

Rating   Name   Duplication   Size   Complexity  
B extractGenres() 0 34 7
F extractFromJsonLd() 0 71 19
A extractTrailers() 0 22 5
B process() 0 59 6
C extractCast() 0 47 14
F getMovieInfo() 0 52 19
A extractCovers() 0 36 5
A getDisplayName() 0 3 1
A getBaseUrl() 0 3 1
C search() 0 77 16
A getName() 0 3 1
B extractProductInfo() 0 43 9
A extractSynopsis() 0 31 4

How to fix   Complexity   

Complex Class

Complex classes like AebnPipe often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AebnPipe, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace App\Services\AdultProcessing\Pipes;
4
5
use App\Services\AdultProcessing\AdultProcessingPassable;
6
use App\Services\AdultProcessing\AdultProcessingResult;
7
8
/**
9
 * AEBN (Adult Entertainment Broadcast Network) provider pipe.
10
 *
11
 * Handles movie information extraction from straight.theater.aebn.net
12
 * with support for JSON-LD structured data and multiple fallback selectors.
13
 */
14
class AebnPipe extends AbstractAdultProviderPipe
15
{
16
    protected int $priority = 10;
17
18
    private const BASE_URL = 'https://straight.theater.aebn.net';
19
20
    private const SEARCH_URL = '/dispatcher/fts?theaterId=13992&genreId=101&locale=en&count=30&imageType=Large&targetSearchMode=basic&isAdvancedSearch=false&isFlushAdvancedSearchCriteria=false&sortType=Relevance&userQuery=title%3A+%2B';
21
22
    private const TRAILER_URL = '/dispatcher/previewPlayer?locale=en&theaterId=13992&genreId=101&movieId=';
23
24
    protected string $directUrl = '';
25
26
    protected string $title = '';
27
28
    protected array $res = [];
29
30
    protected string $response = '';
31
32
    protected ?array $jsonLdData = null;
33
34
    /**
     * Machine-readable identifier of this provider.
     *
     * process() tags every AdultProcessingResult it builds with this value.
     */
    public function getName(): string
    {
        return 'aebn';
    }
38
39
    /**
     * Human-readable name of this provider.
     */
    public function getDisplayName(): string
    {
        return 'Adult Entertainment Broadcast Network';
    }
43
44
    /**
     * Root URL of the AEBN theater site (the BASE_URL constant).
     */
    protected function getBaseUrl(): string
    {
        return self::BASE_URL;
    }
48
49
    /**
     * Look up the passable's clean title on AEBN and build a processing result.
     *
     * Steps:
     *  1. Return the cached outcome for the title, if there is one (a cached
     *     `false` records an earlier miss).
     *  2. Search the site and take the best title match.
     *  3. Download the matched movie page, read its JSON-LD block and HTML,
     *     and assemble the movie info array.
     *  4. Cache the outcome (hit or miss) and report it.
     *
     * A failed download of the movie page is reported as `failed` and is NOT
     * cached, so the title can be retried later.
     *
     * @param AdultProcessingPassable $passable Supplies the title to look up.
     *
     * @return AdultProcessingResult A matched, notFound or failed result tagged with getName().
     */
    protected function process(AdultProcessingPassable $passable): AdultProcessingResult
    {
        $movie = $passable->getCleanTitle();

        // Check cache first: null means "never looked up", false means "known miss".
        $cached = $this->getCachedSearch($movie);
        if ($cached !== null) {
            if ($cached === false) {
                return AdultProcessingResult::notFound($this->getName());
            }

            return AdultProcessingResult::matched(
                $cached['title'] ?? $movie,
                $this->getName(),
                $cached
            );
        }

        $searchResult = $this->search($movie);

        if ($searchResult === false) {
            $this->cacheSearchResult($movie, false);
            $this->outputNotFound();

            return AdultProcessingResult::notFound($this->getName());
        }

        $this->title = $searchResult['title'];
        $this->directUrl = $searchResult['url'];

        // Fetch the movie details page into a local first. $this->response is
        // typed `string`, so assigning `false` to it directly is either coerced
        // to '' or rejected with a TypeError (depending on strict_types); in
        // both cases a `$this->response === false` check could never be true.
        $response = $this->fetchHtml($this->directUrl, $this->cookie);

        if ($response === false) {
            $this->response = '';

            return AdultProcessingResult::failed('Failed to fetch movie details page', $this->getName());
        }

        $this->response = $response;

        // Try to extract JSON-LD data first
        $this->jsonLdData = $this->extractJsonLd($this->response);

        $this->getHtmlParser()->loadHtml($this->response);

        $movieInfo = $this->getMovieInfo();

        if ($movieInfo === false) {
            $this->cacheSearchResult($movie, false);

            return AdultProcessingResult::notFound($this->getName());
        }

        // Cache the successful result
        $this->cacheSearchResult($movie, $movieInfo);

        $this->outputMatch($this->title);

        return AdultProcessingResult::matched(
            $this->title,
            $this->getName(),
            $movieInfo
        );
    }
110
111
    /**
     * Search AEBN for a movie title and return the closest matching result.
     *
     * Each result container is scanned for a title link. The link whose title
     * scores highest in calculateSimilarity() wins, provided that score reaches
     * $this->minimumSimilarity.
     *
     * @param string $movie Title to search for.
     *
     * @return array<string, mixed>|false Array with the keys `title` (trimmed link
     *                                    title), `url` (absolute movie page URL) and
     *                                    `similarity` (winning score); false when the
     *                                    title is empty, the request fails or no
     *                                    result is similar enough.
     */
    protected function search(string $movie): array|false
    {
        if (empty($movie)) {
            return false;
        }

        $searchUrl = self::BASE_URL.self::SEARCH_URL.urlencode($movie);
        $response = $this->fetchHtml($searchUrl, $this->cookie);

        if ($response === false) {
            return false;
        }

        $this->getHtmlParser()->loadHtml($response);

        // Result containers; the first selector that yields any nodes is used.
        $containerSelectors = [
            'div.movie',
            'div.search-result',
            'div[class*="movie"]',
            'article.movie',
        ];

        $movies = [];
        foreach ($containerSelectors as $containerSelector) {
            $movies = $this->getHtmlParser()->find($containerSelector);
            if (! empty($movies) && count($movies) > 0) {
                break;
            }
        }

        $bestMatch = null;
        $highestSimilarity = 0;
        $position = 0;

        foreach ($movies as $mov) {
            // The id-based selector is numbered by 1-based result position.
            $position++;

            $linkSelectors = [
                'a#FTSMovieSearch_link_title_detail_'.$position,
                'a.title-link',
                'a[href*="/movie/"]',
                'h3 a',
                'a[title]',
            ];

            $link = null;
            foreach ($linkSelectors as $selector) {
                $candidate = $mov->findOne($selector);
                if ($candidate && isset($candidate->href)) {
                    $link = $candidate;
                    break;
                }
            }

            if ($link === null) {
                continue;
            }

            // Prefer the title attribute, but fall back to the link text when
            // the attribute is missing OR blank.
            $title = trim((string) ($link->title ?? ''));
            if ($title === '') {
                $title = trim((string) ($link->plaintext ?? ''));
            }

            if ($title === '') {
                continue;
            }

            $similarity = $this->calculateSimilarity($movie, $title);

            if ($similarity > $highestSimilarity) {
                $href = html_entity_decode((string) $link->href);

                // Only site-relative links get the base URL prepended; absolute
                // and protocol-relative links would otherwise be corrupted.
                $url = match (true) {
                    preg_match('#^https?://#i', $href) === 1 => $href,
                    str_starts_with($href, '//') => 'https:'.$href,
                    default => self::BASE_URL.'/'.ltrim($href, '/'),
                };

                $highestSimilarity = $similarity;
                $bestMatch = [
                    'title' => $title,
                    'url' => $url,
                    'similarity' => $similarity,
                ];
            }
        }

        if ($bestMatch !== null && $highestSimilarity >= $this->minimumSimilarity) {
            return $bestMatch;
        }

        return false;
    }
189
190
    /**
     * Assemble the movie info array for the currently loaded movie page.
     *
     * Starts from the search hit (title / direct URL), then layers on the
     * JSON-LD data (when present) and each HTML extractor in turn. The layers
     * are combined with array_merge(), so for a key produced by more than one
     * source the LAST source wins: HTML-derived values replace JSON-LD ones,
     * and a JSON-LD title replaces the search title.
     *
     * NOTE(review): the HTML extractors were described as a "fallback", which
     * suggests JSON-LD was meant to take precedence - confirm the intended order.
     *
     * @return array<string, mixed>|false Collected fields, or false when neither
     *                                    a title nor a box cover was found.
     */
    protected function getMovieInfo(): array|false
    {
        $info = [];

        if (! empty($this->directUrl)) {
            if (! empty($this->title)) {
                $info['title'] = $this->title;
            }
            $info['directurl'] = $this->directUrl;
        }

        // Collect the layers in precedence order (later entries override earlier ones).
        $layers = [];

        if ($this->jsonLdData !== null) {
            $layers[] = $this->extractFromJsonLd();
        }

        $layers[] = $this->extractSynopsis();
        $layers[] = $this->extractProductInfo(true);
        $layers[] = $this->extractCast();
        $layers[] = $this->extractGenres();
        $layers[] = $this->extractCovers();
        $layers[] = $this->extractTrailers();

        // Every extractor returns a string-keyed array; merging an empty one is a no-op.
        $info = array_merge($info, ...$layers);

        if (empty($info['title']) && empty($info['boxcover'])) {
            return false;
        }

        return $info;
    }
243
244
    /**
     * Map the page's JSON-LD block ($this->jsonLdData) onto movie info keys.
     *
     * Fields that are missing or empty in the JSON-LD data are left out of the
     * result. May produce: title, synopsis, boxcover, backcover, duration,
     * director, cast (list of names) and genres (list).
     *
     * `image`, `director` and `actor` are accepted as a plain string, a single
     * object (associative array) or a list of either.
     *
     * @return array<string, mixed>
     */
    protected function extractFromJsonLd(): array
    {
        $data = $this->jsonLdData;
        $results = [];

        if ($data === null) {
            return $results;
        }

        // Reduce "string | object | list of either" to the first non-blank
        // string, reading objects through $key.
        $firstString = static function (mixed $node, string $key): string {
            // A bare string or a single object is handled as a one-element list.
            if (! is_array($node) || array_key_exists($key, $node)) {
                $node = [$node];
            }

            foreach ($node as $candidate) {
                if (is_array($candidate)) {
                    $candidate = $candidate[$key] ?? '';
                }
                if (is_string($candidate) && trim($candidate) !== '') {
                    return trim($candidate);
                }
            }

            return '';
        };

        // Title
        if (! empty($data['name'])) {
            $results['title'] = $data['name'];
        }

        // Synopsis/Description
        if (! empty($data['description'])) {
            $results['synopsis'] = $data['description'];
        }

        // Image/Cover
        if (! empty($data['image'])) {
            $image = $firstString($data['image'], 'url');
            if ($image !== '') {
                $results['boxcover'] = $image;

                // Derive the back cover by rewriting only the part after the
                // host, so a host name containing "front" is never altered.
                $prefix = '';
                $path = $image;
                if (preg_match('#^((?:[a-z][a-z0-9+.-]*:)?//[^/]+)(.*)$#is', $image, $parts) === 1) {
                    [, $prefix, $path] = $parts;
                }
                $results['backcover'] = $prefix.str_ireplace(['xlf.jpg', 'front'], ['xlb.jpg', 'back'], $path);
            }
        }

        // Duration (passed through exactly as given in the JSON-LD data)
        if (! empty($data['duration'])) {
            $results['duration'] = $data['duration'];
        }

        // Director
        if (! empty($data['director'])) {
            $director = $firstString($data['director'], 'name');
            if ($director !== '') {
                $results['director'] = $director;
            }
        }

        // Actors
        if (! empty($data['actor'])) {
            $actors = $data['actor'];

            // Wrap a lone string or a lone object so its fields (e.g. "@type")
            // are not mistaken for actor names.
            if (! is_array($actors) || array_key_exists('name', $actors)) {
                $actors = [$actors];
            }

            $cast = [];
            foreach ($actors as $actor) {
                if (is_array($actor) && ! empty($actor['name'])) {
                    $cast[] = $actor['name'];
                } elseif (is_string($actor) && trim($actor) !== '') {
                    $cast[] = $actor;
                }
            }

            if (! empty($cast)) {
                $results['cast'] = $cast;
            }
        }

        // Genre: always returned as a list
        if (! empty($data['genre'])) {
            $genres = $data['genre'];
            $results['genres'] = is_array($genres) ? $genres : [$genres];
        }

        return $results;
    }
319
320
    /**
     * Build the trailer player URL for the loaded movie page.
     *
     * Looks for the first link (per selector, in order) whose href carries a
     * numeric `movieId=` parameter and appends that id to TRAILER_URL.
     *
     * @return array Either `['trailers' => ['url' => string]]` or an empty array
     *               when no such link exists.
     */
    protected function extractTrailers(): array
    {
        $linkSelectors = [
            'a[itemprop=trailer]',
            'a[href*="previewPlayer"]',
            'a.trailer-link',
        ];

        foreach ($linkSelectors as $linkSelector) {
            $link = $this->getHtmlParser()->find($linkSelector, 0);

            if (! $link || ! isset($link->href)) {
                continue;
            }

            if (preg_match('/movieId=(?<movieid>\d+)/', trim($link->href), $match) !== 1) {
                continue;
            }

            return [
                'trailers' => [
                    'url' => self::BASE_URL.self::TRAILER_URL.$match['movieid'],
                ],
            ];
        }

        return [];
    }
343
344
    /**
     * Find the box cover image on the loaded movie page and derive the
     * high-resolution front and back cover URLs from it.
     *
     * The first selector that yields a non-blank `src` (or, failing that,
     * `content`) attribute is used. Size suffixes are swapped for the `xlf.jpg`
     * variant, and the back cover is derived by swapping front markers for back
     * markers. All swaps are applied to the part of the URL after the host
     * only, so the host name itself is never rewritten.
     *
     * @return array Either `['boxcover' => string, 'backcover' => string]` or an
     *               empty array when no cover image was found.
     */
    protected function extractCovers(): array
    {
        $selectors = [
            'img[itemprop=thumbnailUrl]',
            'img[itemprop=image]',
            'div#md-boxCover img',
            'div.boxcover img',
            'img.boxcover',
            'img[src*="boxcover"]',
            'meta[property="og:image"]',
        ];

        foreach ($selectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (! $node) {
                continue;
            }

            // Image tags carry the URL in `src`, meta tags in `content`;
            // take the first attribute that is actually non-blank.
            $coverUrl = '';
            foreach (['src', 'content'] as $attribute) {
                $value = trim((string) ($node->{$attribute} ?? ''));
                if ($value !== '') {
                    $coverUrl = $value;
                    break;
                }
            }

            if ($coverUrl === '') {
                continue;
            }

            // Ensure URL has protocol
            if (str_starts_with($coverUrl, '//')) {
                $coverUrl = 'https:'.$coverUrl;
            }

            // Split off "scheme://host" so the replacements below cannot touch
            // it (a host containing "front" would otherwise be rewritten).
            $prefix = '';
            $path = $coverUrl;
            if (preg_match('#^([a-z][a-z0-9+.-]*://[^/]+)(.*)$#is', $coverUrl, $parts) === 1) {
                [, $prefix, $path] = $parts;
            }

            // Get high-resolution versions
            $frontPath = str_ireplace(
                ['160w.jpg', '120w.jpg', '_small', '_thumb'],
                ['xlf.jpg', 'xlf.jpg', '', ''],
                $path
            );
            $backPath = str_ireplace(
                ['160w.jpg', '120w.jpg', 'xlf.jpg', 'front'],
                ['xlb.jpg', 'xlb.jpg', 'xlb.jpg', 'back'],
                $frontPath
            );

            return [
                'boxcover' => $prefix.$frontPath,
                'backcover' => $prefix.$backPath,
            ];
        }

        return [];
    }
381
382
    /**
     * Collect the genre names shown on the loaded movie page.
     *
     * Selectors are tried in order; the first one that yields at least one
     * non-blank label supplies the whole list. Duplicates are removed and the
     * list is re-indexed from 0 so it stays a plain list when serialised.
     *
     * @return array Either `['genres' => list of strings]` or an empty array.
     */
    protected function extractGenres(): array
    {
        $selectors = [
            'div.md-detailsCategories a[itemprop=genre]',
            'a[itemprop=genre]',
            'span[itemprop=genre]',
            'div.categories a',
            'a[href*="/category/"]',
        ];

        $genres = [];

        foreach ($selectors as $selector) {
            $elements = $this->getHtmlParser()->find($selector);
            if (empty($elements)) {
                continue;
            }

            foreach ($elements as $element) {
                $label = trim($element->plaintext ?? '');
                if ($label !== '') {
                    $genres[] = $label;
                }
            }

            if ($genres !== []) {
                break;
            }
        }

        if ($genres === []) {
            return [];
        }

        // array_unique() keeps the original keys; array_values() closes the
        // gaps left by removed duplicates.
        return ['genres' => array_values(array_unique($genres))];
    }
417
418
    /**
     * Collect the performer names shown on the loaded movie page.
     *
     * Selectors are tried in order; the first one that yields at least one
     * usable name supplies the whole list. If none does, the spans inside
     * `div.detailsLink` are scanned as a last resort. Duplicates are removed
     * and the list is re-indexed from 0 so it stays a plain list when
     * serialised.
     *
     * @return array Either `['cast' => list of strings]` or an empty array.
     */
    protected function extractCast(): array
    {
        $selectors = [
            'div.starsFull span[itemprop=name]',
            'span[itemprop=name]',
            'a[href*="/stars/"]',
            'div.cast a',
            'div.performers a',
        ];

        $cast = [];

        foreach ($selectors as $selector) {
            $elements = $this->getHtmlParser()->find($selector);
            if (empty($elements)) {
                continue;
            }

            foreach ($elements as $star) {
                $name = trim($star->plaintext ?? '');
                // Labels of one or two bytes are skipped, as before.
                if (strlen($name) > 2) {
                    $cast[] = $name;
                }
            }

            if ($cast !== []) {
                break;
            }
        }

        // Fallback: try detailsLink div
        if ($cast === []) {
            $container = $this->getHtmlParser()->findOne('div.detailsLink');
            if ($container && ! ($container instanceof \voku\helper\SimpleHtmlDomNodeBlank)) {
                foreach ($container->find('span') as $star) {
                    $text = $star->plaintext ?? '';
                    if (str_contains($text, '/More/') && str_contains($text, '/Stars/')) {
                        $cast[] = trim($text);
                    }
                }
            }
        }

        if ($cast === []) {
            return [];
        }

        // array_unique() keeps the original keys; array_values() closes the
        // gaps left by removed duplicates.
        return ['cast' => array_values(array_unique($cast))];
    }
466
467
    /**
     * Scrape the product detail panel of the loaded movie page.
     *
     * For the first panel selector that yields any text, the non-blank text of
     * every span found inside each div of the panel is collected into one flat
     * list. From that list:
     *  - the entry two positions after "Running Time:" is dropped;
     *  - "Director:" and the entry following it are removed, the latter being
     *    returned as `director` ('' when there is no following entry);
     *  - what remains is grouped in twos and returned as `productinfo`.
     *
     * @param bool $extras Not read by this implementation.
     *
     * @return array Keys `director` (only when a "Director:" label exists) and
     *               `productinfo`, or an empty array when no panel had any text.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        $panelSelectors = [
            'div#md-detailsLeft',
            'div.movie-details',
            'div.product-info',
        ];

        foreach ($panelSelectors as $panelSelector) {
            $panel = $this->getHtmlParser()->find($panelSelector, 0);
            if (! $panel) {
                continue;
            }

            $entries = [];
            foreach ($panel->find('div') as $block) {
                foreach ($block->find('span') as $cell) {
                    $value = trim(preg_replace('/&nbsp;/', '', rawurldecode($cell->plaintext ?? '')));
                    if (! empty($value)) {
                        $entries[] = $value;
                    }
                }
            }

            if (empty($entries)) {
                continue;
            }

            $info = [];

            $runningTimeAt = array_search('Running Time:', $entries);
            if ($runningTimeAt !== false) {
                unset($entries[$runningTimeAt + 2]);
            }

            $directorAt = array_search('Director:', $entries);
            if ($directorAt !== false) {
                $info['director'] = $entries[$directorAt + 1] ?? '';
                unset($entries[$directorAt], $entries[$directorAt + 1]);
            }

            // Re-index before chunking so the removals above leave no gaps.
            $info['productinfo'] = array_chunk(array_values($entries), 2);

            return $info;
        }

        return [];
    }
511
512
    /**
     * Extract the movie synopsis from the loaded movie page.
     *
     * Selectors are tried in order of preference. For each match the element's
     * text is used; when that text is blank (as it is for meta tags) the
     * `content` attribute is used instead. A leading "Description:" label is
     * stripped.
     *
     * @return array Either `['synopsis' => string]` or an empty array.
     */
    protected function extractSynopsis(): array
    {
        $selectors = [
            'span[itemprop=about]',
            'span[itemprop=description]',
            'div[itemprop=description]',
            'div.movieDetailDescription',
            'div.synopsis',
            'p.description',
            'meta[name="description"]',
            'meta[property="og:description"]',
        ];

        foreach ($selectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (! $node) {
                continue;
            }

            // `plaintext ?? content` is not enough here: `??` only falls back on
            // null, so an element whose text is set but blank would never reach
            // its content attribute. Compare against '' explicitly instead.
            $text = trim((string) ($node->plaintext ?? ''));
            if ($text === '') {
                $text = trim((string) ($node->content ?? ''));
            }

            if ($text === '') {
                continue;
            }

            // preg_replace() returns null on a PCRE error; keep the text then.
            $text = preg_replace('/^Description:\s*/i', '', $text) ?? $text;

            // Nothing left after removing the label: try the next selector.
            if ($text === '') {
                continue;
            }

            return ['synopsis' => $text];
        }

        return [];
    }
544
}
545