AebnPipe   F
last analyzed

Complexity

Total Complexity 107

Size/Duplication

Total Lines 518
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 107
eloc 280
c 1
b 0
f 0
dl 0
loc 518
rs 2

13 Methods

Rating   Name   Duplication   Size   Complexity  
B extractGenres() 0 34 7
F extractFromJsonLd() 0 71 19
A extractTrailers() 0 21 5
B process() 0 56 6
C extractCast() 0 47 14
F getMovieInfo() 0 52 19
A extractCovers() 0 36 5
A getDisplayName() 0 3 1
A getBaseUrl() 0 3 1
C search() 0 77 16
A getName() 0 3 1
B extractProductInfo() 0 43 9
A extractSynopsis() 0 30 4

How to fix   Complexity   

Complex Class

Complex classes like AebnPipe often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AebnPipe, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace App\Services\AdultProcessing\Pipes;
4
5
use App\Services\AdultProcessing\AdultProcessingPassable;
6
use App\Services\AdultProcessing\AdultProcessingResult;
7
8
/**
9
 * AEBN (Adult Entertainment Broadcast Network) provider pipe.
10
 *
11
 * Handles movie information extraction from straight.theater.aebn.net
12
 * with support for JSON-LD structured data and multiple fallback selectors.
13
 */
14
class AebnPipe extends AbstractAdultProviderPipe
15
{
16
    /**
     * Priority of this pipe.
     * NOTE(review): presumably read by the parent/pipeline to order providers — confirm.
     */
    protected int $priority = 10;

    /** Scheme and host that every URL built in this class starts with. */
    private const BASE_URL = 'https://straight.theater.aebn.net';

    /** Search path and query; search() appends the URL-encoded title directly to it. */
    private const SEARCH_URL = '/dispatcher/fts?theaterId=13992&genreId=101&locale=en&count=30&imageType=Large&targetSearchMode=basic&isAdvancedSearch=false&isFlushAdvancedSearchCriteria=false&sortType=Relevance&userQuery=title%3A+%2B';

    /** Preview-player path and query; extractTrailers() appends the numeric movie id to it. */
    private const TRAILER_URL = '/dispatcher/previewPlayer?locale=en&theaterId=13992&genreId=101&movieId=';

    /** URL of the details page selected by search(); set in process(). */
    protected string $directUrl = '';

    /** Title of the best search match; set in process(). */
    protected string $title = '';

    /**
     * Not referenced by any method of this class.
     * NOTE(review): possibly kept for the parent class or for backwards compatibility — confirm.
     */
    protected array $res = [];

    /** HTML of the details page fetched in process(). */
    protected string $response = '';

    /** Value returned by extractJsonLd() for the details page; null until process() assigns it. */
    protected ?array $jsonLdData = null;
27
28
    /**
     * Short identifier of this provider.
     *
     * process() passes this value to every AdultProcessingResult it creates.
     */
    public function getName(): string
    {
        return 'aebn';
    }
32
33
    /**
     * Human-readable name of this provider.
     */
    public function getDisplayName(): string
    {
        return 'Adult Entertainment Broadcast Network';
    }
37
38
    /**
     * Base URL (scheme and host) of the AEBN site, taken from the BASE_URL constant.
     */
    protected function getBaseUrl(): string
    {
        return self::BASE_URL;
    }
42
43
    /**
     * Look a release title up on AEBN and return the scraped movie data.
     *
     * Flow: cached lookup -> search() -> fetch details page -> extract data
     * (JSON-LD plus HTML selectors via getMovieInfo()) -> cache the outcome.
     *
     * @param AdultProcessingPassable $passable Supplies the cleaned title to search for.
     *
     * @return AdultProcessingResult matched() with the movie data, notFound() when no
     *                               acceptable match or no usable data exists, or failed()
     *                               when the details page could not be fetched.
     */
    protected function process(AdultProcessingPassable $passable): AdultProcessingResult
    {
        $movie = $passable->getCleanTitle();
        $provider = $this->getName();

        // Cache contract as used here: null = nothing cached, false = cached miss, array = cached hit.
        $cached = $this->getCachedSearch($movie);
        if ($cached === false) {
            return AdultProcessingResult::notFound($provider);
        }
        if ($cached !== null) {
            return AdultProcessingResult::matched($cached['title'] ?? $movie, $provider, $cached);
        }

        $searchResult = $this->search($movie);
        if ($searchResult === false) {
            $this->cacheSearchResult($movie, false);
            $this->outputNotFound();

            return AdultProcessingResult::notFound($provider);
        }

        $this->title = $searchResult['title'];
        $this->directUrl = $searchResult['url'];

        // Drop anything left over from a previous lookup before fetching the new page.
        $this->response = '';
        $this->jsonLdData = null;

        // The fetch result is checked in a local variable first: $this->response is typed
        // `string`, so assigning a failed fetch (false) to it directly would either raise a
        // TypeError or be coerced to '', and a later `=== false` test could never be true.
        $html = $this->fetchHtml($this->directUrl, $this->cookie);
        if (!is_string($html) || $html === '') {
            return AdultProcessingResult::failed('Failed to fetch movie details page', $provider);
        }
        $this->response = $html;

        // Structured data is extracted from the raw markup before the HTML parser is loaded.
        $this->jsonLdData = $this->extractJsonLd($this->response);
        $this->getHtmlParser()->loadHtml($this->response);

        $movieInfo = $this->getMovieInfo();
        if ($movieInfo === false) {
            // Remember the miss so the same title is not scraped again.
            $this->cacheSearchResult($movie, false);

            return AdultProcessingResult::notFound($provider);
        }

        $this->cacheSearchResult($movie, $movieInfo);
        $this->outputMatch($this->title);

        return AdultProcessingResult::matched($this->title, $provider, $movieInfo);
    }
101
102
    /**
     * Search AEBN for a title and return the best-matching result.
     *
     * Every result container is scanned for a title link; the candidate with the highest
     * calculateSimilarity() score wins, provided the score reaches $this->minimumSimilarity.
     *
     * @param string $movie Title to search for.
     *
     * @return array|false ['title' => string, 'url' => string, 'similarity' => mixed] for the
     *                     best match, or false when the title is empty, the request fails,
     *                     or no candidate is similar enough.
     */
    protected function search(string $movie): array|false
    {
        if (empty($movie)) {
            return false;
        }

        $searchUrl = self::BASE_URL . self::SEARCH_URL . urlencode($movie);
        $response = $this->fetchHtml($searchUrl, $this->cookie);
        if ($response === false) {
            return false;
        }

        $this->getHtmlParser()->loadHtml($response);

        // Result containers, most specific first; the first selector that matches anything wins.
        $containerSelectors = [
            'div.movie',
            'div.search-result',
            'div[class*="movie"]',
            'article.movie',
        ];

        $movies = [];
        foreach ($containerSelectors as $containerSelector) {
            $movies = $this->getHtmlParser()->find($containerSelector);
            if (!empty($movies) && count($movies) > 0) {
                break;
            }
        }

        $bestMatch = null;
        $highestSimilarity = 0;
        $position = 0;

        foreach ($movies as $mov) {
            // 1-based position of the container; part of the id used by the first link selector.
            $position++;

            $linkSelectors = [
                'a#FTSMovieSearch_link_title_detail_' . $position,
                'a.title-link',
                'a[href*="/movie/"]',
                'h3 a',
                'a[title]',
            ];

            $link = null;
            foreach ($linkSelectors as $selector) {
                $link = $mov->findOne($selector);
                if ($link && isset($link->href)) {
                    break;
                }
            }

            if (!$link || !isset($link->href)) {
                continue;
            }

            // Prefer the title attribute, but fall back to the link text when the attribute
            // is missing OR present-but-empty.
            $title = trim((string) ($link->title ?? ''));
            if ($title === '') {
                $title = trim((string) ($link->plaintext ?? ''));
            }
            if ($title === '') {
                continue;
            }

            $similarity = $this->calculateSimilarity($movie, $title);
            if ($similarity <= $highestSimilarity) {
                continue;
            }

            // Only site-relative links get the base URL prepended; absolute and
            // protocol-relative links would otherwise end up with the host duplicated.
            $href = html_entity_decode(trim((string) $link->href));
            if (preg_match('#^https?://#i', $href) === 1) {
                $url = $href;
            } elseif (str_starts_with($href, '//')) {
                $url = 'https:' . $href;
            } else {
                $url = self::BASE_URL . '/' . ltrim($href, '/');
            }

            $highestSimilarity = $similarity;
            $bestMatch = [
                'title' => $title,
                'url' => $url,
                'similarity' => $similarity,
            ];
        }

        if ($bestMatch !== null && $highestSimilarity >= $this->minimumSimilarity) {
            return $bestMatch;
        }

        return false;
    }
180
181
    /**
     * Assemble the movie data for the currently loaded details page.
     *
     * Sources, in order of precedence:
     *  1. title / direct URL taken from the search result,
     *  2. JSON-LD structured data (overrides the search title when it has a name),
     *  3. HTML selector extractors, used as a fallback only: they fill keys that are
     *     still missing or empty and never overwrite a non-empty value from 1 or 2.
     *
     * @return array|false Collected fields, or false when neither a title nor a box cover
     *                     could be determined.
     */
    protected function getMovieInfo(): array|false
    {
        $results = [];

        if (!empty($this->directUrl)) {
            if (!empty($this->title)) {
                $results['title'] = $this->title;
            }
            $results['directurl'] = $this->directUrl;
        }

        if ($this->jsonLdData !== null) {
            $results = array_merge($results, $this->extractFromJsonLd());
        }

        // Each extractor returns an array (possibly empty) keyed by field name.
        $htmlFallbacks = [
            $this->extractSynopsis(),
            $this->extractProductInfo(true),
            $this->extractCast(),
            $this->extractGenres(),
            $this->extractCovers(),
            $this->extractTrailers(),
        ];

        foreach ($htmlFallbacks as $fields) {
            foreach ($fields as $key => $value) {
                // Plain array_merge() would let the HTML value replace the JSON-LD one,
                // which contradicts the "fallback" role of these extractors.
                if (empty($results[$key])) {
                    $results[$key] = $value;
                }
            }
        }

        // A record without both a title and a cover is treated as "nothing found".
        if (empty($results['title']) && empty($results['boxcover'])) {
            return false;
        }

        return $results;
    }
234
235
    /**
     * Extract data from JSON-LD structured data.
     *
     * Reads $this->jsonLdData and maps the fields it understands to result keys:
     * name -> title, description -> synopsis, image -> boxcover/backcover,
     * duration -> duration, director -> director, actor -> cast, genre -> genres.
     *
     * image, director, actor and genre are accepted as a plain string, a single object
     * (associative array), or a list of strings/objects. A key is only set when a
     * non-empty value was found.
     *
     * @return array Extracted fields; empty when no JSON-LD data is available.
     */
    protected function extractFromJsonLd(): array
    {
        $data = $this->jsonLdData;
        if ($data === null) {
            return [];
        }

        // Flattens a JSON-LD value into a list of non-empty strings. Objects contribute
        // their $field entry. A single object must be detected up front: iterating it
        // directly would walk its properties ("@type", "name", ...) as if they were items.
        $collect = static function (mixed $value, string $field): array {
            if (is_string($value)) {
                $value = [$value];
            } elseif (!is_array($value)) {
                return [];
            } elseif ($value !== array_values($value)) {
                // Not a 0..n-1 list, so this is one object rather than a list of items.
                $value = [$value];
            }

            $strings = [];
            foreach ($value as $item) {
                if (is_array($item)) {
                    $item = $item[$field] ?? '';
                }
                if (is_string($item) && trim($item) !== '') {
                    $strings[] = trim($item);
                }
            }

            return $strings;
        };

        $results = [];

        if (!empty($data['name'])) {
            $results['title'] = $data['name'];
        }

        if (!empty($data['description'])) {
            $results['synopsis'] = $data['description'];
        }

        // First usable image URL; the back cover is derived by the front/back naming pattern.
        $images = $collect($data['image'] ?? null, 'url');
        if ($images !== []) {
            $results['boxcover'] = $images[0];
            $results['backcover'] = str_ireplace(['xlf.jpg', 'front'], ['xlb.jpg', 'back'], $images[0]);
        }

        if (!empty($data['duration'])) {
            $results['duration'] = $data['duration'];
        }

        // Only the first director is kept so the value stays a string.
        $directors = $collect($data['director'] ?? null, 'name');
        if ($directors !== []) {
            $results['director'] = $directors[0];
        }

        $cast = $collect($data['actor'] ?? null, 'name');
        if ($cast !== []) {
            $results['cast'] = $cast;
        }

        $genres = $collect($data['genre'] ?? null, 'name');
        if ($genres !== []) {
            $results['genres'] = $genres;
        }

        return $results;
    }
310
311
    /**
     * Build the trailer URL from the first link on the page that carries a movie id.
     *
     * @return array ['trailers' => ['url' => string]] or an empty array when no candidate
     *               link contains a "movieId=<digits>" parameter.
     */
    protected function extractTrailers(): array
    {
        $candidates = [
            'a[itemprop=trailer]',
            'a[href*="previewPlayer"]',
            'a.trailer-link',
        ];

        foreach ($candidates as $selector) {
            $anchor = $this->getHtmlParser()->find($selector, 0);
            if (!$anchor || !isset($anchor->href)) {
                continue;
            }

            // Only the numeric id is taken from the link; the URL itself is rebuilt from constants.
            if (preg_match('/movieId=(?<movieid>\d+)/', trim($anchor->href), $hits)) {
                return [
                    'trailers' => [
                        'url' => self::BASE_URL . self::TRAILER_URL . $hits['movieid'],
                    ],
                ];
            }
        }

        return [];
    }
333
334
    /**
     * Locate the cover image and derive front/back cover URLs from it.
     *
     * The first selector yielding a non-empty "src" (or "content", for meta tags) wins.
     *
     * @return array ['boxcover' => string, 'backcover' => string] or an empty array.
     */
    protected function extractCovers(): array
    {
        $candidates = [
            'img[itemprop=thumbnailUrl]',
            'img[itemprop=image]',
            'div#md-boxCover img',
            'div.boxcover img',
            'img.boxcover',
            'img[src*="boxcover"]',
            'meta[property="og:image"]',
        ];

        foreach ($candidates as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (!$node) {
                continue;
            }

            $coverUrl = $node->src ?? $node->content ?? null;
            if (empty($coverUrl)) {
                continue;
            }

            // Protocol-relative URLs get an explicit scheme.
            if (str_starts_with($coverUrl, '//')) {
                $coverUrl = 'https:' . $coverUrl;
            }

            // Swap thumbnail markers for the large front-cover name, then derive the
            // back cover from the front-cover URL.
            $front = str_ireplace(
                ['160w.jpg', '120w.jpg', '_small', '_thumb'],
                ['xlf.jpg', 'xlf.jpg', '', ''],
                $coverUrl
            );
            $back = str_ireplace(
                ['160w.jpg', '120w.jpg', 'xlf.jpg', 'front'],
                ['xlb.jpg', 'xlb.jpg', 'xlb.jpg', 'back'],
                $front
            );

            return [
                'boxcover' => $front,
                'backcover' => $back,
            ];
        }

        return [];
    }
371
372
    /**
     * Collect genre names from the details page.
     *
     * Selectors are tried in order; the first one that yields at least one non-empty
     * label is used and the rest are skipped.
     *
     * @return array ['genres' => list of unique strings] or an empty array.
     */
    protected function extractGenres(): array
    {
        $selectors = [
            'div.md-detailsCategories a[itemprop=genre]',
            'a[itemprop=genre]',
            'span[itemprop=genre]',
            'div.categories a',
            'a[href*="/category/"]',
        ];

        $genres = [];
        foreach ($selectors as $selector) {
            $elements = $this->getHtmlParser()->find($selector);
            if (empty($elements)) {
                continue;
            }

            foreach ($elements as $genre) {
                $text = trim($genre->plaintext ?? '');
                if (!empty($text)) {
                    $genres[] = $text;
                }
            }

            if (!empty($genres)) {
                break;
            }
        }

        if (empty($genres)) {
            return [];
        }

        // array_unique() keeps the original keys, so removing a duplicate leaves a gap;
        // reindex to guarantee a 0..n-1 list (a gapped array serialises as a JSON object).
        return ['genres' => array_values(array_unique($genres))];
    }
407
408
    /**
     * Collect performer names from the details page.
     *
     * Selectors are tried in order and the first one producing any name longer than two
     * bytes wins. If none does, the spans inside "div.detailsLink" are checked as a last resort.
     *
     * @return array ['cast' => list of unique strings] or an empty array.
     */
    protected function extractCast(): array
    {
        $selectors = [
            'div.starsFull span[itemprop=name]',
            'span[itemprop=name]',
            'a[href*="/stars/"]',
            'div.cast a',
            'div.performers a',
        ];

        $cast = [];
        foreach ($selectors as $selector) {
            $elements = $this->getHtmlParser()->find($selector);
            if (empty($elements)) {
                continue;
            }

            foreach ($elements as $star) {
                $text = trim($star->plaintext ?? '');
                // Very short strings are skipped; strlen() counts bytes.
                if (!empty($text) && strlen($text) > 2) {
                    $cast[] = $text;
                }
            }

            if (!empty($cast)) {
                break;
            }
        }

        // Fallback: try detailsLink div
        if (empty($cast)) {
            $ret = $this->getHtmlParser()->findOne('div.detailsLink');
            // NOTE(review): confirm this is the placeholder class findOne() actually
            // returns when nothing matches.
            if ($ret && !($ret instanceof \voku\helper\SimpleHtmlDomNodeBlank)) {
                foreach ($ret->find('span') as $star) {
                    $text = $star->plaintext ?? '';
                    if (str_contains($text, '/More/') && str_contains($text, '/Stars/')) {
                        $cast[] = trim($text);
                    }
                }
            }
        }

        if (empty($cast)) {
            return [];
        }

        // array_unique() keeps the original keys, so removing a duplicate leaves a gap;
        // reindex to guarantee a 0..n-1 list (a gapped array serialises as a JSON object).
        return ['cast' => array_values(array_unique($cast))];
    }
456
457
    /**
     * Read the label/value pairs of the product details box.
     *
     * The first container selector that yields any non-empty span text is used. The texts
     * are gathered into one flat list, the director (the entry after "Director:") is split
     * out, and the remaining entries are grouped into pairs.
     *
     * @param bool $extras Not used by this implementation.
     *
     * @return array Optional 'director' => string plus 'productinfo' => list of chunks of
     *               up to two strings; empty array when no container has any text.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        $containers = [
            'div#md-detailsLeft',
            'div.movie-details',
            'div.product-info',
        ];

        foreach ($containers as $selector) {
            $box = $this->getHtmlParser()->find($selector, 0);
            if (!$box) {
                continue;
            }

            $entries = [];
            foreach ($box->find('div') as $div) {
                foreach ($div->find('span') as $span) {
                    $text = rawurldecode($span->plaintext ?? '');
                    $text = trim(preg_replace('/&nbsp;/', '', $text));
                    if (!empty($text)) {
                        $entries[] = $text;
                    }
                }
            }

            if (empty($entries)) {
                continue;
            }

            $res = [];

            // The entry two positions after the "Running Time:" label is discarded.
            $runningTimeAt = array_search('Running Time:', $entries, false);
            if ($runningTimeAt !== false) {
                unset($entries[$runningTimeAt + 2]);
            }

            // The director gets its own key and is removed from the generic list.
            $directorAt = array_search('Director:', $entries, false);
            if ($directorAt !== false) {
                $res['director'] = $entries[$directorAt + 1] ?? '';
                unset($entries[$directorAt], $entries[$directorAt + 1]);
            }

            // Reindex after the unsets so the chunks are built from consecutive entries.
            $res['productinfo'] = array_chunk(array_values($entries), 2, false);

            return $res;
        }

        return [];
    }
501
502
    /**
     * Extract the synopsis/description text from the details page.
     *
     * Selectors are tried in order of preference; the first one yielding non-empty text
     * wins. A leading "Description:" label is stripped from the result.
     *
     * @return array ['synopsis' => string] or an empty array when nothing was found.
     */
    protected function extractSynopsis(): array
    {
        $selectors = [
            'span[itemprop=about]',
            'span[itemprop=description]',
            'div[itemprop=description]',
            'div.movieDetailDescription',
            'div.synopsis',
            'p.description',
            'meta[name="description"]',
            'meta[property="og:description"]',
        ];

        foreach ($selectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (!$node) {
                continue;
            }

            $text = trim((string) ($node->plaintext ?? ''));
            if ($text === '') {
                // Meta tags have no text content; their value lives in the "content"
                // attribute. A `plaintext ?? content` chain never reaches it when the
                // parser reports plaintext as an empty string instead of null.
                $text = trim((string) ($node->content ?? ''));
            }
            if ($text === '') {
                continue;
            }

            $stripped = preg_replace('/^Description:\s*/i', '', $text);

            // preg_replace() returns null on a PCRE error; keep the unstripped text then.
            return ['synopsis' => $stripped ?? $text];
        }

        return [];
    }
533
}
534
535