Passed
Push — master ( 73c322...402528 )
by Darko
12:47
created

AdePipe   F

Complexity

Total Complexity 69

Size/Duplication

Total Lines 378
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 69
eloc 190
c 1
b 0
f 0
dl 0
loc 378
rs 2.88

13 Methods

Rating   Name   Duplication   Size   Complexity  
A getDisplayName() 0 3 1
A process() 0 35 4
A getName() 0 3 1
A getBaseUrl() 0 3 1
C search() 0 62 13
A initializeSession() 0 15 2
B extractCast() 0 30 7
D getMovieInfo() 0 47 10
A extractTrailers() 0 22 5
B extractGenres() 0 31 7
A extractCovers() 0 24 4
A extractSynopsis() 0 22 6
B extractProductInfo() 0 41 8

How to fix   Complexity   

Complex Class

Complex classes like AdePipe often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AdePipe, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace App\Services\AdultProcessing\Pipes;
4
5
use App\Services\AdultProcessing\AdultProcessingPassable;
6
use App\Services\AdultProcessing\AdultProcessingResult;
7
8
/**
 * Adult DVD Empire (ADE) provider pipe.
 *
 * Handles movie information extraction from adultdvdempire.com: it searches
 * the site for the cleaned release title, keeps the most similar hit, fetches
 * that product page and scrapes synopsis, product info, cast, genres, covers
 * and trailer data from it.
 *
 * NOTE(review): fetchHtml(), getHtmlParser(), calculateSimilarity(),
 * outputMatch(), outputNotFound(), getHttpClient(), getDefaultHeaders() and
 * the $cookie / $minimumSimilarity properties are not defined in this class;
 * they are presumably inherited from AbstractAdultProviderPipe. fetchHtml()
 * is treated here as returning the page markup or false on failure - confirm
 * against the parent class.
 */
class AdePipe extends AbstractAdultProviderPipe
{
    protected int $priority = 40;

    private const BASE_URL = 'https://www.adultdvdempire.com';
    private const SEARCH_URL = '/dvd/search?q=';

    /** Absolute URL of the product page selected by the last successful search. */
    protected string $directUrl = '';

    /** Title of the product selected by the last successful search. */
    protected string $title = '';

    /** Raw HTML of the product page fetched by process(); '' when nothing was fetched. */
    protected string $response = '';

    /**
     * Machine-readable provider identifier.
     */
    public function getName(): string
    {
        return 'ade';
    }

    /**
     * Human-readable provider name.
     */
    public function getDisplayName(): string
    {
        return 'Adult DVD Empire';
    }

    /**
     * Base URL of the provider site.
     */
    protected function getBaseUrl(): string
    {
        return self::BASE_URL;
    }

    /**
     * Look up the passable's clean title on ADE and scrape the best match.
     *
     * @param AdultProcessingPassable $passable Supplies the title to search for via getCleanTitle().
     *
     * @return AdultProcessingResult notFound() when the search yields no acceptable match or no
     *                               movie data could be collected, failed() when the product page
     *                               cannot be fetched, matched() otherwise.
     */
    protected function process(AdultProcessingPassable $passable): AdultProcessingResult
    {
        $searchResult = $this->search($passable->getCleanTitle());

        if ($searchResult === false) {
            $this->outputNotFound();

            return AdultProcessingResult::notFound($this->getName());
        }

        $this->title = $searchResult['title'];
        $this->directUrl = $searchResult['url'];

        // Fetch the movie details page into a local first. $response is a
        // string-typed property: assigning false to it directly would be
        // coerced to '' (or throw under strict_types), so a failure check made
        // on the property afterwards could never fire.
        $html = $this->fetchHtml($this->directUrl, $this->cookie);

        if (!is_string($html)) {
            // Do not leave markup from a previously processed movie behind.
            $this->response = '';

            return AdultProcessingResult::failed('Failed to fetch movie details page', $this->getName());
        }

        $this->response = $html;
        $this->getHtmlParser()->loadHtml($this->response);

        $movieInfo = $this->getMovieInfo();

        if ($movieInfo === false) {
            return AdultProcessingResult::notFound($this->getName());
        }

        $this->outputMatch($this->title);

        return AdultProcessingResult::matched(
            $this->title,
            $this->getName(),
            $movieInfo
        );
    }

    /**
     * Search ADE for a title and return the most similar result.
     *
     * Selectors are tried in order; the first selector that produces at least
     * one usable candidate wins and later selectors are not consulted, even if
     * that candidate ends up below the similarity threshold.
     *
     * Side effects: calls initializeSession() and loads the search page into
     * the shared HTML parser.
     *
     * @param string $movie Title to search for.
     *
     * @return array{title: string, url: string}|false Best match with an absolute URL, or false when
     *                                                 the query is blank, the request fails, or no
     *                                                 candidate reaches $this->minimumSimilarity.
     */
    protected function search(string $movie): array|false
    {
        // A blank query cannot match anything; skip the network round trips.
        if (trim($movie) === '') {
            return false;
        }

        // Initialize session with age verification cookies
        $this->initializeSession();

        $searchUrl = self::BASE_URL . self::SEARCH_URL . rawurlencode($movie);
        $response = $this->fetchHtml($searchUrl, $this->cookie);

        if ($response === false) {
            return false;
        }

        $this->getHtmlParser()->loadHtml($response);

        // Try multiple search result selectors
        $resultSelectors = [
            'a[class=fancybox-button]',
            'div.card a.boxcover-link',
            'a[href*="/item/"]',
        ];

        $bestMatch = null;
        $highestSimilarity = 0;

        foreach ($resultSelectors as $selector) {
            $candidates = $this->getHtmlParser()->find($selector);
            if (empty($candidates)) {
                continue;
            }

            foreach ($candidates as $candidate) {
                // Prefer the title attribute, fall back to the link text.
                $title = trim((string) ($candidate->title ?? $candidate->getAttribute('title') ?? $candidate->plaintext));
                $url = trim((string) ($candidate->href ?? ''));

                if ($title === '' || $url === '') {
                    continue;
                }

                $similarity = $this->calculateSimilarity($movie, $title);

                if ($similarity > $highestSimilarity) {
                    $highestSimilarity = $similarity;
                    $bestMatch = [
                        'title' => $title,
                        'url' => $this->absoluteUrl($url),
                    ];
                }
            }

            // If we found results with this selector, don't try others
            if ($bestMatch !== null) {
                break;
            }
        }

        if ($bestMatch !== null && $highestSimilarity >= $this->minimumSimilarity) {
            return $bestMatch;
        }

        return false;
    }

    /**
     * Collect every piece of movie data available for the current product page.
     *
     * Expects process() to have set $title, $directUrl and $response and to
     * have loaded the product page into the shared HTML parser. The extractors
     * run in a fixed order (synopsis, product info, cast, genres, covers,
     * trailers); on a key collision the later extractor wins.
     *
     * NOTE(review): extractCast() and extractGenres() always contribute their
     * key, so unless a subclass overrides them the false return is effectively
     * unreachable; it is kept because it is part of the declared contract.
     *
     * @return array<string, mixed>|false Merged movie data, or false when nothing was collected.
     */
    protected function getMovieInfo(): array|false
    {
        $results = [];

        if ($this->directUrl !== '') {
            if ($this->title !== '') {
                $results['title'] = $this->title;
            }
            $results['directurl'] = $this->directUrl;
        }

        // Every extractor is declared to return an array, so the results can
        // be merged directly. Arguments are evaluated left to right, which
        // keeps the original call order (extractTrailers() issues a request).
        $results = array_merge(
            $results,
            $this->extractSynopsis(),
            $this->extractProductInfo(true),
            $this->extractCast(),
            $this->extractGenres(),
            $this->extractCovers(),
            $this->extractTrailers(),
        );

        return $results === [] ? false : $results;
    }

    /**
     * Fetch the product's trailer page and pull the player parameters out of it.
     *
     * The trailer page URL is derived from $directUrl by replacing '/item/'
     * with '/item/trailers/'.
     *
     * @return array{trailers?: array{url: string, streamid?: string, baseurl?: string}}
     *         Empty when the page cannot be fetched or references no .swf player.
     */
    protected function extractTrailers(): array
    {
        $trailersUrl = str_replace('/item/', '/item/trailers/', $this->directUrl);
        $trailersResponse = $this->fetchHtml($trailersUrl, $this->cookie);

        if ($trailersResponse === false) {
            return [];
        }

        // A quoted path ending in a literal ".swf" (the dot is escaped so that
        // e.g. "fooXswf" is not accepted).
        if (!preg_match('/["\'](?P<swf>[^"\']+\.swf)["\']/i', $trailersResponse, $swfHits)) {
            return [];
        }

        $trailers = ['url' => $this->absoluteUrl(trim($swfHits['swf']))];

        if (preg_match('#streamID:\s"(?P<streamid>[0-9A-Z]+)"#', $trailersResponse, $streamHits)) {
            $trailers['streamid'] = $streamHits['streamid'];
        }

        // The streaming host is expected as a dotted-quad address.
        if (preg_match('#BaseStreamingUrl:\s"(?P<baseurl>\d+\.\d+\.\d+\.\d+)"#', $trailersResponse, $baseHits)) {
            $trailers['baseurl'] = $baseHits['baseurl'];
        }

        return ['trailers' => $trailers];
    }

    /**
     * Derive the high-resolution front and back cover URLs from the box cover image.
     *
     * @return array{boxcover?: string, backcover?: string} Empty when no cover image is found.
     */
    protected function extractCovers(): array
    {
        // Try multiple selectors for better reliability
        $selectors = [
            'div#Boxcover img[itemprop=image]',
            'img[itemprop=image]',
            'div#Boxcover img',
            'div.boxcover img',
        ];

        foreach ($selectors as $selector) {
            $image = $this->getHtmlParser()->findOne($selector);
            if (!$image || !isset($image->src)) {
                continue;
            }

            // Swap the size suffix ("m.jpg" / "s.jpg") for the high-resolution
            // front ("h.jpg") and back ("bh.jpg") variants. A src without that
            // suffix is returned unchanged for both keys.
            return [
                'boxcover' => preg_replace('/[ms]\.jpg$/', 'h.jpg', $image->src),
                'backcover' => preg_replace('/[ms]\.jpg$/', 'bh.jpg', $image->src),
            ];
        }

        return [];
    }

    /**
     * Extract the synopsis from the first selector that yields non-blank text.
     *
     * @return array{synopsis?: string} Empty when no selector produces text.
     */
    protected function extractSynopsis(): array
    {
        // Selector => property to read, in priority order
        $selectors = [
            'meta[property="og:description"]' => 'content',
            'meta[name="description"]' => 'content',
            'div[itemprop="description"]' => 'plaintext',
            'p.synopsis' => 'plaintext',
        ];

        foreach ($selectors as $selector => $property) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (!$node || !isset($node->$property)) {
                continue;
            }

            // The cast also turns a false property value into ''.
            $text = trim((string) $node->$property);
            if ($text !== '') {
                return ['synopsis' => $text];
            }
        }

        return [];
    }

    /**
     * Extract the performer names from the product page.
     *
     * @return array{cast: list<string>} De-duplicated names; the key is always present.
     */
    protected function extractCast(): array
    {
        return [
            'cast' => $this->collectUniqueTexts([
                'div[itemprop="actor"] span[itemprop="name"]',
                'div.performer-list a',
                'a[href*="/performer/"]',
            ]),
        ];
    }

    /**
     * Extract the genre / category names from the product page.
     *
     * @return array{genres: list<string>} De-duplicated names; the key is always present.
     */
    protected function extractGenres(): array
    {
        return [
            'genres' => $this->collectUniqueTexts([
                'a[Label="Category"]',
                'div.categories a',
                'a[href*="/category/"]',
                'span[itemprop="genre"]',
            ]),
        ];
    }

    /**
     * Extract the product info table and, optionally, the features list.
     *
     * Works on $this->response with a dedicated parser instance, so the shared
     * parser state is left untouched.
     *
     * @param bool $extras When true, text found from the "Features" heading onwards
     *                     (the heading itself included) is returned under 'extras'.
     *
     * @return array{productinfo?: list<list<string>>, extras?: list<string>}
     *         Empty when the product info section is missing.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        // Rename the section's class so a plain class selector can find it.
        $markup = str_ireplace('Section ProductInfo', 'spdinfo', $this->response);
        $dom = new \voku\helper\HtmlDomParser();
        $dom->loadHtml($markup);

        $section = $dom->findOne('div[class=spdinfo]');
        if (!$section) {
            return [];
        }

        $res = [];
        $productInfo = [];
        $extrasData = [];
        $inFeatures = false;

        // Re-parse only the section so the text-node scan is limited to it.
        $sectionDom = $dom->loadHtml(trim($section->outertext));

        foreach ($sectionDom->find('text') as $node) {
            $text = trim($node->innertext);

            if ($text === 'Features') {
                $inFeatures = true;
            }

            if (!$inFeatures) {
                if ($text !== '&nbsp;') {
                    $productInfo[] = $text;
                }
            } elseif ($extras) {
                $extrasData[] = $text;
            }
        }

        // Drop the two leading text nodes, then pair the rest up.
        // NOTE(review): presumably those two are headings and the pairs are
        // label/value - confirm against the live markup.
        array_shift($productInfo);
        array_shift($productInfo);
        $res['productinfo'] = array_chunk($productInfo, 2, false);

        if (!empty($extrasData)) {
            $res['extras'] = $extrasData;
        }

        return $res;
    }

    /**
     * Initialize session by visiting the site to establish cookies.
     * ADE uses JavaScript-based age verification with cookies.
     *
     * Best effort: a failure is logged at debug level and otherwise ignored,
     * so the search still proceeds.
     */
    protected function initializeSession(): void
    {
        try {
            $client = $this->getHttpClient();

            // Visit the homepage first to establish a session
            $client->get(self::BASE_URL, [
                'headers' => $this->getDefaultHeaders(),
                'allow_redirects' => true,
            ]);

            usleep(300000); // 300ms delay
        } catch (\Exception $e) {
            \Illuminate\Support\Facades\Log::debug('ADE session initialization: ' . $e->getMessage());
        }
    }

    /**
     * Return the trimmed, de-duplicated text of the elements matched by the
     * first selector that yields any non-blank text.
     *
     * @param list<string> $selectors CSS selectors, in priority order.
     *
     * @return list<string>
     */
    private function collectUniqueTexts(array $selectors): array
    {
        $texts = [];

        foreach ($selectors as $selector) {
            $elements = $this->getHtmlParser()->find($selector);
            if (empty($elements)) {
                continue;
            }

            foreach ($elements as $element) {
                // The cast also turns a false plaintext value into ''.
                $text = trim((string) $element->plaintext);
                if ($text !== '') {
                    $texts[] = $text;
                }
            }

            // Stop at the first selector that produced something.
            if ($texts !== []) {
                break;
            }
        }

        return array_values(array_unique($texts));
    }

    /**
     * Turn a site-relative URL into an absolute one; URLs that already start
     * with "http" are returned unchanged.
     */
    private function absoluteUrl(string $url): string
    {
        return str_starts_with($url, 'http') ? $url : self::BASE_URL . $url;
    }
}
394
395