Passed
Push — master ( 73c322...402528 )
by Darko
12:47
created

HotmoviesPipe::extractGenres()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 9
c 1
b 0
f 0
dl 0
loc 16
rs 9.9666
cc 4
nc 2
nop 0
1
<?php
2
3
namespace App\Services\AdultProcessing\Pipes;
4
5
use App\Services\AdultProcessing\AdultProcessingPassable;
6
use App\Services\AdultProcessing\AdultProcessingResult;
7
8
/**
9
 * HotMovies provider pipe.
10
 *
11
 * Handles movie information extraction from hotmovies.com
12
 */
13
class HotmoviesPipe extends AbstractAdultProviderPipe
14
{
15
    protected int $priority = 50;
16
17
    private const BASE_URL = 'https://www.hotmovies.com';
18
    private const SEARCH_URL = '/search.php?words=';
19
    private const EXTRA_SEARCH = '&complete=on&search_in=video_title';
20
21
    protected string $directUrl = '';
22
    protected string $title = '';
23
    protected string $response = '';
24
25
    /**
     * Short machine identifier of this provider.
     *
     * The same value is handed to the AdultProcessingResult factories in
     * process() to label which provider produced a result.
     *
     * @return string Always the literal 'hotm'.
     */
    public function getName(): string
    {
        return 'hotm';
    }
29
30
    /**
     * Human-readable provider name.
     *
     * @return string Always the literal 'HotMovies'.
     */
    public function getDisplayName(): string
    {
        return 'HotMovies';
    }
34
35
    /**
     * Root URL of the provider site.
     *
     * @return string The value of self::BASE_URL.
     */
    protected function getBaseUrl(): string
    {
        return self::BASE_URL;
    }
39
40
    /**
     * Look up the passable's clean title on HotMovies and build a result.
     *
     * Steps: search() for the title, fetch the matched details page, load it
     * into the HTML parser, then collect the movie data via getMovieInfo().
     *
     * @param AdultProcessingPassable $passable Source of the clean title to look up.
     *
     * @return AdultProcessingResult notFound when search() or getMovieInfo() yields false,
     *                               failed when the details page cannot be fetched,
     *                               matched (title, provider name, movie info) otherwise.
     */
    protected function process(AdultProcessingPassable $passable): AdultProcessingResult
    {
        $movie = $passable->getCleanTitle();

        $searchResult = $this->search($movie);

        if ($searchResult === false) {
            $this->outputNotFound();

            return AdultProcessingResult::notFound($this->getName());
        }

        $this->title = $searchResult['title'];
        $this->directUrl = $searchResult['url'];

        // Fetch the movie details page into a local first. $this->response is
        // typed `string`, so assigning a `false` result directly would be
        // coerced to '' (or throw under strict_types) and the failure check
        // below could never trigger.
        $html = $this->fetchHtml($this->directUrl, $this->cookie);

        if ($html === false) {
            return AdultProcessingResult::failed('Failed to fetch movie details page', $this->getName());
        }

        $this->response = $html;

        $this->getHtmlParser()->loadHtml($this->response);

        $movieInfo = $this->getMovieInfo();

        if ($movieInfo === false) {
            return AdultProcessingResult::notFound($this->getName());
        }

        $this->outputMatch($this->title);

        return AdultProcessingResult::matched(
            $this->title,
            $this->getName(),
            $movieInfo
        );
    }
77
78
    /**
     * Search HotMovies for a title and pick the closest-matching result link.
     *
     * After initializeSession(), the search page is fetched and loaded into
     * the HTML parser. A fixed list of selectors is probed in order; every
     * link with a non-empty title and href is scored with
     * calculateSimilarity(). As soon as one selector has produced a best
     * match, the remaining selectors are skipped.
     *
     * @param string $movie Title to search for.
     *
     * @return array|false ['title' => ..., 'url' => ...] for the best-scoring link when its
     *                     score reaches $this->minimumSimilarity; false when the title is
     *                     empty, the fetch fails, or no link qualifies.
     */
    protected function search(string $movie): array|false
    {
        if (empty($movie)) {
            return false;
        }

        // Initialize session with age verification
        $this->initializeSession();

        $page = $this->fetchHtml(
            self::BASE_URL . self::SEARCH_URL . urlencode($movie) . self::EXTRA_SEARCH,
            $this->cookie
        );

        if ($page === false) {
            return false;
        }

        $this->getHtmlParser()->loadHtml($page);

        $winner = null;
        $topScore = 0;

        // Selectors are tried in this order.
        foreach (['h3[class=title] a[title]', 'h3.title a', 'div.movie-title a'] as $selector) {
            $links = $this->getHtmlParser()->find($selector);
            if (empty($links)) {
                continue;
            }

            foreach ($links as $link) {
                $candidateTitle = $link->title ?? $link->plaintext ?? '';
                $href = $link->href ?? '';

                if (empty($candidateTitle) || empty($href)) {
                    continue;
                }

                $score = $this->calculateSimilarity($movie, $candidateTitle);
                if (!($score > $topScore)) {
                    continue;
                }

                $topScore = $score;
                $winner = [
                    'title' => trim($candidateTitle),
                    // Relative links are made absolute against the site root.
                    'url' => str_starts_with($href, 'http') ? $href : self::BASE_URL . $href,
                ];
            }

            // If we found results with this selector, don't try others
            if ($winner !== null) {
                break;
            }
        }

        return ($winner !== null && $topScore >= $this->minimumSimilarity) ? $winner : false;
    }
141
142
    /**
     * Assemble all extracted movie data from the currently loaded details page.
     *
     * Starts with 'title' / 'directurl' (when a direct URL is set) and merges
     * in the output of the synopsis, product-info, cast, genre and cover
     * extractors, in that order. On duplicate string keys, later extractors
     * override earlier values.
     *
     * @return array|false Combined data, or false when nothing at all was collected.
     */
    protected function getMovieInfo(): array|false
    {
        $info = [];

        if (!empty($this->directUrl)) {
            if (!empty($this->title)) {
                $info['title'] = $this->title;
            }
            $info['directurl'] = $this->directUrl;
        }

        // Every extractor is declared to return an array, so the results can
        // be merged in one call; arguments are evaluated left to right, which
        // keeps the extractor call order.
        $info = array_merge(
            $info,
            $this->extractSynopsis(),
            $this->extractProductInfo(true),
            $this->extractCast(),
            $this->extractGenres(),
            $this->extractCovers(),
        );

        return empty($info) ? false : $info;
    }
185
186
    /**
     * Extract the movie synopsis from the loaded details page.
     *
     * Several selectors are tried in order. For each matched element the
     * innerText, plaintext and content properties are checked in turn and the
     * first one with non-empty trimmed text wins.
     *
     * @return array ['synopsis' => string]; the value is 'N/A' when no selector yields text.
     */
    protected function extractSynopsis(): array
    {
        $res = ['synopsis' => 'N/A'];

        // Try multiple selectors
        $selectors = [
            '.video_description',
            'div.description',
            'div.synopsis',
            'meta[name="description"]',
        ];

        foreach ($selectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (!$node) {
                continue;
            }

            // A plain `a ?? b ?? c` chain only falls through on null, so a
            // present-but-empty earlier property would hide a later one
            // (presumably the case for <meta>, whose text lives in `content`).
            // Check each candidate for actual text instead.
            $candidates = [
                $node->innerText ?? null,
                $node->plaintext ?? null,
                $node->content ?? null,
            ];

            foreach ($candidates as $candidate) {
                $text = trim((string) ($candidate ?? ''));
                if (!empty($text)) {
                    $res['synopsis'] = $text;

                    return $res;
                }
            }
        }

        return $res;
    }
213
214
    /**
     * Extract studio/director style product details from the loaded details page.
     *
     * Walks the text nodes of `div.page_video_info`. Collection starts at the
     * first fragment containing 'Studio:' and stops at a fragment containing
     * 'Custodian of Records' or 'Description'. The fragment following a
     * 'Director:' label is stored under 'director'; all other non-empty
     * fragments are collected and grouped in pairs under 'productinfo'.
     *
     * @param bool $extras Accepted for signature compatibility; not read by this implementation.
     *
     * @return array Possibly 'director' (string) and 'productinfo' (list of chunks of up to
     *               two fragments); empty when the info container or its text is missing.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        $res = [];
        $studio = false;
        $director = false;

        // NOTE(review): other extractors use findOne() for a single container; find() is kept
        // here as in the original — confirm the parser's collection supports ->find().
        if (($ret = $this->getHtmlParser()->find('div.page_video_info')) && !empty($ret->find('text'))) {
            $productinfo = [];

            foreach ($ret->find('text') as $node) {
                // NOTE(review): '&nbsp:' (with a colon) is kept verbatim; it may have been
                // intended as '&nbsp;' — confirm before changing.
                $text = str_replace([',', '...', '&nbsp:'], '', trim((string) $node->plaintext));

                if (stripos($text, 'Studio:') !== false) {
                    $studio = true;
                }

                if (str_contains($text, 'Director:')) {
                    $director = true;
                    // Drop the label itself. An empty string (rather than null) keeps the
                    // stripos() calls below free of the PHP 8.1 null-argument deprecation.
                    $text = '';
                }

                if ($studio !== true) {
                    continue;
                }

                // Reaching the legal/description section ends the product details.
                if (stripos($text, 'Custodian of Records') !== false || stripos($text, 'Description') !== false) {
                    break;
                }

                if (empty($text)) {
                    continue;
                }

                if ($director === true) {
                    // First non-empty fragment after the 'Director:' label is the director.
                    $res['director'] = $text;
                    $director = false;

                    continue;
                }

                $productinfo[] = $text;
            }

            // Presumably label/value pairs — grouped two consecutive fragments at a time.
            $res['productinfo'] = array_chunk($productinfo, 2);
        }

        return $res;
    }
260
261
    /**
     * Extract performer names from the loaded details page.
     *
     * Titled links inside the `.stars` container are used first, with any
     * parenthesised part removed from each title. When that yields nothing,
     * the text of every link whose href contains "/performers/" is used.
     *
     * @return array ['cast' => list of unique names], or an empty array when none were found.
     */
    protected function extractCast(): array
    {
        $names = [];

        // Prefer scoped search within stars container to avoid unrelated links
        $starsBox = $this->getHtmlParser()->findOne('.stars');
        if ($starsBox) {
            foreach ($starsBox->find('a[title]') as $link) {
                $cleaned = trim(preg_replace('/\((.*)\)/', '', trim($link->title)));
                if ($cleaned === '') {
                    continue;
                }
                $names[] = $cleaned;
            }
        }

        // Fallback: anchors that look like performer links
        if (empty($names)) {
            foreach ($this->getHtmlParser()->find('a[href*="/performers/"]') as $link) {
                $label = trim($link->plaintext);
                if ($label === '') {
                    continue;
                }
                $names[] = $label;
            }
        }

        if (empty($names)) {
            return [];
        }

        // De-duplicate and reindex so the result is a plain list.
        return ['cast' => array_values(array_unique($names))];
    }
294
295
    /**
     * Extract genre names from the `div.categories` block of the loaded page.
     *
     * Only links whose title attribute contains ' -> ' are considered. The
     * genre is the second ' -> '-separated segment of the link text.
     *
     * @return array ['genres' => list of names] when the categories block exists
     *               (the list may be empty); an empty array otherwise.
     */
    protected function extractGenres(): array
    {
        $container = $this->getHtmlParser()->findOne('div.categories');
        if (!$container) {
            return [];
        }

        $genres = [];

        foreach ($container->find('a') as $link) {
            // The title attribute may be absent; treat that as "no separator".
            if (!str_contains((string) ($link->title ?? ''), ' -> ')) {
                continue;
            }

            $segments = explode(' -> ', (string) ($link->plaintext ?? ''));

            // The separator was detected in the title, but the link text is what gets
            // split; guard the index so a link text without ' -> ' neither raises an
            // undefined-index warning nor adds an empty genre.
            $genre = trim($segments[1] ?? '');
            if ($genre !== '') {
                $genres[] = $genre;
            }
        }

        return ['genres' => $genres];
    }
312
313
    /**
     * Extract front and back cover image URLs from the loaded details page.
     *
     * The first selector that matches an element with a `src` wins. The back
     * cover URL is derived from the front one by case-insensitively replacing
     * '.cover' with '.back' and 'front' with 'back'.
     *
     * @return array ['boxcover' => string, 'backcover' => string], or an empty array
     *               when no cover image is found.
     */
    protected function extractCovers(): array
    {
        // Try multiple selectors
        $selectors = [
            'img#cover',
            'div#large_cover img',
            'img.boxcover',
            'div.product-image img',
        ];

        foreach ($selectors as $selector) {
            $image = $this->getHtmlParser()->findOne($selector);
            if (!$image || !isset($image->src)) {
                continue;
            }

            $front = trim($image->src);

            return [
                'boxcover' => $front,
                'backcover' => str_ireplace(['.cover', 'front'], ['.back', 'back'], $front),
            ];
        }

        return [];
    }
337
338
    /**
     * Warm up a session by requesting the site homepage.
     *
     * HotMovies uses cookie-based age verification, so the homepage is
     * visited before searching. This is best-effort: any exception is logged
     * at debug level and otherwise ignored.
     */
    protected function initializeSession(): void
    {
        try {
            // Visit the homepage first to establish a session
            $this->getHttpClient()->get(self::BASE_URL, [
                'headers' => $this->getDefaultHeaders(),
                'allow_redirects' => true,
            ]);

            // Pause 300ms before the follow-up request.
            usleep(300_000);
        } catch (\Exception $e) {
            \Illuminate\Support\Facades\Log::debug('HotMovies session initialization: ' . $e->getMessage());
        }
    }
359
}
360
361