IafdPipe::getName()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 1
c 1
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 0
<?php

namespace App\Services\AdultProcessing\Pipes;

use App\Services\AdultProcessing\AdultProcessingPassable;
use App\Services\AdultProcessing\AdultProcessingResult;

/**
 * IAFD (Internet Adult Film Database) provider pipe.
 *
 * Handles movie information extraction from iafd.com
 * IAFD is a comprehensive database with excellent metadata coverage.
 *
 * Flow: look the title up in the search cache, otherwise run a site search,
 * pick the most similar result, fetch its details page and collect metadata
 * from JSON-LD first and from the HTML markup second.
 */
class IafdPipe extends AbstractAdultProviderPipe
{
    protected int $priority = 12; // High priority - good database

    private const BASE_URL = 'https://www.iafd.com';
    private const SEARCH_URL = '/results.asp?searchtype=comprehensive&searchstring=';

    /** URL of the details page selected by the most recent search. */
    protected string $directUrl = '';

    /** Title of the search result selected by the most recent search. */
    protected string $title = '';

    /** Raw HTML of the most recently fetched details page. */
    protected string $response = '';

    /** JSON-LD data extracted from the details page, or null when none was extracted. */
    protected ?array $jsonLdData = null;

    /**
     * Machine-readable provider identifier.
     */
    public function getName(): string
    {
        return 'iafd';
    }

    /**
     * Human-readable provider name.
     */
    public function getDisplayName(): string
    {
        return 'Internet Adult Film Database';
    }

    /**
     * Root URL of the provider's site.
     */
    protected function getBaseUrl(): string
    {
        return self::BASE_URL;
    }

    /**
     * Resolve the passable's clean title against IAFD.
     *
     * Returns a "matched" result carrying the collected metadata, a
     * "not found" result when the search or the metadata extraction yields
     * nothing (this outcome is cached), or a "failed" result when the details
     * page cannot be fetched (this outcome is NOT cached, so it is retried).
     */
    protected function process(AdultProcessingPassable $passable): AdultProcessingResult
    {
        $movie = $passable->getCleanTitle();

        // Check cache first: null means "no entry", false means "known miss".
        $cached = $this->getCachedSearch($movie);
        if ($cached !== null) {
            if ($cached === false) {
                return AdultProcessingResult::notFound($this->getName());
            }

            return AdultProcessingResult::matched(
                $cached['title'] ?? $movie,
                $this->getName(),
                $cached
            );
        }

        $searchResult = $this->search($movie);

        if ($searchResult === false) {
            $this->cacheSearchResult($movie, false);
            $this->outputNotFound();

            return AdultProcessingResult::notFound($this->getName());
        }

        $this->title = $searchResult['title'];
        $this->directUrl = $searchResult['url'];

        // Fetch the movie details page into a local first: $response is typed
        // `string`, so assigning a `false` return directly would be coerced to
        // '' (or throw under strict types) and the failure would go unnoticed.
        $html = $this->fetchHtml($this->directUrl, $this->cookie);

        if (!is_string($html) || $html === '') {
            return AdultProcessingResult::failed('Failed to fetch movie details page', $this->getName());
        }

        $this->response = $html;

        // Try to extract JSON-LD data first
        $this->jsonLdData = $this->extractJsonLd($this->response);

        $this->getHtmlParser()->loadHtml($this->response);

        $movieInfo = $this->getMovieInfo();

        if ($movieInfo === false) {
            $this->cacheSearchResult($movie, false);

            return AdultProcessingResult::notFound($this->getName());
        }

        // Cache the successful result
        $this->cacheSearchResult($movie, $movieInfo);

        $this->outputMatch($this->title);

        return AdultProcessingResult::matched(
            $this->title,
            $this->getName(),
            $movieInfo
        );
    }

    /**
     * Search IAFD for a title and return the most similar result.
     *
     * @return array{title: string, url: string, similarity: mixed}|false
     *         The best candidate whose similarity reaches $this->minimumSimilarity,
     *         or false when the title is blank, the request fails, or no
     *         candidate is similar enough.
     */
    protected function search(string $movie): array|false
    {
        // Compare against '' rather than empty(): the title "0" is searchable.
        if (trim($movie) === '') {
            return false;
        }

        $searchUrl = self::BASE_URL . self::SEARCH_URL . urlencode($movie);
        $response = $this->fetchHtml($searchUrl, $this->cookie);

        if ($response === false) {
            return false;
        }

        $this->getHtmlParser()->loadHtml($response);

        // Look for movie results table
        $movieResults = $this->getHtmlParser()->find('table#titleresult tr');

        if (empty($movieResults)) {
            // Try alternative selectors
            $movieResults = $this->getHtmlParser()->find('div.panel-body a[href*="/title.rme"]');
        }

        $bestMatch = null;
        $highestSimilarity = 0;

        foreach ($movieResults as $result) {
            // A table row wraps the title link; otherwise the result is the link itself.
            $link = $result->tag === 'tr'
                ? $result->findOne('a[href*="/title.rme"]')
                : $result;

            if (!$link) {
                continue;
            }

            // Prefer the title attribute, but fall back to the link text when
            // the attribute is missing OR empty.
            $title = $this->firstNonEmptyString($link->title ?? null, $link->plaintext ?? null);
            $href = $this->firstNonEmptyString($link->href ?? null);

            if ($title === '' || $href === '') {
                continue;
            }

            $similarity = $this->calculateSimilarity($movie, $title);

            if ($similarity > $highestSimilarity) {
                $highestSimilarity = $similarity;
                $bestMatch = [
                    'title' => $title,
                    'url' => $this->absoluteUrl($href),
                    'similarity' => $similarity,
                ];
            }
        }

        if ($bestMatch !== null && $highestSimilarity >= $this->minimumSimilarity) {
            return $bestMatch;
        }

        return false;
    }

    /**
     * Assemble the metadata for the currently loaded details page.
     *
     * Precedence (highest first): JSON-LD values, then the title/URL chosen
     * by the search, then values scraped from the HTML. HTML values only fill
     * keys that are still missing or empty.
     *
     * @return array<string, mixed>|false False when neither a title nor a box cover was found.
     */
    protected function getMovieInfo(): array|false
    {
        $results = [];

        if ($this->directUrl !== '') {
            if ($this->title !== '') {
                $results['title'] = $this->title;
            }
            $results['directurl'] = $this->directUrl;
        }

        // Try to get data from JSON-LD first (most reliable)
        if ($this->jsonLdData !== null) {
            $results = array_merge($results, $this->extractFromJsonLd());
        }

        // Get all the movie data (HTML fallback)
        $htmlSources = [
            $this->extractSynopsis(),
            $this->extractProductInfo(true),
            $this->extractCast(),
            $this->extractGenres(),
            $this->extractCovers(),
        ];

        foreach ($htmlSources as $fields) {
            foreach ($fields as $key => $value) {
                // Fallback only: never overwrite a value that is already populated.
                if (empty($results[$key])) {
                    $results[$key] = $value;
                }
            }
        }

        if (empty($results['title']) && empty($results['boxcover'])) {
            return false;
        }

        return $results;
    }

    /**
     * Extract data from JSON-LD structured data.
     *
     * Reads name, description, image, director, actor and genre from
     * $this->jsonLdData. Each of image/director/actor may be a plain string,
     * a single object (associative array) or a list of either.
     *
     * @return array<string, mixed> Only keys with a non-empty value are present.
     */
    protected function extractFromJsonLd(): array
    {
        $data = $this->jsonLdData;

        if ($data === null) {
            return [];
        }

        $results = [];

        $title = $this->firstNonEmptyString($data['name'] ?? null);
        if ($title !== '') {
            $results['title'] = $title;
        }

        $synopsis = $this->firstNonEmptyString($data['description'] ?? null);
        if ($synopsis !== '') {
            $results['synopsis'] = $synopsis;
        }

        $image = $this->jsonLdImageUrl($data['image'] ?? null);
        if ($image !== '') {
            $results['boxcover'] = $image;
        }

        $directors = $this->jsonLdNames($data['director'] ?? null);
        if ($directors !== []) {
            $results['director'] = $directors[0];
        }

        $cast = $this->jsonLdNames($data['actor'] ?? null);
        if ($cast !== []) {
            $results['cast'] = $cast;
        }

        $genres = [];
        foreach ((array) ($data['genre'] ?? []) as $genre) {
            $genre = $this->firstNonEmptyString($genre);
            if ($genre !== '') {
                $genres[] = $genre;
            }
        }
        if ($genres !== []) {
            $results['genres'] = $genres;
        }

        return $results;
    }

    /**
     * Find the box cover image on the loaded details page.
     *
     * @return array{boxcover?: string} Absolute cover URL, or an empty array when none is found.
     */
    protected function extractCovers(): array
    {
        // IAFD typically uses specific cover image selectors
        $selectors = [
            'div#titlecover img',
            'img#titlecover',
            'div.coverbox img',
            'img[src*="cover"]',
            'meta[property="og:image"]',
        ];

        foreach ($selectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (!$node) {
                continue;
            }

            // <img> carries the URL in src, <meta> in content; an empty src
            // must not block the fallback to content.
            $coverUrl = $this->firstNonEmptyString($node->src ?? null, $node->content ?? null);
            if ($coverUrl !== '') {
                return ['boxcover' => $this->absoluteUrl($coverUrl)];
            }
        }

        return [];
    }

    /**
     * Find the synopsis text on the loaded details page.
     *
     * @return array{synopsis?: string} Trimmed synopsis, or an empty array when none is found.
     */
    protected function extractSynopsis(): array
    {
        $selectors = [
            'div#synopsis',
            'div.synopsis',
            'td.syno',
            'p[class*="synopsis"]',
            'meta[name="description"]',
        ];

        foreach ($selectors as $selector) {
            $node = $this->getHtmlParser()->findOne($selector);
            if (!$node) {
                continue;
            }

            // Element text first; <meta> has no text, so fall back to content
            // when the text is missing or empty.
            $text = $this->firstNonEmptyString($node->plaintext ?? null, $node->content ?? null);
            if ($text !== '') {
                return ['synopsis' => $text];
            }
        }

        return [];
    }

    /**
     * Collect performer names from the loaded details page.
     *
     * Looks inside the cast container first and falls back to any performer
     * link on the page. Names of two bytes or fewer are ignored.
     *
     * @return array{cast?: list<string>} De-duplicated, sequentially indexed names.
     */
    protected function extractCast(): array
    {
        $cast = [];

        // IAFD has a specific cast table structure
        $castTable = $this->getHtmlParser()->findOne('table#perfcast, div#perfcast');

        if ($castTable) {
            foreach ($castTable->find('a[href*="/person.rme"]') as $performer) {
                $name = trim($performer->plaintext ?? '');
                if (strlen($name) > 2) {
                    $cast[] = $name;
                }
            }
        }

        // Fallback to general performer links
        if ($cast === []) {
            $selectors = [
                'a[href*="/person.rme"]',
                'a[href*="/performer/"]',
            ];

            foreach ($selectors as $selector) {
                $elements = $this->getHtmlParser()->find($selector);
                if (empty($elements)) {
                    continue;
                }

                foreach ($elements as $element) {
                    $name = trim($element->plaintext ?? '');
                    if (strlen($name) > 2 && !str_contains(strtolower($name), 'director')) {
                        $cast[] = $name;
                    }
                }

                if ($cast !== []) {
                    break;
                }
            }
        }

        if ($cast === []) {
            return [];
        }

        // array_unique() keeps the original keys; re-index so the list stays
        // a list (and serialises as a JSON array, not an object).
        return ['cast' => array_values(array_unique($cast))];
    }

    /**
     * Collect genre labels from the loaded details page.
     *
     * The first selector that yields at least one label wins. Labels of one
     * byte or fewer are ignored.
     *
     * @return array{genres?: list<string>} De-duplicated, sequentially indexed labels.
     */
    protected function extractGenres(): array
    {
        $genres = [];

        $selectors = [
            'a[href*="/genre.rme"]',
            'div.genres a',
            'span.genre a',
        ];

        foreach ($selectors as $selector) {
            $elements = $this->getHtmlParser()->find($selector);
            if (empty($elements)) {
                continue;
            }

            foreach ($elements as $element) {
                $text = trim($element->plaintext ?? '');
                if (strlen($text) > 1) {
                    $genres[] = $text;
                }
            }

            if ($genres !== []) {
                break;
            }
        }

        if ($genres === []) {
            return [];
        }

        // Re-index after de-duplication; see extractCast() in this class for the same reason.
        return ['genres' => array_values(array_unique($genres))];
    }

    /**
     * Collect studio, distributor, year, director and runtime from the loaded page.
     *
     * @param bool $extras Not used by this implementation; kept so the
     *                     signature stays compatible with existing callers.
     *
     * @return array<string, string> Any of: studio, distributor, year, director, runtime.
     */
    protected function extractProductInfo(bool $extras = false): array
    {
        $res = [];

        // Look for studio
        $studio = $this->getHtmlParser()->findOne('a[href*="/studio.rme"]');
        if ($studio) {
            $res['studio'] = trim($studio->plaintext ?? '');
        }

        // Look for distributor
        $distributor = $this->getHtmlParser()->findOne('a[href*="/distrib.rme"]');
        if ($distributor) {
            $res['distributor'] = trim($distributor->plaintext ?? '');
        }

        // Look for release year: first parenthesised four-digit number in the raw HTML.
        if (preg_match('/\((\d{4})\)/', $this->response, $yearMatch)) {
            $res['year'] = $yearMatch[1];
        }

        // Look for director
        $directorSelectors = [
            'a[href*="/person.rme"][title*="director"]',
            'p:contains("Director") a',
        ];

        foreach ($directorSelectors as $selector) {
            $director = $this->getHtmlParser()->findOne($selector);
            if ($director) {
                $res['director'] = trim($director->plaintext ?? '');
                break;
            }
        }

        // Try to extract from info table
        $infoRows = $this->getHtmlParser()->find('table.biodata tr, div.biodata p');
        foreach ($infoRows as $row) {
            $text = $row->plaintext ?? '';

            $director = $this->valueAfterLabel($text, 'Director:');
            if ($director !== null) {
                $res['director'] = $director;
            }

            // The studio link, when it produced a value, takes precedence over the table.
            $studioText = $this->valueAfterLabel($text, 'Studio:');
            if ($studioText !== null && empty($res['studio'])) {
                $res['studio'] = $studioText;
            }

            $runtime = $this->valueAfterLabel($text, 'Minutes:', 'Runtime:');
            if ($runtime !== null) {
                $res['runtime'] = $runtime;
            }
        }

        return $res;
    }

    /**
     * Return the first candidate that is a scalar with non-blank text, trimmed.
     *
     * Unlike a `??` chain this also skips empty strings, so an attribute that
     * is present-but-empty does not block the next fallback.
     *
     * @return string The trimmed text, or '' when no candidate qualifies.
     */
    private function firstNonEmptyString(mixed ...$candidates): string
    {
        foreach ($candidates as $candidate) {
            if (!is_scalar($candidate)) {
                continue;
            }

            $text = trim((string) $candidate);
            if ($text !== '') {
                return $text;
            }
        }

        return '';
    }

    /**
     * Turn a site-relative or protocol-relative URL into an absolute one.
     */
    private function absoluteUrl(string $url): string
    {
        if (str_starts_with($url, '//')) {
            return 'https:' . $url;
        }

        if (preg_match('#^https?://#i', $url) === 1) {
            return $url;
        }

        return self::BASE_URL . '/' . ltrim($url, '/');
    }

    /**
     * Resolve a JSON-LD "image" value to a single URL string.
     *
     * Accepts a URL string, an object with url/contentUrl, or a list of either
     * (the first entry is used).
     */
    private function jsonLdImageUrl(mixed $image): string
    {
        if (is_array($image)) {
            $image = $image['url'] ?? $image['contentUrl'] ?? $image[0] ?? null;

            // First list entry may itself be an image object.
            if (is_array($image)) {
                $image = $image['url'] ?? $image['contentUrl'] ?? null;
            }
        }

        return $this->firstNonEmptyString($image);
    }

    /**
     * Resolve a JSON-LD person-like value to a list of names.
     *
     * Accepts a name string, an object with a "name" key, or a list of either.
     *
     * @return list<string>
     */
    private function jsonLdNames(mixed $value): array
    {
        // A single string or a single object is treated as a one-element list,
        // so an object's properties are never mistaken for list entries.
        if (!is_array($value) || array_key_exists('name', $value)) {
            $value = [$value];
        }

        $names = [];
        foreach ($value as $entry) {
            $name = $this->firstNonEmptyString(is_array($entry) ? ($entry['name'] ?? null) : $entry);
            if ($name !== '') {
                $names[] = $name;
            }
        }

        return $names;
    }

    /**
     * If $text contains one of the labels (case-insensitively), return the
     * trimmed text that follows the first colon in $text; otherwise null.
     */
    private function valueAfterLabel(string $text, string ...$labels): ?string
    {
        foreach ($labels as $label) {
            if (stripos($text, $label) !== false) {
                return trim(explode(':', $text, 2)[1] ?? '');
            }
        }

        return null;
    }
}