Passed
Push — master ( 860b7e...86fe81 )
by Dispositif
03:31
created

convertIATimestampToDateTime()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 18
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
eloc 12
dl 0
loc 18
rs 9.8666
c 1
b 0
f 1
cc 2
nc 2
nop 1
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Infrastructure;
11
12
use App\Application\InfrastructurePorts\HttpClientInterface;
13
use App\Domain\InfrastructurePorts\DeadlinkArchiverInterface;
14
use App\Domain\Models\WebarchiveDTO;
15
use DateTime;
16
use DateTimeInterface;
17
use Psr\Log\LoggerInterface;
18
use Psr\Log\NullLogger;
19
20
/**
 * Dead-link archiver backed by the Internet Archive "Wayback Availability" JSON API.
 *
 * Given a URL (and optionally a reference date), asks the API for the closest
 * archived snapshot and exposes it as a WebarchiveDTO.
 *
 * @see https://archive.org/help/wayback_api.php
 */
class InternetArchiveAdapter implements DeadlinkArchiverInterface
{
    final public const ARCHIVER_NAME = '[[Internet Archive]]'; // [[Wayback Machine]] ?

    /** Timestamp sent to the API when the caller does not provide a reference date. */
    private const SEARCH_CLOSEST_TIMESTAMP = '20220101';

    /** date() layout of a full Wayback timestamp: "YYYYMMDDhhmmss". */
    private const IA_TIMESTAMP_FORMAT = 'YmdHis';

    /**
     * @param HttpClientInterface $client HTTP client used to call the availability API
     * @param LoggerInterface     $log    receives debug/info messages about rejected responses
     */
    public function __construct(
        protected readonly HttpClientInterface $client,
        protected readonly LoggerInterface     $log = new NullLogger(),
    )
    {
    }

    /**
     * Look up the archived snapshot of $url closest to $date.
     *
     * @param string                 $url  the original (possibly dead) URL
     * @param DateTimeInterface|null $date reference date; when null, SEARCH_CLOSEST_TIMESTAMP is used
     *
     * @return WebarchiveDTO|null null when the API reports no usable snapshot
     */
    public function searchWebarchive(string $url, ?DateTimeInterface $date = null): ?WebarchiveDTO
    {
        $archiveData = $this->requestInternetArchiveApi($url, $date);
        if (empty($archiveData)) {
            return null;
        }

        // Under strict_types a non-string timestamp would raise a TypeError, so normalise it first.
        $rawTimestamp = $archiveData['timestamp'] ?? null;
        $iaTimestamp = (is_string($rawTimestamp) || is_int($rawTimestamp)) ? (string)$rawTimestamp : null;

        return new WebarchiveDTO(
            self::ARCHIVER_NAME,
            $url,
            (string)$archiveData['url'],
            $this->convertIATimestampToDateTime($iaTimestamp)
        );
    }

    /**
     * Query the availability API and return the validated "closest" snapshot.
     *
     * Every failure (non-200 response, unparsable body, missing or invalid
     * snapshot) is logged and reported as an empty array.
     *
     * @param string                 $url  the original URL to search for
     * @param DateTimeInterface|null $date reference date for the "closest" search
     *
     * @return array the "closest" snapshot entry of the response, or [] when unusable
     */
    protected function requestInternetArchiveApi(string $url, ?DateTimeInterface $date = null): array
    {
        // getenv() returns false when the variable is unset: omit the header instead of sending a bogus value.
        $headers = [];
        $userAgent = getenv('USER_AGENT');
        if (is_string($userAgent) && $userAgent !== '') {
            $headers['User-Agent'] = $userAgent;
        }

        $response = $this->client->get(
            'https://archive.org/wayback/available?timestamp=' . $this->buildSearchTimestamp($date)
            . '&url=' . urlencode($url),
            [
                'timeout' => 20,
                'allow_redirects' => true,
                'headers' => $headers,
                'http_errors' => false, // no Exception on 4xx 5xx
                // NOTE(review): TLS certificate verification is disabled here; kept as-is to
                // preserve behaviour, but confirm that this is really intended.
                'verify' => false,
            ]
        );

        if ($response->getStatusCode() !== 200) {
            $this->log->debug('InternetArchive: incorrect response', [
                'status' => $response->getStatusCode(),
                'content-type' => $response->getHeader('Content-Type'),
            ]);

            return [];
        }

        $jsonString = $response->getBody()->getContents();
        try {
            $data = json_decode($jsonString, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $exception) {
            // A 200 response with a non-JSON body is handled like the other failures.
            $this->log->debug('InternetArchive: invalid JSON', [
                'url' => $url,
                'error' => $exception->getMessage(),
            ]);

            return [];
        }

        // "??" tolerates any missing level, including an empty "archived_snapshots" object.
        $closest = is_array($data) ? ($data['archived_snapshots']['closest'] ?? null) : null;
        if (!is_array($closest)) {
            $this->log->info('InternetArchive: no closest snapshot', [
                'url' => $url,
                'date' => $date,
                'json' => $jsonString,
            ]);

            return [];
        }

        // validate snapshot data
        $hasOkStatus = in_array($closest['status'] ?? null, ['200', 200], true);
        $isAvailable = ($closest['available'] ?? false) === true;
        if (!$hasOkStatus || !$isAvailable || empty($closest['url'])) {
            $this->log->debug('InternetArchive: snapshot invalid', $closest);

            return [];
        }

        return $closest;
    }

    /**
     * Convert a Wayback timestamp ("YYYYMMDDhhmmss") into a date object.
     *
     * The timestamp is interpreted as UTC. Anything that is not exactly
     * 14 digits is rejected rather than being sliced into a bogus date.
     *
     * @return DateTimeInterface|null null when the timestamp is missing or malformed
     */
    protected function convertIATimestampToDateTime(?string $iaTimestamp): ?DateTimeInterface
    {
        if (empty($iaTimestamp)) {
            return null;
        }
        if (preg_match('/^\d{14}$/D', $iaTimestamp) !== 1) {
            $this->log->debug('InternetArchive: unexpected timestamp format', ['timestamp' => $iaTimestamp]);

            return null;
        }

        // The leading "!" resets every field not present in the format (e.g. microseconds) to zero.
        $iaDateTime = DateTime::createFromFormat(
            '!' . self::IA_TIMESTAMP_FORMAT,
            $iaTimestamp,
            new \DateTimeZone('UTC')
        );

        return $iaDateTime === false ? null : $iaDateTime;
    }

    /**
     * Build the "timestamp" query value: the caller's date expressed in UTC,
     * or the default search timestamp when no date is given.
     */
    private function buildSearchTimestamp(?DateTimeInterface $date): string
    {
        if ($date === null) {
            return self::SEARCH_CLOSEST_TIMESTAMP;
        }

        return \DateTimeImmutable::createFromInterface($date)
            ->setTimezone(new \DateTimeZone('UTC'))
            ->format(self::IA_TIMESTAMP_FORMAT);
    }
}