LicenseLookup::extractCans()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 1
c 1
b 0
f 0
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Dominikb\ComposerLicenseChecker;
6
7
use DateTimeImmutable;
8
use Dominikb\ComposerLicenseChecker\Contracts\LicenseLookup as LicenseLookupContract;
9
use Dominikb\ComposerLicenseChecker\Exceptions\NoLookupPossibleException;
10
use GuzzleHttp\ClientInterface;
11
use GuzzleHttp\Exception\GuzzleException;
12
use Symfony\Component\Cache\Adapter\FilesystemAdapter;
13
use Symfony\Component\DomCrawler\Crawler;
14
use Symfony\Contracts\Cache\CacheInterface;
15
16
class LicenseLookup implements LicenseLookupContract
17
{
18
    /** Base URL of the site that is searched and scraped for license details. */
    public const API_HOST = 'https://tldrlegal.com';

    /** @var ClientInterface HTTP client used for the search and detail page requests. */
    protected $http;

    /** @var CacheInterface Cache consulted by lookUp() before any request is made. */
    protected $cache;

    /** @var string[] License names for which no lookup is attempted. */
    private static $noLookup = [
        'none',
        'proprietary',
    ];
29
30 4
    /**
     * @param  ClientInterface  $http  Client used to send the HTTP requests.
     * @param  CacheInterface|null  $cache  Cache for lookup results; when omitted, a filesystem
     *                                      cache below "../.cache" (relative to this file) with a
     *                                      default lifetime of 3600 seconds is created.
     */
    public function __construct(ClientInterface $http, ?CacheInterface $cache = null)
    {
        if ($cache === null) {
            $cache = new FilesystemAdapter('FilesystemCache', 3600, __DIR__.'/../.cache');
        }

        $this->http = $http;
        $this->cache = $cache;
    }
35
36 4
    /**
     * Look up the details of a license, going through the cache first.
     *
     * When the lookup fails with a NoLookupPossibleException, a NoLookupLicenses
     * placeholder is returned (and cached) instead.
     *
     * @param  string  $licenseName
     * @return License
     */
    public function lookUp(string $licenseName): License
    {
        // PSR-6 reserves the characters {}()/\@: in cache keys and Symfony's cache
        // adapters reject keys containing them (e.g. "(MIT or Apache-2.0)").
        // Only such names are encoded, so every key that worked before is unchanged.
        $cacheKey = strpbrk($licenseName, '{}()/\\@:') === false
            ? $licenseName
            : rawurlencode($licenseName);

        return $this->cache->get($cacheKey, function () use ($licenseName) {
            try {
                $detailsPageUrl = $this->queryForDetailPageUrl($licenseName);

                return $this->resolveLicenseInformation($licenseName, $detailsPageUrl);
            } catch (NoLookupPossibleException $exception) {
                return new NoLookupLicenses($licenseName);
            }
        });
    }
50
51
    /**
     * Search the license site for the given name and return the absolute URL
     * of the best matching detail page.
     *
     * @param  string  $licenseShortName
     * @return string
     *
     * @throws NoLookupPossibleException when the name is on the no-lookup list, the search
     *                                   request fails, or the search yields no usable result
     */
    private function queryForDetailPageUrl(string $licenseShortName): string
    {
        if (in_array($licenseShortName, self::$noLookup, true)) {
            throw new NoLookupPossibleException;
        }

        // Encode the name so characters such as "+", "&" or "#" stay part of the query value.
        $searchUrl = sprintf('%s/search?query=%s', static::API_HOST, rawurlencode($licenseShortName));

        try {
            $response = $this->http->request('get', $searchUrl);
        } catch (GuzzleException $exception) {
            throw new NoLookupPossibleException($exception->getMessage(), $exception->getCode(), $exception);
        }

        $crawler = $this->makeCrawler($response->getBody()->getContents());

        $headings = $crawler->filter('div.search-result-items .cc-semibold')->extract(['_text']);
        $links = $crawler->filter('div.search-result-items .c-link-arrow')->extract(['href']);

        // Pair every heading with the link found at the same position.
        $zipped = array_map(null, $headings, $links);

        $relativeUrl = $this->findBestMatch($zipped, $licenseShortName);

        // Without a match the bare host would be returned and scraped as if it were a detail page.
        if ($relativeUrl === '') {
            throw new NoLookupPossibleException(
                sprintf('No search result matches the license "%s".', $licenseShortName)
            );
        }

        return static::API_HOST.$relativeUrl;
    }
82
83 2
    /**
     * Wrap an HTML document in a Crawler so it can be queried with CSS selectors.
     *
     * @param  string  $html
     * @return Crawler
     */
    private function makeCrawler(string $html): Crawler
    {
        $crawler = new Crawler($html);

        return $crawler;
    }
87
88
    /**
     * Fetch the detail page of a license and build a License object from its content.
     *
     * The "can", "cannot" and "must" lists are read from the page; the page URL is
     * recorded as the source and the current time as the creation date.
     *
     * @param  string  $licenseShortName
     * @param  string  $detailsPageUrl
     * @return License
     *
     * @throws NoLookupPossibleException when a GuzzleException is raised while resolving the page
     */
    private function resolveLicenseInformation(string $licenseShortName, string $detailsPageUrl): License
    {
        try {
            $html = $this->http->request('get', $detailsPageUrl)->getBody()->getContents();
            $page = $this->makeCrawler($html);

            $license = new License($licenseShortName);

            return $license
                ->setCan($this->extractCans($page))
                ->setCannot($this->extractCannots($page))
                ->setMust($this->extractMusts($page))
                ->setSource($detailsPageUrl)
                ->setCreatedAt(new DateTimeImmutable);
        } catch (GuzzleException $failure) {
            // Keep the original exception as "previous" so the cause stays traceable.
            throw new NoLookupPossibleException($failure->getMessage(), $failure->getCode(), $failure);
        }
    }
115
116 1
    /**
     * Read the "can" list of a detail page: the feature block at child position 1.
     *
     * @param  Crawler  $crawler
     * @return array heading => description
     */
    private function extractCans(Crawler $crawler): array
    {
        $cans = $this->extractListByIndex($crawler, 1);

        return $cans;
    }
120
121 1
    /**
     * Read the "cannot" list of a detail page: the feature block at child position 3.
     *
     * @param  Crawler  $crawler
     * @return array heading => description
     */
    private function extractCannots(Crawler $crawler): array
    {
        $cannots = $this->extractListByIndex($crawler, 3);

        return $cannots;
    }
125
126 1
    /**
     * Read the "must" list of a detail page: the feature block at child position 5.
     *
     * @param  Crawler  $crawler
     * @return array heading => description
     */
    private function extractMusts(Crawler $crawler): array
    {
        $musts = $this->extractListByIndex($crawler, 5);

        return $musts;
    }
130
131
    /**
     * Collect the entries of one feature block as a heading => description map.
     *
     * Headings and descriptions are selected separately and paired by position.
     * If the page yields a different number of each, only the leading pairs that
     * have both parts are returned instead of failing in array_combine().
     *
     * @param  Crawler  $crawler
     * @param  int  $index  Offset of the block as a child of div.features
     * @return array
     */
    private function extractListByIndex(Crawler $crawler, int $index): array
    {
        $block = sprintf('div.c-feature:nth-child(%d)', $index);

        $textOf = static function (Crawler $node) {
            return $node->getNode(0)->textContent;
        };

        $headings = $crawler->filter($block.' .c-text-md.cc-semibold')->each($textOf);
        $bodies = $crawler->filter($block.' .c-text-sm')->each($textOf);

        // array_combine() requires both lists to have the same length.
        $pairs = min(count($headings), count($bodies));

        return array_combine(
            array_slice($headings, 0, $pairs),
            array_slice($bodies, 0, $pairs)
        );
    }
146
147
    /**
     * Find the best matching link by comparing the similarity of the link and text.
     *
     * Every entry is scored with similar_text() of its title and of its link against
     * the license name; the link of the highest scoring entry wins, the first one on
     * a tie. An empty string is returned when no entry scores above zero.
     *
     * @param  array  $zipped  List of [title, link] pairs; either part may be null when
     *                         the lists the pairs were built from differ in length
     * @param  string  $licenseShortName
     * @return string
     */
    private function findBestMatch(array $zipped, string $licenseShortName): string
    {
        $bestMatch = 0;
        $matchingLink = '';

        foreach ($zipped as [$title, $link]) {
            // An entry without a link cannot be returned, and passing null to
            // similar_text() raises a TypeError under strict_types.
            if (! is_string($link)) {
                continue;
            }

            $titleMatch = similar_text((string) $title, $licenseShortName);
            $linkMatch = similar_text($link, $licenseShortName);

            $totalMatch = $titleMatch + $linkMatch;

            if ($totalMatch > $bestMatch) {
                $bestMatch = $totalMatch;
                $matchingLink = $link;
            }
        }

        return $matchingLink;
    }
173
174
    /**
     * Replace the cache that lookUp() reads from and writes to.
     *
     * @param  CacheInterface  $cache
     * @return void
     */
    public function setCache(CacheInterface $cache): void
    {
        $this->cache = $cache;
    }
178
}
179