Passed
Pull Request — master (#24)
by Philip
18:17
created

LicenseLookup   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 160
Duplicated Lines 0 %

Test Coverage

Coverage 94.12%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 17
eloc 64
c 1
b 0
f 0
dl 0
loc 160
ccs 64
cts 68
cp 0.9412
rs 10

11 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A queryForDetailPageUrl() 0 24 3
A extractCans() 0 3 1
A resolveLicenseInformation() 0 18 2
A extractCannots() 0 3 1
A extractMusts() 0 3 1
A extractListByColor() 0 13 1
A makeCrawler() 0 3 1
A lookUp() 0 12 2
A setCache() 0 3 1
A findBestMatch() 0 18 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Dominikb\ComposerLicenseChecker;
6
7
use DateTimeImmutable;
8
use Dominikb\ComposerLicenseChecker\Contracts\LicenseLookup as LicenseLookupContract;
9
use Dominikb\ComposerLicenseChecker\Exceptions\NoLookupPossibleException;
10
use GuzzleHttp\ClientInterface;
11
use GuzzleHttp\Exception\GuzzleException;
12
use Symfony\Component\Cache\Adapter\FilesystemAdapter;
13
use Symfony\Component\DomCrawler\Crawler;
14
use Symfony\Contracts\Cache\CacheInterface;
15
16
class LicenseLookup implements LicenseLookupContract
17
{
18
    const API_HOST = 'https://tldrlegal.com';
19
20
    /** @var ClientInterface */
21
    protected $http;
22
    /** @var CacheInterface */
23
    protected $cache;
24
    /** @var string[] */
25
    private static $noLookup = [
26
        'none',
27
        'proprietary',
28
    ];
29
30 4
    /**
     * Create a lookup service from an HTTP client and an optional cache.
     *
     * @param  ClientInterface  $http  Client used for every request this class sends.
     * @param  CacheInterface|null  $cache  Cache used by lookUp(). When null, a
     *                                      FilesystemAdapter is created with the arguments
     *                                      'FilesystemCache', 3600 and the ".cache" directory
     *                                      one level above this file.
     */
    public function __construct(ClientInterface $http, ?CacheInterface $cache = null)
    {
        $this->http = $http;

        // Explicit "?CacheInterface": relying on "= null" to make the type
        // nullable is deprecated as of PHP 8.4.
        $this->cache = $cache ?? new FilesystemAdapter('FilesystemCache', 3600, __DIR__.'/../.cache');
    }
35
36 4
    /**
     * Resolve the license with the given name, going through the cache.
     *
     * On a cache miss the details page is searched for and parsed. When that
     * raises a NoLookupPossibleException, a NoLookupLicenses instance for the
     * same name is returned instead. Whatever the callback returns is handed
     * to the cache.
     *
     * @param  string  $licenseName  License name or expression to look up.
     * @return License
     */
    public function lookUp(string $licenseName): License
    {
        // Symfony cache keys must not contain any of "{}()/\@:", but license
        // expressions such as "(MIT or GPL-3.0-only)" do. Replace those
        // characters with "_" so the cache accepts the key; names without
        // reserved characters keep exactly the same key as before.
        $cacheKey = strtr($licenseName, '{}()/\\@:', '________');

        return $this->cache->get($cacheKey, function () use ($licenseName) {
            try {
                $detailsPageUrl = $this->queryForDetailPageUrl($licenseName);

                return $this->resolveLicenseInformation($licenseName, $detailsPageUrl);
            } catch (NoLookupPossibleException $exception) {
                return new NoLookupLicenses($licenseName);
            }
        });
    }
50
51
    /**
     * Search API_HOST for the license and return the absolute URL of the
     * best matching details page.
     *
     * @param  string  $licenseShortName  License name used as the search term.
     * @return string  API_HOST followed by the href of the best matching search result.
     *
     * @throws NoLookupPossibleException when the name is listed in $noLookup,
     *                                   when the search request fails, or when
     *                                   no search result matches the name
     */
    private function queryForDetailPageUrl(string $licenseShortName): string
    {
        if (in_array($licenseShortName, self::$noLookup, true)) {
            throw new NoLookupPossibleException;
        }

        // Encode the search term: names containing "+", "&", "#" or spaces
        // would otherwise yield a malformed or misinterpreted query string.
        $searchUrl = sprintf('%s/search?q=%s', static::API_HOST, rawurlencode($licenseShortName));

        try {
            $response = $this->http->request('get', $searchUrl);
        } catch (GuzzleException $exception) {
            throw new NoLookupPossibleException($exception->getMessage(), $exception->getCode(), $exception);
        }

        $crawler = $this->makeCrawler($response->getBody()->getContents());

        $headings = $crawler->filter('div#licenses > .search-result > a > h3')->extract(['_text']);
        $links = $crawler->filter('div#licenses > .search-result > a')->extract(['href']);

        // A null callback makes array_map() zip its arrays into [heading, link]
        // pairs. This is documented PHP behaviour; static analysers that report
        // the null argument as a type error are raising a false positive.
        $zipped = array_map(null, $headings, $links);

        $relativeUrl = $this->findBestMatch($zipped, $licenseShortName);

        // Without a match the result would be the bare host, and the home page
        // would then be parsed as if it were a license details page.
        if ($relativeUrl === '') {
            throw new NoLookupPossibleException(
                sprintf('No search result matches the license "%s".', $licenseShortName)
            );
        }

        return static::API_HOST.$relativeUrl;
    }
82
83 2
    /**
     * Wrap an HTML string in a DomCrawler instance.
     *
     * @param  string  $html  Markup handed to the Crawler constructor.
     * @return Crawler
     */
    private function makeCrawler(string $html): Crawler
    {
        $crawler = new Crawler($html);

        return $crawler;
    }
87
88
    /**
     * Request the details page and build a License from its content.
     *
     * The can / cannot / must lists are taken from the page, the requested URL
     * is stored as the source and the current time as the creation date.
     *
     * @param  string  $licenseShortName  Name passed to the License constructor.
     * @param  string  $detailsPageUrl  URL that is requested and recorded as the source.
     * @return License
     *
     * @throws NoLookupPossibleException when a GuzzleException is raised; the
     *                                   original exception is kept as "previous"
     */
    private function resolveLicenseInformation(string $licenseShortName, string $detailsPageUrl): License
    {
        try {
            $html = $this->http->request('get', $detailsPageUrl)->getBody()->getContents();
            $page = $this->makeCrawler($html);

            return (new License($licenseShortName))
                ->setCan($this->extractCans($page))
                ->setCannot($this->extractCannots($page))
                ->setMust($this->extractMusts($page))
                ->setSource($detailsPageUrl)
                ->setCreatedAt(new DateTimeImmutable());
        } catch (GuzzleException $failure) {
            throw new NoLookupPossibleException($failure->getMessage(), $failure->getCode(), $failure);
        }
    }
115
116 1
    /**
     * Read the entries of the "green" bucket list.
     *
     * @param  Crawler  $crawler  Crawler to read the list from.
     * @return array  Result of extractListByColor() for the colour "green".
     */
    private function extractCans(Crawler $crawler): array
    {
        $permissions = $this->extractListByColor($crawler, 'green');

        return $permissions;
    }
120
121 1
    /**
     * Read the entries of the "red" bucket list.
     *
     * @param  Crawler  $crawler  Crawler to read the list from.
     * @return array  Result of extractListByColor() for the colour "red".
     */
    private function extractCannots(Crawler $crawler): array
    {
        $prohibitions = $this->extractListByColor($crawler, 'red');

        return $prohibitions;
    }
125
126 1
    /**
     * Read the entries of the "blue" bucket list.
     *
     * @param  Crawler  $crawler  Crawler to read the list from.
     * @return array  Result of extractListByColor() for the colour "blue".
     */
    private function extractMusts(Crawler $crawler): array
    {
        $obligations = $this->extractListByColor($crawler, 'blue');

        return $obligations;
    }
130
131 1
    /**
     * Collect the entries of one coloured bucket list as a heading => body map.
     *
     * Heading and body are read from the same list item. Collecting the two
     * as separate lists and combining them afterwards fails as soon as one
     * item lacks either part: array_combine() rejects lists of different
     * length, and the remaining pairs would be shifted against each other.
     *
     * @param  Crawler  $crawler  Crawler to read the list from.
     * @param  string  $color  Class name selecting the list (".bucket-list.<color>").
     * @return array<string, string>  Text of each item's first "div.attr-head" mapped to
     *                                the text of its first "div.attr-body" ('' when the
     *                                item has no body). Items without a heading are
     *                                skipped; a repeated heading keeps the last body.
     */
    private function extractListByColor(Crawler $crawler, string $color): array
    {
        $pairs = $crawler->filter(".bucket-list.$color li")
                         ->each(function (Crawler $item) {
                             $head = $item->filter('div.attr-head');
                             $body = $item->filter('div.attr-body');

                             return [
                                 $head->count() > 0 ? $head->getNode(0)->textContent : null,
                                 $body->count() > 0 ? $body->getNode(0)->textContent : '',
                             ];
                         });

        $list = [];

        foreach ($pairs as [$heading, $text]) {
            // An item without a heading has no key to be stored under.
            if ($heading === null) {
                continue;
            }

            $list[$heading] = $text;
        }

        return $list;
    }
145
146
    /**
     * Pick the link that resembles the license name most.
     *
     * Every pair is scored with similar_text() of its title against the name
     * plus similar_text() of its link against the name. The highest score
     * wins, and the earliest pair wins a tie.
     *
     * @param  array  $zipped  List of [title, link] pairs.
     * @param  string  $licenseShortName  Name the titles and links are compared with.
     * @return string  Link of the winning pair, or '' when no pair scores above zero.
     */
    private function findBestMatch(array $zipped, string $licenseShortName): string
    {
        $winner = '';
        $highScore = 0;

        foreach ($zipped as [$heading, $href]) {
            $score = similar_text($heading, $licenseShortName) + similar_text($href, $licenseShortName);

            // Only a strictly better score replaces the current winner.
            if ($score <= $highScore) {
                continue;
            }

            $highScore = $score;
            $winner = $href;
        }

        return $winner;
    }
172
173
    /**
     * Replace the cache used by lookUp().
     *
     * @param  CacheInterface  $cache  Cache that takes the place of the current one.
     * @return void
     */
    public function setCache(CacheInterface $cache): void
    {
        $this->cache = $cache;
    }
177
}
178