Passed
Push — master ( d33d35...37f9e4 )
by Dominik
01:18
created

LicenseLookup::extractCannots()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Dominikb\ComposerLicenseChecker;
6
7
use DateTimeImmutable;
8
use Dominikb\ComposerLicenseChecker\Exceptions\NoLookupPossibleException;
9
use GuzzleHttp\Exception\GuzzleException;
10
use GuzzleHttp\ClientInterface;
11
use Psr\SimpleCache\CacheInterface;
12
use Symfony\Component\DomCrawler\Crawler;
13
use Symfony\Component\Cache\Simple\FilesystemCache;
14
use Dominikb\ComposerLicenseChecker\Contracts\LicenseLookup as LicenseLookupContract;
15
16
/**
 * Resolves license details by scraping the search and detail pages of the
 * host given in API_HOST.
 *
 * Results (including the fallback object created when no lookup is possible)
 * are stored in a PSR-16 cache keyed by the license name.
 */
class LicenseLookup implements LicenseLookupContract
{
    public const API_HOST = 'https://tldrlegal.com';

    /** @var ClientInterface */
    protected $http;

    /** @var CacheInterface */
    protected $cache;

    /** @var string[] License identifiers for which no remote lookup is attempted. */
    private static $noLookup = [
        'none',
        'proprietary',
    ];

    /**
     * @param ClientInterface     $http  HTTP client used for all requests.
     * @param CacheInterface|null $cache Cache for resolved licenses; when omitted a
     *                                   filesystem cache below "../.cache" is created.
     */
    public function __construct(ClientInterface $http, ?CacheInterface $cache = null)
    {
        $this->http = $http;
        $this->cache = $cache ?? new FilesystemCache('LicenseLookup', 3600, __DIR__.'/../.cache');
    }

    /**
     * Look up the details of a license, using the cache when possible.
     *
     * When the lookup is not possible (excluded identifier, HTTP failure or no
     * search result) a NoLookupLicenses instance is returned - and cached - instead.
     *
     * @param string $licenseName
     *
     * @return License
     */
    public function lookUp(string $licenseName): License
    {
        $cached = $this->cache->get($licenseName);

        // Only trust cache entries that really are licenses; anything else
        // (e.g. a corrupted entry) is ignored and resolved again.
        if ($cached instanceof License) {
            return $cached;
        }

        try {
            $detailsPageUrl = $this->queryForDetailPageUrl($licenseName);

            $license = $this->resolveLicenseInformation($licenseName, $detailsPageUrl);
        } catch (NoLookupPossibleException $exception) {
            $license = new NoLookupLicenses($licenseName);
        }

        $this->cache->set($licenseName, $license);

        return $license;
    }

    /**
     * Search for the license and return the absolute URL of the best matching details page.
     *
     * @param string $licenseShortName
     *
     * @return string
     * @throws NoLookupPossibleException When the license is excluded from lookups, the
     *                                   request fails or the search yields no usable result.
     */
    private function queryForDetailPageUrl(string $licenseShortName): string
    {
        // $noLookup is private, so it must be read via self:: - static:: would
        // fail as soon as this method runs in the context of a subclass.
        if (in_array($licenseShortName, self::$noLookup, true)) {
            throw new NoLookupPossibleException;
        }

        // Encode the name so spaces, "+", "&" etc. cannot corrupt the query string.
        $searchUrl = sprintf('%s/search?q=%s', static::API_HOST, rawurlencode($licenseShortName));

        try {
            $response = $this->http->request('get', $searchUrl);
        } catch (GuzzleException $exception) {
            throw new NoLookupPossibleException($exception->getMessage(), $exception->getCode(), $exception);
        }

        $crawler = $this->makeCrawler($response->getBody()->getContents());

        $headings = $crawler->filter('div#licenses > .search-result > a > h3')->extract(['_text']);
        $links = $crawler->filter('div#licenses > .search-result > a')->extract(['href']);

        // Pair every heading with the link at the same position.
        $zipped = array_map(null, $headings, $links);

        $relativeUrl = $this->findBestMatch($zipped, $licenseShortName);

        // Without a match the bare host would be returned and the home page
        // would be scraped as if it were a license page.
        if ($relativeUrl === '') {
            throw new NoLookupPossibleException(
                sprintf('No search result found for license "%s".', $licenseShortName)
            );
        }

        return static::API_HOST.$relativeUrl;
    }

    /**
     * Wrap an HTML document in a crawler.
     *
     * @param string $html
     *
     * @return Crawler
     */
    private function makeCrawler(string $html): Crawler
    {
        return new Crawler($html);
    }

    /**
     * Fetch the details page and build a License from its contents.
     *
     * @param string $licenseShortName
     * @param string $detailsPageUrl
     *
     * @return License
     * @throws NoLookupPossibleException When the details page cannot be requested.
     */
    private function resolveLicenseInformation(string $licenseShortName, string $detailsPageUrl): License
    {
        try {
            $response = $this->http->request('get', $detailsPageUrl);
            $pageContent = $response->getBody()->getContents();

            $crawler = $this->makeCrawler($pageContent);

            $license = (new License($licenseShortName))
                ->setCan($this->extractCans($crawler))
                ->setCannot($this->extractCannots($crawler))
                ->setMust($this->extractMusts($crawler))
                ->setSource($detailsPageUrl)
                ->setCreatedAt(new DateTimeImmutable);

            return $license;
        } catch (GuzzleException $exception) {
            throw new NoLookupPossibleException($exception->getMessage(), $exception->getCode(), $exception);
        }
    }

    /**
     * Extract the entries of the "green" bucket list.
     *
     * @param Crawler $crawler
     *
     * @return array Map of entry heading to entry body.
     */
    private function extractCans(Crawler $crawler): array
    {
        return $this->extractListByColor($crawler, 'green');
    }

    /**
     * Extract the entries of the "red" bucket list.
     *
     * @param Crawler $crawler
     *
     * @return array Map of entry heading to entry body.
     */
    private function extractCannots(Crawler $crawler): array
    {
        return $this->extractListByColor($crawler, 'red');
    }

    /**
     * Extract the entries of the "blue" bucket list.
     *
     * @param Crawler $crawler
     *
     * @return array Map of entry heading to entry body.
     */
    private function extractMusts(Crawler $crawler): array
    {
        return $this->extractListByColor($crawler, 'blue');
    }

    /**
     * Collect heading/body pairs from the bucket list carrying the given color class.
     *
     * @param Crawler $crawler
     * @param string  $color   CSS class of the bucket list, e.g. "green".
     *
     * @return array Map of entry heading to entry body; empty when nothing matches.
     */
    private function extractListByColor(Crawler $crawler, string $color): array
    {
        $textOf = static function (Crawler $node): string {
            return $node->getNode(0)->textContent;
        };

        $headings = $crawler->filter(".bucket-list.$color li div.attr-head")->each($textOf);
        $bodies = $crawler->filter(".bucket-list.$color li div.attr-body")->each($textOf);

        // array_combine() fails (false on PHP 7, ValueError on PHP 8) when both
        // lists differ in size, so only the entries present in both are used.
        $pairs = min(count($headings), count($bodies));

        if ($pairs === 0) {
            return [];
        }

        return array_combine(
            array_slice($headings, 0, $pairs),
            array_slice($bodies, 0, $pairs)
        );
    }

    /**
     * Find the best matching link by comparing the similarity of the link and text.
     *
     * The first entry with the highest score wins.
     *
     * @param array  $zipped           List of [title, link] pairs.
     * @param string $licenseShortName
     *
     * @return string The best matching link or an empty string if there is none.
     */
    private function findBestMatch(array $zipped, string $licenseShortName): string
    {
        $bestMatch = 0;
        $matchingLink = '';

        foreach ($zipped as [$title, $link]) {
            // Zipping lists of different length pads the shorter one with null;
            // an entry without a link can never be a usable result.
            if (! is_string($link) || $link === '') {
                continue;
            }

            $titleMatch = similar_text((string) $title, $licenseShortName);
            $linkMatch = similar_text($link, $licenseShortName);

            $totalMatch = $titleMatch + $linkMatch;

            if ($totalMatch > $bestMatch) {
                $bestMatch = $totalMatch;
                $matchingLink = $link;
            }
        }

        return $matchingLink;
    }

    /**
     * Replace the cache used for resolved licenses.
     *
     * @param CacheInterface $cache
     */
    public function setCache(CacheInterface $cache): void
    {
        $this->cache = $cache;
    }
}
183