1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Dominikb\ComposerLicenseChecker; |
||
6 | |||
7 | use DateTimeImmutable; |
||
8 | use Dominikb\ComposerLicenseChecker\Contracts\LicenseLookup as LicenseLookupContract; |
||
9 | use Dominikb\ComposerLicenseChecker\Exceptions\NoLookupPossibleException; |
||
10 | use GuzzleHttp\ClientInterface; |
||
11 | use GuzzleHttp\Exception\GuzzleException; |
||
12 | use Symfony\Component\Cache\Adapter\FilesystemAdapter; |
||
13 | use Symfony\Component\DomCrawler\Crawler; |
||
14 | use Symfony\Contracts\Cache\CacheInterface; |
||
15 | |||
/**
 * Looks up license details (what one can / cannot / must do) by scraping the
 * search and detail pages of the host configured in API_HOST.
 *
 * Results, including failed lookups, are stored in the injected cache so that
 * repeated lookups of the same license name do not hit the network again.
 */
class LicenseLookup implements LicenseLookupContract
{
    public const API_HOST = 'https://tldrlegal.com';

    /**
     * Characters that must not appear in a cache key (reserved by PSR-6 and
     * enforced by the Symfony cache component).
     */
    private const RESERVED_CACHE_KEY_CHARACTERS = '{}()/\\@:';

    /** @var ClientInterface */
    protected $http;
    /** @var CacheInterface */
    protected $cache;
    /** @var string[] License names for which no remote lookup is attempted. */
    private static $noLookup = [
        'none',
        'proprietary',
    ];

    /**
     * @param ClientInterface     $http  HTTP client used for the search and detail page requests.
     * @param CacheInterface|null $cache Cache for resolved licenses; when omitted, a filesystem
     *                                   cache in "../.cache" (relative to this file) with a
     *                                   default lifetime of 3600 seconds is created.
     */
    public function __construct(ClientInterface $http, ?CacheInterface $cache = null)
    {
        $this->http = $http;
        $this->cache = $cache ?? new FilesystemAdapter('FilesystemCache', 3600, __DIR__.'/../.cache');
    }

    /**
     * Resolve the license information for the given license name.
     *
     * Never throws NoLookupPossibleException: when the lookup is not possible
     * a NoLookupLicenses instance is returned (and cached) instead.
     *
     * @param string $licenseName
     * @return License
     */
    public function lookUp(string $licenseName): License
    {
        return $this->cache->get($this->cacheKeyFor($licenseName), function () use ($licenseName) {
            try {
                $detailsPageUrl = $this->queryForDetailPageUrl($licenseName);

                return $this->resolveLicenseInformation($licenseName, $detailsPageUrl);
            } catch (NoLookupPossibleException $exception) {
                return new NoLookupLicenses($licenseName);
            }
        });
    }

    /**
     * Build a cache key that is valid for the cache backend.
     *
     * Names that are already valid keys are used unchanged, so existing cache
     * entries stay reachable. Empty names and names containing reserved
     * characters (e.g. "(MIT or GPL-2.0)") would be rejected by the cache and
     * are therefore replaced by a hash of the name.
     *
     * @param string $licenseName
     * @return string
     */
    private function cacheKeyFor(string $licenseName): string
    {
        if ($licenseName !== '' && strpbrk($licenseName, self::RESERVED_CACHE_KEY_CHARACTERS) === false) {
            return $licenseName;
        }

        return 'license.'.md5($licenseName);
    }

    /**
     * Search for the license and return the absolute URL of the best matching detail page.
     *
     * @param string $licenseShortName
     * @return string
     *
     * @throws NoLookupPossibleException When the name is excluded from lookups, the
     *                                   search request fails, or no search result matches.
     */
    private function queryForDetailPageUrl(string $licenseShortName): string
    {
        if (in_array($licenseShortName, self::$noLookup, true)) {
            throw new NoLookupPossibleException;
        }

        // Encode the name so characters such as spaces, "+", "&" or "#" cannot
        // break or alter the query string. Plain names ("MIT", "Apache-2.0") are unaffected.
        $searchUrl = sprintf('%s/search?query=%s', static::API_HOST, rawurlencode($licenseShortName));

        try {
            $response = $this->http->request('get', $searchUrl);
        } catch (GuzzleException $exception) {
            throw new NoLookupPossibleException($exception->getMessage(), $exception->getCode(), $exception);
        }

        $crawler = $this->makeCrawler($response->getBody()->getContents());

        $headings = $crawler->filter('div.search-result-items .cc-semibold')->extract(['_text']);
        $links = $crawler->filter('div.search-result-items .c-link-arrow')->extract(['href']);

        // array_map() with a null callback zips both lists into [heading, link] pairs;
        // the shorter list is padded with null, which findBestMatch() skips.
        $zipped = array_map(null, $headings, $links);

        $relativeUrl = $this->findBestMatch($zipped, $licenseShortName);

        // Without this guard the bare host URL would be returned and the home
        // page would be scraped as if it were a license detail page.
        if ($relativeUrl === '') {
            throw new NoLookupPossibleException(
                sprintf('No matching license page found for "%s".', $licenseShortName)
            );
        }

        return static::API_HOST.$relativeUrl;
    }

    /**
     * Wrap an HTML document in a DomCrawler instance.
     *
     * @param string $html
     * @return Crawler
     */
    private function makeCrawler(string $html): Crawler
    {
        return new Crawler($html);
    }

    /**
     * Fetch the detail page and build a License from the lists found on it.
     *
     * @param string $licenseShortName
     * @param string $detailsPageUrl
     * @return License
     *
     * @throws NoLookupPossibleException When the detail page request fails.
     */
    private function resolveLicenseInformation(string $licenseShortName, string $detailsPageUrl): License
    {
        try {
            $response = $this->http->request('get', $detailsPageUrl);
        } catch (GuzzleException $exception) {
            throw new NoLookupPossibleException($exception->getMessage(), $exception->getCode(), $exception);
        }

        $crawler = $this->makeCrawler($response->getBody()->getContents());

        return (new License($licenseShortName))
            ->setCan($this->extractCans($crawler))
            ->setCannot($this->extractCannots($crawler))
            ->setMust($this->extractMusts($crawler))
            ->setSource($detailsPageUrl)
            ->setCreatedAt(new DateTimeImmutable);
    }

    /**
     * @param Crawler $crawler
     * @return array<string, string> Heading => description of each "can" entry.
     */
    private function extractCans(Crawler $crawler): array
    {
        return $this->extractListByIndex($crawler, 1);
    }

    /**
     * @param Crawler $crawler
     * @return array<string, string> Heading => description of each "cannot" entry.
     */
    private function extractCannots(Crawler $crawler): array
    {
        return $this->extractListByIndex($crawler, 3);
    }

    /**
     * @param Crawler $crawler
     * @return array<string, string> Heading => description of each "must" entry.
     */
    private function extractMusts(Crawler $crawler): array
    {
        return $this->extractListByIndex($crawler, 5);
    }

    /**
     * Extract one feature list of the detail page as a heading => description map.
     *
     * @param Crawler $crawler
     * @param int     $index   Offset of the list as a child of div.features (used in :nth-child()).
     * @return array<string, string>
     */
    private function extractListByIndex(Crawler $crawler, int $index): array
    {
        $featureSelector = 'div.c-feature:nth-child('.$index.')';

        $textOf = static function (Crawler $node): string {
            return $node->getNode(0)->textContent;
        };

        $headings = $crawler->filter($featureSelector.' .c-text-md.cc-semibold')->each($textOf);
        $bodies = $crawler->filter($featureSelector.' .c-text-sm')->each($textOf);

        // array_combine() fails when both lists differ in length, so only the
        // pairs present in both lists are combined (best effort on unexpected markup).
        $pairCount = min(count($headings), count($bodies));

        if ($pairCount === 0) {
            return [];
        }

        return array_combine(
            array_slice($headings, 0, $pairCount),
            array_slice($bodies, 0, $pairCount)
        );
    }

    /**
     * Find the best matching link by comparing the similarity of the link and text.
     *
     * The score of a search result is the number of characters its title and
     * its link have in common with the license name (see similar_text()).
     * Among equal scores the first result wins.
     *
     * @param array  $zipped           List of [title, link] pairs; incomplete pairs are ignored.
     * @param string $licenseShortName
     * @return string The link of the best match, or an empty string when nothing matches.
     */
    private function findBestMatch(array $zipped, string $licenseShortName): string
    {
        $bestMatch = 0;
        $matchingLink = '';

        foreach ($zipped as [$title, $link]) {
            // Pairs padded with null (unequal number of titles and links)
            // cannot be scored and would raise a TypeError in similar_text().
            if (! is_string($title) || ! is_string($link)) {
                continue;
            }

            $totalMatch = similar_text($title, $licenseShortName) + similar_text($link, $licenseShortName);

            if ($totalMatch > $bestMatch) {
                $bestMatch = $totalMatch;
                $matchingLink = $link;
            }
        }

        return $matchingLink;
    }

    /**
     * Replace the cache used for storing resolved licenses.
     *
     * @param CacheInterface $cache
     */
    public function setCache(CacheInterface $cache): void
    {
        $this->cache = $cache;
    }
}
||
179 |