1 | <?php |
||
2 | /* |
||
3 | * This file is part of dispositif/wikibot application (@github) |
||
4 | * 2019-2023 © Philippe M./Irønie <[email protected]> |
||
5 | * For the full copyright and MIT license information, view the license file. |
||
6 | */ |
||
7 | |||
8 | declare(strict_types=1); |
||
9 | |||
10 | namespace App\Domain\ExternLink; |
||
11 | |||
12 | use App\Application\Utils\HttpUtil; |
||
13 | use App\Domain\ExternLink\Validators\URLWithoutTagValidator; |
||
0 ignored issues
–
show
|
|||
14 | use App\Domain\InfrastructurePorts\InternetDomainParserInterface; |
||
15 | use App\Domain\Utils\WikiTextUtil; |
||
16 | use App\Infrastructure\Monitor\NullLogger; |
||
17 | use Exception; |
||
18 | use Psr\Log\LoggerInterface; |
||
19 | |||
/**
 * Decides whether an external-link URL is eligible for processing and
 * resolves the registrable domain of that URL.
 *
 * The instance keeps the last URL it was given together with the registrable
 * domain resolved for it, so that getRegistrableDomain() can answer repeated
 * lookups of the same URL without calling the domain parser again.
 *
 * Todo move infra ?
 */
class CheckURL
{
    /**
     * @var LoggerInterface
     */
    protected $log;

    /**
     * Registrable domain resolved for $url; null while unresolved or when the
     * domain parser failed on that URL.
     *
     * @var string|null
     */
    protected $registrableDomain;

    /**
     * Last URL passed to isURLAuthorized() or getRegistrableDomain().
     *
     * @var string
     */
    protected $url;

    /**
     * @var InternetDomainParserInterface
     */
    protected $internetDomainParser;

    /**
     * @param InternetDomainParserInterface $domainParser parser used to extract the registrable domain
     * @param LoggerInterface|null          $logger       optional logger; a NullLogger is used when omitted
     */
    public function __construct(InternetDomainParserInterface $domainParser, ?LoggerInterface $logger = null)
    {
        $this->log = $logger ?? new NullLogger();
        $this->internetDomainParser = $domainParser;
    }

    /**
     * Tell whether the given URL passes all the skip rules.
     *
     * The URL is refused (and a debug line is logged) when
     * HttpUtil::isHttpURL() returns false, when it ends with a forbidden
     * filename extension, or when WikiTextUtil::containsWikiTag() reports a tag.
     *
     * For an accepted URL the registrable domain is resolved as a side effect.
     * A failure of that resolution only logs a warning: the method still
     * returns true.
     *
     * @param string $url URL to check
     *
     * @return bool true when the URL is accepted
     */
    public function isURLAuthorized(string $url): bool
    {
        $this->url = $url;
        // Drop the domain cached for a previous URL, so early returns below
        // never leave a domain that belongs to another URL.
        $this->registrableDomain = null;

        if (!HttpUtil::isHttpURL($url)) {
            $this->log->debug('Skip : not Http URL : ' . $url, ['stats' => 'externref.skip.notRawURL']);

            return false;
        }

        if ($this->hasForbiddenFilenameExtension()) {
            $this->log->debug(
                'Skip : ForbiddenFilenameExtension : ' . $url,
                ['stats' => 'externref.skip.forbiddenFilenameExtension']
            );

            return false;
        }

        if (WikiTextUtil::containsWikiTag($url)) {
            $this->log->debug('Skip : URL contains HTML tag : ' . $url, ['stats' => 'externref.skip.URLcontainsTag']);

            return false;
        }

        $this->findRegistrableDomain();

        return true;
    }

    /**
     * todo move URL parsing
     * Skip PDF GIF etc
     * https://fr.wikipedia.org/wiki/Liste_d%27extensions_de_fichiers
     *
     * The pattern is anchored at the very end of the current URL string
     * (case-insensitive), so an extension followed by a query string or a
     * fragment is not detected.
     *
     * @return bool true when the current URL ends with one of the listed extensions
     */
    protected function hasForbiddenFilenameExtension(): bool
    {
        return 1 === preg_match(
            '#\.(pdf|jpg|jpeg|gif|png|webp|xls|xlsx|xlr|xml|xlt|txt|csv|js|docx|exe|gz|zip|ini|movie|mp3|mp4|ogg|raw|rss|tar|tgz|wma)$#i',
            $this->url
        );
    }

    /**
     * Resolve and cache the registrable domain of the current URL.
     *
     * @return string|null the registrable domain, or null when the domain
     *                     parser threw an exception (a warning is logged)
     */
    protected function findRegistrableDomain(): ?string
    {
        // Clear the cache first: if the parser throws, the domain of a
        // previously resolved URL must not stay associated with the current URL.
        $this->registrableDomain = null;

        try {
            $this->registrableDomain = $this->internetDomainParser->getRegistrableDomainFromURL($this->url);
        } catch (Exception) {
            $this->log->warning(
                'Skip : registrableDomain not a valid URL : ' . $this->url,
                ['stats' => 'externref.skip.URLAuthorized.exception2']
            );

            return null;
        }

        return $this->registrableDomain;
    }

    /**
     * Return the registrable domain of the given URL.
     *
     * When the URL is the one already held by this instance and its domain
     * was resolved, the cached value is returned without calling the parser.
     * Otherwise the URL becomes the current URL and its domain is resolved.
     *
     * @param string $url URL to resolve
     *
     * @return string|null the registrable domain, or null when it cannot be resolved
     */
    public function getRegistrableDomain($url): ?string
    {
        if ($url === $this->url && null !== $this->registrableDomain) {
            return $this->registrableDomain;
        }

        $this->url = $url;

        return $this->findRegistrableDomain();
    }
}
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"]
, you can move it to the dependency path list as follows: dependency_paths: ["lib/*"]. For further information, see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths