Dispositif /
Wikibot
| 1 | <?php |
||
| 2 | /* |
||
| 3 | * This file is part of dispositif/wikibot application (@github) |
||
| 4 | * 2019-2023 © Philippe M./Irønie <[email protected]> |
||
| 5 | * For the full copyright and MIT license information, view the license file. |
||
| 6 | */ |
||
| 7 | |||
| 8 | declare(strict_types=1); |
||
| 9 | |||
| 10 | namespace App\Domain\ExternLink; |
||
| 11 | |||
| 12 | use App\Application\Utils\HttpUtil; |
||
| 13 | use App\Domain\ExternLink\Validators\URLWithoutTagValidator; |
||
|
0 ignored issues
–
show
|
|||
| 14 | use App\Domain\InfrastructurePorts\InternetDomainParserInterface; |
||
| 15 | use App\Domain\Utils\WikiTextUtil; |
||
| 16 | use App\Infrastructure\Monitor\NullLogger; |
||
| 17 | use Exception; |
||
| 18 | use Psr\Log\LoggerInterface; |
||
| 19 | |||
/**
 * Decides whether an external URL should be processed and resolves its
 * registrable domain through the injected domain parser.
 *
 * The instance is stateful: it remembers the last URL it examined together
 * with the registrable domain resolved for that URL, so that
 * getRegistrableDomain() can answer repeated requests without re-parsing.
 *
 * Todo move infra ?
 */
class CheckURL
{
    /**
     * Receives the "Skip : ..." debug/warning messages emitted by this class.
     *
     * @var LoggerInterface
     */
    protected $log;

    /**
     * Registrable domain resolved for $url, or null when nothing has been
     * resolved yet or the last resolution attempt failed.
     *
     * @var string|null
     */
    protected $registrableDomain;

    /**
     * Last URL handed to isURLAuthorized() or getRegistrableDomain();
     * unset (null) until one of them has been called.
     *
     * @var string|null
     */
    protected $url;

    /**
     * @var InternetDomainParserInterface
     */
    protected $internetDomainParser;

    /**
     * @param InternetDomainParserInterface $domainParser used to resolve registrable domains
     * @param LoggerInterface|null          $logger       optional; a NullLogger is used when omitted
     */
    public function __construct(InternetDomainParserInterface $domainParser, ?LoggerInterface $logger = null)
    {
        $this->log = $logger ?? new NullLogger();
        $this->internetDomainParser = $domainParser;
    }

    /**
     * Tells whether the given URL passes every eligibility check.
     *
     * The URL is rejected (false) when HttpUtil::isHttpURL() refuses it, when it
     * ends with a forbidden filename extension, or when
     * WikiTextUtil::containsWikiTag() reports a tag in it. Each rejection is
     * logged at debug level.
     *
     * As a side effect the URL is remembered and, for an accepted URL, its
     * registrable domain is resolved and cached. Note that a failure to resolve
     * the domain only logs a warning: the method still returns true.
     *
     * @param string $url URL to check
     *
     * @return bool true when none of the checks rejected the URL
     */
    public function isURLAuthorized(string $url): bool
    {
        $this->url = $url;
        $this->registrableDomain = null;

        if (!HttpUtil::isHttpURL($url)) {
            $this->log->debug('Skip : not Http URL : ' . $url, ['stats' => 'externref.skip.notRawURL']);

            return false;
        }

        if ($this->hasForbiddenFilenameExtension()) {
            $this->log->debug(
                'Skip : ForbiddenFilenameExtension : ' . $url,
                ['stats' => 'externref.skip.forbiddenFilenameExtension']
            );

            return false;
        }

        if (WikiTextUtil::containsWikiTag($url)) {
            $this->log->debug('Skip : URL contains HTML tag : ' . $url, ['stats' => 'externref.skip.URLcontainsTag']);

            return false;
        }

        $this->findRegistrableDomain();

        return true;
    }

    /**
     * Tells whether the current URL ends with one of the skipped file
     * extensions (PDF, GIF, etc.). The match is case-insensitive and anchored
     * at the end of the whole URL string.
     *
     * todo move URL parsing
     * https://fr.wikipedia.org/wiki/Liste_d%27extensions_de_fichiers
     */
    protected function hasForbiddenFilenameExtension(): bool
    {
        return (bool)preg_match(
            '#\.(pdf|jpg|jpeg|gif|png|webp|xls|xlsx|xlr|xml|xlt|txt|csv|js|docx|exe|gz|zip|ini|movie|mp3|mp4|ogg|raw|rss|tar|tgz|wma)$#i',
            $this->url
        );
    }

    /**
     * Resolves and caches the registrable domain of the current URL.
     *
     * @return string|null the resolved domain, or null when the domain parser
     *                     threw an Exception (a warning is logged in that case)
     */
    protected function findRegistrableDomain(): ?string
    {
        // Forget the domain of any previously examined URL before trying again.
        // Otherwise a failed resolution would leave the old value in place and
        // getRegistrableDomain() would later return it for the wrong URL.
        $this->registrableDomain = null;

        try {
            $this->registrableDomain = $this->internetDomainParser->getRegistrableDomainFromURL($this->url);
        } catch (Exception) {
            $this->log->warning(
                'Skip : registrableDomain not a valid URL : ' . $this->url,
                ['stats' => 'externref.skip.URLAuthorized.exception2']
            );

            return null;
        }

        return $this->registrableDomain;
    }

    /**
     * Returns the registrable domain of the given URL.
     *
     * When the URL is the one last examined and a domain is already cached for
     * it, the cached value is returned; otherwise the URL becomes the current
     * one and its domain is resolved.
     *
     * @param string $url URL whose registrable domain is wanted
     *
     * @return string|null the registrable domain, or null when it could not be resolved
     */
    public function getRegistrableDomain($url): ?string
    {
        if ($url === $this->url && $this->registrableDomain) {
            return $this->registrableDomain;
        }
        $this->url = $url;

        return $this->findRegistrableDomain();
    }
}
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths