Issues (106)

src/Domain/ExternLink/CheckURL.php (1 issue)

Labels
Severity
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\ExternLink;
11
12
use App\Application\Utils\HttpUtil;
13
use App\Domain\ExternLink\Validators\URLWithoutTagValidator;
0 ignored issues
show
The type App\Domain\ExternLink\Validators\URLWithoutTagValidator was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
14
use App\Domain\InfrastructurePorts\InternetDomainParserInterface;
15
use App\Domain\Utils\WikiTextUtil;
16
use App\Infrastructure\Monitor\NullLogger;
17
use Exception;
18
use Psr\Log\LoggerInterface;
19
20
/**
 * Filters external URLs before further processing and resolves their registrable domain.
 *
 * The instance remembers the last URL it was given together with the domain resolved
 * for it, so getRegistrableDomain() can reuse that result for the same URL.
 *
 * Todo move infra ?
 */
class CheckURL
{
    /**
     * @var LoggerInterface
     */
    protected $log;

    /**
     * Domain resolved for $url; null while unresolved or when resolution failed.
     *
     * @var string|null
     */
    protected $registrableDomain;
    /**
     * Last URL handed to isURLAuthorized() or getRegistrableDomain().
     *
     * @var string|null
     */
    protected $url;
    /**
     * @var InternetDomainParserInterface
     */
    protected $internetDomainParser;

    /**
     * @param InternetDomainParserInterface $domainParser used to resolve the registrable domain of a URL
     * @param LoggerInterface|null          $logger       falls back to a NullLogger when omitted
     */
    public function __construct(InternetDomainParserInterface $domainParser, ?LoggerInterface $logger = null)
    {
        $this->log = $logger ?? new NullLogger();
        $this->internetDomainParser = $domainParser;
    }

    /**
     * Tells whether $url passes the three filters: HTTP URL, no forbidden file
     * extension, no wiki/HTML tag inside the URL.
     *
     * On success the registrable domain is resolved as a side effect. A failed
     * resolution is only logged: the method still returns true in that case.
     */
    public function isURLAuthorized(string $url): bool
    {
        $this->url = $url;
        $this->registrableDomain = null;
        if (!HttpUtil::isHttpURL($url)) {
            $this->log->debug('Skip : not Http URL : ' . $url, ['stats' => 'externref.skip.notRawURL']);

            return false;
        }

        if ($this->hasForbiddenFilenameExtension()) {
            $this->log->debug(
                'Skip : ForbiddenFilenameExtension : ' . $url,
                ['stats' => 'externref.skip.forbiddenFilenameExtension']
            );

            return false;
        }

        if (WikiTextUtil::containsWikiTag($url)) {
            $this->log->debug('Skip : URL contains HTML tag : ' . $url, ['stats' => 'externref.skip.URLcontainsTag']);

            return false;
        }

        $this->findRegistrableDomain();

        return true;
    }

    /**
     * todo move URL parsing
     * Skip PDF GIF etc
     * https://fr.wikipedia.org/wiki/Liste_d%27extensions_de_fichiers
     *
     * Returns true when the current URL ends with one of the listed extensions,
     * either as a whole or in its path component (so a trailing query string or
     * fragment does not hide the extension).
     */
    protected function hasForbiddenFilenameExtension(): bool
    {
        $forbiddenExtension = '#\.(pdf|jpg|jpeg|gif|png|webp|xls|xlsx|xlr|xml|xlt|txt|csv|js|docx|exe|gz|zip|ini|movie|mp3|mp4|ogg|raw|rss|tar|tgz|wma)$#i';

        // Historical check on the whole URL: everything that was skipped before is still skipped.
        if (preg_match($forbiddenExtension, $this->url) === 1) {
            return true;
        }

        // "file.pdf?download=1" or "image.png#preview": look at the path alone.
        // parse_url() yields null (no path) or false (malformed URL) when there is nothing to test.
        $path = parse_url($this->url, PHP_URL_PATH);

        return is_string($path) && preg_match($forbiddenExtension, $path) === 1;
    }

    /**
     * Resolves the registrable domain of the current URL through the domain parser.
     *
     * @return string|null the resolved domain, or null when the parser threw (a warning is logged)
     */
    protected function findRegistrableDomain(): ?string
    {
        // Forget the domain of any previously handled URL: without this reset a failed
        // resolution would leave the old value in place and getRegistrableDomain()
        // would later serve it as if it belonged to the current URL.
        $this->registrableDomain = null;

        try {
            $this->registrableDomain = $this->internetDomainParser->getRegistrableDomainFromURL($this->url);
        } catch (Exception) {
            $this->log->warning(
                'Skip : registrableDomain not a valid URL : ' . $this->url,
                ['stats' => 'externref.skip.URLAuthorized.exception2']
            );

            return null;
        }

        return $this->registrableDomain;
    }

    /**
     * Returns the registrable domain of $url, reusing the stored value when $url
     * is the URL already held by this instance and a domain was resolved for it.
     *
     * @param string $url
     *
     * @return string|null null when the domain could not be resolved
     */
    public function getRegistrableDomain($url): ?string
    {
        if ($url === $this->url && $this->registrableDomain) {
            return $this->registrableDomain;
        }
        $this->url = $url;

        return $this->findRegistrableDomain();
    }
}