Passed
Push — master ( 4109cd...3773bc )
by Dev
13:23
created

PageScannerService::isWebLink()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
rs 10
c 1
b 0
f 0
1
<?php
2
3
namespace PiedWeb\CMSBundle\PageScanner;
4
5
use Doctrine\ORM\EntityManagerInterface;
6
use PiedWeb\CMSBundle\Entity\PageInterface;
7
use PiedWeb\CMSBundle\Service\AppConfigHelper as App;
8
use PiedWeb\CMSBundle\Service\AppConfigHelper;
9
use PiedWeb\CMSBundle\Utils\GenerateLivePathForTrait;
10
use PiedWeb\CMSBundle\Utils\KernelTrait;
11
use Symfony\Component\HttpFoundation\Request;
12
use Symfony\Component\HttpFoundation\Response;
13
use Symfony\Component\HttpKernel\KernelInterface;
14
use Symfony\Component\Routing\Router;
15
use Symfony\Component\Routing\RouterInterface;
16
use Twig\Environment as Twig_Environment;
17
18
/**
 * Scans the rendered HTML of a page and reports the linked documents
 * (links, images, backgrounds...) that cannot be found.
 */
class PageScannerService
{
    use KernelTrait, GenerateLivePathForTrait;

    /**
     * Configuration helper built for the host of the page being scanned (set by scan()).
     *
     * @var AppConfigHelper
     */
    protected $app;

    /** @var EntityManagerInterface */
    protected $em;

    /** @var string|null HTML of the page being scanned (empty/null when nothing could be rendered) */
    protected $pageHtml;

    /** @var Twig_Environment */
    protected $twig;

    /** @var PageInterface|null Page currently being scanned */
    protected $currentPage;

    /** @var string Directory in which static files are looked up */
    protected $webDir;

    /** @var array Raw apps configuration handed to AppConfigHelper */
    protected $apps;

    /** @var int Number of linked documents inspected since the service was created */
    protected $linksCheckedCounter = 0;

    /** @var array[] Errors found during the current scan: ['message' => string, 'page' => PageInterface] */
    protected $errors = [];

    /** @var bool[] Cache of uriExist() results, indexed by slug */
    protected $everChecked = [];

    public static $appKernel;

    public function __construct(
        Twig_Environment $twig,
        EntityManagerInterface $em,
        string $webDir,
        array $apps,
        RouterInterface $router,
        KernelInterface $kernel
    ) {
        $this->twig = $twig;
        // NOTE(review): $router is not declared in this class; presumably it is
        // declared by GenerateLivePathForTrait - TODO confirm.
        $this->router = $router;
        $this->em = $em;
        $this->webDir = $webDir;
        $this->apps = $apps;

        static::loadKernel($kernel);
    }

    /**
     * Scans a page and checks every document it links to.
     *
     * @param PageInterface $page
     *
     * @return true|array[] true when nothing is wrong, else the list of errors
     *                      (each one being ['message' => string, 'page' => PageInterface])
     */
    public function scan(PageInterface $page)
    {
        $this->app = new AppConfigHelper($page->getHost(), $this->apps);
        $this->currentPage = $page;
        $this->errors = [];
        $this->pageHtml = '';

        if (false !== $page->getRedirection()) {
            // TODO: check that the redirection target answers with a 20X status
            // (and maybe return the status code instead of true).
            return true;
        }

        $liveUri = $this->generateLivePathFor($page);
        $this->pageHtml = $this->getHtml($liveUri);

        // Collect every linked document (href="", data-rot="", data-img="", src="", data-bg="")
        // and check each of them.
        if ($this->pageHtml) {
            $this->checkLinkedDocs($this->getLinkedDocs());
        }

        return empty($this->errors) ? true : $this->errors;
    }

    /**
     * Renders a page through the application kernel.
     *
     * A response that is not a 200 is recorded as an error instead of
     * terminating the process, so the caller still gets the error list back.
     *
     * @param string $liveUri URI handed to Request::create()
     *
     * @return string|null the response content, or null when the response is
     *                     a redirect or does not have a 200 status code
     */
    protected function getHtml($liveUri)
    {
        $request = Request::create($liveUri);
        $response = static::$appKernel->handle($request);

        if ($response->isRedirect()) {
            // Nothing to scan on a redirect response.
            return null;
        }

        $statusCode = $response->getStatusCode();
        if (200 !== (int) $statusCode) {
            $this->addError('error on generating the page ('.$statusCode.')');

            return null;
        }

        return $response->getContent();
    }

    /**
     * Records an error for the page currently being scanned.
     *
     * @param string $message
     */
    protected function addError($message)
    {
        $this->errors[] = [
            'message' => $message,
            'page' => $this->currentPage,
        ];
    }

    /**
     * Escapes a string for use inside a "/"-delimited regex, or turns a
     * (possibly nested) array of strings into a "(a|b|c)" alternation group.
     *
     * @param string|array $var
     *
     * @return string
     */
    protected static function prepareForRegex($var)
    {
        if (is_string($var)) {
            return preg_quote($var, '/');
        }

        // A closure is used rather than the 'static::prepareForRegex' string
        // callable, which is deprecated as of PHP 8.2.
        $alternatives = array_map(function ($item) {
            return static::prepareForRegex($item);
        }, $var);

        return '('.implode('|', $alternatives).')';
    }

    /**
     * Tells whether a string looks like a web link: an optional
     * "http(s)://host" prefix followed by a path made of URL-safe characters.
     *
     * @param string $url
     *
     * @return int|false the raw preg_match() result: 1 on match, 0 otherwise, false on error
     */
    public static function isWebLink(string $url)
    {
        return preg_match('@^((?:(http:|https:)//([\w\d-]+\.)+[\w\d-]+){0,1}(/?[\w~,;\-\./?%&+#=]*))$@', $url);
    }

    /**
     * Extracts the URIs found in the href, data-rot, src, data-img and
     * data-bg attributes of the current page HTML.
     *
     * Fragments (#...) are dropped, data-rot values are rot13-decoded and the
     * "https://<main host>" prefix is stripped so that internal links are
     * returned as paths.
     *
     * @return string[] unique, non-empty URIs
     */
    protected function getLinkedDocs(): array
    {
        $urlInAttributes = ' '.self::prepareForRegex(['href', 'data-rot', 'src', 'data-img', 'data-bg']);
        $regex = '/'.$urlInAttributes.'=((["\'])([^\3]+)\3|([^\s>]+)[\s>])/iU';

        // preg_match_all() returns 0 when nothing matches and false on error.
        if (!preg_match_all($regex, (string) $this->pageHtml, $matches)) {
            return [];
        }

        $linkedDocs = [];
        foreach ($matches[0] as $k => $match) {
            // Group 4 holds a quoted value, group 5 an unquoted one. Unmatched
            // groups are padded with '' by preg_match_all(), so isset() cannot
            // be used to tell them apart.
            $uri = '' !== $matches[4][$k] ? $matches[4][$k] : $matches[5][$k];

            // The regex is case-insensitive, so is the attribute name comparison.
            if ('data-rot' === strtolower($matches[1][$k])) {
                $uri = str_rot13($uri);
            }

            // Drop the fragment. explode() is used rather than strtok(), which
            // skips leading delimiters ("#top" => "top") and returns false on ''.
            $uri = explode('#', $uri, 2)[0];
            $uri = $this->removeBase($uri);

            if ('' !== $uri && self::isWebLink($uri)) {
                $linkedDocs[] = $uri;
            }
        }

        return array_values(array_unique($linkedDocs));
    }

    /**
     * Strips the "https://<main host>" prefix from a URL, if present.
     *
     * @param string $url
     *
     * @return string the URL without the prefix, or the URL untouched
     */
    protected function removeBase($url)
    {
        $base = 'https://'.$this->app->getMainHost();

        if (0 === strpos($url, $base)) {
            return substr($url, strlen($base));
        }

        return $url;
    }

    /**
     * @return int number of linked documents inspected so far
     */
    public function getLinksCheckedCounter()
    {
        return $this->linksCheckedCounter;
    }

    /**
     * Checks every given URI and records an error for each one not found.
     *
     * URIs starting with "/" are checked with uriExist(), the ones starting
     * with "http" with urlExist(); anything else is counted but not checked.
     *
     * @param array $linkedDocs
     */
    protected function checkLinkedDocs(array $linkedDocs)
    {
        foreach ($linkedDocs as $uri) {
            ++$this->linksCheckedCounter;

            // Guard against values on which $uri[0] would not be valid.
            if (!is_string($uri) || '' === $uri) {
                continue;
            }

            if ('/' === $uri[0]) {
                $isMissing = !$this->uriExist($uri);
            } else {
                $isMissing = 0 === strpos($uri, 'http') && !$this->urlExist($uri);
            }

            if ($isMissing) {
                $this->addError('<code>'.$uri.'</code> introuvable');
            }
        }
    }

    /**
     * Tells whether an external URL exists.
     *
     * @param string $uri currently unused
     *
     * @return bool always true for now
     */
    protected function urlExist($uri)
    {
        // TODO: check external resources.
        return true;
    }

    /**
     * Tells whether an internal URI points to something that exists: a page
     * found by its slug, a file in the web directory, or the feed ("feed.xml").
     * Results are cached per slug in $everChecked.
     *
     * @param string $uri
     *
     * @return bool
     */
    protected function uriExist($uri)
    {
        $slug = ltrim($uri, '/');

        if (isset($this->everChecked[$slug])) {
            return $this->everChecked[$slug];
        }

        // Media are not looked up in the database: the file check below is enough.
        $isMedia = 0 === strpos($slug, 'media/');

        // TODO: add a domain check (currentPage domain).
        $page = $isMedia
            ? null
            : $this->em->getRepository(\get_class($this->currentPage))
                ->findOneBy(['slug' => '' === $slug ? 'homepage' : $slug]);

        $exists = null !== $page
            || file_exists($this->webDir.'/'.$slug)
            || 'feed.xml' === $slug;

        $this->everChecked[$slug] = $exists;

        return $exists;
    }
}
200