Passed
Push — master ( 4017d9...9757db )
by Dev
10:15
created

PageScannerService::getLinksCheckedCounter()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace PiedWeb\CMSBundle\Extension\PageScanner;
4
5
use Doctrine\ORM\EntityManagerInterface;
6
use PiedWeb\CMSBundle\Entity\PageInterface;
7
use PiedWeb\CMSBundle\Service\AppConfigHelper;
8
use PiedWeb\CMSBundle\Utils\GenerateLivePathForTrait;
9
use PiedWeb\CMSBundle\Utils\KernelTrait;
10
use Symfony\Component\HttpFoundation\Request;
11
use Symfony\Component\HttpKernel\KernelInterface;
12
use Symfony\Component\Routing\RouterInterface;
13
use Twig\Environment as Twig_Environment;
14
15
/**
 * Scans the rendered HTML of a page and reports the linked documents
 * (links, images, backgrounds...) that cannot be resolved.
 *
 * The page is rendered through the application kernel (no real HTTP request),
 * every URL found in a known attribute is extracted, and internal URLs are
 * checked against the page repository and the public web directory.
 */
class PageScannerService
{
    use GenerateLivePathForTrait;
    use KernelTrait;

    /**
     * Configuration helper for the host of the page being scanned; set by scan().
     *
     * @var AppConfigHelper
     */
    protected $app;

    /** @var EntityManagerInterface */
    protected $em;

    /** @var string HTML of the page being scanned ('' when nothing could be rendered) */
    protected $pageHtml;

    /** @var Twig_Environment */
    protected $twig;

    /** @var PageInterface|null Page being scanned; attached to every reported error */
    protected $currentPage;

    /** @var string Directory that internal URIs are resolved against with file_exists() */
    protected $webDir;

    /** @var array Raw apps configuration handed to AppConfigHelper */
    protected $apps;

    /** @var int Number of linked documents iterated by checkLinkedDocs() since instantiation */
    protected $linksCheckedCounter = 0;

    /** @var array[] Errors found for the current page, each ['message' => string, 'page' => PageInterface] */
    protected $errors = [];

    /** @var bool[] Cache of uriExist() results, indexed by slug, shared between scans */
    protected $everChecked = [];

    public static $appKernel;

    /**
     * @param string $webDir directory used to check that linked files exist
     * @param array  $apps   apps configuration forwarded to AppConfigHelper
     */
    public function __construct(
        Twig_Environment $twig,
        EntityManagerInterface $em,
        string $webDir,
        array $apps,
        RouterInterface $router,
        KernelInterface $kernel
    ) {
        $this->twig = $twig;
        // NOTE(review): $router is not declared in this class; presumably the property
        // comes from GenerateLivePathForTrait - TODO confirm.
        $this->router = $router;
        $this->em = $em;
        $this->webDir = $webDir;
        $this->apps = $apps;

        static::loadKernel($kernel);
    }

    /**
     * Forgets the errors collected for the previously scanned page.
     */
    protected function resetErrors()
    {
        $this->errors = [];
    }

    /**
     * Scans a page and returns the problems found in its linked documents.
     *
     * @return true|array[] true when no error was found, otherwise the list of
     *                      errors as built by addError()
     */
    public function scan(PageInterface $page)
    {
        $this->app = new AppConfigHelper($page->getHost(), $this->apps);
        $this->currentPage = $page;
        $this->resetErrors();
        $this->pageHtml = '';

        if (false !== $page->getRedirection()) {
            // TODO: check that $page->getRedirection() answers with a 20X status
            // (and possibly return the status code instead of true).
            return true;
        }

        $liveUri = $this->generateLivePathFor($page);
        // getHtml() returns null when the page could not be rendered; keep a string here.
        $this->pageHtml = (string) $this->getHtml($liveUri);

        // Collect every URL found in href="", data-rot="", data-img="", src="" and data-bg=""
        // and check each of them.
        if ('' !== $this->pageHtml) {
            $this->checkLinkedDocs($this->getLinkedDocs());
        }

        return empty($this->errors) ? true : $this->errors;
    }

    /**
     * Renders a live URI through the application kernel.
     *
     * @param string $liveUri URI to request
     *
     * @return string|false|null the response content on a 200 response; null when the
     *                           response is a redirect or any other non-200 status (an
     *                           error is recorded in the latter case)
     */
    protected function getHtml($liveUri)
    {
        $request = Request::create($liveUri);
        $response = static::$appKernel->handle($request);

        if ($response->isRedirect()) {
            // NOTE(review): the redirect target (Location header) is not checked;
            // the previous code only stored it in an unused local variable.
            return null;
        }

        if (200 !== $response->getStatusCode()) {
            // Record the failure and let scan() report it. The previous `exit` here
            // stopped the whole process, so the error never reached the caller.
            $this->addError('error on generating the page ('.$response->getStatusCode().')');

            return null;
        }

        return $response->getContent();
    }

    /**
     * Records an error for the page currently being scanned.
     *
     * @param string $message
     */
    protected function addError($message)
    {
        $this->errors[] = [
            'message' => $message,
            'page' => $this->currentPage,
        ];
    }

    /**
     * Escapes a value for use inside a "/"-delimited regular expression.
     *
     * @param string|array $var a string is quoted with preg_quote(); an array is turned
     *                          into a capturing alternation group "(a|b|c)" of its
     *                          (recursively prepared) items
     *
     * @return string
     */
    protected static function prepareForRegex($var)
    {
        if (\is_string($var)) {
            return preg_quote($var, '/');
        }

        // Array callable instead of the 'static::method' string form, which is
        // deprecated since PHP 8.2.
        $var = array_map([static::class, 'prepareForRegex'], $var);

        return '('.implode('|', $var).')';
    }

    /**
     * Tells whether a string looks like an http(s) URL or a path, optionally with query/fragment.
     *
     * @return int|false the raw preg_match() result: 1 on match, 0 otherwise, false on regex error
     */
    protected static function isWebLink(string $url)
    {
        return preg_match('@^((?:(http:|https:)//([\w\d-]+\.)+[\w\d-]+){0,1}(/?[\w~,;\-\./?%&+#=]*))$@', $url);
    }

    /**
     * Extracts the URLs referenced by the current page HTML.
     *
     * Looks at the href, data-rot (rot13 encoded), src, data-img and data-bg attributes,
     * quoted or not. Fragments are dropped, the main host base is removed, and only
     * values accepted by isWebLink() are kept.
     *
     * @return string[] unique list of URLs/URIs
     */
    protected function getLinkedDocs(): array
    {
        $attributes = self::prepareForRegex(['href', 'data-rot', 'src', 'data-img', 'data-bg']);
        // Groups: 1 = attribute name, 2 = quote, 3 = quoted value, 4 = unquoted value.
        // The quoted value is matched lazily up to the same quote that opened it.
        $regex = '/\s'.$attributes.'=(?:(["\'])(.*?)\2|([^\s>"\']+))/is';

        if (!preg_match_all($regex, (string) $this->pageHtml, $matches)) {
            return [];
        }

        $linkedDocs = [];
        foreach ($matches[1] as $k => $attribute) {
            // preg_match_all() fills groups that did not take part in the match with '',
            // so isset() cannot tell a quoted value from an unquoted one.
            $uri = '' !== $matches[3][$k] ? $matches[3][$k] : $matches[4][$k];

            if ('data-rot' === strtolower($attribute)) {
                $uri = str_rot13($uri);
            }

            // Drop the fragment. strtok() is not suitable here: it skips leading
            // delimiters ("#top" would become "top") and returns false on ''.
            $uri = explode('#', $uri, 2)[0];
            $uri = $this->removeBase($uri);

            if ('' !== $uri && self::isWebLink($uri)) {
                $linkedDocs[] = $uri;
            }
        }

        return array_values(array_unique($linkedDocs));
    }

    /**
     * Turns an absolute https URL pointing to the main host into a host-relative URI.
     *
     * @param string $url
     *
     * @return string the URL without its "https://<main host>" prefix, or the URL
     *                unchanged when it does not point to the main host
     */
    protected function removeBase($url)
    {
        $url = (string) $url;
        $base = 'https://'.$this->app->getMainHost();

        if (0 !== strpos($url, $base)) {
            return $url;
        }

        $relative = (string) substr($url, \strlen($base));

        // The base must be followed by a path, a query or nothing; otherwise the URL
        // targets another host which merely starts with the same characters.
        if ('' !== $relative && '/' !== $relative[0] && '?' !== $relative[0]) {
            return $url;
        }

        return $relative;
    }

    /**
     * @return int number of linked documents iterated since this service was instantiated
     */
    public function getLinksCheckedCounter()
    {
        return $this->linksCheckedCounter;
    }

    /**
     * Checks every linked document and records an error for each one that cannot be found.
     *
     * Host-relative URIs (starting with "/") go through uriExist(); values starting with
     * "http" go through urlExist(); anything else is ignored.
     */
    protected function checkLinkedDocs(array $linkedDocs)
    {
        foreach ($linkedDocs as $uri) {
            ++$this->linksCheckedCounter;

            // Guard against non-string and empty entries before reading $uri[0].
            if (!\is_string($uri) || '' === $uri) {
                continue;
            }

            if (('/' === $uri[0] && !$this->uriExist($uri))
                || (0 === strpos($uri, 'http') && !$this->urlExist($uri))) {
                $this->addError('<code>'.$uri.'</code> introuvable');
            }
        }
    }

    /**
     * Tells whether an external URL exists.
     *
     * @param string $uri currently unused
     *
     * @return bool always true for now
     */
    protected function urlExist($uri)
    {
        // TODO: check external resources.
        return true;
    }

    /**
     * Tells whether a host-relative URI resolves to a page, an existing file or the feed.
     *
     * Results are cached per slug in $everChecked.
     *
     * @param string $uri host-relative URI, query string allowed
     *
     * @return bool
     */
    protected function uriExist($uri)
    {
        // The query string does not take part in resolving a page or a file.
        $slug = ltrim(explode('?', (string) $uri, 2)[0], '/');

        if (isset($this->everChecked[$slug])) {
            return $this->everChecked[$slug];
        }

        // Media are not looked up in the database: checking that the file exists is enough.
        $isMedia = 0 === strpos($slug, 'media/');
        $page = $isMedia ? null : $this->em->getRepository(\get_class($this->currentPage))
            ->findOneBy(['slug' => '' === $slug ? 'homepage' : $slug]); // todo add domain check (currentPage domain)

        $this->everChecked[$slug] = null !== $page
            || file_exists($this->webDir.'/'.$slug)
            || 'feed.xml' === $slug;

        return $this->everChecked[$slug];
    }
}
204