1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PiedWeb\CMSBundle\Extension\PageScanner; |
4
|
|
|
|
5
|
|
|
use Doctrine\ORM\EntityManagerInterface; |
6
|
|
|
use PiedWeb\CMSBundle\Entity\PageInterface; |
7
|
|
|
use PiedWeb\CMSBundle\Service\AppConfigHelper; |
8
|
|
|
use PiedWeb\CMSBundle\Utils\GenerateLivePathForTrait; |
9
|
|
|
use PiedWeb\CMSBundle\Utils\KernelTrait; |
10
|
|
|
use Symfony\Component\HttpFoundation\Request; |
11
|
|
|
use Symfony\Component\HttpKernel\KernelInterface; |
12
|
|
|
use Symfony\Component\Routing\RouterInterface; |
13
|
|
|
use Twig\Environment as Twig_Environment; |
14
|
|
|
|
15
|
|
|
/**
 * Scans the rendered HTML of a page to find broken links and missing images.
 *
 * The page is rendered in-process through the application kernel, every URL
 * found in a known set of HTML attributes is extracted, and each internal
 * URL is checked against the page repository and the public web directory.
 */
class PageScannerService
{
    use GenerateLivePathForTrait;
    use KernelTrait;

    /**
     * Configuration helper built for the host of the page being scanned.
     *
     * @var AppConfigHelper
     */
    protected $app;

    /**
     * @var EntityManagerInterface
     */
    protected $em;

    /**
     * HTML of the page being scanned ('' or null when nothing could be rendered).
     *
     * @var string|null
     */
    protected $pageHtml;

    /**
     * @var Twig_Environment
     */
    protected $twig;

    /**
     * Page currently being scanned; attached to every reported error.
     *
     * @var PageInterface
     */
    protected $currentPage;

    /**
     * Directory whose files are considered reachable by an internal URI.
     *
     * @var string
     */
    protected $webDir;

    /**
     * Raw apps configuration handed over to AppConfigHelper.
     *
     * @var array
     */
    protected $apps;

    /**
     * Number of linked documents examined since this service was created.
     *
     * @var int
     */
    protected $linksCheckedCounter = 0;

    /**
     * Errors collected for the current page; each entry has a 'message' and a 'page' key.
     *
     * @var array
     */
    protected $errors = [];

    /**
     * Cache of internal URI checks: slug => whether it exists.
     *
     * @var array
     */
    protected $everChecked = [];

    public static $appKernel;

    public function __construct(
        Twig_Environment $twig,
        EntityManagerInterface $em,
        string $webDir,
        array $apps,
        RouterInterface $router,
        KernelInterface $kernel
    ) {
        $this->twig = $twig;
        // NOTE(review): $router is not declared in this class; presumably it is
        // declared by GenerateLivePathForTrait. Confirm, otherwise this creates
        // a dynamic property (deprecated since PHP 8.2).
        $this->router = $router;
        $this->em = $em;
        $this->webDir = $webDir;
        $this->apps = $apps;

        static::loadKernel($kernel);
    }

    /**
     * Forgets the errors collected during a previous scan.
     */
    protected function resetErrors()
    {
        $this->errors = [];
    }

    /**
     * Scans a page and reports every linked document that cannot be found.
     *
     * Pages for which getRedirection() does not return false are not rendered
     * and are reported as valid (the redirection target is not checked yet).
     *
     * @return true|array true when no error was found, else the list of errors
     *                    (each one an array with a 'message' and a 'page' key)
     */
    public function scan(PageInterface $page)
    {
        $this->app = new AppConfigHelper($page->getHost(), $this->apps);
        $this->currentPage = $page;
        $this->resetErrors();
        $this->pageHtml = '';

        if (false !== $page->getRedirection()) {
            // todo: check $page->getRedirection() returns a 20X

            return true; // or status code
        }

        $liveUri = $this->generateLivePathFor($page);
        $this->pageHtml = $this->getHtml($liveUri);

        // Collect every URL found in href, data-rot, data-img, src and data-bg
        // attributes, then check each of them.
        if ($this->pageHtml) {
            $this->checkLinkedDocs($this->getLinkedDocs());
        }

        return empty($this->errors) ? true : $this->errors;
    }

    /**
     * Renders the given URI through the application kernel.
     *
     * @param string $liveUri
     *
     * @return string|null the response body, or null when the response is a
     *                     redirect or has a status code other than 200 (in the
     *                     latter case an error is recorded)
     */
    protected function getHtml($liveUri)
    {
        $request = Request::create($liveUri);
        $response = static::$appKernel->handle($request);

        if ($response->isRedirect()) {
            // The redirect target (Location header) is not followed nor checked.
            return null;
        }

        if (200 != $response->getStatusCode()) {
            // Record the failure and let the caller go on: terminating the
            // process here would hide the error and abort the scan of every
            // remaining page.
            $this->addError('error on generating the page ('.$response->getStatusCode().')');

            return null;
        }

        return $response->getContent();
    }

    /**
     * Records an error for the page currently being scanned.
     *
     * @param string $message
     */
    protected function addError($message)
    {
        $this->errors[] = [
            'message' => $message,
            'page' => $this->currentPage,
        ];
    }

    /**
     * Escapes a string for use inside a '/'-delimited regex, or turns an array
     * of strings (possibly nested) into an escaped "(a|b|c)" alternation group.
     *
     * @param string|array $var
     *
     * @return string
     */
    protected static function prepareForRegex($var)
    {
        if (\is_string($var)) {
            return preg_quote($var, '/');
        }

        // Array callable instead of the 'static::method' string form, which is
        // deprecated since PHP 8.2.
        $var = array_map([static::class, 'prepareForRegex'], $var);

        return '('.implode('|', $var).')';
    }

    /**
     * Tells whether the string looks like an http(s) URL or a path.
     *
     * @return int|false the raw preg_match() result (1 on match, 0 otherwise)
     */
    protected static function isWebLink(string $url)
    {
        return preg_match('@^((?:(http:|https:)//([\w\d-]+\.)+[\w\d-]+){0,1}(/?[\w~,;\-\./?%&+#=]*))$@', $url);
    }

    /**
     * Extracts the URLs referenced by the current page HTML.
     *
     * Looks at the href, data-rot, src, data-img and data-bg attributes,
     * quoted or not. data-rot values are decoded with ROT13, fragments are
     * dropped and the main host prefix is removed (see removeBase()).
     *
     * @return array unique, non-empty URLs accepted by isWebLink()
     */
    protected function getLinkedDocs(): array
    {
        $urlInAttributes = ' '.self::prepareForRegex(['href', 'data-rot', 'src', 'data-img', 'data-bg']);
        // Groups: 1 = attribute name, 2 = quote, 3 = quoted value, 4 = unquoted value.
        // The U flag makes ".*" lazy so the value stops at the first closing
        // quote; "*" (rather than "+") lets an empty attribute match as empty
        // instead of swallowing the text up to the next quote.
        $regex = '/'.$urlInAttributes.'=(?:(["\'])(.*)\2|([^\s>]+)[\s>])/isU';

        if (!preg_match_all($regex, (string) $this->pageHtml, $matches)) {
            return [];
        }

        $linkedDocs = [];
        foreach ($matches[1] as $k => $attribute) {
            // preg_match_all() reports a group that did not take part in the
            // match as '', so emptiness (not isset) tells which one matched.
            $uri = '' !== $matches[3][$k] ? $matches[3][$k] : $matches[4][$k];

            // The pattern is case-insensitive, so is the attribute comparison.
            if ('data-rot' === strtolower($attribute)) {
                $uri = str_rot13($uri);
            }

            // Drop the fragment; an anchor-only link ("#top") becomes '' and is skipped.
            $uri = explode('#', $uri, 2)[0];
            $uri = $this->removeBase($uri);

            if (\is_string($uri) && '' !== $uri && self::isWebLink($uri)) {
                $linkedDocs[] = $uri;
            }
        }

        return array_unique($linkedDocs);
    }

    /**
     * Strips the "https://<main host>" prefix so that absolute links to the
     * current app are handled like internal paths.
     *
     * @param string $url
     *
     * @return string
     */
    protected function removeBase($url)
    {
        $base = 'https://'.$this->app->getMainHost();

        if (0 === strpos($url, $base)) {
            return substr($url, \strlen($base));
        }

        return $url;
    }

    /**
     * @return int number of linked documents examined so far
     */
    public function getLinksCheckedCounter()
    {
        return $this->linksCheckedCounter;
    }

    /**
     * Checks every given URI and records an error for each one not found.
     *
     * URIs starting with "/" are checked internally, those starting with
     * "http" are handed to urlExist(); anything else is counted but ignored.
     */
    protected function checkLinkedDocs(array $linkedDocs)
    {
        foreach ($linkedDocs as $uri) {
            ++$this->linksCheckedCounter;

            if (!\is_string($uri)) {
                continue;
            }

            $missingInternal = '/' == $uri[0] && !$this->uriExist($uri);
            $missingExternal = !$missingInternal && 0 === strpos($uri, 'http') && !$this->urlExist($uri);

            if ($missingInternal || $missingExternal) {
                $this->addError('<code>'.$uri.'</code> introuvable');
            }
        }
    }

    /**
     * Tells whether an external URL is reachable.
     *
     * Not implemented yet: every external URL is reported as existing.
     *
     * @param string $uri
     *
     * @return bool
     */
    protected function urlExist($uri)
    {
        // todo check external resource
        return true;
    }

    /**
     * Tells whether an internal URI points to something that exists: a page
     * with that slug, a file under the web directory, or the feed ("feed.xml").
     *
     * Results are cached per slug for the lifetime of the service.
     *
     * @param string $uri
     *
     * @return bool
     */
    protected function uriExist($uri)
    {
        $slug = ltrim($uri, '/');

        if (isset($this->everChecked[$slug])) {
            return $this->everChecked[$slug];
        }

        // Media are not looked up in the database: the file check is enough.
        $isMedia = 0 === strpos($slug, 'media/');
        $page = $isMedia
            ? null
            : $this->em->getRepository(\get_class($this->currentPage))
                ->findOneBy(['slug' => '' == $slug ? 'homepage' : $slug]); // todo add domain check (currentPage domain)

        $this->everChecked[$slug] = null !== $page
            || file_exists($this->webDir.'/'.$slug)
            || 'feed.xml' === $slug;

        return $this->everChecked[$slug];
    }
}
204
|
|
|
|