1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PiedWeb\CMSBundle\PageScanner; |
4
|
|
|
|
5
|
|
|
use Doctrine\ORM\EntityManagerInterface; |
6
|
|
|
use PiedWeb\CMSBundle\Entity\PageInterface; |
7
|
|
|
use PiedWeb\CMSBundle\Service\AppConfigHelper as App; |
8
|
|
|
use PiedWeb\CMSBundle\Service\AppConfigHelper; |
9
|
|
|
use PiedWeb\CMSBundle\Utils\GenerateLivePathForTrait; |
10
|
|
|
use PiedWeb\CMSBundle\Utils\KernelTrait; |
11
|
|
|
use Symfony\Component\HttpFoundation\Request; |
12
|
|
|
use Symfony\Component\HttpFoundation\Response; |
13
|
|
|
use Symfony\Component\HttpKernel\KernelInterface; |
14
|
|
|
use Symfony\Component\Routing\Router; |
15
|
|
|
use Symfony\Component\Routing\RouterInterface; |
16
|
|
|
use Twig\Environment as Twig_Environment; |
17
|
|
|
|
18
|
|
|
/**
 * Scans a rendered page and reports the links and media it references that
 * cannot be resolved.
 *
 * For each scanned page the service renders the live URI through the kernel,
 * extracts every URI found in a fixed set of HTML attributes (href, src,
 * data-rot, data-img, data-bg) and checks that each internal URI matches
 * either a page slug in the database or a file below the web directory.
 */
class PageScannerService
{
    use KernelTrait, GenerateLivePathForTrait;

    /**
     * Configuration helper built for the host of the page being scanned.
     *
     * @var AppConfigHelper
     */
    protected $app;

    /**
     * @var EntityManagerInterface
     */
    protected $em;

    /**
     * HTML of the page currently being scanned ('' or null when nothing was rendered).
     *
     * @var string|null
     */
    protected $pageHtml;

    /**
     * @var Twig_Environment
     */
    protected $twig;

    /**
     * Page currently being scanned; attached to every reported error.
     *
     * @var PageInterface
     */
    protected $currentPage;

    /**
     * Directory in which internal URIs are looked up as files.
     *
     * @var string
     */
    protected $webDir;

    /**
     * Raw apps configuration handed to AppConfigHelper.
     *
     * @var array
     */
    protected $apps;

    /**
     * Number of URIs handed to checkLinkedDocs() since this service was created.
     *
     * @var int
     */
    protected $linksCheckedCounter = 0;

    /**
     * Errors collected for the page currently being scanned.
     *
     * @var array<int, array{message: string, page: PageInterface}>
     */
    protected $errors = [];

    /**
     * Cache of internal lookups: slug => whether it exists.
     *
     * @var array<string, bool>
     */
    protected $everChecked = [];

    /**
     * Kernel used to render pages.
     * NOTE(review): presumably populated by KernelTrait::loadKernel() - confirm.
     */
    public static $appKernel;

    /**
     * @param Twig_Environment       $twig
     * @param EntityManagerInterface $em
     * @param string                 $webDir directory in which linked files are looked up
     * @param array                  $apps   apps configuration forwarded to AppConfigHelper
     * @param RouterInterface        $router
     * @param KernelInterface        $kernel
     */
    public function __construct(
        Twig_Environment $twig,
        EntityManagerInterface $em,
        string $webDir,
        array $apps,
        RouterInterface $router,
        KernelInterface $kernel
    ) {
        $this->twig = $twig;
        // NOTE(review): $router is not declared in this class; presumably the
        // property comes from GenerateLivePathForTrait - confirm.
        $this->router = $router;
        $this->em = $em;
        $this->webDir = $webDir;
        $this->apps = $apps;

        static::loadKernel($kernel);
    }

    /**
     * Scans one page and reports what could not be resolved.
     *
     * @param PageInterface $page
     *
     * @return true|array true when the page is a redirection or when no error
     *                    was found, otherwise the list of errors (each entry
     *                    holds a 'message' and the scanned 'page')
     */
    public function scan(PageInterface $page)
    {
        $this->app = new AppConfigHelper($page->getHost(), $this->apps);
        $this->currentPage = $page;
        $this->errors = [];
        $this->pageHtml = '';

        if (false !== $page->getRedirection()) {
            // TODO: check that the redirection target answers with a 20X status.

            return true; // or status code
        }

        $liveUri = $this->generateLivePathFor($page);
        $this->pageHtml = $this->getHtml($liveUri);

        // Collect every URI found in href, data-rot, data-img, src and data-bg
        // attributes, then check each of them.
        if ($this->pageHtml) {
            $this->checkLinkedDocs($this->getLinkedDocs());
        }

        return empty($this->errors) ? true : $this->errors;
    }

    /**
     * Renders the given URI through the kernel and returns the produced HTML.
     *
     * A non-200 response is recorded as an error for the current page. The
     * process is deliberately NOT terminated in that case, so that scan() can
     * hand the error back to its caller.
     *
     * @param string $liveUri
     *
     * @return string|false|null the response content, or null when the
     *                           response is a redirect or is not a 200
     */
    protected function getHtml($liveUri)
    {
        $request = Request::create($liveUri);
        $response = static::$appKernel->handle($request);

        if ($response->isRedirect()) {
            // The redirect target is not followed: there is nothing to scan.
            return null;
        }

        if (200 !== $response->getStatusCode()) {
            $this->addError('error on generating the page ('.$response->getStatusCode().')');

            return null;
        }

        return $response->getContent();
    }

    /**
     * Records an error for the page currently being scanned.
     *
     * @param string $message
     */
    protected function addError($message)
    {
        $this->errors[] = [
            'message' => $message,
            'page' => $this->currentPage,
        ];
    }

    /**
     * Turns a string, or a (possibly nested) list of strings, into a fragment
     * usable inside a '/'-delimited regular expression.
     *
     * @param string|array $var
     *
     * @return string the quoted string, or a capturing alternation
     *                "(a|b|c)" of the quoted items when an array is given
     */
    protected static function prepareForRegex($var)
    {
        if (is_string($var)) {
            return preg_quote($var, '/');
        }

        // Array callable instead of the 'static::method' string form, which is
        // deprecated as of PHP 8.2.
        $var = array_map([static::class, 'prepareForRegex'], $var);

        return '('.implode('|', $var).')';
    }

    /**
     * Tells whether the given string looks like an http(s) URL or a path.
     *
     * @param string $url
     *
     * @return int|false result of preg_match(): 1 on match, 0 otherwise,
     *                   false on regex failure
     */
    public static function isWebLink(string $url)
    {
        return preg_match('@^((?:(http:|https:)//([\w\d-]+\.)+[\w\d-]+){0,1}(/?[\w~,;\-\./?%&+#=]*))$@', $url);
    }

    /**
     * Extracts the distinct URIs referenced by the current page HTML.
     *
     * Fragments are dropped, the "https://<main host>" prefix is removed and
     * data-rot values are decoded with ROT13. Only non-empty values accepted
     * by isWebLink() are returned.
     *
     * @return string[] keys are not re-indexed after de-duplication
     */
    protected function getLinkedDocs(): array
    {
        $urlInAttributes = ' '.self::prepareForRegex(['href', 'data-rot', 'src', 'data-img', 'data-bg']);
        // Capture groups: 1 = attribute name, 3 = opening quote,
        // 4 = quoted value, 5 = unquoted value.
        $regex = '/'.$urlInAttributes.'=((["\'])([^\3]+)\3|([^\s>]+)[\s>])/iU';

        if (!preg_match_all($regex, (string) $this->pageHtml, $matches)) {
            return [];
        }

        $linkedDocs = [];
        foreach (array_keys($matches[0]) as $k) {
            // preg_match_all() fills groups that did not take part in a match
            // with '', so isset() cannot tell the quoted form from the
            // unquoted one: compare against '' instead.
            $uri = '' !== $matches[4][$k] ? $matches[4][$k] : $matches[5][$k];

            // The pattern is case-insensitive, so the attribute name may be
            // written in any case.
            if (0 === strcasecmp('data-rot', $matches[1][$k])) {
                $uri = str_rot13($uri);
            }

            // Drop the fragment. strtok() is not suitable here: it skips
            // leading delimiters ("#top" would become "top") and returns
            // false for "#".
            $uri = explode('#', $uri, 2)[0];
            $uri = $this->removeBase($uri);

            if ('' !== $uri && self::isWebLink($uri)) {
                $linkedDocs[] = $uri;
            }
        }

        return array_unique($linkedDocs);
    }

    /**
     * Strips the "https://<main host>" prefix from a URL, turning an absolute
     * URL on the main host into a path.
     *
     * @param string $url
     *
     * @return string the URL without the prefix, or unchanged when it does
     *                not start with it
     */
    protected function removeBase($url)
    {
        $base = 'https://'.$this->app->getMainHost();

        if (0 === strpos($url, $base)) {
            return substr($url, strlen($base));
        }

        return $url;
    }

    /**
     * @return int number of URIs handed to checkLinkedDocs() so far
     */
    public function getLinksCheckedCounter()
    {
        return $this->linksCheckedCounter;
    }

    /**
     * Checks every given URI and records an error for each one that cannot
     * be resolved.
     *
     * URIs starting with "/" are checked with uriExist(), URIs starting with
     * "http" with urlExist(); anything else is counted but not checked.
     *
     * @param array $linkedDocs
     */
    protected function checkLinkedDocs(array $linkedDocs)
    {
        foreach ($linkedDocs as $uri) {
            ++$this->linksCheckedCounter;

            // Guard against non-string or empty entries before reading $uri[0].
            if (!is_string($uri) || '' === $uri) {
                continue;
            }

            if (('/' === $uri[0] && !$this->uriExist($uri))
                || (0 === strpos($uri, 'http') && !$this->urlExist($uri))
            ) {
                $this->addError('<code>'.$uri.'</code> introuvable');
            }
        }
    }

    /**
     * Tells whether an external URL exists.
     *
     * Not implemented yet: every external URL is reported as existing.
     *
     * @param string $uri
     *
     * @return bool always true
     */
    protected function urlExist($uri)
    {
        // TODO: check external resources.
        return true;
    }

    /**
     * Tells whether an internal URI resolves to a page or to a file.
     *
     * The URI exists when a page with the matching slug is found in the
     * database ('homepage' for the empty slug), when a file exists at that
     * path below the web directory, or when it is "feed.xml". Results are
     * cached per slug.
     *
     * NOTE(review): a query string is kept in the slug, so "/page?x=1" is
     * looked up literally - confirm whether it should be stripped first.
     *
     * @param string $uri
     *
     * @return bool
     */
    protected function uriExist($uri)
    {
        $slug = ltrim($uri, '/');

        if (isset($this->everChecked[$slug])) {
            return $this->everChecked[$slug];
        }

        // Media are never looked up in the database: a file check is enough.
        $page = null;
        if (0 !== strpos($slug, 'media/')) {
            // TODO: restrict the lookup to the domain of the current page.
            $page = $this->em->getRepository(\get_class($this->currentPage))
                ->findOneBy(['slug' => '' == $slug ? 'homepage' : $slug]);
        }

        $this->everChecked[$slug] = null !== $page
            || file_exists($this->webDir.'/'.$slug)
            || 'feed.xml' === $slug;

        return $this->everChecked[$slug];
    }
}
200
|
|
|
|