GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 7856af...b60ec8 )
by Emmanuel
09:42
created

src/GlLinkChecker.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis. Consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Main Class
4
 *
5
 * PHP version 5.4
6
 *
7
 * @category  GLICER
8
 * @package   GlLinkChecker
9
 * @author    Emmanuel ROECKER
10
 * @author    Rym BOUCHAGOUR
11
 * @copyright 2015 GLICER
12
 * @license   MIT
13
 * @link      http://dev.glicer.com/
14
 *
15
 * Created : 10/03/15
16
 * File : GlLinkChecker.php
17
 *
18
 */
19
namespace GlLinkChecker;
20
21
use GlHtml\GlHtml;
22
use GuzzleHttp\Client;
23
use Symfony\Component\Finder\Finder;
24
use Symfony\Component\Finder\SplFileInfo;
25
26
/**
 * Class GlLinkChecker
 *
 * Checks links through a Guzzle http client: links found in html/json files,
 * urls listed in robots.txt and in the sitemaps it references, and urls that
 * are expected to answer 403 or 404.
 *
 * @package GlLinkChecker
 */
class GlLinkChecker
{
    /**
     * @var \GuzzleHttp\Client http client used for every request
     */
    private $client;

    /**
     * @var array|null $internalurls handed as is to GlLinkCheckerError::isInternal()
     */
    private $internalurls;

    /**
     * @param string|null $rooturl      base url given to the http client
     * @param array|null  $internalurls handed as is to GlLinkCheckerError::isInternal()
     */
    public function __construct($rooturl = null, array $internalurls = null)
    {
        $this->client = new Client([
            'base_url' => $rooturl,
            'defaults' => [
                'headers' => [
                    'User-Agent'      => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0',
                    'Accept'          => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language' => 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
                    'Accept-Encoding' => 'gzip, deflate'
                ]
            ]
        ]);
        // NOTE(review): this disables TLS certificate verification for every request
        // made by this client - confirm this is intended outside of test environments.
        $this->client->setDefaultOption('verify', false);
        $this->internalurls = $internalurls;
    }

    /**
     * collect the urls found in a text
     *
     * Nothing is returned: valid urls are added to $links, keyed by the url
     * itself so that the same url is only stored once.
     *
     * @param string $text
     * @param array  $links filled in place
     *
     * @return void
     */
    private function getLinks($text, &$links)
    {
        $regexUrl = '/[">\s]+((http|https|ftp|ftps)\:\/\/(.*?))["<\s]+/';
        $urls     = null;
        if (!(preg_match_all($regexUrl, $text, $urls) > 0)) {
            return;
        }

        // $urls[1] holds the first capture group : the full url
        foreach ($urls[1] as $url) {
            if (filter_var($url, FILTER_VALIDATE_URL)) {
                $links[$url] = $url;
            }
        }
    }

    /**
     * get all links in an object
     *
     * Walks the decoded structure recursively; every string that starts with a
     * known scheme and is a valid url is added to $links, keyed by the url.
     *
     * @param array $obj   decoded json (nested arrays)
     * @param array $links filled in place
     */
    private function searchInArray($obj, array &$links)
    {
        foreach ($obj as $elem) {
            if (is_array($elem)) {
                $this->searchInArray($elem, $links);
                continue;
            }

            if (!is_string($elem)) {
                continue;
            }

            if (preg_match("/^(http|https|ftp|ftps).*$/", $elem) && filter_var($elem, FILTER_VALIDATE_URL)) {
                $links[$elem] = $elem;
            }
        }
    }

    /**
     * get all links in a json
     *
     * @param string $json
     *
     * @return array urls keyed by url, empty when $json does not decode to an array
     */
    private function getJsonLinks($json)
    {
        $decoded = json_decode($json, true);
        if (!is_array($decoded)) {
            // invalid json decodes to null, a scalar document to a scalar :
            // there is nothing to iterate over in either case
            return [];
        }

        $links = [];
        $this->searchInArray($decoded, $links);

        return $links;
    }

    /**
     * request an url and give back its http status code
     *
     * @param string $url
     *
     * @return int
     */
    private function fetchStatusCode($url)
    {
        // 'exceptions' => false : 4xx and 5xx answers are returned instead of thrown
        $response = $this->client->get($url, ['exceptions' => false]);

        return (int) $response->getStatusCode();
    }

    /**
     * request every url and sort it according to the status code it answers
     *
     * @param array $urls
     * @param int   $expected status code every url must answer
     *
     * @return array ['ok' => url[], 'error' => url[]], a key is only present when it has urls
     */
    private function checkExpectedStatus(array $urls, $expected)
    {
        $result = [];
        foreach ($urls as $url) {
            $key            = ($this->fetchStatusCode($url) === $expected) ? 'ok' : 'error';
            $result[$key][] = $url;
        }

        return $result;
    }

    /**
     * check links in a sitemap
     *
     * @param string $sitemap content of the sitemap
     *
     * @return array ['ok' => url[], 'error' => url[]], a key is only present when it has urls
     * @throws \Exception
     */
    private function checkSitemap($sitemap)
    {
        $xml  = new GlHtml($sitemap);
        $urls = [];
        foreach ($xml->get("loc") as $loc) {
            $urls[] = $loc->getText();
        }

        return $this->checkExpectedStatus($urls, 200);
    }

    /**
     * check 403 and 404 errors
     *
     * @param array $urlerrors     urls that must answer 404
     * @param array $urlforbiddens urls that must answer 403
     *
     * @return array [404 => ['ok' => url[], 'error' => url[]], 403 => [...]],
     *               a key is only present when it has urls
     */
    public function checkErrors(array $urlerrors, array $urlforbiddens)
    {
        $result = [];
        foreach ([404 => $urlerrors, 403 => $urlforbiddens] as $expected => $urls) {
            $checked = $this->checkExpectedStatus($urls, $expected);
            if (!empty($checked)) {
                $result[$expected] = $checked;
            }
        }

        return $result;
    }

    /**
     * check links in robots.txt and sitemap
     *
     * Every "Sitemap:" url must answer 200 and is then checked url by url;
     * every "Disallow:" url must answer 200 or 403.
     *
     * @return array ['sitemap' => ['ok' => [url => result of the sitemap check], 'error' => url[]],
     *                'disallow' => ['ok' => url[], 'error' => url[]]],
     *               a key is only present when it has urls
     * @throws \Exception when robots.txt does not answer 200
     */
    public function checkRobotsSitemap()
    {
        // without 'exceptions' => false the client throws on 4xx/5xx answers
        // before the status check below is ever reached
        $response = $this->client->get("/robots.txt", ['exceptions' => false]);
        if ((int) $response->getStatusCode() !== 200) {
            throw new \Exception("Cannot find robots.txt");
        }

        $lines  = explode("\n", $response->getBody()->getContents());
        $result = [];
        foreach ($lines as $line) {
            if (preg_match('/^\s*Sitemap:(.*)/i', $line, $match)) {
                $urlsitemap = trim($match[1]);
                $response   = $this->client->get($urlsitemap, ['exceptions' => false]);
                if ((int) $response->getStatusCode() !== 200) {
                    $result['sitemap']['error'][] = $urlsitemap;
                } else {
                    $result['sitemap']['ok'][$urlsitemap] = $this->checkSitemap($response->getBody()->getContents());
                }
            } elseif (preg_match('/^\s*Disallow:(.*)/i', $line, $match)) {
                $urldisallow = trim($match[1]);
                $status      = $this->fetchStatusCode($urldisallow);
                $key         = ($status === 200 || $status === 403) ? 'ok' : 'error';

                $result['disallow'][$key][] = $urldisallow;
            }
        }

        return $result;
    }

    /**
     * check links in html and json files
     *
     * @param Finder   $files      html and json files to read
     * @param callable $checkstart called once with the number of distinct links, before any check
     * @param callable $checking   called with (link, files containing it) before each link is checked
     * @param callable $checkend   called once without argument, after the last check
     *
     * @throws \Exception when a file cannot be read or is neither html nor json
     * @return GlLinkCheckerError[]
     */
    public function checkFiles(Finder $files, callable $checkstart, callable $checking, callable $checkend)
    {
        $linksByFile = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            $keyname   = $file->getRelativePathname();
            $extension = $file->getExtension();
            if ($extension !== 'html' && $extension !== 'json') {
                throw new \Exception("Extension unknown : " . $keyname);
            }

            // getRealPath() gives false when the file does not exist (anymore)
            $path  = $file->getRealPath();
            $inner = ($path === false) ? false : file_get_contents($path);
            if ($inner === false) {
                throw new \Exception("Cannot read file : " . $keyname);
            }

            if ($extension === 'html') {
                $html                  = new GlHtml($inner);
                $linksByFile[$keyname] = $html->getLinks();
            } else {
                $linksByFile[$keyname] = $this->getJsonLinks($inner);
            }
        }

        //reverse $linksByFile : link => files containing it
        $links = [];
        foreach ($linksByFile as $filename => $filelinks) {
            foreach ($filelinks as $filelink) {
                $links[$filelink][] = $filename;
            }
        }

        $checkstart(count($links));
        $result = [];
        foreach ($links as $link => $filenames) {
            $checking($link, $filenames);

            $gllink = new GlLinkCheckerError($this->client, $link, $filenames);

            // these checks are only run on links reported as internal
            if ($gllink->isInternal($this->internalurls)) {
                $gllink->check(['lowercase', 'endslash', 'absolute']);
            }

            $gllink->check(['exist']);
            $result[] = $gllink;
        }
        $checkend();

        return $result;
    }
}