GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

GlLinkChecker::searchInArray()   B
last analyzed

Complexity

Conditions 6
Paths 6

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 1 Features 0
Metric Value
c 2
b 1
f 0
dl 0
loc 16
rs 8.8571
cc 6
eloc 9
nc 6
nop 2
1
<?php
2
/**
3
 * Main Class
4
 *
5
 * PHP version 5.4
6
 *
7
 * @category  GLICER
8
 * @package   GlLinkChecker
9
 * @author    Emmanuel ROECKER
10
 * @author    Rym BOUCHAGOUR
11
 * @copyright 2015 GLICER
12
 * @license   MIT
13
 * @link      http://dev.glicer.com/
14
 *
15
 * Created : 10/03/15
16
 * File : GlLinkChecker.php
17
 *
18
 */
19
namespace GlLinkChecker;
20
21
use GlHtml\GlHtml;
22
use GuzzleHttp\Client;
23
use Symfony\Component\Finder\Finder;
24
use Symfony\Component\Finder\SplFileInfo;
25
26
/**
27
 * Class GlLinkChecker
28
 * @package GlLinkChecker
29
 */
30
class GlLinkChecker
{
    /**
     * HTTP client used for every request issued by this checker.
     *
     * @var \GuzzleHttp\Client
     */
    private $client;

    /**
     * Value handed to GlLinkCheckerError::isInternal() for every checked link.
     *
     * @var array|null $internalurls
     */
    private $internalurls;

    /**
     * Build the checker and the HTTP client it uses.
     *
     * NOTE(review): 'base_uri' is the Guzzle 6+ option name, while 'defaults'
     * and the per-request 'exceptions' flag used in this class are the
     * Guzzle 4/5 names (Guzzle 6+ calls the latter 'http_errors'). Confirm
     * which Guzzle version is installed before relying on these options.
     *
     * @param mixed      $rooturl      value used as the client's 'base_uri' option
     *                                 (presumably the root URL of the site to check)
     * @param array|null $internalurls forwarded to GlLinkCheckerError::isInternal()
     */
    public function __construct($rooturl = null, array $internalurls = null)
    {
        $this->client = new Client([
            'base_uri' => $rooturl,
            // NOTE(review): per the Guzzle docs, 'verify' => false disables TLS
            // certificate verification - confirm this is intended.
            'verify'   => false,
            'defaults' => [
                'headers' => [
                    'User-Agent'      => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0',
                    'Accept'          => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language' => 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
                    'Accept-Encoding' => 'gzip, deflate'
                ]
            ]
        ]);
        $this->internalurls = $internalurls;
    }

    /**
     * Recursively collect every http/https/ftp/ftps URL found among the string
     * values of a (possibly nested) array.
     *
     * Only values are inspected, never keys. A string is kept when it starts
     * with one of the four schemes followed by "://" and also passes
     * FILTER_VALIDATE_URL. Non-string, non-array values are ignored.
     *
     * @param array|\Traversable $obj   structure to walk
     * @param array              $links collected URLs, keyed by the URL itself
     *                                  so duplicates collapse (filled by reference)
     */
    private function searchInArray($obj, array &$links)
    {
        foreach ($obj as $elem) {
            if (is_array($elem)) {
                $this->searchInArray($elem, $links);
                continue;
            }

            if (!is_string($elem)) {
                continue;
            }

            // Require "scheme://" so that look-alike schemes such as "httpx://"
            // are not accepted merely because they start with "http".
            if (preg_match('#^(?:https?|ftps?)://#', $elem) && filter_var($elem, FILTER_VALIDATE_URL)) {
                $links[$elem] = $elem;
            }
        }
    }

    /**
     * get all links in a json
     *
     * The document is decoded to associative arrays and walked with
     * searchInArray(). Input that is not valid JSON, or whose top-level value
     * is not an object/array, yields an empty list.
     *
     * @param string $json
     *
     * @return array URLs keyed by themselves
     */
    private function getJsonLinks($json)
    {
        $links   = [];
        $decoded = json_decode($json, true);

        // json_decode() gives null on invalid input and a scalar for scalar
        // documents; neither can be iterated.
        if (is_array($decoded)) {
            $this->searchInArray($decoded, $links);
        }

        return $links;
    }

    /**
     * check links in a sitemap
     *
     * Requests the text of every "loc" element and sorts it into
     * $result['ok'] (status 200) or $result['error'] (any other status).
     * A key is only present when at least one URL landed in it.
     *
     * @param string $sitemap sitemap document content
     *
     * @return array
     * @throws \Exception
     */
    private function checkSitemap($sitemap)
    {
        $xml    = new GlHtml($sitemap);
        $result = [];

        foreach ($xml->get("loc") as $loc) {
            $url      = $loc->getText();
            $response = $this->client->get($url, ['exceptions' => false]);
            $bucket   = ($response->getStatusCode() != 200) ? 'error' : 'ok';

            $result[$bucket][] = $url;
        }

        return $result;
    }

    /**
     * check http error status code
     *
     * Requests every URL and records it under $result[$statuscode]['ok'] when
     * the response status equals $statuscode, otherwise under
     * $result[$statuscode]['error'].
     *
     * @param array $result     report being built (filled by reference)
     * @param array $urls       URLs to request
     * @param int   $statuscode status code each URL is expected to answer with
     */
    private function checkStatus(array &$result, array $urls, $statuscode)
    {
        foreach ($urls as $url) {
            $response = $this->client->get($url, ['exceptions' => false]);
            $bucket   = ($response->getStatusCode() != $statuscode) ? 'error' : 'ok';

            $result[$statuscode][$bucket][] = $url;
        }
    }

    /**
     * check 403 and 404 errors
     *
     * @param array $urlerrors     URLs expected to answer 404
     * @param array $urlforbiddens URLs expected to answer 403
     *
     * @return array report indexed by expected status code (404, 403), each
     *               holding optional 'ok' and 'error' URL lists
     */
    public function checkErrors(array $urlerrors, array $urlforbiddens)
    {
        $result = [];

        $this->checkStatus($result, $urlerrors, 404);
        $this->checkStatus($result, $urlforbiddens, 403);

        return $result;
    }

    /**
     * check links in robots.txt and sitemap
     *
     * Fetches "/robots.txt", then for each line:
     *  - "Sitemap:" entries are requested; a non-200 answer is recorded under
     *    $result['sitemap']['error'], otherwise the sitemap body is checked
     *    with checkSitemap() and stored under
     *    $result['sitemap']['ok'][<sitemap url>];
     *  - "Disallow:" entries are requested; 200 and 403 count as 'ok', any
     *    other status as 'error', under $result['disallow'].
     *
     * @return array
     * @throws \Exception when robots.txt does not answer with status 200
     */
    public function checkRobotsSitemap()
    {
        // NOTE(review): unlike the other requests in this class, this one does
        // not pass 'exceptions' => false - confirm whether the client throws on
        // 4xx/5xx here before the status check below is reached.
        $response = $this->client->get("/robots.txt");
        if ($response->getStatusCode() != 200) {
            throw new \Exception("Cannot find robots.txt");
        }

        $lines  = explode("\n", $response->getBody()->getContents());
        $result = [];

        foreach ($lines as $line) {
            if (preg_match('/^\s*Sitemap:(.*)/i', $line, $match)) {
                $urlsitemap = trim($match[1]);
                $response   = $this->client->get($urlsitemap, ['exceptions' => false]);
                if ($response->getStatusCode() != 200) {
                    $result['sitemap']['error'][] = $urlsitemap;
                } else {
                    $result['sitemap']['ok'][$urlsitemap] = $this->checkSitemap($response->getBody()->getContents());
                }
            }

            if (preg_match('/^\s*Disallow:(.*)/i', $line, $match)) {
                $urldisallow = trim($match[1]);
                $response    = $this->client->get($urldisallow, ['exceptions' => false]);
                $status      = $response->getStatusCode();
                if (($status != 200) && ($status != 403)) {
                    $result['disallow']['error'][] = $urldisallow;
                } else {
                    $result['disallow']['ok'][] = $urldisallow;
                }
            }
        }

        return $result;
    }

    /**
     * check links in html and json files
     *
     * Links are first extracted per file (GlHtml::getLinks() for ".html",
     * getJsonLinks() for ".json"), then inverted into a link => file names map
     * so that each distinct link is checked only once.
     *
     * @param Finder   $files      files to scan; only html and json extensions are accepted
     * @param callable $checkstart called once with the number of distinct links
     * @param callable $checking   called before each link with (link, file names using it)
     * @param callable $checkend   called once after the last link, without arguments
     *
     * @throws \Exception when a file has an extension other than html or json
     * @return GlLinkCheckerError[]
     */
    public function checkFiles(Finder $files, callable $checkstart, callable $checking, callable $checkend)
    {
        $linksByFile = [];
        /**
         * @var SplFileInfo $file
         */
        foreach ($files as $file) {
            $inner     = file_get_contents($file->getRealPath());
            $keyname   = $file->getRelativePathname();
            $extension = $file->getExtension();

            if ($extension === 'html') {
                $html                  = new GlHtml($inner);
                $linksByFile[$keyname] = $html->getLinks();
            } elseif ($extension === 'json') {
                $linksByFile[$keyname] = $this->getJsonLinks($inner);
            } else {
                throw new \Exception("Extension unknown : " . $keyname);
            }
        }

        // Invert the map: link => list of files that reference it.
        $links = [];
        foreach ($linksByFile as $filename => $filelinks) {
            foreach ($filelinks as $filelink) {
                $links[$filelink][] = $filename;
            }
        }

        $checkstart(count($links));
        $result = [];
        foreach ($links as $link => $filenames) {
            $checking($link, $filenames);

            $gllink = new GlLinkCheckerError($this->client, $link, $filenames);

            // The formatting checks are run only for links reported as internal.
            if ($gllink->isInternal($this->internalurls)) {
                $gllink->check(['lowercase', 'endslash', 'absolute']);
            }

            $gllink->check(['exist']);
            $result[] = $gllink;
        }
        $checkend();

        return $result;
    }
}
264