SafeBrowsingClient::getMalwaresData()   C
last analyzed

Complexity

Conditions 14
Paths 28

Size

Total Lines 89

Duplication

Lines 42
Ratio 47.19 %

Code Coverage

Tests 57
CRAP Score 14

Importance

Changes 0
Metric Value
dl 42
loc 89
ccs 57
cts 57
cp 1
rs 5.2533
c 0
b 0
f 0
cc 14
nc 28
nop 1
crap 14

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Yandex PHP Library
4
 *
5
 * @copyright NIX Solutions Ltd.
6
 * @link https://github.com/nixsolutions/yandex-php-library
7
 */
8
9
/**
10
 * @namespace
11
 */
12
namespace Yandex\SafeBrowsing;
13
14
use Yandex\Common\AbstractServiceClient;
15
use GuzzleHttp\Psr7\Response;
16
use GuzzleHttp\Exception\ClientException;
17
use Yandex\Common\Exception\ForbiddenException;
18
use Yandex\Common\Exception\NotFoundException;
19
use Yandex\Common\Exception\UnauthorizedException;
20
21
/**
22
 * Class SafeBrowsingClient
23
 *
24
 * @category Yandex
25
 * @package SafeBrowsing
26
 *
27
 * @author   Alexander Khaylo <[email protected]>
28
 * @created  31.01.14 17:32
29
 */
30
class SafeBrowsingClient extends AbstractServiceClient
31
{
32
    /**
33
     * @var string
34
     */
35
    protected $serviceDomain = 'sba.yandex.net';
36
37
    /**
38
     * @var
39
     */
40
    protected $apiKey;
41
42
    /**
43
     * @var string
44
     */
45
    protected $appVer = '2.3';
46
47
    /**
48
     * @var string
49
     */
50
    protected $pVer = '2.3';
51
52
    /**
53
     * @var array
54
     */
55
    protected $malwareShavars = [
56
        'ydx-malware-shavar',
57
        'ydx-phish-shavar',
58
        'goog-malware-shavar',
59
        'goog-phish-shavar'
60
    ];
61
62
    /**
     * Creates the client and registers the API key it will send to the service.
     *
     * The key is stored through setApiKey(), so the constructor and the
     * setter always share the same assignment path.
     *
     * @param string $apiKey API key; defaults to an empty string
     */
    public function __construct($apiKey = '')
    {
        $this->setApiKey($apiKey);
    }
69
70
    /**
     * Stores the API key that is embedded into service and lookup URLs.
     *
     * @param string $apiKey API key value
     */
    public function setApiKey($apiKey)
    {
        $this->apiKey = $apiKey;
    }
77
78
    /**
     * Returns the API key currently configured on this client.
     *
     * @return string
     */
    public function getApiKey()
    {
        return $this->apiKey;
    }
85
86
    /**
     * Replaces the list of shavar list names this client works with.
     *
     * The names are used when building download requests and when filtering
     * full-hash matches in searchUrl().
     *
     * @param array $malwareShavars list names, e.g. 'ydx-malware-shavar'
     */
    public function setMalwareShavars($malwareShavars)
    {
        $this->malwareShavars = $malwareShavars;
    }
93
94
    /**
     * Returns the shavar list names this client works with.
     *
     * @return array
     */
    public function getMalwareShavars()
    {
        return $this->malwareShavars;
    }
101
102
    /**
     * Builds the URL of a service resource, including the standard query
     * parameters (client, apikey, appver, pver).
     *
     * All values are concatenated as-is; nothing is URL-encoded here.
     *
     * @param string $resource resource path appended after the domain, e.g. 'gethash'
     * @return string
     */
    public function getServiceUrl($resource = '')
    {
        $endpoint = $this->serviceScheme . '://' . $this->serviceDomain . '/' . $resource;
        $query = '?client=api&apikey=' . $this->apiKey
            . '&appver=' . $this->appVer
            . '&pver=' . $this->pVer;

        return $endpoint . $query;
    }
113
114
    /**
     * Builds the URL of the Lookup resource for the given URL.
     *
     * The lookup resource uses its own protocol version (3.5) instead of the
     * client-wide $pVer.
     *
     * NOTE(review): $url is appended verbatim, without URL-encoding; callers
     * presumably have to encode it themselves - confirm.
     *
     * @param string $url URL to look up
     * @return string
     */
    public function getLookupUrl($url = '')
    {
        // Protocol version specific to the lookup resource.
        $lookupVersion = '3.5';
        $endpoint = $this->serviceScheme . '://' . $this->serviceDomain . '/';

        return $endpoint . 'lookup?client=api&apikey=' . $this->apiKey
            . '&pver=' . $lookupVersion
            . '&url=' . $url;
    }
126
127
    /**
     * Builds the URL of the Check Adult resource for the given URL.
     *
     * This resource uses its own protocol version (4.0) and does not send
     * the API key.
     *
     * NOTE(review): $url is appended verbatim, without URL-encoding; callers
     * presumably have to encode it themselves - confirm.
     *
     * @param string $url URL to check
     * @return string
     */
    public function getCheckAdultUrl($url = '')
    {
        // Protocol version specific to the check-adult resource.
        $checkVersion = '4.0';
        $endpoint = $this->serviceScheme . '://' . $this->serviceDomain . '/';

        return $endpoint . 'cp?client=api&pver=' . $checkVersion . '&url=' . $url;
    }
139
140
    /**
     * Sends a request through the HTTP client and translates client errors
     * into library exceptions.
     *
     * Only ClientException is caught here; any other exception raised by the
     * HTTP client propagates unchanged.
     *
     * @param string $method  HTTP method
     * @param string $uri     URI object or string.
     * @param array  $options Request options to apply.
     *
     * @return Response
     *
     * @throws ForbiddenException    on HTTP 403
     * @throws UnauthorizedException on HTTP 401
     * @throws NotFoundException     on HTTP 404
     * @throws SafeBrowsingException on any other client error status
     */
    protected function sendRequest($method, $uri, array $options = [])
    {
        try {
            return $this->getClient()->request($method, $uri, $options);
        } catch (ClientException $clientError) {
            $errorResponse = $clientError->getResponse();
            $status = $errorResponse->getStatusCode();
            $reason = $errorResponse->getReasonPhrase();

            switch ($status) {
                case 403:
                    throw new ForbiddenException($reason);
                case 401:
                    throw new UnauthorizedException($reason);
                case 404:
                    throw new NotFoundException($reason);
                default:
                    throw new SafeBrowsingException(
                        'Service responded with error code: "' . $status . '" and message: "' . $reason . '"',
                        $status
                    );
            }
        }
    }
183
184
    /**
     * POSTs a prepared request body to the 'gethash' resource.
     *
     * @param string $bodyString raw request body
     * @see https://developers.google.com/safe-browsing/developers_guide_v2#HTTPRequestForHashes
     * @return array ['code' => HTTP status code, 'data' => response body as returned by getBody()]
     */
    public function checkHash($bodyString = '')
    {
        $requestOptions = ['body' => $bodyString];
        $response = $this->sendRequest('POST', $this->getServiceUrl('gethash'), $requestOptions);

        $status = $response->getStatusCode();
        $payload = $response->getBody();

        return [
            'code' => $status,
            'data' => $payload
        ];
    }
206
207
    /**
     * POSTs a prepared request body to the 'downloads' resource.
     *
     * @param string $bodyString raw request body
     * @see https://developers.google.com/safe-browsing/developers_guide_v2#HTTPRequestForData
     * @return array ['code' => HTTP status code, 'data' => response body as returned by getBody()]
     */
    public function getChunks($bodyString = '')
    {
        $requestOptions = ['body' => $bodyString];
        $response = $this->sendRequest('POST', $this->getServiceUrl('downloads'), $requestOptions);

        $status = $response->getStatusCode();
        $payload = $response->getBody();

        return [
            'code' => $status,
            'data' => $payload
        ];
    }
229
230
    /**
     * Requests the 'list' resource and returns its body split into lines.
     *
     * @see https://developers.google.com/safe-browsing/developers_guide_v2#HTTPRequestForList
     * @return array one element per line of the trimmed response body
     */
    public function getShavarsList()
    {
        $response = $this->sendRequest('GET', $this->getServiceUrl('list'));
        $listing = trim($response->getBody());

        return explode("\n", $listing);
    }
240
241
    /**
     * Queries the Lookup resource for a URL.
     *
     * @param string $url URL to look up
     * @return string|false response body when the service answers with HTTP 200,
     *                      false for any other status
     */
    public function lookup($url)
    {
        $response = $this->sendRequest('GET', $this->getLookupUrl($url));

        return $response->getStatusCode() === 200
            ? $response->getBody()->getContents()
            : false;
    }
253
254
    /**
     * Queries the Check Adult resource for a URL.
     *
     * @param string $url URL to check
     * @return bool true only when the response body is exactly 'adult'
     */
    public function checkAdult($url)
    {
        $response = $this->sendRequest('GET', $this->getCheckAdultUrl($url));
        $verdict = $response->getBody()->getContents();

        return $verdict === 'adult';
    }
266
267
    /**
     * Downloads the raw chunk data stored at the given URL.
     *
     * The request reuses the headers configured on the HTTP client and, when
     * a host can be extracted from the URL, sets the Host header to it.
     *
     * @param string $url absolute URL of the chunk
     * @return string response body contents
     */
    public function getChunkByUrl($url)
    {
        // Start from the client's configured headers so they are not lost
        // when the per-request 'headers' option is supplied.
        $requestHeaders = $this->getClient()->getConfig('headers');

        $urlHost = parse_url($url, PHP_URL_HOST);
        if ($urlHost) {
            $requestHeaders['Host'] = $urlHost;
        }

        $response = $this->sendRequest('GET', $url, ['headers' => $requestHeaders]);

        return $response->getBody()->getContents();
    }
290
291
    /**
     * Checks whether a URL matches one of the configured malware lists.
     *
     * For every host variant produced by getHashesByUrl() the 4-byte hash
     * prefix is sent to the 'gethash' resource; the returned full hashes of
     * the lists enabled on this client are then compared with the full hash
     * of the host.
     *
     * Response handling:
     *  - 200 with a body: full hashes are parsed and compared;
     *  - 200 with an empty body, or 204 with an empty body: no match for this host;
     *  - anything else: SafeBrowsingException.
     *
     * @param string $url URL to check
     * @return bool|array the matching entry ['host' => ..., 'prefix' => ..., 'full' => ...],
     *                    or false when nothing matched
     * @throws SafeBrowsingException when the service returns an unexpected response
     */
    public function searchUrl($url)
    {
        $clientMalwareShavars = $this->getMalwareShavars();

        foreach ($this->getHashesByUrl($url) as $hash) {
            $prefixBytes = pack('H*', $hash['prefix']);
            $prefixSize = strlen($prefixBytes);

            // Request header is "PREFIXSIZE:LENGTH", where LENGTH is the total
            // byte size of the prefixes that follow. A single prefix is sent,
            // so LENGTH equals the prefix size.
            $bodyString = $prefixSize . ':' . strlen($prefixBytes) . "\n" . $prefixBytes;
            $result = $this->checkHash($bodyString);

            $code = (int) $result['code'];
            // 'data' is a stream object; cast once so emptiness checks look at
            // the content instead of the (always truthy) object.
            $data = (string) $result['data'];

            if ($code === 200) {
                if ($data === '') {
                    continue;
                }
                foreach ($this->getFullHashes($data) as $shavarName => $fullHashes) {
                    if (!in_array($shavarName, $clientMalwareShavars, true)) {
                        continue;
                    }
                    if (in_array($hash['full'], $fullHashes, true)) {
                        return $hash;
                    }
                }
                continue;
            }

            if ($code === 204 && $data === '') {
                // 204 means no match for this prefix.
                continue;
            }

            throw new SafeBrowsingException(
                "ERROR: Invalid response returned from Safe Browsing ({$result['code']})"
            );
        }

        return false;
    }
330
331
    /**
     * Parses a 'gethash' response body into full hashes grouped by list name.
     *
     * The body is read as a sequence of records, each consisting of a header
     * line "LISTNAME:ID:LENGTH" followed by LENGTH bytes of binary hash data.
     * The binary data is converted to hex and cut into 64-character pieces.
     *
     * NOTE(review): every piece of one record is written to the same
     * [$listname][$malwareId] slot, so when a record carries several hashes
     * only the last one is kept.
     *
     * @param string $responseData raw response body
     * @return array [list name => [id from the header => 64-char hex hash]]
     */
    public function getFullHashes($responseData)
    {
        $fullHashesByList = [];

        while (strlen($responseData) > 0) {
            $sections = explode("\n", $responseData, 2);
            list($listname, $malwareId, $length) = explode(':', $sections[0]);

            $hex = bin2hex(substr($sections[1], 0, $length));
            $hexLength = strlen($hex);
            for ($offset = 0; $offset < $hexLength; $offset += 64) {
                $fullHashesByList[$listname][$malwareId] = substr($hex, $offset, 64);
            }

            // Continue with whatever follows this record's hash data.
            $responseData = substr($sections[1], $length);
        }

        return $fullHashesByList;
    }
351
352
    /**
     * Builds the hash entries for every host variant of a URL.
     *
     * The URL is stripped of tabs, line feeds, carriage returns and vertical
     * tabs, lower-cased, and parsed (with 'http://' prepended when it has no
     * scheme). For an IP address the only variant is "IP/". For a host name
     * the variants are the host itself (limited to its last five labels) and
     * each shorter suffix down to two labels, each followed by '/'.
     *
     * @param string $url URL or bare host name
     * @return array list of ['host' => ..., 'prefix' => ..., 'full' => ...] entries;
     *               empty when no host can be extracted from the URL
     */
    public function getHashesByUrl($url)
    {
        // Remove line feeds, carriage returns, tabs and vertical tabs, then
        // lower-case once so that both parse paths below see the same text
        // (hashes are case-sensitive).
        $url = strtolower(trim(str_replace(["\x09", "\x0A", "\x0D", "\x0B"], '', $url)));

        $parts = parse_url($url);
        if (!is_array($parts) || !isset($parts['scheme'])) {
            // Add the default scheme so parse_url() can recognise the host.
            $parts = parse_url('http://' . $url);
        }

        if (!is_array($parts) || !isset($parts['host']) || $parts['host'] === '') {
            // Nothing to hash.
            return [];
        }
        $host = $parts['host'];

        // Maximum number of host labels taken into account.
        $maxCountDomains = 5;

        $hosts = [];
        if (filter_var($host, FILTER_VALIDATE_IP)) {
            $hosts[] = $host . '/';
        } else {
            // Keep only the last $maxCountDomains labels.
            $domains = array_slice(explode('.', $host), -$maxCountDomains);

            while (count($domains) > 1) {
                $hosts[] = implode('.', $domains) . '/';
                array_shift($domains);
            }
        }

        return $this->getHashesByHosts(array_unique($hosts));
    }
391
392
    /**
     * Builds one hash entry per host expression.
     *
     * @param array $hosts host expressions, e.g. 'example.com/'
     * @return array sequentially indexed list of entries produced by getHashByHost()
     */
    private function getHashesByHosts($hosts)
    {
        $self = $this;
        $entries = array_map(
            function ($hostExpression) use ($self) {
                return $self->getHashByHost($hostExpression);
            },
            $hosts
        );

        // array_map() keeps the input keys; re-index to a plain list.
        return array_values($entries);
    }
404
405
    /**
     * Computes the SHA-256 hash of a host expression.
     *
     * @param string $host host expression, e.g. 'example.com/'
     * @return array ['host' => input, 'prefix' => first 8 hex characters of the hash,
     *               'full' => complete hex hash]
     */
    private function getHashByHost($host)
    {
        $digest = hash('sha256', $host);
        $shortPrefix = substr($digest, 0, 8);

        return ["host" => $host, "prefix" => $shortPrefix, "full" => $digest];
    }
415
416
    /**
     * Builds the body of a 'downloads' request.
     *
     * One line is produced for every configured shavar list:
     *
     *     ydx-malware-shavar;s:18888-19061:a:21355-21687
     *
     * The "s:" part is taken from $savedChunks[list]['removed'] and the "a:"
     * part from $savedChunks[list]['added']; a part is emitted only when both
     * its 'min' and 'max' are set and greater than zero. A list without any
     * usable range is still requested, as "listname;".
     *
     * @param array $savedChunks [list name => ['removed' => ['min' => int, 'max' => int],
     *                                          'added'   => ['min' => int, 'max' => int]]]
     * @return string request body, one "\n"-terminated line per list
     * @throws SafeBrowsingException when no shavar lists are configured
     */
    private function prepareDownloadsRequest($savedChunks = [])
    {
        if (count($this->malwareShavars) < 1) {
            throw new SafeBrowsingException(
                'ERROR: Empty malware shavars'
            );
        }

        // Saved-state key => chunk type marker, in the order they are sent.
        $rangeMarkers = ['removed' => 's', 'added' => 'a'];

        $body = '';
        foreach ($this->malwareShavars as $malwareShavar) {
            $ranges = [];

            if (!empty($savedChunks) && isset($savedChunks[$malwareShavar])) {
                $saved = $savedChunks[$malwareShavar];
                foreach ($rangeMarkers as $kind => $marker) {
                    if (isset($saved[$kind]['min'], $saved[$kind]['max'])
                        && $saved[$kind]['min'] > 0
                        && $saved[$kind]['max'] > 0
                    ) {
                        $ranges[] = $marker . ':' . $saved[$kind]['min'] . '-' . $saved[$kind]['max'];
                    }
                }
            }

            // Always emit the list line, otherwise a list that has saved
            // state but no valid "added" range would be dropped from the
            // request.
            $body .= $malwareShavar . ';' . implode(':', $ranges) . "\n";
        }

        return $body;
    }
465
466
    /**
     * Get malwares prefixes data
     *
     * Sends a 'downloads' request for the configured lists, then for every
     * list in the response downloads and parses the referenced chunks and
     * collects the chunk ranges the service asks to delete.
     *
     * Result per list name:
     *  - 'added' / 'removed'      => [chunk number => prefixes] from parsed chunks;
     *  - 'delete_added_ranges'    => ranges from an "ad:" line;
     *  - 'delete_removed_ranges'  => ranges from an "sd:" line.
     * Each range is ['min' => string, 'max' => string].
     *
     * Chunks whose download fails with NotFoundException are skipped.
     *
     * @param array $savedChunks saved chunk ranges, passed to prepareDownloadsRequest()
     * @return array|string the data described above, or the string 'pleasereset'
     *                      when the response contains "r:pleasereset"
     * @throws SafeBrowsingException when the response contains no list section
     */
    public function getMalwaresData($savedChunks = [])
    {
        $body = $this->prepareDownloadsRequest($savedChunks);
        $response = $this->getChunks($body);
        $data = (string) $response['data'];

        if (strpos($data, 'r:pleasereset') !== false) {
            return 'pleasereset';
        }

        $chunksList = $this->splitDownloadsResponse($data);
        if (count($chunksList) < 1) {
            throw new SafeBrowsingException(
                'ERROR: Incorrect data in list'
            );
        }

        $result = [];
        foreach ($chunksList as $listName => $lines) {
            $result[$listName] = $this->collectListChunks($lines);
        }

        return $result;
    }

    /**
     * Groups the lines of a 'downloads' response by the list they belong to.
     *
     * A line starting with "i:" opens a new list; the following lines belong
     * to it until the next "i:" line. Lines before the first list are ignored.
     *
     * @param string $data response body
     * @return array [list name => lines of that list]
     */
    private function splitDownloadsResponse($data)
    {
        $lists = [];
        $current = null;

        foreach (explode("\n", $data) as $line) {
            $line = trim($line);
            if (strncmp($line, 'i:', 2) === 0) {
                $current = (string) substr($line, 2);
                $lists[$current] = [];
            } elseif ($current !== null && $line !== '') {
                $lists[$current][] = $line;
            }
        }

        return $lists;
    }

    /**
     * Processes the lines of one list: downloads "u:" chunks and reads the
     * "ad:" / "sd:" delete ranges.
     *
     * @param array $lines lines of a single list section
     * @return array chunk data of the list, see getMalwaresData()
     * @throws SafeBrowsingException when a downloaded chunk cannot be parsed
     */
    private function collectListChunks(array $lines)
    {
        $chunksByList = [];

        foreach ($lines as $line) {
            if (strncmp($line, 'u:', 2) === 0) {
                try {
                    $chunkData = $this->getChunkByUrl('http://' . trim((string) substr($line, 2)));
                    $processed = $this->parseChunk($chunkData);
                    $chunksByList[$processed['type']][$processed['chunk_num']] = $processed['prefixes'];
                } catch (NotFoundException $e) {
                    // The chunk is no longer available; skip it.
                    continue;
                }
            } elseif (strncmp($line, 'ad:', 3) === 0) {
                $chunksByList['delete_added_ranges'] = $this->parseChunkRanges((string) substr($line, 3));
            } elseif (strncmp($line, 'sd:', 3) === 0) {
                $chunksByList['delete_removed_ranges'] = $this->parseChunkRanges((string) substr($line, 3));
            }
        }

        return $chunksByList;
    }

    /**
     * Parses a comma-separated chunk list such as "1-3,5,7-9".
     *
     * A single number is treated as a range whose min and max are equal.
     *
     * @param string $rangeList chunk list without its "ad:" / "sd:" marker
     * @return array list of ['min' => string, 'max' => string]
     */
    private function parseChunkRanges($rangeList)
    {
        $ranges = [];

        foreach (explode(',', trim($rangeList)) as $range) {
            $range = trim($range);
            if ($range === '') {
                continue;
            }
            $bounds = explode('-', $range, 2);
            $ranges[] = [
                'min' => $bounds[0],
                'max' => isset($bounds[1]) ? $bounds[1] : $bounds[0]
            ];
        }

        return $ranges;
    }
562
563
    /**
     * Parsing chunk
     *
     * The chunk starts with a header line "TYPE:CHUNKNUM:HASHLEN:CHUNKLEN"
     * followed by CHUNKLEN bytes of binary data. The data is a sequence of
     * entries, each beginning with a 4-byte key and a 1-byte count:
     *  - type 'a': the count is followed by COUNT prefixes of HASHLEN bytes;
     *  - type 's': the count is followed by COUNT pairs of a 4-byte number
     *    and a HASHLEN-byte prefix, or by a single 4-byte number when COUNT is 0.
     * Only the 4-byte key of every entry is collected (as 8 hex characters).
     *
     * Only the first chunk of $data is parsed.
     *
     * @param string $data raw chunk data
     * @return array ['type' => 'added'|'removed', 'chunk_num' => string, 'prefixes' => string[]]
     * @throws SafeBrowsingException when the data is empty, the header is
     *                               incomplete, or the chunk type is unknown
     */
    private function parseChunk($data)
    {
        if (trim($data) === '') {
            throw new SafeBrowsingException(
                'ERROR: Incorrect chunk data ""'
            );
        }

        // Strip leading whitespace only: the payload is binary, and trimming
        // the tail would eat data bytes that happen to be whitespace or NUL.
        $splitHead = explode("\n", ltrim($data), 2);
        $header = trim($splitHead[0]);
        $chunkInfo = explode(':', $header);
        if (count($chunkInfo) < 4) {
            throw new SafeBrowsingException(
                'ERROR: Incorrect chunk header "' . $header . '"'
            );
        }
        list($type, $chunkNum, $hashLen, $chunkLen) = $chunkInfo;

        if ($type !== 'a' && $type !== 's') {
            throw new SafeBrowsingException(
                'ERROR: In chunkNum "' . $chunkNum . '" incorrect type "' . $type . '"'
            );
        }

        $hashLen = (int) $hashLen;
        $chunkLen = (int) $chunkLen;
        $payload = isset($splitHead[1]) ? $splitHead[1] : '';

        // Work on a hex string: two characters per byte. A chunk without
        // data is still returned, with no prefixes.
        $chunkData = $chunkLen > 0 ? bin2hex((string) substr($payload, 0, $chunkLen)) : '';

        $isAdd = ($type === 'a');
        // Hex width of one item following the count byte; sub chunks carry
        // an extra 4-byte number in front of every prefix.
        $itemWidth = $hashLen * 2 + ($isAdd ? 0 : 8);

        $prefixes = [];
        while (strlen($chunkData) > 0) {
            $prefixes[] = substr($chunkData, 0, 8);
            $count = hexdec((string) substr($chunkData, 8, 2));

            // Key (8) + count (2) + the items of this entry.
            $skip = 10 + $count * $itemWidth;
            if (!$isAdd && $count < 1) {
                // A sub entry without prefixes still carries one 4-byte number.
                $skip += 8;
            }
            $chunkData = (string) substr($chunkData, $skip);
        }

        return [
            'type' => $isAdd ? 'added' : 'removed',
            'chunk_num' => $chunkNum,
            'prefixes' => $prefixes
        ];
    }
634
}
635