Completed
Push — master ( 3ab51d...5fce14 )
by Vinicius
04:43
created

Crawler   A

Complexity

Total Complexity 7

Size/Duplication

Total Lines 71
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 6

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 7
lcom 1
cbo 6
dl 0
loc 71
ccs 23
cts 23
cp 1
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 2
A parseResult() 0 9 1
A getUrl() 0 4 1
A getResults() 0 21 3
1
<?php
2
namespace CViniciusSDias\GoogleCrawler;
3
4
use CViniciusSDias\GoogleCrawler\Exception\InvalidResultException;
5
use CViniciusSDias\GoogleCrawler\Proxy\{
6
    GoogleProxy, NoProxy
7
};
8
use Psr\Http\Message\ResponseInterface;
9
use Symfony\Component\DomCrawler\Crawler as DomCrawler;
10
use Symfony\Component\DomCrawler\Link;
11
12
/**
 * Google Crawler
 *
 * Requests a Google search page through a proxy and converts the result
 * links found in the returned HTML into a ResultList.
 *
 * @package CViniciusSDias\GoogleCrawler
 * @author Vinicius Dias
 */
class Crawler
{
    /**
     * Search URL requested by getResults().
     *
     * @var string
     */
    protected $url;

    /**
     * Proxy used both to perform the HTTP request and to parse each result URL.
     *
     * @var \CViniciusSDias\GoogleCrawler\Proxy\GoogleProxy
     */
    protected $proxy;

    /**
     * Builds the search URL for the given term and selects the proxy to use.
     *
     * @param SearchTermInterface $searchTerm Term interpolated into the "q" parameter of the search URL
     * @param GoogleProxy|null $proxy Proxy to send the request through; a NoProxy is used when omitted or null
     */
    public function __construct(SearchTermInterface $searchTerm, GoogleProxy $proxy = null)
    {
        // To target a specific country, append &gl=XX to the URL, replacing XX with the
        // country code (BR = Brazil; US = United States), and also add the country-specific
        // part of the Google domain (like .br or .es).
        $this->url = "http://www.google.com/search?q=$searchTerm&num=100";
        $this->proxy = $proxy ?? new NoProxy();
    }

    /**
     * Returns the first 100 results found for the search term given to the constructor.
     *
     * Every anchor matching "h3.r > a" in the response body is parsed; anchors for which
     * an InvalidResultException is raised are skipped and do not appear in the list.
     *
     * @return ResultList
     * @throws \GuzzleHttp\Exception\ServerException If the proxy was overused
     * @throws \GuzzleHttp\Exception\ConnectException If the proxy is unavailable
     */
    public function getResults(): ResultList
    {
        /** @var ResponseInterface $httpResponse */
        $httpResponse = $this->proxy->getHttpResponse($this->url);
        $html = (string) $httpResponse->getBody();

        $anchors = (new DomCrawler($html))->filter('h3.r > a');
        $results = new ResultList($anchors->count());

        foreach ($anchors as $anchor) {
            // The base URI lets Link resolve relative hrefs found in the page.
            $link = new Link($anchor, 'http://google.com/');

            try {
                $results->addResult($this->parseResult($link));
            } catch (InvalidResultException $skipped) {
                // TODO Maybe log this exception. An invalid result is not an error, so it is simply left out.
            }
        }

        return $results;
    }

    /**
     * Assembles a Result from a link found in the search page.
     *
     * The title is the text content of the link's node and the URL is the link's URI
     * after being passed through getUrl().
     *
     * NOTE(review): getResults() expects an InvalidResultException for links that are
     * not valid results; it presumably originates from the proxy's URL parsing - confirm.
     *
     * @param Link $resultLink
     * @return Result
     */
    private function parseResult(Link $resultLink): Result
    {
        $parsed = new Result();
        $title = $resultLink->getNode()->nodeValue;

        $parsed->setTitle($title)->setUrl($this->getUrl($resultLink->getUri()));

        return $parsed;
    }

    /**
     * Parses the URL using the parser provided by $proxy
     *
     * @param string $url URI extracted from a result link
     * @return string The value returned by the proxy's parseUrl()
     */
    private function getUrl(string $url): string
    {
        $parsedUrl = $this->proxy->parseUrl($url);

        return $parsedUrl;
    }
}
89