Completed
Push — master ( 4a1f1f...a4d897 )
by Vinicius
04:33
created

Crawler   A

Complexity

Total Complexity 7

Size/Duplication

Total Lines 69
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 6

Test Coverage

Coverage 95.65%

Importance

Changes 0
Metric Value
wmc 7
lcom 1
cbo 6
dl 0
loc 69
ccs 22
cts 23
cp 0.9565
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 2
A parseResult() 0 9 1
A getUrl() 0 4 1
A getResults() 0 21 3
1
<?php
2
namespace CViniciusSDias\GoogleCrawler;
3
4
use CViniciusSDias\GoogleCrawler\Exception\InvalidResultException;
5
use CViniciusSDias\GoogleCrawler\Proxy\{
6
    GoogleProxy, NoProxy
7
};
8
use Psr\Http\Message\ResponseInterface;
9
use Symfony\Component\DomCrawler\Crawler as DomCrawler;
10
use Symfony\Component\DomCrawler\Link;
11
12
/**
 * Google Crawler
 *
 * Requests a Google search page through a GoogleProxy and converts the result
 * anchors found in the returned HTML into a ResultList.
 *
 * @package CViniciusSDias\GoogleCrawler
 * @author Vinicius Dias
 */
class Crawler
{
    /** @var string Search URL assembled from the search term in the constructor */
    protected $url;
    /** @var GoogleProxy Proxy used both to fetch the page and to parse result URLs */
    protected $proxy;

    /**
     * Builds the search URL for the given term and selects the proxy to use.
     *
     * @param SearchTermInterface $searchTerm Term interpolated into the "q" query parameter
     * @param GoogleProxy|null $proxy Proxy to route the request through; a NoProxy
     *                                instance is used when none is given
     */
    public function __construct(SearchTermInterface $searchTerm, GoogleProxy $proxy = null)
    {
        // To localize the search, append &gl=XX to the URL, replacing XX with a country
        // code (BR = Brazil; US = United States), and add the country-specific part of
        // the Google domain (like .br or .es).
        $this->url = "http://www.google.com/search?q=$searchTerm&num=100";
        $this->proxy = $proxy ?? new NoProxy();
    }

    /**
     * Returns the first 100 results found for the specified search term.
     *
     * Fetches the search page through the proxy, selects every "h3.r > a" anchor
     * and converts each one into a Result. Anchors for which parsing raises an
     * InvalidResultException are skipped.
     *
     * @return ResultList
     */
    public function getResults(): ResultList
    {
        /** @var ResponseInterface $httpResponse */
        $httpResponse = $this->proxy->getHttpResponse($this->url);
        $html = (string) $httpResponse->getBody();

        $anchors = (new DomCrawler($html))->filter('h3.r > a');
        // The list is constructed with the number of matched anchors, even though
        // invalid ones are skipped below.
        $results = new ResultList($anchors->count());

        foreach ($anchors as $anchor) {
            $link = new Link($anchor, 'http://google.com/');

            try {
                $results->addResult($this->parseResult($link));
            } catch (InvalidResultException $ignored) {
                // TODO Maybe log this exception. Other than that, there is nothing
                // to do, because an invalid result is not an error.
                continue;
            }
        }

        return $results;
    }

    /**
     * Assembles a Result from the given link, using the anchor text as the title
     * and the proxy-parsed URI as the URL.
     *
     * NOTE(review): getResults() expects this call chain to be able to raise
     * InvalidResultException; presumably it originates from the URL parsing or
     * the Result setters - confirm against those classes.
     *
     * @param Link $resultLink
     * @return Result
     */
    private function parseResult(Link $resultLink): Result
    {
        $title = $resultLink->getNode()->nodeValue;

        $googleResult = new Result();
        $googleResult
            ->setTitle($title)
            ->setUrl($this->getUrl($resultLink->getUri()));

        return $googleResult;
    }

    /**
     * Parses the URL using the parser provided by $proxy.
     *
     * @param string $url
     * @return string
     */
    private function getUrl(string $url): string
    {
        $parsedUrl = $this->proxy->parseUrl($url);

        return $parsedUrl;
    }
}
87