Completed
Push — master ( 5fce14...dad0cb )
by Vinicius
03:43
created

Crawler::getResults()   B

Complexity

Conditions 3
Paths 4

Size

Total Lines 24
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 3.0026

Importance

Changes 0
Metric Value
dl 0
loc 24
ccs 14
cts 15
cp 0.9333
rs 8.9713
c 0
b 0
f 0
cc 3
eloc 16
nc 4
nop 0
crap 3.0026
1
<?php
2
namespace CViniciusSDias\GoogleCrawler;
3
4
use CViniciusSDias\GoogleCrawler\Exception\InvalidResultException;
5
use CViniciusSDias\GoogleCrawler\Proxy\{
6
    GoogleProxy, NoProxy
7
};
8
use Psr\Http\Message\ResponseInterface;
9
use Symfony\Component\DomCrawler\Crawler as DomCrawler;
10
use Symfony\Component\DomCrawler\Link;
11
use DOMElement;
12
13
/**
 * Google Crawler
 *
 * Requests a Google search page through a GoogleProxy implementation and
 * converts the result entries found in the returned HTML into a ResultList.
 *
 * @package CViniciusSDias\GoogleCrawler
 * @author Vinicius Dias
 */
class Crawler
{
    /** @var string $url URL of the search page requested by getResults() */
    protected $url;
    /** @var GoogleProxy $proxy Fetches the search page and parses the result URLs */
    protected $proxy;

    /**
     * Builds the search URL for the given term and stores the proxy to be used.
     *
     * @param SearchTermInterface $searchTerm Term to search for; it is interpolated into the query string
     * @param GoogleProxy|null $proxy Proxy used to reach Google; a NoProxy instance is used when omitted
     */
    public function __construct(SearchTermInterface $searchTerm, GoogleProxy $proxy = null)
    {
        // You can concatenate &gl=XX replacing XX with your country code (BR = Brazil; US = United States)
        // You should also add the country specific part of the google url, (like .br or .es)
        $this->url = "http://www.google.com/search?q=$searchTerm&num=100";
        $this->proxy = $proxy ?? new NoProxy();
    }

    /**
     * Returns the 100 first found results for the specified search term
     *
     * Entries without a usable link, and entries rejected with an
     * InvalidResultException while being parsed, are skipped.
     *
     * @return ResultList
     * @throws \GuzzleHttp\Exception\ServerException If the proxy was overused
     * @throws \GuzzleHttp\Exception\ConnectException If the proxy is unavailable
     */
    public function getResults(): ResultList
    {
        /** @var ResponseInterface $response */
        $response = $this->proxy->getHttpResponse($this->url);
        $stringResponse = (string) $response->getBody();
        $domCrawler = new DomCrawler($stringResponse);
        $googleResults = $domCrawler->filterXPath('//div[@class="g" and h3[@class="r" and a]]');
        $resultList = new ResultList($googleResults->count());

        foreach ($googleResults as $result) {
            $resultCrawler = new DomCrawler($result);

            // getNode(0) yields null when nothing matched, and Link does not accept null,
            // so an entry without a link element is skipped instead of crashing the crawl.
            $linkElement = $resultCrawler->filterXPath('//h3[@class="r"]/a')->getNode(0);
            if (!$linkElement instanceof DOMElement) {
                continue;
            }
            $resultLink = new Link($linkElement, 'http://google.com/');

            // May be null: not every result carries a description span.
            // parseResult() falls back to a default text in that case.
            $descriptionElement = $resultCrawler->filterXPath('//span[@class="st"]')->getNode(0);

            try {
                $googleResult = $this->parseResult($resultLink, $descriptionElement);
                $resultList->addResult($googleResult);
            } catch (InvalidResultException $invalidResult) {
                // TODO Maybe log this exception. Other than that, there's nothing to do, cause it isn't an error.
            }
        }

        return $resultList;
    }

    /**
     * Assembles a Result (title, URL and description) from a result link and
     * its optional description element. The caller decides what to do with it.
     *
     * @param Link $resultLink
     * @param DOMElement|null $descriptionElement Description node, or null when the entry has none
     * @return Result
     * @throws InvalidResultException Propagated from getUrl()
     */
    private function parseResult(Link $resultLink, DOMElement $descriptionElement = null): Result
    {
        // The null coalescing operator also covers a null $descriptionElement,
        // so a missing description simply selects the fallback text.
        $description = $descriptionElement->nodeValue
            ?? 'A description for this result isn\'t available due to the robots.txt file.';

        $googleResult = new Result();
        $googleResult
            ->setTitle($resultLink->getNode()->nodeValue)
            ->setUrl($this->getUrl($resultLink->getUri()))
            ->setDescription($description);

        return $googleResult;
    }

    /**
     * Parses the URL using the parser provided by $proxy
     *
     * @param string $url
     * @return string
     * @throws InvalidResultException
     */
    private function getUrl(string $url): string
    {
        return $this->proxy->parseUrl($url);
    }
}
100