Test Failed
Push — master ( 138499...684014 )
by Dev
02:21
created

Search   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 184
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 24
eloc 65
dl 0
loc 184
rs 10
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
A getSelector() 0 7 2
A getNbrResults() 0 13 3
A amIKickedByGoogleThePowerful() 0 17 3
A extractResults() 0 26 6
A generateGoogleSearchUrl() 0 5 1
A normalizeTextFromGoogle() 0 3 1
A exctractOrganicResults() 0 15 3
A getUrlFromGoogleSerpFromat() 0 6 2
A generateParameters() 0 3 1
A getError() 0 7 2
1
<?php
2
3
namespace rOpenDev\Google;
4
5
use rOpenDev\curl\CurlRequest;
6
7
/**
 * Base class for scraping Google search result pages.
 *
 * Builds the search URL from the configured parameters, delegates the HTTP
 * request to the concrete subclass (see requestGoogle()) and parses the
 * returned HTML with simple_html_dom.
 *
 * NOTE(review): $q, $tld, $parameters, $nbrPage, $result and setParameter()
 * are not declared in this class body; presumably they come from the traits
 * used below — confirm.
 */
abstract class Search
{
    use CacheTrait, ConfSearchTrait, SleepTrait;

    /**
     * @var array|null selectors indexed by result type, lazily loaded from ResultsTypes::get()
     */
    protected $types;

    /** @var int Current page */
    protected $page = 1;

    /**
     * @var \simple_html_dom|null DOM of the last result page parsed by extractResults()
     */
    protected $html;

    /**
     * @var int|null key of $errors describing the current error, null when there is none
     */
    protected $error;

    /**
     * @var array human readable messages indexed by error code
     */
    protected $errors = [
        1 => 'Google Captcha',
        2 => 'Google `We\'re sorry` (flagged as automated request)',
        3 => 'Erreurs cURL',
    ];

    /**
     * Holds the errors coming from cURL.
     */
    public $cErrors;

    /**
     * Builds the URL of the first result page for the current query.
     *
     * Registers the query as the `q` parameter before serializing the parameters.
     *
     * @return string
     */
    public function generateGoogleSearchUrl()
    {
        $this->setParameter('q', $this->q);

        return 'https://www.google.'.$this->tld.'/search?'.$this->generateParameters();
    }

    /**
     * Serializes the search parameters into a query string (`&` separated).
     *
     * @return string
     */
    protected function generateParameters()
    {
        return http_build_query($this->parameters, '', '&');
    }

    /**
     * Fetches a Google page.
     *
     * @param string $url
     *
     * @return string|false Html content of the page
     */
    abstract protected function requestGoogle(string $url);

    /**
     * Am I kicked by Google? Did you reach the Google limits?!
     *
     * @param string $output Html source
     *
     * @return int|false key of $errors (1: captcha, 2: `We're sorry` page) or false when nothing was detected
     */
    protected function amIKickedByGoogleThePowerful($output)
    {
        /* Google responds:
         * We're sorry... but your computer or network may be sending automated queries.
         * To protect our users, we can't process your request right now.
         */
        if (false !== strpos($output, '<title>Sorry...</title>')) {
            return 2;
        }

        /* Google captcha */
        if (false !== strpos($output, 'e=document.getElementById(\'captcha\');if(e){e.focus();}')) {
            return 1;
        }

        /* Nothing to report */
        return false;
    }

    /**
     * @return string|false message explaining the current error, false when there is none
     *                      (or when the error code has no registered message)
     */
    public function getError()
    {
        if (null === $this->error) {
            return false;
        }

        return $this->errors[$this->error] ?? false;
    }

    /**
     * Requests up to $nbrPage result pages and collects their organic results.
     *
     * The first URL is generated from the parameters; the following ones come
     * from the "next" link (#pnnext) of the page just parsed. The crawl stops
     * early when that link is missing.
     *
     * @return array|false the results with keys type, link, title, page, page_pos;
     *                     false as soon as one request fails
     */
    public function extractResults()
    {
        $url = $this->generateGoogleSearchUrl();

        for ($this->page = 1; $this->page <= $this->nbrPage; ++$this->page) {
            $output = $this->requestGoogle($url);
            if (false === $output) {
                return false;
            }

            $this->html = new \simple_html_dom();
            $this->html->load($output);

            $this->exctractOrganicResults();
            // todo : extract other results

            // Only look for the next page link when more than one page was requested.
            $next = $this->nbrPage > 1 ? $this->html->find('#pnnext', 0) : null;
            if (null === $next) {
                break;
            }

            $url = 'https://www.google.'.$this->tld.str_replace('&amp;', '&', $next->href);
        }

        return $this->result;
    }

    /**
     * Appends the organic results of the current page ($this->html) to $this->result.
     *
     * A node is kept only when it has both a heading and an anchor; page_pos
     * counts the kept results, starting at 1 on every page.
     */
    protected function exctractOrganicResults()
    {
        // The selectors are stored `;` separated, the DOM parser expects `,`.
        $selector = str_replace(';', ', ', $this->getSelector('organic'));
        $position = 1;

        foreach ($this->html->find($selector) as $node) {
            $title = $node->find('h3, [role=heading]', 0);
            $anchor = $node->find('a', 0);
            if (!$title || !$anchor) {
                // Without a heading or a link there is nothing usable to report.
                continue;
            }

            $this->result[] = [
                'type' => 'organic',
                'title' => $this->normalizeTextFromGoogle($title->innertext),
                'link' => static::getUrlFromGoogleSerpFromat($anchor->href),
                'page' => $this->page,
                'page_pos' => $position,
            ];
            ++$position;
        }
    }

    /**
     * Returns the selector registered for a result type, loading the map on first use.
     *
     * @param string $type key of the map returned by ResultsTypes::get() (e.g. `organic`)
     *
     * @return mixed the value stored for $type (used as a selector string by exctractOrganicResults())
     */
    protected function getSelector(string $type)
    {
        if (null === $this->types) {
            $this->types = ResultsTypes::get();
        }

        return $this->types[$type];
    }

    /**
     * Fetches the number of results Google claims to have for the query.
     *
     * @return int|null the count, null when the request failed or the page has no #resultStats block
     */
    public function getNbrResults()
    {
        $output = $this->requestGoogle($this->generateGoogleSearchUrl());
        if (false === $output) {
            return null;
        }

        $html = new \simple_html_dom();
        $html->load($output);

        $stats = $html->find('#resultStats', 0);
        if (!isset($stats->plaintext)) {
            return null;
        }

        $text = (string) $this->normalizeTextFromGoogle($stats->plaintext);
        // Drop parenthesised suffixes first: the block presumably ends with the
        // query duration, e.g. "(0.45 seconds)", whose digits must not be
        // glued to the count — confirm against current Google markup.
        $text = preg_replace('/\([^)]*\)/', '', $text);

        return intval(preg_replace('/[^0-9]/', '', $text));
    }

    /**
     * Unwraps the target of a Google redirect link (`/url?q=...` or `/url?url=...`).
     *
     * The target is returned as found in the link (it is not url-decoded).
     *
     * @param string $str href found in the result page
     *
     * @return string the target URL, or $str untouched when it is not a redirect link
     */
    protected static function getUrlFromGoogleSerpFromat($str)
    {
        // The target ends at the next `&` or, when it is the last parameter, at the end of the string.
        if (preg_match('/\/url\?.*(q|url)=(http.+)(&|$)/SiU', $str, $match)) {
            return $match[2];
        }

        return $str;
    }

    /**
     * Converts an HTML fragment to plain text: tags stripped, entities decoded.
     *
     * @param string $text
     *
     * @return string
     */
    protected function normalizeTextFromGoogle($text)
    {
        return htmlspecialchars_decode(html_entity_decode(strip_tags($text)));
    }
}
193