RobinDev /
Google
| 1 | <?php |
||||
| 2 | |||||
| 3 | namespace rOpenDev\Qwant; |
||||
| 4 | |||||
| 5 | use rOpenDev\Google\CacheTrait; |
||||
| 6 | use rOpenDev\Google\ConfSearchTrait; |
||||
| 7 | use rOpenDev\Google\SleepTrait; |
||||
| 8 | |||||
/**
 * Base class for querying the Qwant web-search JSON API and collecting the
 * organic results page after page.
 *
 * Concrete subclasses supply the HTTP transport by implementing requestGoogle().
 * Several member names still mention "Google"; they are kept unchanged so that
 * existing callers and subclasses keep working.
 *
 * NOTE(review): $q, $parameters, $nbrPage, $result, $numberItemsJustExtracted
 * and setParameter() are not declared in this class; presumably they come from
 * the imported traits — confirm (dynamic properties are deprecated since PHP 8.2).
 */
abstract class QwantSearch
{
    use CacheTrait;
    use ConfSearchTrait;
    use SleepTrait;

    /** @var int Offset appended to the next-page URL; grows by 10 for each page requested */
    protected $offset = 0;

    /** @var int Current page */
    protected $page = 1;

    /**
     * @var int|string|null Key of the current error in $errors, null when there is none
     */
    protected $error;

    /** @var array Human-readable error messages indexed by error code */
    protected $errors = [
        1 => 'Google Captcha',
        2 => 'Google `We\'re sorry` (flagged as automated request)',
        3 => 'Erreurs cURL',
    ];

    /**
     * Holds the errors coming from cURL.
     */
    public $cErrors;

    /**
     * Build the Qwant API URL for the current query (first page, no offset).
     *
     * Also registers the query as the `q` parameter through setParameter().
     *
     * @return string
     */
    public function generateGoogleSearchUrl()
    {
        $this->setParameter('q', $this->q);

        // TODO: the locale (fr_FR) and the other fixed flags should move to the configuration.
        return 'https://api.qwant.com/api/search/web?count=10&q='.urlencode($this->q)
            .'&t=web&device=desktop&extensionDisabled=true&safesearch=1&locale=fr_FR&uiv=4';
    }

    /**
     * Serialise $this->parameters as a URL query string using `&` as separator.
     *
     * @return string
     */
    protected function generateParameters()
    {
        return http_build_query($this->parameters, '', '&');
    }

    /**
     * Fetch the given URL.
     *
     * @param string $url
     *
     * @return string|false Raw response body (decoded as JSON by extractResults()), false on failure
     */
    abstract protected function requestGoogle(string $url);

    /**
     * Am I kicked by the search engine? Did you reach its limits?
     *
     * This implementation performs no detection and always answers false.
     *
     * @param string $output Response source (unused here, kept for signature compatibility)
     *
     * @return int|false
     */
    protected function amIKickedByGoogleThePowerful($output)
    {
        return false;
    }

    /**
     * @return string|false Message explaining the current error, false when there is
     *                      no error or when the error code has no registered message
     */
    public function getError()
    {
        if (null === $this->error) {
            return false;
        }

        // Unknown codes yield false instead of an undefined-index warning and a null result.
        return $this->errors[$this->error] ?? false;
    }

    /**
     * Request up to $this->nbrPage result pages and accumulate them in $this->result.
     *
     * @return array|false List of results with the keys type, link and title,
     *                     or false as soon as one request fails
     */
    public function extractResults()
    {
        $url = null;

        for ($this->page = 1; $this->page <= $this->nbrPage; ++$this->page) {
            // The first request uses a freshly generated URL; later ones follow the next-page link.
            if (null === $url) {
                $url = $this->generateGoogleSearchUrl();
            }

            $output = $this->requestGoogle($url);
            if (false === $output) {
                return false;
            }

            $extract = $this->extractResultsFromJson(json_decode($output, true));
            // Read by getNextPageLink() to detect a short (i.e. last) page.
            $this->numberItemsJustExtracted = \count($extract);
            $this->result = array_merge($this->result, $extract);

            // Always called, even for a single page, because it also advances $this->offset.
            $nextPageLink = $this->getNextPageLink();
            if ($this->nbrPage <= 1 || ! $nextPageLink) {
                break;
            }

            $url = $nextPageLink;
        }

        return $this->result;
    }

    /**
     * Convert a decoded API response into a flat list of organic results.
     *
     * Items are read from `data.result.items`. Entries that are not arrays or
     * that carry no `url` are skipped; a missing `title` becomes an empty string.
     *
     * @param mixed $json Decoded response; anything without the expected structure gives []
     *
     * @return array List of arrays with the keys type, title and link
     */
    public function extractResultsFromJson($json)
    {
        $results = [];

        if (! isset($json['data']['result']['items']) || ! \is_array($json['data']['result']['items'])) {
            return $results;
        }

        foreach ($json['data']['result']['items'] as $item) {
            // A result without a link is unusable; skipping it also avoids undefined-index warnings.
            if (! \is_array($item) || ! isset($item['url'])) {
                continue;
            }

            $results[] = [
                'type' => 'organic',
                // strip_tags() must not receive null (deprecated since PHP 8.1).
                'title' => strip_tags($item['title'] ?? ''),
                'link' => $item['url'],
            ];
        }

        return $results;
    }

    /**
     * Advance the offset by 10 and build the URL of the following page.
     *
     * @return string|false URL of the next page, or false when the offset is already
     *                      above 90 or the last page returned fewer than 10 items
     */
    public function getNextPageLink()
    {
        if ($this->offset > 90 || $this->numberItemsJustExtracted < 10) {
            return false;
        }

        $this->offset += 10;

        return $this->generateGoogleSearchUrl().'&offset='.$this->offset;
    }
}
||||
| 145 |