1 | <?php |
||||
2 | |||||
3 | namespace rOpenDev\Qwant; |
||||
4 | |||||
5 | use rOpenDev\Google\CacheTrait; |
||||
6 | use rOpenDev\Google\ConfSearchTrait; |
||||
7 | use rOpenDev\Google\SleepTrait; |
||||
8 | |||||
/**
 * Base class that queries the Qwant web-search JSON API and collects organic results.
 *
 * Concrete subclasses supply the HTTP transport by implementing requestGoogle().
 * The "Google" wording in method names and error labels is kept to preserve the
 * existing interface.
 *
 * NOTE(review): $q, $parameters, $nbrPage, $result, $numberItemsJustExtracted and
 * setParameter() are not declared in this class; presumably they are provided by
 * the imported traits — confirm.
 */
abstract class QwantSearch
{
    use CacheTrait;
    use ConfSearchTrait;
    use SleepTrait;

    /** @var int Result offset appended to the next-page URL as `&offset=` */
    protected $offset = 0;

    /** @var int Current page */
    protected $page = 1;

    /**
     * @var int|null Key of the current error in $errors (see getError()), null when none is set
     */
    protected $error;

    /** @var array<int, string> Error labels indexed by error code */
    protected $errors = [
        1 => 'Google Captcha',
        2 => 'Google `We\'re sorry` (flagged as automated request)',
        3 => 'Erreurs cURL',
    ];

    /**
     * Holds the errors coming from cURL.
     */
    public $cErrors;

    /**
     * Build the Qwant API URL for the current query.
     *
     * Also registers the query as the `q` parameter through setParameter().
     * The search options are hard-coded in the URL (10 results, desktop device,
     * fr_FR locale, safesearch enabled).
     *
     * @return string
     */
    public function generateGoogleSearchUrl()
    {
        $this->setParameter('q', $this->q);

        // TODO: move the hard-coded locale/device options to the configuration.
        return 'https://api.qwant.com/api/search/web?count=10&q='.urlencode($this->q)
            .'&t=web&device=desktop&extensionDisabled=true&safesearch=1&locale=fr_FR&uiv=4';
    }

    /**
     * Serialize the configured parameters as a URL query string.
     *
     * @return string
     */
    protected function generateParameters()
    {
        return http_build_query($this->parameters, '', '&');
    }

    /**
     * Perform the HTTP request.
     *
     * @param string $url URL to fetch
     *
     * @return string|false Response body (decoded as JSON by extractResults()), or false on failure
     */
    abstract protected function requestGoogle(string $url);

    /**
     * Tell whether the response shows that the search engine blocked the request.
     *
     * Currently a stub: no detection is performed and false is always returned.
     *
     * @param string $output Response body (currently unused)
     *
     * @return int|false
     */
    protected function amIKickedByGoogleThePowerful($output)
    {
        return false;
    }

    /**
     * @return string|false Label of the current error, or false when no error is set
     *                      or the error code has no label
     */
    public function getError()
    {
        if (null === $this->error) {
            return false;
        }

        return $this->errors[$this->error] ?? false;
    }

    /**
     * Fetch up to $nbrPage pages of results and accumulate them in $result.
     *
     * @return array|false The accumulated results (each with keys type, title, link),
     *                     or false as soon as one request fails
     */
    public function extractResults()
    {
        $url = null;

        for ($this->page = 1; $this->page <= $this->nbrPage; ++$this->page) {
            // Build the URL for the first request only; afterwards the next-page link is used.
            if (null === $url) {
                $url = $this->generateGoogleSearchUrl();
            }

            $output = $this->requestGoogle($url);
            if (false === $output) {
                return false;
            }

            $extract = $this->extractResultsFromJson(json_decode($output, true));
            $this->numberItemsJustExtracted = \count($extract);
            $this->result = array_merge($this->result, $extract);

            $nextPageLink = $this->getNextPageLink();
            if ($this->nbrPage > 1 && $nextPageLink) {
                $url = $nextPageLink;
            } else {
                break;
            }
        }

        return $this->result;
    }

    /**
     * Convert a decoded API response into a flat list of organic results.
     *
     * @param mixed $json Decoded response; anything without an array at
     *                    data.result.items (e.g. null after a failed json_decode)
     *                    yields an empty list
     *
     * @return array List of arrays with keys type, title, link
     */
    public function extractResultsFromJson($json)
    {
        $items = $json['data']['result']['items'] ?? null;
        if (! \is_array($items)) {
            return [];
        }

        $results = [];
        foreach ($items as $item) {
            $results[] = [
                'type' => 'organic',
                // Missing fields fall back to the values the unguarded lookup produced
                // ('' for the title, null for the link) without raising a notice.
                'title' => strip_tags($item['title'] ?? ''),
                'link' => $item['url'] ?? null,
            ];
        }

        return $results;
    }

    /**
     * Advance the offset by 10 and build the URL of the next page.
     *
     * @return string|false Next-page URL, or false when the offset is already past 90
     *                      or the last page returned fewer than 10 items
     */
    public function getNextPageLink()
    {
        // The item count may not be set yet if no page has been extracted; treat it as 0.
        if ($this->offset > 90 || ($this->numberItemsJustExtracted ?? 0) < 10) {
            return false;
        }

        $this->offset += 10;

        return $this->generateGoogleSearchUrl().'&offset='.$this->offset;
    }
}
||||
145 |