1
|
|
|
<?php |
2
|
|
|
namespace CViniciusSDias\GoogleCrawler\Proxy; |
3
|
|
|
use CViniciusSDias\GoogleCrawler\Exception\InvalidResultException; |
4
|
|
|
use GuzzleHttp\Client; |
5
|
|
|
use Psr\Http\Message\ResponseInterface; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
* Class for using the kproxy.com servers |
9
|
|
|
* |
10
|
|
|
* @package CViniciusSDias\GoogleCrawler\Proxy |
11
|
|
|
* @author Vinicius Dias |
12
|
|
|
*/ |
13
|
|
|
class KProxy implements GoogleProxy
{
    /** @var string $endpoint Base URL of the selected kproxy.com server */
    protected $endpoint;
    /** @var int $serverNumber Number of the selected kproxy.com server (1 to 9) */
    protected $serverNumber;

    /**
     * Constructor that initializes the proxy service in one of its servers, which go from 1 to 9
     *
     * @param int $serverNumber
     * @throws \InvalidArgumentException If the server number is outside the 1-9 range
     */
    public function __construct(int $serverNumber)
    {
        if ($serverNumber < 1 || $serverNumber > 9) {
            throw new \InvalidArgumentException(
                "The server number must be between 1 and 9, {$serverNumber} given"
            );
        }

        $this->serverNumber = $serverNumber;
        $this->endpoint = "https://server{$serverNumber}.kproxy.com";
    }

    /**
     * {@inheritdoc}
     *
     * Uses a cookie-enabled HTTP client to: request the server's main page, post the target URL to
     * the proxy form and, finally, request the proxy's redirect servlet using the query string of
     * the given URL.
     */
    public function getHttpResponse(string $url): ResponseInterface
    {
        $httpClient = new Client(['cookies' => true]);
        $this->accessMainPage($httpClient);
        $this->sendRequestToProxy($httpClient, $url);

        // parse_url() returns null when there is no query string and false when the URL is
        // malformed. Both cases fall back to an empty query instead of reading a missing index.
        $queryString = parse_url($url, PHP_URL_QUERY);
        if (!is_string($queryString)) {
            $queryString = '';
        }

        $actualUrl = "{$this->endpoint}/servlet/redirect.srv/swh/suxm/sqyudex/spqr/p1/search?{$queryString}";

        return $httpClient->request('GET', $actualUrl);
    }

    /**
     * Accesses the main page of the kproxy.com server. This is mandatory.
     *
     * @param Client $httpClient
     */
    private function accessMainPage(Client $httpClient): void
    {
        $httpClient->request('GET', "{$this->endpoint}/index.jsp");
    }

    /**
     * {@inheritdoc}
     *
     * Extracts the "q" parameter from the query string of the given URL and returns it when it is
     * a valid URL.
     *
     * @throws InvalidResultException If the URL has no query string, no "q" parameter, or if the
     *                                "q" parameter is not a valid URL
     */
    public function parseUrl(string $url): string
    {
        $queryString = parse_url($url, PHP_URL_QUERY);
        if (!is_string($queryString)) {
            // No query string (or malformed URL): there is no "q" parameter to extract
            throw new InvalidResultException();
        }

        parse_str($queryString, $link);

        // "q" may be absent, or an array when sent as "q[]=..."; neither can be a valid URL
        $target = $link['q'] ?? null;
        $resultUrl = is_string($target) ? filter_var($target, FILTER_VALIDATE_URL) : false;

        // If this is not a valid URL, so the result is (probably) an image, news or video suggestion
        if ($resultUrl === false) {
            throw new InvalidResultException();
        }

        return $resultUrl;
    }

    /**
     * Sends the request to the proxy service that saves the info in session. After this we can redirect
     * the user to the search results
     *
     * @param Client $httpClient
     * @param string $url
     */
    private function sendRequestToProxy(Client $httpClient, string $url): void
    {
        // NOTE(review): the URL is encoded here and the "form_params" option presumably encodes
        // the form fields again, so the proxy may receive a double-encoded value. Kept as is
        // because this may be what the service expects - confirm against the live service.
        $encodedUrl = urlencode($url);
        $postData = ['page' => $encodedUrl, 'x' => 0, 'y' => 0];
        $headers = [
            'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        ];

        $httpClient->request(
            'POST',
            "{$this->endpoint}/doproxy.jsp",
            ['form_params' => $postData, 'headers' => $headers]
        );
    }
}
94
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.