Ne-Lexa /
google-play-scraper
| 1 | <?php |
||||||
| 2 | |||||||
| 3 | declare(strict_types=1); |
||||||
| 4 | |||||||
| 5 | /* |
||||||
| 6 | * Copyright (c) Ne-Lexa |
||||||
| 7 | * |
||||||
| 8 | * For the full copyright and license information, please view |
||||||
| 9 | * the LICENSE file that was distributed with this source code. |
||||||
| 10 | * |
||||||
| 11 | * @see https://github.com/Ne-Lexa/google-play-scraper |
||||||
| 12 | */ |
||||||
| 13 | |||||||
| 14 | namespace Nelexa\GPlay\HttpClient; |
||||||
| 15 | |||||||
| 16 | use GuzzleHttp\Client as GuzzleClient; |
||||||
| 17 | use GuzzleHttp\Exception\ConnectException; |
||||||
| 18 | use GuzzleHttp\Exception\TransferException; |
||||||
| 19 | use GuzzleHttp\HandlerStack; |
||||||
| 20 | use GuzzleHttp\MessageFormatter; |
||||||
| 21 | use GuzzleHttp\Middleware; |
||||||
| 22 | use GuzzleHttp\Pool; |
||||||
| 23 | use GuzzleHttp\Promise\FulfilledPromise; |
||||||
| 24 | use GuzzleHttp\Promise\PromiseInterface; |
||||||
| 25 | use GuzzleHttp\RequestOptions; |
||||||
| 26 | use Psr\Http\Message\RequestInterface; |
||||||
| 27 | use Psr\Http\Message\ResponseInterface; |
||||||
| 28 | use Psr\SimpleCache\CacheInterface; |
||||||
| 29 | use Psr\SimpleCache\InvalidArgumentException; |
||||||
| 30 | |||||||
/**
 * Thin wrapper around a Guzzle client that sends {@see Request} objects,
 * runs their parse handlers on the responses and optionally stores the
 * parsed results in a PSR-16 cache.
 */
class HttpClient
{
    public const DEFAULT_CONCURRENCY = 4;

    /** Message template used by the console logger middleware of the default client. */
    private const DEFAULT_LOG_TEMPLATE = '🌎 [{ts}] "{method} {url} HTTP/{version}" {code} "{phrase}" - {res_header_Content-Length}';

    /** Upper bound (exclusive) on the retry counter accepted by the retry decider of the default client. */
    private const MAX_RETRIES = 3;

    /** Response status codes for which the default client retries the request. */
    private const RETRY_STATUS_CODES = [408, 429, 500, 502, 503, 522];

    /** @var \Psr\SimpleCache\CacheInterface|null */
    private $cache;

    /** @var \GuzzleHttp\Client */
    private $client;

    /**
     * Client-wide options. They are merged with (and overridden by) the
     * options of each individual request in {@see getRequestPromise()}.
     *
     * @var array
     */
    private $options = [];

    /**
     * @param \GuzzleHttp\Client|null              $client HTTP client to use; when null a default
     *                                                     client with logging and retry middleware is built
     * @param \Psr\SimpleCache\CacheInterface|null $cache  optional cache for parsed results
     */
    public function __construct(?GuzzleClient $client = null, ?CacheInterface $cache = null)
    {
        $this->client = $client ?? self::createDefaultClient();
        $this->cache = $cache;
    }

    /**
     * @return \Psr\SimpleCache\CacheInterface|null
     */
    public function getCache(): ?CacheInterface
    {
        return $this->cache;
    }

    /**
     * @return \GuzzleHttp\Client
     */
    public function getClient(): GuzzleClient
    {
        return $this->client;
    }

    /**
     * Sends a single request and blocks until its promise is settled.
     *
     * The $onRejected callback (or a default one that simply returns the
     * throwable) is attached to the request promise, but the promise that
     * `otherwise()` returns is not used: the value returned here is the
     * result of waiting on the original request promise.
     *
     * NOTE(review): because the derived promise is discarded, the return
     * value of $onRejected cannot replace the result of this method, and a
     * rejection is presumably rethrown by `wait()` (see the Guzzle promise
     * documentation). Callers may rely on that, so the behaviour is kept
     * as is - confirm whether $onRejected was meant to provide a fallback.
     *
     * @param \Nelexa\GPlay\HttpClient\Request $request
     * @param \Closure|null                    $onRejected
     *
     * @return mixed value produced by the request's parse handler or taken from the cache
     */
    public function request(Request $request, ?\Closure $onRejected = null)
    {
        $promise = $this->getRequestPromise($request);
        $promise->otherwise(
            $onRejected ?? static function (\Throwable $throwable) {
                return $throwable;
            }
        );

        return $promise->wait();
    }

    /**
     * Creates a promise that resolves to the parsed result of the request.
     *
     * The cache is consulted only when a cache is configured, the merged
     * options contain a `cache_ttl` key and do not contain a `no_cache` key.
     * On a cache hit an already fulfilled promise is returned and no HTTP
     * request is sent. On a miss, a non-null parse result is written to the
     * cache with the `cache_ttl` value as its TTL.
     *
     * @param \Nelexa\GPlay\HttpClient\Request $request
     *
     * @throws \RuntimeException if the cache rejects the cache key
     *
     * @return \GuzzleHttp\Promise\PromiseInterface
     *
     * @internal
     */
    public function getRequestPromise(Request $request): PromiseInterface
    {
        // Request-level options take precedence over client-wide ones.
        $options = array_merge($this->options, $request->getOptions());

        // Capture the cache instance now so that the deferred callback below
        // never dereferences a cache that was unset via setCache(null) in the meantime.
        $cache = $this->cache;
        $cacheKey = null;

        if (
            $cache !== null
            && !\array_key_exists('no_cache', $options)
            && \array_key_exists('cache_ttl', $options)
        ) {
            $cacheKey = $options['cache_key'] ?? sprintf(
                'http_client_gplay.v1.%s.%s',
                HashUtil::hashCallable($request->getParseHandler()),
                HashUtil::getRequestHash($request->getPsrRequest())
            );

            try {
                $cachedValue = $cache->get($cacheKey);
            } catch (InvalidArgumentException $e) {
                // Keep the original exception attached so the cause is not lost.
                throw new \RuntimeException('Error fetch cache', 0, $e);
            }

            if ($cachedValue !== null) {
                return new FulfilledPromise($cachedValue);
            }
        }

        // Send the merged options so that client-wide settings (timeouts and
        // anything set through setOption()) actually reach Guzzle.
        return $this->client
            ->sendAsync($request->getPsrRequest(), $options)
            ->then(static function (ResponseInterface $response) use ($request, $cache, $cacheKey, $options) {
                $parseResult = $request->getParseHandler()($request->getPsrRequest(), $response, $options);

                // $cacheKey is only non-null when $cache is non-null (see above).
                if ($cacheKey !== null && $parseResult !== null) {
                    $cache->set($cacheKey, $parseResult, $options['cache_ttl']);
                }

                return $parseResult;
            })
        ;
    }

    /**
     * Sends several requests through a Guzzle pool and collects the results.
     *
     * The pool runs with the concurrency returned by {@see getConcurrency()}.
     * Results are stored under the same keys as the corresponding entries of
     * $requests. Without a custom $onRejected callback the default handler
     * rethrows the rejection reason.
     *
     * @param array<Request> $requests
     * @param \Closure|null  $onRejected callback installed as the pool's `rejected` handler
     *
     * @return array parsed results of the fulfilled requests, indexed by request key
     */
    public function requestPool(array $requests, ?\Closure $onRejected = null): array
    {
        // Lazily yields one promise factory per request so that the pool
        // decides when each request is actually started.
        $makeRequests = function () use ($requests): \Generator {
            foreach ($requests as $key => $request) {
                yield $key => function () use ($request): PromiseInterface {
                    return $this->getRequestPromise($request);
                };
            }
        };

        $results = [];
        $pool = new Pool($this->client, $makeRequests(), [
            'concurrency' => $this->getConcurrency(),
            'fulfilled' => static function ($result, $key) use (&$results): void {
                $results[$key] = $result;
            },
            'rejected' => $onRejected ?? static function (\Throwable $throwable): void {
                throw $throwable;
            },
        ]);

        $pool->promise()->wait();

        return $results;
    }

    /**
     * @param \Psr\SimpleCache\CacheInterface|null $cache
     *
     * @return HttpClient
     */
    public function setCache(?CacheInterface $cache): self
    {
        $this->cache = $cache;

        return $this;
    }

    /**
     * @param \GuzzleHttp\Client $client
     *
     * @return HttpClient
     */
    public function setClient(GuzzleClient $client): self
    {
        $this->client = $client;

        return $this;
    }

    /**
     * Sets a client-wide option that is merged into the options of every request.
     *
     * @param string $key
     * @param mixed  $value
     *
     * @return HttpClient
     */
    public function setOption(string $key, $value): self
    {
        $this->options[$key] = $value;

        return $this;
    }

    /**
     * Sets the number of concurrent requests used by {@see requestPool()}.
     * Values below 1 are raised to 1.
     *
     * @param int $concurrency
     *
     * @return HttpClient
     */
    public function setConcurrency(int $concurrency): self
    {
        $this->options['concurrency'] = max(1, $concurrency);

        return $this;
    }

    /**
     * @return int configured concurrency, or {@see DEFAULT_CONCURRENCY} when none was set
     */
    public function getConcurrency(): int
    {
        return $this->options['concurrency'] ?? self::DEFAULT_CONCURRENCY;
    }

    /**
     * Stores the value under Guzzle's connect-timeout request option.
     * Negative values are replaced by 0.
     *
     * @param float $connectTimeout
     *
     * @return HttpClient
     */
    public function setConnectTimeout(float $connectTimeout): self
    {
        $this->options[RequestOptions::CONNECT_TIMEOUT] = max(0, $connectTimeout);

        return $this;
    }

    /**
     * Stores the value under Guzzle's timeout request option.
     * Negative values are replaced by 0.
     *
     * @param float $timeout
     *
     * @return HttpClient
     */
    public function setTimeout(float $timeout): self
    {
        $this->options[RequestOptions::TIMEOUT] = max(0, $timeout);

        return $this;
    }

    /**
     * Builds the client used when none is injected: browser-like default
     * headers, an optional proxy taken from the HTTP_PROXY environment
     * variable, console logging when running under the CLI SAPI, and a
     * retry middleware.
     *
     * @return \GuzzleHttp\Client
     */
    private static function createDefaultClient(): GuzzleClient
    {
        $defaultOptions = [
            RequestOptions::HEADERS => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.7390.123 Safari/537.36',
                'Referer' => 'https://play.google.com/',
            ],
        ];

        // getenv() returns false when the variable is not defined.
        $proxy = getenv('HTTP_PROXY');
        if ($proxy !== false) {
            $defaultOptions[RequestOptions::PROXY] = $proxy;
        }

        $stack = HandlerStack::create();

        if (\PHP_SAPI === 'cli') {
            $stack->push(
                Middleware::log(new ConsoleLog(), new MessageFormatter(self::DEFAULT_LOG_TEMPLATE)),
                'logger'
            );
        }

        $stack->push(
            Middleware::retry(
                // The rejection reason is intentionally left untyped: a type
                // declaration would raise a TypeError for any reason that is not
                // of the declared type and hide the original failure.
                static function (
                    int $retries,
                    RequestInterface $request,
                    ?ResponseInterface $response = null,
                    $exception = null
                ): bool {
                    if ($retries >= self::MAX_RETRIES) {
                        return false;
                    }

                    if ($exception instanceof ConnectException) {
                        return true;
                    }

                    return $response !== null
                        && \in_array($response->getStatusCode(), self::RETRY_STATUS_CODES, true);
                },
                // Delay doubles with every retry: 2^retries * 1000.
                static function (int $retries) {
                    return 2 ** $retries * 1000;
                }
            ),
            'retry'
        );
        $defaultOptions['handler'] = $stack;

        return new GuzzleClient($defaultOptions);
    }
}
||||||
| 266 |