AOEpeople /
crawler
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace AOE\Crawler\CrawlStrategy; |
||
| 6 | |||
| 7 | /* |
||
| 8 | * (c) 2020 AOE GmbH <[email protected]> |
||
| 9 | * |
||
| 10 | * This file is part of the TYPO3 Crawler Extension. |
||
| 11 | * |
||
| 12 | * It is free software; you can redistribute it and/or modify it under |
||
| 13 | * the terms of the GNU General Public License, either version 2 |
||
| 14 | * of the License, or any later version. |
||
| 15 | * |
||
| 16 | * For the full copyright and license information, please read the |
||
| 17 | * LICENSE.txt file that was distributed with this source code. |
||
| 18 | * |
||
| 19 | * The TYPO3 project - inspiring people to share! |
||
| 20 | */ |
||
| 21 | |||
| 22 | use GuzzleHttp\Exception\RequestException; |
||
| 23 | use Psr\Http\Message\ResponseInterface; |
||
| 24 | use Psr\Http\Message\UriInterface; |
||
| 25 | use Psr\Log\LoggerAwareInterface; |
||
| 26 | use Psr\Log\LoggerAwareTrait; |
||
| 27 | use TYPO3\CMS\Core\Http\RequestFactory; |
||
| 28 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
| 29 | |||
/**
 * Calls Guzzle / CURL (based on TYPO3 settings) for fetching a URL.
 */
class GuzzleExecutionStrategy implements LoggerAwareInterface, CrawlStrategy
{
    use LoggerAwareTrait;

    /**
     * Fetches the given URL with a GET request and returns the unserialized response body.
     *
     * Credentials embedded in the URL (user info) are forwarded via the "auth"
     * request option. When the request fails with a RequestException, the failure
     * is logged at debug level and a "<status code> <reason>" string is returned
     * instead of the unserialized payload.
     *
     * @param UriInterface $url URL to fetch
     * @param string $crawlerId value sent in the X-T3Crawler request header
     *
     * @return bool|mixed unserialized response body (false if it cannot be unserialized),
     *                    or an error message string when the request failed
     */
    public function fetchUrlContents(UriInterface $url, string $crawlerId)
    {
        $options = ['headers' => $this->buildRequestHeaders($crawlerId)];

        $userInfo = $url->getUserInfo();
        if ($userInfo !== '') {
            // Split on the first colon only, so passwords containing ":" stay intact and
            // no stray third element is passed along as an authentication type.
            // Pad to two elements so a username without password still yields [user, ''].
            $options['auth'] = array_pad(explode(':', $userInfo, 2), 2, '');
        }

        // Keep the string form in its own variable; it is needed for the log message as well.
        $urlString = (string) $url;

        try {
            $response = $this->getResponse($urlString, $options);
            $contents = $response->getBody()->getContents();

            // NOTE(review): unserialize() is applied to a remote response body. If the
            // payload never contains objects, consider passing ['allowed_classes' => false].
            return unserialize($contents);
        } catch (RequestException $e) {
            // NOTE(review): only RequestException is handled here; other transfer
            // exceptions propagate to the caller - confirm this is intended.
            $response = $e->getResponse();
            $message = ($response ? $response->getStatusCode() : 0)
                . chr(32)
                . ($response ? $response->getReasonPhrase() : $e->getMessage());

            // The logger is injected through LoggerAwareTrait and may not have been set.
            if ($this->logger !== null) {
                // Pass the message as an argument instead of concatenating it into the
                // format string: a "%" inside the message would otherwise break sprintf().
                $this->logger->debug(
                    sprintf('Error while opening "%s" - %s', $urlString, $message),
                    ['crawlerId' => $crawlerId]
                );
            }

            return $message;
        }
    }

    /**
     * Sends a GET request to the given URL through the TYPO3 RequestFactory.
     *
     * @param string $url URL to request
     * @param array $options request options handed to the RequestFactory
     */
    protected function getResponse(string $url, array $options): ResponseInterface
    {
        return GeneralUtility::makeInstance(RequestFactory::class)
            ->request(
                $url,
                'GET',
                $options
            );
    }

    /**
     * Builds HTTP request headers.
     *
     * @param string $crawlerId value of the X-T3Crawler header
     *
     * @return array<string, string> header name => header value
     */
    private function buildRequestHeaders(string $crawlerId): array
    {
        return [
            'Connection' => 'close',
            'X-T3Crawler' => $crawlerId,
            'User-Agent' => 'TYPO3 crawler',
        ];
    }
}
||
| 91 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.