AOEpeople /
crawler
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace AOE\Crawler; |
||
| 6 | |||
| 7 | /* |
||
| 8 | * (c) 2020 AOE GmbH <[email protected]> |
||
| 9 | * |
||
| 10 | * This file is part of the TYPO3 Crawler Extension. |
||
| 11 | * |
||
| 12 | * It is free software; you can redistribute it and/or modify it under |
||
| 13 | * the terms of the GNU General Public License, either version 2 |
||
| 14 | * of the License, or any later version. |
||
| 15 | * |
||
| 16 | * For the full copyright and license information, please read the |
||
| 17 | * LICENSE.txt file that was distributed with this source code. |
||
| 18 | * |
||
| 19 | * The TYPO3 project - inspiring people to share! |
||
| 20 | */ |
||
| 21 | |||
| 22 | use AOE\Crawler\Controller\CrawlerController; |
||
| 23 | use AOE\Crawler\Converter\JsonCompatibilityConverter; |
||
| 24 | use AOE\Crawler\CrawlStrategy\CallbackExecutionStrategy; |
||
| 25 | use AOE\Crawler\CrawlStrategy\CrawlStrategy; |
||
| 26 | use AOE\Crawler\CrawlStrategy\CrawlStrategyFactory; |
||
| 27 | use AOE\Crawler\Utility\SignalSlotUtility; |
||
| 28 | use TYPO3\CMS\Core\Http\Uri; |
||
| 29 | use TYPO3\CMS\Core\SingletonInterface; |
||
| 30 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
| 31 | |||
/**
 * Fetches a URL based on the selected strategy or via a callback.
 *
 * The crawl strategy is obtained once from the injected factory and reused
 * for every regular (non-callback) queue item.
 *
 * @internal since v9.2.5
 */
class QueueExecutor implements SingletonInterface
{
    /**
     * Strategy used to fetch the contents of a URL for regular frontend requests.
     *
     * @var CrawlStrategy
     */
    protected $crawlStrategy;

    /**
     * @param CrawlStrategyFactory $crawlStrategyFactory Factory that creates the crawl strategy to use
     */
    public function __construct(CrawlStrategyFactory $crawlStrategyFactory)
    {
        $this->crawlStrategy = $crawlStrategyFactory->create();
    }

    /**
     * Takes a queue record and fetches the contents of the URL.
     * In the future, updating the queue item & additional signal/slot/events should also happen in here.
     *
     * The "parameters" field of the queue item is decoded first. When it does not
     * decode to a non-empty array, the string 'ERROR' is returned. When the decoded
     * parameters carry a non-empty "_CALLBACKOBJ" entry, the item is handed to the
     * callback strategy; otherwise "url" is fetched with the configured crawl
     * strategy and the SIGNAL_URL_CRAWLED signal is emitted with the URL and result.
     *
     * @param array $queueItem Queue record; reads "parameters", and for regular requests "qid" and "set_id"
     * @param CrawlerController $crawlerController Passed through to the callback strategy
     *
     * @return array|bool|string ['content' => <JSON string>] on success, false when the crawl
     *                           strategy returned false, or 'ERROR' for unusable parameters
     */
    public function executeQueueItem(array $queueItem, CrawlerController $crawlerController)
    {
        $parameters = '';
        if (isset($queueItem['parameters'])) {
            // Decode parameters:
            /** @var JsonCompatibilityConverter $jsonCompatibleConverter */
            $jsonCompatibleConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
            $parameters = $jsonCompatibleConverter->convert($queueItem['parameters']);
        }

        if (! is_array($parameters) || empty($parameters)) {
            return 'ERROR';
        }

        // empty() instead of a bare index access: regular frontend requests carry no
        // "_CALLBACKOBJ" key, and reading a missing key raises an "undefined index/key"
        // notice (PHP 7) or warning (PHP 8). Truthiness semantics are unchanged.
        if (! empty($parameters['_CALLBACKOBJ'])) {
            $className = $parameters['_CALLBACKOBJ'];
            // The callback itself must not receive its own class name as a parameter.
            unset($parameters['_CALLBACKOBJ']);
            $result = GeneralUtility::makeInstance(CallbackExecutionStrategy::class)
                ->fetchByCallback($className, $parameters, $crawlerController);
            $result = ['content' => json_encode($result)];
        } else {
            // Regular FE request
            $crawlerId = $this->generateCrawlerIdFromQueueItem($queueItem);

            $url = new Uri($parameters['url']);
            $result = $this->crawlStrategy->fetchUrlContents($url, $crawlerId);
            // A false result is passed on untouched so callers can tell a failed fetch apart.
            if ($result !== false) {
                $result = ['content' => json_encode($result)];
            }

            $signalPayload = ['url' => $parameters['url'], 'result' => $result];
            // NOTE(review): static analysis flags this call as deprecated - confirm the replacement API.
            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_URL_CRAWLED,
                $signalPayload
            );
        }

        return $result;
    }

    /**
     * Builds the crawler id for a queue item.
     *
     * The id has the form "<qid>:<hash>", where the hash is the MD5 of
     * "<qid>|<set_id>|<encryptionKey>" and the encryption key is read from
     * $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'].
     *
     * @param array $queueItem Queue record; reads "qid" and "set_id"
     */
    protected function generateCrawlerIdFromQueueItem(array $queueItem): string
    {
        return $queueItem['qid'] . ':' . md5($queueItem['qid'] . '|' . $queueItem['set_id'] . '|' . $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey']);
    }
}
||
| 98 |