1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace AOE\Crawler; |
||
6 | |||
7 | /* |
||
8 | * (c) 2020 AOE GmbH <dev@aoe.com> |
||
9 | * |
||
10 | * This file is part of the TYPO3 Crawler Extension. |
||
11 | * |
||
12 | * It is free software; you can redistribute it and/or modify it under |
||
13 | * the terms of the GNU General Public License, either version 2 |
||
14 | * of the License, or any later version. |
||
15 | * |
||
16 | * For the full copyright and license information, please read the |
||
17 | * LICENSE.txt file that was distributed with this source code. |
||
18 | * |
||
19 | * The TYPO3 project - inspiring people to share! |
||
20 | */ |
||
21 | |||
22 | use AOE\Crawler\Controller\CrawlerController; |
||
23 | use AOE\Crawler\Converter\JsonCompatibilityConverter; |
||
24 | use AOE\Crawler\CrawlStrategy\CallbackExecutionStrategy; |
||
25 | use AOE\Crawler\CrawlStrategy\CrawlStrategy; |
||
26 | use AOE\Crawler\CrawlStrategy\CrawlStrategyFactory; |
||
27 | use AOE\Crawler\Utility\SignalSlotUtility; |
||
28 | use TYPO3\CMS\Core\Http\Uri; |
||
29 | use TYPO3\CMS\Core\SingletonInterface; |
||
30 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
31 | |||
/**
 * Fetches a URL based on the selected strategy or via a callback.
 * @internal since v9.2.5
 */
class QueueExecutor implements SingletonInterface
{
    /**
     * Strategy used to fetch the contents of regular (non-callback) queue items.
     * It is created once, in the constructor, by the injected factory.
     *
     * @var CrawlStrategy
     */
    protected $crawlStrategy;

    /**
     * @param CrawlStrategyFactory $crawlStrategyFactory Factory whose create() result becomes the fetch strategy
     */
    public function __construct(CrawlStrategyFactory $crawlStrategyFactory)
    {
        $this->crawlStrategy = $crawlStrategyFactory->create();
    }

    /**
     * Takes a queue record and fetches the contents of the URL.
     * In the future, updating the queue item & additional signal/slot/events should also happen in here.
     *
     * Flow:
     *  1. Decode $queueItem['parameters'] with the JsonCompatibilityConverter.
     *  2. If the decoded value is not a non-empty array, give up with the string 'ERROR'.
     *  3. If a '_CALLBACKOBJ' entry is set, delegate to the CallbackExecutionStrategy.
     *  4. Otherwise fetch $parameters['url'] through the configured crawl strategy and
     *     emit the SIGNAL_URL_CRAWLED signal with the URL and the result.
     *
     * @param array $queueItem Queue record; reads the keys 'parameters', 'qid' and 'set_id'
     * @param CrawlerController $crawlerController Passed through to the callback strategy
     *
     * @return array|bool|string 'ERROR' when the parameters are unusable,
     *                           ['content' => <JSON string>] on success,
     *                           false when the crawl strategy returned false
     */
    public function executeQueueItem(array $queueItem, CrawlerController $crawlerController)
    {
        $parameters = '';
        if (isset($queueItem['parameters'])) {
            // Decode parameters:
            /** @var JsonCompatibilityConverter $jsonCompatibleConverter */
            $jsonCompatibleConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
            $parameters = $jsonCompatibleConverter->convert($queueItem['parameters']);
        }

        if (! is_array($parameters) || empty($parameters)) {
            return 'ERROR';
        }

        // Regular frontend requests carry no '_CALLBACKOBJ' key at all. `! empty()` takes the
        // same branch as a plain truthiness check for every value, but does not raise an
        // "undefined index / undefined array key" notice when the key is absent.
        if (! empty($parameters['_CALLBACKOBJ'])) {
            $className = $parameters['_CALLBACKOBJ'];
            // The callback receives the parameters without the class name entry.
            unset($parameters['_CALLBACKOBJ']);
            $result = GeneralUtility::makeInstance(CallbackExecutionStrategy::class)
                ->fetchByCallback($className, $parameters, $crawlerController);
            $result = ['content' => json_encode($result)];
        } else {
            // Regular FE request
            $crawlerId = $this->generateCrawlerIdFromQueueItem($queueItem);

            $url = new Uri($parameters['url']);
            $result = $this->crawlStrategy->fetchUrlContents($url, $crawlerId);
            // A `false` result is handed back untouched; anything else is JSON-encoded
            // and wrapped in the same shape as the callback branch.
            if ($result !== false) {
                $result = ['content' => json_encode($result)];
            }

            // The signal is emitted for failed fetches as well ('result' is then false).
            // NOTE(review): static analysis flags this emitSignal() call as deprecated code;
            // confirm the intended replacement before removing it.
            $signalPayload = ['url' => $parameters['url'], 'result' => $result];
            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_URL_CRAWLED,
                $signalPayload
            );
        }

        return $result;
    }

    /**
     * Builds the crawler id passed to the crawl strategy for a queue item.
     *
     * Format: "<qid>:<md5 of 'qid|set_id|encryptionKey'>", where the encryption key is
     * read from $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'].
     *
     * @param array $queueItem Queue record; reads the keys 'qid' and 'set_id'
     */
    protected function generateCrawlerIdFromQueueItem(array $queueItem): string
    {
        return $queueItem['qid'] . ':' . md5($queueItem['qid'] . '|' . $queueItem['set_id'] . '|' . $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey']);
    }
}
||
98 |