AOEpeople /
crawler
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace AOE\Crawler\Service; |
||
| 6 | |||
| 7 | /* |
||
| 8 | * (c) 2020 AOE GmbH <[email protected]> |
||
| 9 | * |
||
| 10 | * This file is part of the TYPO3 Crawler Extension. |
||
| 11 | * |
||
| 12 | * It is free software; you can redistribute it and/or modify it under |
||
| 13 | * the terms of the GNU General Public License, either version 2 |
||
| 14 | * of the License, or any later version. |
||
| 15 | * |
||
| 16 | * For the full copyright and license information, please read the |
||
| 17 | * LICENSE.txt file that was distributed with this source code. |
||
| 18 | * |
||
| 19 | * The TYPO3 project - inspiring people to share! |
||
| 20 | */ |
||
| 21 | |||
| 22 | use AOE\Crawler\Controller\CrawlerController; |
||
| 23 | use TYPO3\CMS\Core\Domain\Repository\PageRepository; |
||
| 24 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
| 25 | |||
/**
 * Adds individual pages to the crawler queue by delegating to the injected CrawlerController.
 *
 * @internal since v9.2.5
 */
class QueueService
{
    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * Stores the controller and sets its setID property to the md5int of the current microtime().
     */
    public function injectCrawlerController(CrawlerController $crawlerController): void
    {
        $this->crawlerController = $crawlerController;
        $this->crawlerController->setID = GeneralUtility::md5int(microtime());
    }

    /**
     * Registers queue entries for every crawler configuration that applies to the given page.
     *
     * Does nothing when the page record cannot be loaded or no configuration is returned for it.
     *
     * @param int $pageUid Uid of the page to enqueue
     * @param int $time Value handed to CrawlerController::urlListFromUrlArray() as third argument
     *                  (presumably the scheduled execution time - confirm against the controller)
     */
    public function addPageToQueue(int $pageUid, int $time = 0): void
    {
        /**
         * Todo: Switch back to getPage(); when dropping support for TYPO3 9 LTS - TNM
         * This switch to getPage_noCheck() is needed as TYPO3 9 LTS doesn't return dokType < 200, therefore automatically
         * adding pages to crawler queue when editing page-titles from the page tree directly was not working.
         */
        $pageData = GeneralUtility::makeInstance(PageRepository::class)->getPage_noCheck($pageUid, true);

        // A missing (e.g. deleted) page yields no usable row; there is nothing to queue in that case,
        // and handing an empty row to the controller would only operate on undefined fields.
        if (! is_array($pageData) || $pageData === []) {
            return;
        }

        $configurations = $this->crawlerController->getUrlsForPageRow($pageData);

        // Currently this is only used from the DataHandlerHook, and we don't know of any allowed/disallowed
        // configurations when clearing the cache, therefore we allow all configurations in this case.
        // The next lines could be skipped, as the call returns the incoming configurations unchanged, but they
        // are kept for visibility and later implementation since they do no harm.
        $allowedConfigurations = [];
        $configurations = ConfigurationService::removeDisallowedConfigurations($allowedConfigurations, $configurations);

        if (! is_array($configurations) || $configurations === []) {
            return;
        }

        // These two are handed over as variables on purpose (the controller may fill them by reference -
        // TODO confirm) and are shared across all configurations of this page.
        $downloadUrls = [];
        $duplicateTrack = [];

        // The registered processing instructions do not change between configurations, so read them once.
        $procInstructionKeys = array_keys($this->getCrawlerProcInstructions());

        foreach ($configurations as $configuration) {
            // Enable inserting of entries
            $this->crawlerController->registerQueueEntriesInternallyOnly = false;

            // NOTE(review): the literals 300, true and false are passed positionally; their meaning is
            // defined by CrawlerController::urlListFromUrlArray() - confirm there before changing them.
            $this->crawlerController->urlListFromUrlArray(
                $configuration,
                $pageData,
                $time,
                300,
                true,
                false,
                $duplicateTrack,
                $downloadUrls,
                $procInstructionKeys
            );

            // Reset the queue because the entries have been written to the db
            unset($this->crawlerController->queueEntries);
        }
    }

    /**
     * Reads the registered processingInstructions of the crawler
     *
     * @return array Map built from each registered entry's 'key' (array key) and 'value' (array value);
     *               empty when nothing usable is registered
     */
    private function getCrawlerProcInstructions(): array
    {
        $registered = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'] ?? [];

        // Anything that is not an array cannot be iterated; treat it like "nothing registered".
        if (! is_array($registered)) {
            return [];
        }

        $crawlerProcInstructions = [];
        foreach ($registered as $configuration) {
            $crawlerProcInstructions[$configuration['key']] = $configuration['value'];
        }

        return $crawlerProcInstructions;
    }
}
||
| 97 |