1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace AOE\Crawler\Service; |
||
6 | |||
7 | /* |
||
8 | * (c) 2020 AOE GmbH <[email protected]> |
||
9 | * |
||
10 | * This file is part of the TYPO3 Crawler Extension. |
||
11 | * |
||
12 | * It is free software; you can redistribute it and/or modify it under |
||
13 | * the terms of the GNU General Public License, either version 2 |
||
14 | * of the License, or any later version. |
||
15 | * |
||
16 | * For the full copyright and license information, please read the |
||
17 | * LICENSE.txt file that was distributed with this source code. |
||
18 | * |
||
19 | * The TYPO3 project - inspiring people to share! |
||
20 | */ |
||
21 | |||
22 | use AOE\Crawler\Controller\CrawlerController; |
||
23 | use TYPO3\CMS\Core\Domain\Repository\PageRepository; |
||
24 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
25 | |||
/**
 * Adds the crawlable URLs of a single page to the crawler queue.
 *
 * @internal since v9.2.5
 */
class QueueService
{
    /**
     * Controller used to resolve the URLs of a page row and to write the queue entries.
     *
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * Stores the given controller and assigns it a set id derived from the current microtime.
     */
    public function injectCrawlerController(CrawlerController $crawlerController): void
    {
        $this->crawlerController = $crawlerController;
        $crawlerController->setID = GeneralUtility::md5int(microtime());
    }

    /**
     * Resolves the crawler configurations for the page and hands each of them to the
     * crawler controller so the resulting URLs are queued.
     *
     * @param int $pageUid uid of the page whose URLs are queued
     * @param int $time value passed through unchanged to CrawlerController::urlListFromUrlArray()
     */
    public function addPageToQueue(int $pageUid, int $time = 0): void
    {
        /*
         * Todo: Switch back to getPage(); when dropping support for TYPO3 9 LTS - TNM
         * getPage_noCheck() is used here because TYPO3 9 LTS doesn't return dokType < 200, therefore
         * automatically adding pages to the crawler queue when editing page titles directly in the
         * page tree was not working.
         */
        $pageRepository = GeneralUtility::makeInstance(PageRepository::class);
        $pageRow = $pageRepository->getPage_noCheck($pageUid, true);
        $resolvedConfigurations = $this->crawlerController->getUrlsForPageRow($pageRow);

        // Currently this is only called from the DataHandlerHook, where no allowed/disallowed
        // configurations are known when the cache is cleared, so every configuration is allowed.
        // The filter call could be skipped, as it hands back the incoming configurations, but it is
        // kept for visibility and for a later implementation; it does no harm.
        $allowedConfigurations = [];
        $configurations = ConfigurationService::removeDisallowedConfigurations(
            $allowedConfigurations,
            $resolvedConfigurations
        );

        if (! is_array($configurations)) {
            return;
        }

        // Kept as local variables because they are handed to the controller on every iteration.
        $duplicateTrack = [];
        $downloadUrls = [];

        foreach ($configurations as $configuration) {
            // Enable inserting of entries.
            $this->crawlerController->registerQueueEntriesInternallyOnly = false;

            $procInstructionKeys = array_keys($this->getCrawlerProcInstructions());
            $this->crawlerController->urlListFromUrlArray(
                $configuration,
                $pageRow,
                $time,
                300,
                true,
                false,
                $duplicateTrack,
                $downloadUrls,
                $procInstructionKeys
            );

            // Reset the queue because the entries have been written to the db.
            // NOTE(review): this removes the property instead of emptying it - confirm that
            // CrawlerController copes with a missing queueEntries property.
            unset($this->crawlerController->queueEntries);
        }
    }

    /**
     * Reads the processing instructions registered for the crawler in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'].
     *
     * @return array each registration's 'value', indexed by its 'key'; empty when nothing is registered
     */
    private function getCrawlerProcInstructions(): array
    {
        if (empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'])) {
            return [];
        }

        $instructions = [];
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'] as $registration) {
            $instructions[$registration['key']] = $registration['value'];
        }

        return $instructions;
    }
}
||
97 |