| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | declare(strict_types=1); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | namespace AOE\Crawler\Service; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | /* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * (c) 2020 AOE GmbH <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * This file is part of the TYPO3 Crawler Extension. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  * It is free software; you can redistribute it and/or modify it under | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  * the terms of the GNU General Public License, either version 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  * of the License, or any later version. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  * For the full copyright and license information, please read the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  * LICENSE.txt file that was distributed with this source code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  * The TYPO3 project - inspiring people to share! | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  | use AOE\Crawler\Controller\CrawlerController; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  | use TYPO3\CMS\Core\Domain\Repository\PageRepository; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  | use TYPO3\CMS\Core\Utility\GeneralUtility; | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 25 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                /**
                 * Adds the crawlable URLs of a single page to the crawler queue.
                 *
                 * All queue handling is delegated to the injected CrawlerController; this
                 * service only resolves the page row, collects the crawler configurations
                 * for it and hands every configuration to the controller.
                 */
                class QueueService
                {
                    /**
                     * @var CrawlerController
                     */
                    private $crawlerController;

                    /**
                     * Stores the controller and assigns it a set id derived from the current microtime.
                     */
                    public function injectCrawlerController(CrawlerController $crawlerController): void
                    {
                        $this->crawlerController = $crawlerController;
                        $this->crawlerController->setID = GeneralUtility::md5int(microtime());
                    }

                    /**
                     * Queues every URL the crawler configurations produce for the given page.
                     *
                     * @param int $pageUid uid of the page whose URLs should be queued
                     * @param int $time value forwarded unchanged to CrawlerController::urlListFromUrlArray()
                     */
                    public function addPageToQueue(int $pageUid, int $time = 0): void
                    {
                        /**
                         * Todo: Switch back to getPage(); when dropping support for TYPO3 9 LTS - TNM
                         * This switch to getPage_noCheck() is needed as TYPO3 9 LTS doesn't return dokType < 200, therefore automatically
                         * adding pages to crawler queue when editing page-titles from the page tree directly was not working.
                         */
                        $pageData = GeneralUtility::makeInstance(PageRepository::class)->getPage_noCheck($pageUid, true);
                        if (! is_array($pageData) || $pageData === []) {
                            // getPage_noCheck() yields an empty array when no page row exists for the uid;
                            // without a row there is nothing that could be queued.
                            return;
                        }

                        $configurations = $this->crawlerController->getUrlsForPageRow($pageData);
                        // Currently this is only used from the DataHandlerHook, and we don't know of any allowed/disallowed
                        // configurations when clearing the cache, therefore we allow all configurations in this case.
                        // The next lines could be skipped, as they return the incoming configurations unchanged, but they
                        // are kept for visibility and later implementation since they do no harm.
                        $allowedConfigurations = [];
                        $configurations = ConfigurationService::removeDisallowedConfigurations($allowedConfigurations, $configurations);
                        if (! is_array($configurations)) {
                            return;
                        }

                        $downloadUrls = [];
                        $duplicateTrack = [];
                        // The registered processing instructions only depend on global configuration,
                        // so their keys are resolved once instead of once per configuration.
                        $procInstructionKeys = array_keys($this->getCrawlerProcInstructions());

                        foreach ($configurations as $configuration) {
                            //enable inserting of entries
                            $this->crawlerController->registerQueueEntriesInternallyOnly = false;
                            // NOTE(review): 300, true and false are positional arguments whose meaning is defined by
                            // CrawlerController::urlListFromUrlArray() - confirm against that signature before changing.
                            $this->crawlerController->urlListFromUrlArray(
                                $configuration,
                                $pageData,
                                $time,
                                300,
                                true,
                                false,
                                $duplicateTrack,
                                $downloadUrls,
                                $procInstructionKeys
                            );

                            //reset the queue because the entries have been written to the db
                            // NOTE(review): unset() removes the property from the controller instance instead of
                            // emptying it - verify that CrawlerController copes with the property being absent.
                            unset($this->crawlerController->queueEntries);
                        }
                    }

                    /**
                     * Reads the registered processingInstructions of the crawler
                     *
                     * @return array map of each registered entry's 'key' to its 'value'; empty when nothing is registered
                     */
                    private function getCrawlerProcInstructions(): array
                    {
                        $registeredInstructions = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'] ?? [];
                        if (! is_array($registeredInstructions)) {
                            // A non-array setting cannot be iterated; treat it like "nothing registered".
                            return [];
                        }

                        $crawlerProcInstructions = [];
                        foreach ($registeredInstructions as $configuration) {
                            $crawlerProcInstructions[$configuration['key']] = $configuration['value'];
                        }

                        return $crawlerProcInstructions;
                    }
                }
            
                                                        
            
                                    
            
            
                | 94 |  |  |  |