Passed
Push — cleanup/misc-changes ( 60631a...a39946 )
by Tomas Norre
05:30
created

QueueService   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 81
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 34
c 1
b 0
f 0
dl 0
loc 81
ccs 0
cts 47
cp 0
rs 10
wmc 9

3 Methods

Rating   Name   Duplication   Size   Complexity  
A addPageToQueue() 0 37 3
A findCrawler() 0 11 3
A getCrawlerProcInstructions() 0 10 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Service;
6
7
/*
8
 * (c) 2020 AOE GmbH <dev@aoe.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use AOE\Crawler\Controller\CrawlerController;
23
use AOE\Crawler\Exception\CrawlerObjectException;
24
use TYPO3\CMS\Core\Utility\GeneralUtility;
25
use TYPO3\CMS\Frontend\Page\PageRepository;
26
27
/**
 * Adds the URLs of a single page to the crawler queue.
 *
 * According to the inline notes below, this is currently only used from the
 * DataHandlerHook, i.e. when caches are cleared for a page.
 */
class QueueService
{
    /**
     * Crawler instance, created on first use by findCrawler() and reused afterwards.
     *
     * @var CrawlerController
     */
    protected $crawlerController;

    /**
     * Builds the crawler URLs for the given page and writes them to the queue.
     *
     * @param int $pageUid Uid of the page whose URLs should be queued
     * @param int $time Forwarded unchanged as third argument to
     *                  CrawlerController::urlListFromUrlArray() (presumably the
     *                  scheduled execution timestamp - confirm against that method)
     *
     * @throws CrawlerObjectException if no crawler instance could be obtained
     */
    public function addPageToQueue(int $pageUid, int $time = 0): void
    {
        $crawler = $this->findCrawler();
        /**
         * Todo: Switch back to getPage(); when dropping support for TYPO3 9 LTS - TNM
         * This switch to getPage_noCheck() is needed as TYPO3 9 LTS doesn't return dokType < 200, therefore automatically
         * adding pages to crawler queue when editing page-titles from the page tree directly was not working.
         */
        $pageData = GeneralUtility::makeInstance(PageRepository::class)->getPage_noCheck($pageUid, true);
        $configurations = $crawler->getUrlsForPageRow($pageData);
        // Currently this is only used from the DataHandlerHook, and we don't know of any allowed/disallowed
        // configurations when clearing the cache, therefore we allow all configurations in this case.
        // The next lines could be skipped as they return the incoming configurations unchanged, but they are
        // kept for visibility and later implementation, as they do no harm.
        $allowedConfigurations = [];
        $configurations = ConfigurationService::removeDisallowedConfigurations($allowedConfigurations, $configurations);

        // removeDisallowedConfigurations() always returns an array, so only the empty case needs handling.
        if ($configurations === []) {
            return;
        }

        // The registered processing instructions do not change while queueing, so resolve them once.
        $procInstructionKeys = array_keys($this->getCrawlerProcInstructions());
        // Both arrays are shared across all configurations of this page.
        $downloadUrls = [];
        $duplicateTrack = [];

        foreach ($configurations as $configuration) {
            // Enable inserting of entries
            $crawler->registerQueueEntriesInternallyOnly = false;
            $crawler->urlListFromUrlArray(
                $configuration,
                $pageData,
                $time,
                300,
                true,
                false,
                $duplicateTrack,
                $downloadUrls,
                $procInstructionKeys
            );

            // Reset the queue because the entries have been written to the db
            unset($crawler->queueEntries);
        }
    }

    /**
     * Reads the registered processingInstructions of the crawler
     *
     * The instructions are taken from
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'];
     * each entry's 'key' becomes the array key and its 'value' the array value.
     *
     * @return array Processing instructions indexed by their key; empty if none are registered
     */
    private function getCrawlerProcInstructions(): array
    {
        $crawlerProcInstructions = [];
        if (! empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'])) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'] as $configuration) {
                $crawlerProcInstructions[$configuration['key']] = $configuration['value'];
            }
        }

        return $crawlerProcInstructions;
    }

    /**
     * Method to get an instance of the internal crawler singleton
     *
     * The instance is created on first call, gets a fresh setID derived from
     * microtime(), and is cached in $this->crawlerController for later calls.
     *
     * @return CrawlerController Instance of the crawler lib
     *
     * @throws CrawlerObjectException if no CrawlerController instance could be created
     */
    private function findCrawler(): CrawlerController
    {
        if ($this->crawlerController instanceof CrawlerController) {
            return $this->crawlerController;
        }

        $crawler = GeneralUtility::makeInstance(CrawlerController::class);
        // Validate before touching the instance, so a failed instantiation surfaces as the
        // documented exception instead of an error on the property assignment below.
        if (! $crawler instanceof CrawlerController) {
            throw new CrawlerObjectException('no crawler object', 1608465082);
        }

        $crawler->setID = GeneralUtility::md5int(microtime());
        $this->crawlerController = $crawler;

        return $crawler;
    }
}
110