Passed
Push — deprecate/crawlerapi ( 17ac42 )
by Tomas Norre
05:43
created

QueueService   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 77
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 34
c 1
b 0
f 0
dl 0
loc 77
ccs 0
cts 47
cp 0
rs 10
wmc 9

3 Methods

Rating   Name   Duplication   Size   Complexity  
A addPageToQueue() 0 33 3
A findCrawler() 0 11 3
A getCrawlerProcInstructions() 0 10 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Service;
6
7
/*
8
 * (c) 2020 AOE GmbH <[email protected]>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use AOE\Crawler\Controller\CrawlerController;
23
use AOE\Crawler\Exception\CrawlerObjectException;
24
use TYPO3\CMS\Core\Utility\GeneralUtility;
25
use TYPO3\CMS\Frontend\Page\PageRepository;
26
27
/**
 * Adds the URLs of a single page to the crawler queue, using the crawler
 * configurations that CrawlerController resolves for that page.
 */
class QueueService
{
    /**
     * Crawler instance created on first use by findCrawler() and reused afterwards.
     *
     * @var CrawlerController|null
     */
    protected $crawlerController;

    /**
     * Queues the URLs of the given page for every crawler configuration that applies to it.
     *
     * @param int $pageUid uid of the page to add to the queue
     * @param int $time value handed to CrawlerController::urlListFromUrlArray() as its third argument
     *
     * @throws CrawlerObjectException when no crawler instance is available
     */
    public function addPageToQueue(int $pageUid, int $time = 0): void
    {
        $crawler = $this->findCrawler();

        /**
         * Todo: Switch back to getPage() when dropping support for TYPO3 9 LTS - TNM
         * The switch to getPage_noCheck() is needed as TYPO3 9 LTS doesn't return dokType < 200, therefore
         * automatically adding pages to the crawler queue when editing page-titles from the page tree directly
         * was not working.
         */
        $pageData = GeneralUtility::makeInstance(PageRepository::class)->getPage_noCheck($pageUid, true);

        // An empty allow-list is handed to the filter together with the configurations found for the page.
        $allowedConfigurations = [];
        $configurations = ConfigurationService::removeDisallowedConfigurations(
            $allowedConfigurations,
            $crawler->getUrlsForPageRow($pageData)
        );

        $downloadUrls = [];
        $duplicateTrack = [];

        // The registered processing instructions do not change while queueing, so read them only once.
        $procInstructionKeys = array_keys($this->getCrawlerProcInstructions());

        // No is_array() guard: static analysis reports removeDisallowedConfigurations() always yields an array.
        foreach ($configurations as $configuration) {
            // enable inserting of entries
            $crawler->registerQueueEntriesInternallyOnly = false;

            // NOTE(review): 300, true and false are positional arguments of urlListFromUrlArray();
            // confirm their meaning against CrawlerController before changing them.
            $crawler->urlListFromUrlArray(
                $configuration,
                $pageData,
                $time,
                300,
                true,
                false,
                $duplicateTrack,
                $downloadUrls,
                $procInstructionKeys
            );

            // reset the queue because the entries have been written to the db
            unset($crawler->queueEntries);
        }
    }

    /**
     * Reads the registered processingInstructions of the crawler.
     *
     * @return array each registration's 'key' entry mapped to its 'value' entry;
     *               empty when nothing (or no array) is registered
     */
    private function getCrawlerProcInstructions(): array
    {
        $registered = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'] ?? [];

        // Treat a missing or malformed registry like an empty one instead of iterating a non-array.
        if (! is_array($registered)) {
            return [];
        }

        $crawlerProcInstructions = [];
        foreach ($registered as $configuration) {
            $crawlerProcInstructions[$configuration['key']] = $configuration['value'];
        }

        return $crawlerProcInstructions;
    }

    /**
     * Returns the crawler used by this service, creating it on first use.
     *
     * A newly created crawler gets a setID derived from the current microtime().
     *
     * @return CrawlerController Instance of the crawler lib
     *
     * @throws CrawlerObjectException when the stored object is not a CrawlerController
     */
    private function findCrawler(): CrawlerController
    {
        if (! is_object($this->crawlerController)) {
            $this->crawlerController = GeneralUtility::makeInstance(CrawlerController::class);
            $this->crawlerController->setID = GeneralUtility::md5int(microtime());
        }

        // Checking the concrete type keeps the declared return type from failing with a TypeError.
        if ($this->crawlerController instanceof CrawlerController) {
            return $this->crawlerController;
        }

        throw new CrawlerObjectException('no crawler object', 1608465082);
    }
}
106