Passed
Push — typo3v9 ( 2404ee...b9b5fa )
by Tomas Norre
05:51
created

BuildQueueCommand::getConfigurationKeys()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 2
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 4
ccs 0
cts 4
cp 0
crap 6
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Command;
6
7
/*
8
 * (c) 2020 AOE GmbH <[email protected]>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use AOE\Crawler\Controller\CrawlerController;
23
use AOE\Crawler\Domain\Model\Reason;
24
use AOE\Crawler\Event\EventDispatcher;
25
use Symfony\Component\Console\Command\Command;
26
use Symfony\Component\Console\Input\InputInterface;
27
use Symfony\Component\Console\Input\InputOption;
28
use Symfony\Component\Console\Output\OutputInterface;
29
use TYPO3\CMS\Core\Utility\GeneralUtility;
30
use TYPO3\CMS\Core\Utility\MathUtility;
31
use TYPO3\CMS\Extbase\Object\ObjectManager;
32
33
class BuildQueueCommand extends Command
34
{
35
    /**
     * Registers the help text and the CLI options of this command.
     *
     * Options: conf (-c), page (-p), depth (-d), mode (-m) and number.
     * "page", "depth" and "number" default to 0; "conf" and "mode" have no default.
     */
    protected function configure(): void
    {
        $this->setHelp(
            'Try "typo3 help crawler:buildQueue" to see your options' . chr(10) . chr(10) .
            'Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; 
It can put entries in the queue from command line options, return the list of URLs and even execute
all entries right away without having to queue them up - this can be useful for immediate re-cache,
re-indexing or static publishing from command line.' . chr(10) . chr(10) .
            '
            Examples:
              --- Re-cache pages from page 7 and two levels down, executed immediately
              $ typo3 crawler:buildQueue --page 7 --depth 2 --conf defaultConfiguration --mode exec
             
              --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
              $ typo3 crawler:buildQueue --page 7 --depth 0 --conf defaultConfiguration --mode queue --number 4
            '
        );

        $this->addOption(
            'conf',
            'c',
            InputOption::VALUE_REQUIRED,
            'A comma separated list of crawler configurations'
        );

        $this->addOption(
            'page',
            'p',
            InputOption::VALUE_OPTIONAL,
            'The page from where the queue building should start',
            0
        );

        $this->addOption(
            'depth',
            'd',
            InputOption::VALUE_OPTIONAL,
            'Tree depth, 0-99. How many levels under the \'page_id\' to include.',
            0
        );

        $this->addOption(
            'mode',
            'm',
            InputOption::VALUE_OPTIONAL,
            'Specifies output modes url : Will list URLs which wget could use as input. queue: Will put entries in queue table. exec: Will execute all entries right away!'
        );

        // No shortcut is registered for "number" (empty shortcut string).
        $this->addOption(
            'number',
            '',
            InputOption::VALUE_OPTIONAL,
            'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"',
            0
        );
    }
91
92
    /**
     * Crawler Command - Submitting URLs to be crawled.
     *
     * Works as a CLI interface to some functionality from the Web > Info > Site Crawler module;
     * It can put entries in the queue from command line options, return the list of URLs and even execute
     * all entries right away without having to queue them up - this can be useful for immediate re-cache,
     * re-indexing or static publishing from command line.
     *
     * Output depends on the "mode" option:
     *  - "url":   prints the controller's downloadUrls list
     *  - "exec":  prints the URL list, then reads every collected queue entry and reports its result
     *  - "queue": prints the URL list that was put into the queue
     *  - other:   prints the URL list that was found, without further action
     *
     * Examples:
     *
     * --- Re-cache pages from page 7 and two levels down, executed immediately
     * $ typo3 crawler:buildQueue --page 7 --depth 2 --conf defaultConfiguration --mode exec
     *
     *
     * --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
     * $ typo3 crawler:buildQueue --page 7 --depth 0 --conf defaultConfiguration --mode queue --number 4
     *
     * @param InputInterface $input Provides the options conf, page, depth, mode and number
     * @param OutputInterface $output Receives the URL lists and processing results
     */
    protected function execute(InputInterface $input, OutputInterface $output): void
    {
        $mode = $input->getOption('mode');

        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);

        /** @var CrawlerController $crawlerController */
        $crawlerController = $objectManager->get(CrawlerController::class);

        if ($mode === 'exec') {
            $crawlerController->registerQueueEntriesInternallyOnly = true;
        }

        $pageId = MathUtility::forceIntegerInRange($input->getOption('page'), 0);

        // "conf" has no default, so getOption() yields null when it is omitted. Cast it,
        // because this file runs with strict_types and trim(null) would raise a TypeError.
        // getConfigurationKeys() always returns an array; the former "!is_array(...)" fallback
        // to getUrlsForPageId() could never run and has been removed without changing behaviour.
        $configurationKeys = $this->getConfigurationKeys((string) $input->getOption('conf'));

        $addsToQueue = $mode === 'queue' || $mode === 'exec';

        if ($addsToQueue) {
            $reason = new Reason();
            $reason->setReason(Reason::REASON_GUI_SUBMIT);
            $reason->setDetailText('The cli script of the crawler added to the queue');
            // NOTE(review): setID is read here but only assigned further below - confirm
            // whether the event is meant to carry the new set id.
            EventDispatcher::getInstance()->post(
                'invokeQueueChange',
                $crawlerController->setID,
                ['reason' => $reason]
            );
        }

        if ($crawlerController->extensionSettings['cleanUpOldQueueEntries']) {
            $crawlerController->cleanUpOldQueueEntries();
        }

        $crawlerController->setID = (int) GeneralUtility::md5int(microtime());
        $crawlerController->getPageTreeAndUrls(
            $pageId,
            MathUtility::forceIntegerInRange($input->getOption('depth'), 0, 99),
            $crawlerController->getCurrentTime(),
            // The option default is 0, which is falsy, so 30 is used when "number" is not given.
            MathUtility::forceIntegerInRange($input->getOption('number') ?: 30, 1, 1000),
            $addsToQueue,
            $mode === 'url',
            [],
            $configurationKeys
        );

        if ($mode === 'url') {
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->downloadUrls) . PHP_EOL . '</info>');
        } elseif ($mode === 'exec') {
            $output->writeln('<info>Executing ' . count($crawlerController->urlList) . ' requests right away:</info>');
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
            $output->writeln('<info>Processing</info>' . PHP_EOL);

            foreach ($crawlerController->queueEntries as $queueRec) {
                // NOTE(review): unserialize() is applied to queue data and to the fetched result;
                // presumably both are produced by the crawler itself - confirm they are trusted.
                $p = unserialize($queueRec['parameters']);
                $output->writeln('<info>' . $p['url'] . ' (' . implode(',', $p['procInstructions']) . ') => ' . '</info>' . PHP_EOL);
                $result = $crawlerController->readUrlFromArray($queueRec);

                $requestResult = unserialize($result['content']);
                if (is_array($requestResult)) {
                    // Log lines are printed one per line, each indented by two tabs.
                    $resLog = is_array($requestResult['log']) ? PHP_EOL . chr(9) . chr(9) . implode(PHP_EOL . chr(9) . chr(9), $requestResult['log']) : '';
                    $output->writeln('<info>OK: ' . $resLog . '</info>' . PHP_EOL);
                } else {
                    // "<error>" is the console style tag; the former "<errror>" was not a defined style.
                    $output->writeln('<error>Error checking Crawler Result:  ' . substr(preg_replace('/\s+/', ' ', strip_tags($result['content'])), 0, 30000) . '...' . PHP_EOL . '</error>' . PHP_EOL);
                }
            }
        } elseif ($mode === 'queue') {
            $output->writeln('<info>Putting ' . count($crawlerController->urlList) . ' entries in queue:</info>' . PHP_EOL);
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
        } else {
            $output->writeln('<info>' . count($crawlerController->urlList) . ' entries found for processing. (Use "mode" to decide action):</info>' . PHP_EOL);
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
        }
    }
191
192
    /**
     * Obtains configuration keys from the CLI arguments
     *
     * Splits the comma separated value into its trimmed parts. Null is accepted and
     * treated like an empty string, because this file runs with strict_types and
     * passing null straight to trim() would raise a TypeError.
     *
     * @param string|null $conf Comma separated list of configuration keys
     * @return array The trimmed keys, or an empty array when $conf is null, empty or only whitespace
     */
    private function getConfigurationKeys(?string $conf): array
    {
        $parameter = trim((string) $conf);

        if ($parameter === '') {
            return [];
        }

        return GeneralUtility::trimExplode(',', $parameter);
    }
203
}
204