Completed
Push — typo3v9 ( cd95b9...9e3523 )
by Tomas Norre
05:42
created

BuildQueueCommand::execute()   D

Complexity

Conditions 15
Paths 168

Size

Total Lines 81

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 240

Importance

Changes 0
Metric Value
cc 15
nc 168
nop 2
dl 0
loc 81
ccs 0
cts 68
cp 0
crap 240
rs 4.6945
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the method body into a new, well-named method.

1
<?php
2
declare(strict_types=1);
3
namespace AOE\Crawler\Command;
4
5
/***************************************************************
6
 *  Copyright notice
7
 *
8
 *  (c) 2019 AOE GmbH <[email protected]>
9
 *
10
 *  All rights reserved
11
 *
12
 *  This script is part of the TYPO3 project. The TYPO3 project is
13
 *  free software; you can redistribute it and/or modify
14
 *  it under the terms of the GNU General Public License as published by
15
 *  the Free Software Foundation; either version 3 of the License, or
16
 *  (at your option) any later version.
17
 *
18
 *  The GNU General Public License can be found at
19
 *  http://www.gnu.org/copyleft/gpl.html.
20
 *
21
 *  This script is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU General Public License for more details.
25
 *
26
 *  This copyright notice MUST APPEAR in all copies of the script!
27
 ***************************************************************/
28
29
use AOE\Crawler\Controller\CrawlerController;
30
use AOE\Crawler\Domain\Model\Reason;
31
use AOE\Crawler\Event\EventDispatcher;
32
use Symfony\Component\Console\Command\Command;
33
use Symfony\Component\Console\Input\InputInterface;
34
use Symfony\Component\Console\Input\InputOption;
35
use Symfony\Component\Console\Output\OutputInterface;
36
use TYPO3\CMS\Core\Utility\GeneralUtility;
37
use TYPO3\CMS\Core\Utility\MathUtility;
38
use TYPO3\CMS\Extbase\Object\ObjectManager;
39
40
/**
 * Console command that submits URLs to the crawler.
 *
 * Depending on the "mode" option it lists the URLs found, puts them into the
 * queue, or executes them right away.
 */
class BuildQueueCommand extends Command
{
    /**
     * Registers the help text and the command line options
     * (conf, page, depth, mode and number).
     */
    protected function configure(): void
    {
        $this->setHelp(
            'Try "typo3 help crawler:buildQueue" to see your options' . chr(10) . chr(10) .
            'Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; 
It can put entries in the queue from command line options, return the list of URLs and even execute
all entries right away without having to queue them up - this can be useful for immediate re-cache,
re-indexing or static publishing from command line.' . chr(10) . chr(10) .
            '
            Examples:
              --- Re-cache pages from page 7 and two levels down, executed immediately
              $ typo3 crawler:buildQueue --page 7 --depth 2 --conf defaultConfiguration --mode exec
             
              --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
              $ typo3 crawler:buildQueue --page 7 --depth 0 --conf defaultConfiguration --mode queue --number 4
            '
        );

        $this->addOption(
            'conf',
            'c',
            InputOption::VALUE_REQUIRED,
            'A comma separated list of crawler configurations'
        );

        $this->addOption(
            'page',
            'p',
            InputOption::VALUE_OPTIONAL,
            'The page from where the queue building should start',
            0
        );

        $this->addOption(
            'depth',
            'd',
            InputOption::VALUE_OPTIONAL,
            'Tree depth, 0-99. How many levels under the start page to include.',
            0
        );

        $this->addOption(
            'mode',
            'm',
            InputOption::VALUE_OPTIONAL,
            'Specifies output modes url : Will list URLs which wget could use as input. queue: Will put entries in queue table. exec: Will execute all entries right away!'
        );

        $this->addOption(
            'number',
            '',
            InputOption::VALUE_OPTIONAL,
            'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"',
            0
        );
    }

    /**
     * Crawler Command - Submitting URLs to be crawled.
     *
     * Works as a CLI interface to some functionality from the Web > Info > Site Crawler module;
     * It can put entries in the queue from command line options, return the list of URLs and even execute
     * all entries right away without having to queue them up - this can be useful for immediate re-cache,
     * re-indexing or static publishing from command line.
     *
     * Examples:
     *
     * --- Re-cache pages from page 7 and two levels down, executed immediately
     * $ typo3 crawler:buildQueue --page 7 --depth 2 --conf defaultConfiguration --mode exec
     *
     * --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
     * $ typo3 crawler:buildQueue --page 7 --depth 0 --conf defaultConfiguration --mode queue --number 4
     *
     * @param InputInterface $input Provides the options "mode", "page", "conf", "depth" and "number"
     * @param OutputInterface $output Receives the URL listing and the processing log
     * @return int Exit code, always 0
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $mode = $input->getOption('mode');
        $isQueueOrExecMode = $mode === 'queue' || $mode === 'exec';

        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);

        /** @var CrawlerController $crawlerController */
        $crawlerController = $objectManager->get(CrawlerController::class);

        if ($mode === 'exec') {
            $crawlerController->registerQueueEntriesInternallyOnly = true;
        }

        $pageId = MathUtility::forceIntegerInRange($input->getOption('page'), 0);

        // getConfigurationKeys() always yields an array (possibly empty), which is
        // handed to the controller unchanged.
        $configurationKeys = $this->getConfigurationKeys($input->getOption('conf'));

        if ($isQueueOrExecMode) {
            $this->postQueueChangeEvent($crawlerController);
        }

        // !empty() also covers a missing settings key without raising a notice.
        if (!empty($crawlerController->extensionSettings['cleanUpOldQueueEntries'])) {
            $crawlerController->cleanUpOldQueueEntries();
        }

        $crawlerController->setID = (int) GeneralUtility::md5int(microtime());
        $crawlerController->getPageTreeAndUrls(
            $pageId,
            MathUtility::forceIntegerInRange($input->getOption('depth'), 0, 99),
            $crawlerController->getCurrentTime(),
            // The option defaults to 0, in which case 30 is used.
            MathUtility::forceIntegerInRange($input->getOption('number') ?: 30, 1, 1000),
            $isQueueOrExecMode,
            $mode === 'url',
            [],
            $configurationKeys
        );

        if ($mode === 'url') {
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->downloadUrls) . PHP_EOL . '</info>');
        } elseif ($mode === 'exec') {
            $output->writeln('<info>Executing ' . count($crawlerController->urlList) . ' requests right away:</info>');
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
            $output->writeln('<info>Processing</info>' . PHP_EOL);

            $this->processQueueEntries($crawlerController, $output);
        } elseif ($mode === 'queue') {
            $output->writeln('<info>Putting ' . count($crawlerController->urlList) . ' entries in queue:</info>' . PHP_EOL);
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
        } else {
            $output->writeln('<info>' . count($crawlerController->urlList) . ' entries found for processing. (Use "mode" to decide action):</info>' . PHP_EOL);
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
        }

        return 0;
    }

    /**
     * Posts the "invokeQueueChange" event with a GUI-submit reason.
     *
     * NOTE(review): the event carries the set ID the controller holds at this point,
     * i.e. before execute() assigns a fresh one - confirm this ordering is intended.
     *
     * @param CrawlerController $crawlerController Controller whose current set ID is sent along
     */
    private function postQueueChangeEvent(CrawlerController $crawlerController): void
    {
        $reason = new Reason();
        $reason->setReason(Reason::REASON_GUI_SUBMIT);
        $reason->setDetailText('The cli script of the crawler added to the queue');

        EventDispatcher::getInstance()->post(
            'invokeQueueChange',
            $crawlerController->setID,
            ['reason' => $reason]
        );
    }

    /**
     * Requests every registered queue entry right away and reports each result.
     *
     * @param CrawlerController $crawlerController Controller holding the queue entries
     * @param OutputInterface $output Receives one line per entry plus its result
     */
    private function processQueueEntries(CrawlerController $crawlerController, OutputInterface $output): void
    {
        foreach ($crawlerController->queueEntries as $queueRec) {
            // NOTE(review): unserialize() is applied to stored queue parameters and, below,
            // to the crawl result content - confirm both are trusted sources.
            $parameters = unserialize($queueRec['parameters']);
            $output->writeln('<info>' . $parameters['url'] . ' (' . implode(',', $parameters['procInstructions']) . ') => </info>' . PHP_EOL);
            $result = $crawlerController->readUrlFromArray($queueRec);

            $requestResult = unserialize($result['content']);
            if (is_array($requestResult)) {
                // Each log line is put on its own line, indented by two tabs.
                $logIndent = PHP_EOL . chr(9) . chr(9);
                $resLog = is_array($requestResult['log'] ?? null)
                    ? $logIndent . implode($logIndent, $requestResult['log'])
                    : '';
                $output->writeln('<info>OK: ' . $resLog . '</info>' . PHP_EOL);
            } else {
                // Content that does not unserialize to an array is reported as an error,
                // stripped of tags, whitespace-collapsed and cut to 30000 bytes.
                $output->writeln('<error>Error checking Crawler Result:  ' . substr(preg_replace('/\s+/', ' ', strip_tags($result['content'])), 0, 30000) . '...' . PHP_EOL . '</error>' . PHP_EOL);
            }
        }
    }

    /**
     * Obtains configuration keys from the CLI arguments
     *
     * @param string|null $conf Comma separated list of configuration keys; null when the option was not given
     * @return array The trimmed keys, or an empty array when nothing was given
     */
    private function getConfigurationKeys($conf): array
    {
        // The cast keeps trim() from throwing a TypeError under strict_types
        // when the "conf" option is absent and therefore null.
        $parameter = trim((string) $conf);

        return $parameter !== '' ? GeneralUtility::trimExplode(',', $parameter) : [];
    }
}
211