Completed
Push — typo3v9 ( 0dcd72...98ac46 )
by Tomas Norre
06:11
created

BuildQueueCommand::configure()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 56

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 0
dl 0
loc 56
ccs 0
cts 47
cp 0
crap 2
rs 8.9599
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

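Commonly applied refactorings include Extract Method: moving a cohesive group of statements out of the long method into its own well-named method.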

1
<?php
2
declare(strict_types=1);
3
namespace AOE\Crawler\Command;
4
5
/***************************************************************
6
 *  Copyright notice
7
 *
8
 *  (c) 2019 AOE GmbH <dev@aoe.com>
9
 *
10
 *  All rights reserved
11
 *
12
 *  This script is part of the TYPO3 project. The TYPO3 project is
13
 *  free software; you can redistribute it and/or modify
14
 *  it under the terms of the GNU General Public License as published by
15
 *  the Free Software Foundation; either version 3 of the License, or
16
 *  (at your option) any later version.
17
 *
18
 *  The GNU General Public License can be found at
19
 *  http://www.gnu.org/copyleft/gpl.html.
20
 *
21
 *  This script is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU General Public License for more details.
25
 *
26
 *  This copyright notice MUST APPEAR in all copies of the script!
27
 ***************************************************************/
28
29
use AOE\Crawler\Controller\CrawlerController;
30
use AOE\Crawler\Domain\Model\Reason;
31
use AOE\Crawler\Event\EventDispatcher;
32
use Symfony\Component\Console\Input\InputInterface;
33
use Symfony\Component\Console\Input\InputOption;
34
use Symfony\Component\Console\Output\OutputInterface;
35
use Symfony\Component\Console\Command\Command;
36
use TYPO3\CMS\Core\Utility\GeneralUtility;
37
use TYPO3\CMS\Core\Utility\MathUtility;
38
use TYPO3\CMS\Extbase\Object\ObjectManager;
39
40
/**
 * Console command that builds the crawler queue (documented in its own help
 * text as "typo3 crawler:buildQueue").
 *
 * Depending on the "mode" option it lists the URLs found, puts them into the
 * queue, or executes them right away.
 */
class BuildQueueCommand extends Command
{
    /**
     * Registers the help text and the command line options of this command.
     */
    protected function configure(): void
    {
        $this->setHelp(
            'Try "typo3 help crawler:buildQueue" to see your options' . chr(10) . chr(10) .
            'Works as a CLI interface to some functionality from the Web > Info > Site Crawler module;
It can put entries in the queue from command line options, return the list of URLs and even execute
all entries right away without having to queue them up - this can be useful for immediate re-cache,
re-indexing or static publishing from command line.' . chr(10) . chr(10) .
            '
            Examples:
              --- Re-cache pages from page 7 and two levels down, executed immediately
              $ typo3 crawler:buildQueue --page 7 --depth 2 --conf defaultConfiguration --mode exec

              --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
              $ typo3 crawler:buildQueue --page 7 --depth 0 --conf defaultConfiguration --mode queue --number 4
            '
        );

        $this->addOption(
            'conf',
            'c',
            InputOption::VALUE_REQUIRED,
            'A comma separated list of crawler configurations'
        );

        $this->addOption(
            'page',
            'p',
            InputOption::VALUE_OPTIONAL,
            'The page from where the queue building should start',
            0
        );

        $this->addOption(
            'depth',
            'd',
            InputOption::VALUE_OPTIONAL,
            'Tree depth, 0-99. How many levels under the start page to include.',
            0
        );

        $this->addOption(
            'mode',
            'm',
            InputOption::VALUE_OPTIONAL,
            'Specifies output modes url : Will list URLs which wget could use as input. queue: Will put entries in queue table. exec: Will execute all entries right away!'
        );

        $this->addOption(
            'number',
            '',
            InputOption::VALUE_OPTIONAL,
            'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"',
            0
        );
    }

    /**
     * Crawler Command - Submitting URLs to be crawled.
     *
     * Works as a CLI interface to some functionality from the Web > Info > Site Crawler module;
     * It can put entries in the queue from command line options, return the list of URLs and even execute
     * all entries right away without having to queue them up - this can be useful for immediate re-cache,
     * re-indexing or static publishing from command line.
     *
     * Examples:
     *
     * --- Re-cache pages from page 7 and two levels down, executed immediately
     * $ typo3 crawler:buildQueue --page 7 --depth 2 --conf defaultConfiguration --mode exec
     *
     * --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
     * $ typo3 crawler:buildQueue --page 7 --depth 0 --conf defaultConfiguration --mode queue --number 4
     *
     * @param InputInterface $input Provides the options "conf", "page", "depth", "mode" and "number"
     * @param OutputInterface $output Receives the URL listing and the processing log
     * @return int Exit code, always 0
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $mode = $input->getOption('mode');

        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);

        /** @var CrawlerController $crawlerController */
        $crawlerController = $objectManager->get(CrawlerController::class);

        if ($mode === 'exec') {
            $crawlerController->registerQueueEntriesInternallyOnly = true;
        }

        // The start page comes from the declared "page" option. The former guard on
        // "$this->request instanceof \TYPO3\CMS\Extbase\Mvc\Cli\Request" was dropped: this class
        // declares no $request property and static analysis reported the class as missing, so the
        // guard never matched, the page id was always 1 and "--page" was ignored. The guarded
        // branch also read "startpage", an option configure() never registers.
        $pageId = MathUtility::forceIntegerInRange($input->getOption('page'), 0);

        $configurationKeys = $this->getConfigurationKeys($input->getOption('conf'));

        // NOTE(review): getConfigurationKeys() always returns an array, so this fallback is
        // currently never taken. Kept unchanged; confirm whether an empty list was meant to
        // trigger it before activating or removing it.
        if (!is_array($configurationKeys)) {
            $configurations = $crawlerController->getUrlsForPageId($pageId);
            if (is_array($configurations)) {
                $configurationKeys = array_keys($configurations);
            } else {
                $configurationKeys = [];
            }
        }

        $addsToQueue = $mode === 'queue' || $mode === 'exec';

        if ($addsToQueue) {
            $reason = new Reason();
            $reason->setReason(Reason::REASON_GUI_SUBMIT);
            $reason->setDetailText('The cli script of the crawler added to the queue');
            // NOTE(review): setID is only assigned further down, so the event is posted with
            // whatever value the controller holds at this point. Order kept as-is; verify intent.
            EventDispatcher::getInstance()->post(
                'invokeQueueChange',
                $crawlerController->setID,
                ['reason' => $reason]
            );
        }

        // !empty() also covers a missing settings key, which would otherwise raise a notice.
        if (!empty($crawlerController->extensionSettings['cleanUpOldQueueEntries'])) {
            $crawlerController->cleanUpOldQueueEntries();
        }

        $crawlerController->setID = (int) GeneralUtility::md5int(microtime());
        $crawlerController->getPageTreeAndUrls(
            $pageId,
            MathUtility::forceIntegerInRange($input->getOption('depth'), 0, 99),
            $crawlerController->getCurrentTime(),
            // "number" defaults to 0, which is falsy, so 30 is used unless a value is given.
            MathUtility::forceIntegerInRange($input->getOption('number') ?: 30, 1, 1000),
            $addsToQueue,
            $mode === 'url',
            [],
            $configurationKeys
        );

        if ($mode === 'url') {
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->downloadUrls) . PHP_EOL . '</info>');
        } elseif ($mode === 'exec') {
            $output->writeln('<info>Executing ' . count($crawlerController->urlList) . ' requests right away:</info>');
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
            $output->writeln('<info>Processing</info>' . PHP_EOL);
            $this->processQueueEntries($crawlerController, $output);
        } elseif ($mode === 'queue') {
            $output->writeln('<info>Putting ' . count($crawlerController->urlList) . ' entries in queue:</info>' . PHP_EOL);
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
        } else {
            $output->writeln('<info>' . count($crawlerController->urlList) . ' entries found for processing. (Use "mode" to decide action):</info>' . PHP_EOL);
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->urlList) . '</info>' . PHP_EOL);
        }

        return 0;
    }

    /**
     * Requests every collected queue entry immediately and prints the outcome of each request.
     *
     * @param CrawlerController $crawlerController Controller holding the collected queueEntries
     * @param OutputInterface $output Receives one line per entry plus its result
     */
    private function processQueueEntries(CrawlerController $crawlerController, OutputInterface $output): void
    {
        foreach ($crawlerController->queueEntries as $queueRec) {
            $parameters = unserialize($queueRec['parameters']);
            $output->writeln('<info>' . $parameters['url'] . ' (' . implode(',', $parameters['procInstructions']) . ') => ' . '</info>' . PHP_EOL);
            $result = $crawlerController->readUrlFromArray($queueRec);

            // NOTE(review): unserialize() is applied to the content returned by readUrlFromArray().
            // If that content can come from an untrusted source, restrict it with
            // ['allowed_classes' => false] or switch to a safer format.
            $requestResult = unserialize($result['content']);
            if (is_array($requestResult)) {
                // Log lines are printed one per line, each indented by two tabs.
                $indent = PHP_EOL . chr(9) . chr(9);
                $resLog = isset($requestResult['log']) && is_array($requestResult['log'])
                    ? $indent . implode($indent, $requestResult['log'])
                    : '';
                $output->writeln('<info>OK: ' . $resLog . '</info>' . PHP_EOL);
            } else {
                // "<error>" is the tag the console formatter styles; the misspelled "<errror>" was not.
                $output->writeln('<error>Error checking Crawler Result:  ' . substr(preg_replace('/\s+/', ' ', strip_tags($result['content'])), 0, 30000) . '...' . PHP_EOL . '</error>' . PHP_EOL);
            }
        }
    }

    /**
     * Obtains configuration keys from the CLI arguments
     *
     * @param string|null $conf Comma separated list of configuration keys; null when "--conf" was not passed
     * @return array The trimmed keys, or an empty array when nothing was given
     */
    private function getConfigurationKeys($conf)
    {
        // Cast first: under strict_types, trim(null) throws a TypeError when the option is omitted.
        $parameter = trim((string) $conf);
        return ($parameter !== '' ? GeneralUtility::trimExplode(',', $parameter) : []);
    }
}
219