Passed
Push — main ( d42fc3...d75d73 )
by Tomas Norre
30:18 queued 25:31
created

ProcessQueueCommand::getQueueRepository()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1.037

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 3
cp 0.6667
crap 1.037
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Command;
6
7
/***************************************************************
8
 *  Copyright notice
9
 *
10
 *  (c) 2019 AOE GmbH <[email protected]>
11
 *
12
 *  All rights reserved
13
 *
14
 *  This script is part of the TYPO3 project. The TYPO3 project is
15
 *  free software; you can redistribute it and/or modify
16
 *  it under the terms of the GNU General Public License as published by
17
 *  the Free Software Foundation; either version 3 of the License, or
18
 *  (at your option) any later version.
19
 *
20
 *  The GNU General Public License can be found at
21
 *  http://www.gnu.org/copyleft/gpl.html.
22
 *
23
 *  This script is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26
 *  GNU General Public License for more details.
27
 *
28
 *  This copyright notice MUST APPEAR in all copies of the script!
29
 ***************************************************************/
30
31
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
32
use AOE\Crawler\Controller\CrawlerController;
33
use AOE\Crawler\Crawler;
34
use AOE\Crawler\Domain\Model\Process;
35
use AOE\Crawler\Domain\Repository\ProcessRepository;
36
use AOE\Crawler\Domain\Repository\QueueRepository;
37
use Symfony\Component\Console\Command\Command;
38
use Symfony\Component\Console\Input\InputInterface;
39
use Symfony\Component\Console\Input\InputOption;
40
use Symfony\Component\Console\Output\OutputInterface;
41
use TYPO3\CMS\Core\Utility\GeneralUtility;
42
use TYPO3\CMS\Extbase\Object\ObjectManager;
43
44
class ProcessQueueCommand extends Command
45
{
46
    /**
47
     * @deprecated since 9.2.5 will be made private in v11.x
48
     */
49
    public const CLI_STATUS_NOTHING_PROCCESSED = 0;
50
51
    /**
52
     * queue not empty
53
     * @deprecated since 9.2.5 will be made private in v11.x
54
     */
55
    public const CLI_STATUS_REMAIN = 1;
56
57
    /**
58
     * (some) queue items were processed
59
     * @deprecated since 9.2.5 will be made private in v11.x
60
     */
61
    public const CLI_STATUS_PROCESSED = 2;
62
63
    /**
64
     * instance didn't finish
65
     * @deprecated since 9.2.5 will be made private in v11.x
66
     */
67
    public const CLI_STATUS_ABORTED = 4;
68
69
    /**
70
     * @deprecated since 9.2.5 will be made private in v11.x
71
     */
72
    public const CLI_STATUS_POLLABLE_PROCESSED = 8;
73
74
    /**
75
     * @var Crawler
76
     */
77
    private $crawler;
78
79
    /**
80
     * @var CrawlerController
81
     */
82
    private $crawlerController;
83
84
    /**
85
     * @var ProcessRepository
86
     */
87
    private $processRepository;
88
89
    /**
90
     * @var QueueRepository
91
     */
92
    private $queueRepository;
93
94
    /**
95
     * @var string
96
     */
97
    private $processId;
98
99
    /**
100
     * @var array
101
     */
102
    private $extensionSettings;
103
104
    /**
     * Crawler Command - Crawling the URLs from the queue
     *
     * Examples:
     *
     * --- Will trigger the crawler which starts to process the queue entries
     * $ typo3 crawler:processqueue --amount 15 --sleepafter 5 --sleeptime 2
     *
     * The options "amount", "sleeptime" and "sleepafter" are read from the input;
     * an option that is missing or evaluates to false falls back to the extension
     * settings "countInARun", "sleepTime" and "sleepAfterFinish" respectively.
     *
     * The full status bitmask is written to the output as a plain number, while
     * only the "aborted" bit is used as the return value.
     *
     * @return int self::CLI_STATUS_ABORTED when the run was aborted or could not be started, 0 otherwise
     */
    public function execute(InputInterface $input, OutputInterface $output)
    {
        $amount = $input->getOption('amount');
        $sleeptime = $input->getOption('sleeptime');
        $sleepafter = $input->getOption('sleepafter');

        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        // Has to be loaded before checkAndAcquireNewProcess(), which reads 'processLimit' from it.
        $this->extensionSettings = $this->getExtensionSettings();

        $result = self::CLI_STATUS_NOTHING_PROCCESSED;

        /** @var QueueRepository $queueRepository */
        $queueRepository = $objectManager->get(QueueRepository::class);
        /** @var ProcessRepository $processRepository */
        $processRepository = $objectManager->get(ProcessRepository::class);

        /** @var Crawler $crawler */
        $crawler = GeneralUtility::makeInstance(Crawler::class);

        if (! $crawler->isDisabled() && $this->checkAndAcquireNewProcess($this->getProcessId())) {
            $countInARun = $amount ? (int) $amount : (int) $this->extensionSettings['countInARun'];
            $sleepAfterFinish = $sleepafter ? (int) $sleepafter : (int) $this->extensionSettings['sleepAfterFinish'];
            $sleepTime = $sleeptime ? (int) $sleeptime : (int) $this->extensionSettings['sleepTime'];

            try {
                // Run process:
                $result = $this->runProcess($countInARun, $sleepTime, $sleepAfterFinish);
            } catch (\Throwable $e) {
                // "<warning>" is not one of the styles the console formatter registers by default,
                // so the tag would be printed literally; "<error>" is. The message is escaped so
                // that angle brackets inside it are not interpreted as formatting tags.
                $output->writeln(
                    '<error>'
                    . \Symfony\Component\Console\Formatter\OutputFormatter::escape(get_class($e) . ': ' . $e->getMessage())
                    . '</error>'
                );
                $result = self::CLI_STATUS_ABORTED;
            }

            // Cleanup
            $processId = $this->getProcessId();
            $processRepository->deleteProcessesWithoutItemsAssigned();
            $processRepository->markRequestedProcessesAsNotActive([$processId]);
            $queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries([$processId]);

            // Query the remaining items once, so the printed number and the status flag agree.
            $unprocessedCount = count($queueRepository->getUnprocessedItems());
            $output->writeln('<info>Unprocessed Items remaining:' . $unprocessedCount . ' (' . $processId . ')</info>');
            $result |= ($unprocessedCount > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED);
        } else {
            $result |= self::CLI_STATUS_ABORTED;
        }

        // writeln() expects a string (or iterable of strings); this file runs with strict_types=1.
        $output->writeln((string) $result);
        return $result & self::CLI_STATUS_ABORTED;
    }
160
161 2
    /**
     * Registers the description, the help text and the options
     * "amount", "sleepafter" and "sleeptime" of this command.
     */
    protected function configure(): void
    {
        $this->setDescription('Trigger the crawler to process the queue entries');

        $this->setHelp(
            'Crawler Command - Crawling the URLs from the queue' . chr(10) . chr(10) .
            '
            Examples:
              --- Will trigger the crawler which starts to process the queue entries
              $ typo3 crawler:processqueue --amount 15 --sleepafter 5 --sleeptime 2
            '
        );
        $this->addOption(
            'amount',
            '',
            InputOption::VALUE_OPTIONAL,
            'How many pages should be crawled during that run',
            '0'
        );

        // "sleepafter" is passed to sleep() once all entries of the run are done, hence seconds.
        $this->addOption(
            'sleepafter',
            '',
            InputOption::VALUE_OPTIONAL,
            'Amount of seconds which the system should use to relax after all crawls are done.',
            '0'
        );

        // "sleeptime" is passed to usleep() after every single crawl, hence microseconds.
        $this->addOption(
            'sleeptime',
            '',
            InputOption::VALUE_OPTIONAL,
            'Amount of microseconds which the system should use to relax between crawls'
        );
    }
196
197
    /**
     * Runs one batch of the CLI crawl: fetches queue entries, assigns them to
     * the current process and reads them one after another.
     *
     * @param int $countInARun      Passed to fetchRecordsToBeCrawled() as the number of entries to fetch
     * @param int $sleepTime        Pause after every entry, handed to usleep()
     * @param int $sleepAfterFinish Pause after the batch, handed to sleep()
     *
     * @return int Bitmask combining the readUrl() results with the CLI_STATUS_* flags
     */
    private function runProcess(int $countInARun, int $sleepTime, int $sleepAfterFinish): int
    {
        $status = 0;
        $processedEntries = 0;

        // Registered (deprecated) cli_hooks get their turn before anything is crawled.
        $cliHooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];
        foreach ($cliHooks as $hookClassName) {
            trigger_error(
                'This hook (crawler/cli_hooks) is deprecated since 9.1.5 and will be removed when dropping support for TYPO3 9LTS and 10LTS',
                E_USER_DEPRECATED
            );
            $hook = GeneralUtility::makeInstance($hookClassName);
            if (! is_object($hook)) {
                continue;
            }
            $hook->crawler_init($this->getCrawlerController());
        }

        // Tidy the queue, then pick the entries for this run.
        $this->getQueueRepository()->cleanupQueue();
        $queueEntries = $this->getQueueRepository()->fetchRecordsToBeCrawled($countInARun);

        // Nothing fetched: nothing was processed and there is nothing to wait for.
        if (empty($queueEntries)) {
            return $status;
        }

        $queueIds = [];
        foreach ($queueEntries as $queueEntry) {
            $queueIds[] = $queueEntry['qid'];
        }

        $currentProcessId = $this->getProcessId();

        // Remember how many entries were actually assigned, to tell later how many got processed.
        $assignedCount = $this->getQueueRepository()->updateProcessIdAndSchedulerForQueueIds($queueIds, $currentProcessId);
        $this->getProcessRepository()->updateProcessAssignItemsCount($assignedCount, $currentProcessId);

        // Not every fetched entry could be assigned to this process: give up.
        if ($assignedCount !== count($queueIds)) {
            return ($status | self::CLI_STATUS_ABORTED);
        }

        foreach ($queueEntries as $queueEntry) {
            $status |= $this->getCrawlerController()->readUrl($queueEntry['qid']);

            $processedEntries++;
            // Give the system a short break.
            usleep($sleepTime);

            // The crawler was disabled while this run was in progress: leave right away,
            // without marking the process as ended.
            if ($this->getCrawler()->isDisabled()) {
                return ($status | self::CLI_STATUS_ABORTED);
            }

            if ($this->getProcessRepository()->isProcessActive($this->getProcessId())) {
                continue;
            }

            // The process is no longer active (possible timeout): stop crawling.
            $status |= self::CLI_STATUS_ABORTED;
            break;
        }

        sleep($sleepAfterFinish);

        return $processedEntries > 0 ? ($status | self::CLI_STATUS_PROCESSED) : $status;
    }
269
270
    /**
     * Tries to register a new process under the given id and, along the way,
     * releases processes whose TTL has expired.
     *
     * @param string $id Id under which the new process is added
     *
     * @return bool true when the process was added, false when the system process id
     *              is unavailable or the 'processLimit' setting is already reached
     */
    private function checkAndAcquireNewProcess(string $id): bool
    {
        $pid = getmypid();
        if (! $pid) {
            return false;
        }

        $liveProcessCount = 0;
        $expiredProcessIds = [];

        /** @var Process $activeProcess */
        foreach ($this->getProcessRepository()->findAllActive() as $activeProcess) {
            if ($activeProcess->getTtl() >= time()) {
                $liveProcessCount++;
                continue;
            }
            // TTL lies in the past: treat the process as orphaned.
            $expiredProcessIds[] = $activeProcess->getProcessId();
        }

        // Only add a new process while fewer than the allowed number are alive.
        $acquired = $liveProcessCount < (int) $this->extensionSettings['processLimit'];
        if ($acquired) {
            $this->getProcessRepository()->addProcess($id, $pid);
        }

        // Housekeeping runs regardless of whether the new process was added.
        $this->getProcessRepository()->deleteProcessesMarkedAsDeleted();
        $this->getProcessRepository()->markRequestedProcessesAsNotActive($expiredProcessIds);
        $this->getQueueRepository()->unsetProcessScheduledAndProcessIdForQueueEntries($expiredProcessIds);

        return $acquired;
    }
310
311
    /**
     * Returns the id of the current process; it is derived from microtime()
     * on first use and then kept for the lifetime of this command object.
     */
    private function getProcessId(): string
    {
        if ($this->processId) {
            return $this->processId;
        }

        $this->processId = GeneralUtility::shortMD5(microtime(true));

        return $this->processId;
    }
321
322 2
    /**
     * Returns the Crawler set on this command, or obtains one otherwise.
     *
     * The instance is obtained through GeneralUtility::makeInstance() rather than "new",
     * matching how execute() and the other getters of this class create their collaborators.
     */
    private function getCrawler(): Crawler
    {
        return $this->crawler ?? GeneralUtility::makeInstance(Crawler::class);
    }
326
327 2
    /**
     * Returns the CrawlerController set on this command, or obtains one
     * through GeneralUtility::makeInstance() when none is set.
     */
    private function getCrawlerController(): CrawlerController
    {
        if ($this->crawlerController !== null) {
            return $this->crawlerController;
        }

        return GeneralUtility::makeInstance(CrawlerController::class);
    }
331
332 2
    /**
     * Returns the ProcessRepository set on this command, or obtains one
     * through GeneralUtility::makeInstance() when none is set.
     */
    private function getProcessRepository(): ProcessRepository
    {
        if ($this->processRepository !== null) {
            return $this->processRepository;
        }

        return GeneralUtility::makeInstance(ProcessRepository::class);
    }
336
337 2
    /**
     * Returns the QueueRepository set on this command, or obtains one
     * through GeneralUtility::makeInstance() when none is set.
     */
    private function getQueueRepository(): QueueRepository
    {
        if ($this->queueRepository !== null) {
            return $this->queueRepository;
        }

        return GeneralUtility::makeInstance(QueueRepository::class);
    }
341
342 2
    /**
     * Fetches the extension configuration from the ExtensionConfigurationProvider.
     */
    private function getExtensionSettings(): array
    {
        $configurationProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);

        return $configurationProvider->getExtensionConfiguration();
    }
346
}
347