Passed
Push — refactor/move-cli-to-process-c... ( 0c6a3f )
by Tomas Norre
17:30
created

ProcessQueueCommand::checkAndAcquireNewProcess()   A

Complexity

Conditions 5
Paths 7

Size

Total Lines 35
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 20
nc 7
nop 1
dl 0
loc 35
rs 9.2888
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Command;
6
7
/***************************************************************
8
 *  Copyright notice
9
 *
10
 *  (c) 2019 AOE GmbH <[email protected]>
11
 *
12
 *  All rights reserved
13
 *
14
 *  This script is part of the TYPO3 project. The TYPO3 project is
15
 *  free software; you can redistribute it and/or modify
16
 *  it under the terms of the GNU General Public License as published by
17
 *  the Free Software Foundation; either version 3 of the License, or
18
 *  (at your option) any later version.
19
 *
20
 *  The GNU General Public License can be found at
21
 *  http://www.gnu.org/copyleft/gpl.html.
22
 *
23
 *  This script is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26
 *  GNU General Public License for more details.
27
 *
28
 *  This copyright notice MUST APPEAR in all copies of the script!
29
 ***************************************************************/
30
31
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
32
use AOE\Crawler\Controller\CrawlerController;
33
use AOE\Crawler\Crawler;
34
use AOE\Crawler\Domain\Model\Process;
35
use AOE\Crawler\Domain\Repository\ProcessRepository;
36
use AOE\Crawler\Domain\Repository\QueueRepository;
37
use Symfony\Component\Console\Command\Command;
38
use Symfony\Component\Console\Input\InputInterface;
39
use Symfony\Component\Console\Input\InputOption;
40
use Symfony\Component\Console\Output\OutputInterface;
41
use TYPO3\CMS\Core\Utility\GeneralUtility;
42
use TYPO3\CMS\Extbase\Object\ObjectManager;
43
44
/**
 * Console command that takes entries from the crawler queue and crawls them.
 *
 * The CLI_STATUS_* constants are bit flags; they are combined with `|` while
 * the command runs, and execute() finally reports only the "aborted" bit as
 * the exit code.
 */
class ProcessQueueCommand extends Command
{
    /**
     * Initial status: no flag set.
     *
     * @deprecated since 9.2.5 will be made private in v11.x
     */
    public const CLI_STATUS_NOTHING_PROCCESSED = 0;

    /**
     * queue not empty
     * @deprecated since 9.2.5 will be made private in v11.x
     */
    public const CLI_STATUS_REMAIN = 1;

    /**
     * (some) queue items were processed
     * @deprecated since 9.2.5 will be made private in v11.x
     */
    public const CLI_STATUS_PROCESSED = 2;

    /**
     * instance didn't finish
     * @deprecated since 9.2.5 will be made private in v11.x
     */
    public const CLI_STATUS_ABORTED = 4;

    /**
     * Not referenced inside this class.
     *
     * @deprecated since 9.2.5 will be made private in v11.x
     */
    public const CLI_STATUS_POLLABLE_PROCESSED = 8;

    /**
     * @var Crawler|null
     */
    private $crawler;

    /**
     * @var CrawlerController|null
     */
    private $crawlerController;

    /**
     * @var ProcessRepository|null
     */
    private $processRepository;

    /**
     * @var QueueRepository|null
     */
    private $queueRepository;

    /**
     * @var string|null
     */
    private $processId;

    /**
     * @var array
     */
    private $extensionSettings;

    /**
     * Crawler Command - Crawling the URLs from the queue
     *
     * Examples:
     *
     * --- Will trigger the crawler which starts to process the queue entries
     * $ typo3 crawler:processqueue
     *
     * The options `amount`, `sleeptime` and `sleepafter` override the
     * extension settings `countInARun`, `sleepTime` and `sleepAfterFinish`
     * when they are given with a non-empty, non-zero value.
     *
     * @return int 0 when the run was not aborted, self::CLI_STATUS_ABORTED otherwise
     */
    public function execute(InputInterface $input, OutputInterface $output)
    {
        $amount = $input->getOption('amount');
        $sleeptime = $input->getOption('sleeptime');
        $sleepafter = $input->getOption('sleepafter');

        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->extensionSettings = $this->getExtensionSettings();

        $result = self::CLI_STATUS_NOTHING_PROCCESSED;

        // Keep the collaborators on the instance so that the private getters
        // used by runProcess() and checkAndAcquireNewProcess() hand out these
        // very objects instead of building new ones on every call.
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->crawler = GeneralUtility::makeInstance(Crawler::class);

        $processId = $this->getProcessId();

        if (! $this->crawler->isDisabled() && $this->checkAndAcquireNewProcess($processId)) {
            $countInARun = $amount ? (int) $amount : (int) $this->extensionSettings['countInARun'];
            $sleepAfterFinish = $sleepafter ? (int) $sleepafter : (int) $this->extensionSettings['sleepAfterFinish'];
            $sleepTime = $sleeptime ? (int) $sleeptime : (int) $this->extensionSettings['sleepTime'];

            try {
                // Run process:
                $result = $this->runProcess($countInARun, $sleepTime, $sleepAfterFinish);
            } catch (\Throwable $e) {
                // "error" is one of the styles Symfony Console ships with;
                // an unknown tag would be printed literally.
                $output->writeln('<error>' . get_class($e) . ': ' . $e->getMessage() . '</error>');
                $result = self::CLI_STATUS_ABORTED;
            }

            // Cleanup
            $this->processRepository->deleteProcessesWithoutItemsAssigned();
            $this->processRepository->markRequestedProcessesAsNotActive([$processId]);
            $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries([$processId]);

            // Query the remaining items once and reuse the number for both
            // the message and the status flag.
            $unprocessedCount = count($this->queueRepository->getUnprocessedItems());

            $output->writeln('<info>Unprocessed Items remaining:' . $unprocessedCount . ' (' . $processId . ')</info>');
            $result |= ($unprocessedCount > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED);
        } else {
            // Crawler disabled or no process slot could be acquired.
            $result |= self::CLI_STATUS_ABORTED;
        }

        // writeln() expects a string; this file runs with strict_types=1.
        $output->writeln((string) $result);

        return $result & self::CLI_STATUS_ABORTED;
    }

    /**
     * Registers description, help text and the three optional CLI options.
     */
    protected function configure(): void
    {
        $this->setDescription('Trigger the crawler to process the queue entries');

        $this->setHelp(
            'Crawler Command - Crawling the URLs from the queue' . chr(10) . chr(10) .
            '
            Examples:
              --- Will trigger the crawler which starts to process the queue entries
              $ typo3 crawler:processqueue --amount 15 --sleepafter 5 --sleeptime 2
            '
        );
        $this->addOption(
            'amount',
            '',
            InputOption::VALUE_OPTIONAL,
            'How many pages should be crawled during that run',
            '0'
        );

        // "sleepafter" is passed to sleep() once the whole batch is done.
        $this->addOption(
            'sleepafter',
            '',
            InputOption::VALUE_OPTIONAL,
            'Amount of seconds which the system should use to relax after all crawls are done.',
            '0'
        );

        // "sleeptime" is passed to usleep() after every single crawl.
        $this->addOption(
            'sleeptime',
            '',
            InputOption::VALUE_OPTIONAL,
            'Amount of microseconds which the system should use to relax between crawls'
        );
    }

    /**
     * Running the functionality of the CLI (crawling URLs from queue)
     *
     * @param int $countInARun Passed to QueueRepository::fetchRecordsToBeCrawled()
     * @param int $sleepTime Microseconds handed to usleep() after each crawled entry
     * @param int $sleepAfterFinish Seconds handed to sleep() after the batch
     * @return int Bit mask built from the CLI_STATUS_* flags and the readUrl() results
     */
    private function runProcess(int $countInARun, int $sleepTime, int $sleepAfterFinish): int
    {
        $result = 0;
        $counter = 0;

        // First, run hooks:
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [] as $objRef) {
            trigger_error(
                'This hook (crawler/cli_hooks) is deprecated since 9.1.5 and will be removed when dropping support for TYPO3 9LTS and 10LTS',
                E_USER_DEPRECATED
            );
            $hookObj = GeneralUtility::makeInstance($objRef);
            if (is_object($hookObj)) {
                // The controller is memoized, so the instance the hook
                // receives is the one that crawls the URLs below.
                $hookObj->crawler_init($this->getCrawlerController());
            }
        }

        // Clean up the queue
        $this->getQueueRepository()->cleanupQueue();

        // Select entries:
        $records = $this->getQueueRepository()->fetchRecordsToBeCrawled($countInARun);

        if (! empty($records)) {
            $quidList = array_column($records, 'qid');

            $processId = $this->getProcessId();

            //save the number of assigned queue entries to determine how many have been processed later
            $numberOfAffectedRows = $this->getQueueRepository()->updateProcessIdAndSchedulerForQueueIds($quidList, $processId);
            $this->getProcessRepository()->updateProcessAssignItemsCount($numberOfAffectedRows, $processId);

            // Not every selected entry could be assigned to this process.
            if ($numberOfAffectedRows !== count($quidList)) {
                return ($result | self::CLI_STATUS_ABORTED);
            }

            foreach ($records as $record) {
                $result |= $this->getCrawlerController()->readUrl($record['qid']);

                $counter++;
                // Just to relax the system
                usleep($sleepTime);

                // if during the start and the current read url the cli has been disable we need to return from the function
                // mark the process NOT as ended.
                if ($this->getCrawler()->isDisabled()) {
                    return ($result | self::CLI_STATUS_ABORTED);
                }

                if (! $this->getProcessRepository()->isProcessActive($this->getProcessId())) {
                    $result |= self::CLI_STATUS_ABORTED;
                    //possible timeout
                    break;
                }
            }

            sleep($sleepAfterFinish);
        }

        if ($counter > 0) {
            $result |= self::CLI_STATUS_PROCESSED;
        }

        return $result;
    }

    /**
     * Try to acquire a new process with the given id
     * also performs some auto-cleanup for orphan processes
     *
     * Active processes whose TTL lies in the past are treated as orphans:
     * they do not count against the process limit, are marked as not active
     * and their queue entries are released.
     *
     * @return bool false when the system PID is unavailable or the process limit is reached
     */
    private function checkAndAcquireNewProcess(string $id): bool
    {
        $systemProcessId = getmypid();
        if (! $systemProcessId) {
            return false;
        }

        $processRepository = $this->getProcessRepository();

        $processCount = 0;
        $orphanProcesses = [];
        $now = time();

        /** @var Process $process */
        foreach ($processRepository->findAllActive() as $process) {
            if ($process->getTtl() < $now) {
                $orphanProcesses[] = $process->getProcessId();
            } else {
                $processCount++;
            }
        }

        // if there are less than allowed active processes then add a new one
        $acquired = $processCount < (int) $this->extensionSettings['processLimit'];
        if ($acquired) {
            $processRepository->addProcess($id, $systemProcessId);
        }

        // The cleanup runs whether or not a slot was acquired.
        $processRepository->deleteProcessesMarkedAsDeleted();
        $processRepository->markRequestedProcessesAsNotActive($orphanProcesses);
        $this->getQueueRepository()->unsetProcessScheduledAndProcessIdForQueueEntries($orphanProcesses);

        return $acquired;
    }

    /**
     * Create a unique Id for the current process
     *
     * The id is derived from the current microtime on first use and then
     * reused for the lifetime of this command instance.
     */
    private function getProcessId(): string
    {
        if (! $this->processId) {
            // Explicit cast: this file runs with strict_types=1 and the
            // value ends up being hashed as a string.
            $this->processId = GeneralUtility::shortMD5((string) microtime(true));
        }
        return $this->processId;
    }

    /**
     * Returns the crawler instance, creating and remembering it on first use.
     */
    private function getCrawler(): Crawler
    {
        if ($this->crawler === null) {
            $this->crawler = new Crawler();
        }
        return $this->crawler;
    }

    /**
     * Returns the crawler controller, creating and remembering it on first use.
     */
    private function getCrawlerController(): CrawlerController
    {
        if ($this->crawlerController === null) {
            $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
            $this->crawlerController = $objectManager->get(CrawlerController::class);
        }
        return $this->crawlerController;
    }

    /**
     * Returns the process repository, creating and remembering it on first use.
     */
    private function getProcessRepository(): ProcessRepository
    {
        if ($this->processRepository === null) {
            $this->processRepository = new ProcessRepository();
        }
        return $this->processRepository;
    }

    /**
     * Returns the queue repository, creating and remembering it on first use.
     */
    private function getQueueRepository(): QueueRepository
    {
        if ($this->queueRepository === null) {
            $this->queueRepository = new QueueRepository();
        }
        return $this->queueRepository;
    }

    /**
     * Reads the extension configuration through ExtensionConfigurationProvider.
     */
    private function getExtensionSettings(): array
    {
        return GeneralUtility::makeInstance(ExtensionConfigurationProvider::class)->getExtensionConfiguration();
    }
}
348