Passed
Push — typo3v9 ( 2404ee...b9b5fa )
by Tomas Norre
05:51
created

ProcessService::startProcess()   A

Complexity

Conditions 5
Paths 8

Size

Total Lines 23
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 30

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 5
eloc 15
c 2
b 0
f 0
nc 8
nop 0
dl 0
loc 23
ccs 0
cts 20
cp 0
crap 30
rs 9.4555
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Service;
6
7
/***************************************************************
8
 *  Copyright notice
9
 *
10
 *  (c) 2017 AOE GmbH <[email protected]>
11
 *
12
 *  All rights reserved
13
 *
14
 *  This script is part of the TYPO3 project. The TYPO3 project is
15
 *  free software; you can redistribute it and/or modify
16
 *  it under the terms of the GNU General Public License as published by
17
 *  the Free Software Foundation; either version 3 of the License, or
18
 *  (at your option) any later version.
19
 *
20
 *  The GNU General Public License can be found at
21
 *  http://www.gnu.org/copyleft/gpl.html.
22
 *
23
 *  This script is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU General Public License for more details.
27
 *
28
 *  This copyright notice MUST APPEAR in all copies of the script!
29
 ***************************************************************/
30
31
use AOE\Crawler\Controller\CrawlerController;
32
use AOE\Crawler\Domain\Repository\ProcessRepository;
33
use AOE\Crawler\Domain\Repository\QueueRepository;
34
use AOE\Crawler\Utility\PhpBinaryUtility;
35
use TYPO3\CMS\Core\Core\Environment;
36
use TYPO3\CMS\Core\Utility\CommandUtility;
37
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
38
use TYPO3\CMS\Core\Utility\GeneralUtility;
39
use TYPO3\CMS\Extbase\Object\ObjectManager;
40
41
/**
 * Starts crawler worker processes on the command line and supervises them
 * while the crawler queue still contains pending items.
 *
 * @package AOE\Crawler\Service
 */
class ProcessService
{
    /**
     * Seconds added to the current time when computing process time-outs
     * (extension setting "processMaxRunTime").
     *
     * @var int
     */
    private $timeToLive;

    /**
     * Divisor used to derive the number of required processes from the number
     * of unassigned pending items (extension setting "countInARun").
     *
     * @var int
     */
    private $countInARun;

    /**
     * Upper bound for the number of simultaneously active processes
     * (extension setting "processLimit").
     *
     * @var int
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Whether progress messages are echoed (extension setting "processVerbose").
     *
     * @var bool
     */
    private $verbose;

    /**
     * Resolves the repositories and the crawler controller through the Extbase
     * object manager and reads the process related extension settings.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->crawlerController = $objectManager->get(CrawlerController::class);

        $settings = $this->crawlerController->extensionSettings;
        $this->timeToLive = (int) $settings['processMaxRunTime'];
        $this->countInARun = (int) $settings['countInARun'];
        $this->processLimit = (int) $settings['processLimit'];
        // Go through int first so that the flag keeps the exact truthiness the
        // previous intval() based value had (e.g. a non-numeric string is off).
        $this->verbose = (bool) (int) $settings['processVerbose'];
    }

    /**
     * Starts crawler processes and supervises them until no pending items are
     * left or $timeout loop iterations (each sleeping one second) have passed.
     *
     * @param int $timeout maximum number of supervision iterations
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     * @throws \Exception if a crawler process could not be started
     */
    public function multiProcess($timeout): void
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        // A status line is printed each time the pending count has dropped by
        // more than this many items since the last report.
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        if ($this->verbose) {
            $this->reportItemStatus();
        }
        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;
        $currentPendingItems = 0;
        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();
            if ($this->startRequiredProcesses()) {
                // New processes were started, so the time-out window restarts.
                $nextTimeOut = time() + $this->timeToLive;
            }
            if ($currentPendingItems == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }
            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }
            sleep(1);
            if ($nextTimeOut < time()) {
                // NOTE(review): static analysis reports that findAll() is defined with
                // fewer parameters than are passed here, so the 'ttl >' filter may be
                // ignored - confirm against ProcessRepository::findAll().
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $nextTimeOut = time() + $this->timeToLive;
                $timedOutProcessIds = $timedOutProcesses->getProcessIds();
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcessIds) . chr(10);
                }
                $this->crawlerController->CLI_releaseProcesses($timedOutProcessIds, true);
            }
        }
        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Echoes the current number of pending and of assigned pending queue items.
     */
    protected function reportItemStatus(): void
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems() . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems() . chr(10);
    }

    /**
     * Starts as many additional crawler processes as are both required for the
     * unassigned pending items (one per countInARun items) and still allowed by
     * the processLimit.
     *
     * @return bool true if at least one process was started
     * @throws \Exception if a process could not be started
     */
    private function startRequiredProcesses(): bool
    {
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;
        // Guard against a missing/zero countInARun setting, which would otherwise
        // cause a division by zero.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int) ceil($this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess);
        $startProcessCount = min($availableProcessesCount, $requiredProcessesCount);
        if ($startProcessCount <= 0) {
            return false;
        }

        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }

        $started = false;
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on verbosity: callers use it to
                // decide whether the time-out window has to be restarted.
                $started = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }
        if ($this->verbose) {
            echo chr(10);
        }

        return $started;
    }

    /**
     * Launches one crawler process in the background and waits until the
     * process repository reports more not-timed-out processes than before.
     *
     * @return bool true as soon as the new process has been detected
     * @throws \Exception if the command could not be executed or no new process
     *                    was detected during the ten polling attempts
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        $current = $this->processRepository->countNotTimeouted($ttl);

        // Detach the command from this script: "start" on Windows, a
        // backgrounded subshell with discarded output elsewhere.
        if (Environment::isWindows()) {
            $completePath = 'start ' . $this->getCrawlerCliPath();
        } else {
            $completePath = '(' . $this->getCrawlerCliPath() . ' &) > /dev/null';
        }

        $fileHandler = CommandUtility::exec($completePath);
        if ($fileHandler === false) {
            throw new \Exception('could not start process!');
        }

        // Poll once per second for the new process to show up.
        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }

        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the command line that starts the crawler queue processing:
     * the PHP binary, followed by the "bin/typo3" script inside the core
     * extension path and the "crawler:processQueue" command.
     *
     * On Windows every forward slash of the command is replaced by a backslash.
     *
     * @return string
     */
    public function getCrawlerCliPath(): string
    {
        $phpPath = PhpBinaryUtility::getPhpBinary();
        $typo3BinaryPath = ExtensionManagementUtility::extPath('core') . 'bin/';
        $cliPart = 'typo3 crawler:processQueue';
        // The single blank separates the PHP binary from the script; leading
        // whitespace is stripped again below.
        $scriptPath = $phpPath . ' ' . $typo3BinaryPath . $cliPart;

        if (Environment::isWindows()) {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        return ltrim($scriptPath);
    }
}
246