Completed
Push — typo3v9 ( 46f78b...efe784 )
by Tomas Norre
12:27 queued 03:44
created

ProcessService   A

Complexity

Total Complexity 32

Size/Duplication

Total Lines 208
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Test Coverage

Coverage 2.36%

Importance

Changes 0
Metric Value
dl 0
loc 208
ccs 3
cts 127
cp 0.0236
rs 9.84
c 0
b 0
f 0
wmc 32
lcom 1
cbo 4

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 10 1
C multiProcess() 0 46 13
A reportItemStatus() 0 4 1
B startRequiredProcesses() 0 27 8
A startProcess() 0 25 5
A getCrawlerCliPath() 0 23 4
1
<?php
2
namespace AOE\Crawler\Service;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <dev@aoe.com>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Controller\CrawlerController;
29
use AOE\Crawler\Domain\Repository\ProcessRepository;
30
use AOE\Crawler\Domain\Repository\QueueRepository;
31
use TYPO3\CMS\Core\Utility\CommandUtility;
32
use TYPO3\CMS\Core\Utility\GeneralUtility;
33
34
/**
35
 * Class ProcessService
36
 *
37
 * @package AOE\Crawler\Service
38
 */
39
class ProcessService
{
    /**
     * Lifetime of a crawler process in seconds (extension setting "processMaxRunTime").
     * Added to time() to compute process TTLs and the next cleanup deadline.
     *
     * @var int
     */
    private $timeToLive;

    /**
     * Extension setting "countInARun". Used as the number of unassigned queue
     * items per process when calculating how many processes are required.
     *
     * @var int
     */
    private $countInARun;

    /**
     * Extension setting "processLimit": upper bound for concurrently active processes.
     *
     * @var int
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Extension setting "processVerbose": when true, progress is echoed to stdout.
     *
     * @var bool
     */
    private $verbose;

    /**
     * Creates the repositories and reads the process related extension settings
     * from the CrawlerController.
     */
    public function __construct()
    {
        $this->processRepository = new ProcessRepository();
        $this->queueRepository = new QueueRepository();
        $this->crawlerController = GeneralUtility::makeInstance(CrawlerController::class);

        $settings = $this->crawlerController->extensionSettings;
        $this->timeToLive = (int) $settings['processMaxRunTime'];
        $this->countInARun = (int) $settings['countInARun'];
        $this->processLimit = (int) $settings['processLimit'];
        // Go through int first so that the accepted values stay exactly those of
        // the former intval() based flag (e.g. a non-numeric string is "off").
        $this->verbose = ((int) $settings['processVerbose']) !== 0;
    }

    /**
     * Starts crawler processes and supervises them until the queue is empty
     * or the timeout is reached.
     *
     * The loop runs at most $timeout times and sleeps one second per iteration.
     *
     * @param int $timeout maximum number of one-second supervision cycles
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     * @throws \Exception if a crawler process could not be started
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = (int) $this->queueRepository->countAllPendingItems();
        // A status line is printed each time the queue shrinks by more than this many items.
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        if ($this->verbose) {
            $this->reportItemStatus();
        }

        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;

        // Start from the real queue size so that the final "timeout" check is
        // meaningful even when the loop body never runs ($timeout <= 0).
        $currentPendingItems = $pendingItemsStart;
        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = (int) $this->queueRepository->countAllPendingItems();
            if ($this->startRequiredProcesses()) {
                // New processes were spawned, so postpone the cleanup deadline.
                $nextTimeOut = time() + $this->timeToLive;
            }

            if ($currentPendingItems === 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }

            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }

            sleep(1);

            if ($nextTimeOut < time()) {
                // NOTE(review): static analysis (Scrutinizer) reports that
                // ProcessRepository::findAll() is called with more arguments than its
                // definition accepts, starting with ''. PHP silently ignores surplus
                // arguments, so the 'ttl >' filter may not be applied at all.
                // Confirm against the repository before relying on this cleanup.
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $nextTimeOut = time() + $this->timeToLive;
                $timedOutProcessIds = $timedOutProcesses->getProcessIds();
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcessIds) . chr(10);
                }
                $this->crawlerController->CLI_releaseProcesses($timedOutProcessIds, true);
            }
        }

        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Prints the number of pending and of assigned pending queue items.
     */
    protected function reportItemStatus()
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems() . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems() . chr(10);
    }

    /**
     * Starts as many additional crawler processes as are both required by the
     * number of unassigned pending items and allowed by the process limit.
     *
     * @return bool true if at least one process was started, regardless of verbosity
     * @throws \Exception if a crawler process could not be started
     */
    private function startRequiredProcesses()
    {
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;

        // A missing or zero "countInARun" setting must not cause a division by
        // zero (DivisionByZeroError on PHP 8); treat it as one item per process.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int) ceil($this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess);

        $startProcessCount = min($availableProcessesCount, $requiredProcessesCount);
        if ($startProcessCount <= 0) {
            return false;
        }

        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }

        $started = false;
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on the verbose flag: the caller
                // uses it to refresh its cleanup deadline.
                $started = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }

        if ($this->verbose) {
            echo chr(10);
        }

        return $started;
    }

    /**
     * Starts one crawler process in the background and waits until it shows up
     * in the process repository.
     *
     * @return bool true once the new process has been registered
     * @throws \Exception if the command could not be executed or the process
     *                    did not appear within 10 seconds
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        $current = $this->processRepository->countNotTimeouted($ttl);

        // Check whether OS is Windows
        if (TYPO3_OS === 'WIN') {
            $completePath = escapeshellcmd('start ' . $this->getCrawlerCliPath());
        } else {
            // Run in a detached subshell so that exec() returns immediately.
            $completePath = '(' . escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null';
        }

        $fileHandler = CommandUtility::exec($completePath);
        if ($fileHandler === false) {
            throw new \Exception('could not start process!');
        }

        // Poll once per second until the process count has grown.
        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }

        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the path to start the crawler from the command line
     *
     * The binary directory is taken from the project's composer.json
     * ("config.bin-dir", then "config.vendor-dir" + "/bin") and falls back to
     * "vendor/bin" when neither is set or composer.json cannot be read.
     *
     * @return string
     */
    public function getCrawlerCliPath()
    {
        $composerRootDir = getenv('TYPO3_PATH_COMPOSER_ROOT') . '/';
        $composerJsonPath = $composerRootDir . 'composer.json';

        // Only read the file when it is available, so that a missing
        // composer.json leads to the default bin dir without a PHP warning.
        $jsonDecoded = [];
        if (is_file($composerJsonPath) && is_readable($composerJsonPath)) {
            $decoded = json_decode((string) file_get_contents($composerJsonPath), true);
            if (is_array($decoded)) {
                $jsonDecoded = $decoded;
            }
        }

        if (isset($jsonDecoded['config']['bin-dir'])) {
            $binDir = $jsonDecoded['config']['bin-dir'];
        } elseif (isset($jsonDecoded['config']['vendor-dir'])) {
            $binDir = $jsonDecoded['config']['vendor-dir'] . '/bin';
        } else {
            $binDir = 'vendor/bin';
        }

        $phpPath = $this->crawlerController->extensionSettings['phpPath'] . ' ';
        $cliPart = '/typo3cms crawler:crawlqueue';
        $scriptPath = $phpPath . $composerRootDir . $binDir . $cliPart;

        if (TYPO3_OS === 'WIN') {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        // An empty phpPath leaves a leading blank that must not end up in the command.
        return ltrim($scriptPath);
    }
}
247