Completed
Push — typo3v9 ( 43e1a1...658720 )
by Tomas Norre
06:14
created

ProcessService::getCrawlerCliPath()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
nc 4
nop 0
dl 0
loc 16
ccs 0
cts 13
cp 0
crap 12
rs 9.7333
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Service;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Controller\CrawlerController;
29
use AOE\Crawler\Domain\Repository\ProcessRepository;
30
use AOE\Crawler\Domain\Repository\QueueRepository;
31
use TYPO3\CMS\Core\Utility\GeneralUtility;
32
33
/**
34
 * Class ProcessService
35
 *
36
 * @package AOE\Crawler\Service
37
 */
38
class ProcessService
39
{
40
    /**
41
     * @var integer $timeToLive
42
     */
43
    private $timeToLive;
44
45
    /**
46
     * @var integer
47
     */
48
    private $countInARun;
49
50
    /**
51
     * @var integer
52
     */
53
    private $processLimit;
54
55
    /**
56
     * @var CrawlerController
57
     */
58
    private $crawlerController;
59
60
    /**
61
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
62
     */
63
    private $queueRepository;
64
65
    /**
66
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
67
     */
68
    private $processRepository;
69
70
    /**
71
     * @var boolean $verbose
72
     */
73
    private $verbose;
74
75
    /**
     * Creates the repositories and the crawler controller and caches the
     * process related extension settings on this instance.
     *
     * The settings processMaxRunTime, countInARun, processLimit and processVerbose
     * are read from the extensionSettings of the crawler controller.
     */
    public function __construct()
    {
        $this->processRepository = new ProcessRepository();
        $this->queueRepository = new QueueRepository();
        $this->crawlerController = GeneralUtility::makeInstance(CrawlerController::class);

        $this->timeToLive = (int) $this->crawlerController->extensionSettings['processMaxRunTime'];
        $this->countInARun = (int) $this->crawlerController->extensionSettings['countInARun'];
        $this->processLimit = (int) $this->crawlerController->extensionSettings['processLimit'];
        // $verbose is only ever used as an on/off flag, so keep the integer
        // interpretation of the setting but store it as a real boolean.
        $this->verbose = (bool) (int) $this->crawlerController->extensionSettings['processVerbose'];
    }
88
89
    /**
     * Runs the crawler in multi process mode.
     *
     * Starts the required processes and then polls the queue, sleeping one second
     * per iteration, for at most $timeout iterations or until no pending items
     * are left. During polling further processes are started when required, and
     * whenever the time to live has elapsed without a new process being started,
     * the processes returned by the process repository are released.
     *
     * @param integer $timeout maximum number of polling iterations
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $lineFeed = chr(10);

        // The status is reported again once the pending count has dropped by
        // more than this many items since the last report.
        $itemReportLimit = 20;
        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        $reportItemCount = $pendingItemsStart - $itemReportLimit;

        if ($this->verbose) {
            $this->reportItemStatus();
        }

        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;
        $currentPendingItems = '';

        $iteration = 0;
        while ($iteration < $timeout) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();

            // Starting new processes pushes the next cleanup further into the future.
            if ($this->startRequiredProcesses()) {
                $nextTimeOut = time() + $this->timeToLive;
            }

            if ($currentPendingItems == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . $lineFeed;
                }
                break;
            }

            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }

            sleep(1);

            if ($nextTimeOut < time()) {
                // NOTE(review): static analysis reported that this call passes more
                // arguments than ProcessRepository::findAll() declares, so the
                // 'ttl >' condition may be ignored - confirm against the repository.
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $nextTimeOut = time() + $this->timeToLive;
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcesses->getProcessIds()) . $lineFeed;
                }
                $this->crawlerController->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true);
            }

            $iteration++;
        }

        if ($this->verbose && $currentPendingItems > 0) {
            echo 'Stop with timeout' . $lineFeed;
        }
    }
142
143
    /**
     * Prints the current status of the queue: the number of pending items and
     * the number of assigned pending items, followed by a line feed.
     */
    protected function reportItemStatus()
    {
        $pendingCount = $this->queueRepository->countAllPendingItems();
        $assignedCount = $this->queueRepository->countAllAssignedPendingItems();

        echo 'Pending:' . $pendingCount . ' / Assigned:' . $assignedCount . chr(10);
    }
150
151
    /**
     * Starts additional crawler processes when the queue requires them.
     *
     * The number of processes to start is the smaller of the free process slots
     * (processLimit minus the active processes) and the number of processes
     * needed to handle all unassigned pending items at countInARun items each.
     *
     * @throws \Exception if a process could not be started
     *
     * @return boolean true if at least one process was started, false otherwise
     */
    private function startRequiredProcesses()
    {
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;

        // countInARun comes from configuration and may be 0; treat that as one
        // item per process so the division below cannot divide by zero.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int) ceil(
            $this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess
        );

        $startProcessCount = min($availableProcessesCount, $requiredProcessesCount);
        if ($startProcessCount <= 0) {
            return false;
        }

        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }

        $started = false;
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on the verbose setting: callers use
                // it to decide whether to push back their timeout.
                $started = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }

        if ($this->verbose) {
            echo chr(10);
        }

        return $started;
    }
186
187
    /**
     * Starts a new crawler process and waits for it to be registered.
     *
     * The command built from getCrawlerCliPath() is executed with system().
     * Afterwards the process repository is polled once per second, up to ten
     * times, until the number of not timed out processes exceeds the number
     * counted before the command was executed.
     *
     * @return boolean true as soon as the new process has been counted
     *
     * @throws \Exception if system() reports a failure or no crawler process appeared while polling
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        $processCountBefore = $this->processRepository->countNotTimeouted($ttl);

        $cliPath = $this->getCrawlerCliPath();

        // Windows uses "start"; everywhere else the command is sent to the
        // background inside a subshell with its output discarded.
        $command = TYPO3_OS === 'WIN'
            ? escapeshellcmd('start ' . $cliPath)
            : '(' . escapeshellcmd($cliPath) . ' &) > /dev/null';

        if (system($command) === false) {
            throw new \Exception('could not start process!');
        }

        for ($attemptsLeft = 10; $attemptsLeft > 0; $attemptsLeft--) {
            if ($this->processRepository->countNotTimeouted($ttl) > $processCountBefore) {
                return true;
            }
            sleep(1);
        }

        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }
216
217
    /**
     * Returns the path to start the crawler from the command line
     *
     * The result is composed of the configured phpPath, the composer root
     * directory taken from the TYPO3_PATH_COMPOSER_ROOT environment variable,
     * the composer bin-dir and the "typo3cms crawler:crawlqueue" command.
     * The bin-dir is read from config.bin-dir of the composer.json in the
     * composer root; "vendor/bin" is used when that file cannot be read or
     * decoded, or when it does not define a non-empty bin-dir.
     *
     * @return string
     */
    public function getCrawlerCliPath()
    {
        $composerRootDir = getenv('TYPO3_PATH_COMPOSER_ROOT') . '/';

        $binDir = 'vendor/bin';
        $composerFile = $composerRootDir . 'composer.json';
        // Only look into composer.json when it is readable and holds the
        // expected structure, so a missing file or key falls back to the
        // default without raising warnings or notices.
        if (is_readable($composerFile)) {
            $jsonDecoded = json_decode((string) file_get_contents($composerFile), true);
            if (
                is_array($jsonDecoded)
                && !empty($jsonDecoded['config']['bin-dir'])
                && is_string($jsonDecoded['config']['bin-dir'])
            ) {
                $binDir = $jsonDecoded['config']['bin-dir'];
            }
        }

        $phpPath = $this->crawlerController->extensionSettings['phpPath'] . ' ';
        $cliPart = '/typo3cms crawler:crawlqueue';
        $scriptPath = $phpPath . $composerRootDir . $binDir . $cliPart;

        if (TYPO3_OS === 'WIN') {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        // An empty phpPath leaves a leading blank that must not end up in the command.
        return ltrim($scriptPath);
    }
238
}
239