Completed
Push — issue/450 ( ad4603...cd42c5 )
by Tomas Norre
05:34 queued 34s
created

ProcessService::startProcess()   A

Complexity

Conditions 5
Paths 8

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 30

Importance

Changes 0
Metric Value
cc 5
nc 8
nop 0
dl 0
loc 25
rs 9.2088
c 0
b 0
f 0
ccs 0
cts 22
cp 0
crap 30
1
<?php
2
namespace AOE\Crawler\Service;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Controller\CrawlerController;
29
use AOE\Crawler\Domain\Repository\ProcessRepository;
30
use AOE\Crawler\Domain\Repository\QueueRepository;
31
use TYPO3\CMS\Core\Utility\CommandUtility;
32
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
33
use TYPO3\CMS\Core\Utility\GeneralUtility;
34
use TYPO3\CMS\Extbase\Object\ObjectManager;
35
36
/**
 * Starts and supervises crawler CLI worker processes.
 *
 * The service reads its limits (process run time, items per run, maximum
 * number of parallel processes, verbosity) from the crawler extension
 * settings exposed by the CrawlerController.
 *
 * @package AOE\Crawler\Service
 */
class ProcessService
{
    /**
     * Seconds added to the current time to compute process deadlines
     * (setting "processMaxRunTime").
     *
     * @var integer
     */
    private $timeToLive;

    /**
     * Divisor used to derive the number of required processes from the
     * number of unassigned pending queue items (setting "countInARun").
     *
     * @var integer
     */
    private $countInARun;

    /**
     * Upper bound for concurrently active processes (setting "processLimit").
     *
     * @var integer
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Whether progress information is echoed (setting "processVerbose").
     *
     * @var boolean
     */
    private $verbose;

    /**
     * Resolves the collaborating repositories and the crawler controller via
     * the Extbase object manager and caches the relevant extension settings.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->crawlerController = $objectManager->get(CrawlerController::class);

        $settings = $this->crawlerController->extensionSettings;
        // Missing settings fall back to 0, exactly like intval(null), but
        // without raising an "undefined index" notice.
        $this->timeToLive = (int) ($settings['processMaxRunTime'] ?? 0);
        $this->countInARun = (int) ($settings['countInARun'] ?? 0);
        $this->processLimit = (int) ($settings['processLimit'] ?? 0);
        // Same truthiness as the former intval() result, stored as a real boolean.
        $this->verbose = (int) ($settings['processVerbose'] ?? 0) !== 0;
    }

    /**
     * Starts multiple processes and supervises them until the queue has no
     * pending items left or the loop has run $timeout times.
     *
     * Each loop iteration sleeps one second, so $timeout is roughly a number
     * of seconds (plus the time spent starting processes).
     *
     * @param integer $timeout maximum number of supervision iterations
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        // Report again every time the pending count dropped by this many items.
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        if ($this->verbose) {
            $this->reportItemStatus();
        }
        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;
        // Stays 0 if the loop below never runs (non-positive $timeout).
        $currentPendingItems = 0;
        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();
            if ($this->startRequiredProcesses()) {
                // New processes were started: push the cleanup deadline forward.
                $nextTimeOut = time() + $this->timeToLive;
            }
            if ($currentPendingItems == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }
            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }
            sleep(1);
            if ($nextTimeOut < time()) {
                // NOTE(review): the condition selects processes with a ttl *greater*
                // than the expired deadline - confirm this is the intended set of
                // "timed out" processes.
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $nextTimeOut = time() + $this->timeToLive;
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcesses->getProcessIds()) . chr(10);
                }
                $this->crawlerController->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true);
            }
        }
        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Reports the current status of the queue (pending and assigned item counts).
     */
    protected function reportItemStatus()
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems() . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems() . chr(10);
    }

    /**
     * According to the count of unassigned pending items and the countInARun
     * setting this method starts more crawling processes, limited by the
     * number of process slots that are still free.
     *
     * @return boolean true if at least one process was started
     * @throws \Exception if a process could not be started
     */
    private function startRequiredProcesses(): bool
    {
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;

        // A countInARun of 0 (e.g. setting not configured) would divide by
        // zero; treat it as "one item per process" instead.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int) ceil(
            $this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess
        );

        $startProcessCount = (int) min($availableProcessesCount, $requiredProcessesCount);
        if ($startProcessCount <= 0) {
            return false;
        }

        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }

        $started = false;
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on verbosity: the caller uses it
                // to refresh its timeout after processes were started.
                $started = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }

        if ($this->verbose) {
            echo chr(10);
        }

        return $started;
    }

    /**
     * Starts a new crawler process in the background and waits (up to ten
     * one-second polls) until the process repository reports one more
     * not-timed-out process than before the start.
     *
     * @return boolean true once the new process showed up
     * @throws \Exception if the command could not be executed or the process
     *                    did not appear within 10 seconds
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        $current = $this->processRepository->countNotTimeouted($ttl);

        // Check whether OS is Windows
        if (TYPO3_OS === 'WIN') {
            $completePath = escapeshellcmd('start ' . $this->getCrawlerCliPath());
        } else {
            // Run in a detached subshell so exec() returns immediately.
            $completePath = '(' . escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null';
        }

        $execResult = CommandUtility::exec($completePath);
        if ($execResult === false) {
            throw new \Exception('could not start process!');
        }

        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }

        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the path to start the crawler from the command line.
     *
     * The result is "<phpPath> <core extension path>bin/typo3 crawler:processQueue";
     * on Windows every forward slash is replaced by a backslash.
     *
     * @return string
     */
    public function getCrawlerCliPath(): string
    {
        $phpPath = $this->crawlerController->extensionSettings['phpPath'] . ' ';
        $cliPart = 'typo3 crawler:processQueue';
        $typo3BinaryPath = ExtensionManagementUtility::extPath('core') . 'bin/';
        $scriptPath = $phpPath . $typo3BinaryPath . $cliPart;

        if (TYPO3_OS === 'WIN') {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        // Drops the leading blank that is left when no phpPath is configured.
        return ltrim($scriptPath);
    }
}
240