Completed
Push — issue/444 ( 7ee4c4 )
by Tomas Norre
06:40
created

ProcessService::getCrawlerCliPath()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
nc 2
nop 0
dl 0
loc 14
ccs 0
cts 11
cp 0
crap 6
rs 9.7998
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Service;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Controller\CrawlerController;
29
use AOE\Crawler\Domain\Repository\ProcessRepository;
30
use AOE\Crawler\Domain\Repository\QueueRepository;
31
use AOE\Crawler\Utility\PhpBinaryUtility;
32
use TYPO3\CMS\Core\Core\Environment;
33
use TYPO3\CMS\Core\Utility\CommandUtility;
34
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
35
use TYPO3\CMS\Core\Utility\GeneralUtility;
36
use TYPO3\CMS\Extbase\Object\ObjectManager;
37
38
/**
 * Starts and supervises the command line processes that work off the crawler queue.
 *
 * All limits (process count, items per run, time to live, verbosity) are read
 * from the extension settings exposed by the CrawlerController.
 *
 * @package AOE\Crawler\Service
 */
class ProcessService
{
    /**
     * Value of the "processMaxRunTime" setting; added to time() to compute deadlines (seconds).
     *
     * @var int
     */
    private $timeToLive;

    /**
     * Value of the "countInARun" setting; used as divisor to derive the number of required processes.
     *
     * @var int
     */
    private $countInARun;

    /**
     * Value of the "processLimit" setting; upper bound for concurrently active processes.
     *
     * @var int
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Value of the "processVerbose" setting; when true, progress is echoed to the output.
     *
     * @var bool
     */
    private $verbose;

    /**
     * Resolves the repositories and the controller through the Extbase object manager
     * and copies the relevant extension settings into typed properties.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->crawlerController = $objectManager->get(CrawlerController::class);

        $settings = $this->crawlerController->extensionSettings;
        $this->timeToLive = (int) $settings['processMaxRunTime'];
        $this->countInARun = (int) $settings['countInARun'];
        $this->processLimit = (int) $settings['processLimit'];
        // Stored as a real boolean to match the documented type; truthiness is unchanged.
        $this->verbose = ((int) $settings['processVerbose']) !== 0;
    }

    /**
     * Starts multiple processes and keeps the pool filled until the queue is empty
     * or the given number of loop iterations has elapsed.
     *
     * Each iteration sleeps for one second, so $timeout is roughly a duration in seconds
     * (plus the time spent starting processes).
     *
     * @param integer $timeout maximum number of iterations of the supervision loop
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        // A status line is printed each time the pending count dropped by more than this many items.
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        if ($this->verbose) {
            $this->reportItemStatus();
        }
        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;
        // Numeric default so the comparison after the loop is well defined even if the loop never runs.
        $currentPendingItems = 0;
        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();
            if ($this->startRequiredProcesses()) {
                // New processes were started, so the cleanup deadline is pushed back.
                $nextTimeOut = time() + $this->timeToLive;
            }
            if ($currentPendingItems == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }
            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }
            sleep(1);
            if ($nextTimeOut < time()) {
                // NOTE(review): the filter selects rows whose ttl is GREATER than the elapsed deadline,
                // yet the result is named and released as "timed out" - confirm against
                // ProcessRepository::findAll() whether 'ttl <' was intended.
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $nextTimeOut = time() + $this->timeToLive;
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcesses->getProcessIds()) . chr(10);
                }
                $this->crawlerController->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true);
            }
        }
        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Reports the current status of the queue (pending and assigned item counts) on the output.
     */
    protected function reportItemStatus()
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems() . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems() . chr(10);
    }

    /**
     * According to the count of unassigned pending items and the countInARun setting
     * this method starts more crawling processes, limited by the free process slots.
     *
     * @return boolean true if at least one process was started, independent of the verbose setting
     * @throws \Exception if a process could not be started
     */
    private function startRequiredProcesses(): bool
    {
        $ret = false;
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;
        // Guard against a missing or zero countInARun setting, which would otherwise divide by zero.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int) ceil($this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess);
        $startProcessCount = min([$availableProcessesCount, $requiredProcessesCount]);
        if ($startProcessCount <= 0) {
            return $ret;
        }
        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on verbosity: callers use it to reset their timeout.
                $ret = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }
        if ($this->verbose) {
            echo chr(10);
        }
        return $ret;
    }

    /**
     * Starts a new crawler process in the background and waits up to ten seconds
     * for it to show up in the process repository.
     *
     * @return boolean true once the new process is registered
     * @throws \Exception if no crawler process was started
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        $current = $this->processRepository->countNotTimeouted($ttl);

        // The CLI path is a complete command line (binary, script and sub command). Quoting it as a
        // whole would turn it into one single token that the shell cannot resolve to a program,
        // therefore it is passed on unquoted.
        // NOTE(review): its parts presumably stem from the extension configuration only - confirm
        // that no untrusted input can reach PhpBinaryUtility::getPhpBinary().
        $cliPath = $this->getCrawlerCliPath();
        if (Environment::isWindows()) {
            $completePath = 'start ' . $cliPath;
        } else {
            // Sub shell plus "&" detaches the process; its output is discarded.
            $completePath = '(' . $cliPath . ' &) > /dev/null';
        }

        $fileHandler = CommandUtility::exec($completePath);
        if ($fileHandler === false) {
            throw new \Exception('could not start process!');
        }

        // Poll once per second until the process count for this ttl increased.
        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }
        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the command line used to start the crawler queue processing:
     * PHP binary, the "typo3" script inside the core extension's bin/ folder and the sub command.
     *
     * On Windows all forward slashes are replaced by backslashes.
     *
     * @return string
     */
    public function getCrawlerCliPath(): string
    {
        $phpPath = PhpBinaryUtility::getPhpBinary($this->crawlerController->extensionSettings);
        $typo3BinaryPath = ExtensionManagementUtility::extPath('core') . 'bin/';
        $cliPart = 'typo3 crawler:processQueue';
        // The single space separates the binary from the script; a leading blank caused by an
        // empty PHP path is removed by ltrim() below.
        $scriptPath = $phpPath . ' ' . $typo3BinaryPath . $cliPart;

        if (Environment::isWindows()) {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        return ltrim($scriptPath);
    }
}
243