Completed
Push — issue/446 ( c81005...2713e2 )
by Tomas Norre
04:41
created

ProcessService::getCrawlerCliPath()   A

Complexity

Conditions 5
Paths 8

Size

Total Lines 29

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 5.2742

Importance

Changes 0
Metric Value
cc 5
nc 8
nop 0
dl 0
loc 29
ccs 14
cts 18
cp 0.7778
crap 5.2742
rs 9.1448
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Service;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2019 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Controller\CrawlerController;
29
use AOE\Crawler\Domain\Repository\ProcessRepository;
30
use AOE\Crawler\Domain\Repository\QueueRepository;
31
use TYPO3\CMS\Core\Utility\CommandUtility;
32
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
33
use TYPO3\CMS\Core\Utility\GeneralUtility;
34
use TYPO3\CMS\Extbase\Object\ObjectManager;
35
36
/**
37
 * Class ProcessService
38
 *
39
 * @package AOE\Crawler\Service
40
 */
41
class ProcessService
42
{
43
    /**
     * Value of the "processMaxRunTime" extension setting; added to time() to
     * compute process timeouts.
     *
     * @var integer
     */
    private $timeToLive;

    /**
     * Value of the "countInARun" extension setting; divisor used to derive how
     * many processes the unassigned pending items require.
     *
     * @var integer
     */
    private $countInARun;

    /**
     * Value of the "processLimit" extension setting; upper bound for the
     * number of active processes.
     *
     * @var integer
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * @var QueueRepository
     */
    private $queueRepository;

    /**
     * @var ProcessRepository
     */
    private $processRepository;

    /**
     * Value of the "processVerbose" extension setting, stored as integer and
     * evaluated for truthiness; a non-zero value enables progress output.
     *
     * @var integer
     */
    private $verbose;
77
78
    /**
     * Resolves the repositories and the crawler controller through the Extbase
     * object manager and caches the process related extension settings as
     * integers.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);

        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->crawlerController = $objectManager->get(CrawlerController::class);

        // Read the settings once; every value is normalised to an integer.
        $settings = $this->crawlerController->extensionSettings;
        $this->timeToLive = (int)$settings['processMaxRunTime'];
        $this->countInARun = (int)$settings['countInARun'];
        $this->processLimit = (int)$settings['processLimit'];
        $this->verbose = (int)$settings['processVerbose'];
    }
92
93
    /**
     * Runs the crawler in multi process mode.
     *
     * Starts the required processes and then polls the queue for at most
     * $timeout iterations, sleeping one second per iteration, until no pending
     * items are left. Whenever the current time-to-live window has elapsed,
     * the processes selected by the "ttl >" condition are released through the
     * crawler controller. In verbose mode progress is echoed.
     *
     * @param integer $timeout Maximum number of polling iterations
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        // A status line is printed each time the pending count dropped by more than this many items.
        $reportStep = 20;
        $nextReportAt = $this->queueRepository->countAllPendingItems() - $reportStep;
        if ($this->verbose) {
            $this->reportItemStatus();
        }

        $this->startRequiredProcesses();
        $deadline = time() + $this->timeToLive;
        $pending = 0;
        $round = 0;

        while ($round < $timeout) {
            $pending = $this->queueRepository->countAllPendingItems();

            // Starting new processes opens a fresh time-to-live window.
            if ($this->startRequiredProcesses()) {
                $deadline = time() + $this->timeToLive;
            }

            if ($pending == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }

            if ($pending < $nextReportAt) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $nextReportAt = $pending - $reportStep;
            }

            sleep(1);

            if ($deadline < time()) {
                $expired = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $deadline);
                $deadline = time() + $this->timeToLive;
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $expired->getProcessIds()) . chr(10);
                }
                // NOTE(review): CrawlerController::CLI_releaseProcesses() is deprecated since
                // crawler v6.5.1 and will be removed in crawler v9.0.0.
                $this->crawlerController->CLI_releaseProcesses($expired->getProcessIds(), true);
            }

            $round++;
        }

        if ($this->verbose && $pending > 0) {
            echo 'Stop with timeout' . chr(10);
        }
    }
146
147
    /**
     * Echoes the current status of the queue: the number of pending items and
     * the number of assigned pending items, followed by a line feed.
     */
    protected function reportItemStatus()
    {
        $pending = $this->queueRepository->countAllPendingItems();
        $assigned = $this->queueRepository->countAllAssignedPendingItems();

        echo 'Pending:' . $pending . ' / Assigned:' . $assigned . chr(10);
    }
154
155
    /**
     * Starts additional crawling processes according to the number of
     * unassigned pending items and the countInARun setting.
     *
     * The number of processes to start is the smaller of the free process
     * slots (processLimit minus active processes) and the number of processes
     * needed to work off all unassigned pending items with countInARun items
     * per process. In verbose mode progress is echoed.
     *
     * @return boolean true if at least one process was started, regardless of the verbose setting
     * @throws \Exception if a process could not be started
     */
    private function startRequiredProcesses()
    {
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;

        // A missing or zero countInARun setting would otherwise cause a division by zero.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int)ceil(
            $this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess
        );

        $startProcessCount = min($availableProcessesCount, $requiredProcessesCount);
        if ($startProcessCount <= 0) {
            return false;
        }

        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }

        $started = false;
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on verbosity: callers use it to
                // decide whether the timeout window has to be renewed.
                $started = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }

        if ($this->verbose) {
            echo chr(10);
        }

        return $started;
    }
190
191
    /**
     * Starts one new crawler process and waits for it to be registered.
     *
     * The crawler CLI command is executed via "start" on Windows and detached
     * with "&" (output sent to /dev/null) on other systems. Afterwards the
     * process repository is polled up to ten times, one second apart, until
     * the number of not timed out processes exceeds the number counted before
     * the command was executed.
     *
     * @return boolean true as soon as the additional process is counted
     * @throws \Exception if the command could not be executed or no additional process appeared while polling
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        $countBefore = $this->processRepository->countNotTimeouted($ttl);

        // Check whether OS is Windows
        $command = TYPO3_OS === 'WIN'
            ? escapeshellcmd('start ' . $this->getCrawlerCliPath())
            : '(' . escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null';

        if (CommandUtility::exec($command) === false) {
            throw new \Exception('could not start process!');
        }

        $attempt = 0;
        while ($attempt < 10) {
            if ($this->processRepository->countNotTimeouted($ttl) > $countBefore) {
                return true;
            }
            sleep(1);
            $attempt++;
        }

        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }
220
221
    /**
     * Returns the command to start the crawler from the command line: the
     * configured "phpPath" followed by the path to "typo3cms crawler:crawlqueue".
     *
     * If the TYPO3_PATH_COMPOSER_ROOT environment variable is set and that
     * directory contains a composer.json, the binary directory is taken from
     * its "config.bin-dir", otherwise derived from "config.vendor-dir", and
     * finally defaults to "vendor/bin". Without a usable composer.json the
     * path of the typo3_console extension is used instead. On Windows all
     * forward slashes are converted to backslashes.
     *
     * @return string
     */
    public function getCrawlerCliPath()
    {
        $phpPath = $this->crawlerController->extensionSettings['phpPath'] . ' ';
        $cliPart = 'typo3cms crawler:crawlqueue';

        // getenv() returns false for an unset variable; without this check the
        // lookup below would probe "/composer.json" in the filesystem root.
        $composerRoot = getenv('TYPO3_PATH_COMPOSER_ROOT');
        $hasComposerRoot = $composerRoot !== false && $composerRoot !== '';
        $composerRootDir = $composerRoot . '/';
        $composerFile = $composerRootDir . 'composer.json';

        if ($hasComposerRoot && file_exists($composerFile)) {
            // Unreadable or invalid JSON decodes to a non-array, which simply
            // falls through to the default binary directory.
            $jsonDecoded = json_decode(file_get_contents($composerFile), true);

            if (isset($jsonDecoded['config']['bin-dir'])) {
                $binDir = $jsonDecoded['config']['bin-dir'];
            } elseif (isset($jsonDecoded['config']['vendor-dir'])) {
                $binDir = $jsonDecoded['config']['vendor-dir'] . '/bin';
            } else {
                $binDir = 'vendor/bin';
            }
            $scriptPath = $phpPath . $composerRootDir . $binDir . '/' . $cliPart;
        } else {
            $typo3ConsolePath = ExtensionManagementUtility::extPath('typo3_console');
            $scriptPath = $phpPath . $typo3ConsolePath . $cliPart;
        }

        if (TYPO3_OS === 'WIN') {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        // An empty phpPath setting leaves a leading blank that is stripped here.
        return ltrim($scriptPath);
    }
255
}
256