Completed
Branch test-coverage (951e01)
by Tomas Norre
15:40 queued 13:43
created

ProcessService::getCrawlerCliPath()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 14
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 9
nc 2
nop 0
dl 0
loc 14
ccs 0
cts 12
cp 0
crap 6
rs 9.4285
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Service;
3
4
/***************************************************************
 *  Copyright notice
 *
 *  (c) 2017 AOE GmbH <[email protected]>
 *
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
 *  free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  The GNU General Public License can be found at
 *  http://www.gnu.org/copyleft/gpl.html.
 *
 *  This script is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  This copyright notice MUST APPEAR in all copies of the script!
 ***************************************************************/
27
28
use AOE\Crawler\Controller\CrawlerController;
29
use AOE\Crawler\Domain\Repository\ProcessRepository;
30
use AOE\Crawler\Domain\Repository\QueueRepository;
31
use TYPO3\CMS\Core\Utility\GeneralUtility;
32
33
/**
34
 * Class ProcessService
35
 *
36
 * @package AOE\Crawler\Service
37
 */
38
class ProcessService
{
    /**
     * Maximum run time of a crawler process in seconds
     * (extension setting "processMaxRunTime").
     *
     * @var integer
     */
    private $timeToLive;

    /**
     * Extension setting "countInARun"; used as the number of queue items
     * per process when calculating how many processes are required.
     *
     * @var integer
     */
    private $countInARun;

    /**
     * Upper bound for the number of simultaneously active processes
     * (extension setting "processLimit").
     *
     * @var integer
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Whether progress information is echoed (extension setting "processVerbose").
     *
     * @var boolean
     */
    private $verbose;

    /**
     * Creates the repositories and the crawler controller and reads the
     * process related values from the crawler extension settings.
     */
    public function __construct()
    {
        $this->processRepository = new ProcessRepository();
        $this->queueRepository = new QueueRepository();
        $this->crawlerController = GeneralUtility::makeInstance(CrawlerController::class);

        $settings = $this->crawlerController->extensionSettings;
        $this->timeToLive = (int) $settings['processMaxRunTime'];
        $this->countInARun = (int) $settings['countInARun'];
        $this->processLimit = (int) $settings['processLimit'];
        // Stored as a real boolean; truthiness is identical to the former intval() value.
        $this->verbose = (bool) (int) $settings['processVerbose'];
    }

    /**
     * Starts multiple processes and keeps starting new ones until the queue
     * has no pending items left or the given number of iterations is reached.
     *
     * Every iteration sleeps for one second, so $timeout is roughly the
     * maximum duration in seconds (plus the time spent starting processes).
     *
     * @param integer $timeout maximum number of one-second iterations
     *
     * @throws \RuntimeException if processLimit is not greater than 1
     * @throws \Exception if a crawler process could not be started
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        // A status line is printed each time the pending count dropped by more than this amount.
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        if ($this->verbose) {
            $this->reportItemStatus();
        }
        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;
        $currentPendingItems = 0;
        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();
            if ($this->startRequiredProcesses()) {
                // New processes were started, so the cleanup deadline moves forward.
                $nextTimeOut = time() + $this->timeToLive;
            }
            if ($currentPendingItems == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }
            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }
            sleep(1);
            if ($nextTimeOut < time()) {
                // NOTE(review): this selects processes whose ttl is GREATER than the
                // elapsed deadline; confirm against ProcessRepository::findAll() that
                // this is the intended "timed out" condition.
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $nextTimeOut = time() + $this->timeToLive;
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcesses->getProcessIds()) . chr(10);
                }
                $this->crawlerController->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true);
            }
        }
        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Reports current status of the queue (pending and assigned item counts)
     */
    protected function reportItemStatus()
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems() . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems() . chr(10);
    }

    /**
     * According to the count of unassigned pending items and the countInARun
     * setting this method starts more crawling processes, limited by the
     * number of process slots that are still available.
     *
     * @throws \Exception if a crawler process could not be started
     *
     * @return boolean true if at least one process was started
     */
    private function startRequiredProcesses()
    {
        $ret = false;
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;
        // Guard against a missing/zero countInARun setting, which would divide by zero.
        $itemsPerProcess = max(1, (int) $this->countInARun);
        $requiredProcessesCount = ceil($this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess);
        $startProcessCount = (int) min([$availableProcessesCount, $requiredProcessesCount]);
        if ($startProcessCount <= 0) {
            return $ret;
        }
        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on the verbose flag: callers use it
                // to decide whether the timeout has to be refreshed.
                $ret = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }
        if ($this->verbose) {
            echo chr(10);
        }
        return $ret;
    }

    /**
     * Starts a new crawler process on the command line and waits up to
     * 10 seconds for it to show up in the process repository.
     *
     * @throws \Exception if no crawler process was started
     *
     * @return boolean true once the new process was detected
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        // Baseline count; a higher count afterwards means the new process registered itself.
        $current = $this->processRepository->countNotTimeouted($ttl);

        // Check whether OS is Windows
        if (TYPO3_OS === 'WIN') {
            $completePath = escapeshellcmd('start ' . $this->getCrawlerCliPath());
        } else {
            // Run in a background subshell and discard its output.
            $completePath = '(' . escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null';
        }

        // system() returns false when the command could not be executed.
        if (system($completePath) === false) {
            throw new \Exception('could not start process!');
        }

        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }
        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the command line used to start the crawler: the configured
     * "phpPath" setting followed by the path to typo3/cli_dispatch.phpsh and
     * the "crawler" argument. On Windows all slashes are converted to backslashes.
     *
     * @return string
     */
    public function getCrawlerCliPath()
    {
        $phpPath = $this->crawlerController->extensionSettings['phpPath'] . ' ';
        $pathToTypo3 = rtrim(GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/');
        $pathToTypo3 .= rtrim(GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/');
        $cliPart = '/typo3/cli_dispatch.phpsh crawler';
        $scriptPath = $phpPath . $pathToTypo3 . $cliPart;

        if (TYPO3_OS === 'WIN') {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        return $scriptPath;
    }
}
237