Completed
Branch master (b7ffcb)
by Tomas Norre
17:57
created

ProcessService   A

Complexity

Total Complexity 30

Size/Duplication

Total Lines 198
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 5

Importance

Changes 0
Metric Value
dl 0
loc 198
rs 10
c 0
b 0
f 0
wmc 30
lcom 1
cbo 5

6 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 10 1
C multiProcess() 0 45 13
A reportItemStatus() 0 4 1
C startRequiredProcesses() 0 27 8
B startProcess() 0 25 5
A getCrawlerCliPath() 0 14 2
1
<?php
2
namespace AOE\Crawler\Service;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Controller\CrawlerController;
29
use AOE\Crawler\Domain\Repository\ProcessRepository;
30
use AOE\Crawler\Domain\Repository\QueueRepository;
31
use TYPO3\CMS\Core\Utility\GeneralUtility;
32
33
/**
 * Class ProcessService
 *
 * Starts crawler CLI worker processes and supervises them while the crawler
 * queue still contains pending items.
 *
 * @package AOE\Crawler\Service
 */
class ProcessService
{
    /**
     * Value of the "processMaxRunTime" extension setting; added to time()
     * to compute process deadlines.
     *
     * @var int
     */
    private $timeToLive;

    /**
     * Value of the "countInARun" extension setting; used as the divisor when
     * computing how many processes the unassigned queue items require.
     *
     * @var int
     */
    private $countInARun;

    /**
     * Value of the "processLimit" extension setting; upper bound for the
     * number of active processes.
     *
     * @var int
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerObj;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Value of the "processVerbose" extension setting; when true, progress
     * information is echoed.
     *
     * @var bool
     */
    private $verbose;

    /**
     * Creates the repositories and the crawler controller and reads the
     * process related extension settings from the controller.
     */
    public function __construct()
    {
        $this->processRepository = new ProcessRepository();
        $this->queueRepository = new QueueRepository();
        $this->crawlerObj = GeneralUtility::makeInstance(CrawlerController::class);

        $settings = $this->crawlerObj->extensionSettings;
        $this->timeToLive = intval($settings['processMaxRunTime']);
        $this->countInARun = intval($settings['countInARun']);
        $this->processLimit = intval($settings['processLimit']);
        // Stored as a real boolean so it matches the documented property type.
        $this->verbose = (bool) intval($settings['processVerbose']);
    }

    /**
     * Starts multiple processes and keeps starting new ones, polling roughly
     * once per second, until no items are pending or $timeout iterations
     * have passed.
     *
     * @param int $timeout maximum number of polling iterations (each iteration sleeps one second)
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     * @throws \Exception if a process could not be started
     *
     * @return void
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        // Seeded here so the check after the loop is defined even when the
        // loop body never runs ($timeout <= 0).
        $currentPendingItems = $pendingItemsStart;

        if ($this->verbose) {
            $this->reportItemStatus();
        }
        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;

        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();

            // Whenever new processes were started the cleanup deadline is pushed back.
            if ($this->startRequiredProcesses()) {
                $nextTimeOut = time() + $this->timeToLive;
            }

            // Loose comparison kept on purpose: the repository's return type is not visible here.
            if ($currentPendingItems == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }

            // Report again only after the queue shrank by more than $itemReportLimit items.
            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }

            sleep(1);

            if ($nextTimeOut < time()) {
                // NOTE(review): the filter selects processes with ttl GREATER than the
                // expired deadline; confirm against ProcessRepository::findAll() that
                // this is the intended set of processes to release.
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $timedOutProcessIds = $timedOutProcesses->getProcessIds();
                $nextTimeOut = time() + $this->timeToLive;
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcessIds) . chr(10);
                }
                $this->crawlerObj->CLI_releaseProcesses($timedOutProcessIds, true);
            }
        }

        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Reports current status of the queue (pending and assigned item counts).
     *
     * @return void
     */
    protected function reportItemStatus()
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems() . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems() . chr(10);
    }

    /**
     * According to the count of unassigned pending items and the countInARun
     * setting this method starts more crawling processes, never exceeding the
     * free slots left below processLimit.
     *
     * @throws \Exception if a process could not be started
     *
     * @return bool true if at least one process was started
     */
    private function startRequiredProcesses()
    {
        // A non-positive countInARun cannot be used as a divisor. Starting nothing
        // matches what the division yielded before PHP 8 and avoids the
        // DivisionByZeroError raised by PHP 8.
        if ($this->countInARun <= 0) {
            return false;
        }

        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;
        $requiredProcessesCount = (int) ceil($this->queueRepository->countAllUnassignedPendingItems() / $this->countInARun);
        $startProcessCount = min([$availableProcessesCount, $requiredProcessesCount]);
        if ($startProcessCount <= 0) {
            return false;
        }

        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }

        $started = false;
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on the verbose flag: callers use it
                // to decide whether the cleanup deadline has to be refreshed.
                $started = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }

        if ($this->verbose) {
            echo chr(10);
        }

        return $started;
    }

    /**
     * Starts a new crawler process via the shell and waits up to 10 seconds
     * for it to show up in the process repository.
     *
     * @throws \Exception if no crawler process was started
     *
     * @return bool true once the new process is registered
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        // Baseline count; the new process counts as started once this number grows.
        $current = $this->processRepository->countNotTimeouted($ttl);

        // Check whether OS is Windows
        if (TYPO3_OS === 'WIN') {
            $completePath = escapeshellcmd('start ' . $this->getCrawlerCliPath());
        } else {
            // Run in a background subshell and discard its output.
            $completePath = '(' . escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null';
        }

        // system() returns the last line of the command output, or false on failure.
        $lastOutputLine = system($completePath);
        if ($lastOutputLine === false) {
            throw new \Exception('could not start process!');
        }

        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }

        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the path to start the crawler from the command line.
     *
     * The result is the configured "phpPath", a space, and the path of
     * typo3/cli_dispatch.phpsh below the site root followed by the "crawler"
     * argument. On Windows all forward slashes are replaced by backslashes.
     *
     * @return string
     */
    public function getCrawlerCliPath()
    {
        $phpPath = $this->crawlerObj->extensionSettings['phpPath'] . ' ';
        $pathToTypo3 = rtrim(GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/');
        $pathToTypo3 .= rtrim(GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/');
        $cliPart = '/typo3/cli_dispatch.phpsh crawler';
        $scriptPath = $phpPath . $pathToTypo3 . $cliPart;

        if (TYPO3_OS === 'WIN') {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        return $scriptPath;
    }
}
236