1
|
|
|
<?php |
2
|
|
|
namespace AOE\Crawler\Service; |
3
|
|
|
|
4
|
|
|
/*************************************************************** |
5
|
|
|
* Copyright notice |
6
|
|
|
* |
7
|
|
|
* (c) 2019 AOE GmbH <[email protected]> |
8
|
|
|
* |
9
|
|
|
* All rights reserved |
10
|
|
|
* |
11
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
12
|
|
|
* free software; you can redistribute it and/or modify |
13
|
|
|
* it under the terms of the GNU General Public License as published by |
14
|
|
|
* the Free Software Foundation; either version 3 of the License, or |
15
|
|
|
* (at your option) any later version. |
16
|
|
|
* |
17
|
|
|
* The GNU General Public License can be found at |
18
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
19
|
|
|
* |
20
|
|
|
* This script is distributed in the hope that it will be useful, |
21
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
22
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23
|
|
|
* GNU General Public License for more details. |
24
|
|
|
* |
25
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
26
|
|
|
***************************************************************/ |
27
|
|
|
|
28
|
|
|
use AOE\Crawler\Controller\CrawlerController; |
29
|
|
|
use AOE\Crawler\Domain\Repository\ProcessRepository; |
30
|
|
|
use AOE\Crawler\Domain\Repository\QueueRepository; |
31
|
|
|
use TYPO3\CMS\Core\Utility\CommandUtility; |
32
|
|
|
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; |
33
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
34
|
|
|
use TYPO3\CMS\Core\Utility\VersionNumberUtility; |
35
|
|
|
use TYPO3\CMS\Extbase\Object\ObjectManager; |
36
|
|
|
|
37
|
|
|
/**
 * Class ProcessService
 *
 * Starts crawler worker processes on the command line and supervises them
 * until the queue has no pending items left or the given timeout is reached.
 * All limits are read from the extension settings of the CrawlerController.
 *
 * @package AOE\Crawler\Service
 */
class ProcessService
{
    /**
     * Seconds added to the current time to compute process deadlines
     * (extension setting "processMaxRunTime").
     *
     * @var integer
     */
    private $timeToLive;

    /**
     * Number of queue items one process is expected to handle
     * (extension setting "countInARun").
     *
     * @var integer
     */
    private $countInARun;

    /**
     * Upper bound for concurrently active processes
     * (extension setting "processLimit").
     *
     * @var integer
     */
    private $processLimit;

    /**
     * @var CrawlerController
     */
    private $crawlerController;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Whether progress information is echoed
     * (extension setting "processVerbose").
     *
     * @var boolean
     */
    private $verbose;

    /**
     * Resolves the repositories and the crawler controller through the
     * Extbase object manager and reads the numeric extension settings.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->crawlerController = $objectManager->get(CrawlerController::class);

        $settings = $this->crawlerController->extensionSettings;
        $this->timeToLive = intval($settings['processMaxRunTime']);
        $this->countInARun = intval($settings['countInARun']);
        $this->processLimit = intval($settings['processLimit']);
        // Stored as a real boolean; truthiness is the same as the former integer value.
        $this->verbose = intval($settings['processVerbose']) !== 0;
    }

    /**
     * starts multiple processes
     *
     * Polls the queue up to $timeout times, sleeping one second per
     * iteration, and starts further processes whenever capacity is available.
     * The loop ends early as soon as no pending items are left.
     *
     * @param integer $timeout maximum number of polling iterations (about one second each)
     *
     * @throws \RuntimeException if processLimit is not greater than 1
     * @throws \Exception if a process could not be started
     */
    public function multiProcess($timeout)
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        // A status line is printed each time the pending count drops by more than this many items.
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        if ($this->verbose) {
            $this->reportItemStatus();
        }

        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;
        $currentPendingItems = 0;

        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();

            // Every successful start pushes the cleanup deadline further into the future.
            if ($this->startRequiredProcesses()) {
                $nextTimeOut = time() + $this->timeToLive;
            }

            if ($currentPendingItems == 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }

            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }

            sleep(1);

            if ($nextTimeOut < time()) {
                // NOTE(review): the filter selects processes whose ttl is GREATER than the
                // expired deadline; confirm against ProcessRepository::findAll() that this
                // really yields the timed-out processes. Kept unchanged.
                $timedOutProcesses = $this->processRepository->findAll('', 'DESC', null, 0, 'ttl >' . $nextTimeOut);
                $nextTimeOut = time() + $this->timeToLive;
                $timedOutProcessIds = $timedOutProcesses->getProcessIds();
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $timedOutProcessIds) . chr(10);
                }
                $this->crawlerController->CLI_releaseProcesses($timedOutProcessIds, true);
            }
        }

        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Reports current status of queue
     *
     * Echoes the number of pending items and of assigned pending items.
     */
    protected function reportItemStatus()
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems()
            . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems()
            . chr(10);
    }

    /**
     * according to the given count of pending items and the countInARun Setting this method
     * starts more crawling processes
     *
     * The number of processes started is the smaller of the free process
     * slots (processLimit minus active processes) and the number of processes
     * needed to work off all unassigned pending items.
     *
     * @return boolean if processes are started
     * @throws \Exception
     */
    private function startRequiredProcesses()
    {
        $currentProcesses = $this->processRepository->countActive();
        $availableProcessesCount = $this->processLimit - $currentProcesses;

        // countInARun comes from intval() of a setting and may be 0; treat anything
        // below 1 as "one item per process" instead of dividing by zero.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int) ceil(
            $this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess
        );

        $startProcessCount = min($availableProcessesCount, $requiredProcessesCount);
        if ($startProcessCount <= 0) {
            return false;
        }

        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }

        $started = false;
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on verbosity: callers use it to
                // decide whether the timeout deadline has to be refreshed.
                $started = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }

        if ($this->verbose) {
            echo chr(10);
        }

        return $started;
    }

    /**
     * starts new process
     *
     * Launches the crawler CLI command in the background and then checks up
     * to ten times, one second apart, whether the number of not timed-out
     * processes has grown.
     *
     * @return boolean true once the new process is counted by the process repository
     * @throws \Exception if no crawler process was started
     */
    public function startProcess()
    {
        $ttl = (time() + $this->timeToLive - 1);
        $current = $this->processRepository->countNotTimeouted($ttl);

        // Check whether OS is Windows
        if (TYPO3_OS === 'WIN') {
            $completePath = escapeshellcmd('start ' . $this->getCrawlerCliPath());
        } else {
            $completePath = '(' . escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null';
        }

        $fileHandler = CommandUtility::exec($completePath);
        if ($fileHandler === false) {
            throw new \Exception('could not start process!');
        }

        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }

        throw new \Exception('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the path to start the crawler from the command line
     *
     * If a composer.json exists in the directory named by the
     * TYPO3_PATH_COMPOSER_ROOT environment variable, the binary directory is
     * taken from its "config" section ("bin-dir", else "vendor-dir" + "/bin",
     * else "vendor/bin"). Otherwise the path of the typo3_console extension
     * is used, with a "Scripts/" sub directory for versions below 5.0.0.
     *
     * @return string
     * @throws \TYPO3\CMS\Core\Package\Exception
     */
    public function getCrawlerCliPath()
    {
        $composerRootDir = getenv('TYPO3_PATH_COMPOSER_ROOT') . '/';
        $composerFile = $composerRootDir . 'composer.json';
        $phpPath = $this->crawlerController->extensionSettings['phpPath'] . ' ';
        $cliPart = 'typo3cms crawler:crawlqueue';

        if (file_exists($composerFile)) {
            $jsonDecoded = json_decode(file_get_contents($composerFile), true);

            if (isset($jsonDecoded['config']['bin-dir'])) {
                $binDir = $jsonDecoded['config']['bin-dir'];
            } elseif (isset($jsonDecoded['config']['vendor-dir'])) {
                $binDir = $jsonDecoded['config']['vendor-dir'] . '/bin';
            } else {
                $binDir = 'vendor/bin';
            }
            $scriptPath = $phpPath . $composerRootDir . $binDir . '/' . $cliPart;
        } else {
            $typo3ConsolePath = ExtensionManagementUtility::extPath('typo3_console');
            $typo3ConsoleVersion = VersionNumberUtility::convertVersionNumberToInteger(
                ExtensionManagementUtility::getExtensionVersion('typo3_console')
            );
            // typo3_console before 5.0.0 ships the executable in a "Scripts/" sub directory.
            $scriptDir = $typo3ConsoleVersion < 5000000 ? 'Scripts/' : '';
            $scriptPath = $phpPath . $typo3ConsolePath . $scriptDir . $cliPart;
        }

        if (TYPO3_OS === 'WIN') {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        // An empty phpPath setting leaves a leading blank that has to be removed.
        return ltrim($scriptPath);
    }
}
264
|
|
|
|
This method has been deprecated. The supplier of the class has supplied an explanatory message.
The explanatory message should give you some clue as to whether and when the method will be removed from the class and what other method or class to use instead.