AOEpeople /
crawler
| 1 | <?php |
||||||
| 2 | |||||||
| 3 | declare(strict_types=1); |
||||||
| 4 | |||||||
| 5 | namespace AOE\Crawler\Service; |
||||||
| 6 | |||||||
| 7 | /*************************************************************** |
||||||
| 8 | * Copyright notice |
||||||
| 9 | * |
||||||
| 10 | * (c) 2020 AOE GmbH <dev@aoe.com> |
||||||
| 11 | * |
||||||
| 12 | * All rights reserved |
||||||
| 13 | * |
||||||
| 14 | * This script is part of the TYPO3 project. The TYPO3 project is |
||||||
| 15 | * free software; you can redistribute it and/or modify |
||||||
| 16 | * it under the terms of the GNU General Public License as published by |
||||||
| 17 | * the Free Software Foundation; either version 3 of the License, or |
||||||
| 18 | * (at your option) any later version. |
||||||
| 19 | * |
||||||
| 20 | * The GNU General Public License can be found at |
||||||
| 21 | * http://www.gnu.org/copyleft/gpl.html. |
||||||
| 22 | * |
||||||
| 23 | * This script is distributed in the hope that it will be useful, |
||||||
| 24 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
| 25 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
| 26 | * GNU General Public License for more details. |
||||||
| 27 | * |
||||||
| 28 | * This copyright notice MUST APPEAR in all copies of the script! |
||||||
| 29 | ***************************************************************/ |
||||||
| 30 | |||||||
| 31 | use AOE\Crawler\Configuration\ExtensionConfigurationProvider; |
||||||
| 32 | use AOE\Crawler\Controller\CrawlerController; |
||||||
| 33 | use AOE\Crawler\Domain\Repository\ProcessRepository; |
||||||
| 34 | use AOE\Crawler\Domain\Repository\QueueRepository; |
||||||
| 35 | use AOE\Crawler\Exception\ProcessException; |
||||||
| 36 | use AOE\Crawler\Utility\PhpBinaryUtility; |
||||||
| 37 | use TYPO3\CMS\Core\Compatibility\PublicMethodDeprecationTrait; |
||||||
| 38 | use TYPO3\CMS\Core\Compatibility\PublicPropertyDeprecationTrait; |
||||||
| 39 | use TYPO3\CMS\Core\Core\Environment; |
||||||
| 40 | use TYPO3\CMS\Core\Utility\CommandUtility; |
||||||
| 41 | use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; |
||||||
| 42 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||||
| 43 | use TYPO3\CMS\Extbase\Object\ObjectManager; |
||||||
| 44 | |||||||
/**
 * Spawns crawler CLI worker processes and, through its deprecated API,
 * supervises a multi-process run until the queue is empty or a timeout hits.
 *
 * @package AOE\Crawler\Service
 * @ignoreAnnotation("noRector")
 *
 * @internal since v9.2.5
 */
class ProcessService
{
    use PublicMethodDeprecationTrait;
    use PublicPropertyDeprecationTrait;

    /**
     * Deprecation messages keyed by property name
     * (presumably consumed by PublicPropertyDeprecationTrait - confirm).
     *
     * @var string[]
     * @noRector
     * @noRector \Rector\DeadCode\Rector\Property\RemoveUnusedPrivatePropertyRector
     * @noRector \Rector\DeadCode\Rector\Property\RemoveSetterOnlyPropertyAndMethodCallRector
     */
    private $deprecatedPublicProperties = [
        'queueRepository' => 'Using queueRepository is deprecated since 9.0.1 and will be removed in v11.x',
        'crawlerController' => 'Using crawlerController is deprecated since 9.0.1 and will be removed in v11.x',
        'countInARun' => 'Using countInARun is deprecated since 9.0.1 and will be removed in v11.x',
        'processLimit' => 'Using processLimit is deprecated since 9.0.1 and will be removed in v11.x',
        'verbose' => 'Using verbose is deprecated since 9.0.1 and will be removed in v11.x',
    ];

    /**
     * Deprecation messages keyed by method name
     * (presumably consumed by PublicMethodDeprecationTrait - confirm).
     *
     * @var string[]
     * @noRector
     * @noRector \Rector\DeadCode\Rector\Property\RemoveUnusedPrivatePropertyRector
     * @noRector \Rector\DeadCode\Rector\Property\RemoveSetterOnlyPropertyAndMethodCallRector
     */
    private $deprecatedPublicMethods = [
        'multiProcess' => 'Using ProcessService::multiProcess() is deprecated since 9.0.1 and will be removed in v11.x',
        'reportItemStatus' => 'Using ProcessService::reportItemStatus() is deprecated since 9.0.1 and will be removed in v11.x',
        'startRequiredProcesses' => 'Using ProcessService::startRequiredProcesses() is deprecated since 9.0.1 and will be removed in v11.x',
    ];

    /**
     * Value of the "processMaxRunTime" extension setting, added to time() to build deadlines.
     *
     * @var int
     */
    private $timeToLive;

    /**
     * Value of the "countInARun" extension setting.
     *
     * @var int
     * @deprecated
     */
    private $countInARun;

    /**
     * Value of the "processLimit" extension setting (upper bound of parallel processes).
     *
     * @var int
     * @deprecated
     */
    private $processLimit;

    /**
     * Not set by the constructor; resolved lazily by getCrawlerController().
     *
     * @var CrawlerController|null
     * @deprecated
     */
    private $crawlerController;

    /**
     * @var \AOE\Crawler\Domain\Repository\QueueRepository
     * @deprecated
     */
    private $queueRepository;

    /**
     * @var \AOE\Crawler\Domain\Repository\ProcessRepository
     */
    private $processRepository;

    /**
     * Extension configuration as returned by ExtensionConfigurationProvider.
     *
     * @var array
     */
    private $extensionSettings;

    /**
     * Value of the "processVerbose" extension setting; enables progress output via echo.
     *
     * @var bool
     * @deprecated
     */
    private $verbose;

    /**
     * Resolves the repositories through the ObjectManager and caches the
     * extension settings this service reads.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->extensionSettings = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class)->getExtensionConfiguration();
        $this->timeToLive = (int) $this->extensionSettings['processMaxRunTime'];
        $this->countInARun = (int) $this->extensionSettings['countInARun'];
        $this->processLimit = (int) $this->extensionSettings['processLimit'];
        $this->verbose = (bool) $this->extensionSettings['processVerbose'];
    }

    /**
     * Starts multiple processes and polls the queue once per second until no
     * pending items are left or $timeout polling rounds have passed.
     *
     * @param int $timeout maximum number of polling rounds (each round sleeps one second)
     *
     * @throws \RuntimeException if the configured processLimit is not greater than 1
     * @throws ProcessException if a worker process could not be started
     * @deprecated
     * @codeCoverageIgnore
     */
    public function multiProcess($timeout): void
    {
        if ($this->processLimit <= 1) {
            throw new \RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL);
        }

        $pendingItemsStart = $this->queueRepository->countAllPendingItems();
        // A status line is printed each time the pending count drops by more than this many items.
        $itemReportLimit = 20;
        $reportItemCount = $pendingItemsStart - $itemReportLimit;
        if ($this->verbose) {
            $this->reportItemStatus();
        }
        $this->startRequiredProcesses();
        $nextTimeOut = time() + $this->timeToLive;
        // Seeded with the initial count (not an empty string) so the check after
        // the loop compares integers even when the loop body never runs.
        $currentPendingItems = $pendingItemsStart;
        for ($i = 0; $i < $timeout; $i++) {
            $currentPendingItems = $this->queueRepository->countAllPendingItems();
            if ($this->startRequiredProcesses()) {
                // Fresh workers were started, so the cleanup deadline is pushed back.
                $nextTimeOut = time() + $this->timeToLive;
            }
            if ($currentPendingItems === 0) {
                if ($this->verbose) {
                    echo 'Finished...' . chr(10);
                }
                break;
            }
            if ($currentPendingItems < $reportItemCount) {
                if ($this->verbose) {
                    $this->reportItemStatus();
                }
                $reportItemCount = $currentPendingItems - $itemReportLimit;
            }
            sleep(1);
            if ($nextTimeOut < time()) {
                // NOTE(review): findAll() is used here, so every process the repository
                // returns is released, not only expired ones - confirm this is intended.
                $timedOutProcesses = $this->processRepository->findAll();
                $processIds = $timedOutProcesses->getProcessIds();
                $nextTimeOut = time() + $this->timeToLive;
                if ($this->verbose) {
                    echo 'Cleanup' . implode(',', $processIds) . chr(10);
                }
                $this->getCrawlerController()->CLI_releaseProcesses($processIds);
            }
        }
        if ($currentPendingItems > 0 && $this->verbose) {
            echo 'Stop with timeout' . chr(10);
        }
    }

    /**
     * Starts a new crawler process in the background and waits until the
     * process repository reports one more non-timed-out process than before.
     *
     * @return bool true once the new process showed up
     * @throws ProcessException if the command returned a non-zero exit code or
     *                          no new process appeared after 10 checks
     */
    public function startProcess(): bool
    {
        $ttl = (time() + $this->timeToLive - 1);
        $current = $this->processRepository->countNotTimeouted($ttl);

        // Check whether OS is Windows
        if (Environment::isWindows()) {
            $completePath = 'start ' . $this->getCrawlerCliPath();
        } else {
            // Run in a background subshell and discard its output.
            $completePath = '(' . $this->getCrawlerCliPath() . ' &) > /dev/null';
        }

        $output = null;
        $returnValue = 0;
        CommandUtility::exec($completePath, $output, $returnValue);
        if ($returnValue !== 0) {
            throw new ProcessException('could not start process!');
        }
        // Poll up to 10 times, one second apart, for the new process to register itself.
        for ($i = 0; $i < 10; $i++) {
            if ($this->processRepository->countNotTimeouted($ttl) > $current) {
                return true;
            }
            sleep(1);
        }
        throw new ProcessException('Something went wrong: process did not appear within 10 seconds.');
    }

    /**
     * Returns the command line used to start the crawler:
     * "<php binary> <core extension path>bin/typo3 crawler:processQueue".
     * On Windows every forward slash is replaced by a backslash.
     */
    public function getCrawlerCliPath(): string
    {
        $phpPath = PhpBinaryUtility::getPhpBinary();
        $typo3BinaryPath = ExtensionManagementUtility::extPath('core') . 'bin/';
        $cliPart = 'typo3 crawler:processQueue';
        // Don't like the spacing, but don't have a better idea for now
        $scriptPath = $phpPath . ' ' . $typo3BinaryPath . $cliPart;

        if (Environment::isWindows()) {
            $scriptPath = str_replace('/', '\\', $scriptPath);
        }

        // ltrim() drops the leading separator space when the PHP binary path is empty.
        return ltrim($scriptPath);
    }

    /**
     * Reports the current status of the queue (pending and assigned item counts) via echo.
     *
     * @deprecated
     * @codeCoverageIgnore
     */
    protected function reportItemStatus(): void
    {
        echo 'Pending:' . $this->queueRepository->countAllPendingItems() . ' / Assigned:' . $this->queueRepository->countAllAssignedPendingItems() . chr(10);
    }

    /**
     * According to the count of unassigned pending items and the countInARun
     * setting, this method starts more crawling processes, never exceeding the
     * free slots left below processLimit.
     *
     * @return bool true if at least one process was started
     * @throws ProcessException
     * @deprecated
     * @codeCoverageIgnore
     */
    private function startRequiredProcesses(): bool
    {
        $ret = false;
        $currentProcesses = $this->processRepository->findAllActive()->count();
        $availableProcessesCount = $this->processLimit - $currentProcesses;
        // Clamp the divisor: an unset or zero countInARun must not cause a division by zero.
        $itemsPerProcess = max(1, $this->countInARun);
        $requiredProcessesCount = (int) ceil($this->queueRepository->countAllUnassignedPendingItems() / $itemsPerProcess);
        $startProcessCount = min($availableProcessesCount, $requiredProcessesCount);
        if ($startProcessCount <= 0) {
            return $ret;
        }
        if ($this->verbose) {
            echo 'Start ' . $startProcessCount . ' new processes (Running:' . $currentProcesses . ')';
        }
        for ($i = 0; $i < $startProcessCount; $i++) {
            usleep(100);
            if ($this->startProcess()) {
                // The result must not depend on verbosity: the caller uses it to
                // decide whether to push back its cleanup deadline.
                $ret = true;
                if ($this->verbose) {
                    echo '.';
                }
            }
        }
        if ($this->verbose) {
            echo chr(10);
        }
        return $ret;
    }

    /**
     * Returns the crawler controller, creating it on first use because the
     * constructor never assigns it.
     *
     * NOTE(review): assumes CrawlerController can be built by the ObjectManager
     * without constructor arguments - confirm.
     */
    private function getCrawlerController(): CrawlerController
    {
        if ($this->crawlerController === null) {
            $this->crawlerController = GeneralUtility::makeInstance(ObjectManager::class)->get(CrawlerController::class);
        }

        return $this->crawlerController;
    }
}
||||||
| 289 |